# Both hold-out splits use the same settings: 30% of rows go to the test
# set and the seed is fixed so the partition is repeatable.
split_opts = {"test_size": 0.3, "random_state": 65}

# Regression split: features X, target y_reg.
X_train, X_test, y_train, y_test = train_test_split(X, y_reg, **split_opts)

# Classification split: features X_cls, target y_cls.
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X_cls, y_cls, **split_opts
)
# Hyperparameters shared by both XGBoost models. importance_type='gain'
# selects the measure reported by feature_importances_ below.
xgb_params = dict(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    importance_type='gain',
    random_state=65,
    n_jobs=-1,
)

# Regression: predict trip duration (X includes is_member as a predictor)
xgb_reg = XGBRegressor(**xgb_params)
xgb_reg.fit(X_train, y_train)
# Label each importance with its feature name, taken from X's columns.
imp_reg = pd.Series(xgb_reg.feature_importances_, index=X.columns)

# Classification: predict annual member vs casual (X_cls excludes is_member)
xgb_cls = XGBClassifier(**xgb_params)
xgb_cls.fit(X_train_c, y_train_c)
# Same labelling for the classifier, using X_cls's columns.
imp_cls = pd.Series(xgb_cls.feature_importances_, index=X_cls.columns)
# Test metrics — regression
# RMSE is the square root of the test-set mean squared error.
y_pred_reg = xgb_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred_reg)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred_reg)
print(f"Regression — RMSE: {rmse:.2f} min | R²: {r2:.4f}")

# Test metrics — classification
y_pred_cls = xgb_cls.predict(X_test_c)
acc = accuracy_score(y_test_c, y_pred_cls)
f1 = f1_score(y_test_c, y_pred_cls)
print(f"Classification — Accuracy: {acc:.4f} | F1: {f1:.4f}")

# Three largest feature importances per model, rounded to 3 decimals and
# shown as {feature_name: importance}.
top3_reg = imp_reg.nlargest(3).round(3).to_dict()
print("\nTop 3 (regression):", top3_reg)
top3_cls = imp_cls.nlargest(3).round(3).to_dict()
print("Top 3 (classification):", top3_cls)