From 5a3d2089b497cd9a431a118d25293dce3dc66ebe Mon Sep 17 00:00:00 2001 From: bfernandezrivas Date: Sat, 21 Mar 2026 16:56:33 +0100 Subject: [PATCH] Update lab-hyper-tuning.ipynb --- lab-hyper-tuning.ipynb | 612 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 595 insertions(+), 17 deletions(-) diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..b65f50c 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -221,11 +221,40 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#your code here\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Drop missing values\n", + "spaceship = spaceship.dropna()\n", + "\n", + "# Cabin -> deck\n", + "spaceship[\"Cabin\"] = spaceship[\"Cabin\"].str.split(\"/\").str[0]\n", + "\n", + "# Drop useless columns\n", + "spaceship = spaceship.drop([\"PassengerId\", \"Name\"], axis=1)\n", + "\n", + "# Dummies for categorical columns\n", + "spaceship = pd.get_dummies(spaceship, drop_first=True)\n", + "\n", + "# Features and target\n", + "X = spaceship.drop(\"Transported\", axis=1)\n", + "y = spaceship[\"Transported\"]\n", + "\n", + "# Train/test split\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42, stratify=y\n", + ")\n", + "\n", + "# Feature scaling\n", + "scaler = StandardScaler()\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)" ] }, { @@ -237,11 +266,54 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 3 folds for each of 24 candidates, totalling 72 fits\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1.0}\n" + ] + } + ], "source": [ - "#your code here" + "#your code here\n", + "\n", + "\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "gb = GradientBoostingClassifier(random_state=42)\n", + "\n", + "param_grid = {\n", + " \"n_estimators\": [100, 200],\n", + " \"learning_rate\": [0.01, 0.1, 0.2],\n", + " \"max_depth\": [3, 5],\n", + " \"subsample\": [0.8, 1.0]\n", + "}\n", + "\n", + "grid = GridSearchCV(\n", + " estimator=gb,\n", + " param_grid=param_grid,\n", + " cv=3,\n", + " scoring=\"accuracy\",\n", + " n_jobs=-1,\n", + " verbose=1\n", + ")\n", + "\n", + "grid.fit(X_train_scaled, y_train)\n", + "\n", + "\n", + "\n", + "print(grid.best_params_)\n" ] }, { @@ -253,11 +325,27 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tuned Accuracy: 0.783661119515885\n" + ] + } + ], "source": [ - "#your code here" + "#your code here\n", + "\n", + "best_model = grid.best_estimator_\n", + "\n", + "y_pred = best_model.predict(X_test_scaled)\n", + "\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "print(\"Tuned Accuracy:\", accuracy_score(y_test, y_pred))" ] }, { @@ -283,11 +371,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#your code here\n", + "\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "gb = GradientBoostingClassifier(random_state=42)\n", + "\n", + "param_grid = {\n", + " \"n_estimators\": [100, 200],\n", + " \"learning_rate\": [0.01, 0.1, 0.2],\n", + " \"max_depth\": [3, 5],\n", + " \"subsample\": [0.8, 1.0]\n", + "}" ] }, { @@ -299,10 +398,471 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 3 folds for each of 24 candidates, totalling 72 fits\n" + ] + }, + { + "data": { + "text/html": [ + "
GridSearchCV(cv=3, estimator=GradientBoostingClassifier(random_state=42),\n",
+       "             n_jobs=-1,\n",
+       "             param_grid={'learning_rate': [0.01, 0.1, 0.2], 'max_depth': [3, 5],\n",
+       "                         'n_estimators': [100, 200], 'subsample': [0.8, 1.0]},\n",
+       "             scoring='accuracy', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=3, estimator=GradientBoostingClassifier(random_state=42),\n", + " n_jobs=-1,\n", + " param_grid={'learning_rate': [0.01, 0.1, 0.2], 'max_depth': [3, 5],\n", + " 'n_estimators': [100, 200], 'subsample': [0.8, 1.0]},\n", + " scoring='accuracy', verbose=1)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "grid = GridSearchCV(\n", + " estimator=gb,\n", + " param_grid=param_grid,\n", + " cv=3,\n", + " scoring=\"accuracy\",\n", + " n_jobs=-1,\n", + " verbose=1\n", + ")\n", + "\n", + "grid.fit(X_train_scaled, y_train)" + ] }, { "cell_type": "markdown", @@ -313,10 +873,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1.0}\n", + "Tuned Test Accuracy: 0.783661119515885\n" + ] + } + ], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "\n", + "best_model = grid.best_estimator_\n", + "\n", + "y_pred = best_model.predict(X_test_scaled)\n", + "\n", + "print(\"Best Parameters:\", grid.best_params_)\n", + "print(\"Tuned Test Accuracy:\", accuracy_score(y_test, y_pred))" + ] } ], "metadata": { @@ -335,7 +913,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.9.6" } }, "nbformat": 4,