diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..b65f50c 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -221,11 +221,40 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#your code here\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Drop missing values\n", + "spaceship = spaceship.dropna()\n", + "\n", + "# Cabin -> deck\n", + "spaceship[\"Cabin\"] = spaceship[\"Cabin\"].str.split(\"/\").str[0]\n", + "\n", + "# Drop useless columns\n", + "spaceship = spaceship.drop([\"PassengerId\", \"Name\"], axis=1)\n", + "\n", + "# Dummies for categorical columns\n", + "spaceship = pd.get_dummies(spaceship, drop_first=True)\n", + "\n", + "# Features and target\n", + "X = spaceship.drop(\"Transported\", axis=1)\n", + "y = spaceship[\"Transported\"]\n", + "\n", + "# Train/test split\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42, stratify=y\n", + ")\n", + "\n", + "# Feature scaling\n", + "scaler = StandardScaler()\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)" ] }, { @@ -237,11 +266,54 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 3 folds for each of 24 candidates, totalling 72 fits\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1.0}\n" + ] + } + ], "source": [ - "#your code here" + "#your code here\n", + "\n", + "\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "gb = GradientBoostingClassifier(random_state=42)\n", + "\n", + "param_grid = {\n", + " \"n_estimators\": [100, 200],\n", + " \"learning_rate\": [0.01, 0.1, 0.2],\n", + " \"max_depth\": [3, 5],\n", + " \"subsample\": [0.8, 1.0]\n", + "}\n", + "\n", + "grid = GridSearchCV(\n", + " estimator=gb,\n", + " param_grid=param_grid,\n", + " cv=3,\n", + " scoring=\"accuracy\",\n", + " n_jobs=-1,\n", + " verbose=1\n", + ")\n", + "\n", + "grid.fit(X_train_scaled, y_train)\n", + "\n", + "\n", + "\n", + "print(grid.best_params_)\n" ] }, { @@ -253,11 +325,27 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tuned Accuracy: 0.783661119515885\n" + ] + } + ], "source": [ - "#your code here" + "#your code here\n", + "\n", + "best_model = grid.best_estimator_\n", + "\n", + "y_pred = best_model.predict(X_test_scaled)\n", + "\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "print(\"Tuned Accuracy:\", accuracy_score(y_test, y_pred))" ] }, { @@ -283,11 +371,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "#your code here\n", + "\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "gb = GradientBoostingClassifier(random_state=42)\n", + "\n", + "param_grid = {\n", + " \"n_estimators\": [100, 200],\n", + " \"learning_rate\": [0.01, 0.1, 0.2],\n", + " \"max_depth\": [3, 5],\n", + " \"subsample\": [0.8, 1.0]\n", + "}" ] }, { @@ -299,10 +398,471 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 3 folds for each of 24 candidates, totalling 72 fits\n" + ] + }, + { + "data": { + "text/html": [ + "
GridSearchCV(cv=3, estimator=GradientBoostingClassifier(random_state=42),\n",
+ " n_jobs=-1,\n",
+ " param_grid={'learning_rate': [0.01, 0.1, 0.2], 'max_depth': [3, 5],\n",
+ " 'n_estimators': [100, 200], 'subsample': [0.8, 1.0]},\n",
+ " scoring='accuracy', verbose=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=3, estimator=GradientBoostingClassifier(random_state=42),\n",
+ " n_jobs=-1,\n",
+ " param_grid={'learning_rate': [0.01, 0.1, 0.2], 'max_depth': [3, 5],\n",
+ " 'n_estimators': [100, 200], 'subsample': [0.8, 1.0]},\n",
+ " scoring='accuracy', verbose=1)GradientBoostingClassifier(random_state=42)
GradientBoostingClassifier(random_state=42)