ptyadana
diff --git a/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/01.Logistic Regression - Hyperparameters.ipynb
Lines changed: 152 additions & 0 deletions b/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/01.Logistic Regression - Hyperparameters.ipynb
Lines changed: 152 additions & 0 deletions
diff --git a/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/02.Logistic Regression - Fit and evaluate a model.ipynb
Lines changed: 179 additions & 0 deletions b/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/02.Logistic Regression - Fit and evaluate a model.ipynb
Lines changed: 179 additions & 0 deletions
diff --git a/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/img/CV.png
289 KB b/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/img/CV.png
289 KB
diff --git a/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/img/Cross-Val.png
96.2 KB b/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/img/Cross-Val.png
96.2 KB
diff --git a/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/img/c.png
181 KB b/‎ML - Applied Machine Learning - Algorithms/02.Logistic Regression/img/c.png
181 KB
diff --git a/‎ML - Applied Machine Learning - Algorithms/Pickled_Models/LR_model.pkl
836 Bytes b/‎ML - Applied Machine Learning - Algorithms/Pickled_Models/LR_model.pkl
836 Bytes
@@ -0,0 +1,152 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Logistic Regression: Hyperparameters\n",
+    "\n",
+    "Import [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) from `sklearn` and explore the hyperparameters."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Import Logistic Regression Algorithm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression()"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LogisticRegression\n",
+    "\n",
+    "LogisticRegression()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['C',\n",
+       " '__class__',\n",
+       " '__delattr__',\n",
+       " '__dict__',\n",
+       " '__dir__',\n",
+       " '__doc__',\n",
+       " '__eq__',\n",
+       " '__format__',\n",
+       " '__ge__',\n",
+       " '__getattribute__',\n",
+       " '__getstate__',\n",
+       " '__gt__',\n",
+       " '__hash__',\n",
+       " '__init__',\n",
+       " '__init_subclass__',\n",
+       " '__le__',\n",
+       " '__lt__',\n",
+       " '__module__',\n",
+       " '__ne__',\n",
+       " '__new__',\n",
+       " '__reduce__',\n",
+       " '__reduce_ex__',\n",
+       " '__repr__',\n",
+       " '__setattr__',\n",
+       " '__setstate__',\n",
+       " '__sizeof__',\n",
+       " '__str__',\n",
+       " '__subclasshook__',\n",
+       " '__weakref__',\n",
+       " '_check_n_features',\n",
+       " '_estimator_type',\n",
+       " '_get_param_names',\n",
+       " '_get_tags',\n",
+       " '_more_tags',\n",
+       " '_predict_proba_lr',\n",
+       " '_repr_html_',\n",
+       " '_repr_html_inner',\n",
+       " '_repr_mimebundle_',\n",
+       " '_validate_data',\n",
+       " 'class_weight',\n",
+       " 'decision_function',\n",
+       " 'densify',\n",
+       " 'dual',\n",
+       " 'fit',\n",
+       " 'fit_intercept',\n",
+       " 'get_params',\n",
+       " 'intercept_scaling',\n",
+       " 'l1_ratio',\n",
+       " 'max_iter',\n",
+       " 'multi_class',\n",
+       " 'n_jobs',\n",
+       " 'penalty',\n",
+       " 'predict',\n",
+       " 'predict_log_proba',\n",
+       " 'predict_proba',\n",
+       " 'random_state',\n",
+       " 'score',\n",
+       " 'set_params',\n",
+       " 'solver',\n",
+       " 'sparsify',\n",
+       " 'tol',\n",
+       " 'verbose',\n",
+       " 'warm_start']"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dir(LogisticRegression())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -0,0 +1,179 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Logistic Regression: Fit and evaluate a model\n",
+    "\n",
+    "This notebook uses the Titanic dataset from [this Kaggle competition](https://www.kaggle.com/c/titanic/overview).\n",
+    "\n",
+    "In this section, we will fit and evaluate a simple Logistic Regression model."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Read in Data\n",
+    "\n",
+    "![CV](img/CV.png)\n",
+    "![Cross-Val](img/Cross-Val.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "import pandas as pd\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.model_selection import GridSearchCV  # cross-validated grid search for hyperparameter tuning\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore', category=FutureWarning)\n",
+    "warnings.filterwarnings('ignore', category=DeprecationWarning)\n",
+    "\n",
+    "train_features = pd.read_csv('../Data/train_features.csv')\n",
+    "train_labels = pd.read_csv('../Data/train_labels.csv', header=None)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Hyperparameter tuning\n",
+    "\n",
+    "![C](img/c.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def print_results(results):\n",
+    "    print('BEST PARAMS: {}\\n'.format(results.best_params_))\n",
+    "\n",
+    "    means = results.cv_results_['mean_test_score']\n",
+    "    stds = results.cv_results_['std_test_score']\n",
+    "    for mean, std, params in zip(means, stds, results.cv_results_['params']):\n",
+    "        print('{} (+/-{}) for {}'.format(round(mean, 3), round(std * 2, 3), params))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BEST PARAMS: {'C': 1, 'max_iter': 1000}\n",
+      "\n",
+      "0.67 (+/-0.077) for {'C': 0.001, 'max_iter': 1000}\n",
+      "0.708 (+/-0.098) for {'C': 0.01, 'max_iter': 1000}\n",
+      "0.777 (+/-0.134) for {'C': 0.1, 'max_iter': 1000}\n",
+      "0.8 (+/-0.118) for {'C': 1, 'max_iter': 1000}\n",
+      "0.794 (+/-0.116) for {'C': 10, 'max_iter': 1000}\n",
+      "0.794 (+/-0.116) for {'C': 100, 'max_iter': 1000}\n",
+      "0.794 (+/-0.116) for {'C': 1000, 'max_iter': 1000}\n"
+     ]
+    }
+   ],
+   "source": [
+    "lr = LogisticRegression()\n",
+    "parameters = {\n",
+    "    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],\n",
+    "    'max_iter':[1000]\n",
+    "}\n",
+    "\n",
+    "cv = GridSearchCV(lr, parameters, cv = 5)\n",
+    "cv.fit(train_features, train_labels.values.ravel())\n",
+    "\n",
+    "print_results(cv)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression(C=1, max_iter=1000)"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Inspect the best-performing estimator found by the grid search\n",
+    "cv.best_estimator_"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Write out pickled model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['../Pickled_Models/LR_model.pkl']"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Pickle the best fitted model so it can be reloaded later without retraining\n",
+    "joblib.dump(cv.best_estimator_, '../Pickled_Models/LR_model.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}