Skip to content

Commit 93e697e

Browse files
committed
Logistic Regression module completed
1 parent 637729e commit 93e697e

File tree

6 files changed

+331
-0
lines changed

6 files changed

+331
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Logistic Regression: Hyperparameters\n",
8+
"\n",
9+
"Import [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) from `sklearn` and explore the hyperparameters."
10+
]
11+
},
12+
{
13+
"cell_type": "markdown",
14+
"metadata": {},
15+
"source": [
16+
"### Import Logistic Regression Algorithm"
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": 8,
22+
"metadata": {},
23+
"outputs": [
24+
{
25+
"data": {
26+
"text/plain": [
27+
"LogisticRegression()"
28+
]
29+
},
30+
"execution_count": 8,
31+
"metadata": {},
32+
"output_type": "execute_result"
33+
}
34+
],
35+
"source": [
36+
"from sklearn.linear_model import LogisticRegression\n",
37+
"\n",
38+
"LogisticRegression()"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": 9,
44+
"metadata": {},
45+
"outputs": [
46+
{
47+
"data": {
48+
"text/plain": [
49+
"['C',\n",
50+
" '__class__',\n",
51+
" '__delattr__',\n",
52+
" '__dict__',\n",
53+
" '__dir__',\n",
54+
" '__doc__',\n",
55+
" '__eq__',\n",
56+
" '__format__',\n",
57+
" '__ge__',\n",
58+
" '__getattribute__',\n",
59+
" '__getstate__',\n",
60+
" '__gt__',\n",
61+
" '__hash__',\n",
62+
" '__init__',\n",
63+
" '__init_subclass__',\n",
64+
" '__le__',\n",
65+
" '__lt__',\n",
66+
" '__module__',\n",
67+
" '__ne__',\n",
68+
" '__new__',\n",
69+
" '__reduce__',\n",
70+
" '__reduce_ex__',\n",
71+
" '__repr__',\n",
72+
" '__setattr__',\n",
73+
" '__setstate__',\n",
74+
" '__sizeof__',\n",
75+
" '__str__',\n",
76+
" '__subclasshook__',\n",
77+
" '__weakref__',\n",
78+
" '_check_n_features',\n",
79+
" '_estimator_type',\n",
80+
" '_get_param_names',\n",
81+
" '_get_tags',\n",
82+
" '_more_tags',\n",
83+
" '_predict_proba_lr',\n",
84+
" '_repr_html_',\n",
85+
" '_repr_html_inner',\n",
86+
" '_repr_mimebundle_',\n",
87+
" '_validate_data',\n",
88+
" 'class_weight',\n",
89+
" 'decision_function',\n",
90+
" 'densify',\n",
91+
" 'dual',\n",
92+
" 'fit',\n",
93+
" 'fit_intercept',\n",
94+
" 'get_params',\n",
95+
" 'intercept_scaling',\n",
96+
" 'l1_ratio',\n",
97+
" 'max_iter',\n",
98+
" 'multi_class',\n",
99+
" 'n_jobs',\n",
100+
" 'penalty',\n",
101+
" 'predict',\n",
102+
" 'predict_log_proba',\n",
103+
" 'predict_proba',\n",
104+
" 'random_state',\n",
105+
" 'score',\n",
106+
" 'set_params',\n",
107+
" 'solver',\n",
108+
" 'sparsify',\n",
109+
" 'tol',\n",
110+
" 'verbose',\n",
111+
" 'warm_start']"
112+
]
113+
},
114+
"execution_count": 9,
115+
"metadata": {},
116+
"output_type": "execute_result"
117+
}
118+
],
119+
"source": [
120+
"dir(LogisticRegression())"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": null,
126+
"metadata": {},
127+
"outputs": [],
128+
"source": []
129+
}
130+
],
131+
"metadata": {
132+
"kernelspec": {
133+
"display_name": "Python 3",
134+
"language": "python",
135+
"name": "python3"
136+
},
137+
"language_info": {
138+
"codemirror_mode": {
139+
"name": "ipython",
140+
"version": 3
141+
},
142+
"file_extension": ".py",
143+
"mimetype": "text/x-python",
144+
"name": "python",
145+
"nbconvert_exporter": "python",
146+
"pygments_lexer": "ipython3",
147+
"version": "3.8.3"
148+
}
149+
},
150+
"nbformat": 4,
151+
"nbformat_minor": 2
152+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Logistic Regression: Fit and evaluate a model\n",
8+
"\n",
9+
"This notebook uses the Titanic dataset from [this Kaggle competition](https://www.kaggle.com/c/titanic/overview).\n",
10+
"\n",
11+
"In this section, we will fit and evaluate a simple Logistic Regression model."
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"### Read in Data\n",
19+
"\n",
20+
"![CV](img/CV.png)\n",
21+
"![Cross-Val](img/Cross-Val.png)"
22+
]
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": 15,
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"import joblib\n",
31+
"import pandas as pd\n",
32+
"from sklearn.linear_model import LogisticRegression\n",
33+
"from sklearn.model_selection import GridSearchCV # cross-validated grid search used for hyperparameter tuning\n",
34+
"\n",
35+
"import warnings\n",
36+
"warnings.filterwarnings('ignore', category=FutureWarning)\n",
37+
"warnings.filterwarnings('ignore', category=DeprecationWarning)\n",
38+
"\n",
39+
"train_features = pd.read_csv('../Data/train_features.csv')\n",
40+
"train_labels = pd.read_csv('../Data/train_labels.csv', header=None)"
41+
]
42+
},
43+
{
44+
"cell_type": "markdown",
45+
"metadata": {},
46+
"source": [
47+
"### Hyperparameter tuning\n",
48+
"\n",
49+
"![C](img/c.png)"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 16,
55+
"metadata": {},
56+
"outputs": [],
57+
"source": [
58+
"def print_results(results):\n",
59+
" print('BEST PARAMS: {}\\n'.format(results.best_params_))\n",
60+
"\n",
61+
" means = results.cv_results_['mean_test_score']\n",
62+
" stds = results.cv_results_['std_test_score']\n",
63+
" for mean, std, params in zip(means, stds, results.cv_results_['params']):\n",
64+
" print('{} (+/-{}) for {}'.format(round(mean, 3), round(std * 2, 3), params))"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": 21,
70+
"metadata": {},
71+
"outputs": [
72+
{
73+
"name": "stdout",
74+
"output_type": "stream",
75+
"text": [
76+
"BEST PARAMS: {'C': 1, 'max_iter': 1000}\n",
77+
"\n",
78+
"0.67 (+/-0.077) for {'C': 0.001, 'max_iter': 1000}\n",
79+
"0.708 (+/-0.098) for {'C': 0.01, 'max_iter': 1000}\n",
80+
"0.777 (+/-0.134) for {'C': 0.1, 'max_iter': 1000}\n",
81+
"0.8 (+/-0.118) for {'C': 1, 'max_iter': 1000}\n",
82+
"0.794 (+/-0.116) for {'C': 10, 'max_iter': 1000}\n",
83+
"0.794 (+/-0.116) for {'C': 100, 'max_iter': 1000}\n",
84+
"0.794 (+/-0.116) for {'C': 1000, 'max_iter': 1000}\n"
85+
]
86+
}
87+
],
88+
"source": [
89+
"lr = LogisticRegression()\n",
90+
"parameters = {\n",
91+
" 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],\n",
92+
" 'max_iter':[1000]\n",
93+
"}\n",
94+
"\n",
95+
"cv = GridSearchCV(lr, parameters, cv = 5)\n",
96+
"cv.fit(train_features, train_labels.values.ravel())\n",
97+
"\n",
98+
"print_results(cv)"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": 22,
104+
"metadata": {},
105+
"outputs": [
106+
{
107+
"data": {
108+
"text/plain": [
109+
"LogisticRegression(C=1, max_iter=1000)"
110+
]
111+
},
112+
"execution_count": 22,
113+
"metadata": {},
114+
"output_type": "execute_result"
115+
}
116+
],
117+
"source": [
118+
"# Inspect the best estimator found by the grid search\n",
119+
"cv.best_estimator_"
120+
]
121+
},
122+
{
123+
"cell_type": "markdown",
124+
"metadata": {},
125+
"source": [
126+
"### Write out pickled model"
127+
]
128+
},
129+
{
130+
"cell_type": "code",
131+
"execution_count": 23,
132+
"metadata": {},
133+
"outputs": [
134+
{
135+
"data": {
136+
"text/plain": [
137+
"['../Pickled_Models/LR_model.pkl']"
138+
]
139+
},
140+
"execution_count": 23,
141+
"metadata": {},
142+
"output_type": "execute_result"
143+
}
144+
],
145+
"source": [
146+
"# Pickle the best fitted model to disk for future use\n",
147+
"joblib.dump(cv.best_estimator_, '../Pickled_Models/LR_model.pkl')"
148+
]
149+
},
150+
{
151+
"cell_type": "code",
152+
"execution_count": null,
153+
"metadata": {},
154+
"outputs": [],
155+
"source": []
156+
}
157+
],
158+
"metadata": {
159+
"kernelspec": {
160+
"display_name": "Python 3",
161+
"language": "python",
162+
"name": "python3"
163+
},
164+
"language_info": {
165+
"codemirror_mode": {
166+
"name": "ipython",
167+
"version": 3
168+
},
169+
"file_extension": ".py",
170+
"mimetype": "text/x-python",
171+
"name": "python",
172+
"nbconvert_exporter": "python",
173+
"pygments_lexer": "ipython3",
174+
"version": "3.8.3"
175+
}
176+
},
177+
"nbformat": 4,
178+
"nbformat_minor": 2
179+
}
Loading
Loading
Loading
Binary file not shown.

0 commit comments

Comments
 (0)