
Commit f24ce12

ml basics
1 parent bf41736 commit f24ce12

2 files changed: +201 −0 lines changed

numpy_class/classification_example.py

Lines changed: 99 additions & 0 deletions
# https://deeplearningcourses.com/c/deep-learning-prerequisites-the-numpy-stack-in-python
# https://www.udemy.com/deep-learning-prerequisites-the-numpy-stack-in-python

from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future


# just in case we need it
import numpy as np


# import the function that will get the data
# yes, sklearn comes with built-in datasets!
from sklearn.datasets import load_breast_cancer

# load the data
data = load_breast_cancer()

# check the type of 'data'
type(data)

# note: it is a Bunch object
# this basically acts like a dictionary where you can treat the keys like attributes
data.keys()
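
# for example, since a Bunch is dict-like, bracket access and
# attribute access give back the same array
assert (data['data'] == data.data).all()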

# 'data' (the attribute) means the input data
data.data.shape
# it has 569 samples, 30 features

# 'targets'
data.target
# note how the targets are just 0s and 1s
# normally, when you have K targets, they are labeled 0..K-1

# their meaning is not lost
data.target_names

# there are also 569 corresponding targets
data.target.shape

# you can also determine the meaning of each feature
data.feature_names
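
# an aside: you can map the integer targets back to their names
# by indexing target_names with the target array
data.target_names[data.target]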


# normally we would put all of our imports at the top
# but this lets us tell a story
from sklearn.model_selection import train_test_split


# split the data into train and test sets
# this lets us simulate how our model will perform in the future
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.33)
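
# a quick check: with test_size=0.33, roughly one third of the
# 569 samples end up in the test set
X_train.shape, X_test.shape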


# instantiate a classifier and train it
from sklearn.ensemble import RandomForestClassifier


model = RandomForestClassifier()
model.fit(X_train, y_train)


# evaluate the model's performance
model.score(X_train, y_train)
model.score(X_test, y_test)


# how you can make predictions
predictions = model.predict(X_test)

# what did we get?
predictions

# manually check the accuracy of your predictions
N = len(y_test)
np.sum(predictions == y_test) / N  # can also just call np.mean()
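# the np.mean() version of the same computation
np.mean(predictions == y_test)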



# we can even use deep learning to solve the same problem!
from sklearn.neural_network import MLPClassifier

# you'll learn why scaling is needed in a later course
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train2 = scaler.fit_transform(X_train)
X_test2 = scaler.transform(X_test)
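
# an extra sanity check: after StandardScaler, each training feature
# has (approximately) zero mean and unit variance
X_train2.mean(axis=0).round(2)
X_train2.std(axis=0).round(2)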

model = MLPClassifier(max_iter=500)
model.fit(X_train2, y_train)


# evaluate the model's performance
model.score(X_train2, y_train)
model.score(X_test2, y_test)

numpy_class/regression_example.py

Lines changed: 102 additions & 0 deletions
# https://deeplearningcourses.com/c/deep-learning-prerequisites-the-numpy-stack-in-python
# https://www.udemy.com/deep-learning-prerequisites-the-numpy-stack-in-python

# Get the data from:
# https://archive.ics.uci.edu/ml/datasets/Airfoil+Self-Noise

from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future


# just in case we need it
import numpy as np
import pandas as pd


# load the data
# important note: this is where we will usually put data files
df = pd.read_csv('../large_files/airfoil_self_noise.dat', sep='\t', header=None)

# check the data
df.head()
df.info()
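
# for reference, per the UCI dataset page: columns 0-4 are frequency (Hz),
# angle of attack (deg), chord length (m), free-stream velocity (m/s), and
# suction side displacement thickness (m); column 5 is the scaled sound
# pressure level (dB)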

# get the inputs
data = df[[0,1,2,3,4]].values

# get the outputs
target = df[5].values

# tiny update: pandas is moving from .as_matrix() to the equivalent .values


# normally we would put all of our imports at the top
# but this lets us tell a story
from sklearn.model_selection import train_test_split


# split the data into train and test sets
# this lets us simulate how our model will perform in the future
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33)


# instantiate a regressor and train it
from sklearn.linear_model import LinearRegression


model = LinearRegression()
model.fit(X_train, y_train)


# evaluate the model's performance
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))


# how you can make predictions
predictions = model.predict(X_test)

# what did we get?
predictions
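
# an aside: for a regressor, score() returns the R^2 value,
# which we can verify manually from the predictions
ss_res = np.sum((y_test - predictions)**2)
ss_tot = np.sum((y_test - y_test.mean())**2)
print(1 - ss_res / ss_tot)  # matches model.score(X_test, y_test)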



# we can even use random forest to solve the same problem!
from sklearn.ensemble import RandomForestRegressor

model2 = RandomForestRegressor()
model2.fit(X_train, y_train)


# evaluate the model's performance
print(model2.score(X_train, y_train))
print(model2.score(X_test, y_test))




# we can even use deep learning to solve the same problem!
from sklearn.neural_network import MLPRegressor

# you'll learn why scaling is needed in a later course
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train2 = scaler.fit_transform(X_train)
X_test2 = scaler.transform(X_test)
scaler2 = StandardScaler()
y_train2 = scaler2.fit_transform(np.expand_dims(y_train, -1)).ravel()
# use transform (not fit_transform) here, so the test targets are
# scaled with the statistics learned from the training targets
y_test2 = scaler2.transform(np.expand_dims(y_test, -1)).ravel()

model = MLPRegressor(max_iter=500)
model.fit(X_train2, y_train2)


# evaluate the model's performance
print(model.score(X_train2, y_train2))
print(model.score(X_test2, y_test2))
# not as good as a random forest!
# but not as bad as linear regression
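
# an aside: to express the MLP's predictions back in the original
# units (dB), invert the target scaling
pred2 = scaler2.inverse_transform(np.expand_dims(model.predict(X_test2), -1)).ravel()
print(pred2[:5])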
