# https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
# https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

# Linux and Mac instructions:
# http://pytorch.org/#pip-install-pytorch

# Windows instructions (just one line):
# conda install -c peterjc123 pytorch

# Note: it is helpful to look at keras_example.py first


import numpy as np
import matplotlib.pyplot as plt
from util import get_normalized_data

import torch
# Note: Variable is a legacy wrapper; since PyTorch 0.4 plain tensors support
# autograd directly, so the wrapping below is effectively a no-op
from torch.autograd import Variable
from torch import optim


# get the data, same as the Theano + Tensorflow examples
# it already comes back split into train and test sets
Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()

# get shapes
_, D = Xtrain.shape
K = len(set(Ytrain))

# Note: no need to convert Y to an indicator matrix
# (CrossEntropyLoss expects integer class labels)

# the model will be a sequence of layers
model = torch.nn.Sequential()


# ANN with layers [784] -> [500] -> [300] -> [10]
# NOTE: the "p" is p_drop, not p_keep
model.add_module("dropout1", torch.nn.Dropout(p=0.2))
model.add_module("dense1", torch.nn.Linear(D, 500))
model.add_module("relu1", torch.nn.ReLU())
model.add_module("dropout2", torch.nn.Dropout(p=0.5))
model.add_module("dense2", torch.nn.Linear(500, 300))
model.add_module("relu2", torch.nn.ReLU())
model.add_module("dropout3", torch.nn.Dropout(p=0.5))
model.add_module("dense3", torch.nn.Linear(300, K))
# Note: no final softmax!
# just like in Tensorflow, it's included in the cross-entropy function
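# (optional sketch) the model outputs raw logits; if you ever need actual class
# probabilities at inference time, you could apply the softmax yourself, e.g.
#   probs = torch.nn.functional.softmax(model(some_inputs), dim=1)
# where some_inputs is a hypothetical float tensor of shape (batch, D)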


# define a loss function
# other loss functions can be found here:
# http://pytorch.org/docs/master/nn.html#loss-functions
# Note: size_average is deprecated in newer PyTorch versions;
# reduction='mean' is the equivalent (and default) setting
loss = torch.nn.CrossEntropyLoss(reduction='mean')
# Note: this gives us a callable object!
# e.g. use it like: loss(logits, labels)
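# a quick sanity-check sketch (dummy tensors, just to illustrate the shapes):
#   dummy_logits = torch.randn(8, K)           # (batch, K) unnormalized scores
#   dummy_labels = torch.randint(0, K, (8,))   # (batch,) integer class labels
#   loss(dummy_logits, dummy_labels)           # -> 0-d tensor holding the mean loss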


# define an optimizer
# other optimizers can be found here:
# http://pytorch.org/docs/master/optim.html
optimizer = optim.Adam(model.parameters(), lr=1e-4)
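# (optional sketch) swapping in a different optimizer is a one-line change,
# e.g. plain SGD with momentum (these hyperparameters are just examples):
#   optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)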


# define the training procedure
# i.e. one step of gradient descent
# there are lots of steps
# so we encapsulate it in a function
# Note: inputs and labels are torch tensors
def train(model, loss, optimizer, inputs, labels):
    # set the model to training mode
    # because dropout has 2 different modes!
    model.train()

    inputs = Variable(inputs, requires_grad=False)
    labels = Variable(labels, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    logits = model(inputs)
    output = loss(logits, labels)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    # what's the difference between backward() and step()?

    return output.item()

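# (a hint for the question above) backward() computes the gradient of the loss
# w.r.t. every parameter and stores it in each parameter's .grad attribute;
# step() then reads those gradients and applies the actual parameter update
# according to the optimizer's rule (Adam in this script)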

# similar to train() but not doing the backprop step
def get_cost(model, loss, inputs, labels):
    # set the model to testing mode
    # because dropout has 2 different modes!
    model.eval()

    inputs = Variable(inputs, requires_grad=False)
    labels = Variable(labels, requires_grad=False)

    # Forward
    logits = model(inputs)
    output = loss(logits, labels)

    return output.item()


# define the prediction procedure
# also encapsulate these steps
# Note: inputs is a torch tensor
def predict(model, inputs):
    # set the model to testing mode
    # because dropout has 2 different modes!
    model.eval()

    inputs = Variable(inputs, requires_grad=False)
    logits = model(inputs)
    return logits.detach().numpy().argmax(axis=1)

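# (optional, modern-PyTorch sketch) wrapping evaluation code in torch.no_grad()
# skips building the autograd graph entirely and saves memory, e.g.:
#   with torch.no_grad():
#       logits = model(inputs)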

# return the accuracy
# labels is a torch tensor
# to get back the internal numpy data
# use the instance method .numpy()
def score(model, inputs, labels):
    predictions = predict(model, inputs)
    return np.mean(labels.numpy() == predictions)


### prepare for training loop ###

# convert the data arrays into torch tensors
Xtrain = torch.from_numpy(Xtrain).float()
Ytrain = torch.from_numpy(Ytrain).long()
Xtest = torch.from_numpy(Xtest).float()
Ytest = torch.from_numpy(Ytest).long()
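# quick sanity check (optional): CrossEntropyLoss wants float inputs and
# int64 (long) class labels
print("Xtrain:", Xtrain.shape, Xtrain.dtype, "| Ytrain:", Ytrain.shape, Ytrain.dtype)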

# training parameters
epochs = 15
batch_size = 32
n_batches = Xtrain.size()[0] // batch_size

# things to keep track of
train_costs = []
test_costs = []
train_accuracies = []
test_accuracies = []

# main training loop
for i in range(epochs):
    cost = 0
    test_cost = 0
    for j in range(n_batches):
        Xbatch = Xtrain[j*batch_size:(j+1)*batch_size]
        Ybatch = Ytrain[j*batch_size:(j+1)*batch_size]
        cost += train(model, loss, optimizer, Xbatch, Ybatch)

    # we could have also calculated the train cost here,
    # but I wanted to show you that we can also return it
    # from the train function itself
    train_acc = score(model, Xtrain, Ytrain)
    test_acc = score(model, Xtest, Ytest)
    test_cost = get_cost(model, loss, Xtest, Ytest)

    print("Epoch: %d, test cost: %f, test acc: %.2f" % (i, test_cost, test_acc))

    # for plotting
    train_costs.append(cost / n_batches)
    train_accuracies.append(train_acc)
    test_costs.append(test_cost)
    test_accuracies.append(test_acc)


# plot the results
plt.plot(train_costs, label='Train cost')
plt.plot(test_costs, label='Test cost')
plt.title('Cost')
plt.legend()
plt.show()

plt.plot(train_accuracies, label='Train accuracy')
plt.plot(test_accuracies, label='Test accuracy')
plt.title('Accuracy')
plt.legend()
plt.show()
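
# (optional sketch) to reuse the trained weights later, you could save the
# state dict; the filename here is just an example
#   torch.save(model.state_dict(), "mnist_ann.pt")
# and load it back into an identically-constructed model with:
#   model.load_state_dict(torch.load("mnist_ann.pt"))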