# https://udemy.com/recommender-systems
# https://deeplearningcourses.com/recommender-systems
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

import pandas as pd
from scipy.sparse import lil_matrix, csr_matrix, save_npz, load_npz
from datetime import datetime


def dot1(V, W):
  # V is N x D x K (batch of visible units)
  # W is D x K x M (weights)
  # returns N x M (hidden layer size)
  return tf.tensordot(V, W, axes=[[1, 2], [0, 1]])

def dot2(H, W):
  # H is N x M (batch of hidden units)
  # W is D x K x M (same weights as in dot1; here the contraction is over the hidden axis M)
  # returns N x D x K (visible)
  return tf.tensordot(H, W, axes=[[1], [2]])
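
# Shape sanity check (illustrative comment only; the toy dimensions below are assumed,
# not taken from the data):
#   V = tf.zeros((2, 3, 5))        # N=2 samples, D=3 items, K=5 rating values
#   W = tf.zeros((3, 5, 7))        # D=3, K=5, M=7 hidden units
#   dot1(V, W)                     # -> shape (2, 7), i.e. N x M
#   dot2(tf.zeros((2, 7)), W)      # -> shape (2, 3, 5), i.e. N x D x K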


class RBM(object):
  def __init__(self, D, M, K):
    self.D = D # input feature size
    self.M = M # hidden size
    self.K = K # number of ratings
    self.build(D, M, K)


  def build(self, D, M, K):
    # params
    self.W = tf.Variable(tf.random_normal(shape=(D, K, M)) * np.sqrt(2.0 / M))
    self.c = tf.Variable(np.zeros(M).astype(np.float32))
    self.b = tf.Variable(np.zeros((D, K)).astype(np.float32))

    # data
    self.X_in = tf.placeholder(tf.float32, shape=(None, D))

    # one hot encode X
    # first, make each rating an int
    X = tf.cast(self.X_in * 2 - 1, tf.int32)
    X = tf.one_hot(X, K)
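    # e.g. with K = 10, a rating of 0.5 becomes index 0 and a rating of 5.0 becomes index 9;
    # missing entries (stored as 0) become index -1, which tf.one_hot maps to an all-zero row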

    # conditional probabilities
    # NOTE: tf.contrib.distributions.Bernoulli API has changed in Tensorflow v1.2
    V = X
    p_h_given_v = tf.nn.sigmoid(dot1(V, self.W) + self.c)
    self.p_h_given_v = p_h_given_v # save for later

    # draw a sample from p(h | v)
    r = tf.random_uniform(shape=tf.shape(p_h_given_v))
    H = tf.to_float(r < p_h_given_v)
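    # (thresholding uniform noise against the probabilities is equivalent to sampling
    #  each hidden unit from Bernoulli(p_h_given_v))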

    # draw a sample from p(v | h)
    # note: we don't have to actually do the softmax
    logits = dot2(H, self.W) + self.b
    cdist = tf.distributions.Categorical(logits=logits)
    X_sample = cdist.sample() # shape is (N, D)
    X_sample = tf.one_hot(X_sample, depth=self.K) # turn it into (N, D, K)
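    # (Categorical normalizes the logits internally, so an explicit softmax before
    #  sampling would be redundant)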

    # mask X_sample to remove missing ratings
    mask2d = tf.cast(self.X_in > 0, tf.float32)
    mask3d = tf.stack([mask2d]*K, axis=-1) # repeat K times in last dimension
    X_sample = X_sample * mask3d
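    # zeroing the sampled one-hot rows at unrated positions makes X_sample consistent with X,
    # whose unrated positions are also all-zero, so items a user never rated contribute
    # nothing to either side of the objective below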


    # build the objective
    objective = tf.reduce_mean(self.free_energy(X)) - tf.reduce_mean(self.free_energy(X_sample))
    self.train_op = tf.train.AdamOptimizer(1e-2).minimize(objective)
    # self.train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(objective)
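    # this is one step of contrastive divergence (CD-1): push down the mean free energy of the
    # data while pushing up the mean free energy of the one-step Gibbs reconstruction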

    # build the cost
    # we won't use this to optimize the model parameters
    # just to observe what happens during training
    logits = self.forward_logits(X)
    self.cost = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(
        labels=X,
        logits=logits,
      )
    )

    # to get the output
    self.output_visible = self.forward_output(X)


    # for calculating SSE
    # the K possible ratings 0.5, 1.0, ..., 5.0 (one value per one-hot slot)
    self.one_to_ten = tf.constant((np.arange(K) + 1).astype(np.float32) / 2)
    self.pred = tf.tensordot(self.output_visible, self.one_to_ten, axes=[[2], [0]])
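    # pred is the expected rating under the model's softmax output: sum_k p(k) * (k + 1) / 2,
    # e.g. a distribution concentrated entirely on the last slot (k = 9) gives (9 + 1) / 2 = 5.0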
    mask = tf.cast(self.X_in > 0, tf.float32)
    se = mask * (self.X_in - self.pred) * (self.X_in - self.pred)
    self.sse = tf.reduce_sum(se)

    # test SSE
    self.X_test = tf.placeholder(tf.float32, shape=(None, D))
    mask = tf.cast(self.X_test > 0, tf.float32)
    tse = mask * (self.X_test - self.pred) * (self.X_test - self.pred)
    self.tsse = tf.reduce_sum(tse)
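    # note that self.pred is computed from X_in (the training ratings), so the test SSE scores
    # predictions conditioned on a user's training ratings against their held-out test ratings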


    initop = tf.global_variables_initializer()
    self.session = tf.Session()
    self.session.run(initop)

  def fit(self, X, X_test, epochs=10, batch_sz=256, show_fig=True):
    N, D = X.shape
    n_batches = N // batch_sz


    costs = []
    test_costs = []
    for i in range(epochs):
      t0 = datetime.now()
      print("epoch:", i)
      X, X_test = shuffle(X, X_test) # everything has to be shuffled accordingly
      for j in range(n_batches):
        x = X[j*batch_sz:(j*batch_sz + batch_sz)].toarray()

        _, c = self.session.run(
          (self.train_op, self.cost),
          feed_dict={self.X_in: x}
        )

        if j % 100 == 0:
          print("j / n_batches:", j, "/", n_batches, "cost:", c)
      print("duration:", datetime.now() - t0)

      # calculate the true train and test cost
      t0 = datetime.now()
      sse = 0
      test_sse = 0
      n = 0
      test_n = 0
      for j in range(n_batches):
        x = X[j*batch_sz:(j*batch_sz + batch_sz)].toarray()
        xt = X_test[j*batch_sz:(j*batch_sz + batch_sz)].toarray()

        # number of train ratings
        n += np.count_nonzero(x)

        # number of test ratings
        test_n += np.count_nonzero(xt)

        # use tensorflow to get SSEs
        sse_j, tsse_j = self.get_sse(x, xt)
        sse += sse_j
        test_sse += tsse_j
      c = sse/n
      ct = test_sse/test_n
      print("train mse:", c)
      print("test mse:", ct)
      print("calculate cost duration:", datetime.now() - t0)
      costs.append(c)
      test_costs.append(ct)
    if show_fig:
      plt.plot(costs, label='train mse')
      plt.plot(test_costs, label='test mse')
      plt.legend()
      plt.show()
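
  # For an RBM with K-way categorical (one-hot) visible units, the free energy of a
  # visible configuration v is
  #   F(v) = - sum_{i,k} b[i,k] * v[i,k] - sum_j softplus(c[j] + sum_{i,k} W[i,k,j] * v[i,k])
  # which is what the two terms below compute, one value per row of the batch.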
  def free_energy(self, V):
    # visible bias term, one value per sample in the batch (shape N)
    first_term = -dot1(V, self.b)
    second_term = -tf.reduce_sum(
      # tf.log(1 + tf.exp(tf.matmul(V, self.W) + self.c)),
      tf.nn.softplus(dot1(V, self.W) + self.c),
      axis=1
    )
    return first_term + second_term

  def forward_hidden(self, X):
    return tf.nn.sigmoid(dot1(X, self.W) + self.c)

  def forward_logits(self, X):
    Z = self.forward_hidden(X)
    return dot2(Z, self.W) + self.b

  def forward_output(self, X):
    return tf.nn.softmax(self.forward_logits(X))

  def transform(self, X):
    # accepts and returns a real numpy array
    # unlike forward_hidden and forward_output
    # which deal with tensorflow variables
    return self.session.run(self.p_h_given_v, feed_dict={self.X_in: X})

  def get_visible(self, X):
    return self.session.run(self.output_visible, feed_dict={self.X_in: X})

  def get_sse(self, X, Xt):
    return self.session.run(
      (self.sse, self.tsse),
      feed_dict={
        self.X_in: X,
        self.X_test: Xt,
      })


def main():
  A = load_npz("Atrain.npz")
  A_test = load_npz("Atest.npz")
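  # assumption: Atrain.npz / Atest.npz hold sparse user-by-movie matrices produced by an
  # earlier preprocessing step, with ratings in {0.5, 1.0, ..., 5.0} and 0 marking movies
  # a user did not rate (this is what the encoding in build() expects)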

  N, M = A.shape
  rbm = RBM(M, 50, 10)
  rbm.fit(A, A_test)
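
  # A usage sketch (not part of the training run; the batch of 16 users is arbitrary):
  # after fit() one could inspect the model's outputs for a few users, e.g.
  #   user_batch = A[:16].toarray()
  #   p_visible = rbm.get_visible(user_batch)  # (16, M, 10): rating distribution per movie
  #   pred_hidden = rbm.transform(user_batch)  # (16, 50): latent representation of each user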


if __name__ == '__main__':
  main()