Commit c3a4b3c

nlp3
1 parent 232f10e commit c3a4b3c

10 files changed, +2020 -0 lines changed


nlp_class3/attention.py

Lines changed: 460 additions & 0 deletions
Large diffs are not rendered by default.
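The attention.py diff itself is not rendered on this page. For orientation only, here is a minimal sketch of the "simplified" feed-forward attention pooling idea listed in extra_reading.txt below, written in the same Keras style as the other files; this is an assumption for illustration and is not the contents of attention.py.

# NOT the contents of attention.py -- a small illustration of simplified
# (feed-forward) attention pooling over time
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Activation, RepeatVector, Permute, Multiply, Lambda
import keras.backend as K
import numpy as np

T, D = 8, 2  # arbitrary sequence length and feature size for the demo

input_ = Input(shape=(T, D))
scores = Dense(1, activation='tanh')(input_)   # N x T x 1, one score per time step
scores = Flatten()(scores)                     # N x T
alphas = Activation('softmax')(scores)         # N x T, attention weights over time
alphas = RepeatVector(D)(alphas)               # N x D x T
alphas = Permute((2, 1))(alphas)               # N x T x D
weighted = Multiply()([input_, alphas])        # weight each time step of the input
context = Lambda(lambda t: K.sum(t, axis=1))(weighted)  # N x D, weighted sum over time

model = Model(inputs=input_, outputs=context)
print(model.predict(np.random.randn(1, T, D)).shape)  # expect (1, 2)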

nlp_class3/bilstm_mnist.py

Lines changed: 100 additions & 0 deletions
# https://deeplearningcourses.com/c/deep-learning-advanced-nlp
from __future__ import print_function, division
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future


import os
from keras.models import Model
from keras.layers import Input, LSTM, GRU, Bidirectional, GlobalMaxPooling1D, Lambda, Concatenate, Dense
import keras.backend as K
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def get_mnist(limit=None):
  if not os.path.exists('../large_files'):
    print("You must create a folder called large_files adjacent to the class folder first.")
  if not os.path.exists('../large_files/train.csv'):
    print("Looks like you haven't downloaded the data or it's not in the right spot.")
    print("Please get train.csv from https://www.kaggle.com/c/digit-recognizer")
    print("and place it in the large_files folder.")

  print("Reading in and transforming data...")
  df = pd.read_csv('../large_files/train.csv')
  data = df.values  # df.as_matrix() was removed in newer pandas; .values returns the same array
  np.random.shuffle(data)
  X = data[:, 1:].reshape(-1, 28, 28) / 255.0  # data is from 0..255
  Y = data[:, 0]
  if limit is not None:
    X, Y = X[:limit], Y[:limit]
  return X, Y



# get data
X, Y = get_mnist()

# config
D = 28
M = 15


# input is an image of size 28x28
input_ = Input(shape=(D, D))

# up-down
rnn1 = Bidirectional(LSTM(M, return_sequences=True))
x1 = rnn1(input_)  # output is N x D x 2M
x1 = GlobalMaxPooling1D()(x1)  # output is N x 2M

# left-right
rnn2 = Bidirectional(LSTM(M, return_sequences=True))

# custom layer to transpose the image so the second LSTM reads it column by column
permutor = Lambda(lambda t: K.permute_dimensions(t, pattern=(0, 2, 1)))

x2 = permutor(input_)
x2 = rnn2(x2)  # output is N x D x 2M
x2 = GlobalMaxPooling1D()(x2)  # output is N x 2M

# put them together
concatenator = Concatenate(axis=1)
x = concatenator([x1, x2])  # output is N x 4M

# final dense layer
output = Dense(10, activation='softmax')(x)

model = Model(inputs=input_, outputs=output)

# testing
# o = model.predict(X)
# print("o.shape:", o.shape)

# compile
model.compile(
  loss='sparse_categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

# train
print('Training model...')
r = model.fit(X, Y, batch_size=32, epochs=10, validation_split=0.3)


# plot some data
plt.plot(r.history['loss'], label='loss')
plt.plot(r.history['val_loss'], label='val_loss')
plt.legend()
plt.show()

# accuracies
plt.plot(r.history['acc'], label='acc')
plt.plot(r.history['val_acc'], label='val_acc')
plt.legend()
plt.show()
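Before committing to a full training run on the Kaggle CSV, a quick shape check with random dummy data confirms that the two bidirectional passes and the concatenation are wired up correctly. A minimal sketch, assuming the model built above; the batch of 4 fake "images" is for illustration only, not real MNIST data.

# shape check on random dummy images; uses the model defined above
dummy = np.random.randn(4, 28, 28)
p = model.predict(dummy)
print(p.shape)  # expect (4, 10): one softmax distribution over the 10 digits per image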

nlp_class3/bilstm_test.py

Lines changed: 33 additions & 0 deletions
# https://deeplearningcourses.com/c/deep-learning-advanced-nlp
from __future__ import print_function, division
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future

from keras.models import Model
from keras.layers import Input, LSTM, GRU, Bidirectional
import numpy as np
import matplotlib.pyplot as plt


T = 8
D = 2
M = 3


X = np.random.randn(1, T, D)


input_ = Input(shape=(T, D))
# rnn = Bidirectional(LSTM(M, return_state=True, return_sequences=True))
rnn = Bidirectional(LSTM(M, return_state=True, return_sequences=False))
x = rnn(input_)

model = Model(inputs=input_, outputs=x)
o, h1, c1, h2, c2 = model.predict(X)
print("o:", o)
print("o.shape:", o.shape)
print("h1:", h1)
print("c1:", c1)
print("h2:", h2)
print("c2:", c2)

nlp_class3/cnn_toxic.py

Lines changed: 158 additions & 0 deletions
# https://deeplearningcourses.com/c/deep-learning-advanced-nlp
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, GlobalMaxPooling1D
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.models import Model
from sklearn.metrics import roc_auc_score


# Download the data:
# https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge
# Download the word vectors:
# http://nlp.stanford.edu/data/glove.6B.zip


# some configuration
MAX_SEQUENCE_LENGTH = 100
MAX_VOCAB_SIZE = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2
BATCH_SIZE = 128
EPOCHS = 10


# load in pre-trained word vectors
print('Loading word vectors...')
word2vec = {}
with open(os.path.join('../large_files/glove.6B/glove.6B.%sd.txt' % EMBEDDING_DIM)) as f:
  # this is just a space-separated text file in the format:
  # word vec[0] vec[1] vec[2] ...
  for line in f:
    values = line.split()
    word = values[0]
    vec = np.asarray(values[1:], dtype='float32')
    word2vec[word] = vec
print('Found %s word vectors.' % len(word2vec))


# prepare text samples and their labels
print('Loading in comments...')

train = pd.read_csv("../large_files/toxic-comment/train.csv")
sentences = train["comment_text"].fillna("DUMMY_VALUE").values
possible_labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
targets = train[possible_labels].values

print("max sequence length:", max(len(s) for s in sentences))
print("min sequence length:", min(len(s) for s in sentences))
s = sorted(len(s) for s in sentences)
print("median sequence length:", s[len(s) // 2])


# convert the sentences (strings) into integers
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
# print("sequences:", sequences); exit()

# get word -> integer mapping
word2idx = tokenizer.word_index
print('Found %s unique tokens.' % len(word2idx))

# pad sequences so that we get a N x T matrix
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', data.shape)


# prepare embedding matrix
print('Filling pre-trained embeddings...')
num_words = min(MAX_VOCAB_SIZE, len(word2idx) + 1)
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in word2idx.items():
  if i < MAX_VOCAB_SIZE:
    embedding_vector = word2vec.get(word)
    if embedding_vector is not None:
      # words not found in embedding index will be all zeros.
      embedding_matrix[i] = embedding_vector


# load pre-trained word embeddings into an Embedding layer
# note that we set trainable = False so as to keep the embeddings fixed
embedding_layer = Embedding(
  num_words,
  EMBEDDING_DIM,
  weights=[embedding_matrix],
  input_length=MAX_SEQUENCE_LENGTH,
  trainable=False
)


print('Building model...')

# train a 1D convnet with global maxpooling
input_ = Input(shape=(MAX_SEQUENCE_LENGTH,))
x = embedding_layer(input_)
x = Conv1D(128, 3, activation='relu')(x)
x = MaxPooling1D(3)(x)
x = Conv1D(128, 3, activation='relu')(x)
x = MaxPooling1D(3)(x)
x = Conv1D(128, 3, activation='relu')(x)
x = GlobalMaxPooling1D()(x)
x = Dense(128, activation='relu')(x)
output = Dense(len(possible_labels), activation='sigmoid')(x)

model = Model(input_, output)
model.compile(
  loss='binary_crossentropy',
  optimizer='rmsprop',
  metrics=['accuracy']
)

print('Training model...')
r = model.fit(
  data,
  targets,
  batch_size=BATCH_SIZE,
  epochs=EPOCHS,
  validation_split=VALIDATION_SPLIT
)


# plot some data
plt.plot(r.history['loss'], label='loss')
plt.plot(r.history['val_loss'], label='val_loss')
plt.legend()
plt.show()

# accuracies
plt.plot(r.history['acc'], label='acc')
plt.plot(r.history['val_acc'], label='val_acc')
plt.legend()
plt.show()

# print the mean AUC over the six labels
p = model.predict(data)
aucs = []
for j in range(6):
  auc = roc_auc_score(targets[:,j], p[:,j])
  aucs.append(auc)
print(np.mean(aucs))
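To score an unseen comment after training, the same tokenizer and padding have to be applied before calling predict. A minimal sketch, assuming the objects defined above are still in scope; the example text is made up for illustration.

# scoring a new comment with the fitted tokenizer and trained model above
new_comments = ["you are a wonderful person"]  # hypothetical example text
seqs = tokenizer.texts_to_sequences(new_comments)
padded = pad_sequences(seqs, maxlen=MAX_SEQUENCE_LENGTH)
probs = model.predict(padded)  # shape (1, 6): one probability per toxicity label
for label, prob in zip(possible_labels, probs[0]):
  print("%s: %.3f" % (label, prob))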

nlp_class3/extra_reading.txt

Lines changed: 44 additions & 0 deletions
https://deeplearningcourses.com/c/deep-learning-advanced-nlp

Bidirectional Recurrent Neural Networks
https://maxwell.ict.griffith.edu.au/spl/publications/papers/ieeesp97_schuster.pdf

Translation Modeling with Bidirectional Recurrent Neural Networks
http://emnlp2014.org/papers/pdf/EMNLP2014003.pdf

Sequence to Sequence Learning with Neural Networks
https://arxiv.org/abs/1409.3215

A Neural Conversational Model
https://arxiv.org/abs/1506.05869v3

Neural Machine Translation by Jointly Learning to Align and Translate (Attention)
https://arxiv.org/abs/1409.0473

Feed-Forward Networks with Attention Can Solve Some Long-Term Memory Problems (Simplified Attention)
https://arxiv.org/abs/1512.08756

Memory Networks
https://arxiv.org/abs/1410.3916

Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks
http://arxiv.org/abs/1502.05698

End-To-End Memory Networks
http://arxiv.org/abs/1503.08895

Ask Me Anything: Dynamic Memory Networks for Natural Language Processing
https://arxiv.org/abs/1506.07285

WaveNet
https://deepmind.com/blog/wavenet-generative-model-raw-audio/

Tacotron
https://google.github.io/tacotron/

Tacotron 2
https://research.googleblog.com/2017/12/tacotron-2-generating-human-like-speech.html

An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling
https://arxiv.org/abs/1803.01271
(just released March 2018!)
