```python
X_my_sentences = np.array(["i adore you", "i love you", "funny lol", "lets play with a ball", "food is ready", "you are not happy"])
Y_my_labels = np.array([[0], [0], [2], [1], [4], [3]])

pred = predict(X_my_sentences, Y_my_labels, W, b, word_to_vec_map)
print_predictions(X_my_sentences, pred)
```
Accuracy: 0.8333333333333334
i adore you ❤️
i love you ❤️
funny lol 😄
lets play with a ball ⚾
food is ready 🍴
you are not happy ❤️
However, the model does not understand that "not happy" expresses unhappiness; it has simply learned the word "happy". Print the model's confusion matrix to see how it performs:
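A minimal sketch of how that confusion matrix might be printed and visualized, assuming `Y_test` holds the true test labels and `pred_test` the predictions returned by `predict` on the test set (both names are assumptions here):

```python
import pandas as pd
import matplotlib.pyplot as plt

# Cross-tabulate true labels against predicted labels
df_confusion = pd.crosstab(Y_test.reshape(-1), pred_test.reshape(-1),
                           rownames=['Actual'], colnames=['Predicted'], margins=True)
print(df_confusion)

# Show the matrix as a heatmap; darker cells on the diagonal mean more correct predictions
plt.matshow(df_confusion.values[:-1, :-1], cmap=plt.cm.gray_r)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.colorbar()
plt.show()
```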
The diagonal of the matrix is noticeably darker, which means the model performs reasonably well. But it still fails on "not xxx" phrases: the embedding matrix has no representation for such negations, and simply averaging the word embeddings throws away the order of the input words, so a better algorithm is needed.
Emojifier-V2: Using LSTMs in Keras
An overview of Emojifier-V2 is shown in the figure below:
This is a two-layer LSTM sequence classifier. This time the model is trained in Keras with mini-batches, so all sequences in a batch must have the same length, which requires zero-padding. For example, if the longest sequence in a batch has length 5, the sentence "I love you" is represented as (e_i, e_love, e_you, 0⃗, 0⃗), where 0⃗ is a zero vector used as padding.
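A tiny illustration of the padding at the index level, using made-up word indices (the real indices come from the GloVe vocabulary):

```python
import numpy as np

toy_word_to_index = {"i": 1, "love": 2, "you": 3}   # hypothetical indices, for illustration only
max_len = 5

words = "i love you".split()
indices = np.zeros((max_len,))                      # index 0 acts as the padding value
indices[:len(words)] = [toy_word_to_index[w] for w in words]
print(indices)                                      # [1. 2. 3. 0. 0.]
```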
The Embedding layer
In Keras, the embedding matrix is represented as a layer that maps word indices to embedding vectors. The embedding matrix can be learned during training, or it can be initialized from a pretrained matrix. The output of the Embedding() layer is a tensor of shape (batch size, max input length, dimension of word vectors). Since the layer consumes word indices, sentences must first be converted into arrays of indices; sentences_to_indices does this:
```python
def sentences_to_indices(X, word_to_index, max_len):
    m = X.shape[0]                           # number of training examples
    # Initialize X_indices as a numpy matrix of zeros with the correct shape
    X_indices = np.zeros((m, max_len))
    for i in range(m):                       # loop over training examples
        # Convert the ith training sentence to lower case and split it into a list of words
        sentence_words = X[i].lower().split()
        # Initialize j to 0
        j = 0
        # Loop over the words of sentence_words
        for w in sentence_words:
            # Set the (i,j)th entry of X_indices to the index of the correct word
            X_indices[i, j] = word_to_index[w]
            # Increment j
            j += 1
    return X_indices
```
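A quick usage check (the exact index values depend on the GloVe vocabulary, so none are shown here):

```python
X1 = np.array(["funny lol", "lets play baseball", "food is ready for you"])
X1_indices = sentences_to_indices(X1, word_to_index, max_len=5)
print("X1 =", X1)
print("X1_indices =", X1_indices)
```

The embedding layer itself is built by `pretrained_embedding_layer`, which copies the GloVe vectors into a frozen Keras `Embedding` layer: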
```python
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1               # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]   # dimensionality of the GloVe word vectors (= 50)

    # Initialize the embedding matrix as a numpy array of zeros of shape (vocab_len, emb_dim)
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Set each row "index" of the embedding matrix to the word vector of the "index"th word of the vocabulary
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define the Keras embedding layer with the correct input/output sizes; keep it frozen (trainable=False)
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # Build the embedding layer; this is required before setting its weights. Do not modify the "None".
    embedding_layer.build((None,))

    # Set the weights of the embedding layer to the embedding matrix. The layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
```
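A quick sanity check that the GloVe weights were actually loaded (assuming the `word_to_vec_map` and `word_to_index` built earlier):

```python
embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
# The first weight array of the layer is the (vocab_len, 50) embedding matrix
print("weights shape =", embedding_layer.get_weights()[0].shape)
```

With the embedding layer in place, the full Emojifier-V2 model can be assembled: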
```python
def Emojify_V2(input_shape, word_to_vec_map, word_to_index):
    # Define sentence_indices as the input of the graph; it has shape input_shape and dtype 'int32' (it contains indices)
    sentence_indices = Input(input_shape, dtype='int32')

    # Create the embedding layer pretrained with GloVe vectors
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)

    # Propagate sentence_indices through the embedding layer to get the embeddings
    embeddings = embedding_layer(sentence_indices)

    # Propagate the embeddings through an LSTM layer with a 128-dimensional hidden state
    # Note: the returned output should be a batch of sequences
    X = LSTM(128, return_sequences=True)(embeddings)
    # Add dropout with a probability of 0.5
    X = Dropout(0.5)(X)

    # Propagate X through another LSTM layer with a 128-dimensional hidden state
    # Note: this time the returned output should be a single hidden state, not a batch of sequences
    X = LSTM(128, return_sequences=False)(X)
    # Add dropout with a probability of 0.5
    X = Dropout(0.5)(X)

    # Propagate X through a Dense layer to get back a batch of 5-dimensional vectors
    X = Dense(5)(X)
    # Add a softmax activation
    X = Activation('softmax')(X)

    # Create the Model instance which converts sentence_indices into X
    model = Model(inputs=sentence_indices, outputs=X)

    return model
```
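A sketch of how the model might be compiled and trained, assuming `maxLen` is the maximum sentence length in the training set, `X_train`/`Y_train` are the training sentences and labels, and `convert_to_one_hot` is a helper that one-hot encodes the labels (these names are assumptions here):

```python
model = Emojify_V2((maxLen,), word_to_vec_map, word_to_index)
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
Y_train_oh = convert_to_one_hot(Y_train, C=5)   # 5 emoji classes

model.fit(X_train_indices, Y_train_oh, epochs=50, batch_size=32, shuffle=True)
```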