Commit 71cb589

Merge pull request #325 from donglihe-hub/embed

Extend arguments accepted by Embedding

2 parents cb651c1 + f05f883

5 files changed: +30 -9 lines changed

5 files changed

+30
-9
lines changed
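The change below adds a freeze flag to Embedding so callers can keep pre-trained word vectors fixed during training, and switches call sites to pass the dropout rate by keyword. A minimal sketch of the extended constructor (the random embed_vecs tensor is a stand-in for real pre-trained vectors):

    import torch
    from libmultilabel.nn.networks.modules import Embedding

    # Stand-in for pre-trained vectors of shape (vocab_size, embed_dim)
    embed_vecs = torch.randn(5000, 300)

    frozen = Embedding(embed_vecs, freeze=True, dropout=0.2)  # vectors stay fixed
    trainable = Embedding(embed_vecs)  # defaults: freeze=False, dropout=0.2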

docs/examples/plot_KimCNN_quickstart.py
Lines changed: 6 additions & 1 deletion

@@ -45,7 +45,12 @@
 # We consider the following settings for the KimCNN model.
 
 model_name = "KimCNN"
-network_config = {"embed_dropout": 0.2, "post_encoder_dropout": 0.2, "filter_sizes": [2, 4, 8], "num_filter_per_size": 128}
+network_config = {
+    "embed_dropout": 0.2,
+    "post_encoder_dropout": 0.2,
+    "filter_sizes": [2, 4, 8],
+    "num_filter_per_size": 128,
+}
 learning_rate = 0.0003
 model = init_model(
     model_name=model_name,

libmultilabel/nn/networks/kim_cnn.py
Lines changed: 1 addition & 1 deletion

@@ -28,7 +28,7 @@ def __init__(
         activation="relu",
     ):
         super(KimCNN, self).__init__()
-        self.embedding = Embedding(embed_vecs, embed_dropout)
+        self.embedding = Embedding(embed_vecs, dropout=embed_dropout)
         self.encoder = CNNEncoder(
             embed_vecs.shape[1], filter_sizes, num_filter_per_size, activation, post_encoder_dropout, num_pool=1
         )
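KimCNN and the other call sites now pass the dropout rate by keyword. With the extended signature Embedding(embed_vecs, freeze=False, dropout=0.2), the old positional call would silently bind the rate to freeze instead. A toy illustration of the pitfall (stand-in tensor, not from this commit):

    import torch
    from libmultilabel.nn.networks.modules import Embedding

    embed_vecs = torch.randn(100, 8)  # stand-in pre-trained vectors

    # Positional 0.2 now lands on freeze; any nonzero float is truthy,
    # so the weights get frozen and dropout stays at its default.
    wrong = Embedding(embed_vecs, 0.2)
    print(wrong.embedding.weight.requires_grad)  # False -- accidentally frozen

    # The keyword form keeps the intended meaning.
    right = Embedding(embed_vecs, dropout=0.2)
    print(right.embedding.weight.requires_grad)  # True -- still trainable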

libmultilabel/nn/networks/labelwise_attention_networks.py
Lines changed: 10 additions & 3 deletions

@@ -27,7 +27,7 @@ class LabelwiseAttentionNetwork(ABC, nn.Module):
 
     def __init__(self, embed_vecs, num_classes, embed_dropout, encoder_dropout, post_encoder_dropout, hidden_dim):
         super(LabelwiseAttentionNetwork, self).__init__()
-        self.embedding = Embedding(embed_vecs, embed_dropout)
+        self.embedding = Embedding(embed_vecs, dropout=embed_dropout)
         self.encoder = self._get_encoder(embed_vecs.shape[1], encoder_dropout, post_encoder_dropout)
         self.attention = self._get_attention()
         self.output = LabelwiseLinearOutput(hidden_dim, num_classes)

@@ -199,7 +199,9 @@ def _get_encoder(self, input_size, encoder_dropout, post_encoder_dropout):
         return LSTMEncoder(input_size, self.rnn_dim // 2, self.rnn_layers, encoder_dropout, post_encoder_dropout)
 
     def _get_attention(self):
-        return LabelwiseMultiHeadAttention(self.rnn_dim, self.num_classes, self.num_heads, self.labelwise_attention_dropout)
+        return LabelwiseMultiHeadAttention(
+            self.rnn_dim, self.num_classes, self.num_heads, self.labelwise_attention_dropout
+        )
 
     def forward(self, input):
         # (batch_size, sequence_length, embed_dim)

@@ -246,7 +248,12 @@ def __init__(
     def _get_encoder(self, input_size, encoder_dropout, post_encoder_dropout):
         # encoder dropout is unused for CNN, we accept it to satisfy LabelwiseAttentionNetwork API
         return CNNEncoder(
-            input_size, self.filter_sizes, self.num_filter_per_size, self.activation, post_encoder_dropout, channel_last=True
+            input_size,
+            self.filter_sizes,
+            self.num_filter_per_size,
+            self.activation,
+            post_encoder_dropout,
+            channel_last=True,
         )
 
     def _get_attention(self):
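The comment kept in the last hunk explains the design: LabelwiseAttentionNetwork drives construction through _get_encoder, handing every subclass the same arguments even when a variant ignores some of them. A stripped-down sketch of that template-method pattern (hypothetical classes, not from this repo):

    from abc import ABC, abstractmethod

    class Network(ABC):
        def __init__(self, encoder_dropout):
            # The base class always passes encoder_dropout...
            self.encoder = self._get_encoder(encoder_dropout)

        @abstractmethod
        def _get_encoder(self, encoder_dropout):
            ...

    class RNNVariant(Network):
        def _get_encoder(self, encoder_dropout):
            return ("lstm", encoder_dropout)  # ...which the RNN uses,

    class CNNVariant(Network):
        def _get_encoder(self, encoder_dropout):
            return ("cnn",)  # ...while the CNN accepts and ignores it.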

libmultilabel/nn/networks/modules.py
Lines changed: 12 additions & 3 deletions

@@ -11,12 +11,14 @@ class Embedding(nn.Module):
 
     Args:
         embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
+        freeze (bool): If True, the tensor does not get updated in the learning process.
+            Equivalent to embedding.weight.requires_grad = False. Default: False.
         dropout (float): The dropout rate of the word embedding. Defaults to 0.2.
     """
 
-    def __init__(self, embed_vecs, dropout=0.2):
+    def __init__(self, embed_vecs, freeze=False, dropout=0.2):
         super(Embedding, self).__init__()
-        self.embedding = nn.Embedding.from_pretrained(embed_vecs, freeze=False, padding_idx=0)
+        self.embedding = nn.Embedding.from_pretrained(embed_vecs, freeze=freeze, padding_idx=0)
         self.dropout = nn.Dropout(dropout)
 
     def forward(self, input):

@@ -105,7 +107,14 @@ class CNNEncoder(nn.Module):
     """
 
     def __init__(
-        self, input_size, filter_sizes, num_filter_per_size, activation, post_encoder_dropout=0, num_pool=0, channel_last=False
+        self,
+        input_size,
+        filter_sizes,
+        num_filter_per_size,
+        activation,
+        post_encoder_dropout=0,
+        num_pool=0,
+        channel_last=False,
     ):
         super(CNNEncoder, self).__init__()
         if not filter_sizes:
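As the new docstring notes, freeze=True is equivalent to disabling gradients on the embedding weight. A quick check of that equivalence with plain PyTorch (toy tensor, not part of this commit):

    import torch
    import torch.nn as nn

    vecs = torch.randn(10, 4)  # toy pre-trained vectors

    frozen = nn.Embedding.from_pretrained(vecs, freeze=True, padding_idx=0)
    print(frozen.weight.requires_grad)  # False: excluded from optimizer updates

    manual = nn.Embedding.from_pretrained(vecs, freeze=False, padding_idx=0)
    manual.weight.requires_grad = False  # the hand-rolled equivalent
    print(manual.weight.requires_grad)  # False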

libmultilabel/nn/networks/xml_cnn.py
Lines changed: 1 addition & 1 deletion

@@ -33,7 +33,7 @@ def __init__(
         activation="relu",
     ):
         super(XMLCNN, self).__init__()
-        self.embedding = Embedding(embed_vecs, embed_dropout)
+        self.embedding = Embedding(embed_vecs, dropout=embed_dropout)
         self.encoder = CNNEncoder(embed_vecs.shape[1], filter_sizes, num_filter_per_size, activation, num_pool=num_pool)
         total_output_size = len(filter_sizes) * num_filter_per_size * num_pool
         self.linear1 = nn.Linear(total_output_size, hidden_dim)
