add docstring and comments. remove redundant flag from dataset.

sohaib023 · sohaib023 · commit 9339723a0e2d · 2021-04-25T22:09:58.000+05:00
diff --git a/eval.py b/eval.py
@@ -41,9 +41,10 @@
 
     os.makedirs(args.out_path, exist_ok=True)
 
+    # Set device to CUDA if a CUDA device is available, else CPU
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     
-    val_dataset     = Dataset(args.val_path, shuffle_pairs=False, augment=False, testing=True)
+    val_dataset     = Dataset(args.val_path, shuffle_pairs=False, augment=False)
     val_dataloader   = DataLoader(val_dataset, batch_size=1)
 
     criterion = torch.nn.BCELoss()
@@ -81,6 +82,7 @@
         fig = plt.figure("class1={}\tclass2={}".format(class1, class2), figsize=(4, 2))
         plt.suptitle("cls1={}  conf={:.2f}  cls2={}".format(class1, prob[0][0].item(), class2))
 
+        # Apply inverse transform (denormalization) on the images to retrieve original images.
         img1 = inv_transform(img1).cpu().numpy()[0]
         img2 = inv_transform(img2).cpu().numpy()[0]
         # show first image
diff --git a/libs/dataset.py b/libs/dataset.py
@@ -12,16 +12,30 @@
 from torchvision import transforms
 
 class Dataset(torch.utils.data.IterableDataset):
-    def __init__(self, path, shuffle_pairs=True, augment=False, testing=False):
+    def __init__(self, path, shuffle_pairs=True, augment=False):
+        '''
+        Create an iterable dataset from a directory containing sub-directories of 
+        entities with their images contained inside each sub-directory.
+
+            Parameters:
+                    path (str):                 Path to directory containing the dataset.
+                    shuffle_pairs (boolean):    Pass True when training, False otherwise. When False, the random seed is fixed so that image pair generation is deterministic.
+                    augment (boolean):          When True, images will be augmented using a standard set of transformations.
+
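+            Yields (on iteration):
+                    (image1, image2):           The pair of images.
+                    label (torch.FloatTensor):  shape=[1], 1.0 if both images belong to the same class, else 0.0.
+                    (class1, class2):           The classes of the two images.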
+        '''
         self.path = path
 
         self.feed_shape = [3, 224, 224]
         self.shuffle_pairs = shuffle_pairs
-        self.testing = testing
 
         self.augment = augment
 
         if self.augment:
+            # If images are to be augmented, add extra operations for it (first two).
             self.transform = transforms.Compose([
                 transforms.RandomAffine(degrees=20, translate=(0.2, 0.2), scale=(0.8, 1.2), shear=0.2),
                 transforms.RandomHorizontalFlip(p=0.5),
@@ -30,6 +44,7 @@ def __init__(self, path, shuffle_pairs=True, augment=False, testing=False):
                 transforms.Resize(self.feed_shape[1:])
             ])
         else:
+            # If no augmentation is needed then apply only the normalization and resizing operations.
             self.transform = transforms.Compose([
                 transforms.ToTensor(),
                 transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
@@ -39,6 +54,10 @@ def __init__(self, path, shuffle_pairs=True, augment=False, testing=False):
         self.create_pairs()
 
     def create_pairs(self):
+        '''
+        Creates two lists of indices that will form the pairs, to be fed for training or evaluation.
+        '''
+
         self.image_paths = glob.glob(os.path.join(self.path, "*/*.png"))
         self.image_classes = []
         self.class_indices = {}
@@ -56,9 +75,8 @@ def create_pairs(self):
         if self.shuffle_pairs:
             np.random.seed(int(time.time()))
             np.random.shuffle(self.indices1)
-        # elif self.testing:
-            # np.random.seed(int(time.time()))
         else:
+            # If shuffling is off, set the random seed to 1 so that pair generation is deterministic.
             np.random.seed(1)
 
         select_pos_pair = np.random.rand(len(self.image_paths)) < 0.5
@@ -79,7 +97,6 @@ def __iter__(self):
         self.create_pairs()
 
         for idx, idx2 in zip(self.indices1, self.indices2):
-            # idx2 = self.indices_pairs[idx]
 
             image_path1 = self.image_paths[idx]
             image_path2 = self.image_paths[idx2]
@@ -94,14 +111,7 @@ def __iter__(self):
                 image1 = self.transform(image1).float()
                 image2 = self.transform(image2).float()
 
-            # plt.imshow(image1[0])
-            # plt.imshow(image2[0])
-            # plt.show()
-
-            if self.testing:
-                yield (image1, image2), torch.FloatTensor([class1==class2]), (class1, class2)
-            else:
-                yield (image1, image2), torch.FloatTensor([class1==class2])
+            yield (image1, image2), torch.FloatTensor([class1==class2]), (class1, class2)
         
     def __len__(self):
         return len(self.image_paths)
diff --git a/libs/plot_training.py b/libs/plot_training.py
diff --git a/siamese/siamese_network.py b/siamese/siamese_network.py
@@ -6,14 +6,26 @@
 
 class SiameseNetwork(nn.Module):
     def __init__(self, backbone="resnet18"):
+        '''
+        Creates a siamese network with a network from torchvision.models as backbone.
+
+            Parameters:
+                    backbone (str): Options of the backbone networks can be found at https://pytorch.org/vision/stable/models.html
+        '''
+
         super().__init__()
 
         if backbone not in models.__dict__:
             raise Exception("No model named {} exists in torchvision.models.".format(backbone))
 
+        # Create a backbone network from the pretrained models provided in torchvision.models 
         self.backbone = models.__dict__[backbone](pretrained=True, progress=True)
+
+        # Get the number of features output by the last layer of the backbone network.
         out_features = list(self.backbone.modules())[-1].out_features
 
+        # Create an MLP (multi-layer perceptron) as the classification head. 
+        # Classifies whether the combined feature vector of the two images represents the same player or different players.
         self.cls_head = nn.Sequential(
             nn.Dropout(p=0.5),
             nn.Linear(out_features, 512),
@@ -24,17 +36,34 @@ def __init__(self, backbone="resnet18"):
             nn.Linear(512, 64),
             nn.BatchNorm1d(64),
             nn.Sigmoid(),
-            nn.Dropout(),
+            nn.Dropout(p=0.5),
 
             nn.Linear(64, 1),
             nn.Sigmoid(),
         )
 
     def forward(self, img1, img2):
+        '''
+        Returns the similarity value between two images.
+
+            Parameters:
+                    img1 (torch.Tensor): shape=[b, 3, 224, 224]
+                    img2 (torch.Tensor): shape=[b, 3, 224, 224]
+
+            where b = batch size
+
+            Returns:
+                    output (torch.Tensor): shape=[b, 1], Similarity of each pair of images
+        '''
+
+        # Pass both images through the backbone network to get their separate feature vectors.
         feat1 = self.backbone(img1)
         feat2 = self.backbone(img2)
         
+        # Multiply (element-wise) the feature vectors of the two images together, 
+        # to generate a combined feature vector representing the similarity between the two.
         combined_features = feat1 * feat2
 
+        # Pass the combined feature vector through classification head to get similarity value in the range of 0 to 1.
         output = self.cls_head(combined_features)
         return output
diff --git a/train.py b/train.py
@@ -67,10 +67,11 @@
 
     os.makedirs(args.out_path, exist_ok=True)
 
+    # Set device to CUDA if a CUDA device is available, else CPU
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     
-    train_dataset   = Dataset(args.train_path, shuffle_pairs=True, augment=True, testing=False)
-    val_dataset     = Dataset(args.val_path, shuffle_pairs=False, augment=False, testing=False)
+    train_dataset   = Dataset(args.train_path, shuffle_pairs=True, augment=True)
+    val_dataset     = Dataset(args.val_path, shuffle_pairs=False, augment=False)
     
     train_dataloader = DataLoader(train_dataset, batch_size=8, drop_last=True)
     val_dataloader   = DataLoader(val_dataset, batch_size=8)
@@ -93,8 +94,8 @@
         correct = 0
         total = 0
 
-        # pbar = tqdm()
-        for (img1, img2), y in train_dataloader:
+        # Training Loop Start
+        for (img1, img2), y, (class1, class2) in train_dataloader:
             img1, img2, y = map(lambda x: x.to(device), [img1, img2, y])
 
             prob = model(img1, img2)
@@ -112,14 +113,16 @@
         writer.add_scalar('train_acc', correct / total, epoch)
 
         print("\tTraining: Loss={:.2f}\t Accuracy={:.2f}\t".format(sum(losses)/len(losses), correct / total))
+        # Training Loop End
 
+        # Evaluation Loop Start
         model.eval()
 
         losses = []
         correct = 0
         total = 0
 
-        for (img1, img2), y in val_dataloader:
+        for (img1, img2), y, (class1, class2) in val_dataloader:
             img1, img2, y = map(lambda x: x.to(device), [img1, img2, y])
 
             prob = model(img1, img2)
@@ -134,7 +137,9 @@
         writer.add_scalar('val_acc', correct / total, epoch)
 
         print("\tValidation: Loss={:.2f}\t Accuracy={:.2f}\t".format(val_loss, correct / total))
+        # Evaluation Loop End
 
+        # Update "best.pth" model if val_loss in current epoch is lower than the best validation loss
         if val_loss < best_val:
             best_val = val_loss
             torch.save(
@@ -147,6 +152,7 @@
                 os.path.join(args.out_path, "best.pth")
             )            
 
+        # Save model based on the frequency defined by "args.save_after"
         if (epoch + 1) % args.save_after == 0:
             torch.save(
                 {