Fix data preparation and mask uncertainty computation

zsef123 · zsef123 · commit 83ded0aa3080 · 2020-02-10T18:13:00.000+09:00
diff --git a/configs/default.yaml b/configs/default.yaml
@@ -12,15 +12,15 @@ net:
     pretrained: False
     resnet: "res53"
     head_in_ch: 2048
-    num_classes: 20
+    num_classes: 34
   pointhead:
-    in_c: 532 # 512 + 20
-    num_classes: 20
+    in_c: 546 # 512 + num_classes
+    num_classes: 34
     k: 3
     beta: 0.75
 
 run:
   epochs: 200
 
 apex:
-  opt: "O0"
+  opt: "O0"
diff --git a/datas/__init__.py b/datas/__init__.py
@@ -11,7 +11,7 @@ def get_voc(C, split="train"):
             ToTensor(),
             RandomCrop((256, 512)),
             Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-        ])
+        ]) 
     else:
         transforms = Compose([
             ToTensor(),
diff --git a/datas/transforms.py b/datas/transforms.py
@@ -1,5 +1,8 @@
 import random
 
+import numpy as np
+
+import torch
 import torch.nn.functional as F
 from torchvision import transforms
 from torchvision.transforms.functional import normalize
@@ -10,10 +13,10 @@ def __init__(self, shape):
         self.shape = [shape, shape] if isinstance(shape, int) else shape
 
     def __call__(self, img, mask):
-        img, mask = img.unsqueeze(0), mask.unsqueeze(0)
+        img, mask = img.unsqueeze(0), mask.unsqueeze(0).float()
         img = F.interpolate(img, size=self.shape, mode="bilinear", align_corners=False)
         mask = F.interpolate(mask, size=self.shape, mode="bilinear", align_corners=False)
-        return img[0], mask[0]
+        return img[0], mask[0].byte()
 
 
 class RandomCrop:
@@ -54,7 +57,9 @@ def __init__(self):
         self.to_tensor = transforms.ToTensor()
 
     def __call__(self, img, mask):
-        return self.to_tensor(img), self.to_tensor(mask)
+        img = self.to_tensor(img)
+        mask = torch.from_numpy(np.array(mask))
+        return img, mask[None]
 
 
 class Normalize:
diff --git a/infer.py b/infer.py
@@ -7,7 +7,7 @@
 @torch.no_grad()
 def infer(loader, net, device):
     net.eval()
-    metric = ConfusionMatrix(21)
+    metric = ConfusionMatrix(len(loader.dataset.classes) - 1)
     for i, (x, gt) in enumerate(loader):
         x = x.to(device, non_blocking=True)
         gt = gt.squeeze(1).to(device, dtype=torch.long, non_blocking=True)
diff --git a/model/sampling_points.py b/model/sampling_points.py
@@ -52,11 +52,11 @@ def sampling_points(mask, N, k=3, beta=0.75, training=True):
     assert mask.dim() == 4, "Dim must be N(Batch)CHW"
     device = mask.device
     B, _, H, W = mask.shape
+    mask, _ = mask.sort(1, descending=True)
 
     if not training:
         H_step, W_step = 1 / H, 1 / W
         N = min(H * W, N)
-
         uncertainty_map = -1 * (mask[:, 0] - mask[:, 1])
         _, idx = uncertainty_map.view(B, -1).topk(N, dim=1)