@@ -67,52 +67,67 @@ def get_kfold_ds(fold, source_imgs, target_imgs):
    return train_ds, valid_ds

def valid(model, valid_ds, shuffle=False):
-    valid_dl = DataLoader(valid_ds, batch_size=config.BATCH_SIZE, shuffle=shuffle)
+    valid_dl = DataLoader(valid_ds, batch_size=config.BATCH_SIZE, num_workers=int(os.cpu_count() / 8), shuffle=shuffle)
+    criterion = nn.MSELoss(reduction='mean')
    losses = []
    with torch.no_grad():
        model.eval()
        with tqdm(valid_dl, desc='Eval', miniters=10) as progress:
            for i, (source, target) in enumerate(progress):
+                source = source.to(config.DEVICE)
+                target = target.to(config.DEVICE)
                with autocast():
                    img_pred = model(source)
-                    ssim_loss = 1 - ssim(img_pred, target)
-                    losses.append(ssim_loss)
+                    ssim_loss = criterion(img_pred, target)
+                    # ssim_loss = 1 - ssim(img_pred, target)
+                    losses.append(ssim_loss.item())
+                    progress.set_description(f'Valid loss: {ssim_loss:.02f}')
+
    return np.mean(losses)


-def train(train_ds, logger, name):
+def train(train_ds, valid_ds, logger, name):
    print(len(train_ds))
-    set_seed(123)
-    train_dl = DataLoader(train_ds, batch_size=config.BATCH_SIZE, shuffle=True)
+    set_seed(11)
+    train_dl = DataLoader(train_ds, batch_size=config.BATCH_SIZE, num_workers=int(os.cpu_count() / 8), shuffle=True)
    model = fujiModel()
    model = model.to(config.DEVICE)
    optim = torch.optim.Adam(model.parameters())
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optim, max_lr=config.ONE_CYCLE_MAX_LR, epochs=config.NUM_EPOCHS, steps_per_epoch=len(train_dl))
    scaler = GradScaler()
+
+    criterion = nn.MSELoss(reduction='mean')
+
    for epoch in tqdm(range(config.NUM_EPOCHS)):
-        for batch_idx, (source, target) in enumerate(train_dl):
-            optim.zero_grad()
-            source = source.to(config.DEVICE)
-            target = target.to(config.DEVICE)
-            with autocast():
-                img_pred = model(source)
-                ssim_loss = 1 - ssim(img_pred, target)
-                print(ssim_loss.is_cuda)
-            if torch.isinf(ssim_loss).any() or torch.isnan(ssim_loss).any():
-                print(f'Bad loss, skipping the batch {batch_idx}')
-                del ssim_loss, img_pred
-                gc_collect()
-                continue
-
-            # scaler is needed to prevent "gradient underflow"
-            scaler.scale(ssim_loss).backward()
-            scaler.step(optim)
-            scaler.update()
-            logger.log({'loss': (ssim_loss), 'lr': scheduler.get_last_lr()[0]})
+        with tqdm(train_dl, desc='Train', miniters=10) as progress:
+            for batch_idx, (source, target) in enumerate(progress):
+                optim.zero_grad()
+                source = source.to(config.DEVICE)
+                target = target.to(config.DEVICE)
+                with autocast():
+                    img_pred = model(source)
+                    # ssim_loss = 1 - ssim(img_pred, target)
+                    ssim_loss = criterion(img_pred, target)
+                if torch.isinf(ssim_loss).any() or torch.isnan(ssim_loss).any():
+                    print(ssim_loss)
+                    print(f'Bad loss: {ssim_loss}, skipping the batch {batch_idx}')
+                    del ssim_loss, img_pred
+                    gc_collect()
+                    continue
+
+                # scaler is needed to prevent "gradient underflow"
+                scaler.scale(ssim_loss).backward()
+                scaler.step(optim)
+                scaler.update()
+                # optim.step()
+                logger.log({'loss': ssim_loss, 'lr': scheduler.get_last_lr()[0]})
+                progress.set_description(f'Train loss: {ssim_loss:.02f}')
+
        scheduler.step()
+        valid(model, valid_ds)
        save_model(name, model)
    return model

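The new training loop combines autocast, GradScaler and OneCycleLR. For reference, here is a minimal, self-contained sketch of that mixed-precision step pattern, using a placeholder model and in-memory batches rather than the repo's fujiModel, config or DataLoader setup; note that OneCycleLR constructed with epochs and steps_per_epoch is designed to be advanced once per batch, which is how the sketch steps it.

import torch
from torch import nn
from torch.cuda.amp import autocast, GradScaler

device = 'cuda' if torch.cuda.is_available() else 'cpu'
use_amp = device == 'cuda'

model = nn.Linear(16, 16).to(device)                      # stand-in for the repo's model
optim = torch.optim.Adam(model.parameters())
train_dl = [(torch.randn(4, 16), torch.randn(4, 16)) for _ in range(8)]
num_epochs = 2
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optim, max_lr=1e-3, epochs=num_epochs, steps_per_epoch=len(train_dl))
scaler = GradScaler(enabled=use_amp)
criterion = nn.MSELoss(reduction='mean')

for epoch in range(num_epochs):
    model.train()
    for source, target in train_dl:
        source, target = source.to(device), target.to(device)
        optim.zero_grad()
        with autocast(enabled=use_amp):        # forward pass and loss in reduced precision
            loss = criterion(model(source), target)
        if not torch.isfinite(loss):           # skip batches that produced NaN/Inf
            continue
        scaler.scale(loss).backward()          # scale the loss so fp16 gradients don't underflow
        scaler.step(optim)                     # unscales gradients, then runs optim.step()
        scaler.update()                        # adapt the scale factor for the next iteration
        scheduler.step()                       # OneCycleLR is advanced once per batch
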
@@ -140,7 +155,7 @@ def main():
        name = f'fakeji-fold{fold}'
        with wandb.init(project='fakeji', name=name, entity='jimmydut') as run:
            gc_collect()
-            models.append(train(train_ds, run, name))
+            models.append(train(train_ds, valid_ds, run, name))


if __name__ == '__main__':
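
train() now calls valid() at the end of every epoch, and valid() switches the model to eval mode. Below is a hedged sketch of a per-epoch validation helper along the same lines (run_valid, valid_dl, device and the default criterion are placeholder names, not from this repo) that also hands the model back in training mode and averages the per-batch losses as plain floats.

import torch
from torch import nn

@torch.no_grad()
def run_valid(model, valid_dl, device, criterion=nn.MSELoss(reduction='mean')):
    model.eval()                                  # disable dropout / use running BN statistics
    losses = []
    for source, target in valid_dl:
        pred = model(source.to(device))
        losses.append(criterion(pred, target.to(device)).item())
    model.train()                                 # return the model in training mode for the next epoch
    return sum(losses) / max(len(losses), 1)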