okankop
diff --git a/‎.gitignore
Lines changed: 14 additions & 0 deletions b/‎.gitignore
Lines changed: 14 additions & 0 deletions
diff --git a/‎LICENSE
Lines changed: 52 additions & 0 deletions b/‎LICENSE
Lines changed: 52 additions & 0 deletions
diff --git a/‎MLPmodule.py
Lines changed: 32 additions & 0 deletions b/‎MLPmodule.py
Lines changed: 32 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 67 additions & 2 deletions b/‎README.md
Lines changed: 67 additions & 2 deletions
diff --git a/‎__pycache__/MLPmodule.cpython-36.pyc
1.24 KB b/‎__pycache__/MLPmodule.cpython-36.pyc
1.24 KB
diff --git a/‎__pycache__/TRNmodule.cpython-36.pyc
5.89 KB b/‎__pycache__/TRNmodule.cpython-36.pyc
5.89 KB
diff --git a/‎__pycache__/dataset.cpython-36.pyc
5.56 KB b/‎__pycache__/dataset.cpython-36.pyc
5.56 KB
diff --git a/‎__pycache__/datasets_video.cpython-36.pyc
2.11 KB b/‎__pycache__/datasets_video.cpython-36.pyc
2.11 KB
diff --git a/‎__pycache__/models.cpython-36.pyc
10.7 KB b/‎__pycache__/models.cpython-36.pyc
10.7 KB
diff --git a/‎__pycache__/opts.cpython-36.pyc
2.4 KB b/‎__pycache__/opts.cpython-36.pyc
2.4 KB
diff --git a/‎__pycache__/transforms.cpython-36.pyc
14.7 KB b/‎__pycache__/transforms.cpython-36.pyc
14.7 KB
diff --git a/‎dataset.py
Lines changed: 160 additions & 0 deletions b/‎dataset.py
Lines changed: 160 additions & 0 deletions
@@ -0,0 +1,14 @@
+# file types to exclude
+*.tar
+*.mp4
+*.h5
+
+jester
+*.txt
+
+model
+*.pth.tar
+*.pth
+
+log
+*.csv
@@ -0,0 +1,52 @@
+BSD 2-Clause License for Motion Fused Frames 
+
+Copyright (c) 2017, Okan Köpüklü
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+BSD 2-Clause License for TSN-PyTorch
+
+Copyright (c) 2017, Multimedia Laboratary, The Chinese University of Hong Kong
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,32 @@
+import torch
+import torch.nn as nn
+
+class MLPmodule(torch.nn.Module):
+    """
+    This is the 2-layer MLP implementation used for linking spatio-temporal
+    features coming from different segments.
+    """
+    def __init__(self, img_feature_dim, num_frames, num_class):
+        super(MLPmodule, self).__init__()
+        self.num_frames = num_frames
+        self.num_class = num_class
+        self.img_feature_dim = img_feature_dim
+        self.num_bottleneck = 512
+        self.classifier = nn.Sequential(
+                                       nn.ReLU(),
+                                       nn.Linear(self.num_frames * self.img_feature_dim,
+                                                 self.num_bottleneck),
+                                       #nn.Dropout(0.90), # Add an extra DO if necess.
+                                       nn.ReLU(),
+                                       nn.Linear(self.num_bottleneck,self.num_class),
+                                       )
+    def forward(self, input):
+        input = input.view(input.size(0), self.num_frames*self.img_feature_dim)
+        input = self.classifier(input)
+        return input
+
+
+def return_MLP(relation_type, img_feature_dim, num_frames, num_class):
+    MLPmodel = MLPmodule(img_feature_dim, num_frames, num_class)
+
+    return MLPmodel
@@ -1,2 +1,67 @@
-# MFF-pytorch
-Motion Fused Frames implementation in PyTorch
+# Motion Fused Frames (MFFs)
+
+PyTorch implementation of Motion Fused Frames, built on top of the [TSN-pytorch](https://github.com/yjxiong/temporal-segment-networks) codebase.
+
+<p align="center"><img src="https://github.com/okankop/MFF-pytorch/blob/master/images/motion_fused_frames.jpg" align="middle" width="500" title="Motion Fused Frames" /></p>
+
+**Note**: always use `git clone --recursive https://github.com/okankop/MFF-pytorch` to clone this project.
+Otherwise you will not be able to use the Inception-series CNN architectures.
+
+### Dataset Preparation
+Download the [jester dataset](https://www.twentybn.com/datasets/something-something) or [NVIDIA dataset](http://research.nvidia.com/publication/online-detection-and-classification-dynamic-hand-gestures-recurrent-3d-convolutional) or [ChaLearn LAP IsoGD dataset](http://www.cbsr.ia.ac.cn/users/jwan/database/isogd.html). Decompress them into the same folder and use [process_dataset.py](process_dataset.py) to generate the index files for the train, val, and test splits. Properly set up the train, validation, and category meta files in [datasets_video.py](datasets_video.py). Finally, use the [flow_computation](flow_computation) directory to calculate the optical flow images using the Brox method.
+
+Assume the structure of data directories is the following:
+
+```misc
+~/MFF-pytorch/
+   datasets/
+      jester/
+         rgb/
+            .../ (directories of video samples)
+                .../ (jpg color frames)
+         flow/
+            u/
+               .../ (directories of video samples)
+                  .../ (jpg optical-flow-u frames)
+            v/
+               .../ (directories of video samples)
+                  .../ (jpg optical-flow-v frames)
+    model/
+       .../(saved models for the last checkpoint and best model)
+```
+
+
+### Running the Code
+You can simply run `python main.py` to start training with the default parameters. The following are some examples of training under different scenarios:
+
+* Train 4-segment network with 3 flow, 1 color frames (4-MFFs-3f1c architecture)
+```bash
+python main.py jester RGBFlow --arch BNInception --num_segments 4 \
+--consensus_type MLP --num_motion 3  --batch-size 32
+```
+
+* Train resuming the last checkpoint (4-MFFs-3f1c architecture)
+```bash
+python main.py jester RGBFlow --resume=<path-to-last-checkpoint> --arch BNInception \
+--consensus_type MLP --num_segments 4 --num_motion 3  --batch-size 32
+```
+
+* The command to test trained model (8-MFFs-3f1c architecture)
+
+```bash
+python test_models.py jester RGBFlow model/MFF_jester_RGBFlow_BNInception_segment8_3f1c_best.pth.tar --arch BNInception --consensus_type MLP --test_crops 1 --num_motion 3 --test_segments 8
+```
+
+All available GPUs are used for training. If you want to use only a subset of the GPUs, set `CUDA_VISIBLE_DEVICES=...`
+
+### Citation
+O. Köpüklü, N. Köse, G. Rigoll. Motion Fused Frames: Data Level Fusion Strategy for Hand Gesture Recognition, 2018 [PDF]
+```
+@article{kopuklu2018motion,
+    title = {Motion Fused Frames: Data Level Fusion Strategy for Hand Gesture Recognition},
+    author = {K\"op\"ukl\"u, Okan and K\"ose, Neslihan and Rigoll, Gerhard},
+}
+```
+
+### Acknowledgement
+We thank Yuanjun Xiong for releasing the [TSN-Pytorch codebase](https://github.com/yjxiong/temporal-segment-networks), on top of which we built our work. We also thank Bolei Zhou for the inspirational work [Temporal Relation Networks](https://arxiv.org/pdf/1711.08496.pdf), from which we imported [process_dataset.py](https://github.com/metalbubble/TRN-pytorch/blob/master/process_dataset.py) into our project.
@@ -0,0 +1,160 @@
+import torch.utils.data as data
+
+import random
+from PIL import Image
+import os
+import os.path
+import numpy as np
+from numpy.random import randint
+
+class VideoRecord(object):
+    def __init__(self, row):
+        self._data = row
+
+    @property
+    def path(self):
+        return self._data[0]
+
+    @property
+    def num_frames(self):
+        return int(self._data[1])
+
+    @property
+    def label(self):
+        return int(self._data[2])
+
+
+class TSNDataSet(data.Dataset):
+    def __init__(self, root_path, list_file,
+                 num_segments=3, new_length=1, modality='RGB',
+                 image_tmpl='img_{:05d}.jpg', transform=None,
+                 force_grayscale=False, random_shift=True,
+                 test_mode=False, dataset='jester'):
+
+        self.root_path = root_path
+        self.list_file = list_file
+        self.num_segments = num_segments
+        self.new_length = new_length
+        self.modality = modality
+        self.image_tmpl = image_tmpl
+        self.transform = transform
+        self.random_shift = random_shift
+        self.test_mode = test_mode
+        self.dataset = dataset
+
+        if self.modality == 'RGBDiff' or self.modality == 'RGBFlow':
+            self.new_length += 1# Diff needs one more image to calculate diff
+
+        self._parse_list()
+
+    def _load_image(self, directory, idx, isLast=False):
+        if self.modality == 'RGB' or self.modality == 'RGBDiff':
+            try:
+                return [Image.open(os.path.join(self.root_path, directory, self.image_tmpl.format(idx))).convert('RGB')]
+            except Exception:
+                print('error loading image:', os.path.join(self.root_path, directory, self.image_tmpl.format(idx)))
+                return [Image.open(os.path.join(self.root_path, directory, self.image_tmpl.format(1))).convert('RGB')]
+            
+        elif self.modality == 'Flow':
+            try:
+                idx_skip = 1 + (idx-1)*5
+                flow = Image.open(os.path.join(self.root_path, directory, self.image_tmpl.format(idx_skip))).convert('RGB')
+            except Exception:
+                print('error loading flow file:', os.path.join(self.root_path, directory, self.image_tmpl.format(idx_skip)))
+                flow = Image.open(os.path.join(self.root_path, directory, self.image_tmpl.format(1))).convert('RGB')
+            # the input flow file is RGB image with (flow_x, flow_y, blank) for each channel
+            flow_x, flow_y, _ = flow.split()
+            x_img = flow_x.convert('L')
+            y_img = flow_y.convert('L')
+            return [x_img, y_img]
+
+        elif self.modality == 'RGBFlow':
+            if isLast:
+                return [Image.open(os.path.join(self.root_path, "rgb", directory, self.image_tmpl.format(idx))).convert('RGB')]
+            else:
+                x_img = Image.open(os.path.join(self.root_path, "flow/u", directory, self.image_tmpl.format(idx))).convert('L')
+                y_img = Image.open(os.path.join(self.root_path, "flow/v", directory, self.image_tmpl.format(idx))).convert('L')
+                return [x_img, y_img]
+
+
+    def _parse_list(self):
+        # check the frame number is large >3:
+        # usualy it is [video_id, num_frames, class_idx]
+        tmp = [x.strip().split(' ') for x in open(self.list_file)]
+        tmp = [item for item in tmp if int(item[1])>=3]
+        self.video_list = [VideoRecord(item) for item in tmp]
+        print('video number:%d'%(len(self.video_list)))
+
+    def _sample_indices(self, record):
+        """
+
+        :param record: VideoRecord
+        :return: list
+        """
+        average_duration = (record.num_frames - self.new_length + 1) // self.num_segments
+
+        if average_duration > 0:
+            offsets = np.multiply(list(range(self.num_segments)), average_duration) + randint(average_duration, size=self.num_segments)
+        elif record.num_frames > self.num_segments:
+            offsets = np.sort(randint(record.num_frames - self.new_length + 1, size=self.num_segments))
+        else:
+            offsets = np.zeros((self.num_segments,))
+        return offsets + 1
+
+    def _get_val_indices(self, record):
+        if record.num_frames > self.num_segments + self.new_length - 1:
+            tick = (record.num_frames - self.new_length + 1) / float(self.num_segments)
+            offsets = np.array([int(tick / 2.0 + tick * x) for x in range(self.num_segments)])
+        else:
+            offsets = np.zeros((self.num_segments,))
+        return offsets + 1
+
+    def _get_test_indices(self, record):
+        tick = (record.num_frames - self.new_length + 1) / float(self.num_segments)
+        offsets = np.array([int(tick / 2.0 + tick * x) for x in range(self.num_segments)])
+        return offsets + 1
+
+    def __getitem__(self, index):
+        record = self.video_list[index]
+        # check this is a legit video folder
+        if self.modality == 'RGBFlow':
+            while not os.path.exists(os.path.join(self.root_path, "rgb", record.path, self.image_tmpl.format(1))):
+                index = np.random.randint(len(self.video_list))
+                record = self.video_list[index]
+        else:
+            while not os.path.exists(os.path.join(self.root_path, record.path, self.image_tmpl.format(1))):
+                index = np.random.randint(len(self.video_list))
+                record = self.video_list[index]
+
+        if not self.test_mode:
+            segment_indices = self._sample_indices(record) if self.random_shift else self._get_val_indices(record)
+        else:
+            segment_indices = self._get_test_indices(record)
+
+        return self.get(record, segment_indices)
+
+    def get(self, record, indices):
+        images = list()
+        for seg_ind in indices:
+            p = int(seg_ind)
+            for i in range(self.new_length):
+                if self.modality == 'RGBFlow':
+                    if i == self.new_length - 1:
+                        seg_imgs = self._load_image(record.path, p, True)
+                    else:
+                        if p == record.num_frames:
+                            seg_imgs = self._load_image(record.path, p-1)
+                        else:
+                            seg_imgs = self._load_image(record.path, p)
+                else:
+                    seg_imgs = self._load_image(record.path, p)
+
+                images.extend(seg_imgs)
+                if p < record.num_frames:
+                    p += 1
+
+        process_data = self.transform(images)
+        return process_data, record.label
+
+    def __len__(self):
+        return len(self.video_list)
-Original file line number
+Diff line change
@@ @@ -0,0 +1,14 @@ @@
 +# file types to exclude
 +*.tar
 +*.mp4
 +*.h5
++
 +jester
 +*.txt
++
 +model
 +*.pth.tar
 +*.pth
++
 +log
 +*.csv