|
@@ -4,13 +4,75 @@ import json
|
|
import os
|
|
import os
|
|
import pickle
|
|
import pickle
|
|
from glob import glob
|
|
from glob import glob
|
|
-from typing import Tuple
|
|
|
|
|
|
+from typing import Tuple, List
|
|
|
|
|
|
import torch
|
|
import torch
|
|
import torch.utils.data as data
|
|
import torch.utils.data as data
|
|
from PIL import Image
|
|
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
class AV(data.Dataset):
    """Base audio-visual dataset: a thin list-backed ``torch`` Dataset.

    Subclasses are expected to populate ``self.data`` with
    ``(audio, image, label)`` tuples during their ``__init__``.
    """

    def __init__(self, path: str):
        # Root directory the samples are loaded from (used by subclasses).
        self.path = path
        # Flat list of samples; empty until a subclass fills it.
        self.data = []

    def __len__(self):
        """Number of samples currently loaded."""
        return len(self.data)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, int]:
        """Return the pre-built ``(audio, image, label)`` tuple at *idx*."""
        return self.data[idx]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class AudioVideo(AV):
    """Dataset of ``(audio, image, label)`` samples read from pickle files.

    Every ``*.pkl`` file directly under *path* must unpickle to a tuple
    ``(audios, images, label)`` where ``audios`` and ``images`` are
    parallel sequences; index ``i`` of each becomes one sample that
    shares the file's label.

    Per-sample format (shapes taken from the original author's comment —
    TODO confirm against the actual pickles):
        audio: torch.Tensor of shape (1, 96, 64)
        image: torch.Tensor of shape (3, 224, 224)
        label: int (0 or 1)
    """

    def __init__(self, path: str):
        super().__init__(path)

        # NOTE(review): pickle.load executes arbitrary code — only load
        # trusted files.
        for file_path in glob(f'{path}/*.pkl'):
            # 'with' guarantees the handle is closed (the original
            # pickle.load(open(...)) leaked it).
            with open(file_path, 'rb') as f:
                audios, images, label = pickle.load(f)
            self.data += [(audios[i], images[i], label)
                          for i in range(len(audios))]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class AudioVideo3D(AV):
    """Dataset of ``(audio, clip, label)`` samples with a temporal image axis.

    Like :class:`AudioVideo`, but each visual sample is a clip of
    ``frames`` stacked images instead of a single image.

    Per-sample format (shapes taken from the original author's comment —
    TODO confirm against the actual pickles):
        audio: torch.Tensor of shape (1, 96, 64)
        clip:  torch.Tensor of shape (3, frames, 224, 224)
        label: int (0 or 1)
    """

    def __init__(self, path: str, frames: int = 16):
        """Load every ``*.pkl`` under *path*.

        ``frames`` (new, defaults to the previously hard-coded 16) is the
        temporal depth of each clip.
        """
        super().__init__(path)

        # NOTE(review): pickle.load executes arbitrary code — only load
        # trusted files.
        for file_path in glob(f'{path}/*.pkl'):
            # 'with' guarantees the handle is closed (the original
            # pickle.load(open(...)) leaked it).
            with open(file_path, 'rb') as f:
                audios, images, label = pickle.load(f)
            images_temporal = self._process_temporal_tensor(images, frames)
            self.data += [(audios[i], images_temporal[i], label)
                          for i in range(len(audios))]

    @staticmethod
    def _process_temporal_tensor(images: List[torch.Tensor],
                                 frames: int) -> List[torch.Tensor]:
        """Build one (3, frames, 224, 224) clip per input image index.

        Unfilled slots stay zero (zero-padding at the front of the clip).

        NOTE(review): the fill below copies ``images[0..min(i, frames)-1]``
        into the trailing slots for sample ``i`` — i.e. the same oldest
        images for every sample — rather than the window ending at ``i``
        (``images[i - j]``). That looks unintended but is preserved here;
        confirm before changing.
        """
        out = []
        for i in range(len(images)):
            e = torch.zeros((frames, 3, 224, 224))
            # Seed the most recent slot; overwritten by j == 0 once i >= 1.
            e[-1] = images[0]
            for j in range(min(i, frames)):
                e[-1 - j] = images[j]
            # (frames, 3, H, W) -> (3, frames, H, W): channels first,
            # temporal axis second, as 3D conv stacks expect.
            out.append(e.permute((1, 0, 2, 3)))
        return out
|
|
|
|
+
|
|
|
|
+
|
|
def pil_loader(path):
|
|
def pil_loader(path):
|
|
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
|
|
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
|
|
with open(path, 'rb') as f:
|
|
with open(path, 'rb') as f:
|
|
@@ -139,25 +201,3 @@ class Video(data.Dataset):
|
|
|
|
|
|
def __len__(self):
|
|
def __len__(self):
|
|
return len(self.data)
|
|
return len(self.data)
|
|
-
|
|
|
|
-
|
|
|
|
-class AudioVideo(data.Dataset):
|
|
|
|
- def __init__(self, path: str):
|
|
|
|
- self.path = path
|
|
|
|
- self.data = []
|
|
|
|
-
|
|
|
|
- for file_path in glob(f'{path}/*.pkl'):
|
|
|
|
- audios, images, label = pickle.load(open(file_path, 'rb'))
|
|
|
|
- self.data += [(audios[i], images[i], label) for i in range(len(audios))]
|
|
|
|
-
|
|
|
|
- def __len__(self):
|
|
|
|
- return len(self.data)
|
|
|
|
-
|
|
|
|
- def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, int]:
|
|
|
|
- # output format:
|
|
|
|
- # return (
|
|
|
|
- # torch.rand((1, 96, 64)),
|
|
|
|
- # torch.rand((3, 224, 224)),
|
|
|
|
- # np.random.choice([0, 1])
|
|
|
|
- # )
|
|
|
|
- return self.data[idx]
|
|
|