@@ -0,0 +1,129 @@
+import copy
+import time
+
+import torch
+import torch.optim as optim
+from torch import nn
+
+# TODO: get these properly
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+feature_extract = True
+model_ft = None # TODO
+dataloaders_dict = None # TODO
+model_name = None # TODO
+num_epochs = None # TODO
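+
+# A minimal sketch (an assumption, not part of the original change) of how the
+# TODO placeholders above might be filled in, e.g. with a torchvision ResNet-18
+# and ImageFolder dataloaders; num_classes, the data/ paths, and the epoch count
+# below are hypothetical example values:
+#
+#   from torchvision import datasets, models, transforms
+#
+#   num_classes = 2
+#   model_ft = models.resnet18(pretrained=True)
+#   if feature_extract:
+#       # Freeze the pretrained backbone; only the newly added head is trained.
+#       for param in model_ft.parameters():
+#           param.requires_grad = False
+#   model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
+#   preprocess = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
+#   dataloaders_dict = {
+#       split: torch.utils.data.DataLoader(
+#           datasets.ImageFolder("data/" + split, preprocess),
+#           batch_size=8, shuffle=(split == "train"))
+#       for split in ["train", "val"]}
+#   model_name = "resnet"
+#   num_epochs = 15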
+
+
+def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
+    """Run training/validation for num_epochs, tracking the best validation accuracy.
+
+    Returns the model loaded with its best weights and the list of per-epoch val accuracies."""
+    since = time.time()
+
+    val_acc_history = []
+
+    best_model_wts = copy.deepcopy(model.state_dict())
+    best_acc = 0.0
+
+    # Detect if we have a GPU available
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+    for epoch in range(num_epochs):
+        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
+        print('-' * 10)
+
+        # Each epoch has a training and validation phase
+        for phase in ['train', 'val']:
+            if phase == 'train':
+                model.train()  # Set model to training mode
+            else:
+                model.eval()  # Set model to evaluate mode
+
+            running_loss = 0.0
+            running_corrects = 0
+
+            # Iterate over data.
+            for inputs, labels in dataloaders[phase]:
+                inputs = inputs.to(device)
+                labels = labels.to(device)
+
+                # zero the parameter gradients
+                optimizer.zero_grad()
+
+                # forward
+                # track history only if in train
+                with torch.set_grad_enabled(phase == 'train'):
+                    # Get model outputs and calculate loss
+                    # Special case for inception because in training it has an auxiliary output. In train
+                    # mode we calculate the loss by summing the final output and the auxiliary output
+                    # but in testing we only consider the final output.
+                    if is_inception and phase == 'train':
+                        # https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
+                        outputs, aux_outputs = model(inputs)
+                        loss1 = criterion(outputs, labels)
+                        loss2 = criterion(aux_outputs, labels)
+                        loss = loss1 + 0.4 * loss2
+                    else:
+                        outputs = model(inputs)
+                        loss = criterion(outputs, labels)
+
+                    _, preds = torch.max(outputs, 1)
+
+                    # backward + optimize only if in training phase
+                    if phase == 'train':
+                        loss.backward()
+                        optimizer.step()
+
+                # statistics
+                running_loss += loss.item() * inputs.size(0)
+                running_corrects += torch.sum(preds == labels.data)
+
+            epoch_loss = running_loss / len(dataloaders[phase].dataset)
+            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
+
+            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
+
+            # deep copy the model if it is the best seen so far on the validation set
+            if phase == 'val' and epoch_acc > best_acc:
+                best_acc = epoch_acc
+                best_model_wts = copy.deepcopy(model.state_dict())
+            if phase == 'val':
+                val_acc_history.append(epoch_acc)
+
+        print()
+
+    time_elapsed = time.time() - since
+    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
+    print('Best val Acc: {:.4f}'.format(best_acc))
+
+    # load best model weights
+    model.load_state_dict(best_model_wts)
+    return model, val_acc_history
+
+
+# Send the model to GPU
+model_ft = model_ft.to(device)
+
+# Gather the parameters to be optimized/updated in this run. If we are
+# finetuning we will be updating all parameters. However, if we are using
+# the feature-extraction approach, we will only update the parameters we
+# have just initialized, i.e. the parameters whose requires_grad is True.
+params_to_update = model_ft.parameters()
+print("Params to learn:")
+if feature_extract:
+    params_to_update = []
+    for name, param in model_ft.named_parameters():
+        if param.requires_grad:
+            params_to_update.append(param)
+            print("\t", name)
+else:
+    for name, param in model_ft.named_parameters():
+        if param.requires_grad:
+            print("\t", name)
+
+# Optimize the gathered parameters with SGD
+optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
+
+# Set up the loss function
+criterion = nn.CrossEntropyLoss()
+
+# Train and evaluate
+model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs,
+                             is_inception=(model_name == "inception"))
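+
+# A possible follow-up (a sketch/assumption, not part of the original change): persist
+# the best weights and inspect the per-epoch validation accuracies, e.g.
+#
+#   torch.save(model_ft.state_dict(), "model_ft_best.pth")  # hypothetical filename
+#   print([acc.item() for acc in hist])  # val accuracy per epoch as plain floats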