
add notebooks

Amir Ziai · 5 years ago · commit b55822f0f3
7 changed files with 771 additions and 1 deletion:

  1. data.py              +139  -0
  2. dev.ipynb            +235  -0
  3. dev2.ipynb           +235  -0
  4. kissing_detector.py    +0  -1
  5. pipeline.py           +28  -0
  6. requirements.txt       +5  -0
  7. train.py             +129  -0

+ 139 - 0
data.py

@@ -0,0 +1,139 @@
+import copy
+import functools
+import os
+
+import torch
+import torch.utils.data as data
+from PIL import Image
+# import accimage
+import json
+
+
+def pil_loader(path):
+    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
+    with open(path, 'rb') as f:
+        with Image.open(f) as img:
+            return img.convert('RGB')
+
+
+def accimage_loader(path):
+    # try:
+    #     return accimage.Image(path)
+    # except IOError:
+    #     # Potentially a decoding problem, fall back to PIL.Image
+    #     return pil_loader(path)
+    return pil_loader(path)
+
+
+def get_default_image_loader():
+    from torchvision import get_image_backend
+    if get_image_backend() == 'accimage':
+        return accimage_loader
+    else:
+        return pil_loader
+
+
+def video_loader(video_dir_path, frame_indices, image_loader):
+    video = []
+    for i in frame_indices:
+        image_path = os.path.join(video_dir_path, 'image_{:05d}.jpg'.format(i))
+        if os.path.exists(image_path):
+            video.append(image_loader(image_path))
+        else:
+            return video  # stop at the first missing frame and return what was loaded
+
+    return video
+
+
+def get_default_video_loader():
+    image_loader = get_default_image_loader()
+    return functools.partial(video_loader, image_loader=image_loader)
+
+
+def load_annotation_data(data_file_path):
+    with open(data_file_path, 'r') as data_file:
+        return json.load(data_file)
+
+
+def get_class_labels(data):
+    class_labels_map = {}
+    index = 0
+    for class_label in data['labels']:
+        class_labels_map[class_label] = index
+        index += 1
+    return class_labels_map
+
+
+def get_video_names_and_annotations(data, subset):
+    video_names = []
+    annotations = []
+
+    for key, value in data['database'].items():
+        this_subset = value['subset']
+        if this_subset == subset:
+            if subset == 'testing':
+                video_names.append('test/{}'.format(key))
+            else:
+                label = value['annotations']['label']
+                video_names.append('{}/{}'.format(label, key))
+                annotations.append(value['annotations'])
+
+    return video_names, annotations
+
+
+def make_dataset(video_path, sample_duration):
+    dataset = []
+
+    n_frames = len(os.listdir(video_path))
+
+    begin_t = 1
+    end_t = n_frames
+    sample = {
+        'video': video_path,
+        'segment': [begin_t, end_t],
+        'n_frames': n_frames,
+    }
+
+    step = sample_duration
+    for i in range(1, (n_frames - sample_duration + 1), step):
+        sample_i = copy.deepcopy(sample)
+        sample_i['frame_indices'] = list(range(i, i + sample_duration))
+        sample_i['segment'] = torch.IntTensor([i, i + sample_duration - 1])
+        dataset.append(sample_i)
+
+    return dataset
+
+
+class Video(data.Dataset):
+    def __init__(self, video_path,
+                 spatial_transform=None, temporal_transform=None,
+                 sample_duration=16, get_loader=get_default_video_loader):
+        self.data = make_dataset(video_path, sample_duration)
+
+        self.spatial_transform = spatial_transform
+        self.temporal_transform = temporal_transform
+        self.loader = get_loader()
+
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+        Returns:
+            tuple: (clip, target) where clip is a (channels, frames, height, width) tensor and target is the clip's [start, end] frame segment.
+        """
+        path = self.data[index]['video']
+
+        frame_indices = self.data[index]['frame_indices']
+        if self.temporal_transform is not None:
+            frame_indices = self.temporal_transform(frame_indices)
+        clip = self.loader(path, frame_indices)
+        if self.spatial_transform is not None:
+            clip = [self.spatial_transform(img) for img in clip]
+        clip = torch.stack(clip, 0).permute(1, 0, 2, 3)
+
+        target = self.data[index]['segment']
+
+        return clip, target
+
+    def __len__(self):
+        return len(self.data)
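
A minimal usage sketch for the Video dataset above, not part of this commit: it assumes frames have already been extracted to a hypothetical directory such as frames/some_video/image_00001.jpg, and the torchvision transforms are illustrative placeholders.

import torch
from torchvision import transforms
from data import Video

# Illustrative per-frame transform (PIL image -> tensor); not prescribed by the commit
spatial = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
])

# 'frames/some_video' is a made-up path containing image_00001.jpg, image_00002.jpg, ...
dataset = Video('frames/some_video', spatial_transform=spatial, sample_duration=16)
loader = torch.utils.data.DataLoader(dataset, batch_size=2)

for clip, segment in loader:
    # clip: (batch, channels, sample_duration, height, width); segment: (batch, 2) start/end frame indices
    print(clip.shape, segment)
    break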

+ 235 - 0
dev.ipynb

File diff suppressed because it is too large


+ 235 - 0
dev2.ipynb

File diff suppressed because it is too large


+ 0 - 1
kissing_detector.py

@@ -1,6 +1,5 @@
 import torch
 from torch import nn
-
 import vggish
 from conv import convnet_init
 

+ 28 - 0
pipeline.py

@@ -0,0 +1,28 @@
+import cv2
+from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
+import numpy as np
+
+
+def slice_clips(segments, root, fps=2):
+    for path, classes in segments.items():
+
+        for cls, ts in classes.items():
+            for i, (t1, t2) in enumerate(ts):
+                set_ = np.random.choice(['train', 'val'], p=[2 / 3, 1 / 3])
+                # get all the still frames
+                file_name, ext = path.rsplit('.', 1)
+                target = f"{root}{file_name}_{cls}_{i + 1}.{ext}"
+                print(f'target: {target}')
+                ffmpeg_extract_subclip(f'{root}{path}', t1, t2, targetname=target)
+                vidcap = cv2.VideoCapture(target)
+                vidcap.set(cv2.CAP_PROP_FPS, fps)  # note: OpenCV may ignore this for file-based captures
+                print(vidcap.get(cv2.CAP_PROP_FPS))
+                success, image = vidcap.read()
+                count = 0
+                while success:
+                    frame_path = f'{root}casino/{set_}/{cls}/{file_name}_{i}_{count + 1}.jpg'
+                    # print(frame_path)
+                    cv2.imwrite(frame_path, image)  # save frame as JPEG file
+                    success, image = vidcap.read()
+                    # print('Read a new frame: ', success)
+                    count += 1
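
A hypothetical call to slice_clips, not part of this commit, to show the expected shape of segments: a mapping from video file name to class label to (start_second, end_second) pairs. The file name, labels, timestamps, and root path below are invented.

from pipeline import slice_clips

segments = {
    'casino.mp4': {
        'kissing': [(120.0, 135.5), (2410.0, 2428.0)],
        'not_kissing': [(300.0, 320.0)],
    },
}

# root holds the source video; still frames are written under {root}casino/{train,val}/{class}/
slice_clips(segments, root='/data/videos/', fps=2)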

+ 5 - 0
requirements.txt

@@ -2,3 +2,8 @@ torch
 torchvision
 resampy
 soundfile
+Pillow
+# accimage  (optional: not on PyPI, usually installed via conda; data.py falls back to the Pillow loader)
+numpy
+moviepy
+opencv-python

+ 129 - 0
train.py

@@ -0,0 +1,129 @@
+import copy
+import time
+
+import torch
+import torch.optim as optim
+from torch import nn
+
+# TODO: get these properly
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+feature_extract = True
+model_ft = None  # TODO
+dataloaders_dict = None  # TODO
+model_name = None  # TODO
+num_epochs = 25  # TODO
+
+
+def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
+    since = time.time()
+
+    val_acc_history = []
+
+    best_model_wts = copy.deepcopy(model.state_dict())
+    best_acc = 0.0
+
+    # Detect if we have a GPU available
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+    for epoch in range(num_epochs):
+        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
+        print('-' * 10)
+
+        # Each epoch has a training and validation phase
+        for phase in ['train', 'val']:
+            if phase == 'train':
+                model.train()  # Set model to training mode
+            else:
+                model.eval()  # Set model to evaluate mode
+
+            running_loss = 0.0
+            running_corrects = 0
+
+            # Iterate over data.
+            for inputs, labels in dataloaders[phase]:
+                inputs = inputs.to(device)
+                labels = labels.to(device)
+
+                # zero the parameter gradients
+                optimizer.zero_grad()
+
+                # forward
+                # track history if only in train
+                with torch.set_grad_enabled(phase == 'train'):
+                    # Get model outputs and calculate loss
+                    # Special case for inception because in training it has an auxiliary output. In train
+                    #   mode we calculate the loss by summing the final output and the auxiliary output
+                    #   but in testing we only consider the final output.
+                    if is_inception and phase == 'train':
+                        # https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
+                        outputs, aux_outputs = model(inputs)
+                        loss1 = criterion(outputs, labels)
+                        loss2 = criterion(aux_outputs, labels)
+                        loss = loss1 + 0.4 * loss2
+                    else:
+                        outputs = model(inputs)
+                        loss = criterion(outputs, labels)
+
+                    _, preds = torch.max(outputs, 1)
+
+                    # backward + optimize only if in training phase
+                    if phase == 'train':
+                        loss.backward()
+                        optimizer.step()
+
+                # statistics
+                running_loss += loss.item() * inputs.size(0)
+                running_corrects += torch.sum(preds == labels.data)
+
+            epoch_loss = running_loss / len(dataloaders[phase].dataset)
+            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
+
+            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
+
+            # deep copy the model
+            if phase == 'val' and epoch_acc > best_acc:
+                best_acc = epoch_acc
+                best_model_wts = copy.deepcopy(model.state_dict())
+            if phase == 'val':
+                val_acc_history.append(epoch_acc)
+
+        print()
+
+    time_elapsed = time.time() - since
+    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
+    print('Best val Acc: {:4f}'.format(best_acc))
+
+    # load best model weights
+    model.load_state_dict(best_model_wts)
+    return model, val_acc_history
+
+
+# Send the model to GPU
+model_ft = model_ft.to(device)
+
+# Gather the parameters to be optimized/updated in this run. If we are
+#  finetuning we will be updating all parameters. However, if we are
+#  doing feature extract method, we will only update the parameters
+#  that we have just initialized, i.e. the parameters with requires_grad
+#  is True.
+params_to_update = model_ft.parameters()
+print("Params to learn:")
+if feature_extract:
+    params_to_update = []
+    for name, param in model_ft.named_parameters():
+        if param.requires_grad:
+            params_to_update.append(param)
+            print("\t", name)
+else:
+    for name, param in model_ft.named_parameters():
+        if param.requires_grad:
+            print("\t", name)
+
+# Observe that all parameters are being optimized
+optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
+
+# Setup the loss fxn
+criterion = nn.CrossEntropyLoss()
+
+# Train and evaluate
+model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs,
+                             is_inception=(model_name == "inception"))
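
One way the module-level TODOs could be filled in for a feature-extraction run, sketched under assumptions that are not part of this commit: a pretrained ResNet-18 backbone, two classes, and ImageFolder data under a made-up data/casino/{train,val} layout.

import torch
from torch import nn
from torchvision import datasets, models, transforms

num_classes = 2          # e.g. kissing / not_kissing
feature_extract = True
model_name = 'resnet'    # anything other than "inception" skips the aux-loss branch
num_epochs = 25

# Freeze the pretrained backbone and replace the classifier head; only the new
# fc layer ends up in params_to_update when feature_extract is True
model_ft = models.resnet18(pretrained=True)
if feature_extract:
    for param in model_ft.parameters():
        param.requires_grad = False
model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)

# Hypothetical still-frame folders, e.g. produced by pipeline.slice_clips
data_transforms = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
image_datasets = {s: datasets.ImageFolder(f'data/casino/{s}', data_transforms)
                  for s in ['train', 'val']}
dataloaders_dict = {s: torch.utils.data.DataLoader(image_datasets[s], batch_size=8, shuffle=True)
                    for s in ['train', 'val']}

With definitions like these the rest of the module runs as written: the SGD optimizer receives only the unfrozen fc parameters, and train_model keeps the weights from the best validation epoch.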
