Amir Ziai 4 years ago
parent
commit
089b8572d7
5 changed files with 130 additions and 105 deletions
  1. 4 1
      README.md
  2. 122 102
      dev4.ipynb
  3. 1 1
      params.py
  4. 2 1
      train.py
  5. 1 0
      vggish.py

+ 4 - 1
README.md

@@ -64,7 +64,10 @@ s.visualize_segments('path/to/file.mp4')
 
 See examples in [examples/detector.ipynb](examples/detector.ipynb).
 
-## Heavily used the following resources:
+## References
 - [Video Classification Using 3D ResNet](https://github.com/kenshohara/video-classification-3d-cnn-pytorch)
+- [3D ResNets for Action Recognition (CVPR 2018)](https://github.com/kenshohara/3D-ResNets-PyTorch/)
 - [AudioSet](https://research.google.com/audioset/download.html)
+- [TensorFlow AudioSet](https://github.com/tensorflow/models/tree/master/research/audioset)
 - [CS231N Saliency maps and class viz PyTorch code](http://cs231n.github.io/assignments2019/assignment3/)
+- [Torch VGGish](https://github.com/harritaylor/torchvggish)

+ 122 - 102
dev/dev4.ipynb → dev4.ipynb

@@ -2,9 +2,18 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 95,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2"
@@ -12,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 85,
+   "execution_count": 102,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -25,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
+   "execution_count": 106,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,115 +43,126 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 93,
+   "execution_count": 104,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.01, 'use_vggish': False, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "3d4d0ae6efebf77f6b7f5a4163558c892daae15e174c5b791e9e5db1\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.01, 'use_vggish': False, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "c7b84e8c420a3be3a333ba41c2618399c2baebb470fc6a23a0a433d2\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.01, 'use_vggish': True, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "0fd6f6ebe947c54bdcb0340e244f0a27254f0a358be9a038f8fc9825\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.01, 'use_vggish': True, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "bc7e48700485911fd1dcfa46fb408e93d744c2ec3afd26356af872f9\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.001, 'use_vggish': False, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "54aaa1ed52b83839a188927adb63f64a6e8cc8f9d5ce37a7be960ed4\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.001, 'use_vggish': False, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "9af39512bef330a9bbab90f91063597b4d3d798a0a7dec9054354d6e\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "57fac69720ed9689a5805e15ddbb89ee26d9b64fbd2cf2b65c432a52\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "8686806f634f6da4be6bf84cca80c305e8a7f751dd8ca88ee2112398\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.01, 'use_vggish': False, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "dddc0808ccee29d6d96d7922bdce1e66e319ad29cd72065ceb76086f\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.01, 'use_vggish': False, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "1613a061bc0ee4cb3e89ff00d7b183241ee4fccf84420cb0cfa06ea4\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.01, 'use_vggish': True, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "10ece59983d31536981268d1e8bbdf4460e65b24a7567b1027d92a7c\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.01, 'use_vggish': True, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "db6e5ff043c83f9643e5fa4aecb3c85a03e2be1c8de56993246b6f23\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': False, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "33ac0b6c3357fb2cfc22194be83a872435a7b3495506b83f8ee76fcf\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': False, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "41fd07df53b8400f9386cf6d6fd5e400290fc19cc96bd1c1c66991d2\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "c2c04441a29f2bcf11fade126cd304849d3d5e9a1c98fab8a4b9a2fc\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': None, 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "4d227fadf14ff66ea2c7f253a96c2111a78234f95089d003a3d3f88f\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.01, 'use_vggish': False, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "a9d9eb5afabbea4dff607846fe1a0782c760f44cfbd0f982c2f8bbb4\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.01, 'use_vggish': False, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "b5d41c44c98e2fa432656659c4fbcb77ef2a66d30dbe5148940ec3bb\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.01, 'use_vggish': True, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "a60fa62389b6418273ca349479228d39e55b7b357ba2e7ec95423d41\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.01, 'use_vggish': True, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "dcb42ec2f883718f58a1612e4afff99249aa628afa844252c67ed670\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.001, 'use_vggish': False, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "8c4da8f6f9157db4dbd6f70b67c1db469ea034730c12ebfccf0c5329\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.001, 'use_vggish': False, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "091aaed6d121608dc449c2f33a7c74bbe835ccc5abadc937263b9f90\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "c6cd69a63c6b7670802afa33af35b13d6c687340f71bf7e299e5711c\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': False, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "af38e4c808ff3998977167f1e5138bdd0301f92a2dc206a6319d9823\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.01, 'use_vggish': False, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "9715fe9781571c61eb6bf38cbc5173df40e9e41ae3eaec6789cf771b\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.01, 'use_vggish': False, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "21f4a1394a48e89171341403ff9eccc6e080a9acb66005b9b14a035d\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.01, 'use_vggish': True, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "454e165857fea306cac78a54c635a15545fab0e7a5f6067e6509aeb3\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.01, 'use_vggish': True, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "4566c335e71f215a1110a36a7cfec1882c86f86dbb7e3e5787dfdc26\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': False, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "6fcff331f0df5ded20113ae3e7f2d1568e3f3fba9f2a922715326fcf\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': False, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "81bf2d2fb5c8083afc54b43f8c561e6c5c36304a2d002444a02bb0d9\n",
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.9}\n",
-      "Loading experiment results from cache\n",
-      "5f938c364c0de0bc3ab556f1b48971520006d0ed581d53c1d89787d7\n"
+      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 20, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.9}\n"
      ]
     },
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Running param set: {'data_path_base': 'vtest_new2', 'conv_model_name': 'resnet', 'num_epochs': 10, 'feature_extract': True, 'batch_size': 64, 'lr': 0.001, 'use_vggish': True, 'momentum': 0.95}\n",
-      "Loading experiment results from cache\n",
-      "b0ba9005b21c06336b8a58c2ed784dcf5fec6028dd7bca0eefbc3c7a\n"
+      "Params to update\n",
+      "* combined.weight\n",
+      "* combined.bias\n",
+      "Epoch 0/19\n",
+      "----------\n",
+      "train Loss: 0.6817 F1: 0.5897 Acc: 0.5949\n",
+      "val Loss: 0.7901 F1: 0.4043 Acc: 0.3412\n",
+      "\n",
+      "Epoch 1/19\n",
+      "----------\n",
+      "train Loss: 0.6104 F1: 0.7294 Acc: 0.7089\n",
+      "val Loss: 0.6452 F1: 0.8125 Acc: 0.7176\n",
+      "\n",
+      "Epoch 2/19\n",
+      "----------\n",
+      "train Loss: 0.6116 F1: 0.7222 Acc: 0.6203\n",
+      "val Loss: 0.6177 F1: 0.8217 Acc: 0.7294\n",
+      "\n",
+      "Epoch 3/19\n",
+      "----------\n",
+      "train Loss: 0.5695 F1: 0.7692 Acc: 0.6962\n",
+      "val Loss: 0.6339 F1: 0.8095 Acc: 0.7176\n",
+      "\n",
+      "Epoch 4/19\n",
+      "----------\n",
+      "train Loss: 0.5504 F1: 0.7816 Acc: 0.7595\n",
+      "val Loss: 0.6923 F1: 0.5743 Acc: 0.4941\n",
+      "\n",
+      "Epoch 5/19\n",
+      "----------\n",
+      "train Loss: 0.4833 F1: 0.8148 Acc: 0.8101\n",
+      "val Loss: 0.6607 F1: 0.6789 Acc: 0.5882\n",
+      "\n",
+      "Epoch 6/19\n",
+      "----------\n",
+      "train Loss: 0.4592 F1: 0.8293 Acc: 0.8228\n",
+      "val Loss: 0.6051 F1: 0.8130 Acc: 0.7294\n",
+      "\n",
+      "Epoch 7/19\n",
+      "----------\n",
+      "train Loss: 0.4290 F1: 0.8706 Acc: 0.8608\n",
+      "val Loss: 0.5585 F1: 0.8722 Acc: 0.8000\n",
+      "\n",
+      "Epoch 8/19\n",
+      "----------\n",
+      "train Loss: 0.4170 F1: 0.8696 Acc: 0.8481\n",
+      "val Loss: 0.5424 F1: 0.8722 Acc: 0.8000\n",
+      "\n",
+      "Epoch 9/19\n",
+      "----------\n",
+      "train Loss: 0.4004 F1: 0.8696 Acc: 0.8481\n",
+      "val Loss: 0.5398 F1: 0.8788 Acc: 0.8118\n",
+      "\n",
+      "Epoch 10/19\n",
+      "----------\n",
+      "train Loss: 0.3825 F1: 0.9011 Acc: 0.8861\n",
+      "val Loss: 0.5567 F1: 0.8594 Acc: 0.7882\n",
+      "\n",
+      "Epoch 11/19\n",
+      "----------\n",
+      "train Loss: 0.3598 F1: 0.9213 Acc: 0.9114\n",
+      "val Loss: 0.5710 F1: 0.8571 Acc: 0.7882\n",
+      "\n",
+      "Epoch 12/19\n",
+      "----------\n",
+      "train Loss: 0.3281 F1: 0.9412 Acc: 0.9367\n",
+      "val Loss: 0.5708 F1: 0.8480 Acc: 0.7765\n",
+      "\n",
+      "Epoch 13/19\n",
+      "----------\n",
+      "train Loss: 0.3187 F1: 0.9302 Acc: 0.9241\n",
+      "val Loss: 0.5715 F1: 0.8226 Acc: 0.7412\n",
+      "\n",
+      "Epoch 14/19\n",
+      "----------\n",
+      "train Loss: 0.3049 F1: 0.9647 Acc: 0.9620\n",
+      "val Loss: 0.5688 F1: 0.8033 Acc: 0.7176\n",
+      "\n",
+      "Epoch 15/19\n",
+      "----------\n",
+      "train Loss: 0.2929 F1: 0.9535 Acc: 0.9494\n",
+      "val Loss: 0.5558 F1: 0.8320 Acc: 0.7529\n",
+      "\n",
+      "Epoch 16/19\n",
+      "----------\n",
+      "train Loss: 0.2833 F1: 0.9425 Acc: 0.9367\n",
+      "val Loss: 0.5366 F1: 0.8504 Acc: 0.7765\n",
+      "\n",
+      "Epoch 17/19\n",
+      "----------\n",
+      "train Loss: 0.2662 F1: 0.9647 Acc: 0.9620\n",
+      "val Loss: 0.5275 F1: 0.8594 Acc: 0.7882\n",
+      "\n",
+      "Epoch 18/19\n",
+      "----------\n",
+      "train Loss: 0.2576 F1: 0.9535 Acc: 0.9494\n",
+      "val Loss: 0.5512 F1: 0.8293 Acc: 0.7529\n",
+      "\n",
+      "Epoch 19/19\n",
+      "----------\n",
+      "train Loss: 0.2463 F1: 0.9535 Acc: 0.9494\n",
+      "val Loss: 0.5820 F1: 0.7863 Acc: 0.7059\n",
+      "\n",
+      "Training complete in 2m 51s\n",
+      "Best val F1  : 0.878788\n",
+      "Best val Acc : 0.811765\n"
      ]
     }
    ],

+ 1 - 1
params.py

@@ -13,7 +13,7 @@ vggish_frame_rate = 0.96
 experiment_test = {
     'data_path_base': {data_path_base},
     'conv_model_name': {'resnet'},
-    'num_epochs': {10},
+    'num_epochs': {20},
     'feature_extract': {True},
     'batch_size': {64},
     'lr': {0.001},

+ 2 - 1
train.py

@@ -58,7 +58,8 @@ def train_kd(data_path_base: str,
                                                        batch_size=batch_size,
                                                        shuffle=shuffle, num_workers=num_workers)
                         for x in ['train', 'val']}
-    optimizer_ft = optim.SGD(params_to_update, lr=lr, momentum=momentum)
+    # optimizer_ft = optim.SGD(params_to_update, lr=lr, momentum=momentum)  # NOTE: superseded by Adam below; `momentum` is no longer passed to the optimizer
+    optimizer_ft = optim.Adam(params_to_update, lr=lr)
 
     # Setup the loss fxn
     criterion = nn.CrossEntropyLoss()

+ 1 - 0
vggish.py

@@ -1,3 +1,4 @@
+# adapted from https://github.com/harritaylor/torchvggish
 from typing import Tuple
 
 import torch.nn as nn