{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# 查看当前挂载的数据集目录\n", "!ls /home/kesci/input/\n", "# 查看个人持久化工作区文件\n", "!ls /home/kesci/work/\n", "# 查看当前kernerl下的package\n", "# !pip list --format=columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# 下载提交工具至当前目录,仅需执行一次\n", "#!wget -nv -O kesci_submit https://cdn.kesci.com/submit_tool/v1/kesci_submit&&chmod +x kesci_submit\n", "# 提交文件myresult.json进行评审,注意,提交的文件必须为json格式\n", "# !./kesci_submit -token e0abe3caa44ed5b4 -file result.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# 查看BROAD数据集目录\n", "!ls /mnt/BROAD-datasets/video/\n", "# 查看文件个数,以training data举例\n", "!ls /mnt/BROAD-datasets/video/training |wc -l\n", "# 查看meta.json\n", "#!cat /mnt/BROAD-datasets/video/meta.json\n", "\n", "# 可以看到meta.json有1241个视频,而初赛只有124个视频。所以对meta.json提取:\n", "BROAD_training_filepath=\"/mnt/BROAD-datasets/video/training\"\n", "BROAD_testing_filepath=\"/mnt/BROAD-datasets/video/testing\"\n", "BROAD_validation_filepath=\"/mnt/BROAD-datasets/video/validation\"\n", "# !ls /mnt/BROAD-datasets/video/training\n", "\n", "# 以查看get_detection_performance.py进行说明\n", "# !cat /mnt/BROAD-datasets/video/eval_script/get_detection_performance.py" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# 可能导致不输出\n", "# import sys\n", "# reload(sys)\n", "# sys.setdefaultencoding(\"utf-8\")\n", "# %matplotlib inline " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import json\n", "f=file(\"/mnt/BROAD-datasets/video/meta.json\");\n", "mjson=json.load(f)\n", "f.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# 预览一下meta数据格式。\n", "step=0\n", "database=[]\n", "for d,x in mjson[\"database\"].items():\n", " step=step+1\n", " database.append(x)\n", " if step>=2:\n", " break\n", "print(json.dumps(database))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import os ,cPickle,pprint\n", "import pandas as pd\n", "\n", "def getLabelArr(video_name_no_behfix):\n", " labelArr=[]\n", " for video_name in os.listdir(BROAD_training_filepath):\n", " video_name_no_behfix=video_name[:-4]\n", " has_video=mjson[\"database\"].has_key(video_name_no_behfix)\n", " if(has_video):\n", " # 构造训练集和标签\n", " video_info=database[video_name_no_behfix]\n", " with open(BROAD_training_filepath+\"/\"+video_name,'rb') as f:\n", " img_fea=cPickle.load(f)\n", " len_image=len(img_fea)\n", " print(img_fea.shape)\n", " # pprint.pprint(tmp2)\n", " # print(tmp2[1][1])\n", " # array init\n", " labelArr = np.zeros([len_image])\n", " segment_info=video_info['annotations']\n", "\n", " # add seg flag\n", " for seg_info in segment_info:\n", " begin = float(seg_info[\"segment\"][0])\n", " begin = math.ceil(begin)\n", " begin = int(begin)\n", "\n", " end = float(seg_info[\"segment\"][1])\n", " end = math.floor(end)\n", " end = int(end)\n", " labelArr[begin:end+1] = 1\n", " else:\n", " print(list,\"空\")\n", " return labelArr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import json\n", "import matplotlib.pyplot as plt\n", "with open(\"/mnt/BROAD-datasets/video/meta.json\") as fobj:\n", " 
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# This cell is adapted from the ActivityNet exploration notebook: it plots one sample frame\n", "# per activity taken from the raw videos. It assumes VIDEO_PATH, get_video_number_of_frames\n", "# and leaf_nodes are defined elsewhere; they are not part of this notebook or the BROAD feature data.\n", "import glob\n", "import json\n", "import os\n", "import random\n", "import cv2\n", "import matplotlib.pyplot as plt\n", "\n", "with open(\"/mnt/BROAD-datasets/video/meta.json\") as fobj:\n", "    mjson = json.load(fobj)\n", "\n", "database = mjson[\"database\"]\n", "version = mjson[\"version\"]\n", "\n", "def get_sample_frame_from_video(videoid, duration, start_time, end_time,\n", "                                video_path=VIDEO_PATH):\n", "    filename = glob.glob(os.path.join(video_path, \"v_%s*\" % videoid))[0]\n", "    nr_frames = get_video_number_of_frames(filename)\n", "    fps = (nr_frames*1.0)/duration\n", "    start_frame, end_frame = int(start_time*fps), int(end_time*fps)\n", "    frame_idx = random.choice(range(start_frame, end_frame))\n", "    cap = cv2.VideoCapture(filename)\n", "    keepdoing, cnt = True, 1\n", "    while keepdoing:\n", "        ret, img = cap.read()\n", "        if cnt==frame_idx:\n", "            break\n", "        assert ret==True, \"Ended video and frame not selected.\"\n", "        cnt+=1\n", "    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n", "\n", "def get_random_video_from_activity(database, activity, subset=\"validation\"):\n", "    videos = []\n", "    for x in database:\n", "        if database[x][\"subset\"] != subset: continue\n", "        xx = random.choice(database[x][\"annotations\"])\n", "        if xx[\"label\"]==activity:\n", "            yy = {\"videoid\": x, \"duration\": database[x][\"duration\"],\n", "                  \"start_time\": xx[\"segment\"][0], \"end_time\": xx[\"segment\"][1]}\n", "            videos.append(yy)\n", "    return random.choice(videos)\n", "\n", "def plotSampleImages():\n", "    plt.figure(num=None, figsize=(18, 50), dpi=100)\n", "    idx = 1\n", "    for ll in leaf_nodes:\n", "        activity = ll[\"nodeName\"]\n", "        keepdoing = True\n", "        while keepdoing:\n", "            try:\n", "                video = get_random_video_from_activity(database, activity)\n", "                img = get_sample_frame_from_video(**video)\n", "                keepdoing = False\n", "            except:\n", "                keepdoing = True\n", "        plt.subplot(20,5,idx)\n", "        idx+=1\n", "        plt.imshow(img), plt.axis(\"off\"), plt.title(\"%s\" % activity)\n", "    plt.show()\n", "plotSampleImages()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Histogram of where the highlight segments start and end, converted to minutes.\n", "def plotWonderfulTime():\n", "    import matplotlib.pyplot as plt\n", "\n", "    wonderfulTimeArr=[]\n", "    for d,x in mjson[\"database\"].items():\n", "        segment_info=x['annotations']\n", "        for seg_info in segment_info:\n", "            begin = float(seg_info[\"segment\"][0])/60.0\n", "            end = float(seg_info[\"segment\"][1])/60.0\n", "            wonderfulTimeArr.append(begin)\n", "            wonderfulTimeArr.append(end)\n", "\n", "    # figure attributes\n", "    plt.xlabel('time')\n", "    plt.ylabel('number')\n", "    plt.title('wonderfulTime')\n", "    a = plt.subplot(1, 1, 1)\n", "    plt.ylim(10, 40000)\n", "    plt.hist(wonderfulTimeArr,40,normed=1,histtype='bar',facecolor='yellowgreen',alpha=0.75)\n", "    plt.show()\n", "\n", "plotWonderfulTime()" ] },
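{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# A small follow-up to the histogram above: summary statistics of the highlight segments in\n", "# meta.json. This is a minimal sketch that only relies on the structure previewed earlier\n", "# (database -> video -> annotations -> segment = [start, end]); the lengths are reported in\n", "# whatever units the segment annotations use.\n", "import numpy as np\n", "\n", "seg_lengths = []\n", "for video_id, video_info in mjson[\"database\"].items():\n", "    for seg in video_info[\"annotations\"]:\n", "        seg_lengths.append(float(seg[\"segment\"][1]) - float(seg[\"segment\"][0]))\n", "seg_lengths = np.array(seg_lengths)\n", "print(\"number of segments:\", len(seg_lengths))\n", "print(\"mean length: %.1f, median: %.1f, max: %.1f\" % (\n", "    seg_lengths.mean(), np.median(seg_lengths), seg_lengths.max()))" ] },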
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Extract frames from the raw videos with ffmpeg. This assumes the raw .mp4/.mkv files have\n", "# been downloaded under baseDir+'videos'; baseDir and saveVidInfo must be defined elsewhere.\n", "def extractMP4toJPG():\n", "    downloaded = os.listdir(baseDir+'videos') # get list of files\n", "    downloaded = [d for d in downloaded if d.endswith('.mp4') or d.endswith('.mkv')] # keep only video files\n", "    print('number of videos downloaded:', len(downloaded))\n", "    ############################\n", "    fps = 15 # set fps = 0 if you want to extract at the original frame rate\n", "    extractframes(sorted(downloaded), fps)\n", "    ###########################\n", "    saveVidInfo(sorted(downloaded))\n", "\n", "# Call ffmpeg to extract every frame of each video\n", "def extractframes(vids, fps): # take all videos and extract frames using ffmpeg\n", "    for idx, vid in enumerate(vids):\n", "        vidfile = baseDir+'videos/'+vid\n", "        imgdir = baseDir+'images/'+vid.split('.')[0]+'/'\n", "        print(idx, vid)\n", "        if not os.path.isdir(imgdir):\n", "            os.mkdir(imgdir)\n", "\n", "        imglist = os.listdir(imgdir)\n", "        imglist = [i for i in imglist if i.endswith('.jpg')]\n", "\n", "        if len(imglist)<10:\n", "            if fps>0:\n", "                cmd = 'ffmpeg -i {} -qscale:v 5 -r {} {}%05d.jpg'.format(vidfile,fps,imgdir) #-vsync 0\n", "            else:\n", "                cmd = 'ffmpeg -i {} -qscale:v 5 {}%05d.jpg'.format(vidfile,imgdir) #-vsync 0\n", "            # PNG is very storage-heavy, so JPG is used instead.\n", "            # Images are written as JPG with quality scale 5; adjust it to your liking.\n", "            # The result does not look noticeably more degraded than ffmpeg's default settings.\n", "            # At quality scale 5 the images take almost 145GB.\n", "            #f.write(cmd+'\\n')\n", "            os.system(cmd)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Load the usual data-analysis libraries\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "import paddle.v2 as paddle" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Option 1: the standard pickle module\n", "import pickle\n", "with open(BROAD_training_filepath+\"/100259500.pkl\",'rb') as f:\n", "    # encoding='latin1' lets Python 3 read pickles written by Python 2\n", "    tmp1 = pickle.load(f, encoding='latin1')\n", "print(tmp1.shape)\n", "\n", "# # Option 2: read with pandas\n", "# import pandas as pd\n", "# tmp2 = pd.read_pickle(BROAD_training_filepath+\"/xxxx.pkl\")\n", "# tmp2\n", "\n", "# # Option 3: read with numpy\n", "# import numpy as np\n", "# tmp3 = np.load(BROAD_training_filepath+\"/xxxx.pkl\")\n", "# tmp3" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dense_len = 512\n", "rnn_size = 512\n", "num_surround = 50\n", "dim_features = 2048\n", "window_size = 2*num_surround + 1\n", "classnum = 2\n", "\n", "# mygenerator is a sample reader that is not defined in this notebook;\n", "# a sketch of the expected sample layout is given a few cells below.\n", "reader = paddle.batch(\n", "    mygenerator,\n", "    batch_size=5)\n", "\n", "xxx = next(reader())\n", "print(np.array(xxx[0]).shape)\n", "\n", "# entries 0-2047 of a sample are np.float32 feature vectors and entry 2048 is the 0/1 label vector, each of length 101\n", "print(xxx[0][0], xxx[0][2048])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Define the convolutional neural network, taking the image matrix as input\n", "def convolutional_neural_network(img):\n", "    # first conv-pool layer\n", "    conv_pool_1 = paddle.networks.simple_img_conv_pool(\n", "        input=img,\n", "        filter_size=5,\n", "        num_filters=20,\n", "        num_channel=1,\n", "        pool_size=2,\n", "        pool_stride=2,\n", "        act=paddle.activation.Relu())\n", "    # second conv-pool layer\n", "    conv_pool_2 = paddle.networks.simple_img_conv_pool(\n", "        input=conv_pool_1,\n", "        filter_size=5,\n", "        num_filters=50,\n", "        num_channel=20,\n", "        pool_size=2,\n", "        pool_stride=2,\n", "        act=paddle.activation.Relu())\n", "    # fully connected output layer with softmax activation; its size must equal the number of digit classes, 10\n", "    predict = paddle.layer.fc(input=conv_pool_2,\n", "                              size=10,\n", "                              act=paddle.activation.Softmax())\n", "    return predict" ] },
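{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# The paddle.batch(mygenerator, ...) cell above assumes a sample generator that is not defined\n", "# in this notebook. Below is a minimal sketch of one plausible implementation matching the\n", "# layout described there: each sample is a list of 2049 entries, where entries 0..2047 are the\n", "# sliding windows (length window_size) of one feature dimension around a centre frame, and\n", "# entry 2048 is the 0/1 label sequence for the same window. In a real reader img_fea and\n", "# labelArr would come from the .pkl files and getLabelArr above; random data is used here so\n", "# the sketch stays self-contained.\n", "import numpy as np\n", "\n", "def mygenerator():\n", "    num_frames = 300  # stand-in for the length of a real video\n", "    img_fea = np.random.rand(num_frames, dim_features).astype(np.float32)\n", "    labelArr = np.random.randint(0, classnum, num_frames)\n", "    for center in range(num_surround, num_frames - num_surround):\n", "        window = img_fea[center - num_surround:center + num_surround + 1]   # (window_size, 2048)\n", "        labels = labelArr[center - num_surround:center + num_surround + 1]  # (window_size,)\n", "        sample = [window[:, d].tolist() for d in range(dim_features)]       # 2048 dense vectors\n", "        sample.append(labels.tolist())                                      # entry 2048: label sequence\n", "        yield sample" ] },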
"predict=\n", "cost = paddle.layer.mse_cost(input=y_predict, label=y)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "paddle.init(use_gpu=False, trainer_count=1)\n", "\n", "default_std = 1 / math.sqrt(rnn_size) / 3.0\n", "std_default = paddle.attr.Param(initial_std=default_std)\n", "std_0 = paddle.attr.Param(initial_std=0.)\n", "mix_hidden_lr = 1e-3\n", "lstm_para_attr = paddle.attr.Param(\n", " initial_std = 0.0, \n", " learning_rate = 1.0\n", ")\n", "hidden_para_attr = paddle.attr.Param(\n", " initial_std = default_std, \n", " learning_rate = mix_hidden_lr\n", ")\n", "\n", "l_x0 = [paddle.layer.data(name='rnn_layer_%d' % (i), type=paddle.data_type.dense_vector(window_size)) \n", " for i in range(dim_features)\n", "]\n", "target = paddle.layer.data(name='label', type=paddle.data_type.integer_value_sequence(classnum))\n", "l_x1 = [\n", " paddle.layer.embedding(\n", " size=dense_len, input=x0\n", " ) for x0 in l_x0\n", "]\n", "\n", "hidden_0 = paddle.layer.mixed(\n", " size=rnn_size,\n", " bias_attr=std_default,\n", " input=[\n", " paddle.layer.full_matrix_projection(\n", " input=x1, param_attr=std_default\n", " ) for x1 in l_x1\n", " ]\n", ")\n", "\n", "lstm_0 = paddle.layer.lstmemory(\n", " input=hidden_0,\n", " act=paddle.activation.Relu(),\n", " gate_act=paddle.activation.Sigmoid(),\n", " state_act=paddle.activation.Sigmoid(),\n", " bias_attr=std_0,\n", " param_attr=lstm_para_attr\n", ")\n", "\n", "feature_out = paddle.layer.mixed(\n", " size=classnum,\n", " bias_attr=std_default,\n", " input=[\n", " paddle.layer.full_matrix_projection(\n", " input=hidden_0, param_attr=hidden_para_attr),\n", " paddle.layer.full_matrix_projection(\n", " input=lstm_0, param_attr=lstm_para_attr)\n", " ]\n", ")\n", "\n", "crf_cost = paddle.layer.crf(\n", " size=classnum,\n", " input=feature_out,\n", " label=target,\n", " param_attr=paddle.attr.Param(\n", " name='crfw',\n", " initial_std=default_std,\n", " learning_rate=mix_hidden_lr\n", " )\n", ")\n", "\n", "crf_dec = paddle.layer.crf_decoding(\n", " size=classnum,\n", " input=feature_out,\n", " label=target,\n", " param_attr=paddle.attr.Param(name='crfw'))\n", "\n", "\n", "evaluator.sum(input=crf_dec)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "parameters = paddle.parameters.create(crf_cost)\n", "optimizer = paddle.optimizer.Momentum(\n", " momentum=0,\n", " learning_rate=1e-3,\n", " regularization=paddle.optimizer.L2Regularization(rate=8e-4),\n", " model_average=paddle.optimizer.ModelAverage(\n", " average_window=0.5, max_average_window=10000), )\n", "\n", "trainer = paddle.trainer.SGD(cost=crf_cost,\n", " parameters=parameters,\n", " update_equation=optimizer,\n", " extra_layers=crf_dec)\n", "\n", "l_name = ['rnn_layer_%d' % (i) for i in range(dim_features)] + ['label']\n", "l_idx = range(dim_features + 1)\n", "feeding = dict(zip(l_name, l_idx))\n", "\n", "trainer.train(\n", " reader=reader,\n", " num_passes=1,\n", " feeding=feeding\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "trainer.train(\n", " reader=paddle.batch(\n", " paddle.reader.shuffle(\n", " uci_housing.train(), buf_size=500),\n", " batch_size=2),\n", " feeding=feeding,\n", " event_handler=event_handler,\n", " num_passes=30)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# result.json\n", "data={\n", " 
\"version\": \"VERSION 1.0\",\n", " \"results\":\n", " {\n", " \"164161500\": [\n", " {\n", " \"score\": 1.0,\n", " \"segment\": [94, 130]\n", " },\n", " {\n", " \"score\": 0.6, \n", " \"segment\": [150, 196] \n", " }\n", " ]\n", " }\n", "}\n", "with open('result.json', 'w') as json_file:\n", " json_file.write(json.dumps(data))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "with open('result.json') as json_file:\n", " data1 = json.load(json_file)\n", "print(data1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "run -i '/mnt/BROAD-datasets/video/eval_script/get_detection_performance.py' /mnt/BROAD-datasets/meta.json path_of_result_file --subset validation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!python2 -m pip install pillow\n", "from PIL import Image\n", "import numpy as np\n", "import os" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def load_image(file):\n", " im=Image.open(file).convert(\"L\")\n", " im.resize((28,28),Image.ANTIALIAS)\n", " im=np.array(im).astype(np.float32).flatten()\n", " im.im/255.0\n", " return im" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "test_data=[]\n", "cur_dir=os.path.dirname(os.path.realpath(__file__))\n", "test_data.append(load_image(cur_dir+\"/image/test.png\"))\n", "\n", "prob=panddle.infer(\n", " output_layer=predict,parameters=)\n", "lab=np.argsort(-prob)\n", "print(\"label of images/test.png is %d\" % lab[0][0])" ] } ], "metadata": { "kernelspec": { "display_name": "py36", "language": "python", "name": "py36" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }