Browse Source

add twtech-Tensorflow

liuyuqi 6 years ago
parent
commit
998498dce6

+ 1 - 78
.gitignore

@@ -1,78 +1 @@
-# ---> Python
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-env/
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*,cover
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# ---> R
-# History files
-.Rhistory
-.Rapp.history
-
-# Example code in package build process
-*-Ex.R
-
-# RStudio files
-.Rproj.user/
-
-# produced vignettes
-vignettes/*.html
-vignettes/*.pdf
-
-# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
-.httr-oauth
-
+.ipynb_checkpoints

+ 36 - 1
LICENSE

@@ -1,3 +1,38 @@
 # PredictWonderfulTV
 
-twtech-Paddle-综艺节目精彩片段预测
+twtech-Paddle-综艺节目精彩片段预测
+
+## 大赛数据说明
+
+百度提供了1470个来自爱奇艺的电视综艺视频的数据,总共约1200个小时。
+
+视频每秒都抽取了一帧图像提取图片特征,因此视频已都被转换为了视频帧的图片特征序列,但也都提供了观看链接。
+
+其中每个视频都被仔细地标注了“精彩片段”的时间戳,单位为秒,总共标记出了18000段、约750小时的“精彩片段”。
+
+BROAD数据集的格式为pkl\(pickle\), 读取的方式总结如下:
+
+```
+数据集的路径请移步参赛指南中查看。
+# 方法一:传统Python方法
+import cPickle
+with open(BROAD_filepath/xxxx.pkl,'rb') as f:
+tmp1 = cPickle.load(f)
+print tmp1
+
+# 方法二:利用Pandas读取
+import pandas as pd
+tmp2 = pd.read_pickle(BROAD_filepath/xxxx.pkl)
+tmp2
+
+# 方法三:利用Numpy读取
+import numpy as np
+tmp3 = np.load(BROAD_filepath/xxxx.pkl)
+tmp3
+```
+
+## 使用Paddle
+
+
+## 使用Tensorflow
+

+ 677 - 0
twtech-Paddle-综艺节目精彩片段预测.ipynb

@@ -0,0 +1,677 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 查看当前挂载的数据集目录\n",
+    "!ls /home/kesci/input/\n",
+    "# 查看个人持久化工作区文件\n",
+    "!ls /home/kesci/work/\n",
    "# 查看当前kernel下的package\n",
+    "# !pip list --format=columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 下载提交工具至当前目录,仅需执行一次\n",
+    "#!wget -nv -O kesci_submit https://cdn.kesci.com/submit_tool/v1/kesci_submit&&chmod +x kesci_submit\n",
+    "# 提交文件myresult.json进行评审,注意,提交的文件必须为json格式\n",
+    "# !./kesci_submit -token e0abe3caa44ed5b4 -file result.json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 查看BROAD数据集目录\n",
+    "!ls /mnt/BROAD-datasets/video/\n",
+    "# 查看文件个数,以training data举例\n",
+    "!ls /mnt/BROAD-datasets/video/training |wc -l\n",
+    "# 查看meta.json\n",
+    "#!cat /mnt/BROAD-datasets/video/meta.json\n",
+    "\n",
+    "# 可以看到meta.json有1241个视频,而初赛只有124个视频。所以对meta.json提取:\n",
+    "BROAD_training_filepath=\"/mnt/BROAD-datasets/video/training\"\n",
+    "BROAD_testing_filepath=\"/mnt/BROAD-datasets/video/testing\"\n",
+    "BROAD_validation_filepath=\"/mnt/BROAD-datasets/video/validation\"\n",
+    "# !ls /mnt/BROAD-datasets/video/training\n",
+    "\n",
+    "# 以查看get_detection_performance.py进行说明\n",
+    "# !cat /mnt/BROAD-datasets/video/eval_script/get_detection_performance.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 可能导致不输出\n",
+    "# import sys\n",
+    "# reload(sys)\n",
+    "# sys.setdefaultencoding(\"utf-8\")\n",
+    "# %matplotlib inline "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "f=file(\"/mnt/BROAD-datasets/video/meta.json\");\n",
+    "mjson=json.load(f)\n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 预览一下meta数据格式。\n",
+    "step=0\n",
+    "database=[]\n",
+    "for d,x in mjson[\"database\"].items():\n",
+    "    step=step+1\n",
+    "    database.append(x)\n",
+    "    if step>=2:\n",
+    "        break\n",
+    "print(json.dumps(database))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import os ,cPickle,pprint\n",
+    "import pandas as pd\n",
+    "\n",
+    "def getLabelArr(video_name_no_behfix):\n",
+    "    labelArr=[]\n",
+    "    for video_name in os.listdir(BROAD_training_filepath):\n",
+    "        video_name_no_behfix=video_name[:-4]\n",
+    "        has_video=mjson[\"database\"].has_key(video_name_no_behfix)\n",
+    "        if(has_video):\n",
+    "    #         构造训练集和标签\n",
+    "            video_info=database[video_name_no_behfix]\n",
+    "            with open(BROAD_training_filepath+\"/\"+video_name,'rb') as f:\n",
+    "                img_fea=cPickle.load(f)\n",
+    "                len_image=len(img_fea)\n",
+    "                print(img_fea.shape)\n",
+    "    #             pprint.pprint(tmp2)\n",
+    "    #             print(tmp2[1][1])\n",
+    "               # array init\n",
+    "                labelArr = np.zeros([len_image])\n",
+    "                segment_info=video_info['annotations']\n",
+    "\n",
+    "                # add seg flag\n",
+    "                for seg_info in segment_info:\n",
+    "                    begin = float(seg_info[\"segment\"][0])\n",
+    "                    begin = math.ceil(begin)\n",
+    "                    begin = int(begin)\n",
+    "\n",
+    "                    end = float(seg_info[\"segment\"][1])\n",
+    "                    end = math.floor(end)\n",
+    "                    end = int(end)\n",
+    "                    labelArr[begin:end+1] = 1\n",
+    "        else:\n",
+    "            print(list,\"空\")\n",
+    "    return labelArr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import matplotlib.pyplot as plt\n",
+    "with open(\"/mnt/BROAD-datasets/video/meta.json\") as fobj:\n",
+    "    mjson = json.load(fobj)\n",
+    "\n",
+    "database = data[\"database\"]\n",
+    "version = data[\"version\"]\n",
+    "\n",
+    "def get_sample_frame_from_video(videoid, duration, start_time, end_time,\n",
+    "                                video_path=VIDEO_PATH):\n",
+    "    filename = glob.glob(os.path.join(video_path, \"v_%s*\" % videoid))[0]\n",
+    "    nr_frames = get_video_number_of_frames(filename)\n",
+    "    fps = (nr_frames*1.0)/duration\n",
+    "    start_frame, end_frame = int(start_time*fps), int(end_time*fps)\n",
+    "    frame_idx = random.choice(range(start_frame, end_frame))\n",
+    "    cap = cv2.VideoCapture(filename)\n",
+    "    keepdoing, cnt = True, 1\n",
+    "    while keepdoing:\n",
+    "        ret, img = cap.read()\n",
+    "        if cnt==frame_idx:\n",
+    "            break\n",
+    "        assert ret==True, \"Ended video and frame not selected.\"\n",
+    "        cnt+=1\n",
+    "    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
+    "\n",
+    "def get_random_video_from_activity(database, activity, subset=\"validation\"):\n",
+    "    videos = []\n",
+    "    for x in database:\n",
+    "        if database[x][\"subset\"] != subset: continue\n",
+    "        xx = random.choice(database[x][\"annotations\"])\n",
+    "        if xx[\"label\"]==activity:\n",
+    "            yy = {\"videoid\": x, \"duration\": database[x][\"duration\"],\n",
+    "                  \"start_time\": xx[\"segment\"][0], \"end_time\": xx[\"segment\"][1]}\n",
+    "            videos.append(yy)\n",
+    "    return random.choice(videos)\n",
+    "\n",
+    "def plotSampleImages():\n",
+    "    plt.figure(num=None, figsize=(18, 50), dpi=100)\n",
+    "    idx = 1\n",
+    "    for ll in leaf_nodes:\n",
+    "        activity = ll[\"nodeName\"]\n",
+    "        keepdoing = True\n",
+    "        while keepdoing:\n",
+    "            try:\n",
+    "                video = get_random_video_from_activity(database, activity)\n",
+    "                img = get_sample_frame_from_video(**video)\n",
+    "                keepdoing = False\n",
+    "            except:\n",
+    "                keepdoing = True\n",
+    "        plt.subplot(20,5,idx)\n",
+    "        idx+=1\n",
+    "        plt.imshow(img), plt.axis(\"off\"), plt.title(\"%s\" % activity)\n",
+    "    plt.show()\n",
+    "plotSampleImages()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def plotWonderfulTime():\n",
+    "    import pandas as pd\n",
+    "    import matplotlib.pyplot as plt\n",
+    "    import numpy as np\n",
+    "    import math\n",
+    "    step=0\n",
+    "    wonderfulTimeArr=[]\n",
+    "    for d,x in mjson[\"database\"].items():\n",
+    "        segment_info=x['annotations']\n",
+    "        for seg_info in segment_info:\n",
+    "            begin = float(seg_info[\"segment\"][0])/60.0\n",
+    "            end = float(seg_info[\"segment\"][1])/60.0\n",
+    "            wonderfulTimeArr.append(begin)\n",
+    "            wonderfulTimeArr.append(end)\n",
+    "#         step=step+1\n",
+    "#         if ste/p>=2:\n",
+    "#             break\n",
+    "#     total_duration = sum(wonderfulTimeArr)\n",
+    "#     print(\"总共精彩时间按段:%d\" % total_duration)\n",
+    "\n",
+    "    #添加图形属性\n",
+    "    plt.xlabel('time')\n",
+    "    plt.ylabel('number')\n",
+    "    plt.title('wonderfulTime')\n",
+    "    a = plt.subplot(1, 1, 1)\n",
+    "    plt.ylim=(10, 40000)\n",
+    "    plt.hist(wonderfulTimeArr,40,normed=1,histtype='bar',facecolor='yellowgreen',alpha=0.75)      \n",
+    "    plt.legend()\n",
+    "    plt.show()\n",
+    "\n",
+    "plotWonderfulTime()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def extractMP4toJPG():\n",
+    "    downloaded = os.listdir(baseDir+'videos') # get list of file\n",
+    "    downloaded = [d for d in downloaded if d.endswith('.mp4') or d.endswith('.mkv')]  # keep only .mp4 files\n",
+    "    print 'number of videos downloded are ', len(downloaded) \n",
+    "    ############################\n",
+    "    fps = 15; # set fps = 0 if you want to extract at original frame rate\n",
+    "    extractframes(sorted(downloaded),fps)\n",
+    "    ###########################    \n",
+    "    saveVidInfo(sorted(downloaded))\n",
+    "\n",
+    "# 调用ffmpeg执行mp4提取每帧\n",
+    "def extractframes(vids,fps): # take all .mp4 videos and extract frames using ffmpeg\n",
+    "    for idx,vid in enumerate(vids):\n",
+    "        vidfile = baseDir+'videos/'+vid\n",
+    "        imgdir = baseDir+'images/'+vid.split('.')[0]+'/'\n",
+    "        print idx, vid \n",
+    "        if not os.path.isdir(imgdir):\n",
+    "            os.mkdir(imgdir)\n",
+    "\n",
+    "        imglist = os.listdir(imgdir);\n",
+    "        imglist = [i for i in imglist if i.endswith('.jpg')];\n",
+    "\n",
+    "        if len(imglist)<10:\n",
+    "            if fps>0:\n",
+    "                cmd = 'ffmpeg -i {} -qscale:v 5 -r {} {}%05d.jpg'.format(vidfile,fps,imgdir); #-vsync 0\n",
+    "            else:\n",
+    "                cmd = 'ffmpeg -i {} -qscale:v 5 {}%05d.jpg'.format(vidfile,imgdir); #-vsync 0\n",
+    "            # PNG format is very storage heavy so I choose jpg.\n",
    "            # images will be generated in JPG format with quality scale = 5; you can adjust according to your liking \n",
    "            # In appearance it doesn't look that deblurred as opposed to default settings by ffmpeg\n",
    "            # @v 5 images will take almost 145GB\n",
+    "            #f.write(cmd+'\\n')\n",
+    "            os.system(cmd)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 加载数据分析常用库\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "% matplotlib inline\n",
+    "\n",
+    "import paddle.v2 as paddle"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 方法一:传统Python方法\n",
+    "import cPickle\n",
+    "with open(BROAD_training_filepath+\"/100259500.pkl\",'rb') as f:\n",
+    "    tmp1 = cPickle.load(f)\n",
+    "print(tmp1.shape)\n",
+    "\n",
+    "# # 方法二:利用Pandas读取\n",
+    "# import pandas as pd\n",
+    "# tmp2 = pd.read_pickle(BROAD_training_filepath/xxxx.pkl)\n",
+    "# tmp2\n",
+    "\n",
+    "# # 方法三:利用Numpy读取\n",
+    "# import numpy as np\n",
+    "# tmp3 = np.load(BROAD_training_filepath/xxxx.pkl)\n",
+    "# tmp3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "dense_len = 512\n",
+    "rnn_size = 512\n",
+    "num_surround = 50\n",
+    "dim_features = 2048\n",
+    "window_size = 2*num_surround + 1\n",
+    "classnum = 2\n",
+    "\n",
+    "reader = paddle.batch(\n",
+    "        mygenerator,\n",
+    "        batch_size=5)\n",
+    "\n",
+    "xxx = next(reader())\n",
+    "print(np.array(xxx[0]).shape)\n",
+    "\n",
+    "# 0-2047 for np.float32 feature vector and 2048 for 0/1 label vector with length of 101\n",
+    "print(xxx[0][0],xxx[0][2048])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# 定义卷积神经网络,输入img矩阵\n",
+    "def convolutional_neural_network(img):\n",
+    "    # 第一个卷积-池化层\n",
+    "    conv_pool_1 = paddle.networks.simple_img_conv_pool(\n",
+    "        input=img,\n",
+    "        filter_size=5,\n",
+    "        num_filters=20,\n",
+    "        num_channel=1,\n",
+    "        pool_size=2,\n",
+    "        pool_stride=2,\n",
+    "        act=paddle.activation.Relu())\n",
+    "    # 第二个卷积-池化层\n",
+    "    conv_pool_2 = paddle.networks.simple_img_conv_pool(\n",
+    "        input=conv_pool_1,\n",
+    "        filter_size=5,\n",
+    "        num_filters=50,\n",
+    "        num_channel=20,\n",
+    "        pool_size=2,\n",
+    "        pool_stride=2,\n",
+    "        act=paddle.activation.Relu())\n",
+    "    # 以softmax为激活函数的全连接输出层,输出层的大小必须为数字的个数10\n",
+    "    predict = paddle.layer.fc(input=conv_pool_2,\n",
+    "                              size=10,\n",
+    "                              act=paddle.activation.Softmax())\n",
+    "    return predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "paddle.init(use_gpu=False, trainer_count=4)\n",
+    "\n",
+    "x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))\n",
+    "y_predict = paddle.layer.fc(input=x,\n",
+    "                                size=1,\n",
+    "                                act=paddle.activation.Linear())\n",
+    "y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))\n",
+    "predict=\n",
+    "cost = paddle.layer.mse_cost(input=y_predict, label=y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "paddle.init(use_gpu=False, trainer_count=1)\n",
+    "\n",
+    "default_std = 1 / math.sqrt(rnn_size) / 3.0\n",
+    "std_default = paddle.attr.Param(initial_std=default_std)\n",
+    "std_0       = paddle.attr.Param(initial_std=0.)\n",
+    "mix_hidden_lr     = 1e-3\n",
+    "lstm_para_attr    = paddle.attr.Param(\n",
+    "    initial_std   = 0.0, \n",
+    "    learning_rate = 1.0\n",
+    ")\n",
+    "hidden_para_attr = paddle.attr.Param(\n",
+    "    initial_std   = default_std, \n",
+    "    learning_rate = mix_hidden_lr\n",
+    ")\n",
+    "\n",
+    "l_x0 = [paddle.layer.data(name='rnn_layer_%d' % (i), type=paddle.data_type.dense_vector(window_size)) \n",
+    "            for i in range(dim_features)\n",
+    "]\n",
+    "target = paddle.layer.data(name='label', type=paddle.data_type.integer_value_sequence(classnum))\n",
+    "l_x1 = [\n",
+    "    paddle.layer.embedding(\n",
+    "        size=dense_len, input=x0\n",
+    "    ) for x0 in l_x0\n",
+    "]\n",
+    "\n",
+    "hidden_0 = paddle.layer.mixed(\n",
+    "    size=rnn_size,\n",
+    "    bias_attr=std_default,\n",
+    "    input=[\n",
+    "        paddle.layer.full_matrix_projection(\n",
+    "            input=x1, param_attr=std_default\n",
+    "        ) for x1 in l_x1\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "lstm_0 = paddle.layer.lstmemory(\n",
+    "    input=hidden_0,\n",
+    "    act=paddle.activation.Relu(),\n",
+    "    gate_act=paddle.activation.Sigmoid(),\n",
+    "    state_act=paddle.activation.Sigmoid(),\n",
+    "    bias_attr=std_0,\n",
+    "    param_attr=lstm_para_attr\n",
+    ")\n",
+    "\n",
+    "feature_out = paddle.layer.mixed(\n",
+    "    size=classnum,\n",
+    "    bias_attr=std_default,\n",
+    "    input=[\n",
+    "        paddle.layer.full_matrix_projection(\n",
+    "            input=hidden_0, param_attr=hidden_para_attr),\n",
+    "        paddle.layer.full_matrix_projection(\n",
+    "            input=lstm_0, param_attr=lstm_para_attr)\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "crf_cost = paddle.layer.crf(\n",
+    "    size=classnum,\n",
+    "    input=feature_out,\n",
+    "    label=target,\n",
+    "    param_attr=paddle.attr.Param(\n",
+    "        name='crfw',\n",
+    "        initial_std=default_std,\n",
+    "        learning_rate=mix_hidden_lr\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "crf_dec = paddle.layer.crf_decoding(\n",
+    "   size=classnum,\n",
+    "   input=feature_out,\n",
+    "   label=target,\n",
+    "   param_attr=paddle.attr.Param(name='crfw'))\n",
+    "\n",
+    "\n",
+    "evaluator.sum(input=crf_dec)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "parameters = paddle.parameters.create(crf_cost)\n",
+    "optimizer = paddle.optimizer.Momentum(\n",
+    "    momentum=0,\n",
+    "    learning_rate=1e-3,\n",
+    "    regularization=paddle.optimizer.L2Regularization(rate=8e-4),\n",
+    "    model_average=paddle.optimizer.ModelAverage(\n",
+    "        average_window=0.5, max_average_window=10000), )\n",
+    "\n",
+    "trainer = paddle.trainer.SGD(cost=crf_cost,\n",
+    "                             parameters=parameters,\n",
+    "                             update_equation=optimizer,\n",
+    "                             extra_layers=crf_dec)\n",
+    "\n",
+    "l_name = ['rnn_layer_%d' % (i) for i in range(dim_features)] + ['label']\n",
+    "l_idx = range(dim_features + 1)\n",
+    "feeding = dict(zip(l_name, l_idx))\n",
+    "\n",
+    "trainer.train(\n",
+    "    reader=reader,\n",
+    "    num_passes=1,\n",
+    "    feeding=feeding\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "trainer.train(\n",
+    "    reader=paddle.batch(\n",
+    "        paddle.reader.shuffle(\n",
+    "            uci_housing.train(), buf_size=500),\n",
+    "        batch_size=2),\n",
+    "    feeding=feeding,\n",
+    "    event_handler=event_handler,\n",
+    "    num_passes=30)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# result.json\n",
+    "data={\n",
+    "  \"version\": \"VERSION 1.0\",\n",
+    "  \"results\":\n",
+    "  {\n",
+    "    \"164161500\": [\n",
+    "      {\n",
+    "        \"score\": 1.0,\n",
+    "        \"segment\": [94, 130]\n",
+    "      },\n",
+    "      {\n",
+    "        \"score\": 0.6, \n",
+    "        \"segment\": [150, 196] \n",
+    "      }\n",
+    "    ]\n",
+    "  }\n",
+    "}\n",
+    "with open('result.json', 'w') as json_file:\n",
+    "    json_file.write(json.dumps(data))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "with open('result.json') as json_file:\n",
+    "    data1 = json.load(json_file)\n",
+    "print(data1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "run -i '/mnt/BROAD-datasets/video/eval_script/get_detection_performance.py' /mnt/BROAD-datasets/meta.json path_of_result_file --subset validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "!python2 -m pip install pillow\n",
+    "from PIL import Image\n",
+    "import numpy as np\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def load_image(file):\n",
+    "    im=Image.open(file).convert(\"L\")\n",
+    "    im.resize((28,28),Image.ANTIALIAS)\n",
+    "    im=np.array(im).astype(np.float32).flatten()\n",
+    "    im.im/255.0\n",
+    "    return im"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "test_data=[]\n",
+    "cur_dir=os.path.dirname(os.path.realpath(__file__))\n",
+    "test_data.append(load_image(cur_dir+\"/image/test.png\"))\n",
+    "\n",
+    "prob=panddle.infer(\n",
+    "    output_layer=predict,parameters=)\n",
+    "lab=np.argsort(-prob)\n",
+    "print(\"label of images/test.png is %d\" % lab[0][0])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py36",
+   "language": "python",
+   "name": "py36"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 25 - 0
twtech-Tensorflow-综艺节目精彩片段预测.ipynb

@@ -0,0 +1,25 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# !python --version\n",
+    "import tensorflow as tf"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py36",
+   "language": "python",
+   "name": "py36"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}