Browse Source

添加 ipynb

liuyuqi-dellpc 6 years ago
parent
commit
b535031f83

+ 100 - 0
.vscode/.ropeproject/config.py

@@ -0,0 +1,100 @@
+# The default ``config.py``
+# flake8: noqa
+
+
+def set_prefs(prefs):
+    """Set the rope preferences for this project.
+
+    This function is called before opening the project.
+
+    Args:
+        prefs: Preferences container.  This function only assigns to it by
+            key (``prefs[name] = value``); the commented-out examples below
+            additionally use an ``add(name, value)`` method on it.
+    """
+
+    # Specify which files and folders to ignore in the project.
+    # Changes to ignored resources are not added to the history and
+    # VCSs.  Also they are not returned in `Project.get_files()`.
+    # Note that ``?`` and ``*`` match all characters but slashes.
+    # '*.pyc': matches 'test.pyc' and 'pkg/test.pyc'
+    # 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc'
+    # '.svn': matches 'pkg/.svn' and all of its children
+    # 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o'
+    # 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o'
+    prefs['ignored_resources'] = ['*.pyc', '*~', '.ropeproject',
+                                  '.hg', '.svn', '_svn', '.git', '.tox']
+
+    # Specifies which files should be considered python files.  It is
+    # useful when you have scripts inside your project.  Only files
+    # ending with ``.py`` are considered to be python files by
+    # default.
+    #prefs['python_files'] = ['*.py']
+
+    # Custom source folders:  By default rope searches the project
+    # for finding source folders (folders that should be searched
+    # for finding modules).  You can add paths to that list.  Note
+    # that rope guesses project source folders correctly most of the
+    # time; use this if you have any problems.
+    # The folders should be relative to project root and use '/' for
+    # separating folders regardless of the platform rope is running on.
+    # 'src/my_source_folder' for instance.
+    #prefs.add('source_folders', 'src')
+
+    # You can extend python path for looking up modules
+    #prefs.add('python_path', '~/python/')
+
+    # Should rope save object information or not.
+    prefs['save_objectdb'] = True
+    prefs['compress_objectdb'] = False
+
+    # If `True`, rope analyzes each module when it is being saved.
+    prefs['automatic_soa'] = True
+    # The depth of calls to follow in static object analysis
+    prefs['soa_followed_calls'] = 0
+
+    # If `False` when running modules or unit tests "dynamic object
+    # analysis" is turned off.  This makes them much faster.
+    prefs['perform_doa'] = True
+
+    # Rope can check the validity of its object DB when running.
+    prefs['validate_objectdb'] = True
+
+    # How many undos to hold?
+    prefs['max_history_items'] = 32
+
+    # Shows whether to save history across sessions.
+    prefs['save_history'] = True
+    prefs['compress_history'] = False
+
+    # Set the number of spaces used for indenting.  According to
+    # :PEP:`8`, it is best to use 4 spaces.  Since most of rope's
+    # unit-tests use 4 spaces it is more reliable, too.
+    prefs['indent_size'] = 4
+
+    # Builtin and c-extension modules that are allowed to be imported
+    # and inspected by rope.
+    prefs['extension_modules'] = []
+
+    # Add all standard c-extensions to extension_modules list.
+    prefs['import_dynload_stdmods'] = True
+
+    # If `True` modules with syntax errors are considered to be empty.
+    # The default value is `False`; When `False` syntax errors raise
+    # `rope.base.exceptions.ModuleSyntaxError` exception.
+    prefs['ignore_syntax_errors'] = False
+
+    # If `True`, rope ignores unresolvable imports.  Otherwise, they
+    # appear in the importing namespace.
+    prefs['ignore_bad_imports'] = False
+
+    # If `True`, rope will insert new module imports as
+    # `from <package> import <module>` by default.
+    prefs['prefer_module_from_imports'] = False
+
+    # If `True`, rope will transform a comma list of imports into
+    # multiple separate import statements when organizing
+    # imports.
+    prefs['split_imports'] = False
+
+    # If `True`, rope will sort imports alphabetically by module name
+    # instead of alphabetically by import statement, with from imports
+    # after normal imports.
+    prefs['sort_imports_alphabetically'] = False
+
+
+def project_opened(project):
+    """This function is called after opening the project.
+
+    Args:
+        project: The project that was opened.  It is not used here; the
+            function body is intentionally empty.
+    """
+    # Do whatever you like here!

BIN
.vscode/.ropeproject/objectdb


+ 1 - 0
.vscode/launch.json

@@ -4,6 +4,7 @@
     // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
+        
         {
             "name": "Python",
             "type": "python",

+ 0 - 23
src/python/Main.py

@@ -1,23 +0,0 @@
-# coding=utf-8
-'''
-Created on 2017年9月12日
-@vsersion:python 3.6
-@author: liuyuqi
-'''
-
-from nt import chdir
-
-projectPath = "D:\\t"
-
-
-def main():
-    print("start!")
-    chdir(projectPath)
-    print("finish!")
-
-
-main()
-
-
-def main():
-    print("23", sep=' ', end='\n')

+ 0 - 49
src/python/NBAClass.py

@@ -1,49 +0,0 @@
-#coding=utf-8
-'''
-Created on 2017年9月12日
-@vsersion:python 3.6
-@author: liuyuqi
-'''
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-
-class NBA:
-    team_season = ""
-    advanced_basic = ""
-    advanced_shooting =""
-    avg = ""
-    coach_playoff = ""
-    coach_season = ""
-    player_playoff = ""
-    player_salary = ""
-    player_season =""
-    single =""
-    team_playoff = ""
-    team_season = ""
-    tot = ""
-    
-    def init(self):
-        importData(self)
-    
-    # 导入数据,预览数据
-    def importData(self):
-        team_season = pd.read_csv('../input/NBAdata/team_season.csv')
-        advanced_basic = pd.read_csv('../input/NBAdata/advanced_basic.csv')
-        advanced_shooting = pd.read_csv('../input/NBAdata/advanced_shooting.csv')
-        avg = pd.read_csv('../input/NBAdata/avg.csv')
-        coach_playoff = pd.read_csv('../input/NBAdata/coach_playoff.csv')
-        coach_season = pd.read_csv('../input/NBAdata/coach_season.csv')
-        player_playoff = pd.read_csv('../input/NBAdata/player_playoff.csv')
-        player_salary = pd.read_csv('../input/NBAdata/player_salary.csv')
-        player_season = pd.read_csv('../input/NBAdata/player_season.csv')
-        single = pd.read_csv('../input/NBAdata/single.csv')
-        team_playoff = pd.read_csv('../input/NBAdata/team_playoff.csv')
-        team_season = pd.read_csv('../input/NBAdata/team_season.csv')
-        tot = pd.read_csv('../input/NBAdata/tot.csv')
-        
-        team_season.head()
-        team_playoff.columns
-
-
- 

+ 429 - 0
src/python/NBA比赛分析.ipynb

@@ -0,0 +1,429 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "start!\n",
+      "D:\\app\\NBA\n"
+     ]
+    }
+   ],
+   "source": [
+    "'''准备工作'''\n",
+    "# 导入需要的第三方库\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import os\n",
+    "from nt import chdir\n",
+    "\n",
+    "#初始化系统设置和参数\n",
+    "projectPath = \"D:/app/NBA\"\n",
+    "\n",
+    "def preImport():\n",
+    "    print(\"start!\")\n",
+    "    chdir(projectPath)\n",
+    "    print(os.getcwd())\n",
+    "preImport()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " 驱动器 D 中的卷是 program\n",
+      " 卷的序列号是 EA42-CC87\n",
+      "\n",
+      " D:\\app\\NBA\\input\\NBAdata 的目录\n",
+      "\n",
+      "2017/09/12  00:04    <DIR>          .\n",
+      "2017/09/12  00:04    <DIR>          ..\n",
+      "2017/08/28  10:27         2,775,366 advanced_basic.csv\n",
+      "2017/08/28  10:27         2,218,437 advanced_shooting.csv\n",
+      "2017/08/28  10:25         3,237,569 avg.csv\n",
+      "2017/08/21  15:58            86,004 coach_playoff.csv\n",
+      "2017/08/21  15:58           180,461 coach_season.csv\n",
+      "2017/09/01  10:29         4,349,700 player_playoff.csv\n",
+      "2017/08/25  15:49           394,587 player_salary.csv\n",
+      "2017/09/01  10:28        68,215,244 player_season.csv\n",
+      "2017/08/28  10:26         1,843,231 single.csv\n",
+      "2017/09/01  10:30           443,718 team_playoff.csv\n",
+      "2017/09/01  10:31         6,323,713 team_season.csv\n",
+      "2017/08/28  10:26         3,710,009 tot.csv\n",
+      "              12 个文件     93,778,039 字节\n",
+      "               2 个目录 165,393,342,464 可用字节\n"
+     ]
+    }
+   ],
+   "source": [
+    "ls \"./input/NBAdata/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['球队', '时间', '结果', '主/客场', '比分', '投篮', '命中', '出手', '三分', '三分命中', '三分出手',\n",
+       "       '罚球', '罚球命中', '罚球出手', '篮板', '前场', '后场', '助攻', '抢断', '盖帽', '失误', '犯规',\n",
+       "       '得分'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "''' 导入数据,预览数据'''\n",
+    "\n",
+    "team_season = pd.read_csv('./input/NBAdata/team_season.csv')\n",
+    "advanced_basic = pd.read_csv('./input/NBAdata/advanced_basic.csv')\n",
+    "advanced_shooting = pd.read_csv('./input/NBAdata/advanced_shooting.csv')\n",
+    "avg = pd.read_csv('./input/NBAdata/avg.csv')\n",
+    "coach_playoff = pd.read_csv('./input/NBAdata/coach_playoff.csv')\n",
+    "coach_season = pd.read_csv('./input/NBAdata/coach_season.csv')\n",
+    "player_playoff = pd.read_csv('./input/NBAdata/player_playoff.csv')\n",
+    "player_salary = pd.read_csv('./input/NBAdata/player_salary.csv')\n",
+    "player_season = pd.read_csv('./input/NBAdata/player_season.csv')\n",
+    "single = pd.read_csv('./input/NBAdata/single.csv')\n",
+    "team_playoff = pd.read_csv('./input/NBAdata/team_playoff.csv')\n",
+    "team_season = pd.read_csv('./input/NBAdata/team_season.csv')\n",
+    "tot = pd.read_csv('./input/NBAdata/tot.csv')\n",
+    "\n",
+    "team_season.head()\n",
+    "#team_playoff.columns\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>球队</th>\n",
+       "      <th>时间</th>\n",
+       "      <th>结果</th>\n",
+       "      <th>主/客场</th>\n",
+       "      <th>比分</th>\n",
+       "      <th>投篮</th>\n",
+       "      <th>命中</th>\n",
+       "      <th>出手</th>\n",
+       "      <th>三分</th>\n",
+       "      <th>三分命中</th>\n",
+       "      <th>...</th>\n",
+       "      <th>后场</th>\n",
+       "      <th>助攻</th>\n",
+       "      <th>抢断</th>\n",
+       "      <th>盖帽</th>\n",
+       "      <th>失误</th>\n",
+       "      <th>犯规</th>\n",
+       "      <th>得分</th>\n",
+       "      <th>失分</th>\n",
+       "      <th>赛季</th>\n",
+       "      <th>回合</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>CHI</td>\n",
+       "      <td>1986-04-17</td>\n",
+       "      <td>L</td>\n",
+       "      <td>客</td>\n",
+       "      <td>BOS123-104CHI</td>\n",
+       "      <td>0.488</td>\n",
+       "      <td>42</td>\n",
+       "      <td>86</td>\n",
+       "      <td>0.500</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>19</td>\n",
+       "      <td>17</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>17</td>\n",
+       "      <td>30</td>\n",
+       "      <td>104</td>\n",
+       "      <td>123</td>\n",
+       "      <td>1986</td>\n",
+       "      <td>51.28</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>CHI</td>\n",
+       "      <td>1986-04-20</td>\n",
+       "      <td>L</td>\n",
+       "      <td>客</td>\n",
+       "      <td>BOS135-131CHI</td>\n",
+       "      <td>0.461</td>\n",
+       "      <td>47</td>\n",
+       "      <td>102</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>30</td>\n",
+       "      <td>15</td>\n",
+       "      <td>9</td>\n",
+       "      <td>4</td>\n",
+       "      <td>11</td>\n",
+       "      <td>34</td>\n",
+       "      <td>131</td>\n",
+       "      <td>135</td>\n",
+       "      <td>1986</td>\n",
+       "      <td>59.90</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>CHI</td>\n",
+       "      <td>1986-04-22</td>\n",
+       "      <td>L</td>\n",
+       "      <td>主</td>\n",
+       "      <td>BOS122-104CHI</td>\n",
+       "      <td>0.438</td>\n",
+       "      <td>39</td>\n",
+       "      <td>89</td>\n",
+       "      <td>0.250</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>29</td>\n",
+       "      <td>19</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>15</td>\n",
+       "      <td>28</td>\n",
+       "      <td>104</td>\n",
+       "      <td>122</td>\n",
+       "      <td>1986</td>\n",
+       "      <td>52.24</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>CHI</td>\n",
+       "      <td>1987-04-23</td>\n",
+       "      <td>L</td>\n",
+       "      <td>客</td>\n",
+       "      <td>BOS108-104CHI</td>\n",
+       "      <td>0.487</td>\n",
+       "      <td>38</td>\n",
+       "      <td>78</td>\n",
+       "      <td>0.333</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>22</td>\n",
+       "      <td>26</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>9</td>\n",
+       "      <td>22</td>\n",
+       "      <td>104</td>\n",
+       "      <td>108</td>\n",
+       "      <td>1987</td>\n",
+       "      <td>45.82</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>CHI</td>\n",
+       "      <td>1987-04-26</td>\n",
+       "      <td>L</td>\n",
+       "      <td>客</td>\n",
+       "      <td>BOS105-96CHI</td>\n",
+       "      <td>0.442</td>\n",
+       "      <td>34</td>\n",
+       "      <td>77</td>\n",
+       "      <td>0.571</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>26</td>\n",
+       "      <td>14</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>15</td>\n",
+       "      <td>25</td>\n",
+       "      <td>96</td>\n",
+       "      <td>105</td>\n",
+       "      <td>1987</td>\n",
+       "      <td>46.40</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 26 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    球队          时间 结果 主/客场             比分     投篮  命中   出手     三分  三分命中  ...    \\\n",
+       "0  CHI  1986-04-17  L    客  BOS123-104CHI  0.488  42   86  0.500     1  ...     \n",
+       "1  CHI  1986-04-20  L    客  BOS135-131CHI  0.461  47  102  0.000     0  ...     \n",
+       "2  CHI  1986-04-22  L    主  BOS122-104CHI  0.438  39   89  0.250     1  ...     \n",
+       "3  CHI  1987-04-23  L    客  BOS108-104CHI  0.487  38   78  0.333     1  ...     \n",
+       "4  CHI  1987-04-26  L    客   BOS105-96CHI  0.442  34   77  0.571     4  ...     \n",
+       "\n",
+       "   后场  助攻  抢断  盖帽  失误  犯规   得分   失分    赛季     回合  \n",
+       "0  19  17   8   2  17  30  104  123  1986  51.28  \n",
+       "1  30  15   9   4  11  34  131  135  1986  59.90  \n",
+       "2  29  19   7   5  15  28  104  122  1986  52.24  \n",
+       "3  22  26   5   3   9  22  104  108  1987  45.82  \n",
+       "4  26  14   1   4  15  25   96  105  1987  46.40  \n",
+       "\n",
+       "[5 rows x 26 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "''''''\n",
+    "# 将比赛时间转换成所处赛季,按照季后赛所在年为标准\n",
+    "def convert_time_to_season(s):\n",
+    "    assert type(s) == str\n",
+    "    return int(s[:4])\n",
+    "\n",
+    "# 将失分单独列出\n",
+    "def get_loss_score(s):\n",
+    "    assert type(s) == str\n",
+    "    index_of_divider = s.index('-')\n",
+    "    loss_score = int(s[:index_of_divider][3:])\n",
+    "    return loss_score\n",
+    "\n",
+    "team_season['失分'] = team_season['比分'].map(get_loss_score)\n",
+    "team_season['赛季'] = team_season['时间'].map(convert_time_to_season)\n",
+    "team_season['回合'] = (team_season['出手'] + 0.44 * team_season['罚球出手'] - 0.96 * team_season['前场'] + team_season['失误']) / 2\n",
+    "team_season.head()\n",
+    "team_playoff['失分'] = team_playoff['比分'].map(get_loss_score)\n",
+    "team_playoff['赛季'] = team_playoff['时间'].map(convert_time_to_season)\n",
+    "team_playoff['回合'] = (team_playoff['出手'] + 0.44 * team_playoff['罚球出手'] - 0.96 * team_playoff['前场'] + team_playoff['失误']) / 2\n",
+    "team_playoff.head()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1986: 'BOS',\n",
+       " 1987: 'LAL',\n",
+       " 1988: 'LAL',\n",
+       " 1989: 'DET',\n",
+       " 1990: 'DET',\n",
+       " 1991: 'CHI',\n",
+       " 1992: 'CHI',\n",
+       " 1993: 'CHI',\n",
+       " 1994: 'HOU',\n",
+       " 1995: 'HOU',\n",
+       " 1996: 'CHI',\n",
+       " 1997: 'CHI',\n",
+       " 1998: 'CHI',\n",
+       " 1999: 'SAS',\n",
+       " 2000: 'LAL',\n",
+       " 2001: 'LAL',\n",
+       " 2002: 'LAL',\n",
+       " 2003: 'SAS',\n",
+       " 2004: 'DET',\n",
+       " 2005: 'SAS',\n",
+       " 2006: 'MIA',\n",
+       " 2007: 'SAS',\n",
+       " 2008: 'BOS',\n",
+       " 2009: 'LAL',\n",
+       " 2010: 'LAL',\n",
+       " 2011: 'DAL',\n",
+       " 2012: 'MIA',\n",
+       " 2013: 'MIA',\n",
+       " 2014: 'SAS',\n",
+       " 2015: 'GSW',\n",
+       " 2016: 'CLE'}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "champions = {}\n",
+    "\n",
+    "for year in range(1986, 2017):\n",
+    "    current_playoff = team_playoff[team_playoff['赛季'] == year]\n",
+    "    current_win = 0\n",
+    "    single_playoff = {}\n",
+    "    for i in range(len(current_playoff)):\n",
+    "        if current_playoff.iloc[i]['结果'] == 'W':\n",
+    "            if current_playoff.iloc[i]['球队'] in single_playoff.keys():\n",
+    "                single_playoff[current_playoff.iloc[i]['球队']] += 1\n",
+    "            else:\n",
+    "                single_playoff[current_playoff.iloc[i]['球队']] = 1\n",
+    "    for team in single_playoff.keys():\n",
+    "        if single_playoff[team] > current_win:\n",
+    "            current_win = single_playoff[team]\n",
+    "            champions[year] = team\n",
+    "\n",
+    "champions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 10 - 4
src/python/improtData.py

@@ -1,5 +1,6 @@
 # coding=utf-8
 '''
+导入数据,13个csv文件,并查看相关数据格式。
 Created on 2017年9月12日
 @version:python 3.6
 @author: liuyuqi
@@ -10,6 +11,15 @@ Created on 2017年9月12日
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from nt import chdir
+
+projectPath = "D:/app/NBA"
+
+
+def preImport():
+    print("start!")
+    chdir(projectPath)
+    print("finish!")
 
 # 导入数据,预览数据
 def importData():
@@ -30,7 +40,3 @@ def importData():
     team_season.head()
     team_playoff.columns
 
-
-
-
-

+ 1 - 1
src/python/plotData.py

@@ -1,4 +1,4 @@
-#coding=utf-8
+# coding=utf-8
 '''
 Created on 2017年9月12日
 @version:python 3.6

+ 15 - 0
src/python/test1.py

@@ -0,0 +1,15 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+%matplotlib inline
+
+#导入数据
+data=pd.read_csv("creditcard.csv")
+print(data.head())
+
+#作图
+count_classes=pd.value_counts(data["Class"],sort=True).sort_index()
+count_classes.plot(kind="bar")
+plt.xlabel("Class")
+plt.ylabel("Frequency")

+ 2 - 0
src/python/test2.py

@@ -0,0 +1,2 @@
+# Minimal smoke test: bind an integer and print it.
+a=3
+print(a)

+ 14 - 0
src/test/location_regain_by_time.py

@@ -0,0 +1,14 @@
+#!usr/bin/python
+import sys
+
+current_loc="START_OF_APP"
+govt_regains=[]
+for line in sys.stdin:
+    (loc,event_date,event_type)=line.strip("\n").split("\t");
+    if loc!=current_loc :
+        if current_loc!="START_OF_APP":
+            print(current_loc+"\t"+"\t".join(govt_regains))
+        current_loc=loc
+        govt_regains=[]
+    if event_type.find("regains")!=1:
+        govt_regains.append(event_date)D:\app\NBA\src\test\location_regain_by_time.py

+ 17 - 0
src/test/location_regains_mapper.py

@@ -0,0 +1,17 @@
+#!usr/bin/python
+import sys
+for line in sys.stdin :
+    (loc,event_date,event_type,actor,lat,lon,src,
+    fatalities)=line.strip().split("\t");
+    (day,month,year)=event_date.split("/")
+    if len(day)==1 :
+        day="0"+day
+    if len(month)==1 :
+        month="0"+month
+    if len(year)==2 :
+        if int(year)>30 and int(year)<99:
+            year="19"+year
+        else:
+            year="20"+year
+    event_date=year+"-"+month+"-"+day
+    print("\t".join([loc,event_date,event_type]))

+ 11 - 0
src/test/run_localtion_reganis.sh

@@ -0,0 +1,11 @@
+#!/bin/bash
+#运行Hadoop streaming hadoop-streaming.jar任务
+
+hadoop jar /opt/hadoop-2.x/contrib/streaming/hadoop-streaming-0.1.jar \
+    -input /input/acled/Niger.tsv \
+    -output /output/acled/acled.out \
+    -mapper location_regain_mapper.py \
+    -reducer location_regain_by_time.py \
+    -file location_regain_mapper.py \
+    -jobconf stream.num.map.output.key.fileds=2
+    -jobconf mappred.reduce.tasks=1