# coding=utf-8
'''
Created on 2017-09-12
@version: python 3.6
@author: liuyuqi
'''
# Third-party libraries used throughout the analysis.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the NBA data sets and preview them.
# Every CSV lives in the same directory and is read exactly once.
DATA_DIR = '../input/NBAdata/'
advanced_basic = pd.read_csv(DATA_DIR + 'advanced_basic.csv')
advanced_shooting = pd.read_csv(DATA_DIR + 'advanced_shooting.csv')
avg = pd.read_csv(DATA_DIR + 'avg.csv')
coach_playoff = pd.read_csv(DATA_DIR + 'coach_playoff.csv')
coach_season = pd.read_csv(DATA_DIR + 'coach_season.csv')
player_playoff = pd.read_csv(DATA_DIR + 'player_playoff.csv')
player_salary = pd.read_csv(DATA_DIR + 'player_salary.csv')
player_season = pd.read_csv(DATA_DIR + 'player_season.csv')
single = pd.read_csv(DATA_DIR + 'single.csv')
team_playoff = pd.read_csv(DATA_DIR + 'team_playoff.csv')
team_season = pd.read_csv(DATA_DIR + 'team_season.csv')
tot = pd.read_csv(DATA_DIR + 'tot.csv')
# Notebook-style previews; as plain script statements they have no visible effect.
team_season.head()
team_playoff.columns
# Convert a game date to the season it is counted in.
def convert_time_to_season(s):
    """Return the season label for a game date string.

    The label is the integer value of the first four characters of ``s``.

    NOTE(review): the original comment says seasons are labelled by the year
    of the playoffs. Presumably dates look like 'YYYY-MM-DD'; if so,
    regular-season games played before New Year map to the earlier calendar
    year with this slicing -- confirm against the data.

    Args:
        s: Date string whose first four characters are the year.

    Returns:
        The year as an int.

    Raises:
        TypeError: If ``s`` is not a string.
        ValueError: If the first four characters are not an integer.
    """
    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if not isinstance(s, str):
        raise TypeError('expected str, got %s' % type(s).__name__)
    return int(s[:4])
# Extract the points allowed from the score string.
def get_loss_score(s):
    """Return the number found between a 3-character prefix and the first '-'.

    The text before the first '-' is taken, its first three characters are
    dropped, and the remainder is parsed as an int.

    NOTE(review): presumably the score string looks like
    '<3-char prefix><points>-<points>...' and the first number is the points
    conceded -- confirm against the data.

    Args:
        s: Score string containing at least one '-'.

    Returns:
        The parsed integer.

    Raises:
        TypeError: If ``s`` is not a string.
        ValueError: If ``s`` has no '-' or the sliced text is not an integer.
    """
    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if not isinstance(s, str):
        raise TypeError('expected str, got %s' % type(s).__name__)
    index_of_divider = s.index('-')
    return int(s[3:index_of_divider])
# Add the same three derived columns to the regular-season and playoff tables.
for games in (team_season, team_playoff):
    # Points allowed, parsed out of the score string.
    games['失分'] = games['比分'].map(get_loss_score)
    # Season label, derived from the game date.
    games['赛季'] = games['时间'].map(convert_time_to_season)
    # Possession estimate: (attempts + 0.44 * free-throw attempts
    # - 0.96 * front-court rebounds + turnovers) / 2.
    games['回合'] = (games['出手'] + 0.44 * games['罚球出手'] - 0.96 * games['前场'] + games['失误']) / 2
# Notebook-style previews of both tables.
team_season.head()
team_playoff.head()
# Pick the champion of every season from 1986 to 2016: the team with the most
# playoff wins ('结果' == 'W') in that season.
champions = {}
for year in range(1986, 2017):
    current_playoff = team_playoff[team_playoff['赛季'] == year]
    won_games = current_playoff[current_playoff['结果'] == 'W']
    # sort=False keeps teams in order of first appearance, and idxmax() returns
    # the first label holding the maximum, so ties go to the team seen first.
    win_counts = won_games.groupby('球队', sort=False).size()
    if win_counts.empty:
        # No recorded wins for this season: leave the year out of the mapping.
        continue
    champions[year] = win_counts.idxmax()
# Notebook-style preview.
champions
# Build a Series mapping season -> champion team.
champion_series = pd.Series(champions)
# Count how many titles each team won, most titles first.
champions_count = champion_series.value_counts().sort_values(ascending=False)
champions_count
# Size the x axis from the data instead of assuming exactly ten champions.
team_positions = np.arange(len(champions_count))
# Start a fresh figure so the chart does not share axes with any earlier plot.
plt.figure()
plt.bar(team_positions, champions_count.values, width=0.5)
plt.xticks(team_positions, list(champions_count.index))
plt.xlabel('Team Name')
plt.ylabel('Champion Number')
plt.grid(True)
plt.title('Champions Statistics From 1986 to 2016')
# Ratio of points scored to points allowed for each champion in its playoff run.
# champion_teams, champion_score and champion_loss are built here from
# `champions` and `team_playoff`; nothing earlier in the script defines them.
champion_teams = []   # champion team code per season, in season order
champion_score = []   # mean points scored per playoff game
champion_loss = []    # mean points allowed per playoff game
for year in range(1986, 2017):
    champion_team = team_playoff[(team_playoff['赛季'] == year) & (team_playoff['球队'] == champions[year])]
    champion_teams.append(champions[year])
    champion_score.append(champion_team['得分'].mean())
    champion_loss.append(champion_team['失分'].mean())
# abs() keeps the ratio positive whichever sign convention the loss values use.
score_loss_ratio = [abs(score / loss) for score, loss in zip(champion_score, champion_loss)]
season_positions = np.arange(len(score_loss_ratio))
# Start a fresh figure so the scatter does not land on an earlier chart.
plt.figure()
plt.scatter(season_positions, score_loss_ratio)
# Dashed line marks the average ratio across all champions.
plt.hlines(np.array(score_loss_ratio).mean(), 0, len(score_loss_ratio) - 1, linestyles='dashed')
plt.xticks(season_positions, champion_teams, size='small', rotation=90)
plt.xlabel('Champion Team')
plt.ylabel('Score Loss Ratio')
plt.title('Score Loss Ratio of Champion Team in Playoff from 1986 to 2016')
plt.grid(True)
# Average possession estimate ('回合') per playoff game for each champion.
round_count = []
for year in range(1986, 2017):
    champion_team = team_playoff[(team_playoff['赛季'] == year) & (team_playoff['球队'] == champions[year])]
    round_count.append(champion_team['回合'].mean())
# pyplot draws on the current figure; open a new one so this bar chart does
# not overlay whatever was plotted before it.
plt.figure()
plt.bar(np.arange(31), round_count)
plt.xlabel('Champion Team')
plt.ylabel('Average Round in Playoff')
# champion_teams must hold the 31 champion team codes in season order; it is
# not defined in this part of the script.
plt.xticks(np.arange(31), champion_teams, size='small', rotation=90)
# Dashed line marks the average across all champions.
plt.hlines(np.array(round_count).mean(), 0, 30, linestyles='dashed')
plt.title('Average Round of Champion from 1986 to 2016')
# Shooting statistics of every champion over its playoff run, one entry per season.
shoot = {}
for year in range(1986, 2017):
    champion_team = team_playoff[(team_playoff['赛季'] == year) & (team_playoff['球队'] == champions[year])]
    # Column shortcuts: all attempts/makes, three-point, free-throw, points.
    attempts, makes = champion_team['出手'], champion_team['命中']
    three_attempts, three_makes = champion_team['三分出手'], champion_team['三分命中']
    ft_attempts, ft_makes = champion_team['罚球出手'], champion_team['罚球命中']
    points = champion_team['得分']

    # Keys are inserted in a fixed order because it becomes the column order
    # of shoot_data.
    stats = {}
    stats['三分出手'] = three_attempts.sum()
    stats['三分命中'] = three_makes.sum()
    stats['场均三分出手'] = three_attempts.mean()
    stats['场均三分命中'] = three_makes.mean()
    # Two-point figures are all shots minus three-point shots.
    stats['场均两分出手'] = attempts.mean() - stats['场均三分出手']
    stats['场均两分命中'] = makes.mean() - stats['场均三分命中']
    stats['出手'] = attempts.sum()
    stats['命中'] = makes.sum()
    stats['场均出手'] = attempts.mean()
    stats['场均命中'] = makes.mean()
    stats['两分出手'] = attempts.sum() - three_attempts.sum()
    stats['两分命中'] = makes.sum() - three_makes.sum()
    stats['罚球出手'] = ft_attempts.sum()
    stats['罚球命中'] = ft_makes.sum()
    # Shooting percentages from the totals.
    stats['罚球命中率'] = stats['罚球命中'] / stats['罚球出手']
    stats['两分命中率'] = stats['两分命中'] / stats['两分出手']
    stats['三分命中率'] = stats['三分命中'] / stats['三分出手']
    stats['得分'] = points.sum()
    stats['场均得分'] = points.mean()
    # True shooting: points / (2 * (attempts + 0.44 * free-throw attempts)).
    stats['真实命中率'] = stats['得分'] / (2 * (stats['出手'] + 0.44 * stats['罚球出手']))
    shoot[year] = stats
# Transpose so that each row is a season and each column a statistic.
shoot_data = pd.DataFrame(shoot).T
shoot_data.head()
# True shooting percentage against average points per game for every champion.
# pyplot draws on the current figure; open a new one so the scatter does not
# land on the previous chart's axes.
plt.figure()
plt.scatter(shoot_data['场均得分'], shoot_data['真实命中率'])
# Dashed lines mark the mean of each axis; the line extents are fixed values.
plt.vlines(shoot_data['场均得分'].mean(), 0.48, 0.6, linestyles='dashed')
plt.hlines(shoot_data['真实命中率'].mean(), 85, 125, linestyles='dashed')
plt.xlabel('Average Score')
plt.ylabel('TS')
plt.title('TS-AverageScore of Champions of 1986-2016')
plt.grid(True)
# Seasons of the two highest-scoring champions (one sort serves both prints),
# then the season of the lowest-scoring one.
by_score_descending = shoot_data.sort_values(by='场均得分', ascending=False)
print(by_score_descending.iloc[0].name)
print(by_score_descending.iloc[1].name)
print(shoot_data.sort_values(by='场均得分', ascending=True).iloc[0].name)
# Per-game three-point attempts (3PA) and makes (3P) of every champion.
three_point_labels = {'场均三分出手': '3PA', '场均三分命中': '3P'}
three_of_champions = shoot_data[['场均三分出手', '场均三分命中']].rename(columns=three_point_labels)
three_of_champions.plot(kind='bar')
# One dashed line per series at its mean over all champions.
for series_name in ('3PA', '3P'):
    plt.hlines(three_of_champions[series_name].mean(), 0, 30, linestyles='dashed')
# champion_teams must hold the 31 champion team codes in season order; it is
# not defined in this part of the script.
plt.xticks(np.arange(31), champion_teams, size='small', rotation=90)
plt.xlabel('Champion Team')
plt.ylabel('Three Point Statistics')
plt.title('Three Point Statistics of Champion Team From 1986 to 2016')
# Per-game two-point attempts (2PA) and makes (2P) of every champion.
two_of_champions = shoot_data[['场均两分出手', '场均两分命中']]
two_of_champions = two_of_champions.rename(columns={'场均两分出手': '2PA', '场均两分命中': '2P'})
two_of_champions.plot(kind='bar')
# Dashed lines mark the mean of each series over all champions.
plt.hlines(two_of_champions['2PA'].mean(), 0, 30, linestyles='dashed')
plt.hlines(two_of_champions['2P'].mean(), 0, 30, linestyles='dashed')
# champion_teams must hold the 31 champion team codes in season order; it is
# not defined in this part of the script.
plt.xticks(np.arange(31), champion_teams, size='small', rotation=90)
plt.xlabel('Champion Team')
# This chart shows two-point data, so it is labelled as such.
plt.ylabel('Two Point Statistics')
plt.title('Two Point Statistics of Champion Team From 1986 to 2016')
# Points scored and allowed per 100 possessions, for playoff and regular-season
# games. '回合' is halved when it is built, hence the factor 2 in the denominator.
for games in (team_playoff, team_season):
    for per_100_column, points_column in (('百回合得分', '得分'), ('百回合失分', '失分')):
        games[per_100_column] = games[points_column] / (2 * games['回合']) * 100
# Notebook-style previews of both tables.
team_playoff.head()
team_season.head()
# Points scored and allowed per 100 possessions, and their ratio, for every
# champion's playoff run.
efficiency = {}
for year in range(1986, 2017):
    champion_team = team_playoff[(team_playoff['赛季'] == year) & (team_playoff['球队'] == champions[year])]
    offensive_rating = champion_team['百回合得分'].mean()
    defensive_rating = champion_team['百回合失分'].mean()
    efficiency[year] = {
        'offensive': offensive_rating,
        'defensive': defensive_rating,
        'ratio': offensive_rating / defensive_rating,
    }
# Transpose so that each row is a season.
efficiency = pd.DataFrame(efficiency).T
efficiency
# Offensive efficiency drawn upward and defensive efficiency drawn downward
# (negated) for every champion.
# pyplot draws on the current figure; open a new one so these bars do not
# overlay the previous chart.
plt.figure()
plt.bar(np.arange(31), list(efficiency['offensive'].values))
plt.bar(np.arange(31), list(-1 * efficiency['defensive'].values))
# Dashed lines mark the mean of each series.
plt.hlines(efficiency['offensive'].mean(), 0, 30, linestyles='dashed')
plt.hlines(-1 * efficiency['defensive'].mean(), 0, 30, linestyles='dashed')
# champion_teams must hold the 31 champion team codes in season order; it is
# not defined in this part of the script.
plt.xticks(np.arange(31), champion_teams, size='small', rotation=90)
plt.xlabel('Champion Team')
plt.ylabel('Offensive & Defensive Efficiency')
plt.title('Offensive & Defensive Efficiency of Champion Team from 1986 to 2016')
# Offensive/defensive efficiency ratio of every champion.
# pyplot draws on the current figure; open a new one so the scatter does not
# land on the previous chart's axes.
plt.figure()
plt.scatter(np.arange(31), list(efficiency['ratio'].values))
# Dashed line marks the mean ratio.
plt.hlines(efficiency['ratio'].mean(), 0, 30, linestyles='dashed')
# champion_teams must hold the 31 champion team codes in season order; it is
# not defined in this part of the script.
plt.xticks(np.arange(31), champion_teams, size='small', rotation=90)
plt.xlabel('Champion Team')
plt.ylabel('Offensive & Defensive Efficiency Ratio')
plt.title('Offensive & Defensive Efficiency Ratio of Champion Team in Playoff from 1986 to 2016')
plt.grid(True)
# Playoff game logs of the three teams examined in detail further down.
playoff_team = team_playoff['球队']
playoff_year = team_playoff['赛季']
CHI1991 = team_playoff[(playoff_year == 1991) & (playoff_team == 'CHI')]
CHI1996 = team_playoff[(playoff_year == 1996) & (playoff_team == 'CHI')]
LAL2001 = team_playoff[(playoff_year == 2001) & (playoff_team == 'LAL')]
# Notebook-style previews.
CHI1991
team_playoff.columns
# 1991: compare four teams' regular-season averages with their averages over
# their last few playoff games.
# tail(n) keeps each team's final n playoff games -- presumably its series
# against Chicago; TODO confirm.
# numeric_only=True is required because the game tables contain text columns
# (team, score string, result); without it DataFrame.mean() raises on
# pandas >= 2.0.
NYK_season_1991 = team_season[(team_season['球队'] == 'NYK') & (team_season['赛季'] == 1991)]
NYK_season_average_1991 = NYK_season_1991.mean(numeric_only=True)
NYK_playoff_1991 = team_playoff[(team_playoff['球队'] == 'NYK') & (team_playoff['赛季'] == 1991)].tail(3)
NYK_playoff_average_1991 = NYK_playoff_1991.mean(numeric_only=True)
PHI_season_1991 = team_season[(team_season['球队'] == 'PHI') & (team_season['赛季'] == 1991)]
PHI_season_average_1991 = PHI_season_1991.mean(numeric_only=True)
PHI_playoff_1991 = team_playoff[(team_playoff['球队'] == 'PHI') & (team_playoff['赛季'] == 1991)].tail(5)
PHI_playoff_average_1991 = PHI_playoff_1991.mean(numeric_only=True)
DET_season_1991 = team_season[(team_season['球队'] == 'DET') & (team_season['赛季'] == 1991)]
DET_season_average_1991 = DET_season_1991.mean(numeric_only=True)
DET_playoff_1991 = team_playoff[(team_playoff['球队'] == 'DET') & (team_playoff['赛季'] == 1991)].tail(4)
DET_playoff_average_1991 = DET_playoff_1991.mean(numeric_only=True)
LAL_season_1991 = team_season[(team_season['球队'] == 'LAL') & (team_season['赛季'] == 1991)]
LAL_season_average_1991 = LAL_season_1991.mean(numeric_only=True)
LAL_playoff_1991 = team_playoff[(team_playoff['球队'] == 'LAL') & (team_playoff['赛季'] == 1991)].tail(5)
LAL_playoff_average_1991 = LAL_playoff_1991.mean(numeric_only=True)
# Alternating layout: even positions are season averages, odd positions are
# playoff averages, in team order NYK, PHI, DET, LAL.
total_1991 = [NYK_season_average_1991, NYK_playoff_average_1991, PHI_season_average_1991, PHI_playoff_average_1991,
              DET_season_average_1991, DET_playoff_average_1991, LAL_season_average_1991, LAL_playoff_average_1991]
# Points allowed are stored negated.
season_score_1991 = [average['百回合得分'] for average in total_1991[0::2]]
season_loss_1991 = [-average['百回合失分'] for average in total_1991[0::2]]
playoff_score_1991 = [average['百回合得分'] for average in total_1991[1::2]]
playoff_loss_1991 = [-average['百回合失分'] for average in total_1991[1::2]]
change1991 = pd.DataFrame({'season_score': season_score_1991, 'season_loss': season_loss_1991,
                           'playoff_score': playoff_score_1991, 'playoff_loss': playoff_loss_1991})
# Regular season vs playoffs: points scored per 100 possessions.
change1991[['season_score', 'playoff_score']].plot(kind='bar')
plt.xticks(np.arange(4), ['NYK', 'PHI', 'DET', 'LAL'])
plt.xlabel('Team')
plt.ylabel('Score per 100 Round')
# Regular season vs playoffs: points allowed per 100 possessions (negated).
change1991[['season_loss', 'playoff_loss']].plot(kind='bar')
plt.xticks(np.arange(4), ['NYK', 'PHI', 'DET', 'LAL'])
plt.xlabel('Team')
plt.ylabel('Loss per 100 Round')
# League-wide 1991 regular-season efficiency per team and the scored/allowed ratio.
season1991 = team_season[team_season['赛季'] == 1991]
season1991_score = season1991['百回合得分'].groupby(season1991['球队']).mean()
season1991_loss = season1991['百回合失分'].groupby(season1991['球队']).mean()
season1991_average = pd.concat([season1991_score, season1991_loss], axis=1)
season1991_average['得失分比'] = season1991_average['百回合得分'] / season1991_average['百回合失分']
# Notebook-style preview, best ratio first (the result is not stored).
season1991_average.sort_values(by='得失分比', ascending=False)
# One row per team with season/playoff figures and their ratios.
compare1991 = pd.DataFrame([season_score_1991, season_loss_1991, playoff_score_1991, playoff_loss_1991],
                           index=['season score', 'season loss', 'playoff score', 'playoff loss'],
                           columns=['NYK', 'PHI', 'DET', 'LAL']).T
# Losses are stored negated, so flip the sign back before dividing.
compare1991['season ratio'] = compare1991['season score'] / (-1 * compare1991['season loss'])
compare1991['playoff ratio'] = compare1991['playoff score'] / (-1 * compare1991['playoff loss'])
compare1991
# 1996: compare teams' regular-season averages with their averages over their
# last few playoff games.
# Notebook-style preview of Chicago's 1996 playoff games.
CHI1996
# tail(n) keeps each team's final n playoff games -- presumably its series
# against Chicago; TODO confirm.
# numeric_only=True is required because the game tables contain text columns
# (team, score string, result); without it DataFrame.mean() raises on
# pandas >= 2.0.
MIA_season_1996 = team_season[(team_season['球队'] == 'MIA') & (team_season['赛季'] == 1996)]
MIA_season_average_1996 = MIA_season_1996.mean(numeric_only=True)
MIA_playoff_1996 = team_playoff[(team_playoff['球队'] == 'MIA') & (team_playoff['赛季'] == 1996)].tail(3)
MIA_playoff_average_1996 = MIA_playoff_1996.mean(numeric_only=True)
NYK_season_1996 = team_season[(team_season['球队'] == 'NYK') & (team_season['赛季'] == 1996)]
NYK_season_average_1996 = NYK_season_1996.mean(numeric_only=True)
NYK_playoff_1996 = team_playoff[(team_playoff['球队'] == 'NYK') & (team_playoff['赛季'] == 1996)].tail(5)
NYK_playoff_average_1996 = NYK_playoff_1996.mean(numeric_only=True)
ORL_season_1996 = team_season[(team_season['球队'] == 'ORL') & (team_season['赛季'] == 1996)]
ORL_season_average_1996 = ORL_season_1996.mean(numeric_only=True)
ORL_playoff_1996 = team_playoff[(team_playoff['球队'] == 'ORL') & (team_playoff['赛季'] == 1996)].tail(4)
ORL_playoff_average_1996 = ORL_playoff_1996.mean(numeric_only=True)
# Alternating layout: even positions are season averages, odd positions are
# playoff averages, in team order MIA, NYK, ORL.
total_1996 = [MIA_season_average_1996, MIA_playoff_average_1996, NYK_season_average_1996, NYK_playoff_average_1996,
              ORL_season_average_1996, ORL_playoff_average_1996]
# Chicago's per-100 figures over its last six playoff games (preview only).
# NOTE(review): presumably the source of the hard-coded SEA playoff numbers
# appended below -- confirm.
CHI1996.tail(6)['百回合得分'].mean()
CHI1996.tail(6)['百回合失分'].mean()
# Points allowed are stored negated.
season_score_1996 = [average['百回合得分'] for average in total_1996[0::2]]
season_loss_1996 = [-average['百回合失分'] for average in total_1996[0::2]]
playoff_score_1996 = [average['百回合得分'] for average in total_1996[1::2]]
playoff_loss_1996 = [-average['百回合失分'] for average in total_1996[1::2]]
# SEA is added as a fourth team from fixed values rather than computed from
# the tables.
season_score_1996.append(107.523563)
season_loss_1996.append(-99.497880)
playoff_score_1996.append(100.551875)
playoff_loss_1996.append(-104.907100)
change1996 = pd.DataFrame({'season_score': season_score_1996, 'season_loss': season_loss_1996,
                           'playoff_score': playoff_score_1996, 'playoff_loss': playoff_loss_1996})
# Regular season vs playoffs: points scored per 100 possessions.
change1996[['season_score', 'playoff_score']].plot(kind='bar')
plt.xticks(np.arange(4), ['MIA', 'NYK', 'ORL', 'SEA'])
plt.xlabel('Team')
plt.ylabel('Score per 100 Round')
# Regular season vs playoffs: points allowed per 100 possessions (negated).
change1996[['season_loss', 'playoff_loss']].plot(kind='bar')
plt.xticks(np.arange(4), ['MIA', 'NYK', 'ORL', 'SEA'])
plt.xlabel('Team')
plt.ylabel('Loss per 100 Round')
# League-wide 1996 regular-season efficiency per team and the scored/allowed ratio.
season1996 = team_season[team_season['赛季'] == 1996]
season1996_score = season1996['百回合得分'].groupby(season1996['球队']).mean()
season1996_loss = season1996['百回合失分'].groupby(season1996['球队']).mean()
# Transpose so teams become columns, insert the fixed SEA figures as a column,
# then transpose back so teams are rows again.
season1996_average = pd.concat([season1996_score, season1996_loss], axis=1).T
season1996_average['SEA'] = [107.523563, 99.497880]
season1996_average = season1996_average.T
season1996_average['得失分比'] = season1996_average['百回合得分'] / season1996_average['百回合失分']
# Notebook-style preview, best ratio first (the result is not stored).
season1996_average.sort_values(by='得失分比', ascending=False)
# One row per team with season/playoff figures and their ratios.
compare1996 = pd.DataFrame([season_score_1996, season_loss_1996, playoff_score_1996, playoff_loss_1996],
                           index=['season score', 'season loss', 'playoff score', 'playoff loss'],
                           columns=['MIA', 'NYK', 'ORL', 'SEA']).T
# Losses are stored negated, so flip the sign back before dividing.
compare1996['season ratio'] = compare1996['season score'] / (-1 * compare1996['season loss'])
compare1996['playoff ratio'] = compare1996['playoff score'] / (-1 * compare1996['playoff loss'])
compare1996
# 2001: compare four teams' regular-season averages with their averages over
# their last few playoff games.
# Notebook-style preview of the Lakers' 2001 playoff games.
LAL2001
# tail(n) keeps each team's final n playoff games -- presumably its series
# against the Lakers; TODO confirm.
# numeric_only=True is required because the game tables contain text columns
# (team, score string, result); without it DataFrame.mean() raises on
# pandas >= 2.0.
POR_season_2001 = team_season[(team_season['球队'] == 'POR') & (team_season['赛季'] == 2001)]
POR_season_average_2001 = POR_season_2001.mean(numeric_only=True)
POR_playoff_2001 = team_playoff[(team_playoff['球队'] == 'POR') & (team_playoff['赛季'] == 2001)].tail(3)
POR_playoff_average_2001 = POR_playoff_2001.mean(numeric_only=True)
SAC_season_2001 = team_season[(team_season['球队'] == 'SAC') & (team_season['赛季'] == 2001)]
SAC_season_average_2001 = SAC_season_2001.mean(numeric_only=True)
SAC_playoff_2001 = team_playoff[(team_playoff['球队'] == 'SAC') & (team_playoff['赛季'] == 2001)].tail(4)
SAC_playoff_average_2001 = SAC_playoff_2001.mean(numeric_only=True)
SAS_season_2001 = team_season[(team_season['球队'] == 'SAS') & (team_season['赛季'] == 2001)]
SAS_season_average_2001 = SAS_season_2001.mean(numeric_only=True)
SAS_playoff_2001 = team_playoff[(team_playoff['球队'] == 'SAS') & (team_playoff['赛季'] == 2001)].tail(4)
SAS_playoff_average_2001 = SAS_playoff_2001.mean(numeric_only=True)
PHI_season_2001 = team_season[(team_season['球队'] == 'PHI') & (team_season['赛季'] == 2001)]
PHI_season_average_2001 = PHI_season_2001.mean(numeric_only=True)
PHI_playoff_2001 = team_playoff[(team_playoff['球队'] == 'PHI') & (team_playoff['赛季'] == 2001)].tail(5)
PHI_playoff_average_2001 = PHI_playoff_2001.mean(numeric_only=True)
# Alternating layout: even positions are season averages, odd positions are
# playoff averages, in team order POR, SAC, SAS, PHI.
total_2001 = [POR_season_average_2001, POR_playoff_average_2001, SAC_season_average_2001, SAC_playoff_average_2001,
              SAS_season_average_2001, SAS_playoff_average_2001, PHI_season_average_2001, PHI_playoff_average_2001]
# Points allowed are stored negated.
season_score_2001 = [average['百回合得分'] for average in total_2001[0::2]]
season_loss_2001 = [-average['百回合失分'] for average in total_2001[0::2]]
playoff_score_2001 = [average['百回合得分'] for average in total_2001[1::2]]
playoff_loss_2001 = [-average['百回合失分'] for average in total_2001[1::2]]
change2001 = pd.DataFrame({'season_score': season_score_2001, 'season_loss': season_loss_2001,
                           'playoff_score': playoff_score_2001, 'playoff_loss': playoff_loss_2001})
# Regular season vs playoffs: points scored per 100 possessions.
change2001[['season_score', 'playoff_score']].plot(kind='bar')
plt.xticks(np.arange(4), ['POR', 'SAC', 'SAS', 'PHI'])
plt.xlabel('Team')
plt.ylabel('Score per 100 Round')
# Regular season vs playoffs: points allowed per 100 possessions (negated).
change2001[['season_loss', 'playoff_loss']].plot(kind='bar')
plt.xticks(np.arange(4), ['POR', 'SAC', 'SAS', 'PHI'])
plt.xlabel('Team')
plt.ylabel('Loss per 100 Round')
# League-wide 2001 regular-season efficiency per team and the scored/allowed ratio.
season2001 = team_season[team_season['赛季'] == 2001]
season2001_score = season2001['百回合得分'].groupby(season2001['球队']).mean()
season2001_loss = season2001['百回合失分'].groupby(season2001['球队']).mean()
season2001_average = pd.concat([season2001_score, season2001_loss], axis=1)
season2001_average['得失分比'] = season2001_average['百回合得分'] / season2001_average['百回合失分']
# Notebook-style preview, best ratio first (the result is not stored).
season2001_average.sort_values(by='得失分比', ascending=False)
# One row per team with season/playoff figures and their ratios.
compare2001 = pd.DataFrame([season_score_2001, season_loss_2001, playoff_score_2001, playoff_loss_2001],
                           index=['season score', 'season loss', 'playoff score', 'playoff loss'],
                           columns=['POR', 'SAC', 'SAS', 'PHI']).T
# Losses are stored negated, so flip the sign back before dividing.
compare2001['season ratio'] = compare2001['season score'] / (-1 * compare2001['season loss'])
compare2001['playoff ratio'] = compare2001['playoff score'] / (-1 * compare2001['playoff loss'])
compare2001
|