|
@@ -1,79 +1,153 @@
|
|
|
{
|
|
|
- "metadata": {
|
|
|
- "language_info": {
|
|
|
- "codemirror_mode": {
|
|
|
- "name": "ipython",
|
|
|
- "version": 3
|
|
|
- },
|
|
|
- "file_extension": ".py",
|
|
|
- "mimetype": "text/x-python",
|
|
|
- "name": "python",
|
|
|
- "nbconvert_exporter": "python",
|
|
|
- "pygments_lexer": "ipython3",
|
|
|
- "version": "3.6.0-final"
|
|
|
- },
|
|
|
- "orig_nbformat": 2,
|
|
|
- "kernelspec": {
|
|
|
- "name": "python_defaultSpec_1599503433913",
|
|
|
- "display_name": "Python 3.6.0 64-bit ('root': conda)"
|
|
|
- }
|
|
|
- },
|
|
|
- "nbformat": 4,
|
|
|
- "nbformat_minor": 2,
|
|
|
"cells": [
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 3,
|
|
|
+ "execution_count": 1,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"import pandas as pd \n",
|
|
|
"import numpy as np \n",
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
- "import os,sys,re,time\n",
|
|
|
- "\n"
|
|
|
+ "import os,sys,re,time"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 36,
|
|
|
+ "metadata": {
|
|
|
+ "tags": []
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "df =pd.read_csv(\"../data/res.csv\",header=None)\n",
|
|
|
+ "df=df.drop([0,1,2],axis=1).reset_index()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 37,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# 删除重复的\n",
|
|
|
+ "df=df.drop_duplicates()"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 26,
|
|
|
+ "execution_count": 47,
|
|
|
"metadata": {
|
|
|
"tags": []
|
|
|
},
|
|
|
"outputs": [
|
|
|
{
|
|
|
- "output_type": "stream",
|
|
|
"name": "stdout",
|
|
|
- "text": "0 2015-07-01\n1 2015-07-01\nName: 3, dtype: datetime64[ns]\n"
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " index 3 4 5 6 7 8\n",
|
|
|
+ "0 0 2015-07-01 科目一第01考试点申通 3 0 0 0\n",
|
|
|
+ "1 1 2015-07-01 科目一第05考试点马陆 83 0 0 0\n",
|
|
|
+ "2 2 2015-07-01 科目一第09考试点刘行 16 0 0 0\n",
|
|
|
+ "3 3 2015-07-01 科目一第50考试点银都 0 0 0 2\n",
|
|
|
+ "4 4 2015-07-01 科目一第16考试点福赐 3 0 0 0\n"
|
|
|
+ ]
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
- "df =pd.read_csv(\"data/res.csv\",header=None)\n",
|
|
|
- "df2= pd.to_datetime(df[3],format='%Y%m%d')\n",
|
|
|
+ "df[3]= pd.to_datetime(df[3],format='%Y%m%d')\n",
|
|
|
+ "# df[5]=df[5].replace(\"--\",0).apply(pd.to_numeric)\n",
|
|
|
+ "df[5]=df[5].replace(\"--\",0).astype(int) # 科目一\n",
|
|
|
+ "df[6]=df[6].replace(\"--\",0).astype(int)\n",
|
|
|
+ "df[7]=df[7].replace(\"--\",0).astype(int)\n",
|
|
|
+ "df[8]=df[8].replace(\"--\",0).astype(int)\n",
|
|
|
+ "print(df.head())"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## 聚合函数统计\n",
|
|
|
"\n",
|
|
|
- "print(df2.head(2))"
|
|
|
+ "可以看到,涵盖科目一、二、三、四等,上海总共有 249 个考点。  \n",
|
|
|
+ "\n",
|
|
|
+ "按记录数排序的前十个考点:\n",
|
|
|
+ "第01考点市中心、第08考点大众、第07考点马陆、第02考点安技、第22考点恒通、第04考点小昆山、第27考点和悦、第17考点邮佳、第11考点刘行、三分所\n",
|
|
|
+ "\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 70,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " 4 index 3 5 6 7 8\n",
|
|
|
+ "61 第01考点市中心 2107 2107 2107 2107 2107 2107\n",
|
|
|
+ "87 第08考点大众 1372 1372 1372 1372 1372 1372\n",
|
|
|
+ "84 第07考点马陆 1288 1288 1288 1288 1288 1288\n",
|
|
|
+ "65 第02考点安技 1233 1233 1233 1233 1233 1233\n",
|
|
|
+ "130 第22考点恒通 1229 1229 1229 1229 1229 1229\n",
|
|
|
+ "71 第04考点小昆山 1148 1148 1148 1148 1148 1148\n",
|
|
|
+ "146 第27考点和悦 1140 1140 1140 1140 1140 1140\n",
|
|
|
+ "115 第17考点邮佳 1089 1089 1089 1089 1089 1089\n",
|
|
|
+ "96 第11考点刘行 1049 1049 1049 1049 1049 1049\n",
|
|
|
+ "3 三分所 1047 1047 1047 1047 1047 1047\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# 统计多少个考点\n",
|
|
|
+ "# df[4].apply(lambda x:x[:]).tolist()\n",
|
|
|
+ "df[4]=df[4].str.replace('科目一','')\n",
|
|
|
+ "df[4]=df[4].str.replace('科目二','')\n",
|
|
|
+ "df[4]=df[4].str.replace('科目三','')\n",
|
|
|
+ "df_line = df.groupby(4).count().reset_index().sort_values(by=[5],ascending=False)\n",
|
|
|
+ "print(df_line.head(10))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## 按日期统计记录数"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 21,
|
|
|
+ "execution_count": 71,
|
|
|
"metadata": {
|
|
|
"tags": []
|
|
|
},
|
|
|
"outputs": [
|
|
|
{
|
|
|
- "output_type": "stream",
|
|
|
"name": "stdout",
|
|
|
- "text": "index 3 4 5 6 7 8\n0 0 20150701 科目一第01考试点申通 3 -- -- --\n1 1 20150701 科目一第05考试点马陆 83 -- -- --\n"
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " index 4 5 6 7 8\n",
|
|
|
+ "3 \n",
|
|
|
+ "2017-07-31 1618 1618 1618 1618 1618 1618\n",
|
|
|
+ "2018-08-31 903 903 903 903 903 903\n",
|
|
|
+ "2016-08-31 847 847 847 847 847 847\n",
|
|
|
+ "2017-04-01 744 744 744 744 744 744\n",
|
|
|
+ "2017-04-05 732 732 732 732 732 732\n",
|
|
|
+ "... ... ... ... ... ... ...\n",
|
|
|
+ "2019-08-18 1 1 1 1 1 1\n",
|
|
|
+ "2019-09-14 1 1 1 1 1 1\n",
|
|
|
+ "2020-08-09 1 1 1 1 1 1\n",
|
|
|
+ "2019-10-02 1 1 1 1 1 1\n",
|
|
|
+ "2020-08-30 1 1 1 1 1 1\n",
|
|
|
+ "\n",
|
|
|
+ "[969 rows x 6 columns]\n"
|
|
|
+ ]
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
- "df2=df.drop([0,1,2],axis=1).reset_index()\n",
|
|
|
- "\n",
|
|
|
- "print(df2.head(2))\n",
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- "# df_all = df_all[df_all['purchase_num'].str.contains('人付款')] \n"
|
|
|
+ "print(df.groupby(3).count().sort_values(by=[4],ascending=False)) # select date,sum(1),sum(2),sum(3) from car group by date"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -111,5 +185,26 @@
|
|
|
"outputs": [],
|
|
|
"source": []
|
|
|
}
|
|
|
- ]
|
|
|
-}
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3.6.0 64-bit ('root': conda)",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python36064bitrootconda12dcd85ef9c147fdbdf4c10492696076"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.6.0"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 4
|
|
|
+}
|