liuyuqi-dellpc 3 years ago
parent
commit
773fe5e982
1 changed files with 136 additions and 41 deletions
  1. 136 41
      notebook/驾考分析.ipynb

+ 136 - 41
notebook/驾考分析.ipynb

@@ -1,79 +1,153 @@
 {
- "metadata": {
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.0-final"
-  },
-  "orig_nbformat": 2,
-  "kernelspec": {
-   "name": "python_defaultSpec_1599503433913",
-   "display_name": "Python 3.6.0 64-bit ('root': conda)"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2,
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd \n",
     "import numpy as np \n",
     "import matplotlib.pyplot as plt\n",
-    "import os,sys,re,time\n",
-    "\n"
+    "import os,sys,re,time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "df =pd.read_csv(\"../data/res.csv\",header=None)\n",
+    "df=df.drop([0,1,2],axis=1).reset_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 删除重复的\n",
+    "df=df.drop_duplicates()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 47,
    "metadata": {
     "tags": []
    },
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
-     "text": "0   2015-07-01\n1   2015-07-01\nName: 3, dtype: datetime64[ns]\n"
+     "output_type": "stream",
+     "text": [
+      "   index          3            4   5  6  7  8\n",
+      "0      0 2015-07-01  科目一第01考试点申通   3  0  0  0\n",
+      "1      1 2015-07-01  科目一第05考试点马陆  83  0  0  0\n",
+      "2      2 2015-07-01  科目一第09考试点刘行  16  0  0  0\n",
+      "3      3 2015-07-01  科目一第50考试点银都   0  0  0  2\n",
+      "4      4 2015-07-01  科目一第16考试点福赐   3  0  0  0\n"
+     ]
     }
    ],
    "source": [
-    "df =pd.read_csv(\"data/res.csv\",header=None)\n",
-    "df2= pd.to_datetime(df[3],format='%Y%m%d')\n",
+    "df[3]= pd.to_datetime(df[3],format='%Y%m%d')\n",
+    "# df[5]=df[5].replace(\"--\",0).apply(pd.to_numeric)\n",
+    "df[5]=df[5].replace(\"--\",0).astype(int)  # 科目一\n",
+    "df[6]=df[6].replace(\"--\",0).astype(int)\n",
+    "df[7]=df[7].replace(\"--\",0).astype(int)\n",
+    "df[8]=df[8].replace(\"--\",0).astype(int)\n",
+    "print(df.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 聚合函数统计\n",
     "\n",
-    "print(df2.head(2))"
+    "可以看到 科目1,2,3,4等,上海总共有249个考点。 \n",
+    "\n",
+    "前十考点:\n",
+    "第01考点市中心, 第08考点大众, 第07考点马陆, 第02考点安技, 第22考点恒通, 第04考点小昆山, 第27考点和悦, 第17考点邮佳, 第11考点刘行, 三分所\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "            4  index     3     5     6     7     8\n",
+      "61   第01考点市中心   2107  2107  2107  2107  2107  2107\n",
+      "87    第08考点大众   1372  1372  1372  1372  1372  1372\n",
+      "84    第07考点马陆   1288  1288  1288  1288  1288  1288\n",
+      "65    第02考点安技   1233  1233  1233  1233  1233  1233\n",
+      "130   第22考点恒通   1229  1229  1229  1229  1229  1229\n",
+      "71   第04考点小昆山   1148  1148  1148  1148  1148  1148\n",
+      "146   第27考点和悦   1140  1140  1140  1140  1140  1140\n",
+      "115   第17考点邮佳   1089  1089  1089  1089  1089  1089\n",
+      "96    第11考点刘行   1049  1049  1049  1049  1049  1049\n",
+      "3         三分所   1047  1047  1047  1047  1047  1047\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 统计多少个考点\n",
+    "# df[4].apply(lambda x:x[:]).tolist()\n",
+    "df[4]=df[4].str.replace('科目一','')\n",
+    "df[4]=df[4].str.replace('科目二','')\n",
+    "df[4]=df[4].str.replace('科目三','')\n",
+    "df_line = df.groupby(4).count().reset_index().sort_values(by=[5],ascending=False)\n",
+    "print(df_line.head(10))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 按日期统计"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 71,
    "metadata": {
     "tags": []
    },
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
-     "text": "index         3            4   5   6   7   8\n0      0  20150701  科目一第01考试点申通   3  --  --  --\n1      1  20150701  科目一第05考试点马陆  83  --  --  --\n"
+     "output_type": "stream",
+     "text": [
+      "            index     4     5     6     7     8\n",
+      "3                                              \n",
+      "2017-07-31   1618  1618  1618  1618  1618  1618\n",
+      "2018-08-31    903   903   903   903   903   903\n",
+      "2016-08-31    847   847   847   847   847   847\n",
+      "2017-04-01    744   744   744   744   744   744\n",
+      "2017-04-05    732   732   732   732   732   732\n",
+      "...           ...   ...   ...   ...   ...   ...\n",
+      "2019-08-18      1     1     1     1     1     1\n",
+      "2019-09-14      1     1     1     1     1     1\n",
+      "2020-08-09      1     1     1     1     1     1\n",
+      "2019-10-02      1     1     1     1     1     1\n",
+      "2020-08-30      1     1     1     1     1     1\n",
+      "\n",
+      "[969 rows x 6 columns]\n"
+     ]
     }
    ],
    "source": [
-    "df2=df.drop([0,1,2],axis=1).reset_index()\n",
-    "\n",
-    "print(df2.head(2))\n",
-    "\n",
-    "\n",
-    "# df_all = df_all[df_all['purchase_num'].str.contains('人付款')] \n"
+    "print(df.groupby(3).count().sort_values(by=[4],ascending=False))  # select date,sum(1),sum(2),sum(3) from car group by date"
    ]
   },
   {
@@ -111,5 +185,26 @@
    "outputs": [],
    "source": []
   }
- ]
-}
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.6.0 64-bit ('root': conda)",
+   "language": "python",
+   "name": "python36064bitrootconda12dcd85ef9c147fdbdf4c10492696076"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}