liuyuqi-dellpc 6 years ago
commit
8d63399513
8 changed files with 1702 additions and 0 deletions
  1. 1 0
      .gitignore
  2. 1059 0
      car-R.ipynb
  3. 262 0
      car-py.ipynb
  4. 380 0
      output/car-py.md
  5. BIN
      output/output_14_0.png
  6. BIN
      output/output_15_0.png
  7. BIN
      output/output_21_0.png
  8. BIN
      output/output_9_0.png

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+/.ipynb_checkpoints

+ 1059 - 0
car-R.ipynb

@@ -0,0 +1,1059 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "setwd(\"/media/sf_share/linux/car\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# vehicles <- read.csv(unz(\"vehicles.csv.zip\", \"vehicles.csv\"), stringsAsFactors = F)\n",
+    "vehicles = read.csv(\"input/vehicles.csv\",)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "<thead><tr><th scope=col>barrels08</th><th scope=col>barrelsA08</th><th scope=col>charge120</th><th scope=col>charge240</th><th scope=col>city08</th><th scope=col>city08U</th><th scope=col>cityA08</th><th scope=col>cityA08U</th><th scope=col>cityCD</th><th scope=col>cityE</th><th scope=col>⋯</th><th scope=col>mfrCode</th><th scope=col>c240Dscr</th><th scope=col>charge240b</th><th scope=col>c240bDscr</th><th scope=col>createdOn</th><th scope=col>modifiedOn</th><th scope=col>startStop</th><th scope=col>phevCity</th><th scope=col>phevHwy</th><th scope=col>phevComb</th></tr></thead>\n",
+       "<tbody>\n",
+       "\t<tr><td>15.69571                    </td><td>0                           </td><td>0                           </td><td>0                           </td><td>19                          </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>⋯                           </td><td>                            </td><td>                            </td><td>0                           </td><td>                            </td><td>Tue Jan 01 00:00:00 EST 2013</td><td>Tue Jan 01 00:00:00 EST 2013</td><td>                            </td><td>0                           </td><td>0                           </td><td>0                           </td></tr>\n",
+       "\t<tr><td>29.96455                    </td><td>0                           </td><td>0                           </td><td>0                           </td><td> 9                          </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>⋯                           </td><td>                            </td><td>                            </td><td>0                           </td><td>                            </td><td>Tue Jan 01 00:00:00 EST 2013</td><td>Tue Jan 01 00:00:00 EST 2013</td><td>                            </td><td>0                           </td><td>0                           </td><td>0                           </td></tr>\n",
+       "\t<tr><td>12.20778                    </td><td>0                           </td><td>0                           </td><td>0                           </td><td>23                          </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>⋯                           </td><td>                            </td><td>                            </td><td>0                           </td><td>                            </td><td>Tue Jan 01 00:00:00 EST 2013</td><td>Tue Jan 01 00:00:00 EST 2013</td><td>                            </td><td>0                           </td><td>0                           </td><td>0                           </td></tr>\n",
+       "\t<tr><td>29.96455                    </td><td>0                           </td><td>0                           </td><td>0                           </td><td>10                          </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>⋯                           </td><td>                            </td><td>                            </td><td>0                           </td><td>                            </td><td>Tue Jan 01 00:00:00 EST 2013</td><td>Tue Jan 01 00:00:00 EST 2013</td><td>                            </td><td>0                           </td><td>0                           </td><td>0                           </td></tr>\n",
+       "\t<tr><td>17.34789                    </td><td>0                           </td><td>0                           </td><td>0                           </td><td>17                          </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>⋯                           </td><td>                            </td><td>                            </td><td>0                           </td><td>                            </td><td>Tue Jan 01 00:00:00 EST 2013</td><td>Tue Jan 01 00:00:00 EST 2013</td><td>                            </td><td>0                           </td><td>0                           </td><td>0                           </td></tr>\n",
+       "\t<tr><td>14.98227                    </td><td>0                           </td><td>0                           </td><td>0                           </td><td>21                          </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>0                           </td><td>⋯                           </td><td>                            </td><td>                            </td><td>0                           </td><td>                            </td><td>Tue Jan 01 00:00:00 EST 2013</td><td>Tue Jan 01 00:00:00 EST 2013</td><td>                            </td><td>0                           </td><td>0                           </td><td>0                           </td></tr>\n",
+       "</tbody>\n",
+       "</table>\n"
+      ],
+      "text/latex": [
+       "\\begin{tabular}{r|lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll}\n",
+       " barrels08 & barrelsA08 & charge120 & charge240 & city08 & city08U & cityA08 & cityA08U & cityCD & cityE & ⋯ & mfrCode & c240Dscr & charge240b & c240bDscr & createdOn & modifiedOn & startStop & phevCity & phevHwy & phevComb\\\\\n",
+       "\\hline\n",
+       "\t 15.69571                     & 0                            & 0                            & 0                            & 19                           & 0                            & 0                            & 0                            & 0                            & 0                            & ⋯                            &                              &                              & 0                            &                              & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 &                              & 0                            & 0                            & 0                           \\\\\n",
+       "\t 29.96455                     & 0                            & 0                            & 0                            &  9                           & 0                            & 0                            & 0                            & 0                            & 0                            & ⋯                            &                              &                              & 0                            &                              & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 &                              & 0                            & 0                            & 0                           \\\\\n",
+       "\t 12.20778                     & 0                            & 0                            & 0                            & 23                           & 0                            & 0                            & 0                            & 0                            & 0                            & ⋯                            &                              &                              & 0                            &                              & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 &                              & 0                            & 0                            & 0                           \\\\\n",
+       "\t 29.96455                     & 0                            & 0                            & 0                            & 10                           & 0                            & 0                            & 0                            & 0                            & 0                            & ⋯                            &                              &                              & 0                            &                              & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 &                              & 0                            & 0                            & 0                           \\\\\n",
+       "\t 17.34789                     & 0                            & 0                            & 0                            & 17                           & 0                            & 0                            & 0                            & 0                            & 0                            & ⋯                            &                              &                              & 0                            &                              & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 &                              & 0                            & 0                            & 0                           \\\\\n",
+       "\t 14.98227                     & 0                            & 0                            & 0                            & 21                           & 0                            & 0                            & 0                            & 0                            & 0                            & ⋯                            &                              &                              & 0                            &                              & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 &                              & 0                            & 0                            & 0                           \\\\\n",
+       "\\end{tabular}\n"
+      ],
+      "text/markdown": [
+       "\n",
+       "barrels08 | barrelsA08 | charge120 | charge240 | city08 | city08U | cityA08 | cityA08U | cityCD | cityE | ⋯ | mfrCode | c240Dscr | charge240b | c240bDscr | createdOn | modifiedOn | startStop | phevCity | phevHwy | phevComb | \n",
+       "|---|---|---|---|---|---|\n",
+       "| 15.69571                     | 0                            | 0                            | 0                            | 19                           | 0                            | 0                            | 0                            | 0                            | 0                            | ⋯                            |                              |                              | 0                            |                              | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 |                              | 0                            | 0                            | 0                            | \n",
+       "| 29.96455                     | 0                            | 0                            | 0                            |  9                           | 0                            | 0                            | 0                            | 0                            | 0                            | ⋯                            |                              |                              | 0                            |                              | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 |                              | 0                            | 0                            | 0                            | \n",
+       "| 12.20778                     | 0                            | 0                            | 0                            | 23                           | 0                            | 0                            | 0                            | 0                            | 0                            | ⋯                            |                              |                              | 0                            |                              | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 |                              | 0                            | 0                            | 0                            | \n",
+       "| 29.96455                     | 0                            | 0                            | 0                            | 10                           | 0                            | 0                            | 0                            | 0                            | 0                            | ⋯                            |                              |                              | 0                            |                              | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 |                              | 0                            | 0                            | 0                            | \n",
+       "| 17.34789                     | 0                            | 0                            | 0                            | 17                           | 0                            | 0                            | 0                            | 0                            | 0                            | ⋯                            |                              |                              | 0                            |                              | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 |                              | 0                            | 0                            | 0                            | \n",
+       "| 14.98227                     | 0                            | 0                            | 0                            | 21                           | 0                            | 0                            | 0                            | 0                            | 0                            | ⋯                            |                              |                              | 0                            |                              | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 |                              | 0                            | 0                            | 0                            | \n",
+       "\n",
+       "\n"
+      ],
+      "text/plain": [
+       "  barrels08 barrelsA08 charge120 charge240 city08 city08U cityA08 cityA08U\n",
+       "1 15.69571  0          0         0         19     0       0       0       \n",
+       "2 29.96455  0          0         0          9     0       0       0       \n",
+       "3 12.20778  0          0         0         23     0       0       0       \n",
+       "4 29.96455  0          0         0         10     0       0       0       \n",
+       "5 17.34789  0          0         0         17     0       0       0       \n",
+       "6 14.98227  0          0         0         21     0       0       0       \n",
+       "  cityCD cityE ⋯ mfrCode c240Dscr charge240b c240bDscr\n",
+       "1 0      0     ⋯                  0                   \n",
+       "2 0      0     ⋯                  0                   \n",
+       "3 0      0     ⋯                  0                   \n",
+       "4 0      0     ⋯                  0                   \n",
+       "5 0      0     ⋯                  0                   \n",
+       "6 0      0     ⋯                  0                   \n",
+       "  createdOn                    modifiedOn                   startStop phevCity\n",
+       "1 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013           0       \n",
+       "2 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013           0       \n",
+       "3 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013           0       \n",
+       "4 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013           0       \n",
+       "5 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013           0       \n",
+       "6 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013           0       \n",
+       "  phevHwy phevComb\n",
+       "1 0       0       \n",
+       "2 0       0       \n",
+       "3 0       0       \n",
+       "4 0       0       \n",
+       "5 0       0       \n",
+       "6 0       0       "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "39270"
+      ],
+      "text/latex": [
+       "39270"
+      ],
+      "text/markdown": [
+       "39270"
+      ],
+      "text/plain": [
+       "[1] 39270"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "83"
+      ],
+      "text/latex": [
+       "83"
+      ],
+      "text/markdown": [
+       "83"
+      ],
+      "text/plain": [
+       "[1] 83"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<ol class=list-inline>\n",
+       "\t<li>'barrels08'</li>\n",
+       "\t<li>'barrelsA08'</li>\n",
+       "\t<li>'charge120'</li>\n",
+       "\t<li>'charge240'</li>\n",
+       "\t<li>'city08'</li>\n",
+       "\t<li>'city08U'</li>\n",
+       "\t<li>'cityA08'</li>\n",
+       "\t<li>'cityA08U'</li>\n",
+       "\t<li>'cityCD'</li>\n",
+       "\t<li>'cityE'</li>\n",
+       "\t<li>'cityUF'</li>\n",
+       "\t<li>'co2'</li>\n",
+       "\t<li>'co2A'</li>\n",
+       "\t<li>'co2TailpipeAGpm'</li>\n",
+       "\t<li>'co2TailpipeGpm'</li>\n",
+       "\t<li>'comb08'</li>\n",
+       "\t<li>'comb08U'</li>\n",
+       "\t<li>'combA08'</li>\n",
+       "\t<li>'combA08U'</li>\n",
+       "\t<li>'combE'</li>\n",
+       "\t<li>'combinedCD'</li>\n",
+       "\t<li>'combinedUF'</li>\n",
+       "\t<li>'cylinders'</li>\n",
+       "\t<li>'displ'</li>\n",
+       "\t<li>'drive'</li>\n",
+       "\t<li>'engId'</li>\n",
+       "\t<li>'eng_dscr'</li>\n",
+       "\t<li>'feScore'</li>\n",
+       "\t<li>'fuelCost08'</li>\n",
+       "\t<li>'fuelCostA08'</li>\n",
+       "\t<li>'fuelType'</li>\n",
+       "\t<li>'fuelType1'</li>\n",
+       "\t<li>'ghgScore'</li>\n",
+       "\t<li>'ghgScoreA'</li>\n",
+       "\t<li>'highway08'</li>\n",
+       "\t<li>'highway08U'</li>\n",
+       "\t<li>'highwayA08'</li>\n",
+       "\t<li>'highwayA08U'</li>\n",
+       "\t<li>'highwayCD'</li>\n",
+       "\t<li>'highwayE'</li>\n",
+       "\t<li>'highwayUF'</li>\n",
+       "\t<li>'hlv'</li>\n",
+       "\t<li>'hpv'</li>\n",
+       "\t<li>'id'</li>\n",
+       "\t<li>'lv2'</li>\n",
+       "\t<li>'lv4'</li>\n",
+       "\t<li>'make'</li>\n",
+       "\t<li>'model'</li>\n",
+       "\t<li>'mpgData'</li>\n",
+       "\t<li>'phevBlended'</li>\n",
+       "\t<li>'pv2'</li>\n",
+       "\t<li>'pv4'</li>\n",
+       "\t<li>'range'</li>\n",
+       "\t<li>'rangeCity'</li>\n",
+       "\t<li>'rangeCityA'</li>\n",
+       "\t<li>'rangeHwy'</li>\n",
+       "\t<li>'rangeHwyA'</li>\n",
+       "\t<li>'trany'</li>\n",
+       "\t<li>'UCity'</li>\n",
+       "\t<li>'UCityA'</li>\n",
+       "\t<li>'UHighway'</li>\n",
+       "\t<li>'UHighwayA'</li>\n",
+       "\t<li>'VClass'</li>\n",
+       "\t<li>'year'</li>\n",
+       "\t<li>'youSaveSpend'</li>\n",
+       "\t<li>'guzzler'</li>\n",
+       "\t<li>'trans_dscr'</li>\n",
+       "\t<li>'tCharger'</li>\n",
+       "\t<li>'sCharger'</li>\n",
+       "\t<li>'atvType'</li>\n",
+       "\t<li>'fuelType2'</li>\n",
+       "\t<li>'rangeA'</li>\n",
+       "\t<li>'evMotor'</li>\n",
+       "\t<li>'mfrCode'</li>\n",
+       "\t<li>'c240Dscr'</li>\n",
+       "\t<li>'charge240b'</li>\n",
+       "\t<li>'c240bDscr'</li>\n",
+       "\t<li>'createdOn'</li>\n",
+       "\t<li>'modifiedOn'</li>\n",
+       "\t<li>'startStop'</li>\n",
+       "\t<li>'phevCity'</li>\n",
+       "\t<li>'phevHwy'</li>\n",
+       "\t<li>'phevComb'</li>\n",
+       "</ol>\n"
+      ],
+      "text/latex": [
+       "\\begin{enumerate*}\n",
+       "\\item 'barrels08'\n",
+       "\\item 'barrelsA08'\n",
+       "\\item 'charge120'\n",
+       "\\item 'charge240'\n",
+       "\\item 'city08'\n",
+       "\\item 'city08U'\n",
+       "\\item 'cityA08'\n",
+       "\\item 'cityA08U'\n",
+       "\\item 'cityCD'\n",
+       "\\item 'cityE'\n",
+       "\\item 'cityUF'\n",
+       "\\item 'co2'\n",
+       "\\item 'co2A'\n",
+       "\\item 'co2TailpipeAGpm'\n",
+       "\\item 'co2TailpipeGpm'\n",
+       "\\item 'comb08'\n",
+       "\\item 'comb08U'\n",
+       "\\item 'combA08'\n",
+       "\\item 'combA08U'\n",
+       "\\item 'combE'\n",
+       "\\item 'combinedCD'\n",
+       "\\item 'combinedUF'\n",
+       "\\item 'cylinders'\n",
+       "\\item 'displ'\n",
+       "\\item 'drive'\n",
+       "\\item 'engId'\n",
+       "\\item 'eng\\_dscr'\n",
+       "\\item 'feScore'\n",
+       "\\item 'fuelCost08'\n",
+       "\\item 'fuelCostA08'\n",
+       "\\item 'fuelType'\n",
+       "\\item 'fuelType1'\n",
+       "\\item 'ghgScore'\n",
+       "\\item 'ghgScoreA'\n",
+       "\\item 'highway08'\n",
+       "\\item 'highway08U'\n",
+       "\\item 'highwayA08'\n",
+       "\\item 'highwayA08U'\n",
+       "\\item 'highwayCD'\n",
+       "\\item 'highwayE'\n",
+       "\\item 'highwayUF'\n",
+       "\\item 'hlv'\n",
+       "\\item 'hpv'\n",
+       "\\item 'id'\n",
+       "\\item 'lv2'\n",
+       "\\item 'lv4'\n",
+       "\\item 'make'\n",
+       "\\item 'model'\n",
+       "\\item 'mpgData'\n",
+       "\\item 'phevBlended'\n",
+       "\\item 'pv2'\n",
+       "\\item 'pv4'\n",
+       "\\item 'range'\n",
+       "\\item 'rangeCity'\n",
+       "\\item 'rangeCityA'\n",
+       "\\item 'rangeHwy'\n",
+       "\\item 'rangeHwyA'\n",
+       "\\item 'trany'\n",
+       "\\item 'UCity'\n",
+       "\\item 'UCityA'\n",
+       "\\item 'UHighway'\n",
+       "\\item 'UHighwayA'\n",
+       "\\item 'VClass'\n",
+       "\\item 'year'\n",
+       "\\item 'youSaveSpend'\n",
+       "\\item 'guzzler'\n",
+       "\\item 'trans\\_dscr'\n",
+       "\\item 'tCharger'\n",
+       "\\item 'sCharger'\n",
+       "\\item 'atvType'\n",
+       "\\item 'fuelType2'\n",
+       "\\item 'rangeA'\n",
+       "\\item 'evMotor'\n",
+       "\\item 'mfrCode'\n",
+       "\\item 'c240Dscr'\n",
+       "\\item 'charge240b'\n",
+       "\\item 'c240bDscr'\n",
+       "\\item 'createdOn'\n",
+       "\\item 'modifiedOn'\n",
+       "\\item 'startStop'\n",
+       "\\item 'phevCity'\n",
+       "\\item 'phevHwy'\n",
+       "\\item 'phevComb'\n",
+       "\\end{enumerate*}\n"
+      ],
+      "text/markdown": [
+       "1. 'barrels08'\n",
+       "2. 'barrelsA08'\n",
+       "3. 'charge120'\n",
+       "4. 'charge240'\n",
+       "5. 'city08'\n",
+       "6. 'city08U'\n",
+       "7. 'cityA08'\n",
+       "8. 'cityA08U'\n",
+       "9. 'cityCD'\n",
+       "10. 'cityE'\n",
+       "11. 'cityUF'\n",
+       "12. 'co2'\n",
+       "13. 'co2A'\n",
+       "14. 'co2TailpipeAGpm'\n",
+       "15. 'co2TailpipeGpm'\n",
+       "16. 'comb08'\n",
+       "17. 'comb08U'\n",
+       "18. 'combA08'\n",
+       "19. 'combA08U'\n",
+       "20. 'combE'\n",
+       "21. 'combinedCD'\n",
+       "22. 'combinedUF'\n",
+       "23. 'cylinders'\n",
+       "24. 'displ'\n",
+       "25. 'drive'\n",
+       "26. 'engId'\n",
+       "27. 'eng_dscr'\n",
+       "28. 'feScore'\n",
+       "29. 'fuelCost08'\n",
+       "30. 'fuelCostA08'\n",
+       "31. 'fuelType'\n",
+       "32. 'fuelType1'\n",
+       "33. 'ghgScore'\n",
+       "34. 'ghgScoreA'\n",
+       "35. 'highway08'\n",
+       "36. 'highway08U'\n",
+       "37. 'highwayA08'\n",
+       "38. 'highwayA08U'\n",
+       "39. 'highwayCD'\n",
+       "40. 'highwayE'\n",
+       "41. 'highwayUF'\n",
+       "42. 'hlv'\n",
+       "43. 'hpv'\n",
+       "44. 'id'\n",
+       "45. 'lv2'\n",
+       "46. 'lv4'\n",
+       "47. 'make'\n",
+       "48. 'model'\n",
+       "49. 'mpgData'\n",
+       "50. 'phevBlended'\n",
+       "51. 'pv2'\n",
+       "52. 'pv4'\n",
+       "53. 'range'\n",
+       "54. 'rangeCity'\n",
+       "55. 'rangeCityA'\n",
+       "56. 'rangeHwy'\n",
+       "57. 'rangeHwyA'\n",
+       "58. 'trany'\n",
+       "59. 'UCity'\n",
+       "60. 'UCityA'\n",
+       "61. 'UHighway'\n",
+       "62. 'UHighwayA'\n",
+       "63. 'VClass'\n",
+       "64. 'year'\n",
+       "65. 'youSaveSpend'\n",
+       "66. 'guzzler'\n",
+       "67. 'trans_dscr'\n",
+       "68. 'tCharger'\n",
+       "69. 'sCharger'\n",
+       "70. 'atvType'\n",
+       "71. 'fuelType2'\n",
+       "72. 'rangeA'\n",
+       "73. 'evMotor'\n",
+       "74. 'mfrCode'\n",
+       "75. 'c240Dscr'\n",
+       "76. 'charge240b'\n",
+       "77. 'c240bDscr'\n",
+       "78. 'createdOn'\n",
+       "79. 'modifiedOn'\n",
+       "80. 'startStop'\n",
+       "81. 'phevCity'\n",
+       "82. 'phevHwy'\n",
+       "83. 'phevComb'\n",
+       "\n",
+       "\n"
+      ],
+      "text/plain": [
+       " [1] \"barrels08\"       \"barrelsA08\"      \"charge120\"       \"charge240\"      \n",
+       " [5] \"city08\"          \"city08U\"         \"cityA08\"         \"cityA08U\"       \n",
+       " [9] \"cityCD\"          \"cityE\"           \"cityUF\"          \"co2\"            \n",
+       "[13] \"co2A\"            \"co2TailpipeAGpm\" \"co2TailpipeGpm\"  \"comb08\"         \n",
+       "[17] \"comb08U\"         \"combA08\"         \"combA08U\"        \"combE\"          \n",
+       "[21] \"combinedCD\"      \"combinedUF\"      \"cylinders\"       \"displ\"          \n",
+       "[25] \"drive\"           \"engId\"           \"eng_dscr\"        \"feScore\"        \n",
+       "[29] \"fuelCost08\"      \"fuelCostA08\"     \"fuelType\"        \"fuelType1\"      \n",
+       "[33] \"ghgScore\"        \"ghgScoreA\"       \"highway08\"       \"highway08U\"     \n",
+       "[37] \"highwayA08\"      \"highwayA08U\"     \"highwayCD\"       \"highwayE\"       \n",
+       "[41] \"highwayUF\"       \"hlv\"             \"hpv\"             \"id\"             \n",
+       "[45] \"lv2\"             \"lv4\"             \"make\"            \"model\"          \n",
+       "[49] \"mpgData\"         \"phevBlended\"     \"pv2\"             \"pv4\"            \n",
+       "[53] \"range\"           \"rangeCity\"       \"rangeCityA\"      \"rangeHwy\"       \n",
+       "[57] \"rangeHwyA\"       \"trany\"           \"UCity\"           \"UCityA\"         \n",
+       "[61] \"UHighway\"        \"UHighwayA\"       \"VClass\"          \"year\"           \n",
+       "[65] \"youSaveSpend\"    \"guzzler\"         \"trans_dscr\"      \"tCharger\"       \n",
+       "[69] \"sCharger\"        \"atvType\"         \"fuelType2\"       \"rangeA\"         \n",
+       "[73] \"evMotor\"         \"mfrCode\"         \"c240Dscr\"        \"charge240b\"     \n",
+       "[77] \"c240bDscr\"       \"createdOn\"       \"modifiedOn\"      \"startStop\"      \n",
+       "[81] \"phevCity\"        \"phevHwy\"         \"phevComb\"       "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "head(vehicles)\n",
+    "nrow(vehicles) \n",
+    "ncol(vehicles)\n",
+    "names(vehicles)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "35"
+      ],
+      "text/latex": [
+       "35"
+      ],
+      "text/markdown": [
+       "35"
+      ],
+      "text/plain": [
+       "[1] 35"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "1984"
+      ],
+      "text/latex": [
+       "1984"
+      ],
+      "text/markdown": [
+       "1984"
+      ],
+      "text/plain": [
+       "[1] 1984"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "2018"
+      ],
+      "text/latex": [
+       "2018"
+      ],
+      "text/markdown": [
+       "2018"
+      ],
+      "text/plain": [
+       "[1] 2018"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "length(unique(vehicles[, \"year\"]))\n",
+    "first_year <- min(vehicles[, \"year\"])\n",
+    "first_year\n",
+    "## 1984\n",
+    "\n",
+    "last_year <- max(vehicles[, \"year\"])\n",
+    "last_year\n",
+    "## 2018\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "35"
+      ],
+      "text/latex": [
+       "35"
+      ],
+      "text/markdown": [
+       "35"
+      ],
+      "text/plain": [
+       "[1] 35"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "length(unique(vehicles$year))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\n",
+       "           Diesel       Electricity Midgrade Gasoline       Natural Gas \n",
+       "             1103               145                90                60 \n",
+       " Premium Gasoline  Regular Gasoline \n",
+       "            10852             27020 "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "table(vehicles$fuelType1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "  vehicles$trany[vehicles$trany == \"\"] <- NA"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vehicles$trany2 <- ifelse(substr(vehicles$trany, 1, 4) ==\n",
+    "   \"Auto\", \"Auto\", \"Manual\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\n",
+       "  Auto Manual \n",
+       " 26666  12593 "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "vehicles$trany <- as.factor(vehicles$trany)\n",
+    "    table(vehicles$trany2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "        year\n",
+       "sCharger 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997\n",
+       "         1964 1701 1210 1247 1130 1149 1074 1130 1116 1088  979  962  767  757\n",
+       "       S    0    0    0    0    0    4    4    2    5    5    3    5    6    5\n",
+       "        year\n",
+       "sCharger 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011\n",
+       "          800  840  826  891  949 1015 1089 1136 1067 1098 1152 1163 1089 1103\n",
+       "       S   12   12   14   20   26   29   33   30   37   28   35   19   18   25\n",
+       "        year\n",
+       "sCharger 2012 2013 2014 2015 2016 2017 2018\n",
+       "         1122 1142 1152 1214 1192 1222  995\n",
+       "       S   28   42   65   60   61   63   43"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "with(vehicles, table(sCharger, year))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "'factor'"
+      ],
+      "text/latex": [
+       "'factor'"
+      ],
+      "text/markdown": [
+       "'factor'"
+      ],
+      "text/plain": [
+       "[1] \"factor\""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<ol class=list-inline>\n",
+       "\t<li></li>\n",
+       "\t<li>S</li>\n",
+       "</ol>\n",
+       "\n",
+       "<details>\n",
+       "\t<summary style=display:list-item;cursor:pointer>\n",
+       "\t\t<strong>Levels</strong>:\n",
+       "\t</summary>\n",
+       "\t<ol class=list-inline>\n",
+       "\t\t<li>''</li>\n",
+       "\t\t<li>'S'</li>\n",
+       "\t</ol>\n",
+       "</details>"
+      ],
+      "text/latex": [
+       "\\begin{enumerate*}\n",
+       "\\item \n",
+       "\\item S\n",
+       "\\end{enumerate*}\n",
+       "\n",
+       "\\emph{Levels}: \\begin{enumerate*}\n",
+       "\\item ''\n",
+       "\\item 'S'\n",
+       "\\end{enumerate*}\n"
+      ],
+      "text/markdown": [
+       "1. \n",
+       "2. S\n",
+       "\n",
+       "\n",
+       "\n",
+       "**Levels**: 1. ''\n",
+       "2. 'S'\n",
+       "\n",
+       "\n"
+      ],
+      "text/plain": [
+       "[1]   S\n",
+       "Levels:  S"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "'logical'"
+      ],
+      "text/latex": [
+       "'logical'"
+      ],
+      "text/markdown": [
+       "'logical'"
+      ],
+      "text/plain": [
+       "[1] \"logical\""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<ol class=list-inline>\n",
+       "\t<li>&lt;NA&gt;</li>\n",
+       "\t<li>TRUE</li>\n",
+       "</ol>\n"
+      ],
+      "text/latex": [
+       "\\begin{enumerate*}\n",
+       "\\item <NA>\n",
+       "\\item TRUE\n",
+       "\\end{enumerate*}\n"
+      ],
+      "text/markdown": [
+       "1. &lt;NA&gt;\n",
+       "2. TRUE\n",
+       "\n",
+       "\n"
+      ],
+      "text/plain": [
+       "[1]   NA TRUE"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# sCharger\n",
+    "class(vehicles$sCharger)\n",
+    "unique(vehicles$sCharger)\n",
+    "\n",
+    "# tCharger\n",
+    "class(vehicles$tCharger)\n",
+    "unique(vehicles$tCharger)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ERROR",
+     "evalue": "Error in eval(expr, envir, enclos): could not find function \"ddply\"\n",
+     "output_type": "error",
+     "traceback": [
+      "Error in eval(expr, envir, enclos): could not find function \"ddply\"\nTraceback:\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ddply() comes from plyr; attach it here so the cell does not fail with\n",
+    "# 'could not find function \"ddply\"'.\n",
+    "library(plyr)\n",
+    "\n",
+    "# Per-year averages of combined, highway and city MPG.\n",
+    "mpgByYr <- ddply(vehicles, ~year, summarise,\n",
+    "                 avgMPG = mean(comb08),\n",
+    "                 avgHghy = mean(highway08),\n",
+    "                 avgCity = mean(city08))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ERROR",
+     "evalue": "Error in eval(expr, envir, enclos): could not find function \"ggplot\"\n",
+     "output_type": "error",
+     "traceback": [
+      "Error in eval(expr, envir, enclos): could not find function \"ggplot\"\nTraceback:\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ggplot() comes from ggplot2; attach it here so the cell does not fail with\n",
+    "# 'could not find function \"ggplot\"'.\n",
+    "library(ggplot2)\n",
+    "\n",
+    "# Average combined MPG per year with a smoothed trend line.\n",
+    "ggplot(mpgByYr, aes(year, avgMPG)) + geom_point() +\n",
+    "  geom_smooth() + xlab(\"Year\") + ylab(\"Average MPG\") +\n",
+    "  ggtitle(\"All cars\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\n",
+       "           Diesel       Electricity Midgrade Gasoline       Natural Gas \n",
+       "             1103               145                90                60 \n",
+       " Premium Gasoline  Regular Gasoline \n",
+       "            10852             27020 "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "ename": "ERROR",
+     "evalue": "Error in eval(expr, envir, enclos): could not find function \"ddply\"\n",
+     "output_type": "error",
+     "traceback": [
+      "Error in eval(expr, envir, enclos): could not find function \"ddply\"\nTraceback:\n"
+     ]
+    }
+   ],
+   "source": [
+    "table(vehicles$fuelType1)\n",
+    "\n",
+    "# Keep gasoline-only cars: a gasoline primary fuel, no secondary fuel, not a hybrid.\n",
+    "# Each fuel name must sit on a single line; a line break inside the quotes becomes\n",
+    "# part of the string and the value no longer matches the levels printed by table().\n",
+    "gasCars <- subset(vehicles,\n",
+    "                  fuelType1 %in% c(\"Regular Gasoline\", \"Premium Gasoline\",\n",
+    "                                   \"Midgrade Gasoline\") &\n",
+    "                  fuelType2 == \"\" & atvType != \"Hybrid\")\n",
+    "\n",
+    "# Average combined MPG per year for the gasoline-only subset.\n",
+    "mpgByYr_Gas <- ddply(gasCars, ~year, summarise, avgMPG = mean(comb08))\n",
+    "ggplot(mpgByYr_Gas, aes(year, avgMPG)) + geom_point() +\n",
+    "  geom_smooth() + xlab(\"Year\") + ylab(\"Average MPG\") +\n",
+    "  ggtitle(\"Gasoline cars\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ERROR",
+     "evalue": "Error in parse(text = x, srcfile = src): <text>:8:4: unexpected '>='\n7:    ## geom_smooth: method=\"auto\" and size of largest group is\n8:    >=\n      ^\n",
+     "output_type": "error",
+     "traceback": [
+      "Error in parse(text = x, srcfile = src): <text>:8:4: unexpected '>='\n7:    ## geom_smooth: method=\"auto\" and size of largest group is\n8:    >=\n      ^\nTraceback:\n"
+     ]
+    }
+   ],
+   "source": [
+    "typeof(gasCars$displ)\n",
+    "## \"character\"\n",
+    "\n",
+    "# Make sure displacement is numeric before plotting it against combined MPG.\n",
+    "gasCars$displ <- as.numeric(gasCars$displ)\n",
+    "ggplot(gasCars, aes(displ, comb08)) + geom_point() +\n",
+    "  geom_smooth()\n",
+    "\n",
+    "# Console output below is kept strictly as comments; the wrapped continuation\n",
+    "# lines used to be bare text, which made the whole cell fail to parse.\n",
+    "## geom_smooth: method=\"auto\" and size of largest group is >=1000, so using\n",
+    "## gam with formula: y ~ s(x, bs = \"cs\"). Use 'method = x' to change the\n",
+    "## smoothing method.\n",
+    "## Warning: Removed 2 rows containing missing values (stat_smooth).\n",
+    "## Warning: Removed 2 rows containing missing values (geom_point)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Average engine displacement per year for gasoline cars.\n",
+    "avgCarSize <- ddply(gasCars, ~year, summarise, avgDispl = mean(displ))\n",
+    "\n",
+    "# The axis label is written on one line so it does not contain a line break.\n",
+    "ggplot(avgCarSize, aes(year, avgDispl)) + geom_point() +\n",
+    "  geom_smooth() + xlab(\"Year\") +\n",
+    "  ylab(\"Average engine displacement (l)\")\n",
+    "\n",
+    "# Console output below is kept strictly as comments so the cell parses.\n",
+    "## geom_smooth: method=\"auto\" and size of largest group is <1000, so using\n",
+    "## loess. Use 'method = x' to change the smoothing method.\n",
+    "## Warning: Removed 1 rows containing missing values (stat_smooth).\n",
+    "## Warning: Removed 1 rows containing missing values (geom_point)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ERROR",
+     "evalue": "Error in eval(expr, envir, enclos): could not find function \"ddply\"\n",
+     "output_type": "error",
+     "traceback": [
+      "Error in eval(expr, envir, enclos): could not find function \"ddply\"\nTraceback:\n"
+     ]
+    }
+   ],
+   "source": [
+    "byYear <- ddply(gasCars, ~year, summarise, avgMPG =\n",
+    "mean(comb08), avgDispl = mean(displ))\n",
+    "head(byYear)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ERROR",
+     "evalue": "Error in eval(expr, envir, enclos): could not find function \"melt\"\n",
+     "output_type": "error",
+     "traceback": [
+      "Error in eval(expr, envir, enclos): could not find function \"melt\"\nTraceback:\n"
+     ]
+    }
+   ],
+   "source": [
+    "# melt() is provided by reshape2; attach it here so the cell does not fail with\n",
+    "# 'could not find function \"melt\"'.\n",
+    "library(reshape2)\n",
+    "\n",
+    "# Long format: one row per (year, variable) pair, ready for faceting.\n",
+    "byYear2 = melt(byYear, id = \"year\")\n",
+    "\n",
+    "# Human-readable facet labels; each label stays on one line so it holds no line break.\n",
+    "levels(byYear2$variable) <- c(\"Average MPG\", \"Avg engine displacement\")\n",
+    "head(byYear2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One panel per variable, each with its own y scale.\n",
+    "ggplot(byYear2, aes(year, value)) + geom_point() +\n",
+    "  geom_smooth() +\n",
+    "  facet_wrap(~variable, ncol = 1, scales = \"free_y\") +\n",
+    "  xlab(\"Year\") + ylab(\"\")\n",
+    "\n",
+    "# Console output below is kept strictly as comments so the cell parses.\n",
+    "## geom_smooth: method=\"auto\" and size of largest group is <1000, so using\n",
+    "## loess. Use 'method = x' to change the smoothing method.\n",
+    "## geom_smooth: method=\"auto\" and size of largest group is <1000, so using\n",
+    "## loess. Use 'method = x' to change the smoothing method.\n",
+    "## Warning: Removed 1 rows containing missing values (stat_smooth).\n",
+    "## Warning: Removed 1 rows containing missing values (geom_point)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Restrict the gasoline cars to four-cylinder engines.\n",
+    "gasCars4 <- subset(gasCars, cylinders == \"4\")\n",
+    "\n",
+    "# Every continued line ends with '+': a line that starts with '+' is parsed as a\n",
+    "# separate statement, so the layers after it would never be added to the plot.\n",
+    "# element_text is also written as a single identifier.\n",
+    "ggplot(gasCars4, aes(factor(year), comb08)) + geom_boxplot() +\n",
+    "  facet_wrap(~trany2, ncol = 1) +\n",
+    "  theme(axis.text.x = element_text(angle = 45)) +\n",
+    "  labs(x = \"Year\", y = \"MPG\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Share of automatic vs. manual four-cylinder cars per year; the dashed line marks 50%.\n",
+    "# The y label is written on one line so it does not contain a line break.\n",
+    "ggplot(gasCars4, aes(factor(year), fill = factor(trany2))) +\n",
+    "  geom_bar(position = \"fill\") +\n",
+    "  labs(x = \"Year\", y = \"Proportion of cars\", fill = \"Transmission\") +\n",
+    "  theme(axis.text.x = element_text(angle = 45)) +\n",
+    "  geom_hline(yintercept = 0.5, linetype = 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ggplot(mpgByYr, aes(year, avgMPG)) + geom_point() + geom_smooth() +\n",
+    "xlab(\"Year\") + ylab(\"Average MPG\") + ggtitle(\"All cars\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Number of distinct makes offering a four-cylinder car in each year.\n",
+    "carsMake <- ddply(gasCars4, ~year, summarise,\n",
+    "                  numberOfMakes = length(unique(make)))\n",
+    "\n",
+    "# The title is written on one line so it does not contain a line break.\n",
+    "ggplot(carsMake, aes(year, numberOfMakes)) + geom_point() +\n",
+    "  labs(x = \"Year\", y = \"Number of available makes\") +\n",
+    "  ggtitle(\"Four cylinder cars\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Makes per year, then the makes that appear in every single year.\n",
+    "uniqMakes <- dlply(gasCars4, ~year, function(x) unique(x$make))\n",
+    "commonMakes <- Reduce(intersect, uniqMakes)\n",
+    "commonMakes\n",
+    "\n",
+    "# Console output below is kept strictly as comments; the wrapped values used to be\n",
+    "# bare string expressions that were evaluated as code.\n",
+    "##   [1] \"Ford\"  \"Honda\"   \"Toyota\"  \"Volkswagen\" \"Chevrolet\"\n",
+    "##   [6] \"Chrysler\" \"Nissan\"  \"Dodge\"   \"Mazda\" \"Mitsubishi\"\n",
+    "##  [11] \"Subaru\"  \"Jeep\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "carsCommonMakes4 <- subset(gasCars4, make %in% commonMakes)\n",
+    "   avgMPG_commonMakes <- ddply(carsCommonMakes4, ~year + make,\n",
+    "   summarise, avgMPG = mean(comb08))\n",
+    "\n",
+    "   ggplot(avgMPG_commonMakes, aes(year, avgMPG)) + geom_line() +\n",
+    "   facet_wrap(~make, nrow = 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "uniqMakes <- dlply(gasCars4, ~year, function(x) unique(x$make))\n",
+    "commonMakes <- Reduce(intersect, uniqMakes)\n",
+    "commonMakes"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "R",
+   "language": "R",
+   "name": "ir"
+  },
+  "language_info": {
+   "codemirror_mode": "r",
+   "file_extension": ".r",
+   "mimetype": "text/x-r-source",
+   "name": "R",
+   "pygments_lexer": "r",
+   "version": "3.2.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

File diff suppressed because it is too large
+ 262 - 0
car-py.ipynb


+ 380 - 0
output/car-py.md

@@ -0,0 +1,380 @@
+
+* 最新版本的数据集下载地址: http://www.fueleconomy.gov/feg/epadata/vehicles.csv.zip
+* 各个变量的说明文档: http://www.fueleconomy.gov/feg/ws/index.shtml#vehicle
+
+
+```python
+import pandas as pd  
+import numpy as np
+import ggplot as gp
+# from ggplot import *  
+import matplotlib.pyplot as plt
+import os
+```
+
+    /usr/local/lib/python2.7/dist-packages/ggplot/utils.py:81: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.
+    You can access Timestamp as pandas.Timestamp
+      pd.tslib.Timestamp,
+    /usr/local/lib/python2.7/dist-packages/ggplot/stats/smoothers.py:4: FutureWarning: The pandas.lib module is deprecated and will be removed in a future version. These are private functions and can be accessed from pandas._libs.lib instead
+      from pandas.lib import Timestamp
+    /usr/local/lib/python2.7/dist-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
+      from pandas.core import datetools
+
+
+
+```python
+print(os.getcwd())
+os.chdir("/media/sf_share/linux/car")
+```
+
+    /media/sf_share/linux/car
+
+
+
+```python
+# Expected size of the loaded frame: [39270 rows x 83 columns]
+vehicles = pd.read_csv("input/vehicles.csv")  # path is relative to the current working directory
+# print(vehicles.head)
+print(len(vehicles))  # number of rows
+print(len(vehicles.columns))  # number of columns
+print(vehicles.columns)  # NOTE(review): the recorded run emits a DtypeWarning (mixed-type columns) on load
+```
+
+    39270
+    83
+    Index([u'barrels08', u'barrelsA08', u'charge120', u'charge240', u'city08',
+           u'city08U', u'cityA08', u'cityA08U', u'cityCD', u'cityE', u'cityUF',
+           u'co2', u'co2A', u'co2TailpipeAGpm', u'co2TailpipeGpm', u'comb08',
+           u'comb08U', u'combA08', u'combA08U', u'combE', u'combinedCD',
+           u'combinedUF', u'cylinders', u'displ', u'drive', u'engId', u'eng_dscr',
+           u'feScore', u'fuelCost08', u'fuelCostA08', u'fuelType', u'fuelType1',
+           u'ghgScore', u'ghgScoreA', u'highway08', u'highway08U', u'highwayA08',
+           u'highwayA08U', u'highwayCD', u'highwayE', u'highwayUF', u'hlv', u'hpv',
+           u'id', u'lv2', u'lv4', u'make', u'model', u'mpgData', u'phevBlended',
+           u'pv2', u'pv4', u'range', u'rangeCity', u'rangeCityA', u'rangeHwy',
+           u'rangeHwyA', u'trany', u'UCity', u'UCityA', u'UHighway', u'UHighwayA',
+           u'VClass', u'year', u'youSaveSpend', u'guzzler', u'trans_dscr',
+           u'tCharger', u'sCharger', u'atvType', u'fuelType2', u'rangeA',
+           u'evMotor', u'mfrCode', u'c240Dscr', u'charge240b', u'c240bDscr',
+           u'createdOn', u'modifiedOn', u'startStop', u'phevCity', u'phevHwy',
+           u'phevComb'],
+          dtype='object')
+
+
+    /usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (70,71,72,73,74,76,79) have mixed types. Specify dtype option on import or set low_memory=False.
+      interactivity=interactivity, compiler=compiler, result=result)
+
+
+
+```python
+# 查看年份信息,数据总共覆盖35年,从1984年到2018年。
+
+print(len(pd.unique(vehicles.year)))
+print(min(vehicles.year))
+print(max(vehicles.year))
+```
+
+    35
+    1984
+    2018
+
+
+
+```python
+# Count how many records there are for each fuel type
+pd.value_counts(vehicles.fuelType)
+```
+
+
+
+
+    Regular                        25698
+    Premium                        10671
+    Gasoline or E85                 1269
+    Diesel                          1103
+    Electricity                      145
+    Premium or E85                   125
+    Midgrade                          90
+    CNG                               60
+    Premium and Electricity           37
+    Regular Gas and Electricity       22
+    Gasoline or natural gas           20
+    Premium Gas or Electricity        19
+    Gasoline or propane                8
+    Regular Gas or Electricity         3
+    Name: fuelType, dtype: int64
+
+
+
+### trany变量分析
+
+
+```python
+# Count records per transmission type; the recorded values all start with "A" (Automatic) or "M" (Manual)
+pd.value_counts(vehicles.trany)
+```
+
+
+
+
+    Automatic 4-spd                     11045
+    Manual 5-spd                         8339
+    Automatic 3-spd                      3151
+    Automatic (S6)                       2879
+    Manual 6-spd                         2584
+    Automatic 5-spd                      2198
+    Automatic 6-spd                      1526
+    Manual 4-spd                         1483
+    Automatic (S8)                       1211
+    Automatic (S5)                        830
+    Automatic (variable gear ratios)      744
+    Automatic 7-spd                       695
+    Automatic (AM-S7)                     334
+    Automatic 8-spd                       326
+    Automatic (S7)                        294
+    Automatic (S4)                        233
+    Automatic (AM7)                       196
+    Automatic (AV-S6)                     174
+    Automatic 9-spd                       170
+    Automatic (A1)                        137
+    Automatic (AM6)                       128
+    Automatic (AM-S6)                     111
+    Automatic (AV-S7)                     101
+    Manual 7-spd                           93
+    Manual 3-spd                           77
+    Automatic (S9)                         48
+    Automatic (S10)                        42
+    Automatic (AV-S8)                      32
+    Automatic (AM-S8)                      28
+    Manual 4-spd Doubled                   17
+    Automatic (AM5)                        14
+    Automatic 10-spd                        8
+    Automatic (AM8)                         5
+    Automatic (L4)                          2
+    Automatic (L3)                          2
+    Automatic (AV-S10)                      1
+    Automatic (AM-S9)                       1
+    Name: trany, dtype: int64
+
+
+
+
+```python
+# Automatic transmissions start with "A" and manual ones with "M", so build a new column trany2:
+# keep only the first character of trany
+vehicles['trany2'] = vehicles.trany.str[0]
+pd.value_counts(vehicles.trany2)
+# Recorded result: 26666 automatic, 12593 manual
+```
+
+
+
+
+    A    26666
+    M    12593
+    Name: trany2, dtype: int64
+
+
+
+
+```python
+# 先按照年份分组
+grouped = vehicles.groupby('year')
+
+# 再计算其中三列的均值
+averaged= grouped['comb08', 'highway08', 'city08'].agg([np.mean])
+
+# 为方便分析,对其进行重命名,然后创建一个‘year’的列,包含该数据框data frame的索引
+averaged.columns = ['comb08_mean', 'highwayo8_mean', 'city08_mean']
+averaged['year'] = averaged.index
+
+# 使用ggplot包将结果绘成散点图
+gp1=gp.ggplot(averaged, gp.aes('year', 'comb08_mean')) + gp.geom_point(colour='steelblue') + gp.xlab("Year") + gp.ylab("Average MPG") + gp.ggtitle("All cars")
+print(gp1)
+```
+
+
+![png](output_9_0.png)
+
+
+    <ggplot: (8771518469409)>
+
+
+
+```python
+# Exclude hybrids: keep rows with a gasoline primary fuel, no secondary fuel, and atvType other than 'Hybrid'
+criteria1 = vehicles.fuelType1.isin(['Regular Gasoline', 'Premium Gasoline', 'Midgrade Gasoline'])
+criteria2 = vehicles.fuelType2.isnull()
+criteria3 = vehicles.atvType != 'Hybrid'
+vehicles_non_hybrid = vehicles[criteria1 & criteria2 & criteria3]
+```
+
+
+```python
+# 将得到的数据框data frame按年份分组,并计算平均油耗
+grouped = vehicles_non_hybrid.groupby(['year'])
+averaged = grouped['comb08'].agg([np.mean])
+averaged['hahhahah']  = averaged.index
+```
+
+
+```python
+# List the distinct engine displacements, as a first look at whether large-engine cars are becoming rarer
+pd.unique(vehicles_non_hybrid.displ)
+```
+
+
+
+
+    array([ 2. ,  4.9,  2.2,  5.2,  1.8,  1.6,  2.3,  2.8,  4. ,  5. ,  3.3,
+            3.1,  3.8,  4.6,  3.4,  3. ,  5.9,  2.5,  4.5,  6.8,  2.4,  2.9,
+            5.7,  4.3,  3.5,  5.8,  3.2,  4.2,  1.9,  2.6,  7.4,  3.9,  1.5,
+            1.3,  4.1,  8. ,  6. ,  3.6,  5.4,  5.6,  1. ,  2.1,  1.2,  6.5,
+            2.7,  4.7,  5.5,  1.1,  5.3,  4.4,  3.7,  6.7,  4.8,  1.7,  6.2,
+            8.3,  1.4,  6.1,  7. ,  8.4,  6.3,  nan,  6.6,  6.4,  0.9])
+
+
+
+
+```python
+# 去掉nan值,并用astype方法保证各个值都是float型的
+criteria = vehicles_non_hybrid.displ.notnull()
+vehicles_non_hybrid = vehicles_non_hybrid[criteria]
+vehicles_non_hybrid.loc[:,'displ'] = vehicles_non_hybrid.displ.astype('float')
+criteria = vehicles_non_hybrid.comb08.notnull()
+vehicles_non_hybrid = vehicles_non_hybrid[criteria]
+vehicles_non_hybrid.loc[:,'comb08'] = vehicles_non_hybrid.comb08.astype('float')
+```
+
+    /usr/local/lib/python2.7/dist-packages/pandas/core/indexing.py:517: SettingWithCopyWarning: 
+    A value is trying to be set on a copy of a slice from a DataFrame.
+    Try using .loc[row_indexer,col_indexer] = value instead
+    
+    See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
+      self.obj[item] = s
+
+
+
+```python
+# Finally, draw the scatter plot of engine displacement against combined MPG with ggplot
+gp2=gp.ggplot(vehicles_non_hybrid, gp.aes('displ', 'comb08')) + gp.geom_point(color='steelblue') + \
+gp.xlab('Engine Displacement') + gp.ylab('Average MPG') + gp.ggtitle('Gasoline cars')
+print(gp2)
+```
+
+
+![png](output_14_0.png)
+
+
+    <ggplot: (8771518485969)>
+
+
+
+```python
+# 查看是否平均起来汽车越来越少了
+grouped_by_year = vehicles_non_hybrid.groupby(['year'])
+avg_grouped_by_year = grouped_by_year['displ', 'comb08'].agg([np.mean])
+# -  计算displ和conm08的均值,并改造数据框data frame
+avg_grouped_by_year['year'] = avg_grouped_by_year.index
+melted_avg_grouped_by_year = pd.melt(avg_grouped_by_year, id_vars='year')
+# -  创建分屏绘图
+p = gp.ggplot(gp.aes(x='year', y='value', color = 'variable_0'), data=melted_avg_grouped_by_year)
+p + gp.geom_point() + gp.facet_grid("variable_0",scales="free") #scales参数fixed表示固定坐标轴刻度,free表示反馈坐标轴刻度
+```
+
+
+![png](output_15_0.png)
+
+
+
+
+
+    <ggplot: (8771517963333)>
+
+
+
+
+```python
+pd.unique(vehicles_non_hybrid.cylinders)
+
+```
+
+
+
+
+    array([  4.,  12.,   8.,   6.,   5.,  10.,   2.,   3.,  16.,  nan])
+
+
+
+
+```python
+vehicles_non_hybrid.cylinders = vehicles_non_hybrid.cylinders.astype('float')
+pd.unique(vehicles_non_hybrid.cylinders)
+```
+
+
+
+
+    array([  4.,  12.,   8.,   6.,   5.,  10.,   2.,   3.,  16.,  nan])
+
+
+
+
+```python
+vehicles_non_hybrid_4 = vehicles_non_hybrid[(vehicles_non_hybrid.cylinders==4.0)]
+```
+
+
+```python
+grouped_by_year_4_cylinder = vehicles_non_hybrid_4.groupby(['year']).make.nunique()
+fig = grouped_by_year_4_cylinder.plot()
+fig.set_xlabel('Year')
+fig.set_ylabel('Number of 4-Cylinder Maker')
+```
+
+
+
+
+    <matplotlib.text.Text at 0x7fa4733c98d0>
+
+
+
+
+```python
+grouped_by_year_4_cylinder = vehicles_non_hybrid_4.groupby(['year'])
+unique_makes = []
+for name, group in grouped_by_year_4_cylinder:
+    unique_makes.append(set(pd.unique(group['make'])))
+
+unique_makes = reduce(set.intersection, unique_makes)
+print(unique_makes)
+```
+
+    set(['Dodge', 'Jeep', 'Chevrolet', 'Nissan', 'Honda', 'Toyota', 'Volkswagen', 'Mazda', 'Subaru', 'Ford'])
+
+
+
+```python
+# 最终选取在unique_makes集合中存在的品牌
+boolean_mask = []
+for index, row in vehicles_non_hybrid_4.iterrows():
+    make = row['make']
+    boolean_mask.append(make in unique_makes)
+df_common_makes = vehicles_non_hybrid_4[boolean_mask]
+# 先将数据框data frame按year和make分组,然后计算各组的均值
+df_common_makes_grouped = df_common_makes.groupby(['year', 'make']).agg(np.mean).reset_index()
+# 最后利用ggplot提供的分屏图来显示结果
+gp.ggplot(gp.aes(x='year', y='comb08'), data = df_common_makes_grouped) \
++ gp.geom_line() + gp.facet_wrap('make')
+```
+
+
+![png](output_21_0.png)
+
+
+
+
+
+    <ggplot: (8771516938941)>
+
+

BIN
output/output_14_0.png


BIN
output/output_15_0.png


BIN
output/output_21_0.png


BIN
output/output_9_0.png


Some files were not shown because too many files changed in this diff