#setwd("d:/pdata/pdsc/Chapter02")
library(plyr)      # ddply(), dlply(), summarise()
library(ggplot2)   # ggplot(), aes(), geoms, facets, labels
library(reshape2)  # melt()
# Download the vehicles data set and preview its first rows.
# stringsAsFactors is spelled out as FALSE: the shorthand `F` is an ordinary
# variable that can be reassigned, whereas FALSE is a reserved word.
vehicles <- read.csv(
  "http://20727931.d.yyupload.com/down/20727931/opendata/vehicles.csv",
  stringsAsFactors = FALSE
)
head(vehicles)
##   barrels08 barrelsA08 charge120 charge240 city08 city08U cityA08 cityA08U
## 1  15.68944          0         0         0     19       0       0        0
## 2  29.95056          0         0         0      9       0       0        0
## 3  12.19557          0         0         0     23       0       0        0
## 4  29.95056          0         0         0     10       0       0        0
## 5  17.33749          0         0         0     17       0       0        0
## 6  14.96429          0         0         0     21       0       0        0
##   cityCD cityE cityUF co2 co2A co2TailpipeAGpm co2TailpipeGpm comb08 comb08U
## 1      0     0      0  -1   -1               0       423.1905     21       0
## 2      0     0      0  -1   -1               0       807.9091     11       0
## 3      0     0      0  -1   -1               0       329.1481     27       0
## 4      0     0      0  -1   -1               0       807.9091     11       0
## 5      0     0      0  -1   -1               0       467.7368     19       0
## 6      0     0      0  -1   -1               0       403.9545     22       0
##   combA08 combA08U combE combinedCD combinedUF cylinders displ
## 1       0        0     0          0          0         4   2.0
## 2       0        0     0          0          0        12   4.9
## 3       0        0     0          0          0         4   2.2
## 4       0        0     0          0          0         8   5.2
## 5       0        0     0          0          0         4   2.2
## 6       0        0     0          0          0         4   1.8
##                        drive engId   eng_dscr feScore fuelCost08 fuelCostA08
## 1           Rear-Wheel Drive  9011      (FFS)      -1       2350           0
## 2           Rear-Wheel Drive 22020  (GUZZLER)      -1       4450           0
## 3          Front-Wheel Drive  2100      (FFS)      -1       1800           0
## 4           Rear-Wheel Drive  2850                 -1       4450           0
## 5 4-Wheel or All-Wheel Drive 66031 (FFS,TRBO)      -1       2850           0
## 6          Front-Wheel Drive 66020      (FFS)      -1       2250           0
##   fuelType        fuelType1 ghgScore ghgScoreA highway08 highway08U highwayA08
## 1  Regular Regular Gasoline       -1        -1        25          0          0
## 2  Regular Regular Gasoline       -1        -1        14          0          0
## 3  Regular Regular Gasoline       -1        -1        33          0          0
## 4  Regular Regular Gasoline       -1        -1        12          0          0
## 5  Premium Premium Gasoline       -1        -1        23          0          0
## 6  Regular Regular Gasoline       -1        -1        24          0          0
##   highwayA08U highwayCD highwayE highwayUF hlv hpv    id lv2 lv4       make
## 1           0         0        0         0   0   0     1   0   0 Alfa Romeo
## 2           0         0        0         0   0   0    10   0   0    Ferrari
## 3           0         0        0         0  19  77   100   0   0      Dodge
## 4           0         0        0         0   0   0  1000   0   0      Dodge
## 5           0         0        0         0   0   0 10000   0  14     Subaru
## 6           0         0        0         0   0   0 10001   0  15     Subaru
##                 model mpgData phevBlended pv2 pv4 range rangeCity rangeCityA
## 1  Spider Veloce 2000       Y       false   0   0     0         0          0
## 2          Testarossa       N       false   0   0     0         0          0
## 3             Charger       Y       false   0   0     0         0          0
## 4 B150/B250 Wagon 2WD       N       false   0   0     0         0          0
## 5    Legacy AWD Turbo       N       false   0  90     0         0          0
## 6              Loyale       N       false   0  88     0         0          0
##   rangeHwy rangeHwyA           trany   UCity UCityA UHighway UHighwayA
## 1        0         0    Manual 5-spd 23.3333      0  35.0000         0
## 2        0         0    Manual 5-spd 11.0000      0  19.0000         0
## 3        0         0    Manual 5-spd 29.0000      0  47.0000         0
## 4        0         0 Automatic 3-spd 12.2222      0  16.6667         0
## 5        0         0    Manual 5-spd 21.0000      0  32.0000         0
## 6        0         0 Automatic 3-spd 27.0000      0  33.0000         0
##            VClass year youSaveSpend guzzler trans_dscr tCharger sCharger
## 1     Two Seaters 1985        -1000                          NA         
## 2     Two Seaters 1985       -11500       T                  NA         
## 3 Subcompact Cars 1985         1750                SIL       NA         
## 4            Vans 1985       -11500                          NA         
## 5    Compact Cars 1993        -3500                        TRUE         
## 6    Compact Cars 1993         -500                          NA         
##   atvType fuelType2 rangeA evMotor mfrCode
## 1                                         
## 2                                         
## 3                                         
## 4                                         
## 5                                         
## 6
# Split "name - description" lines into a two-column character matrix
# (column 1: text before the first "-", column 2: text after it).
#
# Only the FIRST hyphen is treated as the separator. Splitting on every "-"
# gives pieces of unequal length whenever a description itself contains a
# hyphen; rbind() then recycles the shorter pieces, which produced a recycling
# warning and a spurious third column repeating column 1.
#
# lines: character vector, one label definition per element.
# Returns a character matrix with length(lines) rows and 2 columns. Surrounding
# whitespace is kept as-is; a line without any "-" gets NA as description.
split_label_lines <- function(lines) {
  # as.integer() drops the match attributes regexpr() attaches to its result.
  dash_pos <- as.integer(regexpr("-", lines, fixed = TRUE))
  has_dash <- dash_pos > 0L
  var_name <- ifelse(has_dash, substr(lines, 1L, dash_pos - 1L), lines)
  var_desc <- ifelse(
    has_dash,
    substr(lines, dash_pos + 1L, nchar(lines)),
    NA_character_
  )
  cbind(var_name, var_desc, deparse.level = 0)
}

labels <- split_label_lines(
  readLines("http://20727931.d.yyupload.com/down/20727931/opendata/varlabels.txt")
)
head(labels)
##      [,1]          [,2]                                                        
## [1,] "atvtype "    " type of alternative fuel or advanced technology vehicle"  
## [2,] "barrels08 "  " annual petroleum consumption in barrels for fuelType1 (1)"
## [3,] "barrelsA08 " " annual petroleum consumption in barrels for fuelType2 (1)"
## [4,] "charge120 "  " time to charge an electric vehicle in hours at 120 V"     
## [5,] "charge240 "  " time to charge an electric vehicle in hours at 240 V"     
## [6,] "city08 "     " city MPG for fuelType1 (2)"                               
# Number of rows in the data set.
dim(vehicles)[1L]
## [1] 34287
# Number of columns in the data set.
dim(vehicles)[2L]
## [1] 74
# Column names, in file order.
colnames(vehicles)
##  [1] "barrels08"       "barrelsA08"      "charge120"       "charge240"      
##  [5] "city08"          "city08U"         "cityA08"         "cityA08U"       
##  [9] "cityCD"          "cityE"           "cityUF"          "co2"            
## [13] "co2A"            "co2TailpipeAGpm" "co2TailpipeGpm"  "comb08"         
## [17] "comb08U"         "combA08"         "combA08U"        "combE"          
## [21] "combinedCD"      "combinedUF"      "cylinders"       "displ"          
## [25] "drive"           "engId"           "eng_dscr"        "feScore"        
## [29] "fuelCost08"      "fuelCostA08"     "fuelType"        "fuelType1"      
## [33] "ghgScore"        "ghgScoreA"       "highway08"       "highway08U"     
## [37] "highwayA08"      "highwayA08U"     "highwayCD"       "highwayE"       
## [41] "highwayUF"       "hlv"             "hpv"             "id"             
## [45] "lv2"             "lv4"             "make"            "model"          
## [49] "mpgData"         "phevBlended"     "pv2"             "pv4"            
## [53] "range"           "rangeCity"       "rangeCityA"      "rangeHwy"       
## [57] "rangeHwyA"       "trany"           "UCity"           "UCityA"         
## [61] "UHighway"        "UHighwayA"       "VClass"          "year"           
## [65] "youSaveSpend"    "guzzler"         "trans_dscr"      "tCharger"       
## [69] "sCharger"        "atvType"         "fuelType2"       "rangeA"         
## [73] "evMotor"         "mfrCode"
# Count the distinct model years present in the data.
length(unique(vehicles[["year"]]))
## [1] 31
# Earliest and latest model year.
first_year <- min(vehicles[["year"]])
last_year <- max(vehicles[["year"]])
# The same distinct-year count, computed by flagging repeated values.
sum(!duplicated(vehicles$year))
## [1] 31
# Frequency of each primary fuel type.
table(vehicles[["fuelType1"]])
## 
##            Diesel       Electricity Midgrade Gasoline       Natural Gas 
##              1025                56                41                57 
##  Premium Gasoline  Regular Gasoline 
##              8521             24587
# Frequency of each raw transmission description.
table(vehicles[["trany"]])
## 
##                                                      Auto (AV-S6) 
##                               11                                1 
##                     Auto (AV-S8)                        Auto (AV) 
##                                1                                2 
##                         Auto(A1)                         Auto(A8) 
##                                1                                1 
##                      Auto(AM-S6)                      Auto(AM-S7) 
##                               38                               84 
##                        Auto(AM5)                        Auto(AM6) 
##                               10                               68 
##                        Auto(AM7)                      Auto(AV-S6) 
##                               58                               80 
##                      Auto(AV-S7)                      Auto(AV-S8) 
##                               19                               11 
##                         Auto(L3)                         Auto(L4) 
##                                2                                2 
##                   Automatic (A1)                   Automatic (A6) 
##                               41                                4 
##                Automatic (AM-S6)                Automatic (AM-S7) 
##                                1                                2 
##                  Automatic (AM5)                  Automatic (AM6) 
##                                2                                1 
##                Automatic (AV-S6)                   Automatic (AV) 
##                                9                                4 
##                   Automatic (S4)                   Automatic (S5) 
##                              231                              813 
##                   Automatic (S6)                   Automatic (S7) 
##                             1877                              182 
##                   Automatic (S8) Automatic (variable gear ratios) 
##                              366                              534 
##                  Automatic 3-spd                  Automatic 4-spd 
##                             3151                            11029 
##                  Automatic 5-spd                  Automatic 6-spd 
##                             2149                             1093 
##                   Automatic 6spd                  Automatic 7-spd 
##                                1                              493 
##                  Automatic 8-spd                   Automatic 8spd 
##                               78                                4 
##                  Automatic 9-spd                     Manual 3-spd 
##                                8                               77 
##                     Manual 4-spd             Manual 4-spd Doubled 
##                             1483                               17 
##                     Manual 5-spd                     Manual 5 spd 
##                             8213                                1 
##                     Manual 6-spd                     Manual 7-spd 
##                             2009                               24 
##                       Manual(M7) 
##                                1
# Mark blank transmission descriptions as missing.
is.na(vehicles$trany) <- vehicles$trany == ""
# Collapse the descriptions into a two-level factor: text starting with "Auto"
# becomes "Auto", any other text becomes "Manual"; missing values stay NA.
vehicles$trany2 <- factor(
  ifelse(startsWith(vehicles$trany, "Auto"), "Auto", "Manual")
)
# Counts per transmission class (NA rows are not tabulated).
table(vehicles[["trany2"]])
## 
##   Auto Manual 
##  22451  11825
# Cross-tabulate the supercharger flag (rows) against model year (columns).
table(sCharger = vehicles$sCharger, year = vehicles$year)
##         year
## sCharger 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997
##          1964 1701 1210 1247 1130 1149 1074 1130 1116 1088  979  962  767  757
##        S    0    0    0    0    0    4    4    2    5    5    3    5    6    5
##         year
## sCharger 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
##           800  840  826  891  949 1015 1089 1136 1067 1098 1152 1166 1091 1077
##        S   12   12   14   20   26   29   33   30   37   28   35   19   18   20
##         year
## sCharger 2012 2013 2014
##          1125 1141 1051
##        S   28   42   57
# The same counts with the axes swapped: one row per year.
table(vehicles[["year"]], vehicles[["sCharger"]])
##       
##                S
##   1984 1964    0
##   1985 1701    0
##   1986 1210    0
##   1987 1247    0
##   1988 1130    0
##   1989 1149    4
##   1990 1074    4
##   1991 1130    2
##   1992 1116    5
##   1993 1088    5
##   1994  979    3
##   1995  962    5
##   1996  767    6
##   1997  757    5
##   1998  800   12
##   1999  840   12
##   2000  826   14
##   2001  891   20
##   2002  949   26
##   2003 1015   29
##   2004 1089   33
##   2005 1136   30
##   2006 1067   37
##   2007 1098   28
##   2008 1152   35
##   2009 1166   19
##   2010 1091   18
##   2011 1077   20
##   2012 1125   28
##   2013 1141   42
##   2014 1051   57
# Inspect how the supercharger flag is stored and which values it takes.
class(vehicles[["sCharger"]])
## [1] "character"
unique(vehicles[["sCharger"]])
## [1] ""  "S"
# Same inspection for the turbocharger flag.
class(vehicles[["tCharger"]])
## [1] "logical"
unique(vehicles[["tCharger"]])
## [1]   NA TRUE
# Per-year means of combined, highway and city MPG across all vehicles.
mpgByYr <- ddply(
  vehicles, "year", summarise,
  avgMPG = mean(comb08),
  avgHghy = mean(highway08),
  avgCity = mean(city08)
)
head(mpgByYr)
##   year   avgMPG  avgHghy  avgCity
## 1 1984 19.88187 23.07536 17.98269
## 2 1985 19.80835 23.04233 17.87831
## 3 1986 19.55041 22.69917 17.66529
## 4 1987 19.22855 22.44507 17.31034
## 5 1988 19.32832 22.70265 17.33363
## 6 1989 19.12576 22.46574 17.14397
# Yearly average combined MPG as points with a smoothed trend.
ggplot(mpgByYr, aes(x = year, y = avgMPG)) +
  geom_point() +
  geom_smooth() +
  labs(x = "Year", y = "Average MPG", title = "all cars")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Recap of the primary fuel types before filtering.
table(vehicles[["fuelType1"]])
## 
##            Diesel       Electricity Midgrade Gasoline       Natural Gas 
##              1025                56                41                57 
##  Premium Gasoline  Regular Gasoline 
##              8521             24587
# Keep vehicles whose primary fuel is a gasoline grade, that have an empty
# fuelType2 and whose atvType is not "Hybrid". which() discards rows where
# the condition is NA, as subset() would.
gasCars <- vehicles[which(
  vehicles$fuelType1 %in%
    c("Regular Gasoline", "Premium Gasoline", "Midgrade Gasoline") &
    vehicles$fuelType2 == "" &
    vehicles$atvType != "Hybrid"
), , drop = FALSE]
# Average combined MPG per model year for the gasoline subset.
mpgByYr_Gas <- ddply(gasCars, "year", summarise, avgMPG = mean(comb08))
ggplot(mpgByYr_Gas, aes(x = year, y = avgMPG)) +
  geom_point() +
  geom_smooth() +
  labs(x = "Year", y = "Average MPG", title = "Gasoline cars")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Check how the displ column is stored.
typeof(gasCars[["displ"]])
## [1] "character"
# Convert displ to numeric so it can be plotted and averaged.
gasCars[["displ"]] <- as.numeric(gasCars[["displ"]])
# Combined MPG against displ, with a smoothed trend.
ggplot(gasCars, aes(x = displ, y = comb08)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

# Mean engine displacement of the gasoline cars per model year.
# na.rm = TRUE is required: displ contains NA after the numeric conversion, and
# a single NA would otherwise turn that whole year's mean into NA, so the year
# would be dropped from the plot below.
avgCarSize <- ddply(gasCars, ~year, summarise,
                    avgDispl = mean(displ, na.rm = TRUE))

# NOTE(review): "(1)" in the y-axis label looks like a typo for "(l)" (litres);
# left unchanged here - confirm the intended unit label.
ggplot(avgCarSize, aes(year, avgDispl)) + geom_point() +
  geom_smooth() + xlab("year") + ylab("Average engine displacement(1)")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Per-year averages of combined MPG and engine displacement for gasoline cars.
# na.rm = TRUE on displ: it contains NA values, and without it the mean for an
# affected year is NA and that year is dropped from the plot below.
byYear <- ddply(gasCars, ~year, summarise,
                avgMPG = mean(comb08),
                avgDispl = mean(displ, na.rm = TRUE))
head(byYear)
# NOTE: the listing below was recorded before na.rm = TRUE was added, when the
# 1985 avgDispl was NA; with the fix row 2 holds a numeric value instead.
##   year   avgMPG avgDispl
## 1 1984 19.12162 3.068449
## 2 1985 19.39469       NA
## 3 1986 19.32046 3.126514
## 4 1987 19.16457 3.096474
## 5 1988 19.36761 3.113558
## 6 1989 19.14196 3.133393
# Reshape to long format (one row per year and measure) so both series can be
# drawn as facets of one plot. id.vars is spelled out instead of relying on
# partial argument matching of `id`.
byYear2 <- melt(byYear, id.vars = "year")
# Human-readable facet titles, in the order of the measure columns above.
levels(byYear2$variable) <- c("Average MPG", "Avg engine displacement")

# One panel per measure, stacked vertically, each with its own y scale.
ggplot(byYear2, aes(year, value)) + geom_point() +
  geom_smooth() + facet_wrap(~variable, ncol = 1, scales = "free_y") +
  xlab("Year") + ylab("")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Restrict the gasoline cars to those with cylinders equal to "4"; which()
# skips rows where the comparison is NA, as subset() would.
gasCars4 <- gasCars[which(gasCars$cylinders == "4"), , drop = FALSE]

# Combined MPG distribution per year, one panel per transmission class.
ggplot(gasCars4, aes(x = factor(year), y = comb08)) +
  geom_boxplot() +
  facet_wrap(~trany2, ncol = 1) +
  theme(axis.text.x = element_text(angle = 45)) +
  xlab("Year") +
  ylab("MPG")

# Share of each transmission class per year, with a dashed 50% reference line.
ggplot(gasCars4, aes(x = factor(year), fill = factor(trany2))) +
  geom_bar(position = "fill") +
  xlab("Year") +
  ylab("Proportion of cars") +
  labs(fill = "Transmission") +
  theme(axis.text.x = element_text(angle = 45)) +
  geom_hline(yintercept = 0.5, linetype = 2)

# Number of distinct makes among the four-cylinder cars, per year.
carsMake <- ddply(
  gasCars4, "year", summarise,
  numberOfMakes = sum(!duplicated(make))
)

ggplot(carsMake, aes(x = year, y = numberOfMakes)) +
  geom_point() +
  xlab("Year") +
  ylab("Number of available makes") +
  ggtitle("Four cylinder cars")

# For every year, the distinct makes among the four-cylinder cars.
uniqMakes <- dlply(
  gasCars4, "year",
  function(cars_in_year) unique(cars_in_year$make)
)
# Makes present in every single year: intersect the yearly sets pairwise.
commonMakes <- Reduce(function(kept, makes) intersect(kept, makes), uniqMakes)
# Four-cylinder cars built by those always-present makes.
carsCommonMakes4 <- gasCars4[
  which(gasCars4$make %in% commonMakes), , drop = FALSE
]
# Average combined MPG per year and make, drawn as one line panel per make.
avgMPG_commonMakes <- ddply(
  carsCommonMakes4, c("year", "make"), summarise,
  avgMPG = mean(comb08)
)
ggplot(avgMPG_commonMakes, aes(x = year, y = avgMPG)) +
  geom_line() +
  facet_wrap(~make, nrow = 3)