HW2

Explaining variables, data wrangling

Nasko
2022-06-03

Explain the variables

The dataset contains information about Battery Electric Vehicles (BEVs) and Plug-in Hybrid Electric Vehicles (PHEVs). I plan to focus on visualizations and to analyze the relationships between EV features and the target price.

# Load the training data as a tibble and preview its first rows
ev <- as_tibble(read_csv("electric_vehicles/train.csv"))
ev %>% head()
# A tibble: 6 × 18
  ID     `VIN (1-10)` County City  State `ZIP Code` `Model Year` Make 
  <chr>  <chr>        <chr>  <chr> <chr>      <dbl>        <dbl> <chr>
1 EV331… 5YJ3E1EC6L   Snoho… LYNN… WA         98037         2020 TESLA
2 EV402… JN1AZ0CP8B   Skagit BELL… WA         98229         2011 NISS…
3 EV122… WBY1Z2C56F   Pierce TACO… WA         98422         2015 BMW  
4 EV557… 1G1RD6E44D   King   REDM… WA         98053         2013 CHEV…
5 EV287… 1G1FY6S05K   Pierce PUYA… WA         98375         2019 CHEV…
6 EV498… KMHE24L10G   Clark  VANC… WA         98683         2016 HYUN…
# … with 10 more variables: Model <chr>,
#   `Electric Vehicle Type` <chr>,
#   `Clean Alternative Fuel Vehicle (CAFV) Eligibility` <chr>,
#   `Electric Range` <dbl>, `Base MSRP` <dbl>,
#   `Legislative District` <dbl>, `DOL Vehicle ID` <dbl>,
#   `Vehicle Location` <chr>, `Electric Utility` <chr>,
#   `Expected Price ($1k)` <dbl>
# List every column name in the data
names(ev)
 [1] "ID"                                               
 [2] "VIN (1-10)"                                       
 [3] "County"                                           
 [4] "City"                                             
 [5] "State"                                            
 [6] "ZIP Code"                                         
 [7] "Model Year"                                       
 [8] "Make"                                             
 [9] "Model"                                            
[10] "Electric Vehicle Type"                            
[11] "Clean Alternative Fuel Vehicle (CAFV) Eligibility"
[12] "Electric Range"                                   
[13] "Base MSRP"                                        
[14] "Legislative District"                             
[15] "DOL Vehicle ID"                                   
[16] "Vehicle Location"                                 
[17] "Electric Utility"                                 
[18] "Expected Price ($1k)"                             
# Storage class of each column
ev %>% sapply(class)
                                               ID 
                                      "character" 
                                       VIN (1-10) 
                                      "character" 
                                           County 
                                      "character" 
                                             City 
                                      "character" 
                                            State 
                                      "character" 
                                         ZIP Code 
                                        "numeric" 
                                       Model Year 
                                        "numeric" 
                                             Make 
                                      "character" 
                                            Model 
                                      "character" 
                            Electric Vehicle Type 
                                      "character" 
Clean Alternative Fuel Vehicle (CAFV) Eligibility 
                                      "character" 
                                   Electric Range 
                                        "numeric" 
                                        Base MSRP 
                                        "numeric" 
                             Legislative District 
                                        "numeric" 
                                   DOL Vehicle ID 
                                        "numeric" 
                                 Vehicle Location 
                                      "character" 
                                 Electric Utility 
                                      "character" 
                             Expected Price ($1k) 
                                        "numeric" 
# Per-column summary; numeric columns report quantiles, mean and NA counts
sum.ev <- ev %>% summary()
sum.ev
      ID             VIN (1-10)           County         
 Length:64353       Length:64353       Length:64353      
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  
                                                         
                                                         
                                                         
                                                         
     City              State              ZIP Code       Model Year  
 Length:64353       Length:64353       Min.   :  745   Min.   :1993  
 Class :character   Class :character   1st Qu.:98052   1st Qu.:2017  
 Mode  :character   Mode  :character   Median :98121   Median :2018  
                                       Mean   :98143   Mean   :2018  
                                       3rd Qu.:98370   3rd Qu.:2021  
                                       Max.   :99701   Max.   :2022  
                                       NA's   :6       NA's   :7     
     Make              Model           Electric Vehicle Type
 Length:64353       Length:64353       Length:64353         
 Class :character   Class :character   Class :character     
 Mode  :character   Mode  :character   Mode  :character     
                                                            
                                                            
                                                            
                                                            
 Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range 
 Length:64353                                      Min.   :  0.0  
 Class :character                                  1st Qu.: 14.0  
 Mode  :character                                  Median : 73.0  
                                                   Mean   :106.9  
                                                   3rd Qu.:215.0  
                                                   Max.   :337.0  
                                                                  
   Base MSRP      Legislative District DOL Vehicle ID     
 Min.   :     0   Min.   : 0.00        Min.   :     4385  
 1st Qu.:     0   1st Qu.:19.00        1st Qu.:137286488  
 Median :     0   Median :34.00        Median :175377597  
 Mean   :  2525   Mean   :29.95        Mean   :197290491  
 3rd Qu.:     0   3rd Qu.:43.00        3rd Qu.:229903894  
 Max.   :845000   Max.   :49.00        Max.   :478934571  
                  NA's   :169                             
 Vehicle Location   Electric Utility   Expected Price ($1k)
 Length:64353       Length:64353       Min.   :   0.00     
 Class :character   Class :character   1st Qu.:  22.86     
 Mode  :character   Mode  :character   Median :  40.00     
                                       Mean   :  45.45     
                                       3rd Qu.:  65.90     
                                       Max.   :1100.00     
                                       NA's   :13          

Data exploration

# Vehicles with an electric range above 25, highest base MSRP first
ev %>%
  filter(`Electric Range` > 25) %>%
  arrange(desc(`Base MSRP`))
# A tibble: 41,280 × 18
   ID    `VIN (1-10)` County City  State `ZIP Code` `Model Year` Make 
   <chr> <chr>        <chr>  <chr> <chr>      <dbl>        <dbl> <chr>
 1 EV41… 5YJRE1A18A   King   NORT… WA         98045         2010 TESLA
 2 EV48… 5YJRE1A16A   King   SEAT… WA         98101         2010 TESLA
 3 EV42… 5YJRE1A32A   Island FREE… WA         98249         2010 TESLA
 4 EV56… 5YJRE1A32A   King   BELL… WA         98006         2010 TESLA
 5 EV33… 5YJRE1A33A   King   SEAT… WA         98126         2010 TESLA
 6 EV71… 5YJRE1A32A   King   KIRK… WA         98034         2010 TESLA
 7 EV26… 5YJRE1A11A   King   SEAT… WA         98144         2010 TESLA
 8 EV87… 5YJRE1A14A   King   SEAT… WA         98112         2010 TESLA
 9 EV71… 5YJRE1A30A   Snoho… EDMO… WA         98026         2010 TESLA
10 EV84… 5YJRE1A11A   Snoho… MUKI… WA         98275         2010 TESLA
# … with 41,270 more rows, and 10 more variables: Model <chr>,
#   `Electric Vehicle Type` <chr>,
#   `Clean Alternative Fuel Vehicle (CAFV) Eligibility` <chr>,
#   `Electric Range` <dbl>, `Base MSRP` <dbl>,
#   `Legislative District` <dbl>, `DOL Vehicle ID` <dbl>,
#   `Vehicle Location` <chr>, `Electric Utility` <chr>,
#   `Expected Price ($1k)` <dbl>
# Rows 20 through 40 of the data, by position
ev %>% slice(20:40)
# A tibble: 21 × 18
   ID    `VIN (1-10)` County City  State `ZIP Code` `Model Year` Make 
   <chr> <chr>        <chr>  <chr> <chr>      <dbl>        <dbl> <chr>
 1 EV27… WBY1Z4C57E   Snoho… MUKI… WA         98275         2014 BMW  
 2 EV76… KNDCC3LG0L   Pierce UNIV… WA         98467         2020 KIA  
 3 EV31… 5YJ3E1EB7M   Pierce UNIV… WA         98466         2021 TESLA
 4 EV69… 5YJ3E1EA7J   King   COVI… WA         98042         2018 TESLA
 5 EV46… 5YJSA1E24G   King   CLYD… WA         98004         2016 TESLA
 6 EV83… KNDJP3AE4J   Clark  VANC… WA         98683         2018 KIA  
 7 EV75… 5YJYGDEF6L   King   REDM… WA         98053         2020 TESLA
 8 EV70… 5YJSA1E25G   Snoho… STAN… WA         98292         2016 TESLA
 9 EV67… KNDCC3LC4H   King   SAMM… WA         98074         2017 KIA  
10 EV58… 5YJYGDEE7M   King   REDM… WA         98052         2021 TESLA
# … with 11 more rows, and 10 more variables: Model <chr>,
#   `Electric Vehicle Type` <chr>,
#   `Clean Alternative Fuel Vehicle (CAFV) Eligibility` <chr>,
#   `Electric Range` <dbl>, `Base MSRP` <dbl>,
#   `Legislative District` <dbl>, `DOL Vehicle ID` <dbl>,
#   `Vehicle Location` <chr>, `Electric Utility` <chr>,
#   `Expected Price ($1k)` <dbl>
# Tesla vehicles with a model year after 2016
teslas <- ev %>%
  filter(Make == "TESLA", `Model Year` > 2016)

# Distribution of the target price as a density histogram.
# xlim() sets scale limits, so prices outside 0-150 ($1k) are left out.
# after_stat(density) replaces the deprecated ..density.. notation, and
# bins is spelled out (30 is the default) to avoid the "pick better value"
# message without changing the picture.
ggplot(ev, aes(x = `Expected Price ($1k)`)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    colour = "black",
    fill = "white"
  ) +
  xlim(0, 150)

# Same price distribution, shown as raw counts per bin.
ggplot(ev, aes(x = `Expected Price ($1k)`)) +
  geom_histogram(bins = 30, colour = "blue", fill = "white") +
  xlim(0, 150)

# Distribution of electric range over all vehicles.
ggplot(ev, aes(x = `Electric Range`)) +
  geom_histogram(bins = 30, colour = "green", fill = "white")