Electric_Vehicle_Population

Author

Zijin Wang

This dataset shows the Battery Electric Vehicles (BEVs) and Plug-in Hybrid Electric Vehicles (PHEVs) that are currently registered through Washington State Department of Licensing (DOL)

Source: Washington State Department of Licensing (DOL).

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
electric_vehicle_data <- read_csv("/Users/zwang30/Downloads/Electric_Vehicle_Population_Data.csv")
Rows: 159467 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (11): VIN (1-10), County, City, State, Make, Model, Electric Vehicle Typ...
dbl  (6): Postal Code, Model Year, Electric Range, Base MSRP, Legislative Di...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(electric_vehicle_data)
spc_tbl_ [159,467 × 17] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ VIN (1-10)                                       : chr [1:159467] "2C4RC1N71H" "2C4RC1N7XL" "KNDC3DLCXN" "5YJ3E1EA0J" ...
 $ County                                           : chr [1:159467] "Kitsap" "Stevens" "Yakima" "Kitsap" ...
 $ City                                             : chr [1:159467] "Bremerton" "Colville" "Yakima" "Bainbridge Island" ...
 $ State                                            : chr [1:159467] "WA" "WA" "WA" "WA" ...
 $ Postal Code                                      : num [1:159467] 98311 99114 98908 98110 98501 ...
 $ Model Year                                       : num [1:159467] 2017 2020 2022 2018 2018 ...
 $ Make                                             : chr [1:159467] "CHRYSLER" "CHRYSLER" "KIA" "TESLA" ...
 $ Model                                            : chr [1:159467] "PACIFICA" "PACIFICA" "EV6" "MODEL 3" ...
 $ Electric Vehicle Type                            : chr [1:159467] "Plug-in Hybrid Electric Vehicle (PHEV)" "Plug-in Hybrid Electric Vehicle (PHEV)" "Battery Electric Vehicle (BEV)" "Battery Electric Vehicle (BEV)" ...
 $ Clean Alternative Fuel Vehicle (CAFV) Eligibility: chr [1:159467] "Clean Alternative Fuel Vehicle Eligible" "Clean Alternative Fuel Vehicle Eligible" "Eligibility unknown as battery range has not been researched" "Clean Alternative Fuel Vehicle Eligible" ...
 $ Electric Range                                   : num [1:159467] 33 32 0 215 151 239 12 0 6 0 ...
 $ Base MSRP                                        : num [1:159467] 0 0 0 0 0 0 36900 0 0 0 ...
 $ Legislative District                             : num [1:159467] 23 7 14 23 35 26 14 15 26 23 ...
 $ DOL Vehicle ID                                   : num [1:159467] 3.49e+08 1.55e+08 2.20e+08 4.77e+08 2.01e+08 ...
 $ Vehicle Location                                 : chr [1:159467] "POINT (-122.6466274 47.6341188)" "POINT (-117.90431 48.547075)" "POINT (-120.6027202 46.5965625)" "POINT (-122.5235781 47.6293323)" ...
 $ Electric Utility                                 : chr [1:159467] "PUGET SOUND ENERGY INC" "AVISTA CORP" "PACIFICORP" "PUGET SOUND ENERGY INC" ...
 $ 2020 Census Tract                                : chr [1:159467] "53035091800" "53065950500" "53077000904" "53035091001" ...
 - attr(*, "spec")=
  .. cols(
  ..   `VIN (1-10)` = col_character(),
  ..   County = col_character(),
  ..   City = col_character(),
  ..   State = col_character(),
  ..   `Postal Code` = col_double(),
  ..   `Model Year` = col_double(),
  ..   Make = col_character(),
  ..   Model = col_character(),
  ..   `Electric Vehicle Type` = col_character(),
  ..   `Clean Alternative Fuel Vehicle (CAFV) Eligibility` = col_character(),
  ..   `Electric Range` = col_double(),
  ..   `Base MSRP` = col_double(),
  ..   `Legislative District` = col_double(),
  ..   `DOL Vehicle ID` = col_double(),
  ..   `Vehicle Location` = col_character(),
  ..   `Electric Utility` = col_character(),
  ..   `2020 Census Tract` = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
electric_vehicle_data <- electric_vehicle_data %>%
  mutate(
    `Model Year` = as.numeric(`Model Year`),
    `Electric Range` = as.numeric(`Electric Range`)
  )
recent_evs <- electric_vehicle_data %>%
  filter(`Model Year` >= 2015) %>%
  group_by(Make) %>%
  filter(n() > 200) %>%
  ungroup() 
model <- lm(`Electric Range` ~ `Model Year` + `Base MSRP`, data = recent_evs)
summary(model)

Call:
lm(formula = `Electric Range` ~ `Model Year` + `Base MSRP`, data = recent_evs)

Residuals:
    Min      1Q  Median      3Q     Max 
-192.04  -33.97  -10.53   21.47 1191.70 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)   4.743e+04  1.751e+02   270.8   <2e-16 ***
`Model Year` -2.344e+01  8.667e-02  -270.4   <2e-16 ***
`Base MSRP`  -1.630e-03  3.672e-05   -44.4   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 78.99 on 148379 degrees of freedom
Multiple R-squared:  0.3306,    Adjusted R-squared:  0.3306 
F-statistic: 3.665e+04 on 2 and 148379 DF,  p-value: < 2.2e-16
electric_vehicle_data <- electric_vehicle_data %>%
  mutate(`Model Year` = as.numeric(`Model Year`),
         `Electric Range` = as.numeric(`Electric Range`))
# Adjusted ggplot code for the first visualization
ggplot(recent_evs, aes(x = `Model Year`, y = `Electric Range`, color = Make)) +
  geom_point(position = position_jitter(width = 0.1, height = 0), alpha = 0.6) +
  facet_wrap(~Make, scales = "free_y", ncol = 4) +
  scale_color_viridis_d() +
  labs(title = "Electric Vehicle Range by Make and Model Year",
       x = "Model Year",
       y = "Electric Range (miles)",
       color = "Make") +
  theme_minimal() +
  theme(legend.position = "none",  # Hiding the legend due to facet labels
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(size = 14, face = "bold"),
        strip.text.x = element_text(size = 10, face = "bold")) +
  guides(color = guide_legend(override.aes = list(size=5)))

recent_evs <- electric_vehicle_data %>%
  filter(`Model Year` >= 2015) %>%
  group_by(Make) %>%
  filter(n() > 200) %>%
  ungroup() 
ggplot(recent_evs, aes(x = Make, y = `Electric Range`, fill = Make)) +
  geom_boxplot() +
  scale_fill_viridis_d() +
  theme_minimal() +
  labs(title = "Distribution of Electric Range Among Different Vehicle Makes",
       x = "Make",
       y = "Electric Range (miles)",
       fill = "Make") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggsave("EV_Range_Plot.png", width = 20, height = 15, units = "cm")