Stats and visuals
# Preview the first six rows of the `ev` tibble
head(ev, n = 6L)
# A tibble: 6 × 18
ID `VIN (1-10)` County City State `ZIP Code` `Model Year` Make
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
1 EV331… 5YJ3E1EC6L Snoho… LYNN… WA 98037 2020 TESLA
2 EV402… JN1AZ0CP8B Skagit BELL… WA 98229 2011 NISS…
3 EV122… WBY1Z2C56F Pierce TACO… WA 98422 2015 BMW
4 EV557… 1G1RD6E44D King REDM… WA 98053 2013 CHEV…
5 EV287… 1G1FY6S05K Pierce PUYA… WA 98375 2019 CHEV…
6 EV498… KMHE24L10G Clark VANC… WA 98683 2016 HYUN…
# … with 10 more variables: Model <chr>,
# `Electric Vehicle Type` <chr>,
# `Clean Alternative Fuel Vehicle (CAFV) Eligibility` <chr>,
# `Electric Range` <dbl>, `Base MSRP` <dbl>,
# `Legislative District` <dbl>, `DOL Vehicle ID` <dbl>,
# `Vehicle Location` <chr>, `Electric Utility` <chr>,
# `Expected Price ($1k)` <dbl>
#' Summary statistics for one numeric column of `ev`
#'
#' Reduces the redundancy of the repeated `summarize()` calls in this report.
#' The column is looked up by name through the `.data` pronoun; the first
#' draft passed the string itself to `mean()`/`sd()`, which summarises the
#' literal column name instead of the column and therefore returned NAs.
#'
#' @param col_n A single string naming a numeric column of `ev`,
#'   e.g. "Electric Range".
#' @return A one-row tibble with the columns `mean.col_n`, `median.col_n`,
#'   `sd.col_n`, `min.col_n` and `max.col_n` (missing values are ignored).
get_summary_statistics <- function(col_n) {
  # Fail early with a clear message instead of silently producing NAs
  stopifnot(
    is.character(col_n),
    length(col_n) == 1L,
    col_n %in% names(ev),
    is.numeric(ev[[col_n]])
  )

  ev %>%
    summarize(
      mean.col_n = mean(.data[[col_n]], na.rm = TRUE),
      median.col_n = median(.data[[col_n]], na.rm = TRUE),
      sd.col_n = sd(.data[[col_n]], na.rm = TRUE),
      min.col_n = min(.data[[col_n]], na.rm = TRUE),
      max.col_n = max(.data[[col_n]], na.rm = TRUE)
    )
}
# Example: get_summary_statistics("Electric Range")
# Summary statistics (mean, median, SD, min, max) of the electric range,
# ignoring missing values
el_range_summary_stats <- ev %>%
  summarise(
    mean.el_range = mean(`Electric Range`, na.rm = TRUE),
    median.el_range = median(`Electric Range`, na.rm = TRUE),
    sd.el_range = sd(`Electric Range`, na.rm = TRUE),
    min.el_range = min(`Electric Range`, na.rm = TRUE),
    max.el_range = max(`Electric Range`, na.rm = TRUE)
  )
# Summary statistics (mean, median, SD, min, max) of the expected price
# (in thousands of dollars), ignoring missing values
expected_price_summary_stats <- ev %>%
  summarise(
    mean.exp_price = mean(`Expected Price ($1k)`, na.rm = TRUE),
    median.exp_price = median(`Expected Price ($1k)`, na.rm = TRUE),
    sd.exp_price = sd(`Expected Price ($1k)`, na.rm = TRUE),
    min.exp_price = min(`Expected Price ($1k)`, na.rm = TRUE),
    max.exp_price = max(`Expected Price ($1k)`, na.rm = TRUE)
  )
# Display the electric-range summary table computed above
el_range_summary_stats
# A tibble: 1 × 5
mean.el_range median.el_range sd.el_range min.el_range max.el_range
<dbl> <dbl> <dbl> <dbl> <dbl>
1 107. 73 104. 0 337
# Display the expected-price summary table computed above
expected_price_summary_stats
# A tibble: 1 × 5
mean.exp_price median.exp_price sd.exp_price min.exp_price
<dbl> <dbl> <dbl> <dbl>
1 45.4 40 24.7 0
# … with 1 more variable: max.exp_price <dbl>
# Frequencies of the categorical variables: EV type, make, electric utility
# and county. First: counts per electric vehicle type.
table(ev[["Electric Vehicle Type"]])
Battery Electric Vehicle (BEV)
47869
Plug-in Hybrid Electric Vehicle (PHEV)
16484
# Counts per vehicle make
table(ev[["Make"]])
AUDI AZURE DYNAMICS BENTLEY
1265 3 1
BMW CADILLAC CHEVROLET
2707 69 6651
CHRYSLER DODGE FIAT
1091 1 561
FISKER FORD HONDA
11 3850 528
HYUNDAI JAGUAR JEEP
894 142 436
KIA LAND ROVER LINCOLN
3066 26 59
LUCID MOTORS MERCEDES-BENZ MINI
2 248 303
MITSUBISHI NISSAN POLESTAR
376 8678 164
PORSCHE RIVIAN SMART
455 26 192
SUBARU TESLA TH!NK
34 27903 1
TOYOTA VOLKSWAGEN VOLVO
2700 1217 688
WHEEGO ELECTRIC CARS
1
# Share of vehicles per electric utility (proportions sum to 1)
prop.table(table(ev[["Electric Utility"]]))
AVISTA CORP
1.933020e-03
AVISTA CORP||INLAND POWER & LIGHT COMPANY
1.571561e-05
BONNEVILLE POWER ADMINISTRATION
3.143122e-05
BONNEVILLE POWER ADMINISTRATION||AVISTA CORP||BIG BEND ELECTRIC COOP, INC
3.300278e-04
BONNEVILLE POWER ADMINISTRATION||AVISTA CORP||INLAND POWER & LIGHT COMPANY
1.436407e-02
BONNEVILLE POWER ADMINISTRATION||AVISTA CORP||PUD NO 1 OF ASOTIN COUNTY
4.557527e-04
BONNEVILLE POWER ADMINISTRATION||BENTON RURAL ELECTRIC ASSN
5.814776e-04
BONNEVILLE POWER ADMINISTRATION||BIG BEND ELECTRIC COOP, INC
1.728717e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF CENTRALIA - (WA)|CITY OF TACOMA - (WA)
1.178671e-03
BONNEVILLE POWER ADMINISTRATION||CITY OF COULEE DAM - (WA)
3.143122e-05
BONNEVILLE POWER ADMINISTRATION||CITY OF ELLENSBURG - (WA)
6.914869e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF MCCLEARY - (WA)
1.257249e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF MILTON - (WA)|CITY OF TACOMA - (WA)
4.400371e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF PORT ANGELES - (WA)
1.540130e-03
BONNEVILLE POWER ADMINISTRATION||CITY OF RICHLAND - (WA)
5.311876e-03
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||BENTON RURAL ELECTRIC ASSN|PENINSULA LIGHT COMPANY
2.200185e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||ELMHURST MUTUAL POWER & LIGHT CO|PENINSULA LIGHT COMPANY
2.184470e-03
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||LAKEVIEW LIGHT & POWER|PENINSULA LIGHT COMPANY
8.486430e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||OHOP MUTUAL LIGHT COMPANY, INC|PENINSULA LIGHT COMPANY
6.757712e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||PARKLAND LIGHT & WATER COMPANY|PENINSULA LIGHT COMPANY
6.286244e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||PENINSULA LIGHT COMPANY
4.555955e-02
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||PUD NO 1 OF LEWIS COUNTY
2.215901e-03
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||PUD NO 1 OF MASON COUNTY
6.914869e-04
BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||PUD NO 3 OF MASON COUNTY
4.211784e-03
BONNEVILLE POWER ADMINISTRATION||COLUMBIA RURAL ELEC ASSN, INC
1.571561e-04
BONNEVILLE POWER ADMINISTRATION||INLAND POWER & LIGHT COMPANY
2.215901e-03
BONNEVILLE POWER ADMINISTRATION||ORCAS POWER & LIGHT COOP
6.553410e-03
BONNEVILLE POWER ADMINISTRATION||PUD 1 OF SNOHOMISH COUNTY
8.722164e-03
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF ASOTIN COUNTY||INLAND POWER & LIGHT COMPANY
3.143122e-05
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF BENTON COUNTY
7.449199e-03
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF CLALLAM COUNTY
5.374739e-03
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF CLALLAM COUNTY|PUD NO 1 OF JEFFERSON COUNTY
1.571561e-05
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF CLARK COUNTY - (WA)
5.921642e-02
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF COWLITZ COUNTY
5.437601e-03
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF FERRY COUNTY
1.414405e-04
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF FRANKLIN COUNTY
2.970250e-03
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF GRAYS HARBOR COUNTY
3.551728e-03
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF KITTITAS COUNTY
9.429366e-05
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF KLICKITAT COUNTY
1.492983e-03
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF MASON COUNTY|PUD NO 1 OF JEFFERSON COUNTY
1.571561e-04
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF SKAMANIA CO
1.021515e-03
BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF WAHKIAKUM COUNTY
3.143122e-04
BONNEVILLE POWER ADMINISTRATION||PUD NO 2 OF PACIFIC COUNTY
1.540130e-03
BONNEVILLE POWER ADMINISTRATION||PUGET SOUND ENERGY INC||PUD NO 1 OF JEFFERSON COUNTY
6.113372e-03
BONNEVILLE POWER ADMINISTRATION||TOWN OF EATONVILLE - (WA)|CITY OF TACOMA - (WA)
1.885873e-04
BONNEVILLE POWER ADMINISTRATION||TOWN OF RUSTON - (WA)|CITY OF TACOMA - (WA)||PENINSULA LIGHT COMPANY
2.357342e-04
BONNEVILLE POWER ADMINISTRATION||TOWN OF STEILACOOM|CITY OF TACOMA - (WA)||PENINSULA LIGHT COMPANY
1.052946e-03
BONNEVILLE POWER ADMINISTRATION||VERA IRRIGATION DISTRICT #15
2.027314e-03
CITY OF BLAINE - (WA)||PUD NO 1 OF WHATCOM COUNTY
1.257249e-03
CITY OF CHENEY - (WA)
2.357342e-04
CITY OF CHEWELAH
6.286244e-05
CITY OF SEATTLE - (WA)
1.571561e-05
CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA)
1.966651e-01
CITY OF SUMAS - (WA)||PUD NO 1 OF WHATCOM COUNTY
4.714683e-05
CITY OF TACOMA - (WA)
4.714683e-05
CITY OF TACOMA - (WA)||TANNER ELECTRIC COOP
1.304396e-03
MODERN ELECTRIC WATER COMPANY
6.537694e-03
OKANOGAN COUNTY ELEC COOP, INC
7.386337e-04
PORTLAND GENERAL ELECTRIC CO
6.286244e-05
PUD NO 1 OF CHELAN COUNTY
5.940501e-03
PUD NO 1 OF DOUGLAS COUNTY
1.933020e-03
PUD NO 1 OF OKANOGAN COUNTY
5.343308e-04
PUD NO 1 OF PEND OREILLE COUNTY
2.985966e-04
PUD NO 1 OF WHATCOM COUNTY
3.614590e-04
PUD NO 2 OF GRANT COUNTY
2.703085e-03
PUGET SOUND ENERGY INC
2.010027e-01
PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA)
3.544656e-01
PUGET SOUND ENERGY INC||PUD NO 1 OF WHATCOM COUNTY
2.527070e-02
# Counts per county
table(ev[["County"]])
Adams Alameda Albemarle
21 1 1
Alexandria City Allegheny Anchorage
3 1 1
Anne Arundel Asotin Baltimore
5 31 1
Bartow Bell Benton
1 1 812
Bexar Bradley Calvert
3 1 2
Camden Carroll Carson City
1 1 1
Charles Chaves Chelan
2 1 378
Chesapeake City Clallam Clark
2 441 3771
Columbia Coryell Cowlitz
7 1 346
Cumberland Danville City Davidson
2 1 1
Denton District Of Columbia Douglas
1 3 127
Dupage Duval El Paso
1 1 1
Erie Fairbanks North Star Fairfax
1 1 5
Ferry Flathead Franklin
16 1 201
Frederick Garfield Glacier
3 3 1
Goochland Grant Grays Harbor
2 172 234
Guadalupe Hamilton Harris
1 1 1
Harrison Hillsborough Honolulu
1 1 4
Howard Hudson Humacao
2 1 1
Island Jefferson Kern
773 400 2
King Kings Kitsap
33552 2 2302
Kittitas Klamath Klickitat
213 1 95
Lake Laramie Leavenworth
1 1 1
Lewis Liberty Lincoln
293 1 17
Los Angeles Maricopa Marin
4 1 1
Mason Mecklenburg Monterey
325 1 1
Montgomery Multnomah Muscogee
7 3 1
New Castle New London Newport
1 3 1
Norfolk City Nueces Okanogan
1 1 82
Orange Otero Ozaukee
4 1 1
Pacific Passaic Pend Oreille
98 1 19
Pennington Philadelphia Pierce
1 1 4825
Pinellas Placer Plaquemines
1 1 1
Platte Powhatan Prince Georges
1 1 1
Prince William Queens Riley
3 1 1
Riverside Rock Island Sacramento
2 1 1
Saint Clair Saint Marys Salt Lake
1 1 1
San Bernardino San Diego San Juan
2 16 419
Santa Clara Santa Cruz Sarasota
2 1 1
Sarpy Skagit Skamania
1 779 65
Snohomish Spokane Stafford
6920 1579 1
Stevens Suffolk Thurston
70 2 2446
Tipton Ventura Virginia Beach City
1 2 1
Wahkiakum Wake Walla Walla
20 1 173
Washoe Whatcom Whitman
1 1714 95
Wichita Wilson Yakima
1 1 350
Yamhill
1
# Visualize electric range and expected price: one boxplot per variable,
# followed by a scatter plot of their relationship.
# Boxplot of the electric range; outliers are drawn as black asterisks.
ggplot(ev, aes(x = `Electric Range`)) +
  geom_boxplot(
    outlier.colour = "black",
    outlier.shape = 8,
    outlier.size = 2,
    notch = FALSE
  ) +
  scale_x_continuous(
    name = "Electric range (miles)",
    breaks = seq(0, 350, 50)
  ) +
  # The y axis carries no information in a single horizontal boxplot
  theme(axis.text.y = element_blank())
# Boxplot of the expected price; outliers are drawn as blue asterisks.
ggplot(data = ev, aes(x = `Expected Price ($1k)`)) +
  geom_boxplot(
    outlier.colour = "blue",
    outlier.shape = 8,
    outlier.size = 2,
    notch = FALSE
  ) +
  scale_x_continuous(
    name = "Expected price (thousands)",
    breaks = seq(0, 300, 50)
  ) +
  # Zoom to 0-300 with coord_cartesian() rather than scale limits: scale
  # limits discard out-of-range rows *before* the boxplot statistics are
  # computed, which would distort the quartiles and whiskers if any price
  # lies outside the range.
  coord_cartesian(xlim = c(0, 300)) +
  # The y axis carries no information in a single horizontal boxplot
  theme(axis.text.y = element_blank())
# Scatter plot of expected price against electric range.
# The outlier.* and notch arguments were removed: they belong to
# geom_boxplot() and are ignored (with a warning) by geom_point().
ggplot(data = ev, aes(x = `Electric Range`, y = `Expected Price ($1k)`)) +
  geom_point() +
  scale_x_continuous(
    name = "Electric range (miles)",
    breaks = seq(0, 350, 50)
  ) +
  scale_y_continuous(
    name = "Expected price (thousands)",
    limits = c(0, 200),
    breaks = seq(0, 200, 50)
  )
# Washington State vehicles with an expected price below $30k; the dataset
# focuses predominantly on Washington, so other states are excluded here.
evs_under_30k <- ev %>%
  filter(State == "WA", `Expected Price ($1k)` < 30) %>%
  select(
    `ZIP Code`,
    Make,
    Model,
    `Electric Vehicle Type`,
    `Electric Range`,
    `Expected Price ($1k)`
  )
# Subset of Tesla vehicles, used as the example of a luxury brand
teslas <- ev %>%
  filter(Make == "TESLA") %>%
  select(
    `ZIP Code`,
    Make,
    Model,
    `Electric Vehicle Type`,
    `Electric Range`,
    `Expected Price ($1k)`
  )
# Electric-range summary statistics for the EVs priced under $30k
evs_under_30k %>%
  summarise(
    mean.el_range = mean(`Electric Range`, na.rm = TRUE),
    median.el_range = median(`Electric Range`, na.rm = TRUE),
    sd.el_range = sd(`Electric Range`, na.rm = TRUE),
    min.el_range = min(`Electric Range`, na.rm = TRUE),
    max.el_range = max(`Electric Range`, na.rm = TRUE)
  )
# A tibble: 1 × 5
mean.el_range median.el_range sd.el_range min.el_range max.el_range
<dbl> <dbl> <dbl> <dbl> <dbl>
1 72.7 72 62.3 0 258
# Electric-range summary statistics for the Tesla subset
teslas %>%
  summarise(
    mean.el_range = mean(`Electric Range`, na.rm = TRUE),
    median.el_range = median(`Electric Range`, na.rm = TRUE),
    sd.el_range = sd(`Electric Range`, na.rm = TRUE),
    min.el_range = min(`Electric Range`, na.rm = TRUE),
    max.el_range = max(`Electric Range`, na.rm = TRUE)
  )
# A tibble: 1 × 5
mean.el_range median.el_range sd.el_range min.el_range max.el_range
<dbl> <dbl> <dbl> <dbl> <dbl>
1 153. 210 119. 0 337
# EVs registered in four high-end Washington cities.
# The State filter is required because `ev` also contains out-of-state rows,
# and a city name alone could match a same-named city in another state.
evs_rich_zips <- ev %>%
  filter(
    State == "WA",
    City %in% c("REDMOND", "SEATTLE", "BELLEVUE", "MEDINA")
  ) %>%
  select(
    `ZIP Code`,
    City,
    Make,
    Model,
    `Electric Vehicle Type`,
    `Electric Range`,
    `Expected Price ($1k)`
  )
# Univariate bar plot: number of EVs of each type in the selected high-end
# cities
ggplot(evs_rich_zips, aes(x = `Electric Vehicle Type`)) +
  geom_bar() +
  theme_clean()
# Univariate bar plot: number of EVs of each type across all observations
ggplot(ev, aes(x = `Electric Vehicle Type`)) +
  geom_bar() +
  theme_clean()
# Bivariate plot: which EV makes are present in each of the selected
# wealthier cities (one point per city/make combination that occurs)
ggplot(evs_rich_zips, aes(x = City, y = Make)) +
  geom_point()
# Bivariate plot: electric range of the EVs in the high-end cities, by make
ggplot(evs_rich_zips, aes(x = `Electric Range`, y = Make)) +
  geom_point() +
  scale_x_continuous(
    name = "Electric range (miles)",
    breaks = seq(0, 350, 50)
  )
In my plots I focus on the electric range and expected price of EVs. Through boxplots, I visualize the summary statistics of the two variables. It appears that the median electric range is just under 75 miles and the median expected price is under $45k; the exact figures are 73 miles and $40k, respectively. Additionally, I consider subsets focusing on specific areas (such as high-end cities, which may have different distributions) as well as particular makes (such as luxury brands). The questions I am trying to answer are: is there any difference between the types of EVs in the general population and those in high-end areas; what is the correlation between the electric range of a vehicle and its expected price; and how do the distributions of BEVs and PHEVs stack up against each other, both in the general population and in pricier cities? One conclusion I can draw is that cities like Redmond and Seattle prefer BEVs over PHEVs at a higher rate than other areas. Another conclusion is that Tesla tends to produce the EVs with the highest electric range (it is the only make with vehicles surpassing the 275-mile mark on a single charge).
This dataset focuses predominantly on EVs in Washington State. Source
While giving a high-level overview of the data, these visualizations may
need further breakdown to convey deeper meaning. For instance, performing
a more thorough analysis of the zip codes or cities to provide a better
understanding of the specific models preferred by those populations may
offer insight into that particular market and would allow a third
party (an EV marketing agency, car manufacturers, etc.) to achieve
better product placement. In the next iteration I will work on
segmenting the data into more easily digestible parts in order to
improve the visualizations.