This dataset shows the Battery Electric Vehicles (BEVs) and Plug-in Hybrid Electric Vehicles (PHEVs) that are currently registered through Washington State Department of Licensing (DOL).
The data is collected from the State of Washington. It falls under the topic of local government.
Quantitative variables: Electric.Range, Base.MSRP, X2020.Census.Tract. Categorical variables: VIN, County, City, State, Postal.Code, Model.Year, Make, Model, Electric.Vehicle.Type, CAFV, Legislative.District, DOL.Vehicle.ID, Vehicle.Location, Electric.Utility.
Narrative: 1. The switch from gas cars to electric vehicles. 2. Looking at the trends of the switch – seeing the efficiencies of EVs. 3. Looking at how EV ownership has grown throughout the years.
Questions: 1. Looking at the State of Washington data, what trends do we notice in electric vehicle ownership over the years? 2. Are there differences among electric vehicle characteristics based on location? 3. With the increasing production of electric vehicles, what are their efficiencies like? Is it better for individuals to adopt an electric vehicle or to use another transportation alternative?
# BASE CODE:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)

# DATASET 1 -- Electric Vehicle Population Data
# Shorten the long column names produced by read.csv(). any_of() skips a
# lookup entry whose source column is absent instead of raising an error.
EVP <- read.csv("EVP.csv") %>%
  rename(any_of(c(
    CAFV = "Clean.Alternative.Fuel.Vehicle..CAFV..Eligibility",
    VIN = "VIN..1.10.",
    Census.Tract = "X2020.Census.Tract"
  )))

# DATASET 2 -- Electric Vehicle Population Size History by County
# Keep the Washington rows only, then shorten the column names.
EVP_COUNTY <- read.csv("EVP_COUNTY.csv") %>%
  filter(State == "WA") %>%
  rename(any_of(c(
    BEVs = "Battery.Electric.Vehicles..BEVs.",
    EV.Total = "Electric.Vehicle..EV..Total",
    PHEVs = "Plug.In.Hybrid.Electric.Vehicles..PHEVs.",
    non.EV.total = "Non.Electric.Vehicle.Total",
    percent.EV = "Percent.Electric.Vehicles"
  )))

# Parsed copy of the Date column (month name, day, four-digit year).
date1 <- as.Date(EVP_COUNTY$Date, format = "%B %d %Y")

# DATASET 3 -- Median income by Washington county
# Median.Income is stripped of its comma separators and then converted to
# numeric so it can be sorted and plotted as a number.
WA_MEDIAN <- read.csv("WA_MEDIAN.csv") %>%
  rename(any_of(c(
    County = "WA.Counties",
    Median.Income = "Value..Dollars.",
    Rank = "Rank.Within.US..of.3142.counties."
  ))) %>%
  mutate(Median.Income = as.numeric(gsub(",", "", Median.Income)))
This can be made into a Shiny app, with a slider controlling how many counties are shown.
# LIBRARIES
library(shiny)
library(ggplot2)
library(dplyr)

# DATASET
WA_MEDIAN <- read.csv("WA_MEDIAN.csv")
colnames(WA_MEDIAN)[colnames(WA_MEDIAN) == "WA.Counties"] <- "County"
colnames(WA_MEDIAN)[colnames(WA_MEDIAN) == "Value..Dollars."] <- "Median.Income"
colnames(WA_MEDIAN)[colnames(WA_MEDIAN) == "Rank.Within.US..of.3142.counties."] <- "Rank"
# The freshly read column is text with thousands separators. Convert it here,
# otherwise the rankings below sort alphabetically and the bars are drawn on
# a discrete axis.
WA_MEDIAN$Median.Income <- as.numeric(gsub(",", "", WA_MEDIAN$Median.Income))

# Builds one tab holding a county-count slider and its plot.
#   which     -- "Top" or "Bottom"; used in the tab title and slider label
#   slider_id -- input id of the slider
#   plot_id   -- output id of the plot
county_rank_tab <- function(which, slider_id, plot_id) {
  tabPanel(
    paste(which, "Counties"),
    sidebarLayout(
      sidebarPanel(
        sliderInput(
          inputId = slider_id,
          label = paste("Number of", which, "Counties to Display:"),
          min = 1,
          max = 10,
          value = 10
        )
      ),
      mainPanel(
        plotOutput(outputId = plot_id)
      )
    )
  )
}

# Draws a horizontal bar chart of the `n` highest (highest = TRUE) or lowest
# (highest = FALSE) counties by median income.
#   data    -- data frame with County and numeric Median.Income columns
#   n       -- number of counties to show
#   which   -- "Top" or "Bottom"; used in the plot title
#   fill    -- bar colour
income_rank_plot <- function(data, n, highest, which, fill) {
  ranked <- if (highest) {
    arrange(data, desc(Median.Income))
  } else {
    arrange(data, Median.Income)
  }
  ranked <- ranked %>%
    slice_head(n = n) %>%
    # coord_flip() draws the first factor level at the bottom, so reverse the
    # levels to put the first-ranked county at the top of the chart.
    mutate(County = factor(County, levels = rev(unique(County))))

  ggplot(ranked, aes(x = County, y = Median.Income)) +
    geom_col(fill = fill) +
    coord_flip() +
    labs(
      title = paste(which, n, "Counties by Median Income"),
      x = "County",
      y = "Median Income (USD)"
    ) +
    theme_minimal()
}

ui <- fluidPage(
  titlePanel("Median Income by County in Washington"),
  tabsetPanel(
    county_rank_tab("Bottom", "num_counties_Bottom", "incomePlotBottom"),
    county_rank_tab("Top", "num_counties_Top", "incomePlotTop")
  )
)

# SERVER
server <- function(input, output) {
  # BOTTOM COUNTIES -- lowest median incomes
  output$incomePlotBottom <- renderPlot({
    income_rank_plot(
      WA_MEDIAN,
      n = input$num_counties_Bottom,
      highest = FALSE,
      which = "Bottom",
      fill = "darkred"
    )
  })

  # TOP COUNTIES -- highest median incomes
  output$incomePlotTop <- renderPlot({
    income_rank_plot(
      WA_MEDIAN,
      n = input$num_counties_Top,
      highest = TRUE,
      which = "Top",
      fill = "steelblue"
    )
  })
}

# Run the application
shinyApp(ui = ui, server = server)
# EVP_TOP15 <- EVP_COUNTY %>%
# filter(County %in% top_counties$County)
#
#
# p <- ggplot(EVP, aes(x = Model, fill = Make)) +
# geom_bar(aes(text = paste("Count: ", ..count..)), stat = "count") +
# labs(
# title = "Top 10 Most Popular EV Models",
# x = "Model",
# y = "Number of Vehicles",
# fill = "Make"
# ) +
# theme_minimal() +
# theme(
# axis.text.x = element_text(angle = 45, hjust = 1),
# plot.title = element_text(face = "bold", size = 16),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank()
# )
#
# plotly_p <- ggplotly(p, tooltip = "text") # CONVERTS A GGPLOT TO PLOTLY
#
# # SUBTITLE
# plotly_p <- plotly_p %>%
# layout(
# title = list(
# text = paste0(
# "Top 10 Most Popular EV Models", # Title
# "<br><sup>Color-coded by Manufacturer (Make) - Ordered by Popularity</sup>" # Subtitle
# )
# )
# )
#
# plotly_p
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)

# Count vehicles per Model/Make pair and keep the ten largest counts.
# with_ties = FALSE guarantees at most ten bars, matching the title, even
# when several models share the tenth-place count.
filtered_EVP <- EVP %>%
  count(Model, Make, sort = TRUE) %>%
  slice_max(n, n = 10, with_ties = FALSE)

# The tooltip text is mapped in the plot-level aes() rather than inside the
# layer: ggplot2 warns about unknown aesthetics given to a layer, while
# ggplotly() can still read `text` from the plot-level mapping.
p <- ggplot(
  filtered_EVP,
  aes(
    x = reorder(Model, -n), # bars ordered from most to least popular
    y = n,
    fill = Make,
    text = paste("Count: ", n)
  )
) +
  geom_col() +
  labs(
    title = "Top 10 Most Popular EV Models",
    x = "Model",
    y = "Number of Vehicles",
    fill = "Make"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Convert ggplot to plotly, showing only the custom tooltip text
plotly_p <- ggplotly(p, tooltip = "text")

# Add a subtitle by embedding it in the title as HTML
plotly_p <- plotly_p %>%
  layout(
    title = list(
      text = paste0(
        "Top 10 Most Popular EV Models", # Title
        "<br><sup>Color-coded by Manufacturer (Make) - Ordered by Popularity</sup>"
      )
    )
  )

# Display the plot
plotly_p
EVP_COUNTY <- read.csv("EVP_COUNTY.csv")
EVP_COUNTY <- EVP_COUNTY %>%
  filter(State == "WA")
# RENAMING COLUMNS
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Battery.Electric.Vehicles..BEVs."] <- "BEVs"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Electric.Vehicle..EV..Total"] <- "EV.Total"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Plug.In.Hybrid.Electric.Vehicles..PHEVs."] <- "PHEVs"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Non.Electric.Vehicle.Total"] <- "non.EV.total"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Percent.Electric.Vehicles"] <- "percent.EV"

# Collapse to one statewide EV total per year.
# NOTE(review): this is a "size history" table, so each Date is assumed to be
# a full snapshot of the vehicles registered at that time -- confirm against
# the dataset documentation. Adding up every snapshot in a year would count
# the same vehicles repeatedly, so only the latest snapshot of each year is
# summed across counties.
EVP_COUNTY <- EVP_COUNTY %>%
  mutate(Date = as.Date(Date, format = "%B %d %Y")) %>%
  filter(!is.na(Date)) %>% # drop rows whose date failed to parse
  mutate(Year = format(Date, "%Y")) %>%
  group_by(Year) %>%
  filter(Date == max(Date)) %>%
  summarise(
    EV.Total = sum(EV.Total, na.rm = TRUE),
    .groups = "drop"
  )

ggplot(EVP_COUNTY, aes(x = as.numeric(Year), y = EV.Total)) +
  geom_area(fill = "lightblue", alpha = 0.5) +
  # `linewidth` replaces `size` for lines as of ggplot2 3.4.0
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "steelblue", size = 2) +
  labs(
    title = "Electric Vehicles Over the Years",
    x = "Year",
    y = "Number of Electric Vehicles"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )
EVP_COUNTY <- read.csv("EVP_COUNTY.csv")
EVP_COUNTY <- EVP_COUNTY %>%
  filter(State == "WA")
# RENAMING COLUMNS
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Battery.Electric.Vehicles..BEVs."] <- "BEVs"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Electric.Vehicle..EV..Total"] <- "EV.Total"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Plug.In.Hybrid.Electric.Vehicles..PHEVs."] <- "PHEVs"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Non.Electric.Vehicle.Total"] <- "non.EV.total"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Percent.Electric.Vehicles"] <- "percent.EV"

# Collapse to one statewide non-EV total per year.
# NOTE(review): each Date is assumed to be a full snapshot of registered
# vehicles -- confirm against the dataset documentation. Summing every
# snapshot in a year would count the same vehicles repeatedly, so only the
# latest snapshot of each year is summed across counties.
EVP_COUNTY <- EVP_COUNTY %>%
  mutate(Date = as.Date(Date, format = "%B %d %Y")) %>%
  filter(!is.na(Date)) %>% # drop rows whose date failed to parse
  mutate(Year = format(Date, "%Y")) %>%
  group_by(Year) %>%
  filter(Date == max(Date)) %>%
  summarise(
    Non_EV_Total = sum(non.EV.total, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(Year = as.numeric(Year))

ggplot(EVP_COUNTY, aes(x = Year, y = Non_EV_Total)) +
  geom_area(fill = "lightcoral", alpha = 0.5) +
  # `linewidth` replaces `size` for lines as of ggplot2 3.4.0
  geom_line(color = "darkred", linewidth = 1) +
  geom_point(color = "darkred", size = 2) +
  labs(
    title = "Non-Electric Vehicles Over the Years (WA)",
    x = "Year",
    y = "Number of Non-Electric Vehicles"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  scale_y_continuous(labels = scales::comma)
# ggplot(combined_data, aes(x = as.numeric(Year), y = Count, color = Vehicle_Type)) +
# geom_line(size = 1) +
# geom_point(size = 2) +
# scale_color_manual(values = c("EV.Total" = "steelblue", "Non_EV_Total" = "darkred")) +
# labs(
# title = "Electric vs Non-Electric Vehicles Over the Years (WA)",
# x = "Year",
# y = "Number of Vehicles",
# color = "Vehicle Type"
# ) +
# scale_y_continuous(
# labels = scales::comma, # Add commas for large numbers
# limits = c(0, NA) # Ensure the y-axis starts at 0 and adjusts to data
# ) +
# theme_minimal() +
# theme(
# axis.text.x = element_text(angle = 45, hjust = 1)
# )
Plot of the top 10 counties in Washington with the most EV registrations (the purpose is to compare ownership across counties).
og data - don’t need it anymore!!!
# TODO: revisit this code; it might work better as a plotly chart
# TODO: add a different font, title and subtitle
# TODO: add context for these counties; a plot of wealth per county (above)
#       may help with interpreting the data

# Ten counties with the most registered EVs, largest first.
county.sum <- EVP %>%
  count(County, name = "Number_of_EVs") %>%
  arrange(desc(Number_of_EVs)) %>%
  slice_head(n = 10)

# Upper end of the count axis: 125,000 as before, raised in steps of 25,000
# when a county exceeds it, so a bar or label is never dropped for being
# outside the scale limits.
y_upper <- max(
  125000,
  ceiling(max(c(0, county.sum$Number_of_EVs)) * 1.1 / 25000) * 25000
)
y_breaks <- seq(0, y_upper, 25000)

# HORIZONTAL BAR CHART
ggplot(county.sum, aes(x = reorder(County, Number_of_EVs), y = Number_of_EVs)) +
  geom_col(fill = "darkgreen") +
  # adds the values for every county
  geom_text(aes(label = Number_of_EVs), hjust = -0.2, size = 3) +
  # sets the scale to see all of the labels
  scale_y_continuous(limits = c(0, y_upper), breaks = y_breaks) +
  coord_flip() +
  # titles
  labs(
    title = "Top 10 Counties by EV Ownership",
    x = "County",
    y = "Number of EVs",
    subtitle = "Distribution of EV registrations across the Top Counties in Washington"
  ) +
  theme_minimal() +
  # removing the grid lines
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# VERTICAL BAR CHART WITH A LINEAR TREND LINE
ggplot(county.sum, aes(x = reorder(County, -Number_of_EVs), y = Number_of_EVs)) +
  geom_col(fill = "lightpink") +
  # Add the values for every county
  geom_text(aes(label = Number_of_EVs), vjust = -0.5, size = 3) +
  scale_y_continuous(breaks = y_breaks) +
  # Zoom with the coordinate system instead of scale limits: scale limits
  # discard the part of the fitted line that falls below zero, whereas
  # coord_cartesian() only clips what is drawn.
  coord_cartesian(ylim = c(0, y_upper)) +
  # Add a linear trend line (`linewidth` replaces `size` for lines as of
  # ggplot2 3.4.0; the formula is spelled out to silence the default message)
  geom_smooth(
    aes(group = 1),
    method = "lm",
    formula = y ~ x,
    color = "red",
    se = FALSE,
    linetype = "solid",
    linewidth = 1
  ) +
  # Titles
  labs(
    title = "Top 10 Counties by EV Ownership",
    x = "County",
    y = "Number of EVs",
    subtitle = "Distribution of EV registrations across the Top Counties in Washington\n(additonal a linear trend line)"
  ) +
  theme_minimal() +
  # Removing the grid lines
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# A log transformation was too harsh for this map, so the fill uses a
# square-root transformation instead.
library(ggplot2)
library(dplyr)
library(maps)

# County outlines for Washington only
washington_map <- filter(map_data("county"), region == "washington")

# EV registrations per county, largest first; names are lower-cased so they
# can be matched against the `subregion` column of the map outlines.
county_summary <- EVP %>%
  count(County, name = "Number_of_EVs") %>%
  arrange(desc(Number_of_EVs)) %>%
  mutate(County = tolower(County))

# Attach the counts to every outline vertex; counties with no match keep NA.
map.data <- left_join(
  washington_map,
  county_summary,
  by = c("subregion" = "County")
)

# Legend labels are squared so they read as vehicle counts again.
unsquare_root_label <- function(x) scales::comma(x^2)

ggplot(
  map.data,
  aes(x = long, y = lat, group = group, fill = sqrt(Number_of_EVs))
) +
  geom_polygon(color = "white") +
  scale_fill_gradient(
    low = "lightblue4",
    high = "lightblue",
    na.value = "gray90",
    breaks = scales::pretty_breaks(n = 5),
    labels = unsquare_root_label
  ) +
  labs(
    title = "EV Ownership Across Washington Counties",
    subtitle = "Distribution of EV registrations (Square Root Adjusted)",
    caption = "Data Source: Electric Vehicle Population Data",
    fill = "Number of Electric Vehicles"
  ) +
  coord_fixed(1.3) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(scales) # For number formatting

# DATASET 2 -- Electric Vehicle Population Size History by County
EVP_COUNTY <- read.csv("EVP_COUNTY.csv")
EVP_COUNTY <- EVP_COUNTY %>%
  filter(State == "WA")
# RENAMING COLUMNS
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Battery.Electric.Vehicles..BEVs."] <- "BEVs"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Electric.Vehicle..EV..Total"] <- "EV.Total"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Plug.In.Hybrid.Electric.Vehicles..PHEVs."] <- "PHEVs"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Non.Electric.Vehicle.Total"] <- "non.EV.total"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Percent.Electric.Vehicles"] <- "percent.EV"
# Parsed copy of the Date column (month name, day, four-digit year)
date1 <- as.Date(EVP_COUNTY$Date, format = "%B %d %Y")

# Load Washington county boundaries.
# progress_bar = FALSE keeps the download progress bar out of the knitted
# report, where it otherwise prints as hundreds of partial bars.
wa_counties <- counties(
  state = "WA",
  cb = TRUE,
  class = "sf",
  progress_bar = FALSE
)
## Retrieving data for the year 2022
# Aggregate Non-EV counts by county.
# NOTE(review): EVP_COUNTY is a "size history" table, so each Date is assumed
# to be a full snapshot of registered vehicles -- confirm against the dataset
# documentation. Summing over every snapshot would count the same vehicles
# once per snapshot, so only each county's most recent snapshot is summed.
non_EV_aggregated <- EVP_COUNTY %>%
  mutate(Date = as.Date(Date, format = "%B %d %Y")) %>%
  filter(!is.na(Date)) %>% # drop rows whose date failed to parse
  group_by(County) %>%
  filter(Date == max(Date)) %>%
  summarise(Non_EV_Count = sum(non.EV.total, na.rm = TRUE), .groups = "drop")

# Convert county names to lowercase for merging
non_EV_aggregated <- non_EV_aggregated %>%
  mutate(County = tolower(County))
wa_counties <- wa_counties %>%
  mutate(NAME = tolower(NAME))

# Merge map data with aggregated Non-EV data; counties without a match keep
# NA and are drawn in the na.value colour below.
# NOTE(review): this object shares its name with ggplot2::map_data().
map_data <- wa_counties %>%
  left_join(non_EV_aggregated, by = c("NAME" = "County"))

# Create the map
ggplot(map_data) +
  # Polygon borders are controlled by `linewidth` as of ggplot2 3.4.0
  geom_sf(aes(fill = Non_EV_Count), color = "black", linewidth = 0.2) +
  scale_fill_viridis_c(
    option = "plasma",
    name = "Non-EV Count",
    na.value = "gray90",
    labels = scales::comma # Format numbers with commas
  ) +
  labs(
    title = "Geographic Distribution of Non-Electric Vehicles in Washington",
    subtitle = "Number of Non-EVs by County",
    caption = "Data Source: Electric Vehicle Population Data"
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )
To compare manufacturers' models with competitive electric ranges, a plot of the top 10 models (grouped by make) was created to visualize the best-performing EVs, which vary significantly across manufacturers.
# The ten most frequently registered models
top_models <- EVP %>%
  count(Model, sort = TRUE) %>%
  slice_head(n = 10) %>%
  pull(Model)

# Every vehicle belonging to one of those models
filtered_EVP <- EVP %>%
  filter(Model %in% top_models)

# One label row per make: the vehicle with the longest electric range.
# with_ties = FALSE matters here -- many vehicles share the maximum range,
# and keeping them all would stack the same text label once per vehicle.
label_data <- filtered_EVP %>%
  group_by(Make) %>%
  slice_max(Electric.Range, n = 1, with_ties = FALSE) %>%
  ungroup()

# Makes ordered from longest to shortest maximum range; used as the x-axis
# order in both the points and the labels.
ordered_makes <- label_data %>%
  arrange(desc(Electric.Range)) %>%
  distinct(Make, .keep_all = TRUE) %>%
  pull(Make)
filtered_EVP <- filtered_EVP %>%
  mutate(Make = factor(Make, levels = ordered_makes))
label_data <- label_data %>%
  mutate(Make = factor(Make, levels = ordered_makes))

ggplot(filtered_EVP, aes(x = Make, y = Electric.Range, color = Model)) +
  geom_point(size = 3, alpha = 0.3) +
  labs(
    title = "Top 10 Electric Vehicle Models and Range by Manufacturer",
    subtitle = "Data Collected from the State of Washington",
    x = "Make",
    y = "Electric Range (miles)",
    color = "Model"
  ) +
  scale_y_continuous(limits = c(0, 400), breaks = seq(0, 400, 50)) +
  # Maximum range per make, printed just above the highest point
  geom_text(
    data = label_data,
    aes(x = Make, y = Electric.Range, label = Electric.Range),
    vjust = -1.2,
    size = 2.7,
    inherit.aes = FALSE
  ) +
  theme_minimal() +
  guides(color = guide_legend(nrow = 3, ncol = 5)) +
  theme(
    legend.title = element_text(size = 8, face = "bold"),
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(face = "italic", size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.text = element_text(size = 8)
  )
library(dplyr)
library(plotly)

# Prepare the data (same preparation as the ggplot2 version of this chart)
# The ten most frequently registered models
top_models <- EVP %>%
  count(Model, sort = TRUE) %>%
  slice_head(n = 10) %>%
  pull(Model)

# Every vehicle belonging to one of those models
filtered_EVP <- EVP %>%
  filter(Model %in% top_models)

# One label row per make: the vehicle with the longest electric range.
# with_ties = FALSE keeps a single row even when many vehicles share the
# maximum, so each label is drawn once.
label_data <- filtered_EVP %>%
  group_by(Make) %>%
  slice_max(Electric.Range, n = 1, with_ties = FALSE) %>%
  ungroup()

# Makes ordered from longest to shortest maximum range
ordered_makes <- label_data %>%
  arrange(desc(Electric.Range)) %>%
  distinct(Make, .keep_all = TRUE) %>%
  pull(Make)
filtered_EVP <- filtered_EVP %>%
  mutate(Make = factor(Make, levels = ordered_makes))
label_data <- label_data %>%
  mutate(Make = factor(Make, levels = ordered_makes))

# Create the interactive plot
plot_ly(
  data = filtered_EVP,
  x = ~Make,
  y = ~Electric.Range,
  color = ~Model,
  # "Paired" provides up to 12 colours; the default Set2 palette stops at 8
  # and warns when asked for the 10 needed here.
  colors = "Paired",
  type = "scatter",
  mode = "markers",
  marker = list(size = 10, opacity = 0.3)
) %>%
  # inherit = FALSE stops the label trace from picking up the marker and
  # colour settings above, which made plotly add markers to the text trace
  # and report "markers is not in the mode".
  add_text(
    data = label_data,
    x = ~Make,
    y = ~Electric.Range,
    text = ~Electric.Range,
    textposition = "top center",
    showlegend = FALSE,
    inherit = FALSE
  ) %>%
  layout(
    title = list(
      text = "Top 10 Electric Vehicle Models and Range by Manufacturer<br><sup>Data Collected from the State of Washington</sup>",
      x = 0.5
    ),
    xaxis = list(
      title = "Make",
      tickangle = 45
    ),
    yaxis = list(
      title = "Electric Range (miles)",
      range = c(0, 400),
      tickvals = seq(0, 400, 50)
    ),
    legend = list(
      title = list(text = "<b>Model</b>"),
      orientation = "h",
      xanchor = "center",
      x = 0.5,
      y = -0.2
    )
  )
EVP_COUNTY <- read.csv("EVP_COUNTY.csv")
EVP_COUNTY <- EVP_COUNTY %>%
  filter(State == "WA")
# RENAMING COLUMNS
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Battery.Electric.Vehicles..BEVs."] <- "BEVs"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Electric.Vehicle..EV..Total"] <- "EV.Total"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Plug.In.Hybrid.Electric.Vehicles..PHEVs."] <- "PHEVs"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Non.Electric.Vehicle.Total"] <- "non.EV.total"
colnames(EVP_COUNTY)[colnames(EVP_COUNTY) == "Percent.Electric.Vehicles"] <- "percent.EV"
# Parsed copy of the Date column (month name, day, four-digit year)
date1 <- as.Date(EVP_COUNTY$Date, format = "%B %d %Y")
library(ggplot2)
# # Create a bar plot to compare BEVs and PHEVs
# bar_plot <- ggplot(EVP_COUNTY, aes(x = Vehicle.Primary.Use, fill = factor(BEVs > PHEVs, labels = c("PHEVs", "BEVs")))) +
# geom_bar(position = "dodge", alpha = 0.8) +
# labs(
# title = "Popularity of BEVs vs PHEVs by Vehicle Primary Use",
# x = "Vehicle Primary Use",
# y = "Count",
# fill = "Dominant Type"
# ) +
# theme_minimal(base_size = 18) +
# theme(legend.position = "bottom")
#
# bar_plot

# Each row is classified by whichever type is more numerous in that row
# (ties fall under "PHEVs"). The levels are given explicitly: without them,
# factor() derives the levels from the data and fails when every row falls
# on the same side, because two labels no longer match one level.
# NOTE(review): geom_bar() counts rows here, not vehicles, so each bar is the
# number of records where that type dominates -- confirm that this is the
# quantity the "# of Vehicles" axis is meant to show.
bar_plot <- ggplot(
  EVP_COUNTY,
  aes(
    x = Vehicle.Primary.Use,
    fill = factor(
      BEVs > PHEVs,
      levels = c(FALSE, TRUE),
      labels = c("PHEVs", "BEVs")
    )
  )
) +
  geom_bar(position = "dodge", alpha = 0.8) +
  scale_fill_manual(
    values = c("PHEVs" = "darkorange", "BEVs" = "royalblue"), # Custom colors
    name = "Type of Electric Vehicle"
  ) +
  labs(
    title = "Popularity of BEVs vs PHEVs by Vehicle Primary Use",
    x = "Vehicle Primary Use",
    y = "# of Vehicles"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    legend.title = element_text(face = "bold", size = 10),
    legend.text = element_text(size = 9),
    axis.text.x = element_text(hjust = 1, size = 10),
    axis.text.y = element_text(size = 10),
    axis.title = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    plot.subtitle = element_text(size = 12)
  )
bar_plot
# Packages used by the app
library(shiny)
library(ggplot2)
library(dplyr)
library(plotly)

# Keep only vehicles with a recorded, positive electric range
EVP_clean <- filter(EVP, !is.na(Electric.Range), Electric.Range > 0)

# UI: a sidebar plus two tabs. The bin slider is wrapped in a
# conditionalPanel so it is shown only while the "Histogram" tab is selected.
ui <- fluidPage(
  titlePanel("Electric Vehicle Data Visualization"),
  sidebarLayout(
    sidebarPanel(
      conditionalPanel(
        condition = "input.tabs == 'Histogram'",
        sliderInput(
          inputId = "bins",
          label = "Number of bins:",
          min = 5,
          max = 100,
          value = 30,
          step = 1
        )
      )
    ),
    mainPanel(
      tabsetPanel(
        id = "tabs",
        tabPanel("Boxplot", plotlyOutput("boxplot")),
        tabPanel("Histogram", plotlyOutput("histogram"))
      )
    )
  )
)

# Server: renders both plots as interactive plotly widgets
server <- function(input, output) {
  # Boxplot of electric range for each vehicle type
  output$boxplot <- renderPlotly({
    range_box <- ggplot(
      EVP_clean,
      aes(x = Electric.Vehicle.Type, y = Electric.Range)
    ) +
      # black outlines, red outlier points
      geom_boxplot(
        color = "black",
        alpha = 0.7,
        outlier.color = "red",
        outlier.size = 2
      ) +
      labs(
        title = "Distribution of Electric Ranges for BEVs and PHEVs",
        x = "Electric Vehicle Type",
        y = "Electric Range (miles)"
      ) +
      theme_minimal(base_size = 15) +
      theme(legend.position = "none")
    ggplotly(range_box)
  })

  # Histogram of electric range, one facet per vehicle type; the number of
  # bins follows the slider.
  output$histogram <- renderPlotly({
    range_hist <- ggplot(
      EVP_clean,
      aes(x = Electric.Range, fill = Electric.Vehicle.Type)
    ) +
      geom_histogram(
        bins = input$bins,
        position = "identity",
        color = "black",
        alpha = 0.7
      ) +
      facet_wrap(~ Electric.Vehicle.Type, scales = "free_y") +
      labs(
        title = "Electric Range Distribution for BEVs and PHEVs",
        x = "Electric Range (miles)",
        y = "Count",
        fill = "Vehicle Type"
      ) +
      theme_minimal(base_size = 10)
    ggplotly(range_hist)
  })
}

shinyApp(ui = ui, server = server)
# Load necessary libraries
library(shiny)
library(ggplot2)
library(dplyr)
library(plotly)
# Sample data preparation (replace this with your actual EVP dataset)
# EVP <- your_dataset_here
# Filter out invalid or missing Electric Range values
EVP_clean <- EVP %>%
filter(!is.na(Electric.Range), Electric.Range > 0)
# UI layout ----
# Sidebar holds the bin-count slider (5 to 15, starting at 15); the main
# panel holds one tab per plot. The slider sits outside the tabset.
ui <- fluidPage(
  titlePanel(title = "Electric Vehicle Data Visualization"),
  sidebarLayout(
    sidebarPanel = sidebarPanel(
      sliderInput(
        inputId = "bins",
        label = "Number of bins:",
        min = 5,
        max = 15,
        value = 15,
        step = 1
      )
    ),
    mainPanel = mainPanel(
      tabsetPanel(
        tabPanel(title = "Boxplot", plotlyOutput(outputId = "boxplot")),
        tabPanel(title = "Histogram", plotlyOutput(outputId = "histogram"))
      )
    )
  )
)
# Server logic ----
# Fills the "boxplot" and "histogram" plotly outputs from the global
# `EVP_clean` data frame.
server <- function(input, output) {
  # Boxplot: electric range per vehicle type, outliers drawn in red.
  output$boxplot <- renderPlotly({
    range_boxes <- geom_boxplot(
      outlier.color = "red",
      outlier.size = 2,
      alpha = 0.7,
      color = "black"
    )
    box_labels <- labs(
      title = "Distribution of Electric Ranges for BEVs and PHEVs",
      x = "Electric Vehicle Type",
      y = "Electric Range (miles)"
    )
    box_chart <- ggplot(
      data = EVP_clean,
      mapping = aes(x = Electric.Vehicle.Type, y = Electric.Range)
    ) +
      range_boxes +
      box_labels +
      theme_minimal(base_size = 15) +
      theme(legend.position = "none")
    ggplotly(box_chart)
  })

  # Histogram: faceted by vehicle type with independent y scales; the
  # number of bins follows the "bins" slider.
  output$histogram <- renderPlotly({
    bin_count <- input$bins
    range_bars <- geom_histogram(
      bins = bin_count,
      alpha = 0.7,
      position = "identity",
      color = "black"
    )
    hist_labels <- labs(
      title = "Electric Range Distribution for BEVs and PHEVs",
      x = "Electric Range (miles)",
      y = "Count",
      fill = "Vehicle Type"
    )
    hist_chart <- ggplot(
      data = EVP_clean,
      mapping = aes(x = Electric.Range, fill = Electric.Vehicle.Type)
    ) +
      range_bars +
      facet_wrap(~ Electric.Vehicle.Type, scales = "free_y") +
      hist_labels +
      theme_minimal(base_size = 10)
    ggplotly(hist_chart)
  })
}

# Launch the app.
shinyApp(ui = ui, server = server)
# Load necessary libraries
library(shiny)
library(ggplot2)
library(dplyr)
library(plotly)
# Data preparation ----
# `EVP` must already be loaded at this point (it is not read here).
# Drop rows whose electric range is missing or not greater than zero.
EVP_clean <- filter(EVP, !is.na(Electric.Range) & Electric.Range > 0)
# UI layout ----
# A title followed by two tabs, one per plotly output. No input controls.
ui <- fluidPage(
  titlePanel(title = "Electric Vehicle Data Visualization"),
  tabsetPanel(
    tabPanel(title = "Boxplot", plotlyOutput(outputId = "boxplot")),
    tabPanel(title = "Histogram", plotlyOutput(outputId = "histogram"))
  )
)
# Server logic ----
# Fills the "boxplot" and "histogram" plotly outputs from the global
# `EVP_clean` data frame. Neither plot reads any input value.
server <- function(input, output) {
  # Boxplot: electric range per vehicle type, outliers drawn in red.
  output$boxplot <- renderPlotly({
    range_boxes <- geom_boxplot(
      outlier.color = "red",
      outlier.size = 2,
      alpha = 0.7,
      color = "black"
    )
    box_labels <- labs(
      title = "Distribution of Electric Ranges for BEVs and PHEVs",
      x = "Electric Vehicle Type",
      y = "Electric Range (miles)"
    )
    box_chart <- ggplot(
      data = EVP_clean,
      mapping = aes(x = Electric.Vehicle.Type, y = Electric.Range)
    ) +
      range_boxes +
      box_labels +
      theme_minimal(base_size = 15) +
      theme(legend.position = "none")

    # Earlier variant, kept for reference: fills each box by vehicle type.
    # ggplot_boxplot <- ggplot(EVP_clean, aes(x = Electric.Vehicle.Type, y = Electric.Range, fill = Electric.Vehicle.Type)) +
    # geom_boxplot(outlier.color = "red", outlier.size = 2, alpha = 0.7, color = "black") +
    # labs(
    # title = "Distribution of Electric Ranges for BEVs and PHEVs",
    # x = "Electric Vehicle Type",
    # y = "Electric Range (miles)",
    # fill = "Vehicle Type"
    # ) +
    # theme_minimal(base_size = 15) +
    # theme(legend.position = "none")

    ggplotly(box_chart)
  })

  # Histogram: fixed at 30 bins, faceted by vehicle type with independent
  # y scales.
  output$histogram <- renderPlotly({
    range_bars <- geom_histogram(
      bins = 30,
      alpha = 0.7,
      position = "identity",
      color = "black"
    )
    hist_labels <- labs(
      title = "Electric Range Distribution for BEVs and PHEVs",
      x = "Electric Range (miles)",
      y = "Count",
      fill = "Vehicle Type"
    )
    hist_chart <- ggplot(
      data = EVP_clean,
      mapping = aes(x = Electric.Range, fill = Electric.Vehicle.Type)
    ) +
      range_bars +
      facet_wrap(~ Electric.Vehicle.Type, scales = "free_y") +
      hist_labels +
      theme_minimal(base_size = 10)
    ggplotly(hist_chart)
  })
}

# Launch the app.
shinyApp(ui = ui, server = server)
# Electric Vehicle Population Data
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Step 1: Data preparation ----
# Keep only rows with a positive, non-missing electric range.
EVP_clean <- filter(EVP, !is.na(Electric.Range) & Electric.Range > 0)

# Step 2: Static boxplot ----
# One box of electric range per vehicle type; black outlines, red outliers.
boxplot_plot <- ggplot(
  data = EVP_clean,
  mapping = aes(x = Electric.Vehicle.Type, y = Electric.Range)
) +
  geom_boxplot(
    outlier.color = "red",
    outlier.size = 2,
    alpha = 0.7,
    color = "black"
  ) +
  labs(
    title = "Distribution of Electric Ranges for BEVs and PHEVs",
    x = "Electric Vehicle Type",
    y = "Electric Range (miles)"
  ) +
  theme_minimal(base_size = 15) +
  theme(legend.position = "none")

# Render the boxplot.
print(boxplot_plot)
# # Electric Vehicle Population Data
#
# # Load necessary libraries
# library(ggplot2)
# library(dplyr)
#
# # Step 1: Data Preparation
# # Filter out invalid or missing Electric Range values
# EVP_clean <- EVP %>%
# filter(!is.na(Electric.Range), Electric.Range > 0) # Remove missing or invalid ranges
#
# # Step 2: Create Boxplot
# boxplot_plot <- ggplot(EVP_clean, aes(x = Electric.Vehicle.Type, y = Electric.Range, fill = Electric.Vehicle.Type)) +
# geom_boxplot(outlier.color = "red", outlier.size = 2, alpha = 0.7) +
# labs(
# title = "Distribution of Electric Ranges for BEVs and PHEVs",
# x = "Electric Vehicle Type",
# y = "Electric Range (miles)",
# fill = "Vehicle Type"
# ) +
# theme_minimal(base_size = 15) +
# theme(legend.position = "none")
#
# # Display the boxplot
# print(boxplot_plot)
#
# # make it a plotly animation
# Static histogram ----
# Electric range in 30 bins, drawn as one facet per vehicle type so each
# type gets its own y scale.
histogram_plot <- ggplot(
  data = EVP_clean,
  mapping = aes(x = Electric.Range, fill = Electric.Vehicle.Type)
) +
  geom_histogram(
    bins = 30,
    alpha = 0.7,
    position = "identity",
    color = "black"
  ) +
  facet_wrap(~ Electric.Vehicle.Type, scales = "free_y") +
  labs(
    title = "Electric Range Distribution for BEVs and PHEVs",
    x = "Electric Range (miles)",
    y = "Count",
    fill = "Vehicle Type"
  ) +
  theme_minimal(base_size = 10)

# Render the histogram.
print(histogram_plot)
# This can be presented either as two separate graphs or as a single plotly view that holds both plots and lets the viewer switch between them.
library(shiny)
library(ggplot2)
library(dplyr)
library(plotly)
# DATASET 1 -- Electric Vehicle Population Data ----
EVP <- read.csv("EVP.csv")

# Shorten the three unwieldy column names produced by read.csv.
names(EVP)[names(EVP) == "Clean.Alternative.Fuel.Vehicle..CAFV..Eligibility"] <- "CAFV"
names(EVP)[names(EVP) == "VIN..1.10."] <- "VIN"
names(EVP)[names(EVP) == "X2020.Census.Tract"] <- "Census.Tract"

# SHINY ----
# Collapse to one row per model year and vehicle type, counting distinct
# DOL vehicle IDs. This overwrites the raw `EVP` table with the summary, so
# code that needs the per-vehicle rows afterwards must read the CSV again.
EVP <- summarise(
  group_by(EVP, Model.Year, Electric.Vehicle.Type),
  Count = n_distinct(DOL.Vehicle.ID),
  .groups = "drop"
)
# UI layout ----
# Sidebar: one checkbox per vehicle type found in `EVP`, all ticked at
# start-up. Main panel: the plotly line chart.
ui <- fluidPage(
  titlePanel(title = "Trends in EV Ownership Over Time"),
  sidebarLayout(
    sidebarPanel = sidebarPanel(
      checkboxGroupInput(
        inputId = "vehicleType",
        label = "Select Vehicle Type(s):",
        choices = unique(EVP$Electric.Vehicle.Type),
        selected = unique(EVP$Electric.Vehicle.Type)
      )
    ),
    mainPanel = mainPanel(
      plotlyOutput(outputId = "linePlot")
    )
  )
)
# Server logic ----
# Draws the "linePlot" output: registration counts per model year, one line
# per vehicle type ticked in the "vehicleType" checkbox group. Reads the
# summarised global `EVP` (columns Model.Year, Electric.Vehicle.Type, Count).
server <- function(input, output) {
  output$linePlot <- renderPlotly({
    # When every checkbox is cleared, input$vehicleType is NULL and the
    # filter below would return zero rows. Show a message instead of trying
    # to convert an empty plot.
    validate(
      need(
        length(input$vehicleType) > 0,
        "Select at least one vehicle type to display."
      )
    )

    filtered_data <- EVP %>%
      filter(Electric.Vehicle.Type %in% input$vehicleType)

    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # lines; `size` is kept because the installed version is not known here.
    p <- ggplot(
      filtered_data,
      aes(x = Model.Year, y = Count, color = Electric.Vehicle.Type)
    ) +
      geom_line(size = 1) +
      geom_point(size = 2) +
      labs(
        title = "Trends in EV Ownership Over Time",
        x = "Model Year",
        y = "Number of Registrations",
        color = "Vehicle Type"
      ) +
      theme_minimal()

    # Convert ggplot to plotly
    ggplotly(p)
  })
}

# Launch the app.
shinyApp(ui = ui, server = server)