# NOTE(review): absolute, machine-specific working directory. The CSV read later
# in this file uses a relative file name, so it is resolved against this path.
setwd("C://Users//srich//OneDrive//Documents//IS 460 Data Visualization SP '24")
#install and call necessary libraries for visualizations
#install.packages("readxl")
#install.packages("leaflet")
library(leaflet)
library(dplyr)
library(readxl)
library(data.table)
library(ggplot2)
library(scales)
library(plotly)
library(ggthemes)
For my IS 460 course modules covering RStudio, I examined a data set called “Electric Vehicle Population Data” from data.gov. This data set includes 17 columns (variables) and 166,800 observations.
Variables:
VIN: This column contains unique Vehicle Identification Numbers
(VINs) for each electric vehicle in the dataset.
County:
Indicates the county in Washington where the electric vehicle is
registered. (char)
City: Represents the city where the
electric vehicle is located. (char)
State: Denotes the state
where the electric vehicle is registered. This data set only highlights
EVs in the state of Washington. (char)
Postal Code: Provides
the postal code of the location where the electric vehicle is
registered.
Model Year: Indicates the year the electric
vehicle model was manufactured. (min = 1997, max = 2024, mean =
2020)
Make: Specifies the manufacturer or brand of the
electric vehicle. (char)
Model: Represents the specific model
name or designation of the electric vehicle. (char)
Electric
Vehicle Type: Describes the type of electric vehicle, Battery
Electric Vehicle (BEV) or Plug-in Hybrid Electric Vehicle (PHEV).
Clean Alternative Fuel Vehicle (CAFV) Eligibility: Indicates
whether the electric vehicle is eligible as a clean alternative fuel
vehicle.
Range: Represents the range of the electric vehicle
on a full charge, measured in miles. (min = 0, max = 337, mean =
61.51)
Base MSRP: Denotes the Manufacturer’s Suggested Retail
Price (MSRP) for the electric vehicle.
Legislative District:
Indicates the legislative district associated with the electric
vehicle’s registration.
DOL Vehicle ID: Provides a unique
identifier assigned by the Department of Licensing (DOL) for the
electric vehicle.
Vehicle Location: Specifies the location of
the electric vehicle.(num, coordinates)
Electric Utility:
Represents the electric utility provider associated with the electric
vehicle’s location.(char)
2020 Census Tract: Denotes the
census tract of the electric vehicle’s location, as per the 2020 Census
data.(num)
# Load the raw EV registrations from the CSV in the working directory
file_name <- "Electric_Vehicle_Population_Data.csv"
EVData <- read.csv(file_name)
# Uncomment to inspect the column types
# str(EVData)
# Working copy that the later summaries are built from
EV_df <- EVData
# Number of registrations per make, most common make first
EV_clean_df <- EV_df %>%
  count(Make, name = "TotalVehicles") %>%
  arrange(desc(TotalVehicles)) %>%
  data.frame()
# Keep only the ten makes with the most registrations
top_10 <- head(EV_clean_df, n = 10)
This horizontal bar chart explores the top 10 electric vehicle makers in the data set, based on the count of electric vehicles grouped by Make. The graphic shows the total number of registered vehicles for each car maker. It can be concluded that Tesla is the most common EV make in the state of Washington.
# Visualization 1: horizontal bar chart of the 10 makes with the most EVs.
# Bars are ordered by count and each bar is labelled with its comma-formatted total.
Top10Bar <- ggplot(top_10, aes(x = reorder(Make, TotalVehicles), y = TotalVehicles)) +
  geom_bar(
    stat = "identity",
    position = position_stack(reverse = TRUE),
    colour = "black",
    fill = "darkgreen"
  ) +
  # label is the only mapped aesthetic; hjust is a constant that pushes the
  # text just past the end of each bar
  geom_text(aes(label = comma(TotalVehicles)), hjust = -0.2) +
  coord_flip() +
  labs(title = "Top 10 Electric Vehicle Makers", x = "EV Maker", y = "Count of EVs") +
  # extra room at the upper end so the label of the longest bar is not clipped
  scale_y_continuous(labels = comma, expand = expansion(mult = c(0.05, 0.15))) +
  # the complete theme must come first: applied later it would replace the
  # title-centring tweak below
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
Top10Bar
This visualization provides an overview of the state of Washington. The map highlights the locations of the 5 counties in Washington with the highest frequency of electric vehicles. Each flag is placed at the center of its respective county to give a basic understanding of the state and of where the highest concentrations of electric vehicles are found. Click on a flag to view the county name.
# Visual: map of Washington flagging five counties to give a rough idea of the state
# Top 5 counties in Washington with the most EVs; coordinates found through online search
# Built column-wise so Lat/Long stay numeric (mixing them with the name in c()
# would coerce everything to character). Row names keep the short county keys.
WA_counties <- data.frame(
  Lat = c(47.5480, 48.0330, 47.0676, 45.7466, 46.8646),
  Long = c(-121.9836, -121.8339, -122.1295, -122.5194, -122.7696),
  County = c(
    "King County", "Snohomish County", "Pierce County",
    "Clark County", "Thurston County"
  ),
  row.names = c("King", "Snohomish", "Pierce", "Clark", "Thurston")
)
# Blue marker with a white flag glyph, shared by all five markers
icon.glyphicon <- makeAwesomeIcon(icon = "flag", markerColor = "blue", iconColor = "white")
# One marker per county; the popup shows the full county name
WA_map <- leaflet() %>%
  addTiles() %>%
  addAwesomeMarkers(
    lng = WA_counties$Long,
    lat = WA_counties$Lat,
    icon = icon.glyphicon,
    popup = WA_counties$County
  )
WA_map
This bar chart visualization explores the 25 counties with the most electric vehicles in the state. The total number of electric vehicles in each county is reported within the graph.
# Second visualization: counties ranked by number of registered EVs
# Registrations per county, largest first
EV_county_df <- EV_df %>%
  count(County, name = "TotalVehicles") %>%
  arrange(desc(TotalVehicles)) %>%
  data.frame()
# Keep the 25 counties with the most registrations
top_counties <- head(EV_county_df, n = 25)
# Bars ordered by count, each labelled above with its comma-formatted total
Top25CountiesBar <- ggplot(
  top_counties,
  aes(x = reorder(County, TotalVehicles), y = TotalVehicles)
) +
  geom_col(
    position = position_stack(reverse = TRUE),
    colour = "lightgreen",
    fill = "black"
  ) +
  geom_text(aes(label = comma(TotalVehicles), vjust = -1)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 25 Counties with Most Electric Vehicles in Washington",
    x = "County",
    y = "Count of EVs"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
Top25CountiesBar
This pie chart visualization identifies the percentage and frequency of battery vs. plug-in hybrid vehicles within the Electric Vehicles data in Washington.
# Visual 3: plotly pie chart comparing the number of battery electric vehicles
# with the number of plug-in hybrid vehicles
# Registrations per electric vehicle type
EV_type <- EV_df %>%
  group_by(Electric.Vehicle.Type) %>%
  summarise(count = n())
# Slice colours passed to the pie trace
custom_colors <- c("#2ca02c", "#003300")
# textinfo is a "+"-joined flag list and must not contain spaces; with spaces
# neither flag matches a valid value and the slice labels are not shown
EVTypePie <- plot_ly(
  data = EV_type,
  labels = ~Electric.Vehicle.Type,
  values = ~count,
  type = "pie",
  textposition = "outside",
  textinfo = "label+percent",
  marker = list(colors = custom_colors)
) %>%
  layout(title = "Battery vs. Hybrid Vehicles Count")
# Display the pie chart
EVTypePie
#end of visual 3
This multiple bar chart visualization displays the top 5 car makers from the data set. Each bar chart shows the most common models (by count) for that maker in the state of Washington.
#Visual 4: faceted bar charts of the most common models for each of the top 5 car makers
# Five makes with the most registrations, largest first (ties at the cutoff are kept).
# slice_max() names the ranking column explicitly instead of relying on
# top_n()'s default of "the last column".
top_makers <- EV_df %>%
  count(Make, name = "count") %>%
  slice_max(count, n = 5) %>%
  data.frame()
#top_makers
# For each of those makes, the five models with the most registrations.
# The per-make grouping is stated explicitly and dropped again afterwards.
top_models <- EV_df %>%
  filter(Make %in% top_makers$Make) %>%
  count(Make, Model, name = "Count") %>%
  group_by(Make) %>%
  slice_max(Count, n = 5) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  data.frame()
#top_models
# One panel per make; free scales so every panel shows only its own models
MakeModelBar <- ggplot(top_models, aes(x = Model, y = Count, fill = Model)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = comma(Count)), color = "black", size = 3, vjust = -0.5) +
  facet_wrap(~Make, scales = "free") +
  labs(
    title = "Count of Models by Top 5 Car Makers in WA",
    x = "Model",
    y = "Count",
    fill = "Model"
  ) +
  scale_y_continuous(labels = comma) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  # the x axis already names each model, so the fill legend is hidden
  guides(fill = "none")
MakeModelBar
#end of visual 4
This visualization provides a heat map displaying the car models with the highest electric range in miles. It displays the 50 models with the highest maximum electric range along the x-axis and reports the electric range (y-axis) for each model.
# Visual 5: heat map of the longest electric range recorded for each model
# Largest Electric.Range per model, keeping the 50 highest values, highest first
top_ranges <- EV_df %>%
  group_by(Model) %>%
  summarise(max_range = max(Electric.Range)) %>%
  top_n(50, max_range) %>%
  arrange(desc(max_range)) %>%
  data.frame()
#top_ranges
# One tile per model, shaded by range, with the range printed on the tile
EV_heatmap <- ggplot(top_ranges, aes(x = Model, y = max_range)) +
  geom_tile(aes(fill = max_range), color = "black", width = 1, height = 8) +
  geom_text(
    aes(label = max_range),
    color = "black", size = 3, hjust = 0.5, vjust = 0.5
  ) +
  labs(
    title = "Top Electric Ranges by Model",
    x = "Model",
    y = "Electric Range (miles)",
    fill = "Electric Range (miles)"
  ) +
  # light-to-dark green gradient with a legend break every 40 miles
  scale_fill_gradient(
    low = "lightgreen",
    high = "darkgreen",
    breaks = seq(0, max(top_ranges$max_range), by = 40)
  ) +
  guides(fill = guide_legend(reverse = TRUE, override.aes = list(colour = "black"))) +
  theme_minimal() +
  # slanted model names and a centred title
  theme(
    axis.text.x = element_text(angle = 40, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Render the heat map
EV_heatmap
#END