# Point the session at the project folder so that the relative CSV file name
# used later in this script resolves.
# NOTE(review): this is an absolute, machine-specific path; the script stops
# here on any machine where the folder does not exist.
setwd("C://Users//srich//OneDrive//Documents//IS 460 Data Visualization SP '24")
# Attach the libraries used for the visualizations. The install.packages()
# lines are one-time setup and are left commented out on purpose.
# Attach order matters: when two packages export the same function name, the
# package attached later masks the earlier one.
#install.packages("readxl")
#install.packages("leaflet")
library(leaflet)     # interactive map (addTiles, addAwesomeMarkers)
library(dplyr)       # %>%, group_by, summarise, arrange
library(readxl)      # NOTE(review): not referenced in the code visible in this script
library(data.table)  # NOTE(review): not referenced in the code visible in this script
library(ggplot2)     # bar charts and heat map
library(scales)      # comma() number formatting
library(plotly)      # pie chart (plot_ly, layout)
library(ggthemes)    # NOTE(review): not referenced in the code visible in this script

Introduction to Data:

For my IS 460 course modules covering RStudio, I examined a data set called “Electric Vehicle Population Data” from data.gov. This data set includes 17 columns (variables) and 166,800 rows (observations).


Variables:


VIN: This column contains unique Vehicle Identification Numbers (VINs) for each electric vehicle in the dataset.
County: Indicates the county in Washington where the electric vehicle is registered. (char)
City: Represents the city where the electric vehicle is located. (char)
State: Denotes the state where the electric vehicle is registered. This data set only highlights EVs in the state of Washington. (char)
Postal Code: Provides the postal code of the location where the electric vehicle is registered.
Model Year: Indicates the year the electric vehicle model was manufactured. (min = 1997, max = 2024, mean = 2020)
Make: Specifies the manufacturer or brand of the electric vehicle. (char)
Model: Represents the specific model name or designation of the electric vehicle. (char)
Electric Vehicle Type: Describes the type of electric vehicle, Battery Electric Vehicle (BEV) or Plug-in Hybrid Electric Vehicle (PHEV).
Clean Alternative Fuel Vehicle (CAFV) Eligibility: Indicates whether the electric vehicle is eligible as a clean alternative fuel vehicle.
Range: Represents the range of the electric vehicle on a full charge, measured in miles. (min = 0, max = 337, mean = 61.51)
Base MSRP: Denotes the Manufacturer’s Suggested Retail Price (MSRP) for the electric vehicle.
Legislative District: Indicates the legislative district associated with the electric vehicle’s registration.
DOL Vehicle ID: Provides a unique identifier assigned by the Department of Licensing (DOL) for the electric vehicle.
Vehicle Location: Specifies the location of the electric vehicle. (num, coordinates)
Electric Utility: Represents the electric utility provider associated with the electric vehicle’s location. (char)
2020 Census Tract: Denotes the census tract of the electric vehicle’s location, as per the 2020 Census data. (num)

# Load the Electric Vehicle Population CSV (path is relative to the working directory).
file_name <- "Electric_Vehicle_Population_Data.csv"
EVData <- read.csv(file = file_name)
# Optional structure check:
# str(EVData)
# Working copy that the visualizations below read from.
EV_df <- EVData
# Number of vehicles per Make, most common make first.
EV_clean_df <- EV_df %>%
  count(Make, name = "TotalVehicles", sort = TRUE) %>%
  data.frame()
# Keep only the ten makes with the most vehicles.
top_10 <- head(EV_clean_df, n = 10)

Visual #1: Top Car Makers

This horizontal bar chart explores the top 10 electric vehicle makers in the data set, ranked by the number of electric vehicles recorded for each Make. It shows how many vehicles each maker has on the road. It can be concluded that Tesla is the most common EV make in the state of Washington.

# Visualization 1: horizontal bar chart of the ten most common EV makes.
# Reads `top_10` (columns Make and TotalVehicles) built earlier in this script.
Top10Bar <- ggplot(top_10, aes(x = reorder(Make, TotalVehicles), y = TotalVehicles)) +
  # One bar per make; bar length is taken directly from TotalVehicles.
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE), colour = "black", fill = "darkgreen") +
  # Count label just past the end of each bar. Only the label is a mapping;
  # hjust is a fixed setting and belongs outside aes(), and y is inherited
  # from the plot-level mapping rather than being passed to comma().
  geom_text(aes(label = comma(TotalVehicles)), hjust = -0.2) +
  # Flip so the makes run down the side and the counts run along the bottom.
  coord_flip() +
  labs(title = "Top 10 Electric Vehicle Makers", x = "EV Maker", y = "Count of EVs") +
  # Extra room at the high end of the count axis so the longest bar's label
  # is not cut off at the panel edge.
  scale_y_continuous(labels = comma, expand = expansion(mult = c(0.05, 0.15))) +
  # Apply the complete theme first: theme_classic() replaces every theme
  # element, so the title centring must be added after it to take effect.
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
# Render the chart.
Top10Bar

Visual #2: Map of Washington

This visualization provides an overview of the state of Washington. The map highlights the locations of the five counties in Washington with the most electric vehicles. Each flag is placed at the approximate center of its county to give a basic understanding of the state and of where the highest concentrations of electric vehicles are found. Click on a flag to view the county name.

# Visualization 2: leaflet map flagging the five Washington counties with the
# most EVs. Coordinates were found through an online search.
# The table is built with typed columns directly: combining numbers and text in
# a single c() call coerces everything to character, which then has to be
# parsed back to numeric.
WA_counties <- data.frame(
  Lat = c(47.5480, 48.0330, 47.0676, 45.7466, 46.8646),
  Long = c(-121.9836, -121.8339, -122.1295, -122.5194, -122.7696),
  County = c("King County", "Snohomish County", "Pierce County", "Clark County", "Thurston County"),
  # Short names kept as row names, as in the original rbind()-built table.
  row.names = c("King", "Snohomish", "Pierce", "Clark", "Thurston"),
  stringsAsFactors = FALSE
)
#WA_counties
# Blue flag marker with a white glyph.
icon.glyphicon <- makeAwesomeIcon(icon = 'flag', markerColor = 'blue', iconColor = 'white')
# Base tiles plus one marker per county. The popup shows the County column so
# that clicking a flag displays the full county name.
WA_map <- leaflet(data = WA_counties) %>%
  addTiles() %>%
  addAwesomeMarkers(lng = ~Long, lat = ~Lat, icon = icon.glyphicon, popup = ~County)
# Render the map.
WA_map

Visual #3: Counties in Washington with the Most Electric Vehicles

This bar chart visualization explores the counties with the most electric vehicles in the state. The total number of electric vehicles in each county is reported on the graph.

# Visualization 3: bar chart of the 25 counties with the most EVs.
# Tally vehicles per County, largest total first.
EV_county_df <- EV_df %>%
  count(County, name = "TotalVehicles", sort = TRUE) %>%
  data.frame()
# Keep the 25 counties with the highest totals.
top_counties <- head(EV_county_df, n = 25)
Top25CountiesBar <- ggplot(
  data = top_counties,
  mapping = aes(x = reorder(County, TotalVehicles), y = TotalVehicles)
) +
  # Bars ordered by total via reorder() in the x mapping.
  geom_bar(
    stat = "identity",
    position = position_stack(reverse = TRUE),
    colour = 'lightgreen',
    fill = 'black'
  ) +
  # Comma-formatted total above each bar.
  geom_text(aes(label = comma(TotalVehicles), vjust = -1)) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 25 Counties with Most Electric Vehicles in Washington",
    x = "County",
    y = "Count of EVs"
  ) +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
# Render the chart.
Top25CountiesBar

Visual #4: Battery vs. Plug in Hybrid:

This pie chart visualization identifies the percentage and frequency of battery vs. plug-in hybrid vehicles within the Electric Vehicles data in Washington.

# Visualization 4: plotly pie chart comparing the number of vehicles of each
# Electric.Vehicle.Type (battery electric vs. plug-in hybrid).
#unique(EV_df$Electric.Vehicle.Type)
#length(EV_df$Electric.Vehicle.Type)
# Count the vehicles of each type.
EV_type <- EV_df %>%
  group_by(Electric.Vehicle.Type) %>%
  summarise(count = n())
# Slice colours (two shades of green).
custom_colors <- c("#2ca02c", "#003300")
# Create the pie chart.
EVTypePie <- plot_ly(data = EV_type,
                     labels = ~Electric.Vehicle.Type,
                     values = ~count,
                     type = "pie",
                     textposition = "outside",
                     # textinfo is a flag list: the flags are joined with "+"
                     # and must not contain spaces, otherwise they are not
                     # recognised and the label text is not shown as requested.
                     textinfo = "label+percent",
                     marker = list(colors = custom_colors)) %>%
  layout(title ="Battery vs. Hybrid Vehicles Count")
# Display the pie chart
EVTypePie
# end of visual 4

Visual #6: Electric Range Heat Map

This visualization provides a heat map displaying the car models with the highest electric range in miles. It displays the 50 models with the highest maximum electric range along the x-axis and reports each model's electric range in miles on the y-axis.

# Heat map of the models with the longest electric range.
# unique(EV_df$Model)
# Largest Electric.Range recorded for each Model; top_n() keeps the 50 highest
# values (more rows if there are ties), then the rows are sorted longest first.
top_ranges <- EV_df %>%
  group_by(Model) %>%
  summarise(max_range = max(Electric.Range)) %>%
  top_n(n = 50, wt = max_range) %>%
  arrange(desc(max_range)) %>%
  data.frame()
# top_ranges
# One tile per model, positioned at its range on the y-axis and shaded by range.
EV_heatmap <- ggplot(data = top_ranges, mapping = aes(x = Model, y = max_range)) +
  geom_tile(
    mapping = aes(fill = max_range),
    color = "black",
    width = 1,
    height = 8
  ) +
  # Print the range value in the middle of each tile.
  geom_text(
    mapping = aes(label = max_range),
    color = "black",
    size = 3,
    hjust = 0.5,
    vjust = 0.5
  ) +
  labs(
    title = "Top Electric Ranges by Model",
    x = "Model",
    y = "Electric Range (miles)",
    fill = "Electric Range (miles)"
  ) +
  # Light-to-dark green scale with a legend break every 40 miles from 0.
  scale_fill_gradient(
    low = "lightgreen",
    high = "darkgreen",
    breaks = seq(from = 0, to = max(top_ranges[["max_range"]]), by = 40)
  ) +
  # Legend listed from highest to lowest, with black key outlines.
  guides(fill = guide_legend(reverse = TRUE, override.aes = list(colour = "black"))) +
  theme_minimal() +
  # Tilt the model names so they stay readable, and centre the title.
  theme(
    axis.text.x = element_text(angle = 40, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Render the heat map.
EV_heatmap

#END