library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
ev_data <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/Electric.csv")
## New names:
## Rows: 261698 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): VIN (1-10), County, City, State, Make, Model, Electric_Vehicle_Typ... dbl
## (7): Postal_Code, Year, Electric Range, Base MSRP, Legislative District... lgl
## (1): ...18
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...18`
head(ev_data)
make_count <- ev_data %>%
  count(Make)
view(make_count)
sum(make_count$n)
## [1] 261698
ggplot(make_count, aes(x = reorder(Make, n), y = n)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(aes(label = n), hjust = -0.1, size = 2) +  # Match text size to axis labels
  coord_flip() +
  labs(
    title = "Number of Registered Electric Vehicles by Make",
    x = "Make (Manufacturer)",
    y = "Number of Vehicles"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text.y = element_text(size = 6)  # Match label size to geom_text
  )

library(tidyverse)

ev_count <- 261698
wa_population <- 7900000
non_ev_count <- wa_population - ev_count

ev_data <- tibble(
  Category = c("EV Owners", "Non-EV Owners"),
  Count = c(ev_count, non_ev_count)
)

ev_data <- ev_data %>%
  mutate(
    Percent = Count / sum(Count) * 100,
    Label = paste0(Category, "\n", round(Percent, 2), "%")
  )

ggplot(ev_data, aes(x = "", y = Count, fill = Category)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  geom_text(aes(label = Label), position = position_stack(vjust = 0.5), size = 4) +
  scale_fill_manual(values = c("steelblue", "grey80")) +
  labs(
    title = "EV Ownership in Washington (2025)",
    fill = "Category"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  )

library(tigris)     
## Warning: package 'tigris' was built under R version 4.5.2
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(leaflet)    
## Warning: package 'leaflet' was built under R version 4.5.2
library(sf)         
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
options(tigris_use_cache = TRUE)

wa_counties <- counties(state = "WA", cb = TRUE, class = "sf")
## Retrieving data for the year 2024
wa_centroids <- st_centroid(wa_counties)
## Warning: st_centroid assumes attributes are constant over geometries
leaflet(data = wa_counties) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = "lightblue",
    color = "black",
    weight = 1,
    popup = ~NAME
  ) %>%
  addLabelOnlyMarkers(
    data = wa_centroids,
    lng = ~st_coordinates(geometry)[,1],
    lat = ~st_coordinates(geometry)[,2],
    label = ~NAME,
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "center",
      textOnly = TRUE,
      style = list(
        "color" = "black",
        "font-size" = "10px",
        "font-weight" = "bold"
      )
    )
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
options(tigris_use_cache = TRUE)

ev_data <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/Electric.csv")
## New names:
## Rows: 261698 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): VIN (1-10), County, City, State, Make, Model, Electric_Vehicle_Typ... dbl
## (7): Postal_Code, Year, Electric Range, Base MSRP, Legislative District... lgl
## (1): ...18
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...18`
ev_by_county <- ev_data %>%
  count(County) %>%
  rename(NAME = County, ev_count = n)

wa_counties <- counties(state = "WA", cb = TRUE, class = "sf")
## Retrieving data for the year 2024
wa_counties_ev <- left_join(wa_counties, ev_by_county, by = "NAME")

wa_centroids <- st_centroid(wa_counties_ev)
## Warning: st_centroid assumes attributes are constant over geometries
leaflet(data = wa_counties_ev) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~colorNumeric("Blues", domain = wa_counties_ev$ev_count)(ev_count),
    color = "black",
    weight = 1,
    popup = ~paste(NAME, "<br>EVs:", ev_count),
    label = ~paste(NAME, ":", ev_count)
  ) %>%
  addLabelOnlyMarkers(
    data = wa_centroids,
    lng = ~st_coordinates(geometry)[,1],
    lat = ~st_coordinates(geometry)[,2],
    label = ~NAME,
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "center",
      textOnly = TRUE,
      style = list(
        "color" = "black",
        "font-size" = "10px",
        "font-weight" = "bold"
      )
    )
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
options(tigris_use_cache = TRUE)

ev_data <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/Electric.csv")
## New names:
## Rows: 261698 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): VIN (1-10), County, City, State, Make, Model, Electric_Vehicle_Typ... dbl
## (7): Postal_Code, Year, Electric Range, Base MSRP, Legislative District... lgl
## (1): ...18
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...18`
ev_by_county <- ev_data %>%
  count(County) %>%
  rename(NAME = County, ev_count = n)

ev_models_by_county <- ev_data %>%
  count(County, Make) %>%
  rename(NAME = County, model = Make, count = n) %>%
  group_by(NAME) %>%
  summarise(model_breakdown = paste(model, count, sep = ": ", collapse = "<br>"))

wa_counties <- counties(state = "WA", cb = TRUE, class = "sf")
## Retrieving data for the year 2024
wa_counties_ev <- wa_counties %>%
  left_join(ev_by_county, by = "NAME") %>%
  left_join(ev_models_by_county, by = "NAME")

wa_centroids <- st_centroid(wa_counties_ev)
## Warning: st_centroid assumes attributes are constant over geometries
leaflet(data = wa_counties_ev) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~colorNumeric("Blues", domain = wa_counties_ev$ev_count)(ev_count),
    color = "black",
    weight = 1,
    popup = ~paste0(
      "<strong>", NAME, "</strong><br>",
      "Total EVs: ", ev_count, "<br><br>",
      "<u>Model Breakdown:</u><br>", model_breakdown
    ),
    label = ~paste(NAME, ":", ev_count)
  ) %>%
  addLabelOnlyMarkers(
    data = wa_centroids,
    lng = ~st_coordinates(geometry)[,1],
    lat = ~st_coordinates(geometry)[,2],
    label = ~NAME,
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "center",
      textOnly = TRUE,
      style = list(
        "color" = "black",
        "font-size" = "10px",
        "font-weight" = "bold"
      )
    )
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
library(maps)
## Warning: package 'maps' was built under R version 4.5.2
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(sf)
library(dplyr)
library(tidycensus)
library(ggplot2)
library(tigris)
library(openintro)
## Warning: package 'openintro' was built under R version 4.5.2
## Loading required package: airports
## Warning: package 'airports' was built under R version 4.5.2
## Loading required package: cherryblossom
## Warning: package 'cherryblossom' was built under R version 4.5.2
## Loading required package: usdata
## Warning: package 'usdata' was built under R version 4.5.2
library(tidyr)
options(tigris_use_cache = TRUE)

data("us.cities")
wa_cities <- us.cities %>% filter(country.etc == "WA")
wa_cities_sf <- st_as_sf(wa_cities, coords = c("long", "lat"), crs = 4326)

wa_counties <- get_acs(
  geography = "county",
  state     = "WA",
  variables = "B01001_001",
  year      = 2023,
  survey    = "acs5",
  geometry  = TRUE
) %>%
  separate(NAME, into = c("county", "state"), sep = ", ", remove = FALSE)
## Getting data from the 2019-2023 5-year ACS
wa_cities_proj   <- st_transform(wa_cities_sf, 2855)  # NAD83 / UTM zone 10N
wa_counties_proj <- st_transform(wa_counties, 2855)

top_cities <- wa_cities_proj %>% arrange(desc(pop)) %>% slice_head(n = 10)
buffer_50km <- st_buffer(top_cities, dist = 50000)

buffer_50km_union <- st_union(buffer_50km)

mat <- st_intersects(wa_counties_proj, buffer_50km_union, sparse = FALSE)

wa_counties_urb <- wa_counties_proj %>%
  mutate(
    in_buffer   = mat[, 1],
    urbanicity  = if_else(in_buffer, "Urban", "Rural"),
    urbanicity  = factor(urbanicity, levels = c("Rural", "Urban"))
  )

ggplot() +
  geom_sf(data = wa_counties_urb, aes(fill = urbanicity), color = "white", linewidth = 0.2) +
  scale_fill_manual(values = c("Rural" = "grey85", "Urban" = "steelblue")) +
  labs(title = "Urban vs Rural Counties in Washington", fill = "Urbanicity") +
  theme_minimal()

library(tidyverse)
library(tigris)
library(sf)
library(leaflet)

options(tigris_use_cache = TRUE)

ev_data <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/Electric.csv") %>%
  select(-`...18`)  # Drop unnamed column if present
## New names:
## Rows: 261698 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): VIN (1-10), County, City, State, Make, Model, Electric_Vehicle_Typ... dbl
## (7): Postal_Code, Year, Electric Range, Base MSRP, Legislative District... lgl
## (1): ...18
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...18`
ev_by_county <- ev_data %>%
  count(County) %>%
  rename(NAME = County, ev_count = n)


ev_models_by_county <- ev_data %>%
  count(County, Make) %>%
  rename(NAME = County, model = Make, count = n) %>%
  group_by(NAME) %>%
  summarise(model_breakdown = paste(model, count, sep = ": ", collapse = "<br>"), .groups = "drop")


wa_counties <- counties(state = "WA", cb = TRUE, class = "sf")
## Retrieving data for the year 2024
wa_counties_ev <- wa_counties %>%
  left_join(ev_by_county, by = "NAME") %>%
  left_join(ev_models_by_county, by = "NAME")

wa_counties_proj <- st_transform(wa_counties_ev, 2855)

data("us.cities")
wa_cities <- us.cities %>% filter(country.etc == "WA")
wa_cities_sf <- st_as_sf(wa_cities, coords = c("long", "lat"), crs = 4326)
wa_cities_proj <- st_transform(wa_cities_sf, 2855)

top_cities <- wa_cities_proj %>% arrange(desc(pop)) %>% slice_head(n = 10)
buffer_50km <- st_buffer(top_cities, dist = 50000)
buffer_50km_union <- st_union(buffer_50km)

mat <- st_intersects(wa_counties_proj, buffer_50km_union, sparse = FALSE)

wa_counties_ev <- wa_counties_ev %>%
  mutate(
    in_buffer = mat[, 1],
    urbanicity = if_else(in_buffer, "Urban", "Rural"),
    urbanicity = factor(urbanicity, levels = c("Rural", "Urban"))
  )

wa_counties_ev <- st_transform(wa_counties_ev, 4326)
wa_centroids <- st_centroid(wa_counties_ev)
## Warning: st_centroid assumes attributes are constant over geometries
urban_palette <- colorFactor(
  palette = c("grey85", "steelblue"),  # Rural = grey85, Urban = steelblue
  domain  = wa_counties_ev$urbanicity
)

leaflet(data = wa_counties_ev) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~urban_palette(urbanicity),
    color = "black",
    weight = 1,
    popup = ~paste0(
      "<strong>", NAME, "</strong><br>",
      "Urbanicity: ", urbanicity, "<br>",
      "Total EVs: ", ev_count, "<br><br>",
      "<u>Model Breakdown:</u><br>", model_breakdown
    ),
    label = ~paste(NAME, ":", ev_count)
  ) %>%
  addLabelOnlyMarkers(
    data = wa_centroids,
    lng = ~st_coordinates(geometry)[,1],
    lat = ~st_coordinates(geometry)[,2],
    label = ~NAME,
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "center",
      textOnly = TRUE,
      style = list(
        "color" = "black",
        "font-size" = "10px",
        "font-weight" = "bold"
      )
    )
  )
library(tidyverse)
library(sf)

stations <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/alt_fuel_stations_ev_charging_units.csv") %>%
  filter(State == "WA", `Fuel Type Code` == "ELEC") %>%
  drop_na(Latitude, Longitude)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 7493 Columns: 85
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (27): Fuel Type Code, Station Name, Street Address, Intersection Direct...
## dbl  (15): ZIP, EV Level2 EVSE Num, EV DC Fast Count, Latitude, Longitude, I...
## lgl  (41): Plus4, Expected Date, BD Blends, NG Fill Type Code, NG PSI, EV Le...
## date  (2): Date Last Confirmed, Open Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stations_sf <- st_as_sf(stations, coords = c("Longitude", "Latitude"), crs = 4326)

stations_joined <- st_join(stations_sf, wa_counties_ev, join = st_within)

# Summary: total, urban, and rural station counts
station_summary <- stations_joined %>%
  st_drop_geometry() %>%
  count(urbanicity, name = "station_count") %>%
  add_row(urbanicity = "Total", station_count = nrow(stations_joined))

print(station_summary)
## # A tibble: 4 × 2
##   urbanicity station_count
##   <chr>              <int>
## 1 Rural                731
## 2 Urban               6760
## 3 <NA>                   2
## 4 Total               7493
library(tidyverse)
library(tigris)
library(sf)
library(leaflet)

options(tigris_use_cache = TRUE)

# Load EV registration data
ev_data <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/Electric.csv") %>%
  select(-`...18`)
## New names:
## Rows: 261698 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): VIN (1-10), County, City, State, Make, Model, Electric_Vehicle_Typ... dbl
## (7): Postal_Code, Year, Electric Range, Base MSRP, Legislative District... lgl
## (1): ...18
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...18`
ev_by_county <- ev_data %>% count(County) %>% rename(NAME = County, ev_count = n)

ev_models_by_county <- ev_data %>%
  count(County, Make) %>%
  rename(NAME = County, model = Make, count = n) %>%
  group_by(NAME) %>%
  summarise(model_breakdown = paste(model, count, sep = ": ", collapse = "<br>"), .groups = "drop")

wa_counties <- counties(state = "WA", cb = TRUE, class = "sf")
## Retrieving data for the year 2024
wa_counties_ev <- wa_counties %>%
  left_join(ev_by_county, by = "NAME") %>%
  left_join(ev_models_by_county, by = "NAME")

wa_counties_proj <- st_transform(wa_counties_ev, 2855)

data("us.cities")
wa_cities <- us.cities %>% filter(country.etc == "WA")
wa_cities_sf <- st_as_sf(wa_cities, coords = c("long", "lat"), crs = 4326)
wa_cities_proj <- st_transform(wa_cities_sf, 2855)

top_cities <- wa_cities_proj %>% arrange(desc(pop)) %>% slice_head(n = 10)
buffer_50km <- st_buffer(top_cities, dist = 50000)
buffer_50km_union <- st_union(buffer_50km)

mat <- st_intersects(wa_counties_proj, buffer_50km_union, sparse = FALSE)

wa_counties_ev <- wa_counties_ev %>%
  mutate(
    in_buffer = mat[, 1],
    urbanicity = if_else(in_buffer, "Urban", "Rural"),
    urbanicity = factor(urbanicity, levels = c("Rural", "Urban"))
  )

wa_counties_ev <- st_transform(wa_counties_ev, 4326)
wa_centroids <- st_centroid(wa_counties_ev)
## Warning: st_centroid assumes attributes are constant over geometries
urban_palette <- colorFactor(
  palette = c("grey85", "steelblue"),
  domain  = wa_counties_ev$urbanicity
)

stations <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/alt_fuel_stations_ev_charging_units.csv") %>%
  filter(State == "WA") %>%
  drop_na(Latitude, Longitude)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 7493 Columns: 85
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (27): Fuel Type Code, Station Name, Street Address, Intersection Direct...
## dbl  (15): ZIP, EV Level2 EVSE Num, EV DC Fast Count, Latitude, Longitude, I...
## lgl  (41): Plus4, Expected Date, BD Blends, NG Fill Type Code, NG PSI, EV Le...
## date  (2): Date Last Confirmed, Open Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stations_sf <- st_as_sf(stations, coords = c("Longitude", "Latitude"), crs = 4326)

leaflet(data = wa_counties_ev) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~urban_palette(urbanicity),
    color = "black",
    weight = 1,
    popup = ~paste0(
      "<strong>", NAME, "</strong><br>",
      "Urbanicity: ", urbanicity, "<br>",
      "Total EVs: ", ev_count, "<br><br>",
      "<u>Model Breakdown:</u><br>", model_breakdown
    ),
    label = ~paste(NAME, ":", ev_count)
  ) %>%
  addCircleMarkers(
    data = stations_sf,
    radius = 4,
    color = "darkgreen",
    fillOpacity = 0.7,
    popup = ~paste0(
      "<strong>", `Station Name`, "</strong><br>",
      "Fuel Type: ", `Fuel Type Code`, "<br>",
      "City: ", City
    )
  ) %>%
  addLabelOnlyMarkers(
    data = wa_centroids,
    lng = ~st_coordinates(geometry)[,1],
    lat = ~st_coordinates(geometry)[,2],
    label = ~NAME,
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "center",
      textOnly = TRUE,
      style = list(
        "color" = "black",
        "font-size" = "10px",
        "font-weight" = "bold"
      )
    )
  )
# T-test: compare station counts per county by urbanicity
stations_by_county <- stations_joined %>%
  st_drop_geometry() %>%
  count(NAME, urbanicity, name = "station_count")

t.test(station_count ~ urbanicity, data = stations_by_county)
## 
##  Welch Two Sample t-test
## 
## data:  station_count by urbanicity
## t = -1.6381, df = 21.174, p-value = 0.1162
## alternative hypothesis: true difference in means between group Rural and group Urban is not equal to 0
## 95 percent confidence interval:
##  -593.51571   70.34526
## sample estimates:
## mean in group Rural mean in group Urban 
##             45.6875            307.2727
library(tidyverse)
library(tigris)
library(sf)

options(tigris_use_cache = TRUE)

ev_data <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/Electric.csv") %>%
  select(-`...18`)
## New names:
## Rows: 261698 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): VIN (1-10), County, City, State, Make, Model, Electric_Vehicle_Typ... dbl
## (7): Postal_Code, Year, Electric Range, Base MSRP, Legislative District... lgl
## (1): ...18
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...18`
ev_by_county <- ev_data %>% count(County) %>% rename(NAME = County, ev_count = n)

ev_models_by_county <- ev_data %>%
  count(County, Make) %>%
  rename(NAME = County, model = Make, count = n) %>%
  group_by(NAME) %>%
  summarise(model_breakdown = paste(model, count, sep = ": ", collapse = "<br>"), .groups = "drop")

county_pop_2024 <- tribble(
  ~NAME, ~totalpop,
  "Adams", 21039, "Asotin", 22523, "Benton", 218190, "Chelan", 81228, "Clallam", 77958,
  "Clark", 527269, "Columbia", 4025, "Cowlitz", 113982, "Douglas", 45795, "Ferry", 7543,
  "Franklin", 101238, "Garfield", 2404, "Grant", 104717, "Grays Harbor", 77893, "Island", 86478,
  "Jefferson", 33944, "King", 2340211, "Kitsap", 281420, "Kittitas", 48172, "Klickitat", 24124,
  "Lewis", 87049, "Lincoln", 11862, "Mason", 69632, "Okanogan", 44942, "Pacific", 24245,
  "Pend Oreille", 14332, "Pierce", 941170, "San Juan", 18668, "Skagit", 132736, "Skamania", 12660,
  "Snohomish", 864113, "Spokane", 555947, "Stevens", 49015, "Thurston", 302912, "Wahkiakum", 4800,
  "Walla Walla", 62068, "Whatcom", 234954, "Whitman", 48399, "Yakima", 258523
)

wa_counties <- counties(state = "WA", cb = TRUE, class = "sf")
## Retrieving data for the year 2024
wa_counties_ev <- wa_counties %>%
  left_join(ev_by_county, by = "NAME") %>%
  left_join(ev_models_by_county, by = "NAME") %>%
  left_join(county_pop_2024, by = "NAME") %>%
  mutate(ev_per_capita = ev_count / totalpop)

data("us.cities")
wa_cities <- us.cities %>% filter(country.etc == "WA")
wa_cities_sf <- st_as_sf(wa_cities, coords = c("long", "lat"), crs = 4326)
wa_cities_proj <- st_transform(wa_cities_sf, 2855)

wa_counties_proj <- st_transform(wa_counties_ev, 2855)
top_cities <- wa_cities_proj %>% arrange(desc(pop)) %>% slice_head(n = 10)
buffer_50km <- st_buffer(top_cities, dist = 50000)
buffer_50km_union <- st_union(buffer_50km)

mat <- st_intersects(wa_counties_proj, buffer_50km_union, sparse = FALSE)

wa_counties_ev <- wa_counties_ev %>%
  mutate(
    in_buffer = mat[, 1],
    urbanicity = if_else(in_buffer, "Urban", "Rural"),
    urbanicity = factor(urbanicity, levels = c("Rural", "Urban"))
  )
library(tidyverse)
library(tigris)
library(sf)
library(leaflet)

options(tigris_use_cache = TRUE)

ev_data <- read_csv("C:/Users/lunal/OneDrive/Desktop/Final project folder/Electric.csv") %>%
  select(-`...18`)
## New names:
## Rows: 261698 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): VIN (1-10), County, City, State, Make, Model, Electric_Vehicle_Typ... dbl
## (7): Postal_Code, Year, Electric Range, Base MSRP, Legislative District... lgl
## (1): ...18
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...18`
ev_by_county <- ev_data %>% count(County) %>% rename(NAME = County, ev_count = n)

ev_models_by_county <- ev_data %>%
  count(County, Make) %>%
  rename(NAME = County, model = Make, count = n) %>%
  group_by(NAME) %>%
  summarise(model_breakdown = paste(model, count, sep = ": ", collapse = "<br>"), .groups = "drop")

county_pop_2024 <- tribble(
  ~NAME, ~totalpop,
  "Adams", 21039, "Asotin", 22523, "Benton", 218190, "Chelan", 81228, "Clallam", 77958,
  "Clark", 527269, "Columbia", 4025, "Cowlitz", 113982, "Douglas", 45795, "Ferry", 7543,
  "Franklin", 101238, "Garfield", 2404, "Grant", 104717, "Grays Harbor", 77893, "Island", 86478,
  "Jefferson", 33944, "King", 2340211, "Kitsap", 281420, "Kittitas", 48172, "Klickitat", 24124,
  "Lewis", 87049, "Lincoln", 11862, "Mason", 69632, "Okanogan", 44942, "Pacific", 24245,
  "Pend Oreille", 14332, "Pierce", 941170, "San Juan", 18668, "Skagit", 132736, "Skamania", 12660,
  "Snohomish", 864113, "Spokane", 555947, "Stevens", 49015, "Thurston", 302912, "Wahkiakum", 4800,
  "Walla Walla", 62068, "Whatcom", 234954, "Whitman", 48399, "Yakima", 258523
)

wa_counties <- counties(state = "WA", cb = TRUE, class = "sf")
## Retrieving data for the year 2024
wa_counties_ev <- wa_counties %>%
  left_join(ev_by_county, by = "NAME") %>%
  left_join(ev_models_by_county, by = "NAME") %>%
  left_join(county_pop_2024, by = "NAME") %>%
  mutate(ev_per_capita = ev_count / totalpop)

data("us.cities")
wa_cities <- us.cities %>% filter(country.etc == "WA")
wa_cities_sf <- st_as_sf(wa_cities, coords = c("long", "lat"), crs = 4326)
wa_cities_proj <- st_transform(wa_cities_sf, 2855)

wa_counties_proj <- st_transform(wa_counties_ev, 2855)
top_cities <- wa_cities_proj %>% arrange(desc(pop)) %>% slice_head(n = 10)
buffer_50km <- st_buffer(top_cities, dist = 50000)
buffer_50km_union <- st_union(buffer_50km)

mat <- st_intersects(wa_counties_proj, buffer_50km_union, sparse = FALSE)

wa_counties_ev <- wa_counties_ev %>%
  mutate(
    in_buffer = mat[, 1],
    urbanicity = if_else(in_buffer, "Urban", "Rural"),
    urbanicity = factor(urbanicity, levels = c("Rural", "Urban"))
  )

wa_counties_ev <- st_transform(wa_counties_ev, 4326)
wa_centroids <- st_centroid(wa_counties_ev)
## Warning: st_centroid assumes attributes are constant over geometries
urban_palette <- colorFactor(
  palette = c("grey85", "steelblue"),
  domain  = wa_counties_ev$urbanicity
)

leaflet(data = wa_counties_ev) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~urban_palette(urbanicity),
    color = "black",
    weight = 1,
    popup = ~paste0(
      "<strong>", NAME, "</strong><br>",
      "Urbanicity: ", urbanicity, "<br>",
      "Population (2024): ", formatC(totalpop, format = "d", big.mark = ","), "<br>",
      "Total EVs: ", ev_count, "<br>",
      "EVs per capita: ", round(ev_per_capita, 4), "<br><br>",
      "<u>Model Breakdown:</u><br>", model_breakdown
    ),
    label = ~paste(NAME, ":", ev_count)
  ) %>%
  addLabelOnlyMarkers(
    data = wa_centroids,
    lng = ~st_coordinates(geometry)[,1],
    lat = ~st_coordinates(geometry)[,2],
    label = ~NAME,
    labelOptions = labelOptions(
      noHide = TRUE,
      direction = "center",
      textOnly = TRUE,
      style = list(
        "color" = "black",
        "font-size" = "10px",
        "font-weight" = "bold"
      )
    )
  )
t_test_result <- t.test(ev_per_capita ~ urbanicity, data = wa_counties_ev)

print(t_test_result)
## 
##  Welch Two Sample t-test
## 
## data:  ev_per_capita by urbanicity
## t = -3.3237, df = 27.855, p-value = 0.002496
## alternative hypothesis: true difference in means between group Rural and group Urban is not equal to 0
## 95 percent confidence interval:
##  -0.019818306 -0.004702521
## sample estimates:
## mean in group Rural mean in group Urban 
##          0.01182276          0.02408317
library(ggplot2)

ggplot(wa_counties_ev, aes(x = urbanicity, y = ev_per_capita, fill = urbanicity)) +
  geom_boxplot(alpha = 0.7, color = "black") +
  scale_fill_manual(values = c("grey85", "steelblue")) +
  labs(
    title = "EVs per Capita by County Urbanicity",
    x = "Urbanicity",
    y = "EVs per Capita"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 14),
    axis.title = element_text(size = 12)
  )