library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sunburstR)
library(plotly) 
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(ggplot2)
library(tidyr)
library(htmlwidgets)
library(htmltools)
library(readr)


accident <- read_csv("/Users/annapatrick/Downloads/archive-2/accident.csv", show_col_types = FALSE)

colnames(accident)
##  [1] "index"          "accident_id"    "ST_CASE"        "VE_TOTAL"      
##  [5] "VE_FORMS"       "PEDS"           "PERSONS"        "COUNTY"        
##  [9] "county_name"    "CITY"           "city_name"      "DAY"           
## [13] "MONTH"          "YEAR"           "HOUR"           "MINUTE"        
## [17] "NHS"            "FUNC_SYS"       "func_sys_lit"   "ROAD_FNC"      
## [21] "road_fnc_lit"   "RD_OWNER"       "rd_owner_lit"   "TWAY_ID"       
## [25] "TWAY_ID2"       "LATITUDE"       "LONGITUD"       "SP_JUR"        
## [29] "sp_jur_lit"     "HARM_EV"        "harm_ev_lit"    "MAN_COLL"      
## [33] "man_coll_lit"   "RELJCT1"        "RELJCT2"        "TYP_INT"       
## [37] "WRK_ZONE"       "REL_ROAD"       "LGT_COND"       "lgt_cond_lit"  
## [41] "WEATHER"        "weather_lit"    "SCH_BUS"        "CF1"           
## [45] "CF2"            "CF3"            "cf1_lit"        "cf2_lit"       
## [49] "cf3_lit"        "FATALS"         "A_INTER"        "a_inter_lit"   
## [53] "A_ROADFC"       "a_road_fc_lit"  "A_TOD"          "a_tod_lit"     
## [57] "A_DOW"          "a_dow_lit"      "A_LT"           "a_lt_lit"      
## [61] "A_SPCRA"        "a_spcra_lit"    "A_PED"          "a_ped_lit"     
## [65] "A_PED_F"        "a_ped_f_lit"    "A_PEDAL"        "a_pedal_lit"   
## [69] "A_PEDAL_F"      "a_pedal_f_lit"  "A_POLPUR"       "a_polour_lit"  
## [73] "A_POSBAC"       "a_posbac_lit"   "A_DIST"         "a_dist_lit"    
## [77] "A_DROWSY"       "a_drowsy_lit"   "INDIAN_RES"     "indian_res_lit"
county_fatal <- accident %>%
  filter(!is.na(county_name), !is.na(FATALS)) %>%
  group_by(county_name = tolower(county_name)) %>%
  summarise(total_fatalities = sum(FATALS), .groups = "drop")

us_counties <- read_sf("/Users/annapatrick/Downloads/Archive-5/cb_2022_us_county_5m/cb_2022_us_county_5m.shp")

colnames(us_counties)
##  [1] "STATEFP"    "COUNTYFP"   "COUNTYNS"   "AFFGEOID"   "GEOID"     
##  [6] "NAME"       "NAMELSAD"   "STUSPS"     "STATE_NAME" "LSAD"      
## [11] "ALAND"      "AWATER"     "geometry"
az_counties <- us_counties %>%
  filter(STUSPS == "AZ") %>%
  mutate(NAME = tolower(NAME))

az_map_data <- az_counties %>%
  left_join(county_fatal, by = c("NAME" = "county_name")) %>%
  mutate(total_fatalities = replace_na(total_fatalities, 0))

##Q1

az_map_data <- az_map_data %>%
  mutate(
    fatality_category = cut(
      total_fatalities,
      breaks = c(-1, 20, 40, 60, 80, 100, 120, 140, 160, 180, 241, Inf),
      labels = c("0-20", "21-40", "41-60", "61-80", "81-100", "101-120", "121-140", "141-160", "161-180", "181-240", "241+"),
      include.lowest = TRUE,
      right = TRUE
    ),
    fatality_category = factor(
      fatality_category,
      levels = c("0-20", "21-40", "41-60", "61-80", "81-100", "101-120", "121-140", "141-160", "161-180", "181-240", "241+")
    ),
    tooltip = paste0("County: ", NAME, "\nFatalities: ", total_fatalities)  
  )

plot <- ggplot(az_map_data) +
  geom_sf(aes(fill = fatality_category, text = tooltip), color = "white", linewidth = 0.2) +
  scale_fill_viridis_d(option = "plasma", na.value = "grey90") +
  labs(
    title = "Total Accident Fatalities by County in Arizona",
    fill = "Fatality Range",
    x = "Longitude",
    y = "Latitude"
  ) +
  coord_sf(crs = st_crs("EPSG:26948")) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 40, hjust = 1)
  )
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(plot, tooltip = "text")

Q2

# Define a colorblind-friendly palette
color_palette <- c(
  "#7AD151",  # 0–20
  "#F0F921",  # 41–60
  "#FDC326",  # 61–80
  "#F89D45",  # 81–100
  "#E76F5B",  # 101–120
  "#D1497F",  # 121–140
  "#AA2395",  # 141–160
  "#7E03A8",  # 161–180
  "#0D0887"   # 241+
)

# Create severity group based on FATALS
accident_clean <- accident %>%
  filter(!is.na(weather_lit), !is.na(FATALS)) %>%
  mutate(SeverityGroup = case_when(
    FATALS == 0 ~ "No Fatalities",
    FATALS == 1 ~ "1 Fatality",
    FATALS >= 2 ~ "2+ Fatalities"
  ))

# Create sunburst path with two layers
weather_fatal <- accident_clean %>%
  count(weather_lit, SeverityGroup) %>%
  mutate(
    path = paste(weather_lit, SeverityGroup, sep = "-") 
  ) %>%
  select(path, n)

# Prepare data for sunburst
weather_fatal_hierarchy <- accident_clean %>%
  count(weather_lit) %>%
  mutate(path = weather_lit) %>%
  select(path, n) %>%
  bind_rows(weather_fatal)

# Plot
sunburst_plot <- sunburst(weather_fatal_hierarchy, colors = color_palette)

prependContent(
  sunburst_plot,
  tags$div(
    style = "text-align:center; font-weight:normal; font-size:22px; 
             font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; 
             margin-bottom:10px;",
    "Accident Severity by Weather Condition"
  )
)
Accident Severity by Weather Condition
Legend

Connecting the two questions:

county_weather_severity <- accident %>%
  filter(!is.na(county_name), !is.na(weather_lit), !is.na(FATALS)) %>%
  mutate(
    county_name = tolower(county_name),
    severity = case_when(
      FATALS == 0 ~ "No Fatality",
      FATALS == 1 ~ "1 Fatality",
      FATALS >= 2 ~ "2+ Fatalities"
    )
  ) %>%
  group_by(county_name, weather_lit, severity) %>%
  summarise(total_fatalities = sum(FATALS), .groups = "drop")

# Then plot
plot2 <- ggplot(county_weather_severity, aes(x = reorder(county_name, -total_fatalities), y = total_fatalities, fill = severity)) +
  geom_bar(stat = "identity") +
  facet_wrap(~ weather_lit, scales = "free_x") +
  scale_fill_viridis_d(option = "plasma") +
  labs(
    title = "Accident Fatalities by County and Weather Condition",
    x = "County",
    y = "Number of Fatalities",
    fill = "Severity Level"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )

ggplotly(plot2)