STEP 1: Visit the Oregon School Immunization Coverage (ArcGIS Map) for Preschools and childcare

https://geo.maps.arcgis.com/apps/webappviewer/index.html?id=ea1c78a745c845d899a0184f3581a2ff

STEP 2: Load the 2023-2024 data into R

## FA24 DATA 502
## COUNTY LEVEL DATA
# County-level immunization rates, read straight from the course GitHub repo.
county <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/Teaching/refs/heads/main/ProjectData/countyImmRate.csv"
)
# str(county)

## VACCINE DATA FOR PRESCHOOLS
# Site-level records for preschools / child care, from the same repo.
ps <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/Teaching/refs/heads/main/ProjectData/Preschool%20_%20Child%20Care.csv"
)

## NEEDS TO BE NUMERIC
# Coerce the all-vaccines percentage to numeric; entries that cannot be
# parsed as numbers become NA.
ps[["Per_Vac_AllVac"]] <- as.numeric(ps[["Per_Vac_AllVac"]])
# str(ps)

STEP 3: Load maps package and map data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## MAPS
#install.packages("maps")
library(maps)
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
## LOAD MAP DATA
# Oregon county outline vertices, reduced to the four columns used for
# plotting and joining (longitude, latitude, polygon group, county name).
or_counties <- select(
  map_data("county", "oregon"),
  lon = long,
  lat,
  group,
  id = subregion
)

STEP 4: Join the map data to the vaccine data

library(dplyr)

# Normalise the join key on both sides (trim whitespace, lower-case) so the
# county names in the rate table line up with the map's `id` column.
county2 <- mutate(county, County = tolower(trimws(County)))

or_counties2 <- mutate(or_counties, id = tolower(trimws(id)))

# Attach the county-level rates to every outline vertex.
# Note: the join is between the county table and the map outlines -- not
# between `ps` and `or_counties`, which was the first (wrong) attempt.
map_data_joined <- left_join(
  x = or_counties2,
  y = county2,
  by = c("id" = "County")
)

STEP 5: Recreate the choropleth map

## USE geom_polygon() or geom_sf()
library(ggplot2)

# County choropleth: one polygon per county outline, filled by the
# county-wide rate for all required vaccines.
ggplot(map_data_joined, aes(x = lon, y = lat, group = group)) +
  # `linewidth` (not `size`) sets the outline thickness; `size` for lines
  # was deprecated in ggplot2 3.4.0 and triggered a lifecycle warning here.
  geom_polygon(aes(fill = All.vaccines), color = "black", linewidth = 0.2) +
  theme_minimal() +
  # Fixed aspect ratio so the state outline is not stretched.
  coord_fixed(1.3) +
  labs(
    title = "Vaccination Rates for All Required Vaccines by County",
    subtitle = "Oregon Preschools",
    x = "Longitude",
    y = "Latitude"
  ) +
  # Coordinates are not informative on this map: hide ticks, text and grid.
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

STEP 6: Add layer for school points

## ADD LAYER OF POINTS
# Same county choropleth as before, with one point per preschool / child
# care site drawn on top.
ggplot(map_data_joined, aes(x = lon, y = lat, group = group)) +
  # Layer 1: counties filled by the all-vaccines rate. Outline thickness is
  # set with `linewidth`; `size` for lines is deprecated since ggplot2 3.4.0.
  geom_polygon(aes(fill = All.vaccines), color = "black", linewidth = 0.2) +
  # Layer 2: site locations from `ps`. inherit.aes = FALSE because `ps` has
  # its own coordinate columns (x, y) and no polygon `group`.
  geom_point(
    data = ps,
    mapping = aes(x, y),
    color = "black",
    inherit.aes = FALSE
  ) +
  theme_minimal() +
  coord_fixed(1.3) +
  labs(
    title = "Vaccination Rates for All Required Vaccines by County",
    subtitle = "Oregon Preschools",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

STEP 7: Color Palettes

## COLOR
# install.packages("colorspace")
library(colorspace)
# hcl_palettes(plot = TRUE)

# Bin the county-wide rate into the four legend ranges.
# Intervals are right-closed: [0, 70], (70, 75], (75, 80], (80, 100].
# include.lowest = TRUE keeps an exact 0 in the "0-70" bin; without it the
# lowest break is excluded and such a value would silently become NA.
# The factor levels are then listed from highest to lowest range, which is
# the order the legend displays them in.
map_data_joined2 <- map_data_joined %>%
  mutate(
    Vac_Rate_Category = cut(
      All.vaccines,
      breaks = c(0, 70, 75, 80, 100),
      labels = c("0-70", ">70-75", ">75-80", ">80-100"),
      right = TRUE,
      include.lowest = TRUE
    ),
    Vac_Rate_Category = factor(
      Vac_Rate_Category,
      levels = c(">80-100", ">75-80", ">70-75", "0-70")
    )
  )

# Classify each site by its all-vaccines percentage.
# case_when() takes the first matching branch, so each threshold only needs
# its lower bound. Rows where Per_Vac_AllVac is NA match none of the
# comparisons and fall through to the "No data available" label.
# The factor levels run from safest to most vulnerable, with "No data" last.
ps2 <- ps %>%
  mutate(
    Vaccination_Status = factor(
      case_when(
        Per_Vac_AllVac >= 95 ~ "Safest (95-100% of students fully vaccinated)",
        Per_Vac_AllVac >= 90 ~ "Moderately vulnerable (90-94.9% of students fully vaccinated)",
        Per_Vac_AllVac >= 85 ~ "More vulnerable (85-89.9% of students fully vaccinated)",
        Per_Vac_AllVac < 85 ~ "Most vulnerable (less than 85% of students fully vaccinated)",
        TRUE ~ "No data available (fewer than 10 children)"
      ),
      levels = c(
        "Safest (95-100% of students fully vaccinated)",
        "Moderately vulnerable (90-94.9% of students fully vaccinated)",
        "More vulnerable (85-89.9% of students fully vaccinated)",
        "Most vulnerable (less than 85% of students fully vaccinated)",
        "No data available (fewer than 10 children)"
      )
    )
  )

# Step 7 figure: binned county choropleth plus site points, each with its
# own hand-picked colour scale.
ggplot(map_data_joined2, aes(x = lon, y = lat, group = group)) +
  # Layers -- order matters: counties first, site points drawn on top.
  geom_polygon(aes(fill = Vac_Rate_Category), color = "gray55") +
  geom_point(
    data = ps2,
    mapping = aes(x, y, color = Vaccination_Status),
    inherit.aes = FALSE
  ) +
  # County fill: darker shade = higher vaccination range.
  scale_fill_manual(
    name = "County Immunization Rates:\n % with all vaccines",
    values = c(
      ">80-100" = "#66828f",
      ">75-80" = "#87aeb0",
      ">70-75" = "#acc6bf",
      "0-70" = "#e9eed8"
    )
  ) +
  # Site colour: pale yellow (safest) through red (most vulnerable),
  # grey for sites with no reported rate.
  scale_color_manual(
    name = "Preschool/Child Care \n % with all vaccines required",
    values = c(
      "Safest (95-100% of students fully vaccinated)" = "#ffffb9",
      "Moderately vulnerable (90-94.9% of students fully vaccinated)" = "#f5cd6f",
      "More vulnerable (85-89.9% of students fully vaccinated)" = "#eb914b",
      "Most vulnerable (less than 85% of students fully vaccinated)" = "#cd3228",
      "No data available (fewer than 10 children)" = "#d9dada"
    )
  ) +
  # Legend order: site legend first, county legend second.
  guides(
    color = guide_legend(order = 1),
    fill = guide_legend(order = 2)
  ) +
  coord_fixed(1.3) +
  labs(
    title = "How well-vaccinated is your child care, school, and county?",
    # NOTE(review): this fill label appears to be overridden by the `name`
    # given to scale_fill_manual() above -- confirm and drop if so.
    fill = "Vaccination Rate"
  ) +
  theme_minimal() +
  theme(
    # Smaller type throughout so both legends fit beside the map.
    text = element_text(size = 8),
    plot.title = element_text(size = 10),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 7),
    # Coordinates carry no information here.
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

STEP 8: Apply scales, themes, and guides to polish your graphic

## POLISH
# Step 8 figure: the same two-scale map, with axis titles removed, a data
# source caption added and bold legend titles.
ggplot(map_data_joined2, aes(x = lon, y = lat, group = group)) +
  # Layers -- counties underneath, site points on top.
  geom_polygon(aes(fill = Vac_Rate_Category), color = "gray55") +
  geom_point(
    data = ps2,
    mapping = aes(x, y, color = Vaccination_Status),
    inherit.aes = FALSE
  ) +
  # County fill palette, keyed by the binned rate labels.
  scale_fill_manual(
    name = "County Immunization Rates:\n % with all vaccines",
    values = c(
      ">80-100" = "#66828f",
      ">75-80" = "#87aeb0",
      ">70-75" = "#acc6bf",
      "0-70" = "#e9eed8"
    )
  ) +
  # Site colour palette, keyed by the vulnerability labels.
  scale_color_manual(
    name = "Preschool/Child Care \n % with all vaccines required",
    values = c(
      "Safest (95-100% of students fully vaccinated)" = "#ffffb9",
      "Moderately vulnerable (90-94.9% of students fully vaccinated)" = "#f5cd6f",
      "More vulnerable (85-89.9% of students fully vaccinated)" = "#eb914b",
      "Most vulnerable (less than 85% of students fully vaccinated)" = "#cd3228",
      "No data available (fewer than 10 children)" = "#d9dada"
    )
  ) +
  # Site legend is listed first, county legend second.
  guides(
    color = guide_legend(order = 1),
    fill = guide_legend(order = 2)
  ) +
  coord_fixed(1.3) +
  labs(
    title = "How well-vaccinated is your child care, school, and county?",
    caption = "Data from oregon.gov",
    x = NULL,
    y = NULL,
    # NOTE(review): this fill label appears to be overridden by the `name`
    # given to scale_fill_manual() above -- confirm and drop if so.
    fill = "Vaccination Rate"
  ) +
  theme_minimal() +
  theme(
    # Typography: small base size, slightly larger title, bold legend titles.
    text = element_text(size = 8),
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0.9, size = 8),
    legend.title = element_text(face = "bold", size = 8),
    legend.text = element_text(size = 7),
    # Strip axis furniture; coordinates are not meaningful to the reader.
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

STEP 9: Extra Credit for Interactivity

This is not perfect, as I ran out of time, but I played around with interactivity (do not expect much from this attempt).

## INTERACTION
library(ggplot2)
library(dplyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Static ggplot that is handed to plotly below: binned county choropleth
# with the site points on top.
p <- ggplot(map_data_joined2, aes(x = lon, y = lat, group = group)) +
  geom_polygon(aes(fill = Vac_Rate_Category), color = "gray55") +
  scale_fill_manual(
    # The names must match the levels of Vac_Rate_Category exactly
    # (">80-100", ">75-80", ">70-75", "0-70"). The earlier names
    # (">80", "75-80", "70-75") matched no level, so those counties were
    # not given their intended colour.
    values = c(
      ">80-100" = "#66828f",
      ">75-80" = "#87aeb0",
      ">70-75" = "#acc6bf",
      "0-70" = "#e9eed8"
    ),
    name = "County Immunization Rates:\n % with all vaccines"
  ) +
  # Site points use their own coordinate columns, hence inherit.aes = FALSE.
  geom_point(
    data = ps2,
    mapping = aes(x, y, color = Vaccination_Status),
    inherit.aes = FALSE
  ) +
  scale_color_manual(
    values = c(
      "Safest (95-100% of students fully vaccinated)" = "#ffffb9",
      "Moderately vulnerable (90-94.9% of students fully vaccinated)" = "#f5cd6f",
      "More vulnerable (85-89.9% of students fully vaccinated)" = "#eb914b",
      "Most vulnerable (less than 85% of students fully vaccinated)" = "#cd3228",
      "No data available (fewer than 10 children)" = "#d9dada"
    ),
    name = "Preschool/Child Care \n % with all vaccines required"
  ) +
  theme_minimal() +
  coord_fixed(1.3) +
  labs(
    fill = "Vaccination Rate",
    title = "How well-vaccinated is your child care, school, and county?",
    x = NULL,
    y = NULL,
    caption = "Data from oregon.gov"
  ) +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    plot.caption = element_text(hjust = 0.9, size = 8),
    legend.title = element_text(face = "bold", size = 8),
    text = element_text(size = 8),
    plot.title = element_text(size = 10),
    legend.text = element_text(size = 7)
  ) +
  guides(
    fill = guide_legend(order = 2),
    color = guide_legend(order = 1)
  )

# Convert to interactive plot using plotly
interactive_plot <- ggplotly(p)

# Show the interactive plot
interactive_plot

STEP 10: Extra Extra Credit for Ordering for Overlap

This step also makes the “No data available (fewer than 10 children)” points smaller.

## ORDER
# Points are drawn in row order, so sort the sites so that "No data" rows
# come first (drawn underneath) and "Most vulnerable" rows come last
# (drawn on top). The sort key is the factor itself, re-levelled into the
# desired drawing order.
ps3 <- mutate(
  ps2,
  Vaccination_Status = factor(
    Vaccination_Status,
    levels = c(
      "No data available (fewer than 10 children)",
      "Safest (95-100% of students fully vaccinated)",
      "Moderately vulnerable (90-94.9% of students fully vaccinated)",
      "More vulnerable (85-89.9% of students fully vaccinated)",
      "Most vulnerable (less than 85% of students fully vaccinated)"
    )
  )
)
ps3 <- arrange(ps3, Vaccination_Status)

# Step 10 figure: same map, but sites come from `ps3` (sorted so the most
# vulnerable are drawn last) and "No data" sites are drawn smaller.
ggplot(map_data_joined2, aes(x = lon, y = lat, group = group)) +
  # Layers -- counties underneath, site points on top.
  geom_polygon(aes(fill = Vac_Rate_Category), color = "gray55") +
  # Status drives both the colour and the size of each point.
  geom_point(
    data = ps3,
    mapping = aes(x, y, color = Vaccination_Status, size = Vaccination_Status),
    inherit.aes = FALSE
  ) +
  scale_fill_manual(
    name = "County Immunization Rates:\n % with all vaccines",
    values = c(
      ">80-100" = "#66828f",
      ">75-80" = "#87aeb0",
      ">70-75" = "#acc6bf",
      "0-70" = "#e9eed8"
    )
  ) +
  scale_color_manual(
    name = "Preschool/Child Care \n % with all vaccines required",
    values = c(
      "Safest (95-100% of students fully vaccinated)" = "#ffffb9",
      "Moderately vulnerable (90-94.9% of students fully vaccinated)" = "#f5cd6f",
      "More vulnerable (85-89.9% of students fully vaccinated)" = "#eb914b",
      "Most vulnerable (less than 85% of students fully vaccinated)" = "#cd3228",
      "No data available (fewer than 10 children)" = "#d9dada"
    )
  ) +
  # All rated sites share one size; only "No data" sites are shrunk.
  # The size legend is suppressed since it would just repeat the colour one.
  scale_size_manual(
    guide = "none",
    values = c(
      "Safest (95-100% of students fully vaccinated)" = 1.2,
      "Moderately vulnerable (90-94.9% of students fully vaccinated)" = 1.2,
      "More vulnerable (85-89.9% of students fully vaccinated)" = 1.2,
      "Most vulnerable (less than 85% of students fully vaccinated)" = 1.2,
      "No data available (fewer than 10 children)" = 0.7
    )
  ) +
  # Site legend is listed first, county legend second.
  guides(
    color = guide_legend(order = 1),
    fill = guide_legend(order = 2)
  ) +
  coord_fixed(1.3) +
  labs(
    title = "How Well-Vaccinated is Your Child Care, School, and County?",
    caption = "Data from oregon.gov",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 8),
    plot.title = element_text(size = 10),
    plot.caption = element_text(hjust = 0.9, size = 8),
    legend.title = element_text(face = "bold", size = 8),
    legend.text = element_text(size = 7),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

STEP 11: Make an alternative

Create an alternative that improves some aspect(s) of the graphic using best practices (improving on perception and/or color theory)

## ALTERNATIVE
library(colorspace)

# Number of "Most vulnerable" sites per county, with County turned into a
# factor ordered from the largest count to the smallest so the bars in the
# chart below appear in descending order.
most_vulnerable_preschools <- ps2 %>%
  filter(
    Vaccination_Status ==
      "Most vulnerable (less than 85% of students fully vaccinated)"
  ) %>%
  group_by(County) %>%
  summarize(Count = n()) %>%
  ungroup() %>%
  mutate(County = fct_reorder(County, Count, .desc = TRUE))

# Alternative graphic: one bar per county showing how many sites fall in
# the "Most vulnerable" class; the bar fill encodes the same count on a
# sequential light-to-dark gradient.
ggplot(
  most_vulnerable_preschools,
  aes(x = County, y = Count, fill = Count)
) +
  geom_bar(stat = "identity") +
  # Gradient endpoints are the first and last of nine colours from the
  # reversed colorspace "BluYl" palette (chosen with colour-blind
  # readability in mind): lightest for the fewest sites, darkest for the most.
  scale_fill_gradient(
    low = sequential_hcl(9, palette = "BluYl", rev = TRUE)[1],
    high = sequential_hcl(9, palette = "BluYl", rev = TRUE)[9],
    guide = guide_colorbar(title = "Number of Vulnerable Sites")
  ) +
  labs(
    title = "Multnomah County Preschools are the Most Vulnerable",
    subtitle = "Vaccine Numbers Need to Start Improving",
    caption = "Data from oregon.gov",
    x = "County",
    y = "Number of Preschools \n (with most vulnerable rating)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 1, size = 8),
    # Slant the county names so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )