Load Packages and Import Data Sets

# packages
library(tidyverse)
library(plotly)
library(viridis)
# setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # set working directory to current file location

# Input tables: the two vaccine allocation tables are downloaded from
# data.cdc.gov; the cities table is read from the working directory.
Moderna <- read_csv(file = "https://data.cdc.gov/api/views/b7pe-5nws/rows.csv")
Pfizer <- read_csv(file = "https://data.cdc.gov/api/views/saz5-9hgg/rows.csv")
cities <- read_csv(file = "uscities.csv")

Tidy and Combine Data Sets

# Stack the Moderna and Pfizer tables into one long table, tagging each row
# with its source in a `Vaccine` column, then shorten the column names and
# parse the allocation week (month/day/year text) into a Date.
allocations <- list(Moderna = Moderna, Pfizer = Pfizer) %>%
  bind_rows(.id = "Vaccine") %>%
  rename(
    Week = `Week of Allocations`,
    Dose1 = `1st Dose Allocations`,
    Dose2 = `2nd Dose Allocations`
  ) %>%
  mutate(Week = as.Date(Week, format = "%m/%d/%Y"))
  

# Relabel the "New York City" jurisdiction as "New York", so its rows are
# grouped with the state's rows in the per-state summaries further down.
allocations <- allocations %>%
  mutate(
    Jurisdiction = replace(
      Jurisdiction,
      Jurisdiction %in% "New York City",
      "New York"
    )
  )

# Per-state population, taken as the sum of the populations of the cities
# listed for that state. One row per state name / state id pair, with columns
# `state`, `id` and `population`.
population_state <- cities %>%
  group_by(state = state_name, id = state_id) %>%
  summarise(population = sum(population)) %>%
  ungroup()
## `summarise()` has grouped output by 'state'. You can override using the `.groups` argument.
# Keep only the jurisdictions whose name is a state in `population_state`
# (`allocations` itself still mixes states with other jurisdictions) and
# attach that state's population. An inner join both filters and attaches.
allo_state <- allocations %>%
  inner_join(
    population_state %>% select(state, population),
    by = c("Jurisdiction" = "state")
  ) %>%
  rename(state = Jurisdiction)

# Jurisdictions that are not state names: look each one up as a city and
# replace it with the state that city belongs to. Jurisdictions that match no
# city are dropped.
#
# City names may repeat across states, so the lookup keeps a single row per
# name (the most populous city). Without this, one jurisdiction's rows would
# be duplicated once per same-named city and its doses credited to several
# states. NOTE(review): "most populous" is a heuristic for picking the
# intended city -- confirm it is right for every city-level jurisdiction.
allo_city <- allocations %>%
  anti_join(
    select(population_state, state),
    by = c("Jurisdiction" = "state")
  ) %>%
  left_join(
    cities %>%
      select(city, population, state = state_name) %>%
      group_by(city) %>%
      slice_max(population, n = 1, with_ties = FALSE) %>%
      ungroup() %>%
      select(city, state),
    by = c("Jurisdiction" = "city")
  ) %>%
  filter(!is.na(state)) %>%
  select(-Jurisdiction) %>%
  # The population is left missing here; the state's population is joined
  # back in when coverage is computed. A typed NA keeps the column numeric.
  mutate(population = NA_real_)

# Append the city-level rows (already mapped to their state) to the
# state-level rows; columns are matched by name.
allo_state <- allo_state %>%
  bind_rows(allo_city)
  
# Total doses allocated per vaccine and state, joined to the state population
# to give first- and second-dose coverage rates (doses / population).
# `Details` is the hover text used by the plotly map.
coverage <- allo_state %>%
  group_by(Vaccine, state) %>%
  summarise(across(c(Dose1, Dose2), sum)) %>%
  left_join(population_state %>% select(-id)) %>%
  mutate(
    cover_1 = Dose1 / population,
    cover_2 = Dose2 / population,
    Details = paste(
      "<br>", state, "<br>", "Coverage Rate: ", round(cover_1 * 100, 2), "%"
    )
  )
## `summarise()` has grouped output by 'Vaccine'. You can override using the `.groups` argument.
## Joining, by = "state"

Popularity of Moderna and Pfizer

Fluctuation of Allocations (No. of Vaccines) over Time

# Number of rows whose 1st-dose and 2nd-dose allocations differ
sum(allocations$Dose1 != allocations$Dose2)
## [1] 56
# Total number of rows in the combined allocation table
allocations %>% nrow()
## [1] 3465
  • Of all 3465 rows (each a combination of vaccine type and jurisdiction for a given week, e.g. Pfizer - Massachusetts or Moderna - Massachusetts), only 56 have 1st-dose and 2nd-dose allocations that do not match.
# 1st dose: weekly totals per vaccine, plotted as first-dose totals over time
# with one colour per vaccine.
a <- plot_ly(
  data = summarise(
    group_by(allocations, Week, Vaccine),
    total_1 = sum(Dose1),
    total_2 = sum(Dose2)
  ),
  x = ~Week,
  y = ~total_1,
  color = ~Vaccine
)
## `summarise()` has grouped output by 'Week'. You can override using the `.groups` argument.
# 2nd dose: weekly totals per vaccine, plotted as second-dose totals over
# time with one colour per vaccine. The y axis must be `total_2`; plotting
# `total_1` here would make this panel a copy of the first-dose panel.
b <- allocations %>%
  group_by(Week, Vaccine) %>%
  summarise(
    total_1 = sum(Dose1),
    total_2 = sum(Dose2)
  ) %>%
  plot_ly(x = ~Week, y = ~total_2, color = ~Vaccine)
## `summarise()` has grouped output by 'Week'. You can override using the `.groups` argument.
# Show the two dose panels side by side, with a shared title and a small
# label above each panel (positions are in paper coordinates).
layout(
  subplot(a, b),
  title = "Vaccine Allocation by Vaccine Types of 1st and 2nd doses",
  annotations = list(
    list(
      x = 0.2, y = 1.03, text = "1st",
      showarrow = FALSE, xref = "paper", yref = "paper"
    ),
    list(
      x = 0.8, y = 1.03, text = "2nd",
      showarrow = FALSE, xref = "paper", yref = "paper"
    )
  ),
  autosize = FALSE,
  width = 1000
)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning: Specifying width/height in layout() is now deprecated.
## Please specify in ggplotly() or plot_ly()
  • In general, Pfizer is more popular than Moderna.
    • Early in both the 1st- and 2nd-dose allocations, Moderna outnumbered Pfizer at several time points, such as Dec 21st, 2020, and Feb 1st, 8th, and 15th, 2021.
    • From Feb 22 to Jun 21, the gap in popularity between the two vaccines widened dramatically and then levelled off around Apr 19.

Coverage Rate of Moderna and Pfizer in Mainland States

# Choropleth of first-dose coverage: state outlines from map_data("state")
# joined to `coverage` on the lower-cased state name, filled by coverage in
# percent, with one facet per vaccine. `Details` is mapped to a `hover`
# aesthetic so it can be used as the tooltip when converted with ggplotly().
coverage_map <- ggplot(
  data = left_join(
    map_data("state"),
    mutate(coverage, state = tolower(state)),
    by = c("region" = "state")
  ),
  mapping = aes(hover = Details)
) +
  geom_polygon(
    mapping = aes(x = long, y = lat, fill = cover_1 * 100, group = group),
    color = "white"
  ) +
  coord_fixed(ratio = 1.3) +
  scale_fill_gradientn(
    colors = rev(rainbow(7)),
    breaks = c(20, 40, 60, 80)
  ) +
  facet_wrap(facets = vars(Vaccine), nrow = 2) +
  labs(
    title = "Coverage Rate of Moderna and Pfizer in Mainland States",
    x = "Longitude",
    y = "Latitude",
    fill = "Coverage Rate (%)"
  ) +
  theme_classic()
    

# Convert the map to an interactive plotly widget with `Details` as the
# tooltip. The fixed size is passed to ggplotly() rather than layout(),
# because specifying width/height in layout() is deprecated.
ggplotly(coverage_map, tooltip = "Details", width = 1000, height = 700) %>%
  layout(autosize = FALSE)

The graph displays the coverage rate of the Moderna and Pfizer vaccines in the mainland states (Hawaii, Alaska, and Puerto Rico are excluded).
  • The coverage rate of Pfizer in the mainland states is generally higher than that of Moderna.
  • States on the east coast, the south coast, and in the northwest have higher coverage rates for both types of vaccine.
  • Vermont has the highest coverage rate for both types of vaccine.