Question 1

Q1. Take the items from the DFP survey on countermeasures used to mitigate the virus1.Take the national items from the Google mobility data indicating mobility for retail, mobility for visiting a workplace, and mobility for recreation. Aggregate these mobility data with a smoothed average preceding the survey dates of each survey in the DFP data. Which of the mobility series are most strongly related with which countermeasures?

library(tidyverse)
library(magrittr)
library(RcppRoll)
library(lubridate)
library(broom)


d1 <- "https://github.com/thomasjwood/ps7160/raw/master/cleaned_dfp_covid_econ_2021-02-24.rds" %>% 
  url %>% 
  gzcon %>% 
  readRDS %>% 
  as_tibble

d2 <-  "https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv" %>%
  read_csv

A national table to answer this question

t2 <- d2 %>% 
  filter(
    country_region == "United States" &
    sub_region_1 %>% is.na
  ) %>% 
  mutate(
    across(
      retail_and_recreation_percent_change_from_baseline:residential_percent_change_from_baseline,
      ~roll_meanr(.x, 7)
      )
    ) %>% 
  select(
    date, retail_and_recreation_percent_change_from_baseline:residential_percent_change_from_baseline
  ) %>% 
  set_colnames(
     c("date",
       str_c(
         "mob_",
           c("retail_and_recreation",
             "grocery_and_pharmacy",
             "parks",
             "transit_stations",
             "workplaces",
             "residential")
         )
       )
     )

join the mobility and the survey data, and make the dependent variables numeric

d1 %<>% 
  mutate(
    date = endtime %>% 
      as.Date
    ) %>% 
  left_join(t2)


d1 %<>% 
  modify_if(
    names(d1) %>% 
      str_detect("corona_measure"),
    ~str_detect(.x, "took this measure") %>% 
      ifelse(
        1, 0
      ) %>% 
      as.character %>% 
      as.numeric
  ) 

Modeling and plotting

m1 <- crossing(
  dv = names(d1) %>% 
    str_subset("corona_measure"),
  iv = names(d1) %>% 
    str_subset("mob_"),
  type = c("bivar",
           "multivar")
  )

m1$frm <- m1 %>% 
  pmap_chr(
    function(
      dv, iv, type
      )      
      str_c(
        dv, 
        " ~ ",
        type %>% 
          equals("bivar") %>% 
          ifelse(
            yes = iv,
            no = iv %>% 
              c(names(d1) %>% 
                  str_subset("mob_")) %>% 
              unique %>% 
              str_c(collapse = " + ")
            )
        )
    )

m1$l_m <- m1$frm %>% 
  map(
    ~lm(.x, data = d1)
  )

m1 %>% 
  pmap_dfr(
    function(
      dv, iv, type, frm, l_m
      )
      
      l_m %>%
      tidy %>% 
      slice(2) %>% 
      mutate(
        dv = dv,
        type = type
      )
    ) %>% 
  mutate(
    dv = dv %>%
      fct_reorder(
        estimate %>%
          abs,
        .desc = T
        ),
    term = term %>%
      fct_reorder(
        estimate,
        var,
        .desc = T
        )
    ) %>% 
  ggplot(
    aes(
      term, estimate
    )
  ) +
  geom_hline(
    yintercept =  0
  ) +
  geom_point(
    aes(
      shape = type
    )
  ) +
  facet_wrap(~dv) +
  theme(
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    )
  )

Question 2

Q.2 Repeat this analysis, separately according to respondent’s home state (now use the separate state level Google mobility data.) Do the relationships you found in Q1 persist?

Let’s just pass another indicator to the group_by to get state level estimates

t2 <- d2 %>% 
  filter(
    country_region == "United States" &
    sub_region_1 %>% is.na %>% 
      not
  ) %>% 
  group_by(
    sub_region_1
  ) %>% 
  mutate(
    across(
      retail_and_recreation_percent_change_from_baseline:residential_percent_change_from_baseline,
      ~roll_meanr(.x, 7)
    )
  ) %>% 
  select(
    sub_region_1, 
    date, retail_and_recreation_percent_change_from_baseline:residential_percent_change_from_baseline
  ) %>% 
  set_colnames(
    c("state",
      "date",
      str_c(
        "state_mob_",
        c("retail_and_recreation",
          "grocery_and_pharmacy",
          "parks",
          "transit_stations",
          "workplaces",
          "residential")
      )
    )
  ) %>% 
  mutate(
    state = state %>% 
      str_replace_all(
        state.abb %>% 
          set_attr(
            "names",
            state.name
            )
        )
    )

d1 %<>% 
  left_join(
    t2
  )


m2 <- crossing(
  dv = names(d1) %>% 
    str_subset("corona_measure"),
  iv = names(d1) %>% 
    str_subset("mob_")
  )

m2$frm <- m2 %>% 
  pmap_chr(
    function(
      dv, iv
    )      
      str_c(
        dv, 
        " ~ ",
        iv
      )
    )

m2$mod <- m2$frm %>% 
  map(
    ~lm(.x, data = d1)
  )

t3 <- m2 %>% 
  select(-frm) %>% 
  pmap_dfr(
    function(
      dv, iv, mod 
    )
      
      mod %>% 
      tidy %>% 
      slice(-1) %>% 
      mutate(
        dv = dv,
        iv = iv
        )
      ) %>% 
  mutate(
    join_type = term %>% 
      str_detect("state_") %>% 
      ifelse(
        "State join",
        "National join"
      ),
    term = term %>% 
      str_remove_all(
        "mob_|state_mob_"
      ),
    dv = dv %>%
      fct_reorder(
        estimate %>%
          abs,
        .desc = T
        ),
    term = term %>%
      fct_reorder(
        estimate,
        var,
        .desc = T
        )
    )

t3 %>% 
  ggplot(
    aes(
      term, estimate
    )
  ) +
  geom_hline(
    yintercept =  0
  ) +
  geom_point(
    aes(
      shape = join_type
    )
  ) +
  facet_wrap(~dv) +
  theme(
    axis.text.x = element_text(
      angle = 45, hjust = 1, vjust = 1
    )
  )

Gosh, the higher level of aggregation appears better related to the behavioral outcomes