rm(list=ls())
library(stargazer)
library(data.table)
library(ggplot2)
library(lfe)
library(rgdal)
library(rgeos)
library(gridExtra)
library(scales)
library(zoo)
library(foreign)
library(fst)
library(stringr)
library(ggrepel)
library(lubridate)
library(tidycensus)
library(arrow)
library(dplyr)
library(purrr)
library(jsonlite)
library(quantmod)
library(tidyverse)
library(directlabels)
# files = list.files(pattern="*.parquet",path = "C:/Users/dratnadiwakara2/Downloads/sg/sg/weekly.parq",full.names = TRUE)
# 
# weekly = lapply(files, read_parquet)
# weekly = do.call("rbind", weekly)
# 
# weekly = data.table(weekly)
# 
# 
# v = data.table(t(as.matrix(sapply(weekly$visits_by_day,function(x) t(fromJSON(x))))))
# names(v) = paste0("visitsonday",1:7)
# weekly = cbind(weekly,v)
# 
# weekly <- cbind(weekly,data.table(purrr::map_dfr(weekly$device_type, jsonlite::fromJSON)))
# 
# 
# 
# weekly[,date_range_start:=as.Date(substr(weekly$date_range_start,1,10))]
# 
# write_fst(weekly,"C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/weekly.fst",compress = 100)
# files = list.files(pattern="*.parquet",path = "C:/Users/dratnadiwakara2/Downloads/sg/sg/core.parq",full.names = TRUE)
# 
# core = lapply(files, read_parquet)
# core = do.call("rbind", core)
# 
# write_fst(core,"C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/core.fst",compress = 100)
census_api_key("06b232797e7854aad802d7c7d8673c337fe1b29a")
acs_variables <- read.csv(file="C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/ACS Data/variable_names.csv",stringsAsFactors = FALSE,header = FALSE)

var_labels <- c("B00001_001","B02001_001","B02001_002","B01002_001","B25099_001","B25003_003","B25003_002","B06009_004","B25034_001","B25035_001","B25034_002","B25034_003","B25034_004","B25034_005","B25034_006","B25034_007","B25034_008","B25034_009","B25012_002","B25012_010","C27001A_001")

acs <- get_acs(geography = "county", year = 2018,output = "wide",variables = var_labels)

acs <- acs[,names(acs)[names(acs) %in% c("GEOID","year",acs_variables$V4)]]

for(i in 1:nrow(acs_variables)) {
  names(acs)[which(names(acs)==acs_variables[i,]$V4)]=acs_variables[i,]$V3
}

acs <- data.table(acs)
acs[,fips:=as.numeric(acs$GEOID)]
acs[,c("renter_occ_units","owner_occ_units"):=list(NULL)]

acs[,fracuninsured:= 1 - with_health_insurance/total_population]
us_counties <- readOGR("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US Counties/cb_2013_us_county_20m","cb_2013_us_county_20m")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US Counties\cb_2013_us_county_20m", layer: "cb_2013_us_county_20m"
## with 3221 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
us_counties <- fortify(us_counties,region="GEOID")

us_counties <- data.table(us_counties)

us_counties[,id:=as.numeric(us_counties$id)]

us_counties <- merge(us_counties,acs[,c("fips","fracuninsured")],by.x="id",by.y="fips")

us_counties[,state:=floor(id/1000)]

ggplot()+
geom_polygon(data=us_counties[! us_counties$state %in% c(2,15,72)], aes(x=long,y=lat,group=group,fill=fracuninsured),color="black")+
scale_fill_gradientn(colors=c("white","yellow","orange","red","darkred"))+ggtitle("Fraction Uninsured (ACS 2018)")+theme(legend.title = element_blank())

zip_county <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")

zip_county<- zip_county[order(zip_county$ZIP,-zip_county$RES_RATIO)]

zip_county <- zip_county[!duplicated(zip_county$ZIP),c("ZIP","COUNTY")]
core = read_fst(
  "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/core.fst",
  columns = c("safegraph_place_id","location_name","top_category","naics_code","latitude","longitude","region","postal_code","iso_country_code"),
  as.data.table = TRUE)
weekly = read_fst(
  "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/weekly.fst",
  columns = c("safegraph_place_id","location_name","postal_code","naics_code","date_range_start","raw_visit_counts","raw_visitor_counts",paste0("visitsonday",1:7),"android","ios"),
  as.data.table = TRUE)

weekly = merge(weekly,core[,c("safegraph_place_id","top_category")],by="safegraph_place_id")

weekly = merge(weekly,zip_county,by.x="postal_code",by.y="ZIP",all.x=TRUE)

weekly = merge(weekly,acs,by.x="COUNTY",by.y="fips")

weekly[,week:=as.numeric(format(date_range_start,format="%Y%m%d"))]

weekly[,place_category:=NA]
place_categories <- list()
place_categories[['grocery']] = c("General Merchandise Stores, including Warehouse Clubs and Supercenters","Grocery Stores")
gasolineother = c("Gasoline Stations","Health and Personal Care Stores")
place_categories[['gasolineother']] = c("Gasoline Stations","Health and Personal Care Stores")
place_categories[['restaurants']] =c("Restaurants and Other Eating Places")
place_categories[['entertainment1']] = c("Museums, Historical Sites, and Similar Institutions","Gambling Industries","Motion Picture and Video Industries","Spectator Sports","Department Stores")
place_categories[['entertainment2']] = c("Sporting Goods, Hobby, and Musical Instrument Stores","Amusement Parks and Arcades")
place_categories[['healthservices']] = c("General Medical and Surgical Hospitals","Offices of Other Health Practitioners")
place_categories[['airports']] = c("Support Activities for Air Transportation")
i=1
for(cat in place_categories) {
  weekly[,place_category:=ifelse(weekly$top_category %in% cat,names(place_categories)[i],weekly$place_category)]
  i = i +1
}
category_county_week = weekly[!is.na(place_category),
                               .(v1=sum(visitsonday1,na.rm=TRUE),
                                 v2=sum(visitsonday2,na.rm=TRUE),
                                 v3=sum(visitsonday3,na.rm=TRUE),
                                 v4=sum(visitsonday4,na.rm=TRUE),
                                 v5=sum(visitsonday5,na.rm=TRUE),
                                 v6=sum(visitsonday6,na.rm=TRUE),
                                 v7=sum(visitsonday7,na.rm=TRUE)),
                              by=.(place_category,week)]

category_county_week = melt(category_county_week,id.vars = c("place_category","week"))

category_county_week[,variable:=as.numeric(as.character(substr(variable,2,2)))]
category_county_week[,day:=as.numeric(week+variable)]
category_county_week[,day:=day-min(unique(category_county_week$day))+1]

category_county_week[,variable:=NULL]

names(category_county_week) <- c("place_category","week","visits","dayno")

ggplot(category_county_week,aes(x=dayno,y=log(visits),color=place_category))+geom_line()+ 
  scale_colour_discrete(guide = 'none') +
  scale_x_continuous(expand=c(0,5)) +
  geom_dl(aes(label = place_category), method = list(dl.combine("last.points"), cex = 0.8)) + ggtitle("log(No of Visits) by Category")

Are people in areas where more people are uninsured more likely to social distance?

category_county_week = weekly[,.(v1=sum(visitsonday1,na.rm=TRUE),
                                 v2=sum(visitsonday2,na.rm=TRUE),
                                 v3=sum(visitsonday3,na.rm=TRUE),
                                 v4=sum(visitsonday4,na.rm=TRUE),
                                 v5=sum(visitsonday5,na.rm=TRUE),
                                 v6=sum(visitsonday6,na.rm=TRUE),
                                 v7=sum(visitsonday7,na.rm=TRUE),
                                 fracuninsured=mean(fracuninsured,na.rm=TRUE)),
                              by=.(top_category,week,COUNTY)]

category_county_week = melt(category_county_week,id.vars = c("top_category","week","COUNTY","fracuninsured"))

category_county_week[,variable:=as.numeric(as.character(substr(variable,2,2)))]
category_county_week[,day:=as.numeric(week+variable)]
category_county_week[,day:=day-min(unique(category_county_week$day))+1]

category_county_week[,variable:=NULL]

names(category_county_week) <- c("top_category","week","COUNTY","fracuninsured","visits","dayno")

category_county_week[,state:=floor(COUNTY/1000)]
uninsub = 0.7 # 2.5% percentile
uninslb = 0.03 # 97.5% percentile

category_county_week = category_county_week[fracuninsured>uninslb & fracuninsured<uninsub]

Regression Evidence

regs <- list()
gr <- list()

i = 1
collabels = NULL
for(cat in place_categories) {

  regs[[i]] <- felm(log(1+visits)~fracuninsured*factor(week)|top_category+week+COUNTY|0|COUNTY,data=category_county_week[top_category %in% cat])
 
  collabels = c(collabels,names(place_categories)[i])
 
  gr[[i]] = .coef_plot_1reg(regs[[i]],"fracuninsured:factor(week)",20200301)+ggtitle(names(place_categories)[i])
  i= i+1
}


.printtable(regs,column.labels = collabels)
## 
## ===========================================================================================================================
##                                                                      Dependent variable:                                   
##                                    ----------------------------------------------------------------------------------------
##                                    grocery gasolineother restaurants entertainment1 entertainment2 healthservices airports 
##                                      (1)        (2)          (3)          (4)            (5)            (6)          (7)   
## ---------------------------------------------------------------------------------------------------------------------------
## fracuninsured                                                                                                              
##                                    (0.000)    (0.000)      (0.000)      (0.000)        (0.000)        (0.000)      (0.000) 
## factor(week)20200308                                                                                                       
##                                    (0.000)    (0.000)      (0.000)      (0.000)        (0.000)        (0.000)      (0.000) 
## factor(week)20200315                                                                                                       
##                                    (0.000)    (0.000)      (0.000)      (0.000)        (0.000)        (0.000)      (0.000) 
## factor(week)20200322                                                                                                       
##                                    (0.000)    (0.000)      (0.000)      (0.000)        (0.000)        (0.000)      (0.000) 
## fracuninsured:factor(week)20200308  0.010     0.037**       0.019       -0.089**        -0.022        -0.061**      0.002  
##                                    (0.013)    (0.017)      (0.016)      (0.038)        (0.034)        (0.027)      (0.049) 
## fracuninsured:factor(week)20200315 -0.011     -0.016       0.069**     -0.581***      -0.327***      -0.112***    -0.226***
##                                    (0.020)    (0.024)      (0.033)      (0.070)        (0.064)        (0.034)      (0.069) 
## fracuninsured:factor(week)20200322 -0.035     -0.041       -0.026      -0.844***      -0.456***      -0.143***    -0.496***
##                                    (0.027)    (0.030)      (0.048)      (0.087)        (0.082)        (0.038)      (0.102) 
## ---------------------------------------------------------------------------------------------------------------------------
##                                                                                                                            
## Observations                       162,113    161,378      84,791       179,375         99,610        127,015      48,664  
## Adjusted R2                         0.888      0.872        0.975        0.728          0.869          0.820        0.882  
## ===========================================================================================================================
## Note:                                                                                           *p<0.1; **p<0.05; ***p<0.01
## 

Regression evidence

These graphs plot the coefficients of the above regression with their corresponding 95% confidence intervals

grid.arrange(gr[[1]],gr[[2]],gr[[3]],gr[[4]],gr[[5]],gr[[6]],gr[[7]],ncol=4)

How do lower-income uninsured people behave?

category_county_week_device = weekly[,.(
                                         android=sum(android,na.rm=TRUE),
                                         ios=sum(ios,na.rm=TRUE),
                                         fracuninsured=mean(fracuninsured,na.rm=TRUE)),
                                        by=.(top_category,week,COUNTY)]

category_county_week_device = melt(category_county_week_device,id.vars = c("top_category","week","COUNTY","fracuninsured"))

category_county_week_device[,variable:=as.character(variable)]
category_county_week_device[,android:=ifelse(variable=="ios",0,1)]

category_county_week_device[,variable:=NULL]

names(category_county_week_device) <- c("top_category","week","COUNTY","fracuninsured","visits","android")

category_county_week_device[,state:=floor(COUNTY/1000)]

category_county_week_device = category_county_week_device[fracuninsured>uninslb & fracuninsured<uninsub]
regs <- list()
gr <- list()

i = 1
collabels = NULL
for(cat in place_categories) {

  regs[[i]] <- felm(log(1+visits)~fracuninsured*android*factor(week)|top_category+week+COUNTY|0|COUNTY,data=category_county_week_device[top_category %in% cat])
 
  collabels = c(collabels,names(place_categories)[i])
 
  gr[[i]] = .coef_plot_1reg(regs[[i]],"fracuninsured:android:factor(week)",20200301)+ggtitle(names(place_categories)[i])
  i= i+1
}


.printtable(regs,column.labels = collabels)
## 
## =====================================================================================================================================
##                                                                               Dependent variable:                                    
##                                            ------------------------------------------------------------------------------------------
##                                             grocery  gasolineother restaurants entertainment1 entertainment2 healthservices airports 
##                                               (1)         (2)          (3)          (4)            (5)            (6)          (7)   
## -------------------------------------------------------------------------------------------------------------------------------------
## fracuninsured                                                                                                                        
##                                             (0.000)     (0.000)      (0.000)      (0.000)        (0.000)        (0.000)      (0.000) 
## android                                    0.314***    0.227***      -0.016       0.045**        0.105***       0.203***      0.012  
##                                             (0.014)     (0.015)      (0.015)      (0.019)        (0.023)        (0.021)      (0.037) 
## factor(week)20200308                                                                                                                 
##                                             (0.000)     (0.000)      (0.000)      (0.000)        (0.000)        (0.000)      (0.000) 
## factor(week)20200315                                                                                                                 
##                                             (0.000)     (0.000)      (0.000)      (0.000)        (0.000)        (0.000)      (0.000) 
## factor(week)20200322                                                                                                                 
##                                             (0.000)     (0.000)      (0.000)      (0.000)        (0.000)        (0.000)      (0.000) 
## fracuninsured:android                      -0.272***   -0.326***    -0.333***    -0.501***      -0.566***      -0.486***    -0.515***
##                                             (0.061)     (0.065)      (0.067)      (0.078)        (0.094)        (0.089)      (0.142) 
## fracuninsured:factor(week)20200308           0.020     0.098***      0.066**      -0.109*        -0.118*         -0.031       0.132  
##                                             (0.030)     (0.036)      (0.032)      (0.062)        (0.067)        (0.067)      (0.129) 
## fracuninsured:factor(week)20200315           0.043     0.164***     0.265***     -0.609***       -0.218**        0.037       -0.262  
##                                             (0.034)     (0.045)      (0.052)      (0.095)        (0.100)        (0.079)      (0.166) 
## fracuninsured:factor(week)20200322           0.028     0.196***     0.295***     -0.936***       -0.243*         0.115      -0.633***
##                                             (0.042)     (0.055)      (0.074)      (0.134)        (0.138)        (0.088)      (0.184) 
## android:factor(week)20200308               -0.044***    -0.008      -0.031***    -0.046***      -0.077***       -0.039**     -0.015  
##                                             (0.008)     (0.011)      (0.009)      (0.015)        (0.021)        (0.020)      (0.040) 
## android:factor(week)20200315               0.087***    0.146***     0.144***      0.055***       0.109***       0.077***     0.091** 
##                                             (0.009)     (0.012)      (0.013)      (0.019)        (0.025)        (0.022)      (0.041) 
## android:factor(week)20200322               0.096***    0.178***     0.186***      0.191***       0.252***       0.128***    0.161*** 
##                                             (0.009)     (0.013)      (0.014)      (0.021)        (0.027)        (0.024)      (0.043) 
## fracuninsured:android:factor(week)20200308  -0.025      -0.060       -0.031        -0.015         0.141*         -0.047      -0.079  
##                                             (0.034)     (0.044)      (0.041)      (0.059)        (0.083)        (0.085)      (0.164) 
## fracuninsured:android:factor(week)20200315 -0.101***   -0.127***     -0.027       0.184**         0.004          -0.093       0.073  
##                                             (0.034)     (0.046)      (0.054)      (0.073)        (0.092)        (0.090)      (0.172) 
## fracuninsured:android:factor(week)20200322 -0.090***   -0.118**      -0.046       0.199**         -0.058         -0.150       0.057  
##                                             (0.035)     (0.052)      (0.053)      (0.086)        (0.106)        (0.103)      (0.169) 
## -------------------------------------------------------------------------------------------------------------------------------------
##                                                                                                                                      
## Observations                                46,318      46,108       24,226        51,250         28,460         36,290      13,904  
## Adjusted R2                                  0.882       0.862        0.970        0.719          0.857          0.816        0.889  
## =====================================================================================================================================
## Note:                                                                                                     *p<0.1; **p<0.05; ***p<0.01
## 
grid.arrange(gr[[1]],gr[[2]],gr[[3]],gr[[4]],gr[[5]],gr[[6]],gr[[7]],ncol=4)

Growth in the number of confirmed cases and number of deaths are higher when the fraction of insured is lower

CSSE <- fread("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")

CSSE <- CSSE[CSSE$FIPS>=1000]
CSSE[,c("UID","iso2","iso3","code3","Admin2","Province_State","Country_Region","Lat","Long_","Combined_Key"):=list(NULL)]

CSSE <- melt(data = CSSE,id.vars = c("FIPS"))
names(CSSE) <- c("fips","date","cases")
CSSE[,date:=as.character(CSSE$date)]
CSSE[,date:=as.Date(CSSE$date,"%m/%d/%y")]


CSSE_dealth <- fread("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv")

CSSE_dealth <- CSSE_dealth[CSSE_dealth$FIPS>=1000]
CSSE_dealth[,c("UID","iso2","iso3","code3","Admin2","Province_State","Country_Region","Lat","Long_","Combined_Key"):=list(NULL)]

CSSE_dealth <- melt(data = CSSE_dealth,id.vars = c("FIPS"))
names(CSSE_dealth) <- c("fips","date","deaths")
CSSE_dealth[,date:=as.character(CSSE_dealth$date)]
CSSE_dealth[,date:=as.Date(CSSE_dealth$date,"%m/%d/%y")]

# CSSE_change <- CSSE[CSSE$date %in% c(min(CSSE$date,na.rm=TRUE),max(CSSE$date,na.rm=TRUE))]

CSSE_change <- CSSE[CSSE$date %in% c(as.Date("2020-03-01"),max(CSSE$date,na.rm=TRUE))]
CSSE_change <- dcast(CSSE_change,formula = fips~date,fun.aggregate = sum )
names(CSSE_change) <- c("fips","start","end")
CSSE_change[,casegrowth:= end-start]


temp <- CSSE_dealth[CSSE_dealth$date %in% c(as.Date("2020-03-01"),max(CSSE$date,na.rm=TRUE))]
temp <- dcast(temp,formula = fips~date,fun.aggregate = sum )
names(temp) <- c("fips","start","end")
temp[,deathgrowth:= end-start]
temp[,c("start","end"):=list(NULL)]

CSSE_change <- merge(CSSE_change,temp,by="fips")
combdata <- merge(CSSE_change,acs,by="fips")
combdata[,casegrowth:=combdata$casegrowth/combdata$total_population]
combdata[,deathgrowth:=combdata$deathgrowth/combdata$total_population]
combdata[,fracuninsured:=1-combdata$with_health_insurance/combdata$total_population]
combdata[,state:=floor(combdata$fips/1000)]
regs <- list()
regs[[1]] <- felm(log(1+casegrowth)~fracuninsured+log(total_population)+log(median_hh_income)+median_age|state|0|state,data=combdata[ combdata$total_population>50000 & casegrowth>0])
regs[[2]] <- felm(log(1+deathgrowth)~fracuninsured+log(total_population)+log(median_hh_income)+median_age|state|0|state,data=combdata[ combdata$total_population>50000  & casegrowth>0])


.printtable(regs,column.labels = c("Change in # cases","Change in # deaths"))
## 
## ==========================================================
##                               Dependent variable:         
##                       ------------------------------------
##                       Change in # cases Change in # deaths
##                              (1)               (2)        
## ----------------------------------------------------------
## fracuninsured              0.004**           0.0002*      
##                            (0.002)           (0.0001)     
## log(total_population)     0.0002**          0.00001**     
##                           (0.0001)          (0.00000)     
## log(median_hh_income)       0.003             0.0001      
##                            (0.002)           (0.0001)     
## median_age                 0.00005           0.00000*     
##                           (0.00003)         (0.00000)     
## ----------------------------------------------------------
##                                                           
## Observations                 978               978        
## Adjusted R2                 0.238             0.104       
## ==========================================================
## Note:                          *p<0.1; **p<0.05; ***p<0.01
## 
# .coef_plot_1reg(regs[[2]],"factor(insuredcat)",50)