rm(list=ls())
library(stargazer)
library(data.table)
library(ggplot2)
library(lfe)
library(rgdal)
library(rgeos)
library(gridExtra)
library(scales)
library(zoo)
library(foreign)
library(fst)
library(stringr)
library(ggrepel)
library(lubridate)
library(tidycensus)
library(arrow)
library(dplyr)
library(purrr)
library(jsonlite)
library(quantmod)
library(tidyverse)
library(directlabels)
# files = list.files(pattern="*.parquet",path = "C:/Users/dratnadiwakara2/Downloads/sg/sg/weekly.parq",full.names = TRUE)
#
# weekly = lapply(files, read_parquet)
# weekly = do.call("rbind", weekly)
#
# weekly = data.table(weekly)
#
#
# v = data.table(t(as.matrix(sapply(weekly$visits_by_day,function(x) t(fromJSON(x))))))
# names(v) = paste0("visitsonday",1:7)
# weekly = cbind(weekly,v)
#
# weekly <- cbind(weekly,data.table(purrr::map_dfr(weekly$device_type, jsonlite::fromJSON)))
#
#
#
# weekly[,date_range_start:=as.Date(substr(weekly$date_range_start,1,10))]
#
# write_fst(weekly,"C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/weekly.fst",compress = 100)
# files = list.files(pattern="*.parquet",path = "C:/Users/dratnadiwakara2/Downloads/sg/sg/core.parq",full.names = TRUE)
#
# core = lapply(files, read_parquet)
# core = do.call("rbind", core)
#
# write_fst(core,"C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/core.fst",compress = 100)
census_api_key("06b232797e7854aad802d7c7d8673c337fe1b29a")
acs_variables <- read.csv(file="C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/ACS Data/variable_names.csv",stringsAsFactors = FALSE,header = FALSE)
var_labels <- c("B00001_001","B02001_001","B02001_002","B01002_001","B25099_001","B25003_003","B25003_002","B06009_004","B25034_001","B25035_001","B25034_002","B25034_003","B25034_004","B25034_005","B25034_006","B25034_007","B25034_008","B25034_009","B25012_002","B25012_010","C27001A_001")
acs <- get_acs(geography = "county", year = 2018,output = "wide",variables = var_labels)
acs <- acs[,names(acs)[names(acs) %in% c("GEOID","year",acs_variables$V4)]]
for(i in 1:nrow(acs_variables)) {
names(acs)[which(names(acs)==acs_variables[i,]$V4)]=acs_variables[i,]$V3
}
acs <- data.table(acs)
acs[,fips:=as.numeric(acs$GEOID)]
acs[,c("renter_occ_units","owner_occ_units"):=list(NULL)]
acs[,fracuninsured:= 1 - with_health_insurance/total_population]
us_counties <- readOGR("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US Counties/cb_2013_us_county_20m","cb_2013_us_county_20m")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US Counties\cb_2013_us_county_20m", layer: "cb_2013_us_county_20m"
## with 3221 features
## It has 9 fields
## Integer64 fields read as strings: ALAND AWATER
us_counties <- fortify(us_counties,region="GEOID")
us_counties <- data.table(us_counties)
us_counties[,id:=as.numeric(us_counties$id)]
us_counties <- merge(us_counties,acs[,c("fips","fracuninsured")],by.x="id",by.y="fips")
us_counties[,state:=floor(id/1000)]
ggplot()+
geom_polygon(data=us_counties[! us_counties$state %in% c(2,15,72)], aes(x=long,y=lat,group=group,fill=fracuninsured),color="black")+
scale_fill_gradientn(colors=c("white","yellow","orange","red","darkred"))+ggtitle("Fraction Uninsured (ACS 2018)")+theme(legend.title = element_blank())

zip_county <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")
zip_county<- zip_county[order(zip_county$ZIP,-zip_county$RES_RATIO)]
zip_county <- zip_county[!duplicated(zip_county$ZIP),c("ZIP","COUNTY")]
core = read_fst(
"C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/core.fst",
columns = c("safegraph_place_id","location_name","top_category","naics_code","latitude","longitude","region","postal_code","iso_country_code"),
as.data.table = TRUE)
weekly = read_fst(
"C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/weekly.fst",
columns = c("safegraph_place_id","location_name","postal_code","naics_code","date_range_start","raw_visit_counts","raw_visitor_counts",paste0("visitsonday",1:7),"android","ios"),
as.data.table = TRUE)
weekly = merge(weekly,core[,c("safegraph_place_id","top_category")],by="safegraph_place_id")
weekly = merge(weekly,zip_county,by.x="postal_code",by.y="ZIP",all.x=TRUE)
weekly = merge(weekly,acs,by.x="COUNTY",by.y="fips")
weekly[,week:=as.numeric(format(date_range_start,format="%Y%m%d"))]
weekly[,place_category:=NA]
place_categories <- list()
place_categories[['grocery']] = c("General Merchandise Stores, including Warehouse Clubs and Supercenters","Grocery Stores")
gasolineother = c("Gasoline Stations","Health and Personal Care Stores")
place_categories[['gasolineother']] = c("Gasoline Stations","Health and Personal Care Stores")
place_categories[['restaurants']] =c("Restaurants and Other Eating Places")
place_categories[['entertainment1']] = c("Museums, Historical Sites, and Similar Institutions","Gambling Industries","Motion Picture and Video Industries","Spectator Sports","Department Stores")
place_categories[['entertainment2']] = c("Sporting Goods, Hobby, and Musical Instrument Stores","Amusement Parks and Arcades")
place_categories[['healthservices']] = c("General Medical and Surgical Hospitals","Offices of Other Health Practitioners")
place_categories[['airports']] = c("Support Activities for Air Transportation")
i=1
for(cat in place_categories) {
weekly[,place_category:=ifelse(weekly$top_category %in% cat,names(place_categories)[i],weekly$place_category)]
i = i +1
}
category_county_week = weekly[!is.na(place_category),
.(v1=sum(visitsonday1,na.rm=TRUE),
v2=sum(visitsonday2,na.rm=TRUE),
v3=sum(visitsonday3,na.rm=TRUE),
v4=sum(visitsonday4,na.rm=TRUE),
v5=sum(visitsonday5,na.rm=TRUE),
v6=sum(visitsonday6,na.rm=TRUE),
v7=sum(visitsonday7,na.rm=TRUE)),
by=.(place_category,week)]
category_county_week = melt(category_county_week,id.vars = c("place_category","week"))
category_county_week[,variable:=as.numeric(as.character(substr(variable,2,2)))]
category_county_week[,day:=as.numeric(week+variable)]
category_county_week[,day:=day-min(unique(category_county_week$day))+1]
category_county_week[,variable:=NULL]
names(category_county_week) <- c("place_category","week","visits","dayno")
ggplot(category_county_week,aes(x=dayno,y=log(visits),color=place_category))+geom_line()+
scale_colour_discrete(guide = 'none') +
scale_x_continuous(expand=c(0,5)) +
geom_dl(aes(label = place_category), method = list(dl.combine("last.points"), cex = 0.8)) + ggtitle("log(No of Visits) by Category")

Are people in areas where more people are uninsured more likely to social distance?
- Constructed a county-category-day panel. Each row represents the number of visits for a particular category (e.g: grocery) in a given county in a given day of March
- Following regression specification to understand the association between the fraction of uninsured people in a county and the number of visits. \[log(no visits)_{county,day}= \alpha + \beta fracuninsured_{county} \times week dummy + place category FE + week FE + county FE\]
- Results indicate that uninsured people are more likely to avoid non-essential trips (e.g: entertainment related and airports) compared to insured people
- There is no difference in the number of visits in the insured and uninsured counties
category_county_week = weekly[,.(v1=sum(visitsonday1,na.rm=TRUE),
v2=sum(visitsonday2,na.rm=TRUE),
v3=sum(visitsonday3,na.rm=TRUE),
v4=sum(visitsonday4,na.rm=TRUE),
v5=sum(visitsonday5,na.rm=TRUE),
v6=sum(visitsonday6,na.rm=TRUE),
v7=sum(visitsonday7,na.rm=TRUE),
fracuninsured=mean(fracuninsured,na.rm=TRUE)),
by=.(top_category,week,COUNTY)]
category_county_week = melt(category_county_week,id.vars = c("top_category","week","COUNTY","fracuninsured"))
category_county_week[,variable:=as.numeric(as.character(substr(variable,2,2)))]
category_county_week[,day:=as.numeric(week+variable)]
category_county_week[,day:=day-min(unique(category_county_week$day))+1]
category_county_week[,variable:=NULL]
names(category_county_week) <- c("top_category","week","COUNTY","fracuninsured","visits","dayno")
category_county_week[,state:=floor(COUNTY/1000)]
uninsub = 0.7 # 2.5% percentile
uninslb = 0.03 # 97.5% percentile
category_county_week = category_county_week[fracuninsured>uninslb & fracuninsured<uninsub]
Regression Evidence
regs <- list()
gr <- list()
i = 1
collabels = NULL
for(cat in place_categories) {
regs[[i]] <- felm(log(1+visits)~fracuninsured*factor(week)|top_category+week+COUNTY|0|COUNTY,data=category_county_week[top_category %in% cat])
collabels = c(collabels,names(place_categories)[i])
gr[[i]] = .coef_plot_1reg(regs[[i]],"fracuninsured:factor(week)",20200301)+ggtitle(names(place_categories)[i])
i= i+1
}
.printtable(regs,column.labels = collabels)
##
## ===========================================================================================================================
## Dependent variable:
## ----------------------------------------------------------------------------------------
## grocery gasolineother restaurants entertainment1 entertainment2 healthservices airports
## (1) (2) (3) (4) (5) (6) (7)
## ---------------------------------------------------------------------------------------------------------------------------
## fracuninsured
## (0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
## factor(week)20200308
## (0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
## factor(week)20200315
## (0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
## factor(week)20200322
## (0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
## fracuninsured:factor(week)20200308 0.010 0.037** 0.019 -0.089** -0.022 -0.061** 0.002
## (0.013) (0.017) (0.016) (0.038) (0.034) (0.027) (0.049)
## fracuninsured:factor(week)20200315 -0.011 -0.016 0.069** -0.581*** -0.327*** -0.112*** -0.226***
## (0.020) (0.024) (0.033) (0.070) (0.064) (0.034) (0.069)
## fracuninsured:factor(week)20200322 -0.035 -0.041 -0.026 -0.844*** -0.456*** -0.143*** -0.496***
## (0.027) (0.030) (0.048) (0.087) (0.082) (0.038) (0.102)
## ---------------------------------------------------------------------------------------------------------------------------
##
## Observations 162,113 161,378 84,791 179,375 99,610 127,015 48,664
## Adjusted R2 0.888 0.872 0.975 0.728 0.869 0.820 0.882
## ===========================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
##
Regression evidence
These graphs plot the coefficients of the above regression with their corresponding 95% confidence intervals
grid.arrange(gr[[1]],gr[[2]],gr[[3]],gr[[4]],gr[[5]],gr[[6]],gr[[7]],ncol=4)

How do lower-income uninsured people behave?
- Android phone usage as a proxy for income
- Following regression \[log(no visits)_{county,day}= \alpha + \beta fracuninsured_{county} \times android \times week dummy + place category FE + week FE + county FE\]
- Results suggest uninsured lower-income people reduced the number of essential trips (to grocery store) as well
category_county_week_device = weekly[,.(
android=sum(android,na.rm=TRUE),
ios=sum(ios,na.rm=TRUE),
fracuninsured=mean(fracuninsured,na.rm=TRUE)),
by=.(top_category,week,COUNTY)]
category_county_week_device = melt(category_county_week_device,id.vars = c("top_category","week","COUNTY","fracuninsured"))
category_county_week_device[,variable:=as.character(variable)]
category_county_week_device[,android:=ifelse(variable=="ios",0,1)]
category_county_week_device[,variable:=NULL]
names(category_county_week_device) <- c("top_category","week","COUNTY","fracuninsured","visits","android")
category_county_week_device[,state:=floor(COUNTY/1000)]
category_county_week_device = category_county_week_device[fracuninsured>uninslb & fracuninsured<uninsub]
regs <- list()
gr <- list()
i = 1
collabels = NULL
for(cat in place_categories) {
regs[[i]] <- felm(log(1+visits)~fracuninsured*android*factor(week)|top_category+week+COUNTY|0|COUNTY,data=category_county_week_device[top_category %in% cat])
collabels = c(collabels,names(place_categories)[i])
gr[[i]] = .coef_plot_1reg(regs[[i]],"fracuninsured:android:factor(week)",20200301)+ggtitle(names(place_categories)[i])
i= i+1
}
.printtable(regs,column.labels = collabels)
##
## =====================================================================================================================================
## Dependent variable:
## ------------------------------------------------------------------------------------------
## grocery gasolineother restaurants entertainment1 entertainment2 healthservices airports
## (1) (2) (3) (4) (5) (6) (7)
## -------------------------------------------------------------------------------------------------------------------------------------
## fracuninsured
## (0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
## android 0.314*** 0.227*** -0.016 0.045** 0.105*** 0.203*** 0.012
## (0.014) (0.015) (0.015) (0.019) (0.023) (0.021) (0.037)
## factor(week)20200308
## (0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
## factor(week)20200315
## (0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
## factor(week)20200322
## (0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
## fracuninsured:android -0.272*** -0.326*** -0.333*** -0.501*** -0.566*** -0.486*** -0.515***
## (0.061) (0.065) (0.067) (0.078) (0.094) (0.089) (0.142)
## fracuninsured:factor(week)20200308 0.020 0.098*** 0.066** -0.109* -0.118* -0.031 0.132
## (0.030) (0.036) (0.032) (0.062) (0.067) (0.067) (0.129)
## fracuninsured:factor(week)20200315 0.043 0.164*** 0.265*** -0.609*** -0.218** 0.037 -0.262
## (0.034) (0.045) (0.052) (0.095) (0.100) (0.079) (0.166)
## fracuninsured:factor(week)20200322 0.028 0.196*** 0.295*** -0.936*** -0.243* 0.115 -0.633***
## (0.042) (0.055) (0.074) (0.134) (0.138) (0.088) (0.184)
## android:factor(week)20200308 -0.044*** -0.008 -0.031*** -0.046*** -0.077*** -0.039** -0.015
## (0.008) (0.011) (0.009) (0.015) (0.021) (0.020) (0.040)
## android:factor(week)20200315 0.087*** 0.146*** 0.144*** 0.055*** 0.109*** 0.077*** 0.091**
## (0.009) (0.012) (0.013) (0.019) (0.025) (0.022) (0.041)
## android:factor(week)20200322 0.096*** 0.178*** 0.186*** 0.191*** 0.252*** 0.128*** 0.161***
## (0.009) (0.013) (0.014) (0.021) (0.027) (0.024) (0.043)
## fracuninsured:android:factor(week)20200308 -0.025 -0.060 -0.031 -0.015 0.141* -0.047 -0.079
## (0.034) (0.044) (0.041) (0.059) (0.083) (0.085) (0.164)
## fracuninsured:android:factor(week)20200315 -0.101*** -0.127*** -0.027 0.184** 0.004 -0.093 0.073
## (0.034) (0.046) (0.054) (0.073) (0.092) (0.090) (0.172)
## fracuninsured:android:factor(week)20200322 -0.090*** -0.118** -0.046 0.199** -0.058 -0.150 0.057
## (0.035) (0.052) (0.053) (0.086) (0.106) (0.103) (0.169)
## -------------------------------------------------------------------------------------------------------------------------------------
##
## Observations 46,318 46,108 24,226 51,250 28,460 36,290 13,904
## Adjusted R2 0.882 0.862 0.970 0.719 0.857 0.816 0.889
## =====================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
##
grid.arrange(gr[[1]],gr[[2]],gr[[3]],gr[[4]],gr[[5]],gr[[6]],gr[[7]],ncol=4)

Growth in the number of confirmed cases and number of deaths are higher when the fraction of insured is lower
CSSE <- fread("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
CSSE <- CSSE[CSSE$FIPS>=1000]
CSSE[,c("UID","iso2","iso3","code3","Admin2","Province_State","Country_Region","Lat","Long_","Combined_Key"):=list(NULL)]
CSSE <- melt(data = CSSE,id.vars = c("FIPS"))
names(CSSE) <- c("fips","date","cases")
CSSE[,date:=as.character(CSSE$date)]
CSSE[,date:=as.Date(CSSE$date,"%m/%d/%y")]
CSSE_dealth <- fread("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv")
CSSE_dealth <- CSSE_dealth[CSSE_dealth$FIPS>=1000]
CSSE_dealth[,c("UID","iso2","iso3","code3","Admin2","Province_State","Country_Region","Lat","Long_","Combined_Key"):=list(NULL)]
CSSE_dealth <- melt(data = CSSE_dealth,id.vars = c("FIPS"))
names(CSSE_dealth) <- c("fips","date","deaths")
CSSE_dealth[,date:=as.character(CSSE_dealth$date)]
CSSE_dealth[,date:=as.Date(CSSE_dealth$date,"%m/%d/%y")]
# CSSE_change <- CSSE[CSSE$date %in% c(min(CSSE$date,na.rm=TRUE),max(CSSE$date,na.rm=TRUE))]
CSSE_change <- CSSE[CSSE$date %in% c(as.Date("2020-03-01"),max(CSSE$date,na.rm=TRUE))]
CSSE_change <- dcast(CSSE_change,formula = fips~date,fun.aggregate = sum )
names(CSSE_change) <- c("fips","start","end")
CSSE_change[,casegrowth:= end-start]
temp <- CSSE_dealth[CSSE_dealth$date %in% c(as.Date("2020-03-01"),max(CSSE$date,na.rm=TRUE))]
temp <- dcast(temp,formula = fips~date,fun.aggregate = sum )
names(temp) <- c("fips","start","end")
temp[,deathgrowth:= end-start]
temp[,c("start","end"):=list(NULL)]
CSSE_change <- merge(CSSE_change,temp,by="fips")
combdata <- merge(CSSE_change,acs,by="fips")
combdata[,casegrowth:=combdata$casegrowth/combdata$total_population]
combdata[,deathgrowth:=combdata$deathgrowth/combdata$total_population]
combdata[,fracuninsured:=1-combdata$with_health_insurance/combdata$total_population]
combdata[,state:=floor(combdata$fips/1000)]
regs <- list()
regs[[1]] <- felm(log(1+casegrowth)~fracuninsured+log(total_population)+log(median_hh_income)+median_age|state|0|state,data=combdata[ combdata$total_population>50000 & casegrowth>0])
regs[[2]] <- felm(log(1+deathgrowth)~fracuninsured+log(total_population)+log(median_hh_income)+median_age|state|0|state,data=combdata[ combdata$total_population>50000 & casegrowth>0])
.printtable(regs,column.labels = c("Change in # cases","Change in # deaths"))
##
## ==========================================================
## Dependent variable:
## ------------------------------------
## Change in # cases Change in # deaths
## (1) (2)
## ----------------------------------------------------------
## fracuninsured 0.004** 0.0002*
## (0.002) (0.0001)
## log(total_population) 0.0002** 0.00001**
## (0.0001) (0.00000)
## log(median_hh_income) 0.003 0.0001
## (0.002) (0.0001)
## median_age 0.00005 0.00000*
## (0.00003) (0.00000)
## ----------------------------------------------------------
##
## Observations 978 978
## Adjusted R2 0.238 0.104
## ==========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
##
# .coef_plot_1reg(regs[[2]],"factor(insuredcat)",50)