rm(list=ls())
library(fst)
library(ggplot2)
library(data.table)
library(stargazer)
library(lfe)
library(tidycensus)
library(R.matlab)
library(rgdal)
library(rgeos)
unempins <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/Unemployment Insurance.csv")
fp <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/us-state-ansi-fips.csv")
unempins <- merge(unempins,fp[,c("statefips","statecode")],by.x="state",by.y="statecode")
unempins <- unempins[,c("statefips","basebenefit","weeklywage","state")]
names(unempins) <- c("state","basebenefit","weeklywage","statecode")
unempins[,replacementrate:=(basebenefit+600)/weeklywage]
unempins[,growthrate:=((600)/basebenefit)]
unempins[,state:=ifelse(nchar(as.character(state))==1,paste0("0",state),as.character(state))]
acs1 <- R.matlab::readMat("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/mat/acs.mat")
median_age <- unlist(acs1$DP05.0018E,use.names = F)
pctwhite <- unlist(acs1$DP05.0037PE, use.names = F)
child <- unlist(acs1$DP02.0013PE, use.names = F)
medianincome <- unlist(acs1$DP03.0062E, use.names = F)
acs1 <- data.frame(median_age=median_age,pctwhite=pctwhite,child=child,medianincome=medianincome)
spatial <- R.matlab::readMat('C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/mat/spatial_data.mat')
fips <- unlist(spatial$FIPS, use.names=FALSE)
pop <- unlist(spatial$Pop, use.names=FALSE)
land <- unlist(spatial$Land,use.names = FALSE)
pcthome <- unlist(spatial$PctHome,use.names = F)
pcthome <- data.table(pcthome)
names(pcthome) <- paste0("week",c("01","02","03","04","05","06","07","08","09",as.character(10:15)))
acs2 <- data.frame(fips=as.numeric(fips),pop=pop,land=land)
acskp <- cbind(acs1,acs2)
acskp <- data.table(acskp)
acskp[,logdensity:=log(pop/land)]
acskp[,fips:=ifelse(nchar(fips)==4,paste0("0",fips),as.character(fips))]
load(file="C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Presidential Election/countypres_2000-2016.RData")
election <- x
rm(x)
election <- data.table(election)
election <- election[election$year %in% c(2016)& election$party=="republican"]
election <- election[,c("year","FIPS","candidatevotes","totalvotes")]
election[,rep_pct:=election$candidatevotes/election$totalvotes]
election[,FIPS:=as.character(FIPS)]
election[,FIPS:=ifelse(nchar(FIPS)==4,paste0("0",FIPS),FIPS)]
Due to the additional $600 of unemployment benefits provided by CARES Act, in some states unemployed receive more money than they would have typically earned in their jobs.
The graph below shows the replacement rate after CARES (red) and replacement rate before CARES (blue)
test <- unempins
test[,replacementratebeforecares:= basebenefit/weeklywage]
test <- test[,c("statecode","replacementrate","replacementratebeforecares")]
test <- test[order(-test$replacementrate)]
test <- melt(test,"statecode")
test <- test[order(-value)]
ggplot(test) + geom_point(aes(x=statecode,y=value,color=variable),size=4)+theme_minimal()+theme(legend.position = "bottom")+geom_hline(yintercept = 1)
us_states <- readOGR("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US States","cb_2014_us_state_20m")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US States", layer: "cb_2014_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings: ALAND AWATER
us_states <- fortify(us_states,region="GEOID")
us_states <- data.table(us_states)
us_states <- merge(us_states,unempins[,c("replacementrate","state")],by.x="id",by.y="state")
ggplot()+
geom_polygon(data=us_states[! us_states$id %in% c("02","15","72")], aes(x=long,y=lat,group=group,fill=replacementrate),color="black")+
scale_fill_gradientn(colors=c("orange","orange3","orangered1","orangered3","orangered4"))+ theme_minimal()+
theme(axis.title=element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank(),
legend.position = "bottom",panel.grid = element_blank())+ guides(fill=guide_legend(title="Replacement Rate"))
\[ Outcome_w = \Sigma_w\text{ } \beta_w \times w \times HighBenefits + County\text{ }Fixed\text{ }Effects + Week\text{ }Fixed\text{ }Effects\]
where \(w\) is a dummy variable that indicates the week and \(HighBenefits\) indicates the states where unemployment benefit amount is more than the wage.
All the figures below plot the \(\beta_w\) estimates with the corresponding 95% confidence intervals.
files <- list.files(path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/social distancing",pattern = '\\.fst',full.names = TRUE)
sd = lapply(files, read_fst, as.data.table = TRUE)
sd <- do.call(rbind , sd)
sd[,county:=substr(origin_census_block_group,1,5)]
sd[,dayno:=as.numeric(day-as.Date("2019-12-31"))]
sd[,week:=floor(dayno/7)]
sd <- sd[,.(device_count=sum(device_count,na.rm = T),
distance_traveled_from_home=mean(distance_traveled_from_home,na.rm=T),
completely_home_device_count=sum(completely_home_device_count,na.rm = T),
part_time_work_behavior_devices=sum(part_time_work_behavior_devices,na.rm = T),
full_time_work_behavior_devices=sum(full_time_work_behavior_devices,na.rm = T)),
by=.(week,county)]
sd[,state:=substr(county,1,2)]
# sd[,county:=substr(tract,1,5)]
sd[,pcthome:=completely_home_device_count*100/device_count]
sd[,pctparttime:=part_time_work_behavior_devices*100/device_count]
sd[,pctfulltime:=full_time_work_behavior_devices*100/device_count]
ftdata <- sd
sd <- merge(sd,unempins,by="state")
sd <- merge(sd,acskp,by.x="county",by.y="fips")
sd <- merge(sd,election[,c("FIPS","rep_pct")],by.x="county",by.y="FIPS")
sd[,weekno := week]
sd[,week:=as.factor(week)]
sd[,week:= relevel(sd$week,9)]
r <- list()
r[[1]] <- felm(log(1+pcthome)~I(replacementrate>1)*week|county+week|0|county,data=sd[sd$medianincome>30000 & sd$medianincome<50000 ])
r[[2]] <- felm(log(1+pcthome)~I(replacementrate>1)*week|county+week|0|county,data=sd[sd$medianincome>=50000 ])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
core = read_fst(
"C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/core.fst",
columns = c("safegraph_place_id","top_category"), as.data.table = TRUE)
files <- list.files(path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg/weekly/v2",pattern = '\\.fst',full.names = TRUE)
weekly = lapply(files, read_fst, as.data.table = TRUE)
weekly <- do.call(rbind , weekly)
weekly = merge(weekly,core[,c("safegraph_place_id","top_category")],by="safegraph_place_id")
weekly[,county:=substr(poi_cbg,1,5)]
place_categories <- list()
place_categories[['grocery']] = c("General Merchandise Stores, including Warehouse Clubs and Supercenters","Grocery Stores")
# gasolineother = c("Gasoline Stations","Health and Personal Care Stores")
# place_categories[['gasolineother']] = c("Gasoline Stations","Health and Personal Care Stores")
place_categories[['restaurants']] =c("Restaurants and Other Eating Places")
place_categories[['entertainment1']] = c("Museums, Historical Sites, and Similar Institutions","Gambling Industries","Motion Picture and Video Industries","Spectator Sports")#,"Department Stores"
# place_categories[['entertainment2']] = c("Sporting Goods, Hobby, and Musical Instrument Stores","Amusement Parks and Arcades")
# place_categories[['healthservices']] = c("General Medical and Surgical Hospitals","Offices of Other Health Practitioners")
# place_categories[['airports']] = c("Support Activities for Air Transportation")
category_county_week = weekly[,.(v1=sum(visitsonday1,na.rm=TRUE),
v2=sum(visitsonday2,na.rm=TRUE),
v3=sum(visitsonday3,na.rm=TRUE),
v4=sum(visitsonday4,na.rm=TRUE),
v5=sum(visitsonday5,na.rm=TRUE),
v6=sum(visitsonday6,na.rm=TRUE),
v7=sum(visitsonday7,na.rm=TRUE)),
by=.(top_category,date_range_start,county)]
category_county_week[,visits:=v1+v2+v3+v4+v5+v6+v7]
category_county_week[,dayno:=as.numeric(date_range_start-as.Date("2019-12-31"))]
category_county_week[,week:=floor(dayno/7)]
category_county_week <- category_county_week[week>=0]
category_county_week[,state:=substr(county,1,2)]
category_county_week <- merge(category_county_week,unempins,by="state")
category_county_week <- merge(category_county_week,acskp,by.x="county",by.y="fips")
category_county_week <- merge(category_county_week,election[,c("FIPS","rep_pct")],by.x="county",by.y="FIPS")
category_county_week[,weekno := week]
category_county_week[,week:=as.factor(week)]
category_county_week[,week:= relevel(category_county_week$week,9)]
i=1
r <- list()
r[[1]] <- felm(log(1+visits)~I(replacementrate>1)*week|county+week|0|county,data=category_county_week[category_county_week$medianincome>30000 & sd$medianincome<50000 & top_category %in% as.vector(place_categories[[i]]) ])
r[[2]] <- felm(log(1+visits)~I(replacementrate>1)*week|county+week|0|county,data=category_county_week[category_county_week$medianincome>=50000 & top_category %in% as.vector(place_categories[[i]])])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
i=2
r <- list()
r[[1]] <- felm(log(1+visits)~I(replacementrate>1)*week|county+week|0|county,data=category_county_week[category_county_week$medianincome>30000 & sd$medianincome<50000 & top_category %in% as.vector(place_categories[[i]]) ])
r[[2]] <- felm(log(1+visits)~I(replacementrate>1)*week|county+week|0|county,data=category_county_week[category_county_week$medianincome>=50000 & top_category %in% as.vector(place_categories[[i]])])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
i=3
r <- list()
r[[1]] <- felm(log(1+visits)~I(replacementrate>1)*week|county+week|0|county,data=category_county_week[category_county_week$medianincome>30000 & sd$medianincome<50000 & top_category %in% as.vector(place_categories[[i]]) ])
r[[2]] <- felm(log(1+visits)~I(replacementrate>1)*week|county+week|0|county,data=category_county_week[category_county_week$medianincome>=50000 & top_category %in% as.vector(place_categories[[i]])])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
zip_county <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")
zip_county<- zip_county[order(zip_county$ZIP,-zip_county$RES_RATIO)]
zip_county <- zip_county[!duplicated(zip_county$ZIP),c("ZIP","COUNTY")]
trans1 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg_trans/cut-1-daily-spend-by-zip-20170101-20200503.csv.gz")
names(trans1) <- c("day","zip","nocards","notrans","totalspent")
trans1[,day:=as.Date(day)]
trans2 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg_trans/cut-1-daily-spend-by-zip-20200425-20200515.csv.gz")
names(trans2) <- c("day","zip","nocards","notrans","totalspent")
trans2[,day:=as.Date(day)]
trans <- rbind(trans1,trans2)
rm(trans1)
rm(trans2)
trans <- trans[!duplicated(trans[,c("day","zip")])]
trans <- merge(trans,zip_county,by.x="zip",by.y="ZIP")
trans <- trans[day >= "2020-01-01"]
trans[,dayno:=as.numeric(day-as.Date("2019-12-31"))]
trans[,week:=floor(dayno/7)]
trans <- trans[,.(nocards=sum(nocards,na.rm = T),
notrans=sum(notrans,na.rm=T),
totalspent=sum(totalspent,na.rm=T)),
by=.(COUNTY,week)]
names(trans) <- c("county","week","nocards","notrans","totalspent")
trans[,county:=ifelse(nchar(county)==4,paste0("0",county),paste0(county))]
trans[,state:=substr(county,1,2)]
trans <- merge(trans,election[,c("FIPS","rep_pct")],by.x="county",by.y="FIPS")
trans <- merge(trans,acskp,by.x="county",by.y="fips")
trans <- merge(trans,unempins,by="state")
trans[,weekno := week]
trans[,week:=as.factor(week)]
trans[,week:= relevel(trans$week,9)]
r <- list()
r[[1]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=trans[trans$medianincome>30000 & trans$medianincome<50000 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25)) & weekno>4 ])
r[[2]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=trans[trans$medianincome>=50000 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25)) & weekno>4 ])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
# clean raw data
# fn <- c("cut-2-daily-spend-by-zip-by-mcc-20170101-20200417-03")
#
# for(f in fn) {
# trans1 <- fread(paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg_trans/bymcc/",f,".csv.gz"),nrows = 10000000)
# names(trans1) <- c("day","mcc","zip","nocards","notrans","totalspent")
#
# trans1 <- trans1[substr(trans1$day,1,4)=="2020"]
# trans1[,day:=as.Date(day)]
# write_fst(trans1,path=paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg_trans/bymcc/",f,".fst"),compress = 100)
# }
mccdesc <- fread("https://raw.githubusercontent.com/greggles/mcc-codes/master/mcc_codes.csv")
mccdesc <- mccdesc[,c("mcc","irs_description")]
files <- list.files(path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/COVID19/sg_trans/bymcc",pattern = '\\.fst',full.names = TRUE)
bymcc = lapply(files, read_fst, as.data.table = TRUE)
bymcc <- do.call(rbind , bymcc)
bymcc <- bymcc[!duplicated(bymcc[,c("day","mcc","zip")])]
bymcc[,mcc:=as.integer(mcc)]
bymcc <- merge(bymcc,mccdesc,by="mcc")
totbymcc <- bymcc[,.(totalspent=sum(totalspent,na.rm=T)),by=irs_description]
bymcc <- bymcc[irs_description %in% c("Grocery Stores, Supermarkets","Fast Food Restaurants","Eating Places, Restaurants","Discount Stores","Utilities","Miscellaneous Specialty Retail")]
bymcc <- merge(bymcc,zip_county,by.x="zip",by.y="ZIP")
bymcc[,dayno:=as.numeric(day-as.Date("2019-12-31"))]
bymcc[,week:=floor(dayno/7)]
bymcc <- bymcc[,.(nocards=sum(nocards,na.rm = T),
notrans=sum(notrans,na.rm=T),
totalspent=sum(totalspent,na.rm=T)),
by=.(COUNTY,week,irs_description)]
names(bymcc) <- c("county","week","mcc","nocards","notrans","totalspent")
bymcc[,county:=ifelse(nchar(county)==4,paste0("0",county),paste0(county))]
bymcc[,state:=substr(county,1,2)]
bymcc <- merge(bymcc,acskp,by.x="county",by.y="fips")
bymcc <- merge(bymcc,unempins,by="state")
bymcc[,weekno := week]
bymcc[,week:=as.factor(week)]
bymcc[,week:= relevel(bymcc$week,9)]
cat = "Grocery Stores, Supermarkets"
r <- list()
r[[1]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>30000 & bymcc$medianincome<50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
r[[2]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>=50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
cat = "Fast Food Restaurants"
r <- list()
r[[1]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>30000 & bymcc$medianincome<50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
r[[2]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>=50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
cat = "Eating Places, Restaurants"
r <- list()
r[[1]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>30000 & bymcc$medianincome<50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
r[[2]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>=50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
cat = "Discount Stores"
r <- list()
r[[1]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>30000 & bymcc$medianincome<50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
r[[2]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>=50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
cat = "Utilities"
r <- list()
r[[1]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>30000 & bymcc$medianincome<50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
r[[2]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>=50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")
cat = "Miscellaneous Specialty Retail"
r <- list()
r[[1]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>30000 & bymcc$medianincome<50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
r[[2]] <- felm(log(1+totalspent)~I(replacementrate>1)*week|county+week|0|county,data=bymcc[bymcc$medianincome>=50000 & mcc == cat & weekno<16 & (replacementrate>quantile(unempins$replacementrate,0.75) | replacementrate<quantile(unempins$replacementrate,0.25))])
.coef_plot_2reg(r[[1]],"Income less than 50k",r[[2]],"Income greater than 50k","I(replacementrate > 1)TRUE:week",8)+ geom_vline(xintercept = 9,color = "grey",linetype="dashed")