rm(list=ls())
library(data.table)
library(RSQLite)
library(DBI)
library(dplyr)
library(stargazer)
library(lfe)
library(ggplot2)
library(tidyverse)
library(stringr)
library(fst)
zhvi <- readRDS("/data/atr/County_Zhvi_SingleFamilyResidence.rds")
zhvi <- as.data.table(zhvi)
zhvi[,year:=year(month)]
zhvi <- zhvi[,.(zhvi=mean(zhvi,na.rm=T)),by=.(fips,year)]
names(zhvi) <- c("fips","year","zhvi")
zhvi[,fips:=as.numeric(fips)]
zhvi <- zhvi[year %in% 2004:2017]
agi <- list.files(pattern="*.fst",path = "/data/IRSSOI",full.names = TRUE)
agi = lapply(agi, read_fst, as.data.table = TRUE)
agi <- do.call(rbind , agi)
temp <- agi[year==2016]
temp[,year:=2017]
agi <- rbind(agi,temp)
temp <- agi[year==2005]
temp[,year:=2004]
agi <- rbind(agi,temp)
agi[,zipcode:=as.numeric(zipcode)]
ziptofips <- fread("/data/atr/county_zip.csv")
ziptofips <- ziptofips[,c("COUNTY","ZIP","RES_RATIO")]
agi <- merge(agi,ziptofips,by.x=c("zipcode"),by.y="ZIP",allow.cartesian=TRUE)
agi[,agi:=agi*RES_RATIO]
agi <- agi[,.(income=sum(agi)),by=.(COUNTY,year)]
zhvi <- merge(zhvi,agi,by.x=c("fips","year"),by.y=c("COUNTY","year"))
zhvi[,pricetoincome:=zhvi/income]
zhvi <- zhvi[income>10000 & income<1000000]
unemp <- fread("/data/atr/blsunempdata.csv",select = c("state","county","year","unemprate"))
unemp[,state:=str_pad(state,2,side="left",pad="0")]
unemp[,county:=str_pad(county,3,side="left",pad="0")]
unemp[,fips:=paste0(state,county)]
unemp[,c("state","county"):=list(NULL)]
unemp[,fips:=as.numeric(fips)]
ptoi <- zhvi[,.(pricetoincome=mean(pricetoincome,na.rm=T)),by=year]
ggplot(ptoi)+geom_line(aes(x=year,y=pricetoincome))+theme_minimal()+labs(x="",y="House Price/Income")

outmigration <- read_fst("/data/atr/out_migration_1997_2017.fst",as.data.table = T)
outmigration <- outmigration[fromcounty != "000"]
outmigration[,fromcounty:=paste0(fromstate,fromcounty)]
outmigration[,tocounty:=paste0(tostate,tocounty)]
outmigration <- outmigration[fromcounty==tocounty | tocounty=="96000"]
outmigration[,samecounty:=ifelse(fromcounty==tocounty,1,0)]
outmigration[,c("fromstate","tostate","file","statecode","desc","tocounty","noexceptions","agi"):=list(NULL)]
outmigration <- outmigration[fromyear>=2003]
outmigration[,fromcounty:=as.numeric(fromcounty)]
outmigration <- dcast(outmigration,fromcounty+fromyear~samecounty,fun.aggregate = sum,value.var = "noreturns")
outmigration <- outmigration[!is.na(fromcounty) & fromcounty!= 1000]
names(outmigration) <- c("fromcounty","fromyear","nomigrate","nostay")
fracmigrate <- outmigration[,.(nomigrate=sum(nomigrate,na.rm=T)/1e6),by=fromyear]
ggplot(fracmigrate)+geom_line(aes(x=fromyear,y=nomigrate))+theme_minimal()+labs(x="",y="Millions of households")+ylim(c(0,11))

outmigration <- merge(outmigration,zhvi,by.x=c("fromcounty","fromyear"),by.y=c("fips","year"))
outmigration <- outmigration[!duplicated(outmigration)]
outmigration <- merge(outmigration,unemp,by.x=c("fromcounty","fromyear"),by.y=c("fips","year"),all.x=T)
outmigration[,fracmigrated:=nomigrate/(nomigrate+nostay)]
r <- felm(log(nomigrate)~log(pricetoincome)*factor(fromyear)+unemprate+log(income)+log(zhvi)|fromcounty+fromyear|0|fromcounty,data=outmigration[fromyear>=2005])
stargazer(r,type="text",no.space = T)
##
## ===================================================================
## Dependent variable:
## ---------------------------
## log(nomigrate)
## -------------------------------------------------------------------
## log(pricetoincome) -0.056*
## (0.034)
## factor(fromyear)2006
## (0.000)
## factor(fromyear)2007
## (0.000)
## factor(fromyear)2008
## (0.000)
## factor(fromyear)2009
## (0.000)
## factor(fromyear)2010
## (0.000)
## factor(fromyear)2011
## (0.000)
## factor(fromyear)2012
## (0.000)
## factor(fromyear)2013
## (0.000)
## factor(fromyear)2014
## (0.000)
## factor(fromyear)2015
## (0.000)
## factor(fromyear)2016
## (0.000)
## factor(fromyear)2017
## (0.000)
## unemprate 0.004***
## (0.001)
## log(income)
## (0.000)
## log(zhvi) 0.173***
## (0.044)
## log(pricetoincome):factor(fromyear)2006 -0.021***
## (0.005)
## log(pricetoincome):factor(fromyear)2007 -0.040***
## (0.007)
## log(pricetoincome):factor(fromyear)2008 -0.027***
## (0.008)
## log(pricetoincome):factor(fromyear)2009 0.004
## (0.009)
## log(pricetoincome):factor(fromyear)2010 -0.021**
## (0.010)
## log(pricetoincome):factor(fromyear)2011 -0.015
## (0.011)
## log(pricetoincome):factor(fromyear)2012 0.015
## (0.009)
## log(pricetoincome):factor(fromyear)2013 0.045***
## (0.009)
## log(pricetoincome):factor(fromyear)2014 0.046***
## (0.010)
## log(pricetoincome):factor(fromyear)2015 0.084***
## (0.009)
## log(pricetoincome):factor(fromyear)2016 0.110***
## (0.010)
## log(pricetoincome):factor(fromyear)2017 0.140***
## (0.010)
## -------------------------------------------------------------------
## Observations 24,608
## R2 0.996
## Adjusted R2 0.995
## Residual Std. Error 0.093 (df = 22591)
## ===================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01