rm(list=ls())
library(data.table)
library(RSQLite)
library(DBI)
library(dplyr)
library(stargazer)
library(lfe)
library(ggplot2)
library(tidyverse)
library(stringr)
library(fst)
zhvi <- readRDS("/data/atr/County_Zhvi_SingleFamilyResidence.rds")
zhvi <- as.data.table(zhvi)
zhvi[,year:=year(month)]
zhvi <- zhvi[,.(zhvi=mean(zhvi,na.rm=T)),by=.(fips,year)]
names(zhvi) <- c("fips","year","zhvi")
zhvi[,fips:=as.numeric(fips)]
zhvi <- zhvi[year %in% 2004:2017]
agi <- list.files(pattern="*.fst",path = "/data/IRSSOI",full.names = TRUE)
agi = lapply(agi, read_fst, as.data.table = TRUE)
agi <- do.call(rbind , agi)
temp <- agi[year==2016]
temp[,year:=2017]
agi <- rbind(agi,temp)
temp <- agi[year==2005]
temp[,year:=2004]
agi <- rbind(agi,temp)
agi[,zipcode:=as.numeric(zipcode)]

ziptofips <- fread("/data/atr/county_zip.csv")
ziptofips <- ziptofips[,c("COUNTY","ZIP","RES_RATIO")]
agi <- merge(agi,ziptofips,by.x=c("zipcode"),by.y="ZIP",allow.cartesian=TRUE)
agi[,agi:=agi*RES_RATIO]
agi <- agi[,.(income=sum(agi)),by=.(COUNTY,year)]

zhvi <- merge(zhvi,agi,by.x=c("fips","year"),by.y=c("COUNTY","year"))

zhvi[,pricetoincome:=zhvi/income]
zhvi <- zhvi[income>10000 & income<1000000]

unemp <- fread("/data/atr/blsunempdata.csv",select = c("state","county","year","unemprate"))
unemp[,state:=str_pad(state,2,side="left",pad="0")]
unemp[,county:=str_pad(county,3,side="left",pad="0")]
unemp[,fips:=paste0(state,county)]
unemp[,c("state","county"):=list(NULL)]
unemp[,fips:=as.numeric(fips)]
ptoi <- zhvi[,.(pricetoincome=mean(pricetoincome,na.rm=T)),by=year]
ggplot(ptoi)+geom_line(aes(x=year,y=pricetoincome))+theme_minimal()+labs(x="",y="House Price/Income")

outmigration <- read_fst("/data/atr/out_migration_1997_2017.fst",as.data.table = T)
outmigration <- outmigration[fromcounty != "000"]
outmigration[,fromcounty:=paste0(fromstate,fromcounty)]
outmigration[,tocounty:=paste0(tostate,tocounty)]
outmigration <- outmigration[fromcounty==tocounty | tocounty=="96000"]
outmigration[,samecounty:=ifelse(fromcounty==tocounty,1,0)]
outmigration[,c("fromstate","tostate","file","statecode","desc","tocounty","noexceptions","agi"):=list(NULL)]
outmigration <- outmigration[fromyear>=2003]
outmigration[,fromcounty:=as.numeric(fromcounty)]

outmigration <- dcast(outmigration,fromcounty+fromyear~samecounty,fun.aggregate = sum,value.var = "noreturns")
outmigration <- outmigration[!is.na(fromcounty) & fromcounty!= 1000]
names(outmigration) <- c("fromcounty","fromyear","nomigrate","nostay")
fracmigrate <- outmigration[,.(nomigrate=sum(nomigrate,na.rm=T)/1e6),by=fromyear]
ggplot(fracmigrate)+geom_line(aes(x=fromyear,y=nomigrate))+theme_minimal()+labs(x="",y="Millions of households")+ylim(c(0,11))

outmigration <- merge(outmigration,zhvi,by.x=c("fromcounty","fromyear"),by.y=c("fips","year"))
outmigration <- outmigration[!duplicated(outmigration)]

outmigration <- merge(outmigration,unemp,by.x=c("fromcounty","fromyear"),by.y=c("fips","year"),all.x=T)
outmigration[,fracmigrated:=nomigrate/(nomigrate+nostay)]
r <- felm(log(nomigrate)~log(pricetoincome)*factor(fromyear)+unemprate+log(income)+log(zhvi)|fromcounty+fromyear|0|fromcounty,data=outmigration[fromyear>=2005])

stargazer(r,type="text",no.space = T)
## 
## ===================================================================
##                                             Dependent variable:    
##                                         ---------------------------
##                                               log(nomigrate)       
## -------------------------------------------------------------------
## log(pricetoincome)                                -0.056*          
##                                                   (0.034)          
## factor(fromyear)2006                                               
##                                                   (0.000)          
## factor(fromyear)2007                                               
##                                                   (0.000)          
## factor(fromyear)2008                                               
##                                                   (0.000)          
## factor(fromyear)2009                                               
##                                                   (0.000)          
## factor(fromyear)2010                                               
##                                                   (0.000)          
## factor(fromyear)2011                                               
##                                                   (0.000)          
## factor(fromyear)2012                                               
##                                                   (0.000)          
## factor(fromyear)2013                                               
##                                                   (0.000)          
## factor(fromyear)2014                                               
##                                                   (0.000)          
## factor(fromyear)2015                                               
##                                                   (0.000)          
## factor(fromyear)2016                                               
##                                                   (0.000)          
## factor(fromyear)2017                                               
##                                                   (0.000)          
## unemprate                                        0.004***          
##                                                   (0.001)          
## log(income)                                                        
##                                                   (0.000)          
## log(zhvi)                                        0.173***          
##                                                   (0.044)          
## log(pricetoincome):factor(fromyear)2006          -0.021***         
##                                                   (0.005)          
## log(pricetoincome):factor(fromyear)2007          -0.040***         
##                                                   (0.007)          
## log(pricetoincome):factor(fromyear)2008          -0.027***         
##                                                   (0.008)          
## log(pricetoincome):factor(fromyear)2009            0.004           
##                                                   (0.009)          
## log(pricetoincome):factor(fromyear)2010          -0.021**          
##                                                   (0.010)          
## log(pricetoincome):factor(fromyear)2011           -0.015           
##                                                   (0.011)          
## log(pricetoincome):factor(fromyear)2012            0.015           
##                                                   (0.009)          
## log(pricetoincome):factor(fromyear)2013          0.045***          
##                                                   (0.009)          
## log(pricetoincome):factor(fromyear)2014          0.046***          
##                                                   (0.010)          
## log(pricetoincome):factor(fromyear)2015          0.084***          
##                                                   (0.009)          
## log(pricetoincome):factor(fromyear)2016          0.110***          
##                                                   (0.010)          
## log(pricetoincome):factor(fromyear)2017          0.140***          
##                                                   (0.010)          
## -------------------------------------------------------------------
## Observations                                      24,608           
## R2                                                 0.996           
## Adjusted R2                                        0.995           
## Residual Std. Error                         0.093 (df = 22591)     
## ===================================================================
## Note:                                   *p<0.1; **p<0.05; ***p<0.01