library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(maps)
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
library(mapdata)
library(lubridate)
library(readr)
library(dplyr)
library(stringr)
library(plm)
##
## Attaching package: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(synthdid)
library(devtools)
## Loading required package: usethis
library(latex2exp)
library(MCPanel)
library(rngtools)
library(future)
library(doFuture)
## Loading required package: foreach
##
## Attaching package: 'foreach'
##
## The following objects are masked from 'package:purrr':
##
## accumulate, when
library(future.batchtools)
## Loading required package: parallelly
library(xtable)
library(tidyr)
library(tibble)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
The rent data were first converted to a balanced panel (i.e., an equal number of yearly observations for every MSA) and then reshaped into a panel matrix for use in the SDID and SC estimates.
# Read the merged MSA rent data and flag Louisiana MSAs as treated
RENT.merge <- read.csv("MSA-RENT merge All.csv")
RENT.merge$treat <- ifelse(RENT.merge$state == "LA", 1, 0)
colnames(RENT.merge)
## [1] "FirstNAME" "AREANAME" "RENT_0"
## [4] "RENT_1" "RENT_2" "RENT_3"
## [7] "RENT_4" "YEAR" "NAME.x"
## [10] "state" "Unemployee.Rate" "Personal.Income"
## [13] "Resident.Population" "nyear" "MSA"
## [16] "MSA.Code" "Price" "Change"
## [19] "NAME.y" "treat"
# Drop MSAs with any missing unemployment rate
RENT.merge <- RENT.merge %>%
  group_by(MSA) %>%
  filter(!any(is.na(Unemployee.Rate)))
# Keep New Orleans (the treated MSA)
RENT.merge1 <- RENT.merge
RENT.merge1$NS <- paste0(RENT.merge1$FirstNAME, RENT.merge1$state)
RENT.NEWLA <- RENT.merge1 %>%
  filter(NS == "newLA")
RENTUS <- RENT.merge1 %>%
  filter(state != "LA")
RENT.merge1 <- RENT.merge1 %>%
  filter(NS != "newLA")
## Keep New Orleans as the only Louisiana MSA, alongside the non-LA controls
RENT.merge2 <- rbind(RENT.NEWLA, RENTUS)
RENT.merge2$YEAR <- as.numeric(RENT.merge2$YEAR)
RENT.merge2$s.treat <- ifelse(RENT.merge2$state == "LA" & RENT.merge2$YEAR > 2005, 1, 0)
## Balance the panel by NS (one row per unit-year, 2001-2015)
unq <- data.frame(NS = rep(unique(RENT.merge2$NS), 15))
unq <- arrange(unq, NS)
unq$YEAR <- rep(2001:2015, length(unique(RENT.merge2$NS)))
Ballance.RENT <- left_join(unq, RENT.merge2, by = c("YEAR", "NS"))
Ballance.RENT <- Ballance.RENT %>%
  group_by(NS) %>%
  filter(!any(is.na(RENT_0)))
Ballance.RENT$lg <- log(Ballance.RENT$RENT_0)
Ballance.RENT$NS <- factor(Ballance.RENT$NS)
Ballance.RENT <- data.frame(Ballance.RENT)
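As a quick sanity check (a sketch added here, not part of the original pipeline), every remaining NS unit should contribute exactly 15 yearly observations with a non-missing rent:
Ballance.RENT %>%
  count(NS, name = "n_years") %>%
  summarise(all_balanced = all(n_years == 15), n_units = n())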
panelp <- panel.matrices(Ballance.RENT, unit = "NS", time = "YEAR", outcome = "lg", treatment = "s.treat")
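panel.matrices() returns the outcome matrix together with N0 (the number of control units) and T0 (the number of pre-treatment periods); a quick look at these dimensions (a sketch, not in the original code):
dim(panelp$Y)  # units x years
panelp$N0      # number of control units
panelp$T0      # number of pre-treatment years (here 2001-2005)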
## SDID Estimate
r0 <- synthdid_estimate(Y = panelp$Y, N0 = panelp$N0, T0 = panelp$T0)
plot(r0, overlay = 1) +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.box.background = element_blank()) +
  ylab("RENT 0") + xlab("Year")
## SC Estimate
sc0 <- sc_estimate(Y = panelp$Y, N0 = panelp$N0, T0 = panelp$T0)
plot(sc0) +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.box.background = element_blank()) +
  ylab("RENT 0") + xlab("Year")
The estimator functions whose fitted objects are later used to extract the unit and time weights are defined below.
### Unit and Time Weights
# Matrix-completion (MC) estimator: fit mcnnm_cv on the untreated block and
# average the treated-block gap between observed and fitted outcomes.
mc_estimate = function(Y, N0, T0) {
  N1 = nrow(Y) - N0
  T1 = ncol(Y) - T0
  W <- outer(c(rep(0, N0), rep(1, N1)), c(rep(0, T0), rep(1, T1)))
  mc_pred <- mcnnm_cv(Y, 1 - W, num_lam_L = 20)
  mc_fit <- mc_pred$L + outer(mc_pred$u, mc_pred$v, '+')
  mc_est <- sum(W * (Y - mc_fit)) / sum(W)
  mc_est
}
# Placebo standard error for the MC estimator: permute the control units so the
# last N1 act as pseudo-treated, then take the sd of the resulting estimates.
mc_placebo_se = function(Y, N0, T0, replications = 200) {
  N1 = nrow(Y) - N0
  theta = function(ind) { mc_estimate(Y[ind, ], length(ind) - N1, T0) }
  sqrt((replications - 1) / replications) * sd(replicate(replications, theta(sample(1:N0))))
}
# DIFP: SDID with uniform time weights, so only the unit weights are estimated.
difp_estimate = function(Y, N0, T0) {
  synthdid_estimate(Y, N0, T0, weights = list(lambda = rep(1/T0, T0)), eta.omega = 1e-6)
}
# Synthetic control with additional regularization on the unit weights.
sc_estimate_reg = function(Y, N0, T0) {
  sc_estimate(Y, N0, T0, eta.omega = ((nrow(Y) - N0) * (ncol(Y) - T0))^(1/4))
}
# DIFP with the package's default regularization of the unit weights.
difp_estimate_reg = function(Y, N0, T0) {
  synthdid_estimate(Y, N0, T0, weights = list(lambda = rep(1/T0, T0)))
}
estimators <- list(did = did_estimate,
sc = sc_estimate,
sdid = synthdid_estimate,
difp = difp_estimate,
mc = mc_estimate,
sc_reg = sc_estimate_reg,
difp_reg = difp_estimate_reg)
##
estimates <- lapply(estimators, function(estimator) {
estimator(panelp$Y, panelp$N0, panelp$T0)
})
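A compact summary of the point estimates from all seven estimators (a sketch; xtable is already loaded if a LaTeX table is wanted instead):
est.table <- data.frame(estimator = names(estimates),
                        estimate = round(sapply(estimates, as.numeric), 4))
est.table
# print(xtable(est.table), include.rownames = FALSE)  # optional LaTeX output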
unit.weights <- synthdid_controls(estimates[1:3], weight.type='omega', mass=1)
time.weights <- synthdid_controls(estimates[1:3], weight.type='lambda', mass=1)
unit.df <- as.data.frame(round(unit.weights[rev(1:nrow(unit.weights)), ], 3)) %>%
  rownames_to_column(var = "NS")
time.df <- as.data.frame(round(time.weights, 3)) %>%
  rownames_to_column(var = "YEAR")
head(unit.df, 10)
## NS did sc sdid
## 1 abileneTX 0.006 0 0.000
## 2 akronOH 0.006 0 0.020
## 3 albanyGA 0.006 0 0.003
## 4 albuquerqueNM 0.006 0 0.000
## 5 altoonaPA 0.006 0 0.000
## 6 amarilloTX 0.006 0 0.008
## 7 anchorageAK 0.006 0 0.000
## 8 annistonAL 0.006 0 0.002
## 9 annMI 0.006 0 0.005
## 10 ashevilleNC 0.006 0 0.000
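The SDID unit weights are sparse, so a bar chart of the largest ones gives a quick sense of which control MSAs drive the synthetic comparison (a sketch using the unit.df columns above):
unit.df %>%
  arrange(desc(sdid)) %>%
  slice_head(n = 15) %>%
  ggplot(aes(x = reorder(NS, sdid), y = sdid)) +
  geom_col() +
  coord_flip() +
  xlab("Control MSA") + ylab("SDID unit weight")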
MSA_code <- Ballance.RENT %>%
distinct(NS, MSA.Code)
# Adding MSA code to our unit weights data
unit.df1 <- inner_join(MSA_code, unit.df, by = "NS")
map_df <- unit.df1 %>%
mutate(sc_mp = ifelse(sc > 0, 1, 0),
sdid_mp = ifelse(sdid > 0, 1, 0))
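Before mapping, a quick tally of how many control MSAs receive positive weight under each estimator (sketch):
map_df %>%
  summarise(n_sc_positive = sum(sc_mp), n_sdid_positive = sum(sdid_mp))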
# Load the CBSA shapefile used to map each MSA
land_shapefile <- "/Users/owad/Downloads/tl_2019_us_cbsa/tl_2019_us_cbsa.shp"
land_areas <- st_read(land_shapefile) %>%
rename(MSA.Code = CBSAFP) %>%
mutate(MSA.Code = as.numeric(MSA.Code))
## Reading layer `tl_2019_us_cbsa' from data source
## `/Users/owad/Downloads/tl_2019_us_cbsa/tl_2019_us_cbsa.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 938 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -178.4436 ymin: 17.83151 xmax: -65.42271 ymax: 65.45352
## Geodetic CRS: NAD83
# Merging with weights data
msa_area <- inner_join(map_df, land_areas, by = "MSA.Code")
map_sdid <- msa_area %>%
filter(sdid_mp == 1)
map_sc <- msa_area %>%
filter(sc_mp == 1)
usa_map <- map_data("state")
usa_plot <- ggplot() +
geom_polygon(data = usa_map, aes(x = long, y = lat, group = group), fill = "white", color = "black") +
labs(title = "Control MSAs") +
theme_map() +
theme(
plot.title = element_text(size = 16, hjust = 0.5),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(c(1, 1, 1, 1), "cm")
)
# Overlay MSAs with positive SDID weight (sdid_mp == 1) on the USA map
usa_plot +
geom_sf(data = map_sdid,
aes(geometry = geometry), fill = "lightblue", color = "black") +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
plot.margin = unit(c(1, 1, 1, 1), "cm")
)
# Overlay MSAs with positive SC weight (sc_mp == 1) on the USA map
usa_plot +
geom_sf(data = map_sc,
aes(geometry = geometry), fill = "lightgreen", color = "black") +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
plot.margin = unit(c(1, 1, 1, 1), "cm")
)
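If the maps need to be exported, each composed plot can be assigned to an object and written out with ggsave(); the file names below are placeholders:
sdid_map <- usa_plot +
  geom_sf(data = map_sdid, aes(geometry = geometry), fill = "lightblue", color = "black")
sc_map <- usa_plot +
  geom_sf(data = map_sc, aes(geometry = geometry), fill = "lightgreen", color = "black")
ggsave("control_msas_sdid.png", sdid_map, width = 10, height = 6, dpi = 300)
ggsave("control_msas_sc.png", sc_map, width = 10, height = 6, dpi = 300)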