##Load and merge data
setwd("/Users/asiyavalidova/Dropbox/Demography/Causal inference/week8")
#3 state level variables: poverty rate, state minimum wage, unemployment rate
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(haven)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(tidycensus)
eitc<-read_dta("eitcRR.dta")
head(eitc)
ukcpr <- read.csv("UKCPR.csv")
head(ukcpr)
ukcpr<-filter(ukcpr,1991<=ukcpr$year & ukcpr$year<=1996)
ukcpr<-ukcpr%>%
select(state_name,year,Unemployment.rate,Poverty.Rate,State.Minimum.Wage)
word_codes<-read.table("word_codes.txt",sep=";",header=TRUE)
word_codes$State<-" "
for(i in 1:61)
word_codes$State[i]<-strsplit(word_codes$Label, ", ")[[i]][1]
word_codes<-subset(word_codes,select=-c(Label))
eitc<-merge(eitc,word_codes,by.x="state",by.y="Code")
post_codes<-read.table("postal_codes.txt",sep=";",header=TRUE)
ukcpr<-merge(ukcpr,post_codes,by.x="state_name",by.y="Postal.Code")
ukcpr<-subset(ukcpr,select=-c(state_name))
eitc<-merge(eitc,ukcpr,by.x=c("year","State"),by.y=c("year","state"),all.x=TRUE,all.y=FALSE)
head(eitc)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
eitc<-eitc%>%
mutate(
children1=Recode(children,recodes="0='no children';1='one child';2:9='two or more children'", as.factor=T),
employed=ifelse(earn>0,1,0)
)
table1(~age+ed+finc+earn+unearn+Unemployment.rate+Poverty.Rate+State.Minimum.Wage|children1,data=eitc)
| no children (N=5927) |
one child (N=3058) |
two or more children (N=4761) |
Overall (N=13746) |
|
|---|---|---|---|---|
| age | ||||
| Mean (SD) | 38.5 (11.0) | 33.8 (9.90) | 32.0 (7.63) | 35.2 (10.2) |
| Median [Min, Max] | 40.0 [20.0, 54.0] | 33.0 [20.0, 54.0] | 31.0 [20.0, 54.0] | 34.0 [20.0, 54.0] |
| ed | ||||
| Mean (SD) | 8.55 (2.89) | 8.99 (2.40) | 9.01 (2.42) | 8.81 (2.64) |
| Median [Min, Max] | 10.0 [0, 11.0] | 10.0 [0, 11.0] | 10.0 [0, 11.0] | 10.0 [0, 11.0] |
| finc | ||||
| Mean (SD) | 18600 (23000) | 13900 (18600) | 12000 (13600) | 15300 (19400) |
| Median [Min, Max] | 11900 [0, 576000] | 8900 [0, 411000] | 8240 [0, 231000] | 9640 [0, 576000] |
| earn | ||||
| Mean (SD) | 13800 (21300) | 9930 (17500) | 6610 (12900) | 10400 (18200) |
| Median [Min, Max] | 7660 [0, 538000] | 3660 [0, 366000] | 151 [0, 162000] | 3330 [0, 538000] |
| unearn | ||||
| Mean (SD) | 4.80 (8.50) | 4.01 (5.74) | 5.37 (5.90) | 4.82 (7.12) |
| Median [Min, Max] | 1.25 [0, 134] | 2.44 [0, 103] | 4.47 [0, 81.3] | 2.97 [0, 134] |
| Unemployment.rate | ||||
| Mean (SD) | 6.66 (1.47) | 6.80 (1.44) | 6.86 (1.43) | 6.76 (1.45) |
| Median [Min, Max] | 6.60 [2.60, 11.3] | 6.90 [2.60, 11.3] | 6.90 [2.60, 11.3] | 6.90 [2.60, 11.3] |
| Poverty.Rate | ||||
| Mean (SD) | 14.9 (3.69) | 15.2 (3.62) | 15.4 (3.69) | 15.1 (3.68) |
| Median [Min, Max] | 15.4 [5.30, 26.4] | 15.7 [5.30, 26.4] | 15.7 [5.30, 26.4] | 15.7 [5.30, 26.4] |
| State.Minimum.Wage | ||||
| Mean (SD) | 4.16 (0.476) | 4.18 (0.464) | 4.18 (0.451) | 4.17 (0.465) |
| Median [Min, Max] | 4.25 [1.60, 5.25] | 4.25 [1.60, 5.25] | 4.25 [1.60, 5.25] | 4.25 [1.60, 5.25] |
## Conditional earning
eitc<-eitc%>%
mutate(
cond_earning=ifelse(earn>0,earn,NA)
)
# mean
mean_cond<-tapply(eitc$cond_earning,eitc$children1,function(x) mean(x,na.rm=T))
mean_cond
## no children one child two or more children
## 20202.39 15657.90 12989.73
eitc<-eitc%>%
mutate(
treatment=ifelse(children>0,1,0),
post=ifelse(year>=1993,1,0)
)
head(eitc)
all<-eitc%>%
group_by(year, State,children1 )%>%
summarize(
all=n()
)
## `summarise()` regrouping output by 'year', 'State' (override with `.groups` argument)
employed<-eitc%>%
group_by(year, State,children1,employed )%>%
summarize(
empl=n()
)
## `summarise()` regrouping output by 'year', 'State', 'children1' (override with `.groups` argument)
employed<-merge(employed,all,by.x=c("year","State","children1"),by.y=c("year","State","children1"),all.x=TRUE,all.y=FALSE)
employed$employment_rate<-employed$empl*100/employed$all
employed<-employed%>%
filter(employed==1)
employed<-employed%>%
select(year,State,children1,employment_rate)
eitc<-merge(eitc,employed,by.x=c("year","State","children1"),by.y=c("year","State","children1"),all.x=TRUE,all.y=FALSE)
head(eitc)
## Graph
eitc_control<-filter(eitc,eitc$treatment==0)
eitc_treatment<-filter(eitc,eitc$treatment==1)
mean_annual_control<-tapply(eitc_control$employment_rate,eitc_control$year,function(x) mean(x,na.rm=T))
mean_annual_treatment<-tapply(eitc_treatment$employment_rate,eitc_treatment$year,function(x) mean(x,na.rm=T))
years<-1991:1996
plot(mean_annual_control~years,main="Employment rates",type="b",xlab="Years",ylim=c(50,80),ylab="Rates",lwd=2,col="blue")
lines(mean_annual_treatment~years,type="b",lwd=2, col="red")
legend("topright",
c("Control","Treatment"),
fill=c("blue","red"))
## The graph demonstrates the effects of the intervention, we can see raise in employment for the treatment group (single women with children), but not for the control group.
fit<-lm(employment_rate~treatment+post+treatment*post,data=eitc)
summary(fit)
##
## Call:
## lm(formula = employment_rate ~ treatment + post + treatment *
## post, data = eitc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.345 -8.637 0.141 9.128 46.294
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.7966 0.3285 209.443 <2e-16 ***
## treatment -15.0910 0.4332 -34.839 <2e-16 ***
## post -0.7850 0.4115 -1.908 0.0564 .
## treatment:post 4.6178 0.5445 8.481 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.21 on 13683 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.1426, Adjusted R-squared: 0.1424
## F-statistic: 758.5 on 3 and 13683 DF, p-value: < 2.2e-16
mean_na<-function(x)
{
return (mean(x,na.rm=TRUE))
}
est<-eitc%>%
group_by(treatment,post)%>%
summarize(
rate=mean_na(employment_rate)
)
## `summarise()` regrouping output by 'treatment' (override with `.groups` argument)
est
#DiD estimate
#57.5-53.7-(68-68.8)=4.6
# Equals the coefficient treatment:post in the regression