##Load and merge data

setwd("/Users/asiyavalidova/Dropbox/Demography/Causal inference/week8")
#3 state level variables: poverty rate, state minimum wage, unemployment rate
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(haven)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(tidycensus)
eitc<-read_dta("eitcRR.dta")
head(eitc)
ukcpr <- read.csv("UKCPR.csv")
head(ukcpr)
ukcpr<-filter(ukcpr,1991<=ukcpr$year & ukcpr$year<=1996)
ukcpr<-ukcpr%>%
    select(state_name,year,Unemployment.rate,Poverty.Rate,State.Minimum.Wage)
word_codes<-read.table("word_codes.txt",sep=";",header=TRUE)
word_codes$State<-" "
for(i in 1:61)
  word_codes$State[i]<-strsplit(word_codes$Label, ", ")[[i]][1]
word_codes<-subset(word_codes,select=-c(Label))  
eitc<-merge(eitc,word_codes,by.x="state",by.y="Code")
post_codes<-read.table("postal_codes.txt",sep=";",header=TRUE)
ukcpr<-merge(ukcpr,post_codes,by.x="state_name",by.y="Postal.Code")
ukcpr<-subset(ukcpr,select=-c(state_name))
eitc<-merge(eitc,ukcpr,by.x=c("year","State"),by.y=c("year","state"),all.x=TRUE,all.y=FALSE)
head(eitc)

Sample means

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
eitc<-eitc%>%
        mutate(
                 children1=Recode(children,recodes="0='no children';1='one child';2:9='two or more children'", as.factor=T),
                 employed=ifelse(earn>0,1,0)
        )
table1(~age+ed+finc+earn+unearn+Unemployment.rate+Poverty.Rate+State.Minimum.Wage|children1,data=eitc)
no children
(N=5927)
one child
(N=3058)
two or more children
(N=4761)
Overall
(N=13746)
age
Mean (SD) 38.5 (11.0) 33.8 (9.90) 32.0 (7.63) 35.2 (10.2)
Median [Min, Max] 40.0 [20.0, 54.0] 33.0 [20.0, 54.0] 31.0 [20.0, 54.0] 34.0 [20.0, 54.0]
ed
Mean (SD) 8.55 (2.89) 8.99 (2.40) 9.01 (2.42) 8.81 (2.64)
Median [Min, Max] 10.0 [0, 11.0] 10.0 [0, 11.0] 10.0 [0, 11.0] 10.0 [0, 11.0]
finc
Mean (SD) 18600 (23000) 13900 (18600) 12000 (13600) 15300 (19400)
Median [Min, Max] 11900 [0, 576000] 8900 [0, 411000] 8240 [0, 231000] 9640 [0, 576000]
earn
Mean (SD) 13800 (21300) 9930 (17500) 6610 (12900) 10400 (18200)
Median [Min, Max] 7660 [0, 538000] 3660 [0, 366000] 151 [0, 162000] 3330 [0, 538000]
unearn
Mean (SD) 4.80 (8.50) 4.01 (5.74) 5.37 (5.90) 4.82 (7.12)
Median [Min, Max] 1.25 [0, 134] 2.44 [0, 103] 4.47 [0, 81.3] 2.97 [0, 134]
Unemployment.rate
Mean (SD) 6.66 (1.47) 6.80 (1.44) 6.86 (1.43) 6.76 (1.45)
Median [Min, Max] 6.60 [2.60, 11.3] 6.90 [2.60, 11.3] 6.90 [2.60, 11.3] 6.90 [2.60, 11.3]
Poverty.Rate
Mean (SD) 14.9 (3.69) 15.2 (3.62) 15.4 (3.69) 15.1 (3.68)
Median [Min, Max] 15.4 [5.30, 26.4] 15.7 [5.30, 26.4] 15.7 [5.30, 26.4] 15.7 [5.30, 26.4]
State.Minimum.Wage
Mean (SD) 4.16 (0.476) 4.18 (0.464) 4.18 (0.451) 4.17 (0.465)
Median [Min, Max] 4.25 [1.60, 5.25] 4.25 [1.60, 5.25] 4.25 [1.60, 5.25] 4.25 [1.60, 5.25]
## Conditional earning
eitc<-eitc%>%
        mutate(
                 cond_earning=ifelse(earn>0,earn,NA)
              )
# mean
mean_cond<-tapply(eitc$cond_earning,eitc$children1,function(x) mean(x,na.rm=T))
mean_cond
##          no children            one child two or more children 
##             20202.39             15657.90             12989.73

Variables “treatment” and “post”

eitc<-eitc%>%
      mutate(
                 treatment=ifelse(children>0,1,0),
                 post=ifelse(year>=1993,1,0)
            )
head(eitc)

Defining variable employment_rate

all<-eitc%>%
     group_by(year, State,children1 )%>%
     summarize(
                all=n()
              )
## `summarise()` regrouping output by 'year', 'State' (override with `.groups` argument)
employed<-eitc%>%
     group_by(year, State,children1,employed )%>%
     summarize(
                empl=n()
              )
## `summarise()` regrouping output by 'year', 'State', 'children1' (override with `.groups` argument)
employed<-merge(employed,all,by.x=c("year","State","children1"),by.y=c("year","State","children1"),all.x=TRUE,all.y=FALSE)
employed$employment_rate<-employed$empl*100/employed$all
employed<-employed%>%
     filter(employed==1)
employed<-employed%>%
    select(year,State,children1,employment_rate)
eitc<-merge(eitc,employed,by.x=c("year","State","children1"),by.y=c("year","State","children1"),all.x=TRUE,all.y=FALSE) 
head(eitc)
## Graph
eitc_control<-filter(eitc,eitc$treatment==0)
eitc_treatment<-filter(eitc,eitc$treatment==1)
mean_annual_control<-tapply(eitc_control$employment_rate,eitc_control$year,function(x) mean(x,na.rm=T))
mean_annual_treatment<-tapply(eitc_treatment$employment_rate,eitc_treatment$year,function(x) mean(x,na.rm=T))
years<-1991:1996
plot(mean_annual_control~years,main="Employment rates",type="b",xlab="Years",ylim=c(50,80),ylab="Rates",lwd=2,col="blue")
lines(mean_annual_treatment~years,type="b",lwd=2, col="red")
legend("topright",
c("Control","Treatment"),
fill=c("blue","red"))

## The graph demonstrates the effects of the intervention, we can see raise in employment for the treatment group (single women with children), but not for the control group. 

Regression

fit<-lm(employment_rate~treatment+post+treatment*post,data=eitc)
summary(fit)
## 
## Call:
## lm(formula = employment_rate ~ treatment + post + treatment * 
##     post, data = eitc)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.345  -8.637   0.141   9.128  46.294 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     68.7966     0.3285 209.443   <2e-16 ***
## treatment      -15.0910     0.4332 -34.839   <2e-16 ***
## post            -0.7850     0.4115  -1.908   0.0564 .  
## treatment:post   4.6178     0.5445   8.481   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.21 on 13683 degrees of freedom
##   (59 observations deleted due to missingness)
## Multiple R-squared:  0.1426, Adjusted R-squared:  0.1424 
## F-statistic: 758.5 on 3 and 13683 DF,  p-value: < 2.2e-16
mean_na<-function(x)
{
   return (mean(x,na.rm=TRUE))
}
est<-eitc%>%
   group_by(treatment,post)%>%
       summarize(
                  rate=mean_na(employment_rate)
                )  
## `summarise()` regrouping output by 'treatment' (override with `.groups` argument)
est
#DiD estimate
#57.5-53.7-(68-68.8)=4.6 
# Equals the coefficient treatment:post in the regression