Homework3

##Load and merge data

setwd("/Users/asiyavalidova/Dropbox/Demography/Causal inference/week8")
#3 state level variables: poverty rate, state minimum wage, unemployment rate
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(haven)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(tidycensus)
eitc<-read_dta("eitcRR.dta")
head(eitc)

ukcpr <- read.csv("UKCPR.csv")
head(ukcpr)

ukcpr<-filter(ukcpr,1991<=ukcpr$year & ukcpr$year<=1996)
ukcpr<-ukcpr%>%
    select(state_name,year,Unemployment.rate,Poverty.Rate,State.Minimum.Wage)
word_codes<-read.table("word_codes.txt",sep=";",header=TRUE)
word_codes$State<-" "
for(i in 1:61)
  word_codes$State[i]<-strsplit(word_codes$Label, ", ")[[i]][1]
word_codes<-subset(word_codes,select=-c(Label))  
eitc<-merge(eitc,word_codes,by.x="state",by.y="Code")
post_codes<-read.table("postal_codes.txt",sep=";",header=TRUE)
ukcpr<-merge(ukcpr,post_codes,by.x="state_name",by.y="Postal.Code")
ukcpr<-subset(ukcpr,select=-c(state_name))
eitc<-merge(eitc,ukcpr,by.x=c("year","State"),by.y=c("year","state"),all.x=TRUE,all.y=FALSE)
head(eitc)

Sample means

library(table1)

## 
## Attaching package: 'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

eitc<-eitc%>%
        mutate(
                 children1=Recode(children,recodes="0='no children';1='one child';2:9='two or more children'", as.factor=T),
                 employed=ifelse(earn>0,1,0)
        )
table1(~age+ed+finc+earn+unearn+Unemployment.rate+Poverty.Rate+State.Minimum.Wage|children1,data=eitc)

	no children (N=5927)	one child (N=3058)	two or more children (N=4761)	Overall (N=13746)
age
Mean (SD)	38.5 (11.0)	33.8 (9.90)	32.0 (7.63)	35.2 (10.2)
Median [Min, Max]	40.0 [20.0, 54.0]	33.0 [20.0, 54.0]	31.0 [20.0, 54.0]	34.0 [20.0, 54.0]
ed
Mean (SD)	8.55 (2.89)	8.99 (2.40)	9.01 (2.42)	8.81 (2.64)
Median [Min, Max]	10.0 [0, 11.0]	10.0 [0, 11.0]	10.0 [0, 11.0]	10.0 [0, 11.0]
finc
Mean (SD)	18600 (23000)	13900 (18600)	12000 (13600)	15300 (19400)
Median [Min, Max]	11900 [0, 576000]	8900 [0, 411000]	8240 [0, 231000]	9640 [0, 576000]
earn
Mean (SD)	13800 (21300)	9930 (17500)	6610 (12900)	10400 (18200)
Median [Min, Max]	7660 [0, 538000]	3660 [0, 366000]	151 [0, 162000]	3330 [0, 538000]
unearn
Mean (SD)	4.80 (8.50)	4.01 (5.74)	5.37 (5.90)	4.82 (7.12)
Median [Min, Max]	1.25 [0, 134]	2.44 [0, 103]	4.47 [0, 81.3]	2.97 [0, 134]
Unemployment.rate
Mean (SD)	6.66 (1.47)	6.80 (1.44)	6.86 (1.43)	6.76 (1.45)
Median [Min, Max]	6.60 [2.60, 11.3]	6.90 [2.60, 11.3]	6.90 [2.60, 11.3]	6.90 [2.60, 11.3]
Poverty.Rate
Mean (SD)	14.9 (3.69)	15.2 (3.62)	15.4 (3.69)	15.1 (3.68)
Median [Min, Max]	15.4 [5.30, 26.4]	15.7 [5.30, 26.4]	15.7 [5.30, 26.4]	15.7 [5.30, 26.4]
State.Minimum.Wage
Mean (SD)	4.16 (0.476)	4.18 (0.464)	4.18 (0.451)	4.17 (0.465)
Median [Min, Max]	4.25 [1.60, 5.25]	4.25 [1.60, 5.25]	4.25 [1.60, 5.25]	4.25 [1.60, 5.25]

## Conditional earning
eitc<-eitc%>%
        mutate(
                 cond_earning=ifelse(earn>0,earn,NA)
              )
# mean
mean_cond<-tapply(eitc$cond_earning,eitc$children1,function(x) mean(x,na.rm=T))
mean_cond

##          no children            one child two or more children 
##             20202.39             15657.90             12989.73

Variables “treatment” and “post”

eitc<-eitc%>%
      mutate(
                 treatment=ifelse(children>0,1,0),
                 post=ifelse(year>=1993,1,0)
            )
head(eitc)

Defining variable employment_rate

all<-eitc%>%
     group_by(year, State,children1 )%>%
     summarize(
                all=n()
              )

## `summarise()` regrouping output by 'year', 'State' (override with `.groups` argument)

employed<-eitc%>%
     group_by(year, State,children1,employed )%>%
     summarize(
                empl=n()
              )

## `summarise()` regrouping output by 'year', 'State', 'children1' (override with `.groups` argument)

employed<-merge(employed,all,by.x=c("year","State","children1"),by.y=c("year","State","children1"),all.x=TRUE,all.y=FALSE)
employed$employment_rate<-employed$empl*100/employed$all
employed<-employed%>%
     filter(employed==1)
employed<-employed%>%
    select(year,State,children1,employment_rate)
eitc<-merge(eitc,employed,by.x=c("year","State","children1"),by.y=c("year","State","children1"),all.x=TRUE,all.y=FALSE) 
head(eitc)

## Graph
eitc_control<-filter(eitc,eitc$treatment==0)
eitc_treatment<-filter(eitc,eitc$treatment==1)
mean_annual_control<-tapply(eitc_control$employment_rate,eitc_control$year,function(x) mean(x,na.rm=T))
mean_annual_treatment<-tapply(eitc_treatment$employment_rate,eitc_treatment$year,function(x) mean(x,na.rm=T))
years<-1991:1996
plot(mean_annual_control~years,main="Employment rates",type="b",xlab="Years",ylim=c(50,80),ylab="Rates",lwd=2,col="blue")
lines(mean_annual_treatment~years,type="b",lwd=2, col="red")
legend("topright",
c("Control","Treatment"),
fill=c("blue","red"))

## The graph demonstrates the effects of the intervention, we can see raise in employment for the treatment group (single women with children), but not for the control group.

Regression

fit<-lm(employment_rate~treatment+post+treatment*post,data=eitc)
summary(fit)

## 
## Call:
## lm(formula = employment_rate ~ treatment + post + treatment * 
##     post, data = eitc)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.345  -8.637   0.141   9.128  46.294 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     68.7966     0.3285 209.443   <2e-16 ***
## treatment      -15.0910     0.4332 -34.839   <2e-16 ***
## post            -0.7850     0.4115  -1.908   0.0564 .  
## treatment:post   4.6178     0.5445   8.481   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.21 on 13683 degrees of freedom
##   (59 observations deleted due to missingness)
## Multiple R-squared:  0.1426, Adjusted R-squared:  0.1424 
## F-statistic: 758.5 on 3 and 13683 DF,  p-value: < 2.2e-16

mean_na<-function(x)
{
   return (mean(x,na.rm=TRUE))
}
est<-eitc%>%
   group_by(treatment,post)%>%
       summarize(
                  rate=mean_na(employment_rate)
                )

## `summarise()` regrouping output by 'treatment' (override with `.groups` argument)

est

#DiD estimate
#57.5-53.7-(68-68.8)=4.6 
# Equals the coefficient treatment:post in the regression

Homework3

Asiya

4/11/2021

Sample means

Variables “treatment” and “post”

Defining variable employment_rate

Regression