Adding state control variables. I extracted some data from University of Kentucky Center for Poverty Research national Welfare Database (UKCPR_National_Data_Final_Update). From the dataset, please first identify three important socioeconomic and/or demographic factors at the state level, and then merge these variables to the main eitcR.dta file.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(haven)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
data<-read_dta('C:/Users/PCMcC/Documents/Causal Inference/Homeworks/Assignment 3/eitcR.dta')
data2<-read.csv("C:/Users/PCMcC/Documents/Causal Inference/Homeworks/Assignment 3/UKCPR_National_Welfare_Data_Final_Update_20180116_0.csv")
The following variables are selected from “data2” for our analysis: unemployment rate, minimum wage and povert rate
attach(data2)
vars<-cbind(state, year, state_name, unemployment_rate, poverty_rate, state_min_wage)
data3<-data.frame(vars)
I include the statement all.x = TRUE in order to avoid duplicates and maintain my original sample size of N=1887.
merge_data<-merge(data,data3, by = c("state", "year"),all.x = TRUE)
#rm(data2, data3)
Evaluation of the merge shows that the number of observations is consistent for all columns. I filter complete cases in order to eliminate states in which I was unable to get information leaving me with a sample of 5,809 individuals.
library(stargazer)
df <- data.frame(merge_data)
df<-df %>% filter(complete.cases(.))
cols <- c('children', 'nonwhite', "earn", "age", "ed", "work", "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(df[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd")
)
##
## =============================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## ---------------------------------------------------------------------------------------------
## children 5,809 0 0 1 2 9 1.189 1.404
## nonwhite 5,809 0 0 1 1 1 0.542 0.498
## earn 5,809 0.000 0.000 1,624.494 14,439.450 366,095.500 10,280.960 17,760.500
## age 5,809 20 27 35 44 54 35.514 10.118
## ed 5,809 0 7 10 11 11 9.019 2.462
## work 5,809 0 0 0 1 1 0.469 0.499
## unearn 5,809 0 0.1 4.3 7.4 134 5.472 7.307
## unemployment_rate 5,809 2.900 5.000 6.100 6.900 9.200 6.100 1.356
## poverty_rate 5,809 7.500 10.100 11.800 14.400 25.500 12.405 3.018
## state_min_wage 5,809 1.600 4.250 4.250 4.250 5.250 4.162 0.552
## ---------------------------------------------------------------------------------------------
library(dplyr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
names(df)
## [1] "state" "year" "children"
## [4] "nonwhite" "finc" "earn"
## [7] "age" "ed" "work"
## [10] "unearn" "state_name" "unemployment_rate"
## [13] "poverty_rate" "state_min_wage"
#Number of children Variables
df$withchildren<-ifelse(df$children>=1,1,0)
df$onechild<-ifelse(df$children==1,1,0)
df$twochildrenormore<-ifelse(df$children>=2,1,0)
df$nochild<-ifelse(df$children==0,1,0)
df$childrenx <- Recode(df$children, recodes = "1:9='withchildren'; 0='nochilren'; else=NA")
df$childrenx<-as.factor(df$childrenx)
#Employment
df$rec_work<-ifelse(df$work==1 & df$earn >=1, 1,0)
df$rec_nowork<-ifelse(df$work==0 & df$earn<=0,1,0)
#Pre-Treatment and Expansion Variables
df$expansion<-ifelse(df$year>=1993,1,0)
df$pretreat<-ifelse(df$year<=1992,1,0)
ch0 <- subset(df, children==0)
cols <- c('nonwhite', "earn", "age", "ed", "work", "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(ch0[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))
##
## =====================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## -------------------------------------------------------------------------------------
## nonwhite 2,564 0 0 0 1 1 0.445 0.497
## earn 2,564 0 0 7,067.5 20,003.1 176,050 13,925.040 19,645.530
## age 2,564 20 29 41 49 54 39.068 10.937
## ed 2,564 0 7 10 11 11 8.670 2.789
## work 2,564 0 0 1 1 1 0.549 0.498
## unearn 2,564 0.000 0.000 2.222 6.874 134.058 5.131 8.629
## unemployment_rate 2,564 2.900 5.000 6.100 7.000 9.200 6.098 1.395
## poverty_rate 2,564 7.500 10.200 11.800 14.500 25.500 12.536 3.071
## state_min_wage 2,564 1.600 4.250 4.250 4.250 5.250 4.135 0.567
## -------------------------------------------------------------------------------------
ch1<- subset(df, children==1)
cols <- c('nonwhite', "earn", "age", "ed", "work", "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(ch1[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))
##
## =====================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## -------------------------------------------------------------------------------------
## nonwhite 1,249 0 0 1 1 1 0.525 0.500
## earn 1,249 0 0 2,159.0 14,176.4 366,095 10,300.080 19,575.000
## age 1,249 20 24 34 42 54 33.862 9.862
## ed 1,249 0 7 10 11 11 9.219 2.176
## work 1,249 0 0 0 1 1 0.486 0.500
## unearn 1,249 0.000 0.295 3.684 6.383 92.926 4.812 6.006
## unemployment_rate 1,249 2.900 5.000 6.100 6.900 9.200 6.100 1.332
## poverty_rate 1,249 7.500 10.100 11.800 14.400 25.500 12.429 3.048
## state_min_wage 1,249 1.600 4.250 4.250 4.250 5.250 4.131 0.593
## -------------------------------------------------------------------------------------
ch2<- subset(df, children>=2)
cols <- c('nonwhite', "earn", "age", "ed", "work", "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(ch2[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))
##
## ========================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## ----------------------------------------------------------------------------------------
## nonwhite 1,996 0 0 1 1 1 0.675 0.468
## earn 1,996 0.000 0.000 0.000 6,572.419 118,847.800 5,587.935 11,986.770
## age 1,996 20 26 32 37 54 31.983 7.288
## ed 1,996 0 9 10 11 11 9.342 2.100
## work 1,996 0 0 0 1 1 0.357 0.479
## unearn 1,996 0 2.7 5.6 8.2 81 6.324 6.006
## unemployment_rate 1,996 2.900 5.000 6.100 6.900 9.200 6.103 1.320
## poverty_rate 1,996 7.500 10.100 11.200 14.400 25.500 12.223 2.923
## state_min_wage 1,996 1.600 4.250 4.250 4.250 5.250 4.216 0.499
## ----------------------------------------------------------------------------------------
#Women employed
em1 <- subset(df, work==1 & earn>=1)
cols <- c('nonwhite', "earn", "age", "ed", "work", "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(em1[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))
##
## ===============================================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## -----------------------------------------------------------------------------------------------
## nonwhite 2,726 0 0 0 1 1 0.451 0.498
## earn 2,726 1.023 5,719.886 12,995.950 23,325.260 176,050.200 17,946.030 18,564.300
## age 2,726 20 27 36 45 54 36.050 10.203
## ed 2,726 0 7.5 10 11 11 9.241 2.282
## work 2,726 1 1 1 1 1 1.000 0.000
## unearn 2,726 0 0 0.6 4.9 108 3.469 6.371
## unemployment_rate 2,726 2.900 5.000 6.100 6.900 9.200 6.034 1.409
## poverty_rate 2,726 7.500 10.200 11.800 14.500 25.500 12.537 3.141
## state_min_wage 2,726 1.600 4.250 4.250 4.250 5.250 4.144 0.586
## -----------------------------------------------------------------------------------------------
#women Unemployed
em0<- subset(df, work==0 & earn==0)
cols <- c('nonwhite', "earn", "age", "ed", "work", "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(em0[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))
##
## ==============================================================================
## Statistic N Min Pctl(25) Median Pctl(75) Max Mean St. Dev.
## ------------------------------------------------------------------------------
## nonwhite 2,547 0 0 1 1 1 0.631 0.483
## earn 2,547 0 0 0 0 0 0.000 0.000
## age 2,547 20 26 33 43 54 34.672 9.849
## ed 2,547 0 7 10 11 11 8.902 2.521
## work 2,547 0 0 0 0 0 0.000 0.000
## unearn 2,547 0.000 3.672 5.967 8.485 134.058 6.914 6.756
## unemployment_rate 2,547 2.900 5.000 6.100 6.900 9.200 6.154 1.301
## poverty_rate 2,547 7.500 10.100 11.800 14.200 25.500 12.298 2.858
## state_min_wage 2,547 1.600 4.250 4.250 4.250 5.250 4.172 0.519
## ------------------------------------------------------------------------------
plot<-aggregate(df$work, by=list(df$year, df$childrenx), FUN=sum)
#install.packages("ggplot2")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
ggplot(plot, aes(fill=Group.2, y=x/5809*100 , x=Group.1)) +
geom_bar(position="dodge", stat="identity") + scale_fill_manual(values=c( "#E69F00", "#56B4E9"))+ theme_minimal() + scale_y_continuous(limits=c(0,5)) + scale_x_continuous(breaks=seq(1991, 1996, 1)) +
ggtitle("Annual Employment Rates by Year (1991 to 1996) for Single Women by Treatment") +
theme(plot.title = element_text(face="bold", size=10, hjust=0)) + theme(legend.title=element_blank()) + ylab("Percentage Rate")+xlab("Year") + geom_text(aes(label=round(x/5809*100,2) ),position=position_dodge(0.9))
## Unconditional difference-in-difference estimates
of the effect of the 1993 EITC expansion on employment of single women. Your table should be parallel to the format found in a typical differences-in-differences paper. Calculate estimates with all women with children as the treatment (single women with no children as the control), women with children as the control. Discuss these results.
#Create the interaction
did<-df$withchildren * df$expansion
#DID Estimator
didreg = lm(work ~ nochild + expansion + did, data = df)
summary(didreg)
##
## Call:
## lm(formula = work ~ nochild + expansion + did, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5672 -0.4117 -0.4032 0.4624 0.5968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.41167 0.01397 29.469 < 2e-16 ***
## nochild 0.15548 0.02115 7.351 2.24e-13 ***
## expansion -0.02955 0.02013 -1.468 0.142
## did 0.02109 0.02688 0.785 0.433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4941 on 5805 degrees of freedom
## Multiple R-squared: 0.02044, Adjusted R-squared: 0.01994
## F-statistic: 40.38 on 3 and 5805 DF, p-value: < 2.2e-16
reg1 <- lm(work ~ withchildren + did + scale(age) + nonwhite + ed, data = df)
summary(reg1)
##
## Call:
## lm(formula = work ~ withchildren + did + scale(age) + nonwhite +
## ed, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6872 -0.4519 -0.2966 0.4858 0.8614
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.429474 0.026100 16.455 < 2e-16 ***
## withchildren -0.120437 0.017453 -6.901 5.73e-12 ***
## did 0.001549 0.017551 0.088 0.9297
## scale(age) 0.016070 0.006803 2.362 0.0182 *
## nonwhite -0.148992 0.013040 -11.426 < 2e-16 ***
## ed 0.020761 0.002643 7.854 4.76e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.486 on 5803 degrees of freedom
## Multiple R-squared: 0.05259, Adjusted R-squared: 0.05178
## F-statistic: 64.43 on 5 and 5803 DF, p-value: < 2.2e-16
reg2 <- lm(work ~ (did + scale(age) + nonwhite + ed +unemployment_rate + poverty_rate + state_min_wage)*withchildren, data = df)
summary(reg2)
##
## Call:
## lm(formula = work ~ (did + scale(age) + nonwhite + ed + unemployment_rate +
## poverty_rate + state_min_wage) * withchildren, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7146 -0.4393 -0.2748 0.4771 0.8404
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4859358 0.1021325 4.758 2.00e-06 ***
## did -0.0403472 0.0201922 -1.998 0.045746 *
## scale(age) -0.0059237 0.0089029 -0.665 0.505840
## nonwhite -0.1377841 0.0195321 -7.054 1.94e-12 ***
## ed 0.0261223 0.0034443 7.584 3.88e-14 ***
## unemployment_rate -0.0050752 0.0071542 -0.709 0.478100
## poverty_rate 0.0007088 0.0033598 0.211 0.832923
## state_min_wage -0.0189002 0.0174667 -1.082 0.279265
## withchildren 0.0260217 0.1428319 0.182 0.855445
## did:withchildren NA NA NA NA
## scale(age):withchildren 0.0516190 0.0138346 3.731 0.000192 ***
## nonwhite:withchildren -0.0112088 0.0265281 -0.423 0.672658
## ed:withchildren -0.0106774 0.0053962 -1.979 0.047897 *
## unemployment_rate:withchildren -0.0314584 0.0103581 -3.037 0.002400 **
## poverty_rate:withchildren 0.0064888 0.0045983 1.411 0.158259
## state_min_wage:withchildren 0.0224460 0.0243120 0.923 0.355917
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4844 on 5794 degrees of freedom
## Multiple R-squared: 0.0603, Adjusted R-squared: 0.05803
## F-statistic: 26.56 on 14 and 5794 DF, p-value: < 2.2e-16
#DID PLACEBO Estimator
#Create the interaction
did2<-df$withchildren * df$pretreat
#DID Estimator
didreg2 = lm(work ~ withchildren + pretreat + did2, data = df)
summary(didreg2)
##
## Call:
## lm(formula = work ~ withchildren + pretreat + did2, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5672 -0.4117 -0.4032 0.4624 0.5968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.53759 0.01237 43.467 < 2e-16 ***
## withchildren -0.13438 0.01660 -8.098 6.76e-16 ***
## pretreat 0.02955 0.02013 1.468 0.142
## did2 -0.02109 0.02688 -0.785 0.433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4941 on 5805 degrees of freedom
## Multiple R-squared: 0.02044, Adjusted R-squared: 0.01994
## F-statistic: 40.38 on 3 and 5805 DF, p-value: < 2.2e-16
reg3 <- lm(work ~ did2 +scale (age) + nonwhite + ed, data = df)
summary(reg3)
##
## Call:
## lm(formula = work ~ did2 + scale(age) + nonwhite + ed, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6668 -0.4394 -0.3063 0.4858 0.8706
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.399261 0.025935 15.395 < 2e-16 ***
## did2 -0.061853 0.015796 -3.916 9.12e-05 ***
## scale(age) 0.030115 0.006589 4.571 4.96e-06 ***
## nonwhite -0.167803 0.012875 -13.033 < 2e-16 ***
## ed 0.019316 0.002650 7.289 3.53e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4885 on 5804 degrees of freedom
## Multiple R-squared: 0.04286, Adjusted R-squared: 0.0422
## F-statistic: 64.98 on 4 and 5804 DF, p-value: < 2.2e-16
reg4 <- lm(work ~ (did2 + scale(age) + nonwhite + ed +unemployment_rate + poverty_rate + state_min_wage)*withchildren, data = df)
summary(reg4)
##
## Call:
## lm(formula = work ~ (did2 + scale(age) + nonwhite + ed + unemployment_rate +
## poverty_rate + state_min_wage) * withchildren, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7146 -0.4393 -0.2748 0.4771 0.8404
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4859358 0.1021325 4.758 2.00e-06 ***
## did2 0.0403472 0.0201922 1.998 0.045746 *
## scale(age) -0.0059237 0.0089029 -0.665 0.505840
## nonwhite -0.1377841 0.0195321 -7.054 1.94e-12 ***
## ed 0.0261223 0.0034443 7.584 3.88e-14 ***
## unemployment_rate -0.0050752 0.0071542 -0.709 0.478100
## poverty_rate 0.0007088 0.0033598 0.211 0.832923
## state_min_wage -0.0189002 0.0174667 -1.082 0.279265
## withchildren -0.0143255 0.1425014 -0.101 0.919928
## did2:withchildren NA NA NA NA
## scale(age):withchildren 0.0516190 0.0138346 3.731 0.000192 ***
## nonwhite:withchildren -0.0112088 0.0265281 -0.423 0.672658
## ed:withchildren -0.0106774 0.0053962 -1.979 0.047897 *
## unemployment_rate:withchildren -0.0314584 0.0103581 -3.037 0.002400 **
## poverty_rate:withchildren 0.0064888 0.0045983 1.411 0.158259
## state_min_wage:withchildren 0.0224460 0.0243120 0.923 0.355917
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4844 on 5794 degrees of freedom
## Multiple R-squared: 0.0603, Adjusted R-squared: 0.05803
## F-statistic: 26.56 on 14 and 5794 DF, p-value: < 2.2e-16