PART 2 OF ASSIGNMENT

LOADING DATA

Adding state control variables. I extracted some data from University of Kentucky Center for Poverty Research national Welfare Database (UKCPR_National_Data_Final_Update). From the dataset, please first identify three important socioeconomic and/or demographic factors at the state level, and then merge these variables to the main eitcR.dta file.

library(dplyr)

## Warning: package 'dplyr' was built under R version 3.5.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(haven)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

data<-read_dta('C:/Users/PCMcC/Documents/Causal Inference/Homeworks/Assignment 3/eitcR.dta')
data2<-read.csv("C:/Users/PCMcC/Documents/Causal Inference/Homeworks/Assignment 3/UKCPR_National_Welfare_Data_Final_Update_20180116_0.csv")

VARIABLE SELECTION FROM UKCPR-NATIONAL WELFARE DATA SET

The following variables are selected from “data2” for our analysis: unemployment rate, minimum wage and povert rate

attach(data2)
vars<-cbind(state, year, state_name, unemployment_rate, poverty_rate, state_min_wage)
data3<-data.frame(vars)

MERGE DATASETS AND PRINT OF COLUMN NAMES TO CONFIRM ALL VARIABLES MERGE

I include the statement all.x = TRUE in order to avoid duplicates and maintain my original sample size of N=1887.

merge_data<-merge(data,data3, by = c("state", "year"),all.x = TRUE)

#rm(data2, data3)

EVALUAITON OF DATA AND CONFIRM INTEGRITY OF MERGE

Evaluation of the merge shows that the number of observations is consistent for all columns. I filter complete cases in order to eliminate states in which I was unable to get information leaving me with a sample of 5,809 individuals.

library(stargazer)
df <- data.frame(merge_data)
df<-df %>% filter(complete.cases(.))

cols <- c('children', 'nonwhite', "earn", "age", "ed", "work",  "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(df[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd")
)

## 
## =============================================================================================
## Statistic           N    Min  Pctl(25)  Median    Pctl(75)      Max        Mean     St. Dev. 
## ---------------------------------------------------------------------------------------------
## children          5,809   0      0         1         2           9        1.189      1.404   
## nonwhite          5,809   0      0         1         1           1        0.542      0.498   
## earn              5,809 0.000  0.000   1,624.494 14,439.450 366,095.500 10,280.960 17,760.500
## age               5,809  20      27       35         44         54        35.514     10.118  
## ed                5,809   0      7        10         11         11        9.019      2.462   
## work              5,809   0      0         0         1           1        0.469      0.499   
## unearn            5,809   0     0.1       4.3       7.4         134       5.472      7.307   
## unemployment_rate 5,809 2.900  5.000     6.100     6.900       9.200      6.100      1.356   
## poverty_rate      5,809 7.500  10.100   11.800     14.400     25.500      12.405     3.018   
## state_min_wage    5,809 1.600  4.250     4.250     4.250       5.250      4.162      0.552   
## ---------------------------------------------------------------------------------------------

RECODING OF VARIABLES BY NUMBER OF CHILDREN AND ADD THEM TO THE WORKING DATAFRAME

library(dplyr)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

names(df)

##  [1] "state"             "year"              "children"         
##  [4] "nonwhite"          "finc"              "earn"             
##  [7] "age"               "ed"                "work"             
## [10] "unearn"            "state_name"        "unemployment_rate"
## [13] "poverty_rate"      "state_min_wage"

#Number of children Variables
df$withchildren<-ifelse(df$children>=1,1,0)
df$onechild<-ifelse(df$children==1,1,0)
df$twochildrenormore<-ifelse(df$children>=2,1,0)
df$nochild<-ifelse(df$children==0,1,0)
df$childrenx <- Recode(df$children, recodes = "1:9='withchildren'; 0='nochilren'; else=NA")
df$childrenx<-as.factor(df$childrenx)

#Employment
df$rec_work<-ifelse(df$work==1 & df$earn >=1, 1,0)
df$rec_nowork<-ifelse(df$work==0 & df$earn<=0,1,0)

#Pre-Treatment and Expansion Variables
df$expansion<-ifelse(df$year>=1993,1,0)
df$pretreat<-ifelse(df$year<=1992,1,0)

CALCULATION OF SAMPLES MEANS BY NUMBER OF CHILDREN AS CONDITION INCLUDING ADDITIONAL SUMMARY STATISTICS

ch0 <- subset(df, children==0)
cols <- c('nonwhite', "earn", "age", "ed", "work",  "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(ch0[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))

## 
## =====================================================================================
## Statistic           N    Min  Pctl(25) Median  Pctl(75)   Max      Mean     St. Dev. 
## -------------------------------------------------------------------------------------
## nonwhite          2,564   0      0        0       1        1      0.445      0.497   
## earn              2,564   0      0     7,067.5 20,003.1 176,050 13,925.040 19,645.530
## age               2,564  20      29      41       49      54      39.068     10.937  
## ed                2,564   0      7       10       11      11      8.670      2.789   
## work              2,564   0      0        1       1        1      0.549      0.498   
## unearn            2,564 0.000  0.000    2.222   6.874   134.058   5.131      8.629   
## unemployment_rate 2,564 2.900  5.000    6.100   7.000    9.200    6.098      1.395   
## poverty_rate      2,564 7.500  10.200  11.800   14.500  25.500    12.536     3.071   
## state_min_wage    2,564 1.600  4.250    4.250   4.250    5.250    4.135      0.567   
## -------------------------------------------------------------------------------------

ch1<- subset(df, children==1)
cols <- c('nonwhite', "earn", "age", "ed", "work",  "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(ch1[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))

## 
## =====================================================================================
## Statistic           N    Min  Pctl(25) Median  Pctl(75)   Max      Mean     St. Dev. 
## -------------------------------------------------------------------------------------
## nonwhite          1,249   0      0        1       1        1      0.525      0.500   
## earn              1,249   0      0     2,159.0 14,176.4 366,095 10,300.080 19,575.000
## age               1,249  20      24      34       42      54      33.862     9.862   
## ed                1,249   0      7       10       11      11      9.219      2.176   
## work              1,249   0      0        0       1        1      0.486      0.500   
## unearn            1,249 0.000  0.295    3.684   6.383   92.926    4.812      6.006   
## unemployment_rate 1,249 2.900  5.000    6.100   6.900    9.200    6.100      1.332   
## poverty_rate      1,249 7.500  10.100  11.800   14.400  25.500    12.429     3.048   
## state_min_wage    1,249 1.600  4.250    4.250   4.250    5.250    4.131      0.593   
## -------------------------------------------------------------------------------------

ch2<- subset(df, children>=2)
cols <- c('nonwhite', "earn", "age", "ed", "work",  "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(ch2[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))

## 
## ========================================================================================
## Statistic           N    Min  Pctl(25) Median Pctl(75)      Max       Mean     St. Dev. 
## ----------------------------------------------------------------------------------------
## nonwhite          1,996   0      0       1        1          1        0.675     0.468   
## earn              1,996 0.000  0.000   0.000  6,572.419 118,847.800 5,587.935 11,986.770
## age               1,996  20      26      32      37         54       31.983     7.288   
## ed                1,996   0      9       10      11         11        9.342     2.100   
## work              1,996   0      0       0        1          1        0.357     0.479   
## unearn            1,996   0     2.7     5.6      8.2        81        6.324     6.006   
## unemployment_rate 1,996 2.900  5.000   6.100    6.900      9.200      6.103     1.320   
## poverty_rate      1,996 7.500  10.100  11.200  14.400     25.500     12.223     2.923   
## state_min_wage    1,996 1.600  4.250   4.250    4.250      5.250      4.216     0.499   
## ----------------------------------------------------------------------------------------

CALCULATION OF SAMPLES MEANS BY EMPLOYMENT STATUS AS CONDITION INCLUDING ADDITIONAL SUMMARY STATISTICS

#Women employed
em1 <- subset(df, work==1 & earn>=1)
cols <- c('nonwhite', "earn", "age", "ed", "work",  "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(em1[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))

## 
## ===============================================================================================
## Statistic           N    Min  Pctl(25)    Median    Pctl(75)      Max        Mean     St. Dev. 
## -----------------------------------------------------------------------------------------------
## nonwhite          2,726   0       0         0          1           1        0.451      0.498   
## earn              2,726 1.023 5,719.886 12,995.950 23,325.260 176,050.200 17,946.030 18,564.300
## age               2,726  20      27         36         45         54        36.050     10.203  
## ed                2,726   0      7.5        10         11         11        9.241      2.282   
## work              2,726   1       1         1          1           1        1.000      0.000   
## unearn            2,726   0       0        0.6        4.9         108       3.469      6.371   
## unemployment_rate 2,726 2.900   5.000     6.100      6.900       9.200      6.034      1.409   
## poverty_rate      2,726 7.500  10.200     11.800     14.500     25.500      12.537     3.141   
## state_min_wage    2,726 1.600   4.250     4.250      4.250       5.250      4.144      0.586   
## -----------------------------------------------------------------------------------------------

#women Unemployed
em0<- subset(df, work==0 & earn==0)
cols <- c('nonwhite', "earn", "age", "ed", "work",  "unearn", "unemployment_rate", "poverty_rate", "state_min_wage")
stargazer(em0[, cols], type = "text", summary.stat = c("N","min", "p25", "median", "p75", "max", "mean", "sd"))

## 
## ==============================================================================
## Statistic           N    Min  Pctl(25) Median Pctl(75)   Max    Mean  St. Dev.
## ------------------------------------------------------------------------------
## nonwhite          2,547   0      0       1       1        1    0.631   0.483  
## earn              2,547   0      0       0       0        0    0.000   0.000  
## age               2,547  20      26      33      43      54    34.672  9.849  
## ed                2,547   0      7       10      11      11    8.902   2.521  
## work              2,547   0      0       0       0        0    0.000   0.000  
## unearn            2,547 0.000  3.672   5.967   8.485   134.058 6.914   6.756  
## unemployment_rate 2,547 2.900  5.000   6.100   6.900    9.200  6.154   1.301  
## poverty_rate      2,547 7.500  10.100  11.800  14.200  25.500  12.298  2.858  
## state_min_wage    2,547 1.600  4.250   4.250   4.250    5.250  4.172   0.519  
## ------------------------------------------------------------------------------

Annual employment rates by year (1991-1996) for single women with children (treatment) and without children (control).

plot<-aggregate(df$work, by=list(df$year, df$childrenx), FUN=sum)

#install.packages("ggplot2")
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.5.3

ggplot(plot, aes(fill=Group.2, y=x/5809*100 , x=Group.1)) + 
    geom_bar(position="dodge", stat="identity") + scale_fill_manual(values=c( "#E69F00", "#56B4E9"))+ theme_minimal() + scale_y_continuous(limits=c(0,5)) + scale_x_continuous(breaks=seq(1991, 1996, 1)) +
ggtitle("Annual Employment Rates by Year (1991 to 1996) for Single Women by Treatment")  + 
  theme(plot.title = element_text(face="bold", size=10, hjust=0)) + theme(legend.title=element_blank()) + ylab("Percentage Rate")+xlab("Year") +  geom_text(aes(label=round(x/5809*100,2) ),position=position_dodge(0.9))

## Unconditional difference-in-difference estimates

of the effect of the 1993 EITC expansion on employment of single women. Your table should be parallel to the format found in a typical differences-in-differences paper. Calculate estimates with all women with children as the treatment (single women with no children as the control), women with children as the control. Discuss these results.

CREATE AN INTERACTION AND ESTIMATE THE DID ESTIMATOR

#Create the interaction
did<-df$withchildren * df$expansion

#DID Estimator
didreg = lm(work ~ nochild + expansion + did, data = df)
summary(didreg)

## 
## Call:
## lm(formula = work ~ nochild + expansion + did, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5672 -0.4117 -0.4032  0.4624  0.5968 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.41167    0.01397  29.469  < 2e-16 ***
## nochild      0.15548    0.02115   7.351 2.24e-13 ***
## expansion   -0.02955    0.02013  -1.468    0.142    
## did          0.02109    0.02688   0.785    0.433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4941 on 5805 degrees of freedom
## Multiple R-squared:  0.02044,    Adjusted R-squared:  0.01994 
## F-statistic: 40.38 on 3 and 5805 DF,  p-value: < 2.2e-16

Regression with conditional difference-in-difference estimates including individual-level controls.

reg1 <- lm(work ~ withchildren + did + scale(age) + nonwhite + ed, data = df)
summary(reg1)

## 
## Call:
## lm(formula = work ~ withchildren + did + scale(age) + nonwhite + 
##     ed, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6872 -0.4519 -0.2966  0.4858  0.8614 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.429474   0.026100  16.455  < 2e-16 ***
## withchildren -0.120437   0.017453  -6.901 5.73e-12 ***
## did           0.001549   0.017551   0.088   0.9297    
## scale(age)    0.016070   0.006803   2.362   0.0182 *  
## nonwhite     -0.148992   0.013040 -11.426  < 2e-16 ***
## ed            0.020761   0.002643   7.854 4.76e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.486 on 5803 degrees of freedom
## Multiple R-squared:  0.05259,    Adjusted R-squared:  0.05178 
## F-statistic: 64.43 on 5 and 5803 DF,  p-value: < 2.2e-16

Regression with conxtetual factors (includes interaction term “withchildren”)

reg2 <- lm(work ~ (did + scale(age) + nonwhite + ed +unemployment_rate + poverty_rate + state_min_wage)*withchildren, data = df)
summary(reg2)

## 
## Call:
## lm(formula = work ~ (did + scale(age) + nonwhite + ed + unemployment_rate + 
##     poverty_rate + state_min_wage) * withchildren, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7146 -0.4393 -0.2748  0.4771  0.8404 
## 
## Coefficients: (1 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.4859358  0.1021325   4.758 2.00e-06 ***
## did                            -0.0403472  0.0201922  -1.998 0.045746 *  
## scale(age)                     -0.0059237  0.0089029  -0.665 0.505840    
## nonwhite                       -0.1377841  0.0195321  -7.054 1.94e-12 ***
## ed                              0.0261223  0.0034443   7.584 3.88e-14 ***
## unemployment_rate              -0.0050752  0.0071542  -0.709 0.478100    
## poverty_rate                    0.0007088  0.0033598   0.211 0.832923    
## state_min_wage                 -0.0189002  0.0174667  -1.082 0.279265    
## withchildren                    0.0260217  0.1428319   0.182 0.855445    
## did:withchildren                       NA         NA      NA       NA    
## scale(age):withchildren         0.0516190  0.0138346   3.731 0.000192 ***
## nonwhite:withchildren          -0.0112088  0.0265281  -0.423 0.672658    
## ed:withchildren                -0.0106774  0.0053962  -1.979 0.047897 *  
## unemployment_rate:withchildren -0.0314584  0.0103581  -3.037 0.002400 ** 
## poverty_rate:withchildren       0.0064888  0.0045983   1.411 0.158259    
## state_min_wage:withchildren     0.0224460  0.0243120   0.923 0.355917    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4844 on 5794 degrees of freedom
## Multiple R-squared:  0.0603, Adjusted R-squared:  0.05803 
## F-statistic: 26.56 on 14 and 5794 DF,  p-value: < 2.2e-16

PLACEBO TREATMENT

#DID PLACEBO Estimator
#Create the interaction
did2<-df$withchildren * df$pretreat
#DID Estimator

didreg2 = lm(work ~ withchildren + pretreat + did2, data = df)
summary(didreg2)

## 
## Call:
## lm(formula = work ~ withchildren + pretreat + did2, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5672 -0.4117 -0.4032  0.4624  0.5968 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.53759    0.01237  43.467  < 2e-16 ***
## withchildren -0.13438    0.01660  -8.098 6.76e-16 ***
## pretreat      0.02955    0.02013   1.468    0.142    
## did2         -0.02109    0.02688  -0.785    0.433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4941 on 5805 degrees of freedom
## Multiple R-squared:  0.02044,    Adjusted R-squared:  0.01994 
## F-statistic: 40.38 on 3 and 5805 DF,  p-value: < 2.2e-16

reg3 <- lm(work ~  did2 +scale (age) + nonwhite + ed, data = df)
summary(reg3)

## 
## Call:
## lm(formula = work ~ did2 + scale(age) + nonwhite + ed, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6668 -0.4394 -0.3063  0.4858  0.8706 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.399261   0.025935  15.395  < 2e-16 ***
## did2        -0.061853   0.015796  -3.916 9.12e-05 ***
## scale(age)   0.030115   0.006589   4.571 4.96e-06 ***
## nonwhite    -0.167803   0.012875 -13.033  < 2e-16 ***
## ed           0.019316   0.002650   7.289 3.53e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4885 on 5804 degrees of freedom
## Multiple R-squared:  0.04286,    Adjusted R-squared:  0.0422 
## F-statistic: 64.98 on 4 and 5804 DF,  p-value: < 2.2e-16

reg4 <- lm(work ~ (did2 + scale(age) + nonwhite + ed +unemployment_rate + poverty_rate + state_min_wage)*withchildren, data = df)
summary(reg4)

## 
## Call:
## lm(formula = work ~ (did2 + scale(age) + nonwhite + ed + unemployment_rate + 
##     poverty_rate + state_min_wage) * withchildren, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7146 -0.4393 -0.2748  0.4771  0.8404 
## 
## Coefficients: (1 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.4859358  0.1021325   4.758 2.00e-06 ***
## did2                            0.0403472  0.0201922   1.998 0.045746 *  
## scale(age)                     -0.0059237  0.0089029  -0.665 0.505840    
## nonwhite                       -0.1377841  0.0195321  -7.054 1.94e-12 ***
## ed                              0.0261223  0.0034443   7.584 3.88e-14 ***
## unemployment_rate              -0.0050752  0.0071542  -0.709 0.478100    
## poverty_rate                    0.0007088  0.0033598   0.211 0.832923    
## state_min_wage                 -0.0189002  0.0174667  -1.082 0.279265    
## withchildren                   -0.0143255  0.1425014  -0.101 0.919928    
## did2:withchildren                      NA         NA      NA       NA    
## scale(age):withchildren         0.0516190  0.0138346   3.731 0.000192 ***
## nonwhite:withchildren          -0.0112088  0.0265281  -0.423 0.672658    
## ed:withchildren                -0.0106774  0.0053962  -1.979 0.047897 *  
## unemployment_rate:withchildren -0.0314584  0.0103581  -3.037 0.002400 ** 
## poverty_rate:withchildren       0.0064888  0.0045983   1.411 0.158259    
## state_min_wage:withchildren     0.0224460  0.0243120   0.923 0.355917    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4844 on 5794 degrees of freedom
## Multiple R-squared:  0.0603, Adjusted R-squared:  0.05803 
## F-statistic: 26.56 on 14 and 5794 DF,  p-value: < 2.2e-16

Causal_Inference_HW3: Difference in Difference

Paulina Cano

April 11, 2019