library(haven)
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
# Change the data path below to the path on your computer
CPS_micro_data <- read_dta("CPS_micro_data.dta")

#Recode the education variable into years of education
CPS_micro_data$educ2 <- NA # create a new variable

CPS_micro_data$educ2[CPS_micro_data$chareduc=="1 year of college"] <- 13
CPS_micro_data$educ2[CPS_micro_data$chareduc=="12th grade, diploma unclear"] <- 11
CPS_micro_data$educ2[CPS_micro_data$chareduc=="12th grade, no diploma"] <- 11                                
CPS_micro_data$educ2[CPS_micro_data$chareduc=="2 years of college"] <-  14
CPS_micro_data$educ2[CPS_micro_data$chareduc=="3 years of college"] <-  15                                 
CPS_micro_data$educ2[CPS_micro_data$chareduc=="4 years of college"] <- 16
CPS_micro_data$educ2[CPS_micro_data$chareduc=="5 years of college"] <-   17                          
CPS_micro_data$educ2[CPS_micro_data$chareduc=="6+ years of college"] <- 19
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Associate's degree, academic program"] <-  14
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Associate's degree, occupational/vocational program"] <- 14  
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Bachelor's degree"] <- 16
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Doctorate degree"] <- 22
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 1"] <- 1                              
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 10"] <- 10
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 11"] <-   11                                      
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 2"] <- 2
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 3"] <-  3                                       
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 4"] <- 4
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 5"] <-  5                                       
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 6"] <- 6
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 7"] <-  7                                       
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 8"] <- 8
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 9"] <-  9                     
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grades 1, 2, 3, or 4"] <- 2.5  
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grades 5 or 6"] <- 5.5                           
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grades 7 or 8"] <- 7.5
CPS_micro_data$educ2[CPS_micro_data$chareduc=="High school diploma or equivalent"] <- 12                                     
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Master's degree"] <- 18
CPS_micro_data$educ2[CPS_micro_data$chareduc=="NIU or blank"] <-  NA                          
CPS_micro_data$educ2[CPS_micro_data$chareduc=="None or preschool"] <- 0 
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Professional school degree"] <-  19                         
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Some college but no degree"] <- 13

# Creating a subset of data aggregated by year and state for the low educated (less than 12 years of schooling)
by_year_state <- group_by(filter(CPS_micro_data,educ2<12 & year>=1992),year,state) 
# also restricting to year>=1992 for consistent employment numbers

# Creating a dataset of means for the variables we are interested in
means <- summarise(by_year_state,earnweek=mean(earnweek,na.rm=TRUE),employed=mean(employed,na.rm=TRUE),hourwage=mean(hourwage,na.rm=TRUE),minwage=mean(minwage,na.rm=TRUE))

# Plotting wages over time for the low educated

ggplot(data = means) + 
  geom_line(mapping = aes(x = year, y = minwage),subset(means,state=="Pennsylvania")) +
  geom_line(mapping = aes(x = year, y = hourwage),subset(means,state=="Pennsylvania"),color="red") +
  ggtitle("Minimum wage and hourly wage in Pennsylvania") +
  xlab("Year") + ylab("Wage")

ggplot(data = means) + 
  geom_line(mapping = aes(x = year, y = minwage),subset(means,state=="New Jersey")) +
  geom_line(mapping = aes(x = year, y = hourwage),subset(means,state=="New Jersey"),color="red") +
  ggtitle("Minimum wage and hourly wage in New Jersey") +
  xlab("Year") + ylab("Wage")

ggplot(data = means) + 
  geom_line(mapping = aes(x = year, y = minwage),subset(means,state=="Pennsylvania")) +
  geom_line(mapping = aes(x = year, y = minwage),subset(means,state=="New Jersey"),color="red") +
  geom_vline(xintercept=1993,color="red") + # NJ increases minimum wage
  geom_vline(xintercept=1996) + # PA increases minimum wage
  geom_vline(xintercept=2005,color="red") + # NJ increases minimum wage 
  geom_vline(xintercept=2006) + # PA increases minimum wage 
  geom_vline(xintercept=2013,color="red") + # NJ increases minimum wage
  ggtitle("Minimum wage") +
  xlab("Year") + ylab("Minimum wage")

ggplot(data = means) + 
  geom_line(mapping = aes(x = year, y = hourwage),subset(means,state=="Pennsylvania")) +
  geom_line(mapping = aes(x = year, y = hourwage),subset(means,state=="New Jersey"),color="red") +
  geom_vline(xintercept=1993,color="red") + # NJ increases minimum wage
  geom_vline(xintercept=1996) + # PA increases minimum wage
  geom_vline(xintercept=2005,color="red") + # NJ increases minimum wage 
  geom_vline(xintercept=2006) + # PA increases minimum wage 
  geom_vline(xintercept=2013,color="red") + # NJ increases minimum wage
  ggtitle("Hourly wage") +
  xlab("Year") + ylab("Hourly wage")

ggplot(data = means) + 
  geom_line(mapping = aes(x = year, y = earnweek),subset(means,state=="Pennsylvania")) +
  geom_line(mapping = aes(x = year, y = earnweek),subset(means,state=="New Jersey"),color="red") +
  geom_vline(xintercept=1993,color="red") + # NJ increases minimum wage
  geom_vline(xintercept=1996) + # PA increases minimum wage
  geom_vline(xintercept=2005,color="red") + # NJ increases minimum wage 
  geom_vline(xintercept=2006) + # PA increases minimum wage 
  geom_vline(xintercept=2013,color="red") + # NJ increases minimum wage
  ggtitle("Weekly earnings") +
  xlab("Year") + ylab("Weekly earnings")

ggplot(data = means) + 
  geom_line(mapping = aes(x = year, y = employed),subset(means,state=="Pennsylvania")) +
  geom_line(mapping = aes(x = year, y = employed),subset(means,state=="New Jersey"),color="red") +
  geom_vline(xintercept=1993,color="red") + # NJ increases minimum wage
  geom_vline(xintercept=1996) + # PA increases minimum wage
  geom_vline(xintercept=2005,color="red") + # NJ increases minimum wage 
  geom_vline(xintercept=2006) + # PA increases minimum wage 
  geom_vline(xintercept=2013,color="red") + # NJ increases minimum wage
  ggtitle("Employment") +
  xlab("Year") + ylab("Employment")

# Regressing outcomes on the minimum wage variable
model1=lm(data=means, formula= hourwage ~ minwage)
summary(model1)
## 
## Call:
## lm(formula = hourwage ~ minwage, data = means)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.04014 -0.67544 -0.07075  0.66610  1.82644 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.8977     0.6115   6.374 6.70e-08 ***
## minwage       0.9039     0.1014   8.916 9.41e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9012 on 48 degrees of freedom
## Multiple R-squared:  0.6235, Adjusted R-squared:  0.6157 
## F-statistic:  79.5 on 1 and 48 DF,  p-value: 9.413e-12
model2=lm(data=means, formula= earnweek ~ minwage)
summary(model2)
## 
## Call:
## lm(formula = earnweek ~ minwage, data = means)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -132.216  -28.079    3.547   29.043  209.965 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  116.010     42.185   2.750  0.00838 ** 
## minwage       39.063      6.994   5.585 1.07e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 62.17 on 48 degrees of freedom
## Multiple R-squared:  0.3939, Adjusted R-squared:  0.3813 
## F-statistic:  31.2 on 1 and 48 DF,  p-value: 1.071e-06
model3=lm(data=means, formula= employed ~ minwage)
summary(model3)
## 
## Call:
## lm(formula = employed ~ minwage, data = means)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.042948 -0.022830 -0.002171  0.016288  0.047907 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.340167   0.017307  19.655   <2e-16 ***
## minwage     -0.004958   0.002869  -1.728   0.0904 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02551 on 48 degrees of freedom
## Multiple R-squared:  0.05856,    Adjusted R-squared:  0.03895 
## F-statistic: 2.986 on 1 and 48 DF,  p-value: 0.09044
# Graphical analysis for the New Jersey minimum wage increase of 2014
# Before is: 2010-2013
# After is: 2014-2016

ggplot(data = subset(by_year_state,year>=2010 & year<=2013)) + 
  geom_density(aes(hourwage,fill=state,colour=state),alpha=0.1) +
  geom_vline(xintercept=7.25,colour="red") +
  ggtitle("Hourly wage before NJ min. wage increase")
## Warning: Removed 5282 rows containing non-finite values (stat_density).

ggplot(data = subset(by_year_state,year>=2014)) + 
  geom_density(aes(hourwage,fill=state,colour=state),alpha=0.1) +
  geom_vline(xintercept=7.25,colour="cyan3") +
  geom_vline(xintercept=8.25,colour="red") +
  ggtitle("Hourly wage after NJ min. wage increase")
## Warning: Removed 3385 rows containing non-finite values (stat_density).

# Difference in differences estimates using tests for mean differences & proportions
## Testing the difference before the increase in the minimum wage
t.test(subset(by_year_state,state=="Pennsylvania"& year>=2010 & year<=2013)$hourwage, 
       subset(by_year_state,state=="New Jersey"& year>=2010 & year<=2013)$hourwage)
## 
##  Welch Two Sample t-test
## 
## data:  subset(by_year_state, state == "Pennsylvania" & year >= 2010 &  and subset(by_year_state, state == "New Jersey" & year >= 2010 &     year <= 2013)$hourwage and     year <= 2013)$hourwage
## t = 1.3372, df = 188.96, p-value = 0.1828
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.427406  2.226242
## sample estimates:
## mean of x mean of y 
##  11.23750  10.33808
## Testing the difference after the increase in the minimum wage
t.test(subset(by_year_state,state=="Pennsylvania"& year>=2014)$hourwage, 
       subset(by_year_state,state=="New Jersey"& year>=2014)$hourwage)
## 
##  Welch Two Sample t-test
## 
## data:  subset(by_year_state, state == "Pennsylvania" & year >= 2014)$hourwage and subset(by_year_state, state == "New Jersey" & year >= 2014)$hourwage
## t = 0.65822, df = 95.099, p-value = 0.512
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.057787  2.107164
## sample estimates:
## mean of x mean of y 
## 10.499107  9.974419
## Testing the difference before the increase in the minimum wage
t.test(subset(by_year_state,state=="Pennsylvania"& year>=2010 & year<=2013)$earnweek, 
       subset(by_year_state,state=="New Jersey"& year>=2010 & year<=2013)$earnweek)
## 
##  Welch Two Sample t-test
## 
## data:  subset(by_year_state, state == "Pennsylvania" & year >= 2010 &  and subset(by_year_state, state == "New Jersey" & year >= 2010 &     year <= 2013)$earnweek and     year <= 2013)$earnweek
## t = -1.4458, df = 174.51, p-value = 0.15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -184.26379   28.44326
## sample estimates:
## mean of x mean of y 
##  386.5578  464.4681
## Testing the difference after the increase in the minimum wage
t.test(subset(by_year_state,state=="Pennsylvania"& year>=2014)$earnweek, 
       subset(by_year_state,state=="New Jersey"& year>=2014)$earnweek)
## 
##  Welch Two Sample t-test
## 
## data:  subset(by_year_state, state == "Pennsylvania" & year >= 2014)$earnweek and subset(by_year_state, state == "New Jersey" & year >= 2014)$earnweek
## t = -1.4043, df = 113.34, p-value = 0.163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -142.29224   24.24189
## sample estimates:
## mean of x mean of y 
##  330.3385  389.3636
## Testing the difference before the increase in the minimum wage
by_year_state$femp <- as.factor(by_year_state$employed)
summary(by_year_state$femp)
##     0     1 
## 28536 12801
tablebefore <- table(subset(by_year_state,year>=2010 & year<=2013)$state,
                     subset(by_year_state,year>=2010 & year<=2013)$femp)
prop.table(tablebefore,1)
##               
##                        0         1
##   New Jersey   0.6977929 0.3022071
##   Pennsylvania 0.7217057 0.2782943
prop.test(tablebefore)
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  tablebefore
## X-squared = 3.6227, df = 1, p-value = 0.057
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.0486000578  0.0007744466
## sample estimates:
##    prop 1    prop 2 
## 0.6977929 0.7217057
## Testing the difference after the increase in the minimum wage
tableafter <- table(subset(by_year_state,year>=2014)$state,
                    subset(by_year_state,year>=2014)$femp)

prop.table(tableafter,1)
##               
##                        0         1
##   New Jersey   0.6845638 0.3154362
##   Pennsylvania 0.7161484 0.2838516
prop.test(tableafter)
## 
##  2-sample test for equality of proportions with continuity
##  correction
## 
## data:  tableafter
## X-squared = 3.9219, df = 1, p-value = 0.04766
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.062966117 -0.000203257
## sample estimates:
##    prop 1    prop 2 
## 0.6845638 0.7161484