library(haven)
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Change the data path below to the path on your computer
CPS_micro_data <- read_dta("CPS_micro_data.dta")
#Recode the education variable into years of education
CPS_micro_data$educ2 <- NA # create a new variable
CPS_micro_data$educ2[CPS_micro_data$chareduc=="1 year of college"] <- 13
CPS_micro_data$educ2[CPS_micro_data$chareduc=="12th grade, diploma unclear"] <- 11
CPS_micro_data$educ2[CPS_micro_data$chareduc=="12th grade, no diploma"] <- 11
CPS_micro_data$educ2[CPS_micro_data$chareduc=="2 years of college"] <- 14
CPS_micro_data$educ2[CPS_micro_data$chareduc=="3 years of college"] <- 15
CPS_micro_data$educ2[CPS_micro_data$chareduc=="4 years of college"] <- 16
CPS_micro_data$educ2[CPS_micro_data$chareduc=="5 years of college"] <- 17
CPS_micro_data$educ2[CPS_micro_data$chareduc=="6+ years of college"] <- 19
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Associate's degree, academic program"] <- 14
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Associate's degree, occupational/vocational program"] <- 14
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Bachelor's degree"] <- 16
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Doctorate degree"] <- 22
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 1"] <- 1
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 10"] <- 10
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 11"] <- 11
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 2"] <- 2
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 3"] <- 3
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 4"] <- 4
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 5"] <- 5
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 6"] <- 6
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 7"] <- 7
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 8"] <- 8
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grade 9"] <- 9
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grades 1, 2, 3, or 4"] <- 2.5
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grades 5 or 6"] <- 5.5
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Grades 7 or 8"] <- 7.5
CPS_micro_data$educ2[CPS_micro_data$chareduc=="High school diploma or equivalent"] <- 12
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Master's degree"] <- 18
CPS_micro_data$educ2[CPS_micro_data$chareduc=="NIU or blank"] <- NA
CPS_micro_data$educ2[CPS_micro_data$chareduc=="None or preschool"] <- 0
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Professional school degree"] <- 19
CPS_micro_data$educ2[CPS_micro_data$chareduc=="Some college but no degree"] <- 13
# Creating a subset of data aggregated by year and state for the low educated (less than 12 years of schooling)
by_year_state <- group_by(filter(CPS_micro_data,educ2<12 & year>=1992),year,state)
# also restricting to year>=1992 for consistent employment numbers
# Creating a dataset of means for the variables we are interested in
means <- summarise(by_year_state,earnweek=mean(earnweek,na.rm=TRUE),employed=mean(employed,na.rm=TRUE),hourwage=mean(hourwage,na.rm=TRUE),minwage=mean(minwage,na.rm=TRUE))
# Plotting wages over time for the low educated
ggplot(data = means) +
geom_line(mapping = aes(x = year, y = minwage),subset(means,state=="Pennsylvania")) +
geom_line(mapping = aes(x = year, y = hourwage),subset(means,state=="Pennsylvania"),color="red") +
ggtitle("Minimum wage and hourly wage in Pennsylvania") +
xlab("Year") + ylab("Wage")

ggplot(data = means) +
geom_line(mapping = aes(x = year, y = minwage),subset(means,state=="New Jersey")) +
geom_line(mapping = aes(x = year, y = hourwage),subset(means,state=="New Jersey"),color="red") +
ggtitle("Minimum wage and hourly wage in New Jersey") +
xlab("Year") + ylab("Wage")

ggplot(data = means) +
geom_line(mapping = aes(x = year, y = minwage),subset(means,state=="Pennsylvania")) +
geom_line(mapping = aes(x = year, y = minwage),subset(means,state=="New Jersey"),color="red") +
geom_vline(xintercept=1993,color="red") + # NJ increases minimum wage
geom_vline(xintercept=1996) + # PA increases minimum wage
geom_vline(xintercept=2005,color="red") + # NJ increases minimum wage
geom_vline(xintercept=2006) + # PA increases minimum wage
geom_vline(xintercept=2013,color="red") + # NJ increases minimum wage
ggtitle("Minimum wage") +
xlab("Year") + ylab("Minimum wage")

ggplot(data = means) +
geom_line(mapping = aes(x = year, y = hourwage),subset(means,state=="Pennsylvania")) +
geom_line(mapping = aes(x = year, y = hourwage),subset(means,state=="New Jersey"),color="red") +
geom_vline(xintercept=1993,color="red") + # NJ increases minimum wage
geom_vline(xintercept=1996) + # PA increases minimum wage
geom_vline(xintercept=2005,color="red") + # NJ increases minimum wage
geom_vline(xintercept=2006) + # PA increases minimum wage
geom_vline(xintercept=2013,color="red") + # NJ increases minimum wage
ggtitle("Hourly wage") +
xlab("Year") + ylab("Hourly wage")

ggplot(data = means) +
geom_line(mapping = aes(x = year, y = earnweek),subset(means,state=="Pennsylvania")) +
geom_line(mapping = aes(x = year, y = earnweek),subset(means,state=="New Jersey"),color="red") +
geom_vline(xintercept=1993,color="red") + # NJ increases minimum wage
geom_vline(xintercept=1996) + # PA increases minimum wage
geom_vline(xintercept=2005,color="red") + # NJ increases minimum wage
geom_vline(xintercept=2006) + # PA increases minimum wage
geom_vline(xintercept=2013,color="red") + # NJ increases minimum wage
ggtitle("Weekly earnings") +
xlab("Year") + ylab("Weekly earnings")

ggplot(data = means) +
geom_line(mapping = aes(x = year, y = employed),subset(means,state=="Pennsylvania")) +
geom_line(mapping = aes(x = year, y = employed),subset(means,state=="New Jersey"),color="red") +
geom_vline(xintercept=1993,color="red") + # NJ increases minimum wage
geom_vline(xintercept=1996) + # PA increases minimum wage
geom_vline(xintercept=2005,color="red") + # NJ increases minimum wage
geom_vline(xintercept=2006) + # PA increases minimum wage
geom_vline(xintercept=2013,color="red") + # NJ increases minimum wage
ggtitle("Employment") +
xlab("Year") + ylab("Employment")

# Regressing outcomes on the minimum wage variable
model1=lm(data=means, formula= hourwage ~ minwage)
summary(model1)
##
## Call:
## lm(formula = hourwage ~ minwage, data = means)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.04014 -0.67544 -0.07075 0.66610 1.82644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8977 0.6115 6.374 6.70e-08 ***
## minwage 0.9039 0.1014 8.916 9.41e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9012 on 48 degrees of freedom
## Multiple R-squared: 0.6235, Adjusted R-squared: 0.6157
## F-statistic: 79.5 on 1 and 48 DF, p-value: 9.413e-12
model2=lm(data=means, formula= earnweek ~ minwage)
summary(model2)
##
## Call:
## lm(formula = earnweek ~ minwage, data = means)
##
## Residuals:
## Min 1Q Median 3Q Max
## -132.216 -28.079 3.547 29.043 209.965
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 116.010 42.185 2.750 0.00838 **
## minwage 39.063 6.994 5.585 1.07e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 62.17 on 48 degrees of freedom
## Multiple R-squared: 0.3939, Adjusted R-squared: 0.3813
## F-statistic: 31.2 on 1 and 48 DF, p-value: 1.071e-06
model3=lm(data=means, formula= employed ~ minwage)
summary(model3)
##
## Call:
## lm(formula = employed ~ minwage, data = means)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.042948 -0.022830 -0.002171 0.016288 0.047907
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.340167 0.017307 19.655 <2e-16 ***
## minwage -0.004958 0.002869 -1.728 0.0904 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02551 on 48 degrees of freedom
## Multiple R-squared: 0.05856, Adjusted R-squared: 0.03895
## F-statistic: 2.986 on 1 and 48 DF, p-value: 0.09044
# Graphical analysis for the New Jersey minimum wage increase of 2014
# Before is: 2010-2013
# After is: 2014-2016
ggplot(data = subset(by_year_state,year>=2010 & year<=2013)) +
geom_density(aes(hourwage,fill=state,colour=state),alpha=0.1) +
geom_vline(xintercept=7.25,colour="red") +
ggtitle("Hourly wage before NJ min. wage increase")
## Warning: Removed 5282 rows containing non-finite values (stat_density).

ggplot(data = subset(by_year_state,year>=2014)) +
geom_density(aes(hourwage,fill=state,colour=state),alpha=0.1) +
geom_vline(xintercept=7.25,colour="cyan3") +
geom_vline(xintercept=8.25,colour="red") +
ggtitle("Hourly wage after NJ min. wage increase")
## Warning: Removed 3385 rows containing non-finite values (stat_density).

# Difference in differences estimates using tests for mean differences & proportions
## Testing the difference before the increase in the minimum wage
t.test(subset(by_year_state,state=="Pennsylvania"& year>=2010 & year<=2013)$hourwage,
subset(by_year_state,state=="New Jersey"& year>=2010 & year<=2013)$hourwage)
##
## Welch Two Sample t-test
##
## data: subset(by_year_state, state == "Pennsylvania" & year >= 2010 & and subset(by_year_state, state == "New Jersey" & year >= 2010 & year <= 2013)$hourwage and year <= 2013)$hourwage
## t = 1.3372, df = 188.96, p-value = 0.1828
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.427406 2.226242
## sample estimates:
## mean of x mean of y
## 11.23750 10.33808
## Testing the difference after the increase in the minimum wage
t.test(subset(by_year_state,state=="Pennsylvania"& year>=2014)$hourwage,
subset(by_year_state,state=="New Jersey"& year>=2014)$hourwage)
##
## Welch Two Sample t-test
##
## data: subset(by_year_state, state == "Pennsylvania" & year >= 2014)$hourwage and subset(by_year_state, state == "New Jersey" & year >= 2014)$hourwage
## t = 0.65822, df = 95.099, p-value = 0.512
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.057787 2.107164
## sample estimates:
## mean of x mean of y
## 10.499107 9.974419
## Testing the difference before the increase in the minimum wage
t.test(subset(by_year_state,state=="Pennsylvania"& year>=2010 & year<=2013)$earnweek,
subset(by_year_state,state=="New Jersey"& year>=2010 & year<=2013)$earnweek)
##
## Welch Two Sample t-test
##
## data: subset(by_year_state, state == "Pennsylvania" & year >= 2010 & and subset(by_year_state, state == "New Jersey" & year >= 2010 & year <= 2013)$earnweek and year <= 2013)$earnweek
## t = -1.4458, df = 174.51, p-value = 0.15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -184.26379 28.44326
## sample estimates:
## mean of x mean of y
## 386.5578 464.4681
## Testing the difference after the increase in the minimum wage
t.test(subset(by_year_state,state=="Pennsylvania"& year>=2014)$earnweek,
subset(by_year_state,state=="New Jersey"& year>=2014)$earnweek)
##
## Welch Two Sample t-test
##
## data: subset(by_year_state, state == "Pennsylvania" & year >= 2014)$earnweek and subset(by_year_state, state == "New Jersey" & year >= 2014)$earnweek
## t = -1.4043, df = 113.34, p-value = 0.163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -142.29224 24.24189
## sample estimates:
## mean of x mean of y
## 330.3385 389.3636
## Testing the difference before the increase in the minimum wage
by_year_state$femp <- as.factor(by_year_state$employed)
summary(by_year_state$femp)
## 0 1
## 28536 12801
tablebefore <- table(subset(by_year_state,year>=2010 & year<=2013)$state,
subset(by_year_state,year>=2010 & year<=2013)$femp)
prop.table(tablebefore,1)
##
## 0 1
## New Jersey 0.6977929 0.3022071
## Pennsylvania 0.7217057 0.2782943
prop.test(tablebefore)
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: tablebefore
## X-squared = 3.6227, df = 1, p-value = 0.057
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.0486000578 0.0007744466
## sample estimates:
## prop 1 prop 2
## 0.6977929 0.7217057
## Testing the difference after the increase in the minimum wage
tableafter <- table(subset(by_year_state,year>=2014)$state,
subset(by_year_state,year>=2014)$femp)
prop.table(tableafter,1)
##
## 0 1
## New Jersey 0.6845638 0.3154362
## Pennsylvania 0.7161484 0.2838516
prop.test(tableafter)
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: tableafter
## X-squared = 3.9219, df = 1, p-value = 0.04766
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.062966117 -0.000203257
## sample estimates:
## prop 1 prop 2
## 0.6845638 0.7161484