Loading R library

library(tidyverse)
library(kableExtra)
library(broom)
library(texreg)
library(patchwork)
library(readr)
library(lubridate)
library(boot)
library(stargazer)
library(fixest)
library(pander)
library(MatchIt)

Loading Dataset

df_2019 <- read.csv("nov19pub.csv")
df_2021 <- read.csv("nov21pub.csv")

names(df_2019) <- tolower(names(df_2019))
names(df_2021) <- tolower(names(df_2021))

n_raw_2019 <- as.character(nrow(df_2019))
n_raw_2021 <- as.character(nrow(df_2021))

n_unique_2019 <- as.character(length(unique(df_2019$hrhhid)))
n_unique_2021 <- as.character(length(unique(df_2021$hrhhid)))

Checking age variable

df_2019 <- df_2019 %>% filter(prtage > 21 & prtage < 35)

n_row_agerestrict_2019 <- as.character(nrow(df_2019))


df_2021 <- df_2021 %>% filter(prtage> 21 & prtage < 35)

n_row_agerestrict_2021 <- as.character(nrow(df_2021))

Append two dataset together

df_2019_s <- df_2019 %>% select(hrhhid, hryear4, pemlr, pruntype, gereg, pemaritl, peeduca, peedtrai, pesex, ptdtrace, gtmetsta, prtage) 

df_2021_s <- df_2021 %>% select(hrhhid, hryear4, pemlr, pruntype, gereg, pemaritl, peeduca, peedtrai, pesex, ptdtrace, gtmetsta, prtage) 

df <- rbind(df_2019_s, df_2021_s)

Checking employment variable

df %>% names()
##  [1] "hrhhid"   "hryear4"  "pemlr"    "pruntype" "gereg"    "pemaritl"
##  [7] "peeduca"  "peedtrai" "pesex"    "ptdtrace" "gtmetsta" "prtage"
df %>% count(hryear4)
##   hryear4     n
## 1    2019 18594
## 2    2021 15925
df %>% group_by(hryear4) %>% count(pemlr)
## # A tibble: 16 × 3
## # Groups:   hryear4 [2]
##    hryear4 pemlr     n
##      <int> <int> <int>
##  1    2019    -1   205
##  2    2019     1 14357
##  3    2019     2   318
##  4    2019     3    54
##  5    2019     4   514
##  6    2019     5   108
##  7    2019     6   458
##  8    2019     7  2580
##  9    2021    -1   152
## 10    2021     1 12009
## 11    2021     2   342
## 12    2021     3    46
## 13    2021     4   479
## 14    2021     5   137
## 15    2021     6   407
## 16    2021     7  2353
df %>% group_by(pemlr) %>% count(pruntype)
## # A tibble: 12 × 3
## # Groups:   pemlr [8]
##    pemlr pruntype     n
##    <int>    <int> <int>
##  1    -1       -1   357
##  2     1       -1 26366
##  3     2       -1   660
##  4     3        1   100
##  5     4        2   273
##  6     4        3   124
##  7     4        4   180
##  8     4        5   355
##  9     4        6    61
## 10     5       -1   245
## 11     6       -1   865
## 12     7       -1  4933
df$arm <-df$pruntype
df$arm[df$pruntype==-1]<-"employed_control" 
df$arm[df$pruntype==1 | df$pruntype==2] <-"involuntary exit"
df$arm[df$pruntype==3 | df$pruntype==4] <-"voluntary exit"
df$arm[df$pruntype==5] <-"re-entrants"
df$arm[df$pruntype==6] <-"new-entrants"

df$labor <-df$pemlr
df$labor[df$labor==1 | df$labor==2] <- "employed"
df$labor[df$labor==5| df$labor==6 | df$labor==7] <-"not in LB"
df$labor[df$labor==3] <-"unemployed_layoff"
df$labor[df$labor==4] <-"unemployed_looking"

df %>% group_by(arm) %>% count(labor) %>%
kable("html", col.names=(c("Treatment Arm", "Labor Force Status", "Count"))) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover")) %>% scroll_box(width = "100%")
Treatment Arm Labor Force Status Count
employed_control -1 357
employed_control employed 27026
employed_control not in LB 6043
involuntary exit unemployed_layoff 100
involuntary exit unemployed_looking 273
new-entrants unemployed_looking 61
re-entrants unemployed_looking 355
voluntary exit unemployed_looking 304
# 1. Region (assuming gereg is already present in df)
df$region <- df$gereg

# 2. Marital Status
df$married <- 0
df$married[df$pemaritl == 1 | df$pemaritl == 2] <- 1

# 3. Gender: Male binary
df$male <- ifelse(df$pesex == 1, 1, 0)

# 4. Education Status
df$educ <- 1
df$educ[df$peeduca %in% 31:38] <- 1
df$educ[df$peeduca %in% 39:40] <- 2
df$educ[df$peeduca %in% 41:42] <- 3
df$educ[df$peeduca == 43] <- 4
df$educ[df$peeduca %in% 44:46] <- 5
table(df$educ) # Equivalent to tab educ in Stata
## 
##     1     2     3     4     5 
##  2053 15831  3513  9519  3603
# 5. Race
df$race <- 0
df$race[df$ptdtrace == 1] <- 1
df$race[df$ptdtrace == 2] <- 2
df$race[df$ptdtrace == 4] <- 3
df$race[df$race == 0] <- 4
table(df$race) # Equivalent to tab race in Stata
## 
##     1     2     3     4 
## 26805  3748  2340  1626
df$white <- ifelse(df$race == 1, 1, 0)
df$black <- ifelse(df$race == 2, 1, 0)

# 6. Metropolitan Status
df$metro <- ifelse(df$gtmetsta == 1, 1, 0)
df1 <- df %>% 
  filter(labor=="employed" | labor == "unemployed_layoff" | labor=="unemployed_looking")

df1 %>%
  group_by(arm) %>%
  count(labor)
## # A tibble: 6 × 3
## # Groups:   arm [5]
##   arm              labor                  n
##   <chr>            <chr>              <int>
## 1 employed_control employed           27026
## 2 involuntary exit unemployed_layoff    100
## 3 involuntary exit unemployed_looking   273
## 4 new-entrants     unemployed_looking    61
## 5 re-entrants      unemployed_looking   355
## 6 voluntary exit   unemployed_looking   304
df1 %>% 
  group_by(hryear4) %>%
  count(arm)
## # A tibble: 10 × 3
## # Groups:   hryear4 [2]
##    hryear4 arm                  n
##      <int> <chr>            <int>
##  1    2019 employed_control 14675
##  2    2019 involuntary exit   197
##  3    2019 new-entrants        36
##  4    2019 re-entrants        177
##  5    2019 voluntary exit     158
##  6    2021 employed_control 12351
##  7    2021 involuntary exit   176
##  8    2021 new-entrants        25
##  9    2021 re-entrants        178
## 10    2021 voluntary exit     146
#Proportion of involuntary unemployed is 1.27% in 2019 and 1.52% in 2021. 
df1 %>%
  count(peedtrai)
##   peedtrai     n
## 1       -1 15217
## 2        1  4047
## 3        2  8855
df1$onlineeduc[df1$peedtrai==1]  = 1 
df1$onlineeduc[df1$peedtrai==2] = 0

df1 %>%
  count(onlineeduc)
##   onlineeduc     n
## 1          0  8855
## 2          1  4047
## 3         NA 15217
data_regression <-df1 %>%
  select(onlineeduc, labor, arm, hryear4, region, educ, married, male, black, prtage) %>% filter(arm=="involuntary exit" | arm=="voluntary exit" | arm=="employed_control")

regression1 <- feols(onlineeduc ~ i(arm) + i(hryear4) + i(educ) + prtage, data = data_regression)
etable(
    regression1)
##                               regression1
## Dependent Var.:                onlineeduc
##                                          
## Constant               0.4133*** (0.0396)
## arm = involuntaryexit   -0.0830* (0.0353)
## arm = voluntaryexit       0.0078 (0.0382)
## hryear4 = 2021         0.0487*** (0.0081)
## educ = 2               0.0950*** (0.0222)
## educ = 3               0.1669*** (0.0246)
## educ = 4               0.2298*** (0.0225)
## educ = 5               0.2997*** (0.0240)
## prtage                -0.0101*** (0.0012)
## _____________________ ___________________
## S.E. type                             IID
## Observations                       12,733
## R2                                0.03716
## Adj. R2                           0.03655
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Probit estimate

probit <- glm(onlineeduc ~i(region) + i(educ) + i(arm) , family = binomial(link = "probit"), data = data_regression)

probit
## 
## Call:  glm(formula = onlineeduc ~ i(region) + i(educ) + i(arm), family = binomial(link = "probit"), 
##     data = data_regression)
## 
## Coefficients:
##            (Intercept)              i(region)1              i(region)2  
##               -0.03477                -0.17154                -0.05077  
##             i(region)3              i(region)4                i(educ)1  
##               -0.12135                      NA                -0.89579  
##               i(educ)2                i(educ)3                i(educ)4  
##               -0.52994                -0.33558                -0.14902  
##               i(educ)5  i(arm)employed_control  i(arm)involuntary exit  
##                     NA                -0.04860                -0.31327  
##   i(arm)voluntary exit  
##                     NA  
## 
## Degrees of Freedom: 12732 Total (i.e. Null);  12723 Residual
##   (14970 observations deleted due to missingness)
## Null Deviance:       15850 
## Residual Deviance: 15460     AIC: 15480

Propensity Score

# Estimate propensity scores

data_regression2 <- df1 %>%
  filter(arm == "involuntary exit" | arm == "involuntary exit" | arm == "voluntary exit" )

# create binary variable for treatment 

data_regression2$inv <- ifelse(data_regression2$arm == "involuntary exit", 1, 0)

propensity_model <- glm(inv ~i(region) + i(educ), family = binomial(link = "probit"), data = data_regression2)

# Extract propensity scores
data_regression2$pscore <- predict(propensity_model, type = "response")

outcome_model <- lm(onlineeduc ~ inv + pscore, data = data_regression2)
summary(outcome_model)
## 
## Call:
## lm(formula = onlineeduc ~ inv + pscore, data = data_regression2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4121 -0.2577 -0.1835 -0.1104  0.8896 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.59856    0.13839   4.325 2.06e-05 ***
## inv         -0.09818    0.04815  -2.039   0.0423 *  
## pscore      -0.56578    0.25260  -2.240   0.0258 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4191 on 311 degrees of freedom
##   (363 observations deleted due to missingness)
## Multiple R-squared:  0.03421,    Adjusted R-squared:  0.028 
## F-statistic: 5.508 on 2 and 311 DF,  p-value: 0.004459