Loading R library

library(tidyverse)
library(kableExtra)
library(broom)
library(texreg)
library(patchwork)
library(readr)
library(lubridate)
library(boot)
library(stargazer)
library(fixest)
library(pander)
library(MatchIt)

The data is from the Current Population Survey (CPS) from 2019 to 2021

Loading Dataset

df_2019 <- read.csv("nov19pub.csv")
df_2021 <- read.csv("nov21pub.csv")

names(df_2019) <- tolower(names(df_2019))
names(df_2021) <- tolower(names(df_2021))

n_raw_2019 <- as.character(nrow(df_2019))
n_raw_2021 <- as.character(nrow(df_2021))

n_unique_2019 <- as.character(length(unique(df_2019$hrhhid)))
n_unique_2021 <- as.character(length(unique(df_2021$hrhhid)))

The raw data contains 138850 and 127375 observations for 2019 and 2021, respectively.
The unique household IDs are 55107 and 54184 for 2019 and 2021, respectively.

Checking age variable

Variable prtage indicates the age of members of a household, which is uniquely identified by variable hrhhid
Filter to people between 18 to 40.

df_2019 <- df_2019 %>% filter(prtage > 21 & prtage < 35)

n_row_agerestrict_2019 <- as.character(nrow(df_2019))


df_2021 <- df_2021 %>% filter(prtage> 21 & prtage < 35)

n_row_agerestrict_2021 <- as.character(nrow(df_2021))

After restricting to age between 18 to 40, we have 18594 observations for 2019 and 15925 observations for 2021.

Append two dataset together

df_2019_s <- df_2019 %>% select(hrhhid, hryear4, pemlr, pruntype, gereg, pemaritl, peeduca, peedtrai, pesex, ptdtrace, gtmetsta, prtage) 

df_2021_s <- df_2021 %>% select(hrhhid, hryear4, pemlr, pruntype, gereg, pemaritl, peeduca, peedtrai, pesex, ptdtrace, gtmetsta, prtage) 

df <- rbind(df_2019_s, df_2021_s)

Checking employment variable

df %>% names()

##  [1] "hrhhid"   "hryear4"  "pemlr"    "pruntype" "gereg"    "pemaritl"
##  [7] "peeduca"  "peedtrai" "pesex"    "ptdtrace" "gtmetsta" "prtage"

df %>% count(hryear4)

##   hryear4     n
## 1    2019 18594
## 2    2021 15925

df %>% group_by(hryear4) %>% count(pemlr)

## # A tibble: 16 × 3
## # Groups:   hryear4 [2]
##    hryear4 pemlr     n
##      <int> <int> <int>
##  1    2019    -1   205
##  2    2019     1 14357
##  3    2019     2   318
##  4    2019     3    54
##  5    2019     4   514
##  6    2019     5   108
##  7    2019     6   458
##  8    2019     7  2580
##  9    2021    -1   152
## 10    2021     1 12009
## 11    2021     2   342
## 12    2021     3    46
## 13    2021     4   479
## 14    2021     5   137
## 15    2021     6   407
## 16    2021     7  2353

df %>% group_by(pemlr) %>% count(pruntype)

## # A tibble: 12 × 3
## # Groups:   pemlr [8]
##    pemlr pruntype     n
##    <int>    <int> <int>
##  1    -1       -1   357
##  2     1       -1 26366
##  3     2       -1   660
##  4     3        1   100
##  5     4        2   273
##  6     4        3   124
##  7     4        4   180
##  8     4        5   355
##  9     4        6    61
## 10     5       -1   245
## 11     6       -1   865
## 12     7       -1  4933

Decision made here to set labor = employed, unemployed_layoff, and unemployed_looking

df$arm <-df$pruntype
df$arm[df$pruntype==-1]<-"employed_control" 
df$arm[df$pruntype==1 | df$pruntype==2] <-"involuntary exit"
df$arm[df$pruntype==3 | df$pruntype==4] <-"voluntary exit"
df$arm[df$pruntype==5] <-"re-entrants"
df$arm[df$pruntype==6] <-"new-entrants"

df$labor <-df$pemlr
df$labor[df$labor==1 | df$labor==2] <- "employed"
df$labor[df$labor==5| df$labor==6 | df$labor==7] <-"not in LB"
df$labor[df$labor==3] <-"unemployed_layoff"
df$labor[df$labor==4] <-"unemployed_looking"

df %>% group_by(arm) %>% count(labor) %>%
kable("html", col.names=(c("Treatment Arm", "Labor Force Status", "Count"))) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover")) %>% scroll_box(width = "100%")

Treatment Arm	Labor Force Status	Count
employed_control	-1	357
employed_control	employed	27026
employed_control	not in LB	6043
involuntary exit	unemployed_layoff	100
involuntary exit	unemployed_looking	273
new-entrants	unemployed_looking	61
re-entrants	unemployed_looking	355
voluntary exit	unemployed_looking	304

# 1. Region (assuming gereg is already present in df)
df$region <- df$gereg

# 2. Marital Status
df$married <- 0
df$married[df$pemaritl == 1 | df$pemaritl == 2] <- 1

# 3. Gender: Male binary
df$male <- ifelse(df$pesex == 1, 1, 0)

# 4. Education Status
df$educ <- 1
df$educ[df$peeduca %in% 31:38] <- 1
df$educ[df$peeduca %in% 39:40] <- 2
df$educ[df$peeduca %in% 41:42] <- 3
df$educ[df$peeduca == 43] <- 4
df$educ[df$peeduca %in% 44:46] <- 5
table(df$educ) # Equivalent to tab educ in Stata

## 
##     1     2     3     4     5 
##  2053 15831  3513  9519  3603

# 5. Race
df$race <- 0
df$race[df$ptdtrace == 1] <- 1
df$race[df$ptdtrace == 2] <- 2
df$race[df$ptdtrace == 4] <- 3
df$race[df$race == 0] <- 4
table(df$race) # Equivalent to tab race in Stata

## 
##     1     2     3     4 
## 26805  3748  2340  1626

df$white <- ifelse(df$race == 1, 1, 0)
df$black <- ifelse(df$race == 2, 1, 0)

# 6. Metropolitan Status
df$metro <- ifelse(df$gtmetsta == 1, 1, 0)

df1 <- df %>% 
  filter(labor=="employed" | labor == "unemployed_layoff" | labor=="unemployed_looking")

df1 %>%
  group_by(arm) %>%
  count(labor)

## # A tibble: 6 × 3
## # Groups:   arm [5]
##   arm              labor                  n
##   <chr>            <chr>              <int>
## 1 employed_control employed           27026
## 2 involuntary exit unemployed_layoff    100
## 3 involuntary exit unemployed_looking   273
## 4 new-entrants     unemployed_looking    61
## 5 re-entrants      unemployed_looking   355
## 6 voluntary exit   unemployed_looking   304

df1 %>% 
  group_by(hryear4) %>%
  count(arm)

## # A tibble: 10 × 3
## # Groups:   hryear4 [2]
##    hryear4 arm                  n
##      <int> <chr>            <int>
##  1    2019 employed_control 14675
##  2    2019 involuntary exit   197
##  3    2019 new-entrants        36
##  4    2019 re-entrants        177
##  5    2019 voluntary exit     158
##  6    2021 employed_control 12351
##  7    2021 involuntary exit   176
##  8    2021 new-entrants        25
##  9    2021 re-entrants        178
## 10    2021 voluntary exit     146

#Proportion of involuntary unemployed is 1.27% in 2019 and 1.52% in 2021.

df1 %>%
  count(peedtrai)

##   peedtrai     n
## 1       -1 15217
## 2        1  4047
## 3        2  8855

df1$onlineeduc[df1$peedtrai==1]  = 1 
df1$onlineeduc[df1$peedtrai==2] = 0

df1 %>%
  count(onlineeduc)

##   onlineeduc     n
## 1          0  8855
## 2          1  4047
## 3         NA 15217

data_regression <-df1 %>%
  select(onlineeduc, labor, arm, hryear4, region, educ, married, male, black, prtage) %>% filter(arm=="involuntary exit" | arm=="voluntary exit" | arm=="employed_control")

regression1 <- feols(onlineeduc ~ i(arm) + i(hryear4) + i(educ) + prtage, data = data_regression)
etable(
    regression1)

##                               regression1
## Dependent Var.:                onlineeduc
##                                          
## Constant               0.4133*** (0.0396)
## arm = involuntaryexit   -0.0830* (0.0353)
## arm = voluntaryexit       0.0078 (0.0382)
## hryear4 = 2021         0.0487*** (0.0081)
## educ = 2               0.0950*** (0.0222)
## educ = 3               0.1669*** (0.0246)
## educ = 4               0.2298*** (0.0225)
## educ = 5               0.2997*** (0.0240)
## prtage                -0.0101*** (0.0012)
## _____________________ ___________________
## S.E. type                             IID
## Observations                       12,733
## R2                                0.03716
## Adj. R2                           0.03655
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Probit estimate

probit <- glm(onlineeduc ~i(region) + i(educ) + i(arm) , family = binomial(link = "probit"), data = data_regression)

probit

## 
## Call:  glm(formula = onlineeduc ~ i(region) + i(educ) + i(arm), family = binomial(link = "probit"), 
##     data = data_regression)
## 
## Coefficients:
##            (Intercept)              i(region)1              i(region)2  
##               -0.03477                -0.17154                -0.05077  
##             i(region)3              i(region)4                i(educ)1  
##               -0.12135                      NA                -0.89579  
##               i(educ)2                i(educ)3                i(educ)4  
##               -0.52994                -0.33558                -0.14902  
##               i(educ)5  i(arm)employed_control  i(arm)involuntary exit  
##                     NA                -0.04860                -0.31327  
##   i(arm)voluntary exit  
##                     NA  
## 
## Degrees of Freedom: 12732 Total (i.e. Null);  12723 Residual
##   (14970 observations deleted due to missingness)
## Null Deviance:       15850 
## Residual Deviance: 15460     AIC: 15480

Propensity Score

# Estimate propensity scores

data_regression2 <- df1 %>%
  filter(arm == "involuntary exit" | arm == "involuntary exit" | arm == "voluntary exit" )

# create binary variable for treatment 

data_regression2$inv <- ifelse(data_regression2$arm == "involuntary exit", 1, 0)

propensity_model <- glm(inv ~i(region) + i(educ), family = binomial(link = "probit"), data = data_regression2)

# Extract propensity scores
data_regression2$pscore <- predict(propensity_model, type = "response")

outcome_model <- lm(onlineeduc ~ inv + pscore, data = data_regression2)
summary(outcome_model)

## 
## Call:
## lm(formula = onlineeduc ~ inv + pscore, data = data_regression2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4121 -0.2577 -0.1835 -0.1104  0.8896 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.59856    0.13839   4.325 2.06e-05 ***
## inv         -0.09818    0.04815  -2.039   0.0423 *  
## pscore      -0.56578    0.25260  -2.240   0.0258 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4191 on 311 degrees of freedom
##   (363 observations deleted due to missingness)
## Multiple R-squared:  0.03421,    Adjusted R-squared:  0.028 
## F-statistic: 5.508 on 2 and 311 DF,  p-value: 0.004459

Online Learning in the Face of Unemployment

Kiet Le and Octavio Aguilar

2024-01-22

Loading R library

Loading Dataset

Checking age variable

Append two dataset together

Checking employment variable

Probit estimate

Propensity Score