What is the likelihood of experiencing joblessness due to the Covid-19 pandemic by sex and educational attainment?
library(readr)
cps1 <- read_csv("C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/HW/HW3/cps_00008.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## YEAR = col_double(),
## SERIAL = col_double(),
## MONTH = col_double(),
## CPSID = col_double(),
## PERNUM = col_double(),
## WTFINL = col_double(),
## CPSIDP = col_double(),
## SEX = col_double(),
## LABFORCE = col_double(),
## EDUC99 = col_double(),
## EDCYC = col_double(),
## COVIDUNAW = col_double()
## )
cps2<-cps1[sample(nrow(cps1), 10000), ]
#The names in the data are very ugly, so I make them less ugly
nams<-names(cps2)
head(nams, n=10)
## [1] "YEAR" "SERIAL" "MONTH" "CPSID" "PERNUM" "WTFINL"
## [7] "CPSIDP" "SEX" "LABFORCE" "EDUC99"
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(cps2)<-newnames
#Unable to work due to COVID
cps2$covdunaw<-Recode(cps2$covidunaw, recodes="2=1; 1=0; else=NA")
#sex
cps2$female<- Recode(cps2$sex, recodes="2=1; 1=0; else=NA")
#education level
cps2$edatt<-Recode(cps2$educ99,
recodes="1:5='0to8'; 6:9='1somehs'; 10='2hsgrad'; 11='3somecol'; 12:15='4undgrad'; 16:18='5advgrad' ;else=NA",
as.factor=T)
First, we will do some descriptive analysis, such as means and cross tabulations.
sub<-cps2 %>%
select(covdunaw,female, edatt,wtfinl) %>%
filter( complete.cases( . ))
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids= ~1,
weights= ~wtfinl,
data = sub )
First, we examine the % of US adults with with COVID related unemployment by Education level, and do a survey-corrected chi-square test for independence.
cat<-svyby(formula = ~covdunaw,
by = ~edatt,
design = des,
FUN = svymean,
na.rm=T)
svychisq(~covdunaw+edatt,
design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~covdunaw + edatt, design = des)
## F = 3.6511, ndf = 4.9545, ddf = 39933.3413, p-value = 0.002738
library(ggplot2)
cat%>%
ggplot()+
geom_point(aes(x=edatt,y=covdunaw))+
geom_errorbar(aes(x=edatt, ymin = covdunaw-1.96*se,
ymax= covdunaw+1.96*se),
width=.25)+
labs(title = "Percent % of US Adults not working due to COVID by Education",
caption = "Source: IPUMS CPS, 2020 \n Calculations by David R. Rodriguez, MA.",
x = "Education",
y = "% Not working due to COVID shutdown")+
theme_minimal()
dog<-svyby(formula = ~covdunaw,
by = ~female,
design = des,
FUN = svymean,
na.rm=T)
svychisq(~covdunaw+female,
design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~covdunaw + female, design = des)
## F = 0.63052, ndf = 1, ddf = 8060, p-value = 0.4272
dog%>%
ggplot()+
geom_point(aes(x=female,y=covdunaw))+
geom_errorbar(aes(x=female, ymin = covdunaw-1.96*se,
ymax= covdunaw+1.96*se),
width=.25)+
labs(title = "Percent % of US Adults not working due to COVID by Sex",
caption = "Source: IPUMS CPS, 2020 \n Calculations by David R. Rodriguez, MA.",
x = "Sex",
y = "% Not working due to COVID shutdown")+
theme_minimal()
catdog<-svyby(formula = ~covdunaw,
by = ~female+edatt,
design = des,
FUN = svymean,
na.rm=T)
#this plot is a little more complicated, but facet_wrap() plots separate plots for groups
catdog%>%
ggplot()+
geom_point(aes(x=edatt, y = covdunaw))+
geom_errorbar(aes(x=edatt, ymin = covdunaw-1.96*se,
ymax= covdunaw+1.96*se),
width=.25)+
facet_wrap(~ female, nrow = 3)+
labs(title = "Percent % of US Adults not working due to COVID by Sex and education",
caption = "Source: IPUMS CPS, 2020 \n Calculations by David R. Rodriguez, MA.",
x = "Sex",
x = "Education",
y = "% Not working due to COVID shutdown")+
theme_minimal()
To fit the model, we use svyglm()
#Logit model
fit.logit<-svyglm(covdunaw ~ female + edatt,
design = des,
family = binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
library(broom)
fit.logit%>%
tidy()%>%
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | -2.216 | 0.288 | -7.688 | 0.000 |
| female | 0.068 | 0.092 | 0.742 | 0.458 |
| edatt1somehs | -0.596 | 0.344 | -1.730 | 0.084 |
| edatt2hsgrad | 0.067 | 0.294 | 0.227 | 0.821 |
| edatt3somecol | -0.039 | 0.303 | -0.130 | 0.897 |
| edatt4undgrad | 0.059 | 0.292 | 0.201 | 0.841 |
| edatt5advgrad | -0.446 | 0.318 | -1.403 | 0.161 |
The most straighforward way to get odds ratios is to exponentiate the \(\beta\) parameters. These are stored in the coefficients(fit.logit)
fit.logit%>%
tidy()%>%
mutate(OR = exp(estimate))%>%
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value | OR |
|---|---|---|---|---|---|
| (Intercept) | -2.216 | 0.288 | -7.688 | 0.000 | 0.109 |
| female | 0.068 | 0.092 | 0.742 | 0.458 | 1.070 |
| edatt1somehs | -0.596 | 0.344 | -1.730 | 0.084 | 0.551 |
| edatt2hsgrad | 0.067 | 0.294 | 0.227 | 0.821 | 1.069 |
| edatt3somecol | -0.039 | 0.303 | -0.130 | 0.897 | 0.961 |
| edatt4undgrad | 0.059 | 0.292 | 0.201 | 0.841 | 1.060 |
| edatt5advgrad | -0.446 | 0.318 | -1.403 | 0.161 | 0.640 |
And we can get simple confidence intervals for these esimates using confint() which will work for most models.
fit.logit%>%
tidy()%>%
mutate(OR = exp(estimate),
LowerOR_Ci = exp(estimate - 1.96*std.error),
UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value | OR | LowerOR_Ci | UpperOR_Ci |
|---|---|---|---|---|---|---|---|
| (Intercept) | -2.216 | 0.288 | -7.688 | 0.000 | 0.109 | 0.062 | 0.192 |
| female | 0.068 | 0.092 | 0.742 | 0.458 | 1.070 | 0.894 | 1.281 |
| edatt1somehs | -0.596 | 0.344 | -1.730 | 0.084 | 0.551 | 0.281 | 1.082 |
| edatt2hsgrad | 0.067 | 0.294 | 0.227 | 0.821 | 1.069 | 0.601 | 1.902 |
| edatt3somecol | -0.039 | 0.303 | -0.130 | 0.897 | 0.961 | 0.531 | 1.742 |
| edatt4undgrad | 0.059 | 0.292 | 0.201 | 0.841 | 1.060 | 0.598 | 1.880 |
| edatt5advgrad | -0.446 | 0.318 | -1.403 | 0.161 | 0.640 | 0.343 | 1.194 |
A sligtly more digestible form can be obtained from the sjPlot library. In this plot, if the error bars overlap 1, the effects are not statistically significant.
library(sjPlot)
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
plot_model(fit.logit,
title = "Odds ratios for Not Working due to COVID shut down")
#get a series of predicted probabilities for different "types" of people for each model
#ref_grid will generate all possible combinations of predictors from a model
library(emmeans)
rg<-ref_grid(fit.logit)
marg_logit<-emmeans(object = rg,
specs = c( "female", "edatt"),
type="response" )
knitr::kable(marg_logit, digits = 4)
| female | edatt | prob | SE | df | asymp.LCL | asymp.UCL |
|---|---|---|---|---|---|---|
| 0 | 0to8 | 0.0983 | 0.0256 | Inf | 0.0583 | 0.1609 |
| 1 | 0to8 | 0.1045 | 0.0265 | Inf | 0.0628 | 0.1689 |
| 0 | 1somehs | 0.0567 | 0.0108 | Inf | 0.0389 | 0.0820 |
| 1 | 1somehs | 0.0604 | 0.0115 | Inf | 0.0414 | 0.0874 |
| 0 | 2hsgrad | 0.1044 | 0.0089 | Inf | 0.0882 | 0.1231 |
| 1 | 2hsgrad | 0.1109 | 0.0093 | Inf | 0.0939 | 0.1305 |
| 0 | 3somecol | 0.0949 | 0.0104 | Inf | 0.0764 | 0.1173 |
| 1 | 3somecol | 0.1009 | 0.0111 | Inf | 0.0810 | 0.1249 |
| 0 | 4undgrad | 0.1036 | 0.0086 | Inf | 0.0880 | 0.1216 |
| 1 | 4undgrad | 0.1101 | 0.0086 | Inf | 0.0944 | 0.1281 |
| 0 | 5advgrad | 0.0652 | 0.0096 | Inf | 0.0487 | 0.0868 |
| 1 | 5advgrad | 0.0695 | 0.0096 | Inf | 0.0529 | 0.0909 |
An interesting case is that of Men with only some high school education having the lowest probability (.065) of losing work due to a Covid shutdown. This might be explained by this group being over represented in occupations considered “essential” work.
comps<-as.data.frame(marg_logit)
comps[comps$female=="0" & comps$edatt == "1somehs" , ]
comps[comps$female=="0" & comps$edatt == "2hsgrad" , ]
comps[comps$female=="0" & comps$edatt == "3somecol" , ]
comps[comps$female=="0" & comps$edatt == "4undgrad" , ]
comps[comps$female=="0" & comps$edatt == "5advgrad" , ]