Recode Data
library(questionr)
library(dplyr)
ds2<-ds1%>%
mutate(
covid = as.factor(if_else(year==2020 & month>=4 | year==2021 & month<3,1,0)),
covidint = as.factor(if_else(year==2019 & month<=9,"1precov_Q32019",
if_else(year==2019 & month>=10,"2precov_Q42019",
if_else(year==2020 & month<=2,"3precov_Q12020",
if_else(year==2020 & month>=3 & month<=8,"4earlycov_Q22020",
if_else(year==2020 & month>=9 & month<=12,"5midcov_Q32020",
if_else(year==2021 & month>=1 & month<=3,"6latecov_Q42020","NA"))))))
),
covid19unaw = Recode(covidunaw, recodes = "1='0'; 2= '1'; else=NA", as.factor=T),
hhinc =Recode(faminc,recodes="100:490='1_lt15k';500:600='2_15-25k';710:720='3_25-35k';
730:740='4_35-50k';810:830='5_50k-75k';840:843='6_75kplus';999='NA'", as.factor = T),
agegrp=cut(age, breaks=c(16,24,34,44,54,64)),
male=Recode(sex,recodes="2='0';1='1';9=NA", as.factor = T),
raceeth=as.factor(if_else(race==100 & hispan==000,"NH_wht",
if_else(race==200 & hispan==000, "NH_blk",
if_else(!race%in%c('100','200','999') & hispan==000, "NH_other",
if_else(!hispan%in%c('000','901','902'), "Hisp","NA"))))
),
employst=Recode( empstat,recodes="1='Mil';10='emp';12='emp_nwklwk';20:22='unemp';32='NILF_utw';34='NILF_oth';36='NILF_ret'; else=NA" , as.factor = T),
unemployed=Recode( empstat,recodes="1:12='0';20:22='1';else=NA"),
industry=Recode( ind,recodes="770='const';1070:3990='manuf';4070:4590='whsale';4670:5790='retail';6070:6390='trans';570:690='utl';
6470:6672='media_ent';6870:6992='bnk_ins';7071:7072='realest';7860:7890='Teach';7970:8290='Health';8670:8690='rstrnt_svc';8770:8891='repair_maint';8970:8990='perscare';9160:9390='gen_supprt';9470='crimjust'; else=NA", as.factor = T ),
selfemp=Recode( classwkr,recodes="10:14='1';20:29='0';else=NA" ),
uedurwks =Recode( durunem2,recodes="0:3='lt4wks';4:7='4-10wks';8:10='11-22wks';11:15='23-52wks';16='gt52wks'; else=NA", as.factor = T),
whyue =Recode( whyunemp,recodes="1='templayoff';2='jobloss';3='tempwk_end';4='leftjob';5='re_ent';6='new_ent'; else=NA" , as.factor = T),
whypt =Recode( whyptlwk,recodes="60='onlyfindpt';80='ftdown';100:101='health_med';111='persday';120:122='chcare_fam';
123='sch';130='oth'; else=NA" , as.factor = T),
underemp=Recode( wkstat,recodes="12='1'; 13='1'; 21='1';99=NA; else='0'" ),#reconsider recodes
sameemp=Recode( empsame,recodes="2='1';1='0';else=NA", as.factor=T ),
offerwrk=Recode( wrkoffer,recodes= "2='0';1='1';else=NA"),
paidhourly=Recode( paidhour,recodes="2='1';1='0';else=NA"),
unioncover=Recode( union,recodes="2:3='1';1='0';else=NA"),
earnerstud=as.factor(x=eligorg),
ottippay=Recode( otpay,recodes="2='1';1='0';else=NA"),
earnweekna=recode.na(earnweek,9999.99,as.numeric = T),
earnyear=earnweek*52
)
## Recoded 65562 values to NA.
ds3<-ds2%>%
filter(earnerstud==1 )
#select(earnwt, earnweekna, hhinc, covidint, agegrp, male, raceeth, industry)
ds3in <- ds3%>%
filter(sameemp==1 )
ds3out <- ds3%>%
filter(sameemp==0 )
set.seed(410)
samp<-sample(1:dim(ds3in)[1], size = 228) #smaller sample from group with same employer
ds3inbind<-ds3in[samp,]
ds4<-rbind(ds3out,ds3inbind)%>%
select(wtfinl,earnwt, earnweekna, sameemp, covidint,age, agegrp, male, raceeth, industry)
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
gam1<-gam(sameemp~earnweekna+age+covidint+male+raceeth+industry,
data=ds4,
weights = wtfinl,
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(gam1)
##
## Family: binomial
## Link function: logit
##
## Formula:
## sameemp ~ earnweekna + age + covidint + male + raceeth + industry
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.065e-01 9.798e-03 -92.518 < 2e-16 ***
## earnweekna 5.855e-04 3.983e-06 147.009 < 2e-16 ***
## age 2.914e-02 1.433e-04 203.335 < 2e-16 ***
## covidint2precov_Q42019 -4.795e-01 5.739e-03 -83.548 < 2e-16 ***
## covidint3precov_Q12020 -1.842e-01 6.743e-03 -27.313 < 2e-16 ***
## covidint4earlycov_Q22020 -1.380e-01 5.238e-03 -26.352 < 2e-16 ***
## covidint5midcov_Q32020 -3.214e-01 5.843e-03 -55.005 < 2e-16 ***
## covidint6latecov_Q42020 4.376e-01 6.985e-03 62.655 < 2e-16 ***
## male1 6.515e-02 3.900e-03 16.705 < 2e-16 ***
## raceethNH_blk -9.483e-01 6.384e-03 -148.544 < 2e-16 ***
## raceethNH_other -3.121e-01 8.191e-03 -38.104 < 2e-16 ***
## raceethNH_wht -3.079e-01 3.844e-03 -80.081 < 2e-16 ***
## industryconst -1.224e+00 9.011e-03 -135.817 < 2e-16 ***
## industrycrimjust -1.158e+00 1.409e-02 -82.169 < 2e-16 ***
## industrygen_supprt -6.031e-02 1.085e-02 -5.558 2.72e-08 ***
## industryHealth -6.360e-01 8.008e-03 -79.422 < 2e-16 ***
## industrymanuf 4.243e-01 8.763e-03 48.421 < 2e-16 ***
## industrymedia_ent -8.267e-01 1.913e-02 -43.219 < 2e-16 ***
## industryperscare 1.315e+01 8.395e+00 1.566 0.117
## industryrealest -9.851e-01 1.208e-02 -81.548 < 2e-16 ***
## industryrepair_maint 1.051e+00 1.427e-02 73.625 < 2e-16 ***
## industryretail -7.382e-01 7.939e-03 -92.977 < 2e-16 ***
## industryrstrnt_svc -4.870e-01 8.930e-03 -54.537 < 2e-16 ***
## industryTeach -2.967e-01 8.446e-03 -35.128 < 2e-16 ***
## industrytrans -1.578e-01 8.685e-03 -18.165 < 2e-16 ***
## industryutl -4.088e-01 1.259e-02 -32.485 < 2e-16 ***
## industrywhsale 5.461e-01 1.570e-02 34.782 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.06 Deviance explained = 9.98%
## UBRE = 5785.9 Scale est. = 1 n = 359
gam2<-gam(sameemp~s(earnweekna)+s(age)+covidint+male+raceeth+industry,
data=ds4,
weights = wtfinl,
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(gam2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## sameemp ~ s(earnweekna) + s(age) + covidint + male + raceeth +
## industry
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.851946 0.008369 101.803 <2e-16 ***
## covidint2precov_Q42019 -0.444908 0.005927 -75.059 <2e-16 ***
## covidint3precov_Q12020 -0.125121 0.006951 -18.001 <2e-16 ***
## covidint4earlycov_Q22020 -0.201661 0.005469 -36.873 <2e-16 ***
## covidint5midcov_Q32020 -0.249959 0.006088 -41.054 <2e-16 ***
## covidint6latecov_Q42020 0.454445 0.007411 61.318 <2e-16 ***
## male1 0.061196 0.004070 15.037 <2e-16 ***
## raceethNH_blk -0.951292 0.006705 -141.883 <2e-16 ***
## raceethNH_other -0.250881 0.008493 -29.538 <2e-16 ***
## raceethNH_wht -0.328432 0.003976 -82.609 <2e-16 ***
## industryconst -1.524032 0.009479 -160.779 <2e-16 ***
## industrycrimjust -1.189358 0.014742 -80.678 <2e-16 ***
## industrygen_supprt -0.115787 0.011109 -10.423 <2e-16 ***
## industryHealth -0.668139 0.008365 -79.877 <2e-16 ***
## industrymanuf 0.441623 0.009103 48.516 <2e-16 ***
## industrymedia_ent -1.142661 0.019787 -57.748 <2e-16 ***
## industryperscare 18.036281 102.268737 0.176 0.86
## industryrealest -1.033039 0.012606 -81.948 <2e-16 ***
## industryrepair_maint 0.795128 0.014682 54.155 <2e-16 ***
## industryretail -0.887832 0.008256 -107.541 <2e-16 ***
## industryrstrnt_svc -0.515327 0.009392 -54.868 <2e-16 ***
## industryTeach -0.223744 0.008883 -25.188 <2e-16 ***
## industrytrans -0.197969 0.008985 -22.033 <2e-16 ***
## industryutl -0.516301 0.013461 -38.355 <2e-16 ***
## industrywhsale 0.588486 0.017435 33.754 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(earnweekna) 8.945 8.999 38514 <2e-16 ***
## s(age) 8.934 8.999 70939 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0489 Deviance explained = 12.8%
## UBRE = 5604.8 Scale est. = 1 n = 359
According to the model results, the smooth (non-linear) terms are statistically significant. Covid period effects on staying with the same employer show a negative likelihood compared to the reference period.Further, men are more likely to have the same employer; blacks whites and other race/ethnicity are less likely compared to Hispanics; and all but manufacturing, maintenance, and wholesale industries have lower likelihood than bankers. The personal care industry does not appear to have an effect.
anova( gam1, gam2, test="Chisq")
The Chisquare test (LRT) results display a low p-value for the model with smooth terms, indicating the terms should be included in the model.
plot(gam2)


The plot of the model indicates that there is a positive relationship between keeping the same employer and weekly earnings up to $250. After which the relationship flatens and is less prevalent for higher earnings.The relationship with age oscilates but increases slightly overall until the age of 60 where the likelihood of having the same employer greatly increases for individuals older than 60.