library(car)
library(stargazer)
library(survey)
library(ggplot2)
library(pander)
library(dplyr)
library(knitr)
library(tidyverse)
library(readr)
ds1 <- read.csv("C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00015.csv")
nams<-names(ds1)
head(nams, n=10)
##  [1] "YEAR"     "SERIAL"   "MONTH"    "HWTFINL"  "CPSID"    "STATEFIP"
##  [7] "FAMINC"   "PERNUM"   "WTFINL"   "CPSIDP"
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(ds1)<-newnames

#summary(ds1)

Recode Data

library(questionr)
library(dplyr)

ds2<-ds1%>%
  mutate(
          date = ISOdate(year,month,day = 1),
          covid = as.factor(if_else(year==2020 & month>=4 | year==2021 & month<4,1,0)),
          covidint = as.factor(if_else(year==2019 & month<=9,"1precov_Q32019",
                                       if_else(year==2019 & month>=10,"2precov_Q42019",
                                       if_else(year==2020 & month<=3,"3precov_Q12020",
                                       if_else(year==2020 & month>=4 & month<=6,"4earlycov_Q22020",
                                       if_else(year==2020 & month>=7 & month<=9,"5midcov_Q32020",
                                       if_else(year==2020 & month>=10,"6latecov_Q42020",
                                       if_else(year==2021 & month<=3,"7latercov_Q12021", "NA")))))))),
                    covidint=relevel(covidint, ref = "3precov_Q12020"),

          covid19unaw = Recode(covidunaw, recodes = "1='0'; 2= '1'; else=NA", as.factor=T),

            hhinc   =Recode(faminc,recodes="100:490='1_lt15k';500:600='2_15-25k';710:720='3_25-35k';
                          730:740='4_35-50k';810:830='5_50k-75k';840:843='6_75kplus';999='NA'", as.factor = T),
          
          hhinc2=Recode(faminc,recodes="100:490=1;500:600=2;710:720=3;
                          730:740=4;810:830=5;840:843=6;999=NA", as.factor = T),
          hhinc2=relevel(hhinc2, ref = "1"),
          hhincnum=car::Recode(faminc,recodes="100:490=1;500:600=2;710:720=3;
                          730:740=4;810:830=5;840:843=6;999=NA", as.factor = F),
          
          
            agegrp=cut(age, breaks=c(16,24,34,44,54,64)),
          agegrp=relevel(agegrp, ref = "(44,54]"),
          male=Recode(sex,recodes="2='0';1='1';9=NA", as.factor = T),
          male=relevel(male, ref = "1"),
          raceeth=as.factor(if_else(race==100 & hispan==000,"NH_wht",
                          if_else(race==200 & hispan==000, "NH_blk",
                          if_else(!race%in%c('100','200','999') & hispan==000, "NH_other",
                          if_else(!hispan%in%c('000','901','902'), "Hisp","NA"))))
                          ),
          raceeth=relevel(raceeth, ref = "NH_wht"),
            employst=Recode( empstat,recodes="1='Mil';10='emp';12='emp_nwklwk';20:22='unemp';32='NILF_utw';34='NILF_oth';36='NILF_ret'; else=NA" , as.factor = T),
          empstatus=Recode( empstat,recodes="1:12='emp';20:22='ue';0= NA;else='NILF'", as.factor=T),
          unemp=Recode( empstat,recodes="1:12='0';20:22='1';0= NA;else='0'", as.factor=T),
          nilf=Recode( empstat,recodes="1:12='0';20:22='0';0= NA;else='1'", as.factor=T),

            industry=Recode( ind,recodes="770='const';1070:3990='manuf';4070:4590='whsale';4670:5790='retail';6070:6390='trans';570:690='utl';
                             6470:6672='media_ent';6870:6992='bnk_ins';7071:7072='realest';7860:7890='Teach';7970:8290='Health';8670:8690='rstrnt_svc';8770:8891='repair_maint';8970:8990='perscare';9160:9390='gen_supprt';9470='crimjust'; else=NA", as.factor = T ),
          ind.cd =recode.na(ind,0,as.factor = T),
            selfemp=Recode( classwkr,recodes="10:14='1';20:29='0';else=NA",as.factor=T ),
            uedurwks    =Recode( durunem2,recodes="0:3='lt4wks';4:7='4-10wks';8:10='11-22wks';11:15='23-52wks';16='gt52wks'; else=NA", as.factor = T),
            whyue   =Recode( whyunemp,recodes="1='templayoff';2='jobloss';3='tempwk_end';4='leftjob';5='re_ent';6='new_ent'; else=NA" , as.factor = T),
            whypt   =Recode( whyptlwk,recodes="60='onlyfindpt';80='ftdown';100:101='health_med';111='persday';120:122='chcare_fam';
                           123='sch';130='oth'; else=NA" , as.factor = T),
            underemp=Recode( wkstat,recodes="12='1'; 13='1'; 21='1';42='1';99=NA; else='0'" , as.factor=T),
            sameemp=Recode( empsame,recodes="2='1';1='0';else=NA", as.factor=T ),
            offerwrk=Recode( wrkoffer,recodes= "2='0';1='1';else=NA", as.factor=T),
            paidhourly=Recode( paidhour,recodes="2='1';1='0';else=NA", as.factor=T),
            unioncover=Recode( union,recodes="2:3='1';1='0';else=NA", as.factor=T),
            earnerstud=as.factor(x=eligorg),
            ottippay=Recode( otpay,recodes="2='1';1='0';else=NA", as.factor=T),
          earnweekna=recode.na(earnweek,9999.99,as.numeric = T),
          uhrswrk=recode.na(uhrsworkorg,998:999,as.numeric = T),
          earnyear=earnweekna*52,
          relate.f = as.factor(x=relate),
          cpsidp = as.factor(cpsidp)
         )
## Recoded 22663 values to NA.
## Recoded variable contains only numeric characters. Consider using as.numeric = TRUE.
## Recoded 65562 values to NA.
## Recoded 76775 values to NA.
earner<-ds2%>%
  filter(earnerstud==1 ) %>% 
    select(date,year,month,earnyear, earnwt, earnweekna, covidint,covid, agegrp, male, raceeth, industry,paidhourly,ottippay)%>%   
  filter(complete.cases(.))
# outcomes~ earnyear

HHinc<-ds2%>%
  filter(earnerstud==0 &  relate.f==101) %>% 
    select(date,cpsidp, wtfinl, hwtfinl,hhinc2,hhincnum,  hhinc,covid19unaw,empstatus,uedurwks,whyue,whypt,underemp, covidint, agegrp, male, raceeth, industry,ind.cd, selfemp,sameemp,unemp,nilf)
# %>%  filter(complete.cases(.))
# outcomes~ hhinc;empstatus;uedurwks;underemp    
    
emp<-ds2%>%
  filter(earnerstud==0) %>% 
    select(date,cpsidp, wtfinl, hwtfinl,hhinc2,hhincnum,  hhinc,covid19unaw,empstatus,uedurwks,whyue,whypt,underemp, covidint, agegrp, male, raceeth, industry,ind.cd, selfemp,sameemp,unemp,nilf)

#summary(earner)
#summary(HHinc)
#summary(emp)

plot(density(earner$earnweekna))

options(survey.lonely.psu = "adjust")
earner.des<-svydesign(ids=~1, weights=~earnwt, data =earner )
HHinc.des<-svydesign(ids=~1, weights=~hwtfinl, data =HHinc )
emp.des<-svydesign(ids=~1, weights=~wtfinl, data =emp )
fit.logit1<-svyglm(earnweekna~covidint,design= earner.des, family=Gamma(link = "log"))

fit.logit2<-svyglm(earnweekna~covidint+raceeth,design= earner.des, family=Gamma(link = "log"))

fit.logit3<-svyglm(earnweekna~covidint+raceeth+agegrp,design= earner.des, family=Gamma(link = "log"))

fit.logit4<-svyglm(earnweekna~covidint+raceeth+agegrp+male,design= earner.des, family=Gamma(link = "log"))

fit.logit5<-svyglm(earnweekna~covidint+raceeth+agegrp+male+industry,design= earner.des, family=Gamma(link = "log"))

fit.logit6<-svyglm(earnweekna~covidint+raceeth+agegrp+male+industry+paidhourly,design= earner.des, family=Gamma(link = "log"))

fit.logit7<-svyglm(earnweekna~covidint+raceeth+agegrp+male+industry+paidhourly+ottippay,design= earner.des, family=Gamma(link = "log"))


# summary(fit.logit1)
# summary(fit.logit2)
# summary(fit.logit3)
# summary(fit.logit4)
# summary(fit.logit5)
# summary(fit.logit6)
summary(fit.logit7)
## 
## Call:
## svyglm(formula = earnweekna ~ covidint + raceeth + agegrp + male + 
##     industry + paidhourly + ottippay, design = earner.des, family = Gamma(link = "log"))
## 
## Survey design:
## svydesign(ids = ~1, weights = ~earnwt, data = earner)
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               7.376429   0.019859 371.445  < 2e-16 ***
## covidint1precov_Q32019    0.002844   0.016646   0.171 0.864346    
## covidint2precov_Q42019   -0.001840   0.015953  -0.115 0.908184    
## covidint4earlycov_Q22020  0.031869   0.017242   1.848 0.064591 .  
## covidint5midcov_Q32020    0.052610   0.017258   3.048 0.002307 ** 
## covidint6latecov_Q42020   0.008796   0.016048   0.548 0.583615    
## covidint7latercov_Q12021  0.004886   0.019262   0.254 0.799779    
## raceethHisp              -0.105268   0.010643  -9.891  < 2e-16 ***
## raceethNH_blk            -0.069139   0.015322  -4.512 6.49e-06 ***
## raceethNH_other          -0.004061   0.018967  -0.214 0.830476    
## agegrp(16,24]            -0.497231   0.019721 -25.214  < 2e-16 ***
## agegrp(24,34]            -0.094043   0.012570  -7.481 8.01e-14 ***
## agegrp(34,44]            -0.017770   0.012144  -1.463 0.143420    
## agegrp(54,64]            -0.039183   0.013130  -2.984 0.002850 ** 
## male0                    -0.138469   0.010431 -13.275  < 2e-16 ***
## industryconst            -0.129966   0.019987  -6.502 8.31e-11 ***
## industrycrimjust         -0.025630   0.027018  -0.949 0.342835    
## industrygen_supprt       -0.225474   0.030370  -7.424 1.23e-13 ***
## industryHealth           -0.068176   0.017678  -3.856 0.000116 ***
## industrymanuf            -0.070687   0.017492  -4.041 5.36e-05 ***
## industrymedia_ent        -0.122911   0.052895  -2.324 0.020163 *  
## industryperscare         -0.544242   0.066503  -8.184 3.11e-16 ***
## industryrealest          -0.140771   0.048362  -2.911 0.003614 ** 
## industryrepair_maint     -0.170715   0.034781  -4.908 9.34e-07 ***
## industryretail           -0.289303   0.019047 -15.189  < 2e-16 ***
## industryrstrnt_svc       -0.512016   0.026336 -19.442  < 2e-16 ***
## industryTeach            -0.193188   0.017873 -10.809  < 2e-16 ***
## industrytrans            -0.132454   0.022021  -6.015 1.87e-09 ***
## industryutl               0.029384   0.028545   1.029 0.303328    
## industrywhsale           -0.166577   0.024146  -6.899 5.59e-12 ***
## paidhourly1              -0.371951   0.010017 -37.130  < 2e-16 ***
## ottippay1                 0.230499   0.012949  17.801  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.1875096)
## 
## Number of Fisher Scoring iterations: 5
library(broom)
fit.logit7%>%
  tidy()%>%
    mutate(OR = exp(estimate))%>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value OR
(Intercept) 7.376 0.020 371.445 0.000 1597.874
covidint1precov_Q32019 0.003 0.017 0.171 0.864 1.003
covidint2precov_Q42019 -0.002 0.016 -0.115 0.908 0.998
covidint4earlycov_Q22020 0.032 0.017 1.848 0.065 1.032
covidint5midcov_Q32020 0.053 0.017 3.048 0.002 1.054
covidint6latecov_Q42020 0.009 0.016 0.548 0.584 1.009
covidint7latercov_Q12021 0.005 0.019 0.254 0.800 1.005
raceethHisp -0.105 0.011 -9.891 0.000 0.900
raceethNH_blk -0.069 0.015 -4.512 0.000 0.933
raceethNH_other -0.004 0.019 -0.214 0.830 0.996
agegrp(16,24] -0.497 0.020 -25.214 0.000 0.608
agegrp(24,34] -0.094 0.013 -7.481 0.000 0.910
agegrp(34,44] -0.018 0.012 -1.463 0.143 0.982
agegrp(54,64] -0.039 0.013 -2.984 0.003 0.962
male0 -0.138 0.010 -13.275 0.000 0.871
industryconst -0.130 0.020 -6.502 0.000 0.878
industrycrimjust -0.026 0.027 -0.949 0.343 0.975
industrygen_supprt -0.225 0.030 -7.424 0.000 0.798
industryHealth -0.068 0.018 -3.856 0.000 0.934
industrymanuf -0.071 0.017 -4.041 0.000 0.932
industrymedia_ent -0.123 0.053 -2.324 0.020 0.884
industryperscare -0.544 0.067 -8.184 0.000 0.580
industryrealest -0.141 0.048 -2.911 0.004 0.869
industryrepair_maint -0.171 0.035 -4.908 0.000 0.843
industryretail -0.289 0.019 -15.189 0.000 0.749
industryrstrnt_svc -0.512 0.026 -19.442 0.000 0.599
industryTeach -0.193 0.018 -10.809 0.000 0.824
industrytrans -0.132 0.022 -6.015 0.000 0.876
industryutl 0.029 0.029 1.029 0.303 1.030
industrywhsale -0.167 0.024 -6.899 0.000 0.847
paidhourly1 -0.372 0.010 -37.130 0.000 0.689
ottippay1 0.230 0.013 17.801 0.000 1.259
AIC(fit.logit1)
##        eff.p          AIC     deltabar 
##    1.5838471 3572.3839365    0.2639745
AIC(fit.logit2)
##        eff.p          AIC     deltabar 
##    2.3538283 3446.4652372    0.2615365
AIC(fit.logit3)
##        eff.p          AIC     deltabar 
##    3.2611366 2997.3715314    0.2508567
AIC(fit.logit4)
##        eff.p          AIC     deltabar 
##    3.4640094 2933.5394101    0.2474292
AIC(fit.logit5)
##       eff.p         AIC    deltabar 
##    6.596457 2752.426991    0.227464
AIC(fit.logit6)
##        eff.p          AIC     deltabar 
##    6.3122839 2515.8024000    0.2104095
AIC(fit.logit7) #************* best model with lowest AIC 
##        eff.p          AIC     deltabar 
##    6.2626055 2449.8917072    0.2020195