library(car)
library(stargazer)
library(survey)
library(ggplot2)
library(pander)
library(dplyr)
library(knitr)
library(tidyverse)
library(readr)
cps_proj <- read_csv("C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   ASECFLAG = col_logical(),
##   COVIDUNAW = col_logical(),
##   COVIDLOOK = col_logical()
## )
## i Use `spec()` for the full column specifications.
## Warning: 33939 parsing failures.
##   row      col           expected actual                                                                                       file
## 33909 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33910 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33911 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33912 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33913 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## ..... ........ .................. ...... ..........................................................................................
## See problems(...) for more details.
#The names in the data are very ugly, so I make them less ugly
nams<-names(cps_proj)
head(nams, n=10)
##  [1] "YEAR"     "SERIAL"   "MONTH"    "HWTFINL"  "CPSID"    "ASECFLAG"
##  [7] "STATEFIP" "COUNTY"   "FAMINC"   "PERNUM"
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(cps_proj)<-newnames

summary(cps_proj)
##       year          serial          month           hwtfinl     
##  Min.   :2019   Min.   :56720   Min.   : 1.000   Min.   : 2336  
##  1st Qu.:2019   1st Qu.:58433   1st Qu.: 3.000   1st Qu.: 3864  
##  Median :2020   Median :59368   Median : 8.000   Median : 4310  
##  Mean   :2020   Mean   :59379   Mean   : 6.962   Mean   : 4557  
##  3rd Qu.:2020   3rd Qu.:60320   3rd Qu.:10.000   3rd Qu.: 4932  
##  Max.   :2021   Max.   :62078   Max.   :12.000   Max.   :21835  
##      cpsid           asecflag          statefip      county     
##  Min.   :2.018e+13   Mode:logical   Min.   :48   Min.   :    0  
##  1st Qu.:2.019e+13   NA's:77641     1st Qu.:48   1st Qu.:    0  
##  Median :2.019e+13                  Median :48   Median :    0  
##  Mean   :2.019e+13                  Mean   :48   Mean   : 7452  
##  3rd Qu.:2.020e+13                  3rd Qu.:48   3rd Qu.:    0  
##  Max.   :2.021e+13                  Max.   :48   Max.   :48485  
##      faminc          pernum           wtfinl          cpsidp         
##  Min.   :100.0   Min.   : 1.000   Min.   :    0   Min.   :2.018e+13  
##  1st Qu.:730.0   1st Qu.: 1.000   1st Qu.: 3968   1st Qu.:2.019e+13  
##  Median :830.0   Median : 2.000   Median : 4476   Median :2.019e+13  
##  Mean   :754.5   Mean   : 1.946   Mean   : 4745   Mean   :2.019e+13  
##  3rd Qu.:842.0   3rd Qu.: 2.000   3rd Qu.: 5191   3rd Qu.:2.020e+13  
##  Max.   :843.0   Max.   :14.000   Max.   :21835   Max.   :2.021e+13  
##       age          sex             race           hispan         empstat    
##  Min.   :16   Min.   :1.000   Min.   :100.0   Min.   :  0.0   Min.   : 1.0  
##  1st Qu.:28   1st Qu.:1.000   1st Qu.:100.0   1st Qu.:  0.0   1st Qu.:10.0  
##  Median :40   Median :2.000   Median :100.0   Median :  0.0   Median :10.0  
##  Mean   :40   Mean   :1.517   Mean   :157.7   Mean   : 66.7   Mean   :17.2  
##  3rd Qu.:52   3rd Qu.:2.000   3rd Qu.:100.0   3rd Qu.:100.0   3rd Qu.:32.0  
##  Max.   :64   Max.   :2.000   Max.   :814.0   Max.   :612.0   Max.   :36.0  
##       ind          classwkr        durunem2          whyunemp     
##  Min.   :   0   Min.   : 0.00   Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.:   0   1st Qu.: 0.00   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median :5180   Median :22.00   Median : 0.0000   Median :0.0000  
##  Mean   :4444   Mean   :15.91   Mean   : 0.3218   Mean   :0.1185  
##  3rd Qu.:7860   3rd Qu.:22.00   3rd Qu.: 0.0000   3rd Qu.:0.0000  
##  Max.   :9890   Max.   :29.00   Max.   :16.0000   Max.   :6.0000  
##     whyptlwk          wnlook          wkstat         empsame     
##  Min.   :  0.00   Min.   :  1.0   Min.   :11.00   Min.   : 1.00  
##  1st Qu.:  0.00   1st Qu.:999.0   1st Qu.:11.00   1st Qu.: 2.00  
##  Median :  0.00   Median :999.0   Median :11.00   Median :99.00  
##  Mean   : 11.78   Mean   :978.2   Mean   :40.32   Mean   :58.89  
##  3rd Qu.:  0.00   3rd Qu.:999.0   3rd Qu.:99.00   3rd Qu.:99.00  
##  Max.   :130.00   Max.   :999.0   Max.   :99.00   Max.   :99.00  
##     wrkoffer         earnwt         hourwage          paidhour     
##  Min.   : 1.00   Min.   :    0   Min.   :   1.01   Min.   :0.0000  
##  1st Qu.:99.00   1st Qu.:    0   1st Qu.: 999.99   1st Qu.:0.0000  
##  Median :99.00   Median :    0   Median : 999.99   Median :0.0000  
##  Mean   :98.27   Mean   : 4726   Mean   : 917.63   Mean   :0.2397  
##  3rd Qu.:99.00   3rd Qu.:12776   3rd Qu.: 999.99   3rd Qu.:0.0000  
##  Max.   :99.00   Max.   :65538   Max.   : 999.99   Max.   :2.0000  
##      union           earnweek         uhrsworkorg       eligorg      
##  Min.   :0.0000   Min.   :    0.01   Min.   :  1.0   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.: 9999.99   1st Qu.:999.0   1st Qu.:0.0000  
##  Median :0.0000   Median : 9999.99   Median :999.0   Median :0.0000  
##  Mean   :0.1667   Mean   : 8604.45   Mean   :944.5   Mean   :0.1558  
##  3rd Qu.:0.0000   3rd Qu.: 9999.99   3rd Qu.:999.0   3rd Qu.:0.0000  
##  Max.   :3.0000   Max.   : 9999.99   Max.   :999.0   Max.   :1.0000  
##      otpay       covidunaw      covidlook     
##  Min.   : 1.00   Mode:logical   Mode:logical  
##  1st Qu.:99.00   TRUE:33325     TRUE:9662     
##  Median :99.00   NA's:44316     NA's:67979    
##  Mean   :83.76                                
##  3rd Qu.:99.00                                
##  Max.   :99.00

Recode variables

library(dplyr)
cps1<-cps_proj%>%
  mutate(
          covid = as.factor(if_else(year==2020 & month>=4 | year==2021 & month<3,1,0)),
          covidint = as.factor(if_else(year==2019 & month<=9,"1precov_Q32019",
                                       if_else(year==2019 & month>=10,"2precov_Q42019",
                                       if_else(year==2020 & month<=2,"3precov_Q12020",
                                       if_else(year==2020 & month>=3 & month<=8,"4earlycov_Q22020",
                                       if_else(year==2020 & month>=9 & month<=12,"5midcov_Q32020",
                                       if_else(year==2021 & month>=1 & month<=3,"6latecov_Q42020","NA"))))))
                                       ),

            hhinc   =Recode(faminc,recodes="100:490='1_lt15k';500:600='2_15-25k';710:720='3_25-35k';
                          730:740='4_35-50k';810:830='5_50k-75k';840:843='6_75kplus';else='NA'", as.factor = T),
            agegrp=cut(age, breaks=c(16,24,34,44,54,64)),
          male=Recode(sex,recodes="2='0';1='1';else='NA'", as.factor = T),
          raceeth=as.factor(if_else(race==100 & hispan==000,"NH_wht",
                          if_else(race==200 & hispan==000, "NH_blk",
                          if_else(!race%in%c('100','200','999') & hispan==000, "NH_other",
                          if_else(!hispan%in%c('000','901','902'), "Hisp","NA"))))
                          ),

            employst=Recode( empstat,recodes="1='Mil';10='emp';12='emp_nwklwk';20:22='unemp';32='NILF_utw';34='NILF_oth';36='NILF_ret'" , as.factor = T),
          unemployed=Recode( empstat,recodes="1:12='0';20:22='1';else=NA"),
            industry=Recode( ind,recodes="770='const';1070:3990='manuf';4070:4590='whsale';4670:5790='retail';6070:6390='trans';570:690='utl';
                             6470:6672='media_ent';6870:6992='bnk_ins';7071:7072='realest';7860:7890='Teach';7970:8290='Health';8670:8690='rstrnt_svc';
                             8770:8891='repair_maint';8970:8990='perscare';9160:9390='gen_supprt';9470='crimjust'; else=NA", as.factor = T ),
            selfemp=Recode( classwkr,recodes="10:14='1';20:29='0';else=NA" ),
            uedurwks    =Recode( durunem2,recodes="0:3='lt4wks';4:7='4-10wks';8:10='11-22wks';11:15='23-52wks';16='gt52wks'; else=NA", as.factor = T),
            whyue   =Recode( whyunemp,recodes="1='templayoff';2='jobloss';3='tempwk_end';4='leftjob';5='re_ent';6='new_ent'; else=NA" , as.factor = T),
            whypt   =Recode( whyptlwk,recodes="60='onlyfindpt';80='ftdown';100:101='health_med';111='persday';120:122='chcare_fam';
                           123='sch';130='oth'; else=NA" , as.factor = T),
            underemp=Recode( wkstat,recodes="12='1'; 13='1'; 21='1';99=NA; else='0'" ),
            sameemp=Recode( empsame,recodes="2='1';1='0';else=NA" ),
            offerwrk=Recode( wrkoffer,recodes= "2='0';1='1';else=NA"),
            paidhourly=Recode( paidhour,recodes="2='1';1='0';else=NA"),
            unioncover=Recode( union,recodes="2:3='1';1='0';else=NA"),
            earnerstud=as.factor(x=eligorg),
            ottippay=Recode( otpay,recodes="2='1';1='0';else=NA"),
          earnyear=earnweek*52
         )


earner<-cps1%>%
   filter(earnerstud==1 & covidint!='NA' & earnweek<2000 & earnweek>100)%>%
    select(year,month,wtfinl,earnwt,hourwage,earnweek,earnyear,covid,covidint, agegrp, male,raceeth,employst,unemployed, industry, selfemp,uedurwks, whyue, whypt, underemp, sameemp, offerwrk,paidhourly,unioncover, ottippay,covidlook,covidunaw)

cps2<-cps1%>%
   filter(complete.cases(agegrp, male,raceeth,employst, industry)& covidint!='NA')%>%
    select(year,month,wtfinl,earnwt,hwtfinl,hhinc,covid,covidint, agegrp, male,raceeth,employst,unemployed, industry, selfemp,uedurwks, whyue, whypt, underemp, sameemp, offerwrk,paidhourly,unioncover, ottippay,covidunaw,covidlook)

summary(earner)
##       year          month            wtfinl          earnwt     
##  Min.   :2019   Min.   : 1.000   Min.   : 2336   Min.   : 9099  
##  1st Qu.:2019   1st Qu.: 3.000   1st Qu.: 3960   1st Qu.:15755  
##  Median :2020   Median : 8.000   Median : 4470   Median :17812  
##  Mean   :2020   Mean   : 7.007   Mean   : 4679   Mean   :18678  
##  3rd Qu.:2020   3rd Qu.:10.000   3rd Qu.: 5094   3rd Qu.:20396  
##  Max.   :2021   Max.   :12.000   Max.   :13531   Max.   :65538  
##                                                                 
##     hourwage          earnweek         earnyear      covid   
##  Min.   :   1.01   Min.   : 101.0   Min.   :  5251   0:5240  
##  1st Qu.:  14.00   1st Qu.: 480.8   1st Qu.: 25000   1:5239  
##  Median :  25.00   Median : 760.0   Median : 39520           
##  Mean   : 424.42   Mean   : 842.2   Mean   : 43793           
##  3rd Qu.: 999.99   3rd Qu.:1153.8   3rd Qu.: 60000           
##  Max.   : 999.99   Max.   :1996.2   Max.   :103800           
##                                                              
##              covidint        agegrp     male         raceeth    
##  1precov_Q32019  :1705   (16,24]:1410   0:5245   Hisp    :4064  
##  2precov_Q42019  :1812   (24,34]:2565   1:5234   NH_blk  :1219  
##  3precov_Q12020  :1171   (34,44]:2395            NH_other: 717  
##  4earlycov_Q22020:2667   (44,54]:2239            NH_wht  :4479  
##  5midcov_Q32020  :2133   (54,64]:1837                           
##  6latecov_Q42020 : 991   NA's   :  33                           
##                                                                 
##        employst       unemployed    industry       selfemp      uedurwks    
##  emp       :10129   Min.   :0    Health :1275   Min.   :0   11-22wks:    0  
##  emp_nwklwk:  350   1st Qu.:0    retail :1250   1st Qu.:0   23-52wks:    0  
##  Mil       :    0   Median :0    Teach  :1173   Median :0   4-10wks :    0  
##  NILF_oth  :    0   Mean   :0    manuf  : 868   Mean   :0   gt52wks :    0  
##  NILF_ret  :    0   3rd Qu.:0    const  : 819   3rd Qu.:0   lt4wks  :10479  
##  NILF_utw  :    0   Max.   :0    (Other):3043   Max.   :0                   
##  unemp     :    0                NA's   :2051                               
##         whyue              whypt         underemp          sameemp      
##  jobloss   :    0   sch       : 397   Min.   :0.00000   Min.   :0.0000  
##  leftjob   :    0   chcare_fam: 338   1st Qu.:0.00000   1st Qu.:1.0000  
##  new_ent   :    0   ftdown    : 226   Median :0.00000   Median :1.0000  
##  re_ent    :    0   persday   : 211   Mean   :0.09209   Mean   :0.9766  
##  templayoff:    0   oth       : 147   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  tempwk_end:    0   (Other)   : 224   Max.   :1.00000   Max.   :1.0000  
##  NA's      :10479   NA's      :8936                     NA's   :1711    
##     offerwrk       paidhourly       unioncover         ottippay     
##  Min.   : NA     Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.: NA     1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median : NA     Median :1.0000   Median :0.00000   Median :0.0000  
##  Mean   :NaN     Mean   :0.5858   Mean   :0.05812   Mean   :0.1411  
##  3rd Qu.: NA     3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   : NA     Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##  NA's   :10479                                                      
##  covidlook      covidunaw     
##  Mode:logical   Mode:logical  
##  NA's:10479     TRUE:4446     
##                 NA's:6033     
##                               
##                               
##                               
## 
summary(cps2)
##       year          month            wtfinl          earnwt     
##  Min.   :2019   Min.   : 1.000   Min.   : 2336   Min.   :    0  
##  1st Qu.:2019   1st Qu.: 3.000   1st Qu.: 3990   1st Qu.:    0  
##  Median :2020   Median : 8.000   Median : 4504   Median :    0  
##  Mean   :2020   Mean   : 6.997   Mean   : 4796   Mean   : 4878  
##  3rd Qu.:2020   3rd Qu.:10.000   3rd Qu.: 5240   3rd Qu.:13538  
##  Max.   :2021   Max.   :12.000   Max.   :20883   Max.   :65538  
##                                                                 
##     hwtfinl            hhinc       covid                 covidint    
##  Min.   : 2336   1_lt15k  : 2305   0:20908   1precov_Q32019  : 6942  
##  1st Qu.: 3884   2_15-25k : 2430   1:22326   2precov_Q42019  : 7096  
##  Median : 4334   3_25-35k : 3819             3precov_Q12020  : 4840  
##  Mean   : 4592   4_35-50k : 5640             4earlycov_Q22020:10705  
##  3rd Qu.: 4965   5_50k-75k: 8345             5midcov_Q32020  : 9234  
##  Max.   :16022   6_75kplus:20695             6latecov_Q42020 : 4417  
##                                                                      
##      agegrp      male          raceeth            employst       unemployed    
##  (16,24]: 5677   0:20363   Hisp    :16384   emp       :38908   Min.   :0.0000  
##  (24,34]: 9598   1:22871   NH_blk  : 4852   emp_nwklwk: 1588   1st Qu.:0.0000  
##  (34,44]:10089             NH_other: 3079   Mil       :    0   Median :0.0000  
##  (44,54]: 9870             NH_wht  :18919   NILF_oth  :  348   Mean   :0.0539  
##  (54,64]: 8000                              NILF_ret  :   45   3rd Qu.:0.0000  
##                                             NILF_utw  :   39   Max.   :1.0000  
##                                             unemp     : 2306   NA's   :432     
##        industry        selfemp            uedurwks            whyue      
##  Health    : 6131   Min.   :0.00000   11-22wks:  489   jobloss   :  627  
##  retail    : 6035   1st Qu.:0.00000   23-52wks:  446   leftjob   :  205  
##  Teach     : 5333   Median :0.00000   4-10wks :  778   new_ent   :    0  
##  const     : 4839   Mean   :0.07737   gt52wks :  109   re_ent    :  490  
##  manuf     : 4719   3rd Qu.:0.00000   lt4wks  :41412   templayoff:  780  
##  rstrnt_svc: 3593   Max.   :1.00000                    tempwk_end:  204  
##  (Other)   :12584                                      NA's      :40928  
##         whypt          underemp         sameemp         offerwrk    
##  sch       : 1450   Min.   :0.0000   Min.   :0.000   Min.   :0.00   
##  chcare_fam: 1251   1st Qu.:0.0000   1st Qu.:1.000   1st Qu.:1.00   
##  ftdown    :  886   Median :0.0000   Median :1.000   Median :1.00   
##  persday   :  803   Mean   :0.0925   Mean   :0.977   Mean   :0.85   
##  oth       :  638   3rd Qu.:0.0000   3rd Qu.:1.000   3rd Qu.:1.00   
##  (Other)   :  848   Max.   :1.0000   Max.   :1.000   Max.   :1.00   
##  NA's      :37358   NA's   :432      NA's   :18404   NA's   :43193  
##    paidhourly      unioncover       ottippay     covidunaw      covidlook     
##  Min.   :0.00    Min.   :0.00    Min.   :0.00    Mode:logical   Mode:logical  
##  1st Qu.:0.00    1st Qu.:0.00    1st Qu.:0.00    TRUE:18449     TRUE:192      
##  Median :1.00    Median :0.00    Median :0.00    NA's:24785     NA's:43042    
##  Mean   :0.56    Mean   :0.06    Mean   :0.15                                 
##  3rd Qu.:1.00    3rd Qu.:0.00    3rd Qu.:0.00                                 
##  Max.   :1.00    Max.   :1.00    Max.   :1.00                                 
##  NA's   :33718   NA's   :33718   NA's   :33718

Descriptive cross tabs

# cross-tab of covid period and demographics for earner ds and full ds 
table(earner$covidint, earner$male)
##                   
##                       0    1
##   1precov_Q32019    871  834
##   2precov_Q42019    912  900
##   3precov_Q12020    609  562
##   4earlycov_Q22020 1308 1359
##   5midcov_Q32020   1044 1089
##   6latecov_Q42020   501  490
table(cps2$covidint, cps2$male)
##                   
##                       0    1
##   1precov_Q32019   3264 3678
##   2precov_Q42019   3371 3725
##   3precov_Q12020   2305 2535
##   4earlycov_Q22020 5099 5606
##   5midcov_Q32020   4267 4967
##   6latecov_Q42020  2057 2360
table(earner$covidint, earner$raceeth)
##                   
##                    Hisp NH_blk NH_other NH_wht
##   1precov_Q32019    636    218      115    736
##   2precov_Q42019    716    228      117    751
##   3precov_Q12020    477    141       88    465
##   4earlycov_Q22020 1005    273      195   1194
##   5midcov_Q32020    828    254      137    914
##   6latecov_Q42020   402    105       65    419
table(cps2$covidint, cps2$raceeth)
##                   
##                    Hisp NH_blk NH_other NH_wht
##   1precov_Q32019   2583    825      421   3113
##   2precov_Q42019   2670    830      533   3063
##   3precov_Q12020   1934    513      378   2015
##   4earlycov_Q22020 3921   1136      780   4868
##   5midcov_Q32020   3506   1037      644   4047
##   6latecov_Q42020  1770    511      323   1813
table(earner$covidint, earner$agegrp)
##                   
##                    (16,24] (24,34] (34,44] (44,54] (54,64]
##   1precov_Q32019       275     408     364     347     302
##   2precov_Q42019       253     464     428     381     280
##   3precov_Q12020       154     302     256     248     209
##   4earlycov_Q22020     320     627     624     589     499
##   5midcov_Q32020       276     510     522     452     367
##   6latecov_Q42020      132     254     201     222     180
table(cps2$covidint, cps2$agegrp)
##                   
##                    (16,24] (24,34] (34,44] (44,54] (54,64]
##   1precov_Q32019      1010    1567    1507    1606    1252
##   2precov_Q42019       896    1653    1680    1597    1270
##   3precov_Q12020       650    1116    1104    1052     918
##   4earlycov_Q22020    1325    2174    2497    2521    2188
##   5midcov_Q32020      1238    2078    2243    2076    1599
##   6latecov_Q42020      558    1010    1058    1018     773
# cross-tab of employ status and demographics for earner ds and full ds 
table(earner$employst, earner$male)
##             
##                 0    1
##   emp        5029 5100
##   emp_nwklwk  216  134
##   Mil           0    0
##   NILF_oth      0    0
##   NILF_ret      0    0
##   NILF_utw      0    0
##   unemp         0    0
table(cps2$employst, cps2$male)
##             
##                  0     1
##   emp        18159 20749
##   emp_nwklwk   884   704
##   Mil            0     0
##   NILF_oth     189   159
##   NILF_ret      29    16
##   NILF_utw      18    21
##   unemp       1084  1222
table(earner$employst, earner$raceeth)
##             
##              Hisp NH_blk NH_other NH_wht
##   emp        3947   1169      682   4331
##   emp_nwklwk  117     50       35    148
##   Mil           0      0        0      0
##   NILF_oth      0      0        0      0
##   NILF_ret      0      0        0      0
##   NILF_utw      0      0        0      0
##   unemp         0      0        0      0
table(cps2$employst, cps2$raceeth)
##             
##               Hisp NH_blk NH_other NH_wht
##   emp        14594   4224     2786  17304
##   emp_nwklwk   550    202      121    715
##   Mil            0      0        0      0
##   NILF_oth     145     46       33    124
##   NILF_ret       6      8        0     31
##   NILF_utw      11      7        1     20
##   unemp       1078    365      138    725
table(earner$employst, earner$agegrp)
##             
##              (16,24] (24,34] (34,44] (44,54] (54,64]
##   emp           1357    2483    2333    2166    1760
##   emp_nwklwk      53      82      62      73      77
##   Mil              0       0       0       0       0
##   NILF_oth         0       0       0       0       0
##   NILF_ret         0       0       0       0       0
##   NILF_utw         0       0       0       0       0
##   unemp            0       0       0       0       0
table(cps2$employst, cps2$agegrp)
##             
##              (16,24] (24,34] (34,44] (44,54] (54,64]
##   emp           4834    8580    9243    9010    7241
##   emp_nwklwk     186     348     350     358     346
##   Mil              0       0       0       0       0
##   NILF_oth       129      85      44      53      37
##   NILF_ret         1       0       1       6      37
##   NILF_utw         1       5       5      11      17
##   unemp          526     580     446     432     322
# cross-tab of income group and demographics for earner ds and full ds 
table(earner$earnyearint<-cut(earner$earnyear, breaks=c(25000,50000,75000,100000)), earner$male)
##                  
##                      0    1
##   (2.5e+04,5e+04] 2085 2108
##   (5e+04,7.5e+04] 1140 1204
##   (7.5e+04,1e+05]  469  779
table(cps2$hhinc, cps2$male)
##            
##                 0     1
##   1_lt15k    1133  1172
##   2_15-25k   1171  1259
##   3_25-35k   1831  1988
##   4_35-50k   2531  3109
##   5_50k-75k  3991  4354
##   6_75kplus  9706 10989
table(earner$earnyearint<-cut(earner$earnyear, breaks=c(25000,50000,75000,100000)), earner$raceeth)
##                  
##                   Hisp NH_blk NH_other NH_wht
##   (2.5e+04,5e+04] 1796    554      237   1606
##   (5e+04,7.5e+04]  664    264      195   1221
##   (7.5e+04,1e+05]  280     97      123    748
table(cps2$hhinc, cps2$raceeth)
##            
##              Hisp NH_blk NH_other NH_wht
##   1_lt15k    1250    353      103    599
##   2_15-25k   1367    348       96    619
##   3_25-35k   2114    506      196   1003
##   4_35-50k   2940    681      321   1698
##   5_50k-75k  3245   1074      530   3496
##   6_75kplus  5468   1890     1833  11504
table(earner$earnyearint<-cut(earner$earnyear, breaks=c(25000,50000,75000,100000)), earner$agegrp)
##                  
##                   (16,24] (24,34] (34,44] (44,54] (54,64]
##   (2.5e+04,5e+04]     446    1189     967     876     713
##   (5e+04,7.5e+04]      96     563     611     583     491
##   (7.5e+04,1e+05]      19     278     352     341     258
table(cps2$hhinc, cps2$agegrp)
##            
##             (16,24] (24,34] (34,44] (44,54] (54,64]
##   1_lt15k       541     600     486     368     310
##   2_15-25k      401     660     520     472     377
##   3_25-35k      625     953     814     689     738
##   4_35-50k      913    1495    1226    1109     897
##   5_50k-75k    1114    1943    1815    1886    1587
##   6_75kplus    2083    3947    5228    5346    4091

Boxplots of Annual income over covid periods by demographics for earner ds

options(survey.lonely.psu = "adjust")

des<-svydesign(ids=~1, weights=~earnwt, data = earner )
des2<-svydesign(ids=~1, weights=~hwtfinl, data = cps2 )


p <- ggplot(earner, aes(x=covidint, y=earnyear, fill=covidint)) + 
  geom_boxplot()+
theme(axis.text.x = element_text(angle = 45,hjust=1))

q <- ggplot(earner, aes(x=covidint, y=earnyear, fill=male)) + 
  geom_boxplot()+
theme(axis.text.x = element_text(angle = 45,hjust=1))

r <- ggplot(earner, aes(x=covidint, y=earnyear, fill=raceeth)) + 
  geom_boxplot()+
theme(axis.text.x = element_text(angle = 45,hjust=1))

s <- ggplot(earner, aes(x=covidint, y=earnyear, fill=agegrp)) + 
  geom_boxplot()+
theme(axis.text.x = element_text(angle = 45,hjust=1))

p

q

r

s

### Weighted Analysis: Survey means and Chi-square test

# create surveymean objects for different combinations of outcomes and inputs
cat1<-svyby(formula = ~hhinc, by=~male, design = des2, FUN=svymean,na.rm=T)
cat2<-svyby(formula = ~hhinc, by=~agegrp, design = des2, FUN=svymean,na.rm=T)
cat3<-svyby(formula = ~hhinc, by=~raceeth, design = des2, FUN=svymean,na.rm=T)
cat4<-svyby(formula = ~hhinc, by=~covidint, design = des2, FUN=svymean,na.rm=T)

cat5<-svyby(formula = ~uedurwks, by=~male, design = des2, FUN=svymean,na.rm=T)
cat6<-svyby(formula = ~uedurwks, by=~agegrp, design = des2, FUN=svymean,na.rm=T)
cat7<-svyby(formula = ~uedurwks, by=~raceeth, design = des2, FUN=svymean,na.rm=T)
cat8<-svyby(formula = ~uedurwks, by=~covidint, design = des2, FUN=svymean,na.rm=T)

cat9<-svyby(formula = ~underemp, by=~covidint, design = des2, FUN=svymean,na.rm=T)
cat10<-svyby(formula = ~sameemp, by=~covidint, design = des2, FUN=svymean,na.rm=T)
cat11<-svyby(formula = ~ottippay, by=~covidint, design = des2, FUN=svymean,na.rm=T)
cat12<-svyby(formula = ~paidhourly, by=~covidint, design = des2, FUN=svymean,na.rm=T)
cat13<-svyby(formula = ~unemployed, by=~covidint, design = des2, FUN=svymean,na.rm=T)

# Chi square test of independence between outcomes and covid period
svychisq(~hhinc+covidint, design = des2)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~hhinc + covidint, design = des2)
## F = 8.18, ndf = 2.4756e+01, ddf = 1.0703e+06, p-value < 2.2e-16
svychisq(~uedurwks+covidint, design = des2)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~uedurwks + covidint, design = des2)
## F = 37.412, ndf = 19.764, ddf = 854455.446, p-value < 2.2e-16
svychisq(~underemp+covidint, design = des2)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~underemp + covidint, design = des2)
## F = 46.617, ndf = 4.9655e+00, ddf = 2.1467e+05, p-value < 2.2e-16
svychisq(~unemployed+covidint, design = des2)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~unemployed + covidint, design = des2)
## F = 81.877, ndf = 4.9651e+00, ddf = 2.1466e+05, p-value < 2.2e-16
#Survey estimates
knitr::kable(cat1,
      caption = "Survey Estimates of Houshold Income by Sex",
      align = 'c',  
      format = "html")
Survey Estimates of Houshold Income by Sex
male hhinc1_lt15k hhinc2_15-25k hhinc3_25-35k hhinc4_35-50k hhinc5_50k-75k hhinc6_75kplus se.hhinc1_lt15k se.hhinc2_15-25k se.hhinc3_25-35k se.hhinc4_35-50k se.hhinc5_50k-75k se.hhinc6_75kplus
0 0 0.0571734 0.0579473 0.0907405 0.1278998 0.1952205 0.4710185 0.0016946 0.0017049 0.0020904 0.0024468 0.0028552 0.0035959
1 1 0.0523408 0.0566314 0.0897881 0.1394460 0.1900247 0.4717691 0.0015236 0.0016050 0.0019840 0.0023882 0.0026681 0.0033924
knitr::kable(cat5,
      caption = "Survey Estimates of Duration of unemp by sex",
      align = 'c',  
      format = "html")
Survey Estimates of Duration of unemp by sex
male uedurwks11-22wks uedurwks23-52wks uedurwks4-10wks uedurwksgt52wks uedurwkslt4wks se.uedurwks11-22wks se.uedurwks23-52wks se.uedurwks4-10wks se.uedurwksgt52wks se.uedurwkslt4wks
0 0 0.0126117 0.0102757 0.0204298 0.0025851 0.9540977 0.0008480 0.0007319 0.0010830 0.0003770 0.0015757
1 1 0.0118231 0.0105597 0.0193357 0.0024162 0.9558653 0.0007573 0.0006936 0.0009949 0.0003262 0.0014431
knitr::kable(cat9,
      caption = "Survey Estimates of underemployment before/during/after COVID",
      align = 'c',  
      format = "html")
Survey Estimates of underemployment before/during/after COVID
covidint underemp se
1precov_Q32019 1precov_Q32019 0.0756078 0.0032184
2precov_Q42019 2precov_Q42019 0.0694747 0.0030726
3precov_Q12020 3precov_Q12020 0.0779991 0.0039881
4earlycov_Q22020 4earlycov_Q22020 0.1266970 0.0033681
5midcov_Q32020 5midcov_Q32020 0.0940073 0.0031029
6latecov_Q42020 6latecov_Q42020 0.0928577 0.0045159
knitr::kable(cat13,
      caption = "Survey Estimates of unemployment before/during/after COVID",
      align = 'c',  
      format = "html")
Survey Estimates of unemployment before/during/after COVID
covidint unemployed se
1precov_Q32019 1precov_Q32019 0.0328733 0.0022072
2precov_Q42019 2precov_Q42019 0.0275986 0.0019802
3precov_Q12020 3precov_Q12020 0.0340363 0.0026657
4earlycov_Q22020 4earlycov_Q22020 0.0851442 0.0028492
5midcov_Q32020 5midcov_Q32020 0.0600619 0.0025449
6latecov_Q42020 6latecov_Q42020 0.0707891 0.0039906

Calculate Covid period by different employment-status vars cross tabulation, and plot it

cat9%>%
  ggplot()+
  geom_point(aes(x=covidint,y=underemp))+
  geom_errorbar(aes(x=covidint, ymin = underemp-1.96*se, 
                    ymax= underemp+1.96*se),
                width=.25)+
   labs(title = "Percent % of US Adults underemployed by COVID period", 
        caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
       x = "COVID period",
       y = "Underemployed")+
  theme_minimal()

cat10%>%
  ggplot()+
  geom_point(aes(x=covidint,y=sameemp))+
  geom_errorbar(aes(x=covidint, ymin = sameemp-1.96*se, 
                    ymax= sameemp+1.96*se),
                width=.25)+
   labs(title = "Percent % of US Adults working for the same employer as prior month by COVID period", 
        caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
       x = "COVID period",
       y = "same employer")+
  theme_minimal()

cat11%>%
  ggplot()+
  geom_point(aes(x=covidint,y=ottippay))+
  geom_errorbar(aes(x=covidint, ymin = ottippay-1.96*se, 
                    ymax= ottippay+1.96*se),
                width=.25)+
   labs(title = "Percent % of US Adults who earn OT and tips by COVID period", 
        caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
       x = "COVID period",
       y = "Earn OT and tips")+
  theme_minimal()

cat12%>%
  ggplot()+
  geom_point(aes(x=covidint,y=paidhourly))+
  geom_errorbar(aes(x=covidint, ymin = paidhourly-1.96*se, 
                    ymax= paidhourly+1.96*se),
                width=.25)+
   labs(title = "Percent % of US Adults paid hourly by COVID period", 
        caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
       x = "COVID period",
       y = "paid hourly")+
  theme_minimal()

cat13%>%
  ggplot()+
  geom_point(aes(x=covidint,y=unemployed))+
  geom_errorbar(aes(x=covidint, ymin = unemployed-1.96*se, 
                    ymax= unemployed+1.96*se),
                width=.25)+
   labs(title = "Percent % of US Adults unemployed by COVID period", 
        caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
       x = "COVID period",
       y = "paid hourly")+
  theme_minimal()

Calculate Covid period by different employment-status vars by race-ethnicity cross tabulation, and plot it

catdog9<-svyby(formula = ~underemp, by=~covidint+raceeth, design = des2, FUN=svymean,na.rm=T)

catdog9%>%
  ggplot()+
  geom_point(aes(x=covidint,y=underemp))+
  geom_errorbar(aes(x=covidint, ymin = underemp-1.96*se, 
                    ymax= underemp+1.96*se),
                width=.25)+
  facet_wrap(~ raceeth, nrow = 3)+
   labs(title = "Percent % of US Adults underemployed by Race/Ethnicity COVID period", 
        caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
       x = "COVID period",
       y = "Underemployed")+
theme(axis.text.x = element_text(angle = 40,hjust=1))

catdog13<-svyby(formula = ~unemployed, by=~covidint+raceeth, design = des2, FUN=svymean,na.rm=T)

catdog13%>%
  ggplot()+
  geom_point(aes(x=covidint,y=unemployed))+
  geom_errorbar(aes(x=covidint, ymin = unemployed-1.96*se, 
                    ymax= unemployed+1.96*se),
                width=.25)+
  facet_wrap(~ raceeth, nrow = 3)+
   labs(title = "Percent % of US Adults unemployed by Race/Ethnicity and COVID period", 
        caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
       x = "COVID period",
       y = "Underemployed")+
theme(axis.text.x = element_text(angle = 40,hjust=1))