library(car)
library(stargazer)
library(survey)
library(ggplot2)
library(pander)
library(dplyr)
library(knitr)
library(tidyverse)
library(readr)
# Load the IPUMS CPS extract.
# Every column is read as double: with readr's default type guessing,
# ASECFLAG, COVIDUNAW and COVIDLOOK are guessed as logical, which raises
# thousands of parsing failures and silently turns codes such as 2 into NA.
cps_proj <- read_csv(
  "C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv",
  col_types = cols(.default = col_double())
)
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## ASECFLAG = col_logical(),
## COVIDUNAW = col_logical(),
## COVIDLOOK = col_logical()
## )
## i Use `spec()` for the full column specifications.
## Warning: 33939 parsing failures.
## row col expected actual file
## 33909 ASECFLAG 1/0/T/F/TRUE/FALSE 2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33910 ASECFLAG 1/0/T/F/TRUE/FALSE 2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33911 ASECFLAG 1/0/T/F/TRUE/FALSE 2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33912 ASECFLAG 1/0/T/F/TRUE/FALSE 2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33913 ASECFLAG 1/0/T/F/TRUE/FALSE 2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## ..... ........ .................. ...... ..........................................................................................
## See problems(...) for more details.
# Keep the raw column names for cleaning and show the first ten of them.
nams <- colnames(cps_proj)
head(nams, 10)
## [1] "YEAR" "SERIAL" "MONTH" "HWTFINL" "CPSID" "ASECFLAG"
## [7] "STATEFIP" "COUNTY" "FAMINC" "PERNUM"
# The raw names mix upper case, lower case and underscores.
# Strip every underscore, lower-case the rest, apply the cleaned names,
# then summarise each column.
newnames <- tolower(gsub("_", "", nams, fixed = TRUE))
cps_proj <- setNames(cps_proj, newnames)
summary(cps_proj)
## year serial month hwtfinl
## Min. :2019 Min. :56720 Min. : 1.000 Min. : 2336
## 1st Qu.:2019 1st Qu.:58433 1st Qu.: 3.000 1st Qu.: 3864
## Median :2020 Median :59368 Median : 8.000 Median : 4310
## Mean :2020 Mean :59379 Mean : 6.962 Mean : 4557
## 3rd Qu.:2020 3rd Qu.:60320 3rd Qu.:10.000 3rd Qu.: 4932
## Max. :2021 Max. :62078 Max. :12.000 Max. :21835
## cpsid asecflag statefip county
## Min. :2.018e+13 Mode:logical Min. :48 Min. : 0
## 1st Qu.:2.019e+13 NA's:77641 1st Qu.:48 1st Qu.: 0
## Median :2.019e+13 Median :48 Median : 0
## Mean :2.019e+13 Mean :48 Mean : 7452
## 3rd Qu.:2.020e+13 3rd Qu.:48 3rd Qu.: 0
## Max. :2.021e+13 Max. :48 Max. :48485
## faminc pernum wtfinl cpsidp
## Min. :100.0 Min. : 1.000 Min. : 0 Min. :2.018e+13
## 1st Qu.:730.0 1st Qu.: 1.000 1st Qu.: 3968 1st Qu.:2.019e+13
## Median :830.0 Median : 2.000 Median : 4476 Median :2.019e+13
## Mean :754.5 Mean : 1.946 Mean : 4745 Mean :2.019e+13
## 3rd Qu.:842.0 3rd Qu.: 2.000 3rd Qu.: 5191 3rd Qu.:2.020e+13
## Max. :843.0 Max. :14.000 Max. :21835 Max. :2.021e+13
## age sex race hispan empstat
## Min. :16 Min. :1.000 Min. :100.0 Min. : 0.0 Min. : 1.0
## 1st Qu.:28 1st Qu.:1.000 1st Qu.:100.0 1st Qu.: 0.0 1st Qu.:10.0
## Median :40 Median :2.000 Median :100.0 Median : 0.0 Median :10.0
## Mean :40 Mean :1.517 Mean :157.7 Mean : 66.7 Mean :17.2
## 3rd Qu.:52 3rd Qu.:2.000 3rd Qu.:100.0 3rd Qu.:100.0 3rd Qu.:32.0
## Max. :64 Max. :2.000 Max. :814.0 Max. :612.0 Max. :36.0
## ind classwkr durunem2 whyunemp
## Min. : 0 Min. : 0.00 Min. : 0.0000 Min. :0.0000
## 1st Qu.: 0 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.:0.0000
## Median :5180 Median :22.00 Median : 0.0000 Median :0.0000
## Mean :4444 Mean :15.91 Mean : 0.3218 Mean :0.1185
## 3rd Qu.:7860 3rd Qu.:22.00 3rd Qu.: 0.0000 3rd Qu.:0.0000
## Max. :9890 Max. :29.00 Max. :16.0000 Max. :6.0000
## whyptlwk wnlook wkstat empsame
## Min. : 0.00 Min. : 1.0 Min. :11.00 Min. : 1.00
## 1st Qu.: 0.00 1st Qu.:999.0 1st Qu.:11.00 1st Qu.: 2.00
## Median : 0.00 Median :999.0 Median :11.00 Median :99.00
## Mean : 11.78 Mean :978.2 Mean :40.32 Mean :58.89
## 3rd Qu.: 0.00 3rd Qu.:999.0 3rd Qu.:99.00 3rd Qu.:99.00
## Max. :130.00 Max. :999.0 Max. :99.00 Max. :99.00
## wrkoffer earnwt hourwage paidhour
## Min. : 1.00 Min. : 0 Min. : 1.01 Min. :0.0000
## 1st Qu.:99.00 1st Qu.: 0 1st Qu.: 999.99 1st Qu.:0.0000
## Median :99.00 Median : 0 Median : 999.99 Median :0.0000
## Mean :98.27 Mean : 4726 Mean : 917.63 Mean :0.2397
## 3rd Qu.:99.00 3rd Qu.:12776 3rd Qu.: 999.99 3rd Qu.:0.0000
## Max. :99.00 Max. :65538 Max. : 999.99 Max. :2.0000
## union earnweek uhrsworkorg eligorg
## Min. :0.0000 Min. : 0.01 Min. : 1.0 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 9999.99 1st Qu.:999.0 1st Qu.:0.0000
## Median :0.0000 Median : 9999.99 Median :999.0 Median :0.0000
## Mean :0.1667 Mean : 8604.45 Mean :944.5 Mean :0.1558
## 3rd Qu.:0.0000 3rd Qu.: 9999.99 3rd Qu.:999.0 3rd Qu.:0.0000
## Max. :3.0000 Max. : 9999.99 Max. :999.0 Max. :1.0000
## otpay covidunaw covidlook
## Min. : 1.00 Mode:logical Mode:logical
## 1st Qu.:99.00 TRUE:33325 TRUE:9662
## Median :99.00 NA's:44316 NA's:67979
## Mean :83.76
## 3rd Qu.:99.00
## Max. :99.00
Recode variables
library(dplyr)
# Build the analysis variables from the raw IPUMS CPS codes.
cps1 <- cps_proj %>%
  mutate(
    # Binary COVID flag: 1 for April 2020 through February 2021, else 0.
    covid = as.factor(if_else(
      (year == 2020 & month >= 4) | (year == 2021 & month < 3), 1, 0
    )),
    # Six-period timeline. Months outside every window get the literal level
    # "NA" (a string, not a missing value); later filters test for it with
    # covidint != 'NA'.
    covidint = as.factor(
      if_else(year == 2019 & month <= 9, "1precov_Q32019",
        if_else(year == 2019 & month >= 10, "2precov_Q42019",
          if_else(year == 2020 & month <= 2, "3precov_Q12020",
            if_else(year == 2020 & month >= 3 & month <= 8, "4earlycov_Q22020",
              if_else(year == 2020 & month >= 9 & month <= 12, "5midcov_Q32020",
                if_else(year == 2021 & month >= 1 & month <= 3,
                  "6latecov_Q42020", "NA"
                )
              )
            )
          )
        )
      )
    ),
    # Family income collapsed to six ordered bands; unmatched codes become
    # the literal level "NA".
    hhinc = Recode(
      faminc,
      recodes = paste0(
        "100:490='1_lt15k';500:600='2_15-25k';710:720='3_25-35k';",
        "730:740='4_35-50k';810:830='5_50k-75k';840:843='6_75kplus';else='NA'"
      ),
      as.factor = TRUE
    ),
    # include.lowest = TRUE keeps 16-year-olds in the first band; without it
    # the left-open interval (16,24] turns age 16 into NA.
    agegrp = cut(age, breaks = c(16, 24, 34, 44, 54, 64), include.lowest = TRUE),
    male = Recode(sex, recodes = "2='0';1='1';else='NA'", as.factor = TRUE),
    # race and hispan are numeric, so the lookup sets must be numeric too:
    # %in% against character codes compares 0 with '000' as text ("0" vs
    # "000"), which never matches.
    raceeth = as.factor(
      if_else(race == 100 & hispan == 0, "NH_wht",
        if_else(race == 200 & hispan == 0, "NH_blk",
          if_else(!race %in% c(100, 200, 999) & hispan == 0, "NH_other",
            if_else(!hispan %in% c(0, 901, 902), "Hisp", "NA")
          )
        )
      )
    ),
    # No else clause: employment codes not listed keep their original value.
    employst = Recode(
      empstat,
      recodes = "1='Mil';10='emp';12='emp_nwklwk';20:22='unemp';32='NILF_utw';34='NILF_oth';36='NILF_ret'",
      as.factor = TRUE
    ),
    unemployed = Recode(empstat, recodes = "1:12='0';20:22='1';else=NA"),
    industry = Recode(
      ind,
      recodes = paste0(
        "770='const';1070:3990='manuf';4070:4590='whsale';4670:5790='retail';6070:6390='trans';570:690='utl';",
        "6470:6672='media_ent';6870:6992='bnk_ins';7071:7072='realest';7860:7890='Teach';7970:8290='Health';8670:8690='rstrnt_svc';",
        "8770:8891='repair_maint';8970:8990='perscare';9160:9390='gen_supprt';9470='crimjust'; else=NA"
      ),
      as.factor = TRUE
    ),
    selfemp = Recode(classwkr, recodes = "10:14='1';20:29='0';else=NA"),
    uedurwks = Recode(
      durunem2,
      recodes = "0:3='lt4wks';4:7='4-10wks';8:10='11-22wks';11:15='23-52wks';16='gt52wks'; else=NA",
      as.factor = TRUE
    ),
    whyue = Recode(
      whyunemp,
      recodes = "1='templayoff';2='jobloss';3='tempwk_end';4='leftjob';5='re_ent';6='new_ent'; else=NA",
      as.factor = TRUE
    ),
    whypt = Recode(
      whyptlwk,
      recodes = paste0(
        "60='onlyfindpt';80='ftdown';100:101='health_med';111='persday';120:122='chcare_fam';",
        "123='sch';130='oth'; else=NA"
      ),
      as.factor = TRUE
    ),
    # 0/1 indicators; codes outside each recode list become NA.
    underemp = Recode(wkstat, recodes = "12='1'; 13='1'; 21='1';99=NA; else='0'"),
    sameemp = Recode(empsame, recodes = "2='1';1='0';else=NA"),
    offerwrk = Recode(wrkoffer, recodes = "2='0';1='1';else=NA"),
    paidhourly = Recode(paidhour, recodes = "2='1';1='0';else=NA"),
    unioncover = Recode(union, recodes = "2:3='1';1='0';else=NA"),
    earnerstud = as.factor(eligorg),
    ottippay = Recode(otpay, recodes = "2='1';1='0';else=NA"),
    # Weekly earnings scaled to a 52-week year.
    earnyear = earnweek * 52
  )
# Earner sample: rows flagged earnerstud == 1, inside one of the six COVID
# periods, with weekly earnings strictly between 100 and 2000.
earner <- cps1 %>%
  filter(
    earnerstud == 1,
    covidint != "NA",
    earnweek > 100,
    earnweek < 2000
  ) %>%
  select(
    year, month, wtfinl, earnwt, hourwage, earnweek, earnyear,
    covid, covidint, agegrp, male, raceeth, employst, unemployed,
    industry, selfemp, uedurwks, whyue, whypt, underemp, sameemp,
    offerwrk, paidhourly, unioncover, ottippay, covidlook, covidunaw
  )
# Full analysis sample: rows with no missing demographic, employment-status
# or industry value, inside one of the six COVID periods.
cps2 <- cps1 %>%
  filter(
    complete.cases(agegrp, male, raceeth, employst, industry),
    covidint != "NA"
  ) %>%
  select(
    year, month, wtfinl, earnwt, hwtfinl, hhinc,
    covid, covidint, agegrp, male, raceeth, employst, unemployed,
    industry, selfemp, uedurwks, whyue, whypt, underemp, sameemp,
    offerwrk, paidhourly, unioncover, ottippay, covidunaw, covidlook
  )
# Column-by-column summary of the earner sample.
earner %>% summary()
## year month wtfinl earnwt
## Min. :2019 Min. : 1.000 Min. : 2336 Min. : 9099
## 1st Qu.:2019 1st Qu.: 3.000 1st Qu.: 3960 1st Qu.:15755
## Median :2020 Median : 8.000 Median : 4470 Median :17812
## Mean :2020 Mean : 7.007 Mean : 4679 Mean :18678
## 3rd Qu.:2020 3rd Qu.:10.000 3rd Qu.: 5094 3rd Qu.:20396
## Max. :2021 Max. :12.000 Max. :13531 Max. :65538
##
## hourwage earnweek earnyear covid
## Min. : 1.01 Min. : 101.0 Min. : 5251 0:5240
## 1st Qu.: 14.00 1st Qu.: 480.8 1st Qu.: 25000 1:5239
## Median : 25.00 Median : 760.0 Median : 39520
## Mean : 424.42 Mean : 842.2 Mean : 43793
## 3rd Qu.: 999.99 3rd Qu.:1153.8 3rd Qu.: 60000
## Max. : 999.99 Max. :1996.2 Max. :103800
##
## covidint agegrp male raceeth
## 1precov_Q32019 :1705 (16,24]:1410 0:5245 Hisp :4064
## 2precov_Q42019 :1812 (24,34]:2565 1:5234 NH_blk :1219
## 3precov_Q12020 :1171 (34,44]:2395 NH_other: 717
## 4earlycov_Q22020:2667 (44,54]:2239 NH_wht :4479
## 5midcov_Q32020 :2133 (54,64]:1837
## 6latecov_Q42020 : 991 NA's : 33
##
## employst unemployed industry selfemp uedurwks
## emp :10129 Min. :0 Health :1275 Min. :0 11-22wks: 0
## emp_nwklwk: 350 1st Qu.:0 retail :1250 1st Qu.:0 23-52wks: 0
## Mil : 0 Median :0 Teach :1173 Median :0 4-10wks : 0
## NILF_oth : 0 Mean :0 manuf : 868 Mean :0 gt52wks : 0
## NILF_ret : 0 3rd Qu.:0 const : 819 3rd Qu.:0 lt4wks :10479
## NILF_utw : 0 Max. :0 (Other):3043 Max. :0
## unemp : 0 NA's :2051
## whyue whypt underemp sameemp
## jobloss : 0 sch : 397 Min. :0.00000 Min. :0.0000
## leftjob : 0 chcare_fam: 338 1st Qu.:0.00000 1st Qu.:1.0000
## new_ent : 0 ftdown : 226 Median :0.00000 Median :1.0000
## re_ent : 0 persday : 211 Mean :0.09209 Mean :0.9766
## templayoff: 0 oth : 147 3rd Qu.:0.00000 3rd Qu.:1.0000
## tempwk_end: 0 (Other) : 224 Max. :1.00000 Max. :1.0000
## NA's :10479 NA's :8936 NA's :1711
## offerwrk paidhourly unioncover ottippay
## Min. : NA Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.: NA 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median : NA Median :1.0000 Median :0.00000 Median :0.0000
## Mean :NaN Mean :0.5858 Mean :0.05812 Mean :0.1411
## 3rd Qu.: NA 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. : NA Max. :1.0000 Max. :1.00000 Max. :1.0000
## NA's :10479
## covidlook covidunaw
## Mode:logical Mode:logical
## NA's:10479 TRUE:4446
## NA's:6033
##
##
##
##
# Column-by-column summary of the full analysis sample.
cps2 %>% summary()
## year month wtfinl earnwt
## Min. :2019 Min. : 1.000 Min. : 2336 Min. : 0
## 1st Qu.:2019 1st Qu.: 3.000 1st Qu.: 3990 1st Qu.: 0
## Median :2020 Median : 8.000 Median : 4504 Median : 0
## Mean :2020 Mean : 6.997 Mean : 4796 Mean : 4878
## 3rd Qu.:2020 3rd Qu.:10.000 3rd Qu.: 5240 3rd Qu.:13538
## Max. :2021 Max. :12.000 Max. :20883 Max. :65538
##
## hwtfinl hhinc covid covidint
## Min. : 2336 1_lt15k : 2305 0:20908 1precov_Q32019 : 6942
## 1st Qu.: 3884 2_15-25k : 2430 1:22326 2precov_Q42019 : 7096
## Median : 4334 3_25-35k : 3819 3precov_Q12020 : 4840
## Mean : 4592 4_35-50k : 5640 4earlycov_Q22020:10705
## 3rd Qu.: 4965 5_50k-75k: 8345 5midcov_Q32020 : 9234
## Max. :16022 6_75kplus:20695 6latecov_Q42020 : 4417
##
## agegrp male raceeth employst unemployed
## (16,24]: 5677 0:20363 Hisp :16384 emp :38908 Min. :0.0000
## (24,34]: 9598 1:22871 NH_blk : 4852 emp_nwklwk: 1588 1st Qu.:0.0000
## (34,44]:10089 NH_other: 3079 Mil : 0 Median :0.0000
## (44,54]: 9870 NH_wht :18919 NILF_oth : 348 Mean :0.0539
## (54,64]: 8000 NILF_ret : 45 3rd Qu.:0.0000
## NILF_utw : 39 Max. :1.0000
## unemp : 2306 NA's :432
## industry selfemp uedurwks whyue
## Health : 6131 Min. :0.00000 11-22wks: 489 jobloss : 627
## retail : 6035 1st Qu.:0.00000 23-52wks: 446 leftjob : 205
## Teach : 5333 Median :0.00000 4-10wks : 778 new_ent : 0
## const : 4839 Mean :0.07737 gt52wks : 109 re_ent : 490
## manuf : 4719 3rd Qu.:0.00000 lt4wks :41412 templayoff: 780
## rstrnt_svc: 3593 Max. :1.00000 tempwk_end: 204
## (Other) :12584 NA's :40928
## whypt underemp sameemp offerwrk
## sch : 1450 Min. :0.0000 Min. :0.000 Min. :0.00
## chcare_fam: 1251 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:1.00
## ftdown : 886 Median :0.0000 Median :1.000 Median :1.00
## persday : 803 Mean :0.0925 Mean :0.977 Mean :0.85
## oth : 638 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:1.00
## (Other) : 848 Max. :1.0000 Max. :1.000 Max. :1.00
## NA's :37358 NA's :432 NA's :18404 NA's :43193
## paidhourly unioncover ottippay covidunaw covidlook
## Min. :0.00 Min. :0.00 Min. :0.00 Mode:logical Mode:logical
## 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 TRUE:18449 TRUE:192
## Median :1.00 Median :0.00 Median :0.00 NA's:24785 NA's:43042
## Mean :0.56 Mean :0.06 Mean :0.15
## 3rd Qu.:1.00 3rd Qu.:0.00 3rd Qu.:0.00
## Max. :1.00 Max. :1.00 Max. :1.00
## NA's :33718 NA's :33718 NA's :33718
Descriptive cross tabs
# Unweighted counts of COVID period by demographics, shown for the earner
# sample and then the full sample. Earner sample: period by sex.
table(earner[["covidint"]], earner[["male"]])
##
## 0 1
## 1precov_Q32019 871 834
## 2precov_Q42019 912 900
## 3precov_Q12020 609 562
## 4earlycov_Q22020 1308 1359
## 5midcov_Q32020 1044 1089
## 6latecov_Q42020 501 490
# Full sample: COVID period by sex.
table(cps2[["covidint"]], cps2[["male"]])
##
## 0 1
## 1precov_Q32019 3264 3678
## 2precov_Q42019 3371 3725
## 3precov_Q12020 2305 2535
## 4earlycov_Q22020 5099 5606
## 5midcov_Q32020 4267 4967
## 6latecov_Q42020 2057 2360
# Earner sample: COVID period by race/ethnicity.
table(earner[["covidint"]], earner[["raceeth"]])
##
## Hisp NH_blk NH_other NH_wht
## 1precov_Q32019 636 218 115 736
## 2precov_Q42019 716 228 117 751
## 3precov_Q12020 477 141 88 465
## 4earlycov_Q22020 1005 273 195 1194
## 5midcov_Q32020 828 254 137 914
## 6latecov_Q42020 402 105 65 419
# Full sample: COVID period by race/ethnicity.
table(cps2[["covidint"]], cps2[["raceeth"]])
##
## Hisp NH_blk NH_other NH_wht
## 1precov_Q32019 2583 825 421 3113
## 2precov_Q42019 2670 830 533 3063
## 3precov_Q12020 1934 513 378 2015
## 4earlycov_Q22020 3921 1136 780 4868
## 5midcov_Q32020 3506 1037 644 4047
## 6latecov_Q42020 1770 511 323 1813
# Earner sample: COVID period by age group.
table(earner[["covidint"]], earner[["agegrp"]])
##
## (16,24] (24,34] (34,44] (44,54] (54,64]
## 1precov_Q32019 275 408 364 347 302
## 2precov_Q42019 253 464 428 381 280
## 3precov_Q12020 154 302 256 248 209
## 4earlycov_Q22020 320 627 624 589 499
## 5midcov_Q32020 276 510 522 452 367
## 6latecov_Q42020 132 254 201 222 180
# Full sample: COVID period by age group.
table(cps2[["covidint"]], cps2[["agegrp"]])
##
## (16,24] (24,34] (34,44] (44,54] (54,64]
## 1precov_Q32019 1010 1567 1507 1606 1252
## 2precov_Q42019 896 1653 1680 1597 1270
## 3precov_Q12020 650 1116 1104 1052 918
## 4earlycov_Q22020 1325 2174 2497 2521 2188
## 5midcov_Q32020 1238 2078 2243 2076 1599
## 6latecov_Q42020 558 1010 1058 1018 773
# Unweighted counts of employment status by demographics, shown for the
# earner sample and then the full sample. Earner sample: status by sex.
table(earner[["employst"]], earner[["male"]])
##
## 0 1
## emp 5029 5100
## emp_nwklwk 216 134
## Mil 0 0
## NILF_oth 0 0
## NILF_ret 0 0
## NILF_utw 0 0
## unemp 0 0
# Full sample: employment status by sex.
table(cps2[["employst"]], cps2[["male"]])
##
## 0 1
## emp 18159 20749
## emp_nwklwk 884 704
## Mil 0 0
## NILF_oth 189 159
## NILF_ret 29 16
## NILF_utw 18 21
## unemp 1084 1222
# Earner sample: employment status by race/ethnicity.
table(earner[["employst"]], earner[["raceeth"]])
##
## Hisp NH_blk NH_other NH_wht
## emp 3947 1169 682 4331
## emp_nwklwk 117 50 35 148
## Mil 0 0 0 0
## NILF_oth 0 0 0 0
## NILF_ret 0 0 0 0
## NILF_utw 0 0 0 0
## unemp 0 0 0 0
# Full sample: employment status by race/ethnicity.
table(cps2[["employst"]], cps2[["raceeth"]])
##
## Hisp NH_blk NH_other NH_wht
## emp 14594 4224 2786 17304
## emp_nwklwk 550 202 121 715
## Mil 0 0 0 0
## NILF_oth 145 46 33 124
## NILF_ret 6 8 0 31
## NILF_utw 11 7 1 20
## unemp 1078 365 138 725
# Earner sample: employment status by age group.
table(earner[["employst"]], earner[["agegrp"]])
##
## (16,24] (24,34] (34,44] (44,54] (54,64]
## emp 1357 2483 2333 2166 1760
## emp_nwklwk 53 82 62 73 77
## Mil 0 0 0 0 0
## NILF_oth 0 0 0 0 0
## NILF_ret 0 0 0 0 0
## NILF_utw 0 0 0 0 0
## unemp 0 0 0 0 0
# Full sample: employment status by age group.
table(cps2[["employst"]], cps2[["agegrp"]])
##
## (16,24] (24,34] (34,44] (44,54] (54,64]
## emp 4834 8580 9243 9010 7241
## emp_nwklwk 186 348 350 358 346
## Mil 0 0 0 0 0
## NILF_oth 129 85 44 53 37
## NILF_ret 1 0 1 6 37
## NILF_utw 1 5 5 11 17
## unemp 526 580 446 432 322
# Unweighted counts of income group by demographics, shown for the earner
# sample and then the full sample.
# Annual earnings are binned into (25k,50k], (50k,75k] and (75k,100k];
# values outside those bins become NA and are left out of the table.
earner$earnyearint <- cut(
  earner$earnyear,
  breaks = c(25000, 50000, 75000, 100000)
)
table(earner$earnyearint, earner$male)
##
## 0 1
## (2.5e+04,5e+04] 2085 2108
## (5e+04,7.5e+04] 1140 1204
## (7.5e+04,1e+05] 469 779
# Full sample: household income band by sex.
table(cps2[["hhinc"]], cps2[["male"]])
##
## 0 1
## 1_lt15k 1133 1172
## 2_15-25k 1171 1259
## 3_25-35k 1831 1988
## 4_35-50k 2531 3109
## 5_50k-75k 3991 4354
## 6_75kplus 9706 10989
# Earner sample: annual-earnings bin by race/ethnicity. The bin column is
# recomputed here with the same 25k/50k/75k/100k breaks.
earner$earnyearint <- cut(
  earner$earnyear,
  breaks = c(25000, 50000, 75000, 100000)
)
table(earner$earnyearint, earner$raceeth)
##
## Hisp NH_blk NH_other NH_wht
## (2.5e+04,5e+04] 1796 554 237 1606
## (5e+04,7.5e+04] 664 264 195 1221
## (7.5e+04,1e+05] 280 97 123 748
# Full sample: household income band by race/ethnicity.
table(cps2[["hhinc"]], cps2[["raceeth"]])
##
## Hisp NH_blk NH_other NH_wht
## 1_lt15k 1250 353 103 599
## 2_15-25k 1367 348 96 619
## 3_25-35k 2114 506 196 1003
## 4_35-50k 2940 681 321 1698
## 5_50k-75k 3245 1074 530 3496
## 6_75kplus 5468 1890 1833 11504
# Earner sample: annual-earnings bin by age group. The bin column is
# recomputed here with the same 25k/50k/75k/100k breaks.
earner$earnyearint <- cut(
  earner$earnyear,
  breaks = c(25000, 50000, 75000, 100000)
)
table(earner$earnyearint, earner$agegrp)
##
## (16,24] (24,34] (34,44] (44,54] (54,64]
## (2.5e+04,5e+04] 446 1189 967 876 713
## (5e+04,7.5e+04] 96 563 611 583 491
## (7.5e+04,1e+05] 19 278 352 341 258
# Full sample: household income band by age group.
table(cps2[["hhinc"]], cps2[["agegrp"]])
##
## (16,24] (24,34] (34,44] (44,54] (54,64]
## 1_lt15k 541 600 486 368 310
## 2_15-25k 401 660 520 472 377
## 3_25-35k 625 953 814 689 738
## 4_35-50k 913 1495 1226 1109 897
## 5_50k-75k 1114 1943 1815 1886 1587
## 6_75kplus 2083 3947 5228 5346 4091
Boxplots of Annual income over covid periods by demographics for earner ds
# Survey designs for the weighted estimates. Both use ids = ~1 (no cluster
# variable); the earner sample is weighted by earnwt, the full sample by
# hwtfinl.
# NOTE(review): hwtfinl looks like a household-level weight, while the des2
# estimates below are person-level -- confirm whether wtfinl was intended.
options(survey.lonely.psu = "adjust")
des <- svydesign(data = earner, ids = ~1, weights = ~earnwt)
des2 <- svydesign(data = cps2, ids = ~1, weights = ~hwtfinl)
# Boxplots of annual earnings (earner sample) across the COVID periods:
# p is coloured by period, q by sex, r by race/ethnicity, s by age group.
# Each plot rotates the x-axis labels 45 degrees so the period names fit.
p <- ggplot(
  data = earner,
  mapping = aes(covidint, earnyear, fill = covidint)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

q <- ggplot(
  data = earner,
  mapping = aes(covidint, earnyear, fill = male)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

r <- ggplot(
  data = earner,
  mapping = aes(covidint, earnyear, fill = raceeth)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

s <- ggplot(
  data = earner,
  mapping = aes(covidint, earnyear, fill = agegrp)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

# Draw the four plots in order.
print(p)

print(q)

print(r)

print(s)
### Weighted Analysis: Survey means and Chi-square test
# Weighted means (svymean via svyby) on the full-sample design des2,
# dropping missing values.
# Household income band by sex, age group, race/ethnicity and COVID period.
cat1 <- svyby(~hhinc, ~male, des2, svymean, na.rm = TRUE)
cat2 <- svyby(~hhinc, ~agegrp, des2, svymean, na.rm = TRUE)
cat3 <- svyby(~hhinc, ~raceeth, des2, svymean, na.rm = TRUE)
cat4 <- svyby(~hhinc, ~covidint, des2, svymean, na.rm = TRUE)
# Unemployment-duration band by the same four groupings.
cat5 <- svyby(~uedurwks, ~male, des2, svymean, na.rm = TRUE)
cat6 <- svyby(~uedurwks, ~agegrp, des2, svymean, na.rm = TRUE)
cat7 <- svyby(~uedurwks, ~raceeth, des2, svymean, na.rm = TRUE)
cat8 <- svyby(~uedurwks, ~covidint, des2, svymean, na.rm = TRUE)
# 0/1 employment indicators by COVID period.
cat9 <- svyby(~underemp, ~covidint, des2, svymean, na.rm = TRUE)
cat10 <- svyby(~sameemp, ~covidint, des2, svymean, na.rm = TRUE)
cat11 <- svyby(~ottippay, ~covidint, des2, svymean, na.rm = TRUE)
cat12 <- svyby(~paidhourly, ~covidint, des2, svymean, na.rm = TRUE)
cat13 <- svyby(~unemployed, ~covidint, des2, svymean, na.rm = TRUE)
# Design-based chi-square tests of independence between each outcome and the
# COVID period (covidint), run on the full-sample design des2.
# The printed result echoes the call text, so the calls are kept verbatim.
# Household income band vs. COVID period:
svychisq(~hhinc+covidint, design = des2)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~hhinc + covidint, design = des2)
## F = 8.18, ndf = 2.4756e+01, ddf = 1.0703e+06, p-value < 2.2e-16
# Unemployment-duration band vs. COVID period, on the des2 design.
svychisq(~uedurwks+covidint, design = des2)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~uedurwks + covidint, design = des2)
## F = 37.412, ndf = 19.764, ddf = 854455.446, p-value < 2.2e-16
# Underemployment indicator vs. COVID period, on the des2 design.
svychisq(~underemp+covidint, design = des2)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~underemp + covidint, design = des2)
## F = 46.617, ndf = 4.9655e+00, ddf = 2.1467e+05, p-value < 2.2e-16
# Unemployment indicator vs. COVID period, on the des2 design.
svychisq(~unemployed+covidint, design = des2)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~unemployed + covidint, design = des2)
## F = 81.877, ndf = 4.9651e+00, ddf = 2.1466e+05, p-value < 2.2e-16
# Survey estimates rendered as centred HTML tables.
# Household income band shares (and their standard errors) by sex.
knitr::kable(
  cat1,
  format = "html",
  align = "c",
  caption = "Survey Estimates of Houshold Income by Sex"
)
Survey Estimates of Houshold Income by Sex
|
|
male
|
hhinc1_lt15k
|
hhinc2_15-25k
|
hhinc3_25-35k
|
hhinc4_35-50k
|
hhinc5_50k-75k
|
hhinc6_75kplus
|
se.hhinc1_lt15k
|
se.hhinc2_15-25k
|
se.hhinc3_25-35k
|
se.hhinc4_35-50k
|
se.hhinc5_50k-75k
|
se.hhinc6_75kplus
|
|
0
|
0
|
0.0571734
|
0.0579473
|
0.0907405
|
0.1278998
|
0.1952205
|
0.4710185
|
0.0016946
|
0.0017049
|
0.0020904
|
0.0024468
|
0.0028552
|
0.0035959
|
|
1
|
1
|
0.0523408
|
0.0566314
|
0.0897881
|
0.1394460
|
0.1900247
|
0.4717691
|
0.0015236
|
0.0016050
|
0.0019840
|
0.0023882
|
0.0026681
|
0.0033924
|
# Unemployment-duration band shares (and their standard errors) by sex,
# rendered as a centred HTML table.
knitr::kable(
  cat5,
  format = "html",
  align = "c",
  caption = "Survey Estimates of Duration of unemp by sex"
)
Survey Estimates of Duration of unemp by sex
|
|
male
|
uedurwks11-22wks
|
uedurwks23-52wks
|
uedurwks4-10wks
|
uedurwksgt52wks
|
uedurwkslt4wks
|
se.uedurwks11-22wks
|
se.uedurwks23-52wks
|
se.uedurwks4-10wks
|
se.uedurwksgt52wks
|
se.uedurwkslt4wks
|
|
0
|
0
|
0.0126117
|
0.0102757
|
0.0204298
|
0.0025851
|
0.9540977
|
0.0008480
|
0.0007319
|
0.0010830
|
0.0003770
|
0.0015757
|
|
1
|
1
|
0.0118231
|
0.0105597
|
0.0193357
|
0.0024162
|
0.9558653
|
0.0007573
|
0.0006936
|
0.0009949
|
0.0003262
|
0.0014431
|
# Underemployment share (and its standard error) by COVID period, rendered
# as a centred HTML table.
knitr::kable(
  cat9,
  format = "html",
  align = "c",
  caption = "Survey Estimates of underemployment before/during/after COVID"
)
Survey Estimates of underemployment before/during/after COVID
|
|
covidint
|
underemp
|
se
|
|
1precov_Q32019
|
1precov_Q32019
|
0.0756078
|
0.0032184
|
|
2precov_Q42019
|
2precov_Q42019
|
0.0694747
|
0.0030726
|
|
3precov_Q12020
|
3precov_Q12020
|
0.0779991
|
0.0039881
|
|
4earlycov_Q22020
|
4earlycov_Q22020
|
0.1266970
|
0.0033681
|
|
5midcov_Q32020
|
5midcov_Q32020
|
0.0940073
|
0.0031029
|
|
6latecov_Q42020
|
6latecov_Q42020
|
0.0928577
|
0.0045159
|
# Unemployment share (and its standard error) by COVID period, rendered as
# a centred HTML table.
knitr::kable(
  cat13,
  format = "html",
  align = "c",
  caption = "Survey Estimates of unemployment before/during/after COVID"
)
Survey Estimates of unemployment before/during/after COVID
|
|
covidint
|
unemployed
|
se
|
|
1precov_Q32019
|
1precov_Q32019
|
0.0328733
|
0.0022072
|
|
2precov_Q42019
|
2precov_Q42019
|
0.0275986
|
0.0019802
|
|
3precov_Q12020
|
3precov_Q12020
|
0.0340363
|
0.0026657
|
|
4earlycov_Q22020
|
4earlycov_Q22020
|
0.0851442
|
0.0028492
|
|
5midcov_Q32020
|
5midcov_Q32020
|
0.0600619
|
0.0025449
|
|
6latecov_Q42020
|
6latecov_Q42020
|
0.0707891
|
0.0039906
|
Calculate Covid period by different employment-status vars cross tabulation, and plot it
# Underemployment share per COVID period: point estimate with an error bar
# spanning estimate +/- 1.96 * se.
ggplot(cat9, aes(x = covidint)) +
  geom_point(aes(y = underemp)) +
  geom_errorbar(
    aes(ymin = underemp - 1.96 * se, ymax = underemp + 1.96 * se),
    width = 0.25
  ) +
  labs(
    title = "Percent % of US Adults underemployed by COVID period",
    caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
    x = "COVID period",
    y = "Underemployed"
  ) +
  theme_minimal()

# Share working for the same employer per COVID period: point estimate with
# an error bar spanning estimate +/- 1.96 * se.
ggplot(cat10, aes(x = covidint)) +
  geom_point(aes(y = sameemp)) +
  geom_errorbar(
    aes(ymin = sameemp - 1.96 * se, ymax = sameemp + 1.96 * se),
    width = 0.25
  ) +
  labs(
    title = "Percent % of US Adults working for the same employer as prior month by COVID period",
    caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
    x = "COVID period",
    y = "same employer"
  ) +
  theme_minimal()

# Share earning overtime/tips per COVID period: point estimate with an
# error bar spanning estimate +/- 1.96 * se.
ggplot(cat11, aes(x = covidint)) +
  geom_point(aes(y = ottippay)) +
  geom_errorbar(
    aes(ymin = ottippay - 1.96 * se, ymax = ottippay + 1.96 * se),
    width = 0.25
  ) +
  labs(
    title = "Percent % of US Adults who earn OT and tips by COVID period",
    caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
    x = "COVID period",
    y = "Earn OT and tips"
  ) +
  theme_minimal()

# Share paid hourly per COVID period: point estimate with an error bar
# spanning estimate +/- 1.96 * se.
ggplot(cat12, aes(x = covidint)) +
  geom_point(aes(y = paidhourly)) +
  geom_errorbar(
    aes(ymin = paidhourly - 1.96 * se, ymax = paidhourly + 1.96 * se),
    width = 0.25
  ) +
  labs(
    title = "Percent % of US Adults paid hourly by COVID period",
    caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
    x = "COVID period",
    y = "paid hourly"
  ) +
  theme_minimal()

# Unemployment share per COVID period: point estimate with an error bar
# spanning estimate +/- 1.96 * se.
# The y-axis label names the plotted variable (unemployed), not the
# "paid hourly" label carried over from the previous plot.
cat13 %>%
  ggplot() +
  geom_point(aes(x = covidint, y = unemployed)) +
  geom_errorbar(
    aes(
      x = covidint,
      ymin = unemployed - 1.96 * se,
      ymax = unemployed + 1.96 * se
    ),
    width = .25
  ) +
  labs(
    title = "Percent % of US Adults unemployed by COVID period",
    caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
    x = "COVID period",
    y = "Unemployed"
  ) +
  theme_minimal()

Calculate Covid period by different employment-status vars by race-ethnicity cross tabulation, and plot it
# Weighted underemployment share for every COVID period x race/ethnicity
# cell, plotted as point estimates with +/- 1.96 * se error bars and one
# facet per race/ethnicity group.
catdog9 <- svyby(~underemp, ~ covidint + raceeth, des2, svymean, na.rm = TRUE)
ggplot(catdog9, aes(x = covidint)) +
  geom_point(aes(y = underemp)) +
  geom_errorbar(
    aes(ymin = underemp - 1.96 * se, ymax = underemp + 1.96 * se),
    width = 0.25
  ) +
  facet_wrap(~raceeth, nrow = 3) +
  labs(
    title = "Percent % of US Adults underemployed by Race/Ethnicity COVID period",
    caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
    x = "COVID period",
    y = "Underemployed"
  ) +
  theme(axis.text.x = element_text(hjust = 1, angle = 40))

# Weighted unemployment share for every COVID period x race/ethnicity cell,
# plotted as point estimates with +/- 1.96 * se error bars and one facet per
# race/ethnicity group.
# The y-axis label names the plotted variable (unemployed), not the
# "Underemployed" label carried over from the previous plot.
catdog13 <- svyby(
  formula = ~unemployed,
  by = ~ covidint + raceeth,
  design = des2,
  FUN = svymean,
  na.rm = TRUE
)
catdog13 %>%
  ggplot() +
  geom_point(aes(x = covidint, y = unemployed)) +
  geom_errorbar(
    aes(
      x = covidint,
      ymin = unemployed - 1.96 * se,
      ymax = unemployed + 1.96 * se
    ),
    width = .25
  ) +
  facet_wrap(~raceeth, nrow = 3) +
  labs(
    title = "Percent % of US Adults unemployed by Race/Ethnicity and COVID period",
    caption = "Source: IPUMS CPS - Microdata, 2019-2021 \n Calculations by David Rodriguez",
    x = "COVID period",
    y = "Unemployed"
  ) +
  theme(axis.text.x = element_text(angle = 40, hjust = 1))
