library(car)
library(mice)
library(ggplot2)
library(dplyr)
library(readr)
ds1 <- read_csv("C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   ASECFLAG = col_logical(),
##   COVIDUNAW = col_logical(),
##   COVIDLOOK = col_logical()
## )
## i Use `spec()` for the full column specifications.
## Warning: 33939 parsing failures.
##   row      col           expected actual                                                                                       file
## 33909 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33910 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33911 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33912 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## 33913 ASECFLAG 1/0/T/F/TRUE/FALSE      2 'C:/Users/drayr/OneDrive/Desktop/DEM Spring 2021/Stats II 7183/Project/data/cps_00012.csv'
## ..... ........ .................. ...... ..........................................................................................
## See problems(...) for more details.
#The names in the data are very ugly, so I make them less ugly
nams<-names(ds1)
head(nams, n=10)
##  [1] "YEAR"     "SERIAL"   "MONTH"    "HWTFINL"  "CPSID"    "ASECFLAG"
##  [7] "STATEFIP" "COUNTY"   "FAMINC"   "PERNUM"
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(ds1)<-newnames

summary(ds1)
##       year          serial          month           hwtfinl     
##  Min.   :2019   Min.   :56720   Min.   : 1.000   Min.   : 2336  
##  1st Qu.:2019   1st Qu.:58433   1st Qu.: 3.000   1st Qu.: 3864  
##  Median :2020   Median :59368   Median : 8.000   Median : 4310  
##  Mean   :2020   Mean   :59379   Mean   : 6.962   Mean   : 4557  
##  3rd Qu.:2020   3rd Qu.:60320   3rd Qu.:10.000   3rd Qu.: 4932  
##  Max.   :2021   Max.   :62078   Max.   :12.000   Max.   :21835  
##      cpsid           asecflag          statefip      county     
##  Min.   :2.018e+13   Mode:logical   Min.   :48   Min.   :    0  
##  1st Qu.:2.019e+13   NA's:77641     1st Qu.:48   1st Qu.:    0  
##  Median :2.019e+13                  Median :48   Median :    0  
##  Mean   :2.019e+13                  Mean   :48   Mean   : 7452  
##  3rd Qu.:2.020e+13                  3rd Qu.:48   3rd Qu.:    0  
##  Max.   :2.021e+13                  Max.   :48   Max.   :48485  
##      faminc          pernum           wtfinl          cpsidp         
##  Min.   :100.0   Min.   : 1.000   Min.   :    0   Min.   :2.018e+13  
##  1st Qu.:730.0   1st Qu.: 1.000   1st Qu.: 3968   1st Qu.:2.019e+13  
##  Median :830.0   Median : 2.000   Median : 4476   Median :2.019e+13  
##  Mean   :754.5   Mean   : 1.946   Mean   : 4745   Mean   :2.019e+13  
##  3rd Qu.:842.0   3rd Qu.: 2.000   3rd Qu.: 5191   3rd Qu.:2.020e+13  
##  Max.   :843.0   Max.   :14.000   Max.   :21835   Max.   :2.021e+13  
##       age          sex             race           hispan         empstat    
##  Min.   :16   Min.   :1.000   Min.   :100.0   Min.   :  0.0   Min.   : 1.0  
##  1st Qu.:28   1st Qu.:1.000   1st Qu.:100.0   1st Qu.:  0.0   1st Qu.:10.0  
##  Median :40   Median :2.000   Median :100.0   Median :  0.0   Median :10.0  
##  Mean   :40   Mean   :1.517   Mean   :157.7   Mean   : 66.7   Mean   :17.2  
##  3rd Qu.:52   3rd Qu.:2.000   3rd Qu.:100.0   3rd Qu.:100.0   3rd Qu.:32.0  
##  Max.   :64   Max.   :2.000   Max.   :814.0   Max.   :612.0   Max.   :36.0  
##       ind          classwkr        durunem2          whyunemp     
##  Min.   :   0   Min.   : 0.00   Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.:   0   1st Qu.: 0.00   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median :5180   Median :22.00   Median : 0.0000   Median :0.0000  
##  Mean   :4444   Mean   :15.91   Mean   : 0.3218   Mean   :0.1185  
##  3rd Qu.:7860   3rd Qu.:22.00   3rd Qu.: 0.0000   3rd Qu.:0.0000  
##  Max.   :9890   Max.   :29.00   Max.   :16.0000   Max.   :6.0000  
##     whyptlwk          wnlook          wkstat         empsame     
##  Min.   :  0.00   Min.   :  1.0   Min.   :11.00   Min.   : 1.00  
##  1st Qu.:  0.00   1st Qu.:999.0   1st Qu.:11.00   1st Qu.: 2.00  
##  Median :  0.00   Median :999.0   Median :11.00   Median :99.00  
##  Mean   : 11.78   Mean   :978.2   Mean   :40.32   Mean   :58.89  
##  3rd Qu.:  0.00   3rd Qu.:999.0   3rd Qu.:99.00   3rd Qu.:99.00  
##  Max.   :130.00   Max.   :999.0   Max.   :99.00   Max.   :99.00  
##     wrkoffer         earnwt         hourwage          paidhour     
##  Min.   : 1.00   Min.   :    0   Min.   :   1.01   Min.   :0.0000  
##  1st Qu.:99.00   1st Qu.:    0   1st Qu.: 999.99   1st Qu.:0.0000  
##  Median :99.00   Median :    0   Median : 999.99   Median :0.0000  
##  Mean   :98.27   Mean   : 4726   Mean   : 917.63   Mean   :0.2397  
##  3rd Qu.:99.00   3rd Qu.:12776   3rd Qu.: 999.99   3rd Qu.:0.0000  
##  Max.   :99.00   Max.   :65538   Max.   : 999.99   Max.   :2.0000  
##      union           earnweek         uhrsworkorg       eligorg      
##  Min.   :0.0000   Min.   :    0.01   Min.   :  1.0   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.: 9999.99   1st Qu.:999.0   1st Qu.:0.0000  
##  Median :0.0000   Median : 9999.99   Median :999.0   Median :0.0000  
##  Mean   :0.1667   Mean   : 8604.45   Mean   :944.5   Mean   :0.1558  
##  3rd Qu.:0.0000   3rd Qu.: 9999.99   3rd Qu.:999.0   3rd Qu.:0.0000  
##  Max.   :3.0000   Max.   : 9999.99   Max.   :999.0   Max.   :1.0000  
##      otpay       covidunaw      covidlook     
##  Min.   : 1.00   Mode:logical   Mode:logical  
##  1st Qu.:99.00   TRUE:33325     TRUE:9662     
##  Median :99.00   NA's:44316     NA's:67979    
##  Mean   :83.76                                
##  3rd Qu.:99.00                                
##  Max.   :99.00

Recode Data

library(questionr)
library(dplyr)

ds2<-ds1%>%
  mutate(
          covid = as.factor(if_else(year==2020 & month>=4 | year==2021 & month<3,1,0)),
          covidint = as.factor(if_else(year==2019 & month<=9,"1precov_Q32019",
                                       if_else(year==2019 & month>=10,"2precov_Q42019",
                                       if_else(year==2020 & month<=2,"3precov_Q12020",
                                       if_else(year==2020 & month>=3 & month<=8,"4earlycov_Q22020",
                                       if_else(year==2020 & month>=9 & month<=12,"5midcov_Q32020",
                                       if_else(year==2021 & month>=1 & month<=3,"6latecov_Q42020","NA"))))))
                                       ),

            hhinc   =Recode(faminc,recodes="100:490='1_lt15k';500:600='2_15-25k';710:720='3_25-35k';
                          730:740='4_35-50k';810:830='5_50k-75k';840:843='6_75kplus';999='NA'", as.factor = T),
            agegrp=cut(age, breaks=c(16,24,34,44,54,64)),
          male=Recode(sex,recodes="2='0';1='1';9=NA", as.factor = T),
          raceeth=as.factor(if_else(race==100 & hispan==000,"NH_wht",
                          if_else(race==200 & hispan==000, "NH_blk",
                          if_else(!race%in%c('100','200','999') & hispan==000, "NH_other",
                          if_else(!hispan%in%c('000','901','902'), "Hisp","NA"))))
                          ),

            employst=Recode( empstat,recodes="1='Mil';10='emp';12='emp_nwklwk';20:22='unemp';32='NILF_utw';34='NILF_oth';36='NILF_ret'; else=NA" , as.factor = T),
          unemployed=Recode( empstat,recodes="1:12='0';20:22='1';else=NA"),
            industry=Recode( ind,recodes="770='const';1070:3990='manuf';4070:4590='whsale';4670:5790='retail';6070:6390='trans';570:690='utl';
                             6470:6672='media_ent';6870:6992='bnk_ins';7071:7072='realest';7860:7890='Teach';7970:8290='Health';8670:8690='rstrnt_svc';8770:8891='repair_maint';8970:8990='perscare';9160:9390='gen_supprt';9470='crimjust'; else=NA", as.factor = T ),
            selfemp=Recode( classwkr,recodes="10:14='1';20:29='0';else=NA" ),
            uedurwks    =Recode( durunem2,recodes="0:3='lt4wks';4:7='4-10wks';8:10='11-22wks';11:15='23-52wks';16='gt52wks'; else=NA", as.factor = T),
            whyue   =Recode( whyunemp,recodes="1='templayoff';2='jobloss';3='tempwk_end';4='leftjob';5='re_ent';6='new_ent'; else=NA" , as.factor = T),
            whypt   =Recode( whyptlwk,recodes="60='onlyfindpt';80='ftdown';100:101='health_med';111='persday';120:122='chcare_fam';
                           123='sch';130='oth'; else=NA" , as.factor = T),
            underemp=Recode( wkstat,recodes="12='1'; 13='1'; 21='1';99=NA; else='0'" ),#reconsider recodes
            sameemp=Recode( empsame,recodes="2='1';1='0';else=NA" ),
            offerwrk=Recode( wrkoffer,recodes= "2='0';1='1';else=NA"),
            paidhourly=Recode( paidhour,recodes="2='1';1='0';else=NA"),
            unioncover=Recode( union,recodes="2:3='1';1='0';else=NA"),
            earnerstud=as.factor(x=eligorg),
            ottippay=Recode( otpay,recodes="2='1';1='0';else=NA"),
          earnweekna=recode.na(earnweek,9999.99,as.numeric = T),
          earnyear=earnweek*52
         )
## Recoded 65562 values to NA.
ds2a<-ds2%>%
  filter(earnerstud==1 )%>%
    select(earnweekna, hhinc, covidint, agegrp, male, raceeth, industry)

ds2b<-ds2%>%
  filter(earnerstud==0 )%>%
    select(earnweekna, hhinc, covidint, agegrp, male, raceeth, industry)

set.seed(410)
samp<-sample(1:dim(ds2b)[1], size = 1000) #smaller sample from non-earner study group
ds2b<-ds2b[samp,]

ds3<-rbind(ds2a,ds2b)

Pattern of Missingness

summary(ds3)
##    earnweekna         hhinc                  covidint        agegrp    
##  Min.   :   1   1_lt15k  : 679   1precov_Q32019  :2114   (16,24]:1654  
##  1st Qu.: 579   2_15-25k : 681   2precov_Q42019  :2211   (24,34]:3001  
##  Median : 972   3_25-35k :1120   3precov_Q12020  :1483   (34,44]:3059  
##  Mean   :1003   4_35-50k :1614   4earlycov_Q22020:3370   (44,54]:2892  
##  3rd Qu.:1416   5_50k-75k:2472   5midcov_Q32020  :2674   (54,64]:2412  
##  Max.   :1843   6_75kplus:6529   6latecov_Q42020 :1243   NA's   :  77  
##  NA's   :1016                                                          
##  male         raceeth        industry   
##  0:6302   Hisp    :4740   Health :1515  
##  1:6793   NH_blk  :1432   retail :1447  
##           NH_other: 971   Teach  :1351  
##           NH_wht  :5952   manuf  :1101  
##                           const  : 964  
##                           (Other):3700  
##                           NA's   :3017

Mean Imputation for Weekly income

ds3$wkearn.mean<-ifelse(is.na(ds3$earnweekna)==T, mean(ds3$earnweekna, na.rm=T), ds3$earnweekna)

mean(ds3$earnweekna, na.rm=T)
## [1] 1003.404
mean(ds3$wkearn.mean)
## [1] 1003.404
median(ds3$earnweekna, na.rm=T)
## [1] 972
median(ds3$wkearn.mean)
## [1] 1003.404
var(ds3$earnweekna, na.rm=T)
## [1] 251959.4
var(ds3$wkearn.mean)
## [1] 232409.2
library(reshape2)

ds3%>%
  select(wkearn.mean, earnweekna)%>%
  melt()%>%
  ggplot()+geom_freqpoly(aes(x = value,
     y = ..density.., colour = variable))
## No id variables; using all as measure variables
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1016 rows containing non-finite values (stat_bin).

##Testing for MAR

fitmar<-lm(earnweekna~hhinc+covidint+agegrp+raceeth+male+is.na(industry), ds3)
summary(fitmar)
## 
## Call:
## lm(formula = earnweekna ~ hhinc + covidint + agegrp + raceeth + 
##     male + is.na(industry), data = ds3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1386.39  -285.21    15.58   305.95  1652.85 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              108.4544    21.0854   5.144 2.74e-07 ***
## hhinc2_15-25k             10.3051    23.5193   0.438  0.66128    
## hhinc3_25-35k             56.4508    21.0323   2.684  0.00728 ** 
## hhinc4_35-50k            130.1379    19.7893   6.576 5.03e-11 ***
## hhinc5_50k-75k           239.3397    18.8599  12.690  < 2e-16 ***
## hhinc6_75kplus           465.2678    17.8290  26.096  < 2e-16 ***
## covidint2precov_Q42019    -0.3496    12.8617  -0.027  0.97832    
## covidint3precov_Q12020    25.3863    14.3363   1.771  0.07662 .  
## covidint4earlycov_Q22020  14.8477    11.7747   1.261  0.20734    
## covidint5midcov_Q32020    18.2010    12.3558   1.473  0.14076    
## covidint6latecov_Q42020    3.4219    15.1537   0.226  0.82135    
## agegrp(24,34]            404.4469    13.0457  31.002  < 2e-16 ***
## agegrp(34,44]            488.6643    13.0471  37.454  < 2e-16 ***
## agegrp(44,54]            492.0499    13.1771  37.341  < 2e-16 ***
## agegrp(54,64]            470.6266    13.7552  34.214  < 2e-16 ***
## raceethNH_blk             50.6578    12.8216   3.951 7.83e-05 ***
## raceethNH_other          132.0393    15.1258   8.729  < 2e-16 ***
## raceethNH_wht            136.5762     8.6503  15.789  < 2e-16 ***
## male1                    171.1952     7.4268  23.051  < 2e-16 ***
## is.na(industry)TRUE       63.4951     9.1209   6.961 3.54e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 405.8 on 12015 degrees of freedom
##   (1060 observations deleted due to missingness)
## Multiple R-squared:  0.3435, Adjusted R-squared:  0.3425 
## F-statistic: 330.9 on 19 and 12015 DF,  p-value: < 2.2e-16
#Those with missing industry have higher weekly incomes, suggesting that weekly income may not be missing at random with respect to industry.

Multiple Imputaion

##Pattern of Missingness

md.pattern(ds3[,c("earnweekna", "hhinc", "agegrp","covidint","raceeth","male","industry")])

##      hhinc covidint raceeth male agegrp earnweekna industry     
## 9501     1        1       1    1      1          1        1    0
## 2534     1        1       1    1      1          1        0    1
## 538      1        1       1    1      1          0        1    1
## 445      1        1       1    1      1          0        0    2
## 37       1        1       1    1      0          1        1    1
## 7        1        1       1    1      0          1        0    2
## 2        1        1       1    1      0          0        1    2
## 31       1        1       1    1      0          0        0    3
##          0        0       0    0     77       1016     3017 4110

##Multiple Imputation

dat2<-ds3
samp2<-sample(1:dim(dat2)[1], replace = F, size = 500)
dat2$earnwkknock<-dat2$earnweekna
dat2$earnwkknock[samp2]<-NA

head(dat2[, c("earnwkknock","earnweekna")])
imp<-mice(data = dat2[,c("earnweekna", "hhinc", "agegrp","covidint","raceeth","male","industry")], m = 5)
## 
##  iter imp variable
##   1   1  earnweekna  agegrp  industry
##   1   2  earnweekna  agegrp  industry
##   1   3  earnweekna  agegrp  industry
##   1   4  earnweekna  agegrp  industry
##   1   5  earnweekna  agegrp  industry
##   2   1  earnweekna  agegrp  industry
##   2   2  earnweekna  agegrp  industry
##   2   3  earnweekna  agegrp  industry
##   2   4  earnweekna  agegrp  industry
##   2   5  earnweekna  agegrp  industry
##   3   1  earnweekna  agegrp  industry
##   3   2  earnweekna  agegrp  industry
##   3   3  earnweekna  agegrp  industry
##   3   4  earnweekna  agegrp  industry
##   3   5  earnweekna  agegrp  industry
##   4   1  earnweekna  agegrp  industry
##   4   2  earnweekna  agegrp  industry
##   4   3  earnweekna  agegrp  industry
##   4   4  earnweekna  agegrp  industry
##   4   5  earnweekna  agegrp  industry
##   5   1  earnweekna  agegrp  industry
##   5   2  earnweekna  agegrp  industry
##   5   3  earnweekna  agegrp  industry
##   5   4  earnweekna  agegrp  industry
##   5   5  earnweekna  agegrp  industry

Analysis of Imputed data

print(imp)
## Class: mids
## Number of multiple imputations:  5 
## Imputation methods:
## earnweekna      hhinc     agegrp   covidint    raceeth       male   industry 
##      "pmm"         ""  "polyreg"         ""         ""         ""  "polyreg" 
## PredictorMatrix:
##            earnweekna hhinc agegrp covidint raceeth male industry
## earnweekna          0     1      1        1       1    1        1
## hhinc               1     0      1        1       1    1        1
## agegrp              1     1      0        1       1    1        1
## covidint            1     1      1        0       1    1        1
## raceeth             1     1      1        1       0    1        1
## male                1     1      1        1       1    0        1
head(imp$imp$earnweekna)
summary(imp$imp$earnweekna)
##        1                2                3                4         
##  Min.   :   3.0   Min.   :  13.0   Min.   :   6.0   Min.   :  17.0  
##  1st Qu.: 538.8   1st Qu.: 515.0   1st Qu.: 526.0   1st Qu.: 550.2  
##  Median : 908.0   Median : 849.0   Median : 931.0   Median : 927.5  
##  Mean   : 944.5   Mean   : 933.4   Mean   : 948.6   Mean   : 943.7  
##  3rd Qu.:1334.0   3rd Qu.:1357.8   3rd Qu.:1346.8   3rd Qu.:1334.5  
##  Max.   :1843.0   Max.   :1843.0   Max.   :1843.0   Max.   :1843.0  
##        5         
##  Min.   :   6.0  
##  1st Qu.: 548.0  
##  Median : 931.0  
##  Mean   : 958.2  
##  3rd Qu.:1346.0  
##  Max.   :1843.0
summary(ds3$earnweekna)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       1     579     972    1003    1416    1843    1016
head(imp$imp$industry)
summary(imp$imp$industry)
##        1             2             3             4             5      
##  Health :457   Health :479   Health :440   Health :462   Health :496  
##  Teach  :430   Teach  :423   retail :440   Teach  :396   retail :392  
##  retail :418   retail :380   Teach  :395   retail :394   Teach  :387  
##  manuf  :342   manuf  :327   manuf  :353   manuf  :336   manuf  :367  
##  const  :277   const  :280   const  :263   const  :302   const  :284  
##  bnk_ins:226   trans  :224   bnk_ins:236   bnk_ins:238   bnk_ins:252  
##  (Other):867   (Other):904   (Other):890   (Other):889   (Other):839
library(lattice)
stripplot(imp,earnweekna~raceeth|.imp, pch=20)

dat.imp<-complete(imp, action = 1)

head(dat.imp[is.na(ds3$earnweekna)==T,], n=10)
head(ds3[is.na(ds3$earnweekna)==T,c("earnweekna", "hhinc", "agegrp","covidint","raceeth","male","industry")], n=10)
fit.wkinc<-with(data=imp ,expr=lm(earnweekna~hhinc+agegrp+covidint+raceeth+male+industry))
fit.wkinc
## call :
## with.mids(data = imp, expr = lm(earnweekna ~ hhinc + agegrp + 
##     covidint + raceeth + male + industry))
## 
## call1 :
## mice(data = dat2[, c("earnweekna", "hhinc", "agegrp", "covidint", 
##     "raceeth", "male", "industry")], m = 5)
## 
## nmis :
## earnweekna      hhinc     agegrp   covidint    raceeth       male   industry 
##       1016          0         77          0          0          0       3017 
## 
## analyses :
## [[1]]
## 
## Call:
## lm(formula = earnweekna ~ hhinc + agegrp + covidint + raceeth + 
##     male + industry)
## 
## Coefficients:
##              (Intercept)             hhinc2_15-25k             hhinc3_25-35k  
##                  413.990                    28.086                    59.157  
##            hhinc4_35-50k            hhinc5_50k-75k            hhinc6_75kplus  
##                  119.043                   222.515                   432.958  
##            agegrp(24,34]             agegrp(34,44]             agegrp(44,54]  
##                  327.995                   411.652                   417.254  
##            agegrp(54,64]    covidint2precov_Q42019    covidint3precov_Q12020  
##                  391.685                   -10.769                    20.211  
## covidint4earlycov_Q22020    covidint5midcov_Q32020   covidint6latecov_Q42020  
##                    4.397                     8.301                    -4.313  
##            raceethNH_blk           raceethNH_other             raceethNH_wht  
##                   41.084                   143.043                   134.036  
##                    male1             industryconst          industrycrimjust  
##                  165.055                  -155.432                   -99.961  
##       industrygen_supprt            industryHealth             industrymanuf  
##                 -240.305                  -157.513                  -118.035  
##        industrymedia_ent          industryperscare           industryrealest  
##                 -214.152                  -387.994                  -131.634  
##     industryrepair_maint            industryretail        industryrstrnt_svc  
##                 -245.155                  -321.806                  -427.733  
##            industryTeach             industrytrans               industryutl  
##                 -180.548                  -185.299                   -18.774  
##           industrywhsale  
##                 -190.564  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = earnweekna ~ hhinc + agegrp + covidint + raceeth + 
##     male + industry)
## 
## Coefficients:
##              (Intercept)             hhinc2_15-25k             hhinc3_25-35k  
##                  404.728                    11.603                    52.868  
##            hhinc4_35-50k            hhinc5_50k-75k            hhinc6_75kplus  
##                  114.845                   214.995                   433.476  
##            agegrp(24,34]             agegrp(34,44]             agegrp(44,54]  
##                  331.604                   414.167                   422.755  
##            agegrp(54,64]    covidint2precov_Q42019    covidint3precov_Q12020  
##                  394.315                    -2.010                    22.036  
## covidint4earlycov_Q22020    covidint5midcov_Q32020   covidint6latecov_Q42020  
##                    8.273                    12.648                    -5.388  
##            raceethNH_blk           raceethNH_other             raceethNH_wht  
##                   41.005                   146.208                   132.259  
##                    male1             industryconst          industrycrimjust  
##                  163.726                  -139.429                  -107.904  
##       industrygen_supprt            industryHealth             industrymanuf  
##                 -239.695                  -152.400                  -120.043  
##        industrymedia_ent          industryperscare           industryrealest  
##                 -228.755                  -434.742                  -135.243  
##     industryrepair_maint            industryretail        industryrstrnt_svc  
##                 -228.377                  -309.503                  -408.997  
##            industryTeach             industrytrans               industryutl  
##                 -175.187                  -184.954                   -39.757  
##           industrywhsale  
##                 -162.555  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = earnweekna ~ hhinc + agegrp + covidint + raceeth + 
##     male + industry)
## 
## Coefficients:
##              (Intercept)             hhinc2_15-25k             hhinc3_25-35k  
##                  404.874                    23.042                    58.569  
##            hhinc4_35-50k            hhinc5_50k-75k            hhinc6_75kplus  
##                  112.800                   222.052                   428.290  
##            agegrp(24,34]             agegrp(34,44]             agegrp(44,54]  
##                  337.091                   417.495                   421.676  
##            agegrp(54,64]    covidint2precov_Q42019    covidint3precov_Q12020  
##                  389.940                    -3.361                    20.125  
## covidint4earlycov_Q22020    covidint5midcov_Q32020   covidint6latecov_Q42020  
##                    9.554                     9.907                     2.436  
##            raceethNH_blk           raceethNH_other             raceethNH_wht  
##                   45.266                   142.534                   132.884  
##                    male1             industryconst          industrycrimjust  
##                  161.896                  -143.321                   -87.369  
##       industrygen_supprt            industryHealth             industrymanuf  
##                 -241.269                  -152.248                  -110.352  
##        industrymedia_ent          industryperscare           industryrealest  
##                 -224.570                  -407.861                  -133.023  
##     industryrepair_maint            industryretail        industryrstrnt_svc  
##                 -227.647                  -314.851                  -424.885  
##            industryTeach             industrytrans               industryutl  
##                 -165.146                  -179.743                    -3.626  
##           industrywhsale  
##                 -185.576  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = earnweekna ~ hhinc + agegrp + covidint + raceeth + 
##     male + industry)
## 
## Coefficients:
##              (Intercept)             hhinc2_15-25k             hhinc3_25-35k  
##                  401.203                    17.193                    63.127  
##            hhinc4_35-50k            hhinc5_50k-75k            hhinc6_75kplus  
##                  123.205                   220.975                   430.359  
##            agegrp(24,34]             agegrp(34,44]             agegrp(44,54]  
##                  328.918                   412.331                   413.321  
##            agegrp(54,64]    covidint2precov_Q42019    covidint3precov_Q12020  
##                  389.173                    -8.918                    18.760  
## covidint4earlycov_Q22020    covidint5midcov_Q32020   covidint6latecov_Q42020  
##                    7.652                    10.534                    -6.355  
##            raceethNH_blk           raceethNH_other             raceethNH_wht  
##                   49.598                   136.272                   136.207  
##                    male1             industryconst          industrycrimjust  
##                  166.707                  -143.036                   -89.088  
##       industrygen_supprt            industryHealth             industrymanuf  
##                 -225.693                  -146.695                  -109.175  
##        industrymedia_ent          industryperscare           industryrealest  
##                 -202.995                  -378.792                  -131.005  
##     industryrepair_maint            industryretail        industryrstrnt_svc  
##                 -225.252                  -307.456                  -422.689  
##            industryTeach             industrytrans               industryutl  
##                 -165.937                  -175.720                   -41.066  
##           industrywhsale  
##                 -156.684  
## 
## 
## [[5]]
## 
## Call:
## lm(formula = earnweekna ~ hhinc + agegrp + covidint + raceeth + 
##     male + industry)
## 
## Coefficients:
##              (Intercept)             hhinc2_15-25k             hhinc3_25-35k  
##                  412.326                    18.449                    46.625  
##            hhinc4_35-50k            hhinc5_50k-75k            hhinc6_75kplus  
##                  114.346                   217.858                   429.159  
##            agegrp(24,34]             agegrp(34,44]             agegrp(44,54]  
##                  337.919                   415.668                   417.913  
##            agegrp(54,64]    covidint2precov_Q42019    covidint3precov_Q12020  
##                  393.794                    -7.232                    22.522  
## covidint4earlycov_Q22020    covidint5midcov_Q32020   covidint6latecov_Q42020  
##                   11.952                    16.589                     2.484  
##            raceethNH_blk           raceethNH_other             raceethNH_wht  
##                   42.805                   132.003                   131.266  
##                    male1             industryconst          industrycrimjust  
##                  165.836                  -163.389                  -101.188  
##       industrygen_supprt            industryHealth             industrymanuf  
##                 -239.904                  -150.018                  -124.488  
##        industrymedia_ent          industryperscare           industryrealest  
##                 -223.122                  -395.397                  -160.898  
##     industryrepair_maint            industryretail        industryrstrnt_svc  
##                 -261.807                  -320.941                  -426.935  
##            industryTeach             industrytrans               industryutl  
##                 -181.117                  -180.402                   -42.377  
##           industrywhsale  
##                 -172.075
with (data=imp, exp=(sd(earnweekna)))
## call :
## with.mids(data = imp, expr = (sd(earnweekna)))
## 
## call1 :
## mice(data = dat2[, c("earnweekna", "hhinc", "agegrp", "covidint", 
##     "raceeth", "male", "industry")], m = 5)
## 
## nmis :
## earnweekna      hhinc     agegrp   covidint    raceeth       male   industry 
##       1016          0         77          0          0          0       3017 
## 
## analyses :
## [[1]]
## [1] 501.9464
## 
## [[2]]
## [1] 503.3447
## 
## [[3]]
## [1] 501.7715
## 
## [[4]]
## [1] 501.0831
## 
## [[5]]
## [1] 501.6806
with (data=imp, exp=(prop.table(table(hhinc))))
## call :
## with.mids(data = imp, expr = (prop.table(table(hhinc))))
## 
## call1 :
## mice(data = dat2[, c("earnweekna", "hhinc", "agegrp", "covidint", 
##     "raceeth", "male", "industry")], m = 5)
## 
## nmis :
## earnweekna      hhinc     agegrp   covidint    raceeth       male   industry 
##       1016          0         77          0          0          0       3017 
## 
## analyses :
## [[1]]
## hhinc
##    1_lt15k   2_15-25k   3_25-35k   4_35-50k  5_50k-75k  6_75kplus 
## 0.05185185 0.05200458 0.08552883 0.12325315 0.18877434 0.49858725 
## 
## [[2]]
## hhinc
##    1_lt15k   2_15-25k   3_25-35k   4_35-50k  5_50k-75k  6_75kplus 
## 0.05185185 0.05200458 0.08552883 0.12325315 0.18877434 0.49858725 
## 
## [[3]]
## hhinc
##    1_lt15k   2_15-25k   3_25-35k   4_35-50k  5_50k-75k  6_75kplus 
## 0.05185185 0.05200458 0.08552883 0.12325315 0.18877434 0.49858725 
## 
## [[4]]
## hhinc
##    1_lt15k   2_15-25k   3_25-35k   4_35-50k  5_50k-75k  6_75kplus 
## 0.05185185 0.05200458 0.08552883 0.12325315 0.18877434 0.49858725 
## 
## [[5]]
## hhinc
##    1_lt15k   2_15-25k   3_25-35k   4_35-50k  5_50k-75k  6_75kplus 
## 0.05185185 0.05200458 0.08552883 0.12325315 0.18877434 0.49858725
with (data=imp, exp=(prop.table(table(raceeth))))
## call :
## with.mids(data = imp, expr = (prop.table(table(raceeth))))
## 
## call1 :
## mice(data = dat2[, c("earnweekna", "hhinc", "agegrp", "covidint", 
##     "raceeth", "male", "industry")], m = 5)
## 
## nmis :
## earnweekna      hhinc     agegrp   covidint    raceeth       male   industry 
##       1016          0         77          0          0          0       3017 
## 
## analyses :
## [[1]]
## raceeth
##       Hisp     NH_blk   NH_other     NH_wht 
## 0.36197022 0.10935472 0.07415044 0.45452463 
## 
## [[2]]
## raceeth
##       Hisp     NH_blk   NH_other     NH_wht 
## 0.36197022 0.10935472 0.07415044 0.45452463 
## 
## [[3]]
## raceeth
##       Hisp     NH_blk   NH_other     NH_wht 
## 0.36197022 0.10935472 0.07415044 0.45452463 
## 
## [[4]]
## raceeth
##       Hisp     NH_blk   NH_other     NH_wht 
## 0.36197022 0.10935472 0.07415044 0.45452463 
## 
## [[5]]
## raceeth
##       Hisp     NH_blk   NH_other     NH_wht 
## 0.36197022 0.10935472 0.07415044 0.45452463
with (data=imp, exp=(prop.table(table(agegrp))))
## call :
## with.mids(data = imp, expr = (prop.table(table(agegrp))))
## 
## call1 :
## mice(data = dat2[, c("earnweekna", "hhinc", "agegrp", "covidint", 
##     "raceeth", "male", "industry")], m = 5)
## 
## nmis :
## earnweekna      hhinc     agegrp   covidint    raceeth       male   industry 
##       1016          0         77          0          0          0       3017 
## 
## analyses :
## [[1]]
## agegrp
##   (16,24]   (24,34]   (34,44]   (44,54]   (54,64] 
## 0.1285223 0.2306224 0.2347461 0.2216113 0.1844979 
## 
## [[2]]
## agegrp
##   (16,24]   (24,34]   (34,44]   (44,54]   (54,64] 
## 0.1287514 0.2303169 0.2342879 0.2219168 0.1847270 
## 
## [[3]]
## agegrp
##   (16,24]   (24,34]   (34,44]   (44,54]   (54,64] 
## 0.1280641 0.2301642 0.2348988 0.2219168 0.1849561 
## 
## [[4]]
## agegrp
##   (16,24]   (24,34]   (34,44]   (44,54]   (54,64] 
## 0.1285987 0.2304696 0.2346697 0.2216113 0.1846506 
## 
## [[5]]
## agegrp
##   (16,24]   (24,34]   (34,44]   (44,54]   (54,64] 
## 0.1288278 0.2301642 0.2345170 0.2217640 0.1847270
est.p<-pool(fit.wkinc)
print(est.p)
## Class: mipo    m = 5 
##                        term m    estimate       ubar          b          t
## 1               (Intercept) 5  407.423970  573.46379  29.904755  609.34950
## 2             hhinc2_15-25k 5   19.674521  459.52126  38.723072  505.98894
## 3             hhinc3_25-35k 5   56.069177  369.85935  41.262112  419.37389
## 4             hhinc4_35-50k 5  116.847773  327.28738  17.972563  348.85446
## 5            hhinc5_50k-75k 5  219.678972  297.13457  10.153007  309.31818
## 6            hhinc6_75kplus 5  430.848453  265.59740   5.248646  271.89578
## 7             agegrp(24,34] 5  332.705284  153.25133  21.040591  178.50004
## 8             agegrp(34,44] 5  414.262530  155.12874   5.744996  162.02273
## 9             agegrp(44,54] 5  418.583630  158.67001  14.217186  175.73063
## 10            agegrp(54,64] 5  391.781382  171.81283   5.168205  178.01467
## 11   covidint2precov_Q42019 5   -6.457798  144.30905  13.652856  160.69248
## 12   covidint3precov_Q12020 5   20.730708  179.50009   2.358605  182.33041
## 13 covidint4earlycov_Q22020 5    8.365759  120.80773   7.636276  129.97126
## 14   covidint5midcov_Q32020 5   11.595577  132.59360  10.219570  144.85709
## 15  covidint6latecov_Q42020 5   -2.227088  200.31288  18.830905  222.90996
## 16            raceethNH_blk 5   43.951646  145.56693  12.955969  161.11410
## 17          raceethNH_other 5  140.012036  202.20739  33.015700  241.82623
## 18            raceethNH_wht 5  133.330418   65.77275   3.595442   70.08728
## 19                    male1 5  164.644222   59.12333   3.560269   63.39565
## 20            industryconst 5 -148.921441  322.53046 101.951507  444.87227
## 21         industrycrimjust 5  -97.101899  644.07478  75.128643  734.22915
## 22       industrygen_supprt 5 -237.373030  544.43929  42.998444  596.03743
## 23           industryHealth 5 -151.774786  255.13503  15.608440  273.86516
## 24            industrymanuf 5 -116.418406  288.90247  42.533936  339.94320
## 25        industrymedia_ent 5 -218.718963 1638.05955 105.606895 1764.78782
## 26         industryperscare 5 -400.957168 1601.59619 469.845920 2165.41129
## 27          industryrealest 5 -138.360559 1289.42432 161.376365 1483.07596
## 28     industryrepair_maint 5 -237.647552  871.15167 244.904812 1165.03744
## 29           industryretail 5 -314.911450  264.55352  42.180828  315.17052
## 30       industryrstrnt_svc 5 -422.247956  343.76605  58.696104  414.20138
## 31            industryTeach 5 -173.586932  264.12769  59.371998  335.37409
## 32            industrytrans 5 -181.223716  367.25890  15.920080  386.36300
## 33              industryutl 5  -29.119961  851.47627 297.145157 1208.05046
## 34           industrywhsale 5 -173.490757  535.13068 210.399753  787.61038
##    dfcom         df        riv     lambda        fmi
## 1  13061 1054.37486 0.06257711 0.05889183 0.06067191
## 2  13061  456.04768 0.10112195 0.09183538 0.09579211
## 3  13061  279.96885 0.13387396 0.11806776 0.12430118
## 4  13061  964.20036 0.06589645 0.06182256 0.06376255
## 5  13061 2138.66806 0.04100367 0.03938859 0.04028566
## 6  13061 4704.95483 0.02371399 0.02316467 0.02357964
## 7  13061  196.41854 0.16475360 0.14144932 0.15005986
## 8  13061 1877.59999 0.04444048 0.04254956 0.04356780
## 9  13061  409.64598 0.10752267 0.09708394 0.10146017
## 10 13061 2612.48044 0.03609653 0.03483896 0.03557700
## 11 13061  372.58062 0.11353014 0.10195516 0.10673733
## 12 13061 7245.09913 0.01576783 0.01552306 0.01579471
## 13 13061  754.66134 0.07585220 0.07050429 0.07295788
## 14 13061  533.20459 0.09248926 0.08465919 0.08807334
## 15 13061  376.74105 0.11280896 0.10137316 0.10610600
## 16 13061  414.47126 0.10680422 0.09649784 0.10082629
## 17 13061  147.01994 0.19593171 0.16383186 0.17497928
## 18 13061  971.82821 0.06559754 0.06155939 0.06348474
## 19 13061  821.34879 0.07226120 0.06739141 0.06965407
## 20 13061   52.59706 0.37931862 0.27500435 0.30108471
## 21 13061  259.30219 0.13997501 0.12278779 0.12947635
## 22 13061  510.89118 0.09477298 0.08656861 0.09012357
## 23 13061  799.00516 0.07341261 0.06839179 0.07071499
## 24 13061  174.64278 0.17667112 0.15014486 0.15971299
## 25 13061  729.05128 0.07736488 0.07180935 0.07434522
## 26 13061   58.64385 0.35203325 0.26037322 0.28436999
## 27 13061  229.85920 0.15018457 0.13057432 0.13804172
## 28 13061   62.45911 0.33735317 0.25225436 0.27510055
## 29 13061  152.91755 0.19132988 0.16060193 0.17136913
## 30 13061  136.58228 0.20489319 0.17005092 0.18194282
## 31 13061   87.87524 0.26974225 0.21243859 0.22977139
## 32 13061 1445.53577 0.05201806 0.04944598 0.05075841
## 33 13061   45.68456 0.41877173 0.29516498 0.32412016
## 34 13061   38.75510 0.47180944 0.32056422 0.35310807
lam<-data.frame(lam=est.p$pooled$lambda, param=row.names(est.p$pooled))

ggplot(data=lam,aes(x=param, y=lam))+geom_col()+theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Model comparisons

fit1<-lm(earnweekna~agegrp+covidint+raceeth+male+industry, ds3)
fit2<-lm(wkearn.mean~agegrp+covidint+raceeth+male+industry, ds3)
fit.imp<-lm(earnweekna~agegrp+covidint+raceeth+male+industry, data=dat.imp)

summary(fit1)
## 
## Call:
## lm(formula = earnweekna ~ agegrp + covidint + raceeth + male + 
##     industry, data = ds3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1322.54  -309.14    -5.06   318.16  1268.72 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               639.197     23.997  26.636  < 2e-16 ***
## agegrp(24,34]             331.444     15.238  21.751  < 2e-16 ***
## agegrp(34,44]             446.261     15.259  29.246  < 2e-16 ***
## agegrp(44,54]             468.015     15.342  30.505  < 2e-16 ***
## agegrp(54,64]             414.567     16.066  25.804  < 2e-16 ***
## covidint2precov_Q42019     -2.620     14.964  -0.175 0.861025    
## covidint3precov_Q12020     28.432     16.803   1.692 0.090660 .  
## covidint4earlycov_Q22020   23.350     13.791   1.693 0.090458 .  
## covidint5midcov_Q32020     24.430     14.355   1.702 0.088816 .  
## covidint6latecov_Q42020    14.985     17.561   0.853 0.393498    
## raceethNH_blk              41.344     14.868   2.781 0.005434 ** 
## raceethNH_other           171.691     18.018   9.529  < 2e-16 ***
## raceethNH_wht             197.963      9.718  20.371  < 2e-16 ***
## male1                     172.482      9.566  18.031  < 2e-16 ***
## industryconst            -228.258     22.361 -10.208  < 2e-16 ***
## industrycrimjust          -93.967     31.419  -2.991 0.002790 ** 
## industrygen_supprt       -262.750     28.946  -9.077  < 2e-16 ***
## industryHealth           -173.264     20.083  -8.627  < 2e-16 ***
## industrymanuf            -149.718     21.311  -7.026 2.28e-12 ***
## industrymedia_ent        -186.589     50.212  -3.716 0.000204 ***
## industryperscare         -507.927     49.883 -10.182  < 2e-16 ***
## industryrealest          -156.656     45.300  -3.458 0.000546 ***
## industryrepair_maint     -312.430     36.445  -8.573  < 2e-16 ***
## industryretail           -366.937     20.329 -18.050  < 2e-16 ***
## industryrstrnt_svc       -499.603     23.021 -21.702  < 2e-16 ***
## industryTeach            -180.498     20.417  -8.841  < 2e-16 ***
## industrytrans            -214.733     23.995  -8.949  < 2e-16 ***
## industryutl               -26.488     37.297  -0.710 0.477597    
## industrywhsale           -204.364     29.085  -7.026 2.26e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 417.6 on 9472 degrees of freedom
##   (3594 observations deleted due to missingness)
## Multiple R-squared:  0.2807, Adjusted R-squared:  0.2786 
## F-statistic:   132 on 28 and 9472 DF,  p-value: < 2.2e-16
summary(fit2)
## 
## Call:
## lm(formula = wkearn.mean ~ agegrp + covidint + raceeth + male + 
##     industry, data = ds3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1305.93  -299.08   -12.28   309.76  1246.76 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               653.604     23.020  28.393  < 2e-16 ***
## agegrp(24,34]             319.407     14.597  21.881  < 2e-16 ***
## agegrp(34,44]             426.145     14.637  29.114  < 2e-16 ***
## agegrp(44,54]             447.422     14.732  30.372  < 2e-16 ***
## agegrp(54,64]             394.126     15.392  25.606  < 2e-16 ***
## covidint2precov_Q42019     -1.457     14.315  -0.102  0.91891    
## covidint3precov_Q12020     29.683     16.090   1.845  0.06508 .  
## covidint4earlycov_Q22020   24.313     13.171   1.846  0.06493 .  
## covidint5midcov_Q32020     24.561     13.721   1.790  0.07348 .  
## covidint6latecov_Q42020    13.093     16.814   0.779  0.43618    
## raceethNH_blk              39.354     14.210   2.770  0.00562 ** 
## raceethNH_other           158.512     17.200   9.216  < 2e-16 ***
## raceethNH_wht             186.290      9.313  20.004  < 2e-16 ***
## male1                     162.611      9.145  17.781  < 2e-16 ***
## industryconst            -215.901     21.388 -10.094  < 2e-16 ***
## industrycrimjust          -86.974     30.222  -2.878  0.00401 ** 
## industrygen_supprt       -246.286     27.820  -8.853  < 2e-16 ***
## industryHealth           -164.652     19.210  -8.571  < 2e-16 ***
## industrymanuf            -140.034     20.424  -6.856 7.48e-12 ***
## industrymedia_ent        -175.485     48.154  -3.644  0.00027 ***
## industryperscare         -477.006     47.339 -10.076  < 2e-16 ***
## industryrealest          -150.530     43.098  -3.493  0.00048 ***
## industryrepair_maint     -290.961     34.691  -8.387  < 2e-16 ***
## industryretail           -343.114     19.431 -17.658  < 2e-16 ***
## industryrstrnt_svc       -466.436     22.011 -21.191  < 2e-16 ***
## industryTeach            -170.019     19.532  -8.705  < 2e-16 ***
## industrytrans            -201.859     22.911  -8.811  < 2e-16 ***
## industryutl               -28.763     35.433  -0.812  0.41695    
## industrywhsale           -193.508     27.781  -6.966 3.48e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 410.7 on 10010 degrees of freedom
##   (3056 observations deleted due to missingness)
## Multiple R-squared:  0.2648, Adjusted R-squared:  0.2627 
## F-statistic: 128.7 on 28 and 10010 DF,  p-value: < 2.2e-16
summary(fit.imp)
## 
## Call:
## lm(formula = earnweekna ~ agegrp + covidint + raceeth + male + 
##     industry, data = dat.imp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1358.88  -311.82    -0.03   324.29  1263.19 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               646.066     20.761  31.120  < 2e-16 ***
## agegrp(24,34]             331.268     13.251  25.000  < 2e-16 ***
## agegrp(34,44]             443.537     13.328  33.278  < 2e-16 ***
## agegrp(44,54]             458.331     13.469  34.028  < 2e-16 ***
## agegrp(54,64]             408.690     14.033  29.123  < 2e-16 ***
## covidint2precov_Q42019     -6.589     12.875  -0.512  0.60883    
## covidint3precov_Q12020     37.129     14.351   2.587  0.00969 ** 
## covidint4earlycov_Q22020   23.079     11.772   1.960  0.04996 *  
## covidint5midcov_Q32020     27.617     12.332   2.239  0.02515 *  
## covidint6latecov_Q42020    11.667     15.163   0.769  0.44166    
## raceethNH_blk              52.571     12.934   4.065 4.84e-05 ***
## raceethNH_other           227.047     15.105  15.032  < 2e-16 ***
## raceethNH_wht             222.939      8.426  26.459  < 2e-16 ***
## male1                     188.584      8.248  22.864  < 2e-16 ***
## industryconst            -248.590     19.182 -12.959  < 2e-16 ***
## industrycrimjust         -113.520     27.258  -4.165 3.14e-05 ***
## industrygen_supprt       -279.916     25.154 -11.128  < 2e-16 ***
## industryHealth           -187.418     17.177 -10.911  < 2e-16 ***
## industrymanuf            -158.328     18.239  -8.681  < 2e-16 ***
## industrymedia_ent        -219.692     42.972  -5.112 3.23e-07 ***
## industryperscare         -504.037     44.030 -11.447  < 2e-16 ***
## industryrealest          -154.706     38.198  -4.050 5.15e-05 ***
## industryrepair_maint     -329.208     31.235 -10.540  < 2e-16 ***
## industryretail           -390.314     17.377 -22.461  < 2e-16 ***
## industryrstrnt_svc       -524.325     19.771 -26.520  < 2e-16 ***
## industryTeach            -196.441     17.425 -11.274  < 2e-16 ***
## industrytrans            -234.988     20.435 -11.499  < 2e-16 ***
## industryutl               -16.533     31.659  -0.522  0.60152    
## industrywhsale           -236.095     25.097  -9.407  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 422.7 on 13066 degrees of freedom
## Multiple R-squared:  0.2923, Adjusted R-squared:  0.2907 
## F-statistic: 192.7 on 28 and 13066 DF,  p-value: < 2.2e-16

Were the results similar between the mean/modal and multiple imputed data sets?

The results between the mean and multiple imputed models are similar in terms of beta values and signs. However, the imputed model performed better as indicated by the higher adjusted R squared value. The R squared value indicates that 29% of the variation in weekly income is explained by the model, with age group, race/ethnicity, sex and industry all being statistically significant at the >.99 level.

##How do the results compare to the results from the model fit with the data source with missing values? Again, the betas are largely similar, however the adjusted R squared value is greater for the imputed model. The model fit using the missing values slightly outperformed the mean imputed model.