library(car)
## Loading required package: carData
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
brfss20sm <- readRDS("C:/Users/shahi/Dropbox/PC/Downloads/brfss20sm.rds")
names(brfss20sm) <- tolower(gsub(pattern = "_",replacement = "",x = names(brfss20sm)))
#Measure an outcome variable and at least 5 predictors:
#Age cut into intervals
brfss20sm$agec<-cut(brfss20sm$age80,
breaks=c(0,29,39,59,79,99))
brfss20sm$ageg<-factor(brfss20sm$ageg)
#depressive disorder
brfss20sm$depression<-Recode(brfss20sm$addepev3, recodes="1=1; 2=0; 7:9=NA")
#healthy mental health days
brfss20sm$healthmdays <- Recode (brfss20sm$menthlth,recodes = "88=0; 77=NA; 99=NA")
#smoking currently
brfss20sm$smoke<-Recode(brfss20sm$smoker3,
recodes="1:2 ='Current'; 3 ='Former';4 ='NeverSmoked'; else = NA",
as.factor=T)
brfss20sm$smoke<-relevel(brfss20sm$smoke,
ref = "NeverSmoked")
#employment
brfss20sm$employ<-Recode(brfss20sm$employ1,
recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss20sm$employ<-relevel(brfss20sm$employ,
ref='Employed')
#marital status
brfss20sm$marst<-Recode(brfss20sm$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss20sm$marst<-relevel(brfss20sm$marst,
ref='married')
#insurance
brfss20sm$ins<-Recode(brfss20sm$hlthpln1,
recodes ="7:9=NA; 1=1;2=0")
brfss20sm$checkup <- Recode (brfss20sm$checkup1, recodes = "1:2 = 1; 3:4 =0; 8=0; else=NA")
##Report the pattern of missingness among all of these variables:
summary(brfss20sm[, c("ins", "smoke", "checkup" , "depression", "employ", "marst")])
## ins smoke checkup depression
## Min. :0.0000 NeverSmoked:113280 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 Current : 22708 1st Qu.:1.0000 1st Qu.:0.0000
## Median :1.0000 Former : 48272 Median :1.0000 Median :0.0000
## Mean :0.9166 NA's : 11128 Mean :0.9014 Mean :0.1881
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :996 NA's :2374 NA's :1093
## employ marst
## Employed:102906 married :98739
## nilf : 26377 cohab : 8116
## retired : 51605 divorced :24375
## unable : 10561 nm :39391
## NA's : 3939 separated: 3990
## widowed :18542
## NA's : 2235
Which shows that, among these recoded variables, smoke , the currently smoking variable, 11128 people in the BRFSS, or 5.6953344% of the sample.
The lowest number of missings is in the insurance variable, which only has 0.5097549% missing.
##Perform a mean (a mean for numeric data) or a modal imputation (for categorical data) of all values. Perform the analysis using this imputed data. What are your results?:
table(brfss20sm$employ)
##
## Employed nilf retired unable
## 102906 26377 51605 10561
#find the most common value
mcv.employ<-factor(names(which.max(table(brfss20sm$employ))), levels=levels(brfss20sm$employ))
mcv.employ
## [1] Employed
## Levels: Employed nilf retired unable
#impute the cases
brfss20sm$employ.imp<-as.factor(ifelse(is.na(brfss20sm$employ)==T, mcv.employ, brfss20sm$employ))
levels(brfss20sm$employ.imp)<-levels(brfss20sm$employ)
prop.table(table(brfss20sm$employ))
##
## Employed nilf retired unable
## 0.53751130 0.13777560 0.26954959 0.05516352
prop.table(table(brfss20sm$employ.imp))
##
## Employed nilf retired unable
## 0.54683502 0.13499806 0.26411550 0.05405143
barplot(prop.table(table(brfss20sm$employ)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(brfss20sm$employ.imp)), main="Imputed Data",ylim=c(0, .6))
table(brfss20sm$marst)
##
## married cohab divorced nm separated widowed
## 98739 8116 24375 39391 3990 18542
#find the most common value
mcv.marst<-factor(names(which.max(table(brfss20sm$marst))), levels=levels(brfss20sm$marst))
mcv.marst
## [1] married
## Levels: married cohab divorced nm separated widowed
#impute the cases
brfss20sm$marst.imp<-as.factor(ifelse(is.na(brfss20sm$marst)==T, mcv.marst, brfss20sm$marst))
levels(brfss20sm$marst.imp)<-levels(brfss20sm$marst)
prop.table(table(brfss20sm$marst))
##
## married cohab divorced nm separated widowed
## 0.51119579 0.04201850 0.12619530 0.20393678 0.02065720 0.09599644
prop.table(table(brfss20sm$marst.imp))
##
## married cohab divorced nm separated widowed
## 0.51678711 0.04153786 0.12475178 0.20160399 0.02042091 0.09489836
barplot(prop.table(table(brfss20sm$marst)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(brfss20sm$marst.imp)), main="Imputed Data",ylim=c(0, .6))
table(brfss20sm$smoke)
##
## NeverSmoked Current Former
## 113280 22708 48272
#find the most common value
mcv.smoke<-factor(names(which.max(table(brfss20sm$smoke))), levels=levels(brfss20sm$smoke))
mcv.smoke
## [1] NeverSmoked
## Levels: NeverSmoked Current Former
#impute the cases
brfss20sm$smoke.imp<-as.factor(ifelse(is.na(brfss20sm$smoke)==T, mcv.smoke, brfss20sm$smoke))
levels(brfss20sm$smoke.imp)<-levels(brfss20sm$smoke)
prop.table(table(brfss20sm$smoke))
##
## NeverSmoked Current Former
## 0.6147835 0.1232389 0.2619776
prop.table(table(brfss20sm$smoke.imp))
##
## NeverSmoked Current Former
## 0.6367228 0.1162200 0.2470571
barplot(prop.table(table(brfss20sm$smoke)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(brfss20sm$smoke.imp)), main="Imputed Data",ylim=c(0, .6))
#look at the patterns of missingness
md.pattern(brfss20sm[,c("ins", "smoke", "checkup" , "depression", "employ", "marst")])
## ins depression marst checkup employ smoke
## 177531 1 1 1 1 1 1 0
## 8478 1 1 1 1 1 0 1
## 1405 1 1 1 1 0 1 1
## 1773 1 1 1 1 0 0 2
## 1835 1 1 1 0 1 1 1
## 174 1 1 1 0 1 0 2
## 60 1 1 1 0 0 1 2
## 41 1 1 1 0 0 0 3
## 1330 1 1 0 1 1 1 1
## 215 1 1 0 1 1 0 2
## 241 1 1 0 1 0 1 2
## 170 1 1 0 1 0 0 3
## 45 1 1 0 0 1 1 2
## 11 1 1 0 0 1 0 3
## 25 1 1 0 0 0 1 3
## 13 1 1 0 0 0 0 4
## 817 1 0 1 1 1 1 1
## 61 1 0 1 1 1 0 2
## 20 1 0 1 1 0 1 2
## 20 1 0 1 1 0 0 3
## 28 1 0 1 0 1 1 2
## 6 1 0 1 0 1 0 3
## 3 1 0 1 0 0 1 3
## 6 1 0 1 0 0 0 4
## 27 1 0 0 1 1 1 2
## 12 1 0 0 1 1 0 3
## 13 1 0 0 1 0 1 3
## 15 1 0 0 1 0 0 4
## 1 1 0 0 0 1 1 3
## 3 1 0 0 0 1 0 4
## 2 1 0 0 0 0 1 4
## 11 1 0 0 0 0 0 5
## 686 0 1 1 1 1 1 1
## 56 0 1 1 1 1 0 2
## 34 0 1 1 1 0 1 2
## 16 0 1 1 1 0 0 3
## 57 0 1 1 0 1 1 2
## 5 0 1 1 0 1 0 3
## 6 0 1 1 0 0 1 3
## 3 0 1 1 0 0 0 4
## 26 0 1 0 1 1 1 2
## 3 0 1 0 1 1 0 3
## 21 0 1 0 1 0 1 3
## 14 0 1 0 1 0 0 4
## 6 0 1 0 0 1 1 3
## 1 0 1 0 0 1 0 4
## 7 0 1 0 0 0 1 4
## 7 0 1 0 0 0 0 5
## 26 0 0 1 1 1 1 2
## 1 0 0 1 1 1 0 3
## 1 0 0 1 1 0 1 3
## 2 0 0 1 0 1 1 3
## 1 0 0 1 0 1 0 4
## 1 0 0 1 0 0 0 5
## 1 0 0 0 1 1 0 4
## 1 0 0 0 1 0 1 4
## 3 0 0 0 0 1 1 4
## 1 0 0 0 0 1 0 5
## 1 0 0 0 0 0 1 5
## 9 0 0 0 0 0 0 6
## 996 1093 2235 2374 3939 11128 21765
md.pairs(brfss20sm[,c("ins", "smoke", "checkup" , "depression", "employ", "marst")])
## $rr
## ins smoke checkup depression employ marst
## ins 194392 183383 192128 193347 190574 192258
## smoke 183383 184260 182179 183315 182420 182511
## checkup 192128 182179 193014 191999 189270 190925
## depression 193347 183315 191999 194295 190459 192160
## employ 190574 182420 189270 190459 191449 189764
## marst 192258 182511 190925 192160 189764 193153
##
## $rm
## ins smoke checkup depression employ marst
## ins 0 11009 2264 1045 3818 2134
## smoke 877 0 2081 945 1840 1749
## checkup 886 10835 0 1015 3744 2089
## depression 948 10980 2296 0 3836 2135
## employ 875 9029 2179 990 0 1685
## marst 895 10642 2228 993 3389 0
##
## $mr
## ins smoke checkup depression employ marst
## ins 0 877 886 948 875 895
## smoke 11009 0 10835 10980 9029 10642
## checkup 2264 2081 0 2296 2179 2228
## depression 1045 945 1015 0 990 993
## employ 3818 1840 3744 3836 0 3389
## marst 2134 1749 2089 2135 1685 0
##
## $mm
## ins smoke checkup depression employ marst
## ins 996 119 110 48 121 101
## smoke 119 11128 293 148 2099 486
## checkup 110 293 2374 78 195 146
## depression 48 148 78 1093 103 100
## employ 121 2099 195 103 3939 550
## marst 101 486 146 100 550 2235
dat1<-brfss20sm
samp1<-sample(1:dim(dat1)[1], replace = F, size = 500)
imp<-mice(data = dat1[,c("ins", "smoke", "checkup" , "depression", "employ", "marst")], seed = 22, m = 5)
##
## iter imp variable
## 1 1 ins smoke checkup depression employ marst
## 1 2 ins smoke checkup depression employ marst
## 1 3 ins smoke checkup depression employ marst
## 1 4 ins smoke checkup depression employ marst
## 1 5 ins smoke checkup depression employ marst
## 2 1 ins smoke checkup depression employ marst
## 2 2 ins smoke checkup depression employ marst
## 2 3 ins smoke checkup depression employ marst
## 2 4 ins smoke checkup depression employ marst
## 2 5 ins smoke checkup depression employ marst
## 3 1 ins smoke checkup depression employ marst
## 3 2 ins smoke checkup depression employ marst
## 3 3 ins smoke checkup depression employ marst
## 3 4 ins smoke checkup depression employ marst
## 3 5 ins smoke checkup depression employ marst
## 4 1 ins smoke checkup depression employ marst
## 4 2 ins smoke checkup depression employ marst
## 4 3 ins smoke checkup depression employ marst
## 4 4 ins smoke checkup depression employ marst
## 4 5 ins smoke checkup depression employ marst
## 5 1 ins smoke checkup depression employ marst
## 5 2 ins smoke checkup depression employ marst
## 5 3 ins smoke checkup depression employ marst
## 5 4 ins smoke checkup depression employ marst
## 5 5 ins smoke checkup depression employ marst
print(imp)
## Class: mids
## Number of multiple imputations: 5
## Imputation methods:
## ins smoke checkup depression employ marst
## "pmm" "polyreg" "pmm" "pmm" "polyreg" "polyreg"
## PredictorMatrix:
## ins smoke checkup depression employ marst
## ins 0 1 1 1 1 1
## smoke 1 0 1 1 1 1
## checkup 1 1 0 1 1 1
## depression 1 1 1 0 1 1
## employ 1 1 1 1 0 1
## marst 1 1 1 1 1 0
plot(imp)
head(imp$imp$depression)
summary(imp$imp$depression)
## 1 2 3 4
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2342 Mean :0.2763 Mean :0.1839 Mean :0.2187
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## 5
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.1876
## 3rd Qu.:0.0000
## Max. :1.0000
head(imp$imp$employ)
summary(imp$imp$employ)
## 1 2 3 4
## Employed:2183 Employed:2187 Employed:2159 Employed:2217
## nilf : 584 nilf : 614 nilf : 601 nilf : 567
## retired : 988 retired : 939 retired : 977 retired : 946
## unable : 184 unable : 199 unable : 202 unable : 209
## 5
## Employed:2180
## nilf : 612
## retired : 970
## unable : 177
dat.imp<-complete(imp, action = 1)
head(dat.imp, n=10)
#Compare to the original data
head(brfss20sm[,c("ins", "smoke", "checkup" , "depression", "employ", "marst")], n=10)
While the first few cases don’t show much missingness, we can coax some more interesting cases out and compare the original data to the imputed:
head(dat.imp[is.na(brfss20sm$depression)==T,], n=10)
head(brfss20sm[is.na(brfss20sm$depression)==T,c("ins", "smoke", "checkup" , "depression", "employ", "marst")], n=10)
#Now, I will see the variability in the 5 different imputations for each outcome
fit.depression<-with(data=imp ,expr=lm(depression~employ+marst+smoke+ins+checkup))
fit.depression
## call :
## with.mids(data = imp, expr = lm(depression ~ employ + marst +
## smoke + ins + checkup))
##
## call1 :
## mice(data = dat1[, c("ins", "smoke", "checkup", "depression",
## "employ", "marst")], m = 5, seed = 22)
##
## nmis :
## ins smoke checkup depression employ marst
## 996 11128 2374 1093 3939 2235
##
## analyses :
## [[1]]
##
## Call:
## lm(formula = depression ~ employ + marst + smoke + ins + checkup)
##
## Coefficients:
## (Intercept) employnilf employretired employunable marstcohab
## 0.037109 0.072289 -0.009816 0.280136 0.089027
## marstdivorced marstnm marstseparated marstwidowed smokeCurrent
## 0.088611 0.068979 0.092572 0.013286 0.123439
## smokeFormer ins checkup
## 0.049842 0.040039 0.034886
##
##
## [[2]]
##
## Call:
## lm(formula = depression ~ employ + marst + smoke + ins + checkup)
##
## Coefficients:
## (Intercept) employnilf employretired employunable marstcohab
## 0.038159 0.072358 -0.009856 0.278872 0.086988
## marstdivorced marstnm marstseparated marstwidowed smokeCurrent
## 0.088090 0.068492 0.093169 0.014078 0.123171
## smokeFormer ins checkup
## 0.050411 0.039085 0.035051
##
##
## [[3]]
##
## Call:
## lm(formula = depression ~ employ + marst + smoke + ins + checkup)
##
## Coefficients:
## (Intercept) employnilf employretired employunable marstcohab
## 0.037264 0.073017 -0.009108 0.279511 0.090949
## marstdivorced marstnm marstseparated marstwidowed smokeCurrent
## 0.089545 0.068263 0.091600 0.012312 0.122336
## smokeFormer ins checkup
## 0.050591 0.039437 0.034699
##
##
## [[4]]
##
## Call:
## lm(formula = depression ~ employ + marst + smoke + ins + checkup)
##
## Coefficients:
## (Intercept) employnilf employretired employunable marstcohab
## 0.038792 0.073720 -0.008912 0.282308 0.089020
## marstdivorced marstnm marstseparated marstwidowed smokeCurrent
## 0.088654 0.067862 0.091635 0.013003 0.122820
## smokeFormer ins checkup
## 0.051315 0.039069 0.033136
##
##
## [[5]]
##
## Call:
## lm(formula = depression ~ employ + marst + smoke + ins + checkup)
##
## Coefficients:
## (Intercept) employnilf employretired employunable marstcohab
## 0.036851 0.073579 -0.009043 0.278487 0.088210
## marstdivorced marstnm marstseparated marstwidowed smokeCurrent
## 0.088525 0.068994 0.091202 0.012301 0.122340
## smokeFormer ins checkup
## 0.050321 0.039741 0.035080
with (data=imp, exp=(sd(depression)))
## call :
## with.mids(data = imp, expr = (sd(depression)))
##
## call1 :
## mice(data = dat1[, c("ins", "smoke", "checkup", "depression",
## "employ", "marst")], m = 5, seed = 22)
##
## nmis :
## ins smoke checkup depression employ marst
## 996 11128 2374 1093 3939 2235
##
## analyses :
## [[1]]
## [1] 0.3910196
##
## [[2]]
## [1] 0.3912071
##
## [[3]]
## [1] 0.3907951
##
## [[4]]
## [1] 0.3909503
##
## [[5]]
## [1] 0.3908115
with (data=imp, exp=(prop.table(table(employ))))
## call :
## with.mids(data = imp, expr = (prop.table(table(employ))))
##
## call1 :
## mice(data = dat1[, c("ins", "smoke", "checkup", "depression",
## "employ", "marst")], m = 5, seed = 22)
##
## nmis :
## ins smoke checkup depression employ marst
## 996 11128 2374 1093 3939 2235
##
## analyses :
## [[1]]
## employ
## Employed nilf retired unable
## 0.53784777 0.13798698 0.26917211 0.05499314
##
## [[2]]
## employ
## Employed nilf retired unable
## 0.53786824 0.13814052 0.26892133 0.05506991
##
## [[3]]
## employ
## Employed nilf retired unable
## 0.53772494 0.13807399 0.26911581 0.05508527
##
## [[4]]
## employ
## Employed nilf retired unable
## 0.53802178 0.13789997 0.26895715 0.05512109
##
## [[5]]
## employ
## Employed nilf retired unable
## 0.53783242 0.13813028 0.26907998 0.05495732
with (data=imp, exp=(prop.table(table(marst))))
## call :
## with.mids(data = imp, expr = (prop.table(table(marst))))
##
## call1 :
## mice(data = dat1[, c("ins", "smoke", "checkup", "depression",
## "employ", "marst")], m = 5, seed = 22)
##
## nmis :
## ins smoke checkup depression employ marst
## 996 11128 2374 1093 3939 2235
##
## analyses :
## [[1]]
## marst
## married cohab divorced nm separated widowed
## 0.51136713 0.04204455 0.12606711 0.20397875 0.02062051 0.09592196
##
## [[2]]
## marst
## married cohab divorced nm separated widowed
## 0.51129547 0.04205478 0.12611829 0.20401969 0.02062563 0.09588613
##
## [[3]]
## marst
## married cohab divorced nm separated widowed
## 0.51111122 0.04205990 0.12601593 0.20415276 0.02066145 0.09599873
##
## [[4]]
## marst
## married cohab divorced nm separated widowed
## 0.51129547 0.04191660 0.12605687 0.20417835 0.02067681 0.09587590
##
## [[5]]
## marst
## married cohab divorced nm separated widowed
## 0.51136201 0.04198825 0.12611829 0.20400434 0.02066657 0.09586054
with (data=imp, exp=(prop.table(table(smoke))))
## call :
## with.mids(data = imp, expr = (prop.table(table(smoke))))
##
## call1 :
## mice(data = dat1[, c("ins", "smoke", "checkup", "depression",
## "employ", "marst")], m = 5, seed = 22)
##
## nmis :
## ins smoke checkup depression employ marst
## 996 11128 2374 1093 3939 2235
##
## analyses :
## [[1]]
## smoke
## NeverSmoked Current Former
## 0.6153807 0.1232010 0.2614183
##
## [[2]]
## smoke
## NeverSmoked Current Former
## 0.6157031 0.1232522 0.2610447
##
## [[3]]
## smoke
## NeverSmoked Current Former
## 0.6153193 0.1229758 0.2617049
##
## [[4]]
## smoke
## NeverSmoked Current Former
## 0.6150327 0.1235388 0.2614285
##
## [[5]]
## smoke
## NeverSmoked Current Former
## 0.6155393 0.1231038 0.2613569
Now we pool the separate models from each imputed data set:
est.p<-pool(fit.depression)
print(est.p)
## Class: mipo m = 5
## term m estimate ubar b t dfcom
## 1 (Intercept) 5 0.037635036 1.559770e-05 6.603547e-07 1.639013e-05 195375
## 2 employnilf 5 0.072992620 6.869113e-06 4.428284e-07 7.400507e-06 195375
## 3 employretired 5 -0.009347154 4.824058e-06 2.046772e-07 5.069671e-06 195375
## 4 employunable 5 0.279862848 1.535580e-05 2.263188e-06 1.807163e-05 195375
## 5 marstcohab 5 0.088838877 1.928990e-05 2.085649e-06 2.179268e-05 195375
## 6 marstdivorced 5 0.088685165 7.482644e-06 2.814094e-07 7.820335e-06 195375
## 7 marstnm 5 0.068517958 5.344889e-06 2.338560e-07 5.625516e-06 195375
## 8 marstseparated 5 0.092035605 3.760207e-05 6.543578e-07 3.838730e-05 195375
## 9 marstwidowed 5 0.012995882 9.996220e-06 5.516393e-07 1.065819e-05 195375
## 10 smokeCurrent 5 0.122821159 7.525099e-06 2.428998e-07 7.816579e-06 195375
## 11 smokeFormer 5 0.050495917 4.152875e-06 2.861812e-07 4.496292e-06 195375
## 12 ins 5 0.039473970 1.051176e-05 1.767339e-07 1.072384e-05 195375
## 13 checkup 5 0.034570411 8.934236e-06 6.660831e-07 9.733536e-06 195375
## df riv lambda fmi
## 1 1695.6212 0.05080400 0.04834774 0.04946824
## 2 772.4949 0.07735992 0.07180509 0.07419890
## 3 1688.7106 0.05091410 0.04844745 0.04957241
## 4 176.9245 0.17685986 0.15028115 0.15972644
## 5 302.7440 0.12974553 0.11484492 0.12063509
## 6 2120.8780 0.04512994 0.04318117 0.04408218
## 7 1593.6056 0.05250383 0.04988469 0.05107486
## 8 9104.8467 0.02088261 0.02045545 0.02067054
## 9 1031.1054 0.06622175 0.06210880 0.06392271
## 10 2833.2504 0.03873434 0.03728994 0.03796880
## 11 683.0887 0.08269392 0.07637793 0.07907036
## 12 9708.7495 0.02017558 0.01977657 0.01997843
## 13 591.2181 0.08946481 0.08211813 0.08520751
summary(est.p)
lam<-data.frame(lam=est.p$pooled$lambda, param=row.names(est.p$pooled))
ggplot(data=lam,aes(x=param, y=lam))+geom_col()+theme(axis.text.x = element_text(angle = 45, hjust = 1))
We can also compare to the model fit on the original data, with missings eliminated:
library(dplyr)
bnm<-brfss20sm%>%
select(ins,smoke,checkup,depression,employ,marst) %>%
filter(complete.cases(.))%>%
as.data.frame()
summary(lm(depression~employ+marst+smoke+ins+checkup, bnm))
##
## Call:
## lm(formula = depression ~ employ + marst + smoke + ins + checkup,
## data = bnm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6177 -0.1931 -0.1492 -0.1050 0.9694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.041295 0.004244 9.730 < 2e-16 ***
## employnilf 0.073910 0.002790 26.490 < 2e-16 ***
## employretired -0.010676 0.002312 -4.618 3.87e-06 ***
## employunable 0.283736 0.004167 68.087 < 2e-16 ***
## marstcohab 0.090242 0.004643 19.437 < 2e-16 ***
## marstdivorced 0.088196 0.002885 30.571 < 2e-16 ***
## marstnm 0.070792 0.002462 28.759 < 2e-16 ***
## marstseparated 0.095842 0.006558 14.615 < 2e-16 ***
## marstwidowed 0.013506 0.003332 4.053 5.06e-05 ***
## smokeCurrent 0.122450 0.002905 42.148 < 2e-16 ***
## smokeFormer 0.050869 0.002148 23.686 < 2e-16 ***
## ins 0.037220 0.003515 10.590 < 2e-16 ***
## checkup 0.037115 0.003169 11.713 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3819 on 177518 degrees of freedom
## Multiple R-squared: 0.06169, Adjusted R-squared: 0.06163
## F-statistic: 972.6 on 12 and 177518 DF, p-value: < 2.2e-16
Here, I compare the coefficients from the model where we eliminated all missing data to the one that we fit on the imputed data:
fit1<-lm(depression~employ+marst+smoke+ins+checkup, data=brfss20sm)
summary(fit1)
##
## Call:
## lm(formula = depression ~ employ + marst + smoke + ins + checkup,
## data = brfss20sm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6177 -0.1931 -0.1492 -0.1050 0.9694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.041295 0.004244 9.730 < 2e-16 ***
## employnilf 0.073910 0.002790 26.490 < 2e-16 ***
## employretired -0.010676 0.002312 -4.618 3.87e-06 ***
## employunable 0.283736 0.004167 68.087 < 2e-16 ***
## marstcohab 0.090242 0.004643 19.437 < 2e-16 ***
## marstdivorced 0.088196 0.002885 30.571 < 2e-16 ***
## marstnm 0.070792 0.002462 28.759 < 2e-16 ***
## marstseparated 0.095842 0.006558 14.615 < 2e-16 ***
## marstwidowed 0.013506 0.003332 4.053 5.06e-05 ***
## smokeCurrent 0.122450 0.002905 42.148 < 2e-16 ***
## smokeFormer 0.050869 0.002148 23.686 < 2e-16 ***
## ins 0.037220 0.003515 10.590 < 2e-16 ***
## checkup 0.037115 0.003169 11.713 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3819 on 177518 degrees of freedom
## (17857 observations deleted due to missingness)
## Multiple R-squared: 0.06169, Adjusted R-squared: 0.06163
## F-statistic: 972.6 on 12 and 177518 DF, p-value: < 2.2e-16
fit.imp<-lm(depression~employ+marst+smoke+ins+checkup, data=dat.imp)
summary(fit.imp)
##
## Call:
## lm(formula = depression ~ employ + marst + smoke + ins + checkup,
## data = dat.imp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6082 -0.1908 -0.1410 -0.1022 0.9727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.037109 0.003951 9.393 < 2e-16 ***
## employnilf 0.072289 0.002621 27.579 < 2e-16 ***
## employretired -0.009816 0.002195 -4.471 7.78e-06 ***
## employunable 0.280136 0.003920 71.459 < 2e-16 ***
## marstcohab 0.089027 0.004391 20.275 < 2e-16 ***
## marstdivorced 0.088611 0.002736 32.390 < 2e-16 ***
## marstnm 0.068979 0.002312 29.837 < 2e-16 ***
## marstseparated 0.092572 0.006137 15.085 < 2e-16 ***
## marstwidowed 0.013286 0.003162 4.202 2.64e-05 ***
## smokeCurrent 0.123439 0.002744 44.980 < 2e-16 ***
## smokeFormer 0.049842 0.002037 24.464 < 2e-16 ***
## ins 0.040039 0.003247 12.332 < 2e-16 ***
## checkup 0.034886 0.002986 11.682 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3787 on 195375 degrees of freedom
## Multiple R-squared: 0.06188, Adjusted R-squared: 0.06182
## F-statistic: 1074 on 12 and 195375 DF, p-value: < 2.2e-16