# Read the BRFSS 2020 SAS transport (XPT) file into `hardtoget`.
# NOTE(review): the path ends with a space inside the quotes; presumably this
# matches the file name on disk - confirm before "cleaning it up".
hardtoget <- haven::read_xpt(
  file = "/Users/christacrumrine/Downloads/LLCP2020.XPT "
)

# Normalise column names: drop every underscore, then lower-case.
names(hardtoget) <- tolower(gsub("_", "", names(hardtoget), fixed = TRUE))
BRFSS2020
Whether a person had a stroke will be my outcome variable. The survey item is originally coded 1 for yes and 2 for no; in the data summarized below the variable stroke ranges from 0 to 1, so it appears to have been recoded as a 0/1 indicator (presumably 1 = had a stroke). It is a categorical variable. The five predictor variables I will use are marital status (marst), whether a person drinks alcohol (drink), a self-rated health question (badhealth), a person’s race/ethnicity (race_eth), and a person’s health insurance status (healthinsurace_coverage).
According to this table, drinking had the most missing data at 6.66%. This variable asked people whether they had at least 1 drink in the last 30 days.
The variable with the least missing data is badhealth, with only 961 respondents (0.24%) who did not answer this question.
The marital status variable (marst) had 3,784 non-responses (0.94%).
# Descriptive summary (including NA counts) of the outcome and candidate
# predictors.
summary(
  hardtoget[c(
    "stroke", "marst", "drink", "badhealth", "race_eth",
    "depression", "healthinsurace_coverage"
  )]
)
## stroke marst drink badhealth
## Min. :0.0000 cohab : 15261 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 divorced : 51939 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 married :207302 Median :1.000 Median :0.0000
## Mean :0.0391 nm : 72051 Mean :0.511 Mean :0.1539
## 3rd Qu.:0.0000 separated: 7975 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :1.0000 widowed : 43646 Max. :1.000 Max. :1.0000
## NA's :1186 NA's : 3784 NA's :26775 NA's :961
## race_eth depression healthinsurace_coverage
## hispanic : 36408 Min. :0.0000 yes, no coverage: 34034
## nh_black : 30390 1st Qu.:0.0000 no, has coverage:365862
## nh_multirace: 6954 Median :0.0000 NA's : 2062
## nh_other : 10243 Mean :0.1896
## nhwhite :303886 3rd Qu.:0.0000
## NA's : 14077 Max. :1.0000
## NA's :2103
# Percentage of missing values for each analysis variable.
# mean(is.na(x)) is the share of NAs directly; unlike table(is.na(x))[2] it
# returns 0 (rather than NA) for a variable that has no missing values.
100 * mean(is.na(hardtoget$stroke))
## [1] 0.2950557
100 * mean(is.na(hardtoget$marst))
## [1] 0.9413919
100 * mean(is.na(hardtoget$drink))
## [1] 6.661144
100 * mean(is.na(hardtoget$race_eth))
## [1] 3.502107
100 * mean(is.na(hardtoget$badhealth))
## [1] 0.2390797
100 * mean(is.na(hardtoget$healthinsurace_coverage))
## [1] 0.5129889
100 * mean(is.na(hardtoget$smoked))
## [1] 5.143075
summary(hardtoget$stroke)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.0000 0.0391 0.0000 1.0000 1186
# What happens when we replace the missing values with the mode?
# base::mode() reports the storage mode of an object ("numeric"), not its most
# frequent value, so the modal category is taken from a frequency table.
stroke.counts <- table(hardtoget$stroke)
mcv.stroke <- as.numeric(names(stroke.counts)[which.max(stroke.counts)])
mcv.stroke

# Fill only the missing cases; observed values are left untouched.
hardtoget$stroke.imp.mode <- ifelse(
  is.na(hardtoget$stroke), mcv.stroke, hardtoget$stroke
)

# Compare the distribution before and after imputation.
summary(hardtoget$stroke)
summary(hardtoget$stroke.imp.mode)
# Linear model of stroke on the candidate predictors, fitted to the un-imputed
# data.
fit <- lm(
  stroke ~ race_eth + drink + badhealth + healthinsurace_coverage + smoked,
  data = hardtoget
)
For this homework I used a modal imputation since my data was categorical. The output of the data shows a mean of .0421.
table(hardtoget$race_eth)
##
## hispanic nh_black nh_multirace nh_other nhwhite
## 36408 30390 6954 10243 303886
# Find the most common value, kept as a factor with the original levels.
mcv.race_eth <- factor(
  names(which.max(table(hardtoget$race_eth))),
  levels = levels(hardtoget$race_eth)
)
mcv.race_eth
## [1] nhwhite
## Levels: hispanic nh_black nh_multirace nh_other nhwhite
# Impute the cases: copy the factor and overwrite only its missing entries.
# Assigning into the factor keeps labels attached to the right categories;
# round-tripping through ifelse() integer codes and re-labelling afterwards
# would mislabel categories if any level had no observations.
hardtoget$race_eth.imp <- hardtoget$race_eth
hardtoget$race_eth.imp[is.na(hardtoget$race_eth.imp)] <- mcv.race_eth
prop.table(table(hardtoget$race_eth))
##
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09386384 0.07834877 0.01792818 0.02640758 0.78345163
prop.table(table(hardtoget$race_eth.imp))
##
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09057663 0.07560491 0.01730031 0.02548276 0.79103538
# Compare each variable's distribution before and after modal imputation.
# Every "Imputed Data" panel is drawn from a modally imputed copy of the
# variable (not from the original column again), and the y-axis runs to 1
# because some categories exceed 0.6 (e.g. nhwhite is about 0.78).

# Replace the NAs in `x` with its most frequent observed value.
impute_mode <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0) {
    return(x)
  }
  values <- sort(unique(observed))
  modal <- values[which.max(tabulate(match(observed, values)))]
  x[is.na(x)] <- modal
  x
}

# Draw the original and the modally imputed distribution of `x`.
plot_original_vs_imputed <- function(x, ylim = c(0, 1)) {
  barplot(prop.table(table(x)), main = "Original Data", ylim = ylim)
  barplot(
    prop.table(table(impute_mode(x))),
    main = "Imputed Data", ylim = ylim
  )
  invisible(NULL)
}

plot_original_vs_imputed(hardtoget$race_eth)
plot_original_vs_imputed(hardtoget$marst)
plot_original_vs_imputed(hardtoget$smoked)
plot_original_vs_imputed(hardtoget$healthinsurace_coverage)
plot_original_vs_imputed(hardtoget$badhealth)
table(hardtoget$marst)
##
## cohab divorced married nm separated widowed
## 15261 51939 207302 72051 7975 43646
# Find the most common value, kept as a factor with the original levels.
mcv.marst <- factor(
  names(which.max(table(hardtoget$marst))),
  levels = levels(hardtoget$marst)
)
mcv.marst
## [1] married
## Levels: cohab divorced married nm separated widowed
# Impute the cases: copy the factor and overwrite only its missing entries.
# Assigning into the factor keeps labels attached to the right categories;
# round-tripping through ifelse() integer codes and re-labelling afterwards
# would mislabel categories if any level had no observations.
hardtoget$marst.imp <- hardtoget$marst
hardtoget$marst.imp[is.na(hardtoget$marst.imp)] <- mcv.marst
prop.table(table(hardtoget$marst))
##
## cohab divorced married nm separated widowed
## 0.03832746 0.13044297 0.52063168 0.18095355 0.02002893 0.10961539
prop.table(table(hardtoget$marst.imp))
##
## cohab divorced married nm separated widowed
## 0.03796665 0.12921499 0.52514442 0.17925007 0.01984038 0.10858348
barplot(prop.table(table(hardtoget$marst)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(hardtoget$marst.imp)), main="Imputed Data",ylim=c(0, .6))
# Regress stroke on a missingness indicator for each predictor: a non-zero
# slope means the stroke rate differs between respondents with and without a
# recorded value for that predictor.
fit1 <- lm(stroke ~ is.na(badhealth), data = hardtoget)
fit2 <- lm(stroke ~ is.na(marst), data = hardtoget)
fit3 <- lm(stroke ~ is.na(race_eth), data = hardtoget)

summary(fit1)
##
## Call:
## lm(formula = stroke ~ is.na(badhealth), data = hardtoget)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0913 -0.0390 -0.0390 -0.0390 0.9610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0389969 0.0003066 127.203 < 2e-16 ***
## is.na(badhealth)TRUE 0.0523074 0.0063986 8.175 2.97e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1939 on 400770 degrees of freedom
## (1186 observations deleted due to missingness)
## Multiple R-squared: 0.0001667, Adjusted R-squared: 0.0001642
## F-statistic: 66.83 on 1 and 400770 DF, p-value: 2.973e-16
summary(fit2)
##
## Call:
## lm(formula = stroke ~ is.na(marst), data = hardtoget)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03921 -0.03921 -0.03921 -0.03921 0.97111
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0392115 0.0003077 127.45 < 2e-16 ***
## is.na(marst)TRUE -0.0103208 0.0032154 -3.21 0.00133 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1939 on 400770 degrees of freedom
## (1186 observations deleted due to missingness)
## Multiple R-squared: 2.571e-05, Adjusted R-squared: 2.321e-05
## F-statistic: 10.3 on 1 and 400770 DF, p-value: 0.001328
summary(fit3)
##
## Call:
## lm(formula = stroke ~ is.na(race_eth), data = hardtoget)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.04731 -0.03882 -0.03882 -0.03882 0.96118
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0388201 0.0003117 124.529 < 2e-16 ***
## is.na(race_eth)TRUE 0.0084897 0.0016671 5.093 3.53e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1939 on 400770 degrees of freedom
## (1186 observations deleted due to missingness)
## Multiple R-squared: 6.471e-05, Adjusted R-squared: 6.221e-05
## F-statistic: 25.93 on 1 and 400770 DF, p-value: 3.534e-07
# Missing-data pattern table (1 = observed, 0 = missing).
# Namespace-qualified so the call does not depend on mice already being
# attached at this point in the script.
mice::md.pattern(
  hardtoget[, c("stroke", "marst", "drink", "badhealth", "race_eth", "smoked")]
)
## badhealth stroke marst race_eth smoked drink
## 355786 1 1 1 1 1 1 0
## 7734 1 1 1 1 1 0 1
## 2115 1 1 1 1 0 1 1
## 16799 1 1 1 1 0 0 2
## 12715 1 1 1 0 1 1 1
## 300 1 1 1 0 1 0 2
## 75 1 1 1 0 0 1 2
## 707 1 1 1 0 0 0 3
## 2544 1 1 0 1 1 1 1
## 249 1 1 0 1 1 0 2
## 58 1 1 0 1 0 1 2
## 607 1 1 0 1 0 0 3
## 126 1 1 0 0 1 1 2
## 10 1 1 0 0 1 0 3
## 6 1 1 0 0 0 1 3
## 21 1 1 0 0 0 0 4
## 854 1 0 1 1 1 1 1
## 36 1 0 1 1 1 0 2
## 19 1 0 1 1 0 1 2
## 79 1 0 1 1 0 0 3
## 41 1 0 1 0 1 1 2
## 7 1 0 1 0 1 0 3
## 2 1 0 1 0 0 1 3
## 7 1 0 1 0 0 0 4
## 36 1 0 0 1 1 1 2
## 9 1 0 0 1 1 0 3
## 2 1 0 0 1 0 1 3
## 49 1 0 0 1 0 0 4
## 1 1 0 0 0 1 1 3
## 1 1 0 0 0 1 0 4
## 1 1 0 0 0 0 1 4
## 1 1 0 0 0 0 0 5
## 694 0 1 1 1 1 1 1
## 45 0 1 1 1 1 0 2
## 19 0 1 1 1 0 1 2
## 63 0 1 1 1 0 0 3
## 42 0 1 1 0 1 1 2
## 2 0 1 1 0 1 0 3
## 7 0 1 1 0 0 0 4
## 25 0 1 0 1 1 1 2
## 5 0 1 0 1 1 0 3
## 1 0 1 0 1 0 1 3
## 14 0 1 0 1 0 0 4
## 1 0 1 0 0 1 1 3
## 1 0 1 0 0 1 0 4
## 1 0 1 0 0 0 0 5
## 16 0 0 1 1 1 1 2
## 2 0 0 1 1 1 0 3
## 6 0 0 1 1 0 0 4
## 1 0 0 1 0 1 1 3
## 1 0 0 1 0 0 1 4
## 2 0 0 0 1 1 1 3
## 13 0 0 0 1 0 0 5
## 961 1186 3784 14077 20673 26775 67456
# Pairwise observed/missing counts (rr, rm, mr, mm) for the same variables.
# Namespace-qualified so the call does not depend on mice already being
# attached at this point in the script.
mice::md.pairs(
  hardtoget[, c("stroke", "marst", "drink", "badhealth", "race_eth", "smoked")]
)
## $rr
## stroke marst drink badhealth race_eth smoked
## stroke 400772 397103 374207 399852 386758 380279
## marst 397103 398174 372380 397276 384267 378275
## drink 374207 372380 375183 374381 362171 372884
## badhealth 399852 397276 374381 400997 386976 380449
## race_eth 386758 384267 362171 386976 387881 368037
## smoked 380279 378275 372884 380449 368037 381285
##
## $rm
## stroke marst drink badhealth race_eth smoked
## stroke 0 3669 26565 920 14014 20493
## marst 1071 0 25794 898 13907 19899
## drink 976 2803 0 802 13012 2299
## badhealth 1145 3721 26616 0 14021 20548
## race_eth 1123 3614 25710 905 0 19844
## smoked 1006 3010 8401 836 13248 0
##
## $mr
## stroke marst drink badhealth race_eth smoked
## stroke 0 1071 976 1145 1123 1006
## marst 3669 0 2803 3721 3614 3010
## drink 26565 25794 0 26616 25710 8401
## badhealth 920 898 802 0 905 836
## race_eth 14014 13907 13012 14021 0 13248
## smoked 20493 19899 2299 20548 19844 0
##
## $mm
## stroke marst drink badhealth race_eth smoked
## stroke 1186 115 210 41 63 180
## marst 115 3784 981 63 170 774
## drink 210 981 26775 159 1065 18374
## badhealth 41 63 159 961 56 125
## race_eth 63 170 1065 56 14077 829
## smoked 180 774 18374 125 829 20673
library(mice)

# dat2 is the data object handed to mice().
dat2 <- hardtoget

# Multiple imputation by chained equations: 8 imputed data sets, fixed seed for
# reproducibility, imputation methods left at the mice defaults.
imp <- mice(
  data = dat2[, c("stroke", "marst", "drink", "badhealth", "race_eth", "smoked")],
  m = 8,
  seed = 22
)
##
## iter imp variable
## 1 1 stroke marst drink badhealth race_eth smoked
## 1 2 stroke marst drink badhealth race_eth smoked
## 1 3 stroke marst drink badhealth race_eth smoked
## 1 4 stroke marst drink badhealth race_eth smoked
## 1 5 stroke marst drink badhealth race_eth smoked
## 1 6 stroke marst drink badhealth race_eth smoked
## 1 7 stroke marst drink badhealth race_eth smoked
## 1 8 stroke marst drink badhealth race_eth smoked
## 2 1 stroke marst drink badhealth race_eth smoked
## 2 2 stroke marst drink badhealth race_eth smoked
## 2 3 stroke marst drink badhealth race_eth smoked
## 2 4 stroke marst drink badhealth race_eth smoked
## 2 5 stroke marst drink badhealth race_eth smoked
## 2 6 stroke marst drink badhealth race_eth smoked
## 2 7 stroke marst drink badhealth race_eth smoked
## 2 8 stroke marst drink badhealth race_eth smoked
## 3 1 stroke marst drink badhealth race_eth smoked
## 3 2 stroke marst drink badhealth race_eth smoked
## 3 3 stroke marst drink badhealth race_eth smoked
## 3 4 stroke marst drink badhealth race_eth smoked
## 3 5 stroke marst drink badhealth race_eth smoked
## 3 6 stroke marst drink badhealth race_eth smoked
## 3 7 stroke marst drink badhealth race_eth smoked
## 3 8 stroke marst drink badhealth race_eth smoked
## 4 1 stroke marst drink badhealth race_eth smoked
## 4 2 stroke marst drink badhealth race_eth smoked
## 4 3 stroke marst drink badhealth race_eth smoked
## 4 4 stroke marst drink badhealth race_eth smoked
## 4 5 stroke marst drink badhealth race_eth smoked
## 4 6 stroke marst drink badhealth race_eth smoked
## 4 7 stroke marst drink badhealth race_eth smoked
## 4 8 stroke marst drink badhealth race_eth smoked
## 5 1 stroke marst drink badhealth race_eth smoked
## 5 2 stroke marst drink badhealth race_eth smoked
## 5 3 stroke marst drink badhealth race_eth smoked
## 5 4 stroke marst drink badhealth race_eth smoked
## 5 5 stroke marst drink badhealth race_eth smoked
## 5 6 stroke marst drink badhealth race_eth smoked
## 5 7 stroke marst drink badhealth race_eth smoked
## 5 8 stroke marst drink badhealth race_eth smoked
print(imp)
## Class: mids
## Number of multiple imputations: 8
## Imputation methods:
## stroke marst drink badhealth race_eth smoked
## "pmm" "polyreg" "pmm" "pmm" "polyreg" "pmm"
## PredictorMatrix:
## stroke marst drink badhealth race_eth smoked
## stroke 0 1 1 1 1 1
## marst 1 0 1 1 1 1
## drink 1 1 0 1 1 1
## badhealth 1 1 1 0 1 1
## race_eth 1 1 1 1 0 1
## smoked 1 1 1 1 1 0
plot(imp)
head(imp$imp$race_eth)
summary(imp$imp$race_eth)
## 1 2 3
## hispanic : 1425 hispanic : 1442 hispanic : 1462
## nh_black : 1279 nh_black : 1242 nh_black : 1239
## nh_multirace: 308 nh_multirace: 299 nh_multirace: 288
## nh_other : 371 nh_other : 383 nh_other : 375
## nhwhite :10694 nhwhite :10711 nhwhite :10713
## 4 5 6
## hispanic : 1381 hispanic : 1465 hispanic : 1471
## nh_black : 1252 nh_black : 1237 nh_black : 1217
## nh_multirace: 322 nh_multirace: 297 nh_multirace: 270
## nh_other : 385 nh_other : 382 nh_other : 389
## nhwhite :10737 nhwhite :10696 nhwhite :10730
## 7 8
## hispanic : 1402 hispanic : 1417
## nh_black : 1234 nh_black : 1232
## nh_multirace: 283 nh_multirace: 306
## nh_other : 371 nh_other : 362
## nhwhite :10787 nhwhite :10760
summary(hardtoget$stroke)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.0000 0.0391 0.0000 1.0000 1186
head(imp$imp$marst)
summary(imp$imp$marst)
## 1 2 3 4
## cohab : 128 cohab : 134 cohab : 126 cohab : 134
## divorced : 469 divorced : 469 divorced : 493 divorced : 444
## married :1961 married :1973 married :1933 married :1995
## nm : 736 nm : 702 nm : 675 nm : 707
## separated: 64 separated: 66 separated: 77 separated: 71
## widowed : 426 widowed : 440 widowed : 480 widowed : 433
## 5 6 7 8
## cohab : 153 cohab : 145 cohab : 133 cohab : 151
## divorced : 458 divorced : 466 divorced : 477 divorced : 489
## married :1997 married :1958 married :1942 married :1972
## nm : 692 nm : 699 nm : 686 nm : 689
## separated: 68 separated: 81 separated: 85 separated: 77
## widowed : 416 widowed : 435 widowed : 461 widowed : 406
# Extract the first completed (imputed) data set from the mids object.
dat.imp <- complete(imp, action = 1)
head(dat.imp)

# Compare to the original data
head(
  hardtoget[, c("stroke", "marst", "drink", "badhealth", "race_eth", "smoked")],
  n = 20
)

# Rows whose stroke value was missing: imputed version first, then original.
head(dat.imp[is.na(hardtoget$stroke), ], n = 10)
head(
  hardtoget[
    is.na(hardtoget$stroke),
    c("stroke", "marst", "drink", "badhealth", "race_eth", "smoked")
  ],
  n = 10
)
# Now, I will see the variability across the 8 imputations (m = 8) by fitting
# the same model to every imputed data set.
# marst enters the model once: listing it as both factor(marst) and marst
# creates perfectly collinear dummy columns whose coefficients are all NA.
# The remaining estimates are unaffected by dropping the duplicate term.
# NOTE: output recorded in this document before this change still lists the
# NA marst* rows; re-run to refresh it.
fit.stroke <- with(
  data = imp,
  expr = lm(stroke ~ factor(marst) + drink + race_eth + smoked)
)
fit.stroke
## call :
## with.mids(data = imp, expr = lm(stroke ~ factor(marst) + marst +
## drink + race_eth + smoked))
##
## call1 :
## mice(data = dat2[, c("stroke", "marst", "drink", "badhealth",
## "race_eth", "smoked")], m = 8, seed = 22)
##
## nmis :
## stroke marst drink badhealth race_eth smoked
## 1186 3784 26775 961 14077 20673
##
## analyses :
## [[1]]
##
## Call:
## lm(formula = stroke ~ factor(marst) + marst + drink + race_eth +
## smoked)
##
## Coefficients:
## (Intercept) factor(marst)divorced factor(marst)married
## 0.0125849 0.0330640 0.0115891
## factor(marst)nm factor(marst)separated factor(marst)widowed
## 0.0009613 0.0324439 0.0581044
## marstdivorced marstmarried marstnm
## NA NA NA
## marstseparated marstwidowed drink
## NA NA -0.0243935
## race_ethnh_black race_ethnh_multirace race_ethnh_other
## 0.0293370 0.0280098 -0.0019745
## race_ethnhwhite smoked
## 0.0121795 0.0224309
##
##
## [[2]]
##
## Call:
## lm(formula = stroke ~ factor(marst) + marst + drink + race_eth +
## smoked)
##
## Coefficients:
## (Intercept) factor(marst)divorced factor(marst)married
## 0.012824 0.033793 0.011834
## factor(marst)nm factor(marst)separated factor(marst)widowed
## 0.001304 0.032955 0.058327
## marstdivorced marstmarried marstnm
## NA NA NA
## marstseparated marstwidowed drink
## NA NA -0.024844
## race_ethnh_black race_ethnh_multirace race_ethnh_other
## 0.029062 0.028022 -0.002397
## race_ethnhwhite smoked
## 0.012292 0.021541
##
##
## [[3]]
##
## Call:
## lm(formula = stroke ~ factor(marst) + marst + drink + race_eth +
## smoked)
##
## Coefficients:
## (Intercept) factor(marst)divorced factor(marst)married
## 0.012610 0.033054 0.011962
## factor(marst)nm factor(marst)separated factor(marst)widowed
## 0.001410 0.032846 0.058247
## marstdivorced marstmarried marstnm
## NA NA NA
## marstseparated marstwidowed drink
## NA NA -0.024692
## race_ethnh_black race_ethnh_multirace race_ethnh_other
## 0.029287 0.026919 -0.001571
## race_ethnhwhite smoked
## 0.012027 0.022581
##
##
## [[4]]
##
## Call:
## lm(formula = stroke ~ factor(marst) + marst + drink + race_eth +
## smoked)
##
## Coefficients:
## (Intercept) factor(marst)divorced factor(marst)married
## 0.012385 0.033430 0.011888
## factor(marst)nm factor(marst)separated factor(marst)widowed
## 0.001311 0.032516 0.058560
## marstdivorced marstmarried marstnm
## NA NA NA
## marstseparated marstwidowed drink
## NA NA -0.024647
## race_ethnh_black race_ethnh_multirace race_ethnh_other
## 0.030126 0.028304 -0.002016
## race_ethnhwhite smoked
## 0.012241 0.022473
##
##
## [[5]]
##
## Call:
## lm(formula = stroke ~ factor(marst) + marst + drink + race_eth +
## smoked)
##
## Coefficients:
## (Intercept) factor(marst)divorced factor(marst)married
## 0.012209 0.032850 0.012067
## factor(marst)nm factor(marst)separated factor(marst)widowed
## 0.001403 0.033040 0.058433
## marstdivorced marstmarried marstnm
## NA NA NA
## marstseparated marstwidowed drink
## NA NA -0.024946
## race_ethnh_black race_ethnh_multirace race_ethnh_other
## 0.029334 0.027488 -0.001864
## race_ethnhwhite smoked
## 0.012353 0.023444
##
##
## [[6]]
##
## Call:
## lm(formula = stroke ~ factor(marst) + marst + drink + race_eth +
## smoked)
##
## Coefficients:
## (Intercept) factor(marst)divorced factor(marst)married
## 0.012632 0.033466 0.012006
## factor(marst)nm factor(marst)separated factor(marst)widowed
## 0.001416 0.033434 0.058582
## marstdivorced marstmarried marstnm
## NA NA NA
## marstseparated marstwidowed drink
## NA NA -0.025075
## race_ethnh_black race_ethnh_multirace race_ethnh_other
## 0.029773 0.028835 -0.002051
## race_ethnhwhite smoked
## 0.012798 0.021292
##
##
## [[7]]
##
## Call:
## lm(formula = stroke ~ factor(marst) + marst + drink + race_eth +
## smoked)
##
## Coefficients:
## (Intercept) factor(marst)divorced factor(marst)married
## 0.012563 0.033156 0.011933
## factor(marst)nm factor(marst)separated factor(marst)widowed
## 0.001229 0.032383 0.057899
## marstdivorced marstmarried marstnm
## NA NA NA
## marstseparated marstwidowed drink
## NA NA -0.025153
## race_ethnh_black race_ethnh_multirace race_ethnh_other
## 0.029286 0.028358 -0.002137
## race_ethnhwhite smoked
## 0.012636 0.022493
##
##
## [[8]]
##
## Call:
## lm(formula = stroke ~ factor(marst) + marst + drink + race_eth +
## smoked)
##
## Coefficients:
## (Intercept) factor(marst)divorced factor(marst)married
## 0.011871 0.033476 0.011995
## factor(marst)nm factor(marst)separated factor(marst)widowed
## 0.001522 0.033047 0.058634
## marstdivorced marstmarried marstnm
## NA NA NA
## marstseparated marstwidowed drink
## NA NA -0.023900
## race_ethnh_black race_ethnh_multirace race_ethnh_other
## 0.029472 0.027128 -0.001865
## race_ethnhwhite smoked
## 0.012342 0.022304
# Standard deviation of stroke within each imputed data set.
# The argument is spelled out as `expr`; `exp` only worked through partial
# argument matching.
with(data = imp, expr = (sd(stroke)))
## call :
## with.mids(data = imp, expr = (sd(stroke)))
##
## call1 :
## mice(data = dat2[, c("stroke", "marst", "drink", "badhealth",
## "race_eth", "smoked")], m = 8, seed = 22)
##
## nmis :
## stroke marst drink badhealth race_eth smoked
## 1186 3784 26775 961 14077 20673
##
## analyses :
## [[1]]
## [1] 0.1939245
##
## [[2]]
## [1] 0.1939185
##
## [[3]]
## [1] 0.1941608
##
## [[4]]
## [1] 0.1940072
##
## [[5]]
## [1] 0.1940249
##
## [[6]]
## [1] 0.194149
##
## [[7]]
## [1] 0.1941076
##
## [[8]]
## [1] 0.1940013
# Marital-status distribution within each imputed data set.
# The argument is spelled out as `expr`; `exp` only worked through partial
# argument matching.
with(data = imp, expr = (prop.table(table(marst))))
## call :
## with.mids(data = imp, expr = (prop.table(table(marst))))
##
## call1 :
## mice(data = dat2[, c("stroke", "marst", "drink", "badhealth",
## "race_eth", "smoked")], m = 8, seed = 22)
##
## nmis :
## stroke marst drink badhealth race_eth smoked
## 1186 3784 26775 961 14077 20673
##
## analyses :
## [[1]]
## marst
## cohab divorced married nm separated widowed
## 0.03828509 0.13038178 0.52060912 0.18108111 0.01999960 0.10964330
##
## [[2]]
## marst
## cohab divorced married nm separated widowed
## 0.03830002 0.13038178 0.52063897 0.18099652 0.02000458 0.10967813
##
## [[3]]
## marst
## cohab divorced married nm separated widowed
## 0.03828012 0.13044149 0.52053946 0.18092935 0.02003194 0.10977764
##
## [[4]]
## marst
## cohab divorced married nm separated widowed
## 0.03830002 0.13031959 0.52069370 0.18100896 0.02001702 0.10966071
##
## [[5]]
## marst
## cohab divorced married nm separated widowed
## 0.03834729 0.13035442 0.52069868 0.18097164 0.02000955 0.10961842
##
## [[6]]
## marst
## cohab divorced married nm separated widowed
## 0.03832739 0.13037432 0.52060165 0.18098906 0.02004189 0.10966569
##
## [[7]]
## marst
## cohab divorced married nm separated widowed
## 0.03829753 0.13040168 0.52056185 0.18095672 0.02005185 0.10973037
##
## [[8]]
## marst
## cohab divorced married nm separated widowed
## 0.03834231 0.13043154 0.52063648 0.18096418 0.02003194 0.10959354
# Race/ethnicity distribution within each imputed data set.
# The argument is spelled out as `expr`; `exp` only worked through partial
# argument matching.
with(data = imp, expr = (prop.table(table(race_eth))))
## call :
## with.mids(data = imp, expr = (prop.table(table(race_eth))))
##
## call1 :
## mice(data = dat2[, c("stroke", "marst", "drink", "badhealth",
## "race_eth", "smoked")], m = 8, seed = 22)
##
## nmis :
## stroke marst drink badhealth race_eth smoked
## 1186 3784 26775 961 14077 20673
##
## analyses :
## [[1]]
## race_eth
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09412177 0.07878684 0.01806656 0.02640574 0.78261908
##
## [[2]]
## race_eth
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09416407 0.07869479 0.01804417 0.02643560 0.78266137
##
## [[3]]
## race_eth
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09421382 0.07868733 0.01801681 0.02641570 0.78266635
##
## [[4]]
## race_eth
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09401231 0.07871967 0.01810139 0.02644057 0.78272606
##
## [[5]]
## race_eth
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09422129 0.07868235 0.01803920 0.02643311 0.78262406
##
## [[6]]
## race_eth
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09423621 0.07863259 0.01797203 0.02645052 0.78270864
##
## [[7]]
## race_eth
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09406455 0.07867489 0.01800437 0.02640574 0.78285045
##
## [[8]]
## race_eth
## hispanic nh_black nh_multirace nh_other nhwhite
## 0.09410187 0.07866991 0.01806159 0.02638335 0.78278328
est.p<-pool(fit.stroke)
print(est.p)
## Class: mipo m = 8
## term m estimate ubar b t
## 1 (Intercept) 8 0.012459838 3.212914e-06 8.959539e-08 3.313708e-06
## 2 factor(marst)divorced 8 0.033286078 3.139702e-06 9.383506e-08 3.245267e-06
## 3 factor(marst)married 8 0.011909243 2.606242e-06 2.195810e-08 2.630945e-06
## 4 factor(marst)nm 8 0.001319608 2.933843e-06 2.891760e-08 2.966376e-06
## 5 factor(marst)separated 8 0.032833121 7.024263e-06 1.313761e-07 7.172061e-06
## 6 factor(marst)widowed 8 0.058348120 3.303703e-06 6.575741e-08 3.377680e-06
## 7 marstdivorced 8 NA NA NA NA
## 8 marstmarried 8 NA NA NA NA
## 9 marstnm 8 NA NA NA NA
## 10 marstseparated 8 NA NA NA NA
## 11 marstwidowed 8 NA NA NA NA
## 12 drink 8 -0.024706259 3.788474e-07 1.661964e-07 5.658183e-07
## 13 race_ethnh_black 8 0.029459774 2.163645e-06 1.130469e-07 2.290823e-06
## 14 race_ethnh_multirace 8 0.027883092 6.105234e-06 4.283701e-07 6.587150e-06
## 15 race_ethnh_other 8 -0.001984420 4.478446e-06 5.707006e-08 4.542650e-06
## 16 race_ethnhwhite 8 0.012358540 1.132712e-06 6.146215e-08 1.201857e-06
## 17 smoked 8 0.022319932 3.947875e-07 4.370812e-07 8.865038e-07
## dfcom df riv lambda fmi
## 1 401946 7421.64055 0.031371779 0.03041753 0.030678707
## 2 401946 6504.85155 0.033622436 0.03252874 0.032826065
## 3 401946 66199.90616 0.009478346 0.00938935 0.009419277
## 4 401946 50767.30265 0.011088630 0.01096702 0.011005982
## 5 401946 15820.99897 0.021041081 0.02060748 0.020731263
## 6 401946 14070.58833 0.022392170 0.02190174 0.022040740
## 7 401946 NA NA NA NA
## 8 401946 NA NA NA NA
## 9 401946 NA NA NA NA
## 10 401946 NA NA NA NA
## 11 401946 NA NA NA NA
## 12 401946 64.09152 0.493525589 0.33044334 0.350402845
## 13 401946 2257.71169 0.058779379 0.05551617 0.056351736
## 14 401946 1303.25043 0.078934955 0.07316007 0.074579153
## 15 401946 32195.36521 0.014336183 0.01413356 0.014194799
## 16 401946 2103.12592 0.061043683 0.05753173 0.058426713
## 17 401946 22.74965 1.245521550 0.55466916 0.589258427
summary(est.p)

# Lambda per pooled coefficient (the `lambda` column of the pooled table).
# Label the bars with the `term` column: the row names of est.p$pooled are
# just "1", "2", ... and say nothing about which parameter a bar belongs to.
lam <- data.frame(lam = est.p$pooled$lambda, param = est.p$pooled$term)

# ggplot2 functions are namespace-qualified so the plot does not depend on
# ggplot2 having been attached earlier in the script.
ggplot2::ggplot(data = lam, ggplot2::aes(x = param, y = lam)) +
  ggplot2::geom_col() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
  )
## Warning: Removed 5 rows containing missing values (position_stack).
library(dplyr)

# Complete-case analysis: keep the model variables, drop every row with a
# missing value in any of them, and return a plain data frame.
bnm <- hardtoget %>%
  select(stroke, marst, sex, race_eth, badhealth) %>%
  filter(complete.cases(.)) %>%
  as.data.frame()

# Model fitted on the complete cases only.
summary(
  lm(stroke ~ factor(marst) + sex + race_eth + badhealth, bnm)
)
##
## Call:
## lm(formula = stroke ~ factor(marst) + sex + race_eth + badhealth,
## data = bnm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16652 -0.04321 -0.02546 -0.01918 1.01051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0020593 0.0020254 1.017 0.30929
## factor(marst)divorced 0.0304872 0.0018007 16.931 < 2e-16 ***
## factor(marst)married 0.0127381 0.0016402 7.766 8.09e-15 ***
## factor(marst)nm 0.0008650 0.0017420 0.497 0.61952
## factor(marst)separated 0.0275830 0.0027024 10.207 < 2e-16 ***
## factor(marst)widowed 0.0570176 0.0018468 30.873 < 2e-16 ***
## sex -0.0062829 0.0006250 -10.053 < 2e-16 ***
## race_ethnh_black 0.0296890 0.0014917 19.903 < 2e-16 ***
## race_ethnh_multirace 0.0316602 0.0025083 12.622 < 2e-16 ***
## race_ethnh_other 0.0061947 0.0021427 2.891 0.00384 **
## race_ethnhwhite 0.0169459 0.0010742 15.775 < 2e-16 ***
## badhealth 0.0820625 0.0008656 94.804 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1897 on 382422 degrees of freedom
## Multiple R-squared: 0.03506, Adjusted R-squared: 0.03503
## F-statistic: 1263 on 11 and 382422 DF, p-value: < 2.2e-16
# Same model on the full data frame; rows with a missing model variable are
# dropped by lm() itself. Note that this re-uses the name `fit1`.
fit1 <- lm(
  stroke ~ factor(marst) + sex + race_eth + badhealth,
  data = hardtoget
)
summary(fit1)
##
## Call:
## lm(formula = stroke ~ factor(marst) + sex + race_eth + badhealth,
## data = hardtoget)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16652 -0.04321 -0.02546 -0.01918 1.01051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0020593 0.0020254 1.017 0.30929
## factor(marst)divorced 0.0304872 0.0018007 16.931 < 2e-16 ***
## factor(marst)married 0.0127381 0.0016402 7.766 8.09e-15 ***
## factor(marst)nm 0.0008650 0.0017420 0.497 0.61952
## factor(marst)separated 0.0275830 0.0027024 10.207 < 2e-16 ***
## factor(marst)widowed 0.0570176 0.0018468 30.873 < 2e-16 ***
## sex -0.0062829 0.0006250 -10.053 < 2e-16 ***
## race_ethnh_black 0.0296890 0.0014917 19.903 < 2e-16 ***
## race_ethnh_multirace 0.0316602 0.0025083 12.622 < 2e-16 ***
## race_ethnh_other 0.0061947 0.0021427 2.891 0.00384 **
## race_ethnhwhite 0.0169459 0.0010742 15.775 < 2e-16 ***
## badhealth 0.0820625 0.0008656 94.804 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1897 on 382422 degrees of freedom
## (19524 observations deleted due to missingness)
## Multiple R-squared: 0.03506, Adjusted R-squared: 0.03503
## F-statistic: 1263 on 11 and 382422 DF, p-value: < 2.2e-16
# Model fitted to dat.imp, i.e. a single completed data set rather than the
# pooled result over all imputations.
fit.imp <- lm(
  stroke ~ factor(marst) + race_eth + badhealth,
  data = dat.imp
)
summary(fit.imp)
##
## Call:
## lm(formula = stroke ~ factor(marst) + race_eth + badhealth, data = dat.imp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16211 -0.04067 -0.02228 -0.02228 1.00777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0077659 0.0017416 -4.459 8.23e-06 ***
## factor(marst)divorced 0.0311680 0.0017549 17.761 < 2e-16 ***
## factor(marst)married 0.0127724 0.0015990 7.988 1.38e-15 ***
## factor(marst)nm 0.0012702 0.0016959 0.749 0.453872
## factor(marst)separated 0.0277958 0.0026280 10.577 < 2e-16 ***
## factor(marst)widowed 0.0556106 0.0017966 30.953 < 2e-16 ***
## race_ethnh_black 0.0298362 0.0014573 20.474 < 2e-16 ***
## race_ethnh_multirace 0.0319106 0.0024426 13.064 < 2e-16 ***
## race_ethnh_other 0.0069462 0.0020997 3.308 0.000939 ***
## race_ethnhwhite 0.0172723 0.0010510 16.433 < 2e-16 ***
## badhealth 0.0823546 0.0008439 97.592 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1905 on 401947 degrees of freedom
## Multiple R-squared: 0.03515, Adjusted R-squared: 0.03513
## F-statistic: 1464 on 10 and 401947 DF, p-value: < 2.2e-16