library(ipumsr)
# Parse the IPUMS DDI codebook, then read the microdata it points to.
ddi <- read_ipums_ddi(ddi_file = "nhis_00002.xml")
data <- read_ipums_micro(ddi = ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
# Strip haven value labels from every column.
data <- haven::zap_labels(data)
# Normalise column names: remove underscores and lower-case them.
names(data) <- tolower(gsub("_", "", names(data), fixed = TRUE))
# Keep only respondents aged 18 through 24 (rows with NA age are dropped).
data <- filter(data, age >= 18, age <= 24)
# Summarise the recoded analysis variables, including their NA counts.
summary(data[c(
  "badhealth",
  "smoke_100cig",
  "opportunity_youth_cat",
  "educ",
  "healthinsurace_coverage",
  "medication_for_depression",
  "medication_for_worry",
  "anxiety_disoreder"
)])
## badhealth smoke_100cig opportunity_youth_cat
## Min. :0.00000 yes : 522 Opportunity youth : 402
## 1st Qu.:0.00000 no :3270 Not opportunity youth:3373
## Median :0.00000 NA's: 80 NA's : 97
## Mean :0.05528
## 3rd Qu.:0.00000
## Max. :1.00000
## NA's :1
## educ healthinsurace_coverage medication_for_depression
## 1Less than HS: 379 yes, no coverage: 562 yes : 303
## 2hsgrad :1277 no, has coverage:3278 no :3519
## 3More than HS:2206 NA's : 32 NA's: 50
## NA's : 10
##
##
##
## medication_for_worry anxiety_disoreder
## yes : 369 yes : 673
## no :3453 no :3193
## NA's: 50 NA's: 6
##
##
##
##
This shows that, among these recoded variables, opportunity_youth_cat,
the opportunity youth variable, has the most missing values: 97 people in the IPUMS NHIS, or 2.5051653% of the sample.
The lowest number of missing values is in the bad health variable, which has only 0.0258264% missing.
# Five-number summary, mean and NA count of the outcome.
summary(data[["badhealth"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00000 0.00000 0.00000 0.05528 0.00000 1.00000 1
# Modal imputation of badhealth.
# base::mode() returns the *storage mode* of a vector (the string "numeric"),
# not its most frequent value, so using it as the fill value wrote the text
# "numeric" into the missing cell and coerced the whole column to character.
# The most common observed value is taken from a frequency table instead.
mcv.badhealth <- as.numeric(names(which.max(table(data$badhealth))))
data$badhealth.imp.mode <- ifelse(is.na(data$badhealth), mcv.badhealth, data$badhealth)
# Storage mode before and after imputation: the column must stay numeric.
mode(data$badhealth)
## [1] "numeric"
mode(data$badhealth.imp.mode) #no difference!
## [1] "numeric"
# Linear model of bad health on the full predictor set, fitted to the
# un-imputed data (lm drops rows with any missing model variable).
fit <- lm(
  formula = badhealth ~ opportunity_youth_cat + race_eth + educ +
    smoke_100cig + healthinsurace_coverage + anxiety_disoreder +
    medication_for_depression,
  data = data
)
summary(fit)
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + educ +
## smoke_100cig + healthinsurace_coverage + anxiety_disoreder +
## medication_for_depression, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27940 -0.07317 -0.02979 -0.01306 0.98793
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.276535 0.022511 12.284
## opportunity_youth_catNot opportunity youth -0.058798 0.012144 -4.842
## race_ethotherminority 0.016730 0.007575 2.209
## educ2hsgrad -0.028887 0.013266 -2.178
## educ3More than HS -0.027892 0.012818 -2.176
## smoke_100cigno -0.047244 0.010832 -4.361
## healthinsurace_coverageno, has coverage -0.013861 0.010709 -1.294
## anxiety_disorederno -0.072285 0.010904 -6.629
## medication_for_depressionno -0.043391 0.015151 -2.864
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## opportunity_youth_catNot opportunity youth 1.34e-06 ***
## race_ethotherminority 0.02726 *
## educ2hsgrad 0.02950 *
## educ3More than HS 0.02962 *
## smoke_100cigno 1.33e-05 ***
## healthinsurace_coverageno, has coverage 0.19563
## anxiety_disorederno 3.86e-11 ***
## medication_for_depressionno 0.00421 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2209 on 3712 degrees of freedom
## (151 observations deleted due to missingness)
## Multiple R-squared: 0.04272, Adjusted R-squared: 0.04065
## F-statistic: 20.7 on 8 and 3712 DF, p-value: < 2.2e-16
# Frequency of each opportunity-youth category (NAs excluded by table()).
table(data[["opportunity_youth_cat"]])
##
## Opportunity youth Not opportunity youth
## 402 3373
# Most common category, stored as a factor that carries the full level set
# of the original variable.
mcv.opportunity_youth_cat <- factor(
  x = names(which.max(table(data$opportunity_youth_cat))),
  levels = levels(data$opportunity_youth_cat)
)
mcv.opportunity_youth_cat
## [1] Not opportunity youth
## Levels: Opportunity youth Not opportunity youth
# Impute the cases: copy the factor and overwrite only its NA entries with
# the modal level. Assigning factor-to-factor keeps the original levels, so
# there is no round trip through ifelse()'s integer codes and no relabelling
# step that would break if one of the levels happened to be unused.
data$opportunity_youth_cat.imp <- data$opportunity_youth_cat
data$opportunity_youth_cat.imp[is.na(data$opportunity_youth_cat.imp)] <- mcv.opportunity_youth_cat
# Category shares in the original (un-imputed) variable.
prop.table(table(data[["opportunity_youth_cat"]]))
##
## Opportunity youth Not opportunity youth
## 0.1064901 0.8935099
# Category shares after modal imputation.
prop.table(table(data[["opportunity_youth_cat.imp"]]))
##
## Opportunity youth Not opportunity youth
## 0.1038223 0.8961777
# Side-by-side check of the category shares before and after modal
# imputation. The second plot must use the imputed column; it previously
# re-plotted the original variable under the "Imputed Data" title.
barplot(prop.table(table(data$opportunity_youth_cat)), main = "Original Data", ylim = c(0, 0.9))
barplot(prop.table(table(data$opportunity_youth_cat.imp)), main = "Imputed Data", ylim = c(0, 0.9))
Opportunity youth status
level of education
smoking behavior
health insurance coverage
anxiety disorder
medication for depression
# Same outcome model with sex in place of race_eth, fitted to the
# un-imputed data.
fit1 <- lm(
  formula = badhealth ~ opportunity_youth_cat + educ + sex +
    smoke_100cig + healthinsurace_coverage + anxiety_disoreder +
    medication_for_depression,
  data = data
)
summary(fit1)
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + educ + sex +
## smoke_100cig + healthinsurace_coverage + anxiety_disoreder +
## medication_for_depression, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26821 -0.07499 -0.02514 -0.01660 0.98393
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.286058 0.022363 12.792
## opportunity_youth_catNot opportunity youth -0.058918 0.012165 -4.843
## educ2hsgrad -0.029137 0.013271 -2.195
## educ3More than HS -0.029662 0.012819 -2.314
## sexMale -0.008544 0.007436 -1.149
## smoke_100cigno -0.045601 0.010810 -4.218
## healthinsurace_coverageno, has coverage -0.017844 0.010543 -1.693
## anxiety_disorederno -0.067871 0.010967 -6.189
## medication_for_depressionno -0.041543 0.015168 -2.739
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## opportunity_youth_catNot opportunity youth 1.33e-06 ***
## educ2hsgrad 0.02819 *
## educ3More than HS 0.02073 *
## sexMale 0.25066
## smoke_100cigno 2.52e-05 ***
## healthinsurace_coverageno, has coverage 0.09061 .
## anxiety_disorederno 6.73e-10 ***
## medication_for_depressionno 0.00619 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.221 on 3712 degrees of freedom
## (151 observations deleted due to missingness)
## Multiple R-squared: 0.0418, Adjusted R-squared: 0.03973
## F-statistic: 20.24 on 8 and 3712 DF, p-value: < 2.2e-16
# Regress the outcome on a missingness indicator for each predictor to see
# whether being missing on that predictor is related to bad health.
# Note: fit1 is reused here and replaces the earlier model of that name.
fit1 <- lm(formula = badhealth ~ is.na(smoke_100cig), data = data)
fit2 <- lm(formula = badhealth ~ is.na(opportunity_youth_cat), data = data)
fit3 <- lm(formula = badhealth ~ is.na(medication_for_depression), data = data)
summary(object = fit1)
##
## Call:
## lm(formula = badhealth ~ is.na(smoke_100cig), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0875 -0.0546 -0.0546 -0.0546 0.9454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.054603 0.003712 14.710 <2e-16 ***
## is.na(smoke_100cig)TRUE 0.032897 0.025820 1.274 0.203
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2285 on 3869 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0004194, Adjusted R-squared: 0.000161
## F-statistic: 1.623 on 1 and 3869 DF, p-value: 0.2027
# Missingness-indicator model for opportunity_youth_cat.
summary(object = fit2)
##
## Call:
## lm(formula = badhealth ~ is.na(opportunity_youth_cat), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.07216 -0.05485 -0.05485 -0.05485 0.94515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.054849 0.003721 14.741 <2e-16 ***
## is.na(opportunity_youth_cat)TRUE 0.017316 0.023505 0.737 0.461
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2286 on 3869 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0001403, Adjusted R-squared: -0.0001182
## F-statistic: 0.5427 on 1 and 3869 DF, p-value: 0.4613
# Missingness-indicator model for medication_for_depression.
summary(object = fit3)
##
## Call:
## lm(formula = badhealth ~ is.na(medication_for_depression), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08000 -0.05496 -0.05496 -0.05496 0.94504
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.054959 0.003698 14.86 <2e-16 ***
## is.na(medication_for_depression)TRUE 0.025041 0.032536 0.77 0.442
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2286 on 3869 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0001531, Adjusted R-squared: -0.0001054
## F-statistic: 0.5923 on 1 and 3869 DF, p-value: 0.4416
# Tabulate the joint patterns of missingness across the analysis variables.
md.pattern(
  data[, c(
    "race_eth", "badhealth", "opportunity_youth_cat",
    "smoke_100cig", "educ", "anxiety_disoreder"
  )]
)
## race_eth badhealth anxiety_disoreder educ smoke_100cig
## 3754 1 1 1 1 1
## 22 1 1 1 1 1
## 6 1 1 1 1 0
## 73 1 1 1 1 0
## 9 1 1 1 0 1
## 1 1 1 1 0 0
## 5 1 1 0 1 1
## 1 1 1 0 1 1
## 1 1 0 1 1 1
## 0 1 6 10 80
## opportunity_youth_cat
## 3754 1 0
## 22 0 1
## 6 1 1
## 73 0 2
## 9 1 1
## 1 0 3
## 5 1 1
## 1 0 2
## 1 1 1
## 97 194
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.0, built: 2021-05-26)
## ## Copyright (C) 2005-2022 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# Pairwise counts of observed/missing combinations (rr, rm, mr, mm) for the
# same set of analysis variables.
md.pairs(
  data[, c(
    "race_eth", "badhealth", "opportunity_youth_cat",
    "smoke_100cig", "educ", "anxiety_disoreder"
  )]
)
## $rr
## race_eth badhealth opportunity_youth_cat smoke_100cig
## race_eth 3872 3871 3775 3792
## badhealth 3871 3871 3774 3791
## opportunity_youth_cat 3775 3774 3775 3769
## smoke_100cig 3792 3791 3769 3792
## educ 3862 3861 3766 3783
## anxiety_disoreder 3866 3865 3770 3786
## educ anxiety_disoreder
## race_eth 3862 3866
## badhealth 3861 3865
## opportunity_youth_cat 3766 3770
## smoke_100cig 3783 3786
## educ 3862 3856
## anxiety_disoreder 3856 3866
##
## $rm
## race_eth badhealth opportunity_youth_cat smoke_100cig
## race_eth 0 1 97 80
## badhealth 0 0 97 80
## opportunity_youth_cat 0 1 0 6
## smoke_100cig 0 1 23 0
## educ 0 1 96 79
## anxiety_disoreder 0 1 96 80
## educ anxiety_disoreder
## race_eth 10 6
## badhealth 10 6
## opportunity_youth_cat 9 5
## smoke_100cig 9 6
## educ 0 6
## anxiety_disoreder 10 0
##
## $mr
## race_eth badhealth opportunity_youth_cat smoke_100cig
## race_eth 0 0 0 0
## badhealth 1 0 1 1
## opportunity_youth_cat 97 97 0 23
## smoke_100cig 80 80 6 0
## educ 10 10 9 9
## anxiety_disoreder 6 6 5 6
## educ anxiety_disoreder
## race_eth 0 0
## badhealth 1 1
## opportunity_youth_cat 96 96
## smoke_100cig 79 80
## educ 0 10
## anxiety_disoreder 6 0
##
## $mm
## race_eth badhealth opportunity_youth_cat smoke_100cig
## race_eth 0 0 0 0
## badhealth 0 1 0 0
## opportunity_youth_cat 0 0 97 74
## smoke_100cig 0 0 74 80
## educ 0 0 1 1
## anxiety_disoreder 0 0 1 0
## educ anxiety_disoreder
## race_eth 0 0
## badhealth 0 0
## opportunity_youth_cat 1 1
## smoke_100cig 1 0
## educ 10 0
## anxiety_disoreder 0 6
# Work on a copy, then create 10 multiply imputed data sets for the analysis
# variables with a fixed seed so the imputations are reproducible.
dat2 <- data
imp <- mice(
  data = dat2[, c("race_eth", "badhealth", "opportunity_youth_cat", "smoke_100cig", "educ", "anxiety_disoreder")],
  m = 10,
  seed = 22
)
##
## iter imp variable
## 1 1 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 2 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 3 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 4 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 5 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 6 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 7 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 8 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 9 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 1 10 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 1 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 2 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 3 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 4 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 5 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 6 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 7 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 8 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 9 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 2 10 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 1 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 2 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 3 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 4 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 5 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 6 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 7 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 8 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 9 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 3 10 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 1 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 2 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 3 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 4 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 5 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 6 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 7 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 8 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 9 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 4 10 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 1 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 2 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 3 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 4 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 5 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 6 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 7 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 8 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 9 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
## 5 10 badhealth opportunity_youth_cat smoke_100cig educ anxiety_disoreder
# Show the imputation methods and predictor matrix chosen by mice.
print(x = imp)
## Class: mids
## Number of multiple imputations: 10
## Imputation methods:
## race_eth badhealth opportunity_youth_cat
## "" "pmm" "logreg"
## smoke_100cig educ anxiety_disoreder
## "logreg" "polyreg" "logreg"
## PredictorMatrix:
## race_eth badhealth opportunity_youth_cat smoke_100cig
## race_eth 0 1 1 1
## badhealth 1 0 1 1
## opportunity_youth_cat 1 1 0 1
## smoke_100cig 1 1 1 0
## educ 1 1 1 1
## anxiety_disoreder 1 1 1 1
## educ anxiety_disoreder
## race_eth 1 1
## badhealth 1 1
## opportunity_youth_cat 1 1
## smoke_100cig 1 1
## educ 0 1
## anxiety_disoreder 1 0
# Diagnostic plot of the imputation object, followed by the imputed values
# for badhealth (one column per imputation).
plot(imp)
head(imp[["imp"]][["badhealth"]])
summary(imp[["imp"]][["badhealth"]])
## 1 2 3 4 5 6
## Min. :1 Min. :0 Min. :0 Min. :1 Min. :0 Min. :0
## 1st Qu.:1 1st Qu.:0 1st Qu.:0 1st Qu.:1 1st Qu.:0 1st Qu.:0
## Median :1 Median :0 Median :0 Median :1 Median :0 Median :0
## Mean :1 Mean :0 Mean :0 Mean :1 Mean :0 Mean :0
## 3rd Qu.:1 3rd Qu.:0 3rd Qu.:0 3rd Qu.:1 3rd Qu.:0 3rd Qu.:0
## Max. :1 Max. :0 Max. :0 Max. :1 Max. :0 Max. :0
## 7 8 9 10
## Min. :0 Min. :0 Min. :0 Min. :0
## 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0
## Median :0 Median :0 Median :0 Median :0
## Mean :0 Mean :0 Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0 Max. :0 Max. :0
# Observed distribution of badhealth, for comparison with the imputed values.
summary(data[["badhealth"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00000 0.00000 0.00000 0.05528 0.00000 1.00000 1
# Extract the first completed data set from the imputation object.
dat.imp <- complete(imp, action = 1)
head(dat.imp, n = 10)
# Compare to the original data
head(
  data[, c("race_eth", "badhealth", "opportunity_youth_cat", "smoke_100cig", "educ", "anxiety_disoreder")],
  n = 10
)
# Rows whose badhealth was missing: imputed version first, then the original.
head(dat.imp[is.na(data$badhealth), ], n = 10)
head(
  data[is.na(data$badhealth), c("race_eth", "badhealth", "opportunity_youth_cat", "smoke_100cig", "educ", "anxiety_disoreder")],
  n = 10
)
# Fit the outcome model separately within each of the imputed data sets.
fit.badhealth <- with(
  data = imp,
  expr = lm(
    badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig + educ +
      anxiety_disoreder
  )
)
fit.badhealth
## call :
## with.mids(data = imp, expr = lm(badhealth ~ opportunity_youth_cat +
## race_eth + smoke_100cig + educ + anxiety_disoreder))
##
## call1 :
## mice(data = dat2[, c("race_eth", "badhealth", "opportunity_youth_cat",
## "smoke_100cig", "educ", "anxiety_disoreder")], m = 10, seed = 22)
##
## nmis :
## race_eth badhealth opportunity_youth_cat
## 0 1 97
## smoke_100cig educ anxiety_disoreder
## 80 10 6
##
## analyses :
## [[1]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.24618
## opportunity_youth_catNot opportunity youth
## -0.06337
## race_ethotherminority
## 0.01571
## smoke_100cigno
## -0.05293
## educ2hsgrad
## -0.02264
## educ3More than HS
## -0.02892
## anxiety_disorederno
## -0.08656
##
##
## [[2]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.25359
## opportunity_youth_catNot opportunity youth
## -0.07072
## race_ethotherminority
## 0.01583
## smoke_100cigno
## -0.05330
## educ2hsgrad
## -0.02507
## educ3More than HS
## -0.02868
## anxiety_disorederno
## -0.08667
##
##
## [[3]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.24961
## opportunity_youth_catNot opportunity youth
## -0.06754
## race_ethotherminority
## 0.01597
## smoke_100cigno
## -0.05030
## educ2hsgrad
## -0.02464
## educ3More than HS
## -0.02888
## anxiety_disorederno
## -0.08854
##
##
## [[4]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.24507
## opportunity_youth_catNot opportunity youth
## -0.06059
## race_ethotherminority
## 0.01588
## smoke_100cigno
## -0.05361
## educ2hsgrad
## -0.02432
## educ3More than HS
## -0.02857
## anxiety_disorederno
## -0.08720
##
##
## [[5]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.25344
## opportunity_youth_catNot opportunity youth
## -0.06745
## race_ethotherminority
## 0.01639
## smoke_100cigno
## -0.05406
## educ2hsgrad
## -0.02736
## educ3More than HS
## -0.03164
## anxiety_disorederno
## -0.08655
##
##
## [[6]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.24750
## opportunity_youth_catNot opportunity youth
## -0.06435
## race_ethotherminority
## 0.01624
## smoke_100cigno
## -0.05271
## educ2hsgrad
## -0.02432
## educ3More than HS
## -0.02866
## anxiety_disorederno
## -0.08732
##
##
## [[7]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.24452
## opportunity_youth_catNot opportunity youth
## -0.06141
## race_ethotherminority
## 0.01592
## smoke_100cigno
## -0.05317
## educ2hsgrad
## -0.02370
## educ3More than HS
## -0.02950
## anxiety_disorederno
## -0.08597
##
##
## [[8]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.24557
## opportunity_youth_catNot opportunity youth
## -0.06202
## race_ethotherminority
## 0.01611
## smoke_100cigno
## -0.04982
## educ2hsgrad
## -0.02414
## educ3More than HS
## -0.03025
## anxiety_disorederno
## -0.08946
##
##
## [[9]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.25096
## opportunity_youth_catNot opportunity youth
## -0.06529
## race_ethotherminority
## 0.01599
## smoke_100cigno
## -0.05306
## educ2hsgrad
## -0.02758
## educ3More than HS
## -0.03172
## anxiety_disorederno
## -0.08668
##
##
## [[10]]
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + race_eth + smoke_100cig +
## educ + anxiety_disoreder)
##
## Coefficients:
## (Intercept)
## 0.24351
## opportunity_youth_catNot opportunity youth
## -0.06331
## race_ethotherminority
## 0.01586
## smoke_100cigno
## -0.04854
## educ2hsgrad
## -0.02528
## educ3More than HS
## -0.03012
## anxiety_disorederno
## -0.08629
# Standard deviation of badhealth within each imputed data set.
# The argument is spelled out as `expr`; `exp` only worked through partial
# argument matching.
with(data = imp, expr = (sd(badhealth)))
## call :
## with.mids(data = imp, expr = (sd(badhealth)))
##
## call1 :
## mice(data = dat2[, c("race_eth", "badhealth", "opportunity_youth_cat",
## "smoke_100cig", "educ", "anxiety_disoreder")], m = 10, seed = 22)
##
## nmis :
## race_eth badhealth opportunity_youth_cat
## 0 1 97
## smoke_100cig educ anxiety_disoreder
## 80 10 6
##
## analyses :
## [[1]]
## [1] 0.2290353
##
## [[2]]
## [1] 0.2285333
##
## [[3]]
## [1] 0.2285333
##
## [[4]]
## [1] 0.2290353
##
## [[5]]
## [1] 0.2285333
##
## [[6]]
## [1] 0.2285333
##
## [[7]]
## [1] 0.2285333
##
## [[8]]
## [1] 0.2285333
##
## [[9]]
## [1] 0.2285333
##
## [[10]]
## [1] 0.2285333
# Category shares of opportunity_youth_cat within each imputed data set.
# The argument is spelled out as `expr`; `exp` only worked through partial
# argument matching.
with(data = imp, expr = (prop.table(table(opportunity_youth_cat))))
## call :
## with.mids(data = imp, expr = (prop.table(table(opportunity_youth_cat))))
##
## call1 :
## mice(data = dat2[, c("race_eth", "badhealth", "opportunity_youth_cat",
## "smoke_100cig", "educ", "anxiety_disoreder")], m = 10, seed = 22)
##
## nmis :
## race_eth badhealth opportunity_youth_cat
## 0 1 97
## smoke_100cig educ anxiety_disoreder
## 80 10 6
##
## analyses :
## [[1]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.1066632 0.8933368
##
## [[2]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.106405 0.893595
##
## [[3]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.1066632 0.8933368
##
## [[4]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.107438 0.892562
##
## [[5]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.106405 0.893595
##
## [[6]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.1069215 0.8930785
##
## [[7]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.1069215 0.8930785
##
## [[8]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.106405 0.893595
##
## [[9]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.1061467 0.8938533
##
## [[10]]
## opportunity_youth_cat
## Opportunity youth Not opportunity youth
## 0.1061467 0.8938533
# Pool the per-imputation model fits into a single set of estimates.
est.p <- pool(object = fit.badhealth)
print(x = est.p)
## Class: mipo m = 10
## term m estimate ubar
## 1 (Intercept) 10 0.24799478 3.332444e-04
## 2 opportunity_youth_catNot opportunity youth 10 -0.06460482 1.423434e-04
## 3 race_ethotherminority 10 0.01599045 5.465086e-05
## 4 smoke_100cigno 10 -0.05215022 1.140941e-04
## 5 educ2hsgrad 10 -0.02490614 1.713281e-04
## 6 educ3More than HS 10 -0.02969375 1.586324e-04
## 7 anxiety_disorederno 10 -0.08712492 9.358192e-05
## b t dfcom df riv lambda fmi
## 1 1.361550e-05 3.482214e-04 3865 2100.657 0.0449431425 0.0430101320 0.04391997
## 2 1.010216e-05 1.534558e-04 3865 1160.468 0.0780673722 0.0724141869 0.07400871
## 3 4.141546e-08 5.469641e-05 3865 3858.636 0.0008336009 0.0008329066 0.00135039
## 4 3.523066e-06 1.179694e-04 3865 2580.205 0.0339664672 0.0328506468 0.03359944
## 5 2.364498e-06 1.739290e-04 3865 3476.531 0.0151810969 0.0149540776 0.01552027
## 6 1.449799e-06 1.602272e-04 3865 3670.049 0.0100532957 0.0099532329 0.01049232
## 7 1.175686e-06 9.487517e-05 3865 3532.461 0.0138194952 0.0136311200 0.01418911
# Pooled coefficient table.
summary(est.p)
# Plot lambda (the `lambda` column of the pooled results) for each model term.
# The labels come from the `term` column: the pooled table's row names are
# just "1", "2", ..., so using them put row numbers on the x-axis.
lam <- data.frame(lam = est.p$pooled$lambda, param = est.p$pooled$term)
ggplot(data = lam, aes(x = param, y = lam)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(dplyr)
# Complete-case data frame: keep the analysis variables and drop every row
# with a missing value in any of them.
bnm <- data %>%
  select(
    race_eth, badhealth, opportunity_youth_cat,
    smoke_100cig, educ, anxiety_disoreder
  ) %>%
  filter(complete.cases(.)) %>%
  as.data.frame()
# Outcome model on the complete cases only.
summary(
  lm(
    formula = badhealth ~ opportunity_youth_cat + educ + race_eth +
      smoke_100cig + anxiety_disoreder,
    data = bnm
  )
)
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + educ + race_eth +
## smoke_100cig + anxiety_disoreder, data = bnm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25887 -0.07905 -0.03136 -0.01594 0.98406
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.243443 0.018388 13.239
## opportunity_youth_catNot opportunity youth -0.064913 0.012033 -5.395
## educ2hsgrad -0.029375 0.013201 -2.225
## educ3More than HS -0.029525 0.012702 -2.325
## race_ethotherminority 0.015422 0.007438 2.073
## smoke_100cigno -0.047686 0.010782 -4.423
## anxiety_disorederno -0.085380 0.009699 -8.803
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## opportunity_youth_catNot opportunity youth 7.3e-08 ***
## educ2hsgrad 0.0261 *
## educ3More than HS 0.0202 *
## race_ethotherminority 0.0382 *
## smoke_100cigno 1.0e-05 ***
## anxiety_disorederno < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2217 on 3747 degrees of freedom
## Multiple R-squared: 0.04061, Adjusted R-squared: 0.03908
## F-statistic: 26.44 on 6 and 3747 DF, p-value: < 2.2e-16
# Same model on the original data; lm() drops incomplete rows itself.
fit1 <- lm(
  formula = badhealth ~ opportunity_youth_cat + educ + race_eth +
    smoke_100cig + anxiety_disoreder,
  data = data
)
summary(fit1)
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + educ + race_eth +
## smoke_100cig + anxiety_disoreder, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25887 -0.07905 -0.03136 -0.01594 0.98406
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.243443 0.018388 13.239
## opportunity_youth_catNot opportunity youth -0.064913 0.012033 -5.395
## educ2hsgrad -0.029375 0.013201 -2.225
## educ3More than HS -0.029525 0.012702 -2.325
## race_ethotherminority 0.015422 0.007438 2.073
## smoke_100cigno -0.047686 0.010782 -4.423
## anxiety_disorederno -0.085380 0.009699 -8.803
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## opportunity_youth_catNot opportunity youth 7.3e-08 ***
## educ2hsgrad 0.0261 *
## educ3More than HS 0.0202 *
## race_ethotherminority 0.0382 *
## smoke_100cigno 1.0e-05 ***
## anxiety_disorederno < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2217 on 3747 degrees of freedom
## (118 observations deleted due to missingness)
## Multiple R-squared: 0.04061, Adjusted R-squared: 0.03908
## F-statistic: 26.44 on 6 and 3747 DF, p-value: < 2.2e-16
# Same model on dat.imp, the first completed (imputed) data set.
fit.imp <- lm(
  formula = badhealth ~ opportunity_youth_cat + educ + race_eth +
    smoke_100cig + anxiety_disoreder,
  data = dat.imp
)
summary(fit.imp)
##
## Call:
## lm(formula = badhealth ~ opportunity_youth_cat + educ + race_eth +
## smoke_100cig + anxiety_disoreder, data = dat.imp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26189 -0.07908 -0.03010 -0.01439 0.98561
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.246178 0.018216 13.515
## opportunity_youth_catNot opportunity youth -0.063370 0.011964 -5.297
## educ2hsgrad -0.022643 0.013103 -1.728
## educ3More than HS -0.028922 0.012617 -2.292
## race_ethotherminority 0.015715 0.007409 2.121
## smoke_100cigno -0.052934 0.010683 -4.955
## anxiety_disorederno -0.086563 0.009687 -8.936
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## opportunity_youth_catNot opportunity youth 1.25e-07 ***
## educ2hsgrad 0.0841 .
## educ3More than HS 0.0219 *
## race_ethotherminority 0.0340 *
## smoke_100cigno 7.53e-07 ***
## anxiety_disorederno < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2244 on 3865 degrees of freedom
## Multiple R-squared: 0.04182, Adjusted R-squared: 0.04033
## F-statistic: 28.11 on 6 and 3865 DF, p-value: < 2.2e-16
##Were the results similar between the modal and multiply imputed data sets? How do the results compare to the results from the model fit with the data source with missing values?