library(forcats)
library(stargazer, quietly = T)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey, quietly = T)
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(car, quietly = T)
library(questionr, quietly = T)
library(dplyr, quietly = T)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats, quietly = T)
library(tidyverse, quietly = T)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v readr 2.1.2
## v tibble 3.1.6 v purrr 0.3.4
## v tidyr 1.2.0 v stringr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
## x tidyr::unpack() masks Matrix::unpack()
library(srvyr, quietly = T)
##
## Attaching package: 'srvyr'
## The following object is masked from 'package:stats':
##
## filter
library( gtsummary, quietly = T)
## #Uighur
library(caret, quietly = T)
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
## The following object is masked from 'package:survival':
##
## cluster
library(tableone, quietly = T)
library(car)
library(stargazer, quietly = T)
library(survey, quietly = T)
library(ggplot2, quietly = T)
library(pander, quietly = T)
## Warning: package 'pander' was built under R version 4.1.3
library(knitr, quietly = T)
library(dplyr, quietly = T)
library(factoextra, quietly = T)
## Warning: package 'factoextra' was built under R version 4.1.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR, quietly = T)
## Warning: package 'FactoMineR' was built under R version 4.1.3
library(car)
library(mice)
## Warning: package 'mice' was built under R version 4.1.3
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(ggplot2)
library(dplyr)
library(ipumsr)
## Warning: package 'ipumsr' was built under R version 4.1.3
library(haven)
# Load the IPUMS NHIS extract: parse the DDI codebook, then read the microdata
# through it.
# NOTE(review): the path is absolute and machine-specific; the script will not
# run elsewhere without editing it.
ddi <- ipumsr::read_ipums_ddi(
  ddi_file = "C:/Users/spara/OneDrive/Desktop/project/nhis_00010.xml"
)
data <- ipumsr::read_ipums_micro(ddi = ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
# Drop the haven value labels, then normalise the column names by removing
# underscores and converting to lower case.
data <- haven::zap_labels(data)
names(data) <- tolower(gsub("_", "", names(data)))
# Currently pregnant.
# NOTE(review): only code 0 is kept (labelled 'yes'); every other code becomes
# NA. Confirm against the PREGNANTNOW codebook that 0 really is the "yes"
# category.
data$pregnantnow <- as.factor(data$pregnantnow)
data$curpreg <- car::Recode(
  data$pregnantnow,
  recodes = "0='yes';else=NA",
  as.factor = TRUE
)
# Medication for depression: code 1 becomes 0, code 2 becomes 1, and any other
# code becomes NA. The result overwrites the raw deprx column.
data$deprx <- car::Recode(
  data$deprx,
  recodes = "1=0; 2=1;else=NA"
)
# Ever had an anxiety disorder: anxietyev code 1 -> 'no', code 2 -> 'yes',
# anything else -> NA; stored as a factor in a new column.
data$anxietydisorder_cat <- car::Recode(
  data$anxietyev,
  recodes = "1='no'; 2='yes';else=NA",
  as.factor = TRUE
)
# Education level: map five education codes to labelled categories; every
# other code becomes NA.
# The call is namespace-qualified like the other recodes in this script, so it
# does not depend on which attached package provides `Recode` (dplyr already
# masks car::recode here).
# NOTE(review): only five individual codes are mapped, and about 25% of the
# analysis sample ends up NA on educ -- confirm that the unmapped codes are
# meant to be treated as missing.
data$educ <- as.factor(data$educ)
data$educ <- car::Recode(
  data$educ,
  recodes = "102 ='NoSchool'; 201='HS Diploma'; 301='Some college';
400= 'Undergrad'; 501= 'Masters';else=NA",
  as.factor = TRUE
)
# Health insurance coverage, built from hinotcove: code 1 -> has coverage,
# code 2 -> no coverage, anything else -> NA.
data$healthinsurancecov <- car::Recode(
  data$hinotcove,
  recodes = "1='no, has coverage'; 2='yes, no coverage';else=NA",
  as.factor = TRUE
)
## Race/ethnicity, built from racea. Codes outside the listed values and
## ranges become NA.
data$race <- car::Recode(
  data$racea,
  recodes = "100 ='White'; 200 ='African American';
400:434= 'Asian'; 500:590 = 'Other'; else=NA",
  as.factor = TRUE
)
## Marital status, built from marstat. Codes 10-13 are pooled as 'Married';
## unlisted codes become NA.
data$mars <- car::Recode(
  data$marstat,
  recodes = "10:13='Married'; 20='Widowed'; 30='Divorced';
40='Separated'; 50='Never Married'; else=NA",
  as.factor = TRUE
)
# Keep only respondents aged 18 through 45 (both ends inclusive).
data <- dplyr::filter(data, age >= 18, age <= 45)
# Descriptive summary, including NA counts, of the recoded analysis variables.
summary(
  data[, c(
    "deprx", "educ", "mars",
    "healthinsurancecov", "race", "anxietydisorder_cat"
  )]
)
## deprx educ mars
## Min. :0.0000 HS Diploma :4805 Divorced : 1938
## 1st Qu.:0.0000 Masters :2678 Married : 9938
## Median :0.0000 NoSchool : 35 Never Married:10766
## Mean :0.0893 Some college:4011 Separated : 356
## 3rd Qu.:0.0000 Undergrad :6286 Widowed : 92
## Max. :1.0000 NA's :5962 NA's : 687
## NA's :332
## healthinsurancecov race anxietydisorder_cat
## no, has coverage:20392 African American: 2773 no :19661
## yes, no coverage: 3306 Asian : 1779 yes : 4091
## NA's : 79 Other : 469 NA's: 25
## White :16632
## NA's : 2124
##
##
# Percentage of missing values in educ and in anxietydisorder_cat.
# mean(is.na(x)) is used instead of indexing table(is.na(x))[2], which yields
# NA whenever a column has no missing values (the table then has one cell).
100 * mean(is.na(data$educ))
## [1] 25.07465
100 * mean(is.na(data$anxietydisorder_cat))
## [1] 0.1051436
This shows that, among these recoded variables, education (educ) has the most missing values, with 25.07% missing, whereas anxiety disorder (anxietydisorder_cat) has the fewest, with only 0.11% missing.
# Distribution of the observed outcome, including its NA count.
summary(data$deprx)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.0000 0.0893 0.0000 1.0000 332
# Modal imputation for the binary deprx indicator: missing values are replaced
# by the most frequent observed value. Imputing the mean would insert a
# non-0/1 value into a 0/1 variable and contradict the ".imp.mode" name.
mcv.deprx <- as.numeric(names(which.max(table(data$deprx))))
data$deprx.imp.mode <- ifelse(is.na(data$deprx), mcv.deprx, data$deprx)
# Note: base::mode() reports the storage mode of the vector ("numeric"), not
# its most frequent value.
mode(data$deprx)
## [1] "numeric"
# Storage mode of the imputed column (a type check, not the statistical mode).
mode(data$deprx.imp.mode)
## [1] "numeric"
# Linear model of deprx on education, marital status, insurance coverage,
# race and poverty. lm() drops every row that has an NA in any model variable.
fit <- lm(
  deprx ~ educ + mars + healthinsurancecov + race + poverty,
  data = data
)
summary(fit)
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## poverty, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23113 -0.10504 -0.08688 -0.05332 1.03617
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1734888 0.0136104 12.747 < 2e-16 ***
## educMasters 0.0103381 0.0076803 1.346 0.17830
## educNoSchool -0.0773618 0.0558581 -1.385 0.16608
## educSome college 0.0211245 0.0065731 3.214 0.00131 **
## educUndergrad 0.0081588 0.0061575 1.325 0.18519
## marsMarried -0.0549516 0.0088994 -6.175 6.79e-10 ***
## marsNever Married -0.0420415 0.0087935 -4.781 1.76e-06 ***
## marsSeparated -0.0326805 0.0213551 -1.530 0.12595
## marsWidowed 0.0154963 0.0380971 0.407 0.68419
## healthinsurancecovyes, no coverage -0.0527337 0.0074178 -7.109 1.22e-12 ***
## raceAsian -0.0258983 0.0101215 -2.559 0.01051 *
## raceOther 0.0317785 0.0165143 1.924 0.05433 .
## raceWhite 0.0525219 0.0070031 7.500 6.73e-14 ***
## poverty -0.0026255 0.0003288 -7.985 1.50e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2834 on 15897 degrees of freedom
## (7866 observations deleted due to missingness)
## Multiple R-squared: 0.01802, Adjusted R-squared: 0.01722
## F-statistic: 22.44 on 13 and 15897 DF, p-value: < 2.2e-16
# Frequency of each observed education category (NAs are not counted).
table(data$educ)
##
## HS Diploma Masters NoSchool Some college Undergrad
## 4805 2678 35 4011 6286
# Most common education category, kept as a one-element factor that shares
# the level set of data$educ.
mcv.educ <- factor(
  names(which.max(table(data$educ))),
  levels = levels(data$educ)
)
mcv.educ
## [1] Undergrad
## Levels: HS Diploma Masters NoSchool Some college Undergrad
# Modal imputation of education: copy the factor and fill its NAs with the
# most common category.
# Assigning into the factor keeps codes and labels aligned. The earlier
# ifelse()-on-factors approach went through integer codes and then relabelled
# them, which mislabels or fails as soon as one level is unused.
data$educ.imp <- data$educ
data$educ.imp[is.na(data$educ.imp)] <- mcv.educ
# Category shares in the observed education variable.
prop.table(table(data$educ))
##
## HS Diploma Masters NoSchool Some college Undergrad
## 0.269716531 0.150322762 0.001964637 0.225147348 0.352848723
# Category shares after modal imputation.
prop.table(table(data$educ.imp))
##
## HS Diploma Masters NoSchool Some college Undergrad
## 0.202086050 0.112629852 0.001472011 0.168692434 0.515119653
# Compare the education distribution before and after modal imputation.
# The second plot must use educ.imp; plotting educ twice would show two
# identical charts under different titles.
barplot(prop.table(table(data$educ)), main = "Original Data", ylim = c(0, 0.9))
barplot(prop.table(table(data$educ.imp)), main = "Imputed Data", ylim = c(0, 0.9))
# Same outcome model with anxiety disorder replacing poverty; rows with any
# NA in the model variables are dropped by lm().
fit1 <- lm(
  deprx ~ educ + mars + healthinsurancecov + race + anxietydisorder_cat,
  data = data
)
summary(fit1)
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41323 -0.03930 -0.03540 -0.02112 1.00754
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.044663 0.009723 4.594 4.39e-06 ***
## educMasters 0.000515 0.006701 0.077 0.93874
## educNoSchool -0.039379 0.050259 -0.784 0.43334
## educSome college 0.010613 0.005919 1.793 0.07299 .
## educUndergrad 0.001240 0.005374 0.231 0.81748
## marsMarried -0.023548 0.008019 -2.937 0.00332 **
## marsNever Married -0.021108 0.007926 -2.663 0.00775 **
## marsSeparated -0.032402 0.019214 -1.686 0.09174 .
## marsWidowed 0.011722 0.034301 0.342 0.73254
## healthinsurancecovyes, no coverage -0.031095 0.006641 -4.682 2.86e-06 ***
## raceAsian -0.019291 0.009098 -2.120 0.03399 *
## raceOther 0.005135 0.014852 0.346 0.72955
## raceWhite 0.013044 0.006266 2.082 0.03739 *
## anxietydisorder_catyes 0.333190 0.005409 61.600 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2552 on 15888 degrees of freedom
## (7875 observations deleted due to missingness)
## Multiple R-squared: 0.2042, Adjusted R-squared: 0.2035
## F-statistic: 313.5 on 13 and 15888 DF, p-value: < 2.2e-16
# Does missingness in a predictor relate to the outcome? Regress deprx on an
# indicator of whether educ, mars or race is missing.
# Note: fit1 is reused here and replaces the model fitted earlier.
fit1 <- lm(deprx ~ is.na(educ), data = data)
fit2 <- lm(deprx ~ is.na(mars), data = data)
fit3 <- lm(deprx ~ is.na(race), data = data)
summary(fit1)
##
## Call:
## lm(formula = deprx ~ is.na(educ), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.09712 -0.08665 -0.08665 -0.08665 0.91335
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.086652 0.002151 40.292 <2e-16 ***
## is.na(educ)TRUE 0.010468 0.004298 2.435 0.0149 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2851 on 23443 degrees of freedom
## (332 observations deleted due to missingness)
## Multiple R-squared: 0.0002529, Adjusted R-squared: 0.0002103
## F-statistic: 5.931 on 1 and 23443 DF, p-value: 0.01488
# Outcome regressed on the marital-status missingness indicator.
summary(fit2)
##
## Call:
## lm(formula = deprx ~ is.na(mars), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08966 -0.08966 -0.08966 -0.08966 0.93333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.089655 0.001878 47.743 <2e-16 ***
## is.na(mars)TRUE -0.022989 0.014560 -1.579 0.114
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2851 on 23443 degrees of freedom
## (332 observations deleted due to missingness)
## Multiple R-squared: 0.0001063, Adjusted R-squared: 6.367e-05
## F-statistic: 2.493 on 1 and 23443 DF, p-value: 0.1144
# Outcome regressed on the race missingness indicator.
summary(fit3)
##
## Call:
## lm(formula = deprx ~ is.na(race), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.09242 -0.09242 -0.09242 -0.09242 0.94316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.092424 0.001949 47.411 < 2e-16 ***
## is.na(race)TRUE -0.035584 0.006551 -5.432 5.64e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.285 on 23443 degrees of freedom
## (332 observations deleted due to missingness)
## Multiple R-squared: 0.001257, Adjusted R-squared: 0.001214
## F-statistic: 29.5 on 1 and 23443 DF, p-value: 5.64e-08
# Tabulate the joint missing-data patterns across the six analysis variables.
mice::md.pattern(
  data[, c(
    "deprx", "educ", "mars",
    "healthinsurancecov", "race", "anxietydisorder_cat"
  )]
)
## anxietydisorder_cat healthinsurancecov deprx mars race educ
## 15902 1 1 1 1 1 1 0
## 5052 1 1 1 1 1 0 1
## 1327 1 1 1 1 0 1 1
## 695 1 1 1 1 0 0 2
## 254 1 1 1 0 1 1 1
## 89 1 1 1 0 1 0 2
## 27 1 1 1 0 0 1 2
## 14 1 1 1 0 0 0 3
## 25 1 1 0 1 1 1 1
## 4 1 1 0 1 1 0 2
## 1 1 1 0 1 0 1 2
## 2 1 1 0 1 0 0 3
## 181 1 1 0 0 1 1 2
## 59 1 1 0 0 1 0 3
## 22 1 1 0 0 0 1 3
## 23 1 1 0 0 0 0 4
## 46 1 0 1 1 1 1 1
## 9 1 0 1 1 1 0 2
## 4 1 0 1 1 0 1 2
## 5 1 0 1 1 0 0 3
## 2 1 0 1 0 1 1 2
## 1 1 0 1 0 1 0 3
## 1 1 0 1 0 0 1 3
## 1 1 0 0 1 1 1 2
## 3 1 0 0 0 1 1 3
## 3 1 0 0 0 1 0 4
## 9 0 1 1 1 1 1 1
## 3 0 1 1 1 1 0 2
## 2 0 1 1 1 0 1 2
## 1 0 1 1 0 1 1 2
## 1 0 1 1 0 0 1 3
## 1 0 1 0 1 1 1 2
## 1 0 1 0 1 1 0 3
## 2 0 1 0 0 1 1 3
## 1 0 1 0 0 1 0 4
## 1 0 0 1 1 1 0 3
## 3 0 0 0 0 1 1 4
## 25 79 332 687 2124 5962 9209
# Pairwise missingness counts for the six analysis variables; the result has
# the components rr, rm, mr and mm.
mice::md.pairs(
  data[, c(
    "deprx", "educ", "mars",
    "healthinsurancecov", "race", "anxietydisorder_cat"
  )]
)
## $rr
## deprx educ mars healthinsurancecov race
## deprx 23445 17576 23055 23376 21369
## educ 17576 17815 17318 17755 16430
## mars 23055 17318 23090 23024 21054
## healthinsurancecov 23376 17755 23024 23698 21584
## race 21369 16430 21054 21584 21653
## anxietydisorder_cat 23428 17796 23073 23677 21631
## anxietydisorder_cat
## deprx 23428
## educ 17796
## mars 23073
## healthinsurancecov 23677
## race 21631
## anxietydisorder_cat 23752
##
## $rm
## deprx educ mars healthinsurancecov race anxietydisorder_cat
## deprx 0 5869 390 69 2076 17
## educ 239 0 497 60 1385 19
## mars 35 5772 0 66 2036 17
## healthinsurancecov 322 5943 674 0 2114 21
## race 284 5223 599 69 0 22
## anxietydisorder_cat 324 5956 679 75 2121 0
##
## $mr
## deprx educ mars healthinsurancecov race anxietydisorder_cat
## deprx 0 239 35 322 284 324
## educ 5869 0 5772 5943 5223 5956
## mars 390 497 0 674 599 679
## healthinsurancecov 69 60 66 0 69 75
## race 2076 1385 2036 2114 0 2121
## anxietydisorder_cat 17 19 17 21 22 0
##
## $mm
## deprx educ mars healthinsurancecov race anxietydisorder_cat
## deprx 332 93 297 10 48 8
## educ 93 5962 190 19 739 6
## mars 297 190 687 13 88 8
## healthinsurancecov 10 19 13 79 10 4
## race 48 739 88 10 2124 3
## anxietydisorder_cat 8 6 8 4 3 25
library(Amelia)
## Warning: package 'Amelia' was built under R version 4.1.3
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.0, built: 2021-05-26)
## ## Copyright (C) 2005-2022 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# NOTE(review): this repeats the md.pairs() call made earlier in the script on
# the same columns and produces the same tables.
md.pairs(data[,c("deprx","educ","mars", "healthinsurancecov","race", "anxietydisorder_cat")])
## $rr
## deprx educ mars healthinsurancecov race
## deprx 23445 17576 23055 23376 21369
## educ 17576 17815 17318 17755 16430
## mars 23055 17318 23090 23024 21054
## healthinsurancecov 23376 17755 23024 23698 21584
## race 21369 16430 21054 21584 21653
## anxietydisorder_cat 23428 17796 23073 23677 21631
## anxietydisorder_cat
## deprx 23428
## educ 17796
## mars 23073
## healthinsurancecov 23677
## race 21631
## anxietydisorder_cat 23752
##
## $rm
## deprx educ mars healthinsurancecov race anxietydisorder_cat
## deprx 0 5869 390 69 2076 17
## educ 239 0 497 60 1385 19
## mars 35 5772 0 66 2036 17
## healthinsurancecov 322 5943 674 0 2114 21
## race 284 5223 599 69 0 22
## anxietydisorder_cat 324 5956 679 75 2121 0
##
## $mr
## deprx educ mars healthinsurancecov race anxietydisorder_cat
## deprx 0 239 35 322 284 324
## educ 5869 0 5772 5943 5223 5956
## mars 390 497 0 674 599 679
## healthinsurancecov 69 60 66 0 69 75
## race 2076 1385 2036 2114 0 2121
## anxietydisorder_cat 17 19 17 21 22 0
##
## $mm
## deprx educ mars healthinsurancecov race anxietydisorder_cat
## deprx 332 93 297 10 48 8
## educ 93 5962 190 19 739 6
## mars 297 190 687 13 88 8
## healthinsurancecov 10 19 13 79 10 4
## race 48 739 88 10 2124 3
## anxietydisorder_cat 8 6 8 4 3 25
Basic Imputation
# Multiple imputation by chained equations on the six analysis variables:
# 10 imputed data sets, with a fixed seed so the run is reproducible.
data2 <- data
imp <- mice(
  data2[, c(
    "deprx", "educ", "mars",
    "healthinsurancecov", "race", "anxietydisorder_cat"
  )],
  m = 10,
  seed = 22
)
##
## iter imp variable
## 1 1 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 2 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 3 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 4 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 5 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 6 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 7 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 8 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 9 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 1 10 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 1 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 2 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 3 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 4 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 5 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 6 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 7 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 8 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 9 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 2 10 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 1 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 2 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 3 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 4 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 5 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 6 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 7 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 8 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 9 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 3 10 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 1 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 2 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 3 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 4 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 5 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 6 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 7 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 8 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 9 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 4 10 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 1 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 2 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 3 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 4 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 5 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 6 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 7 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 8 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 9 deprx educ mars healthinsurancecov race anxietydisorder_cat
## 5 10 deprx educ mars healthinsurancecov race anxietydisorder_cat
# Show the imputation method chosen per variable and the predictor matrix.
print(imp)
## Class: mids
## Number of multiple imputations: 10
## Imputation methods:
## deprx educ mars healthinsurancecov
## "pmm" "polyreg" "polyreg" "logreg"
## race anxietydisorder_cat
## "polyreg" "logreg"
## PredictorMatrix:
## deprx educ mars healthinsurancecov race anxietydisorder_cat
## deprx 0 1 1 1 1 1
## educ 1 0 1 1 1 1
## mars 1 1 0 1 1 1
## healthinsurancecov 1 1 1 0 1 1
## race 1 1 1 1 0 1
## anxietydisorder_cat 1 1 1 1 1 0
# Plot the imputation object, then inspect the values imputed for deprx. The
# summary has one column per imputed data set.
plot(imp)
head(imp$imp[["deprx"]])
summary(imp$imp[["deprx"]])
## 1 2 3 4
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09639 Mean :0.08434 Mean :0.09036 Mean :0.04819
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## 5 6 7 8
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06928 Mean :0.05422 Mean :0.05723 Mean :0.09337
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## 9 10
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000
## Mean :0.04217 Mean :0.04819
## 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000
# Observed (pre-imputation) distribution of deprx, for comparison with the
# imputed values.
summary(data$deprx)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.0000 0.0893 0.0000 1.0000 332
# Extract the first completed data set and show its first ten rows.
dat.imp <- complete(imp, action = 1)
head(dat.imp, n = 10)
# Same rows and columns in the original data, for comparison.
head(
  data[, c(
    "deprx", "educ", "mars",
    "healthinsurancecov", "race", "anxietydisorder_cat"
  )],
  n = 10
)
While the first few cases don’t show much missingness, we can coax some more interesting cases out and compare the original data to the imputed:
# First ten cases whose deprx was originally missing: completed data first,
# then the same cases in the original data.
head(dat.imp[is.na(data$deprx), ], n = 10)
head(
  data[
    is.na(data$deprx),
    c("deprx", "educ", "mars", "healthinsurancecov", "race", "anxietydisorder_cat")
  ],
  n = 10
)
Here I look at a linear model for taking medication for depression (deprx):
# Fit the same linear model in each of the 10 imputed data sets to see how
# much the coefficients vary across imputations.
fit.deprx <- with(
  imp,
  lm(deprx ~ educ + mars + healthinsurancecov + race + anxietydisorder_cat)
)
fit.deprx
## call :
## with.mids(data = imp, expr = lm(deprx ~ educ + mars + healthinsurancecov +
## race + anxietydisorder_cat))
##
## call1 :
## mice(data = data2[, c("deprx", "educ", "mars", "healthinsurancecov",
## "race", "anxietydisorder_cat")], m = 10, seed = 22)
##
## nmis :
## deprx educ mars healthinsurancecov
## 332 5962 687 79
## race anxietydisorder_cat
## 2124 25
##
## analyses :
## [[1]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.0474218 0.0012697
## educNoSchool educSome college
## -0.0170750 0.0066884
## educUndergrad marsMarried
## 0.0007463 -0.0229853
## marsNever Married marsSeparated
## -0.0227158 -0.0007224
## marsWidowed healthinsurancecovyes, no coverage
## 0.0322668 -0.0358036
## raceAsian raceOther
## -0.0170964 -0.0016007
## raceWhite anxietydisorder_catyes
## 0.0120975 0.3337941
##
##
## [[2]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.0493524 0.0004261
## educNoSchool educSome college
## -0.0537454 0.0110889
## educUndergrad marsMarried
## 0.0012598 -0.0227216
## marsNever Married marsSeparated
## -0.0240042 -0.0051989
## marsWidowed healthinsurancecovyes, no coverage
## 0.0435972 -0.0351042
## raceAsian raceOther
## -0.0208331 -0.0065907
## raceWhite anxietydisorder_catyes
## 0.0088016 0.3355605
##
##
## [[3]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.050259 -0.006231
## educNoSchool educSome college
## -0.046813 0.003943
## educUndergrad marsMarried
## -0.002711 -0.021608
## marsNever Married marsSeparated
## -0.021919 -0.007184
## marsWidowed healthinsurancecovyes, no coverage
## 0.032796 -0.035605
## raceAsian raceOther
## -0.017241 -0.002535
## raceWhite anxietydisorder_catyes
## 0.010336 0.337605
##
##
## [[4]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.045840 0.004334
## educNoSchool educSome college
## -0.033747 0.006408
## educUndergrad marsMarried
## 0.003324 -0.024040
## marsNever Married marsSeparated
## -0.023067 -0.004389
## marsWidowed healthinsurancecovyes, no coverage
## 0.030720 -0.032193
## raceAsian raceOther
## -0.019267 -0.002565
## raceWhite anxietydisorder_catyes
## 0.011577 0.336492
##
##
## [[5]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.043785 0.004441
## educNoSchool educSome college
## -0.026102 0.014276
## educUndergrad marsMarried
## 0.003494 -0.021750
## marsNever Married marsSeparated
## -0.021147 -0.006203
## marsWidowed healthinsurancecovyes, no coverage
## 0.034657 -0.033274
## raceAsian raceOther
## -0.017760 0.001454
## raceWhite anxietydisorder_catyes
## 0.009238 0.338216
##
##
## [[6]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.0493776 -0.0013657
## educNoSchool educSome college
## -0.0327385 0.0085524
## educUndergrad marsMarried
## -0.0002549 -0.0218267
## marsNever Married marsSeparated
## -0.0227424 -0.0065035
## marsWidowed healthinsurancecovyes, no coverage
## 0.0316115 -0.0338419
## raceAsian raceOther
## -0.0209362 -0.0015294
## raceWhite anxietydisorder_catyes
## 0.0086160 0.3346532
##
##
## [[7]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.0435771 0.0007703
## educNoSchool educSome college
## -0.0308560 0.0085621
## educUndergrad marsMarried
## 0.0050869 -0.0224918
## marsNever Married marsSeparated
## -0.0217806 -0.0027608
## marsWidowed healthinsurancecovyes, no coverage
## 0.0330898 -0.0330504
## raceAsian raceOther
## -0.0185379 -0.0013842
## raceWhite anxietydisorder_catyes
## 0.0122265 0.3368449
##
##
## [[8]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.049069 0.001925
## educNoSchool educSome college
## -0.029758 0.007520
## educUndergrad marsMarried
## -0.001500 -0.023721
## marsNever Married marsSeparated
## -0.023016 -0.003181
## marsWidowed healthinsurancecovyes, no coverage
## 0.025728 -0.034429
## raceAsian raceOther
## -0.019297 0.001201
## raceWhite anxietydisorder_catyes
## 0.010356 0.337596
##
##
## [[9]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.0461183 0.0049652
## educNoSchool educSome college
## -0.0495708 0.0128163
## educUndergrad marsMarried
## 0.0020027 -0.0235783
## marsNever Married marsSeparated
## -0.0232409 -0.0070404
## marsWidowed healthinsurancecovyes, no coverage
## 0.0224789 -0.0321989
## raceAsian raceOther
## -0.0192529 -0.0005943
## raceWhite anxietydisorder_catyes
## 0.0098782 0.3357455
##
##
## [[10]]
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat)
##
## Coefficients:
## (Intercept) educMasters
## 0.048554 0.001466
## educNoSchool educSome college
## -0.031191 0.005085
## educUndergrad marsMarried
## -0.001278 -0.022382
## marsNever Married marsSeparated
## -0.021715 -0.005404
## marsWidowed healthinsurancecovyes, no coverage
## 0.029799 -0.034764
## raceAsian raceOther
## -0.020223 -0.007441
## raceWhite anxietydisorder_catyes
## 0.010055 0.336188
# Standard deviation of deprx within each imputed data set.
# The argument is spelled `expr` in full rather than relying on partial
# matching of `exp`.
with(data = imp, expr = (sd(deprx)))
## call :
## with.mids(data = imp, expr = (sd(deprx)))
##
## call1 :
## mice(data = data2[, c("deprx", "educ", "mars", "healthinsurancecov",
## "race", "anxietydisorder_cat")], m = 10, seed = 22)
##
## nmis :
## deprx educ mars healthinsurancecov
## 332 5962 687 79
## race anxietydisorder_cat
## 2124 25
##
## analyses :
## [[1]]
## [1] 0.2852861
##
## [[2]]
## [1] 0.2850438
##
## [[3]]
## [1] 0.2851649
##
## [[4]]
## [1] 0.284315
##
## [[5]]
## [1] 0.2847404
##
## [[6]]
## [1] 0.2844367
##
## [[7]]
## [1] 0.2844975
##
## [[8]]
## [1] 0.2852255
##
## [[9]]
## [1] 0.2841933
##
## [[10]]
## [1] 0.284315
# Education category shares within each imputed data set.
# The argument is spelled `expr` in full rather than relying on partial
# matching of `exp`.
with(data = imp, expr = (prop.table(table(educ))))
## call :
## with.mids(data = imp, expr = (prop.table(table(educ))))
##
## call1 :
## mice(data = data2[, c("deprx", "educ", "mars", "healthinsurancecov",
## "race", "anxietydisorder_cat")], m = 10, seed = 22)
##
## nmis :
## deprx educ mars healthinsurancecov
## 332 5962 687 79
## race anxietydisorder_cat
## 2124 25
##
## analyses :
## [[1]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.276906254 0.147495479 0.002271102 0.225385877 0.347941288
##
## [[2]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.275308071 0.146275813 0.002355217 0.226731716 0.349329184
##
## [[3]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.276443622 0.147621651 0.002060815 0.226142911 0.347731001
##
## [[4]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.27606510 0.14913572 0.00180847 0.22643731 0.34655339
##
## [[5]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.271270556 0.148420743 0.002102873 0.227614922 0.350590907
##
## [[6]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.2747613 0.1476637 0.0019767 0.2254700 0.3501283
##
## [[7]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.27341549 0.14749548 0.00214493 0.22858224 0.34836186
##
## [[8]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.276738024 0.147621651 0.001892585 0.225848509 0.347899230
##
## [[9]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.275392186 0.147873996 0.002102873 0.224544728 0.350086218
##
## [[10]]
## educ
## HS Diploma Masters NoSchool Some college Undergrad
## 0.2744669 0.1469908 0.0019767 0.2257644 0.3508012
Now I pool the separate models from each imputed data set:
# Combine the per-imputation model fits into a single set of pooled estimates
# and display the pooled table.
est.p <- pool(object = fit.deprx)
print(est.p)
## Class: mipo m = 10
## term m estimate ubar b
## 1 (Intercept) 10 0.047335456 5.820676e-05 5.748322e-06
## 2 educMasters 10 0.001199994 3.031984e-05 1.085436e-05
## 3 educNoSchool 10 -0.035159600 1.325242e-03 1.299259e-04
## 4 educSome college 10 0.008493933 2.195095e-05 1.108314e-05
## 5 educUndergrad 10 0.001016844 1.871747e-05 6.249804e-06
## 6 marsMarried 10 -0.022710419 3.920374e-05 7.436319e-07
## 7 marsNever Married 10 -0.022534830 3.822768e-05 7.548872e-07
## 8 marsSeparated 10 -0.004858610 2.067654e-04 4.403973e-06
## 9 marsWidowed 10 0.031674515 7.062526e-04 3.100080e-05
## 10 healthinsurancecovyes, no coverage 10 -0.034026393 2.385466e-05 1.752576e-06
## 11 raceAsian 10 -0.019044436 5.657807e-05 1.919865e-06
## 12 raceOther 10 -0.002158476 1.448102e-04 8.437754e-06
## 13 raceWhite 10 0.010318181 2.519091e-05 1.669196e-06
## 14 anxietydisorder_catyes 10 0.336269443 1.944423e-05 1.914997e-06
## t dfcom df riv lambda fmi
## 1 6.452991e-05 23763 898.06384 0.10863265 0.09798796 0.09999006
## 2 4.225963e-05 23763 112.00500 0.39379483 0.28253429 0.29501141
## 3 1.468160e-03 23763 909.48318 0.10784336 0.09734531 0.09932377
## 4 3.414241e-05 23763 70.26151 0.55539540 0.35707666 0.37462813
## 5 2.559226e-05 23763 123.83284 0.36729229 0.26862749 0.28016034
## 6 4.002174e-05 23763 11188.20724 0.02086523 0.02043877 0.02061383
## 7 3.905806e-05 23763 10727.19676 0.02172185 0.02126004 0.02144247
## 8 2.116098e-04 23763 9871.31194 0.02342930 0.02289294 0.02309085
## 9 7.403535e-04 23763 3573.40117 0.04828425 0.04606027 0.04659373
## 10 2.578250e-05 23763 1499.90487 0.08081581 0.07477298 0.07600423
## 11 5.868992e-05 23763 5332.69727 0.03732633 0.03598321 0.03634455
## 12 1.540918e-04 23763 2232.60945 0.06409443 0.06023378 0.06107450
## 13 2.702703e-05 23763 1792.21474 0.07288803 0.06793629 0.06897468
## 14 2.155073e-05 23763 902.33895 0.10833530 0.09774596 0.09973915
# Summary table of the pooled model.
summary(est.p)
Pay attention to the fmi and lambda columns: they convey how much the missingness in each particular variable affects the corresponding model coefficient.
# Plot lambda (the share of each coefficient's variance that is due to
# missing data) per model term.
# The labels come from the `term` column of the pooled table; its row names
# are just 1..14, which would put row numbers rather than coefficient names
# on the x axis.
lam <- data.frame(lam = est.p$pooled$lambda, param = est.p$pooled$term)
ggplot(data = lam, aes(x = param, y = lam)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(dplyr)
# Complete-case analysis: keep the six model variables, drop every row with
# any NA, and refit the model on what remains.
bnm <- data %>%
  dplyr::select(
    deprx, educ, mars,
    healthinsurancecov, race, anxietydisorder_cat
  ) %>%
  dplyr::filter(stats::complete.cases(.)) %>%
  as.data.frame()
summary(
  lm(
    deprx ~ educ + mars + healthinsurancecov + race + anxietydisorder_cat,
    data = bnm
  )
)
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat, data = bnm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41323 -0.03930 -0.03540 -0.02112 1.00754
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.044663 0.009723 4.594 4.39e-06 ***
## educMasters 0.000515 0.006701 0.077 0.93874
## educNoSchool -0.039379 0.050259 -0.784 0.43334
## educSome college 0.010613 0.005919 1.793 0.07299 .
## educUndergrad 0.001240 0.005374 0.231 0.81748
## marsMarried -0.023548 0.008019 -2.937 0.00332 **
## marsNever Married -0.021108 0.007926 -2.663 0.00775 **
## marsSeparated -0.032402 0.019214 -1.686 0.09174 .
## marsWidowed 0.011722 0.034301 0.342 0.73254
## healthinsurancecovyes, no coverage -0.031095 0.006641 -4.682 2.86e-06 ***
## raceAsian -0.019291 0.009098 -2.120 0.03399 *
## raceOther 0.005135 0.014852 0.346 0.72955
## raceWhite 0.013044 0.006266 2.082 0.03739 *
## anxietydisorder_catyes 0.333190 0.005409 61.600 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2552 on 15888 degrees of freedom
## Multiple R-squared: 0.2042, Adjusted R-squared: 0.2035
## F-statistic: 313.5 on 13 and 15888 DF, p-value: < 2.2e-16
# Model on the original data; lm() silently drops rows with missing values, so
# this is also a complete-case fit.
fit1 <- lm(
  deprx ~ educ + mars + healthinsurancecov + race + anxietydisorder_cat,
  data = data
)
summary(fit1)
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41323 -0.03930 -0.03540 -0.02112 1.00754
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.044663 0.009723 4.594 4.39e-06 ***
## educMasters 0.000515 0.006701 0.077 0.93874
## educNoSchool -0.039379 0.050259 -0.784 0.43334
## educSome college 0.010613 0.005919 1.793 0.07299 .
## educUndergrad 0.001240 0.005374 0.231 0.81748
## marsMarried -0.023548 0.008019 -2.937 0.00332 **
## marsNever Married -0.021108 0.007926 -2.663 0.00775 **
## marsSeparated -0.032402 0.019214 -1.686 0.09174 .
## marsWidowed 0.011722 0.034301 0.342 0.73254
## healthinsurancecovyes, no coverage -0.031095 0.006641 -4.682 2.86e-06 ***
## raceAsian -0.019291 0.009098 -2.120 0.03399 *
## raceOther 0.005135 0.014852 0.346 0.72955
## raceWhite 0.013044 0.006266 2.082 0.03739 *
## anxietydisorder_catyes 0.333190 0.005409 61.600 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2552 on 15888 degrees of freedom
## (7875 observations deleted due to missingness)
## Multiple R-squared: 0.2042, Adjusted R-squared: 0.2035
## F-statistic: 313.5 on 13 and 15888 DF, p-value: < 2.2e-16
# Same model on dat.imp, a single completed data set. Imputed values are
# treated as if they were observed here, so this fit is for comparison only;
# the pooled estimates are the ones that reflect imputation uncertainty.
fit.imp <- lm(
  deprx ~ educ + mars + healthinsurancecov + race + anxietydisorder_cat,
  data = dat.imp
)
summary(fit.imp)
##
## Call:
## lm(formula = deprx ~ educ + mars + healthinsurancecov + race +
## anxietydisorder_cat, data = dat.imp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43227 -0.04322 -0.03728 -0.01090 1.02151
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0474218 0.0076558 6.194 5.95e-10 ***
## educMasters 0.0012697 0.0055231 0.230 0.818184
## educNoSchool -0.0170750 0.0348275 -0.490 0.623946
## educSome college 0.0066884 0.0047021 1.422 0.154920
## educUndergrad 0.0007463 0.0043437 0.172 0.863581
## marsMarried -0.0229853 0.0062900 -3.654 0.000258 ***
## marsNever Married -0.0227158 0.0062136 -3.656 0.000257 ***
## marsSeparated -0.0007224 0.0144126 -0.050 0.960022
## marsWidowed 0.0322668 0.0269967 1.195 0.232016
## healthinsurancecovyes, no coverage -0.0358036 0.0049077 -7.295 3.07e-13 ***
## raceAsian -0.0170964 0.0075553 -2.263 0.023655 *
## raceOther -0.0016007 0.0120870 -0.132 0.894642
## raceWhite 0.0120975 0.0050399 2.400 0.016388 *
## anxietydisorder_catyes 0.3337941 0.0044259 75.419 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2544 on 23763 degrees of freedom
## Multiple R-squared: 0.2052, Adjusted R-squared: 0.2047
## F-statistic: 471.9 on 13 and 23763 DF, p-value: < 2.2e-16