## Data
# Packages used throughout: ipumsr (IPUMS import), dplyr, car (Recode), survey
library(ipumsr)
library(dplyr)
library(car)
library(survey)
ddi <- read_ipums_ddi("/Volumes/Jyoti/Stat 2 /PROJECT/nhis_00013.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
data <- haven::zap_labels(data)
names(data) <- tolower(gsub(pattern = "_", replacement = "", x = names(data)))
data <- filter(data, pregnantnow == 2)  # keep currently pregnant respondents
#depression level
data$depfeelevl<-as.factor(data$depfeelevl)
data$depfeelevl<- car::Recode(data$depfeelevl,
recodes="1='A Lot'; 2='A Little';
3='Between a Little and a Lot'; 7:9=NA; else=NA",
as.factor=T)
data$depfeelevl<- as.ordered(data$depfeelevl)
# medication for depression
data$deprx <- as.factor(data$deprx)
data$deprx<- car::Recode(data$deprx,
recodes="1=0; 2=1;else=NA",
as.factor=T)
#currently Pregnant
data$pregnantnow<-as.factor(data$pregnantnow)
data$curpreg<-car::Recode(data$pregnantnow,
recodes="2='Yes';else=NA",
as.factor=T)
#education level
data$educ <- car::Recode(data$educ,
                         recodes = "100:303='Less than UG Degree'; 400:503='UG degree or higher'; else=NA",
                         as.factor = TRUE)
#employment status
data$empstat<- car::Recode(data$empstat,
recodes="100='Employed'; 200='Unemployed';else=NA",
as.factor=T)
# income grouping
data$famtotinc_cat <- car::Recode(data$famtotinc, recodes = "0:49999='Less than 50k'; 50000:99999='50-100k'; 100000:149999='100-150k'; 150000:199999='150-200k'; 200000:250000='200-250k'; else=NA", as.factor = TRUE)
# order the income categories from lowest to highest (not the raw famtotinc values)
data$famtotinc_cat <- factor(data$famtotinc_cat, levels = c("Less than 50k", "50-100k", "100-150k", "150-200k", "200-250k"), ordered = TRUE)
##race
data$race<- car::Recode(data$racea,
recodes="100 ='White'; 200 ='African American';
400:590= 'Asian/Others'; else=NA",
as.factor=T)
#race/ethnicity
data$black<- car::Recode(data$hisprace,
recodes="03=1; 99=NA; else=0")
data$white<- car::Recode(data$hisprace,
recodes="02=1; 99=NA; else=0")
data$other<- car::Recode(data$hisprace,
recodes="4:7=1; 99=NA; else=0")
data$hispanic<- car::Recode(data$hisprace,
recodes="01=1; 99=NA; else=0")
data$hisprace<- as.factor(data$hisprace)
data$race_eth<-car::Recode(data$hisprace,
recodes="01='Hispanic'; 02='NH_White'; 03='NH_Black';04:07='NH_Other'; else=NA",
as.factor = T)
data$race_eth<-relevel(data$race_eth,
ref = "NH_White")
## marital status
data$mars <- car::Recode(data$marstat,
                         recodes = "10:13='Married'; 20='Widowed'; 30:40='Divorced/Separated'; 50='Never Married'; else=NA",
                         as.factor = TRUE)
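As a quick sanity check (a sketch added here, not part of the original code), frequency tables can confirm that each recode produced the intended categories and that unexpected codes went to NA:
# check the recoded variables before filtering (illustrative only)
table(data$educ, useNA = "ifany")
table(data$race_eth, useNA = "ifany")
table(data$mars, useNA = "ifany")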
## Filter data: keep complete cases on the analysis variables
data <- data %>%
  filter(!is.na(educ),
         !is.na(curpreg),
         !is.na(deprx),
         !is.na(empstat),
         !is.na(marstat),
         !is.na(race_eth))
options(survey.lonely.psu = "adjust")
des <- svydesign(ids = ~1, strata = ~strata, weights = ~sampweight, data = data, nest = TRUE)
des
## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
##     data = data, nest = TRUE)
To begin, I fit the outcome (taking medication for depression) on all of the predictor variables at once to see which are significant; I will then build the models up in nested steps.
fit1 <- svyglm(deprx~ educ+race_eth+empstat+mars, design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Take a look at the output
gtsummary::tbl_regression(fit1, exp = TRUE)
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| educ | |||
| Less than UG Degree | — | — | |
| UG degree or higher | 0.41 | 0.12, 1.42 | 0.2 |
| race_eth | |||
| NH_White | — | — | |
| Hispanic | 0.40 | 0.09, 1.76 | 0.2 |
| NH_Black | 0.00 | 0.00, 0.00 | <0.001 |
| NH_Other | 0.00 | 0.00, 0.00 | <0.001 |
| empstat | |||
| Employed | — | — | |
| Unemployed | 1.10 | 0.29, 4.24 | 0.9 |
| mars | |||
| Divorced/Separated | — | — | |
| Married | 1.10 | 0.12, 10.2 | >0.9 |
| Never Married | 0.96 | 0.09, 10.0 | >0.9 |
1 OR = Odds Ratio, CI = Confidence Interval
## Survey design
# First we tell R our survey design
options(survey.lonely.psu = "adjust")
library(dplyr)
sub <- data %>%
  select(depfeelevl, curpreg, educ, deprx, empstat, race_eth, mars, sampweight, strata) %>%
  filter(complete.cases(.))
des<-svydesign(ids=~1,
strata=~strata,
weights=~sampweight,
data =sub)
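Before fitting the model, a design-based tabulation can describe the weighted distribution of the outcome; this is a sketch added for illustration and is not part of the original analysis:
# weighted proportion on depression medication, overall and by education (illustrative)
svymean(~deprx, des, na.rm = TRUE)
svyby(~deprx, ~educ, des, svymean, na.rm = TRUE)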
#Logit model
fit.logit<-svyglm(deprx~educ+race_eth+empstat+mars,
design= des,
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit)
##
## Call:
## svyglm(formula = deprx ~ educ + race_eth + empstat + mars, design = des,
## family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = sub)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.21099 1.40229 -0.864 0.392
## educUG degree or higher -1.01408 0.64977 -1.561 0.125
## race_ethHispanic -0.08073 0.82386 -0.098 0.922
## race_ethNH_Black -17.44002 0.58369 -29.879 <2e-16 ***
## race_ethNH_Other -17.17360 0.60117 -28.567 <2e-16 ***
## empstatUnemployed 0.33019 0.71306 0.463 0.645
## marsMarried 0.64644 1.33410 0.485 0.630
## marsNever Married -0.26830 1.41043 -0.190 0.850
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.8697342)
##
## Number of Fisher Scoring iterations: 17
Here we see only the coefficients (on the log-odds scale), the direction of each relationship, and the significance. Note that the very large negative coefficients for NH_Black and NH_Other (around -17, with 17 Fisher scoring iterations) point to quasi-complete separation: essentially no women in those groups in this subsample report taking depression medication, so their odds ratios are estimated as effectively zero. Next, we exponentiate the coefficients to obtain odds ratios and confidence intervals.
library(gtsummary)
fit.logit%>%
tbl_regression(exponentiate=TRUE )
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| educ | |||
| Less than UG Degree | — | — | |
| UG degree or higher | 0.36 | 0.10, 1.34 | 0.13 |
| race_eth | |||
| NH_White | — | — | |
| Hispanic | 0.92 | 0.18, 4.84 | >0.9 |
| NH_Black | 0.00 | 0.00, 0.00 | <0.001 |
| NH_Other | 0.00 | 0.00, 0.00 | <0.001 |
| empstat | |||
| Employed | — | — | |
| Unemployed | 1.39 | 0.33, 5.84 | 0.6 |
| mars | |||
| Divorced/Separated | — | — | |
| Married | 1.91 | 0.13, 27.9 | 0.6 |
| Never Married | 0.76 | 0.04, 13.1 | 0.8 |
1 OR = Odds Ratio, CI = Confidence Interval
library(sjPlot)
plot_model(fit.logit,
axis.lim = c(.1, 10),
title = " Figure 1. Odds Ratios for Depression")
## Warning: Removed 2 rows containing missing values (geom_point).
knitr::kable(data.frame(OR = exp(coef(fit.logit)), ci = exp(confint(fit.logit))))
| | OR | 2.5 % | 97.5 % |
|---|---|---|---|
| (Intercept) | 0.2979029 | 0.0177384 | 5.0030577 |
| educUG degree or higher | 0.3627356 | 0.0981501 | 1.3405708 |
| race_ethHispanic | 0.9224466 | 0.1758504 | 4.8388158 |
| race_ethNH_Black | 0.0000000 | 0.0000000 | 0.0000001 |
| race_ethNH_Other | 0.0000000 | 0.0000000 | 0.0000001 |
| empstatUnemployed | 1.3912270 | 0.3314405 | 5.8396983 |
| marsMarried | 1.9087249 | 0.1303638 | 27.9466468 |
| marsNever Married | 0.7646784 | 0.0447926 | 13.0542267 |
#get a series of predicted probabilities for different "types" of people for each model
#ref_grid will generate all possible combinations of predictors from a model
library(emmeans)
rg<-ref_grid(fit.logit)
marg_logit<-emmeans(object = rg,
specs = c( "educ", "race_eth", "empstat", "mars"),
type="response" )
knitr::kable(marg_logit, digits = 4)
| educ | race_eth | empstat | mars | prob | SE | df | asymp.LCL | asymp.UCL |
|---|---|---|---|---|---|---|---|---|
| Less than UG Degree | NH_White | Employed | Divorced/Separated | 0.2295 | 0.2480 | Inf | 0.0187 | 0.8231 |
| UG degree or higher | NH_White | Employed | Divorced/Separated | 0.0975 | 0.1234 | Inf | 0.0069 | 0.6276 |
| Less than UG Degree | Hispanic | Employed | Divorced/Separated | 0.2156 | 0.2352 | Inf | 0.0177 | 0.8076 |
| UG degree or higher | Hispanic | Employed | Divorced/Separated | 0.0906 | 0.1209 | Inf | 0.0056 | 0.6384 |
| Less than UG Degree | NH_Black | Employed | Divorced/Separated | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Black | Employed | Divorced/Separated | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_Other | Employed | Divorced/Separated | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Other | Employed | Divorced/Separated | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_White | Unemployed | Divorced/Separated | 0.2930 | 0.2640 | Inf | 0.0330 | 0.8344 |
| UG degree or higher | NH_White | Unemployed | Divorced/Separated | 0.1307 | 0.1546 | Inf | 0.0103 | 0.6839 |
| Less than UG Degree | Hispanic | Unemployed | Divorced/Separated | 0.2766 | 0.2671 | Inf | 0.0272 | 0.8395 |
| UG degree or higher | Hispanic | Unemployed | Divorced/Separated | 0.1218 | 0.1596 | Inf | 0.0074 | 0.7210 |
| Less than UG Degree | NH_Black | Unemployed | Divorced/Separated | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Black | Unemployed | Divorced/Separated | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_Other | Unemployed | Divorced/Separated | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Other | Unemployed | Divorced/Separated | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_White | Employed | Married | 0.3625 | 0.1255 | Inf | 0.1640 | 0.6224 |
| UG degree or higher | NH_White | Employed | Married | 0.1710 | 0.0639 | Inf | 0.0786 | 0.3329 |
| Less than UG Degree | Hispanic | Employed | Married | 0.3441 | 0.1730 | Inf | 0.1046 | 0.7020 |
| UG degree or higher | Hispanic | Employed | Married | 0.1598 | 0.1135 | Inf | 0.0350 | 0.4992 |
| Less than UG Degree | NH_Black | Employed | Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Black | Employed | Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_Other | Employed | Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Other | Employed | Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_White | Unemployed | Married | 0.4417 | 0.1758 | Inf | 0.1636 | 0.7619 |
| UG degree or higher | NH_White | Unemployed | Married | 0.2230 | 0.1392 | Inf | 0.0561 | 0.5808 |
| Less than UG Degree | Hispanic | Unemployed | Married | 0.4219 | 0.2427 | Inf | 0.0940 | 0.8369 |
| UG degree or higher | Hispanic | Unemployed | Married | 0.2093 | 0.1920 | Inf | 0.0265 | 0.7201 |
| Less than UG Degree | NH_Black | Unemployed | Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Black | Unemployed | Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_Other | Unemployed | Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Other | Unemployed | Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_White | Employed | Never Married | 0.1855 | 0.1065 | Inf | 0.0542 | 0.4755 |
| UG degree or higher | NH_White | Employed | Never Married | 0.0763 | 0.0588 | Inf | 0.0158 | 0.2978 |
| Less than UG Degree | Hispanic | Employed | Never Married | 0.1736 | 0.1114 | Inf | 0.0439 | 0.4903 |
| UG degree or higher | Hispanic | Employed | Never Married | 0.0708 | 0.0665 | Inf | 0.0104 | 0.3557 |
| Less than UG Degree | NH_Black | Employed | Never Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Black | Employed | Never Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_Other | Employed | Never Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Other | Employed | Never Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_White | Unemployed | Never Married | 0.2407 | 0.1465 | Inf | 0.0618 | 0.6041 |
| UG degree or higher | NH_White | Unemployed | Never Married | 0.1031 | 0.0957 | Inf | 0.0149 | 0.4665 |
| Less than UG Degree | Hispanic | Unemployed | Never Married | 0.2262 | 0.1696 | Inf | 0.0419 | 0.6613 |
| UG degree or higher | Hispanic | Unemployed | Never Married | 0.0959 | 0.1092 | Inf | 0.0089 | 0.5558 |
| Less than UG Degree | NH_Black | Unemployed | Never Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Black | Unemployed | Never Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| Less than UG Degree | NH_Other | Unemployed | Never Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
| UG degree or higher | NH_Other | Unemployed | Never Married | 0.0000 | 0.0000 | Inf | 0.0000 | 0.0000 |
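The full reference grid above is hard to scan. As a sketch (not in the original code), emmeans can also average over the other predictors to give one marginal predicted probability per education level:
# marginal predicted probability of depression medication by education,
# averaging over race/ethnicity, employment status, and marital status (illustrative)
emmeans(rg, specs = "educ", type = "response")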
fit.logit1 <- svyglm(I(deprx==1) ~ educ, design = des, family = binomial) # educational attainment only
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
fit.logit2 <- svyglm(I(deprx==1) ~ educ + race_eth + empstat, design = des, family = binomial) # education + race/ethnicity + employment status
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
fit.logit3 <- svyglm(I(deprx==1) ~ educ + race_eth + empstat + mars, design = des, family = binomial) # education + race/ethnicity + employment status + marital status
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit1)
##
## Call:
## svyglm(formula = I(deprx == 1) ~ educ, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = sub)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.0866 0.4055 -2.680 0.0098 **
## educUG degree or higher -0.7919 0.5891 -1.344 0.1846
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.010417)
##
## Number of Fisher Scoring iterations: 4
regTermTest(fit.logit1, test.terms = ~educ, method="Wald", df = NULL)
## Wald test for educ
## in svyglm(formula = I(deprx == 1) ~ educ, design = des, family = binomial)
## F = 1.807119 on 1 and 53 df: p= 0.18458
Now let's see whether the education effect is attenuated ("attenuated" being the term for when an effect is reduced) after controlling for two additional socio-demographic variables: race/ethnicity and employment status. We also test whether adding race/ethnicity and employment status significantly improves model fit. Traditionally this would be done with a likelihood ratio test, but that is not appropriate for survey-weighted models, so we rely on design-based Wald tests instead.
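As an illustration (a sketch, not part of the original workflow), the survey package's anova() method for svyglm objects can compare two nested design-based models directly, using either a working (Rao-Scott) likelihood-ratio test or a design-based Wald test:
# design-based comparison of the nested models (illustrative)
anova(fit.logit1, fit.logit2, method = "LRT")   # working likelihood-ratio test
anova(fit.logit1, fit.logit2, method = "Wald")  # design-based Wald test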
#controlling for race/ethnicity and employment status
summary(fit.logit2)
##
## Call:
## svyglm(formula = I(deprx == 1) ~ educ + race_eth + empstat, design = des,
## family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = sub)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8872 0.4940 -1.796 0.0787 .
## educUG degree or higher -0.7360 0.6265 -1.175 0.2458
## race_ethHispanic -0.3761 0.8581 -0.438 0.6631
## race_ethNH_Black -17.6443 0.4870 -36.229 <2e-16 ***
## race_ethNH_Other -17.3081 0.5854 -29.567 <2e-16 ***
## empstatUnemployed 0.2532 0.7382 0.343 0.7330
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.829294)
##
## Number of Fisher Scoring iterations: 17
regTermTest(fit.logit2, test.terms = ~race_eth, method="Wald", df = NULL)
## Wald test for race_eth
## in svyglm(formula = I(deprx == 1) ~ educ + race_eth + empstat, design = des,
## family = binomial)
## F = 721.1291 on 3 and 49 df: p= < 2.22e-16
Next we consider the third model, which additionally adjusts for the women's marital status:
#controlling for race/ethnicity+employment status+ marital status
summary(fit.logit3)
##
## Call:
## svyglm(formula = I(deprx == 1) ~ educ + race_eth + empstat +
## mars, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = sub)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.21099 1.40229 -0.864 0.392
## educUG degree or higher -1.01408 0.64977 -1.561 0.125
## race_ethHispanic -0.08073 0.82386 -0.098 0.922
## race_ethNH_Black -17.44002 0.58369 -29.879 <2e-16 ***
## race_ethNH_Other -17.17360 0.60117 -28.567 <2e-16 ***
## empstatUnemployed 0.33019 0.71306 0.463 0.645
## marsMarried 0.64644 1.33410 0.485 0.630
## marsNever Married -0.26830 1.41043 -0.190 0.850
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.8697342)
##
## Number of Fisher Scoring iterations: 17
regTermTest(fit.logit3, test.terms = ~empstat, method="Wald", df = NULL)
## Wald test for empstat
## in svyglm(formula = I(deprx == 1) ~ educ + race_eth + empstat +
## mars, design = des, family = binomial)
## F = 0.2144196 on 1 and 47 df: p= 0.64546
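For completeness, the same pattern gives a Wald test for the block of marital-status terms added in Model 3 (a sketch following the tests above):
# Wald test for the marital-status terms added in Model 3 (illustrative)
regTermTest(fit.logit3, test.terms = ~mars, method = "Wald", df = NULL)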
f1 <- fit.logit1 %>%
tbl_regression(exponentiate = T)
f2 <- fit.logit2 %>%
tbl_regression(exponentiate = T)
f3 <- fit.logit3 %>%
tbl_regression(exponentiate = T)
f_all <- tbl_merge( tbls= list(f1, f2, f3),
tab_spanner = c("**Model 1**", "**Model 2**", "**Model 3**"))
f_all
| Characteristic | Model 1 | Model 2 | Model 3 | ||||||
|---|---|---|---|---|---|---|---|---|---|
| OR1 | 95% CI1 | p-value | OR1 | 95% CI1 | p-value | OR1 | 95% CI1 | p-value | |
| educ | |||||||||
| Less than UG Degree | — | — | — | — | — | — | |||
| UG degree or higher | 0.45 | 0.14, 1.48 | 0.2 | 0.48 | 0.14, 1.69 | 0.2 | 0.36 | 0.10, 1.34 | 0.13 |
| race_eth | |||||||||
| NH_White | — | — | — | — | |||||
| Hispanic | 0.69 | 0.12, 3.85 | 0.7 | 0.92 | 0.18, 4.84 | >0.9 | |||
| NH_Black | 0.00 | 0.00, 0.00 | <0.001 | 0.00 | 0.00, 0.00 | <0.001 | |||
| NH_Other | 0.00 | 0.00, 0.00 | <0.001 | 0.00 | 0.00, 0.00 | <0.001 | |||
| empstat | |||||||||
| Employed | — | — | — | — | |||||
| Unemployed | 1.29 | 0.29, 5.68 | 0.7 | 1.39 | 0.33, 5.84 | 0.6 | |||
| mars | |||||||||
| Divorced/Separated | — | — | |||||||
| Married | 1.91 | 0.13, 27.9 | 0.6 | ||||||
| Never Married | 0.76 | 0.04, 13.1 | 0.8 | ||||||
1 OR = Odds Ratio, CI = Confidence Interval
The Akaike Information Criterion (AIC), which combines overall model deviance (residual variation) with a penalty for the number of parameters, shows that Model 2 has an AIC of 95.53, lower than that of the other two models, indicating that Model 2 offers the best trade-off between fit and parsimony.
AIC(fit.logit1, fit.logit2, fit.logit3)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## eff.p AIC deltabar
## [1,] 1.102491 98.26848 1.1024912
## [2,] 3.979825 95.52794 0.7959650
## [3,] 5.728172 97.02147 0.8183103