Libraries
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(tidyverse)
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v tibble 3.0.3 v dplyr 1.0.3
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::lift() masks caret::lift()
library(patchwork)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(ggsn)
## Loading required package: grid
library(foreign)
library(survey)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(VGAM)
## Warning: package 'VGAM' was built under R version 4.0.4
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:survey':
##
## calibrate
## The following object is masked from 'package:car':
##
## logit
## The following object is masked from 'package:tidyr':
##
## fill
## The following object is masked from 'package:caret':
##
## predictors
Importing NHANES data
DEMO_J <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/NHANES data/DEMO_J.XPT",
NULL)
ALQ_J <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/NHANES data/ALQ_J.XPT",
NULL)
BMX_J <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/NHANES data/BMX_J.XPT",
NULL)
DIQ_J <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/NHANES data/DIQ_J.XPT",
NULL)
SMQ_J <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/NHANES data/SMQ_J.XPT",
NULL)
Merging data into master file
Master1 <- merge(DEMO_J, ALQ_J, all.x=TRUE)
Master2 <- merge(Master1, BMX_J, all.x=TRUE)
Master3 <- merge(Master2, DIQ_J, all.x=TRUE)
Master4 <- merge(Master3, SMQ_J, all.x=TRUE)
Recoding, filtering, and cleaning up the data
nams<-names(Master4)
head(nams, n=10)
## [1] "SEQN" "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
## [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(Master4)<-newnames
#sex
Master4$sex<-as.factor(ifelse(Master4$riagendr==1,
"Male",
"Female"))
#race/ethnicity
Master4$hispanic <- Recode(Master4$ridreth3, recodes = "1=1; 2=1; else=0")
Master4$black<-Recode(Master4$ridreth3, recodes="4=1; else=0")
Master4$white<-Recode(Master4$ridreth3, recodes="3=1; else=0")
Master4$asian<-Recode(Master4$ridreth3, recodes="6=1; else=0")
Master4$other<-Recode(Master4$ridreth3, recodes="7=1; else=0")
Master4$race_eth<- Recode(Master4$ridreth3, recodes= "3= '1 white'; 1:2='2 hispanic'; 4= '3 black'; 6= '4 asian'; 7= '5 other' ", as.factor=T )
# at or below poverty threshold using income to poverty ratio
Master4$pov<-ifelse(Master4$indfmpir <= 1, "at or below poverty",
ifelse(Master4$indfmpir > 1 & Master4$indfmpir <=2, "1-2 times pov",
ifelse(Master4$indfmpir >2 & Master4$indfmpir <=3, "2-3 times pov",
ifelse(Master4$indfmpir >3 & Master4$indfmpir <=4, "3-4 times pov",
ifelse(Master4$indfmpir >4 & Master4$indfmpir <=5, "4-5 times pov",
ifelse(Master4$indfmpir >5, "5+ times pov",NA))))))
Master4$pov2<- Recode(Master4$pov, recodes= "'at or below poverty'=1; '1-2 times pov'=2; '2-3 times pov'=3; '3-4 times pov'=4; '4-5 times pov'=5; '5+ times pov'=5", as.factor=T)
# education
Master4$educ <- Recode(Master4$dmdeduc2, recodes="1:2='1 lths'; 3='2 hs'; 4='3 some col'; 5='4 col grad'; 7:9=NA; else=NA", as.factor=T)
# marital status
Master4$marst <- Recode(Master4$dmdmartl, recodes="1='1 married'; 2='2 widowed'; 3='3 divorced'; 4='4 separated'; 5='5 nm'; 6='6 cohab'; else=NA", as.factor=T)
# age
Master4$agec<-cut(Master4$ridageyr,
breaks=c(0,17,65,80))
# smoking status
Master4$cur_smk <- Recode(Master4$smq040, recodes="1:2 = 1; else=0", as.factor=T)
# ever used e-cig
Master4$evr_ecig <- Recode(Master4$smq900, recodes="1= 1; 2=0; else=NA", as.factor=T)
# ever consumed alcohol
Master4$evr_alc <- Recode(Master4$alq111, recodes="1= 1; 2=0; else=NA", as.factor=T)
# diabetes
Master4$diabetes <- Recode(Master4$diq010, recodes="1= 1; 2:3=0; else=NA", as.factor=T)
# prediabetes
Master4$pre_dbts <- Recode(Master4$diq160, recodes="1= 1; 2=0; else=NA", as.factor=T)
# health risk for diabetes
Master4$hlth_rsk <- Recode(Master4$diq170, recodes="1= 1; 2=0; else=NA", as.factor=T)
# BMI
Master4$bmicat <- ifelse(Master4$bmxbmi < 18.5, "1 underweight",
ifelse(Master4$bmxbmi >=18.5 & Master4$bmxbmi < 25, "2 normal",
ifelse(Master4$bmxbmi >=25 & Master4$bmxbmi< 30, "3 overweight",
ifelse(Master4$bmxbmi >= 30, "4 obese", NA))))
Master4$bmicat2 <- Recode (Master4$bmicat, recodes= "'1 underweight'=1; '2 normal'=2; '3 overweight'=3; '4 obese'=4; else=NA ", as.factor=T)
Master4$bmicat2 <- relevel(Master4$bmicat2, ref ="1")
Master4$bminum <- Recode (Master4$bmicat, recodes= "'1 underweight'=1; '2 normal'=2; '3 overweight'=3; '4 obese'=4; else=NA ", as.factor=F)
Master4$obese <- Recode (Master4$bmicat, recodes= "'4 obese'=1; else=0 ", as.factor=T)
# The outcome variable used for this assignment will be Body Mass Index (BMI). This variable will be ordinally categorized by: underweight (BMI<18.5), normal (18.5<=BMI<25), overweight (25<=BMI<30), and obese (BMI>=30)
# Independent variables like age, race/ethniciy, education, and poverty status will be significant influences on BMI. Are older individuals more likely to have higher BMI category? Will Blacks and Hispanics have greater odds of being in a higher BMi category than Whites? As education increases will the odds of being in a lower BMi category decrease? Lastly, will poverty status be a significant predictor in BMI status.
library(dplyr)
sub<-Master4%>%
select(bmicat, bmicat2, bminum, hlth_rsk, obese, cur_smk, agec,ridageyr, marst, educ, pov, pov2, race_eth, hispanic, black, white, asian, other, sex, wtint2yr, sdmvpsu, sdmvstra) %>%
filter( complete.cases(.))
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~sdmvpsu, strata=~sdmvstra, weights=~wtint2yr, data =sub, nest = TRUE )
fit.ordr<-svyolr(bmicat2~race_eth+educ+agec+pov2,des)
## Warning in svyolr.survey.design2(bmicat2 ~ race_eth + educ + agec + pov2, :
## design appears to be rank-deficient, so dropping some coefs
summary(fit.ordr)
## Call:
## svyolr(bmicat2 ~ race_eth + educ + agec + pov2, des)
##
## Coefficients:
## Value Std. Error t value
## race_eth2 hispanic 0.40074709 0.09461744 4.2354466
## race_eth3 black 0.31747488 0.12344109 2.5718736
## race_eth4 asian -0.80774992 0.09398224 -8.5947085
## race_eth5 other 0.32139849 0.20318206 1.5818252
## educ2 hs 0.13000447 0.14158091 0.9182344
## educ3 some col 0.13406388 0.13970558 0.9596172
## educ4 col grad -0.18892269 0.17729830 -1.0655640
## agec(17,65] -0.04312386 0.14346676 -0.3005843
## pov22 0.08391165 0.12758609 0.6576865
## pov23 0.23387889 0.16794868 1.3925617
## pov24 0.20145693 0.14926907 1.3496227
## pov25 0.30457147 0.19774241 1.5402436
##
## Intercepts:
## Value Std. Error t value
## 1|2 -3.8094 0.2853 -13.3528
## 2|3 -0.7210 0.1979 -3.6440
## 3|4 0.6364 0.1944 3.2734
mfit<-vglm(bmicat2~race_eth+educ+agec+pov2,
family=multinomial(refLevel = 1),
data=Master4,
weights=wtint2yr/mean(wtint2yr, na.rm=T))
summary(mfit)
##
## Call:
## vglm(formula = bmicat2 ~ race_eth + educ + agec + pov2, family = multinomial(refLevel = 1),
## data = Master4, weights = wtint2yr/mean(wtint2yr, na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 1.6132 0.3668 4.398 1.09e-05 ***
## (Intercept):2 1.3373 0.3666 3.648 0.000264 ***
## (Intercept):3 1.9053 0.3611 5.277 1.31e-07 ***
## race_eth2 hispanic:1 1.0237 0.4753 2.154 0.031234 *
## race_eth2 hispanic:2 1.9218 0.4709 4.081 4.48e-05 ***
## race_eth2 hispanic:3 1.6455 0.4693 3.507 0.000454 ***
## race_eth3 black:1 0.3830 0.3551 1.079 0.280784
## race_eth3 black:2 0.5227 0.3543 1.475 0.140097
## race_eth3 black:3 0.7201 0.3488 2.064 0.038989 *
## race_eth4 asian:1 0.6069 0.4547 1.335 0.181960
## race_eth4 asian:2 0.3931 0.4569 0.860 0.389569
## race_eth4 asian:3 -0.8293 0.4683 -1.771 0.076596 .
## race_eth5 other:1 0.8475 0.6796 1.247 0.212342
## race_eth5 other:2 1.3636 0.6746 2.021 0.043231 *
## race_eth5 other:3 1.2643 0.6711 1.884 0.059576 .
## educ2 hs:1 -0.4247 0.3671 -1.157 0.247295
## educ2 hs:2 -0.3703 0.3660 -1.012 0.311628
## educ2 hs:3 -0.2143 0.3617 -0.592 0.553562
## educ3 some col:1 0.2790 0.4024 0.693 0.488160
## educ3 some col:2 0.4243 0.4012 1.058 0.290250
## educ3 some col:3 0.4896 0.3975 1.232 0.218094
## educ4 col grad:1 0.2512 0.4308 0.583 0.559765
## educ4 col grad:2 0.2330 0.4299 0.542 0.587777
## educ4 col grad:3 -0.1095 0.4271 -0.256 0.797641
## agec(65,80]:1 1.2836 0.5011 2.562 0.010420 *
## agec(65,80]:2 1.8645 0.4991 3.736 0.000187 ***
## agec(65,80]:3 1.5161 0.4984 3.042 0.002350 **
## pov22:1 0.7220 0.3014 2.395 0.016607 *
## pov22:2 0.7563 0.3013 2.510 0.012083 *
## pov22:3 0.6722 0.2958 2.273 0.023041 *
## pov23:1 0.8552 0.3627 2.358 0.018380 *
## pov23:2 0.9255 0.3625 2.553 0.010679 *
## pov23:3 1.0824 0.3563 3.038 0.002379 **
## pov24:1 0.5157 0.3449 1.495 0.134912
## pov24:2 0.8186 0.3435 2.383 0.017183 *
## pov24:3 0.6899 0.3382 2.040 0.041343 *
## pov25:1 1.6658 0.3747 4.446 8.76e-06 ***
## pov25:2 2.0712 0.3741 5.536 3.09e-08 ***
## pov25:3 1.9992 0.3700 5.403 6.57e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1]),
## log(mu[,4]/mu[,1])
##
## Residual deviance: 12648.79 on 13425 degrees of freedom
##
## Log-likelihood: -6324.395 on 13425 degrees of freedom
##
## Number of Fisher scoring iterations: 8
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Reference group is level 1 of the response
ex1<-svyglm(I(bminum>1)~race_eth+educ+agec+pov2,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex2<-svyglm(I(bminum>2)~race_eth+educ+agec+pov2,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex3<-svyglm(I(bminum>3)~race_eth+educ+agec+pov2,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
coefs<-data.frame(parms =c( coefficients(ex1)[-1],coefficients(ex2)[-1],coefficients(ex3)[-1]),
beta = rep(names(coefficients(ex1))[-1], 2, 3 ),
mod = c(rep("I(bminum>1)",12 ),
rep("I(bminum>2)",12 ),
rep("I(bminum>3)",12 )))
coefs%>%
ggplot()+
geom_point(aes(x=beta, y=parms, color=mod))+
theme(axis.text.x = element_text(angle = 45))+
labs(title = "Graphical Summary of Proportional Odds Assumption",
x= "Beta",
y= "Estimate")
#Just print the odds ratios,
round(exp(rbind(coef(ex1)[-1],
coef(ex2)[-1],
coef(ex3))),3)
## Warning in rbind(coef(ex1)[-1], coef(ex2)[-1], coef(ex3)): number of columns of
## result is not a multiple of vector length (arg 1)
## (Intercept) race_eth2 hispanic race_eth3 black race_eth4 asian
## [1,] 4.890 1.790 1.098 3.075
## [2,] 2.304 1.373 0.520 1.669
## [3,] 0.641 1.168 1.326 0.288
## race_eth5 other educ2 hs educ3 some col educ4 col grad agec(65,80] pov22
## [1,] 0.793 1.626 1.270 3.893 2.202 2.568
## [2,] 1.147 1.197 0.882 1.372 1.138 1.259
## [3,] 1.194 1.144 1.100 0.772 0.847 1.019
## pov23 pov24 pov25
## [1,] 2.015 6.780 4.890
## [2,] 1.449 1.630 2.304
## [3,] 1.215 1.059 1.139
# Proportional odds assumption is not met
# I graphed the coafficients for each transition and calculated odds ratios. After viewing the graph and looking at the OR's its clear that the assumption of proportionality is not met
# Since the assumption for the proportional odds model is not met, by default the multinomial modial would work better since it is not bound by the proportional assumption.
round(exp(coef(mfit)), 3)
## (Intercept):1 (Intercept):2 (Intercept):3
## 5.019 3.809 6.722
## race_eth2 hispanic:1 race_eth2 hispanic:2 race_eth2 hispanic:3
## 2.784 6.833 5.184
## race_eth3 black:1 race_eth3 black:2 race_eth3 black:3
## 1.467 1.687 2.055
## race_eth4 asian:1 race_eth4 asian:2 race_eth4 asian:3
## 1.835 1.482 0.436
## race_eth5 other:1 race_eth5 other:2 race_eth5 other:3
## 2.334 3.910 3.541
## educ2 hs:1 educ2 hs:2 educ2 hs:3
## 0.654 0.691 0.807
## educ3 some col:1 educ3 some col:2 educ3 some col:3
## 1.322 1.528 1.632
## educ4 col grad:1 educ4 col grad:2 educ4 col grad:3
## 1.286 1.262 0.896
## agec(65,80]:1 agec(65,80]:2 agec(65,80]:3
## 3.610 6.453 4.554
## pov22:1 pov22:2 pov22:3
## 2.058 2.130 1.959
## pov23:1 pov23:2 pov23:3
## 2.352 2.523 2.952
## pov24:1 pov24:2 pov24:3
## 1.675 2.267 1.994
## pov25:1 pov25:2 pov25:3
## 5.290 7.935 7.383
round(exp(confint(mfit)), 3)
## 2.5 % 97.5 %
## (Intercept):1 2.446 10.299
## (Intercept):2 1.857 7.813
## (Intercept):3 3.312 13.640
## race_eth2 hispanic:1 1.097 7.065
## race_eth2 hispanic:2 2.715 17.197
## race_eth2 hispanic:3 2.066 13.004
## race_eth3 black:1 0.731 2.942
## race_eth3 black:2 0.842 3.377
## race_eth3 black:3 1.037 4.070
## race_eth4 asian:1 0.753 4.473
## race_eth4 asian:2 0.605 3.628
## race_eth4 asian:3 0.174 1.093
## race_eth5 other:1 0.616 8.841
## race_eth5 other:2 1.042 14.669
## race_eth5 other:3 0.950 13.193
## educ2 hs:1 0.318 1.343
## educ2 hs:2 0.337 1.415
## educ2 hs:3 0.397 1.640
## educ3 some col:1 0.601 2.909
## educ3 some col:2 0.696 3.355
## educ3 some col:3 0.749 3.556
## educ4 col grad:1 0.553 2.991
## educ4 col grad:2 0.544 2.932
## educ4 col grad:3 0.388 2.070
## agec(65,80]:1 1.352 9.638
## agec(65,80]:2 2.426 17.161
## agec(65,80]:3 1.715 12.097
## pov22:1 1.140 3.716
## pov22:2 1.180 3.845
## pov22:3 1.097 3.497
## pov23:1 1.155 4.788
## pov23:2 1.240 5.135
## pov23:3 1.468 5.934
## pov24:1 0.852 3.293
## pov24:2 1.156 4.446
## pov24:3 1.027 3.868
## pov25:1 2.538 11.026
## pov25:2 3.811 16.518
## pov25:3 3.575 15.248
# Compared to Whites, Blacks were more likely to be overweight, than obese (OR=1.46; CI= 0.73, 2.94). College graduates were about 26% more likely than individuals with some high school to be normal than overweight (OR=1.26; CI= 0.54, 2.93). Individuals 1-2 times over the poverty threshold were more likely than those at or below poverty to be overweight than obese (OR=2.05; CI= 1.14, 3.72)