#Load data for analysis#sub-setting and Re-codding variables for analysis purposes
brfss <- readRDS("brfss_177.rds")
# Cleaning the variable names for space, underscore & Uppercase Characters
renam<-names(brfss)
newnames<-tolower(gsub(pattern = "_",replacement = "",x = renam))
names(brfss)<-newnames
homewk3 <- brfss %>%
select(state,llcpwt,dispcode,seqno,psu,bmi5cat,racegr3,educa,ststr,smoke100,exerany2,sex,ageg,menthlth,smokday2,sleptim1) %>%
mutate( smokers=ifelse(smoke100==1 & smokday2%in%c(1,2),"Current",
ifelse(smoke100==1 & smokday2==3, "Former",
ifelse(smoke100==2, "Never", NA)))) %>%
mutate(bmi = car::Recode(bmi5cat,recodes="1='Underweight';2='Normal';3='Overweight'; 4='Obese';else=NA",as.factor=T ),
educa = car::Recode(educa,recodes="1:2='<HS';3:4='HS';5:6='>HS';else=NA", as.factor=T ),
racegr3 = car::Recode(racegr3,recodes="1='NH-White';2='NH-Black';3:4='Other';5='Hispanic';else=NA",as.factor=T ),
ageg = car::Recode(ageg,recodes="1='18-24';2='25-30';3='35-44';4='45-54';5='55-54';else='65+'" ),
Physicala = car::Recode(exerany2,recodes="1='Yes';2='No';else=NA",as.factor=T ), sex = car::Recode(sex,recodes="1='Male';2='Female';else=NA",as.factor=T ),
sleepdurat=ifelse(sleptim1<=6.9 & !sleptim1%in%c(77,99), 3,
ifelse(sleptim1>=7 & sleptim1<9, 2,
ifelse(sleptim1>9, 1, NA))),
mentald=ifelse(menthlth<31, "Yes",
ifelse(menthlth==88, "No",NA)))
#Data containing only variables used in the analysis
Final_data <- homewk3 %>%
select(state,llcpwt,dispcode,seqno,psu,smokers,bmi,educa,ageg,Physicala, sleepdurat,sex,racegr3,mentald,ststr) %>%
mutate(sleepdur=as.factor(sleepdurat)) %>%
filter(complete.cases(.))
# removing unwanted data from the R environment
rm(brfss,newnames,renam,rg);gc()
## Warning in rm(brfss, newnames, renam, rg): object 'rg' not found
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2771959 148.1 5132315 274.1 3715738 198.5
## Vcells 13433823 102.5 163816288 1249.9 204751131 1562.2
#rm(list = ls())
# Survey Design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~llcpwt, data = Final_data )
For the purpose of this analysis, the outcome variable is self reported average hours of sleep in a 24 period. The variable is continuous and was re-coded into an ordinal variable. The response to the question “On average, how many hours of sleep do you have in a day” was re-coded such that 3 will be the lowest (Inadequate) sleep duration and 1 being the Highest (More than adequate) sleep duration any of the respondents reported.
what is the relationship between self reported daily sleep duration and Body Mass Index(BMI)categories after controlling for socio-demographic characteristics ?
Final_data$sleepdur<-relevel(Final_data$sleepdur, ref = "1")
fit.solr1<-svyolr(sleepdur~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,des)
summary(fit.solr1)
## Call:
## svyolr(sleepdur ~ bmi + racegr3 + ageg + educa + mentald + Physicala +
## sex + smokers, des)
##
## Coefficients:
## Value Std. Error t value
## bmiObese 0.14008428 0.04309068 3.2509183
## bmiOverweight 0.09584505 0.04012594 2.3886059
## bmiUnderweight 0.09417992 0.13074488 0.7203335
## racegr3NH-Black 0.28838451 0.09831874 2.9331590
## racegr3NH-White -0.08152841 0.05857533 -1.3918558
## racegr3Other 0.12774670 0.08759113 1.4584433
## ageg25-30 0.12433806 0.07692417 1.6163719
## ageg35-44 0.06130697 0.07613296 0.8052619
## ageg45-54 0.08281430 0.07533431 1.0992907
## ageg55-54 -0.06968176 0.07370951 -0.9453563
## ageg65+ -0.44168346 0.07271350 -6.0742978
## educa>HS 0.37804457 0.12286543 3.0768995
## educaHS 0.32026770 0.12427617 2.5770645
## mentaldYes 0.30306658 0.03675509 8.2455666
## PhysicalaYes -0.12266716 0.04186358 -2.9301640
## sexMale 0.09302969 0.03360020 2.7687243
## smokersFormer -0.30499021 0.05642057 -5.4056566
## smokersNever -0.39763485 0.05238721 -7.5903048
##
## Intercepts:
## Value Std. Error t value
## 1|2 -3.0320 0.1574 -19.2612
## 2|3 0.6936 0.1511 4.5918
Since the proportional odds assumption states that for each term included in a model, the coefficient estimate between each pair of outcomes across two response levels are the same irrespective of the pair under consideration.
To check for this assumption, i fitted logits for each pair or changes and calculated the odd ratios by taking the exponent of the coefficients from the logits and then compare them to see if they are proportionate across the different changes
# fitting logit for each changes
ex1<-svyglm(I(sleepdurat>1)~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers, des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex2<-svyglm(I(sleepdurat>2)~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers, des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#print the odds ratios
round(exp(rbind(coef(ex1)[-1],
coef(ex2)[-1])),3)
## bmiObese bmiOverweight bmiUnderweight racegr3NH-Black racegr3NH-White
## [1,] 0.888 1.103 0.828 0.575 1.250
## [2,] 1.193 1.105 1.165 1.493 0.891
## racegr3Other ageg25-30 ageg35-44 ageg45-54 ageg55-54 ageg65+ educa>HS
## [1,] 0.950 1.246 1.194 1.083 0.863 0.531 2.523
## [2,] 1.164 1.125 1.051 1.084 0.937 0.654 1.291
## educaHS mentaldYes PhysicalaYes sexMale smokersFormer smokersNever
## [1,] 1.325 0.764 1.721 1.220 1.275 1.415
## [2,] 1.314 1.452 0.801 1.086 0.703 0.626
To a large extent the assumption of odds proportionality is satisfied.
Comparing Proportional odds or Multinomial model to determine better fit using the AIC
fit.vgam<-vglm(as.ordered(sleepdur)~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,
Final_data, weights =llcpwt/mean(llcpwt, na.rm=T),
family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
summary(fit.vgam)
##
## Call:
## vglm(formula = as.ordered(sleepdur) ~ bmi + racegr3 + ageg +
## educa + mentald + Physicala + sex + smokers, family = cumulative(parallel = T,
## reverse = T), data = Final_data, weights = llcpwt/mean(llcpwt,
## na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 3.03201 0.07668 39.541 < 2e-16 ***
## (Intercept):2 -0.69358 0.07388 -9.388 < 2e-16 ***
## bmiObese 0.14009 0.02564 5.464 4.66e-08 ***
## bmiOverweight 0.09585 0.02454 3.905 9.41e-05 ***
## bmiUnderweight 0.09420 0.07247 1.300 0.19366
## racegr3NH-Black 0.28838 0.04564 6.319 2.63e-10 ***
## racegr3NH-White -0.08152 0.03244 -2.513 0.01198 *
## racegr3Other 0.12775 0.04673 2.734 0.00626 **
## ageg25-30 0.12432 0.03888 3.198 0.00138 **
## ageg35-44 0.06128 0.03927 1.560 0.11869
## ageg45-54 0.08280 0.03931 2.106 0.03518 *
## ageg55-54 -0.06971 0.03958 -1.761 0.07820 .
## ageg65+ -0.44170 0.03960 -11.153 < 2e-16 ***
## educa>HS 0.37801 0.06011 6.289 3.20e-10 ***
## educaHS 0.32024 0.06022 5.318 1.05e-07 ***
## mentaldYes 0.30306 0.02126 14.252 < 2e-16 ***
## PhysicalaYes -0.12266 0.02310 -5.311 1.09e-07 ***
## sexMale 0.09303 0.02028 4.588 4.48e-06 ***
## smokersFormer -0.30498 0.03118 -9.782 < 2e-16 ***
## smokersNever -0.39763 0.02766 -14.375 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 68103.2 on 85266 degrees of freedom
##
## Log-likelihood: -34051.6 on 85266 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## bmiObese bmiOverweight bmiUnderweight racegr3NH-Black racegr3NH-White
## 1.1503741 1.1005916 1.0987767 1.3342588 0.9217124
## racegr3Other ageg25-30 ageg35-44 ageg45-54 ageg55-54
## 1.1362663 1.1323835 1.0631966 1.0863197 0.9326684
## ageg65+ educa>HS educaHS mentaldYes PhysicalaYes
## 0.6429422 1.4593833 1.3774629 1.3539924 0.8845618
## sexMale smokersFormer smokersNever
## 1.0974940 0.7371355 0.6719121
#Non-proportional odds
fit.vgam2<-vglm(as.ordered(sleepdur)~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,Final_data,
weights =llcpwt/mean(llcpwt, na.rm=T),
family=cumulative(parallel = F, reverse = T)) #<-parallel = F == Non proportional odds
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals =
## residuals, : fitted values close to 0 or 1
summary(fit.vgam2)
##
## Call:
## vglm(formula = as.ordered(sleepdur) ~ bmi + racegr3 + ageg +
## educa + mentald + Physicala + sex + smokers, family = cumulative(parallel = F,
## reverse = T), data = Final_data, weights = llcpwt/mean(llcpwt,
## na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 1.94908 0.14728 13.234 < 2e-16 ***
## (Intercept):2 -0.51809 0.07705 -6.724 1.77e-11 ***
## bmiObese:1 -0.09008 0.05990 -1.504 0.132630
## bmiObese:2 0.17506 0.02677 6.538 6.22e-11 ***
## bmiOverweight:1 0.09431 0.06072 1.553 0.120405
## bmiOverweight:2 0.10002 0.02580 3.877 0.000106 ***
## bmiUnderweight:1 -0.17681 0.15012 -1.178 0.238891
## bmiUnderweight:2 0.14726 0.07469 1.972 0.048657 *
## racegr3NH-Black:1 -0.47941 0.09650 -4.968 6.76e-07 ***
## racegr3NH-Black:2 0.38824 0.04676 8.303 < 2e-16 ***
## racegr3NH-White:1 0.16025 0.07762 2.064 0.038971 *
## racegr3NH-White:2 -0.10993 0.03364 -3.267 0.001086 **
## racegr3Other:1 -0.07730 0.11093 -0.697 0.485911
## racegr3Other:2 0.15102 0.04817 3.135 0.001717 **
## ageg25-30:1 0.17291 0.10300 1.679 0.093192 .
## ageg25-30:2 0.12184 0.04024 3.028 0.002460 **
## ageg35-44:1 0.16660 0.10344 1.610 0.107292
## ageg35-44:2 0.05322 0.04074 1.306 0.191415
## ageg45-54:1 0.03366 0.10041 0.335 0.737465
## ageg45-54:2 0.08726 0.04074 2.142 0.032192 *
## ageg55-54:1 -0.16575 0.09782 -1.695 0.090169 .
## ageg55-54:2 -0.06257 0.04118 -1.520 0.128634
## ageg65+:1 -0.63445 0.09304 -6.819 9.15e-12 ***
## ageg65+:2 -0.42485 0.04168 -10.193 < 2e-16 ***
## educa>HS:1 0.97635 0.10504 9.295 < 2e-16 ***
## educa>HS:2 0.25633 0.06320 4.056 4.99e-05 ***
## educaHS:1 0.36417 0.10211 3.566 0.000362 ***
## educaHS:2 0.27113 0.06324 4.287 1.81e-05 ***
## mentaldYes:1 -0.26147 0.05012 -5.217 1.82e-07 ***
## mentaldYes:2 0.37017 0.02198 16.841 < 2e-16 ***
## PhysicalaYes:1 0.50695 0.04966 10.209 < 2e-16 ***
## PhysicalaYes:2 -0.21495 0.02388 -9.001 < 2e-16 ***
## sexMale:1 0.20486 0.04865 4.210 2.55e-05 ***
## sexMale:2 0.08086 0.02120 3.815 0.000136 ***
## smokersFormer:1 0.19472 0.06926 2.811 0.004935 **
## smokersFormer:2 -0.34914 0.03211 -10.875 < 2e-16 ***
## smokersNever:1 0.29777 0.06206 4.799 1.60e-06 ***
## smokersNever:2 -0.46653 0.02837 -16.443 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 66787.75 on 85248 degrees of freedom
##
## Log-likelihood: -33393.87 on 85248 degrees of freedom
##
## Number of Fisher scoring iterations: 10
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## bmiObese:1 bmiObese:2 bmiOverweight:1 bmiOverweight:2
## 0.9138549 1.1913181 1.0988978 1.1051904
## bmiUnderweight:1 bmiUnderweight:2 racegr3NH-Black:1 racegr3NH-Black:2
## 0.8379408 1.1586601 0.6191514 1.4743784
## racegr3NH-White:1 racegr3NH-White:2 racegr3Other:1 racegr3Other:2
## 1.1738010 0.8959003 0.9256154 1.1630186
## ageg25-30:1 ageg25-30:2 ageg35-44:1 ageg35-44:2
## 1.1887607 1.1295739 1.1812764 1.0546615
## ageg45-54:1 ageg45-54:2 ageg55-54:1 ageg55-54:2
## 1.0342298 1.0911803 0.8472571 0.9393479
## ageg65+:1 ageg65+:2 educa>HS:1 educa>HS:2
## 0.5302256 0.6538688 2.6547518 1.2921841
## educaHS:1 educaHS:2 mentaldYes:1 mentaldYes:2
## 1.4393202 1.3114444 0.7699207 1.4479827
## PhysicalaYes:1 PhysicalaYes:2 sexMale:1 sexMale:2
## 1.6602213 0.8065818 1.2273514 1.0842199
## smokersFormer:1 smokersFormer:2 smokersNever:1 smokersNever:2
## 1.2149678 0.7052962 1.3468562 0.6271729
#multinomial
mfit<-vglm(sleepdur~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,
family=multinomial(refLevel = 1),
data=Final_data,
weights=llcpwt/mean(llcpwt, na.rm=T))
summary(mfit)
##
## Call:
## vglm(formula = sleepdur ~ bmi + racegr3 + ageg + educa + mentald +
## Physicala + sex + smokers, family = multinomial(refLevel = 1),
## data = Final_data, weights = llcpwt/mean(llcpwt, na.rm = T))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 1.322183 0.151032 8.754 < 2e-16 ***
## (Intercept):2 1.020010 0.155533 6.558 5.45e-11 ***
## bmiObese:1 -0.193347 0.061106 -3.164 0.001555 **
## bmiObese:2 -0.002896 0.062320 -0.046 0.962935
## bmiOverweight:1 0.063092 0.062120 1.016 0.309804
## bmiOverweight:2 0.157802 0.063427 2.488 0.012849 *
## bmiUnderweight:1 -0.254363 0.154754 -1.644 0.100246
## bmiUnderweight:2 -0.073657 0.156945 -0.469 0.638843
## racegr3NH-Black:1 -0.786897 0.097560 -8.066 7.28e-16 ***
## racegr3NH-Black:2 -0.290499 0.098114 -2.961 0.003068 **
## racegr3NH-White:1 0.269707 0.078505 3.436 0.000591 ***
## racegr3NH-White:2 0.141077 0.079945 1.765 0.077618 .
## racegr3Other:1 -0.128847 0.112831 -1.142 0.253474
## racegr3Other:2 0.038877 0.114196 0.340 0.733527
## ageg25-30:1 0.166038 0.104633 1.587 0.112546
## ageg25-30:2 0.276175 0.105641 2.614 0.008942 **
## ageg35-44:1 0.152336 0.104389 1.459 0.144479
## ageg35-44:2 0.196018 0.105553 1.857 0.063304 .
## ageg45-54:1 0.035765 0.102201 0.350 0.726377
## ageg45-54:2 0.118589 0.103319 1.148 0.251054
## ageg55-54:1 -0.131804 0.099179 -1.329 0.183863
## ageg55-54:2 -0.181807 0.100639 -1.807 0.070836 .
## ageg65+:1 -0.503145 0.094126 -5.345 9.02e-08 ***
## ageg65+:2 -0.883483 0.096327 -9.172 < 2e-16 ***
## educa>HS:1 0.860495 0.109414 7.865 3.70e-15 ***
## educa>HS:2 1.033305 0.114574 9.019 < 2e-16 ***
## educaHS:1 0.186437 0.106505 1.750 0.080033 .
## educaHS:2 0.428927 0.111673 3.841 0.000123 ***
## mentaldYes:1 -0.436559 0.051149 -8.535 < 2e-16 ***
## mentaldYes:2 -0.028943 0.051894 -0.558 0.577023
## PhysicalaYes:1 0.654164 0.050574 12.935 < 2e-16 ***
## PhysicalaYes:2 0.373368 0.051534 7.245 4.32e-13 ***
## sexMale:1 0.171044 0.049693 3.442 0.000577 ***
## sexMale:2 0.240843 0.050613 4.759 1.95e-06 ***
## smokersFormer:1 0.427693 0.070698 6.050 1.45e-09 ***
## smokersFormer:2 0.032403 0.071549 0.453 0.650639
## smokersNever:1 0.577914 0.063204 9.144 < 2e-16 ***
## smokersNever:2 0.057647 0.063696 0.905 0.365448
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1])
##
## Residual deviance: 66728.1 on 85248 degrees of freedom
##
## Log-likelihood: -33364.05 on 85248 degrees of freedom
##
## Number of Fisher scoring iterations: 6
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Reference group is level 1 of the response
# Printing the AIC for the three models
AIC(fit.vgam)#proportional odds
## [1] 68143.2
AIC(fit.vgam2) #cumulative logit, non proportional
## [1] 66863.75
AIC(mfit)#multinomial
## [1] 66804.1
The multinomial model has the lowest AIC. Hence, the multinomial model is the best fitting model, but it is very close to the non proportional odds model using the cumulative logit.
tab <- round(exp(cbind(OR = coef(mfit),confint(mfit))),3)
colnames(tab) <- c("Oddratio", "CI (2.5%)", "CI (97.5%)")
tab %>%
kbl(caption = "Multinomial Logistic Regression for Sleep Duration") %>%
kable_classic(full_width = F, html_font = "Cambria")
| Oddratio | CI (2.5%) | CI (97.5%) | |
|---|---|---|---|
| (Intercept):1 | 3.752 | 2.790 | 5.044 |
| (Intercept):2 | 2.773 | 2.045 | 3.762 |
| bmiObese:1 | 0.824 | 0.731 | 0.929 |
| bmiObese:2 | 0.997 | 0.882 | 1.127 |
| bmiOverweight:1 | 1.065 | 0.943 | 1.203 |
| bmiOverweight:2 | 1.171 | 1.034 | 1.326 |
| bmiUnderweight:1 | 0.775 | 0.573 | 1.050 |
| bmiUnderweight:2 | 0.929 | 0.683 | 1.264 |
| racegr3NH-Black:1 | 0.455 | 0.376 | 0.551 |
| racegr3NH-Black:2 | 0.748 | 0.617 | 0.906 |
| racegr3NH-White:1 | 1.310 | 1.123 | 1.527 |
| racegr3NH-White:2 | 1.152 | 0.985 | 1.347 |
| racegr3Other:1 | 0.879 | 0.705 | 1.097 |
| racegr3Other:2 | 1.040 | 0.831 | 1.300 |
| ageg25-30:1 | 1.181 | 0.962 | 1.449 |
| ageg25-30:2 | 1.318 | 1.072 | 1.621 |
| ageg35-44:1 | 1.165 | 0.949 | 1.429 |
| ageg35-44:2 | 1.217 | 0.989 | 1.496 |
| ageg45-54:1 | 1.036 | 0.848 | 1.266 |
| ageg45-54:2 | 1.126 | 0.920 | 1.379 |
| ageg55-54:1 | 0.877 | 0.722 | 1.065 |
| ageg55-54:2 | 0.834 | 0.685 | 1.016 |
| ageg65+:1 | 0.605 | 0.503 | 0.727 |
| ageg65+:2 | 0.413 | 0.342 | 0.499 |
| educa>HS:1 | 2.364 | 1.908 | 2.930 |
| educa>HS:2 | 2.810 | 2.245 | 3.518 |
| educaHS:1 | 1.205 | 0.978 | 1.485 |
| educaHS:2 | 1.536 | 1.234 | 1.911 |
| mentaldYes:1 | 0.646 | 0.585 | 0.714 |
| mentaldYes:2 | 0.971 | 0.878 | 1.075 |
| PhysicalaYes:1 | 1.924 | 1.742 | 2.124 |
| PhysicalaYes:2 | 1.453 | 1.313 | 1.607 |
| sexMale:1 | 1.187 | 1.076 | 1.308 |
| sexMale:2 | 1.272 | 1.152 | 1.405 |
| smokersFormer:1 | 1.534 | 1.335 | 1.762 |
| smokersFormer:2 | 1.033 | 0.898 | 1.188 |
| smokersNever:1 | 1.782 | 1.575 | 2.017 |
| smokersNever:2 | 1.059 | 0.935 | 1.200 |
The results from the table implies that people in the overweight BMI category are more likely to report adequate sleep duration, compared to excellent sleep duration, compared to people in the normal BMI category, but those in the obese BMI category are less likely to report adequate vs excellent sleep duration duration, compared to those in Normal BMI category.