#Load data for analysis#sub-setting and Re-codding variables for analysis purposes

brfss <- readRDS("brfss_177.rds")
# Cleaning the  variable names for space, underscore & Uppercase Characters
renam<-names(brfss)
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  renam))
names(brfss)<-newnames


homewk3 <- brfss %>% 
  select(state,llcpwt,dispcode,seqno,psu,bmi5cat,racegr3,educa,ststr,smoke100,exerany2,sex,ageg,menthlth,smokday2,sleptim1) %>%  
  
  mutate( smokers=ifelse(smoke100==1 & smokday2%in%c(1,2),"Current",
                 ifelse(smoke100==1 & smokday2==3, "Former", 
                ifelse(smoke100==2, "Never", NA)))) %>% 
  
mutate(bmi = car::Recode(bmi5cat,recodes="1='Underweight';2='Normal';3='Overweight'; 4='Obese';else=NA",as.factor=T ),
       educa = car::Recode(educa,recodes="1:2='<HS';3:4='HS';5:6='>HS';else=NA", as.factor=T ),
       racegr3 = car::Recode(racegr3,recodes="1='NH-White';2='NH-Black';3:4='Other';5='Hispanic';else=NA",as.factor=T ), 
       ageg = car::Recode(ageg,recodes="1='18-24';2='25-30';3='35-44';4='45-54';5='55-54';else='65+'" ), 
       Physicala = car::Recode(exerany2,recodes="1='Yes';2='No';else=NA",as.factor=T ), sex = car::Recode(sex,recodes="1='Male';2='Female';else=NA",as.factor=T ),
       sleepdurat=ifelse(sleptim1<=6.9 & !sleptim1%in%c(77,99), 3,
          ifelse(sleptim1>=7 & sleptim1<9, 2,
                 ifelse(sleptim1>9, 1, NA))),
        mentald=ifelse(menthlth<31, "Yes",
                       ifelse(menthlth==88, "No",NA))) 
 
#Data containing only variables used in the analysis
 Final_data <- homewk3 %>% 
  select(state,llcpwt,dispcode,seqno,psu,smokers,bmi,educa,ageg,Physicala, sleepdurat,sex,racegr3,mentald,ststr) %>% 
   mutate(sleepdur=as.factor(sleepdurat)) %>% 
 filter(complete.cases(.))
# removing unwanted data from the R environment
rm(brfss,newnames,renam,rg);gc()
## Warning in rm(brfss, newnames, renam, rg): object 'rg' not found
##            used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  2771959 148.1    5132315  274.1   3715738  198.5
## Vcells 13433823 102.5  163816288 1249.9 204751131 1562.2
#rm(list = ls())
#  Survey Design
options(survey.lonely.psu = "adjust")

des<-svydesign(ids=~1, strata=~ststr, weights=~llcpwt, data = Final_data )

For the purpose of this analysis, the outcome variable is self reported average hours of sleep in a 24 period. The variable is continuous and was re-coded into an ordinal variable. The response to the question “On average, how many hours of sleep do you have in a day” was re-coded such that 3 will be the lowest (Inadequate) sleep duration and 1 being the Highest (More than adequate) sleep duration any of the respondents reported.

Research Question

what is the relationship between self reported daily sleep duration and Body Mass Index(BMI)categories after controlling for socio-demographic characteristics ?

Fitting the Regrsession Model

Final_data$sleepdur<-relevel(Final_data$sleepdur, ref = "1")
fit.solr1<-svyolr(sleepdur~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,des)
summary(fit.solr1)
## Call:
## svyolr(sleepdur ~ bmi + racegr3 + ageg + educa + mentald + Physicala + 
##     sex + smokers, des)
## 
## Coefficients:
##                       Value Std. Error    t value
## bmiObese         0.14008428 0.04309068  3.2509183
## bmiOverweight    0.09584505 0.04012594  2.3886059
## bmiUnderweight   0.09417992 0.13074488  0.7203335
## racegr3NH-Black  0.28838451 0.09831874  2.9331590
## racegr3NH-White -0.08152841 0.05857533 -1.3918558
## racegr3Other     0.12774670 0.08759113  1.4584433
## ageg25-30        0.12433806 0.07692417  1.6163719
## ageg35-44        0.06130697 0.07613296  0.8052619
## ageg45-54        0.08281430 0.07533431  1.0992907
## ageg55-54       -0.06968176 0.07370951 -0.9453563
## ageg65+         -0.44168346 0.07271350 -6.0742978
## educa>HS         0.37804457 0.12286543  3.0768995
## educaHS          0.32026770 0.12427617  2.5770645
## mentaldYes       0.30306658 0.03675509  8.2455666
## PhysicalaYes    -0.12266716 0.04186358 -2.9301640
## sexMale          0.09302969 0.03360020  2.7687243
## smokersFormer   -0.30499021 0.05642057 -5.4056566
## smokersNever    -0.39763485 0.05238721 -7.5903048
## 
## Intercepts:
##     Value    Std. Error t value 
## 1|2  -3.0320   0.1574   -19.2612
## 2|3   0.6936   0.1511     4.5918

Proportionality Assumption.

Since the proportional odds assumption states that for each term included in a model, the coefficient estimate between each pair of outcomes across two response levels are the same irrespective of the pair under consideration.

To check for this assumption, i fitted logits for each pair or changes and calculated the odd ratios by taking the exponent of the coefficients from the logits and then compare them to see if they are proportionate across the different changes

# fitting logit for each changes 
ex1<-svyglm(I(sleepdurat>1)~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers, des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex2<-svyglm(I(sleepdurat>2)~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers, des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#print the odds ratios
round(exp(rbind(coef(ex1)[-1],
                coef(ex2)[-1])),3)
##      bmiObese bmiOverweight bmiUnderweight racegr3NH-Black racegr3NH-White
## [1,]    0.888         1.103          0.828           0.575           1.250
## [2,]    1.193         1.105          1.165           1.493           0.891
##      racegr3Other ageg25-30 ageg35-44 ageg45-54 ageg55-54 ageg65+ educa>HS
## [1,]        0.950     1.246     1.194     1.083     0.863   0.531    2.523
## [2,]        1.164     1.125     1.051     1.084     0.937   0.654    1.291
##      educaHS mentaldYes PhysicalaYes sexMale smokersFormer smokersNever
## [1,]   1.325      0.764        1.721   1.220         1.275        1.415
## [2,]   1.314      1.452        0.801   1.086         0.703        0.626

To a large extent the assumption of odds proportionality is satisfied.

Best Fit Model

Comparing Proportional odds or Multinomial model to determine better fit using the AIC

fit.vgam<-vglm(as.ordered(sleepdur)~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,
               Final_data, weights =llcpwt/mean(llcpwt, na.rm=T),
               family=cumulative(parallel = T, reverse = T))  #<-parallel = T == proportional odds
summary(fit.vgam)
## 
## Call:
## vglm(formula = as.ordered(sleepdur) ~ bmi + racegr3 + ageg + 
##     educa + mentald + Physicala + sex + smokers, family = cumulative(parallel = T, 
##     reverse = T), data = Final_data, weights = llcpwt/mean(llcpwt, 
##     na.rm = T))
## 
## Coefficients: 
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1    3.03201    0.07668  39.541  < 2e-16 ***
## (Intercept):2   -0.69358    0.07388  -9.388  < 2e-16 ***
## bmiObese         0.14009    0.02564   5.464 4.66e-08 ***
## bmiOverweight    0.09585    0.02454   3.905 9.41e-05 ***
## bmiUnderweight   0.09420    0.07247   1.300  0.19366    
## racegr3NH-Black  0.28838    0.04564   6.319 2.63e-10 ***
## racegr3NH-White -0.08152    0.03244  -2.513  0.01198 *  
## racegr3Other     0.12775    0.04673   2.734  0.00626 ** 
## ageg25-30        0.12432    0.03888   3.198  0.00138 ** 
## ageg35-44        0.06128    0.03927   1.560  0.11869    
## ageg45-54        0.08280    0.03931   2.106  0.03518 *  
## ageg55-54       -0.06971    0.03958  -1.761  0.07820 .  
## ageg65+         -0.44170    0.03960 -11.153  < 2e-16 ***
## educa>HS         0.37801    0.06011   6.289 3.20e-10 ***
## educaHS          0.32024    0.06022   5.318 1.05e-07 ***
## mentaldYes       0.30306    0.02126  14.252  < 2e-16 ***
## PhysicalaYes    -0.12266    0.02310  -5.311 1.09e-07 ***
## sexMale          0.09303    0.02028   4.588 4.48e-06 ***
## smokersFormer   -0.30498    0.03118  -9.782  < 2e-16 ***
## smokersNever    -0.39763    0.02766 -14.375  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
## 
## Residual deviance: 68103.2 on 85266 degrees of freedom
## 
## Log-likelihood: -34051.6 on 85266 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##        bmiObese   bmiOverweight  bmiUnderweight racegr3NH-Black racegr3NH-White 
##       1.1503741       1.1005916       1.0987767       1.3342588       0.9217124 
##    racegr3Other       ageg25-30       ageg35-44       ageg45-54       ageg55-54 
##       1.1362663       1.1323835       1.0631966       1.0863197       0.9326684 
##         ageg65+        educa>HS         educaHS      mentaldYes    PhysicalaYes 
##       0.6429422       1.4593833       1.3774629       1.3539924       0.8845618 
##         sexMale   smokersFormer    smokersNever 
##       1.0974940       0.7371355       0.6719121
#Non-proportional odds
fit.vgam2<-vglm(as.ordered(sleepdur)~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,Final_data,
                weights =llcpwt/mean(llcpwt, na.rm=T),
                family=cumulative(parallel = F, reverse = T))  #<-parallel = F == Non proportional odds
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals =
## residuals, : fitted values close to 0 or 1
summary(fit.vgam2)
## 
## Call:
## vglm(formula = as.ordered(sleepdur) ~ bmi + racegr3 + ageg + 
##     educa + mentald + Physicala + sex + smokers, family = cumulative(parallel = F, 
##     reverse = T), data = Final_data, weights = llcpwt/mean(llcpwt, 
##     na.rm = T))
## 
## Coefficients: 
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1      1.94908    0.14728  13.234  < 2e-16 ***
## (Intercept):2     -0.51809    0.07705  -6.724 1.77e-11 ***
## bmiObese:1        -0.09008    0.05990  -1.504 0.132630    
## bmiObese:2         0.17506    0.02677   6.538 6.22e-11 ***
## bmiOverweight:1    0.09431    0.06072   1.553 0.120405    
## bmiOverweight:2    0.10002    0.02580   3.877 0.000106 ***
## bmiUnderweight:1  -0.17681    0.15012  -1.178 0.238891    
## bmiUnderweight:2   0.14726    0.07469   1.972 0.048657 *  
## racegr3NH-Black:1 -0.47941    0.09650  -4.968 6.76e-07 ***
## racegr3NH-Black:2  0.38824    0.04676   8.303  < 2e-16 ***
## racegr3NH-White:1  0.16025    0.07762   2.064 0.038971 *  
## racegr3NH-White:2 -0.10993    0.03364  -3.267 0.001086 ** 
## racegr3Other:1    -0.07730    0.11093  -0.697 0.485911    
## racegr3Other:2     0.15102    0.04817   3.135 0.001717 ** 
## ageg25-30:1        0.17291    0.10300   1.679 0.093192 .  
## ageg25-30:2        0.12184    0.04024   3.028 0.002460 ** 
## ageg35-44:1        0.16660    0.10344   1.610 0.107292    
## ageg35-44:2        0.05322    0.04074   1.306 0.191415    
## ageg45-54:1        0.03366    0.10041   0.335 0.737465    
## ageg45-54:2        0.08726    0.04074   2.142 0.032192 *  
## ageg55-54:1       -0.16575    0.09782  -1.695 0.090169 .  
## ageg55-54:2       -0.06257    0.04118  -1.520 0.128634    
## ageg65+:1         -0.63445    0.09304  -6.819 9.15e-12 ***
## ageg65+:2         -0.42485    0.04168 -10.193  < 2e-16 ***
## educa>HS:1         0.97635    0.10504   9.295  < 2e-16 ***
## educa>HS:2         0.25633    0.06320   4.056 4.99e-05 ***
## educaHS:1          0.36417    0.10211   3.566 0.000362 ***
## educaHS:2          0.27113    0.06324   4.287 1.81e-05 ***
## mentaldYes:1      -0.26147    0.05012  -5.217 1.82e-07 ***
## mentaldYes:2       0.37017    0.02198  16.841  < 2e-16 ***
## PhysicalaYes:1     0.50695    0.04966  10.209  < 2e-16 ***
## PhysicalaYes:2    -0.21495    0.02388  -9.001  < 2e-16 ***
## sexMale:1          0.20486    0.04865   4.210 2.55e-05 ***
## sexMale:2          0.08086    0.02120   3.815 0.000136 ***
## smokersFormer:1    0.19472    0.06926   2.811 0.004935 ** 
## smokersFormer:2   -0.34914    0.03211 -10.875  < 2e-16 ***
## smokersNever:1     0.29777    0.06206   4.799 1.60e-06 ***
## smokersNever:2    -0.46653    0.02837 -16.443  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
## 
## Residual deviance: 66787.75 on 85248 degrees of freedom
## 
## Log-likelihood: -33393.87 on 85248 degrees of freedom
## 
## Number of Fisher scoring iterations: 10 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Exponentiated coefficients:
##        bmiObese:1        bmiObese:2   bmiOverweight:1   bmiOverweight:2 
##         0.9138549         1.1913181         1.0988978         1.1051904 
##  bmiUnderweight:1  bmiUnderweight:2 racegr3NH-Black:1 racegr3NH-Black:2 
##         0.8379408         1.1586601         0.6191514         1.4743784 
## racegr3NH-White:1 racegr3NH-White:2    racegr3Other:1    racegr3Other:2 
##         1.1738010         0.8959003         0.9256154         1.1630186 
##       ageg25-30:1       ageg25-30:2       ageg35-44:1       ageg35-44:2 
##         1.1887607         1.1295739         1.1812764         1.0546615 
##       ageg45-54:1       ageg45-54:2       ageg55-54:1       ageg55-54:2 
##         1.0342298         1.0911803         0.8472571         0.9393479 
##         ageg65+:1         ageg65+:2        educa>HS:1        educa>HS:2 
##         0.5302256         0.6538688         2.6547518         1.2921841 
##         educaHS:1         educaHS:2      mentaldYes:1      mentaldYes:2 
##         1.4393202         1.3114444         0.7699207         1.4479827 
##    PhysicalaYes:1    PhysicalaYes:2         sexMale:1         sexMale:2 
##         1.6602213         0.8065818         1.2273514         1.0842199 
##   smokersFormer:1   smokersFormer:2    smokersNever:1    smokersNever:2 
##         1.2149678         0.7052962         1.3468562         0.6271729
#multinomial 
mfit<-vglm(sleepdur~bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,
           family=multinomial(refLevel = 1),
           data=Final_data,
           weights=llcpwt/mean(llcpwt, na.rm=T))

summary(mfit)
## 
## Call:
## vglm(formula = sleepdur ~ bmi + racegr3 + ageg + educa + mentald + 
##     Physicala + sex + smokers, family = multinomial(refLevel = 1), 
##     data = Final_data, weights = llcpwt/mean(llcpwt, na.rm = T))
## 
## Coefficients: 
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1      1.322183   0.151032   8.754  < 2e-16 ***
## (Intercept):2      1.020010   0.155533   6.558 5.45e-11 ***
## bmiObese:1        -0.193347   0.061106  -3.164 0.001555 ** 
## bmiObese:2        -0.002896   0.062320  -0.046 0.962935    
## bmiOverweight:1    0.063092   0.062120   1.016 0.309804    
## bmiOverweight:2    0.157802   0.063427   2.488 0.012849 *  
## bmiUnderweight:1  -0.254363   0.154754  -1.644 0.100246    
## bmiUnderweight:2  -0.073657   0.156945  -0.469 0.638843    
## racegr3NH-Black:1 -0.786897   0.097560  -8.066 7.28e-16 ***
## racegr3NH-Black:2 -0.290499   0.098114  -2.961 0.003068 ** 
## racegr3NH-White:1  0.269707   0.078505   3.436 0.000591 ***
## racegr3NH-White:2  0.141077   0.079945   1.765 0.077618 .  
## racegr3Other:1    -0.128847   0.112831  -1.142 0.253474    
## racegr3Other:2     0.038877   0.114196   0.340 0.733527    
## ageg25-30:1        0.166038   0.104633   1.587 0.112546    
## ageg25-30:2        0.276175   0.105641   2.614 0.008942 ** 
## ageg35-44:1        0.152336   0.104389   1.459 0.144479    
## ageg35-44:2        0.196018   0.105553   1.857 0.063304 .  
## ageg45-54:1        0.035765   0.102201   0.350 0.726377    
## ageg45-54:2        0.118589   0.103319   1.148 0.251054    
## ageg55-54:1       -0.131804   0.099179  -1.329 0.183863    
## ageg55-54:2       -0.181807   0.100639  -1.807 0.070836 .  
## ageg65+:1         -0.503145   0.094126  -5.345 9.02e-08 ***
## ageg65+:2         -0.883483   0.096327  -9.172  < 2e-16 ***
## educa>HS:1         0.860495   0.109414   7.865 3.70e-15 ***
## educa>HS:2         1.033305   0.114574   9.019  < 2e-16 ***
## educaHS:1          0.186437   0.106505   1.750 0.080033 .  
## educaHS:2          0.428927   0.111673   3.841 0.000123 ***
## mentaldYes:1      -0.436559   0.051149  -8.535  < 2e-16 ***
## mentaldYes:2      -0.028943   0.051894  -0.558 0.577023    
## PhysicalaYes:1     0.654164   0.050574  12.935  < 2e-16 ***
## PhysicalaYes:2     0.373368   0.051534   7.245 4.32e-13 ***
## sexMale:1          0.171044   0.049693   3.442 0.000577 ***
## sexMale:2          0.240843   0.050613   4.759 1.95e-06 ***
## smokersFormer:1    0.427693   0.070698   6.050 1.45e-09 ***
## smokersFormer:2    0.032403   0.071549   0.453 0.650639    
## smokersNever:1     0.577914   0.063204   9.144  < 2e-16 ***
## smokersNever:2     0.057647   0.063696   0.905 0.365448    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1])
## 
## Residual deviance: 66728.1 on 85248 degrees of freedom
## 
## Log-likelihood: -33364.05 on 85248 degrees of freedom
## 
## Number of Fisher scoring iterations: 6 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## 
## Reference group is level  1  of the response
# Printing the AIC for the three models 
AIC(fit.vgam)#proportional odds
## [1] 68143.2
AIC(fit.vgam2) #cumulative logit, non proportional
## [1] 66863.75
AIC(mfit)#multinomial 
## [1] 66804.1

The multinomial model has the lowest AIC. Hence, the multinomial model is the best fitting model, but it is very close to the non proportional odds model using the cumulative logit.

Table

tab <- round(exp(cbind(OR = coef(mfit),confint(mfit))),3)
 colnames(tab) <- c("Oddratio", "CI (2.5%)", "CI (97.5%)")
tab %>%
  kbl(caption = "Multinomial Logistic Regression for Sleep Duration") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Multinomial Logistic Regression for Sleep Duration
Oddratio CI (2.5%) CI (97.5%)
(Intercept):1 3.752 2.790 5.044
(Intercept):2 2.773 2.045 3.762
bmiObese:1 0.824 0.731 0.929
bmiObese:2 0.997 0.882 1.127
bmiOverweight:1 1.065 0.943 1.203
bmiOverweight:2 1.171 1.034 1.326
bmiUnderweight:1 0.775 0.573 1.050
bmiUnderweight:2 0.929 0.683 1.264
racegr3NH-Black:1 0.455 0.376 0.551
racegr3NH-Black:2 0.748 0.617 0.906
racegr3NH-White:1 1.310 1.123 1.527
racegr3NH-White:2 1.152 0.985 1.347
racegr3Other:1 0.879 0.705 1.097
racegr3Other:2 1.040 0.831 1.300
ageg25-30:1 1.181 0.962 1.449
ageg25-30:2 1.318 1.072 1.621
ageg35-44:1 1.165 0.949 1.429
ageg35-44:2 1.217 0.989 1.496
ageg45-54:1 1.036 0.848 1.266
ageg45-54:2 1.126 0.920 1.379
ageg55-54:1 0.877 0.722 1.065
ageg55-54:2 0.834 0.685 1.016
ageg65+:1 0.605 0.503 0.727
ageg65+:2 0.413 0.342 0.499
educa>HS:1 2.364 1.908 2.930
educa>HS:2 2.810 2.245 3.518
educaHS:1 1.205 0.978 1.485
educaHS:2 1.536 1.234 1.911
mentaldYes:1 0.646 0.585 0.714
mentaldYes:2 0.971 0.878 1.075
PhysicalaYes:1 1.924 1.742 2.124
PhysicalaYes:2 1.453 1.313 1.607
sexMale:1 1.187 1.076 1.308
sexMale:2 1.272 1.152 1.405
smokersFormer:1 1.534 1.335 1.762
smokersFormer:2 1.033 0.898 1.188
smokersNever:1 1.782 1.575 2.017
smokersNever:2 1.059 0.935 1.200

Result

The results from the table implies that people in the overweight BMI category are more likely to report adequate sleep duration, compared to excellent sleep duration, compared to people in the normal BMI category, but those in the obese BMI category are less likely to report adequate vs excellent sleep duration duration, compared to those in Normal BMI category.