The codes below prepares the data for proper survey design analysis.Also, some variables were re-coded for the purpose of the research questions
#Load data for analysis#sub-setting and Re-codding variables for analysis purposes
brfss <- readRDS("brfss_177.rds")
# Cleaning the variable names for space, underscore & Uppercase Characters
renam<-names(brfss)
newnames<-tolower(gsub(pattern = "_",replacement = "",x = renam))
names(brfss)<-newnames
homewk3 <- brfss %>%
select(state,llcpwt,dispcode,seqno,psu,bmi5cat,racegr3,educa,ststr,smoke100,exerany2,sex,ageg,menthlth,smokday2,sleptim1) %>%
mutate( smokers=ifelse(smoke100==1 & smokday2%in%c(1,2),"Current",
ifelse(smoke100==1 & smokday2==3, "Former",
ifelse(smoke100==2, "Never", NA)))) %>%
mutate(bmi = car::Recode(bmi5cat,recodes="1='Underweight';2='Normal';3='Overweight'; 4='Obese';else=NA",as.factor=T ),
educa = car::Recode(educa,recodes="1:2='<HS';3:4='HS';5:6='>HS';else=NA", as.factor=T ),
racegr3 = car::Recode(racegr3,recodes="1='NH-White';2='NH-Black';3:4='Other';5='Hispanic';else=NA",as.factor=T ),
ageg = car::Recode(ageg,recodes="1='18-24';2='25-30';3='35-44';4='45-54';5='55-54';else='65+'" ),
Physicala = car::Recode(exerany2,recodes="1='Yess';2='Noo';else=NA",as.factor=T ), sex = car::Recode(sex,recodes="1='Male';2='Female';else=NA",as.factor=T ),
sleepadq=ifelse(sleptim1>=7 & !sleptim1%in%c(77,99), 0,
ifelse(sleptim1<7,1, NA ) ),
mentald=ifelse(menthlth<31, "Yess",
ifelse(menthlth==88, "Noo",NA)))
#Data containing only variables used in the analysis
Final_data <- homewk3 %>%
select(state,llcpwt,dispcode,seqno,psu,smokers,bmi,educa,ageg,Physicala, sleepadq,sex,racegr3,mentald,ststr) %>%
mutate(sleepadq2=ifelse(sleepadq==0, 'Yes',ifelse(sleepadq==1, 'No' , NA))) %>%
# mutate(sex=as.factor(ifelse(sex==1|sex==2, sex, NA))) %>%
filter(complete.cases(.))
# removing unwanted data from the R environment
rm(brfss,newnames,renam,rg);gc()
## Warning in rm(brfss, newnames, renam, rg): object 'rg' not found
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2412941 128.9 4914481 262.5 2911150 155.5
## Vcells 12932845 98.7 163699796 1249.0 204315101 1558.9
# Survey Design
options(survey.lonely.psu = "adjust")
des<-survey::svydesign(ids=~1, strata=~ststr, weights=~llcpwt, data = Final_data)
For the purpose of this analysis, a variable that measures prevalence of Inadequate sleep is employed as the outcome variable. The variable is continuous and was re-coded into a binary variable. The response to the question “On average, how many hours of sleep do you have in a day” was re-coded as having adequate sleep- if the respondents sleep for an average of 7 or more hours in day, andinadequate sleep- if the average daily sleep is less than 7 hours.
Is the prevalence of inadequate daily sleep time associated with the Body Mass Index(BMI)categories?
Examining the Percentage of US adults experiencing Inadequate sleep by BMI Categories, and do a survey-corrected chi-square test for independence.
des %>% tbl_svysummary(by= sleepadq2,
include=c(sex,racegr3,ageg,educa,smokers,Physicala,mentald,bmi,sleepadq2),
label = list(educa ~ "Educantion Level",
racegr3~ "Race/Ethnicity" ,
mentald ~"Frequent Mental Distress",
ageg~"Age",
smokers~"Smoker",
Physicala~"Physical Activity in Past Month",
bmi~"BMI Categories",
sex~"Sex" ))%>%
add_p() %>%
add_overall() %>%
modify_spanning_header(c("stat_1", "stat_2") ~ "**Inadequate Sleep Duration**") %>%
as_gt() %>%
# modify with gt functions
gt::tab_header("Table 1: Table 1 Characteristics of Respodents By Inadequate Sleep Duration, BRFSSS, 2017") %>%
gt::tab_options(
table.font.size = "small",
data_row.padding = gt::px(1))
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
Table 1: Table 1 Characteristics of Respodents By Inadequate Sleep Duration, BRFSSS, 2017 | ||||
---|---|---|---|---|
Characteristic | Overall, N = 15,473,3481 | Inadequate Sleep Duration | p-value2 | |
No, N = 5,304,7191 | Yes, N = 10,168,6291 | |||
Sex | 0.020 | |||
Female | 7,676,116 (50%) | 2,564,384 (48%) | 5,111,732 (50%) | |
Male | 7,797,232 (50%) | 2,740,335 (52%) | 5,056,897 (50%) | |
Race/Ethnicity | <0.001 | |||
Hispanic | 1,897,602 (12%) | 680,223 (13%) | 1,217,379 (12%) | |
NH-Black | 1,164,556 (7.5%) | 542,725 (10%) | 621,832 (6.1%) | |
NH-White | 11,308,034 (73%) | 3,640,224 (69%) | 7,667,810 (75%) | |
Other | 1,103,156 (7.1%) | 441,548 (8.3%) | 661,608 (6.5%) | |
Age | <0.001 | |||
18-24 | 1,736,969 (11%) | 607,902 (11%) | 1,129,068 (11%) | |
25-30 | 2,474,525 (16%) | 974,694 (18%) | 1,499,831 (15%) | |
35-44 | 2,486,298 (16%) | 946,937 (18%) | 1,539,360 (15%) | |
45-54 | 2,560,352 (17%) | 987,727 (19%) | 1,572,626 (15%) | |
55-54 | 2,722,928 (18%) | 936,533 (18%) | 1,786,395 (18%) | |
65+ | 3,492,276 (23%) | 850,927 (16%) | 2,641,349 (26%) | |
Educantion Level | <0.001 | |||
<HS | 492,176 (3.2%) | 151,593 (2.9%) | 340,583 (3.3%) | |
>HS | 9,461,508 (61%) | 3,141,322 (59%) | 6,320,186 (62%) | |
HS | 5,519,663 (36%) | 2,011,804 (38%) | 3,507,860 (34%) | |
Smoker | <0.001 | |||
Current | 2,717,430 (18%) | 1,228,145 (23%) | 1,489,285 (15%) | |
Former | 4,003,170 (26%) | 1,287,425 (24%) | 2,715,745 (27%) | |
Never | 8,752,748 (57%) | 2,789,149 (53%) | 5,963,598 (59%) | |
Physical Activity in Past Month | <0.001 | |||
Noo | 4,007,922 (26%) | 1,563,358 (29%) | 2,444,564 (24%) | |
Yess | 11,465,426 (74%) | 3,741,361 (71%) | 7,724,065 (76%) | |
Frequent Mental Distress | <0.001 | |||
Noo | 9,992,417 (65%) | 3,048,336 (57%) | 6,944,081 (68%) | |
Yess | 5,480,931 (35%) | 2,256,383 (43%) | 3,224,547 (32%) | |
BMI Categories | <0.001 | |||
Normal | 4,889,126 (32%) | 1,565,088 (30%) | 3,324,038 (33%) | |
Obese | 4,693,623 (30%) | 1,732,840 (33%) | 2,960,783 (29%) | |
Overweight | 5,588,426 (36%) | 1,891,378 (36%) | 3,697,048 (36%) | |
Underweight | 302,173 (2.0%) | 115,414 (2.2%) | 186,759 (1.8%) | |
1
n (%)
2
chi-squared test with Rao & Scott's second-order correction
|
BMI<-svyby(formula = ~sleepadq,
by = ~bmi,
design = des,
FUN = svymean,
na.rm=T)
svychisq(~sleepadq+bmi,
design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~sleepadq + bmi, design = des)
## F = 9.4044, ndf = 2.9909e+00, ddf = 1.3274e+05, p-value = 3.379e-06
BMI%>%
mutate(bmi = fct_relevel(bmi,
"Underweight", "Normal", "Overweight", "Obese")) %>%
ggplot()+
geom_point(aes(x=bmi,y=sleepadq))+
geom_errorbar(aes(x=bmi, ymin = sleepadq-1.96*se,
ymax= sleepadq+1.96*se),
width=.30)+
labs(title = "Percent % of US Adults Experiencing Inadequate Sleep by BMI Categories",
caption = "Source: CDC BRFSS Data, 2017 \n Calculations by Samson A. Olowolaju, MPH",
x = "BMI Categories",
y = "% Inadequate sleep")+
theme_minimal()+ theme(
plot.title = element_text(size= 10, face = "bold"))
Examining the Percentages of US adults experiencing Inadequate sleep by Education, and do a survey-corrected chi-square test for independence.
edu<-svyby(formula = ~sleepadq,
by = ~educa,
design = des,
FUN = svymean,
na.rm=T)
svychisq(~sleepadq+educa,
design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~sleepadq + educa, design = des)
## F = 7.8879, ndf = 1.9602, ddf = 86997.3068, p-value = 0.0004159
edu%>%
ggplot()+
geom_point(aes(x=educa,y=sleepadq))+
geom_errorbar(aes(x=educa, ymin = sleepadq-1.96*se,
ymax= sleepadq+1.96*se),
width=.25)+
labs(title = "Percent % of US Adults Experiencing Inadequate Sleep by Education Level",
caption = "Source: CDC BRFSS Data, 2017 \n Calculations by Samson A. Olowolaju, MPH",
x = "Education",
y = "% Inadequate sleep")+
theme_minimal()+ theme(
plot.title = element_text(size= 10, face = "bold"))
Examining the Percentages of US adults experiencing Inadequate sleep by Race, and do a survey-corrected chi-square test for independence.
race<-svyby(formula = ~sleepadq,
by = ~racegr3,
design = des,
FUN = svymean,
na.rm=T)
svychisq(~sleepadq+racegr3,
design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~sleepadq + racegr3, design = des)
## F = 31.534, ndf = 2.9548e+00, ddf = 1.3114e+05, p-value < 2.2e-16
race%>%
mutate(racegr3 = fct_relevel(racegr3,
"NH-White", "Hispanic", "Other", "NH-Black")) %>%
ggplot()+
geom_point(aes(x=racegr3,y=sleepadq))+
geom_errorbar(aes(x=racegr3, ymin = sleepadq-1.96*se,
ymax= sleepadq+1.96*se),
width=.25)+
labs(title = "Percent % of US Adults Experiencing Inadequate Sleep by Race Categories",
caption = "Source: CDC BRFSS Data, 2017 \n Calculations by Samson A. Olowolaju, MPH",
x = "Race",
y = "% Inadequate sleep")+
theme_minimal()+ theme(
plot.title = element_text(size= 10, face = "bold"))
Calculating Race by Education by Inadequate sleep tabulation, and then plot its graph
crossre<-svyby(formula = ~sleepadq,
by = ~racegr3+educa,
design = des,
FUN = svymean,
na.rm=T)
crossre%>%
mutate(educa = fct_relevel(educa,
"<HS", "HS", ">HS")) %>%
ggplot()+
geom_point(aes(x=educa, y = sleepadq))+
geom_errorbar(aes(x=educa, ymin = sleepadq-1.96*se,
ymax=sleepadq+1.96*se),
width=.25)+
facet_wrap(~ racegr3, nrow = 3)+
labs(title = "Percent % of US Adults Experiencing Inadequate Sleep by Race/Ethnicity and Education",
caption = "Source: CDC BRFSS Data, 2017 \n Calculations by Samson A. Olowolaju, MPH",
x = "Education",
y = "% Inadequate sleep")+
theme_minimal()+ theme(
plot.title = element_text(size= 10, face = "bold"))
Calculating Sex by Education by Inadequate sleep tabulation, and then plot its graph
crossse<-svyby(formula = ~sleepadq,
by = ~sex+educa,
design = des,
FUN = svymean,
na.rm=T)
crossse%>%
mutate(educa = fct_relevel(educa,
"<HS", "HS", ">HS")) %>%
ggplot()+
geom_point(aes(x=educa, y = sleepadq))+
geom_errorbar(aes(x=educa, ymin = sleepadq-1.96*se,
ymax=sleepadq+1.96*se),
width=.25)+
facet_wrap(~ sex, nrow = 2)+
labs(title = "Percent % of US Adults Experiencing Inadequate Sleep by Sex and Education",
caption = "Source: CDC BRFSS Data, 2017 \n Calculations by Samson A. Olowolaju, MPH",
x = "Education",
y = "% Inadequate sleep")+
theme_minimal()+ theme(
plot.title = element_text(size= 10, face = "bold"))
Fitting the logistic regression model
fit.logit<-svyglm(sleepadq ~ bmi+ racegr3+ ageg+ educa+ mentald+ Physicala+ sex+ smokers,
design = des,
family = binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Getting odds ratios and confidence intervals for the estimates
fit.logit%>%
tidy()%>%
mutate(Odd.Ratio = exp(estimate),
LowerOR_Ci = exp(estimate - 1.96*std.error),
UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
knitr::kable(digits = 3)
term | estimate | std.error | statistic | p.value | Odd.Ratio | LowerOR_Ci | UpperOR_Ci |
---|---|---|---|---|---|---|---|
(Intercept) | -0.583 | 0.147 | -3.971 | 0.000 | 0.558 | 0.419 | 0.744 |
bmiObese | 0.162 | 0.044 | 3.651 | 0.000 | 1.176 | 1.078 | 1.282 |
bmiOverweight | 0.095 | 0.043 | 2.237 | 0.025 | 1.100 | 1.012 | 1.196 |
bmiUnderweight | 0.162 | 0.129 | 1.252 | 0.210 | 1.176 | 0.913 | 1.514 |
racegr3NH-Black | 0.436 | 0.093 | 4.683 | 0.000 | 1.547 | 1.289 | 1.857 |
racegr3NH-White | -0.102 | 0.060 | -1.717 | 0.086 | 0.903 | 0.803 | 1.015 |
racegr3Other | 0.166 | 0.089 | 1.865 | 0.062 | 1.181 | 0.992 | 1.407 |
ageg25-30 | 0.163 | 0.080 | 2.044 | 0.041 | 1.177 | 1.007 | 1.375 |
ageg35-44 | 0.128 | 0.079 | 1.625 | 0.104 | 1.136 | 0.974 | 1.326 |
ageg45-54 | 0.149 | 0.077 | 1.938 | 0.053 | 1.161 | 0.998 | 1.349 |
ageg55-54 | -0.004 | 0.076 | -0.059 | 0.953 | 0.996 | 0.858 | 1.155 |
ageg65+ | -0.403 | 0.075 | -5.393 | 0.000 | 0.668 | 0.577 | 0.774 |
educa>HS | 0.209 | 0.120 | 1.744 | 0.081 | 1.232 | 0.974 | 1.558 |
educaHS | 0.232 | 0.120 | 1.929 | 0.054 | 1.261 | 0.996 | 1.597 |
mentaldYess | 0.376 | 0.037 | 10.263 | 0.000 | 1.456 | 1.355 | 1.564 |
PhysicalaYess | -0.237 | 0.040 | -5.886 | 0.000 | 0.789 | 0.729 | 0.854 |
sexMale | 0.086 | 0.035 | 2.469 | 0.014 | 1.090 | 1.018 | 1.166 |
smokersFormer | -0.342 | 0.054 | -6.315 | 0.000 | 0.710 | 0.639 | 0.790 |
smokersNever | -0.447 | 0.050 | -8.962 | 0.000 | 0.639 | 0.580 | 0.705 |
Generating predicted probabilities for a Relative young male, with more than high school education, who engages in physical activities , black, and whose BMI is categorized as obese. \[\text{Pr(Inadequate sleep)} = \frac{1}{1+exp({\beta_0 + \beta_1*black + \beta_2*Youth+\beta_3*>HS+\beta_4*obese +\beta_5*physicalactivities+\beta_6*Male})}\]
rg<-ref_grid(fit.logit)
marg_logit<-emmeans(object = rg,
specs = c( "racegr3","educa","ageg","bmi","Physicala","sex"),
type="response" )
comps<-as.data.frame(marg_logit)
comps[comps$racegr3=="NH-Black" & comps$educa == ">HS" & comps$ageg=="35-44" & comps$Physicala == "Yes" & comps$sex == "Male" & comps$bmi == "Obese" , ]
#Comparing the black race with white race
comps[comps$racegr3=="NH-White" & comps$educa == ">HS" & comps$ageg=="35-44" & comps$Physicala == "Yes" & comps$sex == "Male" & comps$bmi == "Obese" , ]
#when both age and race is varied
comps[comps$racegr3=="NH-Black" & comps$educa == ">HS" & comps$ageg=="55-54" & comps$Physicala == "Yes" & comps$sex == "Male" & comps$bmi == "Obese" , ]
#Comparing the black race with white race while changing the age
comps[comps$racegr3=="NH-White" & comps$educa == ">HS" & comps$ageg=="55-54" & comps$Physicala == "Yes" & comps$sex == "Male" & comps$bmi == "Obese" , ]