These data are derived from the National Health and Nutrition Examination Survey (NHANES), a survey of the non-institutionalized civilian population of the United States: https://www.cdc.gov/nchs/nhanes/index.htm
This table provides a description of the variables in the dataset: - var: variable name (column) - var_desc: description of the variable
# Interactive table of the variable names and their descriptions.
# `FALSE` is spelled out because `F` is an ordinary variable that can be reassigned.
DT::datatable(VarDescription, rownames = FALSE)
# Derive readable demographic labels from the raw coded columns.
# Rows that match none of the listed conditions are left as NA by case_when().
nhData <- nhData %>%
  mutate(
    # RIAGENDR: code 1 -> "M", code 2 -> "F"
    gender = case_when(
      RIAGENDR == "1" ~ "M",
      RIAGENDR == "2" ~ "F"
    ),
    # RIDRETH1: codes 1 and 2 are pooled into "H"; 3 -> "W", 4 -> "B", 5 -> "O"
    ethnicity = case_when(
      RIDRETH1 %in% c("1", "2") ~ "H",
      RIDRETH1 == "3" ~ "W",
      RIDRETH1 == "4" ~ "B",
      RIDRETH1 == "5" ~ "O"
    ),
    # RIDAGEYR: half-open age bands, lower bound inclusive
    age_group = case_when(
      RIDAGEYR >= 0 & RIDAGEYR < 18 ~ "[0,18)",
      RIDAGEYR >= 18 & RIDAGEYR < 30 ~ "[18,30)",
      RIDAGEYR >= 30 & RIDAGEYR < 60 ~ "[30,60)",
      RIDAGEYR >= 60 ~ "[60+)"
    )
  )
## fill in your answer to question 2 here
# Two-level flags built from fixed cut-offs; a missing measurement stays NA.
nhData <- nhData %>%
  mutate(
    # obesity: BMXBMI of 30 or more is labelled "obese", anything below "normal"
    obesity = if_else(BMXBMI < 30, "normal", "obese"),
    # diabetes: LBXGLU of 126 or more is labelled "diabetic", anything below "normal"
    diabetes = if_else(LBXGLU < 126, "normal", "diabetic")
  )
## write code to group by gender, ethnicity, age group, and gender and ethnicity
## write code to summarize
# Number of participants per gender.
nhData %>% count(gender)
## # A tibble: 2 × 2
## gender n
## <chr> <int>
## 1 F 8916
## 2 M 8942
# Number of participants per ethnicity label.
nhData %>% count(ethnicity)
## # A tibble: 4 × 2
## ethnicity n
## <chr> <int>
## 1 B 3912
## 2 H 5106
## 3 O 1080
## 4 W 7760
# Number of participants per age band.
nhData %>% count(age_group)
## # A tibble: 4 × 2
## age_group n
## <chr> <int>
## 1 [0,18) 3353
## 2 [18,30) 3635
## 3 [30,60) 6669
## 4 [60+) 4201
# Number of participants in each gender x ethnicity combination.
# count() returns an ungrouped tibble, so there is no leftover grouping by
# `gender` (and no message about it), unlike group_by() + summarize() with
# two grouping variables.
nhData %>% count(gender, ethnicity)
## # A tibble: 8 × 3
## gender ethnicity n
## <chr> <chr> <int>
## 1 F B 1923
## 2 F H 2553
## 3 F O 544
## 4 F W 3896
## 5 M B 1989
## 6 M H 2553
## 7 M O 536
## 8 M W 3864
## write code to select your columns
# Keep only the BMI, LDL, age-band and gender columns.
bmi_ldl_df_age_gender <- select(nhData, BMXBMI, LBDLDL, age_group, gender)
## write code to create a new data tibble (data frame) by filtering
# Restrict to rows labelled "F".
bmi_ldl_age_female <- filter(bmi_ldl_df_age_gender, gender == "F")
## write code to group_by and summarize with the mean
# Mean BMI and mean LDL within each age band.
bmi_ldl_age_female %>%
  group_by(age_group) %>%
  summarize(
    mean_BMI = mean(BMXBMI),
    mean_LDL = mean(LBDLDL)
  )
## # A tibble: 4 × 3
## age_group mean_BMI mean_LDL
## <chr> <dbl> <dbl>
## 1 [0,18) 23.6 89.9
## 2 [18,30) 27.2 104.
## 3 [30,60) 29.4 118.
## 4 [60+) 29.0 122.
# fill in answer to 5. here
# write code to plot RIDAGEYR with 60 bins
# Histogram of age: 60 bins, black outlines, semi-transparent light-blue fill.
ggplot(nhData, aes(x = RIDAGEYR)) +
  geom_histogram(
    bins = 60,
    color = "#000000",
    fill = "#89CFF0",
    alpha = 0.6
  ) +
  labs(x = "Age")
# fill in answer to 6. here
# for plotting a boxplot, the x variable is the 'by' one (ie, gender)
library(cowplot)

# Boxplot of one measurement split by gender.
#   data    : data frame containing a `gender` column and the measurement
#   y_var   : unquoted name of the measurement column
#   y_label : y-axis title
# Returns a ggplot object.
# No group_by() is needed: ggplot splits the boxes by the `x` aesthetic, and
# the aesthetics declared in ggplot() are inherited by geom_boxplot().
gender_boxplot <- function(data, y_var, y_label) {
  ggplot(data, aes(x = gender, y = {{ y_var }}, fill = gender)) +
    geom_boxplot() +
    scale_fill_manual(values = c("#999999", "#89CFF0")) +
    theme_classic() +
    labs(x = "Gender", y = y_label)
}

BMI_plot <- gender_boxplot(nhData, BMXBMI, "BMI")
LDL_plot <- gender_boxplot(nhData, LBDLDL, "LDL")
SBP_plot <- gender_boxplot(nhData, MSYS, "Mean Systolic Blood Pressure")

# Show the three panels side by side.
plot_grid(BMI_plot, LDL_plot, SBP_plot, ncol = 3)
## fill in answer to 7. here
# Boxplot of one measurement split by gender, one facet row per ethnicity.
#   data    : data frame with `gender`, `ethnicity` and the measurement
#   y_var   : unquoted name of the measurement column
#   y_label : y-axis title
#   alpha   : box transparency; NA keeps geom_boxplot()'s default
# Returns a ggplot object.
# No group_by() is needed: ggplot splits the boxes by the `x` aesthetic.
gender_boxplot_by_ethnicity <- function(data, y_var, y_label, alpha = NA) {
  ggplot(data, aes(x = gender, y = {{ y_var }}, fill = gender)) +
    geom_boxplot(alpha = alpha) +
    scale_fill_manual(values = c("#999999", "#89CFF0")) +
    theme_classic() +
    labs(x = "Gender", y = y_label) +
    facet_grid(rows = vars(ethnicity))
}

BMI_plot1 <- gender_boxplot_by_ethnicity(nhData, BMXBMI, "BMI")
LDL_plot1 <- gender_boxplot_by_ethnicity(nhData, LBDLDL, "LDL", alpha = 0.5)
HDL_plot <- gender_boxplot_by_ethnicity(nhData, LBDHDL, "HDL", alpha = 0.5)

# Show the three faceted panels side by side.
plot_grid(BMI_plot1, LDL_plot1, HDL_plot, ncol = 3)
According to the American Diabetes Association, two out of three people with diabetes also have high blood pressure.
Diabetes is also a risk factor for heart disease.
## fill in answer to 8. here
# Systolic blood pressure by diabetes status, one facet per gender.
# The former group_by(diabetes) was dropped: ggplot takes its grouping from the
# `fill` aesthetic, so grouping the data frame beforehand had no effect.
nhData %>%
  ggplot(aes(x = gender, y = MSYS, fill = diabetes)) +
  geom_boxplot(alpha = 0.5) +
  scale_fill_manual(values = c("#999999", "#89CFF0")) +
  theme_classic() +
  labs(x = "Gender", y = "Mean Systolic Blood Pressure") +
  facet_wrap(~gender)
Look up the t.test function (?t.test) and test the difference in average systolic blood pressure between those with T2D and those without.
# Systolic blood pressure readings for each diabetes group.
# pull() extracts MSYS as a plain numeric vector, which is what t.test()
# expects; select() would hand it a one-column tibble instead.
diabetic_SBP <- nhData %>%
  filter(diabetes == "diabetic") %>%
  pull(MSYS)
nondiabetic_SBP <- nhData %>%
  filter(diabetes == "normal") %>%
  pull(MSYS)
# Welch two-sample t-test (the t.test() default): diabetic vs. non-diabetic.
t.test(diabetic_SBP, nondiabetic_SBP)
##
## Welch Two Sample t-test
##
## data: diabetic_SBP and nondiabetic_SBP
## t = 23.811, df = 1494.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 12.76223 15.05368
## sample estimates:
## mean of x mean of y
## 132.8748 118.9669
# fill in your answer to 11.a here
# hint: nhData %>% summarise(cor())
# Correlation between age and BMI across all rows (cor() default method).
summarise(nhData, cor(RIDAGEYR, BMXBMI))
## # A tibble: 1 × 1
## `cor(RIDAGEYR, BMXBMI)`
## <dbl>
## 1 0.240
# fill in your answer to 11.b here
# hint nhData %>% group_by() %>% summarise(cor(), numberpeople=n())
# Age-BMI correlation and group size, computed separately for each gender.
nhData %>%
  group_by(gender) %>%
  summarise(
    cor(RIDAGEYR, BMXBMI),
    numberpeople = n()
  )
## # A tibble: 2 × 3
## gender `cor(RIDAGEYR, BMXBMI)` numberpeople
## <chr> <dbl> <int>
## 1 F 0.201 8916
## 2 M 0.283 8942
We will construct a linear model predicting phenotypes (e.g., BMI) as a function of age, sex, and race/ethnicity.
# recode gender (0 = male, 1 = female)
# Built from the `gender` label instead of rewriting RIAGENDR's own values in
# sequence: replacing 1 -> 0 and then 2 -> 1 in place turns every value into 0
# if this chunk is run a second time. Rows whose `gender` is NA become NA.
nhData$RIAGENDR <- as.numeric(nhData$gender == "F")
# your answer for 12.a here
# hint: ?lm and learn about the formula function
# Linear model of BMI on age, the 0/1 gender indicator and ethnicity.
BMI_predictor <- lm(
  BMXBMI ~ RIDAGEYR + RIAGENDR + ethnicity,
  data = nhData
)
# Coefficient table, residual summary and fit statistics.
summary(BMI_predictor)
##
## Call:
## lm(formula = BMXBMI ~ RIDAGEYR + RIAGENDR + ethnicity, data = nhData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.831 -4.468 -1.112 3.248 103.478
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.815624 0.139090 178.414 < 2e-16 ***
## RIDAGEYR 0.079714 0.002317 34.398 < 2e-16 ***
## RIAGENDR 0.930103 0.095730 9.716 < 2e-16 ***
## ethnicityH -0.709386 0.135862 -5.221 1.8e-07 ***
## ethnicityO -3.138232 0.219805 -14.277 < 2e-16 ***
## ethnicityW -1.591449 0.127329 -12.499 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.394 on 17852 degrees of freedom
## Multiple R-squared: 0.07696, Adjusted R-squared: 0.0767
## F-statistic: 297.7 on 5 and 17852 DF, p-value: < 2.2e-16
# hint: use the glance or summary function
# One-row summary of model-level statistics for the BMI model.
BMI_predictor %>% glance()
## # A tibble: 1 × 12
## r.squared adj.r.…¹ sigma stati…² p.value df logLik AIC BIC devia…³
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0770 0.0767 6.39 298. 5.06e-307 5 -58469. 1.17e5 1.17e5 729837.
## # … with 2 more variables: df.residual <int>, nobs <int>, and abbreviated
## # variable names ¹adj.r.squared, ²statistic, ³deviance
The R2 of the model is 0.0770 (adjusted R2: 0.0767).
# run regressions here
# hint 1: convert to "long" and use "nest by"
# hint 2: use a for loop
# Long format: one row per participant and phenotype. pivot_longer() puts the
# original column name in `name` and the measurement in `value`.
nhDataLong <- nhData %>% pivot_longer(cols = LBXGLU:LBDHDL)

# Fit value ~ age + gender + ethnicity for each (phenotype, survey cycle).
# The three phenotypes of interest are selected BEFORE fitting, so no models
# are estimated only to be thrown away afterwards.
regressions <- nhDataLong %>%
  filter(name %in% c("LBDLDL", "LBXTR", "MSYS")) %>%
  nest_by(name, SDDSRVYR) %>%
  mutate(
    fit = list(lm(value ~ RIDAGEYR + RIAGENDR + ethnicity, data = data))
  ) %>%
  select(-data)
# regressions <- nhDataLong %>%
# nest_by(name, SDDSRVYR) %>%
# mutate(fit=list(lm(value ~
# RIDAGEYR + gender + ethnicity, data = data)))
# output the model coefficients:
# hint: fit_tidy <- regressions %>% tidy()
# One row per (phenotype, survey cycle, model term). `.groups = "drop"` returns
# an ungrouped tibble and silences the grouped-output message.
fit_tidy <- regressions %>% summarise(tidy(fit), .groups = "drop")
# Interactive view and static table are both built from the same coefficients
# (previously the interactive table was fed an empty placeholder tibble).
DT::datatable(fit_tidy)
kable(fit_tidy, digits = 2) %>% kable_styling()
| name | SDDSRVYR | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|---|
| LBDLDL | 1 | (Intercept) | 92.21 | 2.19 | 42.08 | 0.00 |
| LBDLDL | 1 | RIDAGEYR | 0.56 | 0.04 | 15.43 | 0.00 |
| LBDLDL | 1 | RIAGENDR | 3.42 | 1.52 | 2.25 | 0.02 |
| LBDLDL | 1 | ethnicityH | -0.17 | 2.11 | -0.08 | 0.94 |
| LBDLDL | 1 | ethnicityO | 5.05 | 4.58 | 1.10 | 0.27 |
| LBDLDL | 1 | ethnicityW | 3.26 | 2.11 | 1.55 | 0.12 |
| LBDLDL | 2 | (Intercept) | 95.22 | 1.84 | 51.88 | 0.00 |
| LBDLDL | 2 | RIDAGEYR | 0.49 | 0.03 | 15.81 | 0.00 |
| LBDLDL | 2 | RIAGENDR | -1.79 | 1.28 | -1.40 | 0.16 |
| LBDLDL | 2 | ethnicityH | -1.90 | 1.85 | -1.03 | 0.31 |
| LBDLDL | 2 | ethnicityO | 3.98 | 3.55 | 1.12 | 0.26 |
| LBDLDL | 2 | ethnicityW | 1.87 | 1.73 | 1.08 | 0.28 |
| LBDLDL | 3 | (Intercept) | 87.72 | 1.74 | 50.40 | 0.00 |
| LBDLDL | 3 | RIDAGEYR | 0.49 | 0.03 | 15.89 | 0.00 |
| LBDLDL | 3 | RIAGENDR | 0.66 | 1.34 | 0.49 | 0.62 |
| LBDLDL | 3 | ethnicityH | 1.49 | 1.86 | 0.80 | 0.42 |
| LBDLDL | 3 | ethnicityO | -1.95 | 3.58 | -0.54 | 0.59 |
| LBDLDL | 3 | ethnicityW | 2.65 | 1.73 | 1.53 | 0.13 |
| LBDLDL | 4 | (Intercept) | 80.14 | 1.96 | 40.99 | 0.00 |
| LBDLDL | 4 | RIDAGEYR | 0.73 | 0.04 | 17.90 | 0.00 |
| LBDLDL | 4 | RIAGENDR | 1.03 | 1.37 | 0.75 | 0.45 |
| LBDLDL | 4 | ethnicityH | 2.25 | 1.84 | 1.22 | 0.22 |
| LBDLDL | 4 | ethnicityO | -1.17 | 3.34 | -0.35 | 0.73 |
| LBDLDL | 4 | ethnicityW | 3.81 | 1.73 | 2.21 | 0.03 |
| LBDLDL | 5 | (Intercept) | 93.21 | 2.19 | 42.58 | 0.00 |
| LBDLDL | 5 | RIDAGEYR | 0.38 | 0.03 | 11.41 | 0.00 |
| LBDLDL | 5 | RIAGENDR | 0.71 | 1.37 | 0.51 | 0.61 |
| LBDLDL | 5 | ethnicityH | 2.25 | 2.00 | 1.13 | 0.26 |
| LBDLDL | 5 | ethnicityO | 0.66 | 3.74 | 0.18 | 0.86 |
| LBDLDL | 5 | ethnicityW | -0.47 | 1.85 | -0.25 | 0.80 |
| LBDLDL | 6 | (Intercept) | 94.44 | 2.13 | 44.26 | 0.00 |
| LBDLDL | 6 | RIDAGEYR | 0.35 | 0.03 | 11.08 | 0.00 |
| LBDLDL | 6 | RIAGENDR | 0.57 | 1.29 | 0.44 | 0.66 |
| LBDLDL | 6 | ethnicityH | 2.13 | 1.95 | 1.09 | 0.27 |
| LBDLDL | 6 | ethnicityO | -0.35 | 3.17 | -0.11 | 0.91 |
| LBDLDL | 6 | ethnicityW | 0.52 | 1.84 | 0.28 | 0.78 |
| LBDLDL | 7 | (Intercept) | 90.41 | 2.01 | 44.95 | 0.00 |
| LBDLDL | 7 | RIDAGEYR | 0.35 | 0.03 | 10.64 | 0.00 |
| LBDLDL | 7 | RIAGENDR | 3.32 | 1.34 | 2.47 | 0.01 |
| LBDLDL | 7 | ethnicityH | 4.62 | 1.98 | 2.33 | 0.02 |
| LBDLDL | 7 | ethnicityO | 2.46 | 2.13 | 1.15 | 0.25 |
| LBDLDL | 7 | ethnicityW | 2.27 | 1.76 | 1.29 | 0.20 |
| LBXTR | 1 | (Intercept) | 55.34 | 4.18 | 13.24 | 0.00 |
| LBXTR | 1 | RIDAGEYR | 0.90 | 0.07 | 13.07 | 0.00 |
| LBXTR | 1 | RIAGENDR | 9.40 | 2.90 | 3.24 | 0.00 |
| LBXTR | 1 | ethnicityH | 38.57 | 4.02 | 9.59 | 0.00 |
| LBXTR | 1 | ethnicityO | 38.03 | 8.74 | 4.35 | 0.00 |
| LBXTR | 1 | ethnicityW | 30.41 | 4.01 | 7.57 | 0.00 |
| LBXTR | 2 | (Intercept) | 62.85 | 3.54 | 17.77 | 0.00 |
| LBXTR | 2 | RIDAGEYR | 0.92 | 0.06 | 15.53 | 0.00 |
| LBXTR | 2 | RIAGENDR | -5.19 | 2.46 | -2.11 | 0.04 |
| LBXTR | 2 | ethnicityH | 34.90 | 3.56 | 9.80 | 0.00 |
| LBXTR | 2 | ethnicityO | 42.88 | 6.83 | 6.28 | 0.00 |
| LBXTR | 2 | ethnicityW | 27.99 | 3.33 | 8.40 | 0.00 |
| LBXTR | 3 | (Intercept) | 61.92 | 3.20 | 19.37 | 0.00 |
| LBXTR | 3 | RIDAGEYR | 0.94 | 0.06 | 16.65 | 0.00 |
| LBXTR | 3 | RIAGENDR | -0.57 | 2.46 | -0.23 | 0.82 |
| LBXTR | 3 | ethnicityH | 37.12 | 3.41 | 10.88 | 0.00 |
| LBXTR | 3 | ethnicityO | 26.10 | 6.58 | 3.97 | 0.00 |
| LBXTR | 3 | ethnicityW | 27.02 | 3.17 | 8.52 | 0.00 |
| LBXTR | 4 | (Intercept) | 60.03 | 3.69 | 16.25 | 0.00 |
| LBXTR | 4 | RIDAGEYR | 1.03 | 0.08 | 13.45 | 0.00 |
| LBXTR | 4 | RIAGENDR | -3.47 | 2.59 | -1.34 | 0.18 |
| LBXTR | 4 | ethnicityH | 32.53 | 3.47 | 9.36 | 0.00 |
| LBXTR | 4 | ethnicityO | 19.37 | 6.30 | 3.07 | 0.00 |
| LBXTR | 4 | ethnicityW | 30.42 | 3.26 | 9.33 | 0.00 |
| LBXTR | 5 | (Intercept) | 63.71 | 3.92 | 16.26 | 0.00 |
| LBXTR | 5 | RIDAGEYR | 0.82 | 0.06 | 13.55 | 0.00 |
| LBXTR | 5 | RIAGENDR | -10.14 | 2.46 | -4.13 | 0.00 |
| LBXTR | 5 | ethnicityH | 38.46 | 3.58 | 10.75 | 0.00 |
| LBXTR | 5 | ethnicityO | 27.13 | 6.70 | 4.05 | 0.00 |
| LBXTR | 5 | ethnicityW | 27.39 | 3.32 | 8.26 | 0.00 |
| LBXTR | 6 | (Intercept) | 66.49 | 3.73 | 17.83 | 0.00 |
| LBXTR | 6 | RIDAGEYR | 0.73 | 0.06 | 13.29 | 0.00 |
| LBXTR | 6 | RIAGENDR | -8.60 | 2.25 | -3.83 | 0.00 |
| LBXTR | 6 | ethnicityH | 33.33 | 3.41 | 9.76 | 0.00 |
| LBXTR | 6 | ethnicityO | 19.31 | 5.54 | 3.48 | 0.00 |
| LBXTR | 6 | ethnicityW | 19.33 | 3.21 | 6.02 | 0.00 |
| LBXTR | 7 | (Intercept) | 70.13 | 3.63 | 19.32 | 0.00 |
| LBXTR | 7 | RIDAGEYR | 0.69 | 0.06 | 11.55 | 0.00 |
| LBXTR | 7 | RIAGENDR | -13.87 | 2.43 | -5.71 | 0.00 |
| LBXTR | 7 | ethnicityH | 27.43 | 3.57 | 7.68 | 0.00 |
| LBXTR | 7 | ethnicityO | 23.71 | 3.85 | 6.15 | 0.00 |
| LBXTR | 7 | ethnicityW | 25.81 | 3.17 | 8.14 | 0.00 |
| MSYS | 1 | (Intercept) | 103.50 | 1.03 | 100.72 | 0.00 |
| MSYS | 1 | RIDAGEYR | 0.56 | 0.02 | 32.84 | 0.00 |
| MSYS | 1 | RIAGENDR | -3.45 | 0.71 | -4.85 | 0.00 |
| MSYS | 1 | ethnicityH | -0.58 | 0.99 | -0.58 | 0.56 |
| MSYS | 1 | ethnicityO | 0.05 | 2.15 | 0.02 | 0.98 |
| MSYS | 1 | ethnicityW | -4.79 | 0.99 | -4.85 | 0.00 |
| MSYS | 2 | (Intercept) | 103.95 | 0.83 | 124.87 | 0.00 |
| MSYS | 2 | RIDAGEYR | 0.58 | 0.01 | 41.53 | 0.00 |
| MSYS | 2 | RIAGENDR | -2.46 | 0.58 | -4.25 | 0.00 |
| MSYS | 2 | ethnicityH | -5.46 | 0.84 | -6.52 | 0.00 |
| MSYS | 2 | ethnicityO | -2.07 | 1.61 | -1.28 | 0.20 |
| MSYS | 2 | ethnicityW | -6.32 | 0.78 | -8.06 | 0.00 |
| MSYS | 3 | (Intercept) | 103.34 | 0.79 | 131.29 | 0.00 |
| MSYS | 3 | RIDAGEYR | 0.55 | 0.01 | 39.76 | 0.00 |
| MSYS | 3 | RIAGENDR | -2.62 | 0.61 | -4.31 | 0.00 |
| MSYS | 3 | ethnicityH | -1.79 | 0.84 | -2.13 | 0.03 |
| MSYS | 3 | ethnicityO | -1.77 | 1.62 | -1.09 | 0.28 |
| MSYS | 3 | ethnicityW | -5.59 | 0.78 | -7.15 | 0.00 |
| MSYS | 4 | (Intercept) | 109.99 | 0.76 | 144.61 | 0.00 |
| MSYS | 4 | RIDAGEYR | 0.44 | 0.02 | 27.69 | 0.00 |
| MSYS | 4 | RIAGENDR | -4.97 | 0.53 | -9.31 | 0.00 |
| MSYS | 4 | ethnicityH | -5.82 | 0.72 | -8.14 | 0.00 |
| MSYS | 4 | ethnicityO | -7.37 | 1.30 | -5.68 | 0.00 |
| MSYS | 4 | ethnicityW | -6.71 | 0.67 | -10.00 | 0.00 |
| MSYS | 5 | (Intercept) | 106.43 | 0.98 | 108.22 | 0.00 |
| MSYS | 5 | RIDAGEYR | 0.45 | 0.02 | 29.62 | 0.00 |
| MSYS | 5 | RIAGENDR | -3.37 | 0.62 | -5.45 | 0.00 |
| MSYS | 5 | ethnicityH | -3.59 | 0.90 | -4.00 | 0.00 |
| MSYS | 5 | ethnicityO | -3.64 | 1.68 | -2.16 | 0.03 |
| MSYS | 5 | ethnicityW | -4.82 | 0.83 | -5.79 | 0.00 |
| MSYS | 6 | (Intercept) | 108.48 | 0.92 | 118.15 | 0.00 |
| MSYS | 6 | RIDAGEYR | 0.43 | 0.01 | 32.01 | 0.00 |
| MSYS | 6 | RIAGENDR | -5.18 | 0.55 | -9.37 | 0.00 |
| MSYS | 6 | ethnicityH | -5.18 | 0.84 | -6.16 | 0.00 |
| MSYS | 6 | ethnicityO | -7.11 | 1.36 | -5.21 | 0.00 |
| MSYS | 6 | ethnicityW | -7.53 | 0.79 | -9.54 | 0.00 |
| MSYS | 7 | (Intercept) | 108.10 | 0.88 | 123.03 | 0.00 |
| MSYS | 7 | RIDAGEYR | 0.44 | 0.01 | 30.24 | 0.00 |
| MSYS | 7 | RIAGENDR | -5.14 | 0.59 | -8.75 | 0.00 |
| MSYS | 7 | ethnicityH | -5.04 | 0.86 | -5.83 | 0.00 |
| MSYS | 7 | ethnicityO | -5.88 | 0.93 | -6.30 | 0.00 |
| MSYS | 7 | ethnicityW | -5.29 | 0.77 | -6.90 | 0.00 |
# output the model R2:
# hint: fit_glance <- regressions %>% glance()
# One row of model-level statistics per (phenotype, survey cycle).
# `.groups = "drop"` returns an ungrouped tibble and silences the
# grouped-output message.
fit_glance <- regressions %>% summarise(glance(fit), .groups = "drop")
# Interactive view of all statistics (previously fed an empty placeholder).
DT::datatable(fit_glance)
# Static table of R2 only, best-fitting models first.
kable(
  fit_glance %>%
    select(name, SDDSRVYR, r.squared) %>%
    arrange(desc(r.squared)),
  digits = 2
) %>%
  kable_styling()
| name | SDDSRVYR | r.squared |
|---|---|---|
| MSYS | 2 | 0.41 |
| MSYS | 3 | 0.37 |
| MSYS | 1 | 0.35 |
| MSYS | 6 | 0.29 |
| MSYS | 7 | 0.29 |
| MSYS | 4 | 0.29 |
| MSYS | 5 | 0.26 |
| LBXTR | 3 | 0.15 |
| LBXTR | 1 | 0.14 |
| LBXTR | 2 | 0.13 |
| LBDLDL | 1 | 0.13 |
| LBDLDL | 4 | 0.13 |
| LBXTR | 4 | 0.12 |
| LBXTR | 5 | 0.11 |
| LBDLDL | 2 | 0.10 |
| LBDLDL | 3 | 0.10 |
| LBXTR | 7 | 0.10 |
| LBXTR | 6 | 0.09 |
| LBDLDL | 5 | 0.05 |
| LBDLDL | 7 | 0.05 |
| LBDLDL | 6 | 0.04 |
R2 can be thought of as the proportion of the variance of y (the outcome) that is explained by x (the predictors). The R2 for the outcome LDL in the first survey cycle (SDDSRVYR = 1) is 0.13, meaning that our predictors (gender, age, ethnicity) together explain 13% of the variance in LDL; the remaining 87% is unexplained. The same logic applies to the other R2 values.
the coefficients are: - RIDAGEYR 0.079714
- RIAGENDR 0.930103
- ethnicityH -0.709386
- ethnicityO -3.138232
- ethnicityW -1.591449
All are statistically significant at an alpha of 0.05. - For each one-year increase in age, BMI is expected to increase by 0.079714, adjusting for gender and ethnicity. - Females have a BMI 0.930103 higher than males, adjusting for age and ethnicity. - The reference category for ethnicity is Black (B). Adjusting for age and gender, people of White race/ethnicity have a BMI 1.59 lower than people of Black race/ethnicity.