BMI and weight
#weight
d_adults %>%
GG_denhist("weight", "sex") +
scale_x_continuous("Weight (kg)") +
labs(
title = "Weight by sex",
subtitle = "NHANES 2018, US adults"
)
## Warning in GG_denhist(., "weight", "sex"): There were groups without any data.
## These were removed
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/weight by sex.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#height
d_adults %>%
GG_denhist("height", "sex") +
scale_x_continuous("Height (cm)") +
labs(
title = "Height by sex",
subtitle = "NHANES 2018, US adults"
)
## Warning in GG_denhist(., "height", "sex"): There were groups without any data.
## These were removed
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/height by sex.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#numbers
describe2(
d_adults$height,
d_adults$sex
)
## New names:
## • `` -> `...1`
describe2(
d_adults$height,
d_adults$sex_race
)
## New names:
## • `` -> `...1`
SMD_matrix(d_adults$height, d_adults$sex_race)
## female Asian female Black female Mexican
## female Asian NA -0.860565854 -0.06929056
## female Black -0.86056585 NA 0.79127530
## female Mexican -0.06929056 0.791275296 NA
## female Other Hispanic -0.18064197 0.679923882 -0.11135141
## female Other_multiple -0.85804019 0.002525667 -0.78874963
## female White -0.73902899 0.121536863 -0.66973843
## male Asian -1.99783459 -1.137268735 -1.92854403
## male Black -2.83701959 -1.976453734 -2.76772903
## male Mexican -2.01398084 -1.153414985 -1.94469028
## male Other Hispanic -1.91009684 -1.049530987 -1.84080628
## male Other_multiple -2.90301670 -2.042450850 -2.83372615
## male White -2.74905492 -1.888489064 -2.67976436
## female Other Hispanic female Other_multiple female White
## female Asian -0.1806420 -0.858040188 -0.7390290
## female Black 0.6799239 0.002525667 0.1215369
## female Mexican -0.1113514 -0.788749629 -0.6697384
## female Other Hispanic NA -0.677398216 -0.5583870
## female Other_multiple -0.6773982 NA 0.1190112
## female White -0.5583870 0.119011196 NA
## male Asian -1.8171926 -1.139794401 -1.2588056
## male Black -2.6563776 -1.978979400 -2.0979906
## male Mexican -1.8333389 -1.155940651 -1.2749518
## male Other Hispanic -1.7294549 -1.052056654 -1.1710678
## male Other_multiple -2.7223747 -2.044976517 -2.1639877
## male White -2.5684129 -1.891014730 -2.0100259
## male Asian male Black male Mexican male Other Hispanic
## female Asian -1.99783459 -2.83701959 -2.01398084 -1.91009684
## female Black -1.13726873 -1.97645373 -1.15341498 -1.04953099
## female Mexican -1.92854403 -2.76772903 -1.94469028 -1.84080628
## female Other Hispanic -1.81719262 -2.65637762 -1.83333887 -1.72945487
## female Other_multiple -1.13979440 -1.97897940 -1.15594065 -1.05205665
## female White -1.25880560 -2.09799060 -1.27495185 -1.17106785
## male Asian NA -0.83918500 -0.01614625 0.08773775
## male Black -0.83918500 NA 0.82303875 0.92692275
## male Mexican -0.01614625 0.82303875 NA 0.10388400
## male Other Hispanic 0.08773775 0.92692275 0.10388400 NA
## male Other_multiple -0.90518212 -0.06599712 -0.88903587 -0.99291986
## male White -0.75122033 0.08796467 -0.73507408 -0.83895808
## male Other_multiple male White
## female Asian -2.90301670 -2.74905492
## female Black -2.04245085 -1.88848906
## female Mexican -2.83372615 -2.67976436
## female Other Hispanic -2.72237473 -2.56841295
## female Other_multiple -2.04497652 -1.89101473
## female White -2.16398771 -2.01002593
## male Asian -0.90518212 -0.75122033
## male Black -0.06599712 0.08796467
## male Mexican -0.88903587 -0.73507408
## male Other Hispanic -0.99291986 -0.83895808
## male Other_multiple NA 0.15396179
## male White 0.15396179 NA
#BMI
d_adults %>%
GG_denhist("BMI", "sex") +
scale_x_continuous("BMI (weight / height²)") +
labs(
title = "BMI by sex",
subtitle = "NHANES 2018, US adults"
)
## Warning in GG_denhist(., "BMI", "sex"): There were groups without any data.
## These were removed
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/BMI by sex.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
describe2(
d_adults$BMI,
d_adults$sex_race
)
## New names:
## • `` -> `...1`
Spline residualization
#can we outperform the BMI with splines?
weight_fit = gam(weight ~ sex + s(height, by = sex), data = d_adults, na.action = "na.omit")
weight_fit %>% summary()
##
## Family: gaussian
## Link function: identity
##
## Formula:
## weight ~ sex + s(height, by = sex)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 83.9639 0.7936 105.796 < 2e-16 ***
## sexmale -4.3422 0.9698 -4.478 7.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(height):sexfemale 3.58 4.464 80.23 <2e-16 ***
## s(height):sexmale 1.00 1.000 579.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.204 Deviance explained = 20.5%
## GCV = 419.02 Scale est. = 418.49 n = 5175
d_adults$weight_resid = NA
d_adults$weight_resid[-weight_fit$na.action] = resid(weight_fit)
plot(ggeffects::ggpredict(weight_fit, terms = c("height", "sex"))) +
labs(
title = "Predict body fat % from sex, height using splines",
subtitle = "NHANES 2018, US adults",
y = "Body fat % (x ray)"
) +
scale_y_continuous(labels = scales::percent) +
theme_bw()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

#predict BF% from residuals
d_adults %>%
ggplot(aes(weight_resid, body_fat, color = sex)) +
geom_point(alpha = .3) +
geom_smooth() +
scale_x_continuous(limits = c(NA, 80)) +
scale_y_continuous(labels = scales::percent) +
labs(
title = "Spline residualization of weight from height, by sex",
subtitle = "NHANES 2018, US adults",
x = "Weight residual (kg)",
y = "Body fat % (x ray)"
)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 3331 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3331 rows containing missing values (`geom_point()`).

GG_save("figs/spline resid weight BF.png")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 3331 rows containing non-finite values (`stat_smooth()`).
## Removed 3331 rows containing missing values (`geom_point()`).
#correlations
d_adults %>%
group_by(sex) %>%
summarise(r = cor(weight_resid, body_fat, use = "pair"))
Body fat
#by by sex
GG_denhist(d_adults, "body_fat", "sex", auto_fraction_bounary = F) +
scale_x_continuous("Body fat % (x-ray)", labels = scales::percent, limits = c(.1, NA)) +
labs(
title = "Body fat % by sex",
subtitle = "NHANES 2018, US adults"
)
## Warning in GG_denhist(d_adults, "body_fat", "sex", auto_fraction_bounary = F):
## There were groups without any data. These were removed
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (`geom_bar()`).

GG_save("figs/body fat sex.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (`geom_bar()`).
#relationship to BMI by sex
d_adults %>%
ggplot(aes(BMI, body_fat, color = sex)) +
geom_point(alpha = .1) +
geom_smooth() +
geom_smooth(method = lm, linetype = "dotted") +
scale_y_continuous("Body fat % (x-ray)", labels = scales::percent) +
scale_x_continuous(limits = c(NA, 60)) +
labs(
title = "Body fat % as predicted by BMI",
subtitle = "NHANES 2018, US adults"
)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 3331 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3331 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3331 rows containing missing values (`geom_point()`).

GG_save("figs/body fat from BMI sex.png")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 3331 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3331 rows containing non-finite values (`stat_smooth()`).
## Removed 3331 rows containing missing values (`geom_point()`).
#correlations
d_adults %>%
group_by(sex) %>%
summarise(r = cor(BMI, body_fat, use = "pair"))
#ols regression
lm(body_fat ~ BMI * sex + age, data = d_adults) %>% summary()
##
## Call:
## lm(formula = body_fat ~ BMI * sex + age, data = d_adults)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.180717 -0.026588 0.001356 0.027483 0.212488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.694e-01 5.614e-03 30.175 < 2e-16 ***
## BMI 6.609e-03 1.616e-04 40.888 < 2e-16 ***
## sexmale -1.387e-01 7.923e-03 -17.501 < 2e-16 ***
## age 7.082e-04 7.535e-05 9.399 < 2e-16 ***
## BMI:sexmale 9.446e-04 2.666e-04 3.543 0.000403 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04122 on 2235 degrees of freedom
## (3329 observations deleted due to missingness)
## Multiple R-squared: 0.7674, Adjusted R-squared: 0.767
## F-statistic: 1844 on 4 and 2235 DF, p-value: < 2.2e-16
#gam
gam_fit = gam(body_fat ~ sex + s(BMI, by = sex) + s(age, by = sex), data = d_adults)
gam_fit %>% summary()
##
## Family: gaussian
## Link function: identity
##
## Formula:
## body_fat ~ sex + s(BMI, by = sex) + s(age, by = sex)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.391985 0.001145 342.43 <2e-16 ***
## sexmale -0.114842 0.001666 -68.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI):sexfemale 4.994 6.108 347.05 < 2e-16 ***
## s(BMI):sexmale 3.391 4.269 340.50 < 2e-16 ***
## s(age):sexfemale 2.079 2.594 14.89 1.14e-07 ***
## s(age):sexmale 1.000 1.000 30.50 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.793 Deviance explained = 79.4%
## GCV = 0.0015167 Scale est. = 0.0015076 n = 2240
plot(ggeffects::ggpredict(gam_fit, terms = c("BMI", "sex"))) +
labs(
title = "Predict body fat % from age, sex, BMI using splines",
subtitle = "NHANES 2018, US adults",
y = "Body fat % (x ray)"
) +
scale_y_continuous(labels = scales::percent) +
theme_bw()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/BF from BMI by sex gam.png")
#whr
d_adults %>%
ggplot(aes(whr, body_fat, color = sex)) +
geom_point(alpha = .1) +
geom_smooth() +
geom_smooth(method = lm, linetype = "dotted") +
scale_y_continuous("Body fat % (x-ray)", labels = scales::percent) +
# scale_x_continuous() +
labs(
title = "Body fat % as predicted by WHR",
subtitle = "NHANES 2018, US adults",
x = "Waist-height ratio"
)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 3344 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3344 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3344 rows containing missing values (`geom_point()`).

GG_save("figs/body fat from WHR sex.png")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 3344 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3344 rows containing non-finite values (`stat_smooth()`).
## Removed 3344 rows containing missing values (`geom_point()`).
#correlations
d_adults %>%
group_by(sex) %>%
summarise(r = cor(whr, body_fat, use = "pair"))
#ols regression
lm(body_fat ~ whr * sex + age, data = d_adults) %>% summary()
##
## Call:
## lm(formula = body_fat ~ whr * sex + age, data = d_adults)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.155130 -0.025113 0.000391 0.024315 0.187796
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.003e-01 6.662e-03 15.062 < 2e-16 ***
## whr 4.832e-01 1.057e-02 45.714 < 2e-16 ***
## sexmale -1.585e-01 9.883e-03 -16.040 < 2e-16 ***
## age 7.838e-05 7.051e-05 1.112 0.266
## whr:sexmale 9.655e-02 1.678e-02 5.753 9.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03767 on 2220 degrees of freedom
## (3344 observations deleted due to missingness)
## Multiple R-squared: 0.8054, Adjusted R-squared: 0.805
## F-statistic: 2296 on 4 and 2220 DF, p-value: < 2.2e-16
#gam
gam_fit = gam(body_fat ~ sex + s(whr, by = sex) + s(age, by = sex), data = d_adults)
gam_fit %>% summary()
##
## Family: gaussian
## Link function: identity
##
## Formula:
## body_fat ~ sex + s(whr, by = sex) + s(age, by = sex)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.386650 0.001040 371.85 <2e-16 ***
## sexmale -0.104128 0.001519 -68.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(whr):sexfemale 4.819 5.981 436.013 < 2e-16 ***
## s(whr):sexmale 5.320 6.389 347.318 < 2e-16 ***
## s(age):sexfemale 1.463 1.790 0.576 0.43163
## s(age):sexmale 2.105 2.627 4.315 0.00893 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.831 Deviance explained = 83.2%
## GCV = 0.0012393 Scale est. = 0.0012305 n = 2225
plot(ggeffects::ggpredict(gam_fit, terms = c("whr", "sex"))) +
labs(
title = "Predict body fat % from age, sex, WHR using splines",
subtitle = "NHANES 2018, US adults",
y = "Body fat % (x ray)",
x = "Waist-height ratio"
) +
scale_y_continuous(labels = scales::percent) +
theme_bw()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GG_save("figs/BF from WHR by sex gam.png")
Whites only
#whites only
d_white_adults %>%
ggplot(aes(BMI, body_fat, color = sex)) +
geom_point(alpha = .1) +
geom_smooth() +
geom_smooth(method = lm, linetype = "dotted") +
scale_y_continuous("Body fat % (x-ray)", labels = scales::percent) +
scale_x_continuous(limits = c(NA, 60)) +
labs(
title = "Body fat % as predicted by BMI",
subtitle = "NHANES 2018, US adults"
)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 1255 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1255 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1255 rows containing missing values (`geom_point()`).

#correlations
d_white_adults %>%
group_by(sex) %>%
summarise(r = cor(BMI, body_fat, use = "pair"))
#ols regression
lm(body_fat ~ BMI * sex + age, data = d_white_adults) %>% summary()
##
## Call:
## lm(formula = body_fat ~ BMI * sex + age, data = d_white_adults)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.177487 -0.026754 0.001141 0.027726 0.147798
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1604735 0.0096218 16.678 < 2e-16 ***
## BMI 0.0067636 0.0002709 24.965 < 2e-16 ***
## sexmale -0.1389525 0.0138791 -10.012 < 2e-16 ***
## age 0.0007441 0.0001379 5.395 9.46e-08 ***
## BMI:sexmale 0.0011454 0.0004644 2.466 0.0139 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04186 on 677 degrees of freedom
## (1253 observations deleted due to missingness)
## Multiple R-squared: 0.7645, Adjusted R-squared: 0.7631
## F-statistic: 549.4 on 4 and 677 DF, p-value: < 2.2e-16
#gam
gam_fit = gam(body_fat ~ sex + s(BMI, k = 5, by = sex) + s(age, by = sex), data = d_white_adults)
gam_fit %>% summary()
##
## Family: gaussian
## Link function: identity
##
## Formula:
## body_fat ~ sex + s(BMI, k = 5, by = sex) + s(age, by = sex)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.389096 0.002018 192.85 <2e-16 ***
## sexmale -0.108768 0.003035 -35.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(BMI):sexfemale 3.540 3.885 208.241 < 2e-16 ***
## s(BMI):sexmale 2.103 2.567 200.156 < 2e-16 ***
## s(age):sexfemale 1.309 1.555 9.877 0.000261 ***
## s(age):sexmale 1.635 2.028 2.439 0.083678 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.795 Deviance explained = 79.8%
## GCV = 0.0015369 Scale est. = 0.001513 n = 682
plot(ggeffects::ggpredict(gam_fit, terms = c("BMI", "sex"))) +
labs(
title = "Predict body fat % from age, sex, BMI",
subtitle = "NHANES 2018, US adults",
y = "Body fat % (x ray)"
) +
scale_y_continuous(labels = scales::percent)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
