Analysis of Body Fat Percentage Based on BMI and Age

The raw data are from a study in Vietnam.

This dataset includes the following variables:

  • id
  • age
  • bmi: Body Mass Index
  • pcfat: Percent body fat (%)

Load libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(table1)
## 
## Attaching package: 'table1'
## 
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(ggplot2)

Load data

# Read the study export into a data frame and preview its first rows.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this on another computer.
os <- read.csv(file = "/Users/nguyennhug/Downloads/Osteo data.csv")
head(os)
##   id lean.mass fat.mass pcfat age height weight  bmi osta osteo  osteo.group
## 1  1     27.98    16.49 37.09  76  156.0   45.0 18.5  6.2     2 Osteoporosis
## 2  8     29.02    27.54 48.70  54  153.0   56.0 23.9 -0.4     1   Osteopenia
## 3 21     31.72    20.65 39.43  56  158.2   51.5 20.6  0.9     1   Osteopenia
## 4 38     35.96    21.96 37.92  54  154.0   51.0 21.5  0.6     1   Osteopenia
## 5 39     35.00    26.29 42.89  60  159.5   60.0 23.6  0.0     1   Osteopenia
## 6 53     32.58    19.82 37.82  53  156.0   51.0 21.0  0.4     1   Osteopenia

Descriptive analysis with table1

# Keep only the columns used in this analysis.
os1 <- os %>% select(id, age, bmi, pcfat)

# head(os) shows no gender column, so a placeholder label is drawn at random
# for every row. The seed is fixed so that the split, and every result that
# uses gender, is identical on each run. The figures printed in this document
# came from an earlier unseeded run and will not match exactly.
set.seed(2024)
os1$gender <- sample(c("Male", "Female"), size = nrow(os1), replace = TRUE)

# Descriptive statistics for each variable, split by gender with an overall
# column.
table1(~ id + age + bmi + pcfat | gender, data = os1)
Female
(N=161)
Male
(N=139)
Overall
(N=300)
id
Mean (SD) 1550 (1220) 1850 (1160) 1690 (1200)
Median [Min, Max] 1350 [8.00, 4170] 1910 [1.00, 4180] 1600 [1.00, 4180]
age
Mean (SD) 59.9 (7.87) 59.7 (7.65) 59.8 (7.76)
Median [Min, Max] 58.0 [50.0, 86.0] 59.0 [50.0, 93.0] 58.0 [50.0, 93.0]
bmi
Mean (SD) 23.6 (3.48) 23.0 (2.97) 23.3 (3.26)
Median [Min, Max] 23.5 [16.0, 34.7] 22.9 [15.7, 32.0] 23.2 [15.7, 34.7]
pcfat
Mean (SD) 42.7 (4.97) 42.9 (3.47) 42.8 (4.34)
Median [Min, Max] 43.3 [27.7, 52.2] 42.7 [35.2, 53.3] 43.0 [27.7, 53.3]

Simple Linear Regression Model

# Simple regression of percent body fat on BMI; the unassigned fit is
# auto-printed, showing the call and the two coefficients.
lm(formula = pcfat ~ bmi, data = os1)
## 
## Call:
## lm(formula = pcfat ~ bmi, data = os1)
## 
## Coefficients:
## (Intercept)          bmi  
##     25.7934       0.7314
# Simple regression of percent body fat on age; the unassigned fit is
# auto-printed, showing the call and the two coefficients.
lm(formula = pcfat ~ age, data = os1)
## 
## Call:
## lm(formula = pcfat ~ age, data = os1)
## 
## Coefficients:
## (Intercept)          age  
##    42.14283      0.01146

Multiple linear regression model

# Multiple regression of percent body fat on BMI, age and gender.
# Note: gender is the label generated with sample() earlier in this script,
# so its coefficient describes a random split of the rows.
lm(formula = pcfat ~ bmi + age + gender, data = os1)
## 
## Call:
## lm(formula = pcfat ~ bmi + age + gender, data = os1)
## 
## Coefficients:
## (Intercept)          bmi          age   genderMale  
##    22.25882      0.75179      0.04612      0.64873

Summary

# Full summary of the BMI-only model: residual quantiles, coefficient table
# with standard errors and p-values, R-squared and the overall F-test.
lm(formula = pcfat ~ bmi, data = os1) %>%
  summary()
## 
## Call:
## lm(formula = pcfat ~ bmi, data = os1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.9528  -2.1885   0.3133   2.6410   8.1807 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.79338    1.51238   17.05   <2e-16 ***
## bmi          0.73140    0.06431   11.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.627 on 298 degrees of freedom
## Multiple R-squared:  0.3027, Adjusted R-squared:  0.3003 
## F-statistic: 129.3 on 1 and 298 DF,  p-value: < 2.2e-16
# Full summary of the age-only model: residual quantiles, coefficient table
# with standard errors and p-values, R-squared and the overall F-test.
lm(formula = pcfat ~ age, data = os1) %>%
  summary()
## 
## Call:
## lm(formula = pcfat ~ age, data = os1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.0488  -2.7402   0.1793   3.0714  10.4452 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 42.14283    1.95233  21.586   <2e-16 ***
## age          0.01146    0.03237   0.354    0.724    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.343 on 298 degrees of freedom
## Multiple R-squared:  0.0004205,  Adjusted R-squared:  -0.002934 
## F-statistic: 0.1254 on 1 and 298 DF,  p-value: 0.7236

Correlation between BMI and pcfat

# Scatter plot of percent body fat against BMI, coloured by gender, with a
# least-squares line per gender.
# The plot is built from os1 so that every aesthetic is a column of the
# plotted data frame. Mapping the external vector os1$gender onto os relied on
# both frames having the same row order and labelled the legend "os1$gender".
p <- ggplot(data = os1, aes(x = bmi, y = pcfat, colour = gender))
p + geom_point() + geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

Histogram of pcfat

# Histogram of percent body fat: orange bars with green outlines, using
# ggplot2's default binning.
ggplot(data = os1, mapping = aes(x = pcfat)) +
  geom_histogram(fill = "orange", colour = "green") +
  labs(x = "Percent body fat", y = "Number of participants")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.