This dataset includes the following variables:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(table1)
##
## Attaching package: 'table1'
##
## The following objects are masked from 'package:base':
##
## units, units<-
library(ggplot2)
# Load the osteoporosis data set. The path is a machine-specific absolute
# path, so it has to be adjusted when the script is run elsewhere.
os <- read.csv(file = "/Users/nguyennhug/Downloads/Osteo data.csv")
# Preview the first six rows to check the columns that were read in.
head(os, n = 6L)
## id lean.mass fat.mass pcfat age height weight bmi osta osteo osteo.group
## 1 1 27.98 16.49 37.09 76 156.0 45.0 18.5 6.2 2 Osteoporosis
## 2 8 29.02 27.54 48.70 54 153.0 56.0 23.9 -0.4 1 Osteopenia
## 3 21 31.72 20.65 39.43 56 158.2 51.5 20.6 0.9 1 Osteopenia
## 4 38 35.96 21.96 37.92 54 154.0 51.0 21.5 0.6 1 Osteopenia
## 5 39 35.00 26.29 42.89 60 159.5 60.0 23.6 0.0 1 Osteopenia
## 6 53 32.58 19.82 37.82 53 156.0 51.0 21.0 0.4 1 Osteopenia
# Keep only the identifier and the three variables used in the analysis.
os1 <- select(os, id, age, bmi, pcfat)
# Attach a gender label drawn at random (with replacement) for every row.
# NOTE(review): no seed is set in the visible code, so the labels -- and every
# gender-dependent result below -- will differ from run to run.
os1[["gender"]] <- sample(
  x = c("Male", "Female"),
  size = nrow(os),
  replace = TRUE
)
# Descriptive statistics for each variable, stratified by gender.
table1(~ id + age + bmi + pcfat | gender, data = os1)
| | Female (N=161) | Male (N=139) | Overall (N=300) |
|---|---|---|---|
| id | |||
| Mean (SD) | 1550 (1220) | 1850 (1160) | 1690 (1200) |
| Median [Min, Max] | 1350 [8.00, 4170] | 1910 [1.00, 4180] | 1600 [1.00, 4180] |
| age | |||
| Mean (SD) | 59.9 (7.87) | 59.7 (7.65) | 59.8 (7.76) |
| Median [Min, Max] | 58.0 [50.0, 86.0] | 59.0 [50.0, 93.0] | 58.0 [50.0, 93.0] |
| bmi | |||
| Mean (SD) | 23.6 (3.48) | 23.0 (2.97) | 23.3 (3.26) |
| Median [Min, Max] | 23.5 [16.0, 34.7] | 22.9 [15.7, 32.0] | 23.2 [15.7, 34.7] |
| pcfat | |||
| Mean (SD) | 42.7 (4.97) | 42.9 (3.47) | 42.8 (4.34) |
| Median [Min, Max] | 43.3 [27.7, 52.2] | 42.7 [35.2, 53.3] | 43.0 [27.7, 53.3] |
# Simple linear regression of percent body fat (pcfat) on BMI; the fitted
# model is auto-printed, showing only the call and the coefficients.
lm(
  formula = pcfat ~ bmi,
  data = os1
)
##
## Call:
## lm(formula = pcfat ~ bmi, data = os1)
##
## Coefficients:
## (Intercept) bmi
## 25.7934 0.7314
# Simple linear regression of percent body fat (pcfat) on age; the fitted
# model is auto-printed, showing only the call and the coefficients.
lm(
  formula = pcfat ~ age,
  data = os1
)
##
## Call:
## lm(formula = pcfat ~ age, data = os1)
##
## Coefficients:
## (Intercept) age
## 42.14283 0.01146
# Multiple linear regression of percent body fat on BMI, age and gender.
# gender is the randomly generated label added to os1, so its coefficient
# describes a synthetic covariate rather than a measured one.
lm(
  formula = pcfat ~ bmi + age + gender,
  data = os1
)
##
## Call:
## lm(formula = pcfat ~ bmi + age + gender, data = os1)
##
## Coefficients:
## (Intercept) bmi age genderMale
## 22.25882 0.75179 0.04612 0.64873
# Full model summary (residual quantiles, coefficient table with standard
# errors and p-values, R-squared, F-statistic) for pcfat regressed on BMI.
lm(formula = pcfat ~ bmi, data = os1) %>%
  summary()
##
## Call:
## lm(formula = pcfat ~ bmi, data = os1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.9528 -2.1885 0.3133 2.6410 8.1807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.79338 1.51238 17.05 <2e-16 ***
## bmi 0.73140 0.06431 11.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.627 on 298 degrees of freedom
## Multiple R-squared: 0.3027, Adjusted R-squared: 0.3003
## F-statistic: 129.3 on 1 and 298 DF, p-value: < 2.2e-16
# Full model summary (residual quantiles, coefficient table with standard
# errors and p-values, R-squared, F-statistic) for pcfat regressed on age.
lm(formula = pcfat ~ age, data = os1) %>%
  summary()
##
## Call:
## lm(formula = pcfat ~ age, data = os1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.0488 -2.7402 0.1793 3.0714 10.4452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.14283 1.95233 21.586 <2e-16 ***
## age 0.01146 0.03237 0.354 0.724
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.343 on 298 degrees of freedom
## Multiple R-squared: 0.0004205, Adjusted R-squared: -0.002934
## F-statistic: 0.1254 on 1 and 298 DF, p-value: 0.7236
# Scatter plot of percent body fat against BMI, coloured by gender, with a
# separate least-squares line (and confidence band) for each gender.
#
# The plot data is os1, the data frame that actually holds the gender column,
# and gender is mapped by its bare name. Pulling `os1$gender` into aes() while
# plotting a different data frame bypasses the `data` argument: it only works
# while both frames keep the same row order, breaks under faceting or
# filtering, and titles the legend "os1$gender".
p <- ggplot(data = os1, mapping = aes(x = bmi, y = pcfat, colour = gender))
p +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# Histogram of percent body fat across all participants. The number of bins
# is deliberately left at the geom default, which is why stat_bin() reports
# the bin count it chose.
ggplot(data = os1, mapping = aes(x = pcfat)) +
  geom_histogram(fill = "orange", col = "green") +
  labs(x = "Percent body fat", y = "Number of participants")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.