ob=read.csv("/Users/admin/Desktop/Textbook/TDTU Datasets for 2020 Workshop/Obesity data.csv")
dat = ob[, c("gender", "age", "height", "weight", "bmi", "pcfat")]
head(dat)
## gender age height weight bmi pcfat
## 1 F 53 150 49 21.8 37.3
## 2 M 65 165 52 19.1 16.8
## 3 F 64 157 57 23.1 34.0
## 4 F 56 156 53 21.8 33.8
## 5 M 54 160 51 19.9 14.8
## 6 F 52 153 47 20.1 32.2
library(GGally); library(ggplot2); library(visreg); library(table1); library(BMA)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.4-9)
table1(~age + height + weight + bmi + pcfat | gender, data=ob)
| F (n=862) |
M (n=355) |
Overall (n=1217) |
|
|---|---|---|---|
| age | |||
| Mean (SD) | 48.6 (16.4) | 43.7 (18.8) | 47.2 (17.3) |
| Median [Min, Max] | 49.0 [14.0, 85.0] | 44.0 [13.0, 88.0] | 48.0 [13.0, 88.0] |
| height | |||
| Mean (SD) | 153 (5.55) | 165 (6.73) | 157 (7.98) |
| Median [Min, Max] | 153 [136, 170] | 165 [146, 185] | 155 [136, 185] |
| weight | |||
| Mean (SD) | 52.3 (7.72) | 62.0 (9.59) | 55.1 (9.40) |
| Median [Min, Max] | 51.0 [34.0, 95.0] | 62.0 [38.0, 95.0] | 54.0 [34.0, 95.0] |
| bmi | |||
| Mean (SD) | 22.3 (3.05) | 22.7 (3.04) | 22.4 (3.06) |
| Median [Min, Max] | 22.1 [15.2, 37.1] | 22.5 [14.5, 34.7] | 22.2 [14.5, 37.1] |
| pcfat | |||
| Mean (SD) | 34.7 (5.19) | 24.2 (5.76) | 31.6 (7.18) |
| Median [Min, Max] | 34.7 [14.6, 48.4] | 24.6 [9.20, 39.0] | 32.4 [9.20, 48.4] |
ggpairs(dat, mapping = aes(color = gender))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### Nhận xét: ### -
m = lm(pcfat ~ gender, data=ob)
summary(m)
##
## Call:
## lm(formula = pcfat ~ gender, data = ob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.0724 -3.2724 0.1484 3.6276 14.8439
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.6724 0.1826 189.9 <2e-16 ***
## genderM -10.5163 0.3381 -31.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.362 on 1215 degrees of freedom
## Multiple R-squared: 0.4432, Adjusted R-squared: 0.4428
## F-statistic: 967.3 on 1 and 1215 DF, p-value: < 2.2e-16
visreg(m)
m = lm(pcfat ~ gender + bmi, data=ob)
summary (m)
##
## Call:
## lm(formula = pcfat ~ gender + bmi, data = ob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.4709 -2.4780 0.1773 2.6903 15.1761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.01035 0.85880 10.49 <2e-16 ***
## genderM -11.06631 0.25599 -43.23 <2e-16 ***
## bmi 1.15303 0.03809 30.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.049 on 1214 degrees of freedom
## Multiple R-squared: 0.6828, Adjusted R-squared: 0.6822
## F-statistic: 1306 on 2 and 1214 DF, p-value: < 2.2e-16
m1 = lm(pcfat ~ bmi, data=ob)
m2 = lm(pcfat ~ bmi + I(bmi^2), data=ob)
m3 = lm(pcfat ~ bmi + I(bmi^2) + I(bmi^3), data=ob)
anova(m1, m2, m3)
## Analysis of Variance Table
##
## Model 1: pcfat ~ bmi
## Model 2: pcfat ~ bmi + I(bmi^2)
## Model 3: pcfat ~ bmi + I(bmi^2) + I(bmi^3)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1215 50541
## 2 1214 49921 1 620.14 15.1175 0.0001065 ***
## 3 1213 49758 1 162.30 3.9565 0.0469163 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(1,3))
visreg(m1)
visreg(m2)
visreg(m3)