# Location of the obesity data set (CSV file) on the local machine.
t <- "C:\\Users\\pc\\Downloads\\CAC NGHIEN CUU\\BAI GIANG CAC MON\\GS TUAN\\BG 12.6.19\\obesity data.csv"
t
## [1] "C:\\Users\\pc\\Downloads\\CAC NGHIEN CUU\\BAI GIANG CAC MON\\GS TUAN\\BG 12.6.19\\obesity data.csv"
# Read the CSV into a data frame and preview its first six rows.
ob <- read.csv(file = t)
head(x = ob, n = 6L)
## id gender height weight bmi age bmc bmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
# Analysis: two candidate linear models for percent body fat (pcfat).
# Variables are written as bare column names and resolved through `data = ob`.
# Writing `ob$pcfat` / `ob$gender` inside the formula bypasses `data`, so
# predict(..., newdata = ) would keep using the original `ob` columns.
# The fitted coefficients are unchanged; only the term labels in printed
# output differ (e.g. `genderM` instead of `ob$genderM`, `Response: pcfat`).
m1 <- lm(pcfat ~ gender + age, data = ob) # gender + age
m2 <- lm(pcfat ~ gender + bmi, data = ob) # gender + bmi
# anova() is used for model selection: its table reports the residual mean
# square (the square of the RMSE) and shows which variables are significant.
# summary() gives the coefficients of the regression equation (a, b) and the
# residual standard error (e).
anova(m1)
## Analysis of Variance Table
##
## Response: ob$pcfat
## Df Sum Sq Mean Sq F value Pr(>F)
## ob$gender 1 27808 27808.3 1061.83 < 2.2e-16 ***
## age 1 3136 3135.9 119.74 < 2.2e-16 ***
## Residuals 1214 31793 26.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Analysis-of-variance table for m2 (gender + bmi), to compare against m1.
anova(m2)
## Analysis of Variance Table
##
## Response: ob$pcfat
## Df Sum Sq Mean Sq F value Pr(>F)
## ob$gender 1 27808 27808.3 1696.18 < 2.2e-16 ***
## bmi 1 15026 15026.3 916.54 < 2.2e-16 ***
## Residuals 1214 19903 16.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient estimates, residual standard error and R-squared for m1.
summary(m1)
##
## Call:
## lm(formula = ob$pcfat ~ ob$gender + age, data = ob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.6748 -3.2841 0.3097 3.3655 17.7216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.119597 0.451098 66.77 <2e-16 ***
## ob$genderM -10.059716 0.325415 -30.91 <2e-16 ***
## age 0.093731 0.008566 10.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.118 on 1214 degrees of freedom
## Multiple R-squared: 0.4932, Adjusted R-squared: 0.4924
## F-statistic: 590.8 on 2 and 1214 DF, p-value: < 2.2e-16
# Coefficient estimates, residual standard error and R-squared for m2.
summary(m2)
##
## Call:
## lm(formula = ob$pcfat ~ ob$gender + bmi, data = ob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.4709 -2.4780 0.1773 2.6903 15.1761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.01035 0.85880 10.49 <2e-16 ***
## ob$genderM -11.06631 0.25599 -43.23 <2e-16 ***
## bmi 1.15303 0.03809 30.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.049 on 1214 degrees of freedom
## Multiple R-squared: 0.6828, Adjusted R-squared: 0.6822
## F-statistic: 1306 on 2 and 1214 DF, p-value: < 2.2e-16
# Comparing the two models: in both, both predictors are significant.
# However, m2 has the larger R2 (0.6828 vs 0.4932) and the smaller residual
# mean square (16.4 vs 26.2, i.e. RMSE 4.049 vs 5.118), so m2 is chosen.
# Regress bone mineral density (bmd) on age and BMI, then show the fit summary.
m3 <- lm(formula = bmd ~ age + bmi, data = ob)
summary(object = m3)
##
## Call:
## lm(formula = bmd ~ age + bmi, data = ob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35426 -0.06653 -0.00400 0.06469 0.35887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9613809 0.0210065 45.766 <2e-16 ***
## age -0.0032540 0.0001671 -19.471 <2e-16 ***
## bmi 0.0089660 0.0009446 9.492 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09801 on 1214 degrees of freedom
## Multiple R-squared: 0.2506, Adjusted R-squared: 0.2494
## F-statistic: 203 on 2 and 1214 DF, p-value: < 2.2e-16
# Analysis-of-variance table for m3 (age entered first, then bmi).
anova(m3)
## Analysis of Variance Table
##
## Response: bmd
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 3.0339 3.03391 315.842 < 2.2e-16 ***
## bmi 1 0.8655 0.86545 90.097 < 2.2e-16 ***
## Residuals 1214 11.6614 0.00961
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
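# Conclusion: at the same age, each 1 kg/m2 increase in BMI is associated with
# a 0.009 g/cm2 increase in BMD (bone mineral density); at the same BMI, each
# additional year of age is associated with a 0.0033 g/cm2 decrease in BMD.
# Both factors are statistically significant (p < 2e-16).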
library(visreg)
## Warning: package 'visreg' was built under R version 3.5.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Refit of the bmd ~ age + bmi model. The previous call had a stray comma
# (`bmd~age,+ bmi`), which passed `+ bmi` as a separate positional argument
# of lm() (matched to `subset`) instead of a term in the formula.
m <- lm(bmd ~ age + bmi, data = ob)
# NOTE(review): par(mfrow) only affects base graphics. With gg = TRUE visreg
# draws with ggplot2, so this call does not place the two plots side by side.
# It is kept so that any later base-graphics plots see the same layout.
par(mfrow = c(1, 2))
# Visualise the fitted m3 regression against each predictor in turn.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
visreg(m3, xvar = "age", gg = TRUE, xlab = "Age", ylab = "BMD")
# The x-axis label now matches the plotted variable (was "Age").
visreg(m3, xvar = "bmi", gg = TRUE, xlab = "BMI", ylab = "BMD")