# Absolute location of the obesity data set (CSV) on the author's machine.
t <- "C:\\Users\\pc\\Downloads\\CAC NGHIEN CUU\\BAI GIANG CAC MON\\GS TUAN\\BG 12.6.19\\obesity data.csv"
t
## [1] "C:\\Users\\pc\\Downloads\\CAC NGHIEN CUU\\BAI GIANG CAC MON\\GS TUAN\\BG 12.6.19\\obesity data.csv"
# Read the CSV into a data frame and preview its first rows.
ob <- read.csv(file = t)
head(x = ob)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2

#analysis

# Two candidate models for percent body fat (pcfat):
#   m1: gender + age
#   m2: gender + bmi
# Variables are referenced by bare column name and resolved through
# `data = ob`. Writing `ob$pcfat ~ ob$gender` bypasses `data`, which breaks
# predict(newdata = ...) and tools such as visreg. The fitted coefficients
# are unchanged; only the term labels differ (the recorded output below was
# produced with the `ob$` form, so it shows `ob$gender` where a re-run will
# show `gender`).
m1 <- lm(pcfat ~ gender + age, data = ob)
m2 <- lm(pcfat ~ gender + bmi, data = ob)
# anova(): used for model selection because it reports the residual mean
# square (MSE; its square root is the RMSE) and shows which terms are
# significant.
# summary(): summarises the coefficients of the regression equation
# (intercept, slopes) together with the residual standard error.
# Sequential ANOVA table for model m1.
anova(object = m1)
## Analysis of Variance Table
## 
## Response: ob$pcfat
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## ob$gender    1  27808 27808.3 1061.83 < 2.2e-16 ***
## age          1   3136  3135.9  119.74 < 2.2e-16 ***
## Residuals 1214  31793    26.2                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Sequential ANOVA table for model m2.
anova(object = m2)
## Analysis of Variance Table
## 
## Response: ob$pcfat
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## ob$gender    1  27808 27808.3 1696.18 < 2.2e-16 ***
## bmi          1  15026 15026.3  916.54 < 2.2e-16 ***
## Residuals 1214  19903    16.4                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient table and fit statistics for model m1.
summary(object = m1)
## 
## Call:
## lm(formula = ob$pcfat ~ ob$gender + age, data = ob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.6748  -3.2841   0.3097   3.3655  17.7216 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  30.119597   0.451098   66.77   <2e-16 ***
## ob$genderM  -10.059716   0.325415  -30.91   <2e-16 ***
## age           0.093731   0.008566   10.94   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.118 on 1214 degrees of freedom
## Multiple R-squared:  0.4932, Adjusted R-squared:  0.4924 
## F-statistic: 590.8 on 2 and 1214 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for model m2.
summary(object = m2)
## 
## Call:
## lm(formula = ob$pcfat ~ ob$gender + bmi, data = ob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.4709  -2.4780   0.1773   2.6903  15.1761 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.01035    0.85880   10.49   <2e-16 ***
## ob$genderM  -11.06631    0.25599  -43.23   <2e-16 ***
## bmi           1.15303    0.03809   30.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.049 on 1214 degrees of freedom
## Multiple R-squared:  0.6828, Adjusted R-squared:  0.6822 
## F-statistic:  1306 on 2 and 1214 DF,  p-value: < 2.2e-16
# Comparing the two models: every predictor is significant in both, but m2
# has the larger R-squared (0.6828 vs 0.4932) and the smaller residual mean
# square (MSE 16.4 vs 26.2, i.e. RMSE 4.049 vs 5.118), so m2 is chosen.

# Plot the fitted model with visreg

# Model 3: bone mineral density (bmd) regressed on age and BMI.
m3 <- lm(formula = bmd ~ age + bmi, data = ob)
summary(object = m3)
## 
## Call:
## lm(formula = bmd ~ age + bmi, data = ob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35426 -0.06653 -0.00400  0.06469  0.35887 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.9613809  0.0210065  45.766   <2e-16 ***
## age         -0.0032540  0.0001671 -19.471   <2e-16 ***
## bmi          0.0089660  0.0009446   9.492   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09801 on 1214 degrees of freedom
## Multiple R-squared:  0.2506, Adjusted R-squared:  0.2494 
## F-statistic:   203 on 2 and 1214 DF,  p-value: < 2.2e-16
# Sequential ANOVA table for model m3.
anova(object = m3)
## Analysis of Variance Table
## 
## Response: bmd
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## age          1  3.0339 3.03391 315.842 < 2.2e-16 ***
## bmi          1  0.8655 0.86545  90.097 < 2.2e-16 ***
## Residuals 1214 11.6614 0.00961                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
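# Conclusion: at a given age, each 1 kg/m^2 increase in BMI is associated
# with a 0.009 g/cm^2 increase in BMD (bone mineral density); at a given BMI,
# each additional year of age is associated with a 0.0033 g/cm^2 decrease in
# BMD. Both factors are statistically significant (p < 2e-16).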
library(visreg)
## Warning: package 'visreg' was built under R version 3.5.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Refit of BMD on age and BMI (same specification as m3).
# The original call had a stray comma (`bmd~age,+ bmi`), which turned
# `+ bmi` into the positional `subset` argument of lm(): the model was
# silently fitted as bmd ~ age on rows indexed by BMI values instead of
# including BMI as a predictor.
m <- lm(bmd ~ age + bmi, data = ob)
# NOTE(review): par(mfrow) only affects base graphics. With gg = TRUE visreg
# returns ggplot objects, so the two plots below are still drawn separately;
# drop gg = TRUE or combine the ggplots explicitly if a side-by-side layout
# is required.
par(mfrow = c(1, 2))
# Fitted relationship between age and BMD from m3.
visreg(m3, xvar = "age", gg = TRUE, xlab = "Age", ylab = "BMD")

# Fitted relationship between BMI and BMD from m3 (x-axis label corrected
# from "Age" to "BMI").
visreg(m3, xvar = "bmi", gg = TRUE, xlab = "BMI", ylab = "BMD")