# Load the obesity data set and preview its first three rows.
ob <- read.csv(file = "C:\\Users\\Thu Bo\\Desktop\\obesity data.csv")
head(ob, n = 3)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
# Build the regression equation of pcfat on age.
# Fitted equation: pcfat = 25.6 + 0.13 * age
m1 <- lm(formula = pcfat ~ age, data = ob)
summary(m1)
## 
## Call:
## lm(formula = pcfat ~ age, data = ob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.7114  -4.0069   0.8378   4.9514  18.2192 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.58408    0.57003   44.88   <2e-16 ***
## age          0.12769    0.01135   11.25   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.839 on 1215 degrees of freedom
## Multiple R-squared:  0.09431,    Adjusted R-squared:  0.09357 
## F-statistic: 126.5 on 1 and 1215 DF,  p-value: < 2.2e-16
# Scatter plot of pcfat against age with the fitted line from m1 overlaid.
# The formula is evaluated with data = ob (as in the lm() calls), so the axes
# are labelled "age" and "pcfat" rather than "ob$age" and "ob$pcfat".
plot(pcfat ~ age, data = ob, pch = 16, col = "blue")
abline(m1, col = "red")

# Build the regression equation of pcfat on gender.
# Fitted equation: pcfat = 34.7 - 10.5 * gender(M)
m2 <- lm(formula = pcfat ~ gender, data = ob)
m2
## 
## Call:
## lm(formula = pcfat ~ gender, data = ob)
## 
## Coefficients:
## (Intercept)      genderM  
##       34.67       -10.52
# Check the residuals (epsilon) of the pcfat ~ age model via plots.
# The model is refit here so this section can be run without the code above.
m1 <- lm(pcfat ~ age, data = ob)
# par() returns the previous settings when it changes them; keep them so the
# 2 x 2 layout can be undone and does not leak into later plots.
old_par <- par(mfrow = c(2, 2))
plot(m1)
par(old_par)