library(visreg)
library(ggplot2)
library(ggfortify)
# Load the obesity data set from disk.
# NOTE(review): the name `t` is the same as base::t() (matrix transpose); it is
# kept here because code outside this section may still refer to it.
t <- "D:/Obesity data.csv"
ob <- read.csv(t)

# Restrict the analysis to women (gender coded "F").
# which() discards rows where the comparison is NA, as subset() does.
is_female <- which(ob[["gender"]] == "F")
women <- ob[is_female, , drop = FALSE]
head(women)
##    id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1   1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 3   3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4   4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 6   6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2
## 7   7      F    155     58 24.1  66 1546 0.96 20233 35599  35.3
## 10 10      F    158     60 24.0  58 1404 0.86 21365 35534  36.6
# Fit a simple linear regression of percent body fat (pcfat) on BMI.
m1 <- lm(
  pcfat ~ bmi,
  data = women
)
summary(m1)
## 
## Call:
## lm(formula = pcfat ~ bmi, data = women)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.4308  -2.3335   0.1359   2.5871  15.1984 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.61490    0.94288   9.137   <2e-16 ***
## bmi          1.17079    0.04197  27.895   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.761 on 860 degrees of freedom
## Multiple R-squared:  0.475,  Adjusted R-squared:  0.4744 
## F-statistic: 778.1 on 1 and 860 DF,  p-value: < 2.2e-16
# Plot the fitted linear model m1 with visreg.
visreg(m1)

# Call autoplot() on the lm fit m1; the lm method is presumably the one
# supplied by ggfortify (loaded at the top of the script) - confirm.
autoplot(m1)

# Fit quadratic (parabolic) and cubic models.
# First look at the raw relationship: scatter plot of pcfat against bmi with
# the default smoother overlaid.
ggplot(women, aes(bmi, pcfat)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# m2 adds a squared BMI term; m3 additionally adds a cubed BMI term.
m2 <- lm(pcfat ~ bmi + I(bmi^2), data = women)
m3 <- lm(pcfat ~ bmi + I(bmi^2) + I(bmi^3), data = women)
summary(m2)
## 
## Call:
## lm(formula = pcfat ~ bmi + I(bmi^2), data = women)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.4126  -2.3894   0.0644   2.5644  14.9304 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -18.821101   4.297335  -4.380 1.33e-05 ***
## bmi           3.574746   0.370065   9.660  < 2e-16 ***
## I(bmi^2)     -0.051653   0.007903  -6.536 1.08e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.673 on 859 degrees of freedom
## Multiple R-squared:  0.4999, Adjusted R-squared:  0.4987 
## F-statistic: 429.3 on 2 and 859 DF,  p-value: < 2.2e-16
# Print the coefficient table and fit statistics for the cubic model m3.
summary(m3)
## 
## Call:
## lm(formula = pcfat ~ bmi + I(bmi^2) + I(bmi^3), data = women)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.5100  -2.4021   0.0373   2.6260  14.8127 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -42.817565  18.013403  -2.377  0.01767 * 
## bmi           6.614148   2.246437   2.944  0.00332 **
## I(bmi^2)     -0.177044   0.091753  -1.930  0.05399 . 
## I(bmi^3)      0.001683   0.001227   1.372  0.17051   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.671 on 858 degrees of freedom
## Multiple R-squared:  0.501,  Adjusted R-squared:  0.4992 
## F-statistic: 287.1 on 3 and 858 DF,  p-value: < 2.2e-16
# Sequential F tests on the nested fits: m1 vs m2 (adds the bmi^2 term),
# then m2 vs m3 (adds the bmi^3 term).
anova(m1, m2, m3)
## Analysis of Variance Table
## 
## Model 1: pcfat ~ bmi
## Model 2: pcfat ~ bmi + I(bmi^2)
## Model 3: pcfat ~ bmi + I(bmi^2) + I(bmi^3)
##   Res.Df   RSS Df Sum of Sq       F    Pr(>F)    
## 1    860 12163                                   
## 2    859 11587  1    576.28 42.7662 1.058e-10 ***
## 3    858 11562  1     25.35  1.8816    0.1705    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1