library(visreg)
library(ggplot2)
library(ggfortify)
# Read the obesity dataset.
# NOTE: the path is an absolute, machine-specific location; adjust it when
# running on another machine.
# The path is stored in `data_path` rather than `t` so the variable does not
# reuse the name of base R's transpose function `t()`.
data_path <- "D:/Obesity data.csv"
ob <- read.csv(data_path)

# Restrict the analysis to women (rows where gender == "F").
women <- subset(ob, gender == "F")

# Preview the first rows of the subset.
head(women)
## id gender height weight bmi age bmc bmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
## 7 7 F 155 58 24.1 66 1546 0.96 20233 35599 35.3
## 10 10 F 158 60 24.0 58 1404 0.86 21365 35534 36.6
# Fit a simple linear regression of pcfat (presumably percent body fat) on
# bmi using the women subset, then print the coefficient table and the
# overall fit statistics.
m1 <- lm(formula = pcfat ~ bmi, data = women)
summary(m1)
##
## Call:
## lm(formula = pcfat ~ bmi, data = women)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.4308 -2.3335 0.1359 2.5871 15.1984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.61490 0.94288 9.137 <2e-16 ***
## bmi 1.17079 0.04197 27.895 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.761 on 860 degrees of freedom
## Multiple R-squared: 0.475, Adjusted R-squared: 0.4744
## F-statistic: 778.1 on 1 and 860 DF, p-value: < 2.2e-16
# Visualise the fitted relationship from the linear model m1.
visreg(fit = m1)

# Draw the autoplot() diagnostic panels for m1 (the lm method is supplied by
# the ggfortify package loaded at the top of the script).
autoplot(object = m1)

# Fit quadratic (parabolic) and cubic models.
# First, a scatter plot of pcfat against bmi with a smoothed trend line drawn
# by geom_smooth() using its default settings.
ggplot(data = women, mapping = aes(x = bmi, y = pcfat)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Quadratic model: adds a squared bmi term to the linear predictor.
m2 <- lm(formula = pcfat ~ bmi + I(bmi^2), data = women)

# Cubic model: additionally includes a cubed bmi term.
m3 <- lm(formula = pcfat ~ bmi + I(bmi^2) + I(bmi^3), data = women)

# Print the coefficient table and fit statistics for the quadratic model.
summary(m2)
##
## Call:
## lm(formula = pcfat ~ bmi + I(bmi^2), data = women)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.4126 -2.3894 0.0644 2.5644 14.9304
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18.821101 4.297335 -4.380 1.33e-05 ***
## bmi 3.574746 0.370065 9.660 < 2e-16 ***
## I(bmi^2) -0.051653 0.007903 -6.536 1.08e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.673 on 859 degrees of freedom
## Multiple R-squared: 0.4999, Adjusted R-squared: 0.4987
## F-statistic: 429.3 on 2 and 859 DF, p-value: < 2.2e-16
# Print the coefficient table and fit statistics for the cubic model m3.
summary(m3)
##
## Call:
## lm(formula = pcfat ~ bmi + I(bmi^2) + I(bmi^3), data = women)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.5100 -2.4021 0.0373 2.6260 14.8127
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.817565 18.013403 -2.377 0.01767 *
## bmi 6.614148 2.246437 2.944 0.00332 **
## I(bmi^2) -0.177044 0.091753 -1.930 0.05399 .
## I(bmi^3) 0.001683 0.001227 1.372 0.17051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.671 on 858 degrees of freedom
## Multiple R-squared: 0.501, Adjusted R-squared: 0.4992
## F-statistic: 287.1 on 3 and 858 DF, p-value: < 2.2e-16
# Compare the nested models m1 (linear), m2 (quadratic) and m3 (cubic) with
# sequential F-tests: each row tests the model against the one before it.
anova(m1, m2, m3)
## Analysis of Variance Table
##
## Model 1: pcfat ~ bmi
## Model 2: pcfat ~ bmi + I(bmi^2)
## Model 3: pcfat ~ bmi + I(bmi^2) + I(bmi^3)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 860 12163
## 2 859 11587 1 576.28 42.7662 1.058e-10 ***
## 3 858 11562 1 25.35 1.8816 0.1705
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1