# Load the obesity data set from a local CSV file and preview the first rows.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
t <- "D:\\Datasets for practice\\obesity data.csv"
ob <- read.csv(file = t)
head(ob)
## id gender height weight bmi age bmc bmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
# Randomly split `ob` into a development (model-fitting) set and a
# validation set.
rows <- nrow(ob)
prop <- 0.7                  # share of rows assigned to the development set
upper <- floor(prop * rows)  # number of rows in the development set

# Fix the RNG seed so the random split, and every result computed from it,
# is the same on each run.
set.seed(123)
permutation <- ob[sample(rows), ]  # all rows of `ob` in random order

# A logical mask built from seq_len() stays correct when either part is
# empty; 1:upper and (upper + 1):rows count backwards in that case and
# select the wrong rows.
in_dev <- seq_len(rows) <= upper
dev <- permutation[in_dev, ]
val <- permutation[!in_dev, ]
dim(dev)
## [1] 851 11
dim(val)
## [1] 366 11
# Fit a linear model of pcfat on gender, age, bmi and weight using only the
# development rows, then print the coefficient table and fit statistics.
m <- lm(
  pcfat ~ gender + age + bmi + weight,
  data = dev
)
summary(m)
##
## Call:
## lm(formula = pcfat ~ gender + age + bmi + weight, data = dev)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.1087 -2.5076 -0.0144 2.6404 15.4466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.858198 1.004864 8.815 < 2e-16 ***
## genderM -11.430503 0.410916 -27.817 < 2e-16 ***
## age 0.051942 0.008752 5.935 4.28e-09 ***
## bmi 0.881255 0.094960 9.280 < 2e-16 ***
## weight 0.067372 0.033397 2.017 0.044 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.922 on 846 degrees of freedom
## Multiple R-squared: 0.6918, Adjusted R-squared: 0.6903
## F-statistic: 474.7 on 4 and 846 DF, p-value: < 2.2e-16
# Apply the fitted model to the validation rows and keep the predictions in
# a new `pred` column next to the observed values; show the first three rows.
val[["pred"]] <- predict(m, newdata = val)
head(val, n = 3)
## id gender height weight bmi age bmc bmd fat lean pcfat pred
## 715 721 M 175 88 28.7 36 2525 1.15 25704 57455 30.0 30.51840
## 86 87 F 151 51 22.4 78 1655 1.05 18166 30180 36.3 36.08580
## 56 56 F 156 54 22.2 64 1287 0.84 21264 30774 39.9 35.38447
# Squared correlation between observed and predicted pcfat on the
# validation rows.
r2 <- with(val, cor(pcfat, pred))^2
r2
## [1] 0.7103249
# Scatter plot of observed (y axis) against predicted (x axis) pcfat.
plot(val$pcfat ~ val$pred, pch = 16, col = "blue")
# Assess the importance of the predictor variables (data for 1217 subjects)
library(relaimpo)# relative importance
## Loading required package: MASS
## Loading required package: boot
## Loading required package: survey
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
##
## aml
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
## Loading required package: mitools
## This is the global version of package relaimpo.
## If you are a non-US user, a version with the interesting additional metric pmvd is available
## from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
# Refit on the full data set `ob` (this replaces the earlier `m`) with
# gender, age, weight, height and bmi as predictors, then report the "lmg"
# relative-importance metric for each predictor.
m <- lm(
  pcfat ~ gender + age + weight + height + bmi,
  data = ob
)
calc.relimp(m, type = "lmg")
## Response variable: pcfat
## Total response variance: 51.5935
## Analysis based on 1217 observations
##
## 5 Regressors:
## gender age weight height bmi
## Proportion of variance explained by model: 69.66%
## Metrics are not normalized (rela=FALSE).
##
## Relative importance metrics:
##
## lmg
## gender 0.35388610
## age 0.03807245
## weight 0.06505573
## height 0.10600159
## bmi 0.13357808
##
## Average coefficients for different model sizes:
##
## 1X 2Xs 3Xs 4Xs 5Xs
## gender -10.51634414 -11.25778000 -11.37601905 -11.34809670 -11.44105012
## age 0.12768705 0.09380448 0.04849443 0.04545609 0.05493292
## weight 0.04319324 0.06043494 0.08062526 0.10196648 0.09394895
## height -0.43201351 -0.39537155 -0.28180720 -0.13427073 -0.01099062
## bmi 1.03619023 1.38422758 1.37018157 1.10104878 0.85803534