ỨNG DỤNG MÔ HÌNH HỒI QUY TUYẾN TÍNH- LÝ THUYẾT DAY 5-PAGE 50 Q2

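Đưa file lên R: chạy file.choose() để chọn file và lấy đường dẫn, ví dụ kết quả là
[1] "D:\\R\\DU LIEU THUC HANH TS ThACH GUI\\Obesity data.csv"
Sau đó đọc file bằng lệnh: Phuc=read.csv("D:\\R\\DU LIEU THUC HANH TS ThACH GUI\\Obesity data.csv")
(Lưu ý: trong R phải dùng dấu nháy thẳng " " và viết \\ thay cho \ trong đường dẫn.)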

Đọc file đưa lên

# Load the obesity data set from its CSV file into the data frame `Phuc`,
# then preview the first rows to confirm the columns were read as expected.
# NOTE(review): the path is an absolute Windows path, so this only runs on a
# machine that has the file at exactly this location.
Phuc <- read.csv(
  file = "D:\\R\\DU LIEU THUC HANH TS ThACH GUI\\Obesity data.csv"
)
head(Phuc, n = 6L)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2

Vẽ chart nghiên cứu khác biệt tỉ trọng mỡ (pcfat) giữa nam và nữ

####tải ggplot2

library(ggplot2)

chart lệnh ggplot

 # Box plot of percent body fat (pcfat) by gender, coloured by gender, with
 # the individual observations overlaid as jittered, semi-transparent points.
 ggplot(
   data = Phuc,
   mapping = aes(x = gender, y = pcfat, colour = gender)
 ) +
   geom_boxplot() +
   geom_jitter(alpha = 0.2)

### So sánh 2 cách ### Cách dùng kiểm định t (t-test)

# Two-sample t-test comparing mean pcfat between the gender groups in `Phuc`.
t.test(pcfat ~ gender, data = Phuc)
## 
##  Welch Two Sample t-test
## 
## data:  pcfat by gender
## t = 29.768, df = 602.01, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##   9.822548 11.210140
## sample estimates:
## mean in group F mean in group M 
##        34.67241        24.15607

Mô hình hồi quy tuyến tính

# Fit pcfat on gender by ordinary least squares and print the coefficients.
lm(formula = pcfat ~ gender, data = Phuc)
## 
## Call:
## lm(formula = pcfat ~ gender, data = Phuc)
## 
## Coefficients:
## (Intercept)      genderM  
##       34.67       -10.52
# Store the gender-only model in `fit`, then print its full summary
# (residual quantiles, coefficient table, R-squared and F-statistic).
fit <- lm(formula = pcfat ~ gender, data = Phuc)
summary(fit)
## 
## Call:
## lm(formula = pcfat ~ gender, data = Phuc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.0724  -3.2724   0.1484   3.6276  14.8439 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  34.6724     0.1826   189.9   <2e-16 ***
## genderM     -10.5163     0.3381   -31.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.362 on 1215 degrees of freedom
## Multiple R-squared:  0.4432, Adjusted R-squared:  0.4428 
## F-statistic: 967.3 on 1 and 1215 DF,  p-value: < 2.2e-16

## Finished 1 - ok ## Ứng dụng 2 - Page 52: đánh giá tầm quan trọng ### head(Phuc) - Mô hình tỉ trọng mỡ

# Preview the first six rows of `Phuc` again before fitting the larger model.
head(Phuc, n = 6L)
##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2
# Refit pcfat with gender, age and bmi as predictors, then print the summary.
# This assignment replaces whatever was previously stored in `fit`.
fit <- lm(formula = pcfat ~ gender + age + bmi, data = Phuc)
summary(fit)
## 
## Call:
## lm(formula = pcfat ~ gender + age + bmi, data = Phuc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.9176  -2.5331   0.0212   2.6914  15.6889 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8.137347   0.852356   9.547  < 2e-16 ***
## genderM     -10.806252   0.254068 -42.533  < 2e-16 ***
## age           0.047147   0.006852   6.881 9.53e-12 ***
## bmi           1.089355   0.038508  28.289  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.974 on 1213 degrees of freedom
## Multiple R-squared:  0.6947, Adjusted R-squared:  0.6939 
## F-statistic: 919.9 on 3 and 1213 DF,  p-value: < 2.2e-16

Package relaimpo trong R - Page 54

#### Mã hóa biến gender (character) thành biến sex (biến số 0/1)

# Add a numeric indicator column `sex` to `Phuc`: 1 where gender is "M",
# 0 for every other non-missing gender value.
Phuc[["sex"]] <- ifelse(Phuc[["gender"]] == "M", yes = 1, no = 0)

Ước tính các tham số trong mô hình

# Fit pcfat on the numeric `sex` indicator, age and bmi; the model is kept in
# `m` for the relative-importance analysis that follows.
m <- lm(formula = pcfat ~ sex + age + bmi, data = Phuc)

### Dùng relaimpo để phân tích hệ số xác định

library(relaimpo)
## Loading required package: MASS
## Loading required package: boot
## Loading required package: survey
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
## 
##     aml
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
## Loading required package: mitools
## This is the global version of package relaimpo.
## If you are a non-US user, a version with the interesting additional metric pmvd is available
## from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
# Relative importance of each regressor in model `m` using the "lmg" metric.
# rela = TRUE normalises the metrics so that they sum to 100%.
# TRUE is spelled out instead of T: `T` is an ordinary variable that can be
# reassigned, which would silently change these arguments.
calc.relimp(m, type = "lmg", rela = TRUE, rank = TRUE)
## Response variable: pcfat 
## Total response variance: 51.5935 
## Analysis based on 1217 observations 
## 
## 3 Regressors: 
## sex age bmi 
## Proportion of variance explained by model: 69.47%
## Metrics are normalized to sum to 100% (rela=TRUE). 
## 
## Relative importance metrics: 
## 
##            lmg
## sex 0.64406371
## age 0.07373796
## bmi 0.28219834
## 
## Average coefficients for different model sizes: 
## 
##              1X          2Xs          3Xs
## sex -10.5163441 -10.56301230 -10.80625248
## age   0.1276871   0.09211775   0.04714735
## bmi   1.0361902   1.03613163   1.08935502

## Finished - Ok, file Ob = Phuc

Ứng dụng 3:Hiệu chỉnh Page 56-File salary