Chương trình tập huấn phân tích dữ liệu bằng ngôn ngữ R - BV 108

Ngày 4: Hồi qui tuyến tính

Việc 1. Đọc dữ liệu vào R

# Load the obesity dataset from its CSV file into the data frame `ob`.
ob <- read.csv(
  file = "C:\\Thach\\VN trips\\2024_2Aug\\Data Analysis workshop (Hospital 108)\\Datasets\\obesity data.csv"
)

Việc 2. Đánh giá mối liên quan giữa tuổi và mật độ xương toàn thân

2.1 Biểu đồ histogram đánh giá phân bố của tuổi và mật độ xương toàn thân

library(ggplot2)
library(gridExtra) 

# Histogram of age. No bin count is supplied, so geom_histogram() uses its
# default and reports that choice in a message.
p <- ggplot(data = ob, mapping = aes(x = age))
p1 <- p +
  geom_histogram(fill = "blue", col = "white") +
  labs(x = "Tuổi (năm)", y = "Số người", title = "Phân bố tuổi")

# Histogram of whole-body bone mineral density (wbbmd), same styling.
p <- ggplot(data = ob, mapping = aes(x = wbbmd))
p2 <- p +
  geom_histogram(fill = "blue", col = "white") +
  labs(
    x = "Mật độ xương toàn thân (g/cm2)",
    y = "Số người",
    title = "Phân bố MĐX toàn thân"
  )

# Show both histograms side by side.
grid.arrange(p1, p2, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2.2 Biểu đồ tán xạ đánh giá mối liên quan giữa tuổi và MĐX toàn thân

# Scatter plot of wbbmd against age with a fitted straight line (lm).
p <- ggplot(data = ob, mapping = aes(x = age, y = wbbmd))
p +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x)

2.3 Phân tích tương quan đánh giá mối liên quan giữa tuổi và MĐX toàn thân

# Pearson correlation test between age and whole-body BMD.
cor.test(x = ob$age, y = ob$wbbmd, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  ob$age and ob$wbbmd
## t = -17.154, df = 1215, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4856972 -0.3951677
## sample estimates:
##        cor 
## -0.4415556

Việc 3. Mô hình tuyến tính đánh giá mối liên quan giữa tuổi và MĐX toàn thân

3.1 Thực hiện mô hình hồi qui

# Simple linear regression of whole-body BMD on age, then its summary table.
m.1 <- lm(formula = wbbmd ~ age, data = ob)
summary(m.1)
## 
## Call:
## lm(formula = wbbmd ~ age, data = ob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32749 -0.07268 -0.00533  0.06793  0.33178 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.1450766  0.0084638  135.29   <2e-16 ***
## age         -0.0028914  0.0001686  -17.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1015 on 1215 degrees of freedom
## Multiple R-squared:  0.195,  Adjusted R-squared:  0.1943 
## F-statistic: 294.3 on 1 and 1215 DF,  p-value: < 2.2e-16

3.2 Đánh giá giả định của mô hình

# Base-R diagnostic plots for m.1, arranged on a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so that
# later base-graphics plots are not silently forced into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(m.1)
par(old_par)

library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.3.2
# ggplot-style version of the diagnostic plots for m.1 (via ggfortify).
autoplot(object = m.1)

3.3 Trình bày kết quả

Việc 4. Đánh giá mối liên quan giữa tuổi và giới tính với MĐX toàn thân

4.1 Biểu đồ tán xạ đánh giá mối liên quan giữa tuổi với MĐX toàn thân theo giới tính

# Scatter plot of wbbmd against age, coloured by gender, with one smoother
# per gender. geom_smooth() is called without arguments, so it picks its
# default method and reports it in a message.
p <- ggplot(
  data = ob,
  mapping = aes(x = age, y = wbbmd, fill = gender, col = gender)
)
p1 <- p +
  geom_point() +
  geom_smooth() +
  labs(x = "Tuổi (năm)", y = "Mật độ xương toàn thân (g/cm2)") +
  ggtitle("Liên quan giữa tuổi và MĐX theo giới tính")
p1
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

4.2 Nhận xét về ảnh hưởng của giới tính lên mối liên quan giữa tuổi với MĐX toàn thân

4.3 Thực hiện mô hình hồi qui tuyến tính

# Linear regression of whole-body BMD on age and gender (additive effects),
# followed by its summary table.
m.2 <- lm(formula = wbbmd ~ age + gender, data = ob)
summary(m.2)
## 
## Call:
## lm(formula = wbbmd ~ age + gender, data = ob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36272 -0.06658 -0.00411  0.06549  0.34473 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.118288   0.008636 129.485   <2e-16 ***
## age         -0.002691   0.000164 -16.408   <2e-16 ***
## genderM      0.059417   0.006230   9.537   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09798 on 1214 degrees of freedom
## Multiple R-squared:  0.2511, Adjusted R-squared:  0.2498 
## F-statistic: 203.5 on 2 and 1214 DF,  p-value: < 2.2e-16

4.4 Kiểm tra giả định mô hình

# Base-R diagnostic plots for m.2, arranged on a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so that
# later base-graphics plots are not silently forced into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(m.2)
par(old_par)

# ggplot-style version of the same diagnostics (via ggfortify).
autoplot(m.2)

4.5 Đánh giá kết quả

# Print the coefficient table of m.2 again for interpretation.
summary(object = m.2)
## 
## Call:
## lm(formula = wbbmd ~ age + gender, data = ob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36272 -0.06658 -0.00411  0.06549  0.34473 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.118288   0.008636 129.485   <2e-16 ***
## age         -0.002691   0.000164 -16.408   <2e-16 ***
## genderM      0.059417   0.006230   9.537   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09798 on 1214 degrees of freedom
## Multiple R-squared:  0.2511, Adjusted R-squared:  0.2498 
## F-statistic: 203.5 on 2 and 1214 DF,  p-value: < 2.2e-16

Việc 5. Xây dựng mô hình dự báo MĐX

5.1 Đọc dữ liệu ‘Osteo-data.xlsx’ và gọi đối tượng là ‘fx’

library(readxl)
# Read the Excel workbook, then convert the result to a plain data.frame.
fx <- read_excel("C:\\Thach\\VN trips\\2024_2Aug\\Data Analysis workshop (Hospital 108)\\Datasets\\Osteo-data.xlsx")
fx <- as.data.frame(fx)
# Number of rows and columns.
dim(fx)
## [1] 2216   17
# Per-column summary, including the count of missing values.
summary(fx)
##        id             sex                 age            weight      
##  Min.   :   1.0   Length:2216        Min.   :57.00   Min.   : 34.00  
##  1st Qu.: 554.8   Class :character   1st Qu.:65.00   1st Qu.: 60.00  
##  Median :1108.5   Mode  :character   Median :70.00   Median : 69.00  
##  Mean   :1108.5                      Mean   :70.89   Mean   : 70.14  
##  3rd Qu.:1662.2                      3rd Qu.:76.00   3rd Qu.: 79.00  
##  Max.   :2216.0                      Max.   :96.00   Max.   :133.00  
##                                                      NA's   :53      
##      height         prior_fx          fnbmd           smoking      
##  Min.   :136.0   Min.   :0.0000   Min.   :0.2800   Min.   :0.0000  
##  1st Qu.:158.0   1st Qu.:0.0000   1st Qu.:0.7300   1st Qu.:0.0000  
##  Median :164.0   Median :0.0000   Median :0.8200   Median :0.0000  
##  Mean   :164.9   Mean   :0.1476   Mean   :0.8287   Mean   :0.4176  
##  3rd Qu.:171.0   3rd Qu.:0.0000   3rd Qu.:0.9300   3rd Qu.:1.0000  
##  Max.   :196.0   Max.   :1.0000   Max.   :1.5100   Max.   :1.0000  
##  NA's   :54                       NA's   :89       NA's   :1       
##    parkinson           rheum          hypertension       diabetes    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.00000   Median :0.00000   Median :1.0000   Median :0.000  
##  Mean   :0.06498   Mean   :0.03881   Mean   :0.5063   Mean   :0.111  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.0000   Max.   :1.000  
##                                                                      
##       copd           cancer             cvd            falls_n      
##  Min.   :0.000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.111   Mean   :0.08529   Mean   :0.3872   Mean   :0.2843  
##  3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.000   Max.   :1.00000   Max.   :1.0000   Max.   :2.0000  
##                                                                     
##        fx        
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.2595  
##  3rd Qu.:1.0000  
##  Max.   :1.0000  
## 

5.2 Mô tả đặc điểm của dữ liệu theo giới tính

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Descriptive table of participant characteristics, stratified by sex.
# The 0/1 and count variables are wrapped in as.factor() so they are
# tabulated as categories (n, %) instead of being summarised as numbers.
table1(
  ~ age + weight + height + fnbmd +
    as.factor(prior_fx) + as.factor(falls_n) + as.factor(smoking) +
    as.factor(parkinson) + as.factor(rheum) + as.factor(hypertension) +
    as.factor(diabetes) + as.factor(copd) + as.factor(cancer) +
    as.factor(cvd) + as.factor(fx) | sex,
  data = fx
)
Female
(N=1358)
Male
(N=858)
Overall
(N=2216)
age
Mean (SD) 71.2 (7.59) 70.4 (6.44) 70.9 (7.17)
Median [Min, Max] 70.0 [57.0, 96.0] 69.0 [59.0, 92.0] 70.0 [57.0, 96.0]
weight
Mean (SD) 64.9 (12.5) 78.2 (12.7) 70.1 (14.2)
Median [Min, Max] 64.0 [34.0, 115] 78.0 [45.0, 133] 69.0 [34.0, 133]
Missing 42 (3.1%) 11 (1.3%) 53 (2.4%)
height
Mean (SD) 160 (6.37) 173 (6.86) 165 (9.35)
Median [Min, Max] 160 [136, 181] 173 [151, 196] 164 [136, 196]
Missing 41 (3.0%) 13 (1.5%) 54 (2.4%)
fnbmd
Mean (SD) 0.777 (0.132) 0.909 (0.153) 0.829 (0.155)
Median [Min, Max] 0.770 [0.280, 1.31] 0.900 [0.340, 1.51] 0.820 [0.280, 1.51]
Missing 57 (4.2%) 32 (3.7%) 89 (4.0%)
as.factor(prior_fx)
0 1146 (84.4%) 743 (86.6%) 1889 (85.2%)
1 212 (15.6%) 115 (13.4%) 327 (14.8%)
as.factor(falls_n)
0 1063 (78.3%) 671 (78.2%) 1734 (78.2%)
1 206 (15.2%) 128 (14.9%) 334 (15.1%)
2 89 (6.6%) 59 (6.9%) 148 (6.7%)
as.factor(smoking)
0 962 (70.8%) 328 (38.2%) 1290 (58.2%)
1 395 (29.1%) 530 (61.8%) 925 (41.7%)
Missing 1 (0.1%) 0 (0%) 1 (0.0%)
as.factor(parkinson)
0 1268 (93.4%) 804 (93.7%) 2072 (93.5%)
1 90 (6.6%) 54 (6.3%) 144 (6.5%)
as.factor(rheum)
0 1306 (96.2%) 824 (96.0%) 2130 (96.1%)
1 52 (3.8%) 34 (4.0%) 86 (3.9%)
as.factor(hypertension)
0 695 (51.2%) 399 (46.5%) 1094 (49.4%)
1 663 (48.8%) 459 (53.5%) 1122 (50.6%)
as.factor(diabetes)
0 1213 (89.3%) 757 (88.2%) 1970 (88.9%)
1 145 (10.7%) 101 (11.8%) 246 (11.1%)
as.factor(copd)
0 1211 (89.2%) 759 (88.5%) 1970 (88.9%)
1 147 (10.8%) 99 (11.5%) 246 (11.1%)
as.factor(cancer)
0 1235 (90.9%) 792 (92.3%) 2027 (91.5%)
1 123 (9.1%) 66 (7.7%) 189 (8.5%)
as.factor(cvd)
0 843 (62.1%) 515 (60.0%) 1358 (61.3%)
1 515 (37.9%) 343 (40.0%) 858 (38.7%)
as.factor(fx)
0 932 (68.6%) 709 (82.6%) 1641 (74.1%)
1 426 (31.4%) 149 (17.4%) 575 (25.9%)

5.3 Xây dựng mô hình dự báo MĐX tại cổ xương đùi

5.3.1 Phương pháp stepwise

# Stepwise (AIC-based, backward) selection of predictors of fnbmd.
# na.omit() keeps only the rows that have no missing value in any column,
# so every candidate model is fitted to the same set of observations.
fx.2 <- na.omit(fx)
# The outcome fnbmd must not also appear among the predictors: such a term
# carries no information (step() removed it first, at unchanged AIC), so
# it is left out of the starting formula.
m.step <- lm(
  fnbmd ~ age + sex + weight + height + prior_fx + falls_n + smoking +
    parkinson + rheum + hypertension + diabetes + copd + cancer + cvd,
  data = fx.2
)
# NOTE: the selected model is stored under the name `step`, which shadows
# the function name for variable lookups; calls such as step(...) still
# resolve to the function.
step <- step(m.step)
## Start:  AIC=-9093.64
## fnbmd ~ age + sex + weight + height + prior_fx + falls_n + smoking + 
##     parkinson + rheum + hypertension + diabetes + copd + cancer + 
##     cvd
## 
##                Df Sum of Sq    RSS     AIC
## - copd          1    0.0002 28.734 -9095.6
## - cvd           1    0.0024 28.736 -9095.5
## - hypertension  1    0.0034 28.737 -9095.4
## - parkinson     1    0.0071 28.741 -9095.1
## - cancer        1    0.0075 28.741 -9095.1
## - diabetes      1    0.0082 28.742 -9095.0
## - falls_n       1    0.0134 28.747 -9094.7
## <none>                      28.734 -9093.6
## - rheum         1    0.0446 28.778 -9092.3
## - height        1    0.1631 28.897 -9083.6
## - prior_fx      1    0.2419 28.976 -9077.9
## - smoking       1    0.3836 29.117 -9067.5
## - sex           1    0.8175 29.551 -9036.1
## - age           1    2.1555 30.889 -8942.2
## - weight        1    5.0206 33.754 -8754.1
## 
## Step:  AIC=-9095.62
## fnbmd ~ age + sex + weight + height + prior_fx + falls_n + smoking + 
##     parkinson + rheum + hypertension + diabetes + cancer + cvd
## 
##                Df Sum of Sq    RSS     AIC
## - cvd           1    0.0025 28.736 -9097.4
## - hypertension  1    0.0034 28.737 -9097.4
## - parkinson     1    0.0071 28.741 -9097.1
## - cancer        1    0.0075 28.741 -9097.1
## - diabetes      1    0.0082 28.742 -9097.0
## - falls_n       1    0.0133 28.747 -9096.6
## <none>                      28.734 -9095.6
## - rheum         1    0.0447 28.779 -9094.3
## - height        1    0.1630 28.897 -9085.6
## - prior_fx      1    0.2418 28.976 -9079.9
## - smoking       1    0.3834 29.117 -9069.5
## - sex           1    0.8180 29.552 -9038.1
## - age           1    2.1586 30.892 -8944.0
## - weight        1    5.0207 33.755 -8756.1
## 
## Step:  AIC=-9097.44
## fnbmd ~ age + sex + weight + height + prior_fx + falls_n + smoking + 
##     parkinson + rheum + hypertension + diabetes + cancer
## 
##                Df Sum of Sq    RSS     AIC
## - hypertension  1    0.0041 28.740 -9099.1
## - parkinson     1    0.0071 28.743 -9098.9
## - diabetes      1    0.0074 28.744 -9098.9
## - cancer        1    0.0077 28.744 -9098.9
## - falls_n       1    0.0133 28.750 -9098.5
## <none>                      28.736 -9097.4
## - rheum         1    0.0440 28.780 -9096.2
## - height        1    0.1628 28.899 -9087.5
## - prior_fx      1    0.2418 28.978 -9081.7
## - smoking       1    0.3849 29.121 -9071.2
## - sex           1    0.8181 29.554 -9039.9
## - age           1    2.1581 30.894 -8945.9
## - weight        1    5.0198 33.756 -8758.0
## 
## Step:  AIC=-9099.14
## fnbmd ~ age + sex + weight + height + prior_fx + falls_n + smoking + 
##     parkinson + rheum + diabetes + cancer
## 
##             Df Sum of Sq    RSS     AIC
## - diabetes   1    0.0064 28.747 -9100.7
## - parkinson  1    0.0074 28.748 -9100.6
## - cancer     1    0.0077 28.748 -9100.6
## - falls_n    1    0.0130 28.753 -9100.2
## <none>                   28.740 -9099.1
## - rheum      1    0.0442 28.785 -9097.9
## - height     1    0.1636 28.904 -9089.1
## - prior_fx   1    0.2409 28.981 -9083.4
## - smoking    1    0.3830 29.123 -9073.1
## - sex        1    0.8148 29.555 -9041.8
## - age        1    2.1568 30.897 -8947.7
## - weight     1    5.0175 33.758 -8759.8
## 
## Step:  AIC=-9100.66
## fnbmd ~ age + sex + weight + height + prior_fx + falls_n + smoking + 
##     parkinson + rheum + cancer
## 
##             Df Sum of Sq    RSS     AIC
## - parkinson  1    0.0078 28.755 -9102.1
## - cancer     1    0.0080 28.755 -9102.1
## - falls_n    1    0.0129 28.760 -9101.7
## <none>                   28.747 -9100.7
## - rheum      1    0.0449 28.792 -9099.3
## - height     1    0.1622 28.909 -9090.7
## - prior_fx   1    0.2425 28.989 -9084.8
## - smoking    1    0.3820 29.129 -9074.7
## - sex        1    0.8189 29.566 -9043.1
## - age        1    2.1640 30.911 -8948.7
## - weight     1    5.0200 33.767 -8761.3
## 
## Step:  AIC=-9102.09
## fnbmd ~ age + sex + weight + height + prior_fx + falls_n + smoking + 
##     rheum + cancer
## 
##            Df Sum of Sq    RSS     AIC
## - cancer    1    0.0080 28.763 -9103.5
## - falls_n   1    0.0127 28.767 -9103.2
## <none>                  28.755 -9102.1
## - rheum     1    0.0454 28.800 -9100.7
## - height    1    0.1622 28.917 -9092.2
## - prior_fx  1    0.2407 28.995 -9086.4
## - smoking   1    0.3823 29.137 -9076.1
## - sex       1    0.8188 29.573 -9044.5
## - age       1    2.1706 30.925 -8949.7
## - weight    1    5.0248 33.779 -8762.5
## 
## Step:  AIC=-9103.5
## fnbmd ~ age + sex + weight + height + prior_fx + falls_n + smoking + 
##     rheum
## 
##            Df Sum of Sq    RSS     AIC
## - falls_n   1    0.0132 28.776 -9104.5
## <none>                  28.763 -9103.5
## - rheum     1    0.0471 28.810 -9102.0
## - height    1    0.1640 28.927 -9093.4
## - prior_fx  1    0.2405 29.003 -9087.8
## - smoking   1    0.3877 29.150 -9077.1
## - sex       1    0.8209 29.584 -9045.8
## - age       1    2.1644 30.927 -8951.6
## - weight    1    5.0254 33.788 -8764.0
## 
## Step:  AIC=-9104.53
## fnbmd ~ age + sex + weight + height + prior_fx + smoking + rheum
## 
##            Df Sum of Sq    RSS     AIC
## <none>                  28.776 -9104.5
## - rheum     1    0.0484 28.824 -9103.0
## - height    1    0.1665 28.942 -9094.3
## - prior_fx  1    0.2443 29.020 -9088.6
## - smoking   1    0.3930 29.169 -9077.8
## - sex       1    0.8192 29.595 -9047.0
## - age       1    2.1621 30.938 -8952.9
## - weight    1    5.0190 33.795 -8765.5
# Coefficient table of the model retained by the stepwise search.
summary(object = step)
## 
## Call:
## lm(formula = fnbmd ~ age + sex + weight + height + prior_fx + 
##     smoking + rheum, data = fx.2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37449 -0.07631 -0.00732  0.06924  0.56471 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.6089875  0.0759281   8.021 1.73e-15 ***
## age         -0.0048301  0.0003833 -12.600  < 2e-16 ***
## sexMale      0.0602003  0.0077620   7.756 1.35e-14 ***
## weight       0.0043244  0.0002253  19.198  < 2e-16 ***
## height       0.0015153  0.0004334   3.496 0.000481 ***
## prior_fx    -0.0311566  0.0073569  -4.235 2.38e-05 ***
## smoking     -0.0290938  0.0054157  -5.372 8.64e-08 ***
## rheum        0.0246442  0.0130749   1.885 0.059587 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1167 on 2113 degrees of freedom
## Multiple R-squared:  0.4332, Adjusted R-squared:  0.4313 
## F-statistic: 230.7 on 7 and 2113 DF,  p-value: < 2.2e-16

5.3.2 Phương pháp BMA

library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-4)
# Candidate predictors for Bayesian model averaging (outcome: fnbmd).
bma_predictors <- c(
  "age", "sex", "weight", "height", "prior_fx", "falls_n", "smoking",
  "parkinson", "rheum", "hypertension", "diabetes", "copd", "cancer", "cvd"
)
xvars <- fx.2[, bma_predictors]
# Fit with bicreg() and list the selected models with their posterior
# probabilities.
m.bma <- bicreg(x = xvars, y = fx.2$fnbmd, strict = FALSE, OR = 20)
summary(m.bma)
## 
## Call:
## bicreg(x = xvars, y = fx.2$fnbmd, strict = FALSE, OR = 20)
## 
## 
##   3  models were selected
##  Best  3  models (cumulative posterior probability =  1 ): 
## 
##               p!=0    EV        SD         model 1     model 2     model 3   
## Intercept     100.0   0.624291  0.0969317   6.062e-01   6.090e-01   8.477e-01
## age           100.0  -0.004836  0.0003879  -4.817e-03  -4.830e-03  -5.050e-03
## sexMale       100.0   0.061274  0.0088264   6.002e-02   6.020e-02   7.683e-02
## weight        100.0   0.004350  0.0002376   4.329e-03   4.324e-03   4.629e-03
## height         92.6   0.001417  0.0005777   1.531e-03   1.515e-03       .    
## prior_fx      100.0  -0.031084  0.0073646  -3.101e-02  -3.116e-02  -3.180e-02
## falls_n         0.0   0.000000  0.0000000       .           .           .    
## smoking       100.0  -0.029203  0.0054195  -2.921e-02  -2.909e-02  -2.929e-02
## parkinson       0.0   0.000000  0.0000000       .           .           .    
## rheum          10.7   0.002638  0.0087381       .       2.464e-02       .    
## hypertension    0.0   0.000000  0.0000000       .           .           .    
## diabetes        0.0   0.000000  0.0000000       .           .           .    
## copd            0.0   0.000000  0.0000000       .           .           .    
## cancer          0.0   0.000000  0.0000000       .           .           .    
## cvd             0.0   0.000000  0.0000000       .           .           .    
##                                                                              
## nVar                                          6           7           5      
## r2                                          0.432       0.433       0.429    
## BIC                                        -1.155e+03  -1.151e+03  -1.150e+03
## post prob                                   0.819       0.107       0.074
# Image plot of the BMA result: which variables enter which selected model.
imageplot.bma(m.bma)

5.4 Kiểm tra giả định của mô hình

# Refit the model with the highest posterior probability in the BMA summary
# (age, sex, weight, height, prior_fx, smoking) as an ordinary linear model.
m.bmd <- lm(
  formula = fnbmd ~ age + sex + weight + height + prior_fx + smoking,
  data = fx.2
)
summary(m.bmd)
## 
## Call:
## lm(formula = fnbmd ~ age + sex + weight + height + prior_fx + 
##     smoking, data = fx.2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37332 -0.07653 -0.00745  0.07113  0.58841 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.6062028  0.0759596   7.981 2.37e-15 ***
## age         -0.0048171  0.0003835 -12.561  < 2e-16 ***
## sexMale      0.0600151  0.0077660   7.728 1.68e-14 ***
## weight       0.0043288  0.0002254  19.206  < 2e-16 ***
## height       0.0015312  0.0004335   3.532 0.000422 ***
## prior_fx    -0.0310094  0.0073609  -4.213 2.63e-05 ***
## smoking     -0.0292099  0.0054186  -5.391 7.80e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1168 on 2114 degrees of freedom
## Multiple R-squared:  0.4322, Adjusted R-squared:  0.4306 
## F-statistic: 268.2 on 6 and 2114 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for m.bmd, arranged on a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so that
# later base-graphics plots are not silently forced into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(m.bmd)
par(old_par)

5.5 Viết công thức của mô hình tối ưu

# Coefficients of m.bmd, used to write out the prediction equation.
summary(object = m.bmd)
## 
## Call:
## lm(formula = fnbmd ~ age + sex + weight + height + prior_fx + 
##     smoking, data = fx.2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37332 -0.07653 -0.00745  0.07113  0.58841 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.6062028  0.0759596   7.981 2.37e-15 ***
## age         -0.0048171  0.0003835 -12.561  < 2e-16 ***
## sexMale      0.0600151  0.0077660   7.728 1.68e-14 ***
## weight       0.0043288  0.0002254  19.206  < 2e-16 ***
## height       0.0015312  0.0004335   3.532 0.000422 ***
## prior_fx    -0.0310094  0.0073609  -4.213 2.63e-05 ***
## smoking     -0.0292099  0.0054186  -5.391 7.80e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1168 on 2114 degrees of freedom
## Multiple R-squared:  0.4322, Adjusted R-squared:  0.4306 
## F-statistic: 268.2 on 6 and 2114 DF,  p-value: < 2.2e-16

Việc 6. Ghi lại tất cả các hàm/lệnh trên và chia sẻ lên tài khoản rpubs (https://rpubs.com/ThachTran/1214042)