Ngày 6: Hồi qui logistic

Việc 1. Đọc dữ liệu ‘Bone data’

n6 = read.csv("D:\\OneDrive\\Documents\\HPK1 2018\\NVPS2025\\Viet bao khoa hoc paper\\R\\DU LIEU THUC HANH\\Bone data.csv")
head(n6)
##   id    sex age weight height prior.fx fnbmd smoking fx
## 1  1   Male  73     98    175        0  1.08       1  0
## 2  2 Female  68     72    166        0  0.97       0  0
## 3  3   Male  68     87    184        0  1.01       0  0
## 4  4 Female  62     72    173        0  0.84       1  0
## 5  5   Male  61     72    173        0  0.81       1  0
## 6  6 Female  76     57    156        0  0.74       0  0

##Việc 2. Đánh giá mối liên quan giữa hút thuốc và nguy cơ gãy xương

m.1 = glm(fx ~ smoking, family = binomial, data = n6)
summary(m.1)
## 
## Call:
## glm(formula = fx ~ smoking, family = binomial, data = n6)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.03006    0.06442 -15.990   <2e-16 ***
## smoking     -0.13796    0.10081  -1.368    0.171    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2441.4  on 2161  degrees of freedom
## Residual deviance: 2439.5  on 2160  degrees of freedom
## AIC: 2443.5
## 
## Number of Fisher Scoring iterations: 4
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet

###Việc 3. Đánh giá mối liên quan độc lập giữa hút thuốc và nguy cơ gãy xương

library(compareGroups)
createTable(compareGroups(sex ~ smoking + fx, data = n6))
## 
## --------Summary descriptives table by 'sex'---------
## 
## _________________________________________ 
##           Female       Male     p.overall 
##           N=1317       N=845              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## smoking 0.30 (0.46) 0.62 (0.49)  <0.001   
## fx      0.30 (0.46) 0.17 (0.38)  <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

hiệu chỉnh nhiễu

 n6$gender = ifelse(n6$sex == "Male", 1, 0)
m.2 = glm(fx ~ smoking + gender, family = binomial, data = n6)
logistic.display(m.2)
## 
## Logistic regression predicting fx 
##  
##                  crude OR(95%CI)   adj. OR(95%CI)    P(Wald's test) P(LR-test)
## smoking: 1 vs 0  0.87 (0.71,1.06)  1.1 (0.89,1.36)   0.357          0.358     
##                                                                               
## gender: 1 vs 0   0.47 (0.38,0.58)  0.45 (0.36,0.57)  < 0.001        < 0.001   
##                                                                               
## Log-likelihood = -1194.7997
## No. of observations = 2162
## AIC value = 2395.5994

Việc 4. Đánh giá tầm quan trọng của biến số

library(caret)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:epiDisplay':
## 
##     alpha
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
## 
##     dotplot
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
m.rel = glm(fx ~ gender + age + fnbmd + weight, family = binomial, data = n6)
varImp(m.rel, scale = FALSE)
##          Overall
## gender 2.3022178
## age    2.5112611
## fnbmd  8.7193666
## weight 0.8239425

###Việc 5. Xây dựng mô hình dự báo nguy cơ gãy xương 5.1 Tìm mô hình tối ưu bằng pp BMA

library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-7)
n6.2 = na.omit(n6)
xvars = n6.2[, c("sex", "age", "weight", "height", "fnbmd", "smoking", "prior.fx")]
yvar = n6.2[, c("fx")]

m.bma = bic.glm(xvars, yvar, strict = FALSE, OR = 20, glm.family = "binomial")
summary(m.bma)
## 
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = "binomial",     strict = FALSE, OR = 20)
## 
## 
##   4  models were selected
##  Best  4  models (cumulative posterior probability =  1 ): 
## 
##            p!=0    EV        SD        model 1     model 2     model 3   
## Intercept  100     2.249962  0.674258   2.509e+00   2.340e+00   1.140e+00
## sex         18.2  -0.048048  0.114551       .      -2.578e-01       .    
## age         16.2   0.002663  0.006838       .           .       1.602e-02
## weight       0.0   0.000000  0.000000       .           .           .    
## height       0.0   0.000000  0.000000       .           .           .    
## fnbmd      100.0  -4.487636  0.436674  -4.595e+00  -4.279e+00  -4.302e+00
## smoking      0.0   0.000000  0.000000       .           .           .    
## prior.fx   100.0   0.530729  0.134289   5.306e-01   5.445e-01   5.156e-01
##                                                                          
## nVar                                      2           3           3      
## BIC                                    -1.402e+04  -1.402e+04  -1.402e+04
## post prob                               0.696       0.142       0.122    
##            model 4   
## Intercept   7.978e-01
## sex        -2.851e-01
## age         1.780e-02
## weight          .    
## height          .    
## fnbmd      -3.918e+00
## smoking         .    
## prior.fx    5.296e-01
##                      
## nVar          4      
## BIC        -1.402e+04
## post prob   0.040
##hiện biểu đồ
imageplot.bma(m.bma)

###5.2 Mô hình tối ưu

imageplot.bma(m.bma)

n6.2$fnbmd.sd = n6.2$fnbmd/0.15
m.3 = glm(fx ~ fnbmd.sd, family = binomial, data = n6.2)
logistic.display(m.3)
## 
## Logistic regression predicting fx 
##  
##                       OR(95%CI)         P(Wald's test) P(LR-test)
## fnbmd.sd (cont. var.) 0.49 (0.43,0.54)  < 0.001        < 0.001   
##                                                                  
## Log-likelihood = -1106.8757
## No. of observations = 2121
## AIC value = 2217.7514

Kết thúc