Việc 1. Đọc dữ liệu ‘Bone data’
n6 = read.csv("D:\\OneDrive\\Documents\\HPK1 2018\\NVPS2025\\Viet bao khoa hoc paper\\R\\DU LIEU THUC HANH\\Bone data.csv")
head(n6)
## id sex age weight height prior.fx fnbmd smoking fx
## 1 1 Male 73 98 175 0 1.08 1 0
## 2 2 Female 68 72 166 0 0.97 0 0
## 3 3 Male 68 87 184 0 1.01 0 0
## 4 4 Female 62 72 173 0 0.84 1 0
## 5 5 Male 61 72 173 0 0.81 1 0
## 6 6 Female 76 57 156 0 0.74 0 0
##Việc 2. Đánh giá mối liên quan giữa hút thuốc và nguy cơ gãy xương
m.1 = glm(fx ~ smoking, family = binomial, data = n6)
summary(m.1)
##
## Call:
## glm(formula = fx ~ smoking, family = binomial, data = n6)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.03006 0.06442 -15.990 <2e-16 ***
## smoking -0.13796 0.10081 -1.368 0.171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2441.4 on 2161 degrees of freedom
## Residual deviance: 2439.5 on 2160 degrees of freedom
## AIC: 2443.5
##
## Number of Fisher Scoring iterations: 4
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
###Việc 3. Đánh giá mối liên quan độc lập giữa hút thuốc và nguy cơ gãy xương
library(compareGroups)
createTable(compareGroups(sex ~ smoking + fx, data = n6))
##
## --------Summary descriptives table by 'sex'---------
##
## _________________________________________
## Female Male p.overall
## N=1317 N=845
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## smoking 0.30 (0.46) 0.62 (0.49) <0.001
## fx 0.30 (0.46) 0.17 (0.38) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
n6$gender = ifelse(n6$sex == "Male", 1, 0)
m.2 = glm(fx ~ smoking + gender, family = binomial, data = n6)
logistic.display(m.2)
##
## Logistic regression predicting fx
##
## crude OR(95%CI) adj. OR(95%CI) P(Wald's test) P(LR-test)
## smoking: 1 vs 0 0.87 (0.71,1.06) 1.1 (0.89,1.36) 0.357 0.358
##
## gender: 1 vs 0 0.47 (0.38,0.58) 0.45 (0.36,0.57) < 0.001 < 0.001
##
## Log-likelihood = -1194.7997
## No. of observations = 2162
## AIC value = 2395.5994
library(caret)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:epiDisplay':
##
## alpha
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
##
## dotplot
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
m.rel = glm(fx ~ gender + age + fnbmd + weight, family = binomial, data = n6)
varImp(m.rel, scale = FALSE)
## Overall
## gender 2.3022178
## age 2.5112611
## fnbmd 8.7193666
## weight 0.8239425
###Việc 5. Xây dựng mô hình dự báo nguy cơ gãy xương 5.1 Tìm mô hình tối ưu bằng pp BMA
library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-7)
n6.2 = na.omit(n6)
xvars = n6.2[, c("sex", "age", "weight", "height", "fnbmd", "smoking", "prior.fx")]
yvar = n6.2[, c("fx")]
m.bma = bic.glm(xvars, yvar, strict = FALSE, OR = 20, glm.family = "binomial")
summary(m.bma)
##
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = "binomial", strict = FALSE, OR = 20)
##
##
## 4 models were selected
## Best 4 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 2.249962 0.674258 2.509e+00 2.340e+00 1.140e+00
## sex 18.2 -0.048048 0.114551 . -2.578e-01 .
## age 16.2 0.002663 0.006838 . . 1.602e-02
## weight 0.0 0.000000 0.000000 . . .
## height 0.0 0.000000 0.000000 . . .
## fnbmd 100.0 -4.487636 0.436674 -4.595e+00 -4.279e+00 -4.302e+00
## smoking 0.0 0.000000 0.000000 . . .
## prior.fx 100.0 0.530729 0.134289 5.306e-01 5.445e-01 5.156e-01
##
## nVar 2 3 3
## BIC -1.402e+04 -1.402e+04 -1.402e+04
## post prob 0.696 0.142 0.122
## model 4
## Intercept 7.978e-01
## sex -2.851e-01
## age 1.780e-02
## weight .
## height .
## fnbmd -3.918e+00
## smoking .
## prior.fx 5.296e-01
##
## nVar 4
## BIC -1.402e+04
## post prob 0.040
##hiện biểu đồ
imageplot.bma(m.bma)
###5.2 Mô hình tối ưu
imageplot.bma(m.bma)
n6.2$fnbmd.sd = n6.2$fnbmd/0.15
m.3 = glm(fx ~ fnbmd.sd, family = binomial, data = n6.2)
logistic.display(m.3)
##
## Logistic regression predicting fx
##
## OR(95%CI) P(Wald's test) P(LR-test)
## fnbmd.sd (cont. var.) 0.49 (0.43,0.54) < 0.001 < 0.001
##
## Log-likelihood = -1106.8757
## No. of observations = 2121
## AIC value = 2217.7514