##Việc 1. Đọc dữ liệu ‘Bone data’:file.choose() “D:\R\DU LIEU THUC HANH TS ThACH GUI\Bone data.csv”
os = read.csv("D:\\R\\DU LIEU THUC HANH TS ThACH GUI\\Bone data.csv")
head(os)
## id sex age weight height prior.fx fnbmd smoking fx
## 1 1 Male 73 98 175 0 1.08 1 0
## 2 2 Female 68 72 166 0 0.97 0 0
## 3 3 Male 68 87 184 0 1.01 0 0
## 4 4 Female 62 72 173 0 0.84 1 0
## 5 5 Male 61 72 173 0 0.81 1 0
## 6 6 Female 76 57 156 0 0.74 0 0
##Việc 2. Đánh giá mối liên quan giữa hút thuốc và nguy cơ gãy xương
m.1 = glm(fx ~ smoking, family = binomial, data = os)
summary(m.1)
##
## Call:
## glm(formula = fx ~ smoking, family = binomial, data = os)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.03006 0.06442 -15.990 <2e-16 ***
## smoking -0.13796 0.10081 -1.368 0.171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2441.4 on 2161 degrees of freedom
## Residual deviance: 2439.5 on 2160 degrees of freedom
## AIC: 2443.5
##
## Number of Fisher Scoring iterations: 4
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
logistic.display(m.1)
##
## Logistic regression predicting fx
##
## OR(95%CI) P(Wald's test) P(LR-test)
## smoking: 1 vs 0 0.87 (0.71,1.06) 0.171 0.17
##
## Log-likelihood = -1219.7469
## No. of observations = 2162
## AIC value = 2443.4938
###GPT
logistic_model <- glm(fx ~ smoking, data = os, family = binomial)
summary(logistic_model)
##
## Call:
## glm(formula = fx ~ smoking, family = binomial, data = os)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.03006 0.06442 -15.990 <2e-16 ***
## smoking -0.13796 0.10081 -1.368 0.171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2441.4 on 2161 degrees of freedom
## Residual deviance: 2439.5 on 2160 degrees of freedom
## AIC: 2443.5
##
## Number of Fisher Scoring iterations: 4
##Việc 3. Đánh giá mối liên quan độc lập giữa hút thuốc và nguy cơ gãy xương. Giới tính là yếu tố gây nhiễu
library(compareGroups)
createTable(compareGroups(sex ~ smoking + fx, data = os))
##
## --------Summary descriptives table by 'sex'---------
##
## _________________________________________
## Female Male p.overall
## N=1317 N=845
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## smoking 0.30 (0.46) 0.62 (0.49) <0.001
## fx 0.30 (0.46) 0.17 (0.38) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
###[ChatGPT] #PROMPT: “Y văn ghi nhận giới tính (sex) là yếu tố gây nhiễu. Bạn thực hiện mô hinh hồi qui logistic đa biến đánh giá mối liên quan độc lập giữa hút thuốc và nguy cơ gãy xương”
logistic_model_multivariate <- glm(fx ~ smoking + sex, data = os, family = binomial)
summary(logistic_model_multivariate)
##
## Call:
## glm(formula = fx ~ smoking + sex, family = binomial, data = os)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.85598 0.06835 -12.523 < 2e-16 ***
## smoking 0.09872 0.10724 0.921 0.357
## sexMale -0.78880 0.11493 -6.863 6.73e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2441.4 on 2161 degrees of freedom
## Residual deviance: 2389.6 on 2159 degrees of freedom
## AIC: 2395.6
##
## Number of Fisher Scoring iterations: 4
##Việc 4. Đánh giá tầm quan trọng của biến số
library(caret)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:epiDisplay':
##
## alpha
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
##
## dotplot
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
m.rel <- glm(fx ~ sex + age + fnbmd + weight, family = binomial, data = os)
varImp(m.rel, scale = FALSE)
## Overall
## sexMale 2.3022178
## age 2.5112611
## fnbmd 8.7193666
## weight 0.8239425
###GPT- Làm tiếp sau ##Việc 5. Xây dựng mô hình dự báo nguy cơ gãy xương ###5.1 Tìm mô hình tối ưu bằng pp BMA
library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-7)
os.2 = na.omit(os)
xvars = os.2[, c("sex", "age", "weight", "height", "fnbmd", "smoking", "prior.fx")]
yvar = os.2[, c("fx")]
m.bma = bic.glm(xvars, yvar, strict = FALSE, OR = 20, glm.family = "binomial")
summary(m.bma)
##
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = "binomial", strict = FALSE, OR = 20)
##
##
## 4 models were selected
## Best 4 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 2.249962 0.674258 2.509e+00 2.340e+00 1.140e+00
## sex 18.2 -0.048048 0.114551 . -2.578e-01 .
## age 16.2 0.002663 0.006838 . . 1.602e-02
## weight 0.0 0.000000 0.000000 . . .
## height 0.0 0.000000 0.000000 . . .
## fnbmd 100.0 -4.487636 0.436674 -4.595e+00 -4.279e+00 -4.302e+00
## smoking 0.0 0.000000 0.000000 . . .
## prior.fx 100.0 0.530729 0.134289 5.306e-01 5.445e-01 5.156e-01
##
## nVar 2 3 3
## BIC -1.402e+04 -1.402e+04 -1.402e+04
## post prob 0.696 0.142 0.122
## model 4
## Intercept 7.978e-01
## sex -2.851e-01
## age 1.780e-02
## weight .
## height .
## fnbmd -3.918e+00
## smoking .
## prior.fx 5.296e-01
##
## nVar 4
## BIC -1.402e+04
## post prob 0.040
imageplot.bma(m.bma)
###5.2 Mô hình tối ưu
imageplot.bma(m.bma)
os.2$fnbmd.sd = os.2$fnbmd/0.15
m.3 = glm(fx ~ fnbmd.sd, family = binomial, data = os.2)
logistic.display(m.3)
##
## Logistic regression predicting fx
##
## OR(95%CI) P(Wald's test) P(LR-test)
## fnbmd.sd (cont. var.) 0.49 (0.43,0.54) < 0.001 < 0.001
##
## Log-likelihood = -1106.8757
## No. of observations = 2121
## AIC value = 2217.7514
####GPT:PROMPT: “Xây dựng mô hình tối ưu dự báo gãy xương từ những biến số như tuổi (age), giới tính (sex), cân nặng (weight), chiều cao (height), mật độ xương (fnbmd), hút thuốc là (smoking) và tiền căn gãy xương (prior.fx). Dùng phương pháp Bayesian Model Averaging từ gói lệnh BMA”
logistic_bma_model <- bic.glm(fx ~ age + sex + weight + height + fnbmd + smoking + prior.fx,data = os, glm.family = "binomial")
summary(logistic_bma_model)
##
## Call:
## bic.glm.formula(f = fx ~ age + sex + weight + height + fnbmd + smoking + prior.fx, data = os, glm.family = "binomial")
##
##
## 4 models were selected
## Best 4 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 2.249962 0.674258 2.509e+00 2.340e+00 1.140e+00
## age 16.2 0.002663 0.006838 . . 1.602e-02
## sexMale 18.2 -0.048048 0.114551 . -2.578e-01 .
## weight 0.0 0.000000 0.000000 . . .
## height 0.0 0.000000 0.000000 . . .
## fnbmd 100.0 -4.487636 0.436674 -4.595e+00 -4.279e+00 -4.302e+00
## smoking 0.0 0.000000 0.000000 . . .
## prior.fx 100.0 0.530729 0.134289 5.306e-01 5.445e-01 5.156e-01
##
## nVar 2 3 3
## BIC -1.402e+04 -1.402e+04 -1.402e+04
## post prob 0.696 0.142 0.122
## model 4
## Intercept 7.978e-01
## age 1.780e-02
## sexMale -2.851e-01
## weight .
## height .
## fnbmd -3.918e+00
## smoking .
## prior.fx 5.296e-01
##
## nVar 4
## BIC -1.402e+04
## post prob 0.040
##
## 41 observations deleted due to missingness.
##Việc 6. Ghi lại tất cả các hàm/lệnh trên và chia sẻ lên tài khoản rpubs https://rpubs.com/PHUC/1310628