library(readxl)
fx = as.data.frame(read_excel("C:\\Thach\\VN trips\\2024_2Aug\\Data Analysis workshop (Hospital 108)\\Datasets\\Osteo-data.xlsx"))
dim(fx)
## [1] 2216 17
summary(fx)
## id sex age weight
## Min. : 1.0 Length:2216 Min. :57.00 Min. : 34.00
## 1st Qu.: 554.8 Class :character 1st Qu.:65.00 1st Qu.: 60.00
## Median :1108.5 Mode :character Median :70.00 Median : 69.00
## Mean :1108.5 Mean :70.89 Mean : 70.14
## 3rd Qu.:1662.2 3rd Qu.:76.00 3rd Qu.: 79.00
## Max. :2216.0 Max. :96.00 Max. :133.00
## NA's :53
## height prior_fx fnbmd smoking
## Min. :136.0 Min. :0.0000 Min. :0.2800 Min. :0.0000
## 1st Qu.:158.0 1st Qu.:0.0000 1st Qu.:0.7300 1st Qu.:0.0000
## Median :164.0 Median :0.0000 Median :0.8200 Median :0.0000
## Mean :164.9 Mean :0.1611 Mean :0.8287 Mean :0.4176
## 3rd Qu.:171.0 3rd Qu.:0.0000 3rd Qu.:0.9300 3rd Qu.:1.0000
## Max. :196.0 Max. :1.0000 Max. :1.5100 Max. :1.0000
## NA's :54 NA's :89 NA's :1
## parkinson rheum hypertension diabetes
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.00000 Median :0.00000 Median :1.0000 Median :0.000
## Mean :0.06498 Mean :0.03881 Mean :0.5063 Mean :0.111
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.000
##
## copd cancer cvd falls_n
## Min. :0.000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.111 Mean :0.08529 Mean :0.3872 Mean :0.2843
## 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.00000 Max. :1.0000 Max. :2.0000
##
## fx
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2595
## 3rd Qu.:1.0000
## Max. :1.0000
##
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
fx = fx %>% mutate(
prior_fx = as.factor(prior_fx),
falls_n = as.factor(falls_n),
smoking = as.factor(smoking),
parkinson = as.factor(parkinson),
rheum = as.factor(rheum),
hypertension = as.factor(hypertension),
diabetes = as.factor(diabetes),
copd = as.factor(copd),
cancer = as.factor(cancer),
cvd = as.factor(cvd),
fx = as.factor(fx)
)
library(table1)
##
## Attaching package: 'table1'
##
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~ sex + age + weight + height + fnbmd + prior_fx + falls_n + smoking + parkinson + rheum + hypertension + diabetes + copd + cancer + cvd | fx, data = fx)
0 (N=1641) |
1 (N=575) |
Overall (N=2216) |
|
---|---|---|---|
sex | |||
Female | 932 (56.8%) | 426 (74.1%) | 1358 (61.3%) |
Male | 709 (43.2%) | 149 (25.9%) | 858 (38.7%) |
age | |||
Mean (SD) | 70.2 (6.89) | 72.9 (7.58) | 70.9 (7.17) |
Median [Min, Max] | 69.0 [57.0, 94.0] | 72.0 [60.0, 96.0] | 70.0 [57.0, 96.0] |
weight | |||
Mean (SD) | 71.5 (14.2) | 66.0 (13.3) | 70.1 (14.2) |
Median [Min, Max] | 70.0 [36.0, 133] | 65.0 [34.0, 102] | 69.0 [34.0, 133] |
Missing | 23 (1.4%) | 30 (5.2%) | 53 (2.4%) |
height | |||
Mean (SD) | 166 (9.40) | 162 (8.74) | 165 (9.35) |
Median [Min, Max] | 165 [139, 192] | 162 [136, 196] | 164 [136, 196] |
Missing | 24 (1.5%) | 30 (5.2%) | 54 (2.4%) |
fnbmd | |||
Mean (SD) | 0.854 (0.151) | 0.754 (0.140) | 0.829 (0.155) |
Median [Min, Max] | 0.850 [0.380, 1.51] | 0.750 [0.280, 1.25] | 0.820 [0.280, 1.51] |
Missing | 51 (3.1%) | 38 (6.6%) | 89 (4.0%) |
prior_fx | |||
0 | 1419 (86.5%) | 440 (76.5%) | 1859 (83.9%) |
1 | 222 (13.5%) | 135 (23.5%) | 357 (16.1%) |
falls_n | |||
0 | 1275 (77.7%) | 459 (79.8%) | 1734 (78.2%) |
1 | 252 (15.4%) | 82 (14.3%) | 334 (15.1%) |
2 | 114 (6.9%) | 34 (5.9%) | 148 (6.7%) |
smoking | |||
0 | 935 (57.0%) | 355 (61.7%) | 1290 (58.2%) |
1 | 705 (43.0%) | 220 (38.3%) | 925 (41.7%) |
Missing | 1 (0.1%) | 0 (0%) | 1 (0.0%) |
parkinson | |||
0 | 1537 (93.7%) | 535 (93.0%) | 2072 (93.5%) |
1 | 104 (6.3%) | 40 (7.0%) | 144 (6.5%) |
rheum | |||
0 | 1576 (96.0%) | 554 (96.3%) | 2130 (96.1%) |
1 | 65 (4.0%) | 21 (3.7%) | 86 (3.9%) |
hypertension | |||
0 | 805 (49.1%) | 289 (50.3%) | 1094 (49.4%) |
1 | 836 (50.9%) | 286 (49.7%) | 1122 (50.6%) |
diabetes | |||
0 | 1456 (88.7%) | 514 (89.4%) | 1970 (88.9%) |
1 | 185 (11.3%) | 61 (10.6%) | 246 (11.1%) |
copd | |||
0 | 1452 (88.5%) | 518 (90.1%) | 1970 (88.9%) |
1 | 189 (11.5%) | 57 (9.9%) | 246 (11.1%) |
cancer | |||
0 | 1503 (91.6%) | 524 (91.1%) | 2027 (91.5%) |
1 | 138 (8.4%) | 51 (8.9%) | 189 (8.5%) |
cvd | |||
0 | 993 (60.5%) | 365 (63.5%) | 1358 (61.3%) |
1 | 648 (39.5%) | 210 (36.5%) | 858 (38.7%) |
p = ggplot(data = fx, aes(x = fx, y = fnbmd, fill = fx, col = fx))
p1 = p + geom_boxplot(col = "black") + geom_jitter(alpha = 0.05) + labs(x = "Gãy xương", y = "MĐX cổ xương đùi (g/cm2)") + ggtitle("Liên quan giữa MĐX và khả năng bị gãy xương")
p1
## Warning: Removed 89 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 89 rows containing missing values (`geom_point()`).
m.bmd = glm(fx ~ fnbmd, family = "binomial", data = fx)
summary(m.bmd)
##
## Call:
## glm(formula = fx ~ fnbmd, family = "binomial", data = fx)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.7609 0.3055 9.036 <2e-16 ***
## fnbmd -4.7917 0.3856 -12.427 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2403.6 on 2126 degrees of freedom
## Residual deviance: 2222.8 on 2125 degrees of freedom
## (89 observations deleted due to missingness)
## AIC: 2226.8
##
## Number of Fisher Scoring iterations: 4
library(epiDisplay)
## Warning: package 'epiDisplay' was built under R version 4.3.2
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
## The following object is masked from 'package:ggplot2':
##
## alpha
logistic.display(m.bmd)
##
## Logistic regression predicting fx : 1 vs 0
##
## OR(95%CI) P(Wald's test) P(LR-test)
## fnbmd (cont. var.) 0.01 (0,0.02) < 0.001 < 0.001
##
## Log-likelihood = -1111.4092
## No. of observations = 2127
## AIC value = 2226.8184
library(Publish)
## Loading required package: prodlim
publish(m.bmd)
## Variable Units OddsRatio CI.95 p-value
## fnbmd 0.01 [0.00;0.02] < 1e-04
Nhận xét: gia tăng mỗi 1 g/cm2 không thực tế
fx$fnbmd.sd = fx$fnbmd/(-0.155)
m.bmd.sd = glm(fx ~ fnbmd.sd, family = "binomial", data = fx)
summary(m.bmd.sd)
##
## Call:
## glm(formula = fx ~ fnbmd.sd, family = "binomial", data = fx)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.76088 0.30554 9.036 <2e-16 ***
## fnbmd.sd 0.74271 0.05976 12.427 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2403.6 on 2126 degrees of freedom
## Residual deviance: 2222.8 on 2125 degrees of freedom
## (89 observations deleted due to missingness)
## AIC: 2226.8
##
## Number of Fisher Scoring iterations: 4
publish(m.bmd.sd)
## Variable Units OddsRatio CI.95 p-value
## fnbmd.sd 2.10 [1.87;2.36] < 1e-04
m.sex = glm(fx ~ sex, family = "binomial", data = fx)
summary(m.sex)
##
## Call:
## glm(formula = fx ~ sex, family = "binomial", data = fx)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.78289 0.05848 -13.386 < 2e-16 ***
## sexMale -0.77702 0.10743 -7.232 4.74e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2537.4 on 2215 degrees of freedom
## Residual deviance: 2481.6 on 2214 degrees of freedom
## AIC: 2485.6
##
## Number of Fisher Scoring iterations: 4
publish(m.sex)
## Variable Units OddsRatio CI.95 p-value
## sex Female Ref
## Male 0.46 [0.37;0.57] <1e-04
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
vars = fx[, c("fnbmd", "sex", "fx")]
ggpairs(data = vars, mapping = aes(color = fx))
## Warning: Removed 89 rows containing non-finite values (`stat_density()`).
## Warning: Removed 89 rows containing non-finite values (`stat_boxplot()`).
## Removed 89 rows containing non-finite values (`stat_boxplot()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 89 rows containing non-finite values (`stat_bin()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 89 rows containing non-finite values (`stat_bin()`).
m.multi = glm(fx ~ fnbmd.sd + sex, family = "binomial", data = fx)
summary(m.multi)
##
## Call:
## glm(formula = fx ~ fnbmd.sd + sex, family = "binomial", data = fx)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.62064 0.31432 8.337 <2e-16 ***
## fnbmd.sd 0.70112 0.06367 11.012 <2e-16 ***
## sexMale -0.22252 0.12112 -1.837 0.0662 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2403.6 on 2126 degrees of freedom
## Residual deviance: 2219.4 on 2124 degrees of freedom
## (89 observations deleted due to missingness)
## AIC: 2225.4
##
## Number of Fisher Scoring iterations: 4
publish(m.multi)
## Variable Units OddsRatio CI.95 p-value
## fnbmd.sd 2.02 [1.78;2.28] < 1e-04
## sex Female Ref
## Male 0.80 [0.63;1.01] 0.06618
library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-4)
fx.2 = na.omit(fx)
bma.fx = bic.glm(fx ~ sex + age + weight + height + fnbmd + prior_fx + falls_n + smoking + parkinson + rheum + hypertension + diabetes + copd + cancer + cvd, data = fx.2, strict = F, OR = 20, glm.family = "binomial")
summary(bma.fx)
##
## Call:
## bic.glm.formula(f = fx ~ sex + age + weight + height + fnbmd + prior_fx + falls_n + smoking + parkinson + rheum + hypertension + diabetes + copd + cancer + cvd, data = fx.2, glm.family = "binomial", strict = F, OR = 20)
##
##
## 4 models were selected
## Best 4 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2 model 3
## Intercept 100 2.249962 0.674258 2.509e+00 2.340e+00 1.140e+00
## sexMale 18.2 -0.048048 0.114551 . -2.578e-01 .
## age 16.2 0.002663 0.006838 . . 1.602e-02
## weight 0.0 0.000000 0.000000 . . .
## height 0.0 0.000000 0.000000 . . .
## fnbmd 100.0 -4.487636 0.436674 -4.595e+00 -4.279e+00 -4.302e+00
## prior_fx 100.0
## .1 0.530729 0.134289 5.306e-01 5.445e-01 5.156e-01
## falls_n 0.0
## .1 0.000000 0.000000 . . .
## .2 0.000000 0.000000 . . .
## smoking 0.0
## .1 0.000000 0.000000 . . .
## parkinson 0.0
## .1 0.000000 0.000000 . . .
## rheum 0.0
## .1 0.000000 0.000000 . . .
## hypertension 0.0
## .1 0.000000 0.000000 . . .
## diabetes 0.0
## .1 0.000000 0.000000 . . .
## copd 0.0
## .1 0.000000 0.000000 . . .
## cancer 0.0
## .1 0.000000 0.000000 . . .
## cvd 0.0
## .1 0.000000 0.000000 . . .
##
## nVar 2 3 3
## BIC -1.402e+04 -1.402e+04 -1.402e+04
## post prob 0.696 0.142 0.122
## model 4
## Intercept 7.978e-01
## sexMale -2.851e-01
## age 1.780e-02
## weight .
## height .
## fnbmd -3.918e+00
## prior_fx
## .1 5.296e-01
## falls_n
## .1 .
## .2 .
## smoking
## .1 .
## parkinson
## .1 .
## rheum
## .1 .
## hypertension
## .1 .
## diabetes
## .1 .
## copd
## .1 .
## cancer
## .1 .
## cvd
## .1 .
##
## nVar 4
## BIC -1.402e+04
## post prob 0.040
##
## 1 observations deleted due to missingness.
imageplot.bma(bma.fx)
### 6.2 Trình bày kết quả
m.final = glm(fx ~ fnbmd.sd + prior_fx, family = "binomial", data = fx)
summary(m.final)
##
## Call:
## glm(formula = fx ~ fnbmd.sd + prior_fx, family = "binomial",
## data = fx)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.49096 0.31301 7.958 1.75e-15 ***
## fnbmd.sd 0.70829 0.06033 11.740 < 2e-16 ***
## prior_fx1 0.52983 0.13386 3.958 7.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2403.6 on 2126 degrees of freedom
## Residual deviance: 2207.6 on 2124 degrees of freedom
## (89 observations deleted due to missingness)
## AIC: 2213.6
##
## Number of Fisher Scoring iterations: 4
publish(m.final)
## Variable Units OddsRatio CI.95 p-value
## fnbmd.sd 2.03 [1.80;2.29] <1e-04
## prior_fx 0 Ref
## 1 1.70 [1.31;2.21] <1e-04