dat = read.csv("C:/Users/Momo/Desktop/anackd.csv")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
dat1 = dat %>% select(age, sex, sbp, dbp, pulse, tc, hdl, nonhdlc, tg, bs,
rbc, hg, hct, tob, alc, dx_ht, dx_af, dx_ckd, dx_mi, dx_st,
pyst, exer, kaidan, stress, BMI, BMIg, nonhdlc, diabetes,
hypertension, metSg, eGFR, eGFRg, uprg, fast, dx_dm)
dat1$sex = as.factor(dat1$sex) # Sex: 1 is male, 2 is female
dat1$BMIg = as.factor(dat1$BMIg)
dat1$eGFRg = as.factor(dat1$eGFRg)
dat1$uprg = as.factor(dat1$uprg) # Protein in urine: 0 is no protein in urine, 1 is trace, 2 is having protein in urine
dat1$tob = as.factor(dat1$tob) # Smoke: 1 is current, 2 is quit, 3 is never smoke
dat1$alc = as.factor(dat1$alc)
dat1$exer = as.factor(dat1$exer)
dat1$kaidan = as.factor(dat1$kaidan)
dat1$dx_af = as.factor(dat1$dx_af)
dat1$dx_ht = as.factor(dat1$dx_ht)
dat1$dx_ckd = as.factor(dat1$dx_ckd)
dat1$dx_st = as.factor(dat1$dx_st)
dat1$hypertension = as.factor(dat1$hypertension)
dat1$diabetes = as.factor(dat1$diabetes)
dat1$stress = as.factor(dat1$stress)
dat1$metSg = as.factor(dat1$metSg)
# Create new variable: atrial fibrillation
dat1$af[dat1$dx_af == 0] = 0
dat1$af[dat1$dx_af == 1] = 1
dat1$af[dat1$dx_af == 2] = 1
# For stress
dat1$stressnew[dat1$stress == 1] = "No"
dat1$stressnew[dat1$stress == 2] = "Yes"
dat1$stressnew[dat1$stress == 9] = "Unknown"
dat1$stressnew = as.factor(dat1$stressnew)
dat1$stressnew = relevel(dat1$stressnew, ref = "No")
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~ age + sex + eGFR + eGFRg + uprg + dx_ckd + BMI + BMIg + tob + alc +
sbp + dbp + pulse + hypertension + dx_ht + bs + diabetes +
tc + hdl + nonhdlc + tg + metSg + hg + hct + rbc + dx_af +
dx_ckd + dx_st + pyst, data = dat1 )
| Overall (N=7342) |
|
|---|---|
| age | |
| Mean (SD) | 55.1 (13.1) |
| Median [Min, Max] | 56.0 [30.0, 84.0] |
| sex | |
| 1 | 3357 (45.7%) |
| 2 | 3985 (54.3%) |
| eGFR | |
| Mean (SD) | 77.9 (14.6) |
| Median [Min, Max] | 79.0 [7.92, 179] |
| eGFRg | |
| 0 | 149 (2.0%) |
| 1 | 754 (10.3%) |
| 2 | 6439 (87.7%) |
| uprg | |
| 0 | 6025 (82.1%) |
| 1 | 902 (12.3%) |
| 2 | 415 (5.7%) |
| dx_ckd | |
| 0 | 7323 (99.7%) |
| 1 | 8 (0.1%) |
| 2 | 11 (0.1%) |
| BMI | |
| Mean (SD) | 22.5 (3.12) |
| Median [Min, Max] | 22.3 [13.1, 40.5] |
| Missing | 2 (0.0%) |
| BMIg | |
| Normal | 5282 (71.9%) |
| Obesity | 112 (1.5%) |
| Overweight | 1333 (18.2%) |
| Underweight | 615 (8.4%) |
| tob | |
| 1 | 2124 (28.9%) |
| 2 | 1153 (15.7%) |
| 3 | 3941 (53.7%) |
| Missing | 124 (1.7%) |
| alc | |
| 1 | 3776 (51.4%) |
| 2 | 168 (2.3%) |
| 3 | 3283 (44.7%) |
| Missing | 115 (1.6%) |
| sbp | |
| Mean (SD) | 126 (21.5) |
| Median [Min, Max] | 124 [78.0, 233] |
| dbp | |
| Mean (SD) | 77.6 (12.2) |
| Median [Min, Max] | 77.0 [0, 137] |
| Missing | 1 (0.0%) |
| pulse | |
| Mean (SD) | 69.5 (9.48) |
| Median [Min, Max] | 68.0 [38.0, 128] |
| Missing | 18 (0.2%) |
| hypertension | |
| 0 | 5072 (69.1%) |
| 1 | 2270 (30.9%) |
| dx_ht | |
| 0 | 5970 (81.3%) |
| 1 | 547 (7.5%) |
| 2 | 825 (11.2%) |
| bs | |
| Mean (SD) | 98.3 (18.6) |
| Median [Min, Max] | 95.0 [51.0, 439] |
| diabetes | |
| 0 | 6850 (93.3%) |
| 1 | 492 (6.7%) |
| tc | |
| Mean (SD) | 207 (36.1) |
| Median [Min, Max] | 206 [75.0, 483] |
| hdl | |
| Mean (SD) | 54.7 (14.3) |
| Median [Min, Max] | 53.0 [10.0, 132] |
| Missing | 28 (0.4%) |
| nonhdlc | |
| Mean (SD) | 153 (36.9) |
| Median [Min, Max] | 150 [22.0, 403] |
| Missing | 28 (0.4%) |
| tg | |
| Mean (SD) | 122 (90.5) |
| Median [Min, Max] | 99.0 [15.0, 1540] |
| metSg | |
| 0 | 5756 (78.4%) |
| 1 | 1586 (21.6%) |
| hg | |
| Mean (SD) | 13.9 (1.55) |
| Median [Min, Max] | 13.9 [5.70, 20.6] |
| Missing | 3 (0.0%) |
| hct | |
| Mean (SD) | 41.8 (4.25) |
| Median [Min, Max] | 41.7 [12.4, 61.8] |
| Missing | 3 (0.0%) |
| rbc | |
| Mean (SD) | 4.53 (0.434) |
| Median [Min, Max] | 4.51 [2.61, 7.29] |
| Missing | 4 (0.1%) |
| dx_af | |
| 0 | 7312 (99.6%) |
| 1 | 17 (0.2%) |
| 2 | 13 (0.2%) |
| dx_st | |
| 0 | 6910 (94.1%) |
| 1 | 432 (5.9%) |
| pyst | |
| Mean (SD) | 15.1 (7.06) |
| Median [Min, Max] | 16.7 [0.0862, 24.3] |
table1(~ age + factor(sex) + eGFR + eGFRg + uprg + dx_ckd + BMI + BMIg + tob + alc +
sbp + dbp + pulse + hypertension + dx_ht + bs + diabetes +
tc + hdl + nonhdlc + tg + metSg + hg + hct + rbc + dx_af +
dx_ckd + pyst + stress|dx_st, data = dat1 )
| 0 (N=6910) |
1 (N=432) |
Overall (N=7342) |
|
|---|---|---|---|
| age | |||
| Mean (SD) | 54.5 (13.1) | 64.3 (9.73) | 55.1 (13.1) |
| Median [Min, Max] | 55.0 [30.0, 84.0] | 66.0 [32.0, 80.0] | 56.0 [30.0, 84.0] |
| factor(sex) | |||
| 1 | 3128 (45.3%) | 229 (53.0%) | 3357 (45.7%) |
| 2 | 3782 (54.7%) | 203 (47.0%) | 3985 (54.3%) |
| eGFR | |||
| Mean (SD) | 78.4 (14.5) | 70.5 (13.6) | 77.9 (14.6) |
| Median [Min, Max] | 79.6 [7.92, 179] | 72.7 [27.7, 105] | 79.0 [7.92, 179] |
| eGFRg | |||
| 0 | 130 (1.9%) | 19 (4.4%) | 149 (2.0%) |
| 1 | 676 (9.8%) | 78 (18.1%) | 754 (10.3%) |
| 2 | 6104 (88.3%) | 335 (77.5%) | 6439 (87.7%) |
| uprg | |||
| 0 | 5712 (82.7%) | 313 (72.5%) | 6025 (82.1%) |
| 1 | 828 (12.0%) | 74 (17.1%) | 902 (12.3%) |
| 2 | 370 (5.4%) | 45 (10.4%) | 415 (5.7%) |
| dx_ckd | |||
| 0 | 6892 (99.7%) | 431 (99.8%) | 7323 (99.7%) |
| 1 | 8 (0.1%) | 0 (0%) | 8 (0.1%) |
| 2 | 10 (0.1%) | 1 (0.2%) | 11 (0.1%) |
| BMI | |||
| Mean (SD) | 22.5 (3.10) | 23.2 (3.33) | 22.5 (3.12) |
| Median [Min, Max] | 22.3 [13.1, 40.5] | 23.0 [13.9, 35.3] | 22.3 [13.1, 40.5] |
| Missing | 1 (0.0%) | 1 (0.2%) | 2 (0.0%) |
| BMIg | |||
| Normal | 4987 (72.2%) | 295 (68.3%) | 5282 (71.9%) |
| Obesity | 100 (1.4%) | 12 (2.8%) | 112 (1.5%) |
| Overweight | 1237 (17.9%) | 96 (22.2%) | 1333 (18.2%) |
| Underweight | 586 (8.5%) | 29 (6.7%) | 615 (8.4%) |
| tob | |||
| 1 | 1985 (28.7%) | 139 (32.2%) | 2124 (28.9%) |
| 2 | 1069 (15.5%) | 84 (19.4%) | 1153 (15.7%) |
| 3 | 3746 (54.2%) | 195 (45.1%) | 3941 (53.7%) |
| Missing | 110 (1.6%) | 14 (3.2%) | 124 (1.7%) |
| alc | |||
| 1 | 3554 (51.4%) | 222 (51.4%) | 3776 (51.4%) |
| 2 | 152 (2.2%) | 16 (3.7%) | 168 (2.3%) |
| 3 | 3100 (44.9%) | 183 (42.4%) | 3283 (44.7%) |
| Missing | 104 (1.5%) | 11 (2.5%) | 115 (1.6%) |
| sbp | |||
| Mean (SD) | 126 (21.1) | 139 (24.2) | 126 (21.5) |
| Median [Min, Max] | 123 [78.0, 233] | 137 [86.0, 223] | 124 [78.0, 233] |
| dbp | |||
| Mean (SD) | 77.4 (12.0) | 81.2 (13.3) | 77.6 (12.2) |
| Median [Min, Max] | 77.0 [0, 130] | 81.0 [19.0, 137] | 77.0 [0, 137] |
| Missing | 1 (0.0%) | 0 (0%) | 1 (0.0%) |
| pulse | |||
| Mean (SD) | 69.4 (9.38) | 71.0 (10.9) | 69.5 (9.48) |
| Median [Min, Max] | 68.0 [38.0, 128] | 68.0 [40.0, 120] | 68.0 [38.0, 128] |
| Missing | 16 (0.2%) | 2 (0.5%) | 18 (0.2%) |
| hypertension | |||
| 0 | 4877 (70.6%) | 195 (45.1%) | 5072 (69.1%) |
| 1 | 2033 (29.4%) | 237 (54.9%) | 2270 (30.9%) |
| dx_ht | |||
| 0 | 5701 (82.5%) | 269 (62.3%) | 5970 (81.3%) |
| 1 | 491 (7.1%) | 56 (13.0%) | 547 (7.5%) |
| 2 | 718 (10.4%) | 107 (24.8%) | 825 (11.2%) |
| bs | |||
| Mean (SD) | 97.9 (17.7) | 105 (28.6) | 98.3 (18.6) |
| Median [Min, Max] | 95.0 [51.0, 439] | 99.0 [76.0, 394] | 95.0 [51.0, 439] |
| diabetes | |||
| 0 | 6486 (93.9%) | 364 (84.3%) | 6850 (93.3%) |
| 1 | 424 (6.1%) | 68 (15.7%) | 492 (6.7%) |
| tc | |||
| Mean (SD) | 207 (36.1) | 211 (36.2) | 207 (36.1) |
| Median [Min, Max] | 205 [75.0, 483] | 211 [107, 331] | 206 [75.0, 483] |
| hdl | |||
| Mean (SD) | 54.8 (14.3) | 52.5 (14.0) | 54.7 (14.3) |
| Median [Min, Max] | 53.0 [10.0, 132] | 51.0 [26.0, 99.0] | 53.0 [10.0, 132] |
| Missing | 22 (0.3%) | 6 (1.4%) | 28 (0.4%) |
| nonhdlc | |||
| Mean (SD) | 152 (36.9) | 159 (36.9) | 153 (36.9) |
| Median [Min, Max] | 150 [22.0, 403] | 156 [78.0, 283] | 150 [22.0, 403] |
| Missing | 22 (0.3%) | 6 (1.4%) | 28 (0.4%) |
| tg | |||
| Mean (SD) | 121 (89.0) | 139 (111) | 122 (90.5) |
| Median [Min, Max] | 98.0 [15.0, 1540] | 112 [36.0, 1510] | 99.0 [15.0, 1540] |
| metSg | |||
| 0 | 5488 (79.4%) | 268 (62.0%) | 5756 (78.4%) |
| 1 | 1422 (20.6%) | 164 (38.0%) | 1586 (21.6%) |
| hg | |||
| Mean (SD) | 13.9 (1.55) | 14.1 (1.47) | 13.9 (1.55) |
| Median [Min, Max] | 13.8 [5.70, 20.6] | 14.2 [7.00, 19.0] | 13.9 [5.70, 20.6] |
| Missing | 3 (0.0%) | 0 (0%) | 3 (0.0%) |
| hct | |||
| Mean (SD) | 41.7 (4.26) | 42.3 (4.07) | 41.8 (4.25) |
| Median [Min, Max] | 41.6 [12.4, 61.8] | 42.4 [21.5, 53.8] | 41.7 [12.4, 61.8] |
| Missing | 3 (0.0%) | 0 (0%) | 3 (0.0%) |
| rbc | |||
| Mean (SD) | 4.53 (0.435) | 4.54 (0.418) | 4.53 (0.434) |
| Median [Min, Max] | 4.51 [2.61, 7.29] | 4.54 [3.29, 5.79] | 4.51 [2.61, 7.29] |
| Missing | 4 (0.1%) | 0 (0%) | 4 (0.1%) |
| dx_af | |||
| 0 | 6888 (99.7%) | 424 (98.1%) | 7312 (99.6%) |
| 1 | 13 (0.2%) | 4 (0.9%) | 17 (0.2%) |
| 2 | 9 (0.1%) | 4 (0.9%) | 13 (0.2%) |
| pyst | |||
| Mean (SD) | 15.4 (6.99) | 9.89 (6.10) | 15.1 (7.06) |
| Median [Min, Max] | 17.2 [0.0862, 24.3] | 10.0 [0.103, 23.3] | 16.7 [0.0862, 24.3] |
| stress | |||
| 1 | 2300 (33.3%) | 156 (36.1%) | 2456 (33.5%) |
| 2 | 2878 (41.6%) | 136 (31.5%) | 3014 (41.1%) |
| 9 | 1060 (15.3%) | 73 (16.9%) | 1133 (15.4%) |
| Missing | 672 (9.7%) | 67 (15.5%) | 739 (10.1%) |
# Examing the pattern of missing variables
library(rms)
## Warning: package 'rms' was built under R version 4.2.2
## Loading required package: Hmisc
## Warning: package 'Hmisc' was built under R version 4.2.2
## Loading required package: lattice
## Loading required package: survival
## Warning: package 'survival' was built under R version 4.2.1
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:table1':
##
## label, label<-, units
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
na.patterns = naclus(dat1)
plot(na.patterns)
# Change the reference;
dat1$tob=relevel(dat1$tob, ref = "3")
dat1$alc=relevel(dat1$alc, ref = "3")
logreg1 = glm(dx_st ~ eGFR + uprg + tob + alc + age + sex, family=binomial, data=dat1)
library(epiDisplay)
## Loading required package: foreign
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: nnet
## Registered S3 method overwritten by 'epiDisplay':
## method from
## print.lrtest rms
##
## Attaching package: 'epiDisplay'
## The following object is masked from 'package:rms':
##
## lrtest
## The following object is masked from 'package:lattice':
##
## dotplot
## The following object is masked from 'package:ggplot2':
##
## alpha
# summary(logreg1)
logistic.display(logreg1)
##
## Logistic regression predicting dx_st : 1 vs 0
##
## crude OR(95%CI) adj. OR(95%CI) P(Wald's test)
## eGFR (cont. var.) 0.966 (0.9599,0.9722) 0.9907 (0.9824,0.999) 0.028
##
## uprg: ref.=0
## 1 1.62 (1.24,2.12) 1.47 (1.11,1.95) 0.007
## 2 2.32 (1.67,3.23) 1.68 (1.19,2.37) 0.003
##
## tob: ref.=3
## 1 1.35 (1.08,1.69) 1.43 (1.07,1.92) 0.016
## 2 1.51 (1.16,1.97) 1.01 (0.72,1.42) 0.943
##
## alc: ref.=3
## 1 1.06 (0.87,1.3) 1.21 (0.96,1.54) 0.11
## 2 1.8 (1.05,3.07) 1.19 (0.68,2.1) 0.541
##
## age (cont. var.) 1.07 (1.06,1.08) 1.06 (1.05,1.07) < 0.001
##
## sex: 2 vs 1 0.7015 (0.5752,0.8555) 0.9989 (0.748,1.3341) 0.994
##
## P(LR-test)
## eGFR (cont. var.) 0.03
##
## uprg: ref.=0 0.001
## 1
## 2
##
## tob: ref.=3 0.016
## 1
## 2
##
## alc: ref.=3 0.274
## 1
## 2
##
## age (cont. var.) < 0.001
##
## sex: 2 vs 1 0.994
##
## Log-likelihood = -1455.081
## No. of observations = 7194
## AIC value = 2930.1621
# Figure of OR (95% CI) of developing Stroke;
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
ggcoef(logreg1, exponentiate=T, exclude_intercept=T, vline_color = "red",
errorbar_color = "blue", errorbar_height = 0.10)
logreg2 = glm(dx_st ~ eGFR + uprg + tob + alc + age + sex +
hypertension + diabetes + metSg + af, family=binomial, data=dat1)
logistic.display(logreg2)
##
## Logistic regression predicting dx_st : 1 vs 0
##
## crude OR(95%CI) adj. OR(95%CI)
## eGFR (cont. var.) 0.966 (0.9599,0.9722) 0.9908 (0.9825,0.9991)
##
## uprg: ref.=0
## 1 1.62 (1.24,2.12) 1.42 (1.07,1.88)
## 2 2.32 (1.67,3.23) 1.25 (0.87,1.79)
##
## tob: ref.=3
## 1 1.35 (1.08,1.69) 1.5 (1.12,2.02)
## 2 1.5124 (1.1605,1.9711) 1.0037 (0.7116,1.4156)
##
## alc: ref.=3
## 1 1.06 (0.87,1.3) 1.17 (0.92,1.49)
## 2 1.8 (1.05,3.07) 1.19 (0.67,2.11)
##
## age (cont. var.) 1.07 (1.06,1.08) 1.05 (1.04,1.07)
##
## sex: 2 vs 1 0.7015 (0.5752,0.8555) 1.0082 (0.7515,1.3526)
##
## hypertension: 1 vs 0 2.96 (2.42,3.61) 1.68 (1.35,2.1)
##
## diabetes: 1 vs 0 2.79 (2.1,3.71) 1.69 (1.24,2.3)
##
## metSg: 1 vs 0 2.41 (1.96,2.96) 1.45 (1.16,1.82)
##
## af: 1 vs 0 6.29 (2.77,14.29) 3.34 (1.41,7.9)
##
## P(Wald's test) P(LR-test)
## eGFR (cont. var.) 0.03 0.032
##
## uprg: ref.=0 0.042
## 1 0.015
## 2 0.228
##
## tob: ref.=3 0.005
## 1 0.007
## 2 0.983
##
## alc: ref.=3 0.426
## 1 0.201
## 2 0.552
##
## age (cont. var.) < 0.001 < 0.001
##
## sex: 2 vs 1 0.957 0.957
##
## hypertension: 1 vs 0 < 0.001 < 0.001
##
## diabetes: 1 vs 0 < 0.001 0.001
##
## metSg: 1 vs 0 0.001 0.001
##
## af: 1 vs 0 0.006 0.012
##
## Log-likelihood = -1422.9374
## No. of observations = 7194
## AIC value = 2873.8747
# Figure of OR (95% CI) of developing Stroke;
ggcoef(logreg2, exponentiate=T, exclude_intercept=T, vline_color = "red",
errorbar_color = "blue", errorbar_height = 0.10)
logreg3 = glm(dx_st ~ eGFR + uprg + tob + alc + age + sex + hypertension + stressnew +
diabetes + metSg + af + tc + tg + nonhdlc, family=binomial, data=dat1)
logistic.display(logreg3)
##
## Logistic regression predicting dx_st : 1 vs 0
##
## crude OR(95%CI) adj. OR(95%CI)
## eGFR (cont. var.) 0.967 (0.9603,0.9738) 0.9919 (0.9827,1.0012)
##
## uprg: ref.=0
## 1 1.76 (1.32,2.36) 1.65 (1.21,2.24)
## 2 2.34 (1.64,3.33) 1.24 (0.84,1.82)
##
## tob: ref.=3
## 1 1.39 (1.09,1.77) 1.45 (1.05,1.99)
## 2 1.42 (1.06,1.91) 0.88 (0.6,1.28)
##
## alc: ref.=3
## 1 1.09 (0.87,1.35) 1.2 (0.92,1.56)
## 2 1.63 (0.84,3.17) 1.16 (0.57,2.36)
##
## age (cont. var.) 1.06 (1.05,1.07) 1.05 (1.04,1.07)
##
## sex: 2 vs 1 0.67 (0.54,0.84) 0.91 (0.65,1.25)
##
## hypertension: 1 vs 0 2.86 (2.31,3.55) 1.64 (1.29,2.09)
##
## stressnew: ref.=No
## Unknown 1.04 (0.78,1.4) 1.41 (1.05,1.92)
## Yes 0.7 (0.55,0.89) 1.14 (0.89,1.48)
##
## diabetes: 1 vs 0 2.89 (2.13,3.93) 1.78 (1.27,2.48)
##
## metSg: 1 vs 0 2.43 (1.94,3.04) 1.44 (1.08,1.92)
##
## af: 1 vs 0 6.14 (2.58,14.62) 3.3 (1.32,8.24)
##
## tc (cont. var.) 1.0035 (1.0005,1.0064) 1.0001 (0.9909,1.0093)
##
## tg (cont. var.) 1.0015 (1.0007,1.0024) 1.0001 (0.9987,1.0014)
##
## nonhdlc (cont. var.) 1.0052 (1.0023,1.008) 1.0015 (0.992,1.011)
##
## P(Wald's test) P(LR-test)
## eGFR (cont. var.) 0.087 0.09
##
## uprg: ref.=0 0.007
## 1 0.001
## 2 0.276
##
## tob: ref.=3 0.005
## 1 0.024
## 2 0.504
##
## alc: ref.=3 0.403
## 1 0.18
## 2 0.672
##
## age (cont. var.) < 0.001 < 0.001
##
## sex: 2 vs 1 0.553 0.553
##
## hypertension: 1 vs 0 < 0.001 < 0.001
##
## stressnew: ref.=No 0.085
## Unknown 0.025
## Yes 0.299
##
## diabetes: 1 vs 0 < 0.001 0.001
##
## metSg: 1 vs 0 0.013 0.013
##
## af: 1 vs 0 0.01 0.019
##
## tc (cont. var.) 0.987 0.987
##
## tg (cont. var.) 0.939 0.939
##
## nonhdlc (cont. var.) 0.763 0.762
##
## Log-likelihood = -1230.9367
## No. of observations = 6479
## AIC value = 2499.8735
# Figure of OR (95% CI) of developing Stroke;
ggcoef(logreg3, exponentiate=T, exclude_intercept=T, vline_color = "red",
errorbar_color = "blue", errorbar_height = 0.10)
# 4.4. Choose the best model by BMA
library(BMA)
## Warning: package 'BMA' was built under R version 4.2.2
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-0)
yvar = dat1[, "dx_st"]
# xvars = dat1[, c("eGFR", "eGFRg", "uprg", "age", "sex", "sbp", "dbp", "pulse", "tc", "hdl", "tg", "bs", "rbc", "hg", "hct", "tob", "alc", "dx_ht", "dx_af", "dx_ckd", "dx_mi", "pyst", "exer", "kaidan", "stress", "BMI", "BMIg", "nonhdlc", "diabetes", "hypertension", "metSg" )]
xvars = dat1[, c("eGFR", "uprg", "age", "sex", "sbp", "hdl", "bs", "tob", "af", "kaidan", "stress", "BMI", "BMIg", "diabetes", "hypertension", "metSg" )]
bma = bic.glm(xvars, yvar, glm.family = "binomial")
## Warning in bic.glm.data.frame(xvars, yvar, glm.family = "binomial"): There were
## 989 records deleted due to NA's
summary(bma)
##
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = "binomial")
##
##
## 21 models were selected
## Best 5 models (cumulative posterior probability = 0.5175 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -8.5847795 0.818150 -8.704e+00 -9.104e+00
## eGFR 2.1 -0.0002321 0.001720 . .
## uprg 0.0
## .1 0.0000000 0.000000 . .
## .2 0.0000000 0.000000 . .
## age 100.0 0.0536071 0.005729 5.254e-02 5.475e-02
## sex 10.5
## .2 -0.0315872 0.099499 . .
## sbp 91.0 0.0123332 0.004705 1.393e-02 1.467e-02
## hdl 0.0 0.0000000 0.000000 . .
## bs 79.1 0.0069436 0.004037 9.170e-03 9.135e-03
## tob 42.6
## .1 0.2227354 0.271884 . 5.229e-01
## .2 0.0190803 0.104096 . 4.289e-02
## af 1.1 0.0106636 0.114698 . .
## kaidan 0.0
## .2 0.0000000 0.000000 . .
## .3 0.0000000 0.000000 . .
## .4 0.0000000 0.000000 . .
## .5 0.0000000 0.000000 . .
## stress 0.0
## .2 0.0000000 0.000000 . .
## .9 0.0000000 0.000000 . .
## BMI 13.8 0.0064245 0.017336 . .
## BMIg 0.0
## .Obesity 0.0000000 0.000000 . .
## .Overweight 0.0000000 0.000000 . .
## .Underweight 0.0000000 0.000000 . .
## diabetes 19.4
## .1 0.1228145 0.261449 . .
## hypertension 9.9
## .1 0.0554267 0.174021 . .
## metSg 28.1
## .1 0.1000332 0.173838 . .
##
## nVar 3 4
## BIC -5.315e+04 -5.315e+04
## post prob 0.174 0.130
## model 3 model 4 model 5
## Intercept -8.434e+00 -7.855e+00 -8.501e+00
## eGFR . . .
## uprg
## .1 . . .
## .2 . . .
## age 5.236e-02 5.192e-02 5.197e-02
## sex
## .2 . . -2.947e-01
## sbp 1.216e-02 1.427e-02 1.402e-02
## hdl . . .
## bs 7.924e-03 . 8.807e-03
## tob
## .1 . . .
## .2 . . .
## af . . .
## kaidan
## .2 . . .
## .3 . . .
## .4 . . .
## .5 . . .
## stress
## .2 . . .
## .9 . . .
## BMI . . .
## BMIg
## .Obesity . . .
## .Overweight . . .
## .Underweight . . .
## diabetes
## .1 . 6.841e-01 .
## hypertension
## .1 . . .
## metSg
## .1 3.452e-01 . .
##
## nVar 4 3 4
## BIC -5.315e+04 -5.315e+04 -5.315e+04
## post prob 0.083 0.069 0.062
imageplot.bma(bma)
logreg = glm(dx_st ~ I(age/10) + I(sbp/10) + sex + BMI + tob + stressnew +
I(eGFR/10) + I(bs/10) + af + diabetes,
family=binomial, data=dat1)
summary(logreg)
##
## Call:
## glm(formula = dx_st ~ I(age/10) + I(sbp/10) + sex + BMI + tob +
## stressnew + I(eGFR/10) + I(bs/10) + af + diabetes, family = binomial,
## data = dat1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0578 -0.3737 -0.2590 -0.1736 3.2173
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.57312 0.82421 -10.402 < 2e-16 ***
## I(age/10) 0.48942 0.06247 7.835 4.70e-15 ***
## I(sbp/10) 0.13224 0.02602 5.083 3.72e-07 ***
## sex2 -0.11993 0.15453 -0.776 0.43770
## BMI 0.04762 0.01761 2.704 0.00684 **
## tob1 0.44794 0.16146 2.774 0.00553 **
## tob2 -0.09933 0.19099 -0.520 0.60301
## stressnewUnknown 0.36626 0.15432 2.373 0.01763 *
## stressnewYes 0.13503 0.13000 1.039 0.29896
## I(eGFR/10) -0.11295 0.04672 -2.418 0.01562 *
## I(bs/10) 0.06182 0.02741 2.256 0.02410 *
## af 1.02236 0.48918 2.090 0.03662 *
## diabetes1 0.36074 0.21303 1.693 0.09039 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2754.3 on 6497 degrees of freedom
## Residual deviance: 2473.7 on 6485 degrees of freedom
## (844 observations deleted due to missingness)
## AIC: 2499.7
##
## Number of Fisher Scoring iterations: 6
logistic.display(logreg)
##
## OR lower95ci upper95ci Pr(>|Z|)
## I(age/10) 1.6313713 1.4433767 1.8438515 4.699389e-15
## I(sbp/10) 1.1413773 1.0846362 1.2010868 3.719270e-07
## sex2 0.8869850 0.6552121 1.2007444 4.376967e-01
## BMI 1.0487713 1.0131940 1.0855978 6.843365e-03
## tob1 1.5650919 1.1405248 2.1477066 5.531462e-03
## tob2 0.9054432 0.6227129 1.3165416 6.030107e-01
## stressnewUnknown 1.4423373 1.0658765 1.9517617 1.762678e-02
## stressnewYes 1.1445656 0.8871250 1.4767146 2.989638e-01
## I(eGFR/10) 0.8931987 0.8150440 0.9788478 1.562420e-02
## I(bs/10) 1.0637735 1.0081343 1.1224833 2.409957e-02
## af 2.7797383 1.0656499 7.2509229 3.662257e-02
## diabetes1 1.4343901 0.9447820 2.1777243 9.039150e-02
logreg = glm(dx_st ~ I(age/10) + I(sbp/10) + sex + BMI + tob + stressnew +
I(eGFR/10) + I(bs/10) + af + diabetes, family=binomial, data=dat1)
summary(logreg)
##
## Call:
## glm(formula = dx_st ~ I(age/10) + I(sbp/10) + sex + BMI + tob +
## stressnew + I(eGFR/10) + I(bs/10) + af + diabetes, family = binomial,
## data = dat1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0578 -0.3737 -0.2590 -0.1736 3.2173
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.57312 0.82421 -10.402 < 2e-16 ***
## I(age/10) 0.48942 0.06247 7.835 4.70e-15 ***
## I(sbp/10) 0.13224 0.02602 5.083 3.72e-07 ***
## sex2 -0.11993 0.15453 -0.776 0.43770
## BMI 0.04762 0.01761 2.704 0.00684 **
## tob1 0.44794 0.16146 2.774 0.00553 **
## tob2 -0.09933 0.19099 -0.520 0.60301
## stressnewUnknown 0.36626 0.15432 2.373 0.01763 *
## stressnewYes 0.13503 0.13000 1.039 0.29896
## I(eGFR/10) -0.11295 0.04672 -2.418 0.01562 *
## I(bs/10) 0.06182 0.02741 2.256 0.02410 *
## af 1.02236 0.48918 2.090 0.03662 *
## diabetes1 0.36074 0.21303 1.693 0.09039 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2754.3 on 6497 degrees of freedom
## Residual deviance: 2473.7 on 6485 degrees of freedom
## (844 observations deleted due to missingness)
## AIC: 2499.7
##
## Number of Fisher Scoring iterations: 6
logistic.display(logreg)
##
## OR lower95ci upper95ci Pr(>|Z|)
## I(age/10) 1.6313713 1.4433767 1.8438515 4.699389e-15
## I(sbp/10) 1.1413773 1.0846362 1.2010868 3.719270e-07
## sex2 0.8869850 0.6552121 1.2007444 4.376967e-01
## BMI 1.0487713 1.0131940 1.0855978 6.843365e-03
## tob1 1.5650919 1.1405248 2.1477066 5.531462e-03
## tob2 0.9054432 0.6227129 1.3165416 6.030107e-01
## stressnewUnknown 1.4423373 1.0658765 1.9517617 1.762678e-02
## stressnewYes 1.1445656 0.8871250 1.4767146 2.989638e-01
## I(eGFR/10) 0.8931987 0.8150440 0.9788478 1.562420e-02
## I(bs/10) 1.0637735 1.0081343 1.1224833 2.409957e-02
## af 2.7797383 1.0656499 7.2509229 3.662257e-02
## diabetes1 1.4343901 0.9447820 2.1777243 9.039150e-02
ggcoef(logreg, exponentiate=T, exclude_intercept=T, vline_color = "red",
errorbar_color = "blue", errorbar_height = 0.10)
library(Epi)
## Warning: package 'Epi' was built under R version 4.2.2
ROC(form = dx_st ~ I(age/10) + I(sbp/10) + sex + BMI + tob + stressnew +
I(eGFR/10) + I(bs/10) + af + diabetes, data=dat1, plot = "ROC", lwd=2)
# The performance of the final model demonstrated an acceptable discrimination with area under the curve (AUC) of 75 %.
# varImp function (help to weight the predictors, Weighted predictors analysis)
library(caret)
## Warning: package 'caret' was built under R version 4.2.1
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
## The following object is masked from 'package:purrr':
##
## lift
varImp(logreg, scale = F)
## Overall
## I(age/10) 7.8347066
## I(sbp/10) 5.0827920
## sex2 0.7760885
## BMI 2.7043719
## tob1 2.7743354
## tob2 0.5200759
## stressnewUnknown 2.3733663
## stressnewYes 1.0386581
## I(eGFR/10) 2.4175777
## I(bs/10) 2.2555381
## af 2.0899469
## diabetes1 1.6933361
library(rms)
moboostrap = lrm(dx_st ~ age + sbp + sex + BMI + tob + stressnew +
eGFR + bs + af + diabetes, data=dat1, x = TRUE, y = TRUE)
cal = calibrate(moboostrap, method = "boot", B=100, legend = T, digits = 3, subtitles = T)
##
## Divergence or singularity in 6 samples
plot(cal, xlab = "Predicted probability of Stroke", ylab = "Observation Proportion")
##
## n=6498 Mean absolute error=0.006 Mean squared error=1e-04
## 0.9 Quantile of absolute error=0.011
validate = validate(moboostrap, B = 100)
##
## Divergence or singularity in 3 samples
validate
## index.orig training test optimism index.corrected n
## Dxy 0.5035 0.5104 0.4959 0.0146 0.4889 97
## R2 0.1223 0.1270 0.1178 0.0092 0.1131 97
## Intercept 0.0000 0.0000 -0.1062 0.1062 -0.1062 97
## Slope 1.0000 1.0000 0.9588 0.0412 0.9588 97
## Emax 0.0000 0.0000 0.0306 0.0306 0.0306 97
## D 0.0430 0.0448 0.0414 0.0035 0.0396 97
## U -0.0003 -0.0003 0.0000 -0.0004 0.0000 97
## Q 0.0433 0.0451 0.0413 0.0038 0.0395 97
## B 0.0492 0.0492 0.0494 -0.0002 0.0494 97
## g 1.1274 1.1469 1.0952 0.0517 1.0757 97
## gp 0.0513 0.0522 0.0503 0.0019 0.0494 97
library(caret)
library(caret)
sample = createDataPartition(dat1$dx_st, p = 0.7, list = FALSE)
train = dat1[sample,]
test = dat1[-sample,]
modelval = train(dx_st ~ eGFR + bs + age + sbp + bs + sex + diabetes + af, data = train, method = "glm", family = "binomial")
modelval
## Generalized Linear Model
##
## 5140 samples
## 7 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 5140, 5140, 5140, 5140, 5140, 5140, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9422405 0.005552353
summary(modelval)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0314 -0.3857 -0.2694 -0.1817 3.0444
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.049528 0.745020 -9.462 < 2e-16 ***
## eGFR -0.012643 0.004853 -2.605 0.00918 **
## bs 0.006713 0.002787 2.409 0.01601 *
## age 0.042978 0.006657 6.456 1.07e-10 ***
## sbp 0.014845 0.002760 5.378 7.54e-08 ***
## sex2 -0.117234 0.122887 -0.954 0.34008
## diabetes1 0.524312 0.221049 2.372 0.01770 *
## af 0.920538 0.603700 1.525 0.12730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2303.4 on 5139 degrees of freedom
## Residual deviance: 2069.8 on 5132 degrees of freedom
## AIC: 2085.8
##
## Number of Fisher Scoring iterations: 6
# How to interpret the output
#6. Nomogram for Stroke prediction (Probability of Stroke)
library(rms)
ddist = datadist(dat1)
options(datadist = "ddist")
m1 = lrm(dx_st ~ age + eGFR + BMI + tob + sex + sbp + uprg +
af + diabetes, data = dat1)
p = nomogram(m1, fun = function(x)1/(1+exp(-x)), fun.at = c(0.001, 0.01, 0.05, seq(0.1, 0.9, by = 0.1), 0.95, 0.99, 0.999), funlabel = "Probability of Stroke")
plot(p, cex.axis = 0.6, lmgp = 0.2)