library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(compareGroups)
library(mlogitBMA)
## Loading required package: BMA
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-2)
## Loading required package: abind
## Loading required package: maxLik
## Loading required package: miscTools
##
## Attaching package: 'miscTools'
## The following objects are masked from 'package:robustbase':
##
## colMedians, rowMedians
##
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
##
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
library(ggplot2)
library(nnet)
library(gridExtra)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
library(BMA)
library(survival)
library(tableone)
library(prodlim)
#library(riskRegression)
library(CalibrationCurves)
## Loading required package: rms
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:table1':
##
## label, label<-, units
## The following objects are masked from 'package:base':
##
## format.pval, units
Data set-up
#dm = read.csv("C:\\Thach\\Research projects\\DM in Vietnam\\dm_vos2.csv")
dm = read.csv("C:\\Thach\\Research projects\\Incident DM in Vietnam\\vos_dm3.csv")
head(dm)
## id age sex ageg bmi height weight wc hip whr sbp dbp edu occu
## 1 1 77 F 70+ 18.5 156.0 45.0 78.0 88 0.8863636 100 70 1 9
## 2 2 49 F 40-49 25.4 152.3 59.0 82.0 100 0.8200000 100 60 2 4
## 3 5 56 F 50-59 29.3 154.6 70.0 95.5 107 0.8925233 120 70 5 3
## 4 8 55 F 50-59 23.9 153.0 56.0 79.0 105 0.7523810 110 70 2 9
## 5 10 49 F 40-49 27.9 149.0 62.0 92.0 100 0.9200000 100 80 4 9
## 6 14 55 M 50-59 26.0 157.4 64.5 86.0 95 0.9052632 100 70 4 5
## marital alcol smok diabdrug v1ldl v1tg v1tc v1hba1c v1diab1 v1glucose
## 1 1 0 0 0 4.07 2.54 6.42 5.5 Normal 5.2
## 2 1 0 0 0 4.05 1.28 5.53 5.4 Normal 4.9
## 3 1 0 0 0 1.83 1.50 3.30 5.8 Pre-Diabetes 4.8
## 4 1 0 0 0 2.95 2.07 4.33 5.8 Pre-Diabetes 4.6
## 5 3 0 0 0 3.66 1.00 4.79 5.4 Normal 5.0
## 6 1 0 0 0 2.32 2.45 4.24 5.5 Normal 4.2
## v1diab2 hypertension obese1 obese2 fnbmd totalhipbmd lsbmd
## 1 Normal No Normal Normal 0.5132332 0.5679375 1.1556660
## 2 Normal No Overweight Overweight 0.6875569 0.7404673 0.9151062
## 3 Normal No Overweight Obese 0.7539456 0.9021965 0.9520324
## 4 Normal No Normal Overweight 0.6967018 0.8955486 0.9571054
## 5 Normal No Overweight Obese 0.8427472 0.9672657 1.1989470
## 6 Normal No Overweight Overweight 0.6906412 0.8178084 0.8678460
## wbbmd trunkfat trunklean trunkmass trunkpfat wbtotfat wbtotlean
## 1 0.9261586 7597.105 12985.37 20582.48 36.91055 16493.88 27978.42
## 2 1.0240500 11352.020 15868.70 27220.72 41.70361 25506.65 33337.02
## 3 1.0469110 16617.950 18555.30 35173.25 47.24599 31672.95 39431.90
## 4 1.0531350 14999.500 15177.67 30177.17 49.70480 27544.03 29019.09
## 5 1.1842280 12851.210 17331.04 30182.25 42.57869 27665.33 35057.20
## 6 0.9932672 11829.440 19888.04 31717.49 37.29628 21433.45 42860.67
## wbtotmass wbtotpfat datedxa1 v2glucose v2hba1c v2ldl v2tc v2tg v2diab1
## 1 44472.30 37.08798 11/07/2015 4.6 5.7 5.49 7.70 2.59 Pre-Diabetes
## 2 58843.67 43.34647 11/07/2015 5.0 4.8 3.51 5.08 1.07 Normal
## 3 71104.84 44.54401 11/07/2015 5.1 5.4 1.83 3.36 0.97 Normal
## 4 56563.12 48.69609 26/07/2015 5.1 5.3 4.06 5.51 1.90 Normal
## 5 62722.54 44.10748 25/07/2015 4.8 5.2 3.58 5.10 0.78 Normal
## 6 64294.12 33.33656 26/07/2015 4.9 5.3 3.04 5.15 2.89 Normal
## v2diab2 antdm trunk_fat trunk_lean trunk_mass trunk_pfat wbtot_fat wbtot_lean
## 1 Normal 0 9302.805 13458.35 22761.15 40.87141 18962.98 29166.12
## 2 Normal 0 11341.490 14956.16 26297.65 43.12740 25474.24 32104.24
## 3 Normal 0 17101.520 17677.70 34779.21 49.17166 31657.79 36484.09
## 4 Normal 0 15515.730 15261.40 30777.14 50.41318 29760.77 29023.54
## 5 Normal 0 14381.650 18145.18 32526.84 44.21473 29522.69 34710.20
## 6 Normal 0 11767.680 18895.61 30663.29 38.37709 22225.49 41308.70
## wbtot_mass wbtot_pfat datedxa2 duration identifier1 patient_patient_key
## 1 48129.09 39.40023 22/10/2017 2.28 1 1JZ7110T5207200405
## 2 57578.48 44.24264 25/06/2017 1.96 2 1JZ7110UXR09200405
## 3 68141.89 46.45864 25/06/2017 1.96 5 1JZ7110WOH0B200405
## 4 58784.31 50.62707 25/06/2017 1.92 8 1JZ7260QOR08200405
## 5 64232.89 45.96195 25/06/2017 1.92 10 1JZ7250P2K05200405
## 6 63534.19 34.98193 25/06/2017 1.92 14 1JZ7260S5J0A200405
## scananalysis_patient_key scananalysis_scanid scan_date analysis_date
## 1 1JZ7110T5207200405 A1JZ7110TPU1C 11/07/2015 3/10/2015
## 2 1JZ7110UXR09200405 A1JZ7110VFI1M 11/07/2015 11/07/2015
## 3 1JZ7110WOH0B200405 A1JZ7110X6Z1W 11/07/2015 11/07/2015
## 4 1JZ7260QOR08200405 A1JZ72600RI16 26/07/2015 26/07/2015
## 5 1JZ7250P2K05200405 A1JZ7250Q1T0P 25/07/2015 25/07/2015
## 6 1JZ7260S5J0A200405 A1JZ7260SFU1E 26/07/2015 26/07/2015
## wbodycomposition_scanid androidgynoidcomposition_scanid vfat_body_fat
## 1 A1JZ7110TPU1C A1JZ7110TPU1C 668.0502
## 2 A1JZ7110VFI1M A1JZ7110VFI1M 1153.0930
## 3 A1JZ7110X6Z1W A1JZ7110X6Z1W 1703.0580
## 4 A1JZ72600RI16 A1JZ72600RI16 1428.6060
## 5 A1JZ7250Q1T0P A1JZ7250Q1T0P 1410.1150
## 6 A1JZ7260SFU1E A1JZ7260SFU1E 1178.2300
## vfat_body_lean vfat_body_mass vfat_body_pfat subcu_fat_correction gender
## 1 1256.514 1924.564 34.71177 91.54356 0
## 2 1363.755 2516.847 45.81496 126.46710 0
## 3 1695.045 3398.102 50.11791 158.63670 0
## 4 1266.956 2695.562 52.99844 201.82980 0
## 5 1567.519 2977.633 47.35689 192.23010 0
## 6 1641.275 2819.504 41.78854 72.18629 1
## v1hba1c_sd v1glucose_sd whr_cm bmi_sd whr_sd v1_dm v2_dm
## 1 16.17647 11.063830 88.63636 6.016260 13.15079 Normal Normal
## 2 15.88235 10.425530 82.00000 8.260162 12.16617 Normal Normal
## 3 17.05882 10.212770 89.25233 9.528455 13.24219 Pre-Diabetes Normal
## 4 17.05882 9.787234 75.23810 7.772357 11.16292 Pre-Diabetes Normal
## 5 15.88235 10.638300 92.00000 9.073171 13.64985 Normal Normal
## 6 16.17647 8.936170 90.52632 8.455284 13.43120 Normal Normal
## bmi_gr date2 date1 fu dm X_st X_d X_t X_t0
## 1 Normal 22oct2017 11jul2015 2.283368 0 1 0 2.283368 0
## 2 Obese 25jun2017 11jul2015 1.957563 0 1 0 1.957563 0
## 3 Obese 25jun2017 11jul2015 1.957563 0 1 0 1.957563 0
## 4 Overweight 25jun2017 26jul2015 1.916496 0 1 0 1.916496 0
## 5 Obese 25jun2017 25jul2015 1.919233 0 1 0 1.919233 0
## 6 Obese 25jun2017 26jul2015 1.916496 0 1 0 1.916496 0
dm$bmigr = as.factor(dm$bmi_gr)
dm$education = as.factor(dm$edu)
dm$occupation = as.factor(dm$occu)
dm$alcohol = as.factor(dm$alcol)
dm$smoking = as.factor(dm$smok)
createTable(compareGroups(sex ~ age + weight + height + bmi + bmigr + wc + hip + whr + sbp + dbp + hypertension + education + occupation + alcohol + smoking + v1hba1c + v1glucose + v1_dm + v2hba1c + v2glucose + v2_dm, data = dm))
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
##
## --------Summary descriptives table by 'sex'---------
##
## ___________________________________________________
## F M p.overall
## N=1212 N=520
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 55.8 (9.18) 56.2 (9.82) 0.407
## weight 53.4 (7.89) 61.8 (9.65) <0.001
## height 153 (5.17) 163 (6.17) <0.001
## bmi 22.9 (3.03) 23.3 (3.16) 0.018
## bmigr: 0.003
## Normal 595 (49.1%) 218 (41.9%)
## Obese 261 (21.5%) 153 (29.4%)
## Overweight 297 (24.5%) 121 (23.3%)
## Underweight 59 (4.87%) 28 (5.38%)
## wc 80.0 (8.02) 84.2 (8.90) <0.001
## hip 92.7 (6.57) 92.3 (7.30) 0.301
## whr 0.86 (0.07) 0.91 (0.06) <0.001
## sbp 119 (15.7) 122 (15.2) <0.001
## dbp 76.5 (10.5) 79.1 (10.4) <0.001
## hypertension: 0.008
## No 895 (77.4%) 355 (71.1%)
## Yes 262 (22.6%) 144 (28.9%)
## education: <0.001
## 1 257 (21.2%) 74 (14.3%)
## 2 293 (24.2%) 115 (22.2%)
## 3 371 (30.7%) 184 (35.5%)
## 4 64 (5.29%) 19 (3.67%)
## 5 207 (17.1%) 123 (23.7%)
## 6 18 (1.49%) 3 (0.58%)
## occupation: .
## 1 178 (14.7%) 73 (14.0%)
## 2 136 (11.2%) 72 (13.8%)
## 3 141 (11.6%) 99 (19.0%)
## 4 53 (4.37%) 47 (9.04%)
## 5 56 (4.62%) 30 (5.77%)
## 6 425 (35.1%) 2 (0.38%)
## 7 65 (5.36%) 27 (5.19%)
## 8 3 (0.25%) 3 (0.58%)
## 9 155 (12.8%) 167 (32.1%)
## alcohol: <0.001
## 0 1178 (97.2%) 288 (55.4%)
## 1 34 (2.81%) 232 (44.6%)
## smoking: <0.001
## 0 1204 (99.3%) 283 (54.4%)
## 1 8 (0.66%) 237 (45.6%)
## v1hba1c 5.58 (0.34) 5.59 (0.33) 0.597
## v1glucose 4.97 (0.48) 5.01 (0.46) 0.078
## v1_dm: 0.251
## Normal 826 (68.2%) 339 (65.2%)
## Pre-Diabetes 386 (31.8%) 181 (34.8%)
## v2hba1c 5.54 (0.52) 5.59 (0.48) 0.052
## v2glucose 5.03 (0.67) 5.04 (0.68) 0.775
## v2_dm: 0.210
## Diabetes 54 (4.46%) 20 (3.85%)
## Normal 830 (68.5%) 338 (65.0%)
## Pre-Diabetes 328 (27.1%) 162 (31.2%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
createTable(compareGroups(v2_dm ~ sex + age + weight + height + bmi + bmigr + wc + hip + whr + sbp + dbp + hypertension + education + occupation + alcohol + smoking + v1hba1c + v1glucose + v2hba1c + v2glucose, data = dm))
##
## --------Summary descriptives table by 'v2_dm'---------
##
## _______________________________________________________________
## Diabetes Normal Pre-Diabetes p.overall
## N=74 N=1168 N=490
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## sex: 0.210
## F 54 (73.0%) 830 (71.1%) 328 (66.9%)
## M 20 (27.0%) 338 (28.9%) 162 (33.1%)
## age 58.9 (8.33) 54.6 (9.11) 58.6 (9.50) <0.001
## weight 59.3 (9.06) 54.9 (8.95) 57.9 (9.72) <0.001
## height 154 (6.86) 156 (7.10) 156 (7.51) 0.273
## bmi 24.8 (2.82) 22.6 (2.96) 23.8 (3.15) <0.001
## bmigr: .
## Normal 18 (24.3%) 593 (50.8%) 202 (41.2%)
## Obese 38 (51.4%) 220 (18.8%) 156 (31.8%)
## Overweight 17 (23.0%) 282 (24.1%) 119 (24.3%)
## Underweight 1 (1.35%) 73 (6.25%) 13 (2.65%)
## wc 85.3 (7.51) 79.8 (8.34) 84.1 (8.14) <0.001
## hip 94.2 (8.10) 92.1 (6.66) 93.7 (6.75) <0.001
## whr 0.90 (0.06) 0.87 (0.07) 0.90 (0.06) <0.001
## sbp 121 (17.6) 119 (15.5) 122 (15.4) 0.002
## dbp 79.6 (11.9) 76.8 (10.4) 78.2 (10.4) 0.007
## hypertension: 0.006
## No 45 (63.4%) 867 (77.5%) 338 (72.5%)
## Yes 26 (36.6%) 252 (22.5%) 128 (27.5%)
## education: .
## 1 5 (6.85%) 222 (19.0%) 104 (21.3%)
## 2 26 (35.6%) 271 (23.2%) 111 (22.7%)
## 3 34 (46.6%) 364 (31.2%) 157 (32.1%)
## 4 2 (2.74%) 57 (4.89%) 24 (4.91%)
## 5 6 (8.22%) 239 (20.5%) 85 (17.4%)
## 6 0 (0.00%) 13 (1.11%) 8 (1.64%)
## occupation: .
## 1 9 (12.2%) 170 (14.6%) 72 (14.7%)
## 2 3 (4.05%) 160 (13.7%) 45 (9.18%)
## 3 12 (16.2%) 149 (12.8%) 79 (16.1%)
## 4 5 (6.76%) 61 (5.22%) 34 (6.94%)
## 5 4 (5.41%) 64 (5.48%) 18 (3.67%)
## 6 22 (29.7%) 288 (24.7%) 117 (23.9%)
## 7 7 (9.46%) 44 (3.77%) 41 (8.37%)
## 8 0 (0.00%) 4 (0.34%) 2 (0.41%)
## 9 12 (16.2%) 228 (19.5%) 82 (16.7%)
## alcohol: 0.552
## 0 64 (86.5%) 981 (84.0%) 421 (85.9%)
## 1 10 (13.5%) 187 (16.0%) 69 (14.1%)
## smoking: 0.037
## 0 68 (91.9%) 1013 (86.7%) 406 (82.9%)
## 1 6 (8.11%) 155 (13.3%) 84 (17.1%)
## v1hba1c 6.11 (0.26) 5.46 (0.28) 5.80 (0.29) <0.001
## v1glucose 5.63 (0.58) 4.85 (0.40) 5.18 (0.47) <0.001
## v2hba1c 6.90 (0.91) 5.32 (0.28) 5.91 (0.24) 0.000
## v2glucose 6.64 (1.41) 4.81 (0.40) 5.34 (0.57) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
men = subset(dm, sex == "M")
createTable(compareGroups(v2_dm ~ sex + age + weight + height + bmi + bmigr + wc + hip + whr + sbp + dbp + hypertension + education + occupation + alcol + smok + v1hba1c + v1glucose + v2hba1c + v2glucose, data = men))
## Warning in cor(as.integer(x), as.integer(y)): the standard deviation is zero
##
## --------Summary descriptives table by 'v2_dm'---------
##
## ______________________________________________________________
## Diabetes Normal Pre-Diabetes p.overall
## N=20 N=338 N=162
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## sex: M 20 (100%) 338 (100%) 162 (100%) .
## age 59.5 (9.93) 55.3 (9.54) 57.7 (10.2) 0.013
## weight 66.0 (6.94) 60.6 (9.33) 63.9 (10.2) <0.001
## height 162 (5.28) 163 (6.10) 163 (6.46) 0.791
## bmi 25.1 (2.13) 22.9 (3.10) 24.0 (3.22) <0.001
## bmigr: .
## Normal 4 (20.0%) 155 (45.9%) 59 (36.4%)
## Obese 13 (65.0%) 78 (23.1%) 62 (38.3%)
## Overweight 3 (15.0%) 81 (24.0%) 37 (22.8%)
## Underweight 0 (0.00%) 24 (7.10%) 4 (2.47%)
## wc 89.5 (6.33) 82.9 (8.69) 86.2 (9.01) <0.001
## hip 95.3 (5.41) 91.5 (7.78) 93.6 (6.12) 0.002
## whr 0.94 (0.04) 0.90 (0.06) 0.92 (0.06) <0.001
## sbp 127 (24.3) 121 (14.5) 123 (15.1) 0.079
## dbp 83.5 (14.1) 78.6 (10.1) 79.7 (10.5) 0.078
## hypertension: 0.023
## No 9 (47.4%) 241 (74.2%) 105 (67.7%)
## Yes 10 (52.6%) 84 (25.8%) 50 (32.3%)
## education: .
## 1 3 (15.0%) 47 (14.0%) 24 (14.8%)
## 2 5 (25.0%) 76 (22.6%) 34 (21.0%)
## 3 9 (45.0%) 118 (35.1%) 57 (35.2%)
## 4 0 (0.00%) 11 (3.27%) 8 (4.94%)
## 5 3 (15.0%) 82 (24.4%) 38 (23.5%)
## 6 0 (0.00%) 2 (0.60%) 1 (0.62%)
## occupation: .
## 1 2 (10.0%) 44 (13.0%) 27 (16.7%)
## 2 2 (10.0%) 50 (14.8%) 20 (12.3%)
## 3 4 (20.0%) 64 (18.9%) 31 (19.1%)
## 4 3 (15.0%) 31 (9.17%) 13 (8.02%)
## 5 1 (5.00%) 22 (6.51%) 7 (4.32%)
## 6 0 (0.00%) 1 (0.30%) 1 (0.62%)
## 7 2 (10.0%) 14 (4.14%) 11 (6.79%)
## 8 0 (0.00%) 1 (0.30%) 2 (1.23%)
## 9 6 (30.0%) 111 (32.8%) 50 (30.9%)
## alcol 0.40 (0.50) 0.48 (0.50) 0.39 (0.49) 0.169
## smok 0.30 (0.47) 0.44 (0.50) 0.51 (0.50) 0.107
## v1hba1c 6.14 (0.25) 5.47 (0.29) 5.78 (0.27) <0.001
## v1glucose 5.59 (0.54) 4.92 (0.43) 5.13 (0.44) <0.001
## v2hba1c 6.98 (0.70) 5.35 (0.27) 5.91 (0.24) <0.001
## v2glucose 6.89 (1.47) 4.84 (0.41) 5.24 (0.57) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
women = subset(dm, sex == "F")
createTable(compareGroups(v2_dm ~ sex + age + weight + height + bmi + bmigr + wc + hip + whr + sbp + dbp + hypertension + education + occupation + alcol + smok + v1hba1c + v1glucose + v2hba1c + v2glucose, data = women))
## Warning in cor(as.integer(x), as.integer(y)): the standard deviation is zero
##
## --------Summary descriptives table by 'v2_dm'---------
##
## ______________________________________________________________
## Diabetes Normal Pre-Diabetes p.overall
## N=54 N=830 N=328
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## sex: F 54 (100%) 830 (100%) 328 (100%) .
## age 58.6 (7.75) 54.3 (8.91) 59.1 (9.12) <0.001
## weight 56.8 (8.54) 52.6 (7.67) 54.9 (7.98) <0.001
## height 152 (5.01) 153 (5.18) 152 (5.17) 0.259
## bmi 24.7 (3.05) 22.5 (2.91) 23.6 (3.11) <0.001
## bmigr: .
## Normal 14 (25.9%) 438 (52.8%) 143 (43.6%)
## Obese 25 (46.3%) 142 (17.1%) 94 (28.7%)
## Overweight 14 (25.9%) 201 (24.2%) 82 (25.0%)
## Underweight 1 (1.85%) 49 (5.90%) 9 (2.74%)
## wc 83.8 (7.38) 78.6 (7.86) 83.0 (7.47) <0.001
## hip 93.8 (8.90) 92.3 (6.13) 93.7 (7.05) 0.001
## whr 0.89 (0.05) 0.85 (0.07) 0.89 (0.06) <0.001
## sbp 118 (14.0) 118 (15.8) 121 (15.5) 0.016
## dbp 78.2 (10.7) 76.0 (10.5) 77.4 (10.2) 0.064
## hypertension: 0.134
## No 36 (69.2%) 626 (78.8%) 233 (74.9%)
## Yes 16 (30.8%) 168 (21.2%) 78 (25.1%)
## education: .
## 1 2 (3.77%) 175 (21.1%) 80 (24.5%)
## 2 21 (39.6%) 195 (23.5%) 77 (23.5%)
## 3 25 (47.2%) 246 (29.6%) 100 (30.6%)
## 4 2 (3.77%) 46 (5.54%) 16 (4.89%)
## 5 3 (5.66%) 157 (18.9%) 47 (14.4%)
## 6 0 (0.00%) 11 (1.33%) 7 (2.14%)
## occupation: .
## 1 7 (13.0%) 126 (15.2%) 45 (13.7%)
## 2 1 (1.85%) 110 (13.3%) 25 (7.62%)
## 3 8 (14.8%) 85 (10.2%) 48 (14.6%)
## 4 2 (3.70%) 30 (3.61%) 21 (6.40%)
## 5 3 (5.56%) 42 (5.06%) 11 (3.35%)
## 6 22 (40.7%) 287 (34.6%) 116 (35.4%)
## 7 5 (9.26%) 30 (3.61%) 30 (9.15%)
## 8 0 (0.00%) 3 (0.36%) 0 (0.00%)
## 9 6 (11.1%) 117 (14.1%) 32 (9.76%)
## alcol 0.04 (0.19) 0.03 (0.17) 0.02 (0.13) 0.443
## smok 0.00 (0.00) 0.01 (0.09) 0.00 (0.06) 0.493
## v1hba1c 6.10 (0.27) 5.46 (0.28) 5.81 (0.30) <0.001
## v1glucose 5.65 (0.60) 4.83 (0.38) 5.21 (0.48) <0.001
## v2hba1c 6.87 (0.98) 5.30 (0.28) 5.91 (0.25) <0.001
## v2glucose 6.54 (1.39) 4.79 (0.40) 5.39 (0.57) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
p = ggplot(data = dm, aes(x = v1hba1c, y = v2hba1c, col = sex))
p1 = p + geom_point() + stat_smooth() +
labs(x = "HbA1c (%) measured at visit 1", y = "HbA1c (%) measured at visit 2", title = "Correlation of HbA1c") + theme(legend.position="none")
p1
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
p = ggplot(data = dm, aes(x = v1glucose, y = v2glucose, col = sex))
p2 = p + geom_point() + stat_smooth() +
labs(x = "Blood glucose (mmol/dL) measured at visit 1", y = "Blood glucose (mmol/dL) measured at visit 2", title = "Correlation of blood glucose") + theme(legend.position="none")
p2
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
grid.arrange(p1, p2, nrow = 1)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
dm$v2_dm2 = factor(dm$v2_dm, levels = c("Normal", "Pre-Diabetes", "Diabetes"))
table(dm$v1_dm, dm$v2_dm2)
##
## Normal Pre-Diabetes Diabetes
## Normal 980 181 4
## Pre-Diabetes 188 309 70
options(digits = 4)
prop.table(table(dm$v1_dm, dm$v2_dm2))
##
## Normal Pre-Diabetes Diabetes
## Normal 0.565820 0.104503 0.002309
## Pre-Diabetes 0.108545 0.178406 0.040416
table(men$v1_dm, men$v2_dm)
##
## Diabetes Normal Pre-Diabetes
## Normal 0 279 60
## Pre-Diabetes 20 59 102
prop.table(table(men$v1_dm, men$v2_dm))
##
## Diabetes Normal Pre-Diabetes
## Normal 0.00000 0.53654 0.11538
## Pre-Diabetes 0.03846 0.11346 0.19615
table(women$v1_dm, women$v2_dm)
##
## Diabetes Normal Pre-Diabetes
## Normal 4 701 121
## Pre-Diabetes 50 129 207
prop.table(table(women$v1_dm, women$v2_dm))
##
## Diabetes Normal Pre-Diabetes
## Normal 0.00330 0.57838 0.09983
## Pre-Diabetes 0.04125 0.10644 0.17079
Cox’s PH model with only clinical factor
bma.cox = bic.surv(Surv(fu, dm) ~ age + gender + bmi + whr + sbp + dbp + alcol + smok, data = dm)
summary(bma.cox)
##
## Call:
## bic.surv.formula(f = Surv(fu, dm) ~ age + gender + bmi + whr + sbp + dbp + alcol + smok, data = dm)
##
##
## 32 models were selected
## Best 5 models (cumulative posterior probability = 0.518 ):
##
## p!=0 EV SD model 1 model 2 model 3 model 4
## age 40.8 0.009592 0.01378 0.0245 . . .
## gender 7.6 0.000121 0.08815 . . . .
## bmi 100.0 0.159382 0.03367 0.1677 0.1631 0.1503 0.1483
## whr 34.4 1.154292 1.92912 . . 3.3436 3.8102
## sbp 8.2 -0.000560 0.00382 . . . .
## dbp 16.7 0.002633 0.00820 . . . .
## alcol 6.4 -0.002262 0.09214 . . . .
## smok 30.0 -0.204528 0.39517 . . . -0.7400
##
## nVar 2 1 2 3
## BIC -19.2108 -19.0978 -18.3037 -17.6364
## post prob 0.151 0.143 0.096 0.069
## model 5
## age .
## gender .
## bmi 0.1623
## whr .
## sbp .
## dbp .
## alcol .
## smok -0.6258
##
## nVar 2
## BIC -17.3537
## post prob 0.060
Cox’s PH model with all factors
bma.cox2 = bic.surv(Surv(fu, dm) ~ age + gender + bmi + whr + sbp + dbp + alcol + smok + v1hba1c_sd + v1glucose_sd, data = dm)
summary(bma.cox2)
##
## Call:
## bic.surv.formula(f = Surv(fu, dm) ~ age + gender + bmi + whr + sbp + dbp + alcol + smok + v1hba1c_sd + v1glucose_sd, data = dm)
##
##
## 35 models were selected
## Best 5 models (cumulative posterior probability = 0.454 ):
##
## p!=0 EV SD model 1 model 2 model 3
## age 20.7 -0.004317 0.01058 . . .
## gender 15.1 0.064369 0.20853 . . .
## bmi 34.7 0.020960 0.03464 . . 0.0598
## whr 10.6 0.241602 0.99038 . . .
## sbp 7.6 -0.000521 0.00288 . . .
## dbp 5.2 0.000238 0.00298 . . .
## alcol 9.5 0.035633 0.16362 . . .
## smok 55.4 -0.510119 0.58411 . -0.7837 .
## v1hba1c_sd 100.0 1.286794 0.15820 1.2540 1.3022 1.2347
## v1glucose_sd 100.0 0.629892 0.09964 0.6535 0.6219 0.6382
##
## nVar 2 3 3
## BIC -197.8753 -197.5380 -196.8032
## post prob 0.136 0.115 0.080
## model 4 model 5
## age . .
## gender . 0.4882
## bmi 0.0629 .
## whr . .
## sbp . .
## dbp . .
## alcol . .
## smok -0.8117 -1.1789
## v1hba1c_sd 1.2876 1.3134
## v1glucose_sd 0.6088 0.6296
##
## nVar 4 4
## BIC -196.7903 -195.5943
## post prob 0.079 0.044
Only CLINICAL candidate variables
options(digits = 8)
bma.0 = bic.mlogit(v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + alcol + smok, data = dm, verbose = T, approx = F, sep = '')
##
## Begg & Gray approximation started.
## 9 variables considered.
## 11 models initially selected.
## Estimating MNL coefficients for 11 models ... 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
## Estimation finished.
## Final number of models: 2
summary(bma.0)
##
## Call:
## bic.mlogit(f = v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + alcol + smok, data = dm, sep = "", approx = F, verbose = T)
##
##
## 2 models were selected
## Best 2 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -9.994105 0.8194031 -9.986120 -10.092843
## Intercept.Diabetes 100.0 -11.884503 0.8325682 -11.876522 -11.983176
## age 100.0 0.041160 0.0060162 0.041254 0.039989
## bmi 100.0 0.110400 0.0189029 0.110459 0.109676
## whr 100.0 4.797263 0.8660483 4.777527 5.041291
## sbp 0.0 0.000000 0.0000000 . .
## dbp 0.0 0.000000 0.0000000 . .
## alcol 7.5 -0.017929 0.0759951 . -0.239609
## smok 0.0 0.000000 0.0000000 . .
## gender 0.0 0.000000 0.0000000 . .
##
## nVar 4 5
## BIC 2494.845822 2499.875475
## post prob 0.925 0.075
##
## MNL specification:
## ==================
## Response variable: v2_dm2
## Base choice name: Normal
## Base choice index: 1
## Frequency of alternatives:
## Normal Pre-Diabetes Diabetes
## 1168 490 74
##
## Equations:
## ----------
## alternative intercept 1 2 3 4 5 6 7
## 1 Normal:
## 2 Pre-Diabetes: ascPre-Diabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 3 Diabetes: ascDiabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 8
## 1
## 2 + gender
## 3 + gender
imageplot.mlogit(bma.0)
Candidate variables include both HbA1c and blood glucose at baseline
options(digits = 8)
bma.1 = bic.mlogit(v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + v1hba1c_sd + v1glucose_sd + alcol + smok, data = dm, verbose = T, approx = F, sep = '')
##
## Begg & Gray approximation started.
## 11 variables considered.
## 19 models initially selected.
## Estimating MNL coefficients for 19 models ... 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
## Estimation finished.
## Final number of models: 4
summary(bma.1)
##
## Call:
## bic.mlogit(f = v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + v1hba1c_sd + v1glucose_sd + alcol + smok, data = dm, sep = "", approx = F, verbose = T)
##
##
## 4 models were selected
## Best 4 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -3.5168e+01 1.7912138 -3.5079e+01 -3.5336e+01
## Intercept.Diabetes 100.0 -3.7059e+01 1.8084676 -3.6969e+01 -3.7227e+01
## age 0.0 0.0000e+00 0.0000000 . .
## bmi 25.0 1.2739e-02 0.0245060 . 5.0997e-02
## whr 100.0 5.5782e+00 1.0225699 5.7412e+00 5.0394e+00
## sbp 3.5 1.8685e-04 0.0012627 . .
## dbp 0.0 0.0000e+00 0.0000000 . .
## alcol 6.4 -1.9285e-02 0.0865625 . .
## smok 0.0 0.0000e+00 0.0000000 . .
## gender 0.0 0.0000e+00 0.0000000 . .
## v1hba1c_sd 100.0 1.3787e+00 0.0835393 1.3819e+00 1.3716e+00
## v1glucose_sd 100.0 5.7577e-01 0.0714069 5.7863e-01 5.6565e-01
##
## nVar 4 5
## BIC 1.9891e+03 1.9910e+03
## post prob 0.651 0.250
## model 3 model 4
## Intercept -3.5191e+01 -3.5585e+01
## Intercept.Diabetes -3.7081e+01 -3.7475e+01
## age . .
## bmi . .
## whr 5.9862e+00 5.6441e+00
## sbp . 5.3074e-03
## dbp . .
## alcol -3.0137e-01 .
## smok . .
## gender . .
## v1hba1c_sd 1.3733e+00 1.3810e+00
## v1glucose_sd 5.8643e-01 5.7548e-01
##
## nVar 5 5
## BIC 1.9938e+03 1.9950e+03
## post prob 0.064 0.035
##
## MNL specification:
## ==================
## Response variable: v2_dm2
## Base choice name: Normal
## Base choice index: 1
## Frequency of alternatives:
## Normal Pre-Diabetes Diabetes
## 1168 490 74
##
## Equations:
## ----------
## alternative intercept 1 2 3 4 5 6 7
## 1 Normal:
## 2 Pre-Diabetes: ascPre-Diabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 3 Diabetes: ascDiabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 8 9 10
## 1
## 2 + gender + v1hba1c_sd + v1glucose_sd
## 3 + gender + v1hba1c_sd + v1glucose_sd
imageplot.mlogit(bma.1)
Candidate variables include only HbA1c at baseline
bma.2 = bic.mlogit(v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + v1hba1c_sd + alcol + smok, data = dm, verbose = T, approx = F, sep = '')
##
## Begg & Gray approximation started.
## 10 variables considered.
## 11 models initially selected.
## Estimating MNL coefficients for 11 models ... 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
## Estimation finished.
## Final number of models: 3
summary(bma.2)
##
## Call:
## bic.mlogit(f = v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + v1hba1c_sd + alcol + smok, data = dm, sep = "", approx = F, verbose = T)
##
##
## 3 models were selected
## Best 3 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -3.2282e+01 1.6539918 -3.2366e+01 -3.1961e+01
## Intercept.Diabetes 100.0 -3.4172e+01 1.6715172 -3.4256e+01 -3.3852e+01
## age 0.0 0.0000e+00 0.0000000 . .
## bmi 73.6 4.7428e-02 0.0334282 6.4541e-02 .
## whr 100.0 5.3513e+00 1.0401619 5.1136e+00 6.0243e+00
## sbp 4.2 2.3038e-04 0.0013801 . .
## dbp 0.0 0.0000e+00 0.0000000 . .
## alcol 0.0 0.0000e+00 0.0000000 . .
## smok 0.0 0.0000e+00 0.0000000 . .
## gender 0.0 0.0000e+00 0.0000000 . .
## v1hba1c_sd 100.0 1.5384e+00 0.0819569 1.5339e+00 1.5513e+00
##
## nVar 4 3
## BIC 2.0492e+03 2.0511e+03
## post prob 0.694 0.264
## model 3
## Intercept -3.2913e+01
## Intercept.Diabetes -3.4803e+01
## age .
## bmi 6.2609e-02
## whr 5.0501e+00
## sbp 5.5288e-03
## dbp .
## alcol .
## smok .
## gender .
## v1hba1c_sd 1.5328e+00
##
## nVar 5
## BIC 2.0548e+03
## post prob 0.042
##
## MNL specification:
## ==================
## Response variable: v2_dm2
## Base choice name: Normal
## Base choice index: 1
## Frequency of alternatives:
## Normal Pre-Diabetes Diabetes
## 1168 490 74
##
## Equations:
## ----------
## alternative intercept 1 2 3 4 5 6 7
## 1 Normal:
## 2 Pre-Diabetes: ascPre-Diabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 3 Diabetes: ascDiabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 8 9
## 1
## 2 + gender + v1hba1c_sd
## 3 + gender + v1hba1c_sd
imageplot.mlogit(bma.2)
dm$age.5 = dm$age/5
m.0 = multinom(v2_dm2 ~ age.5 + bmi_sd + whr_sd, data = dm)
## # weights: 15 (8 variable)
## initial value 1902.796484
## iter 10 value 1228.282704
## final value 1225.369339
## converged
summary(m.0)
## Call:
## multinom(formula = v2_dm2 ~ age.5 + bmi_sd + whr_sd, data = dm)
##
## Coefficients:
## (Intercept) age.5 bmi_sd whr_sd
## Pre-Diabetes -9.7388412 0.20254667 0.29676635 0.33168713
## Diabetes -13.5624979 0.23016985 0.61174808 0.26604294
##
## Std. Errors:
## (Intercept) age.5 bmi_sd whr_sd
## Pre-Diabetes 0.8508162 0.030780545 0.059650173 0.062154162
## Diabetes 1.8499212 0.064268637 0.117193767 0.132998036
##
## Residual Deviance: 2450.7387
## AIC: 2466.7387
exp(coefficients(m.0))
## (Intercept) age.5 bmi_sd whr_sd
## Pre-Diabetes 5.8948803e-05 1.2245172 1.3455009 1.3933169
## Diabetes 1.2878995e-06 1.2588138 1.8436514 1.3047911
m.1 = multinom(v2_dm2 ~ bmi_sd + whr_sd + v1hba1c_sd, data = dm)
## # weights: 15 (8 variable)
## initial value 1902.796484
## iter 10 value 1017.520743
## iter 20 value 964.981610
## final value 964.980291
## converged
summary(m.1)
## Call:
## multinom(formula = v2_dm2 ~ bmi_sd + whr_sd + v1hba1c_sd, data = dm)
##
## Coefficients:
## (Intercept) bmi_sd whr_sd v1hba1c_sd
## Pre-Diabetes -30.654876 0.18017873 0.35073499 1.4391632
## Diabetes -58.676851 0.43134243 0.29994585 2.8546363
##
## Std. Errors:
## (Intercept) bmi_sd whr_sd v1hba1c_sd
## Pre-Diabetes 1.7226142 0.066097338 0.068904381 0.087766606
## Diabetes 4.1534401 0.133893721 0.155945077 0.198697035
##
## Residual Deviance: 1929.9606
## AIC: 1945.9606
exp(coefficients(m.1))
## (Intercept) bmi_sd whr_sd v1hba1c_sd
## Pre-Diabetes 4.8613441e-14 1.1974314 1.4201109 4.2171656
## Diabetes 3.2882683e-26 1.5393226 1.3497857 17.3681200
set.seed(1234)
index <- createDataPartition(dm$dm, p = .60, list = FALSE)
train <- dm[index,]
test <- dm[-index,]
Model with only clinical factors
m.1c <- coxph(Surv(fu, dm) ~ age + bmi_sd, data = dm, id = id)
summary(m.1c)
## Call:
## coxph(formula = Surv(fu, dm) ~ age + bmi_sd, data = dm, id = id)
##
## n= 1732, number of events= 74
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.024545 1.024849 0.011440 2.1455 0.03191 *
## bmi_sd 0.515837 1.675040 0.099129 5.2037 1.954e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0248 0.97575 1.0021 1.0481
## bmi_sd 1.6750 0.59700 1.3793 2.0342
##
## Concordance= 0.692 (se = 0.026 )
## Likelihood ratio test= 27.85 on 2 df, p=9e-07
## Wald test = 30.08 on 2 df, p=3e-07
## Score (logrank) test = 30.09 on 2 df, p=3e-07
zph.m1c <- cox.zph(m.1c)
zph.m1c
## chisq df p
## age 1.5686 1 0.210
## bmi_sd 0.0212 1 0.884
## GLOBAL 1.5686 2 0.456
m.1t <- coxph(Surv(fu, dm) ~ age + bmi_sd, data = train, id = id)
summary(m.1t)
## Call:
## coxph(formula = Surv(fu, dm) ~ age + bmi_sd, data = train, id = id)
##
## n= 1040, number of events= 51
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.02681 1.02717 0.01445 1.8553 0.06355 .
## bmi_sd 0.57384 1.77508 0.11953 4.8010 1.579e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0272 0.97355 0.99849 1.0567
## bmi_sd 1.7751 0.56336 1.40435 2.2437
##
## Concordance= 0.704 (se = 0.031 )
## Likelihood ratio test= 23.42 on 2 df, p=8e-06
## Wald test = 25.46 on 2 df, p=3e-06
## Score (logrank) test = 25.33 on 2 df, p=3e-06
base.mean.1 <- expand.grid(age = 56, bmi_sd = 7.492)
base.mean.1
## age bmi_sd
## 1 56 7.492
base.model.1 <- survfit(m.1t, newdata = base.mean.1)
options(digits = 10)
summary(base.model.1, time = c(2))
## Call: survfit(formula = m.1t, newdata = base.mean.1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 2 142 45 0.928733 0.0129263 0.90374 0.954417
test$pi_dm.1 = 0.02681*(test$age - 56) + 0.57384383*(test$bmi_sd - 7.492);
test$dm.1 = 1 - 0.928733**exp(test$pi_dm.1)
Model with laboratory factors
m.2c <- coxph(Surv(fu, dm) ~ v1hba1c_sd + v1glucose_sd, data = dm, id = id)
summary(m.2c)
## Call:
## coxph(formula = Surv(fu, dm) ~ v1hba1c_sd + v1glucose_sd, data = dm,
## id = id)
##
## n= 1732, number of events= 74
##
## coef exp(coef) se(coef) z Pr(>|z|)
## v1hba1c_sd 1.25800773 3.51840489 0.14994713 8.38968 < 2.22e-16 ***
## v1glucose_sd 0.65729471 1.92956523 0.09782546 6.71906 1.8291e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## v1hba1c_sd 3.518405 0.2842197 2.622470 4.720424
## v1glucose_sd 1.929565 0.5182515 1.592906 2.337376
##
## Concordance= 0.918 (se = 0.013 )
## Likelihood ratio test= 207.49 on 2 df, p=<2e-16
## Wald test = 176.41 on 2 df, p=<2e-16
## Score (logrank) test = 236.47 on 2 df, p=<2e-16
zph.m2c <- cox.zph(m.2c)
zph.m2c
## chisq df p
## v1hba1c_sd 0.0204222 1 0.88636
## v1glucose_sd 0.8438117 1 0.35831
## GLOBAL 0.9881972 2 0.61012
m.2t <- coxph(Surv(fu, dm) ~ v1hba1c_sd + v1glucose_sd, data = train, id = id)
summary(m.2t)
## Call:
## coxph(formula = Surv(fu, dm) ~ v1hba1c_sd + v1glucose_sd, data = train,
## id = id)
##
## n= 1040, number of events= 51
##
## coef exp(coef) se(coef) z Pr(>|z|)
## v1hba1c_sd 1.1841089 3.2677737 0.1791379 6.61004 3.8421e-11 ***
## v1glucose_sd 0.6838519 1.9814956 0.1181161 5.78966 7.0530e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## v1hba1c_sd 3.267774 0.3060187 2.300221 4.642313
## v1glucose_sd 1.981496 0.5046693 1.572000 2.497662
##
## Concordance= 0.925 (se = 0.012 )
## Likelihood ratio test= 147.71 on 2 df, p=<2e-16
## Wald test = 126.65 on 2 df, p=<2e-16
## Score (logrank) test = 175.2 on 2 df, p=<2e-16
base.mean.2 <- expand.grid(v1hba1c_sd = 16.4225, v1glucose_sd = 10.597268)
base.mean.2
## v1hba1c_sd v1glucose_sd
## 1 16.4225 10.597268
base.model.2 <- survfit(m.2t, newdata = base.mean.2)
options(digits = 10)
summary(base.model.2, time = c(2))
## Call: survfit(formula = m.2t, newdata = base.mean.2)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 2 142 45 0.978818 0.0066789 0.965815 0.991996
test$pi_dm.2 = 1.1841089*(test$v1hba1c_sd - 16.4225) + 0.6838519*(test$v1glucose_sd - 10.597268);
test$dm.2 = 1 - 0.928733**exp(test$pi_dm.2)
Model with both clinical and laboratory factors
m.3c <- coxph(Surv(fu, dm) ~ age + bmi_sd + v1hba1c_sd + v1glucose_sd, data = dm, id = id)
summary(m.3c)
## Call:
## coxph(formula = Surv(fu, dm) ~ age + bmi_sd + v1hba1c_sd + v1glucose_sd,
## data = dm, id = id)
##
## n= 1732, number of events= 74
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age -0.01687412 0.98326745 0.01370735 -1.23103 0.218313
## bmi_sd 0.16944731 1.18464992 0.09940846 1.70456 0.088277 .
## v1hba1c_sd 1.28035967 3.59793355 0.15427187 8.29937 < 2.22e-16 ***
## v1glucose_sd 0.63319981 1.88362820 0.09681953 6.54000 6.1519e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 0.9832674 1.0170173 0.9572028 1.010042
## bmi_sd 1.1846499 0.8441312 0.9749299 1.439483
## v1hba1c_sd 3.5979336 0.2779373 2.6591123 4.868213
## v1glucose_sd 1.8836282 0.5308903 1.5580531 2.277236
##
## Concordance= 0.923 (se = 0.012 )
## Likelihood ratio test= 212.29 on 4 df, p=<2e-16
## Wald test = 177.31 on 4 df, p=<2e-16
## Score (logrank) test = 248.86 on 4 df, p=<2e-16
zph.m3c <- cox.zph(m.3c)
zph.m3c
## chisq df p
## age 2.6277983 1 0.10501
## bmi_sd 0.0318263 1 0.85841
## v1hba1c_sd 0.0169519 1 0.89641
## v1glucose_sd 0.9191425 1 0.33770
## GLOBAL 3.9947878 4 0.40671
m.3t <- coxph(Surv(fu, dm) ~ age + bmi_sd + v1hba1c_sd + v1glucose_sd, data = train, id = id)
summary(m.3t)
## Call:
## coxph(formula = Surv(fu, dm) ~ age + bmi_sd + v1hba1c_sd + v1glucose_sd,
## data = train, id = id)
##
## n= 1040, number of events= 51
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age -0.02287697 0.97738272 0.01800447 -1.27063 0.20386
## bmi_sd 0.19594817 1.21646386 0.11935610 1.64171 0.10065
## v1hba1c_sd 1.23301983 3.43157667 0.18733509 6.58189 4.6449e-11 ***
## v1glucose_sd 0.63200459 1.88137820 0.11576089 5.45957 4.7729e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 0.9773827 1.0231407 0.9434942 1.012488
## bmi_sd 1.2164639 0.8220548 0.9627269 1.537076
## v1hba1c_sd 3.4315767 0.2914112 2.3770252 4.953973
## v1glucose_sd 1.8813782 0.5315252 1.4994786 2.360543
##
## Concordance= 0.932 (se = 0.01 )
## Likelihood ratio test= 152.11 on 4 df, p=<2e-16
## Wald test = 129.87 on 4 df, p=<2e-16
## Score (logrank) test = 187.38 on 4 df, p=<2e-16
base.mean.3 <- expand.grid(age = 56, bmi_sd = 7.4916, v1hba1c_sd = 16.4225, v1glucose_sd = 10.597268)
base.mean.3
## age bmi_sd v1hba1c_sd v1glucose_sd
## 1 56 7.4916 16.4225 10.597268
base.model.3 <- survfit(m.3t, newdata = base.mean.3)
options(digits = 10)
summary(base.model.3, time = c(2))
## Call: survfit(formula = m.3t, newdata = base.mean.3)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 2 142 45 0.978959 0.00670121 0.965912 0.992181
test$pi_dm.3 = -0.02287697*(test$age - 56) + 0.19594817*(test$bmi_sd - 7.4916) + 1.233*(test$v1hba1c_sd - 16.4225) + 0.632*(test$v1glucose_sd - 10.597268);
test$dm.3 = 1 - 0.978959**exp(test$pi_dm.3)
par(mfrow=c(1,1))
val.prob.ci.2(p = test$dm.1, y = test$dm, pl = T, logistic.cal = F, g = 10, smooth = "rcs",
xlab="2-year predicted risk of diabetes", ylab="Observed risk of diabetes until 2 years of follow-up", ylim = c(0, 0.4), xlim = c(0,0.4), cex.axis = .8, cex.lab = 1,
statloc = c(-0.01, 0.36), cex = .8)
## Call:
## val.prob.ci.2(p = test$dm.1, y = test$dm, pl = T, smooth = "rcs",
## logistic.cal = F, xlab = "2-year predicted risk of diabetes",
## ylab = "Observed risk of diabetes until 2 years of follow-up",
## xlim = c(0, 0.4), ylim = c(0, 0.4), g = 10, statloc = c(-0.01,
## 0.36), cex = 0.8, cex.axis = 0.8, cex.lab = 1)
##
## A 95% confidence interval is given for the calibration intercept, calibration slope and c-statistic.
##
## Dxy C (ROC) R2 D
## 3.738220576e-01 6.869110288e-01 3.244798335e-02 6.796920742e-03
## D:Chi-sq D:p U U:Chi-sq
## 5.703469153e+00 1.693141531e-02 4.986843954e-02 3.650896016e+01
## U:p Q Brier Intercept
## 1.180810016e-08 -4.307151880e-02 3.787027867e-02 -1.080210745e+00
## Slope Emax Brier scaled
## 6.931305350e-01 4.839648430e-01 -1.785736741e-01
title(main= "Prediction model with age and BMI", adj = 0, cex.main = 1.4)
val.prob.ci.2(p = test$dm.2, y = test$dm, pl = T, logistic.cal = F, g = 10, smooth = "rcs",
xlab="2-year predicted risk", ylab="Observed risk of diabetes until 2 years of follow-up", ylim = c(0, 0.4), xlim = c(0,0.4), cex.axis = .8, cex.lab = 1,
statloc = c(-0.01, 0.36), cex = .8)
## singular information matrix in lrm.fit (rank= 4 ). Offending variable(s):
##
## Warning in value[[3L]](cond): The number of knots led to estimation problems,
## nk will be set to 4.
## singular information matrix in lrm.fit (rank= 3 ). Offending variable(s):
##
## Warning in value[[3L]](cond): Nk 4 also led to estimation problems, nk will be
## set to 3.
## Call:
## val.prob.ci.2(p = test$dm.2, y = test$dm, pl = T, smooth = "rcs",
## logistic.cal = F, xlab = "2-year predicted risk", ylab = "Observed risk of diabetes until 2 years of follow-up",
## xlim = c(0, 0.4), ylim = c(0, 0.4), g = 10, statloc = c(-0.01,
## 0.36), cex = 0.8, cex.axis = 0.8, cex.lab = 1)
##
## A 95% confidence interval is given for the calibration intercept, calibration slope and c-statistic.
##
## Dxy C (ROC) R2 D
## 7.710404887e-01 8.855202444e-01 2.987327889e-01 7.713102818e-02
## D:Chi-sq D:p U U:Chi-sq
## 5.437467150e+01 1.656452753e-13 2.951573938e-01 2.062489165e+02
## U:p Q Brier Intercept
## 0.000000000e+00 -2.180263656e-01 6.573604465e-02 -2.945574454e+00
## Slope Emax Brier scaled
## 5.365055337e-01 7.365272791e-01 -1.045793546e+00
title(main= "Prediction model with HbA1c and blood glucose", adj = 0, cex.main = 1.4)
val.prob.ci.2(p = test$dm.3, y = test$dm, pl = T, logistic.cal = F, g = 10, smooth = "rcs",
xlab="2-year predicted risk", ylab="Observed risk of diabetes until 2 years of follow-up", ylim = c(0, 0.45), xlim = c(0,0.45), cex.axis = .8, cex.lab = 1,
statloc = c(-0.01, 0.4), cex = .8)
## singular information matrix in lrm.fit (rank= 4 ). Offending variable(s):
##
## Warning in value[[3L]](cond): The number of knots led to estimation problems,
## nk will be set to 4.
## singular information matrix in lrm.fit (rank= 3 ). Offending variable(s):
##
## Warning in value[[3L]](cond): Nk 4 also led to estimation problems, nk will be
## set to 3.
## Call:
## val.prob.ci.2(p = test$dm.3, y = test$dm, pl = T, smooth = "rcs",
## logistic.cal = F, xlab = "2-year predicted risk", ylab = "Observed risk of diabetes until 2 years of follow-up",
## xlim = c(0, 0.45), ylim = c(0, 0.45), g = 10, statloc = c(-0.01,
## 0.4), cex = 0.8, cex.axis = 0.8, cex.lab = 1)
##
## A 95% confidence interval is given for the calibration intercept, calibration slope and c-statistic.
##
## Dxy C (ROC) R2 D
## 7.579125236e-01 8.789562618e-01 3.169713340e-01 8.213434680e-02
## D:Chi-sq D:p U U:Chi-sq
## 5.783696799e+01 2.842170943e-14 3.083897753e-02 2.334057245e+01
## U:p Q Brier Intercept
## 8.543957707e-06 5.129536927e-02 3.152862486e-02 -1.048942448e+00
## Slope Emax Brier scaled
## 8.336952192e-01 3.379840654e-01 1.878706603e-02
title(main= "Prediction model with age, BMI, HbA1c, blood glucose", adj = 0, cex.main = 1.4)
Model with only clinical factors
set.seed(1234)
index <- createDataPartition(dm$v2_dm2, p = .60, list = FALSE)
train <- dm[index,]
test <- dm[-index,]
m.0 = multinom(v2_dm2 ~ age.5 + bmi_sd + whr_sd, data = train)
## # weights: 15 (8 variable)
## initial value 1142.556780
## iter 10 value 760.030626
## iter 20 value 732.653141
## final value 732.653051
## converged
summary(m.0)
## Call:
## multinom(formula = v2_dm2 ~ age.5 + bmi_sd + whr_sd, data = train)
##
## Coefficients:
## (Intercept) age.5 bmi_sd whr_sd
## Pre-Diabetes -10.42990133 0.1885760756 0.3490005494 0.3661581368
## Diabetes -15.08334512 0.2640286679 0.5257510802 0.4032118299
##
## Std. Errors:
## (Intercept) age.5 bmi_sd whr_sd
## Pre-Diabetes 1.130854325 0.04042515702 0.07769270329 0.08059446156
## Diabetes 2.431963874 0.08195202042 0.15492848508 0.17222721423
##
## Residual Deviance: 1465.306102
## AIC: 1481.306102
Predicting diabetes status in the training data
train$v2_dm2Predicted = predict(m.0, newdata = train, "class")
tab.1 = table(train$v2_dm2, train$v2_dm2Predicted)
tab.1
##
## Normal Pre-Diabetes Diabetes
## Normal 661 40 0
## Pre-Diabetes 251 43 0
## Diabetes 35 10 0
# Calculate accuracy
round((sum(diag(tab.1))/sum(tab.1))*100,2)
## [1] 67.69
Predicting diabetes status in the validation data
test$v2_dm2Predicted = predict(m.0, newdata = test, "class")
tab.2 = table(test$v2_dm2, test$v2_dm2Predicted)
tab.2
##
## Normal Pre-Diabetes Diabetes
## Normal 435 32 0
## Pre-Diabetes 160 36 0
## Diabetes 25 4 0
# Calculate accuracy
round((sum(diag(tab.2))/sum(tab.2))*100,2)
## [1] 68.06
set.seed(1234)
index <- createDataPartition(dm$v2_dm2, p = .60, list = FALSE)
train <- dm[index,]
test <- dm[-index,]
m.2 = multinom(v2_dm2 ~ bmi_sd + whr_sd + v1hba1c_sd, data = train)
## # weights: 15 (8 variable)
## initial value 1142.556780
## iter 10 value 665.097021
## iter 20 value 582.764135
## iter 30 value 577.372231
## final value 577.321414
## converged
summary(m.2)
## Call:
## multinom(formula = v2_dm2 ~ bmi_sd + whr_sd + v1hba1c_sd, data = train)
##
## Coefficients:
## (Intercept) bmi_sd whr_sd v1hba1c_sd
## Pre-Diabetes -30.46988444 0.2062987029 0.3662155724 1.401559134
## Diabetes -61.45559709 0.2785279610 0.4125404774 2.987716886
##
## Std. Errors:
## (Intercept) bmi_sd whr_sd v1hba1c_sd
## Pre-Diabetes 2.227319152 0.08521027042 0.08849211709 0.1145243961
## Diabetes 5.530980974 0.18304988845 0.21077401408 0.2634449620
##
## Residual Deviance: 1154.642827
## AIC: 1170.642827
Predicting diabetes status in the training data
train$v2_dm2Predicted = predict(m.1, newdata = train, "class")
tab.1 = table(train$v2_dm2, train$v2_dm2Predicted)
tab.1
##
## Normal Pre-Diabetes Diabetes
## Normal 633 68 0
## Pre-Diabetes 149 142 3
## Diabetes 5 33 7
# Calculate accuracy
round((sum(diag(tab.1))/sum(tab.1))*100,2)
## [1] 75.19
Predicting diabetes status in the validation data
test$v2_dm2Predicted = predict(m.1, newdata = test, "class")
tab.2 = table(test$v2_dm2, test$v2_dm2Predicted)
tab.2
##
## Normal Pre-Diabetes Diabetes
## Normal 435 31 1
## Pre-Diabetes 102 93 1
## Diabetes 7 20 2
# Calculate accuracy
round((sum(diag(tab.2))/sum(tab.2))*100,2)
## [1] 76.59
The multinomial model for predicting incident diabetes using both blood glucose/hba1c
m.1b = multinom(v2_dm2 ~ whr_sd + v1hba1c_sd + v1glucose_sd, data = dm)
## # weights: 15 (8 variable)
## initial value 1902.796484
## iter 10 value 988.587051
## iter 20 value 927.112728
## iter 30 value 927.091307
## final value 927.091139
## converged
summary(m.1b)
## Call:
## multinom(formula = v2_dm2 ~ whr_sd + v1hba1c_sd + v1glucose_sd,
## data = dm)
##
## Coefficients:
## (Intercept) whr_sd v1hba1c_sd v1glucose_sd
## Pre-Diabetes -33.43421689 0.3865543270 1.312320893 0.5416331779
## Diabetes -63.42869237 0.4172752754 2.511002908 1.1228024646
##
## Std. Errors:
## (Intercept) whr_sd v1hba1c_sd v1glucose_sd
## Pre-Diabetes 1.835680257 0.06684803359 0.08937472783 0.07273722017
## Diabetes 4.383493139 0.16107238679 0.20362448305 0.14381554617
##
## Residual Deviance: 1854.182277
## AIC: 1870.182277
exp(coefficients(m.1b))
## (Intercept) whr_sd v1hba1c_sd v1glucose_sd
## Pre-Diabetes 3.017894822e-15 1.471900359 3.714785333 1.718811696
## Diabetes 2.839676781e-28 1.517820274 12.317276969 3.073455395
Validation of the model
m.2b = multinom(v2_dm2 ~ whr_sd + v1hba1c_sd + v1glucose_sd, data = train)
## # weights: 15 (8 variable)
## initial value 1142.556780
## iter 10 value 613.933415
## iter 20 value 562.073459
## iter 30 value 562.062434
## final value 562.060955
## converged
summary(m.2b)
## Call:
## multinom(formula = v2_dm2 ~ whr_sd + v1hba1c_sd + v1glucose_sd,
## data = train)
##
## Coefficients:
## (Intercept) whr_sd v1hba1c_sd v1glucose_sd
## Pre-Diabetes -32.38836522 0.4045031535 1.291038255 0.4531038535
## Diabetes -64.72270809 0.4784587070 2.650850767 0.9372892365
##
## Std. Errors:
## (Intercept) whr_sd v1hba1c_sd v1glucose_sd
## Pre-Diabetes 2.327002083 0.0864507297 0.1168052759 0.09345426755
## Diabetes 5.699255739 0.2199355502 0.2726657438 0.18403925527
##
## Residual Deviance: 1124.121909
## AIC: 1140.121909
exp(coefficients(m.2b))
## (Intercept) whr_sd v1hba1c_sd v1glucose_sd
## Pre-Diabetes 8.588388979e-15 1.498557762 3.636560275 1.573187559
## Diabetes 7.785473365e-29 1.613585477 14.166085565 2.553051311
Predicting diabetes status in the training data
train$v2_dm2Predictedb = predict(m.1b, newdata = train, "class")
tab.1b = table(train$v2_dm2, train$v2_dm2Predictedb)
tab.1b
##
## Normal Pre-Diabetes Diabetes
## Normal 635 66 0
## Pre-Diabetes 154 132 8
## Diabetes 3 29 13
# Calculate accuracy
round((sum(diag(tab.1b))/sum(tab.1b))*100,2)
## [1] 75
Predicting diabetes status in the validation data
test$v2_dm2Predictedb = predict(m.1b, newdata = test, "class")
tab.2b = table(test$v2_dm2, test$v2_dm2Predictedb)
tab.2b
##
## Normal Pre-Diabetes Diabetes
## Normal 426 41 0
## Pre-Diabetes 91 102 3
## Diabetes 4 19 6
# Calculate accuracy
round((sum(diag(tab.2b))/sum(tab.2b))*100,2)
## [1] 77.17