Incident diabetes in Vietnam- A prospective cohort study

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(compareGroups)
library(mlogitBMA)
## Loading required package: BMA
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.7-2)
## Loading required package: abind
## Loading required package: maxLik
## Loading required package: miscTools
## 
## Attaching package: 'miscTools'
## The following objects are masked from 'package:robustbase':
## 
##     colMedians, rowMedians
## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
library(ggplot2)
library(nnet)
library(gridExtra)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
library(BMA)
library(survival)
library(tableone)
library(prodlim)
#library(riskRegression)
library(CalibrationCurves)
## Loading required package: rms
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:table1':
## 
##     label, label<-, units
## The following objects are masked from 'package:base':
## 
##     format.pval, units

Data set-up

#dm = read.csv("C:\\Thach\\Research projects\\DM in Vietnam\\dm_vos2.csv")
dm = read.csv("C:\\Thach\\Research projects\\Incident DM in Vietnam\\vos_dm3.csv")
head(dm)
##   id age sex  ageg  bmi height weight   wc hip       whr sbp dbp edu occu
## 1  1  77   F   70+ 18.5  156.0   45.0 78.0  88 0.8863636 100  70   1    9
## 2  2  49   F 40-49 25.4  152.3   59.0 82.0 100 0.8200000 100  60   2    4
## 3  5  56   F 50-59 29.3  154.6   70.0 95.5 107 0.8925233 120  70   5    3
## 4  8  55   F 50-59 23.9  153.0   56.0 79.0 105 0.7523810 110  70   2    9
## 5 10  49   F 40-49 27.9  149.0   62.0 92.0 100 0.9200000 100  80   4    9
## 6 14  55   M 50-59 26.0  157.4   64.5 86.0  95 0.9052632 100  70   4    5
##   marital alcol smok diabdrug v1ldl v1tg v1tc v1hba1c      v1diab1 v1glucose
## 1       1     0    0        0  4.07 2.54 6.42     5.5       Normal       5.2
## 2       1     0    0        0  4.05 1.28 5.53     5.4       Normal       4.9
## 3       1     0    0        0  1.83 1.50 3.30     5.8 Pre-Diabetes       4.8
## 4       1     0    0        0  2.95 2.07 4.33     5.8 Pre-Diabetes       4.6
## 5       3     0    0        0  3.66 1.00 4.79     5.4       Normal       5.0
## 6       1     0    0        0  2.32 2.45 4.24     5.5       Normal       4.2
##   v1diab2 hypertension     obese1     obese2     fnbmd totalhipbmd     lsbmd
## 1  Normal           No     Normal     Normal 0.5132332   0.5679375 1.1556660
## 2  Normal           No Overweight Overweight 0.6875569   0.7404673 0.9151062
## 3  Normal           No Overweight      Obese 0.7539456   0.9021965 0.9520324
## 4  Normal           No     Normal Overweight 0.6967018   0.8955486 0.9571054
## 5  Normal           No Overweight      Obese 0.8427472   0.9672657 1.1989470
## 6  Normal           No Overweight Overweight 0.6906412   0.8178084 0.8678460
##       wbbmd  trunkfat trunklean trunkmass trunkpfat wbtotfat wbtotlean
## 1 0.9261586  7597.105  12985.37  20582.48  36.91055 16493.88  27978.42
## 2 1.0240500 11352.020  15868.70  27220.72  41.70361 25506.65  33337.02
## 3 1.0469110 16617.950  18555.30  35173.25  47.24599 31672.95  39431.90
## 4 1.0531350 14999.500  15177.67  30177.17  49.70480 27544.03  29019.09
## 5 1.1842280 12851.210  17331.04  30182.25  42.57869 27665.33  35057.20
## 6 0.9932672 11829.440  19888.04  31717.49  37.29628 21433.45  42860.67
##   wbtotmass wbtotpfat   datedxa1 v2glucose v2hba1c v2ldl v2tc v2tg      v2diab1
## 1  44472.30  37.08798 11/07/2015       4.6     5.7  5.49 7.70 2.59 Pre-Diabetes
## 2  58843.67  43.34647 11/07/2015       5.0     4.8  3.51 5.08 1.07       Normal
## 3  71104.84  44.54401 11/07/2015       5.1     5.4  1.83 3.36 0.97       Normal
## 4  56563.12  48.69609 26/07/2015       5.1     5.3  4.06 5.51 1.90       Normal
## 5  62722.54  44.10748 25/07/2015       4.8     5.2  3.58 5.10 0.78       Normal
## 6  64294.12  33.33656 26/07/2015       4.9     5.3  3.04 5.15 2.89       Normal
##   v2diab2 antdm trunk_fat trunk_lean trunk_mass trunk_pfat wbtot_fat wbtot_lean
## 1  Normal     0  9302.805   13458.35   22761.15   40.87141  18962.98   29166.12
## 2  Normal     0 11341.490   14956.16   26297.65   43.12740  25474.24   32104.24
## 3  Normal     0 17101.520   17677.70   34779.21   49.17166  31657.79   36484.09
## 4  Normal     0 15515.730   15261.40   30777.14   50.41318  29760.77   29023.54
## 5  Normal     0 14381.650   18145.18   32526.84   44.21473  29522.69   34710.20
## 6  Normal     0 11767.680   18895.61   30663.29   38.37709  22225.49   41308.70
##   wbtot_mass wbtot_pfat   datedxa2 duration identifier1 patient_patient_key
## 1   48129.09   39.40023 22/10/2017     2.28           1  1JZ7110T5207200405
## 2   57578.48   44.24264 25/06/2017     1.96           2  1JZ7110UXR09200405
## 3   68141.89   46.45864 25/06/2017     1.96           5  1JZ7110WOH0B200405
## 4   58784.31   50.62707 25/06/2017     1.92           8  1JZ7260QOR08200405
## 5   64232.89   45.96195 25/06/2017     1.92          10  1JZ7250P2K05200405
## 6   63534.19   34.98193 25/06/2017     1.92          14  1JZ7260S5J0A200405
##   scananalysis_patient_key scananalysis_scanid  scan_date analysis_date
## 1       1JZ7110T5207200405       A1JZ7110TPU1C 11/07/2015     3/10/2015
## 2       1JZ7110UXR09200405       A1JZ7110VFI1M 11/07/2015    11/07/2015
## 3       1JZ7110WOH0B200405       A1JZ7110X6Z1W 11/07/2015    11/07/2015
## 4       1JZ7260QOR08200405       A1JZ72600RI16 26/07/2015    26/07/2015
## 5       1JZ7250P2K05200405       A1JZ7250Q1T0P 25/07/2015    25/07/2015
## 6       1JZ7260S5J0A200405       A1JZ7260SFU1E 26/07/2015    26/07/2015
##   wbodycomposition_scanid androidgynoidcomposition_scanid vfat_body_fat
## 1           A1JZ7110TPU1C                   A1JZ7110TPU1C      668.0502
## 2           A1JZ7110VFI1M                   A1JZ7110VFI1M     1153.0930
## 3           A1JZ7110X6Z1W                   A1JZ7110X6Z1W     1703.0580
## 4           A1JZ72600RI16                   A1JZ72600RI16     1428.6060
## 5           A1JZ7250Q1T0P                   A1JZ7250Q1T0P     1410.1150
## 6           A1JZ7260SFU1E                   A1JZ7260SFU1E     1178.2300
##   vfat_body_lean vfat_body_mass vfat_body_pfat subcu_fat_correction gender
## 1       1256.514       1924.564       34.71177             91.54356      0
## 2       1363.755       2516.847       45.81496            126.46710      0
## 3       1695.045       3398.102       50.11791            158.63670      0
## 4       1266.956       2695.562       52.99844            201.82980      0
## 5       1567.519       2977.633       47.35689            192.23010      0
## 6       1641.275       2819.504       41.78854             72.18629      1
##   v1hba1c_sd v1glucose_sd   whr_cm   bmi_sd   whr_sd        v1_dm  v2_dm
## 1   16.17647    11.063830 88.63636 6.016260 13.15079       Normal Normal
## 2   15.88235    10.425530 82.00000 8.260162 12.16617       Normal Normal
## 3   17.05882    10.212770 89.25233 9.528455 13.24219 Pre-Diabetes Normal
## 4   17.05882     9.787234 75.23810 7.772357 11.16292 Pre-Diabetes Normal
## 5   15.88235    10.638300 92.00000 9.073171 13.64985       Normal Normal
## 6   16.17647     8.936170 90.52632 8.455284 13.43120       Normal Normal
##       bmi_gr     date2     date1       fu dm X_st X_d      X_t X_t0
## 1     Normal 22oct2017 11jul2015 2.283368  0    1   0 2.283368    0
## 2      Obese 25jun2017 11jul2015 1.957563  0    1   0 1.957563    0
## 3      Obese 25jun2017 11jul2015 1.957563  0    1   0 1.957563    0
## 4 Overweight 25jun2017 26jul2015 1.916496  0    1   0 1.916496    0
## 5      Obese 25jun2017 25jul2015 1.919233  0    1   0 1.919233    0
## 6      Obese 25jun2017 26jul2015 1.916496  0    1   0 1.916496    0

(1.1) Table 1. Baseline characteristics

dm$bmigr = as.factor(dm$bmi_gr)
dm$education = as.factor(dm$edu)
dm$occupation = as.factor(dm$occu)
dm$alcohol = as.factor(dm$alcol)
dm$smoking = as.factor(dm$smok)

createTable(compareGroups(sex ~ age + weight + height + bmi + bmigr + wc + hip + whr + sbp + dbp + hypertension + education + occupation + alcohol + smoking + v1hba1c + v1glucose + v1_dm + v2hba1c + v2glucose + v2_dm, data = dm))
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## 
## --------Summary descriptives table by 'sex'---------
## 
## ___________________________________________________ 
##                       F            M      p.overall 
##                     N=1212       N=520              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## age              55.8 (9.18)  56.2 (9.82)   0.407   
## weight           53.4 (7.89)  61.8 (9.65)  <0.001   
## height            153 (5.17)  163 (6.17)   <0.001   
## bmi              22.9 (3.03)  23.3 (3.16)   0.018   
## bmigr:                                      0.003   
##     Normal       595 (49.1%)  218 (41.9%)           
##     Obese        261 (21.5%)  153 (29.4%)           
##     Overweight   297 (24.5%)  121 (23.3%)           
##     Underweight   59 (4.87%)  28 (5.38%)            
## wc               80.0 (8.02)  84.2 (8.90)  <0.001   
## hip              92.7 (6.57)  92.3 (7.30)   0.301   
## whr              0.86 (0.07)  0.91 (0.06)  <0.001   
## sbp               119 (15.7)  122 (15.2)   <0.001   
## dbp              76.5 (10.5)  79.1 (10.4)  <0.001   
## hypertension:                               0.008   
##     No           895 (77.4%)  355 (71.1%)           
##     Yes          262 (22.6%)  144 (28.9%)           
## education:                                 <0.001   
##     1            257 (21.2%)  74 (14.3%)            
##     2            293 (24.2%)  115 (22.2%)           
##     3            371 (30.7%)  184 (35.5%)           
##     4             64 (5.29%)  19 (3.67%)            
##     5            207 (17.1%)  123 (23.7%)           
##     6             18 (1.49%)   3 (0.58%)            
## occupation:                                   .     
##     1            178 (14.7%)  73 (14.0%)            
##     2            136 (11.2%)  72 (13.8%)            
##     3            141 (11.6%)  99 (19.0%)            
##     4             53 (4.37%)  47 (9.04%)            
##     5             56 (4.62%)  30 (5.77%)            
##     6            425 (35.1%)   2 (0.38%)            
##     7             65 (5.36%)  27 (5.19%)            
##     8             3 (0.25%)    3 (0.58%)            
##     9            155 (12.8%)  167 (32.1%)           
## alcohol:                                   <0.001   
##     0            1178 (97.2%) 288 (55.4%)           
##     1             34 (2.81%)  232 (44.6%)           
## smoking:                                   <0.001   
##     0            1204 (99.3%) 283 (54.4%)           
##     1             8 (0.66%)   237 (45.6%)           
## v1hba1c          5.58 (0.34)  5.59 (0.33)   0.597   
## v1glucose        4.97 (0.48)  5.01 (0.46)   0.078   
## v1_dm:                                      0.251   
##     Normal       826 (68.2%)  339 (65.2%)           
##     Pre-Diabetes 386 (31.8%)  181 (34.8%)           
## v2hba1c          5.54 (0.52)  5.59 (0.48)   0.052   
## v2glucose        5.03 (0.67)  5.04 (0.68)   0.775   
## v2_dm:                                      0.210   
##     Diabetes      54 (4.46%)  20 (3.85%)            
##     Normal       830 (68.5%)  338 (65.0%)           
##     Pre-Diabetes 328 (27.1%)  162 (31.2%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
createTable(compareGroups(v2_dm ~ sex + age + weight + height + bmi + bmigr + wc + hip + whr + sbp + dbp + hypertension + education + occupation + alcohol + smoking + v1hba1c + v1glucose + v2hba1c + v2glucose, data = dm))
## 
## --------Summary descriptives table by 'v2_dm'---------
## 
## _______________________________________________________________ 
##                  Diabetes      Normal    Pre-Diabetes p.overall 
##                    N=74        N=1168       N=490               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## sex:                                                    0.210   
##     F           54 (73.0%)  830 (71.1%)  328 (66.9%)            
##     M           20 (27.0%)  338 (28.9%)  162 (33.1%)            
## age             58.9 (8.33) 54.6 (9.11)  58.6 (9.50)   <0.001   
## weight          59.3 (9.06) 54.9 (8.95)  57.9 (9.72)   <0.001   
## height          154 (6.86)   156 (7.10)   156 (7.51)    0.273   
## bmi             24.8 (2.82) 22.6 (2.96)  23.8 (3.15)   <0.001   
## bmigr:                                                    .     
##     Normal      18 (24.3%)  593 (50.8%)  202 (41.2%)            
##     Obese       38 (51.4%)  220 (18.8%)  156 (31.8%)            
##     Overweight  17 (23.0%)  282 (24.1%)  119 (24.3%)            
##     Underweight  1 (1.35%)   73 (6.25%)   13 (2.65%)            
## wc              85.3 (7.51) 79.8 (8.34)  84.1 (8.14)   <0.001   
## hip             94.2 (8.10) 92.1 (6.66)  93.7 (6.75)   <0.001   
## whr             0.90 (0.06) 0.87 (0.07)  0.90 (0.06)   <0.001   
## sbp             121 (17.6)   119 (15.5)   122 (15.4)    0.002   
## dbp             79.6 (11.9) 76.8 (10.4)  78.2 (10.4)    0.007   
## hypertension:                                           0.006   
##     No          45 (63.4%)  867 (77.5%)  338 (72.5%)            
##     Yes         26 (36.6%)  252 (22.5%)  128 (27.5%)            
## education:                                                .     
##     1            5 (6.85%)  222 (19.0%)  104 (21.3%)            
##     2           26 (35.6%)  271 (23.2%)  111 (22.7%)            
##     3           34 (46.6%)  364 (31.2%)  157 (32.1%)            
##     4            2 (2.74%)   57 (4.89%)   24 (4.91%)            
##     5            6 (8.22%)  239 (20.5%)   85 (17.4%)            
##     6            0 (0.00%)   13 (1.11%)   8 (1.64%)             
## occupation:                                               .     
##     1            9 (12.2%)  170 (14.6%)   72 (14.7%)            
##     2            3 (4.05%)  160 (13.7%)   45 (9.18%)            
##     3           12 (16.2%)  149 (12.8%)   79 (16.1%)            
##     4            5 (6.76%)   61 (5.22%)   34 (6.94%)            
##     5            4 (5.41%)   64 (5.48%)   18 (3.67%)            
##     6           22 (29.7%)  288 (24.7%)  117 (23.9%)            
##     7            7 (9.46%)   44 (3.77%)   41 (8.37%)            
##     8            0 (0.00%)   4 (0.34%)    2 (0.41%)             
##     9           12 (16.2%)  228 (19.5%)   82 (16.7%)            
## alcohol:                                                0.552   
##     0           64 (86.5%)  981 (84.0%)  421 (85.9%)            
##     1           10 (13.5%)  187 (16.0%)   69 (14.1%)            
## smoking:                                                0.037   
##     0           68 (91.9%)  1013 (86.7%) 406 (82.9%)            
##     1            6 (8.11%)  155 (13.3%)   84 (17.1%)            
## v1hba1c         6.11 (0.26) 5.46 (0.28)  5.80 (0.29)   <0.001   
## v1glucose       5.63 (0.58) 4.85 (0.40)  5.18 (0.47)   <0.001   
## v2hba1c         6.90 (0.91) 5.32 (0.28)  5.91 (0.24)    0.000   
## v2glucose       6.64 (1.41) 4.81 (0.40)  5.34 (0.57)   <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

(1.1.1) Table S1. Baseline characteristics by incident diabetes status

men = subset(dm, sex == "M")
createTable(compareGroups(v2_dm ~ sex + age + weight + height + bmi + bmigr + wc + hip + whr + sbp + dbp + hypertension + education + occupation + alcol + smok + v1hba1c + v1glucose + v2hba1c + v2glucose, data = men))
## Warning in cor(as.integer(x), as.integer(y)): the standard deviation is zero
## 
## --------Summary descriptives table by 'v2_dm'---------
## 
## ______________________________________________________________ 
##                  Diabetes     Normal    Pre-Diabetes p.overall 
##                    N=20        N=338       N=162               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## sex: M           20 (100%)  338 (100%)   162 (100%)      .     
## age             59.5 (9.93) 55.3 (9.54) 57.7 (10.2)    0.013   
## weight          66.0 (6.94) 60.6 (9.33) 63.9 (10.2)   <0.001   
## height          162 (5.28)  163 (6.10)   163 (6.46)    0.791   
## bmi             25.1 (2.13) 22.9 (3.10) 24.0 (3.22)   <0.001   
## bmigr:                                                   .     
##     Normal       4 (20.0%)  155 (45.9%)  59 (36.4%)            
##     Obese       13 (65.0%)  78 (23.1%)   62 (38.3%)            
##     Overweight   3 (15.0%)  81 (24.0%)   37 (22.8%)            
##     Underweight  0 (0.00%)  24 (7.10%)   4 (2.47%)             
## wc              89.5 (6.33) 82.9 (8.69) 86.2 (9.01)   <0.001   
## hip             95.3 (5.41) 91.5 (7.78) 93.6 (6.12)    0.002   
## whr             0.94 (0.04) 0.90 (0.06) 0.92 (0.06)   <0.001   
## sbp             127 (24.3)  121 (14.5)   123 (15.1)    0.079   
## dbp             83.5 (14.1) 78.6 (10.1) 79.7 (10.5)    0.078   
## hypertension:                                          0.023   
##     No           9 (47.4%)  241 (74.2%) 105 (67.7%)            
##     Yes         10 (52.6%)  84 (25.8%)   50 (32.3%)            
## education:                                               .     
##     1            3 (15.0%)  47 (14.0%)   24 (14.8%)            
##     2            5 (25.0%)  76 (22.6%)   34 (21.0%)            
##     3            9 (45.0%)  118 (35.1%)  57 (35.2%)            
##     4            0 (0.00%)  11 (3.27%)   8 (4.94%)             
##     5            3 (15.0%)  82 (24.4%)   38 (23.5%)            
##     6            0 (0.00%)   2 (0.60%)   1 (0.62%)             
## occupation:                                              .     
##     1            2 (10.0%)  44 (13.0%)   27 (16.7%)            
##     2            2 (10.0%)  50 (14.8%)   20 (12.3%)            
##     3            4 (20.0%)  64 (18.9%)   31 (19.1%)            
##     4            3 (15.0%)  31 (9.17%)   13 (8.02%)            
##     5            1 (5.00%)  22 (6.51%)   7 (4.32%)             
##     6            0 (0.00%)   1 (0.30%)   1 (0.62%)             
##     7            2 (10.0%)  14 (4.14%)   11 (6.79%)            
##     8            0 (0.00%)   1 (0.30%)   2 (1.23%)             
##     9            6 (30.0%)  111 (32.8%)  50 (30.9%)            
## alcol           0.40 (0.50) 0.48 (0.50) 0.39 (0.49)    0.169   
## smok            0.30 (0.47) 0.44 (0.50) 0.51 (0.50)    0.107   
## v1hba1c         6.14 (0.25) 5.47 (0.29) 5.78 (0.27)   <0.001   
## v1glucose       5.59 (0.54) 4.92 (0.43) 5.13 (0.44)   <0.001   
## v2hba1c         6.98 (0.70) 5.35 (0.27) 5.91 (0.24)   <0.001   
## v2glucose       6.89 (1.47) 4.84 (0.41) 5.24 (0.57)   <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
women = subset(dm, sex == "F")
createTable(compareGroups(v2_dm ~ sex + age + weight + height + bmi + bmigr + wc + hip + whr + sbp + dbp + hypertension + education + occupation + alcol + smok + v1hba1c + v1glucose + v2hba1c + v2glucose, data = women))
## Warning in cor(as.integer(x), as.integer(y)): the standard deviation is zero
## 
## --------Summary descriptives table by 'v2_dm'---------
## 
## ______________________________________________________________ 
##                  Diabetes     Normal    Pre-Diabetes p.overall 
##                    N=54        N=830       N=328               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## sex: F           54 (100%)  830 (100%)   328 (100%)      .     
## age             58.6 (7.75) 54.3 (8.91) 59.1 (9.12)   <0.001   
## weight          56.8 (8.54) 52.6 (7.67) 54.9 (7.98)   <0.001   
## height          152 (5.01)  153 (5.18)   152 (5.17)    0.259   
## bmi             24.7 (3.05) 22.5 (2.91) 23.6 (3.11)   <0.001   
## bmigr:                                                   .     
##     Normal      14 (25.9%)  438 (52.8%) 143 (43.6%)            
##     Obese       25 (46.3%)  142 (17.1%)  94 (28.7%)            
##     Overweight  14 (25.9%)  201 (24.2%)  82 (25.0%)            
##     Underweight  1 (1.85%)  49 (5.90%)   9 (2.74%)             
## wc              83.8 (7.38) 78.6 (7.86) 83.0 (7.47)   <0.001   
## hip             93.8 (8.90) 92.3 (6.13) 93.7 (7.05)    0.001   
## whr             0.89 (0.05) 0.85 (0.07) 0.89 (0.06)   <0.001   
## sbp             118 (14.0)  118 (15.8)   121 (15.5)    0.016   
## dbp             78.2 (10.7) 76.0 (10.5) 77.4 (10.2)    0.064   
## hypertension:                                          0.134   
##     No          36 (69.2%)  626 (78.8%) 233 (74.9%)            
##     Yes         16 (30.8%)  168 (21.2%)  78 (25.1%)            
## education:                                               .     
##     1            2 (3.77%)  175 (21.1%)  80 (24.5%)            
##     2           21 (39.6%)  195 (23.5%)  77 (23.5%)            
##     3           25 (47.2%)  246 (29.6%) 100 (30.6%)            
##     4            2 (3.77%)  46 (5.54%)   16 (4.89%)            
##     5            3 (5.66%)  157 (18.9%)  47 (14.4%)            
##     6            0 (0.00%)  11 (1.33%)   7 (2.14%)             
## occupation:                                              .     
##     1            7 (13.0%)  126 (15.2%)  45 (13.7%)            
##     2            1 (1.85%)  110 (13.3%)  25 (7.62%)            
##     3            8 (14.8%)  85 (10.2%)   48 (14.6%)            
##     4            2 (3.70%)  30 (3.61%)   21 (6.40%)            
##     5            3 (5.56%)  42 (5.06%)   11 (3.35%)            
##     6           22 (40.7%)  287 (34.6%) 116 (35.4%)            
##     7            5 (9.26%)  30 (3.61%)   30 (9.15%)            
##     8            0 (0.00%)   3 (0.36%)   0 (0.00%)             
##     9            6 (11.1%)  117 (14.1%)  32 (9.76%)            
## alcol           0.04 (0.19) 0.03 (0.17) 0.02 (0.13)    0.443   
## smok            0.00 (0.00) 0.01 (0.09) 0.00 (0.06)    0.493   
## v1hba1c         6.10 (0.27) 5.46 (0.28) 5.81 (0.30)   <0.001   
## v1glucose       5.65 (0.60) 4.83 (0.38) 5.21 (0.48)   <0.001   
## v2hba1c         6.87 (0.98) 5.30 (0.28) 5.91 (0.25)   <0.001   
## v2glucose       6.54 (1.39) 4.79 (0.40) 5.39 (0.57)   <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

(1.3) Relationship between V1 and V2 measurements

p = ggplot(data = dm, aes(x = v1hba1c, y = v2hba1c, col = sex))
p1 = p + geom_point() + stat_smooth() + 
labs(x = "HbA1c (%) measured at visit 1", y = "HbA1c (%) measured at visit 2", title = "Correlation of HbA1c") + theme(legend.position="none")
p1
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

p = ggplot(data = dm, aes(x = v1glucose, y = v2glucose, col = sex))
p2 = p + geom_point() + stat_smooth() + 
labs(x = "Blood glucose (mmol/dL) measured at visit 1", y = "Blood glucose (mmol/dL) measured at visit 2", title = "Correlation of blood glucose") + theme(legend.position="none")
p2
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

grid.arrange(p1, p2, nrow = 1)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

(1.4) Change of diabetes status over time

dm$v2_dm2 = factor(dm$v2_dm, levels = c("Normal", "Pre-Diabetes", "Diabetes"))
table(dm$v1_dm, dm$v2_dm2)
##               
##                Normal Pre-Diabetes Diabetes
##   Normal          980          181        4
##   Pre-Diabetes    188          309       70
options(digits = 4)
prop.table(table(dm$v1_dm, dm$v2_dm2))
##               
##                  Normal Pre-Diabetes Diabetes
##   Normal       0.565820     0.104503 0.002309
##   Pre-Diabetes 0.108545     0.178406 0.040416

(1.4.1) Change of diabetes status over time by sexes

table(men$v1_dm, men$v2_dm)
##               
##                Diabetes Normal Pre-Diabetes
##   Normal              0    279           60
##   Pre-Diabetes       20     59          102
prop.table(table(men$v1_dm, men$v2_dm))
##               
##                Diabetes  Normal Pre-Diabetes
##   Normal        0.00000 0.53654      0.11538
##   Pre-Diabetes  0.03846 0.11346      0.19615
table(women$v1_dm, women$v2_dm)
##               
##                Diabetes Normal Pre-Diabetes
##   Normal              4    701          121
##   Pre-Diabetes       50    129          207
prop.table(table(women$v1_dm, women$v2_dm))
##               
##                Diabetes  Normal Pre-Diabetes
##   Normal        0.00330 0.57838      0.09983
##   Pre-Diabetes  0.04125 0.10644      0.17079

(2.1) The optimal model for predicting diabetes status in the 2nd visit

Cox’s PH model with only clinical factor

bma.cox = bic.surv(Surv(fu, dm) ~ age + gender + bmi + whr + sbp + dbp + alcol + smok, data = dm)
summary(bma.cox)
## 
## Call:
## bic.surv.formula(f = Surv(fu, dm) ~ age + gender + bmi + whr +     sbp + dbp + alcol + smok, data = dm)
## 
## 
##   32  models were selected
##  Best  5  models (cumulative posterior probability =  0.518 ): 
## 
##            p!=0    EV        SD       model 1   model 2   model 3   model 4 
## age         40.8   0.009592  0.01378    0.0245      .         .         .   
## gender       7.6   0.000121  0.08815      .         .         .         .   
## bmi        100.0   0.159382  0.03367    0.1677    0.1631    0.1503    0.1483
## whr         34.4   1.154292  1.92912      .         .       3.3436    3.8102
## sbp          8.2  -0.000560  0.00382      .         .         .         .   
## dbp         16.7   0.002633  0.00820      .         .         .         .   
## alcol        6.4  -0.002262  0.09214      .         .         .         .   
## smok        30.0  -0.204528  0.39517      .         .         .      -0.7400
##                                                                             
## nVar                                      2         1         2         3   
## BIC                                   -19.2108  -19.0978  -18.3037  -17.6364
## post prob                               0.151     0.143     0.096     0.069 
##            model 5 
## age            .   
## gender         .   
## bmi          0.1623
## whr            .   
## sbp            .   
## dbp            .   
## alcol          .   
## smok        -0.6258
##                    
## nVar           2   
## BIC        -17.3537
## post prob    0.060

Cox’s PH model with all factors

bma.cox2 = bic.surv(Surv(fu, dm) ~ age + gender + bmi + whr + sbp + dbp + alcol + smok + v1hba1c_sd + v1glucose_sd, data = dm)
summary(bma.cox2)
## 
## Call:
## bic.surv.formula(f = Surv(fu, dm) ~ age + gender + bmi + whr +     sbp + dbp + alcol + smok + v1hba1c_sd + v1glucose_sd, data = dm)
## 
## 
##   35  models were selected
##  Best  5  models (cumulative posterior probability =  0.454 ): 
## 
##               p!=0    EV        SD       model 1    model 2    model 3  
## age            20.7  -0.004317  0.01058      .          .          .    
## gender         15.1   0.064369  0.20853      .          .          .    
## bmi            34.7   0.020960  0.03464      .          .         0.0598
## whr            10.6   0.241602  0.99038      .          .          .    
## sbp             7.6  -0.000521  0.00288      .          .          .    
## dbp             5.2   0.000238  0.00298      .          .          .    
## alcol           9.5   0.035633  0.16362      .          .          .    
## smok           55.4  -0.510119  0.58411      .        -0.7837      .    
## v1hba1c_sd    100.0   1.286794  0.15820     1.2540     1.3022     1.2347
## v1glucose_sd  100.0   0.629892  0.09964     0.6535     0.6219     0.6382
##                                                                         
## nVar                                          2          3          3   
## BIC                                      -197.8753  -197.5380  -196.8032
## post prob                                   0.136      0.115      0.080 
##               model 4    model 5  
## age               .          .    
## gender            .         0.4882
## bmi              0.0629      .    
## whr               .          .    
## sbp               .          .    
## dbp               .          .    
## alcol             .          .    
## smok            -0.8117    -1.1789
## v1hba1c_sd       1.2876     1.3134
## v1glucose_sd     0.6088     0.6296
##                                   
## nVar               4          4   
## BIC           -196.7903  -195.5943
## post prob        0.079      0.044

Only CLINICAL candidate variables

options(digits = 8)
bma.0 = bic.mlogit(v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + alcol + smok, data = dm, verbose = T, approx = F, sep = '')
## 
## Begg & Gray approximation started.
##  9 variables considered.
##  11 models initially selected.
## Estimating MNL coefficients for 11 models ...   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,   11
## Estimation finished.
## Final number of models: 2
summary(bma.0)
## 
## Call:
## bic.mlogit(f = v2_dm2 ~ age + gender + bmi + whr + sbp + dbp +     alcol + smok, data = dm, sep = "", approx = F, verbose = T)
## 
## 
##   2  models were selected
##  Best  2  models (cumulative posterior probability =  1 ): 
## 
##                     p!=0    EV         SD         model 1      model 2    
## Intercept           100     -9.994105  0.8194031    -9.986120   -10.092843
## Intercept.Diabetes  100.0  -11.884503  0.8325682   -11.876522   -11.983176
## age                 100.0    0.041160  0.0060162     0.041254     0.039989
## bmi                 100.0    0.110400  0.0189029     0.110459     0.109676
## whr                 100.0    4.797263  0.8660483     4.777527     5.041291
## sbp                   0.0    0.000000  0.0000000        .            .    
## dbp                   0.0    0.000000  0.0000000        .            .    
## alcol                 7.5   -0.017929  0.0759951        .        -0.239609
## smok                  0.0    0.000000  0.0000000        .            .    
## gender                0.0    0.000000  0.0000000        .            .    
##                                                                           
## nVar                                                   4            5     
## BIC                                               2494.845822  2499.875475
## post prob                                            0.925        0.075   
## 
## MNL specification:
## ==================
## Response variable: v2_dm2
## Base choice name: Normal
## Base choice index: 1
## Frequency of alternatives:
##       Normal Pre-Diabetes     Diabetes 
##         1168          490           74 
## 
## Equations:
## ----------
##     alternative       intercept     1     2     3     4     5       6      7
## 1       Normal:                                                             
## 2 Pre-Diabetes: ascPre-Diabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 3     Diabetes:     ascDiabetes + age + bmi + whr + sbp + dbp + alcol + smok
##          8
## 1         
## 2 + gender
## 3 + gender
imageplot.mlogit(bma.0)

Candidate variables include both HbA1c and blood glucose at baseline

options(digits = 8)
bma.1 = bic.mlogit(v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + v1hba1c_sd + v1glucose_sd + alcol + smok, data = dm, verbose = T, approx = F, sep = '')
## 
## Begg & Gray approximation started.
##  11 variables considered.
##  19 models initially selected.
## Estimating MNL coefficients for 19 models ...   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,   11,   12,   13,   14,   15,   16,   17,   18,   19
## Estimation finished.
## Final number of models: 4
summary(bma.1)
## 
## Call:
## bic.mlogit(f = v2_dm2 ~ age + gender + bmi + whr + sbp + dbp +     v1hba1c_sd + v1glucose_sd + alcol + smok, data = dm, sep = "",     approx = F, verbose = T)
## 
## 
##   4  models were selected
##  Best  4  models (cumulative posterior probability =  1 ): 
## 
##                     p!=0    EV          SD         model 1      model 2    
## Intercept           100    -3.5168e+01  1.7912138  -3.5079e+01  -3.5336e+01
## Intercept.Diabetes  100.0  -3.7059e+01  1.8084676  -3.6969e+01  -3.7227e+01
## age                   0.0   0.0000e+00  0.0000000        .            .    
## bmi                  25.0   1.2739e-02  0.0245060        .       5.0997e-02
## whr                 100.0   5.5782e+00  1.0225699   5.7412e+00   5.0394e+00
## sbp                   3.5   1.8685e-04  0.0012627        .            .    
## dbp                   0.0   0.0000e+00  0.0000000        .            .    
## alcol                 6.4  -1.9285e-02  0.0865625        .            .    
## smok                  0.0   0.0000e+00  0.0000000        .            .    
## gender                0.0   0.0000e+00  0.0000000        .            .    
## v1hba1c_sd          100.0   1.3787e+00  0.0835393   1.3819e+00   1.3716e+00
## v1glucose_sd        100.0   5.7577e-01  0.0714069   5.7863e-01   5.6565e-01
##                                                                            
## nVar                                                  4            5       
## BIC                                                 1.9891e+03   1.9910e+03
## post prob                                           0.651        0.250     
##                     model 3      model 4    
## Intercept           -3.5191e+01  -3.5585e+01
## Intercept.Diabetes  -3.7081e+01  -3.7475e+01
## age                       .            .    
## bmi                       .            .    
## whr                  5.9862e+00   5.6441e+00
## sbp                       .       5.3074e-03
## dbp                       .            .    
## alcol               -3.0137e-01        .    
## smok                      .            .    
## gender                    .            .    
## v1hba1c_sd           1.3733e+00   1.3810e+00
## v1glucose_sd         5.8643e-01   5.7548e-01
##                                             
## nVar                   5            5       
## BIC                  1.9938e+03   1.9950e+03
## post prob            0.064        0.035     
## 
## MNL specification:
## ==================
## Response variable: v2_dm2
## Base choice name: Normal
## Base choice index: 1
## Frequency of alternatives:
##       Normal Pre-Diabetes     Diabetes 
##         1168          490           74 
## 
## Equations:
## ----------
##     alternative       intercept     1     2     3     4     5       6      7
## 1       Normal:                                                             
## 2 Pre-Diabetes: ascPre-Diabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 3     Diabetes:     ascDiabetes + age + bmi + whr + sbp + dbp + alcol + smok
##          8            9             10
## 1                                     
## 2 + gender + v1hba1c_sd + v1glucose_sd
## 3 + gender + v1hba1c_sd + v1glucose_sd
imageplot.mlogit(bma.1)

Candidate variables include only HbA1c at baseline

bma.2 = bic.mlogit(v2_dm2 ~ age + gender + bmi + whr + sbp + dbp + v1hba1c_sd + alcol + smok, data = dm, verbose = T, approx = F, sep = '')
## 
## Begg & Gray approximation started.
##  10 variables considered.
##  11 models initially selected.
## Estimating MNL coefficients for 11 models ...   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,   11
## Estimation finished.
## Final number of models: 3
summary(bma.2)
## 
## Call:
## bic.mlogit(f = v2_dm2 ~ age + gender + bmi + whr + sbp + dbp +     v1hba1c_sd + alcol + smok, data = dm, sep = "", approx = F,     verbose = T)
## 
## 
##   3  models were selected
##  Best  3  models (cumulative posterior probability =  1 ): 
## 
##                     p!=0    EV          SD         model 1      model 2    
## Intercept           100    -3.2282e+01  1.6539918  -3.2366e+01  -3.1961e+01
## Intercept.Diabetes  100.0  -3.4172e+01  1.6715172  -3.4256e+01  -3.3852e+01
## age                   0.0   0.0000e+00  0.0000000        .            .    
## bmi                  73.6   4.7428e-02  0.0334282   6.4541e-02        .    
## whr                 100.0   5.3513e+00  1.0401619   5.1136e+00   6.0243e+00
## sbp                   4.2   2.3038e-04  0.0013801        .            .    
## dbp                   0.0   0.0000e+00  0.0000000        .            .    
## alcol                 0.0   0.0000e+00  0.0000000        .            .    
## smok                  0.0   0.0000e+00  0.0000000        .            .    
## gender                0.0   0.0000e+00  0.0000000        .            .    
## v1hba1c_sd          100.0   1.5384e+00  0.0819569   1.5339e+00   1.5513e+00
##                                                                            
## nVar                                                  4            3       
## BIC                                                 2.0492e+03   2.0511e+03
## post prob                                           0.694        0.264     
##                     model 3    
## Intercept           -3.2913e+01
## Intercept.Diabetes  -3.4803e+01
## age                       .    
## bmi                  6.2609e-02
## whr                  5.0501e+00
## sbp                  5.5288e-03
## dbp                       .    
## alcol                     .    
## smok                      .    
## gender                    .    
## v1hba1c_sd           1.5328e+00
##                                
## nVar                   5       
## BIC                  2.0548e+03
## post prob            0.042     
## 
## MNL specification:
## ==================
## Response variable: v2_dm2
## Base choice name: Normal
## Base choice index: 1
## Frequency of alternatives:
##       Normal Pre-Diabetes     Diabetes 
##         1168          490           74 
## 
## Equations:
## ----------
##     alternative       intercept     1     2     3     4     5       6      7
## 1       Normal:                                                             
## 2 Pre-Diabetes: ascPre-Diabetes + age + bmi + whr + sbp + dbp + alcol + smok
## 3     Diabetes:     ascDiabetes + age + bmi + whr + sbp + dbp + alcol + smok
##          8            9
## 1                      
## 2 + gender + v1hba1c_sd
## 3 + gender + v1hba1c_sd
imageplot.mlogit(bma.2)

(2.2) The multinomial model for predicting incident diabetes

dm$age.5 = dm$age/5
m.0 = multinom(v2_dm2 ~ age.5 + bmi_sd + whr_sd, data = dm)
## # weights:  15 (8 variable)
## initial  value 1902.796484 
## iter  10 value 1228.282704
## final  value 1225.369339 
## converged
summary(m.0)
## Call:
## multinom(formula = v2_dm2 ~ age.5 + bmi_sd + whr_sd, data = dm)
## 
## Coefficients:
##              (Intercept)      age.5     bmi_sd     whr_sd
## Pre-Diabetes  -9.7388412 0.20254667 0.29676635 0.33168713
## Diabetes     -13.5624979 0.23016985 0.61174808 0.26604294
## 
## Std. Errors:
##              (Intercept)       age.5      bmi_sd      whr_sd
## Pre-Diabetes   0.8508162 0.030780545 0.059650173 0.062154162
## Diabetes       1.8499212 0.064268637 0.117193767 0.132998036
## 
## Residual Deviance: 2450.7387 
## AIC: 2466.7387
exp(coefficients(m.0))
##                (Intercept)     age.5    bmi_sd    whr_sd
## Pre-Diabetes 5.8948803e-05 1.2245172 1.3455009 1.3933169
## Diabetes     1.2878995e-06 1.2588138 1.8436514 1.3047911
m.1 = multinom(v2_dm2 ~ bmi_sd + whr_sd + v1hba1c_sd, data = dm)
## # weights:  15 (8 variable)
## initial  value 1902.796484 
## iter  10 value 1017.520743
## iter  20 value 964.981610
## final  value 964.980291 
## converged
summary(m.1)
## Call:
## multinom(formula = v2_dm2 ~ bmi_sd + whr_sd + v1hba1c_sd, data = dm)
## 
## Coefficients:
##              (Intercept)     bmi_sd     whr_sd v1hba1c_sd
## Pre-Diabetes  -30.654876 0.18017873 0.35073499  1.4391632
## Diabetes      -58.676851 0.43134243 0.29994585  2.8546363
## 
## Std. Errors:
##              (Intercept)      bmi_sd      whr_sd  v1hba1c_sd
## Pre-Diabetes   1.7226142 0.066097338 0.068904381 0.087766606
## Diabetes       4.1534401 0.133893721 0.155945077 0.198697035
## 
## Residual Deviance: 1929.9606 
## AIC: 1945.9606
exp(coefficients(m.1))
##                (Intercept)    bmi_sd    whr_sd v1hba1c_sd
## Pre-Diabetes 4.8613441e-14 1.1974314 1.4201109  4.2171656
## Diabetes     3.2882683e-26 1.5393226 1.3497857 17.3681200

(3) Validation of the model

(3.1) Cox’s PH model

set.seed(1234)
index <- createDataPartition(dm$dm, p = .60, list = FALSE)
train <- dm[index,]
test <- dm[-index,]

(3.1.2) Fit model in the training set

Model with only clinical factors

m.1c <- coxph(Surv(fu, dm) ~ age + bmi_sd, data = dm, id = id) 
summary(m.1c)
## Call:
## coxph(formula = Surv(fu, dm) ~ age + bmi_sd, data = dm, id = id)
## 
##   n= 1732, number of events= 74 
## 
##            coef exp(coef) se(coef)      z  Pr(>|z|)    
## age    0.024545  1.024849 0.011440 2.1455   0.03191 *  
## bmi_sd 0.515837  1.675040 0.099129 5.2037 1.954e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## age       1.0248    0.97575    1.0021    1.0481
## bmi_sd    1.6750    0.59700    1.3793    2.0342
## 
## Concordance= 0.692  (se = 0.026 )
## Likelihood ratio test= 27.85  on 2 df,   p=9e-07
## Wald test            = 30.08  on 2 df,   p=3e-07
## Score (logrank) test = 30.09  on 2 df,   p=3e-07
  zph.m1c <- cox.zph(m.1c)
  zph.m1c
##         chisq df     p
## age    1.5686  1 0.210
## bmi_sd 0.0212  1 0.884
## GLOBAL 1.5686  2 0.456
m.1t <- coxph(Surv(fu, dm) ~ age + bmi_sd, data = train, id = id) 
summary(m.1t)
## Call:
## coxph(formula = Surv(fu, dm) ~ age + bmi_sd, data = train, id = id)
## 
##   n= 1040, number of events= 51 
## 
##           coef exp(coef) se(coef)      z  Pr(>|z|)    
## age    0.02681   1.02717  0.01445 1.8553   0.06355 .  
## bmi_sd 0.57384   1.77508  0.11953 4.8010 1.579e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## age       1.0272    0.97355   0.99849    1.0567
## bmi_sd    1.7751    0.56336   1.40435    2.2437
## 
## Concordance= 0.704  (se = 0.031 )
## Likelihood ratio test= 23.42  on 2 df,   p=8e-06
## Wald test            = 25.46  on 2 df,   p=3e-06
## Score (logrank) test = 25.33  on 2 df,   p=3e-06
base.mean.1 <- expand.grid(age = 56, bmi_sd = 7.492) 
  base.mean.1
##   age bmi_sd
## 1  56  7.492
base.model.1 <- survfit(m.1t, newdata = base.mean.1)
  options(digits = 10)
  summary(base.model.1, time = c(2))
## Call: survfit(formula = m.1t, newdata = base.mean.1)
## 
##  time n.risk n.event survival   std.err lower 95% CI upper 95% CI
##     2    142      45 0.928733 0.0129263      0.90374     0.954417
test$pi_dm.1 = 0.02681*(test$age - 56) + 0.57384383*(test$bmi_sd - 7.492);
    test$dm.1 = 1 - 0.928733**exp(test$pi_dm.1)

Model with laboratory factors

m.2c <- coxph(Surv(fu, dm) ~ v1hba1c_sd + v1glucose_sd, data = dm, id = id) 
summary(m.2c)
## Call:
## coxph(formula = Surv(fu, dm) ~ v1hba1c_sd + v1glucose_sd, data = dm, 
##     id = id)
## 
##   n= 1732, number of events= 74 
## 
##                    coef  exp(coef)   se(coef)       z   Pr(>|z|)    
## v1hba1c_sd   1.25800773 3.51840489 0.14994713 8.38968 < 2.22e-16 ***
## v1glucose_sd 0.65729471 1.92956523 0.09782546 6.71906 1.8291e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## v1hba1c_sd    3.518405  0.2842197  2.622470  4.720424
## v1glucose_sd  1.929565  0.5182515  1.592906  2.337376
## 
## Concordance= 0.918  (se = 0.013 )
## Likelihood ratio test= 207.49  on 2 df,   p=<2e-16
## Wald test            = 176.41  on 2 df,   p=<2e-16
## Score (logrank) test = 236.47  on 2 df,   p=<2e-16
  zph.m2c <- cox.zph(m.2c)
  zph.m2c
##                  chisq df       p
## v1hba1c_sd   0.0204222  1 0.88636
## v1glucose_sd 0.8438117  1 0.35831
## GLOBAL       0.9881972  2 0.61012
m.2t <- coxph(Surv(fu, dm) ~ v1hba1c_sd + v1glucose_sd, data = train, id = id) 
summary(m.2t)
## Call:
## coxph(formula = Surv(fu, dm) ~ v1hba1c_sd + v1glucose_sd, data = train, 
##     id = id)
## 
##   n= 1040, number of events= 51 
## 
##                   coef exp(coef)  se(coef)       z   Pr(>|z|)    
## v1hba1c_sd   1.1841089 3.2677737 0.1791379 6.61004 3.8421e-11 ***
## v1glucose_sd 0.6838519 1.9814956 0.1181161 5.78966 7.0530e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## v1hba1c_sd    3.267774  0.3060187  2.300221  4.642313
## v1glucose_sd  1.981496  0.5046693  1.572000  2.497662
## 
## Concordance= 0.925  (se = 0.012 )
## Likelihood ratio test= 147.71  on 2 df,   p=<2e-16
## Wald test            = 126.65  on 2 df,   p=<2e-16
## Score (logrank) test = 175.2  on 2 df,   p=<2e-16
base.mean.2 <- expand.grid(v1hba1c_sd = 16.4225, v1glucose_sd = 10.597268) 
  base.mean.2
##   v1hba1c_sd v1glucose_sd
## 1    16.4225    10.597268
base.model.2 <- survfit(m.2t, newdata = base.mean.2)
  options(digits = 10)
  summary(base.model.2, time = c(2))
## Call: survfit(formula = m.2t, newdata = base.mean.2)
## 
##  time n.risk n.event survival   std.err lower 95% CI upper 95% CI
##     2    142      45 0.978818 0.0066789     0.965815     0.991996
test$pi_dm.2 = 1.1841089*(test$v1hba1c_sd - 16.4225) + 0.6838519*(test$v1glucose_sd - 10.597268);
    test$dm.2 = 1 - 0.928733**exp(test$pi_dm.2)

Model with both clinical and laboratory factors

m.3c <- coxph(Surv(fu, dm) ~ age + bmi_sd + v1hba1c_sd + v1glucose_sd, data = dm, id = id) 
summary(m.3c)
## Call:
## coxph(formula = Surv(fu, dm) ~ age + bmi_sd + v1hba1c_sd + v1glucose_sd, 
##     data = dm, id = id)
## 
##   n= 1732, number of events= 74 
## 
##                     coef   exp(coef)    se(coef)        z   Pr(>|z|)    
## age          -0.01687412  0.98326745  0.01370735 -1.23103   0.218313    
## bmi_sd        0.16944731  1.18464992  0.09940846  1.70456   0.088277 .  
## v1hba1c_sd    1.28035967  3.59793355  0.15427187  8.29937 < 2.22e-16 ***
## v1glucose_sd  0.63319981  1.88362820  0.09681953  6.54000 6.1519e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## age          0.9832674  1.0170173 0.9572028  1.010042
## bmi_sd       1.1846499  0.8441312 0.9749299  1.439483
## v1hba1c_sd   3.5979336  0.2779373 2.6591123  4.868213
## v1glucose_sd 1.8836282  0.5308903 1.5580531  2.277236
## 
## Concordance= 0.923  (se = 0.012 )
## Likelihood ratio test= 212.29  on 4 df,   p=<2e-16
## Wald test            = 177.31  on 4 df,   p=<2e-16
## Score (logrank) test = 248.86  on 4 df,   p=<2e-16
  zph.m3c <- cox.zph(m.3c)
  zph.m3c
##                  chisq df       p
## age          2.6277983  1 0.10501
## bmi_sd       0.0318263  1 0.85841
## v1hba1c_sd   0.0169519  1 0.89641
## v1glucose_sd 0.9191425  1 0.33770
## GLOBAL       3.9947878  4 0.40671
m.3t <- coxph(Surv(fu, dm) ~ age + bmi_sd + v1hba1c_sd + v1glucose_sd, data = train, id = id) 
summary(m.3t)
## Call:
## coxph(formula = Surv(fu, dm) ~ age + bmi_sd + v1hba1c_sd + v1glucose_sd, 
##     data = train, id = id)
## 
##   n= 1040, number of events= 51 
## 
##                     coef   exp(coef)    se(coef)        z   Pr(>|z|)    
## age          -0.02287697  0.97738272  0.01800447 -1.27063    0.20386    
## bmi_sd        0.19594817  1.21646386  0.11935610  1.64171    0.10065    
## v1hba1c_sd    1.23301983  3.43157667  0.18733509  6.58189 4.6449e-11 ***
## v1glucose_sd  0.63200459  1.88137820  0.11576089  5.45957 4.7729e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## age          0.9773827  1.0231407 0.9434942  1.012488
## bmi_sd       1.2164639  0.8220548 0.9627269  1.537076
## v1hba1c_sd   3.4315767  0.2914112 2.3770252  4.953973
## v1glucose_sd 1.8813782  0.5315252 1.4994786  2.360543
## 
## Concordance= 0.932  (se = 0.01 )
## Likelihood ratio test= 152.11  on 4 df,   p=<2e-16
## Wald test            = 129.87  on 4 df,   p=<2e-16
## Score (logrank) test = 187.38  on 4 df,   p=<2e-16
base.mean.3 <- expand.grid(age = 56, bmi_sd = 7.4916, v1hba1c_sd = 16.4225, v1glucose_sd = 10.597268) 
  base.mean.3
##   age bmi_sd v1hba1c_sd v1glucose_sd
## 1  56 7.4916    16.4225    10.597268
base.model.3 <- survfit(m.3t, newdata = base.mean.3)
  options(digits = 10)
  summary(base.model.3, time = c(2))
## Call: survfit(formula = m.3t, newdata = base.mean.3)
## 
##  time n.risk n.event survival    std.err lower 95% CI upper 95% CI
##     2    142      45 0.978959 0.00670121     0.965912     0.992181
test$pi_dm.3 = -0.02287697*(test$age - 56) + 0.19594817*(test$bmi_sd - 7.4916) + 1.233*(test$v1hba1c_sd - 16.4225) + 0.632*(test$v1glucose_sd - 10.597268);
    test$dm.3 = 1 - 0.978959**exp(test$pi_dm.3)

(3.1.3) Validate the model in the validation set

par(mfrow=c(1,1))
val.prob.ci.2(p = test$dm.1, y = test$dm, pl = T, logistic.cal = F, g = 10, smooth = "rcs", 
              xlab="2-year predicted risk of diabetes", ylab="Observed risk of diabetes until 2 years of follow-up", ylim = c(0, 0.4), xlim = c(0,0.4),  cex.axis = .8, cex.lab = 1, 
              statloc = c(-0.01, 0.36), cex = .8)
## Call:
## val.prob.ci.2(p = test$dm.1, y = test$dm, pl = T, smooth = "rcs", 
##     logistic.cal = F, xlab = "2-year predicted risk of diabetes", 
##     ylab = "Observed risk of diabetes until 2 years of follow-up", 
##     xlim = c(0, 0.4), ylim = c(0, 0.4), g = 10, statloc = c(-0.01, 
##         0.36), cex = 0.8, cex.axis = 0.8, cex.lab = 1)
## 
## A 95% confidence interval is given for the calibration intercept, calibration slope and c-statistic. 
## 
##              Dxy          C (ROC)               R2                D 
##  3.738220576e-01  6.869110288e-01  3.244798335e-02  6.796920742e-03 
##         D:Chi-sq              D:p                U         U:Chi-sq 
##  5.703469153e+00  1.693141531e-02  4.986843954e-02  3.650896016e+01 
##              U:p                Q            Brier        Intercept 
##  1.180810016e-08 -4.307151880e-02  3.787027867e-02 -1.080210745e+00 
##            Slope             Emax     Brier scaled 
##  6.931305350e-01  4.839648430e-01 -1.785736741e-01
title(main= "Prediction model with age and BMI", adj = 0, cex.main = 1.4)

val.prob.ci.2(p = test$dm.2, y = test$dm, pl = T, logistic.cal = F, g = 10, smooth = "rcs", 
              xlab="2-year predicted risk", ylab="Observed risk of diabetes until 2 years of follow-up", ylim = c(0, 0.4), xlim = c(0,0.4),  cex.axis = .8, cex.lab = 1, 
              statloc = c(-0.01, 0.36), cex = .8)
## singular information matrix in lrm.fit (rank= 4 ).  Offending variable(s):
## 
## Warning in value[[3L]](cond): The number of knots led to estimation problems,
## nk will be set to 4.
## singular information matrix in lrm.fit (rank= 3 ).  Offending variable(s):
## 
## Warning in value[[3L]](cond): Nk 4 also led to estimation problems, nk will be
## set to 3.
## Call:
## val.prob.ci.2(p = test$dm.2, y = test$dm, pl = T, smooth = "rcs", 
##     logistic.cal = F, xlab = "2-year predicted risk", ylab = "Observed risk of diabetes until 2 years of follow-up", 
##     xlim = c(0, 0.4), ylim = c(0, 0.4), g = 10, statloc = c(-0.01, 
##         0.36), cex = 0.8, cex.axis = 0.8, cex.lab = 1)
## 
## A 95% confidence interval is given for the calibration intercept, calibration slope and c-statistic. 
## 
##              Dxy          C (ROC)               R2                D 
##  7.710404887e-01  8.855202444e-01  2.987327889e-01  7.713102818e-02 
##         D:Chi-sq              D:p                U         U:Chi-sq 
##  5.437467150e+01  1.656452753e-13  2.951573938e-01  2.062489165e+02 
##              U:p                Q            Brier        Intercept 
##  0.000000000e+00 -2.180263656e-01  6.573604465e-02 -2.945574454e+00 
##            Slope             Emax     Brier scaled 
##  5.365055337e-01  7.365272791e-01 -1.045793546e+00
title(main= "Prediction model with HbA1c and blood glucose", adj = 0, cex.main = 1.4)

val.prob.ci.2(p = test$dm.3, y = test$dm, pl = T, logistic.cal = F, g = 10, smooth = "rcs", 
              xlab="2-year predicted risk", ylab="Observed risk of diabetes until 2 years of follow-up", ylim = c(0, 0.45), xlim = c(0,0.45),  cex.axis = .8, cex.lab = 1, 
              statloc = c(-0.01, 0.4), cex = .8)
## singular information matrix in lrm.fit (rank= 4 ).  Offending variable(s):
## 
## Warning in value[[3L]](cond): The number of knots led to estimation problems,
## nk will be set to 4.
## singular information matrix in lrm.fit (rank= 3 ).  Offending variable(s):
## 
## Warning in value[[3L]](cond): Nk 4 also led to estimation problems, nk will be
## set to 3.
## Call:
## val.prob.ci.2(p = test$dm.3, y = test$dm, pl = T, smooth = "rcs", 
##     logistic.cal = F, xlab = "2-year predicted risk", ylab = "Observed risk of diabetes until 2 years of follow-up", 
##     xlim = c(0, 0.45), ylim = c(0, 0.45), g = 10, statloc = c(-0.01, 
##         0.4), cex = 0.8, cex.axis = 0.8, cex.lab = 1)
## 
## A 95% confidence interval is given for the calibration intercept, calibration slope and c-statistic. 
## 
##              Dxy          C (ROC)               R2                D 
##  7.579125236e-01  8.789562618e-01  3.169713340e-01  8.213434680e-02 
##         D:Chi-sq              D:p                U         U:Chi-sq 
##  5.783696799e+01  2.842170943e-14  3.083897753e-02  2.334057245e+01 
##              U:p                Q            Brier        Intercept 
##  8.543957707e-06  5.129536927e-02  3.152862486e-02 -1.048942448e+00 
##            Slope             Emax     Brier scaled 
##  8.336952192e-01  3.379840654e-01  1.878706603e-02
title(main= "Prediction model with age, BMI, HbA1c, blood glucose", adj = 0, cex.main = 1.4)

(3.2) Multinomial models

Model with only clinical factors

set.seed(1234)
index <- createDataPartition(dm$v2_dm2, p = .60, list = FALSE)
train <- dm[index,]
test <- dm[-index,]

m.0 = multinom(v2_dm2 ~ age.5 + bmi_sd + whr_sd, data = train)
## # weights:  15 (8 variable)
## initial  value 1142.556780 
## iter  10 value 760.030626
## iter  20 value 732.653141
## final  value 732.653051 
## converged
summary(m.0)
## Call:
## multinom(formula = v2_dm2 ~ age.5 + bmi_sd + whr_sd, data = train)
## 
## Coefficients:
##               (Intercept)        age.5       bmi_sd       whr_sd
## Pre-Diabetes -10.42990133 0.1885760756 0.3490005494 0.3661581368
## Diabetes     -15.08334512 0.2640286679 0.5257510802 0.4032118299
## 
## Std. Errors:
##              (Intercept)         age.5        bmi_sd        whr_sd
## Pre-Diabetes 1.130854325 0.04042515702 0.07769270329 0.08059446156
## Diabetes     2.431963874 0.08195202042 0.15492848508 0.17222721423
## 
## Residual Deviance: 1465.306102 
## AIC: 1481.306102

Predicting diabetes status in the training data

train$v2_dm2Predicted = predict(m.0, newdata = train, "class")
tab.1 = table(train$v2_dm2, train$v2_dm2Predicted)
tab.1
##               
##                Normal Pre-Diabetes Diabetes
##   Normal          661           40        0
##   Pre-Diabetes    251           43        0
##   Diabetes         35           10        0
# Calculate accuracy
round((sum(diag(tab.1))/sum(tab.1))*100,2)
## [1] 67.69

Predicting diabetes status in the validation data

test$v2_dm2Predicted = predict(m.0, newdata = test, "class")
tab.2 = table(test$v2_dm2, test$v2_dm2Predicted)
tab.2
##               
##                Normal Pre-Diabetes Diabetes
##   Normal          435           32        0
##   Pre-Diabetes    160           36        0
##   Diabetes         25            4        0
# Calculate accuracy
round((sum(diag(tab.2))/sum(tab.2))*100,2)
## [1] 68.06
set.seed(1234)
index <- createDataPartition(dm$v2_dm2, p = .60, list = FALSE)
train <- dm[index,]
test <- dm[-index,]

m.2 = multinom(v2_dm2 ~ bmi_sd + whr_sd + v1hba1c_sd, data = train)
## # weights:  15 (8 variable)
## initial  value 1142.556780 
## iter  10 value 665.097021
## iter  20 value 582.764135
## iter  30 value 577.372231
## final  value 577.321414 
## converged
summary(m.2)
## Call:
## multinom(formula = v2_dm2 ~ bmi_sd + whr_sd + v1hba1c_sd, data = train)
## 
## Coefficients:
##               (Intercept)       bmi_sd       whr_sd  v1hba1c_sd
## Pre-Diabetes -30.46988444 0.2062987029 0.3662155724 1.401559134
## Diabetes     -61.45559709 0.2785279610 0.4125404774 2.987716886
## 
## Std. Errors:
##              (Intercept)        bmi_sd        whr_sd   v1hba1c_sd
## Pre-Diabetes 2.227319152 0.08521027042 0.08849211709 0.1145243961
## Diabetes     5.530980974 0.18304988845 0.21077401408 0.2634449620
## 
## Residual Deviance: 1154.642827 
## AIC: 1170.642827

Predicting diabetes status in the training data

train$v2_dm2Predicted = predict(m.1, newdata = train, "class")
tab.1 = table(train$v2_dm2, train$v2_dm2Predicted)
tab.1
##               
##                Normal Pre-Diabetes Diabetes
##   Normal          633           68        0
##   Pre-Diabetes    149          142        3
##   Diabetes          5           33        7
# Calculate accuracy
round((sum(diag(tab.1))/sum(tab.1))*100,2)
## [1] 75.19

Predicting diabetes status in the validation data

test$v2_dm2Predicted = predict(m.1, newdata = test, "class")
tab.2 = table(test$v2_dm2, test$v2_dm2Predicted)
tab.2
##               
##                Normal Pre-Diabetes Diabetes
##   Normal          435           31        1
##   Pre-Diabetes    102           93        1
##   Diabetes          7           20        2
# Calculate accuracy
round((sum(diag(tab.2))/sum(tab.2))*100,2)
## [1] 76.59

Different model (to check whether it has better performance)

The multinomial model for predicting incident diabetes using both blood glucose/hba1c

m.1b = multinom(v2_dm2 ~ whr_sd + v1hba1c_sd + v1glucose_sd, data = dm)
## # weights:  15 (8 variable)
## initial  value 1902.796484 
## iter  10 value 988.587051
## iter  20 value 927.112728
## iter  30 value 927.091307
## final  value 927.091139 
## converged
summary(m.1b)
## Call:
## multinom(formula = v2_dm2 ~ whr_sd + v1hba1c_sd + v1glucose_sd, 
##     data = dm)
## 
## Coefficients:
##               (Intercept)       whr_sd  v1hba1c_sd v1glucose_sd
## Pre-Diabetes -33.43421689 0.3865543270 1.312320893 0.5416331779
## Diabetes     -63.42869237 0.4172752754 2.511002908 1.1228024646
## 
## Std. Errors:
##              (Intercept)        whr_sd    v1hba1c_sd  v1glucose_sd
## Pre-Diabetes 1.835680257 0.06684803359 0.08937472783 0.07273722017
## Diabetes     4.383493139 0.16107238679 0.20362448305 0.14381554617
## 
## Residual Deviance: 1854.182277 
## AIC: 1870.182277
exp(coefficients(m.1b))
##                  (Intercept)      whr_sd   v1hba1c_sd v1glucose_sd
## Pre-Diabetes 3.017894822e-15 1.471900359  3.714785333  1.718811696
## Diabetes     2.839676781e-28 1.517820274 12.317276969  3.073455395

Validation of the model

m.2b = multinom(v2_dm2 ~ whr_sd + v1hba1c_sd + v1glucose_sd, data = train)
## # weights:  15 (8 variable)
## initial  value 1142.556780 
## iter  10 value 613.933415
## iter  20 value 562.073459
## iter  30 value 562.062434
## final  value 562.060955 
## converged
summary(m.2b)
## Call:
## multinom(formula = v2_dm2 ~ whr_sd + v1hba1c_sd + v1glucose_sd, 
##     data = train)
## 
## Coefficients:
##               (Intercept)       whr_sd  v1hba1c_sd v1glucose_sd
## Pre-Diabetes -32.38836522 0.4045031535 1.291038255 0.4531038535
## Diabetes     -64.72270809 0.4784587070 2.650850767 0.9372892365
## 
## Std. Errors:
##              (Intercept)       whr_sd   v1hba1c_sd  v1glucose_sd
## Pre-Diabetes 2.327002083 0.0864507297 0.1168052759 0.09345426755
## Diabetes     5.699255739 0.2199355502 0.2726657438 0.18403925527
## 
## Residual Deviance: 1124.121909 
## AIC: 1140.121909
exp(coefficients(m.2b))
##                  (Intercept)      whr_sd   v1hba1c_sd v1glucose_sd
## Pre-Diabetes 8.588388979e-15 1.498557762  3.636560275  1.573187559
## Diabetes     7.785473365e-29 1.613585477 14.166085565  2.553051311

Predicting diabetes status in the training data

train$v2_dm2Predictedb = predict(m.1b, newdata = train, "class")
tab.1b = table(train$v2_dm2, train$v2_dm2Predictedb)
tab.1b
##               
##                Normal Pre-Diabetes Diabetes
##   Normal          635           66        0
##   Pre-Diabetes    154          132        8
##   Diabetes          3           29       13
# Calculate accuracy
round((sum(diag(tab.1b))/sum(tab.1b))*100,2)
## [1] 75

Predicting diabetes status in the validation data

test$v2_dm2Predictedb = predict(m.1b, newdata = test, "class")
tab.2b = table(test$v2_dm2, test$v2_dm2Predictedb)
tab.2b
##               
##                Normal Pre-Diabetes Diabetes
##   Normal          426           41        0
##   Pre-Diabetes     91          102        3
##   Diabetes          4           19        6
# Calculate accuracy
round((sum(diag(tab.2b))/sum(tab.2b))*100,2)
## [1] 77.17