Background

Injection drug use has been associated with depressive symptoms even after accounting for the potential confounding effects of alcohol use, gender, and HIV status using data from the ALIVE study. One potential next step in this research would be to develop a clinical prediction model for depression based on this data.

The dataset contains data from all active ALIVE participants at one visit occurring between 2005 and 2006. The following variables are included in the dataset:

Variable Description
idno Participant study ID
hiv HIV infection, where 0 = HIV-uninfected and 1 = prevalent HIV-infected
age Age (in years)
age10 Age (in years)/10
m0f1 Sex (male = 0, female = 1)
cesdtot Total score on CES-D score, from 0-60
cesd23 Dichotomous indicator of depressive symptoms in past 7 days as measured by CES-D scale (0 = score 0 to 22, 1 = score by 23 or higher)
inject Any reported injection in past 6M (0/1)
injher Reported injecting heroin by itself past 6M (0/1)
injcoc Reported injecting cocaine by itself past 6M (0/1)
alcyn Any reported alcohol use past 6M (0/1)
symp5ge2 Self-reported HIV-related symptoms past 6M (0 for <= 1 symptoms, 1 for else)
gnhlthst Self-reported health status past 6M (Likert 1 = excellent, to 5 = poor)

Installing and Loading packages

#Install the requires packages if needed
# Install the required packages if needed
if (!require("tidyverse")) install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if (!require("haven")) install.packages("haven")    # To import Stata data
## Loading required package: haven
if (!require("janitor")) install.packages("janitor") # For tables
## Loading required package: janitor
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
if (!require("pROC")) install.packages("pROC")       # To create ROC curves
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## 
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
if (!require("PredictABEL")) install.packages("PredictABEL") # For calibration plots
## Loading required package: PredictABEL
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
if (!require("dplyr")) install.packages("dplyr")     # For data manipulation
if (!require("rms")) install.packages("rms")
## Loading required package: rms
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Load the required packages
library(tidyverse)
library(haven)
library(janitor)
library(pROC)
library(PredictABEL)
library(dplyr)
library(rms)

Setting Working Directory and Loading data

Set your working directory to where the data is, OR place this.R script in the same folder where the data is. Then:

getwd() rm(list = ls())

If you are on a Mac, the file pathway looks something like this (I’m using iCloud):

setwd("/Users/username/Library/CloudStorage/GoogleDrive-abcxyz@gmail.com/Folder\subfolder1\subfolder2\...")

If you are on Windows, the file pathway looks something like this: setwd("C:\Users\dng\Folder\subfolder1\subfolder2\...")

Then, load the data

mp <- read_dta("project_b_ts_1673833621.dta") names(mp)

Data preparation

Assessing Collinearity

# 1. Select and rename variables
variables_for_correlation <- c("hiv", "age", "aged10", "m0f1", "inject", "injher", 
                              "injcoc", "alcyn", "symp5ge2", "gnhlthst")

# 2. Calculate the correlation matrix
mp_subset <- mp[, variables_for_correlation]
corr_mat <- cor(mp_subset, use = "pairwise.complete.obs")

# 3. Print the correlation matrix
print(corr_mat) 
##                  hiv          age       aged10        m0f1      inject
## hiv       1.00000000 -0.105267383 -0.105267384  0.06332518  0.01279517
## age      -0.10526738  1.000000000  1.000000000 -0.19879312 -0.06993729
## aged10   -0.10526738  1.000000000  1.000000000 -0.19879312 -0.06993728
## m0f1      0.06332518 -0.198793115 -0.198793124  1.00000000 -0.02806918
## inject    0.01279517 -0.069937286 -0.069937282 -0.02806918  1.00000000
## injher   -0.01615080 -0.041404747 -0.041404747 -0.02257827  0.80528634
## injcoc    0.01308448 -0.003142631 -0.003142631 -0.04068168  0.71673342
## alcyn    -0.06140624  0.013174494  0.013174492 -0.09411120  0.23136598
## symp5ge2  0.14885862  0.047508190  0.047508195  0.06715877  0.14561867
## gnhlthst -0.01696807  0.124399883  0.124399886  0.07422382  0.15001971
##               injher       injcoc       alcyn   symp5ge2    gnhlthst
## hiv      -0.01615080  0.013084482 -0.06140624 0.14885862 -0.01696807
## age      -0.04140475 -0.003142631  0.01317449 0.04750819  0.12439988
## aged10   -0.04140475 -0.003142631  0.01317449 0.04750819  0.12439989
## m0f1     -0.02257827 -0.040681685 -0.09411120 0.06715877  0.07422382
## inject    0.80528634  0.716733424  0.23136598 0.14561867  0.15001971
## injher    1.00000000  0.627441916  0.20645984 0.13526389  0.13588392
## injcoc    0.62744192  1.000000000  0.21925838 0.12700516  0.15240853
## alcyn     0.20645984  0.219258384  1.00000000 0.07799013  0.15189780
## symp5ge2  0.13526389  0.127005163  0.07799013 1.00000000  0.21600773
## gnhlthst  0.13588392  0.152408533  0.15189780 0.21600773  1.00000000
# 4. Visualize the correlation matrix using a heatmap
if(!require(corrplot)){install.packages("corrplot")} 
## Loading required package: corrplot
## corrplot 0.95 loaded
library(corrplot)

corrplot(corr_mat, 
         method = "color", 
         type = "upper", 
         order = "hclust", 
         tl.col = "black",     # Text label color
         tl.srt = 45,         # Text label rotation
         addCoef.col = "black", # Add coefficients to the plot
         number.cex = 0.7,     # Size of the coefficients
         tl.cex = 0.8          # Size of the variable names
)

Splitting data into Training and Validation Sets

# Set a seed for reproducibility
set.seed(123) 

# Calculate the number of rows
n_rows <- nrow(mp)

# Determine the number of rows for the training set (70%)
n_train <- floor(0.7 * n_rows) 

# Create a vector of random indices for the training set
train_indices <- sample(1:n_rows, n_train, replace = FALSE)

# Add the 'training' column and initialize it to 0
mp$training <- 0

# Set 'training' to 1 for the training indices
mp$training[train_indices] <- 1

# Subset the data into training and test sets
mp_train <- mp[mp$training == 1, ]
mp_test <- mp[mp$training == 0, ]

# Verify the sizes
nrow(mp_train) 
## [1] 595
nrow(mp_test)  
## [1] 255
# Inspect the data
head(mp)
## # A tibble: 6 × 14
##    idno   hiv   age aged10  m0f1 cesdtot cesd23 inject injher injcoc alcyn
##   <dbl> <dbl> <dbl>  <dbl> <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1  3173     0  54.8   5.48     0      39      1      0      0      0     0
## 2  9191     0  52.4   5.24     0      28      1      1      1      1     1
## 3  4136     0  47.5   4.75     1       0      0      0      0      0     0
## 4   425     0  68.1   6.81     1       0      0      0      0      0     0
## 5  2745     1  45.5   4.55     0      12      0      0      0      0     0
## 6  2327     0  54.8   5.48     0      31      1      0      0      0     0
## # ℹ 3 more variables: symp5ge2 <dbl>, gnhlthst <dbl>, training <dbl>
table(mp$training)
## 
##   0   1 
## 255 595

Analyzing Data with ‘tabyl’

# Percentages by column
mp_train %>%
  tabyl(inject, cesd23) %>%
  adorn_totals(where="row") %>%
  adorn_totals(where="col") %>%
  adorn_percentages("col") %>%
  adorn_pct_formatting(digits=1) %>%
  adorn_ns() %>%
  adorn_title()
##               cesd23                          
##  inject            0            1        Total
##       0  73.5% (360)  51.4%  (54)  69.6% (414)
##       1  26.3% (129)  48.6%  (51)  30.3% (180)
##    <NA>   0.2%   (1)   0.0%   (0)   0.2%   (1)
##   Total 100.0% (490) 100.0% (105) 100.0% (595)
# Percentages by row
mp_train %>%
  tabyl(inject, cesd23) %>%
  adorn_totals(where="row") %>%
  adorn_totals(where="col") %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits=1) %>%
  adorn_ns() %>%
  adorn_title()
##               cesd23                         
##  inject            0           1        Total
##       0  87.0% (360) 13.0%  (54) 100.0% (414)
##       1  71.7% (129) 28.3%  (51) 100.0% (180)
##    <NA> 100.0%   (1)  0.0%   (0) 100.0%   (1)
##   Total  82.4% (490) 17.6% (105) 100.0% (595)

Building Logistic Regression Models, outcome = CES_D cut off at \(\geq\) 23 point

Model 1A: Predictor = Any Injection

dat1A <- mp %>%
  filter(training==1,!is.na(cesd23),!is.na(inject))
dat1Atest <- mp %>%
  filter(training==0,!is.na(cesd23),!is.na(inject))

model1A <- glm(cesd23 ~ inject, family=binomial(link="logit"), data=dat1A) 
summary(model1A)
## 
## Call:
## glm(formula = cesd23 ~ inject, family = binomial(link = "logit"), 
##     data = dat1A)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.8971     0.1459 -13.000  < 2e-16 ***
## inject        0.9691     0.2206   4.394 1.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 554.15  on 593  degrees of freedom
## Residual deviance: 535.20  on 592  degrees of freedom
## AIC: 539.2
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model1A))
## (Intercept)      inject 
##    0.150000    2.635659
exp(confint(model1A))
## Waiting for profiling to be done...
##                 2.5 %    97.5 %
## (Intercept) 0.1114896 0.1977748
## inject      1.7093075 4.0651223
# ROC Curve and AUC
preprob1A <- predict(model1A, type = "response")
roc1A <- roc(response = dat1A$cesd23, predictor = preprob1A)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc1A, 
     main = "ROC Curve: Model 1A (total injection <6m)", 
     print.auc = TRUE, 
     grid = TRUE,
     legacy.axes = FALSE 
)

auc(roc1A)
## Area under the curve: 0.611
# Hosmer-Lemeshow Test
mp_test <- as.data.frame(mp_test)
dat1A <- as.data.frame(dat1A)
plotCalibration(data= dat1A, cOutcome="cesd23", predRisk(model1A), groups=10)
## Warning in min(xx[xx > upper]): no non-missing arguments to min; returning Inf

## $Table_HLtest
##       total meanpred meanobs predicted observed
## 0.130   414    0.130   0.130        54       54
## 0.283   180    0.283   0.283        51       51
## 
## $Chi_square
## [1] 0
## 
## $df
## [1] 8
## 
## $p_value
## [1] 1

Model 1B: Predictor = Inject Cocaine Last 6m

dat1B <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injcoc))
dat1Btest <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injcoc))
model1B <- glm(cesd23 ~ injcoc, family=binomial(link="logit"), data=dat1B) 
summary(model1B)
## 
## Call:
## glm(formula = cesd23 ~ injcoc, family = binomial(link = "logit"), 
##     data = dat1B)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.8014     0.1299 -13.863  < 2e-16 ***
## injcoc        1.1364     0.2428   4.681 2.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.76  on 592  degrees of freedom
## Residual deviance: 533.25  on 591  degrees of freedom
## AIC: 537.25
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model1B))
## (Intercept)      injcoc 
##   0.1650718   3.1155280
exp(confint(model1B))
## Waiting for profiling to be done...
##                 2.5 %    97.5 %
## (Intercept) 0.1269162 0.2113889
## injcoc      1.9258866 4.9998449
preprob1B <- predict(model1B, type = "response")
roc1B <- roc(response = dat1B$cesd23, predictor = preprob1B)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc1B, main = "ROC Curve: Model 1B", print.auc = TRUE, grid = TRUE, legacy.axes = FALSE)

auc(roc1B)
## Area under the curve: 0.5997
dat1B <- as.data.frame(dat1B)
plotCalibration(data= dat1B, cOutcome="cesd23", predRisk(model1B), groups=10)
## Warning in min(xx[xx > upper]): no non-missing arguments to min; returning Inf

## $Table_HLtest
##       total meanpred meanobs predicted observed
## 0.142   487    0.142   0.142        69       69
## 0.340   106    0.340   0.340        36       36
## 
## $Chi_square
## [1] 0
## 
## $df
## [1] 8
## 
## $p_value
## [1] 1

Model 1C: Predictor = Inject Heroin Last 6m

dat1C <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injher))
dat1Ctest <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injher))
model1C <- glm(cesd23 ~ injher, family=binomial(link="logit"), data=dat1C) 
summary(model1C)
## 
## Call:
## glm(formula = cesd23 ~ injher, family = binomial(link = "logit"), 
##     data = dat1C)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.8149     0.1348  -13.46  < 2e-16 ***
## injher        0.9746     0.2304    4.23 2.34e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.76  on 592  degrees of freedom
## Residual deviance: 536.70  on 591  degrees of freedom
## AIC: 540.7
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model1C))
## (Intercept)      injher 
##   0.1628499   2.6501645
exp(confint(model1C))
## Waiting for profiling to be done...
##                 2.5 %    97.5 %
## (Intercept) 0.1239389 0.2104136
## injher      1.6804428 4.1543740
preprob1C_test <- predict(model1C, dat1Ctest, type = "response")
roc1C_test <- roc(response = dat1Ctest$cesd23, predictor = preprob1C_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc1C_test, main = "ROC Curve 1C in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc1C_test)
## Area under the curve: 0.5989
dat1Ctest <- as.data.frame(dat1Ctest)
plotCalibration(data= dat1Ctest, cOutcome="cesd23", predRisk=preprob1C_test, groups=10)
## Warning in min(xx[xx > upper]): no non-missing arguments to min; returning Inf

## $Table_HLtest
##       total meanpred meanobs predicted observed
## 0.140   202    0.140   0.163     28.29       33
## 0.301    53    0.301   0.358     15.98       19
## 
## $Chi_square
## [1] 1.73
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.9882

Model 2: Predictor = Inject Heroin + Cocaine Last 6m

dat2 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injcoc),!is.na(injher))
dat2test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injcoc),!is.na(injher))
model2 <- glm(cesd23 ~ injcoc + injher, family=binomial(link="logit"), data=dat2) 
summary(model2)
## 
## Call:
## glm(formula = cesd23 ~ injcoc + injher, family = binomial(link = "logit"), 
##     data = dat2)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.8575     0.1369 -13.568   <2e-16 ***
## injcoc        0.7924     0.3327   2.382   0.0172 *  
## injher        0.4802     0.3179   1.510   0.1309    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.76  on 592  degrees of freedom
## Residual deviance: 531.04  on 590  degrees of freedom
## AIC: 537.04
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model2))
## (Intercept)      injcoc      injher 
##   0.1560675   2.2087508   1.6163227
exp(confint(model2))
## Waiting for profiling to be done...
##                 2.5 %    97.5 %
## (Intercept) 0.1182720 0.2024576
## injcoc      1.1504098 4.2539859
## injher      0.8543742 2.9817765
preprob2_test <- predict(model2, dat2test, type = "response")
roc2_test <- roc(response = dat2test$cesd23, predictor = preprob2_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc2_test, main = "ROC Curve 2 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc2_test)
## Area under the curve: 0.6133
dat2test <- as.data.frame(dat2test)
plotCalibration(data= dat2test, cOutcome="cesd23", predRisk=preprob2_test, groups=10)
## Warning in min(xx[xx > upper]): no non-missing arguments to min; returning Inf

## $Table_HLtest
##               total meanpred meanobs predicted observed
## 0.135           185    0.135   0.151     24.97       28
## 0.201            19    0.201   0.421      3.83        8
## [0.256,  Inf]    51    0.324   0.314     16.52       16
## 
## $Chi_square
## [1] 6.148
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.6307

Model 3: Predictor = Any Injection, Sex

dat3 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(inject),!is.na(m0f1))
dat3test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(inject),!is.na(m0f1))
model3 <- glm(cesd23 ~ inject + m0f1, family=binomial(link="logit"), data=dat3) 
summary(model3)
## 
## Call:
## glm(formula = cesd23 ~ inject + m0f1, family = binomial(link = "logit"), 
##     data = dat3)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.1984     0.1790 -12.279  < 2e-16 ***
## inject        1.0258     0.2246   4.567 4.94e-06 ***
## m0f1          0.7896     0.2272   3.475 0.000511 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 554.15  on 593  degrees of freedom
## Residual deviance: 523.36  on 591  degrees of freedom
## AIC: 529.36
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model3))
## (Intercept)      inject        m0f1 
##   0.1109791   2.7893844   2.2024176
exp(confint(model3))
## Waiting for profiling to be done...
##                2.5 %    97.5 %
## (Intercept) 0.077032 0.1556135
## inject      1.796074 4.3398778
## m0f1        1.408429 3.4386340
preprob3_test <- predict(model3, dat3test, type = "response")
roc3_test <- roc(response = dat3test$cesd23, predictor = preprob3_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc3_test, main = "ROC Curve 3 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc3_test)
## Area under the curve: 0.6533
dat3test <- as.data.frame(dat3test)
plotCalibration(data= dat3test, cOutcome="cesd23", predRisk=preprob3_test, groups=10)
## Warning in min(xx[xx > upper]): no non-missing arguments to min; returning Inf

## $Table_HLtest
##        total meanpred meanobs predicted observed
## 0.0999   108    0.100   0.120     10.79       13
## 0.1964    66    0.196   0.197     12.96       13
## 0.2364    51    0.236   0.255     12.06       13
## 0.4054    30    0.405   0.433     12.16       13
## 
## $Chi_square
## [1] 0.696
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.9995

Model 4: Predictor = Total Injection, Sex, Age

dat4 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age))
dat4test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age))
model4 <- glm(cesd23 ~ inject + m0f1 + age, family=binomial(link="logit"), data=dat4) 
summary(model4)
## 
## Call:
## glm(formula = cesd23 ~ inject + m0f1 + age, family = binomial(link = "logit"), 
##     data = dat4)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.14331    0.88072  -3.569 0.000358 ***
## inject       1.04400    0.22567   4.626 3.72e-06 ***
## m0f1         0.83617    0.23174   3.608 0.000308 ***
## age          0.01878    0.01703   1.103 0.270067    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 554.15  on 593  degrees of freedom
## Residual deviance: 522.14  on 590  degrees of freedom
## AIC: 530.14
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model4))
## (Intercept)      inject        m0f1         age 
##  0.04313986  2.84056872  2.30750744  1.01896009
exp(confint(model4))
## Waiting for profiling to be done...
##                   2.5 %    97.5 %
## (Intercept) 0.007485091 0.2376723
## inject      1.825573907 4.4297170
## m0f1        1.463489442 3.6373031
## age         0.985521285 1.0536729
preprob4_test <- predict(model4, dat4test, type = "response")
roc4_test <- roc(response = dat4test$cesd23, predictor = preprob4_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc4_test, main = "ROC Curve 4 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc4_test)
## Area under the curve: 0.6513
dat4test <- as.data.frame(dat4test)
plotCalibration(data= dat4test, cOutcome="cesd23", predRisk=preprob4_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0748,0.0926)    26    0.087   0.154      2.25        4
## [0.0926,0.0984)    25    0.095   0.120      2.39        3
## [0.0984,0.1063)    26    0.102   0.077      2.64        2
## [0.1063,0.1218)    25    0.112   0.160      2.79        4
## [0.1218,0.1863)    26    0.161   0.192      4.19        5
## [0.1863,0.2026)    25    0.194   0.200      4.85        5
## [0.2026,0.2230)    26    0.209   0.154      5.44        4
## [0.2230,0.2395)    25    0.230   0.240      5.76        6
## [0.2395,0.3707)    26    0.275   0.231      7.16        6
## [0.3707,0.4685]    25    0.404   0.520     10.09       13
## 
## $Chi_square
## [1] 4.771
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.7817

Model 5: Predictor = Injection of Heroin, Cocaine, Sex, Age

dat5 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age))
dat5test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age))
model5 <- glm(cesd23 ~ injher + injcoc + m0f1 + age, family=binomial(link="logit"), data=dat5) 
summary(model5)
## 
## Call:
## glm(formula = cesd23 ~ injher + injcoc + m0f1 + age, family = binomial(link = "logit"), 
##     data = dat5)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.78583    0.87495  -3.184 0.001453 ** 
## injher       0.47404    0.32406   1.463 0.143517    
## injcoc       0.87393    0.34143   2.560 0.010479 *  
## m0f1         0.83341    0.23275   3.581 0.000343 ***
## age          0.01253    0.01714   0.731 0.464634    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.76  on 592  degrees of freedom
## Residual deviance: 518.38  on 588  degrees of freedom
## AIC: 528.38
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model5))
## (Intercept)      injher      injcoc        m0f1         age 
##  0.06167769  1.60646750  2.39630386  2.30115409  1.01260865
exp(confint(model5))
## Waiting for profiling to be done...
##                  2.5 %    97.5 %
## (Intercept) 0.01084204 0.3365874
## injher      0.83921738 3.0010262
## injcoc      1.22781903 4.6992392
## m0f1        1.45678912 3.6352518
## age         0.97913336 1.0472737
preprob5_test <- predict(model5, dat5test, type = "response")
roc5_test <- roc(response = dat5test$cesd23, predictor = preprob5_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc5_test, main = "ROC Curve 5 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc5_test)
## Area under the curve: 0.6477
dat5test <- as.data.frame(dat5test)
plotCalibration(data= dat5test, cOutcome="cesd23", predRisk=preprob5_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0857,0.0985)    26    0.094   0.154      2.45        4
## [0.0985,0.1020)    25    0.100   0.040      2.51        1
## [0.1020,0.1058)    26    0.104   0.115      2.71        3
## [0.1058,0.1142)    25    0.110   0.160      2.75        4
## [0.1142,0.1796)    26    0.139   0.154      3.61        4
## [0.1796,0.1995)    25    0.191   0.240      4.78        6
## [0.1995,0.2110)    26    0.205   0.269      5.34        7
## [0.2110,0.2499)    25    0.219   0.160      5.47        4
## [0.2499,0.3142)    26    0.292   0.385      7.58       10
## [0.3142,0.5346]    25    0.404   0.360     10.10        9
## 
## $Chi_square
## [1] 5.652
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.6862

Model 6: Predictor = Any Injection, Sex, Age, HIV

dat6 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(hiv))
dat6test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(hiv))
model6 <- glm(cesd23 ~ inject + m0f1 + age + hiv, family=binomial(link="logit"), data=dat6) 
summary(model6)
## 
## Call:
## glm(formula = cesd23 ~ inject + m0f1 + age + hiv, family = binomial(link = "logit"), 
##     data = dat6)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.43294    0.90685  -3.786 0.000153 ***
## inject       1.04146    0.22643   4.599 4.24e-06 ***
## m0f1         0.79606    0.23353   3.409 0.000652 ***
## age          0.02237    0.01729   1.294 0.195531    
## hiv          0.38780    0.23543   1.647 0.099526 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 554.15  on 593  degrees of freedom
## Residual deviance: 519.48  on 589  degrees of freedom
## AIC: 529.48
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model6))
## (Intercept)      inject        m0f1         age         hiv 
##  0.03229175  2.83336364  2.21678109  1.02262581  1.47372791
exp(confint(model6))
## Waiting for profiling to be done...
##                   2.5 %    97.5 %
## (Intercept) 0.005301136 0.1865348
## inject      1.818320791 4.4254387
## m0f1        1.400712265 3.5057560
## age         0.988610893 1.0580374
## hiv         0.924099463 2.3303873
preprob6_test <- predict(model6, dat6test, type = "response")
roc6_test <- roc(response = dat6test$cesd23, predictor = preprob6_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc6_test, main = "ROC Curve 6 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc6_test)
## Area under the curve: 0.6757
dat6test <- as.data.frame(dat6test)
plotCalibration(data= dat6test, cOutcome="cesd23", predRisk=preprob6_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0663,0.0884)    26    0.080   0.115      2.07        3
## [0.0884,0.1005)    25    0.094   0.120      2.34        3
## [0.1005,0.1197)    26    0.112   0.115      2.91        3
## [0.1197,0.1411)    25    0.128   0.160      3.20        4
## [0.1411,0.1670)    26    0.152   0.192      3.95        5
## [0.1670,0.1827)    25    0.176   0.080      4.40        2
## [0.1827,0.2122)    26    0.197   0.154      5.12        4
## [0.2122,0.2509)    25    0.228   0.320      5.71        8
## [0.2509,0.3436)    26    0.288   0.346      7.48        9
## [0.3436,0.5366]    25    0.396   0.440      9.89       11
## 
## $Chi_square
## [1] 4.943
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.7637

Model 7: Predictor = Heroin Injection, Cocaine Injection, Sex, Age, HIV

dat7 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(hiv))
dat7test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(hiv))
model7 <- glm(cesd23 ~ injher + injcoc + m0f1 + age + hiv, family=binomial(link="logit"), data=dat7) 
summary(model7)
## 
## Call:
## glm(formula = cesd23 ~ injher + injcoc + m0f1 + age + hiv, family = binomial(link = "logit"), 
##     data = dat7)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.07222    0.89864  -3.419 0.000629 ***
## injher       0.49784    0.32633   1.526 0.127120    
## injcoc       0.84304    0.34374   2.453 0.014183 *  
## m0f1         0.78885    0.23466   3.362 0.000775 ***
## age          0.01603    0.01735   0.924 0.355512    
## hiv          0.39696    0.23687   1.676 0.093764 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.76  on 592  degrees of freedom
## Residual deviance: 515.62  on 587  degrees of freedom
## AIC: 527.62
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model7))
## (Intercept)      injher      injcoc        m0f1         age         hiv 
##  0.04631808  1.64516194  2.32342174  2.20087260  1.01616343  1.48729096
exp(confint(model7))
## Waiting for profiling to be done...
##                  2.5 %    97.5 %
## (Intercept) 0.00774924 0.2640223
## injher      0.85578594 3.0878873
## injcoc      1.18498545 4.5764364
## m0f1        1.38776391 3.4890692
## age         0.98217659 1.0514347
## hiv         0.93001835 2.3585506
preprob7_test <- predict(model7, dat7test, type = "response")
roc7_test <- roc(response = dat7test$cesd23, predictor = preprob7_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc7_test, main = "ROC Curve 7 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc7_test)
## Area under the curve: 0.6792
dat7test <- as.data.frame(dat7test)
plotCalibration(data= dat7test, cOutcome="cesd23", predRisk=preprob7_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0753,0.0912)    26    0.085   0.077      2.21        2
## [0.0912,0.1003)    25    0.096   0.120      2.39        3
## [0.1003,0.1245)    26    0.112   0.154      2.91        4
## [0.1245,0.1356)    25    0.129   0.120      3.22        3
## [0.1356,0.1595)    26    0.146   0.154      3.80        4
## [0.1595,0.1792)    25    0.170   0.240      4.25        6
## [0.1792,0.2060)    26    0.187   0.077      4.87        2
## [0.2060,0.2566)    25    0.231   0.320      5.78        8
## [0.2566,0.3224)    26    0.280   0.346      7.27        9
## [0.3224,0.5203]    25    0.402   0.440     10.04       11
## 
## $Chi_square
## [1] 5.468
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.7065

Model 8: Predictor = Any Injection, Sex, Age, HIV Symptoms

dat8 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(symp5ge2))
dat8test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(symp5ge2))
model8 <- glm(cesd23 ~ inject + m0f1 + age + symp5ge2, family=binomial(link="logit"), data=dat8)
summary(model8)
## 
## Call:
## glm(formula = cesd23 ~ inject + m0f1 + age + symp5ge2, family = binomial(link = "logit"), 
##     data = dat8)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.04894    0.90304  -3.376 0.000735 ***
## inject       0.92464    0.23353   3.959 7.51e-05 ***
## m0f1         0.77652    0.23871   3.253 0.001142 ** 
## age          0.01569    0.01748   0.898 0.369409    
## symp5ge2     1.81644    0.42051   4.320 1.56e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.76  on 592  degrees of freedom
## Residual deviance: 502.46  on 588  degrees of freedom
## AIC: 512.46
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model8))
## (Intercept)      inject        m0f1         age    symp5ge2 
##  0.04740933  2.52095106  2.17388500  1.01581128  6.14994871
exp(confint(model8))
## Waiting for profiling to be done...
##                   2.5 %     97.5 %
## (Intercept) 0.007873728  0.2729325
## inject      1.592777559  3.9869417
## m0f1        1.358772143  3.4714139
## age         0.981587917  1.0513143
## symp5ge2    2.721203147 14.3686483
preprob8_test <- predict(model8, dat8test, type = "response")
roc8_test <- roc(response = dat8test$cesd23, predictor = preprob8_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc8_test, main = "ROC Curve 8 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc8_test)
## Area under the curve: 0.6854
dat8test <- as.data.frame(dat8test)
plotCalibration(data= dat8test, cOutcome="cesd23", predRisk=preprob8_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0742,0.0889)    26    0.084   0.154      2.19        4
## [0.0889,0.0934)    25    0.091   0.120      2.28        3
## [0.0934,0.0999)    26    0.096   0.038      2.50        1
## [0.0999,0.1147)    25    0.105   0.120      2.63        3
## [0.1147,0.1722)    26    0.156   0.231      4.06        6
## [0.1722,0.1849)    25    0.179   0.160      4.46        4
## [0.1849,0.2009)    26    0.192   0.077      4.98        2
## [0.2009,0.2242)    25    0.209   0.320      5.22        8
## [0.2242,0.3493)    26    0.308   0.269      8.02        7
## [0.3493,0.8051]    25    0.535   0.560     13.37       14
## 
## $Chi_square
## [1] 8.424
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.3932

Model 9: Predictor = Heroin Injection, Cocaine Injection, Sex, Age, HIV Symptoms

dat9 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(symp5ge2))
dat9test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(symp5ge2))
model9 <- glm(cesd23 ~ injher + injcoc + m0f1 + age + symp5ge2, family=binomial(link="logit"), data=dat9)
summary(model9)
## 
## Call:
## glm(formula = cesd23 ~ injher + injcoc + m0f1 + age + symp5ge2, 
##     family = binomial(link = "logit"), data = dat9)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.711376   0.895726  -3.027  0.00247 ** 
## injher       0.326294   0.340752   0.958  0.33828    
## injcoc       0.875505   0.355716   2.461  0.01385 *  
## m0f1         0.774802   0.239277   3.238  0.00120 ** 
## age          0.009743   0.017553   0.555  0.57886    
## symp5ge2     1.829115   0.424482   4.309 1.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.37  on 591  degrees of freedom
## Residual deviance: 499.14  on 586  degrees of freedom
## AIC: 511.14
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model9))
## (Intercept)      injher      injcoc        m0f1         age    symp5ge2 
##  0.06644529  1.38582335  2.40008687  2.17016291  1.00979021  6.22837241
exp(confint(model9))
## Waiting for profiling to be done...
##                  2.5 %     97.5 %
## (Intercept) 0.01121142  0.3776557
## injher      0.69990983  2.6707990
## injcoc      1.19425419  4.8338384
## m0f1        1.35512883  3.4699775
## age         0.97558834  1.0451952
## symp5ge2    2.73285352 14.6539827
preprob9_test <- predict(model9, dat9test, type = "response")
roc9_test <- roc(response = dat9test$cesd23, predictor = preprob9_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc9_test, main = "ROC Curve 9 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc9_test)
## Area under the curve: 0.6817
dat9test <- as.data.frame(dat9test)
plotCalibration(data= dat9test, cOutcome="cesd23", predRisk=preprob9_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0843,0.0941)    26    0.091   0.154      2.36        4
## [0.0941,0.0967)    25    0.095   0.040      2.38        1
## [0.0967,0.1003)    26    0.098   0.115      2.56        3
## [0.1003,0.1067)    25    0.103   0.120      2.58        3
## [0.1067,0.1738)    26    0.133   0.154      3.46        4
## [0.1738,0.1852)    25    0.179   0.240      4.48        6
## [0.1852,0.1959)    26    0.190   0.154      4.93        4
## [0.1959,0.2521)    25    0.221   0.280      5.52        7
## [0.2521,0.4031)    26    0.295   0.269      7.66        7
## [0.4031,0.8230]    25    0.549   0.520     13.72       13
## 
## $Chi_square
## [1] 3.913
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.8649

Model 10: Predictor = Any Injection, Sex, Age, Alcohol Use, HIV

dat10 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(hiv))
dat10test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(hiv))
model10 <- glm(cesd23 ~ inject + m0f1 + age + alcyn + hiv, family=binomial(link="logit"), data=dat10)
summary(model10)
## 
## Call:
## glm(formula = cesd23 ~ inject + m0f1 + age + alcyn + hiv, family = binomial(link = "logit"), 
##     data = dat10)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.79714    0.92525  -4.104 4.06e-05 ***
## inject       0.87141    0.23405   3.723 0.000197 ***
## m0f1         0.86362    0.23772   3.633 0.000280 ***
## age          0.02232    0.01740   1.283 0.199532    
## alcyn        0.71677    0.23756   3.017 0.002551 ** 
## hiv          0.44461    0.23888   1.861 0.062710 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.76  on 592  degrees of freedom
## Residual deviance: 509.86  on 587  degrees of freedom
## AIC: 521.86
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model10))
## (Intercept)      inject        m0f1         age       alcyn         hiv 
##  0.02243495  2.39028407  2.37174018  1.02257577  2.04781244  1.55987761
exp(confint(model10))
## Waiting for profiling to be done...
##                   2.5 %    97.5 %
## (Intercept) 0.003539512 0.1339724
## inject      1.509768631 3.7859343
## m0f1        1.487492294 3.7851282
## age         0.988327073 1.0582341
## alcyn       1.291507781 3.2850094
## hiv         0.971980352 2.4846730
preprob10_test <- predict(model10, dat10test, type = "response")
roc10_test <- roc(response = dat10test$cesd23, predictor = preprob10_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc10_test, main = "ROC Curve 10 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc10_test)
## Area under the curve: 0.6912
dat10test <- as.data.frame(dat10test)
plotCalibration(data= dat10test, cOutcome="cesd23", predRisk=preprob10_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0479,0.0719)    26    0.061   0.154      1.59        4
## [0.0719,0.0988)    25    0.086   0.080      2.16        2
## [0.0988,0.1169)    26    0.108   0.115      2.80        3
## [0.1169,0.1322)    25    0.125   0.080      3.12        2
## [0.1322,0.1555)    26    0.143   0.154      3.71        4
## [0.1555,0.2000)    25    0.176   0.160      4.39        4
## [0.2000,0.2399)    26    0.221   0.231      5.75        6
## [0.2399,0.2622)    25    0.249   0.160      6.22        4
## [0.2622,0.3411)    26    0.296   0.385      7.70       10
## [0.3411,0.6108]    25    0.415   0.520     10.39       13
## 
## $Chi_square
## [1] 7.623
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.4711

Model 11: Predictor = Heroin Injection, Cocaine Injection, Sex, Age, Alcohol Use, HIV

dat11 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(hiv))
dat11test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(hiv))
model11 <- glm(cesd23 ~ injher + injcoc + m0f1 + age + alcyn + hiv, family=binomial(link="logit"), data=dat11)
summary(model11)
## 
## Call:
## glm(formula = cesd23 ~ injher + injcoc + m0f1 + age + alcyn + 
##     hiv, family = binomial(link = "logit"), data = dat11)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.49409    0.91892  -3.802 0.000143 ***
## injher       0.37165    0.33399   1.113 0.265815    
## injcoc       0.75986    0.34931   2.175 0.029605 *  
## m0f1         0.85960    0.23892   3.598 0.000321 ***
## age          0.01708    0.01744   0.980 0.327259    
## alcyn        0.70002    0.23998   2.917 0.003534 ** 
## hiv          0.46389    0.24013   1.932 0.053385 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.37  on 591  degrees of freedom
## Residual deviance: 506.63  on 585  degrees of freedom
## AIC: 520.63
## 
## Number of Fisher Scoring iterations: 4
exp(coef(model11))
## (Intercept)      injher      injcoc        m0f1         age       alcyn 
##  0.03037623  1.45012661  2.13797832  2.36222671  1.01722916  2.01378478 
##         hiv 
##  1.59024747
exp(confint(model11))
## Waiting for profiling to be done...
##                   2.5 %    97.5 %
## (Intercept) 0.004869095 0.1797581
## injher      0.742651344 2.7612655
## injcoc      1.077575296 4.2545093
## m0f1        1.478282673 3.7796546
## age         0.983032171 1.0527113
## alcyn       1.263441589 3.2446252
## hiv         0.988608886 2.5397218
preprob11_test <- predict(model11, dat11test, type = "response")
roc11_test <- roc(response = dat11test$cesd23, predictor = preprob11_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc11_test, main = "ROC Curve 11 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc11_test)
## Area under the curve: 0.6914
dat11test <- as.data.frame(dat11test)
plotCalibration(data= dat11test, cOutcome="cesd23", predRisk=preprob11_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0533,0.0727)    26    0.064   0.154      1.67        4
## [0.0727,0.1010)    25    0.089   0.080      2.22        2
## [0.1010,0.1196)    26    0.111   0.115      2.88        3
## [0.1196,0.1333)    25    0.127   0.120      3.18        3
## [0.1333,0.1504)    26    0.143   0.154      3.71        4
## [0.1504,0.1814)    25    0.167   0.080      4.19        2
## [0.1814,0.2226)    26    0.196   0.192      5.10        5
## [0.2226,0.2647)    25    0.243   0.200      6.07        5
## [0.2647,0.3400)    26    0.299   0.462      7.77       12
## [0.3400,0.5801]    25    0.432   0.480     10.81       12
## 
## $Chi_square
## [1] 8.698
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.3684

Model 12: Predictor = Any Injection, Sex, Age, Alcohol Use, HIV Symptoms

dat12 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(symp5ge2))
dat12test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(symp5ge2))
model12 <- glm(cesd23 ~ inject + m0f1 + age + alcyn + symp5ge2, family=binomial(link="logit"), data=dat12)
summary(model12)
## 
## Call:
## glm(formula = cesd23 ~ inject + m0f1 + age + alcyn + symp5ge2, 
##     family = binomial(link = "logit"), data = dat12)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.34163    0.91749  -3.642 0.000270 ***
## inject       0.78322    0.23920   3.274 0.001059 ** 
## m0f1         0.85307    0.24282   3.513 0.000443 ***
## age          0.01466    0.01759   0.834 0.404424    
## alcyn        0.68109    0.24065   2.830 0.004652 ** 
## symp5ge2     1.77933    0.41992   4.237 2.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.37  on 591  degrees of freedom
## Residual deviance: 494.06  on 586  degrees of freedom
## AIC: 506.06
## 
## Number of Fisher Scoring iterations: 5
exp(coef(model12))
## (Intercept)      inject        m0f1         age       alcyn    symp5ge2 
##  0.03537923  2.18849935  2.34684909  1.01477075  1.97602889  5.92591424
exp(confint(model12))
## Waiting for profiling to be done...
##                   2.5 %     97.5 %
## (Intercept) 0.005694393  0.2089884
## inject      1.366884252  3.4980179
## m0f1        1.456438860  3.7817138
## age         0.980364919  1.0504602
## alcyn       1.238280737  3.1882469
## symp5ge2    2.624197602 13.8248636
preprob12_test <- predict(model12, dat12test, type = "response")
roc12_test <- roc(response = dat12test$cesd23, predictor = preprob12_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc12_test, main = "ROC Curve 12 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc12_test)
## Area under the curve: 0.7011
dat12test <- as.data.frame(dat12test)
plotCalibration(data= dat12test, cOutcome="cesd23", predRisk=preprob12_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0567,0.0676)    26    0.064   0.077      1.67        2
## [0.0676,0.0773)    25    0.072   0.120      1.79        3
## [0.0773,0.1205)    26    0.104   0.077      2.70        2
## [0.1205,0.1300)    25    0.126   0.120      3.14        3
## [0.1300,0.1421)    26    0.136   0.115      3.53        3
## [0.1421,0.2166)    25    0.156   0.120      3.89        3
## [0.2166,0.2393)    26    0.230   0.346      5.97        9
## [0.2393,0.2562)    25    0.247   0.280      6.18        7
## [0.2562,0.3897)    26    0.287   0.269      7.45        7
## [0.3897,0.8379]    25    0.561   0.520     14.03       13
## 
## $Chi_square
## [1] 3.837
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.8715

Model 13: Predictor = Heroin Injection, Cocaine Injection, Sex, Age, Alcohol Use, HIV Symptoms

dat13 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(symp5ge2))
dat13test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(symp5ge2))
model13 <- glm(cesd23 ~ injher + injcoc + m0f1 + age + alcyn + symp5ge2, family=binomial(link="logit"), data=dat13)
summary(model13)
## 
## Call:
## glm(formula = cesd23 ~ injher + injcoc + m0f1 + age + alcyn + 
##     symp5ge2, family = binomial(link = "logit"), data = dat13)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.050626   0.910956  -3.349 0.000812 ***
## injher       0.229438   0.346858   0.661 0.508308    
## injcoc       0.791349   0.361065   2.192 0.028401 *  
## m0f1         0.856581   0.243710   3.515 0.000440 ***
## age          0.009751   0.017615   0.554 0.579900    
## alcyn        0.659293   0.242757   2.716 0.006611 ** 
## symp5ge2     1.796496   0.422947   4.248 2.16e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 552.98  on 590  degrees of freedom
## Residual deviance: 491.42  on 584  degrees of freedom
## AIC: 505.42
## 
## Number of Fisher Scoring iterations: 5
exp(coef(model13))
## (Intercept)      injher      injcoc        m0f1         age       alcyn 
##  0.04732927  1.25789247  2.20636987  2.35509381  1.00979838  1.93342446 
##    symp5ge2 
##  6.02848423
exp(confint(model13))
## Waiting for profiling to be done...
##                   2.5 %     97.5 %
## (Intercept) 0.007732315  0.2765987
## injher      0.627331807  2.4519926
## injcoc      1.086049283  4.4890237
## m0f1        1.459334693  3.8026137
## age         0.975468051  1.0453314
## alcyn       1.205922655  3.1309985
## symp5ge2    2.652392186 14.1409814
preprob13_test <- predict(model13, dat13test, type = "response")
roc13_test <- roc(response = dat13test$cesd23, predictor = preprob13_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc13_test, main = "ROC Curve 13 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc13_test)
## Area under the curve: 0.7043
dat13test <- as.data.frame(dat13test)
plotCalibration(data= dat13test, cOutcome="cesd23", predRisk=preprob13_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0631,0.0704)    26    0.068   0.077      1.78        2
## [0.0704,0.0769)    25    0.073   0.120      1.84        3
## [0.0769,0.1250)    26    0.102   0.115      2.66        3
## [0.1250,0.1338)    25    0.129   0.120      3.23        3
## [0.1338,0.1460)    26    0.140   0.077      3.64        2
## [0.1460,0.1569)    25    0.151   0.160      3.78        4
## [0.1569,0.2415)    26    0.187   0.231      4.87        6
## [0.2415,0.2713)    25    0.255   0.240      6.37        6
## [0.2713,0.4274)    26    0.301   0.462      7.82       12
## [0.4274,0.8380]    25    0.577   0.440     14.43       11
## 
## $Chi_square
## [1] 7.24
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.5109

Model 14: Predictor = Any Injection, Sex, Age, Alcohol Use, HIV, General Health Status

dat14 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(hiv),!is.na(gnhlthst))
dat14test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(hiv),!is.na(gnhlthst))
model14 <- glm(cesd23 ~ inject + m0f1 + age + alcyn + hiv + gnhlthst, family=binomial(link="logit"), data=dat14)
summary(model14)
## 
## Call:
## glm(formula = cesd23 ~ inject + m0f1 + age + alcyn + hiv + gnhlthst, 
##     family = binomial(link = "logit"), data = dat14)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.033250   1.066062  -5.659 1.52e-08 ***
## inject       0.574968   0.253402   2.269  0.02327 *  
## m0f1         0.764026   0.252116   3.030  0.00244 ** 
## age          0.008276   0.018408   0.450  0.65301    
## alcyn        0.559240   0.254682   2.196  0.02810 *  
## hiv          0.493854   0.256638   1.924  0.05431 .  
## gnhlthst     0.993756   0.145354   6.837 8.10e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.76  on 592  degrees of freedom
## Residual deviance: 450.61  on 586  degrees of freedom
## AIC: 464.61
## 
## Number of Fisher Scoring iterations: 5
exp(coef(model14))
## (Intercept)      inject        m0f1         age       alcyn         hiv 
## 0.002397689 1.777073102 2.146903251 1.008310376 1.749342942 1.638619554 
##    gnhlthst 
## 2.701361826
exp(confint(model14))
## Waiting for profiling to be done...
##                    2.5 %     97.5 %
## (Intercept) 0.0002810161 0.01851479
## inject      1.0784027232 2.91791078
## m0f1        1.3090985947 3.52496020
## age         0.9724537406 1.04541976
## alcyn       1.0657551596 2.89961475
## hiv         0.9872411814 2.70644714
## gnhlthst    2.0523210704 3.63330462
preprob14_test <- predict(model14, dat14test, type = "response")
roc14_test <- roc(response = dat14test$cesd23, predictor = preprob14_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc14_test, main = "ROC Curve 14 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc14_test)
## Area under the curve: 0.7249
dat14test <- as.data.frame(dat14test)
plotCalibration(data= dat14test, cOutcome="cesd23", predRisk=preprob14_test, groups=10)

## $Table_HLtest
##                  total meanpred meanobs predicted observed
## [0.00866,0.0272)    26    0.019   0.154      0.51        4
## [0.02718,0.0447)    25    0.036   0.040      0.91        1
## [0.04473,0.0715)    26    0.059   0.115      1.54        3
## [0.07152,0.1081)    25    0.088   0.080      2.21        2
## [0.10808,0.1627)    26    0.125   0.115      3.25        3
## [0.16274,0.1897)    25    0.172   0.120      4.30        3
## [0.18970,0.2620)    26    0.224   0.231      5.81        6
## [0.26203,0.3570)    25    0.298   0.240      7.44        6
## [0.35704,0.4401)    26    0.398   0.308     10.35        8
## [0.44007,0.8616]    25    0.574   0.640     14.34       16
## 
## $Chi_square
## [1] 28.88
## 
## $df
## [1] 8
## 
## $p_value
## [1] 3e-04

Model 15: Predictor = Heroin Injection, Cocaine Injection, Sex, Age, Alcohol Use, HIV, General Health Status

dat15 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(hiv),!is.na(gnhlthst))
dat15test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(hiv),!is.na(gnhlthst))
model15 <- glm(cesd23 ~ injher + injcoc + m0f1 + age + alcyn + hiv + gnhlthst, family=binomial(link="logit"), data=dat15)
summary(model15)
## 
## Call:
## glm(formula = cesd23 ~ injher + injcoc + m0f1 + age + alcyn + 
##     hiv + gnhlthst, family = binomial(link = "logit"), data = dat15)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.884300   1.066526  -5.517 3.44e-08 ***
## injher       0.350627   0.350108   1.001  0.31659    
## injcoc       0.429136   0.369526   1.161  0.24551    
## m0f1         0.763619   0.253742   3.009  0.00262 ** 
## age          0.005639   0.018337   0.308  0.75846    
## alcyn        0.536565   0.255405   2.101  0.03565 *  
## hiv          0.524853   0.256918   2.043  0.04106 *  
## gnhlthst     0.992708   0.146232   6.789 1.13e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.37  on 591  degrees of freedom
## Residual deviance: 448.35  on 584  degrees of freedom
## AIC: 464.35
## 
## Number of Fisher Scoring iterations: 5
exp(coef(model15))
## (Intercept)      injher      injcoc        m0f1         age       alcyn 
## 0.002782793 1.419957384 1.535929269 2.146028793 1.005654520 1.710123338 
##         hiv    gnhlthst 
## 1.690209704 2.698532732
exp(confint(model15))
## Waiting for profiling to be done...
##                 2.5 %     97.5 %
## (Intercept) 0.0003265 0.02155243
## injher      0.7044227 2.79287442
## injcoc      0.7430567 3.17762664
## m0f1        1.3045668 3.53545959
## age         0.9699708 1.04246133
## alcyn       1.0400330 2.83778937
## hiv         1.0179595 2.79373049
## gnhlthst    2.0468282 3.63619685
preprob15_test <- predict(model15, dat15test, type = "response")
roc15_test <- roc(response = dat15test$cesd23, predictor = preprob15_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc15_test, main = "ROC Curve 15 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc15_test)
## Area under the curve: 0.7295
dat15test <- as.data.frame(dat15test)
plotCalibration(data= dat15test, cOutcome="cesd23", predRisk=preprob15_test, groups=10)

## $Table_HLtest
##                  total meanpred meanobs predicted observed
## [0.00912,0.0269)    26    0.019   0.115      0.51        3
## [0.02694,0.0443)    25    0.037   0.120      0.92        3
## [0.04434,0.0696)    26    0.059   0.038      1.53        1
## [0.06961,0.1091)    25    0.092   0.120      2.31        3
## [0.10915,0.1363)    26    0.122   0.115      3.17        3
## [0.13631,0.1792)    25    0.161   0.080      4.04        2
## [0.17921,0.2592)    26    0.220   0.269      5.72        7
## [0.25918,0.3617)    25    0.302   0.280      7.56        7
## [0.36170,0.4330)    26    0.400   0.231     10.39        6
## [0.43296,0.8422]    25    0.565   0.680     14.13       17
## 
## $Chi_square
## [1] 24.172
## 
## $df
## [1] 8
## 
## $p_value
## [1] 0.0021

Model 16: Predictor = Any Injection, Sex, Age, Alcohol Use, HIV Symptoms, General Health Status

dat16 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(symp5ge2),!is.na(gnhlthst))
dat16test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(inject),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(symp5ge2),!is.na(gnhlthst))
model16 <- glm(cesd23 ~ inject + m0f1 + age + alcyn + symp5ge2 + gnhlthst, family=binomial(link="logit"), data=dat16)
summary(model16)
## 
## Call:
## glm(formula = cesd23 ~ inject + m0f1 + age + alcyn + symp5ge2 + 
##     gnhlthst, family = binomial(link = "logit"), data = dat16)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.363251   1.039353  -5.160 2.47e-07 ***
## inject       0.577646   0.254538   2.269  0.02324 *  
## m0f1         0.763583   0.255494   2.989  0.00280 ** 
## age          0.001434   0.018362   0.078  0.93776    
## alcyn        0.506263   0.254878   1.986  0.04700 *  
## symp5ge2     1.267535   0.443221   2.860  0.00424 ** 
## gnhlthst     0.923152   0.146067   6.320 2.61e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 553.37  on 591  degrees of freedom
## Residual deviance: 445.03  on 585  degrees of freedom
## AIC: 459.03
## 
## Number of Fisher Scoring iterations: 5
exp(coef(model16))
## (Intercept)      inject        m0f1         age       alcyn    symp5ge2 
## 0.004685648 1.781838703 2.145951940 1.001434895 1.659079988 3.552087603 
##    gnhlthst 
## 2.517211574
exp(confint(model16))
## Waiting for profiling to be done...
##                    2.5 %     97.5 %
## (Intercept) 0.0005824962 0.03455123
## inject      1.0787614055 2.93224178
## m0f1        1.2991247819 3.54548835
## age         0.9658699526 1.03814432
## alcyn       1.0096042050 2.74924328
## symp5ge2    1.5028020607 8.66642490
## gnhlthst    1.9088492906 3.38895054
preprob16_test <- predict(model16, dat16test, type = "response")
roc16_test <- roc(response = dat16test$cesd23, predictor = preprob16_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc16_test, main = "ROC Curve 16 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc16_test)
## Area under the curve: 0.718
dat16test <- as.data.frame(dat16test)
plotCalibration(data= dat16test, cOutcome="cesd23", predRisk=preprob16_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0123,0.0307)    26    0.021   0.154      0.53        4
## [0.0307,0.0497)    25    0.034   0.080      0.85        2
## [0.0497,0.0747)    26    0.064   0.077      1.66        2
## [0.0747,0.1168)    25    0.091   0.160      2.28        4
## [0.1168,0.1470)    26    0.127   0.077      3.30        2
## [0.1470,0.1915)    25    0.170   0.080      4.26        2
## [0.1915,0.2521)    26    0.223   0.231      5.80        6
## [0.2521,0.3743)    25    0.297   0.240      7.42        6
## [0.3743,0.4585)    26    0.415   0.346     10.79        9
## [0.4585,0.9209]    25    0.617   0.600     15.41       15
## 
## $Chi_square
## [1] 28.607
## 
## $df
## [1] 8
## 
## $p_value
## [1] 4e-04

Model 17: Predictor = Heroin Injection, Cocaine Injection, Sex, Age, Alcohol Use, HIV Symptoms, General Health Status

dat17 <- mp %>% filter(training==1,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(symp5ge2),!is.na(gnhlthst))
dat17test <- mp %>% filter(training==0,!is.na(cesd23),!is.na(injher),!is.na(injcoc),!is.na(m0f1),!is.na(age),!is.na(alcyn),!is.na(symp5ge2),!is.na(gnhlthst))
model17 <- glm(cesd23 ~ injher + injcoc + m0f1 + age + alcyn + symp5ge2 + gnhlthst, family=binomial(link="logit"), data=dat17)
summary(model17)
## 
## Call:
## glm(formula = cesd23 ~ injher + injcoc + m0f1 + age + alcyn + 
##     symp5ge2 + gnhlthst, family = binomial(link = "logit"), data = dat17)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.18263    1.03547  -5.005 5.58e-07 ***
## injher       0.25799    0.35711   0.722  0.47001    
## injcoc       0.51374    0.37319   1.377  0.16863    
## m0f1         0.77499    0.25684   3.017  0.00255 ** 
## age         -0.00155    0.01827  -0.085  0.93239    
## alcyn        0.49114    0.25543   1.923  0.05450 .  
## symp5ge2     1.27737    0.44213   2.889  0.00386 ** 
## gnhlthst     0.91938    0.14664   6.270 3.62e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 552.98  on 590  degrees of freedom
## Residual deviance: 443.26  on 583  degrees of freedom
## AIC: 459.26
## 
## Number of Fisher Scoring iterations: 5
exp(coef(model17))
## (Intercept)      injher      injcoc        m0f1         age       alcyn 
## 0.005613245 1.294331894 1.671530887 2.170570507 0.998451134 1.634178144 
##    symp5ge2    gnhlthst 
## 3.587201231 2.507746286
exp(confint(model17))
## Waiting for profiling to be done...
##                    2.5 %    97.5 %
## (Intercept) 0.0007045615 0.0411624
## injher      0.6332545173 2.5793381
## injcoc      0.8029999017 3.4826480
## m0f1        1.3108368216 3.5965968
## age         0.9631209215 1.0348273
## alcyn       0.9931565343 2.7103368
## symp5ge2    1.5189058882 8.7214369
## gnhlthst    1.8996525947 3.3803096
preprob17_test <- predict(model17, dat17test, type = "response")
roc17_test <- roc(response = dat17test$cesd23, predictor = preprob17_test)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc17_test, main = "ROC Curve 17 in Testing Data", print.auc=TRUE, grid=TRUE, xlim=c(1,0))

auc(roc17_test)
## Area under the curve: 0.7241
dat17test <- as.data.frame(dat17test)
plotCalibration(data= dat17test, cOutcome="cesd23", predRisk=preprob17_test, groups=10)

## $Table_HLtest
##                 total meanpred meanobs predicted observed
## [0.0127,0.0312)    26    0.021   0.154      0.55        4
## [0.0312,0.0498)    25    0.034   0.080      0.86        2
## [0.0498,0.0759)    26    0.063   0.038      1.65        1
## [0.0759,0.1179)    25    0.095   0.160      2.37        4
## [0.1179,0.1510)    26    0.127   0.038      3.30        1
## [0.1510,0.1713)    25    0.160   0.120      4.01        3
## [0.1713,0.2510)    26    0.221   0.269      5.74        7
## [0.2510,0.3566)    25    0.296   0.240      7.40        6
## [0.3566,0.4542)    26    0.409   0.346     10.63        9
## [0.4542,0.9151]    25    0.622   0.600     15.54       15
## 
## $Chi_square
## [1] 28.703
## 
## $df
## [1] 8
## 
## $p_value
## [1] 4e-04

Final results

# Load necessary libraries
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Read the cleaned data
df <- read.csv("cleaned_epi_753_models.csv")

# Create the kable table
kable(df,
      # Use the pipe symbol as the column separator
      col.names = c("Model", "Variables", "Coefficient (Intercept/Variable)", "Exponentiated (Coefficient)",
                    "95% CI (Exponentiated)", "P Value (Coefficient)", "AIC", "AUC", "Hosmer-Lemeshow P-Value"),
      # Set the alignment of the columns
      align = c("l", "l", "l", "l", "l", "l", "c", "c", "c"),
      # Set the caption of the table
      caption = "Results from the progress of building 17 models with different sets of predictors") %>%
  # Collapse the rows with the same Model value
  collapse_rows(columns = 1, valign = "top") %>%
  # Add row striping
  kable_styling(bootstrap_options = c("striped", "hover"))
Results from the progress of building 17 models with different sets of predictors
Model Variables Coefficient (Intercept/Variable) Exponentiated (Coefficient) 95% CI (Exponentiated) P Value (Coefficient) AIC AUC Hosmer-Lemeshow P-Value
1B injcoc Intercept: -1.8014, injcoc: 1.1364 Intercept: 0.1651, injcoc: 3.1155 Intercept: (0.1269, 0.2114), injcoc: (1.9259, 4.9998) Intercept: < 2e-16, injcoc: 2.86e-06 537.25 0.5997 1.0000
1C injher Intercept: -1.8149, injher: 0.9746 Intercept: 0.1628, injher: 2.6502 Intercept: (0.1239, 0.2104), injher: (1.6804, 4.1544) Intercept: < 2e-16, injher: 2.34e-05 540.70 0.5989 0.9882
2 injcoc, injher Intercept: -1.8575, injcoc: 0.7924, injher: 0.4802 Intercept: 0.1561, injcoc: 2.2088, injher: 1.6163 Intercept: (0.1183, 0.2025), injcoc: (1.1504, 4.2540), injher: (0.8544, 2.9818) Intercept: < 2e-16, injcoc: 0.0172, injher: 0.1309 537.04 0.6133 0.6307
3 inject, m0f1 Intercept: -2.1984, inject: 1.0258, m0f1: 0.7896 Intercept: 0.1110, inject: 2.7894, m0f1: 2.2024 Intercept: (0.0770, 0.1556), inject: (1.7961, 4.3399), m0f1: (1.4084, 3.4386) Intercept: < 2e-16, inject: 4.94e-06, m0f1: 0.000511 529.36 0.6533 0.9995
4 inject, m0f1, age Intercept: -3.1433, inject: 1.0440, m0f1: 0.8362, age: 0.0188 Intercept: 0.0431, inject: 2.8406, m0f1: 2.3075, age: 1.0190 Intercept: (0.0075, 0.2377), inject: (1.8256, 4.4297), m0f1: (1.4635, 3.6373), age: (0.9855, 1.0537) Intercept: 0.000358, inject: 3.72e-06, m0f1: 0.000308, age: 0.2701 530.14 0.6513 0.7817
5 injher, injcoc, m0f1, age Intercept: -2.7858, injher: 0.4740, injcoc: 0.8739, m0f1: 0.8334, age: 0.0125 Intercept: 0.0617, injher: 1.6065, injcoc: 2.3963, m0f1: 2.3012, age: 1.0126 Intercept: (0.0108, 0.3366), injher: (0.8392, 3.0010), injcoc: (1.2278, 4.6992), m0f1: (1.4568, 3.6353), age: (0.9791, 1.0473) Intercept: 0.001453, injher: 0.1435, injcoc: 0.0105, m0f1: 0.000343, age: 0.4646 528.38 0.6477 0.6862
6 inject, m0f1, age, hiv Intercept: -3.4329, inject: 1.0415, m0f1: 0.7961, age: 0.0224, hiv: 0.3878 Intercept: 0.0323, inject: 2.8334, m0f1: 2.2168, age: 1.0226, hiv: 1.4737 Intercept: (0.0053, 0.1865), inject: (1.8183, 4.4254), m0f1: (1.4007, 3.5058), age: (0.9886, 1.0580), hiv: (0.9241, 2.3304) Intercept: 0.000153, inject: 4.24e-06, m0f1: 0.000652, age: 0.1955, hiv: 0.0995 529.48 0.6757 0.7637
7 injher, injcoc, m0f1, age, hiv Intercept: -3.0722, injher: 0.4978, injcoc: 0.8430, m0f1: 0.7889, age: 0.0160, hiv: 0.3970 Intercept: 0.0463, injher: 1.6452, injcoc: 2.3234, m0f1: 2.2009, age: 1.0162, hiv: 1.4873 Intercept: (0.0077, 0.2640), injher: (0.8558, 3.0879), injcoc: (1.1850, 4.5764), m0f1: (1.3878, 3.4891), age: (0.9822, 1.0514), hiv: (0.9300, 2.3586) Intercept: 0.000629, injher: 0.1271, injcoc: 0.0142, m0f1: 0.000775, age: 0.3555, hiv: 0.0938 527.62 0.6792 0.7065
8 inject, m0f1, age, symp5ge2 Intercept: -3.0489, inject: 0.9246, m0f1: 0.7765, age: 0.0157, symp5ge2: 1.8164 Intercept: 0.0474, inject: 2.5210, m0f1: 2.1739, age: 1.0158, symp5ge2: 6.1500 Intercept: (0.0079, 0.2729), inject: (1.5928, 3.9869), m0f1: (1.3588, 3.4714), age: (0.9816, 1.0513), symp5ge2: (2.7212, 14.3686) Intercept: 0.000735, inject: 7.51e-05, m0f1: 0.001142, age: 0.3694, symp5ge2: 1.56e-05 512.46 0.6854 0.3932
9 injher, injcoc, m0f1, age, symp5ge2 Intercept: -2.7114, injher: 0.3263, injcoc: 0.8755, m0f1: 0.7748, age: 0.0097, symp5ge2: 1.8291 Intercept: 0.0664, injher: 1.3858, injcoc: 2.4001, m0f1: 2.1702, age: 1.0098, symp5ge2: 6.2284 Intercept: (0.0112, 0.3777), injher: (0.6999, 2.6708), injcoc: (1.1943, 4.8338), m0f1: (1.3551, 3.4700), age: (0.9756, 1.0452), symp5ge2: (2.7329, 14.6540) Intercept: 0.00247, injher: 0.3383, injcoc: 0.0139, m0f1: 0.00120, age: 0.5789, symp5ge2: 1.64e-05 511.14 0.7223 0.7705
10 inject, m0f1, age, alcyn, hiv Intercept: -3.7971, inject: 0.8714, m0f1: 0.8636, age: 0.0223, alcyn: 0.7168, hiv: 0.4446 Intercept: 0.0224, inject: 2.3903, m0f1: 2.3717, age: 1.0226, alcyn: 2.0478, hiv: 1.5599 Intercept: (0.0035, 0.1340), inject: (1.5098, 3.7859), m0f1: (1.4875, 3.7851), age: (0.9883, 1.0582), alcyn: (1.2915, 3.2850), hiv: (0.9720, 2.4847) Intercept: 4.06e-05, inject: 0.000197, m0f1: 0.000280, age: 0.1995, alcyn: 0.002551, hiv: 0.0627 521.86 0.7007 0.2326
11 injher, injcoc, m0f1, age, alcyn, hiv Intercept: -3.4941, injher: 0.3717, injcoc: 0.7599, m0f1: 0.8596, age: 0.0171, alcyn: 0.7000, hiv: 0.4639 Intercept: 0.0304, injher: 1.4501, injcoc: 2.1380, m0f1: 2.3622, age: 1.0172, alcyn: 2.0138, hiv: 1.5902 Intercept: (0.0049, 0.1798), injher: (0.7427, 2.7613), injcoc: (1.0776, 4.2545), m0f1: (1.4783, 3.7797), age: (0.9830, 1.0527), alcyn: (1.2634, 3.2446), hiv: (0.9886, 2.5397) Intercept: 0.000143, injher: 0.2658, injcoc: 0.0296, m0f1: 0.000321, age: 0.3273, alcyn: 0.003534, hiv: 0.0534 520.63 0.7044 0.9104
12 inject, m0f1, age, alcyn, symp5ge2 Intercept: -3.3416, inject: 0.7832, m0f1: 0.8531, age: 0.0147, alcyn: 0.6811, symp5ge2: 1.7793 Intercept: 0.0354, inject: 2.1885, m0f1: 2.3468, age: 1.0148, alcyn: 1.9760, symp5ge2: 5.9259 Intercept: (0.0057, 0.2090), inject: (1.3669, 3.4980), m0f1: (1.4564, 3.7817), age: (0.9804, 1.0505), alcyn: (1.2383, 3.1882), symp5ge2: (2.6242, 13.8249) Intercept: 0.000270, inject: 0.001059, m0f1: 0.000443, age: 0.4044, alcyn: 0.004652, symp5ge2: 2.26e-05 506.06 0.7229 0.7946
13 injher, injcoc, m0f1, age, alcyn, symp5ge2 Intercept: -3.0506, injher: 0.2294, injcoc: 0.7913, m0f1: 0.8566, age: 0.0098, alcyn: 0.6593, symp5ge2: 1.7965 Intercept: 0.0473, injher: 1.2579, injcoc: 2.2064, m0f1: 2.3551, age: 1.0098, alcyn: 1.9334, symp5ge2: 6.0285 Intercept: (0.0077, 0.2766), injher: (0.6273, 2.4520), injcoc: (1.0860, 4.4890), m0f1: (1.4593, 3.8026), age: (0.9755, 1.0453), alcyn: (1.2059, 3.1310), symp5ge2: (2.6524, 14.1410) Intercept: 0.000812, injher: 0.5083, injcoc: 0.0284, m0f1: 0.000440, age: 0.5799, alcyn: 0.006611, symp5ge2: 2.16e-05 505.42 0.7256 0.6297
14 inject, m0f1, age, alcyn, hiv, gnhlthst Intercept: -6.0333, inject: 0.5750, m0f1: 0.7640, age: 0.0083, alcyn: 0.5592, hiv: 0.4939, gnhlthst: 0.9938 Intercept: 0.0024, inject: 1.7771, m0f1: 2.1469, age: 1.0083, alcyn: 1.7493, hiv: 1.6386, gnhlthst: 2.7014 Intercept: (0.0003, 0.0185), inject: (1.0784, 2.9179), m0f1: (1.3091, 3.5250), age: (0.9725, 1.0454), alcyn: (1.0658, 2.8996), hiv: (0.9872, 2.7064), gnhlthst: (2.0523, 3.6333) Intercept: 1.52e-08, inject: 0.0233, m0f1: 0.00244, age: 0.6530, alcyn: 0.0281, hiv: 0.0543, gnhlthst: 8.10e-12 464.61 0.7971 0.8985
15 injher, injcoc, m0f1, age, alcyn, hiv, gnhlthst Intercept: -5.8843, injher: 0.3506, injcoc: 0.4291, m0f1: 0.7636, age: 0.0056, alcyn: 0.5366, hiv: 0.5249, gnhlthst: 0.9927 Intercept: 0.0028, injher: 1.4199, injcoc: 1.5359, m0f1: 2.1460, age: 1.0057, alcyn: 1.7101, hiv: 1.6902, gnhlthst: 2.6985 Intercept: (0.0003, 0.0216), injher: (0.7044, 2.7929), injcoc: (0.7431, 3.1776), m0f1: (1.3046, 3.5355), age: (0.9700, 1.0425), alcyn: (1.0400, 2.8378), hiv: (1.0180, 2.7937), gnhlthst: (2.0468, 3.6362) Intercept: 3.44e-08, injher: 0.3166, injcoc: 0.2455, m0f1: 0.00262, age: 0.7585, alcyn: 0.0357, hiv: 0.0411, gnhlthst: 1.13e-11 464.35 0.7966 0.9020
16 inject, m0f1, age, alcyn, symp5ge2, gnhlthst Intercept: -5.3633, inject: 0.5776, m0f1: 0.7636, age: 0.0014, alcyn: 0.5063, symp5ge2: 1.2675, gnhlthst: 0.9232 Intercept: 0.0047, inject: 1.7818, m0f1: 2.1460, age: 1.0014, alcyn: 1.6591, symp5ge2: 3.5521, gnhlthst: 2.5172 Intercept: (0.0006, 0.0346), inject: (1.0788, 2.9322), m0f1: (1.2991, 3.5455), age: (0.9659, 1.0381), alcyn: (1.0096, 2.7492), symp5ge2: (1.5028, 8.6664), gnhlthst: (1.9088, 3.3890) Intercept: 2.47e-07, inject: 0.0232, m0f1: 0.00280, age: 0.9378, alcyn: 0.0470, symp5ge2: 0.00424, gnhlthst: 2.61e-10 459.03 0.7985 0.0479
17 injher, injcoc, m0f1, age, alcyn, symp5ge2, gnhlthst Intercept: -5.1826, injher: 0.2580, injcoc: 0.5137, m0f1: 0.7750, age: -0.0016, alcyn: 0.4911, symp5ge2: 1.2774, gnhlthst: 0.9194 Intercept: 0.0056, injher: 1.2943, injcoc: 1.6715, m0f1: 2.1706, age: 0.9985, alcyn: 1.6342, symp5ge2: 3.5872, gnhlthst: 2.5077 Intercept: (0.0007, 0.0412), injher: (0.6333, 2.5793), injcoc: (0.8030, 3.4826), m0f1: (1.3108, 3.5966), age: (0.9631, 1.0348), alcyn: (0.9932, 2.7103), symp5ge2: (1.5189, 8.7214), gnhlthst: (1.8997, 3.3803) Intercept: 5.58e-07, injher: 0.4700, injcoc: 0.1686, m0f1: 0.00255, age: 0.9324, alcyn: 0.0545, symp5ge2: 0.00386, gnhlthst: 3.62e-10 459.26 0.7940 0.1934

Plot summary for best models

ROC Curves of models 11, 12, 14, 15

Calibration Plots of models 11, 12, 14, 15