ROC

## Load pROC package
library(pROC)

## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## 
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

## Load a demo dataset
data(aSAH)
## Check data
head(aSAH)

##    gos6 outcome gender age wfns s100b  ndka
## 29    5    Good Female  42    1  0.13  3.01
## 30    5    Good Female  37    1  0.14  8.54
## 31    5    Good Female  42    1  0.10  8.09
## 32    5    Good Female  27    1  0.04 10.42
## 33    1    Poor Female  42    3  0.13 17.40
## 34    1    Poor   Male  48    2  0.10 12.75

## Construct an ROC
roc1 <- roc(outcome ~ s100b, data = aSAH)
## Check the AUC
roc1

## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
## 
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731

## Plot
plot(roc1)

plot of chunk unnamed-chunk-1

## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
## 
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731

## Threshold
plot(roc1, print.thres = TRUE)

plot of chunk unnamed-chunk-1

## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
## 
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731

## Usual axes
plot(roc1, print.thres = TRUE, legacy.axes = TRUE)

plot of chunk unnamed-chunk-1

## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
## 
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731

## Create thresholds based on test score quantiles
thresholdsQuantiles <- quantile(aSAH$s100b, seq(0,1,0.1))
## Plot threshold on the graph
plot(roc1, print.thres = c(0.205,thresholdsQuantiles[-7]))

plot of chunk unnamed-chunk-1

## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
## 
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731

Creating a categorical variable from a continuous

## Continuous
aSAH$s100b

##   [1] 0.13 0.14 0.10 0.04 0.13 0.10 0.47 0.16 0.18 0.10 0.12 0.10 0.44 0.71
##  [15] 0.04 0.08 0.49 0.04 0.07 0.33 0.09 0.09 0.07 0.11 0.07 0.17 0.07 0.11
##  [29] 0.13 0.19 0.05 0.16 0.41 0.14 0.34 0.35 0.48 0.09 0.96 0.25 0.50 0.46
##  [43] 0.16 0.07 0.43 0.45 0.11 0.08 0.09 0.86 0.52 0.08 0.06 0.13 2.07 0.10
##  [57] 0.14 0.15 0.07 0.06 0.77 0.05 0.09 0.30 0.03 0.09 0.04 0.23 0.70 0.09
##  [71] 0.27 0.71 0.08 0.26 0.08 0.16 0.09 0.13 0.10 0.08 0.11 0.33 0.11 0.28
##  [85] 0.07 0.10 0.32 0.22 0.07 0.05 0.24 0.38 0.10 0.15 0.08 0.14 0.10 0.07
##  [99] 0.04 0.19 0.56 0.14 0.58 0.32 0.82 0.74 0.15 0.47 0.17 0.44 0.15 0.50
## [113] 0.48

summary(aSAH$s100b)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.030   0.090   0.140   0.247   0.330   2.070

## Cut into a categorical by equal widths
aSAH$s100bCat1 <- cut(aSAH$s100b, 4)
summary(aSAH)

##  gos6   outcome      gender        age       wfns       s100b      
##  1:28   Good:72   Male  :42   Min.   :18.0   1:39   Min.   :0.030  
##  2: 0   Poor:41   Female:71   1st Qu.:42.0   2:32   1st Qu.:0.090  
##  3:13                         Median :51.0   3: 4   Median :0.140  
##  4: 6                         Mean   :51.1   4:16   Mean   :0.247  
##  5:66                         3rd Qu.:61.0   5:22   3rd Qu.:0.330  
##                               Max.   :81.0          Max.   :2.070  
##       ndka              s100bCat1  
##  Min.   :  3.0   (0.028,0.54]:102  
##  1st Qu.:  9.0   (0.54,1.05] : 10  
##  Median : 12.2   (1.05,1.56] :  0  
##  Mean   : 19.7   (1.56,2.07] :  1  
##  3rd Qu.: 17.3                     
##  Max.   :419.2

## Create thresholds
thresholds2 <- quantile(aSAH$s100b, probs = seq(0,1,by=0.20))
## Cut at these thresholds
aSAH$s100bCat2 <- cut(aSAH$s100b, breaks = thresholds2)
## Check ()
summary(aSAH)

##  gos6   outcome      gender        age       wfns       s100b      
##  1:28   Good:72   Male  :42   Min.   :18.0   1:39   Min.   :0.030  
##  2: 0   Poor:41   Female:71   1st Qu.:42.0   2:32   1st Qu.:0.090  
##  3:13                         Median :51.0   3: 4   Median :0.140  
##  4: 6                         Mean   :51.1   4:16   Mean   :0.247  
##  5:66                         3rd Qu.:61.0   5:22   3rd Qu.:0.330  
##                               Max.   :81.0          Max.   :2.070  
##       ndka              s100bCat1           s100bCat2 
##  Min.   :  3.0   (0.028,0.54]:102   (0.03,0.08]  :26  
##  1st Qu.:  9.0   (0.54,1.05] : 10   (0.08,0.11]  :22  
##  Median : 12.2   (1.05,1.56] :  0   (0.11,0.162] :19  
##  Mean   : 19.7   (1.56,2.07] :  1   (0.162,0.436]:22  
##  3rd Qu.: 17.3                      (0.436,2.07] :23  
##  Max.   :419.2                      NA's         : 1

## Check which one is NA
aSAH[is.na(aSAH$s100bCat2), ]

##    gos6 outcome gender age wfns s100b  ndka    s100bCat1 s100bCat2
## 93    1    Poor Female  45    5  0.03 12.22 (0.028,0.54]      <NA>

## To include the lowest
aSAH$s100bCat3 <- cut(aSAH$s100b, breaks = thresholds2, include.lowest = TRUE)
summary(aSAH)

##  gos6   outcome      gender        age       wfns       s100b      
##  1:28   Good:72   Male  :42   Min.   :18.0   1:39   Min.   :0.030  
##  2: 0   Poor:41   Female:71   1st Qu.:42.0   2:32   1st Qu.:0.090  
##  3:13                         Median :51.0   3: 4   Median :0.140  
##  4: 6                         Mean   :51.1   4:16   Mean   :0.247  
##  5:66                         3rd Qu.:61.0   5:22   3rd Qu.:0.330  
##                               Max.   :81.0          Max.   :2.070  
##       ndka              s100bCat1           s100bCat2          s100bCat3 
##  Min.   :  3.0   (0.028,0.54]:102   (0.03,0.08]  :26   [0.03,0.08]  :27  
##  1st Qu.:  9.0   (0.54,1.05] : 10   (0.08,0.11]  :22   (0.08,0.11]  :22  
##  Median : 12.2   (1.05,1.56] :  0   (0.11,0.162] :19   (0.11,0.162] :19  
##  Mean   : 19.7   (1.56,2.07] :  1   (0.162,0.436]:22   (0.162,0.436]:22  
##  3rd Qu.: 17.3                      (0.436,2.07] :23   (0.436,2.07] :23  
##  Max.   :419.2                      NA's         : 1

## Another ROC
roc2 <- roc(outcome ~ ndka, data = aSAH, ci=TRUE)
roc2

## 
## Call:
## roc.formula(formula = outcome ~ ndka, data = aSAH, ci = TRUE)
## 
## Data: ndka in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.612
## 95% CI: 0.501-0.723 (DeLong)

## Plotting together
plot(roc1)

## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
## 
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731

plot(roc2, add = TRUE, col = 2)

plot of chunk unnamed-chunk-2

## 
## Call:
## roc.formula(formula = outcome ~ ndka, data = aSAH, ci = TRUE)
## 
## Data: ndka in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.612
## 95% CI: 0.501-0.723 (DeLong)

## Test
roc.test(roc1, roc2)

## 
##  DeLong's test for two correlated ROC curves
## 
## data:  roc1 and roc2
## Z = 1.391, p-value = 0.1643
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##      0.7314      0.6120