## Load pROC package
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
##
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
## Load a demo dataset
data(aSAH)
## Check data
head(aSAH)
## gos6 outcome gender age wfns s100b ndka
## 29 5 Good Female 42 1 0.13 3.01
## 30 5 Good Female 37 1 0.14 8.54
## 31 5 Good Female 42 1 0.10 8.09
## 32 5 Good Female 27 1 0.04 10.42
## 33 1 Poor Female 42 3 0.13 17.40
## 34 1 Poor Male 48 2 0.10 12.75
## Construct an ROC
roc1 <- roc(outcome ~ s100b, data = aSAH)
## Check the AUC
roc1
##
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
##
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731
## Plot
plot(roc1)
##
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
##
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731
## Threshold
plot(roc1, print.thres = TRUE)
##
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
##
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731
## Usual axes
plot(roc1, print.thres = TRUE, legacy.axes = TRUE)
##
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
##
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731
## Create thresholds based on test score quantiles
thresholdsQuantiles <- quantile(aSAH$s100b, seq(0,1,0.1))
## Plot threshold on the graph
plot(roc1, print.thres = c(0.205,thresholdsQuantiles[-7]))
##
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
##
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731
## Continuous
aSAH$s100b
## [1] 0.13 0.14 0.10 0.04 0.13 0.10 0.47 0.16 0.18 0.10 0.12 0.10 0.44 0.71
## [15] 0.04 0.08 0.49 0.04 0.07 0.33 0.09 0.09 0.07 0.11 0.07 0.17 0.07 0.11
## [29] 0.13 0.19 0.05 0.16 0.41 0.14 0.34 0.35 0.48 0.09 0.96 0.25 0.50 0.46
## [43] 0.16 0.07 0.43 0.45 0.11 0.08 0.09 0.86 0.52 0.08 0.06 0.13 2.07 0.10
## [57] 0.14 0.15 0.07 0.06 0.77 0.05 0.09 0.30 0.03 0.09 0.04 0.23 0.70 0.09
## [71] 0.27 0.71 0.08 0.26 0.08 0.16 0.09 0.13 0.10 0.08 0.11 0.33 0.11 0.28
## [85] 0.07 0.10 0.32 0.22 0.07 0.05 0.24 0.38 0.10 0.15 0.08 0.14 0.10 0.07
## [99] 0.04 0.19 0.56 0.14 0.58 0.32 0.82 0.74 0.15 0.47 0.17 0.44 0.15 0.50
## [113] 0.48
summary(aSAH$s100b)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.030 0.090 0.140 0.247 0.330 2.070
## Cut into a categorical by equal widths
aSAH$s100bCat1 <- cut(aSAH$s100b, 4)
summary(aSAH)
## gos6 outcome gender age wfns s100b
## 1:28 Good:72 Male :42 Min. :18.0 1:39 Min. :0.030
## 2: 0 Poor:41 Female:71 1st Qu.:42.0 2:32 1st Qu.:0.090
## 3:13 Median :51.0 3: 4 Median :0.140
## 4: 6 Mean :51.1 4:16 Mean :0.247
## 5:66 3rd Qu.:61.0 5:22 3rd Qu.:0.330
## Max. :81.0 Max. :2.070
## ndka s100bCat1
## Min. : 3.0 (0.028,0.54]:102
## 1st Qu.: 9.0 (0.54,1.05] : 10
## Median : 12.2 (1.05,1.56] : 0
## Mean : 19.7 (1.56,2.07] : 1
## 3rd Qu.: 17.3
## Max. :419.2
## Create thresholds
thresholds2 <- quantile(aSAH$s100b, probs = seq(0,1,by=0.20))
## Cut at these thresholds
aSAH$s100bCat2 <- cut(aSAH$s100b, breaks = thresholds2)
## Check ()
summary(aSAH)
## gos6 outcome gender age wfns s100b
## 1:28 Good:72 Male :42 Min. :18.0 1:39 Min. :0.030
## 2: 0 Poor:41 Female:71 1st Qu.:42.0 2:32 1st Qu.:0.090
## 3:13 Median :51.0 3: 4 Median :0.140
## 4: 6 Mean :51.1 4:16 Mean :0.247
## 5:66 3rd Qu.:61.0 5:22 3rd Qu.:0.330
## Max. :81.0 Max. :2.070
## ndka s100bCat1 s100bCat2
## Min. : 3.0 (0.028,0.54]:102 (0.03,0.08] :26
## 1st Qu.: 9.0 (0.54,1.05] : 10 (0.08,0.11] :22
## Median : 12.2 (1.05,1.56] : 0 (0.11,0.162] :19
## Mean : 19.7 (1.56,2.07] : 1 (0.162,0.436]:22
## 3rd Qu.: 17.3 (0.436,2.07] :23
## Max. :419.2 NA's : 1
## Check which one is NA
aSAH[is.na(aSAH$s100bCat2), ]
## gos6 outcome gender age wfns s100b ndka s100bCat1 s100bCat2
## 93 1 Poor Female 45 5 0.03 12.22 (0.028,0.54] <NA>
## To include the lowest
aSAH$s100bCat3 <- cut(aSAH$s100b, breaks = thresholds2, include.lowest = TRUE)
summary(aSAH)
## gos6 outcome gender age wfns s100b
## 1:28 Good:72 Male :42 Min. :18.0 1:39 Min. :0.030
## 2: 0 Poor:41 Female:71 1st Qu.:42.0 2:32 1st Qu.:0.090
## 3:13 Median :51.0 3: 4 Median :0.140
## 4: 6 Mean :51.1 4:16 Mean :0.247
## 5:66 3rd Qu.:61.0 5:22 3rd Qu.:0.330
## Max. :81.0 Max. :2.070
## ndka s100bCat1 s100bCat2 s100bCat3
## Min. : 3.0 (0.028,0.54]:102 (0.03,0.08] :26 [0.03,0.08] :27
## 1st Qu.: 9.0 (0.54,1.05] : 10 (0.08,0.11] :22 (0.08,0.11] :22
## Median : 12.2 (1.05,1.56] : 0 (0.11,0.162] :19 (0.11,0.162] :19
## Mean : 19.7 (1.56,2.07] : 1 (0.162,0.436]:22 (0.162,0.436]:22
## 3rd Qu.: 17.3 (0.436,2.07] :23 (0.436,2.07] :23
## Max. :419.2 NA's : 1
## Another ROC
roc2 <- roc(outcome ~ ndka, data = aSAH, ci=TRUE)
roc2
##
## Call:
## roc.formula(formula = outcome ~ ndka, data = aSAH, ci = TRUE)
##
## Data: ndka in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.612
## 95% CI: 0.501-0.723 (DeLong)
## Plotting together
plot(roc1)
##
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
##
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.731
plot(roc2, add = TRUE, col = 2)
##
## Call:
## roc.formula(formula = outcome ~ ndka, data = aSAH, ci = TRUE)
##
## Data: ndka in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.612
## 95% CI: 0.501-0.723 (DeLong)
## Test
roc.test(roc1, roc2)
##
## DeLong's test for two correlated ROC curves
##
## data: roc1 and roc2
## Z = 1.391, p-value = 0.1643
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.7314 0.6120