## Load pROC
library(pROC)
## Load data
data(aSAH)
## Check number of rows
nrow.aSAH <- nrow(aSAH)
nrow.aSAH
[1] 113
## Create a larger dataset 50000 cases
aSAH.large <- aSAH[rep(seq_len(nrow.aSAH), 450),]
## Check number of rows
nrow(aSAH.large)
[1] 50850
## Check first rows
head(aSAH.large)
gos6 outcome gender age wfns s100b ndka
29 5 Good Female 42 1 0.13 3.01
30 5 Good Female 37 1 0.14 8.54
31 5 Good Female 42 1 0.10 8.09
32 5 Good Female 27 1 0.04 10.42
33 1 Poor Female 42 3 0.13 17.40
34 1 Poor Male 48 2 0.10 12.75
## By s100b predictor
roc.s100b <- roc((outcome == "Poor") ~ s100b, data = aSAH.large)
roc.s100b
Call:
roc.formula(formula = (outcome == "Poor") ~ s100b, data = aSAH.large)
Data: s100b in 32400 controls ((outcome == "Poor") FALSE) < 18450 cases ((outcome == "Poor") TRUE).
Area under the curve: 0.731
## By ndka predictor
roc.ndka <- roc((outcome == "Poor") ~ ndka, data = aSAH.large)
roc.ndka
Call:
roc.formula(formula = (outcome == "Poor") ~ ndka, data = aSAH.large)
Data: ndka in 32400 controls ((outcome == "Poor") FALSE) < 18450 cases ((outcome == "Poor") TRUE).
Area under the curve: 0.612
Summary: The De Long method is impossibly slow, so use bootstrap.
The bootstrap method takes several minutes but doable.
## Test these two ROCs (bootstrap)
system.time(res.roc.test <- roc.test(roc.s100b, roc.ndka, method = "bootstrap"))
user system elapsed
421.50 58.89 480.49
res.roc.test
Bootstrap test for two correlated ROC curves
data: roc.s100b and roc.ndka
D = 30.21, boot.n = 2000, boot.stratified = 1, p-value < 2.2e-16
alternative hypothesis: true difference in AUC is not equal to 0
sample estimates:
AUC of roc1 AUC of roc2
0.7314 0.6120
The DeLong method involves all possible pairwise comparison, and it does not scale well in larger data.
## Test these two ROCs (DeLong)
system.time(res.roc.test2 <- roc.test(roc.s100b, roc.ndka, method = "delong"))
res.roc.test2
This call will use these functions in sequence. The last function, pROC:::MW.kernel is a function that gives scores 1, 0.5, or 0, according to a two value comparison procedure. This function is applied to all possible pairs of cases and non-cases, taking a lot of time in larger datasets.
roc.test
function (...)
{
UseMethod("roc.test")
}
<environment: namespace:pROC>
## roc.test.roc # Suppressed. Too long.
pROC:::delong.paired.test
function (roc1, roc2)
{
n <- length(roc1$controls)
m <- length(roc1$cases)
VR <- delong.placements(roc1)
VS <- delong.placements(roc2)
SX <- matrix(NA, ncol = 2, nrow = 2)
SX[1, 1] <- sum((VR$X - VR$theta) * (VR$X - VR$theta))/(m -
1)
SX[1, 2] <- sum((VR$X - VR$theta) * (VS$X - VS$theta))/(m -
1)
SX[2, 1] <- sum((VS$X - VS$theta) * (VR$X - VR$theta))/(m -
1)
SX[2, 2] <- sum((VS$X - VS$theta) * (VS$X - VS$theta))/(m -
1)
SY <- matrix(NA, ncol = 2, nrow = 2)
SY[1, 1] <- sum((VR$Y - VR$theta) * (VR$Y - VR$theta))/(n -
1)
SY[1, 2] <- sum((VR$Y - VR$theta) * (VS$Y - VS$theta))/(n -
1)
SY[2, 1] <- sum((VS$Y - VS$theta) * (VR$Y - VR$theta))/(n -
1)
SY[2, 2] <- sum((VS$Y - VS$theta) * (VS$Y - VS$theta))/(n -
1)
S <- SX/m + SY/n
L <- c(1, -1)
sig <- sqrt(L %*% S %*% L)
zscore <- (VR$theta - VS$theta)/sig[1]
if (is.nan(zscore) && VR$theta == VR$theta && sig[1] == 0)
zscore <- 0
return(zscore)
}
<environment: namespace:pROC>
pROC:::delong.placements
function (roc)
{
V <- list()
Y <- roc$controls
X <- roc$cases
n <- length(Y)
m <- length(X)
MW <- sapply(1:n, function(j) sapply(1:m, function(i, j) MW.kernel(X[i],
Y[j]), j = j))
V$theta <- sum(MW)/(m * n)
V$X <- sapply(1:m, function(i) {
sum(MW[i, ])
})/n
V$Y <- sapply(1:n, function(j) {
sum(MW[, j])
})/m
return(V)
}
<environment: namespace:pROC>
pROC:::MW.kernel
function (x, y)
{
if (y < x)
return(1)
if (y == x)
return(0.5)
if (y > x)
return(0)
}
<environment: namespace:pROC>