ROC class notes

Le Kang

2024-04-03

ROC surface with 3 classes

Let \(Y_{1}\), \(Y_{2}\) and \(Y_{3}\) denote the observational random variables resulting from a single diagnostic test for non-diseased, intermediate and diseased subjects, respectively, and let \(F_{1}\), \(F_{2}\) and \(F_{3}\) be the cumulative distribution functions conditional on the corresponding category.

Assume the results of a diagnostic test are measured on continuous scale and higher values indicate greater severity of the disease.

Given a pair of threshold values \(c_{1}\) and \(c_{3}\) \((c_{1}<c_{3})\), let \(\delta_{1}=P(Y_1<c_1)=F_{1}(c_{1})\), \(\delta_{3}=P(Y_3>c_3)=1-F_{3}(c_{3})\) be the true classification rates for non-diseased and diseased category, respectively.

The probability that a randomly selected subject from intermediate category has a score between \(c_{1}\) and \(c_{3}\) is \[ \delta_{2}=F_{2}(c_{3})-F_{2}(c_{1})=F_{2}\left[ F_{3}^{-1}(1-\delta _{3})\right] -F_{2}\left[ F_{1}^{-1}(\delta_{1})\right] . \]

The triplet \((\delta_{1}\), \(\delta_{2}\), \(\delta_{3})\), where \(\delta_{2} =\delta_{2}(\delta_{1}\), \(\delta_{3})\) is a function of \((\delta_{1}\), \(\delta_{3})\), would produce an ROC surface in the three-dimensional space for all possible \((c_{1}\), \(c_{3})\) \(\in \mathbb{R}^{2}\).

\[ VUS =\int_{0}^{1}\int_{0}^{1-F_{3}\left[ F_{1}^{-1}(\delta_{1})\right] }% F_{2}\left[ F_{3}^{-1}(1-\delta_{3})\right] -F_{2}\left[ F_{1}^{-1}% (\delta_{1})\right] d\delta_{3}d\delta_{1} \]

The volume under the ROC surface (VUS) has been considered in order to summarize the overall diagnostic accuracy for the diagnostic test with three ordinal diagnostic categories

delta1=seq(0,1,by=0.01)
delta3=seq(0,1,by=0.01)


f <- function(delta1,delta3) 
{ r <- pnorm(qnorm(1-delta3,mean=0.2),mean=0)-pnorm(qnorm(delta1,mean=-0.5),mean=0) }

z <- outer(delta1, delta3, f)
op <- par(bg = "white")

persp(delta1, delta3, z, theta = 30, phi = 30, expand = 0.5, 
col = "blue",xlab="delta_1",ylab="delta_3",zlab="delta_2")

op <- par(bg = "white")

persp(delta1, delta3, z, theta = -60, phi = -3, expand = 0.5, 
col = "blue",xlab="delta_1",ylab="delta_3",zlab="delta_2")

n1=50
n2=50
n3=50
mu1=0;sigma1=1;
mu2=1;sigma2=1;
mu3=2;sigma3=1;
new.1=rnorm(n=n1, mean=mu1, sd=sigma1 ) 
new.2=rnorm(n=n2, mean=mu2, sd=sigma2 )
new.3=rnorm(n=n3, mean=mu3, sd=sigma3 )



F.func <- function(u) quantile(new.1,probs=u,type=1)
H.func <- function(u) quantile(new.3,probs=u,type=1)
G.func <- ecdf(new.2)

nbin=50
u <- seq(0, 1, length= nbin)
v <- u
Q.mat = matrix(0,nrow=nbin,ncol=nbin)
for (i in 1:nbin) {
for (j in 1:nbin) {
if (F.func(v[j])<=H.func(1-u[i])) Q.mat[i,j]=
G.func(H.func(1-u[i]))-G.func(F.func(v[j])) }}



persp(u, v, Q.mat, theta = -135, phi = 10, expand = 1,  col = "blue",
      ltheta = 120, shade = 0.15, ticktype = "detailed",nticks=2,
      xlab = "delta_1", ylab = "delta_3", zlab = "delta_2",las=2)

# try persp3d in rgl package

VUS estimation

Parametric
Nonparametric
Semi-parametric

Nonparametric estimation

vus.MWU <- function(new.1,new.2,new.3) {
n1=length(new.1)
n2=length(new.2)
n3=length(new.3)
usum <- function(u) {
is.small = new.2[u<new.2]
if (length(is.small)>0) return(sum(sapply(is.small, function(v) sum(v<new.3)))) else return(0)
}
return(sum(sapply(new.1,usum))/n1/n2/n3) 
}

vus.empirical <- function(new.1,new.2,new.3,nbin=100) {
F.func <- function(u) quantile(new.1,probs=u,type=1)
H.func <- function(u) quantile(new.3,probs=u,type=1)
G.func <- ecdf(new.2)
Q.mat = matrix(0,nrow=nbin,ncol=nbin)
u <- seq(0,1,length=nbin)
v <- seq(0,1,length=nbin)
for (i in 1:nbin) {
for (j in 1:nbin) {
if (F.func(v[j])<=H.func(1-u[i])) Q.mat[i,j]=
G.func(H.func(1-u[i]))-G.func(F.func(v[j])) }}
return(sum(Q.mat)/(nbin-1)^2)
}

Trinormal model

vus.normal <- function(new.1,new.2,new.3) {
mu1 = mean(new.1)
mu2 = mean(new.2)
mu3 = mean(new.3)
sigma1 = var(new.1)
sigma2 = var(new.2)
sigma3 = var(new.3)
if ((sigma1<=0)|(sigma2<=0)|(sigma3<=0)) return(NA)
a = sqrt(sigma2/sigma1)
c = sqrt(sigma2/sigma3)
b = (mu1-mu2)/sqrt(sigma1)
d = (mu3-mu2)/sqrt(sigma3)
integrand <- function(s) { pnorm(a*s-b)*pnorm(-c*s+d)*dnorm(s) }
integral <- try(integrate(integrand,lower=-Inf,upper=Inf),silent=T)
if (class(integral)=="try-error") return(NA) else return(integral$value)
}

Kernel method

vus.ker1 <- function(new.1,new.2,new.3) {
n1=length(new.1)
n2=length(new.2)
n3=length(new.3)
h1=(4/3/n1)^(1/5)*min(sd(new.1),IQR(new.1)/1.349)
h2=(4/3/n2)^(1/5)*min(sd(new.2),IQR(new.2)/1.349)
h3=(4/3/n3)^(1/5)*min(sd(new.3),IQR(new.3)/1.349)
F1 <- function(x)  sapply(x, function(u)      mean(pnorm((u-new.1)/h1)) )
f2 <- function(x)  sapply(x, function(u) 1/h2*mean(dnorm((u-new.2)/h2)) )
F3 <- function(x)  sapply(x, function(u)    1-mean(pnorm((u-new.3)/h3)) )
integrand <- function(s) { F1(s)*F3(s)*f2(s) }
integral <- try(integrate(integrand,lower=-Inf,upper=Inf),silent=T)
if (class(integral)=="try-error") return(NA) else return(integral$value)
}

vus.ker2 <- function(new.1,new.2,new.3) {
n1=length(new.1)
n2=length(new.2)
n3=length(new.3)
h1=(4/3/n1)^(1/5)*min(sd(new.1),IQR(new.1)/1.349)
h2=(4/3/n2)^(1/5)*min(sd(new.2),IQR(new.2)/1.349)
h3=(4/3/n3)^(1/5)*min(sd(new.3),IQR(new.3)/1.349)
sum.idx=0
for (i in 1:n1) {
for (j in 1:n2) {
for (k in 1:n3) {
sum.idx=sum.idx+pnorm((new.2[j]-new.1[i])/sqrt(h1^2+h2^2))*pnorm((new.3[k]-new.2[j])/sqrt(h3^2+h2^2))
}}}
return(sum.idx/n1/n2/n3)
}