2024-04-29
nonp.auc <- function(x,y) {
Ker=outer(x,y,function(x,y) (sign(y-x)+1)/2)
auc=mean(Ker)
return(auc)
}
nonp.auc.check <- function(data.1,data.2) {
auc.vec=numeric(ncol(data.1))
for (i in 1:ncol(data.1)) {
new.1=data.1[,i]
new.2=data.2[,i]
auc.vec[i]=nonp.auc(new.1,new.2) }
return(auc.vec)
}
nonpar.combine2.auc <- function(alpha,rate,data.1,data.2) {
new.1 = data.1%*%c(alpha,rate)
new.2 = data.2%*%c(alpha,rate)
nonp.auc(new.1,new.2)
}
nonpar.combine2.coef <- function(dat_1,dat_2,evalnum=201) {
rate=seq(-1,1,length=evalnum)
alpha=rev(rate)[-1]
auc.rate_x = sapply(rate, nonpar.combine2.auc, alpha=1,data.1=dat_1,data.2=dat_2)
auc.alpha_x = sapply(alpha, nonpar.combine2.auc, rate=1,data.1=dat_1,data.2=dat_2)
auc.0 = c(auc.rate_x,auc.alpha_x)
vmax.idx = which.max(auc.0)
if(vmax.idx<=evalnum) return(c(alpha=1,rate=rate[vmax.idx], auc.max=auc.0[vmax.idx]))
if(vmax.idx> evalnum) return(c(alpha=alpha[vmax.idx-evalnum],rate=1,auc.max=auc.0[vmax.idx]))
}
step.coef <- function(new.1,new.2,design='step-down') {
n1 = nrow(new.1)
n2 = nrow(new.2)
VARnum = ncol(new.1)
combcoef = matrix(0,nrow=VARnum-1,ncol=2)
if (design=='step-down') {
auc.order = sort(nonp.auc.check(data.1=new.1,data.2=new.2),index.return=T,decreasing=T)$ix } else {
auc.order = sort(nonp.auc.check(data.1=new.1,data.2=new.2),index.return=T,decreasing=F)$ix }
combmarker.1=new.1[,auc.order[1]]
combmarker.2=new.2[,auc.order[1]]
final.coef=1
for (i in 2:VARnum) {
combmarker.1 = cbind(combmarker.1,new.1[,auc.order[i]])
combmarker.2 = cbind(combmarker.2,new.2[,auc.order[i]])
temp.info = nonpar.combine2.coef(combmarker.1,combmarker.2)
combcoef[i-1,] = temp.info[1:2]
#print(temp.info)
combmarker.1 = combmarker.1%*%temp.info[1:2]
combmarker.2 = combmarker.2%*%temp.info[1:2]
final.coef = c(final.coef*combcoef[i-1,1],combcoef[i-1,2])
}
final.coef = final.coef[sort(auc.order,index.return=T)$ix]
return(list(coef=as.numeric(final.coef),
auc.combined=as.numeric(temp.info[3])))
}
library(mvtnorm)
df_neg = rmvnorm(100,mean=rep(0,6),sigma=diag(6))
df_pos = rmvnorm(100,mean=c(4,3,2,8,6,1)/3, sigma=diag(6)+0.2)
nonp.auc.check(df_neg,df_pos)
sort(nonp.auc.check(df_neg,df_pos),index.return=T,decreasing=T)
step.coef(df_neg,df_pos)
nonp.auc(df_neg%*%step.coef(df_neg,df_pos)$coef,df_pos%*%step.coef(df_neg,df_pos)$coef)
df=rbind(df_neg,df_pos)
scaled.df=apply(df,2,function(x) (x-mean(x))/sd(x))
scaled_neg=scaled.df[ 1:100,]
scaled_pos=scaled.df[101:200,]
nonp.auc.check(scaled_neg,scaled_pos)
sort(nonp.auc.check(scaled_neg,scaled_pos),index.return=T,decreasing=T)
step.coef(scaled_neg,scaled_pos)
df_neg = rmvnorm(100,mean=rep(0,6),sigma=diag(6))
df_pos = rmvnorm(100,mean=c(4,3,2,8,6,1)/3, sigma=diag(6)+0.2)
step.coef(df_neg,df_pos)$coef -> coef1
df_neg = rmvnorm(100,mean=rep(0,6),sigma=diag(6))
df_pos = rmvnorm(100,mean=c(4,3,2,8,6,1)/3, sigma=diag(6)+0.2)
nonp.auc(df_neg%*%coef1,df_pos%*%coef1)
What do we do when outcome is right censored survival time?
\(Sensitivity(c,t)=P(S_i>c|D_i(t)=1)\)
\(Specificity(c,t)=P(S_i\leq c|D_i(t)=0)\)
At any time \(t\), \[AUC(t)=\int_{-\infty}^{\infty}Se(c,t)d[1-Sp(c,t)]\]
At each time point \(t\), each individual is classified as a case or control. A case is defined as an individual with an event up to time \(t\).
\[AUC^{C/D}(t)=P(S_i>S_j|T_i\leq t,T_j>t), i\neq j.\]
A case is defined as an individual with an event at time \(t\).
\[AUC^{I/D}(t)=P(S_i>S_j|T_i= t,T_j>t), i\neq j.\] This is dichotomizing the risk set at time \(t\) into cases and controls, a natural companion to hazard models. This also allows time-averaged concordance measure c-index.
Consider empirical naive estimators.
First, assume that the survival time \(X\) is actually observed without any censoring, i.e., \(\delta_i=1\) (\(i=1, 2, \ldots, n\)). Upon randomly drawing a pair of subjects, say \((i, j)\), \(i\neq j\), we may have five types of pairs between the survival time \(X\) and the predictive score \(Y\).
These five possibilities for a random pair are comprehensive and mutually exclusive, and therefore \[\Pi_c+\Pi_d+\Pi_{tX}+\Pi_{tY}+\Pi_{tXY}=1.\]
Kim’s measure \(d_{X\cdot Y}\) \[d_{X\cdot Y}=\dfrac{\Pi_c-\Pi_d}{\Pi_c+\Pi_d+\Pi_{tY}}=\dfrac{\Pi_c-\Pi_d}{1-\Pi_{tX}-\Pi_{tXY}}\] is the probability of a concordance minus the probability of a discordance, both conditioned on the occurrence of distinct values of outcome \(X\), for quantifying the degree of relationship between \(X\) and \(Y\).
Define \(sign\) and \(csign\) (\(sign\) with censoring) functions as below, \[ sign\left( Y_i, Y_j\right) = I(Y_i > Y_j) - I(Y_i < Y_j)\] \[ csign\left(X_i, \delta_i, X_j, \delta_j\right) = I(X_i \geq X_j)\delta_j - I(X_i \leq X_j)\delta_i \]
\[C_{XY}^g = \dfrac{\Pi_c^g+\frac{1}{2}\Pi_{tY}^g}{\Pi_c^g+\Pi_d^g+\Pi_{tY}^g}=\dfrac{\Pi_c^g+\frac{1}{2}\Pi_{tY}^g}{1-\Pi_{tX}^g-\Pi_{tXY}^g}.\]