for probabilistic models like logistic regression
# Read SampleData.csv (first line is a header row) into the data frame `data`.
data <- read.csv(file = "SampleData.csv", header = TRUE)
# Tell the reader what the two columns mean before showing them.
print("The first column indicates the true class, while the second column indicates the probability score assigned by the model")
## [1] "The first column indicates the true class, while the second column indicates the probability score assigned by the model"
# Preview the first rows of the loaded data.
head(x = data)
## truth pred_prob
## 1 1 0.9887905
## 2 1 0.9999989
## 3 1 0.7725319
## 4 1 0.5550344
## 5 1 0.9999394
## 6 1 0.8846755
# Colour used for both the ROC line and the shaded area beneath it.
plot_color <- "#233f7d"
# Colour of the 'AUC: ...' annotation text.
text_color <- "black"
# Grid of values in [0, 1], step 0.01, at which the rates are evaluated.
x <- seq(from = 0, to = 1, by = 0.01)
# True positive rate at one operating point.
#
# Despite the `num_` prefix this returns a rate, not a count: the share of
# observations with truth == 1 whose score exceeds the threshold 1 - x0.
#
# x0    : scalar; the decision threshold applied to `prob` is 1 - x0.
# truth : vector of true classes (1 marks a positive); defaults to the
#         `truth` column of the global `data`.
# prob  : vector of model scores aligned with `truth`; defaults to the
#         `pred_prob` column of the global `data`.
#
# Returns a single numeric value (NaN when `truth` contains no 1s, since the
# denominator is then zero).
num_TruePositive <- function(x0, truth=data$truth, prob=data$pred_prob){
  threshold <- 1 - x0
  actual_positive <- truth == 1
  predicted_positive <- prob > threshold
  sum(predicted_positive & actual_positive) / sum(actual_positive)
}
# False positive rate at one operating point.
#
# Despite the `num_` prefix this returns a rate, not a count: the share of
# observations with truth == 0 whose score exceeds the threshold 1 - x0.
#
# x0    : scalar; the decision threshold applied to `prob` is 1 - x0.
# truth : vector of true classes (0 marks a negative); defaults to the
#         `truth` column of the global `data`.
# prob  : vector of model scores aligned with `truth`; defaults to the
#         `pred_prob` column of the global `data`.
#
# Returns a single numeric value (NaN when `truth` contains no 0s, since the
# denominator is then zero).
num_FalsePositive <- function(x0, truth=data$truth, prob=data$pred_prob){
  threshold <- 1 - x0
  actual_negative <- truth == 0
  predicted_positive <- prob > threshold
  sum(predicted_positive & actual_negative) / sum(actual_negative)
}
# Evaluate both rates at every grid value in `x`. vapply() is used instead of
# sapply() so the result is guaranteed to be a numeric vector.
tpr_exact <- vapply(x, num_TruePositive, numeric(1))
fpr_exact <- vapply(x, num_FalsePositive, numeric(1))
# Two-decimal versions, kept for display and for the plotted curve.
TPrate <- round(tpr_exact, 2)
FPrate <- round(fpr_exact, 2)
# Points of the ROC curve: x = false positive rate, y = true positive rate.
roc.data <- data.frame(x=FPrate, y=TPrate)
# Integrate the *unrounded* rates with the trapezoidal rule; integrating the
# two-decimal values would bake their rounding error into the AUC. The result
# is rounded once, at the end, so it stays short when printed.
AUC <- round(pracma::trapz(fpr_exact, tpr_exact), 4)
# Minimal theme with a square panel and no minor grid lines.
my_roc_theme <- theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        aspect.ratio = 1)
# ROC plot: curve, AUC label, shaded area under the curve and the dashed
# diagonal running from (0, 0) to (1, 1).
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# lines; `size` is kept so older installations behave the same - confirm the
# ggplot2 version in use before switching.
ggplot(data=roc.data) +
  geom_line(aes(x=x,y=y), size=1, color=plot_color) +
  # Round inside the label so its length does not depend on AUC's precision.
  annotate('text', alpha=0.6, x=0.7, y=0.1, label=paste('AUC:', round(AUC, 4)), size=8, color=text_color) +
  geom_ribbon(aes(x=x,ymin=0,ymax=y),
              alpha=0.2, fill=plot_color) +
  coord_cartesian(xlim = c(0,1), ylim=c(0,1.01), expand = FALSE) +
  # Drawn as a fixed segment rather than through aes(): an aes() vector of
  # 101 values only works while roc.data has exactly 101 rows.
  annotate('segment', x=0, y=0, xend=1, yend=1, linetype='dashed') +
  labs(x='False Positive Rate', y='True Positive Rate',
       title = 'ROC curve of the model <model_name>') +
  my_roc_theme