A plotting template for probabilistic classification models, such as logistic regression, that shows how the assigned scores are distributed for each true class.

Sample Data

# Read the sample scores, then recode the true class as a factor whose
# levels are 0 and 1.
data <- read.csv(file = "SampleData.csv", header = TRUE)
data <- mutate(data, truth = factor(truth, levels = c(0, 1)))
print("The first column indicates the true class, while the second column indicates the probability score assigned by the model")
## [1] "The first column indicates the true class, while the second column indicates the probability score assigned by the model"
head(data)
##   truth pred_prob
## 1     1 0.9887905
## 2     1 0.9999989
## 3     1 0.7725319
## 4     1 0.5550344
## 5     1 0.9999394
## 6     1 0.8846755

Customization

# Text shown for each class in the legend.
labels <- c("Positive", "Negative")
# Colours used for the outline and fill of the density curves.
colors <- c("#233f7d", "grey")
# Class codes listed in the legend; should match the encoding used in your data.
breaks <- c(1, 0)
# Score cutoff that classifies an observation.
my_cutoff <- 0.5
# Height (y value) of the annotation text; adjust manually to fit the graph.
cutoff_label_height <- 7

Compute the metrics for labeling

# Confusion-matrix counts at `my_cutoff`.
# A row counts as classified positive when its score is strictly above the
# cutoff. Rows whose comparison evaluates to NA are left out of each count.
num_positive <- with(data, sum(truth == 1, na.rm = TRUE))
num_negative <- with(data, sum(truth == 0, na.rm = TRUE))

true_positive <- with(
  data,
  sum(truth == 1 & pred_prob > my_cutoff, na.rm = TRUE)
)
false_positive <- with(
  data,
  sum(truth == 0 & pred_prob > my_cutoff, na.rm = TRUE)
)

# The classified-negative counts are the remainder of each true class.
false_negative <- num_positive - true_positive
true_negative <- num_negative - false_positive

# Annotation text for the two sides of the cutoff line.
label_left <- stringr::str_interp("Classified Negative\n   True Negatives: ${true_negative}\n   False Negatives: ${false_negative} ")

label_right <- stringr::str_interp("Classified Positive\n   True Positives: ${true_positive}\n   False Positives: ${false_positive}")

# One row per annotation; x is a fixed offset from the cutoff (0.05 to the
# right for the positive side, 0.3 to the left for the negative side).
label_df <- data.frame(
  label = c(label_right, label_left),
  x = c(my_cutoff + 0.05, my_cutoff - 0.3)
)

Plot

# Density of the assigned scores for each true class, with a dotted line at
# the cutoff and the confusion-matrix counts written on either side of it.
#
# The manual colour/fill values are named by `breaks`, so each class code is
# tied to its colour explicitly rather than through positional matching of
# unnamed values (which ggplot2 versions have resolved differently).
ggplot(data = data) +
  scale_color_manual(
    name = "True Class",
    values = setNames(colors, breaks),
    breaks = breaks,
    labels = labels
  ) +
  scale_fill_manual(
    name = "True Class",
    values = setNames(colors, breaks),
    breaks = breaks,
    labels = labels
  ) +
  geom_density(aes(x = pred_prob, color = truth, fill = truth), alpha = 0.5) +
  geom_vline(xintercept = my_cutoff, alpha = 0.5, linetype = "dotted") +
  # y stays inside aes() so the label height remains a mapped aesthetic,
  # exactly as in the original layer.
  geom_text(
    data = label_df,
    aes(x = x, y = cutoff_label_height, label = label),
    color = "grey", hjust = 0
  ) +
  labs(
    x = "Score Assigned", y = "Relative Density",
    title = "Distribution of the Scores",
    subtitle = "Assigned by <model_name>"
  ) +
  # Show exactly the [0, 1] score range, with no padding around the panel.
  coord_cartesian(xlim = c(0, 1), expand = FALSE) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.text.y = element_blank()
  )