Confusion matrix visualisation

# Read the (actual, predicted) label pairs and cross-tabulate them into a
# confusion table with predicted classes in rows and actual classes in columns.
CAMDA.confusion <- read_csv("./data/CAMDA.csv") %$%
  table(predicted, actual)

## 
## -- Column specification --------------------------------------------------------
## cols(
##   actual = col_character(),
##   predicted = col_character()
## )

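Given a multiclass confusion matrix C (rows = predicted class, columns = actual class), OVA(C) returns a data frame with one row per class; each row holds the counts (TP, FP, FN, TN) of the binary confusion matrix formed by the one-versus-all treatment of that class, together with rates, odds and probabilities derived from those counts.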

# One-versus-all (OVA) decomposition of a multiclass confusion matrix.
#
# For the i-th class "A", the i-th row and column of C are collapsed into the
# binary ("one vs all") confusion matrix
#            actual
# predicted  A  nA
#         A TP  FP
#        nA FN  TN
#
# Args:
#   C: square confusion matrix (or two-way table) of counts, with predicted
#      classes in rows and actual classes in columns, in the same order.
#
# Returns:
#   A tibble with one row per class and the columns
#     class        row name of C
#     TP FP FN TN  binary confusion counts (numeric)
#     Pos, Neg     number of actual members / non-members of the class
#     TPR, FPR     TP / Pos and FP / Neg
#     O.act        odds of the actual class being X
#     P.act        probability of the actual class being X
#     O.act.g.prd  odds of the actual class being X given the prediction X
#     P.act.g.prd  probability of the actual class being X given prediction X
#
# Raises:
#   An error if C is not two-dimensional and square, or if its row and column
#   names disagree (the i-th row and column would then describe different
#   classes and every count would be silently wrong).
OVA <- function(C) {
  if (length(dim(C)) != 2L) {
    stop("OVA(): C must be a two-dimensional confusion matrix", call. = FALSE)
  }
  C <- as.matrix(C)
  if (nrow(C) != ncol(C)) {
    stop(
      "OVA(): C must be square; the predicted and actual classes differ",
      call. = FALSE
    )
  }
  if (!is.null(rownames(C)) && !is.null(colnames(C)) &&
      !identical(rownames(C), colnames(C))) {
    stop(
      "OVA(): row and column classes of C must match and be in the same order",
      call. = FALSE
    )
  }

  # Vectorised over classes; as.vector() drops the dimnames so the tibble
  # columns are plain unnamed vectors.
  TP <- as.vector(diag(C))
  FP <- as.vector(rowSums(C)) - TP  # predicted X, actually something else
  FN <- as.vector(colSums(C)) - TP  # actually X, predicted something else
  TN <- sum(C) - TP - FP - FN

  tibble(class = rownames(C), TP, FP, FN, TN) %>%
    mutate(
      Pos = TP + FN,
      Neg = FP + TN,
      TPR = TP / Pos,
      FPR = FP / Neg,
      O.act = Pos / Neg,             # Odds of actual class being X
      # Probabilities are computed from the counts rather than from the odds:
      # odds / (odds + 1) evaluates to Inf / Inf = NaN when the odds are
      # infinite (Neg == 0 or FP == 0), where the probability is really 1.
      P.act = Pos / (Pos + Neg),     # Probability of actual class being X
      O.act.g.prd = TP / FP,         # Odds of actual X given prediction X
      P.act.g.prd = TP / (TP + FP)   # Probability of actual X given prediction X
    )
}

# Confusion table to visualise (rows = predicted, columns = actual).
C <- CAMDA.confusion

# One-vs-all summary, with the classes turned into a factor ordered by
# decreasing P.act.g.prd (probability of the actual class given the
# prediction). This ordering is shared by every panel of the figure.
C.OVA <- OVA(C) %>%
  mutate(class = fct_reorder(class, P.act.g.prd, .desc = TRUE))

# Long form of the confusion table (one row per predicted/actual pair with its
# count n), using the same class ordering as C.OVA on both axes.
# The earlier, immediately overwritten `C.conf <- as_tibble(C)` was dead code
# and has been dropped.
C.conf <- as_tibble(C) %>%
  mutate(
    actual    = factor(actual,    levels = levels(C.OVA$class)),
    predicted = factor(predicted, levels = levels(C.OVA$class))
  )

# p22: the confusion matrix itself, drawn as dots whose area is proportional
# to the count n; empty cells are dropped. The predicted axis is reversed and
# placed on the right-hand side.
p22 <- C.conf %>%
  filter(n > 0) %>%
  ggplot(aes(x = actual, y = predicted)) +
  geom_count(aes(size = n)) +
  scale_size_area() +
  scale_y_discrete(limits = rev, position = "right") +
  # Disabled alternatives kept for reference:
  # scale_x_discrete(position = "top") +
  # coord_equal() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  )

# p12: P(actual) for each class as a blue step line on a logit scale, with
# the class axis drawn along the top.
p12 <- ggplot(C.OVA, aes(x = class, y = P.act)) +
  geom_step(group = 1, direction = "mid", color = "blue") +
  scale_y_continuous(
    trans = "logit",
    position = "right",
    breaks = c(0.01, 0.1, 0.5, 0.9, 0.99),
    limits = c(0.01, 0.99)
  ) +
  scale_x_discrete(position = "top") +
  labs(x = "actual", y = "P(actual)") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  )

# p21: for each predicted class, P(actual) in blue and P(actual | predicted)
# in red, both on a logit scale. The class order is reversed and the axes are
# flipped so that classes run down the vertical axis.
p21 <- ggplot(C.OVA, aes(x = class)) +
  geom_step(aes(y = P.act), group = 1, direction = "mid", color = "blue") +
  geom_step(aes(y = P.act.g.prd), group = 1, direction = "mid", color = "red") +
  scale_y_continuous(
    trans = "logit",
    breaks = c(0.01, 0.1, 0.5, 0.9, 0.99),
    limits = c(0.01, 0.99)
  ) +
  scale_x_discrete(limits = rev) +
  labs(x = "predicted", y = "P(actual | predicted)") +
  coord_flip()

# Assemble the figure: an empty spacer, p12, p21 and p22 in that order, with
# relative column widths and row heights of 1:3.
(plot_spacer() + p12 + p21 + p22) +
  plot_layout(
    widths = c(1, 3),
    heights = c(1, 3)
  )

## Warning: Transformation introduced infinite values in continuous y-axis

Confusion matrix visualisation

David Lovell and Bridget McCarron

17/05/2021