Load & preprocess data

Load preprocessed azure data

## already preprocessed to some extent form previous step
azure <- read.csv(here('data/emotions_azure/preprocessed_data.csv'), fill = TRUE, header = TRUE) %>%
  rename(azure_label = label)

Load and preprocess rekognition data

rekognition <- read.csv(here('data/emotions_rekognition/face_TP_present_rekognition.csv'), fill = TRUE, header = TRUE) %>%
  select(time, angry, calm, confused, disgusted, fear, happy, sad, surprised) %>%
  rename(filename = time) %>%
  select(-confused) %>% # no corrolary in hiuman coding
  # make names match those in azure/human data
   mutate(happiness = as.numeric(happy)/100,
         surprise = as.numeric(surprised)/100,
         anger = as.numeric(angry)/100,
         disgust = as.numeric(disgusted)/100,
         fear = as.numeric(fear)/100,
         neutral = as.numeric(calm)/100, 
         contempt = 0, # filler 
         sadness = as.numeric(sad)/100) 
rekognition <- rekognition %>%
  mutate(rekog_label = case_when(pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == happiness ~ "happy",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == neutral ~ "neutral",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == surprise ~ "surprise",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == sadness ~ "sad",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == contempt ~ "contempt",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == anger ~ "anger",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == disgust ~ "disgust",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == fear ~ "fear")) 

Note that we have many more faces on rekognition dataset

# How many faces do we have for recognition vs azure?
sum(!is.na(rekognition$rekog_label))
## [1] 2723
sum(!is.na(azure$azure_label))
## [1] 837

Load and preprocess human data

humans <- read.csv(here('data/emotion_human_annotations/face_TP_present_human.csv'), fill = TRUE, header = FALSE)
## note that there are some rows where the parsing fails (~8 or so, not worrying about for now)
d.tidy <- humans %>%
  slice(-1) %>%
  `colnames<-`(c("path", "coding", "type", "x", "y", "width", "height")) %>%
  separate(coding, c("label", "happiness", "surprise", "sadness", "anger", "contempt", "disgust", "fear", "neutral", "isBaby","note"), sep = "-", remove = TRUE, convert = FALSE, extra = "warn", fill = "warn") %>%
  separate(path, c("filepath", "filename"), sep = "azure/", remove = TRUE) %>%
  select("filename", "happiness", "surprise", "sadness", "anger", "contempt", "disgust", "fear", "neutral", "isBaby","note") %>%
  mutate(happiness = as.numeric(happiness)/100,
         surprise = as.numeric(surprise)/100,
         anger = as.numeric(anger)/100,
         contempt = as.numeric(contempt)/100,
         disgust = as.numeric(disgust)/100,
         fear = as.numeric(fear)/100,
         neutral = as.numeric(neutral)/100,
         sadness = as.numeric(sadness)/100) 
## Warning: Expected 11 pieces. Additional pieces discarded in 30 rows [468,
## 477, 480, 481, 520, 523, 524, 570, 587, 589, 591, 594, 600, 603, 605, 606,
## 607, 610, 623, 624, ...].
## Warning: Expected 11 pieces. Missing pieces filled with `NA` in 416 rows
## [2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
## 23, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 92 rows [11,
## 132, 142, 144, 157, 159, 161, 174, 197, 231, 234, 236, 248, 249, 260, 261,
## 269, 270, 271, 272, ...].
## Warning: Problem with `mutate()` input `neutral`.
## ℹ NAs introduced by coercion
## ℹ Input `neutral` is `as.numeric(neutral)/100`.
## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion
head(d.tidy)
##                                 filename happiness surprise sadness anger
## 1  1001-A_20130624_0911_01.mp4-23725.jpg      0.05     0.00    0.02     0
## 2 10023-A_20141109_2527_01.mp4-51935.jpg      0.07     0.00    0.00     0
## 3  1003-A_20130624_0911_01.mp4-25660.jpg      0.00     0.00    0.06     0
## 4  1004-A_20130624_0911_01.mp4-25880.jpg      0.70     0.00    0.00     0
## 5 10055-A_20141109_2527_02.mp4-30595.jpg      0.85     0.15    0.00     0
## 6 10143-A_20141122_2609_01.mp4-45825.jpg      0.90     0.10    0.00     0
##   contempt disgust fear neutral isBaby note
## 1        0    0.00    0    0.93     No  Yes
## 2        0    0.00    0    0.93     No <NA>
## 3        0    0.02    0    0.92     No <NA>
## 4        0    0.00    0    0.30     No <NA>
## 5        0    0.00    0      NA   <NA> <NA>
## 6        0    0.00    0      NA   <NA> <NA>
# View(d.tidy)
 d.tidy <- d.tidy %>%
    mutate(human_label = case_when(pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == happiness ~ "happy",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == neutral ~ "neutral",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == surprise ~ "surprise",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == sadness ~ "sad",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == contempt ~ "contempt",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == anger ~ "anger",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == disgust ~ "disgust",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == fear ~ "fear")) %>%
  select(human_label, filename, isBaby) 
merged <-  d.tidy %>%
  left_join(azure %>% select(azure_label, filename, age), by='filename') %>%
  filter(!is.na(filename)) %>%
  left_join(rekognition %>% select(rekog_label, filename), by='filename')

Compare human and machine annotations

Functions to evaluate detections

# Function to evaluate detectors
evaluate_detector <- function(truth, detection) {
  if (truth == TRUE) {
    if (truth == detection) return ("TP") # e.g. was face/wrist, detected face/wrist
    else return("FN") # e.g.  was face/wrist, missed face/wrist
  }
  else {
    if (truth == detection) return("TN") # e.g.  was not face/wrist, did not detect face/wrist
    else return("FP") # e.g.  was not face/wrist, detected face/wrist
  }
}

## computers p/r/f as a function of emotion, and detector form merged data
return_eval = function(this_emotion, which_detector, merged=merged){

 for_eval <- merged %>%
  filter(!is.na(human_label)) %>%
  mutate(human = (human_label == this_emotion), machine = (!!as.name(which_detector) == this_emotion)) %>%
  rowwise() %>%
  mutate(eval = evaluate_detector(human,machine)) %>%
  ungroup() %>%
  summarize(emotion = this_emotion, detector = which_detector, samples = sum(human_label==this_emotion), tp=sum(eval == "TP"), 
          fp=sum(eval == "FP"), 
          fn=sum(eval == "FN"),
          p = tp / (tp + fp),
          r = tp / (tp + fn),
          f=( 2 * p * r )/ (p + r))
}

Go through consistent emotions and each detector and get evaluations

count = 0
emotions = c("neutral",  "happy" ,  "sad" ,  "surprise", "fear", "anger")
for (e in emotions) {
  for (detector in c('azure_label','rekog_label')) {
    this_eval = return_eval(e, detector, merged)
    count = count+ 1
    if (count==1){
      all_evals = this_eval
    }
    else {
    all_evals <- all_evals %>%
      full_join(this_eval)
    }
  }
}
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")

Output table

all_evals %>%
  kable()
emotion detector samples tp fp fn p r f
neutral azure_label 856 551 103 305 0.8425076 0.6436916 0.7298013
neutral rekog_label 856 375 65 481 0.8522727 0.4380841 0.5787037
happy azure_label 249 187 275 62 0.4047619 0.7510040 0.5260197
happy rekog_label 249 148 170 101 0.4654088 0.5943775 0.5220459
sad azure_label 85 11 55 74 0.1666667 0.1294118 0.1456954
sad rekog_label 85 22 198 63 0.1000000 0.2588235 0.1442623
surprise azure_label 32 8 38 24 0.1739130 0.2500000 0.2051282
surprise rekog_label 32 6 87 26 0.0645161 0.1875000 0.0960000
fear azure_label 6 1 1 5 0.5000000 0.1666667 0.2500000
fear rekog_label 6 4 99 2 0.0388350 0.6666667 0.0733945
anger azure_label 3 0 0 3 NaN 0.0000000 NaN
anger rekog_label 3 0 43 3 0.0000000 0.0000000 NaN

Plots of p/r/f

all_evals <- all_evals %>%
  mutate(emotion = fct_reorder(emotion, tp, .desc=TRUE))

p = ggplot(all_evals, aes(x=emotion, y=p, col=detector, size = samples)) +
  geom_point(position = position_dodge(width=.8)) +
  theme_few() + 
  theme(legend.position = 'none') + 
  ggtitle('Precision') + 
  ylim(0, 1)

r = ggplot(all_evals, aes(x=emotion, y=r, col=detector, size = samples)) +
  geom_point(position = position_dodge(width=.8)) +
  theme_few() + 
  theme(legend.position = 'none')  + 
  ggtitle('Recall') + 
  ylim(0, 1)
 

f = ggplot(all_evals, aes(x=emotion, y=f, col=detector, size = samples)) +
  geom_point(position = position_dodge(width=.8)) +
  theme_few() + 
  ggtitle('F-score') +
  ylim(0, 1)

ggarrange(p,r,f, nrow=1)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).