Load & preprocess data

Load preprocessed azure data

## already preprocessed to some extent form previous step
azure <- read.csv(here('data/emotions_azure/preprocessed_data.csv'), fill = TRUE, header = TRUE) %>%
  rename(azure_label = label)

Load and preprocess rekognition data

rekognition <- read.csv(here('data/emotions_rekognition/face_TP_present_rekognition.csv'), fill = TRUE, header = TRUE) %>%
  select(time, angry, calm, confused, disgusted, fear, happy, sad, surprised) %>%
  rename(filename = time) %>%
  select(-confused) %>% # no corrolary in hiuman coding
  # make names match those in azure/human data
   mutate(happiness = as.numeric(happy)/100,
         surprise = as.numeric(surprised)/100,
         anger = as.numeric(angry)/100,
         disgust = as.numeric(disgusted)/100,
         fear = as.numeric(fear)/100,
         neutral = as.numeric(calm)/100, 
         contempt = 0, # filler 
         sadness = as.numeric(sad)/100)

rekognition <- rekognition %>%
  mutate(rekog_label = case_when(pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == happiness ~ "happy",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == neutral ~ "neutral",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == surprise ~ "surprise",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == sadness ~ "sad",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == contempt ~ "contempt",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == anger ~ "anger",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == disgust ~ "disgust",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == fear ~ "fear"))

Note that we have many more faces on rekognition dataset

# How many faces do we have for recognition vs azure?
sum(!is.na(rekognition$rekog_label))

## [1] 2723

sum(!is.na(azure$azure_label))

## [1] 837

Load and preprocess human data

humans <- read.csv(here('data/emotion_human_annotations/face_TP_present_human.csv'), fill = TRUE, header = FALSE)

## note that there are some rows where the parsing fails (~8 or so, not worrying about for now)
d.tidy <- humans %>%
  slice(-1) %>%
  `colnames<-`(c("path", "coding", "type", "x", "y", "width", "height")) %>%
  separate(coding, c("label", "happiness", "surprise", "sadness", "anger", "contempt", "disgust", "fear", "neutral", "isBaby","note"), sep = "-", remove = TRUE, convert = FALSE, extra = "warn", fill = "warn") %>%
  separate(path, c("filepath", "filename"), sep = "azure/", remove = TRUE) %>%
  select("filename", "happiness", "surprise", "sadness", "anger", "contempt", "disgust", "fear", "neutral", "isBaby","note") %>%
  mutate(happiness = as.numeric(happiness)/100,
         surprise = as.numeric(surprise)/100,
         anger = as.numeric(anger)/100,
         contempt = as.numeric(contempt)/100,
         disgust = as.numeric(disgust)/100,
         fear = as.numeric(fear)/100,
         neutral = as.numeric(neutral)/100,
         sadness = as.numeric(sadness)/100)

## Warning: Expected 11 pieces. Additional pieces discarded in 30 rows [468,
## 477, 480, 481, 520, 523, 524, 570, 587, 589, 591, 594, 600, 603, 605, 606,
## 607, 610, 623, 624, ...].

## Warning: Expected 11 pieces. Missing pieces filled with `NA` in 416 rows
## [2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
## 23, ...].

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 92 rows [11,
## 132, 142, 144, 157, 159, 161, 174, 197, 231, 234, 236, 248, 249, 260, 261,
## 269, 270, 271, 272, ...].

## Warning: Problem with `mutate()` input `neutral`.
## ℹ NAs introduced by coercion
## ℹ Input `neutral` is `as.numeric(neutral)/100`.

## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion

head(d.tidy)

##                                 filename happiness surprise sadness anger
## 1  1001-A_20130624_0911_01.mp4-23725.jpg      0.05     0.00    0.02     0
## 2 10023-A_20141109_2527_01.mp4-51935.jpg      0.07     0.00    0.00     0
## 3  1003-A_20130624_0911_01.mp4-25660.jpg      0.00     0.00    0.06     0
## 4  1004-A_20130624_0911_01.mp4-25880.jpg      0.70     0.00    0.00     0
## 5 10055-A_20141109_2527_02.mp4-30595.jpg      0.85     0.15    0.00     0
## 6 10143-A_20141122_2609_01.mp4-45825.jpg      0.90     0.10    0.00     0
##   contempt disgust fear neutral isBaby note
## 1        0    0.00    0    0.93     No  Yes
## 2        0    0.00    0    0.93     No <NA>
## 3        0    0.02    0    0.92     No <NA>
## 4        0    0.00    0    0.30     No <NA>
## 5        0    0.00    0      NA   <NA> <NA>
## 6        0    0.00    0      NA   <NA> <NA>

# View(d.tidy)

 d.tidy <- d.tidy %>%
    mutate(human_label = case_when(pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == happiness ~ "happy",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == neutral ~ "neutral",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == surprise ~ "surprise",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == sadness ~ "sad",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == contempt ~ "contempt",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == anger ~ "anger",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == disgust ~ "disgust",
                           pmax(happiness, neutral, surprise, sadness, contempt, anger, disgust, fear) == fear ~ "fear")) %>%
  select(human_label, filename, isBaby)

merged <-  d.tidy %>%
  left_join(azure %>% select(azure_label, filename, age), by='filename') %>%
  filter(!is.na(filename)) %>%
  left_join(rekognition %>% select(rekog_label, filename), by='filename')

Compare human and machine annotations

Functions to evaluate detections

# Function to evaluate detectors
evaluate_detector <- function(truth, detection) {
  if (truth == TRUE) {
    if (truth == detection) return ("TP") # e.g. was face/wrist, detected face/wrist
    else return("FN") # e.g.  was face/wrist, missed face/wrist
  }
  else {
    if (truth == detection) return("TN") # e.g.  was not face/wrist, did not detect face/wrist
    else return("FP") # e.g.  was not face/wrist, detected face/wrist
  }
}

## computers p/r/f as a function of emotion, and detector form merged data
return_eval = function(this_emotion, which_detector, merged=merged){

 for_eval <- merged %>%
  filter(!is.na(human_label)) %>%
  mutate(human = (human_label == this_emotion), machine = (!!as.name(which_detector) == this_emotion)) %>%
  rowwise() %>%
  mutate(eval = evaluate_detector(human,machine)) %>%
  ungroup() %>%
  summarize(emotion = this_emotion, detector = which_detector, samples = sum(human_label==this_emotion), tp=sum(eval == "TP"), 
          fp=sum(eval == "FP"), 
          fn=sum(eval == "FN"),
          p = tp / (tp + fp),
          r = tp / (tp + fn),
          f=( 2 * p * r )/ (p + r))
}

Go through consistent emotions and each detector and get evaluations

count = 0
emotions = c("neutral",  "happy" ,  "sad" ,  "surprise", "fear", "anger")
for (e in emotions) {
  for (detector in c('azure_label','rekog_label')) {
    this_eval = return_eval(e, detector, merged)
    count = count+ 1
    if (count==1){
      all_evals = this_eval
    }
    else {
    all_evals <- all_evals %>%
      full_join(this_eval)
    }
  }
}

## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")
## Joining, by = c("emotion", "detector", "samples", "tp", "fp", "fn", "p", "r", "f")

Output table

all_evals %>%
  kable()

emotion	detector	samples	tp	fp	fn	p	r	f
neutral	azure_label	856	551	103	305	0.8425076	0.6436916	0.7298013
neutral	rekog_label	856	375	65	481	0.8522727	0.4380841	0.5787037
happy	azure_label	249	187	275	62	0.4047619	0.7510040	0.5260197
happy	rekog_label	249	148	170	101	0.4654088	0.5943775	0.5220459
sad	azure_label	85	11	55	74	0.1666667	0.1294118	0.1456954
sad	rekog_label	85	22	198	63	0.1000000	0.2588235	0.1442623
surprise	azure_label	32	8	38	24	0.1739130	0.2500000	0.2051282
surprise	rekog_label	32	6	87	26	0.0645161	0.1875000	0.0960000
fear	azure_label	6	1	1	5	0.5000000	0.1666667	0.2500000
fear	rekog_label	6	4	99	2	0.0388350	0.6666667	0.0733945
anger	azure_label	3	0	0	3	NaN	0.0000000	NaN
anger	rekog_label	3	0	43	3	0.0000000	0.0000000	NaN

Plots of p/r/f

all_evals <- all_evals %>%
  mutate(emotion = fct_reorder(emotion, tp, .desc=TRUE))

p = ggplot(all_evals, aes(x=emotion, y=p, col=detector, size = samples)) +
  geom_point(position = position_dodge(width=.8)) +
  theme_few() + 
  theme(legend.position = 'none') + 
  ggtitle('Precision') + 
  ylim(0, 1)

r = ggplot(all_evals, aes(x=emotion, y=r, col=detector, size = samples)) +
  geom_point(position = position_dodge(width=.8)) +
  theme_few() + 
  theme(legend.position = 'none')  + 
  ggtitle('Recall') + 
  ylim(0, 1)
 

f = ggplot(all_evals, aes(x=emotion, y=f, col=detector, size = samples)) +
  geom_point(position = position_dodge(width=.8)) +
  theme_few() + 
  ggtitle('F-score') +
  ylim(0, 1)

ggarrange(p,r,f, nrow=1)

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_point).

step3_model_validation

Bria Long

9/8/2020

Load & preprocess data

Load preprocessed azure data

Load and preprocess rekognition data

Note that we have many more faces on rekognition dataset

Load and preprocess human data

Compare human and machine annotations

Functions to evaluate detections

Go through consistent emotions and each detector and get evaluations

Output table

Plots of p/r/f