1 Set up R environment

library(tidyverse)
library(ggplot2)
library(ggpubr)
library(plyr)
library(magick)
library(png)
library(EBImage)
library(lme4)
library(lmerTest)

2 Set the R working drectory to the main experiment directory.

setwd("/Users/adambarnas/Box/MetaAwareness/data/")  

3 Read in the individual subject files.

tbl_all <- list.files(path = "./Ma_Recontact", pattern = "*.csv", full.names = T, ignore.case = F) %>%
  map_df(~read.csv(., colClasses=c("gender..m.f."="character")))
tbl_all = subset(tbl_all, select = c(user_resp.keys,user_resp.rt,workerId,image_a))
col_idx <- grep("workerId", names(tbl_all))
tbl_all <- tbl_all[, c(col_idx, (1:ncol(tbl_all))[-col_idx])]
tbl_all <- data.frame(na.omit(tbl_all))
tbl_all <- tbl_all %>%
separate(image_a,into=c('database', 'image', NA), sep = "([\\_\\-])")

4 Compute average likelihood rating.

tbl_all_subj_avg <- tbl_all %>%
  group_by(workerId,image) %>%
  dplyr::summarize(average = mean(user_resp.keys)) %>%
  spread(image,average) %>% 
  mutate(subj_avg = rowMeans(.[-1], na.rm = TRUE))
mean(tbl_all_subj_avg$subj_avg)
## [1] 2.926667
tbl_all_img_avg <- data.frame(img_avg = colMeans(tbl_all_subj_avg[,2:70], na.rm = TRUE))
tbl_all_img_avg <- tibble::rownames_to_column(tbl_all_img_avg, "image")

5 Merge Mudsplash and Meta-Awareness data files.

ma_RTs_raw <- read_csv("Ma_RTs_raw.csv")
ma_RTs_raw <- ma_RTs_raw[, -c(2,3,6:16,18,19)]
ma_RTs_raw <- ma_RTs_raw[(ma_RTs_raw$workerId %in% tbl_all_subj_avg$workerId),]

tbl_all <- tbl_all[order(tbl_all$workerId, tbl_all$image), , drop = FALSE]
ma_RTs_raw <- ma_RTs_raw[order(ma_RTs_raw$workerId, ma_RTs_raw$image), , drop = FALSE]
ma_RTs_raw <- ma_RTs_raw %>%  
    mutate(image = as.character(image))

ma_RTs_likelihood <- left_join(tbl_all, ma_RTs_raw, by = c("workerId", "image"))
colnames(ma_RTs_likelihood)[2] <- "likelihood_rating"
colnames(ma_RTs_likelihood)[3] <- "likelihood_rt"
colnames(ma_RTs_likelihood)[7] <- "detection_rt"
ma_RTs_likelihood <- ma_RTs_likelihood[, c(-4,-6)]
ma_RTs_likelihood <- ma_RTs_likelihood[,c(1,4,5,2,3)]

6 Compute likelihood rating for each image.

ma_RTs_likelihood %>%
  ggbarplot(x = "image", y = "likelihood_rating", ylab = "Likelihood of Detecting Change", title = "All images (30 per subject)", fill = "#f7a800", add = "mean_se", font.xtickslab = 8, sort.val = c("asc")) + rotate_x_text() + theme(legend.position = "none")

ma_RTs_likelihood_no_NA <- ma_RTs_likelihood %>%
  drop_na()
ma_RTs_likelihood_no_NA %>% 
  ggbarplot(x = "image", y = "likelihood_rating", ylab = "Likelihood of Detecting Change", title = "'Correct' images", fill = "#f7a800", add = "mean_se", font.xtickslab = 8, sort.val = c("asc")) + rotate_x_text() + theme(legend.position = "none")

write.csv(ma_RTs_likelihood,'ma_RTs_likelihood.csv', row.names=FALSE)
write.csv(ma_RTs_likelihood_no_NA,'ma_RTs_likelihood_no_NA.csv', row.names=FALSE)

7 Count number of ratings

ma_RTs_likelihood_count <- ma_RTs_likelihood_no_NA %>% 
  group_by(workerId,image) %>% 
  dplyr::summarize(counts = n()) %>%
  spread(image,counts) %>%
  mutate(sum = rowSums(.[-1], na.rm = TRUE))
#head(tbl_all_counts,10)

ma_RTs_likelihood_count <- data.frame(count = colSums(ma_RTs_likelihood_count[,2:70], na.rm = TRUE))
ma_RTs_likelihood_count <- tibble::rownames_to_column(ma_RTs_likelihood_count, "image")
ma_RTs_likelihood_count
##       image count
## 1  10504629     5
## 2  10810329     5
## 3   1191801     5
## 4  12115280     3
## 5  12178414     5
## 6  13141692     4
## 7   1383096     2
## 8  13873251     4
## 9  16527526     1
## 10 18169626     1
## 11 18345691     4
## 12 22020472     3
## 13 23024660     4
## 14 23199105     3
## 15 24383097     2
## 16 25107991     3
## 17 27857618     4
## 18  3099758     6
## 19 31236119     3
## 20 32289063     2
## 21 38466626     3
## 22 38546029     1
## 23 42429798     1
## 24  4247084     4
## 25 44993860     3
## 26 45525109     2
## 27 46475259     5
## 28 46635293     3
## 29 48384711     2
## 30 48486405     6
## 31 51537628     5
## 32 51856108     4
## 33 55174490     3
## 34 56835136     2
## 35 57861456     5
## 36 61118260     6
## 37 62096551     4
## 38 62224663     5
## 39 67862299     3
## 40 69128765     2
## 41 70687495     3
## 42 72488522     3
## 43 73637203     5
## 44 74173745     5
## 45 75081153     2
## 46 75958241     5
## 47 77345858     7
## 48 77574131     2
## 49 77793328     3
## 50 79191795     1
## 51 79222679     6
## 52 79241011     4
## 53 79573638     4
## 54  8197559     2
## 55 81993755     1
## 56 83536470     2
## 57 83691215     3
## 58 83785171     5
## 59 85741618     3
## 60 86520382     2
## 61 87983207     2
## 62 88767165     2
## 63 89354846     4
## 64  8974554     6
## 65 90405028     4
## 66 95091295     3
## 67 97475929     3
## 68 98156944     5
## 69      sum   235

8 Mixed effects model and correlation.

fit0 <- lmer(detection_rt ~ likelihood_rating + (1 | workerId) + (1 | image), data=ma_RTs_likelihood_no_NA)
summary(fit0)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: detection_rt ~ likelihood_rating + (1 | workerId) + (1 | image)
##    Data: ma_RTs_likelihood_no_NA
## 
## REML criterion at convergence: 1163.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3285 -0.4990 -0.1848  0.1493  5.1449 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  image    (Intercept) 0.3704   0.6086  
##  workerId (Intercept) 3.8794   1.9696  
##  Residual             7.1450   2.6730  
## Number of obs: 235, groups:  image, 68; workerId, 10
## 
## Fixed effects:
##                   Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)         9.5588     0.8113  19.5094  11.781 2.55e-10 ***
## likelihood_rating  -0.3273     0.1537 159.3415  -2.129   0.0348 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## liklhd_rtng -0.562
corr <- ma_RTs_likelihood_no_NA %>% 
  group_by(image) %>% 
  dplyr::summarize(detection_rt = mean(detection_rt), likelihood_rating = mean(likelihood_rating))
corr %>%
  ggscatter(y = "detection_rt", x = "likelihood_rating", ylab = "Change Detection RT (sec)", xlab = "Likelihood of Detecting Change", title = "N = 10", fill = "#f7a800", color = "#f7a800", add = "reg.line", cor.coef = TRUE, cor.coeff.args = list(method = "pearson", label.x = 1, label.y = 5, label.sep = "\n"), xlim = c(1, 5), ylim = c(0, 20))