1 Experiment details

2 Set up R environment

library(tidyverse)
library(ggplot2)
library(ggpubr)
library(plyr)
library(magick)
library(rstatix)
library(reshape2)
library(knitr)
library(lme4)
library(psycho)
library(Hmisc)

Make sure you’re in the right directory. Set the R working directory to the main experiment directory, which is where this markdown is saved, along with any supporting material and the raw data, which are stored in a subdirectory.

# NOTE(review): this is an absolute, user-specific path, so the script only runs
# unchanged on a machine with the same directory layout. The relative path
# "./data_v3/" used when reading the data is resolved against this directory.
setwd("/Users/adambarnas/Box/MeridianCB")  

3 Read-in and manipulate datafiles

Read in the individual subject files (saved automatically on the server as csv files).

# Read every per-subject CSV in ./data_v3/ and stack them into one table.
# `pattern` is a regular expression, not a shell glob: "*.csv" would also match
# any file name that merely contains "csv", so anchor on the literal ".csv"
# extension instead.
tbl_all <- list.files(
  path = "./data_v3/",
  pattern = "\\.csv$",
  full.names = TRUE
) %>%
  map_df(~ read_csv(.))
# Convert the tibble to a base data.frame (base-style indexing is used below).
tbl_all <- data.frame(tbl_all)

Confirm the number of subjects and make sure the sample size reflects the number of data files in the data subdirectory.

# Number of distinct subjects (workerId values) in the combined data.
length(unique(tbl_all$workerId))
## [1] 36

Next, define trial conditions by breaking apart the name of the image, given by objs_image column.

# Split the image name in `objs_image` into six condition columns.
# `sep` is tidyr's default separator (any run of non-alphanumeric characters),
# written out here so the splitting rule is visible.
tbl_all <- separate(
  tbl_all,
  objs_image,
  into = c(
    "change_or_no_change",
    "rectangle_orientation",
    "cue_loc",
    "change_loc",
    "validity",
    "display_num"
  ),
  sep = "[^[:alnum:]]+"
)

For clarity, rename all the variable values that are now given by the change_or_no_change and validity variable.

# Replace the one/two-letter codes from the image name with readable labels.
# Trial type: "C" -> "change", "NC" -> "no_change" (stored as a factor).
tbl_all$change_or_no_change <- recode_factor(
  tbl_all$change_or_no_change,
  C = "change",
  NC = "no_change"
)

# Validity: code "C" is a catch trial, labelled as practice when it occurs in
# the first 9 trials. Codes not listed here become NA.
tbl_all$validity <- with(
  tbl_all,
  case_when(
    validity == "C" & trial_number <= 9 ~ "practice_catch",
    validity == "C" & trial_number > 9 ~ "catch",
    validity == "P" ~ "practice_main",
    validity == "V" ~ "valid",
    validity == "IS" ~ "invalid_same",
    validity == "ID" ~ "invalid_different"
  )
)

Let’s also assign the trials to bins based on the trial number. The practice trials (the first 9 for each subject) will be labeled “0” since they are not factored into any analyses.

# Assign each trial to a time bin based on its trial number:
#   trials   1-9   (practice) -> bin 0
#   trials  10-205 (main)     -> bins 1-7, 28 consecutive trials per bin
# Trial numbers outside 1-205 get NA. The previous version produced the same
# NA by coercing a "filler" string with as.numeric(), which raised a coercion
# warning; a single cut() call replaces the eight copy-pasted range assignments.
# Breaks are 1, 10, 38, ..., 206 with left-closed intervals, so the interval
# index minus one is the bin number.
tbl_all$bin <- as.numeric(
  cut(
    as.numeric(tbl_all$trial_number),
    breaks = c(1, seq(from = 10, to = 206, by = 28)),
    right = FALSE,
    labels = FALSE
  )
) - 1

**This table contains the number of change trials for each individual.** There were 120 main change trials, of which 60% were valid (72 trials) and 40% were invalid (24 trials for each invalid type). There were also 36 change catch trials. The numbers of each trial type were split evenly among the 4 cue locations. The last variable, “change_total_sum”, is the total number of change trials saved for each participant. There were 156 change trials in total.

# Per-subject number of change trials in each non-practice validity condition
# (one row per subject, one column per condition), plus their row total.
tbl_all_change_counts <- tbl_all %>%
  filter(
    change_or_no_change == "change",
    validity %in% c("valid", "invalid_same", "invalid_different", "catch")
  ) %>%
  group_by(workerId, validity) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
# Total over every condition column (everything except workerId).
tbl_all_change_counts$change_total_sum <- rowSums(
  tbl_all_change_counts[, -1],
  na.rm = TRUE
)
kable(tbl_all_change_counts)
workerId catch invalid_different invalid_same valid change_total_sum
A10AVWALIHR4UQ 36 24 24 72 156
A12FTSX85NQ8N9 36 24 24 72 156
A1916FQFP74ED6 36 24 24 72 156
A198MSVO1VTAT5 36 24 24 72 156
A1BNYBF79IQ89T 36 24 24 72 156
A1CNTYNF0MIBK8 36 24 24 72 156
A1EBGIM9CZTQLX 36 24 24 72 156
A1F3XD5WEK7H7E 36 24 24 72 156
A1G4MDRJ3Y36DA 36 24 24 72 156
A1P2RQ166VS5BT 36 24 24 72 156
A25N8DM670K4MA 36 24 24 71 155
A26LOVXF4QZZCO 36 24 24 72 156
A28AXX4NCWPH1F 36 24 24 72 156
A28RKM9Q1FIZEK 36 24 24 72 156
A2F2DDH12YU4AK 36 24 24 72 156
A2MUCL20GTQJA0 36 24 24 72 156
A2NO3OTBZYY43Z 36 24 24 72 156
A2V27A9GZA1NR2 36 24 24 72 156
A386U6K4J1ANQI 36 24 24 72 156
A3B9QNYDDXGL9I 36 24 24 72 156
A3C9SDM1TKNS65 36 24 24 72 156
A3CP03KUNUMEWF 36 24 24 72 156
A3I448WWTX2A2D 36 24 24 72 156
A3IBAGEKMDEGSI 36 24 24 72 156
A3JMLA0FWLP0WA 36 24 24 72 156
A3KCAT0C3NWVDX 36 24 24 72 156
A3PLWSCPFLCEGI 36 24 24 72 156
A3QT2DS5XYKYHU 36 24 24 72 156
A4I69DE7BI20I 36 24 24 73 157
A8C3WNWRBWUXO 36 24 24 72 156
AA8PZKO9XGCKO 36 24 24 72 156
ABYFMKPTF0SHG 36 24 24 72 156
AMPMTF5IAAMK8 36 24 24 72 156
ANV9R86DM2YG5 36 24 24 72 156
AQMJMYR9MANOG 36 24 24 72 156
ASL9UNVWVM9AW 36 24 24 72 156

The change trial counts can also be binned over time.

# Same change-trial counts as above, additionally split by time bin
# (one row per subject x bin).
tbl_all_change_counts_bin <- tbl_all %>%
  filter(
    change_or_no_change == "change",
    validity %in% c("valid", "invalid_same", "invalid_different", "catch")
  ) %>%
  group_by(workerId, validity, bin) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
# Conditions that did not occur in a given bin come back as NA; count them as 0.
tbl_all_change_counts_bin[is.na(tbl_all_change_counts_bin)] <- 0
# Row total over the condition columns (all but workerId and bin).
tbl_all_change_counts_bin$sum <- rowSums(
  tbl_all_change_counts_bin[, -c(1, 2)],
  na.rm = TRUE
)
# kable(tbl_all_change_counts_bin)

**This table contains the number of no-change trials for each individual.** There were 40 no-change trials. The numbers of each trial type were split evenly among the 4 cue locations. The last variable, “no_change_total_sum”, is the total number of no-change trials saved for each participant.

# Per-subject number of no-change trials (these carry the "valid" label).
tbl_all_no_change_counts <- tbl_all %>%
  filter(change_or_no_change == "no_change", validity == "valid") %>%
  group_by(workerId, validity) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
# After spreading there is a single count column; give it a descriptive name.
names(tbl_all_no_change_counts) <- c("workerId", "no_change_total_sum")
kable(tbl_all_no_change_counts)
workerId no_change_total_sum
A10AVWALIHR4UQ 40
A12FTSX85NQ8N9 40
A1916FQFP74ED6 40
A198MSVO1VTAT5 40
A1BNYBF79IQ89T 40
A1CNTYNF0MIBK8 40
A1EBGIM9CZTQLX 40
A1F3XD5WEK7H7E 40
A1G4MDRJ3Y36DA 40
A1P2RQ166VS5BT 40
A25N8DM670K4MA 40
A26LOVXF4QZZCO 40
A28AXX4NCWPH1F 40
A28RKM9Q1FIZEK 40
A2F2DDH12YU4AK 40
A2MUCL20GTQJA0 40
A2NO3OTBZYY43Z 40
A2V27A9GZA1NR2 40
A386U6K4J1ANQI 40
A3B9QNYDDXGL9I 40
A3C9SDM1TKNS65 40
A3CP03KUNUMEWF 40
A3I448WWTX2A2D 40
A3IBAGEKMDEGSI 40
A3JMLA0FWLP0WA 40
A3KCAT0C3NWVDX 40
A3PLWSCPFLCEGI 40
A3QT2DS5XYKYHU 40
A4I69DE7BI20I 40
A8C3WNWRBWUXO 40
AA8PZKO9XGCKO 40
ABYFMKPTF0SHG 40
AMPMTF5IAAMK8 40
ANV9R86DM2YG5 40
AQMJMYR9MANOG 40
ASL9UNVWVM9AW 40

The no-change trial counts can also be binned over time.

# No-change trial counts per subject and time bin.
tbl_all_no_change_counts_bin <- tbl_all %>%
  filter(change_or_no_change == "no_change", validity == "valid") %>%
  group_by(workerId, validity, bin) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
# Replace any NA cells with 0 before renaming the columns.
tbl_all_no_change_counts_bin[is.na(tbl_all_no_change_counts_bin)] <- 0
names(tbl_all_no_change_counts_bin) <- c("workerId", "bin", "no_change")
# kable(tbl_all_no_change_counts_bin)

Calculate the number of main change trials, excluding catch trials. Also split these main-trial counts over time.

# Main change trials only (catch trials excluded): per-subject counts in each
# validity condition, plus the row total in `sum`.
tbl_all_change_counts_no_catch <- tbl_all %>%
  filter(
    change_or_no_change == "change",
    validity %in% c("valid", "invalid_same", "invalid_different")
  ) %>%
  group_by(workerId, validity) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
tbl_all_change_counts_no_catch$sum <- rowSums(
  tbl_all_change_counts_no_catch[, -1],
  na.rm = TRUE
)
# kable(tbl_all_change_counts_no_catch)

# The same main-trial counts, split by time bin (one row per subject x bin).
tbl_all_change_counts_no_catch_bin <- tbl_all %>%
  filter(
    change_or_no_change == "change",
    validity %in% c("valid", "invalid_same", "invalid_different")
  ) %>%
  group_by(workerId, validity, bin) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
# Conditions absent from a bin are NA after spreading; treat them as 0 trials.
tbl_all_change_counts_no_catch_bin[is.na(tbl_all_change_counts_no_catch_bin)] <- 0
tbl_all_change_counts_no_catch_bin$sum <- rowSums(
  tbl_all_change_counts_no_catch_bin[, -c(1, 2)],
  na.rm = TRUE
)
# kable(tbl_all_change_counts_no_catch_bin)

The data are loaded. Next, look at the quality of the data by examining the accuracy.

4 Analyze accuracy

The changing object could appear in four possible locations. Subjects were instructed to press ‘F’ if there was a changing object and to press ‘J’ if there was no changing object. Trials are labeled 1 for correct responses (‘F’ on change trials and ‘J’ on no-change trials) or 0 for incorrect responses (‘J’ on change trials and ‘F’ on no-change trials).

# Score every trial: 1 = correct, 0 = incorrect.
# The correct key is "F" on change trials and "J" on no-change trials.
# This is vectorised rather than a row-by-row loop, which also means:
#   * a missing key press (NA) is scored 0 instead of aborting the `if`;
#   * a trial whose type is NA gets acc = NA when a key was pressed;
#   * `acc` is stored as integer 0/1 instead of the character "0"/"1" that
#     resulted from overwriting a "filler" string column.
tbl_all$acc <- as.integer(
  !is.na(tbl_all$key) &
    tbl_all$key == ifelse(tbl_all$change_or_no_change == "change", "F", "J")
)

# Split the scored trials by trial type for the accuracy analyses below.
tbl_all_change_acc <- filter(tbl_all, change_or_no_change == "change")

tbl_all_no_change_acc <- filter(tbl_all, change_or_no_change == "no_change")

4.1 Accuracy on catch trials

Sum the number of good catch trials (1) to get the total number of accurate catch trials per subject. There were 36 total catch trials. **Catch trials consisted of one object always changing to a letter.**

# Per-subject catch-trial performance: number of incorrect, correct, and total
# catch trials.
# The counts are computed by name inside summarize() rather than by spreading
# `acc` and renaming the columns by position. The positional rename silently
# mislabels (or fails) if no subject has an incorrect, or no subject has a
# correct, catch trial, because the "0" or "1" column would then be absent.
tbl_good_catch_acc_all_main_acc_counts <- tbl_all_change_acc %>%
  filter(validity == "catch") %>%
  group_by(workerId) %>%
  dplyr::summarize(
    inacc_catch = sum(acc == 0, na.rm = TRUE),
    acc_catch = sum(acc == 1, na.rm = TRUE)
  )
# Total is the sum of the two outcome counts, as before.
tbl_good_catch_acc_all_main_acc_counts$total_catch <-
  tbl_good_catch_acc_all_main_acc_counts$inacc_catch +
  tbl_good_catch_acc_all_main_acc_counts$acc_catch
kable(tbl_good_catch_acc_all_main_acc_counts)
workerId inacc_catch acc_catch total_catch
A10AVWALIHR4UQ 0 36 36
A12FTSX85NQ8N9 21 15 36
A1916FQFP74ED6 10 26 36
A198MSVO1VTAT5 20 16 36
A1BNYBF79IQ89T 33 3 36
A1CNTYNF0MIBK8 14 22 36
A1EBGIM9CZTQLX 0 36 36
A1F3XD5WEK7H7E 18 18 36
A1G4MDRJ3Y36DA 16 20 36
A1P2RQ166VS5BT 8 28 36
A25N8DM670K4MA 17 19 36
A26LOVXF4QZZCO 0 36 36
A28AXX4NCWPH1F 2 34 36
A28RKM9Q1FIZEK 22 14 36
A2F2DDH12YU4AK 9 27 36
A2MUCL20GTQJA0 0 36 36
A2NO3OTBZYY43Z 1 35 36
A2V27A9GZA1NR2 1 35 36
A386U6K4J1ANQI 21 15 36
A3B9QNYDDXGL9I 3 33 36
A3C9SDM1TKNS65 21 15 36
A3CP03KUNUMEWF 5 31 36
A3I448WWTX2A2D 15 21 36
A3IBAGEKMDEGSI 21 15 36
A3JMLA0FWLP0WA 19 17 36
A3KCAT0C3NWVDX 32 4 36
A3PLWSCPFLCEGI 0 36 36
A3QT2DS5XYKYHU 7 29 36
A4I69DE7BI20I 24 12 36
A8C3WNWRBWUXO 0 36 36
AA8PZKO9XGCKO 1 35 36
ABYFMKPTF0SHG 10 26 36
AMPMTF5IAAMK8 24 12 36
ANV9R86DM2YG5 14 22 36
AQMJMYR9MANOG 20 16 36
ASL9UNVWVM9AW 16 20 36

Divide the number of accurate catch trials by the total number of catch trials for each participant. The resulting value is the subject’s catch trial accuracy rate.

# Catch-trial accuracy rate per subject: correct catch trials / all catch trials.
tbl_all_catch_acc_rate <- data.frame(
  workerId = tbl_good_catch_acc_all_main_acc_counts$workerId,
  catch_rate = tbl_good_catch_acc_all_main_acc_counts$acc_catch /
    tbl_good_catch_acc_all_main_acc_counts$total_catch,
  stringsAsFactors = FALSE
)
kable(tbl_all_catch_acc_rate)
workerId catch_rate
A10AVWALIHR4UQ 1.0000000
A12FTSX85NQ8N9 0.4166667
A1916FQFP74ED6 0.7222222
A198MSVO1VTAT5 0.4444444
A1BNYBF79IQ89T 0.0833333
A1CNTYNF0MIBK8 0.6111111
A1EBGIM9CZTQLX 1.0000000
A1F3XD5WEK7H7E 0.5000000
A1G4MDRJ3Y36DA 0.5555556
A1P2RQ166VS5BT 0.7777778
A25N8DM670K4MA 0.5277778
A26LOVXF4QZZCO 1.0000000
A28AXX4NCWPH1F 0.9444444
A28RKM9Q1FIZEK 0.3888889
A2F2DDH12YU4AK 0.7500000
A2MUCL20GTQJA0 1.0000000
A2NO3OTBZYY43Z 0.9722222
A2V27A9GZA1NR2 0.9722222
A386U6K4J1ANQI 0.4166667
A3B9QNYDDXGL9I 0.9166667
A3C9SDM1TKNS65 0.4166667
A3CP03KUNUMEWF 0.8611111
A3I448WWTX2A2D 0.5833333
A3IBAGEKMDEGSI 0.4166667
A3JMLA0FWLP0WA 0.4722222
A3KCAT0C3NWVDX 0.1111111
A3PLWSCPFLCEGI 1.0000000
A3QT2DS5XYKYHU 0.8055556
A4I69DE7BI20I 0.3333333
A8C3WNWRBWUXO 1.0000000
AA8PZKO9XGCKO 0.9722222
ABYFMKPTF0SHG 0.7222222
AMPMTF5IAAMK8 0.3333333
ANV9R86DM2YG5 0.6111111
AQMJMYR9MANOG 0.4444444
ASL9UNVWVM9AW 0.5555556

Plot the group’s overall accuracy on the catch trials.

# Group-level bar (mean +/- SE across subjects) of catch-trial accuracy.
ggbarplot(
  tbl_all_catch_acc_rate,
  y = "catch_rate",
  ylab = "Accuracy",
  xlab = "Group",
  title = "Group Catch Performance",
  fill = "#f7a800",
  color = "#f7a800",
  add = "mean_se",
  ylim = c(0, 1),
  width = 0.5,
  label = TRUE,
  lab.nb.digits = 2,
  lab.vjust = -2
)

Let’s also take a look at each individual subject’s catch trial performance rate.

# One bar per subject, sorted ascending; the dashed line marks 0.5.
ggbarplot(
  tbl_all_catch_acc_rate,
  x = "workerId",
  y = "catch_rate",
  ylab = "Accuracy",
  title = "Individual Catch Performance",
  fill = "#f7a800",
  color = "#f7a800",
  ylim = c(0, 1),
  sort.val = "asc",
  font.xtickslab = 8
) +
  rotate_x_text() +
  geom_hline(yintercept = 0.5, linetype = 2)

4.2 Accuracy on change trials

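For the rest of the analyses, focus on the participants with good catch rate performance. Select the subjects with good catch trial rates from the original tbl_all. Note that the exclusion step is currently disabled (the filter on tbl_good_catch_acc_rate is commented out), so all subjects are retained.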

# Keep the trials of every subject listed in the catch-rate table.
# (Disabled alternative: restrict to tbl_good_catch_acc_rate$workerId instead.)
# tbl_good_catch_acc_all_main_acc <- tbl_all[(tbl_all$workerId %in% tbl_good_catch_acc_rate$workerId), ]
tbl_good_catch_acc_all_main_acc <- tbl_all[
  tbl_all$workerId %in% tbl_all_catch_acc_rate$workerId,
]

Verify subject count.

# Number of distinct subjects retained.
length(unique(tbl_good_catch_acc_all_main_acc$workerId))
## [1] 36

Here is a table containing the number of trials for each individual after excluding main trials based on accuracy. Again, there were 120 main change trials, of which 60% were valid (72 trials) and 40% were invalid (24 trials for each invalid type). This chunk will also bin the counts in a separate table.

# Number of CORRECT main change trials per subject and validity condition.
# Correct trials are counted with sum(acc == 1) over all main change trials
# instead of filtering to acc == 1 and counting rows. Filtering first drops any
# subject (or condition) with zero correct trials from the table, which would
# misalign the row-by-row division by the original trial counts further down.
tbl_good_catch_acc_all_main_acc_counts <- tbl_good_catch_acc_all_main_acc %>%
  filter(
    change_or_no_change == "change",
    validity %in% c("valid", "invalid_same", "invalid_different")
  ) %>%
  group_by(workerId, validity) %>%
  dplyr::summarize(counts = sum(acc == 1, na.rm = TRUE)) %>%
  spread(validity, counts)
# Row total over the condition columns (everything except workerId).
tbl_good_catch_acc_all_main_acc_counts$sum <- rowSums(
  tbl_good_catch_acc_all_main_acc_counts[, -1],
  na.rm = TRUE
)
# A subject with no trials at all in some condition still yields NA; show as 0.
tbl_good_catch_acc_all_main_acc_counts[is.na(tbl_good_catch_acc_all_main_acc_counts)] <- 0
kable(tbl_good_catch_acc_all_main_acc_counts)
workerId invalid_different invalid_same valid sum
A10AVWALIHR4UQ 0 2 53 55
A12FTSX85NQ8N9 1 0 67 68
A1916FQFP74ED6 13 10 41 64
A198MSVO1VTAT5 7 3 69 79
A1BNYBF79IQ89T 13 13 26 52
A1CNTYNF0MIBK8 10 12 33 55
A1EBGIM9CZTQLX 14 13 42 69
A1F3XD5WEK7H7E 3 1 57 61
A1G4MDRJ3Y36DA 9 8 37 54
A1P2RQ166VS5BT 1 2 54 57
A25N8DM670K4MA 13 17 36 66
A26LOVXF4QZZCO 8 13 56 77
A28AXX4NCWPH1F 1 0 71 72
A28RKM9Q1FIZEK 11 10 26 47
A2F2DDH12YU4AK 11 8 26 45
A2MUCL20GTQJA0 15 12 56 83
A2NO3OTBZYY43Z 14 15 52 81
A2V27A9GZA1NR2 21 15 49 85
A386U6K4J1ANQI 14 11 47 72
A3B9QNYDDXGL9I 16 15 43 74
A3C9SDM1TKNS65 10 14 37 61
A3CP03KUNUMEWF 17 15 45 77
A3I448WWTX2A2D 14 14 39 67
A3IBAGEKMDEGSI 9 11 33 53
A3JMLA0FWLP0WA 7 12 39 58
A3KCAT0C3NWVDX 4 6 12 22
A3PLWSCPFLCEGI 21 24 70 115
A3QT2DS5XYKYHU 7 7 16 30
A4I69DE7BI20I 8 8 21 37
A8C3WNWRBWUXO 8 6 63 77
AA8PZKO9XGCKO 22 20 66 108
ABYFMKPTF0SHG 10 13 37 60
AMPMTF5IAAMK8 4 8 61 73
ANV9R86DM2YG5 10 15 27 52
AQMJMYR9MANOG 14 12 34 60
ASL9UNVWVM9AW 9 11 40 60
# Correct main change trials per subject, validity condition, and time bin.
# Step order matters here:
#   1. rows in bin 0 (practice) are removed;
#   2. rows are grouped by subject x validity x bin BEFORE the accuracy filter;
#   3. the filter uses .preserve = TRUE, which keeps the existing grouping
#      structure, so groups left without rows are still summarised (count 0)
#      rather than dropped -- presumably so every subject x bin row survives;
#      verify against the row count of the table this is later divided by.
# NOTE(review): because groups are formed before the validity filter, other
# validity labels present in the non-practice data (e.g. catch) should also
# appear as columns after spread(), with a count of 0 -- confirm.
tbl_good_catch_acc_all_main_acc_counts_bin <- tbl_good_catch_acc_all_main_acc %>%
  filter(bin != 0) %>% 
  group_by(workerId,validity,bin, .drop=FALSE) %>%
  filter(change_or_no_change == "change" & (validity=='valid' | validity=='invalid_same' | validity=='invalid_different') & acc == 1, .preserve = TRUE) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity,counts)
# Any remaining NA cells are treated as zero.
tbl_good_catch_acc_all_main_acc_counts_bin[is.na(tbl_good_catch_acc_all_main_acc_counts_bin)] <- 0
# Row total over every column except the first two (workerId and bin).
tbl_good_catch_acc_all_main_acc_counts_bin$sum = rowSums(tbl_good_catch_acc_all_main_acc_counts_bin[,c(-1:-2)], na.rm = TRUE)

And let’s check the number of subjects we are now working with.

# Number of distinct subjects in the correct-trial count table.
length(unique(tbl_good_catch_acc_all_main_acc_counts$workerId))
## [1] 36

Get the original number of trials for the relevant subjects.

# Original (pre-accuracy-filter) main change trial counts, restricted to the
# subjects present in the correct-trial count table.
tbl_good_catch_acc_all_main_acc_counts_original <- tbl_all_change_counts_no_catch[
  tbl_all_change_counts_no_catch$workerId %in%
    tbl_good_catch_acc_all_main_acc_counts$workerId,
]
kable(tbl_good_catch_acc_all_main_acc_counts_original)
workerId invalid_different invalid_same valid sum
A10AVWALIHR4UQ 24 24 72 120
A12FTSX85NQ8N9 24 24 72 120
A1916FQFP74ED6 24 24 72 120
A198MSVO1VTAT5 24 24 72 120
A1BNYBF79IQ89T 24 24 72 120
A1CNTYNF0MIBK8 24 24 72 120
A1EBGIM9CZTQLX 24 24 72 120
A1F3XD5WEK7H7E 24 24 72 120
A1G4MDRJ3Y36DA 24 24 72 120
A1P2RQ166VS5BT 24 24 72 120
A25N8DM670K4MA 24 24 71 119
A26LOVXF4QZZCO 24 24 72 120
A28AXX4NCWPH1F 24 24 72 120
A28RKM9Q1FIZEK 24 24 72 120
A2F2DDH12YU4AK 24 24 72 120
A2MUCL20GTQJA0 24 24 72 120
A2NO3OTBZYY43Z 24 24 72 120
A2V27A9GZA1NR2 24 24 72 120
A386U6K4J1ANQI 24 24 72 120
A3B9QNYDDXGL9I 24 24 72 120
A3C9SDM1TKNS65 24 24 72 120
A3CP03KUNUMEWF 24 24 72 120
A3I448WWTX2A2D 24 24 72 120
A3IBAGEKMDEGSI 24 24 72 120
A3JMLA0FWLP0WA 24 24 72 120
A3KCAT0C3NWVDX 24 24 72 120
A3PLWSCPFLCEGI 24 24 72 120
A3QT2DS5XYKYHU 24 24 72 120
A4I69DE7BI20I 24 24 73 121
A8C3WNWRBWUXO 24 24 72 120
AA8PZKO9XGCKO 24 24 72 120
ABYFMKPTF0SHG 24 24 72 120
AMPMTF5IAAMK8 24 24 72 120
ANV9R86DM2YG5 24 24 72 120
AQMJMYR9MANOG 24 24 72 120
ASL9UNVWVM9AW 24 24 72 120

Plot the overall accuracy at the group level (collapsed across workerId and condition).

# Main-trial accuracy per subject: correct main change trials / all main
# change trials.
# The denominator is looked up by workerId (match) instead of relying on the
# two tables having identical row order and length; a positional division
# would silently recycle or mispair subjects if either table lost a row.
tbl_overall_good_acc <- data.frame(
  workerId = tbl_good_catch_acc_all_main_acc_counts$workerId,
  main_rate = tbl_good_catch_acc_all_main_acc_counts$sum /
    tbl_good_catch_acc_all_main_acc_counts_original$sum[
      match(
        tbl_good_catch_acc_all_main_acc_counts$workerId,
        tbl_good_catch_acc_all_main_acc_counts_original$workerId
      )
    ],
  stringsAsFactors = FALSE
)
kable(tbl_overall_good_acc)
workerId main_rate
A10AVWALIHR4UQ 0.4583333
A12FTSX85NQ8N9 0.5666667
A1916FQFP74ED6 0.5333333
A198MSVO1VTAT5 0.6583333
A1BNYBF79IQ89T 0.4333333
A1CNTYNF0MIBK8 0.4583333
A1EBGIM9CZTQLX 0.5750000
A1F3XD5WEK7H7E 0.5083333
A1G4MDRJ3Y36DA 0.4500000
A1P2RQ166VS5BT 0.4750000
A25N8DM670K4MA 0.5546218
A26LOVXF4QZZCO 0.6416667
A28AXX4NCWPH1F 0.6000000
A28RKM9Q1FIZEK 0.3916667
A2F2DDH12YU4AK 0.3750000
A2MUCL20GTQJA0 0.6916667
A2NO3OTBZYY43Z 0.6750000
A2V27A9GZA1NR2 0.7083333
A386U6K4J1ANQI 0.6000000
A3B9QNYDDXGL9I 0.6166667
A3C9SDM1TKNS65 0.5083333
A3CP03KUNUMEWF 0.6416667
A3I448WWTX2A2D 0.5583333
A3IBAGEKMDEGSI 0.4416667
A3JMLA0FWLP0WA 0.4833333
A3KCAT0C3NWVDX 0.1833333
A3PLWSCPFLCEGI 0.9583333
A3QT2DS5XYKYHU 0.2500000
A4I69DE7BI20I 0.3057851
A8C3WNWRBWUXO 0.6416667
AA8PZKO9XGCKO 0.9000000
ABYFMKPTF0SHG 0.5000000
AMPMTF5IAAMK8 0.6083333
ANV9R86DM2YG5 0.4333333
AQMJMYR9MANOG 0.5000000
ASL9UNVWVM9AW 0.5000000
# Group-level bar (mean +/- SE across subjects) of main-trial accuracy.
ggbarplot(
  tbl_overall_good_acc,
  y = "main_rate",
  ylab = "Accuracy",
  xlab = "Group",
  title = "Main Trial Accuracy",
  fill = "#f7a800",
  color = "#f7a800",
  add = "mean_se",
  ylim = c(0, 1),
  width = 0.5,
  label = TRUE,
  lab.nb.digits = 2,
  lab.vjust = -2
)

4.2.0.1 Main trial accuracy relative to chance performance

# One-sample, one-sided t-test: is the mean per-subject main-trial accuracy
# greater than 0.50?
chance <- t.test(tbl_overall_good_acc$main_rate, mu = .50, alternative="greater")
# Print the test result.
chance
## 
##  One Sample t-test
## 
## data:  tbl_overall_good_acc$main_rate
## t = 1.5059, df = 35, p-value = 0.07053
## alternative hypothesis: true mean is greater than 0.5
## 95 percent confidence interval:
##  0.495306      Inf
## sample estimates:
## mean of x 
## 0.5384835

4.2.1 Accuracy over time

Look at the overall accuracy at the group level (collapsed across workerId and condition) over time.

# Trial counts per subject and bin for the three main validity conditions.
# No filter on trial type is applied here, so both change and no-change trials
# labelled with these conditions are counted.
tbl_good_no_NA_bin <- tbl_good_catch_acc_all_main_acc %>%
  filter(validity %in% c("valid", "invalid_same", "invalid_different")) %>%
  group_by(workerId, validity, bin) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
# Row total over the condition columns (all but workerId and bin).
tbl_good_no_NA_bin$sum <- rowSums(tbl_good_no_NA_bin[, -c(1, 2)], na.rm = TRUE)
# head(tbl_good_no_NA_bin, 10)

# Accuracy per subject and bin: correct main change trials / all main change
# trials in that bin, labelled with the workerId and bin columns of
# tbl_good_no_NA_bin.
# NOTE(review): the three tables are combined by row position, so this assumes
# they have the same subject x bin rows in the same order -- confirm.
tbl_overall_good_acc_bin <- cbind.data.frame(
  tbl_good_no_NA_bin[, 1:2],
  tbl_good_catch_acc_all_main_acc_counts_bin$sum /
    tbl_all_change_counts_no_catch_bin$sum
)
names(tbl_overall_good_acc_bin) <- c("workerId", "bin", "ACC")
# Line plot of mean accuracy (+/- SE across subjects) in each bin.
ggline(
  tbl_overall_good_acc_bin,
  x = "bin",
  y = "ACC",
  ylab = "Accuracy",
  xlab = " Bin",
  title = "Main Trial Accuracy Over Time",
  fill = "#f7a800",
  color = "#f7a800",
  add = "mean_se",
  ylim = c(0, 1),
  na.rm = TRUE
)

Here are some descriptive and inferential statistics (repeated measures ANOVA and post-hoc t-tests) for the effect of accuracy over time.

# Repeated-measures ANOVA of accuracy across time bins (within-subject factor).
# `bin` is stored as a number, so using it directly fits a single linear slope
# (Df = 1) instead of comparing the seven bins. Wrapping it in factor() makes
# it a 7-level within-subject factor, matching the pairwise bin comparisons
# that follow.
aov_acc_time <- aov(
  ACC ~ factor(bin) + Error(factor(workerId) / factor(bin)),
  data = tbl_overall_good_acc_bin
)
summary(aov_acc_time)
## 
## Error: factor(workerId)
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 35  5.782  0.1652               
## 
## Error: factor(workerId):bin
##           Df Sum Sq Mean Sq F value Pr(>F)
## bin        1  0.000 0.00001       0  0.989
## Residuals 35  1.269 0.03626               
## 
## Error: Within
##            Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 180  3.674 0.02041
# Post-hoc paired t-tests between every pair of bins, Bonferroni-adjusted.
pwc_acc_time <- pairwise_t_test(
  tbl_overall_good_acc_bin,
  ACC ~ bin,
  paired = TRUE,
  p.adjust.method = "bonferroni"
)
pwc_acc_time
## # A tibble: 21 x 10
##    .y.   group1 group2    n1    n2 statistic    df     p p.adj p.adj.signif
##  * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
##  1 ACC   1      2         36    36   -0.263     35 0.794     1 ns          
##  2 ACC   1      3         36    36   -1.60      35 0.118     1 ns          
##  3 ACC   1      4         36    36   -0.763     35 0.45      1 ns          
##  4 ACC   1      5         36    36   -0.718     35 0.478     1 ns          
##  5 ACC   1      6         36    36   -0.563     35 0.577     1 ns          
##  6 ACC   1      7         36    36   -0.0102    35 0.992     1 ns          
##  7 ACC   2      3         36    36   -1.13      35 0.265     1 ns          
##  8 ACC   2      4         36    36   -0.357     35 0.723     1 ns          
##  9 ACC   2      5         36    36   -0.354     35 0.725     1 ns          
## 10 ACC   2      6         36    36   -0.386     35 0.702     1 ns          
## # … with 11 more rows

4.2.2 Accuracy by validity

Look at the overall accuracy for the group by validity (valid, invalid-same etc.).

# Accuracy per subject and validity condition: correct counts divided
# cell-by-cell by the original counts (every column except workerId), then
# reshaped to long format with `validity` as an ordered-by-appearance factor.
# NOTE(review): the division pairs rows and columns by position, so both count
# tables must share the same subject order and column order -- confirm.
tbl_overall_good_acc_cond <- cbind.data.frame(
  tbl_good_catch_acc_all_main_acc_counts[, 1],
  tbl_good_catch_acc_all_main_acc_counts[-1] /
    tbl_good_catch_acc_all_main_acc_counts_original[-1]
) %>%
  gather(validity, acc, valid:invalid_different, factor_key = TRUE)
# Bar per validity condition (mean +/- SE across subjects).
ggbarplot(
  tbl_overall_good_acc_cond,
  x = "validity",
  y = "acc",
  ylab = "Accuracy",
  xlab = "Validity",
  title = "Main Trial Accuracy By Validity",
  fill = "validity",
  color = "validity",
  palette = c("#0d2240", "#00a8e1", "#f7a800", "#E31818", "#dfdddc"),
  add = "mean_se",
  ylim = c(0, 1),
  na.rm = TRUE,
  label = TRUE,
  lab.nb.digits = 2,
  lab.vjust = rep(-2.5, 3)
)

Here are some descriptive and inferential statistics (repeated measures ANOVA and post-hoc t-tests) for the effect of validity on accuracy.

# Mean and standard error of accuracy within each validity condition.
get_summary_stats(
  group_by(tbl_overall_good_acc_cond, validity),
  acc,
  type = "mean_se"
)
## # A tibble: 3 x 5
##   validity          variable     n  mean    se
##   <fct>             <chr>    <dbl> <dbl> <dbl>
## 1 valid             acc         36 0.61  0.036
## 2 invalid_same      acc         36 0.435 0.038
## 3 invalid_different acc         36 0.427 0.039
# Repeated-measures ANOVA: validity as a within-subject factor, with subject
# (workerId) as the error stratum.
aov_acc_validity <- aov(
  formula = acc ~ validity + Error(factor(workerId) / validity),
  data = tbl_overall_good_acc_cond
)
summary(aov_acc_validity)
## 
## Error: factor(workerId)
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 35  2.742 0.07834               
## 
## Error: factor(workerId):validity
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## validity   2 0.7693  0.3847    9.96 0.000156 ***
## Residuals 70 2.7035  0.0386                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Paired t-test: valid vs. invalid-same accuracy (difference = valid minus
# invalid_same).
# The two samples are passed as explicit vectors because the formula interface
# of t.test() no longer accepts `paired = TRUE` (it is an error in R >= 4.4).
# Pairing is by row order within each condition, as in the formula version.
with(
  tbl_overall_good_acc_cond,
  t.test(
    acc[validity == "valid"],
    acc[validity == "invalid_same"],
    paired = TRUE
  )
)
## 
##  Paired t-test
## 
## data:  acc by validity
## t = 3.1254, df = 35, p-value = 0.00356
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.06127752 0.28842879
## sample estimates:
## mean of the differences 
##               0.1748532
# Paired t-test: invalid-same vs. invalid-different accuracy (difference =
# invalid_same minus invalid_different).
# The two samples are passed as explicit vectors because the formula interface
# of t.test() no longer accepts `paired = TRUE` (it is an error in R >= 4.4).
# Pairing is by row order within each condition, as in the formula version.
with(
  tbl_overall_good_acc_cond,
  t.test(
    acc[validity == "invalid_same"],
    acc[validity == "invalid_different"],
    paired = TRUE
  )
)
## 
##  Paired t-test
## 
## data:  acc by validity
## t = 0.41573, df = 35, p-value = 0.6801
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03146148  0.04766518
## sample estimates:
## mean of the differences 
##             0.008101852

4.2.3 Individual subject accuracy

Third, we can look at the accuracy for each individual subject. The dashed line at 0.50 represents chance.

# One bar per subject, sorted ascending; the dashed line marks 0.5.
ggbarplot(
  tbl_overall_good_acc,
  x = "workerId",
  y = "main_rate",
  ylab = "Accuracy",
  title = "Individual Accuracy",
  fill = "#f7a800",
  color = "#f7a800",
  ylim = c(0, 1),
  sort.val = "asc",
  font.xtickslab = 8
) +
  rotate_x_text() +
  geom_hline(yintercept = 0.5, linetype = 2)

4.3 Accuracy on no-change trials

Count the number of accurate no-change trials, i.e., no-change trials on which the subject pressed ‘J’.

# Number of correct no-change trials per subject.
# Correct trials are counted with sum(acc == 1) over all no-change trials
# instead of filtering to acc == 1 first: a subject with zero correct trials
# now stays in the table with a count of 0 rather than disappearing, which
# would misalign the later division by the per-subject trial totals.
tbl_good_catch_acc_all_main_no_change_acc_counts <- tbl_all_no_change_acc %>%
  filter(validity == "valid") %>%
  group_by(workerId) %>%
  dplyr::summarize(acc_no_change = sum(acc == 1, na.rm = TRUE))
kable(tbl_good_catch_acc_all_main_no_change_acc_counts)
workerId acc_no_change
A10AVWALIHR4UQ 40
A12FTSX85NQ8N9 37
A1916FQFP74ED6 12
A198MSVO1VTAT5 33
A1BNYBF79IQ89T 1
A1CNTYNF0MIBK8 19
A1EBGIM9CZTQLX 37
A1F3XD5WEK7H7E 37
A1G4MDRJ3Y36DA 19
A1P2RQ166VS5BT 38
A25N8DM670K4MA 23
A26LOVXF4QZZCO 40
A28AXX4NCWPH1F 39
A28RKM9Q1FIZEK 16
A2F2DDH12YU4AK 36
A2MUCL20GTQJA0 38
A2NO3OTBZYY43Z 40
A2V27A9GZA1NR2 28
A386U6K4J1ANQI 15
A3B9QNYDDXGL9I 36
A3C9SDM1TKNS65 18
A3CP03KUNUMEWF 8
A3I448WWTX2A2D 14
A3IBAGEKMDEGSI 23
A3JMLA0FWLP0WA 22
A3KCAT0C3NWVDX 12
A3PLWSCPFLCEGI 40
A3QT2DS5XYKYHU 23
A4I69DE7BI20I 27
A8C3WNWRBWUXO 39
AA8PZKO9XGCKO 40
ABYFMKPTF0SHG 16
AMPMTF5IAAMK8 33
ANV9R86DM2YG5 24
AQMJMYR9MANOG 24
ASL9UNVWVM9AW 21

Compute the no-change accuracy rate.

# No-change accuracy per subject: correct no-change trials / all no-change
# trials.
# The denominator is looked up by workerId (match) rather than divided by row
# position, so a missing or reordered subject in either table cannot silently
# pair one subject's count with another subject's total.
tbl_no_change_acc_rate <- data.frame(
  workerId = tbl_good_catch_acc_all_main_no_change_acc_counts$workerId,
  no_change_rate = tbl_good_catch_acc_all_main_no_change_acc_counts$acc_no_change /
    tbl_all_no_change_counts$no_change_total_sum[
      match(
        tbl_good_catch_acc_all_main_no_change_acc_counts$workerId,
        tbl_all_no_change_counts$workerId
      )
    ],
  stringsAsFactors = FALSE
)
kable(tbl_no_change_acc_rate)
workerId no_change_rate
A10AVWALIHR4UQ 1.000
A12FTSX85NQ8N9 0.925
A1916FQFP74ED6 0.300
A198MSVO1VTAT5 0.825
A1BNYBF79IQ89T 0.025
A1CNTYNF0MIBK8 0.475
A1EBGIM9CZTQLX 0.925
A1F3XD5WEK7H7E 0.925
A1G4MDRJ3Y36DA 0.475
A1P2RQ166VS5BT 0.950
A25N8DM670K4MA 0.575
A26LOVXF4QZZCO 1.000
A28AXX4NCWPH1F 0.975
A28RKM9Q1FIZEK 0.400
A2F2DDH12YU4AK 0.900
A2MUCL20GTQJA0 0.950
A2NO3OTBZYY43Z 1.000
A2V27A9GZA1NR2 0.700
A386U6K4J1ANQI 0.375
A3B9QNYDDXGL9I 0.900
A3C9SDM1TKNS65 0.450
A3CP03KUNUMEWF 0.200
A3I448WWTX2A2D 0.350
A3IBAGEKMDEGSI 0.575
A3JMLA0FWLP0WA 0.550
A3KCAT0C3NWVDX 0.300
A3PLWSCPFLCEGI 1.000
A3QT2DS5XYKYHU 0.575
A4I69DE7BI20I 0.675
A8C3WNWRBWUXO 0.975
AA8PZKO9XGCKO 1.000
ABYFMKPTF0SHG 0.400
AMPMTF5IAAMK8 0.825
ANV9R86DM2YG5 0.600
AQMJMYR9MANOG 0.600
ASL9UNVWVM9AW 0.525

Generate plots of the average no-change accuracy rate for the group and each individual’s no-change accuracy rate.

# Group-level bar (mean +/- SE across subjects) of no-change accuracy.
ggbarplot(
  tbl_no_change_acc_rate,
  y = "no_change_rate",
  ylab = "Accuracy",
  xlab = "Group",
  title = "No Change Trial Accuracy",
  fill = "#f7a800",
  color = "#f7a800",
  add = "mean_se",
  ylim = c(0, 1),
  width = 0.5,
  label = TRUE,
  lab.nb.digits = 2,
  lab.vjust = -2
)

# One bar per subject, sorted ascending; the dashed line marks 0.5.
ggbarplot(
  tbl_no_change_acc_rate,
  x = "workerId",
  y = "no_change_rate",
  ylab = "Accuracy",
  title = "Individual No Change Trial Accuracy",
  fill = "#f7a800",
  color = "#f7a800",
  ylim = c(0, 1),
  sort.val = "asc",
  font.xtickslab = 8
) +
  rotate_x_text() +
  geom_hline(yintercept = 0.5, linetype = 2)

4.4 Correlations between catch, change, and no-change trials

# Put the three per-subject accuracy rates side by side (the second column of
# each rate table) and compute their pairwise correlations with p-values.
# NOTE(review): the columns are combined by row position, so the three rate
# tables must list subjects in the same order -- confirm.
tbl_corr <- data.frame(
  catch_rate = tbl_all_catch_acc_rate[[2]],
  main_rate = tbl_overall_good_acc[[2]],
  no_change_rate = tbl_no_change_acc_rate[[2]]
)
tbl_corr_output <- rcorr(as.matrix(tbl_corr))
tbl_corr_output
##                catch_rate main_rate no_change_rate
## catch_rate           1.00      0.57           0.62
## main_rate            0.57      1.00           0.46
## no_change_rate       0.62      0.46           1.00
## 
## n= 36 
## 
## 
## P
##                catch_rate main_rate no_change_rate
## catch_rate                0.0003    0.0000        
## main_rate      0.0003               0.0047        
## no_change_rate 0.0000     0.0047
# Scatter plot with regression line and Pearson correlation: catch vs. main.
ggscatter(
  tbl_corr,
  x = "catch_rate",
  y = "main_rate",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  title = "Correlation Between Catch and Main Trial Accuracies",
  xlim = c(0, 1),
  ylim = c(0, 1)
) +
  stat_cor(method = "pearson")
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot with regression line and Pearson correlation: catch vs. no-change.
ggscatter(
  tbl_corr,
  x = "catch_rate",
  y = "no_change_rate",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  title = "Correlation Between Catch and No Change Trial Accuracies",
  xlim = c(0, 1),
  ylim = c(0, 1)
) +
  stat_cor(method = "pearson")
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot with regression line and Pearson correlation: main vs. no-change.
ggscatter(
  tbl_corr,
  x = "main_rate",
  y = "no_change_rate",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  title = "Correlation Between Main Trial and No Change Trial Accuracies",
  xlim = c(0, 1),
  ylim = c(0, 1)
) +
  stat_cor(method = "pearson")
## `geom_smooth()` using formula 'y ~ x'

4.5 Difference between catch, change, and no-change trials

# Reshape the three rate columns to long format: one row per subject x rate
# type, with `rate_type` as a factor in column order.
tbl_corr <- gather(
  tbl_corr,
  rate_type,
  rate,
  catch_rate:no_change_rate,
  factor_key = TRUE
)

# Paired t-test: catch vs. main accuracy (difference = catch minus main).
# The two samples are passed as explicit vectors because the formula interface
# of t.test() no longer accepts `paired = TRUE` (it is an error in R >= 4.4).
# Pairing is by row order within each rate type, as in the formula version.
with(
  tbl_corr,
  t.test(
    rate[rate_type == "catch_rate"],
    rate[rate_type == "main_rate"],
    paired = TRUE
  )
)
## 
##  Paired t-test
## 
## data:  rate by rate_type
## t = 3.1615, df = 35, p-value = 0.003235
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.04228157 0.19402299
## sample estimates:
## mean of the differences 
##               0.1181523
# Paired t-test: catch vs. no-change accuracy (difference = catch minus
# no-change).
# The two samples are passed as explicit vectors because the formula interface
# of t.test() no longer accepts `paired = TRUE` (it is an error in R >= 4.4).
# Pairing is by row order within each rate type, as in the formula version.
with(
  tbl_corr,
  t.test(
    rate[rate_type == "catch_rate"],
    rate[rate_type == "no_change_rate"],
    paired = TRUE
  )
)
## 
##  Paired t-test
## 
## data:  rate by rate_type
## t = -0.38891, df = 35, p-value = 0.6997
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.09694738  0.06577454
## sample estimates:
## mean of the differences 
##             -0.01558642
# Paired t-test: main vs. no-change accuracy (difference = main minus
# no-change).
# The two samples are passed as explicit vectors because the formula interface
# of t.test() no longer accepts `paired = TRUE` (it is an error in R >= 4.4).
# Pairing is by row order within each rate type, as in the formula version.
with(
  tbl_corr,
  t.test(
    rate[rate_type == "main_rate"],
    rate[rate_type == "no_change_rate"],
    paired = TRUE
  )
)
## 
##  Paired t-test
## 
## data:  rate by rate_type
## t = -3.2483, df = 35, p-value = 0.002564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.21732142 -0.05015597
## sample estimates:
## mean of the differences 
##              -0.1337387

5 Analyze data

5.1 Summary statistics

Confirm subject count.

# Trials of the subjects present in the correct-trial count table; this is the
# data set used for the signal detection analysis.
tbl_all_dprime <- tbl_all[
  tbl_all$workerId %in% tbl_good_catch_acc_all_main_acc_counts$workerId,
]
# Number of distinct subjects.
length(unique(tbl_all_dprime$workerId))
## [1] 36

5.2 Calculate signal detection parameters

5.2.1 d’

First, get signal detection counts for each subject (hits, misses, false alarms, and correct rejections), as well as the total number of change and no-change trials.

# Signal detection outcome counts per subject, one column per validity label.
# In all four tables the rows are grouped BEFORE filtering and the filter keeps
# the grouping structure (.preserve = TRUE), so every subject x validity group
# is still summarised after the filter.

# Hits: main change trials answered "F".
tbl_all_dprime_hit_counts <- tbl_all_dprime %>%
  group_by(workerId, validity) %>%
  filter(
    validity %in% c("valid", "invalid_same", "invalid_different"),
    change_or_no_change == "change",
    key == "F",
    .preserve = TRUE
  ) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
names(tbl_all_dprime_hit_counts)[-1] <-
  paste0(names(tbl_all_dprime_hit_counts)[-1], "_hit")

# Misses: main change trials answered "J".
tbl_all_dprime_miss_counts <- tbl_all_dprime %>%
  group_by(workerId, validity) %>%
  filter(
    validity %in% c("valid", "invalid_same", "invalid_different"),
    change_or_no_change == "change",
    key == "J",
    .preserve = TRUE
  ) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
names(tbl_all_dprime_miss_counts)[-1] <-
  paste0(names(tbl_all_dprime_miss_counts)[-1], "_miss")

# False alarms: non-practice no-change trials answered "F".
tbl_all_dprime_falsealarm_counts <- tbl_all_dprime %>%
  group_by(workerId, validity, .drop = FALSE) %>%
  filter(
    validity != "practice_main",
    change_or_no_change == "no_change",
    key == "F",
    .preserve = TRUE
  ) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
names(tbl_all_dprime_falsealarm_counts)[-1] <-
  paste0(names(tbl_all_dprime_falsealarm_counts)[-1], "_false_alarm")

# Correct rejections: non-practice no-change trials answered "J".
tbl_all_dprime_correctrejection_counts <- tbl_all_dprime %>%
  group_by(workerId, validity) %>%
  filter(
    validity != "practice_main",
    change_or_no_change == "no_change",
    key == "J",
    .preserve = TRUE
  ) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
names(tbl_all_dprime_correctrejection_counts)[-1] <-
  paste0(names(tbl_all_dprime_correctrejection_counts)[-1], "_correct_rejection")

# Total number of main change trials per subject and validity condition
# (the denominators for the hit rates).
# A leftover line that reassigned tbl_all_change_counts_no_catch$sum has been
# removed from this chunk: it summed every non-id column of that table,
# including the already-present `sum` column, so it doubled the stored totals
# and had no effect on tbl_num_change_trials.
tbl_num_change_trials <- tbl_all %>%
  filter(
    change_or_no_change == "change",
    validity %in% c("valid", "invalid_same", "invalid_different")
  ) %>%
  group_by(workerId, validity) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
# kable(tbl_num_change_trials)

# Total number of no-change trials per subject (the denominator for the
# false-alarm rate).
tbl_num_no_change_trials <- tbl_all %>%
  filter(change_or_no_change == "no_change", validity == "valid") %>%
  group_by(workerId, validity) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
colnames(tbl_num_no_change_trials) <- c("workerId", "no_change")
# kable(tbl_num_no_change_trials)

# Assemble one row per subject: hit and miss counts for the three cueing
# conditions, false-alarm and correct-rejection counts for the "valid" column,
# and the change / no-change trial totals.
# cbind() pairs rows purely by position, so stop if the tables do not list the
# same subjects in the same order.
stopifnot(
  identical(as.character(tbl_all_dprime_miss_counts$workerId), as.character(tbl_all_dprime_hit_counts$workerId)),
  identical(as.character(tbl_all_dprime_falsealarm_counts$workerId), as.character(tbl_all_dprime_hit_counts$workerId)),
  identical(as.character(tbl_all_dprime_correctrejection_counts$workerId), as.character(tbl_all_dprime_hit_counts$workerId)),
  identical(as.character(tbl_num_change_trials$workerId), as.character(tbl_all_dprime_hit_counts$workerId)),
  identical(as.character(tbl_num_no_change_trials$workerId), as.character(tbl_all_dprime_hit_counts$workerId))
)
# Columns are picked by name rather than by position so that the selection does
# not depend on how many validity values the count tables happen to contain.
tbl_paramters <- cbind.data.frame(
  tbl_all_dprime_hit_counts[c("workerId", "invalid_different_hit", "invalid_same_hit", "valid_hit")],
  tbl_all_dprime_miss_counts[c("invalid_different_miss", "invalid_same_miss", "valid_miss")],
  tbl_all_dprime_falsealarm_counts["valid_false_alarm"],
  tbl_all_dprime_correctrejection_counts["valid_correct_rejection"],
  tbl_num_change_trials[-1],
  tbl_num_no_change_trials[-1]
)

Next, compute valid, invalid-same, and invalid-different hit rates, as well as the false-alarm rate.

# Hit rate per cueing condition = hits / change trials in that condition;
# false-alarm rate = false alarms / no-change trials.
tbl_paramters <- mutate(
  tbl_paramters,
  valid_hit_rate = valid_hit / valid,
  invalid_same_hit_rate = invalid_same_hit / invalid_same,
  invalid_different_hit_rate = invalid_different_hit / invalid_different,
  false_alarm_rate = valid_false_alarm / no_change
)

Before z-transforming, hit and false-alarm rates of 0% are converted to 1% and rates of 100% are converted to 99% to prevent the z-transformation from resulting in -infinity or infinity.

# Replace rates of exactly 0 with .01 and rates of exactly 1 with .99 in every
# rate column, so that qnorm() does not return -Inf / Inf for them.
rate_columns <- c(
  "valid_hit_rate",
  "invalid_same_hit_rate",
  "invalid_different_hit_rate",
  "false_alarm_rate"
)
tbl_paramters[rate_columns] <- lapply(
  tbl_paramters[rate_columns],
  function(rate) {
    rate[rate == 0] <- .01
    rate[rate == 1] <- .99
    rate
  }
)

Finally, perform z-transformation and compute d-prime values.

# z-transform each rate with the standard normal quantile function, then
# compute d' = z(hit rate) - z(false-alarm rate) for each cueing condition.
# All three conditions share the same false-alarm rate.
tbl_paramters <- mutate(
  tbl_paramters,
  z_valid_hit_rate = qnorm(valid_hit_rate),
  z_invalid_same_hit_rate = qnorm(invalid_same_hit_rate),
  z_invalid_different_hit_rate = qnorm(invalid_different_hit_rate),
  z_false_alarm_rate = qnorm(false_alarm_rate),
  valid_d_prime = z_valid_hit_rate - z_false_alarm_rate,
  invalid_same_d_prime = z_invalid_same_hit_rate - z_false_alarm_rate,
  invalid_different_d_prime = z_invalid_different_hit_rate - z_false_alarm_rate
)

5.2.2 c

# Criterion c = -0.5 * (z(hit rate) + z(false-alarm rate)) for each cueing
# condition. The two z-scores must be ADDED in all three conditions; subtracting
# them gives -0.5 * d' instead of c. Statistics rendered for
# invalid_different_c with a subtraction here need to be regenerated.
tbl_paramters <- tbl_paramters %>% 
  mutate(valid_c = (z_valid_hit_rate + z_false_alarm_rate) * -0.5) %>% 
  mutate(invalid_same_c = (z_invalid_same_hit_rate + z_false_alarm_rate) * -0.5) %>% 
  mutate(invalid_different_c = (z_invalid_different_hit_rate + z_false_alarm_rate) * -0.5)

5.2.3 β

# Likelihood-ratio bias for each cueing condition:
# beta = exp((z(false-alarm rate)^2 - z(hit rate)^2) / 2).
tbl_paramters <- mutate(
  tbl_paramters,
  valid_β = exp((z_false_alarm_rate^2 - z_valid_hit_rate^2) / 2),
  invalid_same_β = exp((z_false_alarm_rate^2 - z_invalid_same_hit_rate^2) / 2),
  invalid_different_β = exp((z_false_alarm_rate^2 - z_invalid_different_hit_rate^2) / 2)
)

5.3 d’

5.3.1 Plot

# Reshape the three d' columns into long format: one row per subject and cueing
# condition. `factor_key = TRUE` stores the condition as a factor.
tbl_paramters_dprime <- gather(tbl_paramters, validity, dprime, valid_d_prime:invalid_different_d_prime, factor_key = TRUE)
# Keep only the subject id, condition and value. The columns are selected by
# name so the result does not depend on how many columns tbl_paramters has.
tbl_paramters_dprime <- tbl_paramters_dprime[c("workerId", "validity", "dprime")]
# Bar plot of mean d' per condition with standard-error bars.
ggbarplot(
  tbl_paramters_dprime,
  x = "validity",
  y = "dprime",
  ylab = "d'",
  fill = "validity",
  color = "validity",
  palette = c("#0d2240", "#00a8e1", "#f7a800"),
  add = "mean_se",
  ylim = c(0, 1.5),
  label = TRUE,
  lab.nb.digits = 2,
  lab.vjust = c(-7, -6, -6)
)

5.3.2 Repeated-measures ANOVA

# Mean and standard error of d' within each cueing condition.
get_summary_stats(
  group_by(tbl_paramters_dprime, validity),
  dprime,
  type = "mean_se"
)
## # A tibble: 3 x 5
##   validity                  variable     n  mean    se
##   <fct>                     <chr>    <dbl> <dbl> <dbl>
## 1 valid_d_prime             dprime      36 1.06  0.278
## 2 invalid_same_d_prime      dprime      36 0.448 0.216
## 3 invalid_different_d_prime dprime      36 0.425 0.205
# One-way repeated-measures ANOVA on d': validity is the within-subject factor
# and subject (workerId) defines the error stratum.
aov_dprime <- aov(
  formula = dprime ~ validity + Error(factor(workerId) / validity),
  data = tbl_paramters_dprime
)
summary(aov_dprime)
## 
## Error: factor(workerId)
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 35  173.4   4.954               
## 
## Error: factor(workerId):validity
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## validity   2   9.17   4.585   9.028 0.000325 ***
## Residuals 70  35.55   0.508                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.3.3 Pairwise comparisons

# Paired t-test of d': valid vs. invalid-same. Each row of tbl_paramters is one
# subject, so passing the two columns directly pairs the values by subject. The
# x/y interface is used because the formula method of t.test() does not accept
# `paired = TRUE` in recent R releases and otherwise pairs only by row order.
tbl_paramters %>%
  with(t.test(valid_d_prime, invalid_same_d_prime, paired = TRUE))
## 
##  Paired t-test
## 
## data:  dprime by validity
## t = 2.9412, df = 35, p-value = 0.005764
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1878313 1.0249134
## sample estimates:
## mean of the differences 
##               0.6063723
# Paired t-test of d': invalid-same vs. invalid-different. Each row of
# tbl_paramters is one subject, so passing the two columns directly pairs the
# values by subject. The x/y interface is used because the formula method of
# t.test() does not accept `paired = TRUE` in recent R releases and otherwise
# pairs only by row order.
tbl_paramters %>%
  with(t.test(invalid_same_d_prime, invalid_different_d_prime, paired = TRUE))
## 
##  Paired t-test
## 
## data:  dprime by validity
## t = 0.30395, df = 35, p-value = 0.763
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1297602  0.1754585
## sample estimates:
## mean of the differences 
##              0.02284917

5.4 c

5.4.1 Plot

# Reshape the three criterion (c) columns into long format: one row per subject
# and cueing condition. `factor_key = TRUE` stores the condition as a factor.
tbl_paramters_c <- gather(tbl_paramters, validity, c, valid_c:invalid_different_c, factor_key = TRUE)
# Keep only the subject id, condition and value. The columns are selected by
# name so the result does not depend on how many columns tbl_paramters has.
tbl_paramters_c <- tbl_paramters_c[c("workerId", "validity", "c")]
# Bar plot of mean c per condition with standard-error bars.
ggbarplot(
  tbl_paramters_c,
  x = "validity",
  y = "c",
  ylab = "c",
  fill = "validity",
  color = "validity",
  palette = c("#0d2240", "#00a8e1", "#f7a800"),
  add = "mean_se",
  ylim = c(-1, 1),
  label = TRUE,
  lab.nb.digits = 2,
  lab.vjust = c(-2, -3, -1)
)

5.4.2 Repeated-measures ANOVA

# Mean and standard error of c within each cueing condition.
get_summary_stats(
  group_by(tbl_paramters_c, validity),
  c,
  type = "mean_se"
)
## # A tibble: 3 x 5
##   validity            variable     n   mean    se
##   <fct>               <chr>    <dbl>  <dbl> <dbl>
## 1 valid_c             c           36  0.154 0.067
## 2 invalid_same_c      c           36  0.457 0.123
## 3 invalid_different_c c           36 -0.213 0.103
# One-way repeated-measures ANOVA on c: validity is the within-subject factor
# and subject (workerId) defines the error stratum.
aov_c <- aov(
  formula = c ~ validity + Error(factor(workerId) / validity),
  data = tbl_paramters_c
)
summary(aov_c)
## 
## Error: factor(workerId)
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 35  10.01   0.286               
## 
## Error: factor(workerId):validity
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## validity   2  8.097   4.049   10.14 0.000135 ***
## Residuals 70 27.940   0.399                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.4.3 Pairwise comparisons

# Paired t-test of c: valid vs. invalid-same. Each row of tbl_paramters is one
# subject, so passing the two columns directly pairs the values by subject. The
# x/y interface is used because the formula method of t.test() does not accept
# `paired = TRUE` in recent R releases and otherwise pairs only by row order.
tbl_paramters %>%
  with(t.test(valid_c, invalid_same_c, paired = TRUE))
## 
##  Paired t-test
## 
## data:  c by validity
## t = -2.9412, df = 35, p-value = 0.005764
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.51245669 -0.09391566
## sample estimates:
## mean of the differences 
##              -0.3031862
# Paired t-test of c: invalid-same vs. invalid-different. Each row of
# tbl_paramters is one subject, so passing the two columns directly pairs the
# values by subject. The x/y interface is used because the formula method of
# t.test() does not accept `paired = TRUE` in recent R releases and otherwise
# pairs only by row order.
tbl_paramters %>%
  with(t.test(invalid_same_c, invalid_different_c, paired = TRUE))
## 
##  Paired t-test
## 
## data:  c by validity
## t = 3.6754, df = 35, p-value = 0.0007894
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2997919 1.0396209
## sample estimates:
## mean of the differences 
##               0.6697064

5.5 β

# Reshape the three β columns into long format: one row per subject and cueing
# condition. `factor_key = TRUE` stores the condition as a factor.
tbl_paramters_β <- gather(tbl_paramters, validity, β, valid_β:invalid_different_β, factor_key = TRUE)
# Keep only the subject id, condition and value. The columns are selected by
# name so the result does not depend on how many columns tbl_paramters has.
tbl_paramters_β <- tbl_paramters_β[c("workerId", "validity", "β")]
# Bar plot of mean β per condition with standard-error bars.
ggbarplot(
  tbl_paramters_β,
  x = "validity",
  y = "β",
  ylab = "β",
  fill = "validity",
  color = "validity",
  palette = c("#0d2240", "#00a8e1", "#f7a800"),
  add = "mean_se",
  ylim = c(0, 4),
  label = TRUE,
  lab.nb.digits = 2,
  lab.vjust = c(-5, -5, -5)
)

5.5.1 Repeated-measures ANOVA

# Mean and standard error of β within each cueing condition.
get_summary_stats(
  group_by(tbl_paramters_β, validity),
  β,
  type = "mean_se"
)
## # A tibble: 3 x 5
##   validity            variable     n  mean    se
##   <fct>               <chr>    <dbl> <dbl> <dbl>
## 1 valid_β             β           36  2.53 0.532
## 2 invalid_same_β      β           36  2.57 0.591
## 3 invalid_different_β β           36  2.55 0.565
# One-way repeated-measures ANOVA on β: validity is the within-subject factor
# and subject (workerId) defines the error stratum.
aov_β <- aov(
  formula = β ~ validity + Error(factor(workerId) / validity),
  data = tbl_paramters_β
)
summary(aov_β)
## 
## Error: factor(workerId)
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 35   1082    30.9               
## 
## Error: factor(workerId):validity
##           Df Sum Sq Mean Sq F value Pr(>F)
## validity   2   0.04  0.0185   0.011  0.989
## Residuals 70 117.31  1.6758

5.5.2 Pairwise comparisons

# Paired t-test of β: valid vs. invalid-same. Each row of tbl_paramters is one
# subject, so passing the two columns directly pairs the values by subject. The
# x/y interface is used because the formula method of t.test() does not accept
# `paired = TRUE` in recent R releases and otherwise pairs only by row order.
tbl_paramters %>%
  with(t.test(valid_β, invalid_same_β, paired = TRUE))
## 
##  Paired t-test
## 
## data:  β by validity
## t = -0.17488, df = 35, p-value = 0.8622
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5703278  0.4798591
## sample estimates:
## mean of the differences 
##             -0.04523433
# Paired t-test of β: invalid-same vs. invalid-different. Each row of
# tbl_paramters is one subject, so passing the two columns directly pairs the
# values by subject. The x/y interface is used because the formula method of
# t.test() does not accept `paired = TRUE` in recent R releases and otherwise
# pairs only by row order.
tbl_paramters %>%
  with(t.test(invalid_same_β, invalid_different_β, paired = TRUE))
## 
##  Paired t-test
## 
## data:  β by validity
## t = 0.097901, df = 35, p-value = 0.9226
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5063759  0.5576901
## sample estimates:
## mean of the differences 
##              0.02565711