1 Set up R environment

library(tidyverse)
library(ggplot2)
library(ggpubr)
library(plyr)
library(magick)
library(png)
library(EBImage)
library(lme4)
library(lmerTest)

Set the R working directory to the main experiment directory.

# NOTE(review): absolute, user-specific path. Every relative path used below
# (e.g. "./Rensink_data/") resolves against it, so the script only runs on a
# machine that has this directory layout.
setwd("/Users/adambarnas/Box/Mudsplash/Results")

2 Format & manipulate raw data files

2.1 Read-in datafiles

First, read in the individual subject files (saved automatically on the server as csv files).

# Read every per-subject CSV in ./Rensink_data/ and stack them into one data frame.
# `pattern` is a regular expression, not a shell glob, so match the literal
# ".csv" extension at the end of the file name.
subject_files <- list.files(
  path = "./Rensink_data/",
  pattern = "\\.csv$",
  full.names = TRUE
)
# Fail early with a clear message instead of an obscure error further down.
if (length(subject_files) == 0) {
  stop("No subject .csv files found in ./Rensink_data/", call. = FALSE)
}
tbl_all <- subject_files %>%
  map_df(~read_csv(.))
tbl_all <- data.frame(tbl_all)
#head(tbl_all,10)

Get a count of the number of subjects.

# Number of distinct workers (subjects) in the raw data.
tbl_all %>%
  distinct(workerId) %>%
  nrow()

## [1] 44

Next, rename the catch trials to the same convention as the main trials and break apart the unmod_image column into database (the lab where the stims come from) and image (the name of the image file).

# Give the three catch images the same "<database>_<image>" naming as the main trials.
is_catch <- tbl_all$unmod_image %in% c("catchAirplane-a", "catchBoat-a", "catchCow-a")
tbl_all$unmod_image[is_catch] <- paste0("wolfe_", tbl_all$unmod_image[is_catch])
# Strip the "-a" marker. gsub() is already vectorised; wrapping it in lapply()
# turned the column into a list, so call it directly and keep a character vector.
tbl_all$unmod_image <- gsub("-a", "", tbl_all$unmod_image, fixed = TRUE)
# Split at the first underscore: text before it is the database, the rest is the image name.
tbl_all <- tbl_all %>%
  separate(unmod_image, into = c("database", "image"), sep = "([\\_])", extra = "merge")
#head(tbl_all,10)

Let’s, for now, also assign the trials to bins based on the trial number.

# Label each trial with a block of six consecutive trial numbers;
# anything outside trials 3-32 stays in "block_0".
tbl_all$bin <- dplyr::case_when(
  tbl_all$trial_number %in% 3:8 ~ "block_1",
  tbl_all$trial_number %in% 9:14 ~ "block_2",
  tbl_all$trial_number %in% 15:20 ~ "block_3",
  tbl_all$trial_number %in% 21:26 ~ "block_4",
  tbl_all$trial_number %in% 27:32 ~ "block_5",
  TRUE ~ "block_0"
)

Get the total number of trials for each subject and the initial count for each image.

# Trials per worker and main image (wide: one column per image), with the
# per-worker total in `sum`. Catch images are excluded.
# Bare mutate() presumably resolves to plyr::mutate here, since plyr is attached
# after tidyverse; that is also why summarize is called as dplyr::summarize.
tbl_all_counts <- tbl_all %>%
  filter(image != "catchAirplane", image != "catchBoat", image != "catchCow") %>%
  group_by(workerId, image) %>%
  dplyr::summarise(counts = n()) %>%
  spread(key = image, value = counts) %>%
  mutate(sum = rowSums(.[-1], na.rm = TRUE))
#head(tbl_all_counts,10)

# Number of presentations of each main image, summed over workers.
# Pick the image columns by name rather than by the fixed positions 2:49, which
# would silently include `sum` (or fail) if the number of images were not 48.
image_cols <- setdiff(colnames(tbl_all_counts), c("workerId", "sum"))
image_count_initial <- data.frame(
  image_count = colSums(
    as.data.frame(tbl_all_counts)[, image_cols, drop = FALSE],
    na.rm = TRUE
  )
)
knitr::kable(image_count_initial)

	image_count
Amish	28
Army	30
Barns	31
BarnTrack	27
Barrels	29
Beach	27
Birds	27
Boat	27
Bus	28
Cactus	29
Camel	28
CanalBridge	29
Castle	27
Chopper	29
Cockpit	29
Description	27
Dinner	27
Diver	26
Eating	27
Egypt	26
FarmByPond	28
Farmer	28
Fishing	28
Floatplane	28
Fountain	29
Harbor	28
Horizon	28
Ice	28
Kayak	28
Kayaker	29
Kids	25
Lake	27
Market	27
Marling	26
Mosque	24
NotreDame	28
Nurses	26
Obelisk	26
OtherDiver	26
Pilots	28
Seal	27
Soldiers	28
Station	27
SummerLake	29
Turtle	27
Water	27
Window	26
Wine	26

2.2 Compute number of cycles

One cycle (A-BS-B-AS) lasted 4200 ms (2000-100-2000-100).

# RT expressed in display cycles; one cycle = 2000 + 100 + 2000 + 100 ms.
tbl_all[["cycles"]] <- tbl_all[["rt"]] / (2000 + 100 + 2000 + 100)

The data are loaded. Let’s move on and examine the quality of the data.

2.3 Analyze accuracy

In this chunk, every click for a given image is compared to the image difference hull. The process involves the addition of two arrays - the difference hull array and an array created by the script from the subject’s click. The difference hull array is composed of 0s and 1s, with 1s corresponding to the changing object. An equally sized array of all 0s is created, with a single 1 at the X,Y coordinates of the click. These two arrays are added together and the maximum value is queried. A maximum value of 2 indicates that the click occurred within the boundaries of the image difference hull (an accurate click). A value less than 2 indicates that the click occurred outside the boundaries of the image difference hull (an inaccurate click). In the new click_ACC column, 1s correspond to accurate clicks and 0s correspond to inaccurate clicks. This will analyze the accuracy for the 2 practice images, all main images, and the 1 catch image.

# Load the difference-hull masks, one PNG per image.
# `pattern` is a regular expression: escape the dot and anchor it so that only
# files ending in ".png" are picked up.
img_train <- list.files(
  path = "/Users/adambarnas/Box/Mudsplash/Boxes_Rensink/",
  pattern = "\\.png$",
  all.files = TRUE,
  full.names = TRUE,
  no.. = TRUE
)
# readPNG() reads one file; handed the whole vector it appears to use only the
# first path, so make that explicit. (img_array is not used in the code shown here.)
img_array <- readPNG(img_train[1])
img_list <- lapply(img_train, readPNG)
img_names <- row.names(image_count_initial)
img_names <- c("catchAirplane", "catchBoat", "catchCow", img_names)
# Names are attached purely by position, so at minimum the counts must agree.
# NOTE(review): this also assumes list.files() returns the masks in the same
# order as img_names — confirm against the actual file names.
stopifnot(length(img_list) == length(img_names))
names(img_list) <- img_names

# Clicks recorded at coordinate 0 are moved to 1 so they can index the mask arrays.
# NOTE(review): only 0 is shifted; if the recorded coordinates are 0-based,
# every other click is offset by one pixel — confirm against the experiment code.
tbl_all$x[tbl_all$x == "0"] <- 1
tbl_all$y[tbl_all$y == "0"] <- 1

# TRUE when the click at column x, row y lands on a mask pixel equal to 1.
# Missing, non-positive, or out-of-range coordinates count as a miss instead of
# raising an error or matching a whole row/column.
click_in_hull <- function(hull, x, y) {
  # For a multi-channel PNG only the first channel is consulted, as before.
  if (length(dim(hull)) > 2) {
    hull <- hull[, , 1]
  }
  if (is.na(x) || is.na(y)) {
    return(FALSE)
  }
  if (x < 1 || y < 1 || x > ncol(hull) || y > nrow(hull)) {
    return(FALSE)
  }
  isTRUE(hull[y, x] == 1)
}

# Looking the pixel up directly is equivalent to adding a one-hot array to the
# mask and testing whether the maximum is 2, without building two data frames per trial.
click_hits <- vapply(seq_len(nrow(tbl_all)), function(i) {
  hull <- img_list[[tbl_all$image[i]]]
  if (is.null(hull)) {
    stop("No difference-hull mask found for image: ", tbl_all$image[i], call. = FALSE)
  }
  click_in_hull(hull, tbl_all$x[i], tbl_all$y[i])
}, logical(1))

# Stored as the character strings "1" (accurate) / "0" (inaccurate), as before.
tbl_all$click_ACC <- as.character(as.integer(click_hits))

2.3.1 Catch trials

Check the accuracy of the catch trial. As a reminder, the catch trial was a large, salient changing object. If a subject did not click on the changing object during the catch trial, their performance on the main trials is likely poor, and their data will be excluded. This chunk will filter the data by accuracy for both inaccurate (bad) catch trials and accurate (good) catch trials and save new dataframes. This chunk will also provide the number and workerIds for inaccurate and accurate catch trial performance.

# Catch-trial rows (only the "catchCow" image is examined), split by click accuracy.
tbl_all_catch_acc <- filter(tbl_all, image == "catchCow")
tbl_bad_catch_acc <- filter(tbl_all_catch_acc, click_ACC == 0)
tbl_good_catch_acc <- filter(tbl_all_catch_acc, click_ACC == 1)

# All trials from workers with an inaccurate catch trial, then how many such workers there are.
tbl_bad_catch_acc <- tbl_all[which(tbl_all$workerId %in% tbl_bad_catch_acc$workerId), ]
tbl_bad_catch_acc %>%
  distinct(workerId) %>%
  nrow()

## [1] 1

# All trials from workers with an accurate catch trial, then how many such workers there are.
tbl_good_catch_acc <- tbl_all[which(tbl_all$workerId %in% tbl_good_catch_acc$workerId), ]
tbl_good_catch_acc %>%
  distinct(workerId) %>%
  nrow()

## [1] 43

2.3.2 Main trials

Now, check the accuracy of the clicks for the main images. This chunk will compute the total number of inaccurate and accurate clicks for each subject.

# Main (non-catch) trials from workers who passed the catch trial.
tbl_good_catch_acc_all_main_acc <- tbl_good_catch_acc %>%
  filter(image!= "catchAirplane" & image!= "catchBoat" & image!= "catchCow")
# Per-worker counts of inaccurate ("0") and accurate ("1") clicks, plus their total.
# fill = 0L: a worker with no clicks of one kind gets a count of 0 rather than NA,
# so the accuracy rate derived from these counts is 0 or 1 instead of NA.
tbl_good_catch_acc_all_main_acc_counts <- tbl_good_catch_acc_all_main_acc %>%
  group_by(workerId,click_ACC) %>%
  dplyr::summarize(counts = n()) %>%
  spread(click_ACC,counts, fill = 0L) %>%
  mutate(total = rowSums(.[2:3], na.rm = TRUE))
# Columns arrive as workerId, "0", "1", total; give them descriptive names.
colnames(tbl_good_catch_acc_all_main_acc_counts) <- c("workerId", "inacc", "acc", "total")

Here, we can plot the overall accuracy of the main trial clicks for the group.

# Proportion of accurate main-trial clicks for each worker.
tbl_good_catch_acc_all_main_acc_rate <- cbind.data.frame(
  tbl_good_catch_acc_all_main_acc_counts[, 1],
  tbl_good_catch_acc_all_main_acc_counts$acc / tbl_good_catch_acc_all_main_acc_counts$total
)
colnames(tbl_good_catch_acc_all_main_acc_rate) <- c("workerId", "acc_rate")

# Group-level accuracy: one bar showing the mean across workers with SE error bars.
ggbarplot(
  tbl_good_catch_acc_all_main_acc_rate,
  y = "acc_rate",
  ylab = "Accuracy",
  fill = "#f7a800",
  color = "#f7a800",
  add = "mean_se",
  ylim = c(0, 1),
  xlab = "Group",
  width = 0.5,
  label = TRUE,
  lab.nb.digits = 2,
  lab.vjust = -1.6,
  title = "Main Trial Accuracy for All Subjects"
)

# Per-worker accuracy, bars sorted in ascending order.
ggbarplot(
  tbl_good_catch_acc_all_main_acc_rate,
  x = "workerId",
  y = "acc_rate",
  ylab = "Accuracy",
  fill = "#f7a800",
  color = "#f7a800",
  ylim = c(0, 1),
  title = "Main Trial Accuracy for Individual Subjects",
  font.xtickslab = 8,
  sort.val = c("asc")
) +
  rotate_x_text()

Count the number of subjects and only remove inaccurate trials.

# Number of workers contributing main trials.
tbl_good_catch_acc_all_main_acc %>%
  distinct(workerId) %>%
  nrow()

## [1] 43

# Keep only the main trials whose click landed inside the difference hull.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed <- filter(
  tbl_good_catch_acc_all_main_acc,
  click_ACC == 1
)

3 Raw RTs

3.1 Remove outlier trials

Next, we can remove outlier RTs that are more than 3 SDs away from the mean.

Let’s get the number of trials. This is the initial number of trials.

# Accurate trials per worker and image (wide), with the per-worker total in `sum`.
# This is the baseline against which trimming losses are measured.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  group_by(workerId, image) %>%
  dplyr::summarise(counts = n()) %>%
  spread(key = image, value = counts) %>%
  mutate(sum = rowSums(.[-1], na.rm = TRUE))
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts,10)

Before the data are trimmed, let’s generate histograms of all RTs and of the mean RT of each subject.

# RT in seconds, then a histogram of every accurate trial's RT.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["rt_s"]] <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["rt"]] / 1000
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed,
  x = "rt_s",
  fill = "#f7a800",
  rug = TRUE,
  bins = 60,
  xlim = c(0, 60),
  ylim = c(0, 800),
  xlab = "Detection Raw RT (sec)",
  title = "All Trials"
)

# Mean RT (sec) per worker before trimming, and its distribution across workers.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_mean_subj_RT <- dplyr::summarize(
  group_by(tbl_good_catch_acc_all_main_acc_inacc_trials_removed, workerId),
  mean_rt = mean(rt_s, na.rm = TRUE)
)
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_mean_subj_RT,
  x = "mean_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 35,
  xlim = c(0, 35),
  ylim = c(0, 10),
  xlab = "Mean Detection Raw RT (sec)",
  title = "All Subjects"
)

Count the number of trials

# Accurate main trials before any RT-based exclusion.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  nrow()

## [1] 1056

Trial timer maxed out at 60 sec. Any RTs recorded as 60 sec should be discarded.

# Drop trials that hit the 60 s (60000 ms) timer ceiling.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed <- filter(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed,
  rt < 60000
)

Count the number of trials

# Trials left after timed-out trials are dropped.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed %>%
  nrow()

## [1] 1054

Next, data are inspected for RT outliers, defined relative to each subject’s own mean and SD. Two additional columns are added to the data table. First, an “outliers” column is added that labels an RT as an outlier or not (0 = not an outlier, 1 = an outlier more than 3 SDs below the subject’s mean, 2 = an outlier more than 3 SDs above the subject’s mean). Second, a “removed_RT” column is added that contains the RT for non-outlier trials and NA for outlier trials.

Note: code can be changed to allow for replacement of outliers with the cutoff values.

# Restrict to accurate clicks, then flag RTs that lie more than n_sd SDs from
# each worker's own mean RT.
#   outliers:   0 = within bounds, 1 = below the lower bound, 2 = above the upper bound
#   removed_RT: copy of rt with flagged values set to NA
correct.trials <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed[
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed[["click_ACC"]] == "1",
]
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed <- ddply(
  correct.trials,
  .(workerId),
  function(subj) {
    n_sd <- 3 # change this number to tighten or loosen the cutoff criteria
    centre <- mean(subj$rt)
    width <- sd(subj$rt)
    too_slow <- subj$rt > centre + n_sd * width
    too_fast <- subj$rt < centre - n_sd * width

    subj$outliers <- 0
    subj$outliers[too_slow] <- 2
    subj$outliers[too_fast] <- 1

    # To replace outliers with the cutoff values instead of dropping them,
    # assign the upper / lower bound here in place of NA.
    subj$removed_RT <- subj$rt
    subj$removed_RT[too_slow] <- NA
    subj$removed_RT[too_fast] <- NA

    subj
  }
)
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed,10)

Count the number of trials

# Row count is unchanged at this point: outliers are only flagged, not yet dropped.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed %>%
  nrow()

## [1] 1054

Next, let’s completely toss out the outlier trials (labeled as NA).

# Drop the trials whose removed_RT was set to NA (the flagged outliers).
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed <- subset(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed,
  !is.na(removed_RT)
)
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,10)

Count the number of trials

# Trials surviving the 3-SD trim.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  nrow()

## [1] 1031

Let’s get the number of trials. This is the number of trials that “survive” the data trimming.

# Surviving trials per worker and image (wide), with the per-worker total in `sum`.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  group_by(workerId, image) %>%
  dplyr::summarise(counts = n()) %>%
  spread(key = image, value = counts) %>%
  mutate(sum = rowSums(.[-1], na.rm = TRUE))
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts,10)

Here are new histograms of all RTs and the mean RT of each subject.

# Histogram of trial-level RTs (sec) after trimming.
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
  x = "rt_s",
  fill = "#f7a800",
  rug = TRUE,
  bins = 30,
  xlim = c(0, 30),
  ylim = c(0, 300),
  xlab = "Detection Raw RT (sec)",
  title = "All Trials"
)

# Mean trimmed RT (sec) per worker, and its distribution across workers.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_mean_subj_RT <- dplyr::summarize(
  group_by(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
    workerId
  ),
  mean_rt = mean(rt_s, na.rm = TRUE)
)
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_mean_subj_RT,
  x = "mean_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 15,
  xlim = c(0, 15),
  ylim = c(0, 8),
  xlab = "Mean Detection Raw RT (sec)",
  title = "All Subjects"
)

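What percentage of trials was removed overall? Note that the count below is the difference between the accurate trials and the final trimmed set, so it includes the timed-out trials as well as the 3-SD outliers.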

# Trials lost per worker between the accurate-trial set and the final trimmed set
# (timed-out trials and 3-SD outliers together).
# Align the two count tables on workerId instead of assuming identical row order;
# a worker with no surviving trials counts as having 0 trials left.
surviving_trials <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts$sum[
  match(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$workerId,
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts$workerId
  )
]
surviving_trials[is.na(surviving_trials)] <- 0
tbl_all_main_acc_rts_3SD_removed_count <- data.frame(
  total_removed = tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum - surviving_trials
)

# Overall percentage of accurate trials that were removed.
per_RTs_removed <- (sum(tbl_all_main_acc_rts_3SD_removed_count) / sum(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum)) * 100
per_RTs_removed

## [1] 2.367424

What is the percentage of outlier RTs that were removed per subject? This is easy to visualize in a plot.

# Per-worker table: trials removed, trials before trimming, and percent removed.
tbl_per_rts_3SD_removed_by_subj <- cbind.data.frame(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts[1],
  tbl_all_main_acc_rts_3SD_removed_count,
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum,
  data.frame((tbl_all_main_acc_rts_3SD_removed_count / tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum) * 100)
)
colnames(tbl_per_rts_3SD_removed_by_subj) <- c("workerId", "outlier_RTs", "total_RTs", "percent_excluded")
#head(tbl_per_rts_3SD_removed_by_subj,10)

# Percent of trials excluded for each worker, sorted ascending.
ggbarplot(
  tbl_per_rts_3SD_removed_by_subj,
  x = "workerId",
  y = "percent_excluded",
  ylab = "% Trials Excluded",
  fill = "#f7a800",
  font.tickslab = 8,
  sort.val = c("asc")
) +
  rotate_x_text()

3.2 Summary statistics

Let’s again confirm how many subjects we’re working with. This is the total number of subjects with good catch trial accuracy and good main trial accuracy.

# Number of workers remaining in the trimmed data.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts %>%
  distinct(workerId) %>%
  nrow()

## [1] 43

3.3 Plot the results

This is a plot of the mean detection RT for each image.

# Mean detection RT (sec) per image with SE bars, images sorted ascending.
ggbarplot(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
  x = "image",
  y = "rt_s",
  ylab = "Mean Detection Raw RT (sec)",
  fill = "#f7a800",
  add = "mean_se",
  font.xtickslab = 8,
  sort.val = c("asc")
) +
  rotate_x_text() +
  theme(legend.position = "none")

This table contains the final count for each image. This is after RTs were excluded that were more than 3 SDs from the mean.

# Surviving trials per image, summed over workers.
# Pick the image columns by name rather than by the fixed positions 2:49: if any
# image lost all of its trials, position 49 would be the `sum` column.
image_cols <- setdiff(
  colnames(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts),
  c("workerId", "sum")
)
image_count_final <- data.frame(
  image_count = colSums(
    as.data.frame(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts)[, image_cols, drop = FALSE],
    na.rm = TRUE
  )
)
knitr::kable(image_count_final)

	image_count
Amish	25
Army	23
Barns	22
BarnTrack	19
Barrels	14
Beach	23
Birds	20
Boat	23
Bus	19
Cactus	27
Camel	23
CanalBridge	19
Castle	19
Chopper	18
Cockpit	26
Description	21
Dinner	21
Diver	21
Eating	23
Egypt	23
FarmByPond	23
Farmer	23
Fishing	24
Floatplane	24
Fountain	22
Harbor	24
Horizon	20
Ice	21
Kayak	23
Kayaker	21
Kids	20
Lake	24
Market	18
Marling	18
Mosque	20
NotreDame	23
Nurses	21
Obelisk	21
OtherDiver	17
Pilots	22
Seal	23
Soldiers	20
Station	22
SummerLake	25
Turtle	20
Water	22
Window	20
Wine	21

3.4 Splash vs. Flicker

This final section compares the RT data from the images with the mudsplashes and the images without mudsplashes.

# Mean trimmed RT (sec) per image for the mudsplash data.
rensink_mudsplash <- dplyr::summarize(
  group_by(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
    image
  ),
  mean_rt = mean(rt_s, na.rm = TRUE)
)

# Flicker-paradigm data; its first column is renamed to "trial_number".
rensink_flicker <- read_csv("./change_blindness_rensink_behav.csv")
names(rensink_flicker)[1] <- "trial_number"

# Put the per-image splash means next to the third column of the flicker file.
# NOTE(review): the two tables are bound by row position, not joined on an image
# key. This is only correct if the CSV rows are in the same order as the
# group_by(image) output, which depends on the sort order used — confirm.
rensink_RTs <- setNames(
  cbind.data.frame(rensink_mudsplash, rensink_flicker[3]),
  c("image", "splash", "flicker")
)

# Long format: one row per image x condition, with `condition` as a factor
# (splash first, flicker second).
rensink_RTs_long <- gather(
  data = rensink_RTs,
  key = condition,
  value = RT,
  splash:flicker,
  factor_key = TRUE
)

# Mean and SE of the per-image RTs in each condition.
get_summary_stats(
  group_by(rensink_RTs_long, condition),
  RT,
  type = "mean_se"
)

## # A tibble: 2 x 5
##   condition variable     n  mean    se
##   <fct>     <chr>    <dbl> <dbl> <dbl>
## 1 splash    RT          48  8.40 0.145
## 2 flicker   RT          48  8.84 0.597

# Paired t-test of splash vs. flicker mean RT across images (splash minus flicker).
# The formula method with paired = TRUE is rejected by R >= 4.4.0, and it paired
# observations only through their row order in the long table. Passing the two
# columns of the wide table pairs each image's splash RT with its own flicker RT.
with(rensink_RTs, t.test(splash, flicker, paired = TRUE))

## 
##  Paired t-test
## 
## data:  RT by condition
## t = -0.81504, df = 47, p-value = 0.4192
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.5421074  0.6528445
## sample estimates:
## mean of the differences 
##              -0.4446314

# Per-image RT, splash and flicker bars side by side.
ggbarplot(
  rensink_RTs_long,
  "image",
  "RT",
  fill = "condition",
  color = "condition",
  palette = "jco",
  position = position_dodge(0.9),
  font.xtickslab = 8,
  ylab = "Mean Detection Raw RT (sec)"
) +
  rotate_x_text()

# Paired plot linking each image's splash and flicker RT.
ggpaired(
  rensink_RTs,
  cond1 = "splash",
  cond2 = "flicker",
  fill = "condition",
  palette = "jco",
  ylab = "Mean Detection Raw RT (sec)"
)

# Splash vs. flicker scatter with regression line and Pearson correlation.
ggscatter(
  rensink_RTs,
  x = "splash",
  y = "flicker",
  xlab = "Mean Splash Detection Raw RT (sec)",
  ylab = "Mean Flicker Detection Raw RT (sec)",
  fill = "#f7a800",
  color = "#f7a800",
  add = "reg.line",
  cor.coef = TRUE,
  cor.coeff.args = list(method = "pearson", label.x = 0, label.sep = "\n"),
  ylim = c(0, 30),
  xlim = c(0, 30),
  title = "All Rensink Images"
)

# Mixed model: fixed effect of condition, random intercept per image.
raw_RT_lmer <- lmer(
  RT ~ condition + (1 | image),
  data = rensink_RTs_long
)
summary(raw_RT_lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: RT ~ condition + (1 | image)
##    Data: rensink_RTs_long
## 
## REML criterion at convergence: 479.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3562 -0.3358 -0.0438  0.2151  6.3088 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  image    (Intercept) 1.925    1.388   
##  Residual             7.143    2.673   
## Number of obs: 96, groups:  image, 48
## 
## Fixed effects:
##                  Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)        8.3960     0.4346 89.9455  19.317   <2e-16 ***
## conditionflicker   0.4446     0.5455 47.0000   0.815    0.419    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condtnflckr -0.628

Remove “Horizon” image (potential outlier)

# Same comparison with the "Horizon" image left out.
rensink_RTs_no_horizon <- filter(rensink_RTs, image != "Horizon")

# Long format: one row per image x condition, `condition` as a factor.
rensink_RTs_no_horizon_long <- gather(
  data = rensink_RTs_no_horizon,
  key = condition,
  value = RT,
  splash:flicker,
  factor_key = TRUE
)

# Mean and SE of the per-image RTs in each condition.
get_summary_stats(
  group_by(rensink_RTs_no_horizon_long, condition),
  RT,
  type = "mean_se"
)

## # A tibble: 2 x 5
##   condition variable     n  mean    se
##   <fct>     <chr>    <dbl> <dbl> <dbl>
## 1 splash    RT          47  8.40 0.148
## 2 flicker   RT          47  8.41 0.419

# Paired t-test of splash vs. flicker mean RT across images, "Horizon" excluded
# (splash minus flicker).
# The formula method with paired = TRUE is rejected by R >= 4.4.0, and it paired
# observations only through their row order in the long table. Passing the two
# columns of the wide table pairs each image's splash RT with its own flicker RT.
with(rensink_RTs_no_horizon, t.test(splash, flicker, paired = TRUE))

## 
##  Paired t-test
## 
## data:  RT by condition
## t = -0.026191, df = 46, p-value = 0.9792
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6833853  0.6658296
## sample estimates:
## mean of the differences 
##            -0.008777862

# Per-image RT, splash and flicker bars side by side ("Horizon" excluded).
ggbarplot(
  rensink_RTs_no_horizon_long,
  "image",
  "RT",
  fill = "condition",
  color = "condition",
  palette = "jco",
  ylim = c(0, 20),
  position = position_dodge(0.9),
  font.xtickslab = 8,
  ylab = "Mean Detection Raw RT (sec)"
) +
  rotate_x_text()

# Paired plot linking each image's splash and flicker RT.
ggpaired(
  rensink_RTs_no_horizon,
  cond1 = "splash",
  cond2 = "flicker",
  fill = "condition",
  palette = "jco",
  ylim = c(0, 20),
  ylab = "Mean Detection Raw RT (sec)"
)

# Scatter with regression line and Pearson correlation; messages emitted while
# printing are suppressed.
raw <- ggscatter(
  rensink_RTs_no_horizon,
  x = "splash",
  y = "flicker",
  xlab = "Mean Splash Detection Raw RT (sec)",
  ylab = "Mean Flicker Detection Raw RT (sec)",
  fill = "#f7a800",
  color = "#f7a800",
  add = "reg.line",
  cor.coef = TRUE,
  cor.coeff.args = list(method = "pearson", label.x = 0, label.sep = "\n"),
  ylim = c(0, 20),
  xlim = c(0, 20),
  title = "All Rensink Images; No 'Horizon'"
)
suppressMessages(print(raw))

# Mixed model on the data without "Horizon": fixed effect of condition,
# random intercept per image. Overwrites the earlier raw_RT_lmer fit.
raw_RT_lmer <- lmer(
  RT ~ condition + (1 | image),
  data = rensink_RTs_no_horizon_long
)
summary(raw_RT_lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: RT ~ condition + (1 | image)
##    Data: rensink_RTs_no_horizon_long
## 
## REML criterion at convergence: 400.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.29944 -0.54795  0.02048  0.33392  2.84128 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  image    (Intercept) 1.993    1.412   
##  Residual             2.640    1.625   
## Number of obs: 94, groups:  image, 47
## 
## Fixed effects:
##                   Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)       8.397222   0.313944 77.632680  26.748   <2e-16 ***
## conditionflicker  0.008778   0.335143 46.000000   0.026    0.979    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condtnflckr -0.534

4 Log RTs

4.1 Remove outlier trials

Next, we can remove outlier RTs that are more than 3 SDs away from the mean.

Let’s get the number of trials. This is the initial number of trials.

# Accurate trials per worker and image (wide), with the per-worker total in `sum`.
# Recomputed here as the baseline for the log-RT trimming.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  group_by(workerId, image) %>%
  dplyr::summarise(counts = n()) %>%
  spread(key = image, value = counts) %>%
  mutate(sum = rowSums(.[-1], na.rm = TRUE))
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts,10)

Convert raw RTs to log-transformed RTs

# RT in seconds, then its base-10 logarithm.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["rt_s"]] <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["rt"]] / 1000
tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["log_rt"]] <-
  log10(tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["rt_s"]])

Count the number of trials

# Accurate main trials before any RT-based exclusion.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  nrow()

## [1] 1056

Before the data are trimmed, let’s generate histograms of all log RTs and of the mean log RT of each subject.

# Histogram of trial-level log RTs before trimming.
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed,
  x = "log_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 60,
  xlim = c(0, 3),
  ylim = c(0, 300),
  xlab = "Detection Log RT",
  title = "All Trials"
)

# Mean log RT per worker, and its distribution across workers.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_mean_subj_log_RT <- dplyr::summarize(
  group_by(tbl_good_catch_acc_all_main_acc_inacc_trials_removed, workerId),
  mean_rt = mean(log_rt, na.rm = TRUE)
)
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_mean_subj_log_RT,
  x = "mean_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 35,
  xlim = c(0, 1.5),
  ylim = c(0, 4),
  xlab = "Mean Detection Log RT",
  title = "All Subjects"
)

Count the number of trials

# Trial count before timeouts are removed.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  nrow()

## [1] 1056

Trial timer maxed out at 60 sec. Any RTs recorded as 60 sec should be discarded.

# Drop trials that hit the 60 s (60000 ms) timer ceiling.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed <- filter(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed,
  rt < 60000
)

Count the number of trials

# Trials left after timed-out trials are dropped.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed %>%
  nrow()

## [1] 1054

Note: code can be changed to allow for replacement of outliers with the cutoff values.

# Restrict to accurate clicks, then flag log RTs that lie more than n_sd SDs
# from each worker's own mean log RT.
#   outliers:   0 = within bounds, 1 = below the lower bound, 2 = above the upper bound
#   removed_RT: copy of log_rt with flagged values set to NA
correct.trials <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed[
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed[["click_ACC"]] == "1",
]
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed <- ddply(
  correct.trials,
  .(workerId),
  function(subj) {
    n_sd <- 3 # change this number to tighten or loosen the cutoff criteria
    centre <- mean(subj$log_rt)
    width <- sd(subj$log_rt)
    too_slow <- subj$log_rt > centre + n_sd * width
    too_fast <- subj$log_rt < centre - n_sd * width

    subj$outliers <- 0
    subj$outliers[too_slow] <- 2
    subj$outliers[too_fast] <- 1

    # To replace outliers with the cutoff values instead of dropping them,
    # assign the upper / lower bound here in place of NA.
    subj$removed_RT <- subj$log_rt
    subj$removed_RT[too_slow] <- NA
    subj$removed_RT[too_fast] <- NA

    subj
  }
)
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed,10)

Count the number of trials

# Row count is unchanged at this point: outliers are only flagged, not yet dropped.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed %>%
  nrow()

## [1] 1054

Next, let’s completely toss out the outlier trials (labeled as NA).

# Drop the trials whose removed_RT was set to NA (the flagged outliers).
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed <- subset(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed,
  !is.na(removed_RT)
)
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,10)

Count the number of trials

# Trials surviving the 3-SD trim on log RT.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  nrow()

## [1] 1037

Let’s get the number of trials. This is the number of trials that “survive” the data trimming.

# Surviving trials per worker and image (wide), with the per-worker total in `sum`.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  group_by(workerId, image) %>%
  dplyr::summarise(counts = n()) %>%
  spread(key = image, value = counts) %>%
  mutate(sum = rowSums(.[-1], na.rm = TRUE))
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts,10)

Here are new histograms of all RTs and the mean RT of each subject.

# Histogram of trial-level log RTs after trimming.
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
  x = "log_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 30,
  xlim = c(0, 2),
  ylim = c(0, 400),
  xlab = "Detection Log RT",
  title = "All Trials"
)

# Mean trimmed log RT per worker, and its distribution across workers.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_mean_subj_RT <- dplyr::summarize(
  group_by(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
    workerId
  ),
  mean_rt = mean(log_rt, na.rm = TRUE)
)
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_mean_subj_RT,
  x = "mean_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 15,
  xlim = c(0, 1.5),
  ylim = c(0, 8),
  xlab = "Mean Detection Log RT",
  title = "All Subjects"
)

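What percentage of trials was removed overall? Note that the count below is the difference between the accurate trials and the final trimmed set, so it includes the timed-out trials as well as the 3-SD outliers.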

# Trials lost per worker between the accurate-trial set and the final trimmed set
# (timed-out trials and 3-SD outliers together).
# Align the two count tables on workerId instead of assuming identical row order;
# a worker with no surviving trials counts as having 0 trials left.
surviving_trials <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts$sum[
  match(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$workerId,
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts$workerId
  )
]
surviving_trials[is.na(surviving_trials)] <- 0
tbl_all_main_acc_rts_3SD_removed_count <- data.frame(
  total_removed = tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum - surviving_trials
)

# Overall percentage of accurate trials that were removed.
per_RTs_removed <- (sum(tbl_all_main_acc_rts_3SD_removed_count) / sum(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum)) * 100
per_RTs_removed

## [1] 1.799242

What is the percentage of outlier RTs that were removed per subject? This is easy to visualize in a plot.

# Per-worker table: trials removed, trials before trimming, and percent removed.
tbl_per_rts_3SD_removed_by_subj <- cbind.data.frame(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts[1],
  tbl_all_main_acc_rts_3SD_removed_count,
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum,
  data.frame((tbl_all_main_acc_rts_3SD_removed_count / tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum) * 100)
)
colnames(tbl_per_rts_3SD_removed_by_subj) <- c("workerId", "outlier_RTs", "total_RTs", "percent_excluded")
#head(tbl_per_rts_3SD_removed_by_subj,10)

# Percent of trials excluded for each worker, sorted ascending.
ggbarplot(
  tbl_per_rts_3SD_removed_by_subj,
  x = "workerId",
  y = "percent_excluded",
  ylab = "% Trials Excluded",
  fill = "#f7a800",
  font.tickslab = 8,
  sort.val = c("asc")
) +
  rotate_x_text()

4.2 Summary statistics

Let’s again confirm how many subjects we’re working with. This is the total number of subjects with good catch trial accuracy and good main trial accuracy.

# Number of distinct workers left in the post-trimming count table.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts %>%
  distinct(workerId, .keep_all = FALSE) %>%
  nrow()

## [1] 43

4.3 Plot the results

This is a plot of the mean detection log RT for each image.

# Mean log RT (with SE bars) per image, sorted ascending; legend hidden.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  ggbarplot(
    x = "image",
    y = "log_rt",
    ylab = "Mean Detection Log RT",
    fill = "#f7a800",
    add = "mean_se",
    ylim = c(0, 1.2),
    font.xtickslab = 8,
    sort.val = "asc"
  ) +
  rotate_x_text() +
  theme(legend.position = "none")

This table contains the final count for each image. This is after RTs were excluded that were more than 3 SDs from the mean.

# Total surviving trials per image, summed over workers. The image columns are
# picked by name (every column except the worker id and the per-worker row
# total) rather than by the fixed positions 2:49, so the table stays correct
# if an image drops out or is added.
image_count_final <- data.frame(
  image_count = colSums(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts[
      ,
      setdiff(
        colnames(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts),
        c("workerId", "sum")
      )
    ],
    na.rm = TRUE
  )
)
knitr::kable(image_count_final)

	image_count
Amish	25
Army	23
Barns	22
BarnTrack	20
Barrels	15
Beach	23
Birds	21
Boat	23
Bus	19
Cactus	27
Camel	23
CanalBridge	19
Castle	19
Chopper	19
Cockpit	26
Description	21
Dinner	21
Diver	21
Eating	23
Egypt	23
FarmByPond	23
Farmer	23
Fishing	24
Floatplane	24
Fountain	22
Harbor	24
Horizon	22
Ice	21
Kayak	23
Kayaker	21
Kids	20
Lake	24
Market	18
Marling	18
Mosque	20
NotreDame	23
Nurses	21
Obelisk	21
OtherDiver	17
Pilots	22
Seal	23
Soldiers	20
Station	22
SummerLake	25
Turtle	20
Water	22
Window	20
Wine	21

4.4 Splash vs. Flicker

This final section compares the RT data from the images with the mudsplashes and the images without mudsplashes.

# Mean log RT per image in the mudsplash data (NAs ignored).
rensink_mudsplash <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  group_by(image) %>%
  dplyr::summarise(mean_rt = mean(log_rt, na.rm = TRUE))

# Load the flicker comparison data; the path is relative to the working directory.
rensink_flicker <- readr::read_csv(file = "./change_blindness_rensink_behav.csv")

## Warning: Missing column names filled in: 'X1' [1]

# The first column arrives without a header; give it a usable name.
names(rensink_flicker)[1] <- "trial_number"

# Wide table: one row per image with the splash mean and column 7 of the
# flicker file.
# NOTE(review): the two sources are bound purely by row position, which assumes
# the flicker file lists the same images in the same order as
# rensink_mudsplash -- confirm.
rensink_RTs <- setNames(
  cbind.data.frame(rensink_mudsplash, rensink_flicker[7]),
  c("image", "splash", "flicker")
)

# Long table: one row per image x condition, with condition as a factor.
rensink_RTs_long <- rensink_RTs %>%
  gather(key = condition, value = RT, splash:flicker, factor_key = TRUE)

# Mean and SE of RT for each condition.
rensink_RTs_long %>%
  group_by(condition) %>%
  get_summary_stats(RT, type = "mean_se")

## # A tibble: 2 x 5
##   condition variable     n  mean    se
##   <fct>     <chr>    <dbl> <dbl> <dbl>
## 1 splash    RT          48 0.904 0.007
## 2 flicker   RT          48 0.914 0.023

# Paired t-test of splash minus flicker across images, run on the wide table so
# each image's two values are paired explicitly by row. The formula interface
# combined with `paired = TRUE` is rejected with an error from R 4.4.0 onward.
# The statistics are unchanged; only the printed `data:` label differs
# ("splash and flicker").
rensink_RTs %>%
  with(t.test(splash, flicker, paired = TRUE))

## 
##  Paired t-test
## 
## data:  RT by condition
## t = -0.48841, df = 47, p-value = 0.6275
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04824407  0.02939486
## sample estimates:
## mean of the differences 
##            -0.009424604

# Side-by-side bars per image, one bar per condition.
rensink_RTs_long %>%
  ggbarplot(
    x = "image",
    y = "RT",
    fill = "condition",
    color = "condition",
    palette = "jco",
    ylim = c(0, 2),
    position = position_dodge(0.9),
    font.xtickslab = 8,
    ylab = "Mean Detection Log RT (sec)"
  ) +
  rotate_x_text()

# Paired plot linking each image's splash and flicker values.
rensink_RTs %>%
  ggpaired(
    cond1 = "splash",
    cond2 = "flicker",
    fill = "condition",
    palette = "jco",
    ylim = c(0, 2),
    ylab = "Mean Detection Log RT (sec)"
  )

# Splash vs. flicker scatter with regression line and Pearson correlation.
rensink_RTs %>%
  ggscatter(
    x = "splash",
    y = "flicker",
    xlab = "Mean Splash Detection Log RT",
    ylab = "Mean Flicker Detection Log RT",
    fill = "#f7a800",
    color = "#f7a800",
    add = "reg.line",
    cor.coef = TRUE,
    cor.coeff.args = list(method = "pearson", label.x = 0, label.sep = "\n"),
    ylim = c(0, 2),
    xlim = c(0, 2),
    title = "All Rensink Images"
  )

# Mixed model: fixed effect of condition, random intercept per image.
log_RT_lmer <- lmer(RT ~ condition + (1 | image), data = rensink_RTs_long)
summary(log_RT_lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: RT ~ condition + (1 | image)
##    Data: rensink_RTs_long
## 
## REML criterion at convergence: -134.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4305 -0.4402  0.0314  0.3124  4.1881 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  image    (Intercept) 0.004869 0.06978 
##  Residual             0.008936 0.09453 
## Number of obs: 96, groups:  image, 48
## 
## Fixed effects:
##                   Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)       0.904311   0.016959 83.600323  53.322   <2e-16 ***
## conditionflicker  0.009425   0.019296 47.000000   0.488    0.628    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condtnflckr -0.569

Remove “Horizon” image (potential outlier)

# Same wide table with the "Horizon" image dropped.
rensink_RTs_no_horizon <- rensink_RTs %>%
  dplyr::filter(image != "Horizon")

# Long form: one row per image x condition, with condition as a factor.
rensink_RTs_no_horizon_long <- rensink_RTs_no_horizon %>%
  gather(key = condition, value = RT, splash:flicker, factor_key = TRUE)

# Mean and SE of RT for each condition.
rensink_RTs_no_horizon_long %>%
  group_by(condition) %>%
  get_summary_stats(RT, type = "mean_se")

## # A tibble: 2 x 5
##   condition variable     n  mean    se
##   <fct>     <chr>    <dbl> <dbl> <dbl>
## 1 splash    RT          47 0.903 0.007
## 2 flicker   RT          47 0.902 0.02

# Paired t-test of splash minus flicker across images (Horizon excluded), run
# on the wide table so the pairing is explicit by row. The formula interface
# combined with `paired = TRUE` is rejected with an error from R 4.4.0 onward.
# The statistics are unchanged; only the printed `data:` label differs
# ("splash and flicker").
rensink_RTs_no_horizon %>%
  with(t.test(splash, flicker, paired = TRUE))

## 
##  Paired t-test
## 
## data:  RT by condition
## t = 0.079444, df = 46, p-value = 0.937
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03168063  0.03428411
## sample estimates:
## mean of the differences 
##              0.00130174

# Side-by-side bars per image, one bar per condition (Horizon excluded).
rensink_RTs_no_horizon_long %>%
  ggbarplot(
    x = "image",
    y = "RT",
    fill = "condition",
    color = "condition",
    palette = "jco",
    ylim = c(0, 2),
    position = position_dodge(0.9),
    font.xtickslab = 8,
    ylab = "Mean Detection Log RT (sec)"
  ) +
  rotate_x_text()

# Paired plot linking each image's splash and flicker values.
rensink_RTs_no_horizon %>%
  ggpaired(
    cond1 = "splash",
    cond2 = "flicker",
    fill = "condition",
    palette = "jco",
    ylim = c(0, 2),
    ylab = "Mean Detection Log RT (sec)"
  )

# Scatter with regression line and Pearson correlation. The plot object is
# named `log_scatter` (not `log`) so it does not shadow base::log().
log_scatter <- ggscatter(
  rensink_RTs_no_horizon,
  x = "splash",
  y = "flicker",
  xlab = "Mean Splash Detection Log RT",
  ylab = "Mean Flicker Detection Log RT",
  fill = "#f7a800",
  color = "#f7a800",
  add = "reg.line",
  cor.coef = TRUE,
  cor.coeff.args = list(method = "pearson", label.x = 0, label.sep = "\n"),
  ylim = c(0, 2),
  xlim = c(0, 2),
  title = "All Rensink Images; No 'Horizon'"
)
# Printed explicitly so that messages emitted while drawing are suppressed.
suppressMessages(print(log_scatter))

# Mixed model: fixed effect of condition, random intercept per image.
log_RT_lmer <- lmer(RT ~ condition + (1 | image), data = rensink_RTs_no_horizon_long)
summary(log_RT_lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: RT ~ condition + (1 | image)
##    Data: rensink_RTs_no_horizon_long
## 
## REML criterion at convergence: -157.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.52983 -0.55502  0.04261  0.39899  2.29330 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  image    (Intercept) 0.004380 0.06618 
##  Residual             0.006309 0.07943 
## Number of obs: 94, groups:  image, 47
## 
## Fixed effects:
##                   Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)       0.903278   0.015081 78.775272  59.896   <2e-16 ***
## conditionflicker -0.001302   0.016386 46.000000  -0.079    0.937    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condtnflckr -0.543

5 Cycles

5.1 Remove outlier trials

Next, we can remove outlier trials whose number of cycles is more than 3 SDs away from the subject’s mean.

Let’s get the number of trials. This is the initial number of trials.

# Trial counts per worker (rows) and image (columns), plus a per-worker row
# total in `sum`. The table is ungrouped before the row total is added:
# `rowSums(.[-1])` spans the whole table, which is not valid inside a
# per-worker grouped dplyr::mutate() and previously worked only because plyr
# (loaded after dplyr) masks mutate().
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  group_by(workerId, image) %>%
  dplyr::summarize(counts = n()) %>%
  spread(image, counts) %>%
  ungroup() %>%
  mutate(sum = rowSums(.[-1], na.rm = TRUE)) # .[-1] drops the workerId column
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts,10)

Before the data are trimmed, let’s generate histograms of the number of cycles on all trials and the mean number of cycles for each subject.

# Histogram of the number of cycles over all trials, before trimming.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  gghistogram(
    x = "cycles",
    fill = "#f7a800",
    rug = TRUE,
    bins = 60,
    xlim = c(0, 60),
    ylim = c(0, 800),
    xlab = "Number of Cycles",
    title = "All Trials"
  )

# One mean cycle count per worker (NAs ignored), followed by its histogram.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_mean_subj_RT <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  group_by(workerId) %>%
  dplyr::summarise(mean_cycles = mean(cycles, na.rm = TRUE))
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_mean_subj_RT %>%
  gghistogram(
    x = "mean_cycles",
    fill = "#f7a800",
    rug = TRUE,
    bins = 35,
    xlim = c(0, 10),
    ylim = c(0, 10),
    xlab = "Mean Number of Cycles",
    title = "All Subjects"
  )

Count the number of trials

# Trial count before any trimming.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  nrow()

## [1] 1056

Trial timer maxed out at 60 sec. Any RTs recorded as 60 sec should be discarded.

# Keep only trials whose rt is below 60000 (the 60 s timer cap, in ms).
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  dplyr::filter(rt < 60000)

Count the number of trials

# Trial count after dropping timeout trials.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed %>%
  nrow()

## [1] 1054

Note: code can be changed to allow for replacement of outliers with the cutoff values.

# Keep the trials with click_ACC == "1". which() is used so that rows with a
# missing click_ACC are dropped; plain logical indexing would turn each of
# them into an all-NA row.
correct.trials <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed[
  which(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed$click_ACC == "1"),
]

# Per worker, flag trials whose cycle count lies more than `n_sd` SDs from
# that worker's mean and blank them out in `removed_RT`.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed <- ddply(correct.trials, .(workerId), function(x) {
  n_sd <- 3 # change this number to tighten or loosen the cutoff criteria
  m <- mean(x$cycles)
  s <- sd(x$cycles)
  upper <- m + n_sd * s
  lower <- m - n_sd * s

  # outliers: 0 = kept, 1 = below the lower cutoff, 2 = above the upper cutoff
  x$outliers <- 0
  x$outliers[x$cycles > upper] <- 2
  x$outliers[x$cycles < lower] <- 1

  # removed_RT holds the cycle count with outliers set to NA; later code drops
  # the NA rows.
  x$removed_RT <- x$cycles
  x$removed_RT[x$cycles > upper] <- NA # change NA with upper to replace an outlier with the upper cutoff
  x$removed_RT[x$cycles < lower] <- NA # change NA with lower to replace an outlier with the lower cutoff

  x
})
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed,10)

Count the number of trials

# Trial count after flagging outliers (nothing has been dropped yet).
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed %>%
  nrow()

## [1] 1054

Next, let’s completely toss out the outlier trials (labeled as NA).

# Drop every trial whose removed_RT was set to NA by the outlier step.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed <- subset(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed,
  !is.na(removed_RT)
)
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,10)

Count the number of trials

# Trial count after the outlier trials have been dropped.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  nrow()

## [1] 1031

Let’s get the number of trials. This is the number of trials that “survive” the data trimming.

# Surviving trial counts per worker (rows) and image (columns), plus a
# per-worker row total in `sum`. The table is ungrouped before the row total
# is added: `rowSums(.[-1])` spans the whole table, which is not valid inside a
# per-worker grouped dplyr::mutate() and previously worked only because plyr
# (loaded after dplyr) masks mutate().
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  group_by(workerId, image) %>%
  dplyr::summarize(counts = n()) %>%
  spread(image, counts) %>%
  ungroup() %>%
  mutate(sum = rowSums(.[-1], na.rm = TRUE)) # .[-1] drops the workerId column
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts,10)

Here are new histograms of the number of cycles on all trials and the mean number of cycles for each subject.

# Histogram of the number of cycles over all trials, after trimming.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  gghistogram(
    x = "cycles",
    fill = "#f7a800",
    rug = TRUE,
    bins = 30,
    xlim = c(0, 10),
    ylim = c(0, 300),
    xlab = "Number of Cycles",
    title = "All Trials"
  )

# One mean cycle count per worker (NAs ignored), followed by its histogram.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_mean_subj_RT <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  group_by(workerId) %>%
  dplyr::summarise(mean_cycles = mean(cycles, na.rm = TRUE))
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_mean_subj_RT %>%
  gghistogram(
    x = "mean_cycles",
    fill = "#f7a800",
    rug = TRUE,
    bins = 15,
    xlim = c(0, 5),
    ylim = c(0, 8),
    xlab = "Mean Number of Cycles",
    title = "All Subjects"
  )

What percentage of trials was removed overall (timeout trials plus trials with outlier cycle counts)?

# Trials removed per subject = initial trial count minus the count left after
# trimming. The subtraction is purely positional, so first verify that both
# count tables hold the same workers (first column) in the same order;
# otherwise R would silently recycle or misalign the rows.
# The initial counts are taken before the 60 s timeout filter, so the removed
# trials counted here include timeout trials as well as 3-SD outliers.
stopifnot(
  nrow(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts) ==
    nrow(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts),
  all(
    as.character(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts[[1]]) ==
      as.character(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts[[1]])
  )
)
tbl_all_main_acc_rts_3SD_removed_count <- data.frame(total_removed = tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum - tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts$sum)

# Overall percentage: total removed trials over total initial trials.
per_RTs_removed <- (sum(tbl_all_main_acc_rts_3SD_removed_count) / sum(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum)) * 100
per_RTs_removed

## [1] 2.367424

What percentage of trials was removed for each subject? This is easy to visualize in a plot.

# Per-subject exclusion rate: removed trials as a percentage of that subject's
# initial trial count.
tbl_per_rts_3SD_removed_by_subj <- data.frame(
  (tbl_all_main_acc_rts_3SD_removed_count / tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum) * 100
)
# Assemble worker id, removed count, initial count and percentage side by side.
tbl_per_rts_3SD_removed_by_subj <- cbind.data.frame(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts[1],
  tbl_all_main_acc_rts_3SD_removed_count,
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum,
  tbl_per_rts_3SD_removed_by_subj
)
colnames(tbl_per_rts_3SD_removed_by_subj) <- c("workerId", "outlier_RTs", "total_RTs", "percent_excluded")
#head(tbl_per_rts_3SD_removed_by_subj,10)

# Bar per worker, sorted by ascending exclusion rate.
tbl_per_rts_3SD_removed_by_subj %>%
  ggbarplot(
    x = "workerId",
    y = "percent_excluded",
    ylab = "% Trials Excluded",
    fill = "#f7a800",
    font.tickslab = 8,
    sort.val = "asc"
  ) +
  rotate_x_text()

5.2 Summary statistics

Let’s again confirm how many subjects we’re working with. This is the total number of subjects with good catch trial accuracy and good main trial accuracy.

# Number of distinct workers left in the post-trimming count table.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts %>%
  distinct(workerId, .keep_all = FALSE) %>%
  nrow()

## [1] 43

5.3 Plot the results

This is a plot of the mean number of cycles for each image.

# Mean number of cycles (with SE bars) per image, sorted ascending; legend hidden.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  ggbarplot(
    x = "image",
    y = "cycles",
    ylab = "Mean Number of Cycles",
    ylim = c(0, 3),
    fill = "#f7a800",
    add = "mean_se",
    font.xtickslab = 8,
    sort.val = "asc"
  ) +
  rotate_x_text() +
  theme(legend.position = "none")

This table contains the final count for each image. This is after trials were excluded whose number of cycles was more than 3 SDs from the mean.

# Total surviving trials per image, summed over workers. The image columns are
# picked by name (every column except the worker id and the per-worker row
# total) rather than by the fixed positions 2:49, so the table stays correct
# if an image drops out or is added.
image_count_final <- data.frame(
  image_count = colSums(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts[
      ,
      setdiff(
        colnames(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts),
        c("workerId", "sum")
      )
    ],
    na.rm = TRUE
  )
)
knitr::kable(image_count_final)

	image_count
Amish	25
Army	23
Barns	22
BarnTrack	19
Barrels	14
Beach	23
Birds	20
Boat	23
Bus	19
Cactus	27
Camel	23
CanalBridge	19
Castle	19
Chopper	18
Cockpit	26
Description	21
Dinner	21
Diver	21
Eating	23
Egypt	23
FarmByPond	23
Farmer	23
Fishing	24
Floatplane	24
Fountain	22
Harbor	24
Horizon	20
Ice	21
Kayak	23
Kayaker	21
Kids	20
Lake	24
Market	18
Marling	18
Mosque	20
NotreDame	23
Nurses	21
Obelisk	21
OtherDiver	17
Pilots	22
Seal	23
Soldiers	20
Station	22
SummerLake	25
Turtle	20
Water	22
Window	20
Wine	21

5.4 Splash vs. Flicker

This final section compares the number of cycles for the images with the mudsplashes and the images without mudsplashes.

# Mean number of cycles per image in the mudsplash data (NAs ignored).
rensink_mudsplash <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  group_by(image) %>%
  dplyr::summarise(mean_cycles = mean(cycles, na.rm = TRUE))

# Load the flicker comparison data; the path is relative to the working directory.
rensink_flicker <- readr::read_csv(file = "./change_blindness_rensink_behav.csv")

## Warning: Missing column names filled in: 'X1' [1]

# The first column arrives without a header; give it a usable name.
names(rensink_flicker)[1] <- "trial_number"

# Wide table: one row per image with the splash mean and column 9 of the
# flicker file.
# NOTE(review): the two sources are bound purely by row position, which assumes
# the flicker file lists the same images in the same order as
# rensink_mudsplash -- confirm.
rensink_cycles <- setNames(
  cbind.data.frame(rensink_mudsplash, rensink_flicker[9]),
  c("image", "splash", "flicker")
)

# Long table: one row per image x condition, with condition as a factor.
rensink_cycles_long <- rensink_cycles %>%
  gather(key = condition, value = cycles, splash:flicker, factor_key = TRUE)

# Mean and SE of the number of cycles for each condition.
rensink_cycles_long %>%
  group_by(condition) %>%
  get_summary_stats(cycles, type = "mean_se")

## # A tibble: 2 x 5
##   condition variable     n  mean    se
##   <fct>     <chr>    <dbl> <dbl> <dbl>
## 1 splash    cycles      48  2.00 0.034
## 2 flicker   cycles      48  9.21 0.622

# Paired t-test of splash minus flicker across images, run on the wide table so
# each image's two values are paired explicitly by row. The formula interface
# combined with `paired = TRUE` is rejected with an error from R 4.4.0 onward.
# The statistics are unchanged; only the printed `data:` label differs
# ("splash and flicker").
rensink_cycles %>%
  with(t.test(splash, flicker, paired = TRUE))

## 
##  Paired t-test
## 
## data:  cycles by condition
## t = -11.876, df = 47, p-value = 9.387e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.431286 -5.988624
## sample estimates:
## mean of the differences 
##               -7.209955

# Side-by-side bars per image, one bar per condition.
rensink_cycles_long %>%
  ggbarplot(
    x = "image",
    y = "cycles",
    fill = "condition",
    color = "condition",
    palette = "jco",
    position = position_dodge(0.9),
    font.xtickslab = 8,
    ylab = "Mean Number of Cycles"
  ) +
  rotate_x_text()

# Paired plot linking each image's splash and flicker values.
rensink_cycles %>%
  ggpaired(
    cond1 = "splash",
    cond2 = "flicker",
    fill = "condition",
    palette = "jco",
    ylab = "Mean Number of Cycles"
  )

# Splash vs. flicker scatter with regression line and Pearson correlation.
rensink_cycles %>%
  ggscatter(
    x = "splash",
    y = "flicker",
    xlab = "Mean Splash Number of Cycles",
    ylab = "Mean Flicker Number of Cycles",
    fill = "#f7a800",
    color = "#f7a800",
    add = "reg.line",
    cor.coef = TRUE,
    cor.coeff.args = list(method = "pearson", label.x = 0, label.y = 2, label.sep = "\n"),
    ylim = c(0, 30),
    xlim = c(0, 30),
    title = "All Rensink Images"
  )

# Mixed model: fixed effect of condition, random intercept per image.
cycles_lmer <- lmer(cycles ~ condition + (1 | image), data = rensink_cycles_long)
summary(cycles_lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cycles ~ condition + (1 | image)
##    Data: rensink_cycles_long
## 
## REML criterion at convergence: 484.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1818 -0.2845 -0.0049  0.0805  6.8064 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  image    (Intercept) 0.4775   0.691   
##  Residual             8.8457   2.974   
## Number of obs: 96, groups:  image, 48
## 
## Fixed effects:
##                  Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)        1.9991     0.4407 93.7541   4.536 1.70e-05 ***
## conditionflicker   7.2100     0.6071 47.0000  11.876 9.39e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condtnflckr -0.689

Remove “Horizon” image (potential outlier)

# Same wide table with the "Horizon" image dropped.
rensink_cycles_no_horizon <- rensink_cycles %>%
  dplyr::filter(image != "Horizon")

# Long form: one row per image x condition, with condition as a factor.
rensink_cycles_no_horizon_long <- rensink_cycles_no_horizon %>%
  gather(key = condition, value = cycles, splash:flicker, factor_key = TRUE)

# Mean and SE of the number of cycles for each condition.
rensink_cycles_no_horizon_long %>%
  group_by(condition) %>%
  get_summary_stats(cycles, type = "mean_se")

## # A tibble: 2 x 5
##   condition variable     n  mean    se
##   <fct>     <chr>    <dbl> <dbl> <dbl>
## 1 splash    cycles      47  2.00 0.035
## 2 flicker   cycles      47  8.76 0.436

# Paired t-test of splash minus flicker across images (Horizon excluded), run
# on the wide table so the pairing is explicit by row. The formula interface
# combined with `paired = TRUE` is rejected with an error from R 4.4.0 onward.
# The statistics are unchanged; only the printed `data:` label differs
# ("splash and flicker").
rensink_cycles_no_horizon %>%
  with(t.test(splash, flicker, paired = TRUE))

## 
##  Paired t-test
## 
## data:  cycles by condition
## t = -16.367, df = 46, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.587885 -5.925937
## sample estimates:
## mean of the differences 
##               -6.756911

# Side-by-side bars per image, one bar per condition (Horizon excluded).
rensink_cycles_no_horizon_long %>%
  ggbarplot(
    x = "image",
    y = "cycles",
    fill = "condition",
    color = "condition",
    palette = "jco",
    ylim = c(0, 20),
    position = position_dodge(0.9),
    font.xtickslab = 8,
    ylab = "Mean Number of Cycles"
  ) +
  rotate_x_text()

# Paired plot linking each image's splash and flicker values.
rensink_cycles_no_horizon %>%
  ggpaired(
    cond1 = "splash",
    cond2 = "flicker",
    fill = "condition",
    palette = "jco",
    ylim = c(0, 20),
    ylab = "Mean Number of Cycles"
  )

# Scatter with regression line and Pearson correlation. The plot object is
# named `cycles_scatter` (not `cycles`) so it cannot be confused with the
# `cycles` data column used in the model formula below.
cycles_scatter <- ggscatter(
  rensink_cycles_no_horizon,
  x = "splash",
  y = "flicker",
  xlab = "Mean Splash Number of Cycles",
  ylab = "Mean Flicker Number of Cycles",
  fill = "#f7a800",
  color = "#f7a800",
  add = "reg.line",
  cor.coef = TRUE,
  cor.coeff.args = list(method = "pearson", label.x = 0, label.y = 3, label.sep = "\n"),
  ylim = c(0, 20),
  xlim = c(0, 20),
  title = "All Rensink Images; No 'Horizon'"
)
# Printed explicitly so that messages emitted while drawing are suppressed.
suppressMessages(print(cycles_scatter))

# Mixed model: fixed effect of condition, random intercept per image.
cycles_lmer <- lmer(cycles ~ condition + (1 | image), data = rensink_cycles_no_horizon_long)
summary(cycles_lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cycles ~ condition + (1 | image)
##    Data: rensink_cycles_no_horizon_long
## 
## REML criterion at convergence: 406.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4504 -0.2140  0.0006  0.1213  3.6053 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  image    (Intercept) 0.4942   0.703   
##  Residual             4.0050   2.001   
## Number of obs: 94, groups:  image, 47
## 
## Fixed effects:
##                  Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)        1.9993     0.3094 90.9030   6.462 5.03e-09 ***
## conditionflicker   6.7569     0.4128 46.0000  16.367  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condtnflckr -0.667

Rensink Mudsplash

Adam Barnas

Last compiled at 1:54 AM on July 21, 2020

1 Set up R environment

2 Format & manipulate raw data files

2.1 Read-in datafiles

2.2 Compute number of cycles

2.3 Analyze accuracy

2.3.1 Catch trials

2.3.2 Main trials

3 Raw RTs

3.1 Remove outlier trials

3.2 Summary statistics

3.3 Plot the results

3.4 Splash vs. Flicker

4 Log RTs

4.1 Remove outlier trials

4.2 Summary statistics

4.3 Plot the results

4.4 Splash vs. Flicker

5 Cycles

5.1 Remove outlier trials

5.2 Summary statistics

5.3 Plot the results

5.4 Splash vs. Flicker