Remove outlier trials
Next, we can remove outlier RTs that are more than 3 SDs away from the mean.
Let’s get the number of trials. This is the initial number of trials.
# Trial counts before any RT trimming: one row per workerId, one column per
# image (number of trials for that subject/image), plus a `sum` column holding
# the row total across images.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed %>%
  group_by(workerId, image) %>%
  dplyr::summarize(counts = n()) %>%
  # summarize() only peels off the last grouping level, so the result is still
  # grouped by workerId. Drop the grouping so the whole-table rowSums() below
  # gives the same answer whichever mutate() (plyr or dplyr) is on the search
  # path.
  dplyr::ungroup() %>%
  spread(image, counts) %>%
  # .[-1] drops the first column (workerId) so only the per-image counts are
  # summed; images a subject never saw are NA after spread() and are ignored.
  mutate(sum = rowSums(.[-1], na.rm = TRUE))
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts,10)
Convert raw RTs to log-transformed RTs
# Rescale the raw RTs by 1/1000 (ms -> s, given the 60000 / "60 sec" time-out
# used later), then take the base-10 log of the rescaled value.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["rt_s"]] <-
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["rt"]] / 1000
tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["log_rt"]] <-
  log10(tbl_good_catch_acc_all_main_acc_inacc_trials_removed[["rt_s"]])
Count the number of trials
# Row count of the trial table before time-out and outlier removal.
nrow(tbl_good_catch_acc_all_main_acc_inacc_trials_removed)
## [1] 1056
Before the data are trimmed, let’s generate histograms of all RTs and the mean RT of each subject
# Histogram of every trial's log RT before trimming.
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed,
  x = "log_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 60,
  xlim = c(0, 3),
  ylim = c(0, 300),
  xlab = "Detection Log RT",
  title = "All Trials"
)

# Mean log RT for each subject (one row per workerId), then its histogram.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_mean_subj_log_RT <- dplyr::summarize(
  group_by(tbl_good_catch_acc_all_main_acc_inacc_trials_removed, workerId),
  mean_rt = mean(log_rt, na.rm = TRUE)
)
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_mean_subj_log_RT,
  x = "mean_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 35,
  xlim = c(0, 1.5),
  ylim = c(0, 4),
  xlab = "Mean Detection Log RT",
  title = "All Subjects"
)

Count the number of trials
# Row count of the same, still unfiltered trial table, shown again as the
# baseline for the time-out filter that follows.
nrow(tbl_good_catch_acc_all_main_acc_inacc_trials_removed)
## [1] 1056
Trial timer maxed out at 60 sec. Any RTs recorded as 60 sec should be discarded.
# Keep only trials whose raw RT is strictly below 60000 (the timer ceiling);
# rows where the comparison is not TRUE are dropped.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed <- filter(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed,
  rt < 60000
)
Count the number of trials
# Row count after discarding trials with an rt of 60000 or more.
nrow(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed)
## [1] 1054
Next, data are inspected for RT outliers. Two additional columns are added to the data table. First, an “outliers” column labels each RT as an outlier or not (0 = not an outlier, 1 = an outlier more than 3 SDs below the subject’s mean log RT, 2 = an outlier more than 3 SDs above the subject’s mean log RT). Second, a “removed_RT” column contains the log RT for non-outlier trials and NA for outlier trials.
Note: code can be changed to allow for replacement of outliers with the cutoff values.
# Label RT outliers separately for each subject.
#
# Only trials with click_ACC equal to "1" are kept. Within each workerId, a
# log RT further than `sd_cutoff` SDs from that subject's mean log RT is
# flagged. Two columns are added:
#   outliers   : 0 = within bounds, 1 = below the lower bound,
#                2 = above the upper bound
#   removed_RT : log_rt, with flagged trials set to NA
#
# %in% (rather than ==) is used so a missing click_ACC counts as "not correct"
# instead of producing an all-NA row in the subset.
correct.trials <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed[
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed$click_ACC %in% "1",
]
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed <- ddply(correct.trials, .(workerId), function(x) {
  sd_cutoff <- 3 # change 3 to another number to increase or decrease the cutoff criteria
  m <- mean(x$log_rt, na.rm = TRUE)
  s <- sd(x$log_rt, na.rm = TRUE)
  upper <- m + sd_cutoff * s
  lower <- m - sd_cutoff * s
  # which() discards NA comparisons (e.g. a subject with a single trial has an
  # NA sd), so such trials are simply left unflagged.
  too_slow <- which(x$log_rt > upper)
  too_fast <- which(x$log_rt < lower)
  x$outliers <- 0
  x$outliers[too_slow] <- 2
  x$outliers[too_fast] <- 1
  x$removed_RT <- x$log_rt
  x$removed_RT[too_slow] <- NA # change NA to upper to replace an outlier with the upper cutoff
  x$removed_RT[too_fast] <- NA # change NA to lower to replace an outlier with the lower cutoff
  x
})
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed,10)
Count the number of trials
# Row count after outlier labelling; flagged trials are still present at this
# point (only their removed_RT value is NA).
nrow(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed)
## [1] 1054
Next, let’s completely toss out the outlier trials (labeled as NA).
# Keep only the trials that were not flagged as outliers, i.e. the rows whose
# removed_RT is not NA.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed <- subset(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed,
  !is.na(removed_RT)
)
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,10)
Count the number of trials
# Row count after dropping the trials whose removed_RT is NA.
nrow(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed)
## [1] 1037
Let’s get the number of trials. This is the number of trials that “survive” the data trimming.
# Trial counts after trimming: one row per workerId, one column per image
# (number of surviving trials for that subject/image), plus a `sum` column
# holding the row total across images.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed %>%
  group_by(workerId, image) %>%
  dplyr::summarize(counts = n()) %>%
  # summarize() only peels off the last grouping level, so the result is still
  # grouped by workerId. Drop the grouping so the whole-table rowSums() below
  # gives the same answer whichever mutate() (plyr or dplyr) is on the search
  # path.
  dplyr::ungroup() %>%
  spread(image, counts) %>%
  # .[-1] drops the first column (workerId) so only the per-image counts are
  # summed; images with no surviving trial are NA after spread() and ignored.
  mutate(sum = rowSums(.[-1], na.rm = TRUE))
#head(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts,10)
Here are new histograms of all RTs and the mean RT of each subject.
# Histogram of the log RTs that survived trimming.
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
  x = "log_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 30,
  xlim = c(0, 2),
  ylim = c(0, 400),
  xlab = "Detection Log RT",
  title = "All Trials"
)

# Mean surviving log RT for each subject (one row per workerId), then its
# histogram.
tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_mean_subj_RT <- dplyr::summarize(
  group_by(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
    workerId
  ),
  mean_rt = mean(log_rt, na.rm = TRUE)
)
gghistogram(
  tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_mean_subj_RT,
  x = "mean_rt",
  fill = "#f7a800",
  rug = TRUE,
  bins = 15,
  xlim = c(0, 1.5),
  ylim = c(0, 8),
  xlab = "Mean Detection Log RT",
  title = "All Subjects"
)

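What percentage of trials was removed overall? Note that the baseline is the trial count taken before the 60-sec time-out trials were discarded, so this figure covers the time-out trials as well as the outlier RTs.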
# Number of trials removed per subject, in the row order of the initial count
# table. The surviving counts are looked up by workerId rather than subtracted
# by row position, so a subject who lost every trial (and is therefore absent
# from the post-trimming table) counts as 0 surviving trials instead of
# shifting or recycling the other subjects' values.
# NOTE: the baseline table was built before the time-out filter, so
# `total_removed` includes timed-out trials as well as RT outliers.
tbl_all_main_acc_rts_3SD_removed_count <- local({
  before <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts
  after <- tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed_counts
  surviving <- after$sum[match(before$workerId, after$workerId)]
  surviving[is.na(surviving)] <- 0
  data.frame(total_removed = before$sum - surviving)
})
# Overall percentage of the initial trials that were removed.
per_RTs_removed <- (sum(tbl_all_main_acc_rts_3SD_removed_count$total_removed) / sum(tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum)) * 100
per_RTs_removed
## [1] 1.799242
What is the percentage of outlier RTs that were removed per subject? This is easy to visualize in a plot.
# Per-subject exclusion table: subject id, number of removed trials, initial
# number of trials, and removed trials as a percentage of the initial number.
# The pieces are bound side by side in that order and then renamed.
tbl_per_rts_3SD_removed_by_subj <- setNames(
  cbind.data.frame(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts[1],
    tbl_all_main_acc_rts_3SD_removed_count,
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum,
    data.frame((tbl_all_main_acc_rts_3SD_removed_count / tbl_good_catch_acc_all_main_acc_inacc_trials_removed_counts$sum) * 100)
  ),
  c("workerId", "outlier_RTs", "total_RTs", "percent_excluded")
)
#head(tbl_per_rts_3SD_removed_by_subj,10)
# Bar plot of the percentage excluded per subject, sorted in ascending order.
ggbarplot(
  tbl_per_rts_3SD_removed_by_subj,
  x = "workerId",
  y = "percent_excluded",
  ylab = "% Trials Excluded",
  fill = "#f7a800",
  font.tickslab = 8,
  sort.val = c("asc")
) + rotate_x_text()

Splash vs. Flicker
This final section compares the mean log RT for each image in the mudsplash condition with the mean log RT for the same image in the flicker condition (no mudsplashes).
# Mean log RT per image in the trimmed mudsplash data (one row per image).
rensink_mudsplash <- dplyr::summarize(
  group_by(
    tbl_good_catch_acc_all_main_acc_inacc_trials_removed_timeout_trials_removed_rts_3SD_trimmed_rts_3SD_removed,
    image
  ),
  mean_rt = mean(log_rt, na.rm = TRUE)
)
# Flicker-condition data, read from a CSV in the working directory.
rensink_flicker <- read_csv("./change_blindness_rensink_behav.csv")
## Warning: Missing column names filled in: 'X1' [1]
# The first CSV column has no header, so give it an explicit name.
colnames(rensink_flicker)[1] <- "trial_number"
# The splash and flicker tables are bound side by side by row position, so
# they must have the same number of rows. Stop here on a mismatch: when one
# row count is a whole multiple of the other, the shorter table would be
# silently recycled.
# NOTE(review): this also assumes both tables list the images in the same
# order and that column 7 of the CSV holds the flicker mean log RT. Confirm
# against the file.
stopifnot(nrow(rensink_mudsplash) == nrow(rensink_flicker))
rensink_RTs <- cbind.data.frame(rensink_mudsplash, rensink_flicker[7])
colnames(rensink_RTs) <- c("image", "splash", "flicker")
# Long format: one row per image x condition; `condition` is a factor with
# levels in column order (splash, flicker).
rensink_RTs_long <- gather(rensink_RTs, condition, RT, splash:flicker, factor_key = TRUE)
# Mean and standard error of RT for each condition.
rensink_RTs_long %>%
  group_by(condition) %>%
  get_summary_stats(RT, type = "mean_se")
## # A tibble: 2 x 5
## condition variable n mean se
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 splash RT 48 0.904 0.007
## 2 flicker RT 48 0.914 0.023
# Paired t-test of splash vs. flicker RTs (difference = splash - flicker).
# Rows are paired by position within each condition, which is the image order
# of the wide table the long table was built from.
# The x/y interface is used instead of `t.test(RT ~ condition, paired = TRUE)`
# because the formula interface rejects `paired` from R 4.4.0 onwards; the
# test itself is unchanged.
with(
  rensink_RTs_long,
  t.test(RT[condition == "splash"], RT[condition == "flicker"], paired = TRUE)
)
##
## Paired t-test
##
## data: RT by condition
## t = -0.48841, df = 47, p-value = 0.6275
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04824407 0.02939486
## sample estimates:
## mean of the differences
## -0.009424604
# Dodged bar plot: mean log RT per image, one bar per condition.
ggbarplot(
  rensink_RTs_long,
  x = "image",
  y = "RT",
  fill = "condition",
  color = "condition",
  palette = "jco",
  position = position_dodge(0.9),
  font.xtickslab = 8,
  ylab = "Mean Detection Log RT (sec)"
) + rotate_x_text()

# Paired plot linking each image's splash and flicker values.
ggpaired(
  rensink_RTs,
  cond1 = "splash",
  cond2 = "flicker",
  fill = "condition",
  palette = "jco",
  ylab = "Mean Detection Log RT (sec)"
)

# Scatter plot of splash vs. flicker with a regression line and the Pearson
# correlation coefficient.
ggscatter(
  rensink_RTs,
  x = "splash",
  y = "flicker",
  xlab = "Mean Splash Detection Log RT",
  ylab = "Mean Flicker Detection Log RT",
  fill = "#f7a800",
  color = "#f7a800",
  add = "reg.line",
  cor.coef = TRUE,
  cor.coeff.args = list(method = "pearson", label.x = 0, label.sep = "\n"),
  ylim = c(0, 2),
  xlim = c(0, 2),
  title = "All Rensink Images"
)

Remove “Horizon” image (potential outlier)
# Same comparison with the "Horizon" image excluded.
rensink_RTs_no_horizon <- filter(rensink_RTs, image != "Horizon")
# Long format: one row per image x condition, `condition` as a factor in
# column order (splash, flicker).
rensink_RTs_no_horizon_long <- rensink_RTs_no_horizon %>%
  gather(condition, RT, splash:flicker, factor_key = TRUE)
# Mean and standard error of RT for each condition.
get_summary_stats(
  group_by(rensink_RTs_no_horizon_long, condition),
  RT,
  type = "mean_se"
)
## # A tibble: 2 x 5
## condition variable n mean se
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 splash RT 47 0.903 0.007
## 2 flicker RT 47 0.902 0.02
# Paired t-test of splash vs. flicker RTs without "Horizon" (difference =
# splash - flicker). Rows are paired by position within each condition, which
# is the image order of the wide table the long table was built from.
# The x/y interface is used instead of `t.test(RT ~ condition, paired = TRUE)`
# because the formula interface rejects `paired` from R 4.4.0 onwards; the
# test itself is unchanged.
with(
  rensink_RTs_no_horizon_long,
  t.test(RT[condition == "splash"], RT[condition == "flicker"], paired = TRUE)
)
##
## Paired t-test
##
## data: RT by condition
## t = 0.079444, df = 46, p-value = 0.937
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03168063 0.03428411
## sample estimates:
## mean of the differences
## 0.00130174
# Dodged bar plot: mean log RT per image (without "Horizon"), one bar per
# condition.
ggbarplot(
  rensink_RTs_no_horizon_long,
  x = "image",
  y = "RT",
  fill = "condition",
  color = "condition",
  palette = "jco",
  position = position_dodge(0.9),
  font.xtickslab = 8,
  ylab = "Mean Detection Log RT (sec)"
) + rotate_x_text()

# Paired plot linking each image's splash and flicker values.
ggpaired(
  rensink_RTs_no_horizon,
  cond1 = "splash",
  cond2 = "flicker",
  fill = "condition",
  palette = "jco",
  ylab = "Mean Detection Log RT (sec)"
)

# Scatter plot with regression line and Pearson correlation. The plot object
# is stored (under the existing name `log`) and printed inside
# suppressMessages() so messages emitted while drawing are not shown.
log <- ggscatter(
  rensink_RTs_no_horizon,
  x = "splash",
  y = "flicker",
  xlab = "Mean Splash Detection Log RT",
  ylab = "Mean Flicker Detection Log RT",
  fill = "#f7a800",
  color = "#f7a800",
  add = "reg.line",
  cor.coef = TRUE,
  cor.coeff.args = list(method = "pearson", label.x = 0, label.sep = "\n"),
  ylim = c(0, 2),
  xlim = c(0, 2),
  title = "All Rensink Images; No 'Horizon'"
)
suppressMessages(print(log))
