3.2.2 Main trials
For the rest of the analyses, we focus on the participants with good catch trial performance. Select the subjects who met the catch trial accuracy criterion from the original tbl_all.
tbl_good_catch_acc_all_main_acc <- tbl_all[(tbl_all$workerId %in% tbl_good_catch_acc_rate$workerId),]
#head(tbl_good_catch_acc_all_main_acc,10)
Verify subject count.
nrow(tbl_good_catch_acc_all_main_acc %>% distinct(workerId,.keep_all = FALSE))
## [1] 30
Here is a table containing the number of trials for each individual after excluding inaccurate main trials. Recall that there were 80 target-present trials: 70% were valid (56 trials) and 30% were invalid (24 trials, 8 for each of the three invalid types).
tbl_good_catch_acc_all_main_acc_counts <- tbl_good_catch_acc_all_main_acc %>%
  group_by(workerId, validity) %>%
  filter(validity %in% c('valid_same', 'invalid_same', 'invalid_different', 'invalid_far') &
           click_ACC == 1) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
tbl_good_catch_acc_all_main_acc_counts$sum <- rowSums(tbl_good_catch_acc_all_main_acc_counts[, -1], na.rm = TRUE)
#head(tbl_good_catch_acc_all_main_acc_counts,10)
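As a sanity check on the design arithmetic (a hypothetical snippet, not part of the original pipeline), the expected per-condition counts before any accuracy-based exclusions are:
# Hypothetical check: 80 target-present trials, 70% valid, and the remaining
# 30% split evenly across the three invalid types.
n_target_present <- 80
c(valid_same = 0.70 * n_target_present,              # 56
  per_invalid_type = (0.30 * n_target_present) / 3)  # 8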
Same table, but binned.
tbl_good_catch_acc_all_main_acc_counts_bin <- tbl_good_catch_acc_all_main_acc %>%
  group_by(workerId, validity, bin) %>%
  filter(validity %in% c('valid_same', 'invalid_same', 'invalid_different', 'invalid_far') &
           click_ACC == 1) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
tbl_good_catch_acc_all_main_acc_counts_bin$sum <- rowSums(tbl_good_catch_acc_all_main_acc_counts_bin[, c(-1, -2)], na.rm = TRUE)
#head(tbl_good_catch_acc_all_main_acc_counts_bin,10)
Some subjects may have no surviving data for a particular condition (e.g., subject 204070 no longer has any data for the invalid-different condition). These subjects should be excluded because they have data for fewer conditions than the other subjects.
# keep only subjects with data in all four conditions
tbl_good_catch_acc_all_main_acc_NA_conditions_removed <- tbl_good_catch_acc_all_main_acc_counts %>%
  filter(!is.na(valid_same) & !is.na(invalid_same) & !is.na(invalid_different) & !is.na(invalid_far))
#head(tbl_good_catch_acc_all_main_acc_NA_conditions_removed,10)
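An equivalent, more idiomatic way to drop the incomplete rows (a sketch assuming the same four condition columns) is tidyr::drop_na():
# Equivalent filter using tidyr, which is already loaded for spread()/gather():
tbl_good_catch_acc_all_main_acc_counts %>%
  drop_na(valid_same, invalid_same, invalid_different, invalid_far)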
Same table, but binned.
tbl_good_catch_acc_all_main_acc_NA_conditions_removed_bin <- tbl_good_catch_acc_all_main_acc_counts_bin[(tbl_good_catch_acc_all_main_acc_counts_bin$workerId %in% tbl_good_catch_acc_all_main_acc_NA_conditions_removed$workerId),]
#head(tbl_good_catch_acc_all_main_acc_NA_conditions_removed_bin,10)
Now, let’s remove any subjects with an NA condition from tbl_good_catch_acc_all_main_acc.
tbl_good_catch_acc_all_main_acc_NA_subjs_removed <- tbl_good_catch_acc_all_main_acc[(tbl_good_catch_acc_all_main_acc$workerId %in% tbl_good_catch_acc_all_main_acc_NA_conditions_removed$workerId),]
#head(tbl_good_catch_acc_all_main_acc_NA_subjs_removed,10)
And let’s check the number of subjects we are now working with.
nrow(tbl_good_catch_acc_all_main_acc_NA_subjs_removed %>% distinct(workerId,.keep_all = FALSE))
## [1] 29
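To confirm which subject was dropped at this step (a quick check, not part of the original pipeline; it should return subject 204070, per the note above):
# Worker(s) present before the NA-condition exclusion but not after:
setdiff(unique(tbl_good_catch_acc_all_main_acc$workerId),
        unique(tbl_good_catch_acc_all_main_acc_NA_subjs_removed$workerId))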
After dropping subjects based on catch trial performance and on main trial accuracy (including any additional subjects with unequal condition counts), get the original number of trials for the remaining subjects.
tbl_good_catch_acc_all_main_acc_NA_subjs_removed_counts <- tbl_all_counts_no_catch[(tbl_all_counts_no_catch$workerId %in% tbl_good_catch_acc_all_main_acc_NA_conditions_removed$workerId),]
#head(tbl_good_catch_acc_all_main_acc_NA_subjs_removed_counts,10)
Plot the overall accuracy at the group level (collapsed across workerId and condition).
tbl_overall_good_acc <- (tbl_good_catch_acc_all_main_acc_NA_conditions_removed$sum / tbl_good_catch_acc_all_main_acc_NA_subjs_removed_counts$sum)
tbl_overall_good_acc <- cbind.data.frame(tbl_good_catch_acc_all_main_acc_NA_subjs_removed_counts[,1], tbl_overall_good_acc)
colnames(tbl_overall_good_acc) <- c("workerId", "ACC")
tbl_overall_good_acc %>%
  ggbarplot(y = "ACC", ylab = "Accuracy", fill = "#f7a800", color = "#f7a800", add = "mean_se", ylim = c(0, 1), xlab = "Group", width = 0.5, label = TRUE, lab.nb.digits = 2, lab.vjust = -1, title = "Main Trial Accuracy")
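Note that the element-wise division above (and the binned version below) assumes the numerator and denominator tables are row-aligned by workerId. A join-based version (a sketch using dplyr; the object name is hypothetical) makes that pairing explicit:
# Hypothetical join-based alternative that does not rely on row order:
tbl_overall_good_acc_checked <- tbl_good_catch_acc_all_main_acc_NA_conditions_removed %>%
  select(workerId, n_correct = sum) %>%
  inner_join(tbl_good_catch_acc_all_main_acc_NA_subjs_removed_counts %>%
               select(workerId, n_total = sum),
             by = "workerId") %>%
  mutate(ACC = n_correct / n_total)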
Look at the overall accuracy at the group level (collapsed across workerId and condition) over time.
tbl_good_no_NA_bin <- tbl_good_catch_acc_all_main_acc_NA_subjs_removed %>%
  group_by(workerId, validity, bin) %>%
  filter(validity %in% c('valid_same', 'invalid_same', 'invalid_different', 'invalid_far')) %>%
  dplyr::summarize(counts = n()) %>%
  spread(validity, counts)
tbl_good_no_NA_bin$sum <- rowSums(tbl_good_no_NA_bin[, c(-1, -2)], na.rm = TRUE)
#head(tbl_good_no_NA_bin,10)
tbl_overall_good_acc_bin <- (tbl_good_catch_acc_all_main_acc_NA_conditions_removed_bin$sum / tbl_good_no_NA_bin$sum)
tbl_overall_good_acc_bin <- cbind.data.frame(tbl_good_no_NA_bin[,1:2], tbl_overall_good_acc_bin)
colnames(tbl_overall_good_acc_bin) <- c("workerId", "bin", "ACC")
tbl_overall_good_acc_bin %>%
  ggbarplot(y = "ACC", x = "bin", ylab = "Accuracy", fill = "#f7a800", color = "#f7a800", add = "mean_se", ylim = c(0, 1), xlab = "Bin", width = 0.5, label = TRUE, lab.nb.digits = 2, lab.vjust = -1, title = "Main Trial Accuracy Over Time", na.rm = TRUE)
Here are some descriptive and inferential statistics for the effect of time (bin) on accuracy.
tbl_overall_good_acc_bin %>%
  group_by(bin) %>%
  get_summary_stats(ACC, type = "mean_se")
## # A tibble: 4 x 5
## bin variable n mean se
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 block_1 ACC 29 0.887 0.02
## 2 block_2 ACC 29 0.902 0.018
## 3 block_3 ACC 29 0.924 0.021
## 4 block_4 ACC 29 0.922 0.016
tbl_overall_good_acc_bin %>%
  group_by(bin) %>%
  identify_outliers(ACC)
## # A tibble: 4 x 5
## bin workerId ACC is.outlier is.extreme
## <chr> <dbl> <dbl> <lgl> <lgl>
## 1 block_3 202822 0.714 TRUE FALSE
## 2 block_3 204847 0.737 TRUE FALSE
## 3 block_3 204859 0.55 TRUE TRUE
## 4 block_4 204859 0.667 TRUE FALSE
res.aov <- anova_test(data = tbl_overall_good_acc_bin, dv = ACC, wid = workerId, within = bin)
get_anova_table(res.aov, correction = "none")
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 bin 3 84 1.857 0.143 0.023
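Note that correction = "none" reports uncorrected degrees of freedom. rstatix can instead apply the Greenhouse-Geisser correction automatically whenever Mauchly's test indicates a sphericity violation:
# Alternative: auto-apply the Greenhouse-Geisser correction when sphericity
# is violated (the rstatix default behavior).
get_anova_table(res.aov, correction = "auto")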
pwc <- tbl_overall_good_acc_bin %>%
  pairwise_t_test(
    ACC ~ bin, paired = TRUE,
    p.adjust.method = "bonferroni"
  )
pwc
## # A tibble: 6 x 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 ACC block_1 block_2 29 29 -0.747 28 0.461 1 ns
## 2 ACC block_1 block_3 29 29 -1.68 28 0.105 0.63 ns
## 3 ACC block_1 block_4 29 29 -1.85 28 0.074 0.446 ns
## 4 ACC block_2 block_3 29 29 -1.14 28 0.265 1 ns
## 5 ACC block_2 block_4 29 29 -1.54 28 0.135 0.81 ns
## 6 ACC block_3 block_4 29 29 0.122 28 0.903 1 ns
Look at the overall accuracy for the group by validity (valid-same, invalid-same, etc.).
tbl_overall_good_acc_cond <- (tbl_good_catch_acc_all_main_acc_NA_conditions_removed / tbl_good_catch_acc_all_main_acc_NA_subjs_removed_counts)
tbl_overall_good_acc_cond <- cbind.data.frame(tbl_good_catch_acc_all_main_acc_NA_subjs_removed_counts[,1], tbl_overall_good_acc_cond[,2:5])
tbl_overall_good_acc_cond <- gather(tbl_overall_good_acc_cond, validity, acc, valid_same:invalid_far, factor_key=TRUE)
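As an aside, gather() has been superseded in newer versions of tidyr by pivot_longer(); an equivalent call (a sketch, where tbl_wide stands for the table as it existed before the gather() step) would be:
# Hypothetical pivot_longer() equivalent of the gather() call above:
tbl_wide %>%
  pivot_longer(cols = valid_same:invalid_far,
               names_to = "validity", values_to = "acc") %>%
  mutate(validity = factor(validity,
                           levels = c("valid_same", "invalid_same",
                                      "invalid_different", "invalid_far")))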
tbl_overall_good_acc_cond %>%
  ggbarplot(x = "validity", y = "acc", ylab = "Accuracy", fill = "validity", color = "validity", palette = c("#0d2240", "#00a8e1", "#f7a800", "#E31818", "#dfdddc"), add = "mean_se", ylim = c(0, 1), na.rm = TRUE, label = TRUE, lab.nb.digits = 2, lab.vjust = c(-1.2, -1.4, -1.4, -1.4), title = "Main Trial Accuracy By Validity", xlab = "Validity")
Here are some descriptive and inferential statistics for the effect of validity on accuracy.
tbl_overall_good_acc_cond %>%
  group_by(validity) %>%
  get_summary_stats(acc, type = "mean_se")
## # A tibble: 4 x 5
## validity variable n mean se
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 valid_same acc 29 0.932 0.015
## 2 invalid_same acc 29 0.852 0.033
## 3 invalid_different acc 29 0.877 0.03
## 4 invalid_far acc 29 0.847 0.032
tbl_overall_good_acc_cond %>%
  group_by(validity) %>%
  identify_outliers(acc)
## # A tibble: 2 x 5
## validity workerId acc is.outlier is.extreme
## <fct> <dbl> <dbl> <lgl> <lgl>
## 1 valid_same 203560 0.786 TRUE FALSE
## 2 valid_same 204859 0.625 TRUE TRUE
res.aov <- anova_test(data = tbl_overall_good_acc_cond, dv = acc, wid = workerId, within = validity)
get_anova_table(res.aov, correction = "none")
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 validity 3 84 2.745 0.048 * 0.047
pwc <- tbl_overall_good_acc_cond %>%
  pairwise_t_test(
    acc ~ validity, paired = TRUE,
    p.adjust.method = "bonferroni"
  )
pwc
## # A tibble: 6 x 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 acc valid_sa… invalid_… 29 29 2.64 28 0.013 0.08 ns
## 2 acc valid_sa… invalid_… 29 29 1.73 28 0.095 0.569 ns
## 3 acc valid_sa… invalid_… 29 29 2.95 28 0.006 0.038 *
## 4 acc invalid_… invalid_… 29 29 -0.621 28 0.54 1 ns
## 5 acc invalid_… invalid_… 29 29 0.166 28 0.87 1 ns
## 6 acc invalid_… invalid_… 29 29 0.794 28 0.434 1 ns
Finally, we can look at the accuracy for each individual subject.
tbl_overall_good_acc %>%
  ggbarplot(x = "workerId", y = "ACC", ylab = "Accuracy", fill = "#f7a800", color = "#f7a800", ylim = c(0, 1), title = "Individual Accuracy", sort.val = "asc") + rotate_x_text()
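To inspect the lowest-scoring individuals numerically (a convenience check; the bottom-five cutoff is arbitrary):
# List the five lowest-accuracy subjects for inspection:
tbl_overall_good_acc %>%
  arrange(ACC) %>%
  head(5)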