1 Set up R environment
2 Read-in datafiles
3 Matching task
4 IB performance
5 Post IB questions
- 5.1 Expecting an unexpected event
- 5.2 Familiarity with IB experiments
6 Predicting IB

Scripts were generated by PsychoPy.

1 Set up R environment

library(tidyverse)
library(ggplot2)
library(ggpubr)
library(plyr)

Set the R working directory to the main experiment directory.

# Make the project root the working directory so the relative file paths used
# later in this script (e.g. "./IB_stims/IB_counting.csv") resolve.
# NOTE(review): this absolute path is machine-specific; adjust it before running
# on another computer.
setwd("/Users/adambarnas/Box/Predicting_IB")

2 Read-in datafiles

Read in the individual subject files.

Get a count of the number of subjects.

# Number of unique worker IDs in tbl_all.
tbl_all %>%
  distinct(worker_ID) %>%
  nrow()

## [1] 119

3 Matching task

# Keep only the rows whose stimFile mentions "matching", drop the columns that
# belong to the IB task and its follow-up questions, and save a copy to disk.
tbl_all_matching <- tbl_all %>%
  filter(grepl('matching', stimFile)) %>%
  subset(select = -c(Test_IB, Critical_IB, Q1, Q2, Q3, Q4, Q5))
write.csv(tbl_all_matching, file = 'matching_v4.csv', row.names = FALSE)

3.1 Initial RT descriptives

# Six-number summary of the raw matching-task response times (before any
# subject is excluded).
tbl_all_matching %>%
  pull(matching_response.rt) %>%
  summary()

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##   0.00074   1.11429   2.06588   2.27119   3.06542 119.35300

3.2 Accuracy

3.2.1 Plot

# Per-subject matching accuracy.
#
# For every worker, count the incorrect (matching_response.corr == 0) and
# correct (matching_response.corr == 1) trials, then derive the trial total and
# the proportion correct. The counts are computed explicitly by name instead of
# reshaping with spread() and renaming columns by position: the positional
# rename fails if matching_response.corr ever has a third level (e.g. NA), and
# the previous rowSums(.[2:3]) step only worked because plyr's mutate() masks
# dplyr's. Trials with a missing correctness value are not counted.
#
# Result columns: worker_ID, inacc, acc, total, rate.
tbl_all_matching_acc <- tbl_all_matching %>%
  group_by(worker_ID) %>%
  dplyr::summarize(
    inacc = sum(matching_response.corr == 0, na.rm = TRUE),
    acc = sum(matching_response.corr == 1, na.rm = TRUE)
  ) %>%
  ungroup()
tbl_all_matching_acc$total <- tbl_all_matching_acc$inacc + tbl_all_matching_acc$acc
tbl_all_matching_acc$rate <- tbl_all_matching_acc$acc / tbl_all_matching_acc$total

# Accuracy of each subject, one bar per worker; the dashed line marks chance (.5).
tbl_all_matching_acc %>%
  ggbarplot(
    "worker_ID", "rate",
    fill = "#0d2240", color = "#0d2240",
    font.xtickslab = 4,
    ylab = "Accuracy", ylim = c(0, 1),
    position = position_dodge(0.8)
  ) +
  rotate_x_text() +
  geom_hline(yintercept = .5, linetype = 2) +
  theme(legend.position = "none")

# Group-level accuracy: a single bar showing the mean with SE error bars.
tbl_all_matching_acc %>%
  ggbarplot(
    y = "rate", add = "mean_se",
    fill = "#0d2240", color = "#0d2240",
    ylab = "Accuracy", xlab = "Matching task",
    ticks = FALSE, ylim = c(0, 1),
    position = position_dodge(0.8),
    label = TRUE, lab.vjust = -2, lab.nb.digits = 2
  ) +
  geom_hline(yintercept = .5, linetype = 2) +
  theme(legend.position = "none")

# One-sided, one-sample t-test: is mean accuracy greater than chance (.5)?
matching_chance <- t.test(tbl_all_matching_acc$rate, mu = .50, alternative = "greater")
matching_chance

## 
##  One Sample t-test
## 
## data:  tbl_all_matching_acc$rate
## t = 12.464, df = 118, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0.5
## 95 percent confidence interval:
##  0.6426167       Inf
## sample estimates:
## mean of x 
## 0.6644958

3.2.2 Remove subjects at or below chance and replot

# Accuracy screen: keep worker_ID and rate only, and retain the subjects whose
# accuracy is strictly above .5.
tbl_all_matching_acc <- tbl_all_matching_acc %>%
  subset(select = c(worker_ID, rate)) %>%
  filter(rate > 0.5)

# Disabled alternatives kept for reference: list the excluded subjects, or skip
# the screen entirely.
#tbl_all_matching_acc_bad <- subset(tbl_all_matching_acc, select = c(worker_ID, rate)) %>% 
#  filter(rate <= 0.5)

#tbl_all_matching_acc <- subset(tbl_all_matching_acc, select = c(worker_ID, rate))

# Per-subject accuracy after the screen; the dashed line marks chance (.5).
tbl_all_matching_acc %>%
  ggbarplot(
    "worker_ID", "rate",
    fill = "#0d2240", color = "#0d2240",
    font.xtickslab = 4,
    ylab = "Accuracy", ylim = c(0, 1),
    position = position_dodge(0.8)
  ) +
  rotate_x_text() +
  geom_hline(yintercept = .5, linetype = 2) +
  theme(legend.position = "none")

# Group mean accuracy (with SE) after the screen.
tbl_all_matching_acc %>%
  ggbarplot(
    y = "rate", add = "mean_se",
    fill = "#0d2240", color = "#0d2240",
    ylab = "Accuracy", xlab = "Matching task",
    ticks = FALSE, ylim = c(0, 1),
    position = position_dodge(0.8),
    label = TRUE, lab.vjust = -2, lab.nb.digits = 2
  ) +
  geom_hline(yintercept = .5, linetype = 2) +
  theme(legend.position = "none")

# Repeat the one-sided test against chance on the retained subjects.
matching_chance <- t.test(tbl_all_matching_acc$rate, mu = .50, alternative = "greater")
matching_chance

## 
##  One Sample t-test
## 
## data:  tbl_all_matching_acc$rate
## t = 18.925, df = 96, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0.5
## 95 percent confidence interval:
##  0.693733      Inf
## sample estimates:
## mean of x 
## 0.7123711

3.2.3 Count subjects

# Number of subjects remaining after the accuracy screen.
tbl_all_matching_acc %>%
  distinct(worker_ID) %>%
  nrow()

## [1] 97

3.3 RTs

3.3.1 Plot

# Matching-task trials of the subjects who passed the accuracy screen.
# NOTE(review): this chunk used to start with a version of this assignment that
# additionally kept only correct trials (matching_response.corr == 1), but it
# was overwritten by the line below before being used. That dead assignment has
# been removed; as before, RTs from ALL trials (correct and incorrect) feed the
# plots and medians. Confirm that this is the intended analysis.
tbl_all_matching_rts <- tbl_all_matching[(tbl_all_matching$worker_ID %in% tbl_all_matching_acc$worker_ID),]

# Median RT per subject; the dashed line marks the 200 ms (.2 s) cut-off.
tbl_all_matching_rts %>%
  ggbarplot(
    "worker_ID", "matching_response.rt",
    fill = "#0d2240", color = "#0d2240",
    font.xtickslab = 4, add = "median",
    position = position_dodge(0.8),
    ylab = "Median RT (sec)", ylim = c(0, 6)
  ) +
  rotate_x_text() +
  theme(legend.position = "none") +
  geom_hline(yintercept = .2, linetype = 2)

# Median RT over all retained trials, shown as a single bar.
tbl_all_matching_rts %>%
  ggbarplot(
    y = "matching_response.rt", add = "median",
    fill = "#0d2240", color = "#0d2240",
    position = position_dodge(0.8),
    ylab = "Median RT (sec)", xlab = "Matching task",
    ylim = c(0, 5),
    label = TRUE, lab.vjust = -1, lab.nb.digits = 2
  ) +
  theme(legend.position = "none")

# One median RT per subject (missing RTs ignored).
tbl_all_matching_rts_median <- tbl_all_matching_rts %>%
  group_by(worker_ID) %>%
  dplyr::summarize(median_rt = median(matching_response.rt, na.rm = TRUE))

3.3.2 Remove subjects with median RT at or below 200 ms and replot

# RT screen: retain the subjects whose median RT is strictly above .2 s.
tbl_all_matching_rts_median <- filter(tbl_all_matching_rts_median, median_rt > .2)

# Disabled alternative kept for reference: skip the RT screen.
#tbl_all_matching_rts_median <- tbl_all_matching_rts_median

# Trial-level data restricted to the subjects who passed the RT screen.
tbl_all_matching_rts_median_above_200ms <- tbl_all_matching_rts %>%
  filter(worker_ID %in% tbl_all_matching_rts_median$worker_ID)

# Median RT per retained subject; the dashed line marks the .2 s cut-off.
tbl_all_matching_rts_median_above_200ms %>%
  ggbarplot(
    "worker_ID", "matching_response.rt",
    fill = "#0d2240", color = "#0d2240",
    font.xtickslab = 4, add = "median",
    position = position_dodge(0.8),
    ylab = "Median RT (sec)", ylim = c(0, 5)
  ) +
  rotate_x_text() +
  theme(legend.position = "none") +
  geom_hline(yintercept = .2, linetype = 2)

# Median RT over all retained trials, shown as a single bar.
tbl_all_matching_rts_median_above_200ms %>%
  ggbarplot(
    y = "matching_response.rt", add = "median",
    fill = "#0d2240", color = "#0d2240",
    position = position_dodge(0.8),
    ylab = "Median RT (sec)", xlab = "Matching task",
    ylim = c(0, 5),
    label = TRUE, lab.vjust = -1, lab.nb.digits = 2
  ) +
  theme(legend.position = "none")

3.3.3 Count subjects

# Number of subjects remaining after the RT screen.
tbl_all_matching_rts_median %>%
  distinct(worker_ID) %>%
  nrow()

## [1] 93

3.4 Final RT descriptives

# Six-number summary of the trial-level RTs of the subjects who passed both the
# accuracy and the RT screens.
tbl_all_matching_rts_median_above_200ms %>%
  pull(matching_response.rt) %>%
  summary()

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.009   1.573   2.397   2.582   3.295  30.932

3.5 Speed-accuracy trade-off

If there is no speed-accuracy trade-off, the regression of median RT on accuracy should be flat or have a negative slope.

# Pair each subject's accuracy with their median RT. Subjects missing either
# measure are removed by drop_na() after the join.
# Disabled alternative kept for reference: bind the two tables by row position.
#tbl_speed_acc_trade_off <- cbind.data.frame(tbl_all_matching_acc, tbl_all_matching_rts_median[2])
tbl_speed_acc_trade_off <- drop_na(
  full_join(tbl_all_matching_acc, tbl_all_matching_rts_median, by = "worker_ID")
)

# Scatter plot of median RT against accuracy with a regression line, its
# confidence band, and the Pearson correlation printed on the plot.
tbl_speed_acc_trade_off %>%
  ggscatter(
    "rate", "median_rt",
    add = "reg.line",
    add.params = list(color = "#f7a800", fill = "lightgray"),
    xlim = c(0.5, 1),
    conf.int = TRUE,
    cor.coef = TRUE,
    cor.coeff.args = list(method = "pearson", label.sep = "\n")
  )

## `geom_smooth()` using formula 'y ~ x'

4 IB performance

# IB-task data: start from every row of the subjects who passed the matching
# screens, keep the IB-related columns, and drop the matching-task rows.
tbl_all_IB <- tbl_all %>%
  filter(worker_ID %in% tbl_all_matching_rts_median$worker_ID) %>%
  subset(select = c(worker_ID, stimFile, Test_IB, Critical_IB, Q1, Q2, Q3, Q4, Q5)) %>%
  filter(!grepl('matching', stimFile))

# Number of subjects carried into the IB analyses.
tbl_all_IB %>%
  distinct(worker_ID) %>%
  nrow()

## [1] 93

4.1 Counting task

All subjects

# Counting-task trials: drop the question columns and keep the rows whose
# stimFile contains the literal text ".mp4". fixed = TRUE makes the dot a
# literal character; as a regular expression '.mp4' would also match any other
# character followed by "mp4".
tbl_all_counting <- tbl_all_IB %>%
  subset(select = -c(Q1, Q2, Q3, Q4, Q5)) %>%
  filter(grepl('.mp4', stimFile, fixed = TRUE))

# A trial's reported count is taken from Test_IB when present, otherwise from
# Critical_IB; trials with neither are scored as a count of 0.
tbl_all_counting$IB <- ifelse(!is.na(tbl_all_counting$Test_IB), tbl_all_counting$Test_IB, tbl_all_counting$Critical_IB)

tbl_all_counting$IB[is.na(tbl_all_counting$IB)] <- 0

# Reference counts per stimulus file, read from disk.
# NOTE(review): assumed to contain the columns stimFile and count - confirm.
tbl_counts <- read_csv("./IB_stims/IB_counting.csv")

# Attach the reference count to every trial; rows that exist only in the
# reference table (no worker_ID) are discarded.
tbl_counts_comparison <- full_join(tbl_all_counting, tbl_counts, by = "stimFile") %>%
  filter(!is.na(worker_ID))

tbl_counts_comparison$IB <- as.numeric(tbl_counts_comparison$IB)
tbl_counts_comparison$count <- as.numeric(tbl_counts_comparison$count)

# Absolute percent error of the reported count relative to the reference count.
tbl_counts_comparison$error <- (abs(tbl_counts_comparison$IB - tbl_counts_comparison$count) / tbl_counts_comparison$count) * 100

# Mean percent error (with SE) per subject.
tbl_counts_comparison %>%
  ggbarplot(
    "worker_ID", "error",
    fill = "#f7a800", color = "#f7a800",
    font.xtickslab = 4, ylim = c(0, 200),
    add = "mean_se",
    ylab = "Mean Percent Error on Midline-crossings Task"
  ) +
  rotate_x_text()

Remove subjects with an average error greater than 150%

# Mean percent error per subject (missing errors ignored), retaining only the
# subjects whose mean error does not exceed 150.
tbl_counts_comparison_average <- tbl_counts_comparison %>%
  group_by(worker_ID) %>%
  dplyr::summarize(mean_error = mean(error, na.rm = TRUE)) %>%
  filter(mean_error <= 150)

# Group mean of the per-subject mean errors, with SE error bars.
tbl_counts_comparison_average %>%
  ggbarplot(
    y = "mean_error",
    fill = "#f7a800", color = "#f7a800",
    ylim = c(0, 100), sort.val = "asc",
    xlab = "Group", add = "mean_se",
    ylab = "Mean Percent Error on Midline-crossings Task"
  )

# One-sided, one-sample t-test: is the mean percent error greater than 0?
error <- t.test(tbl_counts_comparison_average$mean_error, mu = 0, alternative = "greater")
error

## 
##  One Sample t-test
## 
## data:  tbl_counts_comparison_average$mean_error
## t = 10.973, df = 89, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  31.63532      Inf
## sample estimates:
## mean of x 
##  37.28258

# Save the trial-level comparison of reported and reference counts.
write.csv(tbl_counts_comparison, file = 'IB_counting.csv', row.names = FALSE)

# Density-scaled histogram of the per-subject mean errors with a density curve
# overlaid; the dashed blue line marks the mean of the plotted values.
tbl_counts_comparison_average %>%
  ggplot(aes(x = mean_error)) +
  geom_histogram(aes(y = ..density..), color = "black", fill = "#f7a800", bins = 10) +
  geom_density(alpha = .2, fill = "white") +
  geom_vline(
    aes(xintercept = mean(mean_error)),
    color = "blue", linetype = "dashed", size = 1
  )

4.1.1 Remove subjects with a mean error of 40% or greater

# Counting screen: retain the subjects whose mean percent error is below 40.
tbl_counts_comparison_average_good <- filter(tbl_counts_comparison_average, mean_error < 40)

# Disabled alternatives kept for reference: list the excluded subjects, or skip
# the screen entirely.
#tbl_counts_comparison_average_bad <- tbl_counts_comparison_average %>% 
#  filter(mean_error > 40)

#tbl_counts_comparison_average_good <- tbl_counts_comparison_average

# Trial-level rows of the retained subjects.
tbl_counts_comparison_good <- tbl_counts_comparison %>%
  filter(worker_ID %in% tbl_counts_comparison_average_good$worker_ID)

# Mean percent error (with SE) per retained subject.
tbl_counts_comparison_good %>%
  ggbarplot(
    "worker_ID", "error",
    fill = "#f7a800", color = "#f7a800",
    ylim = c(0, 100), font.xtickslab = 6,
    add = "mean_se",
    ylab = "Mean Percent Error on Midline-crossings Task"
  ) +
  rotate_x_text()

# Group mean of the retained subjects' mean errors, with SE error bars.
tbl_counts_comparison_average_good %>%
  ggbarplot(
    y = "mean_error",
    fill = "#f7a800", color = "#f7a800",
    ylim = c(0, 100), sort.val = "asc",
    xlab = "Group", add = "mean_se",
    ylab = "Mean Percent Error on Midline-crossings Task"
  )

# One-sided, one-sample t-test against 0 on the retained subjects.
error_good <- t.test(tbl_counts_comparison_average_good$mean_error, mu = 0, alternative = "greater")
error_good

## 
##  One Sample t-test
## 
## data:  tbl_counts_comparison_average_good$mean_error
## t = 10.247, df = 51, p-value = 2.787e-14
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  10.617    Inf
## sample estimates:
## mean of x 
##  12.69199

# Density-scaled histogram of the retained subjects' mean errors with a density
# curve overlaid.
tbl_counts_comparison_average_good %>%
  ggplot(aes(x = mean_error)) +
  geom_histogram(aes(y = ..density..), color = "black", fill = "#f7a800", bins = 10) +
  geom_density(alpha = .2, fill = "white")

4.1.2 Count subjects

# Number of subjects remaining after the counting-error screen.
tbl_counts_comparison_average_good %>%
  distinct(worker_ID) %>%
  nrow()

## [1] 52

4.2 Noticers vs. Non-noticers

# Q1 responses of the subjects who passed the counting screen.
tbl_all_notice <- tbl_all_IB %>%
  filter(worker_ID %in% tbl_counts_comparison_average_good$worker_ID) %>%
  subset(select = c(worker_ID, Q1))

# Drop the rows flagged as blank by the row-wise test below (Q1 is the empty
# string), then export the remaining responses.
blank_q1_row <- apply(is.na(tbl_all_notice) | tbl_all_notice$Q1 == "", 1, all)
tbl_all_notice <- tbl_all_notice[!blank_q1_row, ]
write.csv(tbl_all_notice, file = 'IB_notice.csv', row.names = FALSE)

# Replace the table with the version read from IB_notice_2.csv, which carries
# the additional noticer_or_nonnoticer column.
# NOTE(review): that file is presumably coded by hand outside this script.
tbl_all_notice <- read_csv("IB_notice_2.csv")

## Parsed with column specification:
## cols(
##   worker_ID = col_character(),
##   Q1 = col_character(),
##   noticer_or_nonnoticer = col_double()
## )

# Print the coded Q1 responses as a table.
tbl_all_notice %>%
  knitr::kable()

worker_ID	Q1	noticer_or_nonnoticer
A1198W1SPF1R4	I did not notice anything different.	0
A11S8IAAVDXCUS	im not sure what was different	0
A1561P9VVA3C1C	no	0
A16335MOISDG1F	no	0
A1721FMJBGZVYE	there was a grey cross going side to side	1
A1BWS5AD2T4NIR	There appeared to be a red character.	0
A1C5SQZ045W0L5	did not notice	0
A1C7XI68SED8JE	a gray cross	1
A1EFKRW7L4COZZ	There is no different.	0
A1GOP4VZYFUSXV	no	0
A1H5Q9HRH4RPZU	no	0
A1HX76XWAQ1EJV	in the last task i was a plus symbol on the code	1
A1IAMNSZB9U4Z1	I noticed a gray colored letter boucing across the screen	1
A1IZ6M6NJ4WKFV	17	0
A1PVBCIZ867MXP	it went faster	0
A1RKRL7971AMD	NA	0
A1TR615HGENX1M	a dark grey cross moving horizontally across the line	1
A1XVEKS9O73ERE	I did not	0
A219Y0MX694TM2	No	0
A21HUPW67XA7UJ	they were completely different	0
A21Q7FCKM86VL8	I did not notice anything	0
A233GOCNYJF1N2	plus symbol appears in the line	1
A277UVT82WHF65	no	0
A2BY3NK0YQO891	yes i noticed that there was a shape that looked like a cross which was moving along the center of the line	1
A2E0LU8V4EUX5C	looked same	0
A2FMFDML1VV3R5	maybe the letters were moving more quickly	0
A2KBTDHM44J7X4	Ther wass a crosshair that moved across the screen	1
A2OFN0A5CPLH57	no	0
A2OROMUVBD1NVC	NA	0
A2QXAZLMU2P0QO	I did not notice anything different	0
A2VA0NCPWJ8XZG	there was a gray cross moving along the center line	1
A394JO4NEPCY3M	no I did not	0
A3DCRZTX0HIHBJ	something was red	0
A3FF5CCILJAWYT	The movemen of letters was paused for some period of time	0
A3JXQ4WFIJSQ3D	a plus sign moved across the horizontal line	1
A3KF6O09H04SP7	There was some kind of shape or letter that was differencomma but I was so focused on the counting that I didnapostrophet really see what it was	0
A3NEJLDF75PC11	moving a lot	0
A3NMQ3019X6YE0	no	0
A3SFNY43VM330P	No	0
A3V8C77FN4FLUA	no	0
A4E1UYPDHE8D8	A grey plus character moved across the center line coming from the right	1
A8E7IOAZRC2IC	Nothing noticed	0
A96CZ20YAZKZ8	plus sign	1
A98E8M4QLI9RS	no	0
AASXXNW6FJ76I	movement	0
AB09F0Q4T10TE	I did not notice anything different.	0
AD15OW944ZV20	There was a plus sign going right to left through the center	1
AK3H5QRAROFGP	no	0
AR39OP4746H4F	I did not notice a difference	0
ASFBN5SC3SGDG	No.	0
ASVRLMDNQBUD9	a grey plus sign crossed the screen	1
AWVLT2L5AP873	no i didnt notice anything	0

# Frequency of each code in noticer_or_nonnoticer for the Q1 responses.
table(tbl_all_notice$noticer_or_nonnoticer)

## 
##  0  1 
## 38 14

4.3 Unexpected item description

Noticers: describe the shape correctly even if they don’t describe how it moves. Non-noticers: describe the shape correctly but describe the wrong movement.

# Q2 responses of the subjects who passed the counting screen.
tbl_all_item <- tbl_all_IB %>%
  filter(worker_ID %in% tbl_counts_comparison_average_good$worker_ID) %>%
  subset(select = c(worker_ID, Q2))

# Drop the rows flagged as blank by the row-wise test below (Q2 is the empty
# string), then export the remaining responses.
blank_q2_row <- apply(is.na(tbl_all_item) | tbl_all_item$Q2 == "", 1, all)
tbl_all_item <- tbl_all_item[!blank_q2_row, ]
write.csv(tbl_all_item, file = 'IB_item.csv', row.names = FALSE)

# Replace the table with the version read from IB_item_2.csv, which carries the
# additional noticer_or_nonnoticer column.
# NOTE(review): that file is presumably coded by hand outside this script.
tbl_all_item <- read_csv("IB_item_2.csv")

## Parsed with column specification:
## cols(
##   worker_ID = col_character(),
##   Q2 = col_character(),
##   noticer_or_nonnoticer = col_double()
## )

# Print the coded Q2 responses as a table.
tbl_all_item %>%
  knitr::kable()

worker_ID	Q2	noticer_or_nonnoticer
A1198W1SPF1R4	x	0
A11S8IAAVDXCUS	I might have seen the plus sign	1
A1561P9VVA3C1C	x	0
A16335MOISDG1F	no	0
A1721FMJBGZVYE	+	1
A1BWS5AD2T4NIR	no I did not notice any of these characters	0
A1C5SQZ045W0L5	no idea. pehaps the +	1
A1C7XI68SED8JE	LEFTleft	0
A1EFKRW7L4COZZ	+	1
A1GOP4VZYFUSXV	+	1
A1H5Q9HRH4RPZU	i did not	0
A1HX76XWAQ1EJV	+	1
A1IAMNSZB9U4Z1	I think it was the + sign or the letter L	1
A1IZ6M6NJ4WKFV	E	0
A1PVBCIZ867MXP	e	0
A1RKRL7971AMD	+	1
A1TR615HGENX1M	+	1
A1XVEKS9O73ERE	E	0
A219Y0MX694TM2	No	0
A21HUPW67XA7UJ	+	1
A21Q7FCKM86VL8	e it moved in straight line	0
A233GOCNYJF1N2	+	1
A277UVT82WHF65	no	0
A2BY3NK0YQO891	yes i noticed the plus sign	1
A2E0LU8V4EUX5C	not notice	0
A2FMFDML1VV3R5	+	1
A2KBTDHM44J7X4	left	0
A2OFN0A5CPLH57	E moved left to right	0
A2OROMUVBD1NVC	NA	0
A2QXAZLMU2P0QO	I did not see those	0
A2VA0NCPWJ8XZG	gray cross	1
A394JO4NEPCY3M	x	0
A3DCRZTX0HIHBJ	x I dont know	0
A3FF5CCILJAWYT	+	1
A3JXQ4WFIJSQ3D	yes th plus sign moved on horizontal	1
A3KF6O09H04SP7	E	0
A3NEJLDF75PC11	E	0
A3NMQ3019X6YE0	e	0
A3SFNY43VM330P	X	0
A3V8C77FN4FLUA	x	0
A4E1UYPDHE8D8	#NAME?	0
A8E7IOAZRC2IC	Not noticed	0
A96CZ20YAZKZ8	+	1
A98E8M4QLI9RS	#NAME?	0
AASXXNW6FJ76I	+	1
AB09F0Q4T10TE	No I was focused on the white items.	0
AD15OW944ZV20	yes. The Plus sign.	1
AK3H5QRAROFGP	+	1
AR39OP4746H4F	x	0
ASFBN5SC3SGDG	+	1
ASVRLMDNQBUD9	+	1
AWVLT2L5AP873	i didnt notice these at all	0

# Frequency of each code in noticer_or_nonnoticer for the Q2 responses.
table(tbl_all_item$noticer_or_nonnoticer)

## 
##  0  1 
## 30 22

5 Post IB questions

5.1 Expecting an unexpected event

# Q4 responses of the subjects who passed the counting screen.
tbl_all_expecting <- tbl_all_IB %>%
  filter(worker_ID %in% tbl_counts_comparison_average_good$worker_ID) %>%
  subset(select = c(worker_ID, Q4))

# Drop the rows flagged as blank by the row-wise test below (Q4 is the empty
# string).
blank_q4_row <- apply(is.na(tbl_all_expecting) | tbl_all_expecting$Q4 == "", 1, all)
tbl_all_expecting <- tbl_all_expecting[!blank_q4_row, ]

# Upper-case every character column; other columns pass through unchanged.
tbl_all_expecting <- data.frame(lapply(
  tbl_all_expecting,
  function(column) if (is.character(column)) toupper(column) else column
))
write.csv(tbl_all_expecting, file = 'IB_expecting.csv', row.names = FALSE)

# Replace the table with the version read from IB_expecting_2.csv, which carries
# the additional expecting_unexpected column.
# NOTE(review): that file is presumably coded by hand outside this script.
tbl_all_expecting <- read_csv("IB_expecting_2.csv")

## Parsed with column specification:
## cols(
##   worker_ID = col_character(),
##   Q4 = col_character(),
##   expecting_unexpected = col_character()
## )

# Print the coded Q4 responses as a table.
tbl_all_expecting %>%
  knitr::kable()

worker_ID	Q4	expecting_unexpected
A1198W1SPF1R4	NO	NO
A11S8IAAVDXCUS	NO I DIDNT EXPECT ANYTHING TO APPEAR UNEXPECTEDLY	NO
A1561P9VVA3C1C	NO	NO
A16335MOISDG1F	NO	NO
A1721FMJBGZVYE	NO	NO
A1BWS5AD2T4NIR	NO	NO
A1C5SQZ045W0L5	NO	NO
A1C7XI68SED8JE	NO	NO
A1EFKRW7L4COZZ	NO	NO
A1GOP4VZYFUSXV	NO	NO
A1H5Q9HRH4RPZU	NO	NO
A1HX76XWAQ1EJV	NO	NO
A1IAMNSZB9U4Z1	NO	NO
A1IZ6M6NJ4WKFV	NO	NO
A1PVBCIZ867MXP	NO	NO
A1RKRL7971AMD	YES	YES
A1TR615HGENX1M	NO	NO
A1XVEKS9O73ERE	NO	NO
A219Y0MX694TM2	NO	NO
A21HUPW67XA7UJ	YES	YES
A21Q7FCKM86VL8	NO.	NO
A233GOCNYJF1N2	YES	YES
A277UVT82WHF65	N	NO
A2BY3NK0YQO891	NO	NO
A2E0LU8V4EUX5C	YS	YES
A2FMFDML1VV3R5	NO	NO
A2KBTDHM44J7X4	NO	NO
A2OFN0A5CPLH57	NO	NO
A2OROMUVBD1NVC	NO	NO
A2QXAZLMU2P0QO	NO	NO
A2VA0NCPWJ8XZG	YES	YES
A394JO4NEPCY3M	NO	NO
A3DCRZTX0HIHBJ	NO	NO
A3FF5CCILJAWYT	NO	NO
A3JXQ4WFIJSQ3D	NO	NO
A3KF6O09H04SP7	NO	NO
A3NEJLDF75PC11	NO	NO
A3NMQ3019X6YE0	NO	NO
A3SFNY43VM330P	NO	NO
A3V8C77FN4FLUA	NO	NO
A4E1UYPDHE8D8	NO	NO
A8E7IOAZRC2IC	NO	NO
A96CZ20YAZKZ8	NO	NO
A98E8M4QLI9RS	NO	NO
AASXXNW6FJ76I	YES	YES
AB09F0Q4T10TE	NO.	NO
AD15OW944ZV20	NO	NO
AK3H5QRAROFGP	NO	NO
AR39OP4746H4F	NO	NO
ASFBN5SC3SGDG	NO	NO
ASVRLMDNQBUD9	NO	NO
AWVLT2L5AP873	NO	NO

# Frequency of each code in expecting_unexpected (coded Q4 responses).
table(tbl_all_expecting$expecting_unexpected)

## 
##  NO YES 
##  46   6

5.2 Familiarity with IB experiments

# Q5 responses of the subjects who passed the counting screen.
tbl_all_familiarity <- tbl_all_IB %>%
  filter(worker_ID %in% tbl_counts_comparison_average_good$worker_ID) %>%
  subset(select = c(worker_ID, Q5))

# Drop the rows flagged as blank by the row-wise test below (Q5 is the empty
# string).
blank_q5_row <- apply(is.na(tbl_all_familiarity) | tbl_all_familiarity$Q5 == "", 1, all)
tbl_all_familiarity <- tbl_all_familiarity[!blank_q5_row, ]

# Upper-case every character column; other columns pass through unchanged.
tbl_all_familiarity <- data.frame(lapply(
  tbl_all_familiarity,
  function(column) if (is.character(column)) toupper(column) else column
))
write.csv(tbl_all_familiarity, file = 'IB_familiarity.csv', row.names = FALSE)

# Replace the table with the version read from IB_familiarity_2.csv, which
# carries the additional familiarity column.
# NOTE(review): that file is presumably coded by hand outside this script.
tbl_all_familiarity <- read_csv("IB_familiarity_2.csv")

## Parsed with column specification:
## cols(
##   worker_ID = col_character(),
##   Q5 = col_character(),
##   familiarity = col_character()
## )

# Print the coded Q5 responses as a table.
tbl_all_familiarity %>%
  knitr::kable()

worker_ID	Q5	familiarity
A1198W1SPF1R4	NO	NO
A11S8IAAVDXCUS	NO	NO
A1561P9VVA3C1C	NO	NO
A16335MOISDG1F	NO	NO
A1721FMJBGZVYE	NO	NO
A1BWS5AD2T4NIR	NO	NO
A1C5SQZ045W0L5	NO	NO
A1C7XI68SED8JE	YES	YES
A1EFKRW7L4COZZ	NO	NO
A1GOP4VZYFUSXV	NO	NO
A1H5Q9HRH4RPZU	NO	NO
A1HX76XWAQ1EJV	NO	NO
A1IAMNSZB9U4Z1	NO	NO
A1IZ6M6NJ4WKFV	NO	NO
A1PVBCIZ867MXP	NO	NO
A1RKRL7971AMD	NO	NO
A1TR615HGENX1M	YES	YES
A1XVEKS9O73ERE	NO	NO
A219Y0MX694TM2	NO	NO
A21HUPW67XA7UJ	NO	NO
A21Q7FCKM86VL8	YES.	YES
A233GOCNYJF1N2	NO	NO
A277UVT82WHF65	NO	NO
A2BY3NK0YQO891	NO	NO
A2E0LU8V4EUX5C	NO	NO
A2FMFDML1VV3R5	NO	NO
A2KBTDHM44J7X4	YES	YES
A2OFN0A5CPLH57	NO	NO
A2OROMUVBD1NVC	YES	YES
A2QXAZLMU2P0QO	NO	NO
A2VA0NCPWJ8XZG	YES	YES
A394JO4NEPCY3M	YES	YES
A3DCRZTX0HIHBJ	YES	YES
A3FF5CCILJAWYT	NO	NO
A3JXQ4WFIJSQ3D	YES	YES
A3KF6O09H04SP7	YES	YES
A3NEJLDF75PC11	NO	NO
A3NMQ3019X6YE0	NO	NO
A3SFNY43VM330P	YES	YES
A3V8C77FN4FLUA	YES	YES
A4E1UYPDHE8D8	YES	YES
A8E7IOAZRC2IC	NO	NO
A96CZ20YAZKZ8	NO	NO
A98E8M4QLI9RS	NO	NO
AASXXNW6FJ76I	NO	NO
AB09F0Q4T10TE	NO.	NO
AD15OW944ZV20	NO	NO
AK3H5QRAROFGP	NO	NO
AR39OP4746H4F	NO	NO
ASFBN5SC3SGDG	YES	YES
ASVRLMDNQBUD9	YES	YES
AWVLT2L5AP873	YES	YES

# Frequency of each code in familiarity (coded Q5 responses).
table(tbl_all_familiarity$familiarity)

## 
##  NO YES 
##  36  16

6 Predicting IB

# Assemble the regression table: one row per subject in tbl_all_item, holding
# the coded Q2 outcome plus three predictors - matching-task median RT
# (median_rt), matching accuracy (rate) and mean counting error (mean_error).
#
# The predictors are merged on worker_ID instead of being bound column-wise by
# row position. Positional binding silently attaches one subject's predictors to
# another subject's outcome whenever the row order of the CSV read into
# tbl_all_item differs from the order of the computed summary tables.
tbl_median_rts <- tbl_all_matching_rts_median[(tbl_all_matching_rts_median$worker_ID %in% tbl_all_item$worker_ID),]
tbl_acc <- tbl_all_matching_acc[(tbl_all_matching_acc$worker_ID %in% tbl_all_item$worker_ID),]
tbl_log_reg <- tbl_all_item %>%
  left_join(tbl_median_rts[c("worker_ID", "median_rt")], by = "worker_ID") %>%
  left_join(tbl_acc[c("worker_ID", "rate")], by = "worker_ID") %>%
  left_join(tbl_counts_comparison_average_good[c("worker_ID", "mean_error")], by = "worker_ID") %>%
  as.data.frame()

# Every subject must appear exactly once and have all three predictors;
# otherwise the tables are out of sync and the models below would be wrong.
stopifnot(
  !anyDuplicated(tbl_log_reg$worker_ID),
  !anyNA(tbl_log_reg[c("median_rt", "rate", "mean_error")])
)

# The outcome column is renamed to "notice" and stored as numeric for glm().
names(tbl_log_reg)[names(tbl_log_reg) == "noticer_or_nonnoticer"] <- "notice"
tbl_log_reg$notice <- as.numeric(tbl_log_reg$notice)

#write.csv(tbl_log_reg,'predicting_IB_v4.csv', row.names=FALSE)
write.csv(tbl_log_reg, 'predicting_IB_v4_clean.csv', row.names = FALSE)

6.1 Logistic regression w/ matching median RT

# Logistic regression of the 0/1 outcome `notice` on matching-task median RT,
# followed by the coefficient table and fit statistics.
log_reg_matching_median <- glm(notice ~ median_rt, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_median)

## 
## Call:
## glm(formula = notice ~ median_rt, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4292  -1.0513  -0.8556   1.2782   1.5084  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -1.2871     0.9562  -1.346    0.178
## median_rt     0.3528     0.3280   1.076    0.282
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 69.656  on 50  degrees of freedom
## AIC: 73.656
## 
## Number of Fisher Scoring iterations: 4

# Observed outcomes (points at 0 and 1) against median RT, with the fitted
# logistic curve and its confidence band.
# base_size is passed as a named argument; the former doubled parentheses,
# theme_classic((base_size = 15)), performed an assignment inside the call and
# left a stray global variable named base_size behind.
plot_matching_median <- ggplot(tbl_log_reg, aes(x = median_rt, y = notice)) +
  geom_point() +
  stat_smooth(
    method = "glm", method.args = list(family = "binomial"),
    se = TRUE, color = "#f7a800"
  ) +
  theme_classic(base_size = 15)
# Print the plot while silencing the informational message from stat_smooth().
suppressMessages(print(plot_matching_median))

6.2 Logistic regression w/ matching accuracy

# Logistic regression of the 0/1 outcome `notice` on matching-task accuracy
# (rate), followed by the coefficient table and fit statistics.
log_reg_matching_acc <- glm(notice ~ rate, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_acc)

## 
## Call:
## glm(formula = notice ~ rate, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1733  -1.0471  -0.9809   1.3006   1.4051  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   0.8945     2.5395   0.352    0.725
## rate         -1.5727     3.2983  -0.477    0.633
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 70.623  on 50  degrees of freedom
## AIC: 74.623
## 
## Number of Fisher Scoring iterations: 4

# Observed outcomes (points at 0 and 1) against matching accuracy, with the
# fitted logistic curve and its confidence band.
# NOTE(review): this reuses the variable name plot_matching_median although the
# predictor here is accuracy (rate); the name is kept for compatibility.
# base_size is passed as a named argument; the former doubled parentheses,
# theme_classic((base_size = 15)), performed an assignment inside the call and
# left a stray global variable named base_size behind.
plot_matching_median <- ggplot(tbl_log_reg, aes(x = rate, y = notice)) +
  geom_point() +
  stat_smooth(
    method = "glm", method.args = list(family = "binomial"),
    se = TRUE, color = "#f7a800"
  ) +
  theme_classic(base_size = 15)
# Print the plot while silencing the informational message from stat_smooth().
suppressMessages(print(plot_matching_median))

6.3 Logistic regression w/ matching median RT and accuracy

# Logistic regression of the 0/1 outcome `notice` on both matching-task
# predictors (median RT and accuracy) entered together.
log_reg_matching_median_acc <- glm(notice ~ median_rt + rate, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_median_acc)

## 
## Call:
## glm(formula = notice ~ median_rt + rate, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6262  -1.0114  -0.8838   1.2450   1.5633  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   1.1462     2.6049   0.440    0.660
## median_rt     0.5017     0.3692   1.359    0.174
## rate         -3.7084     3.7396  -0.992    0.321
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 68.645  on 49  degrees of freedom
## AIC: 74.645
## 
## Number of Fisher Scoring iterations: 4

6.4 Logistic regressions controlling for IB error

6.4.1 Logistic regression w/ matching median RT

# Logistic regression of `notice` on matching-task median RT with mean counting
# error (mean_error) added as a covariate.
log_reg_matching_median_error <- glm(notice ~ median_rt + mean_error, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_median_error)

## 
## Call:
## glm(formula = notice ~ median_rt + mean_error, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3037  -1.0684  -0.8206   1.2194   1.7406  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.86832    1.06730  -0.814    0.416
## median_rt    0.33231    0.32924   1.009    0.313
## mean_error  -0.02921    0.03402  -0.859    0.391
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 68.883  on 49  degrees of freedom
## AIC: 74.883
## 
## Number of Fisher Scoring iterations: 4

6.4.2 Logistic regression w/ matching accuracy

# Logistic regression of `notice` on matching-task accuracy (rate) with mean
# counting error (mean_error) added as a covariate.
log_reg_matching_acc_error <- glm(notice ~ rate + mean_error, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_acc_error)

## 
## Call:
## glm(formula = notice ~ rate + mean_error, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2427  -1.0614  -0.8706   1.1768   1.7661  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  3.02256    3.07831   0.982    0.326
## rate        -3.59028    3.70856  -0.968    0.333
## mean_error  -0.04679    0.03777  -1.239    0.215
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 68.975  on 49  degrees of freedom
## AIC: 74.975
## 
## Number of Fisher Scoring iterations: 4

6.4.3 Logistic regression w/ matching median RT and accuracy

# Full model: logistic regression of `notice` on matching-task median RT and
# accuracy, with mean counting error (mean_error) added as a covariate.
log_reg_matching_median_acc_error <- glm(notice ~ median_rt + rate + mean_error, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_median_acc_error)

## 
## Call:
## glm(formula = notice ~ median_rt + rate + mean_error, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5186  -1.0235  -0.7296   1.0893   1.7713  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  3.63705    3.19948   1.137    0.256
## median_rt    0.55852    0.37256   1.499    0.134
## rate        -6.30092    4.26076  -1.479    0.139
## mean_error  -0.05342    0.03868  -1.381    0.167
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 66.568  on 48  degrees of freedom
## AIC: 74.568
## 
## Number of Fisher Scoring iterations: 4

Matching Task Predicting Sustained Inattentional Blindness

Adam Barnas

Last compiled at 4:59 PM on September 04, 2020