Scripts were generated by PsychoPy.

1 Set up R environment

library(tidyverse)
library(ggplot2)
library(ggpubr)
library(plyr)

Set the R working directory to the main experiment directory.

# Machine-specific absolute path: the relative file paths used further down
# (e.g. 'matching_v4.csv', "./IB_stims/IB_counting.csv") are resolved against
# this directory, so it must be edited before running on another machine.
setwd("/Users/adambarnas/Box/Predicting_IB")  

2 Read-in datafiles

Read in the individual subject files.

Get a count of the number of subjects.

# Number of distinct worker_ID values in the full data set.
length(unique(tbl_all$worker_ID))
## [1] 119

3 Matching task

# Keep the rows whose stimFile mentions "matching" and drop the columns that
# are only used in the IB sections (Test_IB, Critical_IB, Q1-Q5).
tbl_all_matching <- tbl_all %>%
  filter(grepl("matching", stimFile)) %>%
  dplyr::select(-c(Test_IB, Critical_IB, Q1, Q2, Q3, Q4, Q5))

# Save the matching-task trials to the working directory.
write.csv(tbl_all_matching, file = "matching_v4.csv", row.names = FALSE)

3.1 Initial RT descriptives

summary(tbl_all_matching$matching_response.rt)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##   0.00074   1.11429   2.06588   2.27119   3.06542 119.35300

3.2 Accuracy

3.2.1 Plot

# Per-subject accuracy on the matching task.
#
# Incorrect (matching_response.corr == 0) and correct (== 1) trials are counted
# by name inside one summarize() call, so the result does not depend on the
# column order a long-to-wide reshape would produce, and both count columns
# exist even when no subject in the sample has trials of one kind. Trials with
# a missing matching_response.corr are ignored.
#
# dplyr::summarize is called explicitly because plyr is attached after dplyr
# and masks summarize().
#
# Result columns: worker_ID, inacc, acc, total, rate (= acc / total).
tbl_all_matching_acc <- tbl_all_matching %>%
  group_by(worker_ID) %>%
  dplyr::summarize(
    inacc = sum(matching_response.corr == 0, na.rm = TRUE),
    acc = sum(matching_response.corr == 1, na.rm = TRUE),
    total = inacc + acc
  )
tbl_all_matching_acc$rate <- tbl_all_matching_acc$acc / tbl_all_matching_acc$total

# Accuracy of every subject; the dashed line is drawn at 0.5, the value the
# t-test below uses as chance.
ggbarplot(
  tbl_all_matching_acc,
  x = "worker_ID",
  y = "rate",
  fill = "#0d2240",
  color = "#0d2240",
  font.xtickslab = 4,
  ylab = "Accuracy",
  ylim = c(0, 1),
  position = position_dodge(0.8)
) +
  rotate_x_text() +
  geom_hline(yintercept = 0.5, linetype = 2) +
  theme(legend.position = "none")

# Group mean accuracy with standard-error bar.
ggbarplot(
  tbl_all_matching_acc,
  y = "rate",
  add = "mean_se",
  fill = "#0d2240",
  color = "#0d2240",
  ylab = "Accuracy",
  xlab = "Matching task",
  ticks = FALSE,
  ylim = c(0, 1),
  position = position_dodge(0.8),
  label = TRUE,
  lab.vjust = -2,
  lab.nb.digits = 2
) +
  geom_hline(yintercept = 0.5, linetype = 2) +
  theme(legend.position = "none")

# One-sided one-sample t-test: is mean accuracy greater than 0.5?
matching_chance <- t.test(
  tbl_all_matching_acc$rate,
  mu = 0.5,
  alternative = "greater"
)
matching_chance
## 
##  One Sample t-test
## 
## data:  tbl_all_matching_acc$rate
## t = 12.464, df = 118, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0.5
## 95 percent confidence interval:
##  0.6426167       Inf
## sample estimates:
## mean of x 
## 0.6644958

3.2.2 Remove subjects at or below chance and replot

# Keep worker_ID and rate only, and retain subjects whose accuracy is strictly
# above 0.5.
tbl_all_matching_acc <- tbl_all_matching_acc %>%
  dplyr::select(worker_ID, rate) %>%
  filter(rate > 0.5)

#tbl_all_matching_acc_bad <- subset(tbl_all_matching_acc, select = c(worker_ID, rate)) %>% 
#  filter(rate <= 0.5)

#tbl_all_matching_acc <- subset(tbl_all_matching_acc, select = c(worker_ID, rate))

# Accuracy of every retained subject; dashed line at 0.5.
ggbarplot(
  tbl_all_matching_acc,
  x = "worker_ID",
  y = "rate",
  fill = "#0d2240",
  color = "#0d2240",
  font.xtickslab = 4,
  ylab = "Accuracy",
  ylim = c(0, 1),
  position = position_dodge(0.8)
) +
  rotate_x_text() +
  geom_hline(yintercept = 0.5, linetype = 2) +
  theme(legend.position = "none")

# Group mean accuracy of the retained subjects with standard-error bar.
ggbarplot(
  tbl_all_matching_acc,
  y = "rate",
  add = "mean_se",
  fill = "#0d2240",
  color = "#0d2240",
  ylab = "Accuracy",
  xlab = "Matching task",
  ticks = FALSE,
  ylim = c(0, 1),
  position = position_dodge(0.8),
  label = TRUE,
  lab.vjust = -2,
  lab.nb.digits = 2
) +
  geom_hline(yintercept = 0.5, linetype = 2) +
  theme(legend.position = "none")

# Repeat the one-sided test against 0.5 on the retained subjects.
matching_chance <- t.test(
  tbl_all_matching_acc$rate,
  mu = 0.5,
  alternative = "greater"
)
matching_chance
## 
##  One Sample t-test
## 
## data:  tbl_all_matching_acc$rate
## t = 18.925, df = 96, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0.5
## 95 percent confidence interval:
##  0.693733      Inf
## sample estimates:
## mean of x 
## 0.7123711

3.2.3 Count subjects

# Number of subjects left after the accuracy screen.
length(unique(tbl_all_matching_acc$worker_ID))
## [1] 97

3.3 RTs

3.3.1 Plot

# Matching-task trials of the subjects who passed the accuracy screen.
# Both correct and incorrect trials are included. The file previously built a
# correct-trials-only table first and then immediately overwrote it with this
# one, so that first assignment had no effect; it is kept here as a
# commented-out alternative instead of a dead store.
tbl_all_matching_rts <- tbl_all_matching[(tbl_all_matching$worker_ID %in% tbl_all_matching_acc$worker_ID),]

#tbl_all_matching_rts <- tbl_all_matching[(tbl_all_matching$worker_ID %in% tbl_all_matching_acc$worker_ID),] %>% 
#  filter(matching_response.corr == 1)

# Median RT of every subject; the dashed line is drawn at 0.2 s.
tbl_all_matching_rts %>%
  ggbarplot("worker_ID", "matching_response.rt", fill = "#0d2240", color = "#0d2240", font.xtickslab = 4, add = "median", position = position_dodge(0.8), ylab = "Median RT (sec)", ylim = c(0,6)) + rotate_x_text() + theme(legend.position = "none") + geom_hline(yintercept = .2, linetype = 2)

# Median RT over all retained trials.
tbl_all_matching_rts %>%
  ggbarplot(y = "matching_response.rt", add = "median", fill = "#0d2240", color = "#0d2240", position = position_dodge(0.8), ylab = "Median RT (sec)", xlab = "Matching task", ylim = c(0,5), label = TRUE, lab.vjust = -1, lab.nb.digits = 2) + theme(legend.position = "none")

# One row per subject with the median of matching_response.rt (NA ignored).
tbl_all_matching_rts_median <- tbl_all_matching_rts %>%
  group_by(worker_ID) %>%
  dplyr::summarize(median_rt = median(matching_response.rt, na.rm=TRUE))

3.3.2 Remove subjects with median RT below 200 ms and replot

# Retain subjects whose median RT is strictly above 0.2 s.
tbl_all_matching_rts_median <- filter(tbl_all_matching_rts_median, median_rt > 0.2)

#tbl_all_matching_rts_median <- tbl_all_matching_rts_median

# Trial-level rows of the retained subjects.
tbl_all_matching_rts_median_above_200ms <- tbl_all_matching_rts[
  tbl_all_matching_rts$worker_ID %in% tbl_all_matching_rts_median$worker_ID,
]

# Median RT of every retained subject; dashed line at 0.2 s.
ggbarplot(
  tbl_all_matching_rts_median_above_200ms,
  x = "worker_ID",
  y = "matching_response.rt",
  fill = "#0d2240",
  color = "#0d2240",
  font.xtickslab = 4,
  add = "median",
  position = position_dodge(0.8),
  ylab = "Median RT (sec)",
  ylim = c(0, 5)
) +
  rotate_x_text() +
  theme(legend.position = "none") +
  geom_hline(yintercept = 0.2, linetype = 2)

# Median RT over all trials of the retained subjects.
ggbarplot(
  tbl_all_matching_rts_median_above_200ms,
  y = "matching_response.rt",
  add = "median",
  fill = "#0d2240",
  color = "#0d2240",
  position = position_dodge(0.8),
  ylab = "Median RT (sec)",
  xlab = "Matching task",
  ylim = c(0, 5),
  label = TRUE,
  lab.vjust = -1,
  lab.nb.digits = 2
) +
  theme(legend.position = "none")

3.3.3 Count subjects

# Number of subjects left after the median-RT screen.
length(unique(tbl_all_matching_rts_median$worker_ID))
## [1] 93

3.4 Final RT descriptives

summary(tbl_all_matching_rts_median_above_200ms$matching_response.rt)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.009   1.573   2.397   2.582   3.295  30.932

3.5 Speed-accuracy trade-off

Should see a flat line or a negative correlation.

#tbl_speed_acc_trade_off <- cbind.data.frame(tbl_all_matching_acc, tbl_all_matching_rts_median[2])
# Pair each subject's accuracy with their median RT by worker_ID; rows with a
# missing value in any column (e.g. subjects present in only one table) are
# dropped.
tbl_speed_acc_trade_off <- drop_na(
  full_join(tbl_all_matching_acc, tbl_all_matching_rts_median, by = "worker_ID")
)

# Accuracy vs. median RT with a regression line, its confidence band and the
# Pearson correlation.
ggscatter(
  tbl_speed_acc_trade_off,
  x = "rate",
  y = "median_rt",
  add = "reg.line",
  add.params = list(color = "#f7a800", fill = "lightgray"),
  xlim = c(0.5, 1),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.coeff.args = list(method = "pearson", label.sep = "\n")
)
## `geom_smooth()` using formula 'y ~ x'

4 IB performance

# Rows of the subjects who passed both matching-task screens, restricted to
# the columns used in the IB sections and to non-matching stimulus files.
tbl_all_IB <- tbl_all[tbl_all$worker_ID %in% tbl_all_matching_rts_median$worker_ID, ] %>%
  dplyr::select(worker_ID, stimFile, Test_IB, Critical_IB, Q1, Q2, Q3, Q4, Q5) %>%
  filter(!grepl("matching", stimFile))

# Number of subjects carried into the IB analysis.
length(unique(tbl_all_IB$worker_ID))
## [1] 93

4.1 Counting task

All subjects

# Counting-task trials: rows whose stimFile contains the literal text ".mp4".
# fixed = TRUE makes the dot a literal character; as a regular expression
# '.mp4' would also match any character followed by "mp4".
tbl_all_counting <- tbl_all_IB %>%
  dplyr::select(-c(Q1, Q2, Q3, Q4, Q5)) %>%
  filter(grepl(".mp4", stimFile, fixed = TRUE))

# Reported count: Test_IB when present, otherwise Critical_IB.
tbl_all_counting$IB <- ifelse(!is.na(tbl_all_counting$Test_IB), tbl_all_counting$Test_IB, tbl_all_counting$Critical_IB)

# A trial with no reported count at all is scored as a count of 0.
tbl_all_counting$IB[is.na(tbl_all_counting$IB)] <- 0

# Reference table joined on stimFile; its `count` column is the value the
# reported count is compared against.
tbl_counts <- read_csv("./IB_stims/IB_counting.csv")

# Attach the reference count to every trial. Rows that exist only in the
# reference table have no worker_ID and are removed again.
tbl_counts_comparison <- full_join(tbl_all_counting, tbl_counts, by = "stimFile") %>%
  filter(!is.na(worker_ID))

tbl_counts_comparison$IB <- as.numeric(tbl_counts_comparison$IB)
tbl_counts_comparison$count <- as.numeric(tbl_counts_comparison$count)

# Absolute percent error of the reported count relative to the reference count.
tbl_counts_comparison$error <- (abs(tbl_counts_comparison$IB - tbl_counts_comparison$count) / tbl_counts_comparison$count) * 100

# Mean percent error (with standard error) of every subject.
tbl_counts_comparison %>%
  ggbarplot("worker_ID", "error", fill = "#f7a800", color = "#f7a800", font.xtickslab = 4, ylim = c(0,200), add = "mean_se", ylab = "Mean Percent Error on Midline-crossings Task") + rotate_x_text()

Remove subjects with an average error greater than 150%

# Mean percent error per subject, keeping subjects whose mean is at most 150.
tbl_counts_comparison_average <- tbl_counts_comparison %>%
  group_by(worker_ID) %>%
  dplyr::summarize(mean_error = mean(error, na.rm = TRUE)) %>%
  filter(mean_error <= 150)

# Group mean of the per-subject mean errors with standard-error bar.
ggbarplot(
  tbl_counts_comparison_average,
  y = "mean_error",
  fill = "#f7a800",
  color = "#f7a800",
  ylim = c(0, 100),
  sort.val = "asc",
  xlab = "Group",
  add = "mean_se",
  ylab = "Mean Percent Error on Midline-crossings Task"
)

# One-sided one-sample t-test: is the mean error greater than 0?
error <- t.test(
  tbl_counts_comparison_average$mean_error,
  mu = 0,
  alternative = "greater"
)
error
## 
##  One Sample t-test
## 
## data:  tbl_counts_comparison_average$mean_error
## t = 10.973, df = 89, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  31.63532      Inf
## sample estimates:
## mean of x 
##  37.28258
# Save the trial-level comparison table to the working directory.
write.csv(tbl_counts_comparison, file = "IB_counting.csv", row.names = FALSE)

# Density-scaled histogram of the per-subject mean error with a density curve;
# the dashed blue line marks the mean of mean_error.
ggplot(tbl_counts_comparison_average, aes(x = mean_error)) +
  geom_histogram(
    aes(y = ..density..),
    color = "black",
    fill = "#f7a800",
    bins = 10
  ) +
  geom_density(alpha = 0.2, fill = "white") +
  geom_vline(
    aes(xintercept = mean(mean_error)),
    color = "blue",
    linetype = "dashed",
    size = 1
  )

4.1.1 Remove subjects with an average error of 40% or greater

# Subjects whose mean percent error is strictly below 40.
tbl_counts_comparison_average_good <- filter(tbl_counts_comparison_average, mean_error < 40)

#tbl_counts_comparison_average_bad <- tbl_counts_comparison_average %>% 
#  filter(mean_error > 40)

#tbl_counts_comparison_average_good <- tbl_counts_comparison_average

# Trial-level rows of those subjects.
tbl_counts_comparison_good <- tbl_counts_comparison[
  tbl_counts_comparison$worker_ID %in% tbl_counts_comparison_average_good$worker_ID,
]

# Mean percent error (with standard error) of every retained subject.
ggbarplot(
  tbl_counts_comparison_good,
  x = "worker_ID",
  y = "error",
  fill = "#f7a800",
  color = "#f7a800",
  ylim = c(0, 100),
  font.xtickslab = 6,
  add = "mean_se",
  ylab = "Mean Percent Error on Midline-crossings Task"
) +
  rotate_x_text()

# Group mean of the per-subject mean errors with standard-error bar.
ggbarplot(
  tbl_counts_comparison_average_good,
  y = "mean_error",
  fill = "#f7a800",
  color = "#f7a800",
  ylim = c(0, 100),
  sort.val = "asc",
  xlab = "Group",
  add = "mean_se",
  ylab = "Mean Percent Error on Midline-crossings Task"
)

# One-sided one-sample t-test: is the mean error greater than 0?
error_good <- t.test(
  tbl_counts_comparison_average_good$mean_error,
  mu = 0,
  alternative = "greater"
)
error_good
## 
##  One Sample t-test
## 
## data:  tbl_counts_comparison_average_good$mean_error
## t = 10.247, df = 51, p-value = 2.787e-14
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  10.617    Inf
## sample estimates:
## mean of x 
##  12.69199
# Density-scaled histogram of the retained subjects' mean error with a density
# curve.
ggplot(tbl_counts_comparison_average_good, aes(x = mean_error)) +
  geom_histogram(
    aes(y = ..density..),
    color = "black",
    fill = "#f7a800",
    bins = 10
  ) +
  geom_density(alpha = 0.2, fill = "white")

4.1.2 Count subjects

# Number of subjects left after the counting-error screen.
length(unique(tbl_counts_comparison_average_good$worker_ID))
## [1] 52

4.2 Noticers vs. Non-noticers

# Q1 answers of the subjects who passed the counting-error screen.
tbl_all_notice <- tbl_all_IB[(tbl_all_IB$worker_ID %in% tbl_counts_comparison_average_good$worker_ID),]
tbl_all_notice <- subset(tbl_all_notice, select = c(worker_ID, Q1))

# Drop rows in which both columns are NA and rows whose Q1 is an empty string.
# Each condition is NA-safe: a row with a worker_ID but a missing Q1 is kept
# as-is rather than producing an NA row index, which would turn it into an
# all-NA row and lose the worker_ID.
tbl_all_notice <- tbl_all_notice %>%
  filter(
    !(is.na(worker_ID) & is.na(Q1)),
    is.na(Q1) | Q1 != ""
  )
write.csv(tbl_all_notice, "IB_notice.csv", row.names = FALSE)

# The analysis continues from IB_notice_2.csv, not from the file written
# above. It carries an extra noticer_or_nonnoticer column (presumably coded by
# hand from the Q1 answers - confirm).
tbl_all_notice <- read_csv("IB_notice_2.csv")
## Parsed with column specification:
## cols(
##   worker_ID = col_character(),
##   Q1 = col_character(),
##   noticer_or_nonnoticer = col_double()
## )
knitr::kable(tbl_all_notice)
worker_ID Q1 noticer_or_nonnoticer
A1198W1SPF1R4 I did not notice anything different. 0
A11S8IAAVDXCUS im not sure what was different 0
A1561P9VVA3C1C no 0
A16335MOISDG1F no 0
A1721FMJBGZVYE there was a grey cross going side to side 1
A1BWS5AD2T4NIR There appeared to be a red character. 0
A1C5SQZ045W0L5 did not notice 0
A1C7XI68SED8JE a gray cross 1
A1EFKRW7L4COZZ There is no different. 0
A1GOP4VZYFUSXV no 0
A1H5Q9HRH4RPZU no 0
A1HX76XWAQ1EJV in the last task i was a plus symbol on the code 1
A1IAMNSZB9U4Z1 I noticed a gray colored letter boucing across the screen 1
A1IZ6M6NJ4WKFV 17 0
A1PVBCIZ867MXP it went faster 0
A1RKRL7971AMD NA 0
A1TR615HGENX1M a dark grey cross moving horizontally across the line 1
A1XVEKS9O73ERE I did not 0
A219Y0MX694TM2 No 0
A21HUPW67XA7UJ they were completely different 0
A21Q7FCKM86VL8 I did not notice anything 0
A233GOCNYJF1N2 plus symbol appears in the line 1
A277UVT82WHF65 no 0
A2BY3NK0YQO891 yes i noticed that there was a shape that looked like a cross which was moving along the center of the line 1
A2E0LU8V4EUX5C looked same 0
A2FMFDML1VV3R5 maybe the letters were moving more quickly 0
A2KBTDHM44J7X4 Ther wass a crosshair that moved across the screen 1
A2OFN0A5CPLH57 no 0
A2OROMUVBD1NVC NA 0
A2QXAZLMU2P0QO I did not notice anything different 0
A2VA0NCPWJ8XZG there was a gray cross moving along the center line 1
A394JO4NEPCY3M no I did not 0
A3DCRZTX0HIHBJ something was red 0
A3FF5CCILJAWYT The movemen of letters was paused for some period of time 0
A3JXQ4WFIJSQ3D a plus sign moved across the horizontal line 1
A3KF6O09H04SP7 There was some kind of shape or letter that was differencomma but I was so focused on the counting that I didnapostrophet really see what it was 0
A3NEJLDF75PC11 moving a lot 0
A3NMQ3019X6YE0 no 0
A3SFNY43VM330P No 0
A3V8C77FN4FLUA no 0
A4E1UYPDHE8D8 A grey plus character moved across the center line coming from the right 1
A8E7IOAZRC2IC Nothing noticed 0
A96CZ20YAZKZ8 plus sign 1
A98E8M4QLI9RS no 0
AASXXNW6FJ76I movement 0
AB09F0Q4T10TE I did not notice anything different. 0
AD15OW944ZV20 There was a plus sign going right to left through the center 1
AK3H5QRAROFGP no 0
AR39OP4746H4F I did not notice a difference 0
ASFBN5SC3SGDG No. 0
ASVRLMDNQBUD9 a grey plus sign crossed the screen 1
AWVLT2L5AP873 no i didnt notice anything 0
table(tbl_all_notice$noticer_or_nonnoticer)
## 
##  0  1 
## 38 14

4.3 Unexpected item description

Noticers: describe the shape correctly even if they don’t describe how it moves. Non-noticers: describe the shape correctly but describe the wrong movement.

# Q2 answers of the subjects who passed the counting-error screen.
tbl_all_item <- tbl_all_IB[(tbl_all_IB$worker_ID %in% tbl_counts_comparison_average_good$worker_ID),]
tbl_all_item <- subset(tbl_all_item, select = c(worker_ID, Q2))

# Drop rows in which both columns are NA and rows whose Q2 is an empty string.
# Each condition is NA-safe: a row with a worker_ID but a missing Q2 is kept
# as-is rather than producing an NA row index, which would turn it into an
# all-NA row and lose the worker_ID.
tbl_all_item <- tbl_all_item %>%
  filter(
    !(is.na(worker_ID) & is.na(Q2)),
    is.na(Q2) | Q2 != ""
  )
write.csv(tbl_all_item, "IB_item.csv", row.names = FALSE)

# The analysis continues from IB_item_2.csv, not from the file written above.
# It carries an extra noticer_or_nonnoticer column (presumably coded by hand
# from the Q2 answers - confirm).
tbl_all_item <- read_csv("IB_item_2.csv")
## Parsed with column specification:
## cols(
##   worker_ID = col_character(),
##   Q2 = col_character(),
##   noticer_or_nonnoticer = col_double()
## )
knitr::kable(tbl_all_item)
worker_ID Q2 noticer_or_nonnoticer
A1198W1SPF1R4 x 0
A11S8IAAVDXCUS I might have seen the plus sign 1
A1561P9VVA3C1C x 0
A16335MOISDG1F no 0
A1721FMJBGZVYE + 1
A1BWS5AD2T4NIR no I did not notice any of these characters 0
A1C5SQZ045W0L5 no idea. pehaps the + 1
A1C7XI68SED8JE LEFTleft 0
A1EFKRW7L4COZZ + 1
A1GOP4VZYFUSXV + 1
A1H5Q9HRH4RPZU i did not 0
A1HX76XWAQ1EJV + 1
A1IAMNSZB9U4Z1 I think it was the + sign or the letter L 1
A1IZ6M6NJ4WKFV E 0
A1PVBCIZ867MXP e 0
A1RKRL7971AMD + 1
A1TR615HGENX1M + 1
A1XVEKS9O73ERE E 0
A219Y0MX694TM2 No 0
A21HUPW67XA7UJ + 1
A21Q7FCKM86VL8 e it moved in straight line 0
A233GOCNYJF1N2 + 1
A277UVT82WHF65 no 0
A2BY3NK0YQO891 yes i noticed the plus sign 1
A2E0LU8V4EUX5C not notice 0
A2FMFDML1VV3R5 + 1
A2KBTDHM44J7X4 left 0
A2OFN0A5CPLH57 E moved left to right 0
A2OROMUVBD1NVC NA 0
A2QXAZLMU2P0QO I did not see those 0
A2VA0NCPWJ8XZG gray cross 1
A394JO4NEPCY3M x 0
A3DCRZTX0HIHBJ x I dont know 0
A3FF5CCILJAWYT + 1
A3JXQ4WFIJSQ3D yes th plus sign moved on horizontal 1
A3KF6O09H04SP7 E 0
A3NEJLDF75PC11 E 0
A3NMQ3019X6YE0 e 0
A3SFNY43VM330P X 0
A3V8C77FN4FLUA x 0
A4E1UYPDHE8D8 #NAME? 0
A8E7IOAZRC2IC Not noticed 0
A96CZ20YAZKZ8 + 1
A98E8M4QLI9RS #NAME? 0
AASXXNW6FJ76I + 1
AB09F0Q4T10TE No I was focused on the white items. 0
AD15OW944ZV20 yes. The Plus sign. 1
AK3H5QRAROFGP + 1
AR39OP4746H4F x 0
ASFBN5SC3SGDG + 1
ASVRLMDNQBUD9 + 1
AWVLT2L5AP873 i didnt notice these at all 0
table(tbl_all_item$noticer_or_nonnoticer)
## 
##  0  1 
## 30 22

5 Post IB questions

5.1 Expecting an unexpected event

# Q4 answers of the subjects who passed the counting-error screen.
tbl_all_expecting <- tbl_all_IB[(tbl_all_IB$worker_ID %in% tbl_counts_comparison_average_good$worker_ID),]
tbl_all_expecting <- subset(tbl_all_expecting, select = c(worker_ID, Q4))

# Drop rows in which both columns are NA and rows whose Q4 is an empty string.
# Each condition is NA-safe: a row with a worker_ID but a missing Q4 is kept
# as-is rather than producing an NA row index, which would turn it into an
# all-NA row and lose the worker_ID.
tbl_all_expecting <- tbl_all_expecting %>%
  filter(
    !(is.na(worker_ID) & is.na(Q4)),
    is.na(Q4) | Q4 != ""
  )

# Upper-case every character column; other columns pass through unchanged.
tbl_all_expecting <- data.frame(lapply(tbl_all_expecting, function(v) {
  if (is.character(v)) toupper(v) else v
}))
write.csv(tbl_all_expecting, "IB_expecting.csv", row.names = FALSE)

# The analysis continues from IB_expecting_2.csv, not from the file written
# above. It carries an extra expecting_unexpected column (presumably coded by
# hand from the Q4 answers - confirm).
tbl_all_expecting <- read_csv("IB_expecting_2.csv")
## Parsed with column specification:
## cols(
##   worker_ID = col_character(),
##   Q4 = col_character(),
##   expecting_unexpected = col_character()
## )
knitr::kable(tbl_all_expecting)
worker_ID Q4 expecting_unexpected
A1198W1SPF1R4 NO NO
A11S8IAAVDXCUS NO I DIDNT EXPECT ANYTHING TO APPEAR UNEXPECTEDLY NO
A1561P9VVA3C1C NO NO
A16335MOISDG1F NO NO
A1721FMJBGZVYE NO NO
A1BWS5AD2T4NIR NO NO
A1C5SQZ045W0L5 NO NO
A1C7XI68SED8JE NO NO
A1EFKRW7L4COZZ NO NO
A1GOP4VZYFUSXV NO NO
A1H5Q9HRH4RPZU NO NO
A1HX76XWAQ1EJV NO NO
A1IAMNSZB9U4Z1 NO NO
A1IZ6M6NJ4WKFV NO NO
A1PVBCIZ867MXP NO NO
A1RKRL7971AMD YES YES
A1TR615HGENX1M NO NO
A1XVEKS9O73ERE NO NO
A219Y0MX694TM2 NO NO
A21HUPW67XA7UJ YES YES
A21Q7FCKM86VL8 NO. NO
A233GOCNYJF1N2 YES YES
A277UVT82WHF65 N NO
A2BY3NK0YQO891 NO NO
A2E0LU8V4EUX5C YS YES
A2FMFDML1VV3R5 NO NO
A2KBTDHM44J7X4 NO NO
A2OFN0A5CPLH57 NO NO
A2OROMUVBD1NVC NO NO
A2QXAZLMU2P0QO NO NO
A2VA0NCPWJ8XZG YES YES
A394JO4NEPCY3M NO NO
A3DCRZTX0HIHBJ NO NO
A3FF5CCILJAWYT NO NO
A3JXQ4WFIJSQ3D NO NO
A3KF6O09H04SP7 NO NO
A3NEJLDF75PC11 NO NO
A3NMQ3019X6YE0 NO NO
A3SFNY43VM330P NO NO
A3V8C77FN4FLUA NO NO
A4E1UYPDHE8D8 NO NO
A8E7IOAZRC2IC NO NO
A96CZ20YAZKZ8 NO NO
A98E8M4QLI9RS NO NO
AASXXNW6FJ76I YES YES
AB09F0Q4T10TE NO. NO
AD15OW944ZV20 NO NO
AK3H5QRAROFGP NO NO
AR39OP4746H4F NO NO
ASFBN5SC3SGDG NO NO
ASVRLMDNQBUD9 NO NO
AWVLT2L5AP873 NO NO
table(tbl_all_expecting$expecting_unexpected)
## 
##  NO YES 
##  46   6

5.2 Familiarity with IB experiments

# Q5 answers of the subjects who passed the counting-error screen.
tbl_all_familiarity <- tbl_all_IB[(tbl_all_IB$worker_ID %in% tbl_counts_comparison_average_good$worker_ID),]
tbl_all_familiarity <- subset(tbl_all_familiarity, select = c(worker_ID, Q5))

# Drop rows in which both columns are NA and rows whose Q5 is an empty string.
# Each condition is NA-safe: a row with a worker_ID but a missing Q5 is kept
# as-is rather than producing an NA row index, which would turn it into an
# all-NA row and lose the worker_ID.
tbl_all_familiarity <- tbl_all_familiarity %>%
  filter(
    !(is.na(worker_ID) & is.na(Q5)),
    is.na(Q5) | Q5 != ""
  )

# Upper-case every character column; other columns pass through unchanged.
tbl_all_familiarity <- data.frame(lapply(tbl_all_familiarity, function(v) {
  if (is.character(v)) toupper(v) else v
}))
write.csv(tbl_all_familiarity, "IB_familiarity.csv", row.names = FALSE)

# The analysis continues from IB_familiarity_2.csv, not from the file written
# above. It carries an extra familiarity column (presumably coded by hand from
# the Q5 answers - confirm).
tbl_all_familiarity <- read_csv("IB_familiarity_2.csv")
## Parsed with column specification:
## cols(
##   worker_ID = col_character(),
##   Q5 = col_character(),
##   familiarity = col_character()
## )
knitr::kable(tbl_all_familiarity)
worker_ID Q5 familiarity
A1198W1SPF1R4 NO NO
A11S8IAAVDXCUS NO NO
A1561P9VVA3C1C NO NO
A16335MOISDG1F NO NO
A1721FMJBGZVYE NO NO
A1BWS5AD2T4NIR NO NO
A1C5SQZ045W0L5 NO NO
A1C7XI68SED8JE YES YES
A1EFKRW7L4COZZ NO NO
A1GOP4VZYFUSXV NO NO
A1H5Q9HRH4RPZU NO NO
A1HX76XWAQ1EJV NO NO
A1IAMNSZB9U4Z1 NO NO
A1IZ6M6NJ4WKFV NO NO
A1PVBCIZ867MXP NO NO
A1RKRL7971AMD NO NO
A1TR615HGENX1M YES YES
A1XVEKS9O73ERE NO NO
A219Y0MX694TM2 NO NO
A21HUPW67XA7UJ NO NO
A21Q7FCKM86VL8 YES. YES
A233GOCNYJF1N2 NO NO
A277UVT82WHF65 NO NO
A2BY3NK0YQO891 NO NO
A2E0LU8V4EUX5C NO NO
A2FMFDML1VV3R5 NO NO
A2KBTDHM44J7X4 YES YES
A2OFN0A5CPLH57 NO NO
A2OROMUVBD1NVC YES YES
A2QXAZLMU2P0QO NO NO
A2VA0NCPWJ8XZG YES YES
A394JO4NEPCY3M YES YES
A3DCRZTX0HIHBJ YES YES
A3FF5CCILJAWYT NO NO
A3JXQ4WFIJSQ3D YES YES
A3KF6O09H04SP7 YES YES
A3NEJLDF75PC11 NO NO
A3NMQ3019X6YE0 NO NO
A3SFNY43VM330P YES YES
A3V8C77FN4FLUA YES YES
A4E1UYPDHE8D8 YES YES
A8E7IOAZRC2IC NO NO
A96CZ20YAZKZ8 NO NO
A98E8M4QLI9RS NO NO
AASXXNW6FJ76I NO NO
AB09F0Q4T10TE NO. NO
AD15OW944ZV20 NO NO
AK3H5QRAROFGP NO NO
AR39OP4746H4F NO NO
ASFBN5SC3SGDG YES YES
ASVRLMDNQBUD9 YES YES
AWVLT2L5AP873 YES YES
table(tbl_all_familiarity$familiarity)
## 
##  NO YES 
##  36  16

6 Predicting IB

# Predictor tables restricted to the subjects present in tbl_all_item.
tbl_median_rts <- tbl_all_matching_rts_median[(tbl_all_matching_rts_median$worker_ID %in% tbl_all_item$worker_ID),]
tbl_acc <- tbl_all_matching_acc[(tbl_all_matching_acc$worker_ID %in% tbl_all_item$worker_ID),]

# Regression table: the coded Q2 answers plus median_rt, rate and mean_error.
# The predictors are matched to each subject by worker_ID instead of being
# bound side by side, so a different row order in any of the four tables
# (tbl_all_item is read from a CSV file) cannot attach a predictor to the wrong
# subject. Columns and their order are unchanged:
# worker_ID, Q2, noticer_or_nonnoticer, median_rt, rate, mean_error.
tbl_log_reg <- tbl_all_item %>%
  inner_join(tbl_median_rts[c("worker_ID", "median_rt")], by = "worker_ID") %>%
  inner_join(tbl_acc[c("worker_ID", "rate")], by = "worker_ID") %>%
  inner_join(tbl_counts_comparison_average_good[c("worker_ID", "mean_error")], by = "worker_ID") %>%
  as.data.frame()

# Every subject in tbl_all_item must have all three predictors; stop instead of
# silently analysing fewer (or duplicated) subjects.
stopifnot(nrow(tbl_log_reg) == nrow(tbl_all_item))

# The outcome column is called `notice` in the models below.
names(tbl_log_reg)[names(tbl_log_reg) == "noticer_or_nonnoticer"] <- "notice"
tbl_log_reg$notice <- as.numeric(tbl_log_reg$notice)

#write.csv(tbl_log_reg,'predicting_IB_v4.csv', row.names=FALSE)
write.csv(tbl_log_reg, "predicting_IB_v4_clean.csv", row.names = FALSE)

6.1 Logistic regression w/ matching median RT

# Binomial GLM (logit link) of notice on median_rt, fitted to tbl_log_reg.
# The call is left exactly as written because summary() echoes it under "Call:".
log_reg_matching_median <- glm(notice ~ median_rt, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_median)
## 
## Call:
## glm(formula = notice ~ median_rt, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4292  -1.0513  -0.8556   1.2782   1.5084  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -1.2871     0.9562  -1.346    0.178
## median_rt     0.3528     0.3280   1.076    0.282
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 69.656  on 50  degrees of freedom
## AIC: 73.656
## 
## Number of Fisher Scoring iterations: 4
# notice (0/1) against median_rt with a fitted binomial GLM curve and its
# standard-error band.
# base_size is passed as a plain named argument; the earlier form
# theme_classic((base_size = 15)) also assigned a variable called base_size in
# the calling environment as a side effect.
plot_matching_median <- ggplot(tbl_log_reg, aes(x = median_rt, y = notice)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = TRUE,
    color = "#f7a800"
  ) +
  theme_classic(base_size = 15)

# Print the plot without the message stat_smooth() emits.
suppressMessages(print(plot_matching_median))

6.2 Logistic regression w/ matching accuracy

# Binomial GLM (logit link) of notice on rate, fitted to tbl_log_reg.
# The call is left exactly as written because summary() echoes it under "Call:".
log_reg_matching_acc <- glm(notice ~ rate, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_acc)
## 
## Call:
## glm(formula = notice ~ rate, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1733  -1.0471  -0.9809   1.3006   1.4051  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   0.8945     2.5395   0.352    0.725
## rate         -1.5727     3.2983  -0.477    0.633
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 70.623  on 50  degrees of freedom
## AIC: 74.623
## 
## Number of Fisher Scoring iterations: 4
# notice (0/1) against rate with a fitted binomial GLM curve and its
# standard-error band.
# base_size is passed as a plain named argument; the earlier form
# theme_classic((base_size = 15)) also assigned a variable called base_size in
# the calling environment as a side effect.
# NOTE(review): this reuses the name plot_matching_median although the x-axis
# is rate, overwriting the median-RT plot object - confirm whether a separate
# name was intended.
plot_matching_median <- ggplot(tbl_log_reg, aes(x = rate, y = notice)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = TRUE,
    color = "#f7a800"
  ) +
  theme_classic(base_size = 15)

# Print the plot without the message stat_smooth() emits.
suppressMessages(print(plot_matching_median))

6.3 Logistic regression w/ matching median rt and accuracy

# Binomial GLM (logit link) of notice on median_rt and rate, fitted to
# tbl_log_reg.
# The call is left exactly as written because summary() echoes it under "Call:".
log_reg_matching_median_acc <- glm(notice ~ median_rt + rate, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_median_acc)
## 
## Call:
## glm(formula = notice ~ median_rt + rate, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6262  -1.0114  -0.8838   1.2450   1.5633  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   1.1462     2.6049   0.440    0.660
## median_rt     0.5017     0.3692   1.359    0.174
## rate         -3.7084     3.7396  -0.992    0.321
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 68.645  on 49  degrees of freedom
## AIC: 74.645
## 
## Number of Fisher Scoring iterations: 4

6.4 Logistic regressions controlling for IB error

6.4.1 Logistic regression w/ matching median RT

# Binomial GLM (logit link) of notice on median_rt with mean_error as an
# additional predictor, fitted to tbl_log_reg.
# The call is left exactly as written because summary() echoes it under "Call:".
log_reg_matching_median_error <- glm(notice ~ median_rt + mean_error, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_median_error)
## 
## Call:
## glm(formula = notice ~ median_rt + mean_error, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3037  -1.0684  -0.8206   1.2194   1.7406  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.86832    1.06730  -0.814    0.416
## median_rt    0.33231    0.32924   1.009    0.313
## mean_error  -0.02921    0.03402  -0.859    0.391
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 68.883  on 49  degrees of freedom
## AIC: 74.883
## 
## Number of Fisher Scoring iterations: 4

6.4.2 Logistic regression w/ matching accuracy

# Binomial GLM (logit link) of notice on rate with mean_error as an additional
# predictor, fitted to tbl_log_reg.
# The call is left exactly as written because summary() echoes it under "Call:".
log_reg_matching_acc_error <- glm(notice ~ rate + mean_error, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_acc_error)
## 
## Call:
## glm(formula = notice ~ rate + mean_error, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2427  -1.0614  -0.8706   1.1768   1.7661  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  3.02256    3.07831   0.982    0.326
## rate        -3.59028    3.70856  -0.968    0.333
## mean_error  -0.04679    0.03777  -1.239    0.215
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 68.975  on 49  degrees of freedom
## AIC: 74.975
## 
## Number of Fisher Scoring iterations: 4

6.4.3 Logistic regression w/ matching median RT and accuracy

# Binomial GLM (logit link) of notice on median_rt and rate with mean_error as
# an additional predictor, fitted to tbl_log_reg.
# The call is left exactly as written because summary() echoes it under "Call:".
log_reg_matching_median_acc_error <- glm(notice ~ median_rt + rate + mean_error, data = tbl_log_reg, family = binomial(link = "logit"))
summary(log_reg_matching_median_acc_error)
## 
## Call:
## glm(formula = notice ~ median_rt + rate + mean_error, family = binomial(link = "logit"), 
##     data = tbl_log_reg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5186  -1.0235  -0.7296   1.0893   1.7713  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  3.63705    3.19948   1.137    0.256
## median_rt    0.55852    0.37256   1.499    0.134
## rate        -6.30092    4.26076  -1.479    0.139
## mean_error  -0.05342    0.03868  -1.381    0.167
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 70.852  on 51  degrees of freedom
## Residual deviance: 66.568  on 48  degrees of freedom
## AIC: 74.568
## 
## Number of Fisher Scoring iterations: 4