library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(here)

## here() starts at /Users/caoanjie/Desktop/projects/pokebaby_fam

library(ggthemes)

# Load the short math experiment data from the project's data folder.
short_math_data <- read_csv(file = here("data/short_math_data.csv"))

## Rows: 1522 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): subject, block_type, item_type, trial_type, trial_complexity, item_...
## dbl (7): block_number, forced_exposure_time, trial_number, rt, block_deviant...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Load the short memory experiment data from the project's data folder.
short_memory_data <- read_csv(file = here("data/short_memory_data.csv"))

## Rows: 2786 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): subject, block_type, item_type, trial_type, trial_complexity, expos...
## dbl (7): block_number, forced_exposure_time, trial_number, rt, block_deviant...
## lgl (3): item_id, memory_false_stimulus, memory_true_stimulus
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Load the mixed duration experiment data from the project's data folder.
mixed_duration_data <- read_csv(file = here("data/mixed_duration_data.csv"))

## Rows: 12356 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): subject, block_type, item_type, trial_type, trial_complexity, item_...
## dbl (6): block_number, trial_number, rt, block_deviant_number, first_dev_pos...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Load the paired presentation experiment data from the project's data folder.
paired_presentation_data <- read_csv(
  file = here("data/paired_presentation_data.csv")
)

## Rows: 228843 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): subject, stimulus_processed, complexity, gaze_location, phase, sti...
## dbl  (8): window_width, window_height, block_number, exposure_time, x, y, t,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Load the follow-up viewing experiment data from the project's data folder.
follow_up_viewing_data <- read_csv(
  file = here("data/follow_up_viewing_data.csv")
)

## Rows: 2861 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): subject, stimulus_displayed, p_id, stimulus_type, complexity
## dbl (4): trial_number, trial_looking_time, res_lt, m_res_lt
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Experiments overview

We ran five experiments in total. Three used experimental paradigms similar to the self-paced viewing experiments, except for a first-trial manipulation (i.e., forced short / forced long). The other two were a paired presentation experiment (MB5 adult pilots) and a follow-up free-viewing experiment with participants who had taken part in the old cogsci experiment (6 months later).

Short math experiment: didn’t preregister
Short memory experiment: https://aspredicted.org/N8F_WBZ
Mixed duration experiment: https://aspredicted.org/QTL_2M6
Paired presentation experiment: https://osf.io/9af5z/
Follow up viewing experiment: https://docs.google.com/document/d/1GCLTzXJaNjHHpHGQnBDl1Ndztm-Uon1PPmMYTzvj0So/edit#heading=h.chyhfxh5ycxv

First trial manipulation experiment

# Stack the three first-trial-manipulation datasets into one long table.
# Each source keeps the same trial-level columns and is tagged with an `exp`
# label; only the forced-short rows of the mixed duration data are kept.
# The column order of the result follows the first table passed to bind_rows().
first_trial_manpulation_d <- bind_rows(
  mixed_duration_data %>%
    filter(exposure_type == "forced_short") %>%
    mutate(exp = "mixed_duration") %>%
    select(
      subject, block_number, trial_number, trial_type, trial_complexity,
      rt, exp, first_dev_position, second_dev_position
    ),
  short_memory_data %>%
    mutate(exp = "forced_short_memory") %>%
    select(
      subject, block_number, trial_number, trial_type, trial_complexity,
      rt, first_dev_position, second_dev_position, exp
    ),
  short_math_data %>%
    mutate(exp = "forced_short_math") %>%
    select(
      subject, block_number, trial_number, trial_type, trial_complexity,
      rt, first_dev_position, second_dev_position, exp
    )
)

How many participants in each dataset?

# Count unique subjects within each experiment: reduce to one row per
# (exp, subject) pair, then tally rows per experiment. The result stays
# grouped by `exp`.
first_trial_manpulation_d %>%
  distinct(exp, subject) %>%
  group_by(exp) %>%
  summarise(n = n(), .groups = "keep")

## # A tibble: 3 × 2
## # Groups:   exp [3]
##   exp                     n
##   <chr>               <int>
## 1 forced_short_math      50
## 2 forced_short_memory    50
## 3 mixed_duration        205

ok it seems to suggest:

super noisy
task demands don’t seem to matter too much - mixed duration is also a memory task

# rt on trial 2 for each experiment, coloured by trial type.
# Each point-range is a mean with a bootstrapped confidence interval,
# dodged slightly so the trial types do not overlap.
first_trial_manpulation_d %>%
  filter(trial_number == 2) %>%
  ggplot(mapping = aes(x = exp, y = rt, color = trial_type)) +
  theme_few() +
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.2)
  )

look at it by block, still super noisy

# Same trial-2 summary as before, with one panel per block number.
first_trial_manpulation_d %>%
  filter(trial_number == 2) %>%
  ggplot(mapping = aes(x = exp, y = rt, color = trial_type)) +
  facet_wrap(~block_number) +
  theme_few() +
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.2)
  )

complexity doesn’t seem to matter too much?

# Same trial-2 summary as before, with one panel per trial complexity level.
first_trial_manpulation_d %>%
  filter(trial_number == 2) %>%
  ggplot(mapping = aes(x = exp, y = rt, color = trial_type)) +
  facet_wrap(~trial_complexity) +
  theme_few() +
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.2)
  )

Paired presentation experiment

not a lot of ppl

# Number of unique subjects in the paired presentation data.
paired_presentation_data %>%
  summarise(n = n_distinct(subject))

## # A tibble: 1 × 1
##       n
##   <int>
## 1    82

ok i guess the measurement is precise enough there’s just no familiarity preference lol though you get some habituation?

# Total dwell time per subject in the preference phase, by exposure time and
# gaze location type. Rows labelled "not_on_target" are excluded; because
# gaze_location_type is a grouping key, dropping them before or after the
# summary yields the same per-group sums.
paired_presentation_data %>%
  filter(phase == "pref", gaze_location_type != "not_on_target") %>%
  group_by(
    block_number, exposure_time, complexity, gaze_location_type, subject
  ) %>%
  summarise(sum_dwell_time = sum(dwell_time, na.rm = TRUE)) %>%
  ggplot(
    mapping = aes(
      x = as.factor(exposure_time),
      y = sum_dwell_time,
      color = gaze_location_type
    )
  ) +
  # Bootstrapped mean and confidence interval per gaze location type.
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.3)
  ) +
  # Raw per-group sums drawn faintly on top of the summaries.
  geom_jitter(width = 0.2, alpha = 0.1) +
  theme_classic()

## `summarise()` has grouped output by 'block_number', 'exposure_time',
## 'complexity', 'gaze_location_type'. You can override using the `.groups`
## argument.

complexity also doesn’t seem to make a huge difference though?

# Same preference-phase dwell-time summary, with one panel per complexity
# level. Rows labelled "not_on_target" are excluded; because
# gaze_location_type is a grouping key, dropping them before or after the
# summary yields the same per-group sums.
paired_presentation_data %>%
  filter(phase == "pref", gaze_location_type != "not_on_target") %>%
  group_by(
    block_number, exposure_time, complexity, gaze_location_type, subject
  ) %>%
  summarise(sum_dwell_time = sum(dwell_time, na.rm = TRUE)) %>%
  ggplot(
    mapping = aes(
      x = as.factor(exposure_time),
      y = sum_dwell_time,
      color = gaze_location_type
    )
  ) +
  # Bootstrapped mean and confidence interval per gaze location type.
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.3)
  ) +
  # Raw per-group sums drawn faintly on top of the summaries.
  geom_jitter(width = 0.2, alpha = 0.1) +
  theme_classic() +
  facet_wrap(~complexity)

## `summarise()` has grouped output by 'block_number', 'exposure_time',
## 'complexity', 'gaze_location_type'. You can override using the `.groups`
## argument.

Free viewing experiment

again not a huge experiment

# Number of unique subjects in the follow-up viewing data.
follow_up_viewing_data %>%
  summarise(n = n_distinct(subject))

## # A tibble: 1 × 1
##       n
##   <int>
## 1   144

So this is the key manipulation: if there’s evidence for familiarity preference, background / deviant (which are stimuli they saw 6M ago) should receive longer looking time than novel

# Trial looking time by stimulus type: mean with a bootstrapped confidence
# interval for each type.
#
# NOTE: the earlier version passed `group = complexity` to ggplot() outside of
# aes(). Arguments placed there are not aesthetics and were silently ignored,
# so the figure never grouped by complexity. The stray argument is removed;
# the plot is unchanged. To actually split by complexity, map it inside aes()
# (e.g. `color = complexity`) and dodge the summaries.
follow_up_viewing_data %>%
  ggplot(aes(x = stimulus_type, y = trial_looking_time)) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic()

this is split by when they saw the stimuli in the sequence (trial number)

# Trial looking time by stimulus type, with one panel per trial number.
#
# NOTE: the earlier version passed `group = complexity` to ggplot() outside of
# aes(). Arguments placed there are not aesthetics and were silently ignored,
# so the stray argument is removed; the plot is unchanged.
follow_up_viewing_data %>%
  ggplot(aes(x = stimulus_type, y = trial_looking_time)) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic() +
  facet_wrap(~trial_number)

some more wacky exploration we did:

look at LT residualized on trial number

# `m_res_lt` by stimulus type: mean with a bootstrapped confidence interval
# for each type.
#
# NOTE: the earlier version passed `group = complexity` to ggplot() outside of
# aes(). Arguments placed there are not aesthetics and were silently ignored,
# so the stray argument is removed; the plot is unchanged.
follow_up_viewing_data %>%
  ggplot(aes(x = stimulus_type, y = m_res_lt)) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic()

look at LT for subjects who remembered (mean familiarity rating for background stimuli exceeds the mean rating for novel stimuli by more than 1) (also residualized)

# Load the follow-up rating data from the project's data folder.
clean_rating_df <- read_csv(file = here("data/follow_up_rating_data.csv"))

## Rows: 2960 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): subject, stimulus, p_id, stimulus_type, complexity
## dbl (1): rating
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Subjects who "remembered" the simple stimuli: their mean rating for
# background stimuli exceeds their mean rating for novel stimuli by more
# than 1.
remembered_simple <- clean_rating_df %>%
  group_by(subject, stimulus_type, complexity) %>%
  summarise(mean_ratings = mean(rating)) %>%
  # One row per subject x complexity, one column per stimulus type.
  pivot_wider(
    names_from = stimulus_type,
    values_from = mean_ratings
  ) %>%
  mutate(diff_background_novel = background - novel) %>%
  filter(complexity == "simple", diff_background_novel > 1)

## `summarise()` has grouped output by 'subject', 'stimulus_type'. You can
## override using the `.groups` argument.

# Subjects who "remembered" the complex stimuli: their mean rating for
# background stimuli exceeds their mean rating for novel stimuli by more
# than 1.
remembered_complex <- clean_rating_df %>%
  group_by(subject, stimulus_type, complexity) %>%
  summarise(mean_ratings = mean(rating)) %>%
  # One row per subject x complexity, one column per stimulus type.
  pivot_wider(
    names_from = stimulus_type,
    values_from = mean_ratings
  ) %>%
  mutate(diff_background_novel = background - novel) %>%
  filter(complexity == "complex", diff_background_novel > 1)

## `summarise()` has grouped output by 'subject', 'stimulus_type'. You can
## override using the `.groups` argument.

again, super noisy, but the remembered group’s residual looks more reasonable? - especially the remembered simple groups

# `m_res_lt` by stimulus type, with one panel per subject group.
#
# Subjects are labelled by membership in `remembered_simple` /
# `remembered_complex`. case_when() uses the first matching condition, so a
# subject present in both sets is labelled "remembered simple".
#
# NOTE: the earlier version passed `group = complexity` to ggplot() outside of
# aes(). Arguments placed there are not aesthetics and were silently ignored,
# so the stray argument is removed; the plot is unchanged.
follow_up_viewing_data %>%
  mutate(
    subject_type = case_when(
      subject %in% remembered_simple$subject ~ "remembered simple",
      subject %in% remembered_complex$subject ~ "remembered complex",
      TRUE ~ "not_remembered"
    )
  ) %>%
  ggplot(aes(x = stimulus_type, y = m_res_lt)) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic() +
  facet_wrap(~subject_type) +
  ylab("Residuals")

# Trial looking time by stimulus type, with one panel per subject group.
#
# Subjects are labelled by membership in `remembered_simple` /
# `remembered_complex`. case_when() uses the first matching condition, so a
# subject present in both sets is labelled "remembered simple".
#
# NOTE: the earlier version passed `group = complexity` to ggplot() outside of
# aes(). Arguments placed there are not aesthetics and were silently ignored,
# so the stray argument is removed; the plot is unchanged.
follow_up_viewing_data %>%
  mutate(
    subject_type = case_when(
      subject %in% remembered_simple$subject ~ "remembered simple",
      subject %in% remembered_complex$subject ~ "remembered complex",
      TRUE ~ "not_remembered"
    )
  ) %>%
  ggplot(aes(x = stimulus_type, y = trial_looking_time)) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic() +
  facet_wrap(~subject_type) +
  ylab("LT")