library(tidyverse)
library(here)
library(jsonlite)
library(kableExtra)

# Input locations, resolved relative to the project root by here().
RAW_DATA_PATH <- here("data/01_merged_data/merged_data.csv")
MAIN_TASK_PATH <- here("data/02_processed_data/processed_rt_task_data.csv")

# Processed similarity and complexity data (read directly, no path constant).
similarity_data <- read_csv(
  here("data/02_processed_data/processed_similarity_data.csv")
)
complexity_data <- read_csv(
  here("data/02_processed_data/processed_complexity_data.csv")
)

# raw_data: the merged file; d: the processed RT-task file.
raw_data <- read_csv(RAW_DATA_PATH)
d <- read_csv(MAIN_TASK_PATH)

Basic info

Potential concern: the memory condition is sampled less often and also has the highest attrition rate. (Exclusion criterion: participants will also be excluded based on their task responses if they a) are in the math question condition or the memory question condition and answer 3 out of 8 questions wrong.)

pre-exclusion

# Source the tidying helper, then run it on the merged raw data.
# NOTE(review): the result is stored under the same name as the function that
# is called (presumably defined by the sourced helper - confirm), so that name
# refers to the data afterwards until the helper file is sourced again.
source(here("helper/tidy_rt_task_data.r"))
tidy_all_rt_task_data <- raw_data %>%
  tidy_all_rt_task_data()
## `summarise()` has grouped output by 'subject'. You can override using the `.groups` argument.
# Number of distinct subjects per task type in the tidied raw data.
# count(task_type) is used instead of group_by() + count() so that no grouped
# tibble is handed on to kable(); the rendered table is the same, and the
# chunk now follows the same pattern as the post-exclusion table.
tidy_all_rt_task_data %>%
  distinct(subject, task_type) %>%
  count(task_type) %>%
  kableExtra::kable()
task_type n
curiosity 156
math 156
memory 137

post-exclusion

# Number of distinct subjects per task type in the processed data.
distinct(d, subject, task_type) %>%
  count(task_type) %>%
  kableExtra::kable()
task_type n
curiosity 143
math 139
memory 98

Check raw looking time distribution

Good: the distributions across the three tasks look pretty similar. Bad (?): the shapes are irregular.

by block type and trial type

# Density of looking times on a log10 x-axis, one fill colour per task type.
# Facet rows are trial types; facet columns are block types.
ggplot(d, aes(x = trial_looking_time, fill = task_type)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  facet_grid(rows = vars(trial_type), cols = vars(block_type))

by position of deviant and trial_type

# Density of looking times on a log10 x-axis, one fill colour per task type.
# Facet rows are trial types; facet columns are deviant positions.
ggplot(d, aes(x = trial_looking_time, fill = task_type)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  facet_grid(rows = vars(trial_type), cols = vars(deviant_position))

by block_number and trial_type

The only thing that caught my eye here is that, in the last block, participants who saw the memory task seem to have higher deviant looking times than participants in the other blocks.

# Density of looking times on a log10 x-axis, one fill colour per task type.
# Facet rows are trial types; facet columns are block numbers.
ggplot(d, aes(x = trial_looking_time, fill = task_type)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  facet_grid(rows = vars(trial_type), cols = vars(block_number))

Look at within block dynamics

complexity effect

Yay, we have a pretty clear complexity effect. Interestingly, the complexity effect is more salient in deviant trials than in background trials. On a side note, am I crazy to say that the 2nd and 3rd trials don’t look too different? Is an exponential a good fit?

# Mean log looking time (mean_cl_boot interval) at each trial number, coloured
# by trial type and faceted by block type. The smooth is a linear model on
# exp(1)^(-x), i.e. an exponential-decay curve over trial number.
ggplot(
  d,
  aes(x = trial_number, y = log(trial_looking_time), colour = trial_type)
) +
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.2)
  ) +
  geom_smooth(
    method = "lm",
    formula = y ~ I(exp(1)**(-x)),
    se = FALSE
  ) +
  facet_wrap(vars(block_type)) +
  # langcog::theme_mikabr() +
  theme_classic() +
  langcog::scale_color_solarized(name = "Trial Type") +
  theme(legend.position = "bottom") +
  labs(x = "Trial Number", y = "log RT (seconds)")

# Mean log looking time (mean_cl_boot interval) at each trial number, coloured
# by block type and faceted by trial type. The smooth is a linear model on
# exp(1)^(-x), i.e. an exponential-decay curve over trial number.
d %>%
  ggplot(
    aes(x = trial_number, y = log(trial_looking_time), colour = block_type)
  ) +
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.2)
  ) +
  geom_smooth(
    method = "lm",
    formula = y ~ I(exp(1)**(-x)),
    se = FALSE
  ) +
  facet_wrap(~trial_type) +
  # langcog::theme_mikabr() +
  theme_classic() +
  # Colour is mapped to block_type, so the legend is titled accordingly
  # (it previously read "Trial Type").
  langcog::scale_color_solarized(name = "Block Type") +
  theme(legend.position = "bottom") +
  ylab("log RT (seconds)") +
  xlab("Trial Number")

Sanity check: task type doesn’t influence the pattern

look pretty similar to me

# Mean log looking time (mean_cl_boot interval) at each trial number, coloured
# by trial type and faceted by task type. The smooth is a linear model on
# exp(1)^(-x), i.e. an exponential-decay curve over trial number.
ggplot(
  d,
  aes(x = trial_number, y = log(trial_looking_time), colour = trial_type)
) +
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.2)
  ) +
  geom_smooth(
    method = "lm",
    formula = y ~ I(exp(1)**(-x)),
    se = FALSE
  ) +
  facet_wrap(vars(task_type)) +
  # langcog::theme_mikabr() +
  theme_classic() +
  langcog::scale_color_solarized(name = "Trial Type") +
  theme(legend.position = "bottom") +
  labs(x = "Trial Number", y = "log RT (seconds)")

Look at across blocks dynamics

The across-block habituation effect is present but less salient.

overall

# Mean log looking time (mean_cl_boot interval) per block number, coloured by
# block type.
# Changes from the previous version of this chunk:
#   * geom_smooth() used its default GAM smoother, which fails here with
#     "x has insufficient unique values to support 10 knots", so no trend was
#     drawn. A linear trend is fitted instead.
#   * The legend title and x-axis label now name the variables actually
#     mapped (block_type and block_number); both previously said "Trial ...".
d %>%
  ggplot(
    aes(x = block_number, y = log(trial_looking_time), colour = block_type)
  ) +
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.2)
  ) +
  geom_smooth(method = "lm", formula = y ~ x) +
  # langcog::theme_mikabr() +
  theme_classic() +
  langcog::scale_color_solarized(name = "Block Type") +
  theme(legend.position = "bottom") +
  ylab("log RT (seconds)") +
  xlab("Block Number")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

by task

# Mean log looking time (mean_cl_boot interval) per block number, coloured by
# block type and faceted by task type.
# Changes from the previous version of this chunk:
#   * geom_smooth() used its default GAM smoother, which fails in every facet
#     with "x has insufficient unique values to support 10 knots", so no trend
#     was drawn. A linear trend is fitted instead.
#   * The legend title and x-axis label now name the variables actually
#     mapped (block_type and block_number); both previously said "Trial ...".
d %>%
  ggplot(
    aes(x = block_number, y = log(trial_looking_time), colour = block_type)
  ) +
  stat_summary(
    fun.data = "mean_cl_boot",
    position = position_dodge(width = 0.2)
  ) +
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_wrap(~task_type) +
  # langcog::theme_mikabr() +
  theme_classic() +
  langcog::scale_color_solarized(name = "Block Type") +
  theme(legend.position = "bottom") +
  ylab("log RT (seconds)") +
  xlab("Block Number")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

Look at curiosity ratings

There is a sampling bug in the experiment file that causes the curiosity question to be either novel or background; the memory task has a similar issue. However, this still speaks against our hypothesis.

depending on the type of question?

no difference

# Mean response (mean_cl_boot interval) per question type in the curiosity
# task. Rows are first reduced to the unique combinations of the listed
# columns, so repeated trial rows do not count the same response twice.
d %>%
  filter(task_type == "curiosity") %>%
  distinct(
    subject, block_number, task_question_type, task_question_response,
    deviant_position
  ) %>%
  ggplot(aes(x = task_question_type, y = task_question_response)) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic()

complexity?

pretty clear effect on complexity though!

# Mean response (mean_cl_boot interval) per question type in the curiosity
# task, faceted by block type. Rows are first reduced to the unique
# combinations of the listed columns.
d %>%
  filter(task_type == "curiosity") %>%
  distinct(
    subject, block_number, task_question_type, task_question_response,
    deviant_position, block_type
  ) %>%
  ggplot(aes(x = task_question_type, y = task_question_response)) +
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(vars(block_type)) +
  theme_classic()

changing across blocks?

doesn’t look meaningful to me

# Mean response (mean_cl_boot interval) per block number in the curiosity
# task, coloured by question type. Rows are first reduced to the unique
# combinations of the listed columns.
d %>%
  filter(task_type == "curiosity") %>%
  distinct(
    subject, block_number, task_question_type, task_question_response,
    deviant_position, block_type
  ) %>%
  ggplot(
    aes(
      x = block_number,
      y = task_question_response,
      color = task_question_type
    )
  ) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic()

Similarity and Complexity Ratings?

similarity by complexity interaction

Might this explain why the complexity effect shows up more in dishabituation? Perhaps they are perceived as more different?

# Derive per-pair labels for the similarity ratings.
#   complexity: "complex" if either stimulus name contains "complex";
#     otherwise "simple" if either name contains "simple"; otherwise NA.
#     A pair matching both patterns is labelled "complex" (first match wins).
#   stimulus_*_number: first run of digits in each stimulus name.
#   similarity: "similar" when both numbers are equal, "dissimilar" otherwise
#     (including when a number is missing).
# Fix: the "simple" branch tested stimulus_right twice and never looked at
# stimulus_left.
similarity_data <- similarity_data %>%
  mutate(
    complexity = case_when(
      grepl("complex", stimulus_left) | grepl("complex", stimulus_right) ~
        "complex",
      grepl("simple", stimulus_left) | grepl("simple", stimulus_right) ~
        "simple"
    ),
    stimulus_left_number = as.numeric(
      str_extract(stimulus_left, "[[:digit:]]+")
    ),
    stimulus_right_number = as.numeric(
      str_extract(stimulus_right, "[[:digit:]]+")
    ),
    similarity = case_when(
      stimulus_left_number == stimulus_right_number ~ "similar",
      TRUE ~ "dissimilar"
    )
  )

# Jittered individual ratings with the mean_cl_boot summary on top, by
# similarity label and faceted by complexity label.
ggplot(similarity_data, aes(x = similarity, y = rating)) +
  geom_point(alpha = 0.1, position = position_jitter(width = 0.3)) +
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(vars(complexity))

complexity

# Label each rated stimulus from its name: "complex" if the name contains
# "complex", else "simple" if it contains "simple", else NA.
complexity_data <- mutate(
  complexity_data,
  complexity = case_when(
    str_detect(stimulus, "complex") ~ "complex",
    str_detect(stimulus, "simple") ~ "simple"
  )
)

# Jittered individual ratings with the mean_cl_boot summary on top, by
# complexity label.
ggplot(complexity_data, aes(x = complexity, y = rating)) +
  geom_point(alpha = 0.1, position = position_jitter(width = 0.3)) +
  stat_summary(fun.data = "mean_cl_boot")