library(tidyverse)
library(here)
library(jsonlite)
library(kableExtra)
RAW_DATA_PATH <- here("data/01_merged_data/merged_data.csv")
MAIN_TASK_PATH <- here("data/02_processed_data/processed_rt_task_data.csv")
similarity_data <- read_csv(here("data/02_processed_data/processed_similarity_data.csv"))
complexity_data <- read_csv(here("data/02_processed_data/processed_complexity_data.csv"))
raw_data <- read_csv(RAW_DATA_PATH)
d <- read_csv(MAIN_TASK_PATH)
Potential concern: the memory task is sampled less and has the highest attrition rate. (Participants are also excluded based on their task responses if, in the math or memory question conditions, they answer 3 out of 8 questions incorrectly.)
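For reference, a sketch of how that exclusion rule could be applied. Note that `question_correct` is a hypothetical column name, not necessarily what the raw data uses:

# Hypothetical sketch of the exclusion rule: flag subjects in the math or
# memory condition who answered 3 (or more) of the 8 questions incorrectly.
# `question_correct` is an assumed column name, not confirmed in the raw data.
excluded_subjects <- raw_data %>%
  filter(task_type %in% c("math", "memory")) %>%
  group_by(subject) %>%
  summarise(n_wrong = sum(!question_correct), .groups = "drop") %>%
  filter(n_wrong >= 3) %>%
  pull(subject)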
source(here("helper/tidy_rt_task_data.r"))
# use a distinct name so the result doesn't shadow the function
tidied_rt_task_data <- tidy_all_rt_task_data(raw_data)
tidied_rt_task_data %>%
  distinct(subject, task_type) %>%
  count(task_type) %>%
  kableExtra::kable()
| task_type | n   |
|-----------|-----|
| curiosity | 156 |
| math      | 156 |
| memory    | 137 |
d %>%
  distinct(subject, task_type) %>%
  count(task_type) %>%
  kableExtra::kable()
| task_type | n   |
|-----------|-----|
| curiosity | 143 |
| math      | 139 |
| memory    | 98  |
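A quick sketch of the implied attrition rate per task, comparing the pre- and post-exclusion counts above:

# Attrition per task: subjects in the tidied data vs. subjects surviving exclusion
pre <- tidied_rt_task_data %>% distinct(subject, task_type) %>% count(task_type, name = "n_pre")
post <- d %>% distinct(subject, task_type) %>% count(task_type, name = "n_post")
left_join(pre, post, by = "task_type") %>%
  mutate(attrition_rate = 1 - n_post / n_pre)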
Good: the distributions across the three tasks look pretty similar. Bad (?): the irregular shapes.
d %>%
  ggplot(aes(x = trial_looking_time, fill = task_type)) +
  scale_x_log10() +
  geom_density(alpha = .5) +
  facet_grid(trial_type ~ block_type)
d %>%
  ggplot(aes(x = trial_looking_time, fill = task_type)) +
  scale_x_log10() +
  geom_density(alpha = .5) +
  facet_grid(trial_type ~ deviant_position)
The only thing that caught my eye here is that in the last block, participants in the memory task appear to have longer deviant looking times than in the other blocks.
d %>%
  ggplot(aes(x = trial_looking_time, fill = task_type)) +
  scale_x_log10() +
  geom_density(alpha = .5) +
  facet_grid(trial_type ~ block_number)
Yay, we have a pretty clear complexity effect. Interesting that the complexity effect is more salient in deviant trials than in background trials. On a side note, am I crazy to say that the 2nd and 3rd trials don't look too different? Is an exponential a good fit?
d %>%
  ggplot(aes(x = trial_number, y = log(trial_looking_time), colour = trial_type)) +
  stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width = .2)) +
  geom_smooth(method = "lm",
              formula = y ~ exp(-x), se = FALSE) +
  facet_wrap(~block_type) +
  theme_classic() +
  langcog::scale_color_solarized(name = "Trial Type") +
  theme(legend.position = "bottom") +
  ylab("log RT (seconds)") +
  xlab("Trial Number")
d %>%
  ggplot(aes(x = trial_number, y = log(trial_looking_time), colour = block_type)) +
  stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width = .2)) +
  geom_smooth(method = "lm",
              formula = y ~ exp(-x), se = FALSE) +
  facet_wrap(~trial_type) +
  theme_classic() +
  langcog::scale_color_solarized(name = "Block Type") +
  theme(legend.position = "bottom") +
  ylab("log RT (seconds)") +
  xlab("Trial Number")
Looks pretty similar to me.
d %>%
  ggplot(aes(x = trial_number, y = log(trial_looking_time), colour = trial_type)) +
  stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width = .2)) +
  geom_smooth(method = "lm",
              formula = y ~ exp(-x), se = FALSE) +
  facet_wrap(~task_type) +
  theme_classic() +
  langcog::scale_color_solarized(name = "Trial Type") +
  theme(legend.position = "bottom") +
  ylab("log RT (seconds)") +
  xlab("Trial Number")
The across-block habituation effect is present, but less salient.
d %>%
  ggplot(aes(x = block_number, y = log(trial_looking_time), colour = block_type)) +
  stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width = .2)) +
  # block_number has too few unique values for the default gam smoother, so fit lm
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  theme_classic() +
  langcog::scale_color_solarized(name = "Block Type") +
  theme(legend.position = "bottom") +
  ylab("log RT (seconds)") +
  xlab("Block Number")
d %>%
  ggplot(aes(x = block_number, y = log(trial_looking_time), colour = block_type)) +
  stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width = .2)) +
  # block_number has too few unique values for the default gam smoother, so fit lm
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  facet_wrap(~task_type) +
  theme_classic() +
  langcog::scale_color_solarized(name = "Block Type") +
  theme(legend.position = "bottom") +
  ylab("log RT (seconds)") +
  xlab("Block Number")
There is a sampling bug in the experiment file: the curiosity probe targets either the novel or the background stimulus (a similar issue affects the memory task). However, this still speaks against our hypothesis.
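To see how the bug distributes the probes, a quick cross-tab (assuming `task_question_type` is recorded for every task):

# How often each question type was sampled within each task
d %>%
  distinct(subject, block_number, task_type, task_question_type) %>%
  count(task_type, task_question_type)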
No difference between question types:
d %>%
  filter(task_type == "curiosity") %>%
  select(subject, block_number, task_question_type, task_question_response, deviant_position) %>%
  distinct(.keep_all = TRUE) %>%
  ggplot(aes(x = task_question_type, y = task_question_response)) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic()
Pretty clear effect of complexity, though!
d %>%
  filter(task_type == "curiosity") %>%
  select(subject, block_number, task_question_type, task_question_response, deviant_position, block_type) %>%
  distinct(.keep_all = TRUE) %>%
  ggplot(aes(x = task_question_type, y = task_question_response)) +
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(~block_type) +
  theme_classic()
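A quick significance check on that complexity effect. This sketch treats block-level responses as independent; a proper test would model subject as a random effect:

# Curiosity ratings by block complexity, one row per subject x block
curiosity_blocks <- d %>%
  filter(task_type == "curiosity") %>%
  distinct(subject, block_number, block_type, task_question_response)
t.test(task_question_response ~ block_type, data = curiosity_blocks)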
Doesn't look meaningful to me.
d %>%
  filter(task_type == "curiosity") %>%
  select(subject, block_number, task_question_type, task_question_response, deviant_position, block_type) %>%
  distinct(.keep_all = TRUE) %>%
  ggplot(aes(x = block_number, y = task_question_response, color = task_question_type)) +
  stat_summary(fun.data = "mean_cl_boot") +
  theme_classic()
This might explain why the complexity effect shows up more in dishabituation: are the complex stimuli perceived as more different?
similarity_data <- similarity_data %>%
  mutate(
    complexity = case_when(
      grepl("complex", stimulus_left) | grepl("complex", stimulus_right) ~ "complex",
      grepl("simple", stimulus_left) | grepl("simple", stimulus_right) ~ "simple"
    ),
    stimulus_left_number = as.numeric(str_extract(stimulus_left, "[[:digit:]]+")),
    stimulus_right_number = as.numeric(str_extract(stimulus_right, "[[:digit:]]+")),
    similarity = case_when(
      stimulus_left_number == stimulus_right_number ~ "similar",
      TRUE ~ "dissimilar"
    )
  )
similarity_data %>%
  ggplot(aes(x = similarity, y = rating)) +
  geom_point(alpha = 0.1,
             position = position_jitter(width = 0.3)) +
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(~complexity)
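For reference, the cell means behind this plot:

# Mean similarity rating in each complexity x similarity cell
similarity_data %>%
  group_by(complexity, similarity) %>%
  summarise(mean_rating = mean(rating, na.rm = TRUE), n = n(), .groups = "drop")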
complexity_data <- complexity_data %>%
  mutate(
    complexity = case_when(
      grepl("complex", stimulus) ~ "complex",
      grepl("simple", stimulus) ~ "simple"
    )
  )
complexity_data %>%
  ggplot(aes(x = complexity, y = rating)) +
  geom_point(alpha = 0.1,
             position = position_jitter(width = 0.3)) +
  stat_summary(fun.data = "mean_cl_boot")
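And a quick manipulation check on the complexity ratings (a sketch treating ratings as independent observations):

# Do "complex" stimuli actually receive higher complexity ratings?
t.test(rating ~ complexity, data = complexity_data)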