library(lme4)
library(here)
library(tidyverse)
library(stringr) # for parsing r string 
library(jsonlite) # for parsing r string 
library(ggiraphExtra)
library(plotrix)
library(lmerTest)
RT_data <- read_csv(here('data/processed_data/trimmed_RTdata.csv'))
#pref_data <- read_csv(here('data/processed_data/trimmed_prefdata.csv'))
similarity_data <- read_csv(here('data/processed_data/trimmed_similaritydata.csv'))
complexity_data <- read_csv(here('data/processed_data/trimmed_complexitydata.csv'))
demog_data <- read_csv(here('data/processed_data/trimmed_demogdata.csv'))

Descriptive Info

N = 402

sample size

RT_data %>% 
  distinct(subject) %>% 
  count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1   402
# reorder
RT_data$trial_complexity = factor(RT_data$trial_complexity, levels=c('simple', 'complex'))
RT_data$item_type = factor(RT_data$item_type, levels=c('background', 'similar deviant', 'dissimilar deviant'))

demographic

age

demog_data %>% 
  ggplot(aes(x = as.numeric(age))) + 
  geom_histogram()
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

ethnicity

demog_data %>% 
  ggplot(aes(x = ethnicity)) + 
  geom_histogram(stat = "count") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Gender

demog_data %>% 
  ggplot(aes(x = gender)) + 
  geom_histogram(stat = "count") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: Ignoring unknown parameters: binwidth, bins, pad

education

demog_data %>% 
  ggplot(aes(x = education)) + 
  geom_histogram(stat = "count") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: Ignoring unknown parameters: binwidth, bins, pad

RT raw

overall

RT_data %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by trial type

RT_data %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~trial_type)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

trial type and block

RT_data %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_grid(block_type~trial_type)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by trial complexity

RT_data %>% 
  ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~trial_complexity)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by block type

RT_data %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~block_type)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by item id

simple

RT_data %>% 
  filter(trial_complexity == "simple") %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~item_id)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

complex

RT_data %>% 
  filter(trial_complexity == "complex") %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~item_id)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Trial level RT basic plotting

Model

model_log <- lmer(log(rt) ~ log(trial_number) * item_type * trial_complexity + 
                     (1 | subject), 
                   data = RT_data)

summary(model_log)$coef %>% knitr::kable(digits = 2)
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 6.67 0.05 545.53 125.17 0.00
log(trial_number) -0.39 0.02 24493.11 -20.24 0.00
item_typesimilar deviant -0.38 0.12 24502.59 -3.11 0.00
item_typedissimilar deviant -0.29 0.12 24503.87 -2.39 0.02
trial_complexitycomplex 0.08 0.03 24489.32 2.66 0.01
log(trial_number):item_typesimilar deviant 0.30 0.09 24503.05 3.31 0.00
log(trial_number):item_typedissimilar deviant 0.33 0.09 24503.42 3.66 0.00
log(trial_number):trial_complexitycomplex 0.02 0.03 24492.99 0.81 0.42
item_typesimilar deviant:trial_complexitycomplex 0.15 0.17 24502.01 0.89 0.38
item_typedissimilar deviant:trial_complexitycomplex 0.03 0.17 24503.81 0.19 0.85
log(trial_number):item_typesimilar deviant:trial_complexitycomplex -0.14 0.13 24501.94 -1.07 0.28
log(trial_number):item_typedissimilar deviant:trial_complexitycomplex -0.05 0.12 24504.10 -0.37 0.71

break down by block

ggplot(RT_data, aes(x=trial_number, y=log(rt), colour=item_type)) + 
  stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width = .1)) + 
  stat_summary(geom = "line", alpha = .5) + 
  geom_smooth(method = "lm", 
              formula = y ~ log(x)) + 
  facet_wrap(~block_type) 
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

break down by complexity

ggplot(RT_data, aes(x=trial_number, y=log(rt), color = trial_complexity)) + 
  stat_summary(fun.data = "mean_cl_boot", 
               position = position_dodge(width = .1)) + 
  stat_summary(geom = "line", alpha = .5) + 
  geom_smooth(method = "lm", 
              formula = y ~ log(x)) + 
  facet_grid(~trial_type)
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

ggplot(RT_data, aes(x=trial_type, y=log(rt))) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~block_type)

ggplot(RT_data, aes(x=trial_type_index, y=log(rt))) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~block_type)

only look at trials before first deviant

RT_data %>% 
  filter(is.na(first_dev_position) | 
         trial_number < first_dev_position | 
         trial_number == first_dev_position) %>% 
  ggplot(aes(x=trial_number, y=log(rt), color = item_type)) + 
  stat_summary(fun.data = "mean_cl_boot", 
               position = position_dodge(width = .1)) + 
  stat_summary(geom = "line", alpha = .5) + 
  geom_smooth(method = "lm", 
              formula = y ~ log(x)) +
  facet_wrap(~block_type)
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

RT_data %>% 
  filter(is.na(first_dev_position) | 
         trial_number < first_dev_position | 
         trial_number == first_dev_position) %>% 
  filter(trial_number != 1) %>% 
  ggplot(aes(x=trial_number, y=log(rt), color = item_type)) + 
  stat_summary(fun.data = "mean_cl_boot", 
               position = position_dodge(width = .1)) + 
  stat_summary(geom = "line", alpha = .5) + 
  geom_smooth(method = "lm", 
              formula = y ~ log(x)) +
  facet_wrap(~block_type) + 
  labs(title = "No first trial")
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

Block level RT basic plotting

by block type

RT_data %>% 
  mutate(block_number_for_plot = block_number + 1) %>% 
ggplot(aes(x=block_number_for_plot, y=log(rt), colour=block_type)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  stat_smooth(method = "lm", 
              formula = y ~ log(x))

by number of deviants

RT_data %>% 
  mutate(block_number_for_plot = block_number + 1) %>% 
ggplot(aes(x=block_number_for_plot, y=log(rt), colour=as.factor(block_deviant_number))) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  stat_smooth(method = "lm", 
              formula = y ~ log(x))

by complexity

RT_data %>% 
  mutate(block_number_for_plot = block_number + 1) %>% 
ggplot(aes(x=block_number_for_plot, 
           y=log(rt), 
           colour=trial_complexity)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  stat_smooth(method = "lm", 
              formula = y ~ log(x))

Similarity ratings

Raw

similarity_data %>% 
  ggplot(aes(x = rating))+ 
  geom_histogram(bins = 30)

by intuitive similarity

similarity_data <- similarity_data %>% 
  mutate(
    complexity = case_when(
      grepl("complex", stimulus_left) | grepl("complex", stimulus_right) ~ "complex", 
      grepl("simple", stimulus_right) | grepl("simple", stimulus_right) ~ "simple"
    ), 
    stimulus_left_number = as.numeric(str_extract(stimulus_left, "[[:digit:]]+")), 
    stimulus_right_number = as.numeric(str_extract(stimulus_right, "[[:digit:]]+")), 
    similarity = case_when(
      stimulus_left_number == stimulus_right_number ~ "similar", 
      TRUE ~ "dissimilar"
    )
  )

similarity_data %>% 
  ggplot(aes(x = similarity, y= rating)) + 
  geom_point(alpha = 0.1, 
             position = position_jitter(width = 0.3)) + 
   stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~complexity)

Model

similarity_rating_model_interaction <- lmer(rating~complexity*similarity + (1|subject), 
     data = similarity_data, 
     REML = FALSE)

similarity_rating_model <- lmer(rating~similarity + (1|subject), 
     data = similarity_data, 
     REML = FALSE)

anova(similarity_rating_model_interaction, similarity_rating_model)
## Data: similarity_data
## Models:
## similarity_rating_model: rating ~ similarity + (1 | subject)
## similarity_rating_model_interaction: rating ~ complexity * similarity + (1 | subject)
##                                     Df   AIC   BIC  logLik deviance  Chisq
## similarity_rating_model              4 16781 16807 -8386.6    16773       
## similarity_rating_model_interaction  6 16641 16680 -8314.7    16629 143.76
##                                     Chi Df Pr(>Chisq)    
## similarity_rating_model                                  
## similarity_rating_model_interaction      2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(similarity_rating_model_interaction)$coef %>% knitr::kable(digits = 2)
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 2.45 0.06 786.69 41.75 0
complexitysimple 0.63 0.05 4394.18 12.05 0
similaritysimilar 1.99 0.05 4394.95 38.65 0
complexitysimple:similaritysimilar -0.58 0.07 4395.24 -7.89 0

Complexity Ratings

Raw

complexity_data %>% 
  ggplot(aes(x = rating))+ 
  geom_histogram(bins = 30)

by intuitive complexity

complexity_data <- complexity_data %>% 
  mutate(
    complexity = case_when(
      grepl("complex", stimulus) ~ "complex", 
      grepl("simple", stimulus) ~ "simple"
    )
  )

complexity_data %>% 
  ggplot(aes(x = complexity, y = rating)) + 
  geom_point(alpha = 0.1, 
             position = position_jitter(width = 0.3)) + 
   stat_summary(fun.data = "mean_cl_boot") 

model

complexity_rating_model <- lmer(rating~complexity + (1|subject), 
     data = complexity_data)

summary(complexity_rating_model)$coef %>% knitr::kable(digits = 2)
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 4.35 0.05 544.53 84.15 0
complexitysimple -1.86 0.04 4351.62 -46.98 0

by item

first figure out action

complexity_action <- complexity_data %>%
  mutate(item_id_full = 
             str_match(stimulus, "spore_stims/\\s*(.*?).gif")[,2], 
         item_id_no_action = str_match(stimulus, "spore_stims/\\s*(.*?)..\\s*.gif")[,2],
         action_type = str_sub(item_id_full, -1)) 

action_summary <- complexity_action %>% 
  group_by(action_type) %>% 
  tidyboot::tidyboot_mean(rating) 
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: Problem with `mutate()` input `strap`.
## ℹ `as_data_frame()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## ℹ Input `strap` is `purrr::map(strap, dplyr::as_data_frame)`.
## Warning: `as_data_frame()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`
action_summary %>% 
 ggplot(aes(x = action_type, y = mean)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper))

by complexity type

complexity_summary <- complexity_action %>% 
  group_by(item_id_no_action) %>% 
  tidyboot::tidyboot_mean(rating) %>% 
  mutate(
    complexity = case_when(
      grepl("complex", item_id_no_action) ~ "complex", 
      TRUE ~ "simple")
  )
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`
complexity_summary %>% 
  ggplot(aes(x = reorder(item_id_no_action, mean), y = mean, color = complexity)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = position_dodge(1.5)) + 
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: position_dodge requires non-overlapping x intervals

dishabituation effect

log_rt_deviant <- RT_data %>% 
  filter(trial_type == "deviant") %>% 
  group_by(item_id) %>% 
  tidyboot::tidyboot_mean(log(rt)) %>% 
  mutate(type = "log_rt", 
    complexity = case_when(
      grepl("complex", item_id) ~ "complex", 
      TRUE ~ "simple")
  ) 
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`
complexity_temp <- complexity_summary %>% 
  mutate(type = "rating") %>% 
  mutate(item_id = item_id_no_action) %>% 
  select(-item_id_no_action)

dishab_df <- bind_rows(log_rt_deviant, 
                       complexity_temp)

raw dishabitutaiton

dishab_df %>% 
  ggplot(aes(x = reorder(item_id, mean), y = mean, 
             color = complexity, 
             shape = type)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = position_dodge(1.5)) + 
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: position_dodge requires non-overlapping x intervals

effect?

c_temp <- complexity_temp %>% 
  select(item_id, complexity, mean) %>% 
  rename(mean_rating = mean)

log_rt_deviant %>% 
  select(item_id, mean) %>% 
  rename(log_mean_dishab = mean) %>% 
  left_join(c_temp, by = "item_id") %>% 
  ggplot(aes(x = mean_rating, y = log_mean_dishab, 
             color = complexity)) + 
  geom_point()

log_rt_deviant %>% 
  filter(complexity == "complex") %>% 
  select(item_id, mean) %>% 
  rename(log_mean_dishab = mean) %>% 
  left_join(c_temp, by = "item_id") %>% 
  ggplot(aes(x = mean_rating, y = log_mean_dishab, 
             color = complexity)) + 
  geom_point() + 
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

log_rt_deviant %>% 
  filter(complexity == "simple") %>% 
  select(item_id, mean) %>% 
  rename(log_mean_dishab = mean) %>% 
  left_join(c_temp, by = "item_id") %>% 
  ggplot(aes(x = mean_rating, y = log_mean_dishab, 
             color = complexity)) + 
  geom_point() + 
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

first deviant only

log_rt_deviant <- RT_data %>% 
  filter(trial_type_index == "first_deviant") %>% 
  group_by(item_id) %>% 
  tidyboot::tidyboot_mean(log(rt)) %>% 
  mutate(type = "log_rt", 
    complexity = case_when(
      grepl("complex", item_id) ~ "complex", 
      TRUE ~ "simple")
  ) 
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`
complexity_temp <- complexity_summary %>% 
  mutate(type = "rating") %>% 
  mutate(item_id = item_id_no_action) %>% 
  select(-item_id_no_action)

dishab_df <- bind_rows(log_rt_deviant, 
                       complexity_temp)


dishab_df %>% 
  ggplot(aes(x = reorder(item_id, mean), y = mean, 
             color = complexity, 
             shape = type)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = position_dodge(1.5)) + 
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 
## Warning: position_dodge requires non-overlapping x intervals

Similarity redux

similarity_data %>% 
  mutate(
    stimulus_left_id =  str_match(stimulus_left, "spore_stims/\\s*(.*?)..\\s*.gif")[,2],
    stimulus_right_id = str_match(stimulus_right, "spore_stims/\\s*(.*?)..\\s*.gif")[,2],
   ) %>% 
  group_by(stimulus_left_id, similarity) %>% 
  tidyboot::tidyboot_mean(rating) %>% 
  ggplot(aes(x = reorder(stimulus_left_id, mean), y = mean, 
             color = similarity)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) + 
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`

LOOK AT FIRST BLOCK

RT_data %>% 
  filter(block_number == 0) %>% 
  ggplot(aes(x=trial_number, y=log(rt), colour=item_type)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~block_type)

RT_data %>% 
  filter(block_number == 0) %>% 
  ggplot(aes(x = item_type, y = log(rt))) + 
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(~block_type)

LOOK AT pre-deviant and after deviant

block_dev_summary <- RT_data %>% 
  filter(trial_type == "deviant") %>% 
  mutate(
    block_id =  paste(subject, block_number),
    temp_dev_id = paste(trial_type, trial_number, trial_complexity)) %>% 
  select(temp_dev_id, block_id)

dev_pos_df <- RT_data %>% 
  mutate(block_id = paste(subject, block_number)) %>% 
  left_join(block_dev_summary, by = "block_id") %>% 
  separate(temp_dev_id, into = c("trial_type_temp", "deviant_pos", "deviant_complexity")) %>% 
  select(-c(block_id, trial_type)) %>% 
  rowwise() %>% 
  mutate(dishabituation_position = case_when(
    (trial_number == (as.numeric(deviant_pos)) - 1) ~ "before_deviant", 
    (trial_number == (as.numeric(deviant_pos)) + 1) ~ "after_deviant", 
    TRUE ~ "regular_deviant"
  )) %>% 
  filter(dishabituation_position != "regular_deviant" && deviant_pos != 8) 


dev_pos_df %>% 
  ggplot(aes(x = trial_number, y = log(rt), 
             color = dishabituation_position)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~deviant_complexity)

dev_pos_df %>% 
  ggplot(aes(x = trial_number, y = log(rt), 
             color = dishabituation_position)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~block_type)