library(lme4)
library(here)
library(tidyverse)
library(stringr) # for parsing r string 
library(jsonlite) # for parsing r string 
library(ggiraphExtra)
library(plotrix)
library(lmerTest)

RT_data <- read_csv(here('data/processed_data/trimmed_RTdata.csv'))
#pref_data <- read_csv(here('data/processed_data/trimmed_prefdata.csv'))
similarity_data <- read_csv(here('data/processed_data/trimmed_similaritydata.csv'))
complexity_data <- read_csv(here('data/processed_data/trimmed_complexitydata.csv'))
demog_data <- read_csv(here('data/processed_data/trimmed_demogdata.csv'))

Descriptive Info

N = 402

sample size

RT_data %>% 
  distinct(subject) %>% 
  count()

## # A tibble: 1 x 1
##       n
##   <int>
## 1   402

# reorder
RT_data$trial_complexity = factor(RT_data$trial_complexity, levels=c('simple', 'complex'))
RT_data$item_type = factor(RT_data$item_type, levels=c('background', 'similar deviant', 'dissimilar deviant'))

demographic

age

demog_data %>% 
  ggplot(aes(x = as.numeric(age))) + 
  geom_histogram()

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 1 rows containing non-finite values (stat_bin).

ethnicity

demog_data %>% 
  ggplot(aes(x = ethnicity)) + 
  geom_histogram(stat = "count") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

## Warning: Ignoring unknown parameters: binwidth, bins, pad

Gender

demog_data %>% 
  ggplot(aes(x = gender)) + 
  geom_histogram(stat = "count") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

## Warning: Ignoring unknown parameters: binwidth, bins, pad

education

demog_data %>% 
  ggplot(aes(x = education)) + 
  geom_histogram(stat = "count") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

## Warning: Ignoring unknown parameters: binwidth, bins, pad

RT raw

overall

RT_data %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by trial type

RT_data %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~trial_type)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

trial type and block

RT_data %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_grid(block_type~trial_type)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by trial complexity

RT_data %>% 
  ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~trial_complexity)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by block type

RT_data %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~block_type)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by item id

simple

RT_data %>% 
  filter(trial_complexity == "simple") %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~item_id)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

complex

RT_data %>% 
  filter(trial_complexity == "complex") %>% 
   ggplot(aes(x = rt)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~item_id)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Trial level RT basic plotting

Model

model_log <- lmer(log(rt) ~ log(trial_number) * item_type * trial_complexity + 
                     (1 | subject), 
                   data = RT_data)

summary(model_log)$coef %>% knitr::kable(digits = 2)

	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	6.67	0.05	545.53	125.17	0.00
log(trial_number)	-0.39	0.02	24493.11	-20.24	0.00
item_typesimilar deviant	-0.38	0.12	24502.59	-3.11	0.00
item_typedissimilar deviant	-0.29	0.12	24503.87	-2.39	0.02
trial_complexitycomplex	0.08	0.03	24489.32	2.66	0.01
log(trial_number):item_typesimilar deviant	0.30	0.09	24503.05	3.31	0.00
log(trial_number):item_typedissimilar deviant	0.33	0.09	24503.42	3.66	0.00
log(trial_number):trial_complexitycomplex	0.02	0.03	24492.99	0.81	0.42
item_typesimilar deviant:trial_complexitycomplex	0.15	0.17	24502.01	0.89	0.38
item_typedissimilar deviant:trial_complexitycomplex	0.03	0.17	24503.81	0.19	0.85
log(trial_number):item_typesimilar deviant:trial_complexitycomplex	-0.14	0.13	24501.94	-1.07	0.28
log(trial_number):item_typedissimilar deviant:trial_complexitycomplex	-0.05	0.12	24504.10	-0.37	0.71

break down by block

ggplot(RT_data, aes(x=trial_number, y=log(rt), colour=item_type)) + 
  stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width = .1)) + 
  stat_summary(geom = "line", alpha = .5) + 
  geom_smooth(method = "lm", 
              formula = y ~ log(x)) + 
  facet_wrap(~block_type)

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

break down by complexity

ggplot(RT_data, aes(x=trial_number, y=log(rt), color = trial_complexity)) + 
  stat_summary(fun.data = "mean_cl_boot", 
               position = position_dodge(width = .1)) + 
  stat_summary(geom = "line", alpha = .5) + 
  geom_smooth(method = "lm", 
              formula = y ~ log(x)) + 
  facet_grid(~trial_type)

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

ggplot(RT_data, aes(x=trial_type, y=log(rt))) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~block_type)

ggplot(RT_data, aes(x=trial_type_index, y=log(rt))) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~block_type)

only look at trials before first deviant

RT_data %>% 
  filter(is.na(first_dev_position) | 
         trial_number < first_dev_position | 
         trial_number == first_dev_position) %>% 
  ggplot(aes(x=trial_number, y=log(rt), color = item_type)) + 
  stat_summary(fun.data = "mean_cl_boot", 
               position = position_dodge(width = .1)) + 
  stat_summary(geom = "line", alpha = .5) + 
  geom_smooth(method = "lm", 
              formula = y ~ log(x)) +
  facet_wrap(~block_type)

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

RT_data %>% 
  filter(is.na(first_dev_position) | 
         trial_number < first_dev_position | 
         trial_number == first_dev_position) %>% 
  filter(trial_number != 1) %>% 
  ggplot(aes(x=trial_number, y=log(rt), color = item_type)) + 
  stat_summary(fun.data = "mean_cl_boot", 
               position = position_dodge(width = .1)) + 
  stat_summary(geom = "line", alpha = .5) + 
  geom_smooth(method = "lm", 
              formula = y ~ log(x)) +
  facet_wrap(~block_type) + 
  labs(title = "No first trial")

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

Block level RT basic plotting

by block type

RT_data %>% 
  mutate(block_number_for_plot = block_number + 1) %>% 
ggplot(aes(x=block_number_for_plot, y=log(rt), colour=block_type)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  stat_smooth(method = "lm", 
              formula = y ~ log(x))

by number of deviants

RT_data %>% 
  mutate(block_number_for_plot = block_number + 1) %>% 
ggplot(aes(x=block_number_for_plot, y=log(rt), colour=as.factor(block_deviant_number))) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  stat_smooth(method = "lm", 
              formula = y ~ log(x))

by complexity

RT_data %>% 
  mutate(block_number_for_plot = block_number + 1) %>% 
ggplot(aes(x=block_number_for_plot, 
           y=log(rt), 
           colour=trial_complexity)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  stat_smooth(method = "lm", 
              formula = y ~ log(x))

Similarity ratings

Raw

similarity_data %>% 
  ggplot(aes(x = rating))+ 
  geom_histogram(bins = 30)

by intuitive similarity

similarity_data <- similarity_data %>% 
  mutate(
    complexity = case_when(
      grepl("complex", stimulus_left) | grepl("complex", stimulus_right) ~ "complex", 
      grepl("simple", stimulus_right) | grepl("simple", stimulus_right) ~ "simple"
    ), 
    stimulus_left_number = as.numeric(str_extract(stimulus_left, "[[:digit:]]+")), 
    stimulus_right_number = as.numeric(str_extract(stimulus_right, "[[:digit:]]+")), 
    similarity = case_when(
      stimulus_left_number == stimulus_right_number ~ "similar", 
      TRUE ~ "dissimilar"
    )
  )

similarity_data %>% 
  ggplot(aes(x = similarity, y= rating)) + 
  geom_point(alpha = 0.1, 
             position = position_jitter(width = 0.3)) + 
   stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~complexity)

Model

similarity_rating_model_interaction <- lmer(rating~complexity*similarity + (1|subject), 
     data = similarity_data, 
     REML = FALSE)

similarity_rating_model <- lmer(rating~similarity + (1|subject), 
     data = similarity_data, 
     REML = FALSE)

anova(similarity_rating_model_interaction, similarity_rating_model)

## Data: similarity_data
## Models:
## similarity_rating_model: rating ~ similarity + (1 | subject)
## similarity_rating_model_interaction: rating ~ complexity * similarity + (1 | subject)
##                                     Df   AIC   BIC  logLik deviance  Chisq
## similarity_rating_model              4 16781 16807 -8386.6    16773       
## similarity_rating_model_interaction  6 16641 16680 -8314.7    16629 143.76
##                                     Chi Df Pr(>Chisq)    
## similarity_rating_model                                  
## similarity_rating_model_interaction      2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(similarity_rating_model_interaction)$coef %>% knitr::kable(digits = 2)

	Estimate	Std. Error	df	t value
(Intercept)	2.45	0.06	786.69	41.75
complexitysimple	0.63	0.05	4394.18	12.05
similaritysimilar	1.99	0.05	4394.95	38.65
complexitysimple:similaritysimilar	-0.58	0.07	4395.24	-7.89

Complexity Ratings

Raw

complexity_data %>% 
  ggplot(aes(x = rating))+ 
  geom_histogram(bins = 30)

by intuitive complexity

complexity_data <- complexity_data %>% 
  mutate(
    complexity = case_when(
      grepl("complex", stimulus) ~ "complex", 
      grepl("simple", stimulus) ~ "simple"
    )
  )

complexity_data %>% 
  ggplot(aes(x = complexity, y = rating)) + 
  geom_point(alpha = 0.1, 
             position = position_jitter(width = 0.3)) + 
   stat_summary(fun.data = "mean_cl_boot")

model

complexity_rating_model <- lmer(rating~complexity + (1|subject), 
     data = complexity_data)

summary(complexity_rating_model)$coef %>% knitr::kable(digits = 2)

	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	4.35	0.05	544.53	84.15	0
complexitysimple	-1.86	0.04	4351.62	-46.98	0

by item

first figure out action

complexity_action <- complexity_data %>%
  mutate(item_id_full = 
             str_match(stimulus, "spore_stims/\\s*(.*?).gif")[,2], 
         item_id_no_action = str_match(stimulus, "spore_stims/\\s*(.*?)..\\s*.gif")[,2],
         action_type = str_sub(item_id_full, -1)) 

action_summary <- complexity_action %>% 
  group_by(action_type) %>% 
  tidyboot::tidyboot_mean(rating)

## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Warning: Problem with `mutate()` input `strap`.
## ℹ `as_data_frame()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## ℹ Input `strap` is `purrr::map(strap, dplyr::as_data_frame)`.

## Warning: `as_data_frame()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`

action_summary %>% 
 ggplot(aes(x = action_type, y = mean)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper))

by complexity type

complexity_summary <- complexity_action %>% 
  group_by(item_id_no_action) %>% 
  tidyboot::tidyboot_mean(rating) %>% 
  mutate(
    complexity = case_when(
      grepl("complex", item_id_no_action) ~ "complex", 
      TRUE ~ "simple")
  )

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`

complexity_summary %>% 
  ggplot(aes(x = reorder(item_id_no_action, mean), y = mean, color = complexity)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = position_dodge(1.5)) + 
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

## Warning: position_dodge requires non-overlapping x intervals

dishabituation effect

log_rt_deviant <- RT_data %>% 
  filter(trial_type == "deviant") %>% 
  group_by(item_id) %>% 
  tidyboot::tidyboot_mean(log(rt)) %>% 
  mutate(type = "log_rt", 
    complexity = case_when(
      grepl("complex", item_id) ~ "complex", 
      TRUE ~ "simple")
  )

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`

complexity_temp <- complexity_summary %>% 
  mutate(type = "rating") %>% 
  mutate(item_id = item_id_no_action) %>% 
  select(-item_id_no_action)

dishab_df <- bind_rows(log_rt_deviant, 
                       complexity_temp)

raw dishabitutaiton

dishab_df %>% 
  ggplot(aes(x = reorder(item_id, mean), y = mean, 
             color = complexity, 
             shape = type)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = position_dodge(1.5)) + 
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

## Warning: position_dodge requires non-overlapping x intervals

effect?

c_temp <- complexity_temp %>% 
  select(item_id, complexity, mean) %>% 
  rename(mean_rating = mean)

log_rt_deviant %>% 
  select(item_id, mean) %>% 
  rename(log_mean_dishab = mean) %>% 
  left_join(c_temp, by = "item_id") %>% 
  ggplot(aes(x = mean_rating, y = log_mean_dishab, 
             color = complexity)) + 
  geom_point()

log_rt_deviant %>% 
  filter(complexity == "complex") %>% 
  select(item_id, mean) %>% 
  rename(log_mean_dishab = mean) %>% 
  left_join(c_temp, by = "item_id") %>% 
  ggplot(aes(x = mean_rating, y = log_mean_dishab, 
             color = complexity)) + 
  geom_point() + 
  geom_smooth(method = "lm")

## `geom_smooth()` using formula 'y ~ x'

log_rt_deviant %>% 
  filter(complexity == "simple") %>% 
  select(item_id, mean) %>% 
  rename(log_mean_dishab = mean) %>% 
  left_join(c_temp, by = "item_id") %>% 
  ggplot(aes(x = mean_rating, y = log_mean_dishab, 
             color = complexity)) + 
  geom_point() + 
  geom_smooth(method = "lm")

## `geom_smooth()` using formula 'y ~ x'

first deviant only

log_rt_deviant <- RT_data %>% 
  filter(trial_type_index == "first_deviant") %>% 
  group_by(item_id) %>% 
  tidyboot::tidyboot_mean(log(rt)) %>% 
  mutate(type = "log_rt", 
    complexity = case_when(
      grepl("complex", item_id) ~ "complex", 
      TRUE ~ "simple")
  )

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`

complexity_temp <- complexity_summary %>% 
  mutate(type = "rating") %>% 
  mutate(item_id = item_id_no_action) %>% 
  select(-item_id_no_action)

dishab_df <- bind_rows(log_rt_deviant, 
                       complexity_temp)


dishab_df %>% 
  ggplot(aes(x = reorder(item_id, mean), y = mean, 
             color = complexity, 
             shape = type)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), 
                  position = position_dodge(1.5)) + 
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

## Warning: position_dodge requires non-overlapping x intervals

Similarity redux

similarity_data %>% 
  mutate(
    stimulus_left_id =  str_match(stimulus_left, "spore_stims/\\s*(.*?)..\\s*.gif")[,2],
    stimulus_right_id = str_match(stimulus_right, "spore_stims/\\s*(.*?)..\\s*.gif")[,2],
   ) %>% 
  group_by(stimulus_left_id, similarity) %>% 
  tidyboot::tidyboot_mean(rating) %>% 
  ggplot(aes(x = reorder(stimulus_left_id, mean), y = mean, 
             color = similarity)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) + 
   theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

## Warning: `cols` is now required when using unnest().
## Please use `cols = c(strap)`

LOOK AT FIRST BLOCK

RT_data %>% 
  filter(block_number == 0) %>% 
  ggplot(aes(x=trial_number, y=log(rt), colour=item_type)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~block_type)

RT_data %>% 
  filter(block_number == 0) %>% 
  ggplot(aes(x = item_type, y = log(rt))) + 
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(~block_type)

LOOK AT pre-deviant and after deviant

block_dev_summary <- RT_data %>% 
  filter(trial_type == "deviant") %>% 
  mutate(
    block_id =  paste(subject, block_number),
    temp_dev_id = paste(trial_type, trial_number, trial_complexity)) %>% 
  select(temp_dev_id, block_id)

dev_pos_df <- RT_data %>% 
  mutate(block_id = paste(subject, block_number)) %>% 
  left_join(block_dev_summary, by = "block_id") %>% 
  separate(temp_dev_id, into = c("trial_type_temp", "deviant_pos", "deviant_complexity")) %>% 
  select(-c(block_id, trial_type)) %>% 
  rowwise() %>% 
  mutate(dishabituation_position = case_when(
    (trial_number == (as.numeric(deviant_pos)) - 1) ~ "before_deviant", 
    (trial_number == (as.numeric(deviant_pos)) + 1) ~ "after_deviant", 
    TRUE ~ "regular_deviant"
  )) %>% 
  filter(dishabituation_position != "regular_deviant" && deviant_pos != 8) 


dev_pos_df %>% 
  ggplot(aes(x = trial_number, y = log(rt), 
             color = dishabituation_position)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~deviant_complexity)

dev_pos_df %>% 
  ggplot(aes(x = trial_number, y = log(rt), 
             color = dishabituation_position)) + 
  stat_summary(fun.data = "mean_cl_boot") + 
  facet_wrap(~block_type)

02_visualization

anjie & gal

03/04/2021

Descriptive Info

sample size

demographic

age

ethnicity

Gender

education

RT raw

overall

by trial type

trial type and block

by trial complexity

by block type

by item id

simple

complex

Trial level RT basic plotting

Model

break down by block

break down by complexity

only look at trials before first deviant

Block level RT basic plotting

by block type

by number of deviants

by complexity

Similarity ratings

Raw

by intuitive similarity

Model

Complexity Ratings

Raw

by intuitive complexity

model

by item

by complexity type

dishabituation effect

raw dishabitutaiton

effect?

first deviant only

Similarity redux

LOOK AT FIRST BLOCK

LOOK AT pre-deviant and after deviant