suppressMessages(library(here))
suppressMessages(library(jpeg))
suppressMessages(library(grid))
suppressMessages(library(lmerTest))
suppressMessages(library(car))

source(here::here("helper/common.R"))
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:car':
## 
##     some
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
##     splice
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:rlang':
## 
##     set_names
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## Attaching package: 'assertthat'
## The following object is masked from 'package:rlang':
## 
##     has_name
## ── Attaching packages ──────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ readr   1.3.1
## ✓ tibble  3.0.0     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ forcats 0.4.0
## ── Conflicts ─────────────────────── tidyverse_conflicts() ──
## x rlang::%@%()          masks purrr::%@%()
## x rlang::as_function()  masks purrr::as_function()
## x tidyr::expand()       masks Matrix::expand()
## x tidyr::extract()      masks magrittr::extract()
## x dplyr::filter()       masks stats::filter()
## x rlang::flatten()      masks purrr::flatten()
## x rlang::flatten_chr()  masks purrr::flatten_chr()
## x rlang::flatten_dbl()  masks purrr::flatten_dbl()
## x rlang::flatten_int()  masks purrr::flatten_int()
## x rlang::flatten_lgl()  masks purrr::flatten_lgl()
## x rlang::flatten_raw()  masks purrr::flatten_raw()
## x tibble::has_name()    masks assertthat::has_name(), rlang::has_name()
## x rlang::invoke()       masks purrr::invoke()
## x dplyr::lag()          masks stats::lag()
## x rlang::list_along()   masks purrr::list_along()
## x rlang::modify()       masks purrr::modify()
## x tidyr::pack()         masks Matrix::pack()
## x rlang::prepend()      masks purrr::prepend()
## x dplyr::recode()       masks car::recode()
## x magrittr::set_names() masks rlang::set_names(), purrr::set_names()
## x purrr::some()         masks car::some()
## x rlang::splice()       masks purrr::splice()
## x tidyr::unpack()       masks Matrix::unpack()
source(here("helper/preprocessing_helper.R"))

knitr::opts_chunk$set(cache = TRUE, warn = FALSE, message = FALSE)

1 Intro

Pilot data analysis for MB2.

We can re-preprocess all of our data, this is set not to evaluate by default. In order to do this, you will need to register with Eyelink to get their binary package and then install edfR.

labs <- dir(here::here("pilot_data"))

for (lab in labs) {
  print(lab)
  source(here::here("pilot_data", lab, "import_scripts", "import.R"))
}

2 File reading

labs <- dir(here::here("pilot_data"))

d <- labs %>%
  map_df(function(lab) {
    aoi_data <- read_csv(here(paste0("pilot_data/",
                                     lab,"/processed_data/aoi_data.csv"))) 
    subjects <- read_csv(here(paste0("pilot_data/",
                                     lab,"/processed_data/subjects.csv"))) 
    trials <- read_csv(here(paste0("pilot_data/",
                                   lab,"/processed_data/trials.csv"))) 
    datasets <- read_csv(here(paste0("pilot_data/",
                                     lab,"/processed_data/datasets.csv")))
    
    left_join(aoi_data, subjects) %>%
      left_join(trials) %>%
      left_join(datasets) %>%
      select(lab_subject_id, lab_dataset_id, lab_trial_id, trial_id, 
             age, t, aoi, trial_num, error, experiment_num) %>%
      rename(subid = lab_subject_id, 
             lab = lab_dataset_id, 
             stimulus = lab_trial_id)
  })

3 Exclusions

d$experiment = ifelse(grepl("1a", d$experiment_num), "1a", "1b")

# exclude subject marked with any error and/or less than 8 trials
d <- d %>% 
  group_by(lab, subid, experiment) %>%
  mutate(error_subj = any(error)) %>%
      exclude_by(quo(error_subj), quiet=FALSE) 
## 
## filtering by error_subj 
## [1] "This variable excludes NA trials, which is  NA % of all trials."
## [1] "10  subjects, 10.8 %, have all trials where error_subj is true: exclude"
## [1] "Excluded: "
## 
## 
## lab                  subid     experiment 
## -------------------  --------  -----------
## CEU_babylab          mb2_p14   1a         
## CEU_babylab          mb2_p8    1a         
## copenhagen_babylab   mb2p01    1a         
## copenhagen_babylab   mb2p05    1a         
## copenhagen_babylab   mb2p13    1a
# exclude trials under 32s (which are not complete trials)
# changed from 35s to 32 after pilot 1b because no_outcome
# trials are shorter
d <- ungroup(d) %>% 
  group_by(lab, trial_id, subid, experiment) %>%
  mutate(time_range = (max(t) - min(t))/1000) %>%
          exclude_by(quo(time_range <= 32), quiet=FALSE)
## 
## filtering by time_range <= 32 
## [1] "This variable excludes 23 trials, which is  3.6 % of all trials."
## [1] "0  subjects, 0 %, have all trials where time_range <= 32 is true: exclude"
## [1] "Excluded: "
## 
## 
## lab                   trial_id  subid     experiment 
## -------------------  ---------  --------  -----------
## CEU_babylab                 16  p1b_03    1b         
## CEU_babylab                 58  mb2_p17   1a         
## CEU_babylab                 86  mb2_p5    1a         
## copenhagen_babylab          20  mb2p04    1a         
## copenhagen_babylab          22  mb2p04    1a
# print trial time ranges by lab
ungroup(d) %>%
  group_by(lab, experiment) %>% 
  summarise(shortest_trial=min(time_range),
            longest_trial=max(time_range)) %>%
  kable(digits=2)
lab experiment shortest_trial longest_trial
CEU_babylab 1a 33.73 38.62
CEU_babylab 1b 33.98 38.08
copenhagen_babylab 1a 32.67 38.62
copenhagen_babylab 1b 35.05 38.08
goettingen_babylab 1a 38.30 38.73
lmu_babylab 1a 35.62 38.65
trento_babylab 1a 34.70 38.75
ubc_infantlab 1a 36.45 38.70
uchicago_babylab 1a 32.42 38.62
uchicago_babylab 1b 32.17 38.15
# exclude subjects who did not complete 7/8 trials
d <- ungroup(d) %>% 
  group_by(lab, subid, experiment) %>%
  mutate(trials_completed = length(unique(trial_id))) %>%
           exclude_by(quo(trials_completed < 7),quiet=FALSE) %>% 
  ungroup(d) %>% 
mutate(subid = paste(subid, lab, experiment, sep="_"))
## 
## filtering by trials_completed < 7 
## [1] "This variable excludes 51 trials, which is  8.2 % of all trials."
## [1] "11  subjects, 13.3 %, have all trials where trials_completed < 7 is true: exclude"
## [1] "Excluded: "
## 
## 
## lab                  subid         experiment 
## -------------------  ------------  -----------
## CEU_babylab          p1b_03        1b         
## copenhagen_babylab   mb2p04        1a         
## copenhagen_babylab   mb2p09        1a         
## lmu_babylab          lmu_child10   1a         
## trento_babylab       MB2_P1        1a

4 Analysis

Descriptives

d %>%
  group_by(experiment, lab, subid) %>%
  summarise(age = mean(age)) %>%
  summarise(n = n(), 
            age = mean(age)/30.25) %>%
  kable(digits = 2)
experiment lab n age
1a CEU_babylab 14 24.83
1a copenhagen_babylab 8 22.05
1a goettingen_babylab 6 24.79
1a lmu_babylab 12 23.21
1a trento_babylab 4 20.98
1a ubc_infantlab 10 21.22
1a uchicago_babylab 7 24.70
1b CEU_babylab 3 NA
1b copenhagen_babylab 4 NA
1b uchicago_babylab 4 22.40

Anticipation plot across all trials.

ms <- d %>% 
  group_by(t, trial_num, experiment_num) %>%
  summarise(target = mean(aoi == "target", na.rm=TRUE),
            distractor = mean(aoi == "distractor", na.rm=TRUE)) %>%
  gather(region, looking, target, distractor) 

ggplot(ms, aes(x = t, y = looking, col = region)) + 
  geom_line() + 
  geom_vline(xintercept = 120, col = "red", lty = 2) + 
  facet_grid(experiment_num ~ .) + 
  coord_cartesian(xlim = c(-4000+120, 4120)) #only consider the 4sec window before and after POD

In the primary time period of interest

ms <- d %>%
  group_by(t, experiment_num) %>%
  summarise(target = mean(aoi == "target", na.rm = TRUE),
            distractor = mean(aoi == "distractor", na.rm = TRUE)) %>%
  gather(region, looking, target, distractor) 
  
ggplot(ms, aes(x = t, y = looking, col = region)) +
  geom_point() + 
  xlim(-4000 + 120, 4000 + 120) + 
  geom_vline(xintercept = 120, col = "red", lty = 2) + 
  geom_text(x = -4000, y = .95, group = 1, col = "black", 
            label = "Anticipation", hjust = 0) + 
  geom_text(x = 200, y = .95, group = 1, col = "black", 
            label = "Reaction", hjust = 0) + 
  facet_grid(. ~ experiment_num)

Now, broken down by trial. Summary across anticipation window.

ms <- d %>%
  filter(t > -4000, t < 120) %>%
  group_by(lab, subid, trial_num, experiment_num) %>%
  summarise(target = mean(aoi == "target", na.rm = TRUE)) %>%
  group_by(trial_num, experiment_num) %>%
  langcog::multi_boot_standard(col = "target", na.rm = TRUE)


ggplot(ms, aes(x = trial_num, y = mean)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) + 
  geom_line() + 
  facet_grid(. ~ experiment_num)

Binned for cleaner curves

ms <- d %>%
  mutate(block = ifelse(trial_num < 5, "Trials 1-4", "Trials 5-8")) %>%
  group_by(t, block, experiment_num) %>%
  summarise(target = mean(aoi == "target", na.rm = TRUE),
            distractor = mean(aoi == "distractor", na.rm = TRUE)) %>%
  gather(region, looking, target, distractor) 
  
ggplot(ms, aes(x = t, y = looking, col = region)) +
  geom_point() + 
  # geom_smooth(span = 2, se = FALSE) + 
  xlim(-4000 + 120, 4000 + 120) +
  geom_vline(xintercept = 120, col = "black", lty = 3) + 
  annotate("text", x = -3800, y = 1, col = "black",
            label = "Anticipation", hjust = 0) +
  annotate("text", x = 200, y = 1, col = "black", 
            label = "Reaction", hjust = 0) + 
  ggthemes::scale_color_solarized(name = "Area of Interest") + 
  xlab("Time (msec)") + 
  ylab("Proportion gaze in AOI") + 
  theme(legend.position = "bottom") + 
  facet_wrap(experiment_num~block)

And by lab:

ms <- d %>%
  mutate(block = ifelse(trial_num < 5, 1, 2)) %>%
  group_by(t, lab, block, experiment_num) %>%
  summarise(target = mean(aoi == "target", na.rm = TRUE),
            distractor = mean(aoi == "distractor", na.rm = TRUE)) %>%
  gather(region, looking, target, distractor) 
  
ggplot(ms, aes(x = t, y = looking, col = region)) +
  geom_point() + 
  # geom_smooth(span = 2, se = FALSE) + 
  xlim(-4000 + 120, 4000 + 120) + 
  geom_vline(xintercept = 120, col = "red", lty = 2) +
  facet_grid(lab~block + experiment_num)

5 Main analysis 1: First anticipatory look

Create a dataframe to identify the first look and see if the first look is >100ms

# get time and location of first look (either target or distractor)

df_t_first_aoi <- d %>% 
  # for now only look at exp1, and at window between bear disappearing and point of disambiguation (+120 to account for time to react)
  filter(., t >= -4000+120, t <= 120) %>% 
  # create variable for AOI in order to exploit rle, which handles NaN's neatly
    mutate(aoi_recode = ifelse(aoi == "other", NA, aoi)) %>%
  group_by(subid, trial_num) %>% 
    # 4 consecutive timepoints looking at target or distractor is the first look
  mutate(first_look_aoi = rle(aoi_recode)$values[rle(aoi_recode)$lengths >= 4][1],
         first_look_t = t[sum(rle(aoi_recode)$lengths[0:(which.max(rle(aoi_recode)$lengths >= 4)-1)])+1]) %>% 
  ungroup()

# short df with first looks to check
first_looks_df <- df_t_first_aoi %>% 
  #select(subid, trial_num, first_look_aoi, first_look_t, age, experiment_num) %>% 
  group_by(subid, trial_num, experiment_num) %>%
  summarize(aoi = unique(first_look_aoi)) %>% 
  mutate(aoi = case_when (is.na(aoi) ~ "other", 
                          TRUE ~ as.character(aoi)))

Number of first look in differnet experiments, looks like pilot 1b outcome and no outcome conditions have similiar number of first look

Note that NA represents trials where the kid only looked at other places (not target/distractor) during the whole anticipatory window between -3880 to +120 ms

I checked online, didn’t find any concensus of how to analyze the “other” looking. Different people have different decisions on how to treat the “other” looking in the analyses.

I suggest removing “NA (other)” in the following analyses, as I think that looking at “distractor” and “other” are two different concepts. Theortically, we are interested in understanding whether kids looked at target more than distractor and we may not want to include the time when kids were bored and didn’t pay attention to trials/simply did not understand the task. In addition, the number of cases when kids only looked at “other” is low across experiments, so we are good.

first_looks_df %>% 
  group_by(aoi, experiment_num) %>% 
  count()
## # A tibble: 9 x 3
## # Groups:   aoi, experiment_num [9]
##   aoi        experiment_num          n
##   <chr>      <chr>               <int>
## 1 distractor pilot_1a              128
## 2 distractor pilot_1b_no_outcome     9
## 3 distractor pilot_1b_outcome       12
## 4 other      pilot_1a               22
## 5 other      pilot_1b_no_outcome     3
## 6 other      pilot_1b_outcome        1
## 7 target     pilot_1a              328
## 8 target     pilot_1b_no_outcome    31
## 9 target     pilot_1b_outcome       31

5.1 Data visualization across different test trials

Pilot 1a

ggplot(data = filter(first_looks_df, experiment_num == "pilot_1a")) + 
  geom_bar(mapping = aes(x = aoi, y = ..prop.., group = 1, stat = "count")) + 
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(.~trial_num, ncol = 4)

Pilot 1b

ggplot(data = filter(first_looks_df, experiment_num != "pilot_1a")) + 
  geom_bar(mapping = aes(x = aoi, y = ..prop.., group = 1, stat = "count")) + 
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(trial_num~experiment_num, ncol = 4)

5.2 Descriptive stats

Note that the following analyses removed NA (i.e., when kids looked at other all the time during the anticipatory window between -3880ms to +120ms)

Mean and sd of first look location in different trials. The means of all trials are above 50%

mean_sd_aoi <- first_looks_df %>% 
  filter(!is.na(aoi)) %>% 
  mutate(aoi_dummy = ifelse(aoi == "target", 1, 0)) %>% 
  group_by(trial_num, experiment_num) %>% 
  summarize(number_of_kids_each_trial = n(),
            mean(aoi_dummy),
            sd(aoi_dummy))

Data visualization

ggplot(mean_sd_aoi, aes(x = trial_num, y = `mean(aoi_dummy)`)) +
  geom_line()+
  facet_grid(.~experiment_num)

5.3 Mixed-level analysis: First look location

Pilot 1a: kids looked at target less over time. But generally speaking, they still looked at targat significantly more than distracter

first_looks_df_1a <- first_looks_df %>% 
  filter(experiment_num == "pilot_1a", !is.na(aoi)) %>% 
  mutate(aoi_dummy = ifelse(aoi == "target", 1, 0))
  
first_look_1a <- glmer(aoi_dummy ~ trial_num + (1|subid), data = first_looks_df_1a, family = "binomial")

summary(first_look_1a)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: aoi_dummy ~ trial_num + (1 | subid)
##    Data: first_looks_df_1a
## 
##      AIC      BIC   logLik deviance df.resid 
##    589.7    602.2   -291.9    583.7      475 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9392 -1.1323  0.5435  0.6759  0.9313 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subid  (Intercept) 0.3446   0.5871  
## Number of obs: 478, groups:  subid, 61
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.32238    0.24951   5.300 1.16e-07 ***
## trial_num   -0.10507    0.04542  -2.313   0.0207 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## trial_num -0.852

Pilot 1a (trial 1 to 4) vs Pilot 1b_outcome: no difference between 1a and 1b_outcome in the first look at the target

first_looks_df_1a_1b <- first_looks_df %>% 
  filter(experiment_num != "pilot_1b_no_outcome", 
         trial_num < 5,
         !is.na(aoi)) %>% 
  mutate(aoi_dummy = ifelse(aoi == "target", 1, 0))
  
first_look_1a_1b <- glmer(aoi_dummy ~ trial_num + experiment_num + (1|subid), data = first_looks_df_1a_1b, family = "binomial")

summary(first_look_1a_1b)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: aoi_dummy ~ trial_num + experiment_num + (1 | subid)
##    Data: first_looks_df_1a_1b
## 
##      AIC      BIC   logLik deviance df.resid 
##    341.6    356.2   -166.8    333.6      283 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8839 -1.2762  0.5402  0.6026  0.7929 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subid  (Intercept) 0.2558   0.5057  
## Number of obs: 287, groups:  subid, 72
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      1.3884     0.3623   3.833 0.000127 ***
## trial_num                       -0.1269     0.1223  -1.038 0.299412    
## experiment_numpilot_1b_outcome  -0.1466     0.4063  -0.361 0.718238    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trl_nm
## trial_num   -0.877       
## exprmnt__1_ -0.187  0.005

Pilot 1b_outcome vs Pilot 1b_no_outcome, no difference between 1a and 1b in the first look at the target, but need to interpret the results with caution because we have so little data and even the intercept is not significant

first_looks_df_1b <- first_looks_df %>% 
  filter(experiment_num != "pilot_1a", 
         !is.na(aoi)) %>% 
  mutate(aoi_dummy = ifelse(aoi == "target", 1, 0))
  
first_look_1b <- glmer(aoi_dummy ~ trial_num + experiment_num + (1|subid), data = first_looks_df_1b, family = "binomial")

summary(first_look_1b)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: aoi_dummy ~ trial_num + experiment_num + (1 | subid)
##    Data: first_looks_df_1b
## 
##      AIC      BIC   logLik deviance df.resid 
##    110.1    120.0    -51.0    102.1       83 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1576 -1.0273  0.5021  0.6177  0.9786 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  subid  (Intercept) 0.5613   0.7492  
## Number of obs: 87, groups:  subid, 11
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)
## (Intercept)                     0.45889    1.49936   0.306    0.760
## trial_num                       0.09076    0.22386   0.405    0.685
## experiment_numpilot_1b_outcome  0.29372    1.01437   0.290    0.772
## 
## Correlation of Fixed Effects:
##             (Intr) trl_nm
## trial_num   -0.958       
## exprmnt__1_ -0.918  0.872

6 Main analysis 2: Differential analysis

This analysis explores whether kids looked at the target more than distractor during anticipatory period.

6.1 Data visualization across different test trials

df_analysis2 <- d %>%  
  filter(t >= -4000+120 & t <= 120)

df_analysis2_afterPOD <- d %>%  
  filter(t >= 120 & t <= 4000+120)

Pilot 1a: Before POD

ggplot(data = filter(df_analysis2, experiment_num == "pilot_1a")) + 
  geom_bar(mapping = aes(x = aoi, y = ..prop.., group = 1, stat = "count")) + 
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(.~trial_num, ncol = 4)

Pilot 1a: after POD

ggplot(data = filter(df_analysis2_afterPOD, experiment_num == "pilot_1a")) + 
  geom_bar(mapping = aes(x = aoi, y = ..prop.., group = 1, stat = "count")) + 
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(.~trial_num, ncol = 4)

Pilot 1b : before POD

ggplot(data = filter(df_analysis2, experiment_num != "pilot_1a")) + 
  geom_bar(mapping = aes(x = aoi, y = ..prop.., group = 1, stat = "count")) + 
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(trial_num~experiment_num, ncol = 4)

Pilot 1b : after POD

ggplot(data = filter(df_analysis2_afterPOD, experiment_num != "pilot_1a")) + 
  geom_bar(mapping = aes(x = aoi, y = ..prop.., group = 1, stat = "count")) + 
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(trial_num~experiment_num, ncol = 4)

For proportion score, I only focus on target and distractor (target/target+distractor)

ms_proportion_trials <- d %>%
  group_by(t, trial_num, experiment_num) %>%
  summarise(target = mean(aoi == "target", na.rm = TRUE),
            distractor = mean(aoi == "distractor", na.rm = TRUE)) %>%
  mutate(proportion_to_target = target / (target + distractor)) %>% 
  filter(t >= -4000+120 & t <= 4120)

ms_proportion_trial_exp1a <- ms_proportion_trials %>% 
  filter(experiment_num == "pilot_1a")

ms_proportion_trial_exp1b <- ms_proportion_trials %>% 
  filter(experiment_num != "pilot_1a")

Proportion looking in EXP 1A

ggplot(ms_proportion_trial_exp1a, 
       aes(x = t, y = proportion_to_target)) +
  geom_point(alpha = 0.3) + 
  ylim(0,1) + 
  # geom_smooth(span = 2, se = FALSE) + 
  xlim(-4000+120, 4120) + 
  geom_vline(xintercept = 120, col = "red", lty = 2) + 
  geom_text(x = -4000, y = .95, group = 1, col = "black", 
            label = "Anticipation", hjust = 0 ) + 
  geom_text(x = 200, y = .95, group = 1, col = "black", 
            label = "Reaction", hjust = 0 )+
  facet_wrap(experiment_num~trial_num, ncol = 4)   

Proportion looking in EXP 1B

ggplot(ms_proportion_trial_exp1b, 
       aes(x = t, y = proportion_to_target)) +
  geom_point(alpha = 0.3) + 
  ylim(0,1) + 
  # geom_smooth(span = 2, se = FALSE) + 
  xlim(-4000+120, 4120) + 
  geom_vline(xintercept = 120, col = "red", lty = 2) + 
  geom_text(x = -4000, y = .95, group = 1, col = "black", 
            label = "Anticipation", hjust = 0 ) + 
  geom_text(x = 200, y = .95, group = 1, col = "black", 
            label = "Reaction", hjust = 0 )+
  facet_wrap(experiment_num~trial_num, ncol = 4)   

Create data-set for the mixed-level analysis

ms_proportion_trials <- d %>%
  filter (t >= -4000+120 & t <= 120) %>% 
  group_by(experiment_num, trial_num, subid, lab) %>%
  summarise(mean_target = mean(aoi == "target", na.rm = TRUE),
            sem_target = sd(aoi == "target", na.rm = TRUE) / sqrt(length(aoi == "target")),
            mean_distractor = mean(aoi == "distractor", na.rm = TRUE), 
            sem_distractor = sd(aoi == "distractor", na.rm = TRUE) / sqrt(length(aoi == "distractor")),
            up_ci_target = mean_target + (sem_target * 1.96),
            low_ci_target = mean_target - (sem_target * 1.96),
            up_ci_distractor = mean_distractor + (sem_distractor * 1.96),
            low_ci_distractor = mean_distractor - (sem_distractor * 1.96)) %>% 
  mutate(proportion_to_target = mean_target / (mean_target + mean_distractor))

6.2 Logistic Mixed-level analysis during the pre-disambiguation period (-1880 to 120 window)

6.2.1 compare pilot 1a (trial 1 to 4) and pilot 1b outcome

d_4120_window_1a_1b_outcome <- d %>% 
  filter(t >= -4000+120, t <= 120, 
         experiment_num %in% c("pilot_1a", "pilot_1b_outcome"), 
         trial_num <5,
         aoi != "other") %>%
  mutate(aoi_dummy = case_when(aoi == "target" ~ 1, 
                               aoi == "distractor" ~ 0))

ls_target_4120_1a_1b_outcome <- glmer(aoi_dummy ~ trial_num + experiment_num + (trial_num|subid), 
                                     data = d_4120_window_1a_1b_outcome, family = "binomial")
summary(ls_target_4120_1a_1b_outcome)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: aoi_dummy ~ trial_num + experiment_num + (trial_num | subid)
##    Data: d_4120_window_1a_1b_outcome
## 
##      AIC      BIC   logLik deviance df.resid 
##  17605.7  17653.4  -8796.8  17593.7    21118 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -24.504   0.027   0.284   0.515 198.275 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr 
##  subid  (Intercept) 15.748   3.968         
##         trial_num    2.687   1.639    -0.86
## Number of obs: 21124, groups:  subid, 71
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      2.0348     0.4870   4.178 2.94e-05 ***
## trial_num                       -0.1423     0.1966  -0.724    0.469    
## experiment_numpilot_1b_outcome   0.3437     0.6839   0.503    0.615    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trl_nm
## trial_num   -0.835       
## exprmnt__1_ -0.214 -0.002

6.2.2 only focus on pilot 1b non outcome

d_4120_window_1b_no_outcome <- d %>% 
  filter(t >= -4000+120, t <= 120, 
         experiment_num == "pilot_1b_no_outcome",
         aoi != "other") %>%
  mutate(aoi_dummy = case_when(aoi == "target" ~ 1, 
                               aoi == "distractor" ~ 0))

ls_target_4120_1b_no_outcome <- glmer(aoi_dummy ~ trial_num + (trial_num|subid), 
                                     data = d_4120_window_1b_no_outcome, family = "binomial")
summary(ls_target_4120_1b_no_outcome)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: aoi_dummy ~ trial_num + (trial_num | subid)
##    Data: d_4120_window_1b_no_outcome
## 
##      AIC      BIC   logLik deviance df.resid 
##   2284.1   2313.3  -1137.0   2274.1     2574 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0140 -0.5681  0.2491  0.5810  1.8608 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr 
##  subid  (Intercept) 79.897   8.938         
##         trial_num    1.777   1.333    -0.97
## Number of obs: 2579, groups:  subid, 11
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -1.6542     2.7602  -0.599    0.549
## trial_num     0.5061     0.4150   1.219    0.223
## 
## Correlation of Fixed Effects:
##           (Intr)
## trial_num -0.972

6.3 Logistic Mixed-level analysis during the poste-disambiguation period (0 to 2120), compare pilot 1b outcome vs no-outcome

d_4120_window_1b <- d %>% 
  filter(t >= 0, t <= 4120, 
         experiment_num != "pilot_1a",
         aoi != "other") %>%
  mutate(aoi_dummy = case_when(aoi == "target" ~ 1, 
                               aoi == "distractor" ~ 0))

ls_target_4120_1b <- glmer(aoi_dummy ~ trial_num + experiment_num + (trial_num|subid), 
                                     data = d_4120_window_1b, family = "binomial")
summary(ls_target_4120_1b)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: aoi_dummy ~ trial_num + experiment_num + (trial_num | subid)
##    Data: d_4120_window_1b
## 
##      AIC      BIC   logLik deviance df.resid 
##   4170.6   4214.2  -2079.3   4158.6    10544 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -23.2984   0.0096   0.0848   0.1516   1.6502 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr 
##  subid  (Intercept) 13.7122  3.7030        
##         trial_num    0.2305  0.4801   -0.87
## Number of obs: 10550, groups:  subid, 11
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      2.8794     1.2057   2.388   0.0169 *  
## trial_num                       -0.2117     0.1615  -1.311   0.1899    
## experiment_numpilot_1b_outcome   4.0677     0.2134  19.062   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trl_nm
## trial_num   -0.889       
## exprmnt__1_ -0.109  0.123
# df_pilot_1a_b_trial1to4 <- ms_proportion_trials %>% filter(experiment_num != "pilot_1b_no_outcome", trial_num <5)
# 
# 
# prop_model_1a_b_trial_1to4 <- lmer(proportion_to_target ~ experiment_num + trial_num + (1|subid), data = df_pilot_1a_b_trial1to4)
# summary(prop_model_1a_b_trial_1to4) 

## Linear hypothesis test to see if the intercept of the prop_model is signficantly higher than chance level (0.5)
 
# linearHypothesis(prop_model, "(Intercept) = 0.5")

# the linear hypothesis test is also confirmed by calculating the z score of this intercept, I used the coefficient and the sd of the random intercept
# z_intercept <- 0.7525/0.066 #much higher than 1.96