todo:

  • new exclusion criteria
  • ideal learner stuff
  • look at target reaction time?
  • import the complexity norm?
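
For the reaction-time item in the todo, one starting point (a sketch; it assumes trial_pressed_space_bar is logical, and a real RT analysis would need a press-latency column that doesn't appear anywhere below):

# Sketch: hit rate on target trials
d %>% 
  filter(trial_stimulus_type == "target") %>% 
  summarise(hit_rate = mean(trial_pressed_space_bar, na.rm = TRUE))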

Visualizing raw trial looking time

d_no_target <- d %>% 
  filter(trial_stimulus_type != "target")

Looking time distribution

RAW

data_with_demog %>% 
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram(bins = 90) + 
  xlim(0, 9000)
## Warning: Removed 72 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

data_with_demog %>% 
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram() + 
  scale_x_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

AFTER EXCLUSION

d %>% 
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram() + 
  scale_x_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# 
# d %>% 
#   ggplot(aes(x = trial_looking_time)) + 
#   geom_histogram(bins = 90) + 
#   xlim(0, 6000) + 
#   facet_wrap(~subject)

Digging into the second hump

  • Something wacky is happening in background trials.
  • Deviant trials are slower.
  • Target trials are likely a mixture of hits and misses (see the mixture sketch after the next plot).
d %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = trial_pressed_space_bar)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~trial_stimulus_type)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
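
One way to quantify the hit/miss mixture idea (a sketch; mixtools is an assumption here, not part of the original pipeline) is to fit a two-component normal mixture to log looking times on target trials:

# Sketch: two-component normal mixture over log looking times on target trials
library(mixtools)

set.seed(123)
target_lt <- log(na.omit(d$trial_looking_time[d$trial_stimulus_type == "target"]))
mix_fit <- normalmixEM(target_lt, k = 2)
exp(mix_fit$mu)  # component means, back on the ms scale
mix_fit$lambda   # mixing proportions (a rough hit/miss split)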

Zoom in on background trials. The “hump” (second mode) is there in all parts of the experiment.

d %>% 
  mutate(block_num = ceiling(total_trial_num / 48)) %>%
  filter(trial_stimulus_type == "background") %>%
  ggplot(aes(x = trial_looking_time, 
             fill = trial_stimulus_complexity)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~block_num)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

d %>% 
  filter(trial_stimulus_type == "background") %>%
  group_by(subject) %>%
  mutate(mlog = mean(log(trial_looking_time)), 
         sdlog = sd(log(trial_looking_time)),
         outlier = log(trial_looking_time) > mlog + 3*sdlog |
           log(trial_looking_time) < mlog - 3*sdlog) %>%
  ggplot(aes(x = trial_looking_time, fill = outlier)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~subject)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

We see this pattern within subjects to some extent, irrespective of outliers. Is it driven by trials right after a target press?

No: the second mode shows up even on background trials that were not preceded by a target.

d %>% 
  group_by(subject) %>%
  mutate(
    # TRUE for background trials that directly follow a target trial:
    # diff() of the TRUE/FALSE indicator is -1 exactly at a target -> non-target
    # transition; we additionally require the current trial to be background
    t_to_b = c(FALSE, diff(as.numeric(as.factor(trial_stimulus_type == "target"))) == -1) & trial_stimulus_type == "background",
    # same idea for deviant -> background transitions
    d_to_b = c(FALSE, diff(as.numeric(as.factor(trial_stimulus_type == "deviant"))) == -1) & trial_stimulus_type == "background") %>%
  filter(trial_stimulus_type == "background") %>%
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_grid(d_to_b~t_to_b)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

just look at the best trials of each individual

d %>% 
  group_by(subject) %>% 
  mutate(mlog = mean(log(trial_looking_time)), 
         sdlog = sd(log(trial_looking_time)),
         # use vectorized & (not scalar &&) so this evaluates per trial
         best_trials = log(trial_looking_time) < mlog + 2*sdlog &
           log(trial_looking_time) > mlog - 2*sdlog) %>%
  filter(best_trials) %>% 
  ggplot(aes(x = trial_looking_time, fill = trial_stimulus_complexity)) + 
  geom_histogram() + 
  scale_x_log10() 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

block type by complexity

only looking at trials within 2 SD of each subject’s mean log looking time

d %>% 
  group_by(subject) %>% 
  mutate(mlog = mean(log(trial_looking_time)), 
         sdlog = sd(log(trial_looking_time)),
         # vectorized & again, not &&
         best_trials = log(trial_looking_time) < mlog + 2*sdlog &
           log(trial_looking_time) > mlog - 2*sdlog) %>%
  filter(best_trials) %>% 
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram() + 
  scale_x_log10() +
  facet_wrap(~block)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

trimming the outliers doesn’t seem to make a huge difference

d %>% 
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram() + 
  scale_x_log10() +
  facet_wrap(~block)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

break down by block and stimulus type

d %>% 
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram()+
  scale_x_log10() +
  facet_grid(block~trial_stimulus_type)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ok, maybe it’s time to consider the number of repetitions. Maybe the second hump means participants got very bored and started zoning out?

library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:lme4':
## 
##     lmList
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
rep_d <- d %>% 
  group_by(subject, block, trial_stimulus_type) %>% 
  # running count of how many times this stimulus type has been seen so far
  mutate(num_times_stimulus_seen = row_number()) %>% 
  filter(trial_stimulus_type == "background")
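
As a quick descriptive check of the zoning-out idea before fitting anything (a sketch; the bins of 10 repetitions are arbitrary):

rep_d %>% 
  ungroup() %>% 
  mutate(rep_bin = cut(num_times_stimulus_seen, breaks = seq(0, 60, by = 10))) %>% 
  group_by(rep_bin) %>% 
  summarise(mean_log_lt = mean(log(trial_looking_time), na.rm = TRUE), 
            n = n())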

gam_d <- rep_d %>% 
  mutate(trial_stimulus_complexity = as.factor(trial_stimulus_complexity), 
         block = as.factor(block))

gam_m <- gam(log(trial_looking_time) ~ s(num_times_stimulus_seen), 
             data = gam_d, 
             method = "REML")
summary(gam_m)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log(trial_looking_time) ~ s(num_times_stimulus_seen)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.270316   0.008667   723.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df     F p-value    
## s(num_times_stimulus_seen) 7.001  8.072 12.81  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0181   Deviance explained = 1.93%
## -REML = 5591.4  Scale est. = 0.4234    n = 5636
plot(gam_m)

note this is not a very good model (only ~2% deviance explained)

gam.check(gam_m)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-0.0005601243,-3.726007e-07]
## (score 5591.437 & scale 0.4233963).
## Hessian positive definite, eigenvalue range [1.99837,2817.004].
## Model rank =  10 / 10 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k' edf k-index p-value
## s(num_times_stimulus_seen)  9   7       1    0.44
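
One possible refinement (a sketch, not run here) is to absorb subject-level variation with a random-effect smooth:

# Sketch: subject random effect via bs = "re" (requires subject as a factor)
gam_m_re <- gam(log(trial_looking_time) ~ s(num_times_stimulus_seen) + 
                  s(subject, bs = "re"), 
                data = mutate(gam_d, subject = as.factor(subject)), 
                method = "REML")
summary(gam_m_re)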

but still, let’s give it a try: maybe 10 repetitions is the “turning point”

rep_d %>% 
  mutate(
    repetition_type = if_else(num_times_stimulus_seen > 10, "more", "less")
  ) %>% 
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram()+
  scale_x_log10() +
  facet_grid(block~repetition_type)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

umm this really doesn’t make any sense to me…

age really has nothing to do with it

d %>% 
  mutate(
    m_age = mean(demog_age, na.rm = TRUE), 
    age_group = if_else(demog_age <= m_age, "young", "old")
  ) %>% 
  filter(!is.na(age_group)) %>% 
  ggplot(aes(x = trial_looking_time, fill = age_group)) + 
  geom_histogram() + 
  scale_x_log10() 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

complexity difference

d %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = trial_stimulus_complexity)) + 
  geom_density(alpha = 0.5) + 
  xlim(0, 6000)
## Warning: Removed 94 rows containing non-finite values (stat_density).

d %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = trial_stimulus_complexity)) + 
  geom_density(alpha = 0.5) + 
  xlim(0, 6000) + 
  facet_wrap(~subject)
## Warning: Removed 94 rows containing non-finite values (stat_density).

block difference

d %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = block)) + 
  geom_density(alpha = 0.5) + 
  xlim(0, 6000)
## Warning: Removed 94 rows containing non-finite values (stat_density).

d %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = block)) + 
  geom_density(alpha = 0.5) + 
  xlim(0, 6000) + 
  facet_wrap(~subject)
## Warning: Removed 94 rows containing non-finite values (stat_density).

Complexity differences, no target trials

d_no_target %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = trial_stimulus_complexity)) + 
  geom_density(alpha = 0.5) + 
  xlim(0, 6000)
## Warning: Removed 64 rows containing non-finite values (stat_density).

d_no_target %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = trial_stimulus_complexity)) + 
  geom_density(alpha = 0.5) + 
  xlim(0, 6000) + 
  facet_wrap(~subject)
## Warning: Removed 64 rows containing non-finite values (stat_density).

Block differences, no target trials

d_no_target %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = block)) + 
  geom_density(alpha = 0.5) + 
  xlim(0, 6000)
## Warning: Removed 64 rows containing non-finite values (stat_density).

d_no_target %>% 
  ggplot(aes(x = trial_looking_time, 
             fill = block)) + 
  geom_density(alpha = 0.5) + 
  xlim(0, 6000) + 
  facet_wrap(~subject)
## Warning: Removed 64 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

Visualizing aggregated looking time

d_sum_individual <- d %>% 
  group_by(subject, block) %>% 
  summarise(
    mean_lt = mean(trial_looking_time, na.rm = TRUE), 
    sd = sd(trial_looking_time, na.rm = TRUE), 
    n = n(), 
    ci_range_95 = qt(1 - (0.05 / 2), n - 1) * (sd/sqrt(n)), 
    ci_ub = mean_lt + ci_range_95, 
    ci_lb = mean_lt - ci_range_95
  )
## `summarise()` regrouping output by 'subject' (override with `.groups` argument)
d_sum <- d %>% 
  group_by(block) %>% 
  summarise(
    mean_lt = mean(trial_looking_time, na.rm = TRUE), 
    sd = sd(trial_looking_time, na.rm = TRUE), 
    n = n(), 
    ci_range_95 = qt(1 - (0.05 / 2), n - 1) * (sd/sqrt(n)), 
    ci_ub = mean_lt + ci_range_95, 
    ci_lb = mean_lt - ci_range_95
  )
## `summarise()` ungrouping output (override with `.groups` argument)

aggregated

d_sum %>% ggplot(aes(x = block, y = mean_lt)) + 
  geom_pointrange(aes(ymin = ci_lb, ymax = ci_ub)) + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

individual

something weird happened with a few subjects

d_sum_individual %>% 
  ggplot(aes(x = block, y = mean_lt)) + 
  geom_pointrange(aes(ymin = ci_lb, ymax = ci_ub)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

d_sum_individual %>% 
  ggplot(aes(x = block, y = mean_lt)) + 
  geom_pointrange(aes(ymin = ci_lb, ymax = ci_ub))  + 
  facet_wrap(~subject) + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

weird ones

We can see three very weird ones: SS1604513317537, SS1604515995769, SS1604516660396

weird <- c("SS1604513317537", 
           "SS1604515995769", 
           "SS1604516660396")

d_sum_individual %>% 
  filter(subject %in% weird) %>% 
  ggplot(aes(x = block, y = mean_lt)) + 
  geom_pointrange(aes(ymin = ci_lb, ymax = ci_ub))  + 
  facet_wrap(~subject) + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

d %>% 
  filter(subject %in% weird) %>% 
  ggplot(aes(x = trial_looking_time)) + 
  geom_histogram(bins = 90) +
  facet_wrap(~subject) + 
  xlim(0, 6000)+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
## Warning: Removed 2 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_bar).

Excluding weird ones

d_no_weird <- d %>% 
  filter(!(subject %in% weird)) 

  
d_no_weird_sum <- d_no_weird %>% 
  group_by(block) %>% 
  summarise(
    mean_lt = mean(trial_looking_time, na.rm = TRUE), 
    sd = sd(trial_looking_time, na.rm = TRUE), 
    n = n(), 
    ci_range_95 = qt(1 - (0.05 / 2), n - 1) * (sd/sqrt(n)), 
    ci_ub = mean_lt + ci_range_95, 
    ci_lb = mean_lt - ci_range_95
  )
## `summarise()` ungrouping output (override with `.groups` argument)
d_no_weird_sum %>% ggplot(aes(x = block, y = mean_lt)) + 
  geom_pointrange(aes(ymin = ci_lb, ymax = ci_ub)) + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

Demographic

SES

demog_education is coded on a 6-point scale:

case_when(
  education == "Some high school" ~ 1, 
  education == "High school diploma" ~ 2, 
  education == "Associate Degree/Technical certification" ~ 3, 
  education == "Bachelor's Degree" ~ 4, 
  education == "Master's Degree" ~ 5, 
  education == "Doctorate/Professional degree" ~ 6, 
  education == "Other" ~ NA_real_
)

d %>% 
  distinct(subject, .keep_all = TRUE) %>% 
  ggplot(aes(x = demog_education)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite values (stat_bin).

null_m <- lmer(trial_looking_time ~ 1 + (1|subject), 
               data = filter(d, !is.na(demog_education)))

edu_m <- lmer(trial_looking_time ~ demog_education + (1|subject), 
     data = filter(d, !is.na(demog_education)))

summary(edu_m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: trial_looking_time ~ demog_education + (1 | subject)
##    Data: filter(d, !is.na(demog_education))
## 
## REML criterion at convergence: 139982.2
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -0.940 -0.136 -0.087 -0.027 32.089 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept)  272043   521.6  
##  Residual             8451381  2907.1  
## Number of obs: 7448, groups:  subject, 38
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)        174.8      490.0   0.357
## demog_education    197.5      124.5   1.587
## 
## Correlation of Fixed Effects:
##             (Intr)
## demog_edctn -0.983
anova(null_m, edu_m)
## refitting model(s) with ML (instead of REML)
## Data: filter(d, !is.na(demog_education))
## Models:
## null_m: trial_looking_time ~ 1 + (1 | subject)
## edu_m: trial_looking_time ~ demog_education + (1 | subject)
##        Df    AIC    BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## null_m  3 140013 140034 -70004   140007                         
## edu_m   4 140012 140040 -70002   140004 2.5686      1      0.109
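
The scaled residuals above are heavily right-skewed (max ≈ 32), so a log-scale version of the model is probably a better check (a sketch, not part of the original analysis):

edu_log_m <- lmer(log(trial_looking_time) ~ demog_education + (1 | subject), 
                  data = filter(d, !is.na(demog_education)))
summary(edu_log_m)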

Age

d %>% 
  distinct(subject, .keep_all = TRUE) %>% 
  ggplot(aes(x = demog_age)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite values (stat_bin).

null_m <- lmer(trial_looking_time ~ 1 + (1|subject), 
               data = filter(d, !is.na(demog_age)))

age_m <- lmer(trial_looking_time ~ demog_age + (1|subject), 
     data = filter(d, !is.na(demog_age)))

summary(age_m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: trial_looking_time ~ demog_age + (1 | subject)
##    Data: filter(d, !is.na(demog_age))
## 
## REML criterion at convergence: 139990.2
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -0.938 -0.139 -0.086 -0.025 32.086 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept)  293140   541.4  
##  Residual             8451383  2907.1  
## Number of obs: 7448, groups:  subject, 38
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  853.827    286.498   2.980
## demog_age      2.296      7.309   0.314
## 
## Correlation of Fixed Effects:
##           (Intr)
## demog_age -0.945
anova(null_m, age_m)
## refitting model(s) with ML (instead of REML)
## Data: filter(d, !is.na(demog_age))
## Models:
## null_m: trial_looking_time ~ 1 + (1 | subject)
## age_m: trial_looking_time ~ demog_age + (1 | subject)
##        Df    AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## null_m  3 140013 140034 -70004   140007                        
## age_m   4 140015 140043 -70003   140007 0.104      1     0.7471

number of background repetitions on looking time

full, aggregated, by complexity

full_aggregated <- d %>% 
  group_by(subject, block, trial_stimulus_type) %>% 
  # running count of repetitions within subject x block x stimulus type
  mutate(num_times_stimulus_seen = row_number())

full_aggregated %>% 
  filter(trial_stimulus_type == "background") %>% 
  ggplot(
    aes(y = log(trial_looking_time), 
        x = num_times_stimulus_seen,
        color = trial_stimulus_complexity)
  ) + 
  # a constant alpha belongs outside aes(), which also drops the alpha legend
  geom_point(alpha = 0.2, size = 3, shape = ".") + 
  labs(color = "Stimulus Complexity") + 
  ylab("Looking Time (log)") + 
  xlab("Number of Stimulus Repetitions") + 
  geom_smooth(method = "lm") + 
  theme(axis.text = element_text(size = 10))
## `geom_smooth()` using formula 'y ~ x'

full, aggregated, by blocks

full_aggregated %>% 
  filter(trial_stimulus_type == "background") %>% 
  ggplot(
    aes(y = log(trial_looking_time), 
        x = num_times_stimulus_seen,
        color = block)
  ) + 
  geom_point(alpha = 0.2, size = 3, shape = ".") + 
  labs(color = "Block") + 
  ylab("Looking Time (log)") + 
  xlab("Number of Stimulus Repetitions") + 
  geom_smooth(method = "lm") + 
  theme(axis.text = element_text(size = 10))
## `geom_smooth()` using formula 'y ~ x'

no weird, aggregated, by complexity

excluded_sum <- full_aggregated %>% 
  filter(trial_stimulus_type == "background") %>% 
  filter(!(subject %in% weird))

excluded_sum %>% 
  ggplot(
    aes(y = log(trial_looking_time), 
        x = num_times_stimulus_seen,
        color = trial_stimulus_complexity)
  ) + 
  geom_point(alpha = 0.2, size = 3, shape = ".") + 
  labs(color = "Stimulus Complexity") + 
  ylab("Looking Time (log)") + 
  xlab("Number of Stimulus Repetitions") + 
  geom_smooth(method = "lm") + 
  theme(axis.text = element_text(size = 10))
## `geom_smooth()` using formula 'y ~ x'

no weird, aggregated, by blocks

excluded_sum %>% 
  ggplot(
    aes(y = log(trial_looking_time), 
        x = num_times_stimulus_seen,
        color = block)
  ) + 
  geom_point(alpha = 0.2, size = 3, shape = ".") + 
  labs(color = "Block") + 
  ylab("Looking Time (log)") + 
  xlab("Number of Stimulus Repetitions") + 
  geom_smooth(method = "lm") + 
  theme(axis.text = element_text(size = 10))
## `geom_smooth()` using formula 'y ~ x'

individual, by complexity

full_aggregated %>% 
  filter(trial_stimulus_type == "background") %>% 
  ggplot(
    aes(y = log(trial_looking_time), 
        x = num_times_stimulus_seen,
        color = trial_stimulus_complexity)
  ) + 
  geom_point(alpha = 0.2, size = 3, shape = ".") + 
  labs(color = "Stimulus Complexity") + 
  ylab("Looking Time (log)") + 
  xlab("Number of Stimulus Repetitions") + 
  geom_smooth(method = "lm") + 
  theme(axis.text = element_text(size = 10)) + 
  facet_wrap(~subject)
## `geom_smooth()` using formula 'y ~ x'

individual, by block

full_aggregated %>% 
  filter(trial_stimulus_type == "background") %>% 
  ggplot(
    aes(y = log(trial_looking_time), 
        x = num_times_stimulus_seen,
        color = block)
  ) + 
  geom_point(alpha = 0.2, size = 3, shape = ".") + 
  labs(color = "Block") + 
  ylab("Looking Time (log)") + 
  xlab("Number of Stimulus Repetitions") + 
  geom_smooth(method = "lm") + 
  theme(axis.text = element_text(size = 10)) + 
  facet_wrap(~subject)
## `geom_smooth()` using formula 'y ~ x'

Model

linear

block

null_m <- lmer(trial_looking_time ~ 1 + (1|subject), 
               data = d)

basic_m <- lmer(trial_looking_time ~ block + (1|subject), 
     data = d)

summary(basic_m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: trial_looking_time ~ block + (1 | subject)
##    Data: d
## 
## REML criterion at convergence: 151943
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -1.108 -0.127 -0.067 -0.016 33.208 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept)   568448  754    
##  Residual             13515244 3676    
## Number of obs: 7888, groups:  subject, 42
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)                  939.86     142.80   6.582
## blockall_simple              233.59     116.02   2.013
## blockmixed_complex_deviant    34.13     116.79   0.292
## blockmixed_simple_deviant     64.13     118.66   0.540
## 
## Correlation of Fixed Effects:
##             (Intr) blckl_ blckmxd_c_
## blckll_smpl -0.406                  
## blckmxd_cm_ -0.409  0.497           
## blckmxd_sm_ -0.397  0.489  0.490
anova(null_m, basic_m)
## refitting model(s) with ML (instead of REML)
## Data: d
## Models:
## null_m: trial_looking_time ~ 1 + (1 | subject)
## basic_m: trial_looking_time ~ block + (1 | subject)
##         Df    AIC    BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## null_m   3 151999 152020 -75996   151993                         
## basic_m  6 152000 152042 -75994   151988 4.7832      3     0.1884

stimulus repetition

null_m <- lmer(log(trial_looking_time) ~ 1 + (1|subject), 
               data = excluded_sum)

rep_m <- lmer(log(trial_looking_time) ~ num_times_stimulus_seen + (1|subject), 
     data = excluded_sum)

summary(rep_m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(trial_looking_time) ~ num_times_stimulus_seen + (1 | subject)
##    Data: excluded_sum
## 
## REML criterion at convergence: 9108.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3206 -0.4424 -0.1691  0.1589  9.2370 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept) 0.1305   0.3613  
##  Residual             0.3031   0.5505  
## Number of obs: 5434, groups:  subject, 39
## 
## Fixed effects:
##                           Estimate Std. Error t value
## (Intercept)              6.3491570  0.0598278  106.12
## num_times_stimulus_seen -0.0043827  0.0007391   -5.93
## 
## Correlation of Fixed Effects:
##             (Intr)
## nm_tms_stm_ -0.222
anova(null_m, rep_m)
## refitting model(s) with ML (instead of REML)
## Data: excluded_sum
## Models:
## null_m: log(trial_looking_time) ~ 1 + (1 | subject)
## rep_m: log(trial_looking_time) ~ num_times_stimulus_seen + (1 | subject)
##        Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## null_m  3 9133.5 9153.3 -4563.7   9127.5                             
## rep_m   4 9100.4 9126.8 -4546.2   9092.4 35.056      1  3.204e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

stimulus repetition and block interaction

interaction_m <- lmer(log(trial_looking_time) ~ num_times_stimulus_seen * block + (1|subject), 
     data = excluded_sum)

summary(interaction_m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(trial_looking_time) ~ num_times_stimulus_seen * block + (1 |  
##     subject)
##    Data: excluded_sum
## 
## REML criterion at convergence: 9147.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3465 -0.4503 -0.1655  0.1712  9.2622 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept) 0.1306   0.3614  
##  Residual             0.3028   0.5502  
## Number of obs: 5434, groups:  subject, 39
## 
## Fixed effects:
##                                                     Estimate Std. Error t value
## (Intercept)                                         6.342578   0.065381  97.010
## num_times_stimulus_seen                            -0.005310   0.001475  -3.601
## blockall_simple                                     0.073070   0.043044   1.698
## blockmixed_complex_deviant                         -0.013373   0.043044  -0.311
## blockmixed_simple_deviant                          -0.033754   0.043133  -0.783
## num_times_stimulus_seen:blockall_simple            -0.001964   0.002085  -0.942
## num_times_stimulus_seen:blockmixed_complex_deviant  0.002592   0.002085   1.243
## num_times_stimulus_seen:blockmixed_simple_deviant   0.003113   0.002094   1.487
## 
## Correlation of Fixed Effects:
##                            (Intr) nm_t__ blckl_ blckmxd_c_ blckmxd_s_ nm___:_
## nm_tms_stm_                -0.406                                            
## blckll_smpl                -0.329  0.617                                     
## blckmxd_cm_                -0.329  0.617  0.500                              
## blckmxd_sm_                -0.328  0.615  0.499  0.499                       
## nm_tms_s_:_                 0.287 -0.707 -0.872 -0.436     -0.435            
## nm_tms_stmls_sn:blckmxd_c_  0.287 -0.707 -0.436 -0.872     -0.435      0.500 
## nm_tms_stmls_sn:blckmxd_s_  0.286 -0.704 -0.434 -0.434     -0.871      0.498 
##                            nm_tms_stmls_sn:blckmxd_c_
## nm_tms_stm_                                          
## blckll_smpl                                          
## blckmxd_cm_                                          
## blckmxd_sm_                                          
## nm_tms_s_:_                                          
## nm_tms_stmls_sn:blckmxd_c_                           
## nm_tms_stmls_sn:blckmxd_s_  0.498
anova(null_m, interaction_m)
## refitting model(s) with ML (instead of REML)
## Data: excluded_sum
## Models:
## null_m: log(trial_looking_time) ~ 1 + (1 | subject)
## interaction_m: log(trial_looking_time) ~ num_times_stimulus_seen * block + (1 | 
## interaction_m:     subject)
##               Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## null_m         3 9133.5 9153.3 -4563.7   9127.5                             
## interaction_m 10 9100.9 9166.9 -4540.4   9080.9 46.575      7  6.755e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
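
Note that this compares the interaction model against the null, so the test mixes the repetition effect with the block terms; comparing against the repetition-only model isolates the block and interaction terms (a sketch, not run here):

anova(rep_m, interaction_m)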

GAM

only focusing on looking time for background trials

library(mgcv)
gam_d <- full_aggregated %>% 
  filter(trial_stimulus_type == "background") %>% 
  mutate(trial_stimulus_complexity = as.factor(trial_stimulus_complexity), 
         block = as.factor(block))

number of repetitions

gam_m <- gam(trial_looking_time ~ s(num_times_stimulus_seen), 
             data = gam_d, 
             method = "REML")
summary(gam_m)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## trial_looking_time ~ s(num_times_stimulus_seen)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   917.30      56.23   16.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df    F p-value
## s(num_times_stimulus_seen) 1.057  1.111 0.32   0.643
## 
## R-sq.(adj) =  -0.000115   Deviance explained = 0.00725%
## -REML =  55035  Scale est. = 1.7822e+07  n = 5636
plot(gam_m, 
     pages = 1, 
     se = TRUE, 
     shade = TRUE)

gam.check(gam_m)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-0.0001684747,-5.863759e-05]
## (score 55035.44 & scale 17821988).
## Hessian positive definite, eigenvalue range [0.001528264,2817].
## Model rank =  10 / 10 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'  edf k-index p-value
## s(num_times_stimulus_seen) 9.00 1.06    1.01    0.71
concurvity(gam_m, full = TRUE)
##                  para s(num_times_stimulus_seen)
## worst    1.113898e-26               1.124554e-26
## observed 1.113898e-26               5.090767e-31
## estimate 1.113898e-26               3.587588e-29

number of repetitions (with log?)

gam_m <- gam(log(trial_looking_time) ~ s(num_times_stimulus_seen), 
             data = gam_d, 
             method = "REML")
summary(gam_m)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log(trial_looking_time) ~ s(num_times_stimulus_seen)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.270316   0.008667   723.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df     F p-value    
## s(num_times_stimulus_seen) 7.001  8.072 12.81  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0181   Deviance explained = 1.93%
## -REML = 5591.4  Scale est. = 0.4234    n = 5636
plot(gam_m, 
     pages = 1, 
     se = TRUE, 
     shade = TRUE)

gam.check(gam_m)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-0.0005601243,-3.726007e-07]
## (score 5591.437 & scale 0.4233963).
## Hessian positive definite, eigenvalue range [1.99837,2817.004].
## Model rank =  10 / 10 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k' edf k-index p-value
## s(num_times_stimulus_seen)  9   7    1.02    0.95
concurvity(gam_m, full = TRUE)
##                  para s(num_times_stimulus_seen)
## worst    1.113898e-26               1.124554e-26
## observed 1.113898e-26               6.851835e-29
## estimate 1.113898e-26               3.587588e-29

number of repetitions by stimulus complexity

gam_m <- gam(trial_looking_time ~ s(num_times_stimulus_seen, 
                                    by = trial_stimulus_complexity), 
             data = gam_d, 
             method = "REML")

summary(gam_m)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## trial_looking_time ~ s(num_times_stimulus_seen, by = trial_stimulus_complexity)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   917.31      56.21   16.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                               edf Ref.df     F
## s(num_times_stimulus_seen):trial_stimulus_complexitycomplex 1.036  1.070 3.789
## s(num_times_stimulus_seen):trial_stimulus_complexitysimple  1.360  1.634 0.739
##                                                             p-value  
## s(num_times_stimulus_seen):trial_stimulus_complexitycomplex  0.0503 .
## s(num_times_stimulus_seen):trial_stimulus_complexitysimple   0.3521  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.000686   Deviance explained = 0.111%
## -REML =  55027  Scale est. = 1.7808e+07  n = 5636
plot(gam_m, 
     pages = 1, 
     se = TRUE, 
     shade = TRUE)

gam.check(gam_m)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-0.01454695,0.01786747]
## (score 55027.21 & scale 17807718).
## Hessian positive definite, eigenvalue range [0.0142575,2816.482].
## Model rank =  19 / 19 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                               k'  edf k-index
## s(num_times_stimulus_seen):trial_stimulus_complexitycomplex 9.00 1.04       1
## s(num_times_stimulus_seen):trial_stimulus_complexitysimple  9.00 1.36       1
##                                                             p-value
## s(num_times_stimulus_seen):trial_stimulus_complexitycomplex    0.14
## s(num_times_stimulus_seen):trial_stimulus_complexitysimple     0.19
concurvity(gam_m, full = TRUE)
##                  para
## worst    1.207005e-05
## observed 1.207005e-05
## estimate 1.207005e-05
##          s(num_times_stimulus_seen):trial_stimulus_complexitycomplex
## worst                                                   6.119153e-06
## observed                                                3.148278e-08
## estimate                                                5.550473e-07
##          s(num_times_stimulus_seen):trial_stimulus_complexitysimple
## worst                                                  5.950974e-06
## observed                                               1.834908e-07
## estimate                                               5.413397e-07

number of repetitions by stimulus complexity (log?)

gam_m <- gam(log(trial_looking_time) ~ s(num_times_stimulus_seen, 
                                    by = trial_stimulus_complexity), 
             data = gam_d, 
             method = "REML")

summary(gam_m)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log(trial_looking_time) ~ s(num_times_stimulus_seen, by = trial_stimulus_complexity)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.270357   0.008666   723.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                               edf Ref.df      F
## s(num_times_stimulus_seen):trial_stimulus_complexitycomplex 5.158  6.269  4.719
## s(num_times_stimulus_seen):trial_stimulus_complexitysimple  6.463  7.606 10.020
##                                                              p-value    
## s(num_times_stimulus_seen):trial_stimulus_complexitycomplex 5.71e-05 ***
## s(num_times_stimulus_seen):trial_stimulus_complexitysimple   < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0185   Deviance explained = 2.05%
## -REML =   5597  Scale est. = 0.42323   n = 5636
plot(gam_m, 
     pages = 1, 
     se = TRUE, 
     shade = TRUE)

gam.check(gam_m)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-0.0002553821,-2.062741e-10]
## (score 5597.005 & scale 0.423228).
## Hessian positive definite, eigenvalue range [0.8220389,2816.504].
## Model rank =  19 / 19 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                               k'  edf k-index
## s(num_times_stimulus_seen):trial_stimulus_complexitycomplex 9.00 5.16    0.98
## s(num_times_stimulus_seen):trial_stimulus_complexitysimple  9.00 6.46    0.98
##                                                             p-value  
## s(num_times_stimulus_seen):trial_stimulus_complexitycomplex    0.12  
## s(num_times_stimulus_seen):trial_stimulus_complexitysimple     0.09 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
concurvity(gam_m, full = TRUE)
##                  para
## worst    1.207005e-05
## observed 1.207005e-05
## estimate 1.207005e-05
##          s(num_times_stimulus_seen):trial_stimulus_complexitycomplex
## worst                                                   6.119153e-06
## observed                                                1.516816e-06
## estimate                                                5.550473e-07
##          s(num_times_stimulus_seen):trial_stimulus_complexitysimple
## worst                                                  5.950974e-06
## observed                                               1.684933e-06
## estimate                                               5.413397e-07

number of repetitions by block

gam_m <- gam(trial_looking_time ~ s(num_times_stimulus_seen, by = block), 
             data = gam_d, 
             method = "REML")

plot(gam_m, all.terms = TRUE, pages = 1,
     shade = TRUE,
     rug = TRUE, 
     se = TRUE)

summary(gam_m)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## trial_looking_time ~ s(num_times_stimulus_seen, by = block)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   917.45      56.21   16.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                         edf Ref.df     F
## s(num_times_stimulus_seen):blockall_complex           1.345  1.612 4.193
## s(num_times_stimulus_seen):blockall_simple            1.009  1.018 1.646
## s(num_times_stimulus_seen):blockmixed_complex_deviant 1.064  1.126 0.121
## s(num_times_stimulus_seen):blockmixed_simple_deviant  1.050  1.097 0.180
##                                                       p-value  
## s(num_times_stimulus_seen):blockall_complex            0.0444 *
## s(num_times_stimulus_seen):blockall_simple             0.1971  
## s(num_times_stimulus_seen):blockmixed_complex_deviant  0.7547  
## s(num_times_stimulus_seen):blockmixed_simple_deviant   0.6922  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.000711   Deviance explained = 0.15%
## -REML =  55014  Scale est. = 1.7807e+07  n = 5636
gam.check(gam_m)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 6 iterations.
## Gradient range [-0.01500944,0.01919385]
## (score 55014.2 & scale 17807260).
## Hessian positive definite, eigenvalue range [0.00108231,2815.481].
## Model rank =  37 / 37 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                         k'  edf k-index p-value
## s(num_times_stimulus_seen):blockall_complex           9.00 1.35    1.01    0.68
## s(num_times_stimulus_seen):blockall_simple            9.00 1.01    1.01    0.62
## s(num_times_stimulus_seen):blockmixed_complex_deviant 9.00 1.06    1.01    0.61
## s(num_times_stimulus_seen):blockmixed_simple_deviant  9.00 1.05    1.01    0.71
concurvity(gam_m, full = TRUE)
##                  para s(num_times_stimulus_seen):blockall_complex
## worst    4.313787e-05                                6.092671e-06
## observed 4.313787e-05                                5.126272e-06
## estimate 4.313787e-05                                4.633189e-06
##          s(num_times_stimulus_seen):blockall_simple
## worst                                  5.199012e-06
## observed                               2.897025e-06
## estimate                               2.704647e-06
##          s(num_times_stimulus_seen):blockmixed_complex_deviant
## worst                                             1.180826e-05
## observed                                          3.280353e-06
## estimate                                          2.557590e-06
##          s(num_times_stimulus_seen):blockmixed_simple_deviant
## worst                                            2.003918e-05
## observed                                         5.720030e-06
## estimate                                         7.292117e-06

number of repetitions by block (log?)

gam_m <- gam(log(trial_looking_time) ~ s(num_times_stimulus_seen, by = block), 
             data = gam_d, 
             method = "REML")

plot(gam_m, all.terms = TRUE, pages = 1,
     shade = TRUE,
     rug = TRUE, 
     se = TRUE)

summary(gam_m)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log(trial_looking_time) ~ s(num_times_stimulus_seen, by = block)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.27042    0.00867   723.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                                         edf Ref.df     F
## s(num_times_stimulus_seen):blockall_complex           2.463  3.068 4.672
## s(num_times_stimulus_seen):blockall_simple            6.443  7.587 8.832
## s(num_times_stimulus_seen):blockmixed_complex_deviant 3.332  4.136 4.069
## s(num_times_stimulus_seen):blockmixed_simple_deviant  2.302  2.869 1.355
##                                                       p-value    
## s(num_times_stimulus_seen):blockall_complex           0.00267 ** 
## s(num_times_stimulus_seen):blockall_simple            < 2e-16 ***
## s(num_times_stimulus_seen):blockmixed_complex_deviant 0.00228 ** 
## s(num_times_stimulus_seen):blockmixed_simple_deviant  0.21364    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0176   Deviance explained = 2.02%
## -REML = 5603.3  Scale est. = 0.4236    n = 5636
gam.check(gam_m)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [5.879741e-11,5.306652e-06]
## (score 5603.29 & scale 0.4235977).
## Hessian positive definite, eigenvalue range [0.04763123,2815.503].
## Model rank =  37 / 37 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                         k'  edf k-index p-value
## s(num_times_stimulus_seen):blockall_complex           9.00 2.46       1    0.46
## s(num_times_stimulus_seen):blockall_simple            9.00 6.44       1    0.37
## s(num_times_stimulus_seen):blockmixed_complex_deviant 9.00 3.33       1    0.47
## s(num_times_stimulus_seen):blockmixed_simple_deviant  9.00 2.30       1    0.46
concurvity(gam_m, full = TRUE)
##                  para s(num_times_stimulus_seen):blockall_complex
## worst    4.313787e-05                                6.092671e-06
## observed 4.313787e-05                                3.784249e-06
## estimate 4.313787e-05                                4.633189e-06
##          s(num_times_stimulus_seen):blockall_simple
## worst                                  5.199012e-06
## observed                               1.156366e-06
## estimate                               2.704647e-06
##          s(num_times_stimulus_seen):blockmixed_complex_deviant
## worst                                             1.180826e-05
## observed                                          1.124140e-06
## estimate                                          2.557590e-06
##          s(num_times_stimulus_seen):blockmixed_simple_deviant
## worst                                            2.003918e-05
## observed                                         1.607399e-05
## estimate                                         7.292117e-06

within-subject differences