Motivation

Peelle and Van Engen (2020) style multiverse analysis considering possible time windows with logistic growth curve models in a dataset with words of varying frequency, stimuli with varying levels of noise, and with young or old adults.

For our analysis, we will restrict ourselves to familiar words, and will model age effects.

# get local copy by running peekbank_explore.Rmd
load("data/aoi_data_joined.Rds")

fam_data <- aoi_data_joined %>%
  filter(age > 12, age <= 60, 
         stimulus_novelty == "familiar",
         dataset_name!="mahr_coartic") %>% # (only has endTimes up to 1800ms)
  mutate(age_binned = cut(age, seq(0,60,12))) 

rm(aoi_data_joined)

Run Window Models

# t_norm = [-1000,4000] 
get_alpha <- function (df) {
                    dfw <- pivot_wider(df,
                                       names_from = english_stimulus_label,
                                       values_from = prop_corr) %>%
                      select(-administration_id)
                    alphas <- psych::alpha(dfw)
                    return(alphas$total$raw_alpha)
}

# inter-item correlations
get_interitem <- function (df) {
   dfw <- pivot_wider(df,
                      names_from = english_stimulus_label,
                      values_from = prop_corr) %>%
     ungroup() %>%
     select(-administration_id)
   corrs <- corrr::correlate(dfw)
   mean_corr <- mean(rowMeans(select(corrs, -term), na.rm=TRUE), na.rm=TRUE)
   rm(corrs)
   return(mean_corr)
}
# Time bins are 25 ms 
startTs <- seq(from = -300, to = 1500, by = 25) 
# could use min(pb datasets min time) and max(pb datasets max time)
#windowLengths <- seq(from = 300, to = 2500, by = 25)
endTs <- seq(from = 0, to = 4000, by = 25)

param.grid <- expand_grid(startTs, endTs) %>%
   rename(startTimes = startTs, endTimes = endTs) %>%
   filter((endTimes - startTimes) > 25)

do_interitem_analysis <- function(startTime, endTime, fam_data) {
   print(paste(startTime, endTime))
      fam_data %>% filter(t_norm > startTime, 
                          t_norm < endTime) %>%
         group_by(dataset_name, age_binned, administration_id, english_stimulus_label) %>%
         summarise(prop_corr = sum(aoi == "target") /
                      sum(aoi %in% c("target", "distractor"))) %>%
         group_by(dataset_name, age_binned) %>%
         nest() %>%
         mutate(interitem =  lapply(data, quietly(get_interitem)),
                interitem = interitem[[1]]$result) %>%
         select(-data) %>%
         unnest(cols = "interitem") %>%
         group_by(age_binned) %>%
         summarise(mean = mean(interitem, na.rm=TRUE)) %>%
         mutate(startTime = startTime,
                endTime = endTime)
}

# test
#do_interitem_analysis(-300, 0)


mout <- map2_dfr(param.grid$startTimes, param.grid$endTimes, 
             do_interitem_analysis, fam_data)
# `summarise()` regrouping output by 'dataset_name', 'age_binned', 'administration_id' (override with `.groups` argument)
# `summarise()` ungrouping output (override with `.groups` argument)

# this should also work, but does not
#mout <- param.grid %>% map2_dfr(startTimes, endTimes, 
#             do_interitem_analysis)
# Error in as_mapper(.f, ...) : object 'endTimes' not found


write.table(mout, file="data/time-window-results.csv")      

Cross-validation

Leave one dataset out, evaluate correlation

cv_results <- list()
for(dset in datasets$dataset_name) {
   dat <- fam_data %>% filter(dataset_name != dset)
   cv_results[[dset]] <- map2_dfr(param.grid$startTimes, param.grid$endTimes, 
             do_interitem_analysis, dat)
}

save(cv_results, file="data/time_window_cv.Rdata")
# correlate ICCs with each left-out dataset with each other left-out dataset
# (maybe we really just want total ICCs vs. with each dataset left out)
cor_mat <- matrix(NA, nrow=length(cv_results), ncol=length(cv_results))
colnames(cor_mat) = names(cv_results)
rownames(cor_mat) = names(cv_results)

for(i in names(cv_results)) {
   for(j in names(cv_results)) {
      if(i!=j) cor_mat[i,j] = cor(cv_results[[i]]$mean, cv_results[[j]]$mean)
   }
}

cor_mat %>% kableExtra::kable(digits=2)

Below we show the correlations and sum of squared error between the ICCs from the full dataset vs. with each dataset held out. Holding out attword has the largest influence, and holding out reflook_v4 also shows some deviation. We will visualize the ICCs with each of these datasets held out and try to characterize their influence.

held_out_data r SSE
attword_processed 0.780 21.783
reflook_v4 0.914 11.263
frank_tablet_2016 0.963 7.902
reflook_socword 0.941 7.123
casillas_tseltal_2015 0.990 1.969
potter_remix 0.988 1.509
garrison_bergelson_2020 0.991 1.045
adams_marchman_2018 0.995 0.609
potter_canine 0.997 0.362
swingley_aslin_2002 0.997 0.349
byers-heinlein_2017 0.998 0.274
pomper_saffran_2016 0.999 0.189
pomper_salientme 0.999 0.109
perry_cowpig 0.999 0.081
mahr_coartic 1.000 0.000

Visualization

Visualize inter-item correlation (per-subject, dataset, and age-bin) as a function of start time and window length.

ICCs without attword

Without attword, the strongest ICCs (in 48-60 month-olds at short, early time windows) generally disappear, and ICCs increase at later end times for this oldest age group.

ICCs without reflook_v4

Without reflook_v4, again the changes are mostly in the oldest age group: early start times and late end times (upper left of bottom right panel) show ICCs close to 0 (instead of positive).

Maybe annotate the region that we recommend?

Additional analyses

What is the overall relationship between strength of correlation and start time, end time, or window length?

mout <- mout %>% mutate(windowLength = endTime - startTime) 
# summary(lm(data=mout, mean ~ startTime +  windowLength))

cor(mout[,2:5])[1,2:4]
##    startTime      endTime windowLength 
##    0.3173282    0.6259257    0.4623842

Correlations only among the positive IICs:

prop_positive = length(which(mout$mean>0)) / nrow(mout) # 92.6% positive
hist(mout$mean)

mout_pos <- mout %>% filter(mean > 0)
cor(mout_pos[,2:5])[1,2:4]
##    startTime      endTime windowLength 
##    0.1746226    0.4532195    0.3253236

0.9 of the time windows investigated resulted in positive IICs, and the median IIC from all simulations was 0.06. The correlations of IIC with start time, end time, and window length are shown above: end time is most predictive of higher IIC.

Recommendations

Where do we get bad ICCs? (<.01 – should we consider a higher threshold? e.g. median IIC)

#require(GGally)
mout_neg <- mout %>% filter(mean < 0.01)
#ggpairs(mout_neg, columns=2:5, aes(color=age_binned), alpha=.3)
summary(mout_neg)
##   age_binned             mean              startTime         endTime      
##  Length:5345        Min.   :-0.1468689   Min.   :-300.0   Min.   :   0.0  
##  Class :character   1st Qu.:-0.0189314   1st Qu.:-150.0   1st Qu.: 500.0  
##  Mode  :character   Median :-0.0095453   Median :  50.0   Median : 800.0  
##                     Mean   :-0.0111150   Mean   : 106.3   Mean   : 794.1  
##                     3rd Qu.: 0.0005957   3rd Qu.: 275.0   3rd Qu.:1025.0  
##                     Max.   : 0.0099724   Max.   :1500.0   Max.   :2125.0  
##   windowLength   
##  Min.   :  50.0  
##  1st Qu.: 350.0  
##  Median : 650.0  
##  Mean   : 687.8  
##  3rd Qu.:1000.0  
##  Max.   :1900.0
start_lt300 = length(which(mout_neg$startTime < 300)) / nrow(mout_neg)
start_lt500 = length(which(mout_neg$startTime < 500)) / nrow(mout_neg) 

# condition on start time at least 300ms like reasonable researchers? 
# which(mout_neg$startTime >= 300 & 
winLength_lt1500 = length(which(mout_neg$windowLength < 1500)) / nrow(mout_neg) # .94
win1500_start300 = length(which(mout_neg$startTime >= 300 & mout_neg$windowLength >= 1500)) / nrow(mout_neg)

rec_settings_iic_avg = mean(subset(mout, startTime>=500 & windowLength>=1500)$mean)

75% of low IICs (<.01) had start times <300ms, but pushing the start time out to at least 500 ms eliminated 87% of the low IICs. A window length of at least 1500 ms eliminated 97% of low ICCs, and this threshold combined with a start time of at least 300 ms eliminated all but 0% of low ICCs. A start time of 500 ms and a window of at least 1500 ms resulted in no IICs < .01, and an average IIC of 0.08.