1 Document Overview:

In this document, using condition level data, we:

estimate the size of perceptual novelty (PN) and violation of expectation (VOE) in experimental conditions to see how the size of these effects compare to each other
visualize the effect sizes of the conditions using forest and density plots
investigate publication bias using funnel plots and Egger’s test, then account for publication bias using the trim and fill method and selection models.
determine whether these effects are better modeled as differences or as ratios.
visualize PN and VOE in a scatterplot, find excluded conditions’ means and standard deviations for each trial type, and run a power analysis to determine necessary sample size to get power = 0.8 and alpha = 0.05 for estimated size of PN and VOE effects.

2 Imports

options(scipen = 1, digits = 3)
library(pacman)

pacman::p_load(plyr,
               tidyverse,
               metafor,
               lme4,
               lmerTest,
               cowplot,
               easystats,
               sjPlot,
               kableExtra,
               easystats,
               patchwork,
               weightr,
               pwr)

sessionInfo()

## R version 4.3.2 (2023-10-31)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.6.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] pwr_1.3-0           weightr_2.0.2       scales_1.2.1       
##  [4] patchwork_1.1.3     kableExtra_1.3.4    sjPlot_2.8.15      
##  [7] see_0.8.0           report_0.5.7        parameters_0.21.2  
## [10] performance_0.10.8  modelbased_0.8.6    insight_0.19.6     
## [13] effectsize_0.8.6    datawizard_0.9.0    correlation_0.8.4  
## [16] bayestestR_0.13.1   easystats_0.6.0     cowplot_1.1.1      
## [19] lmerTest_3.1-3      lme4_1.1-34         metafor_4.4-0      
## [22] numDeriv_2016.8-1.1 metadat_1.2-0       Matrix_1.6-1.1     
## [25] lubridate_1.9.3     forcats_1.0.0       stringr_1.5.0      
## [28] dplyr_1.1.3         purrr_1.0.2         readr_2.1.4        
## [31] tidyr_1.3.0         tibble_3.2.1        ggplot2_3.4.4      
## [34] tidyverse_2.0.0     plyr_1.8.9          pacman_0.5.1       
## 
## loaded via a namespace (and not attached):
##  [1] sandwich_3.0-2     rlang_1.1.1        magrittr_2.0.3     multcomp_1.4-25   
##  [5] compiler_4.3.2     systemfonts_1.0.5  vctrs_0.6.4        rvest_1.0.3       
##  [9] pkgconfig_2.0.3    fastmap_1.1.1      backports_1.4.1    utf8_1.2.4        
## [13] rmarkdown_2.25     tzdb_0.4.0         nloptr_2.0.3       xfun_0.40         
## [17] cachem_1.0.8       jsonlite_1.8.7     sjmisc_2.8.9       ggeffects_1.3.2   
## [21] broom_1.0.5        R6_2.5.1           bslib_0.5.1        stringi_1.7.12    
## [25] boot_1.3-28.1      jquerylib_0.1.4    estimability_1.4.1 Rcpp_1.0.11       
## [29] knitr_1.45         modelr_0.1.11      zoo_1.8-12         splines_4.3.2     
## [33] timechange_0.2.0   tidyselect_1.2.0   rstudioapi_0.15.0  yaml_2.3.7        
## [37] codetools_0.2-19   sjlabelled_1.2.0   lattice_0.21-9     withr_2.5.2       
## [41] evaluate_0.22      survival_3.5-7     xml2_1.3.5         pillar_1.9.0      
## [45] generics_0.1.3     mathjaxr_1.6-0     hms_1.1.3          munsell_0.5.0     
## [49] minqa_1.2.6        glue_1.6.2         emmeans_1.8.9      tools_4.3.2       
## [53] webshot_0.5.5      mvtnorm_1.2-3      grid_4.3.2         colorspace_2.1-0  
## [57] nlme_3.1-163       cli_3.6.1          fansi_1.0.5        viridisLite_0.4.2 
## [61] svglite_2.1.2      sjstats_0.18.2     gtable_0.3.4       sass_0.4.7        
## [65] digest_0.6.33      TH.data_1.1-2      htmltools_0.5.6.1  lifecycle_1.0.3   
## [69] httr_1.4.7         MASS_7.3-60

#Calculate measure of heterogeneity I^2 from Q and degrees of freedom
i2_from_q_df <- function(Q, df) {
  return ((Q-df)/Q*100)
}

3 Read Meta-analysis data

meta_analysis_data <- read_csv('processed_data/meta-analysis_data.csv') %>%
  filter(study_ID != "sanal-hayes_2022") #comment out to replicate results with Sanal-Hayes (2022) found in SM

## Rows: 109 Columns: 66
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (34): study_ID, long_cite, short_cite, doi, peer_reviewed, coder, expt_c...
## dbl (18): expt_num, n_1, mean_age_1, sd_age_1, x_1, x_2, x_3, SD_1, SD_2, SD...
## lgl (14): group_name_1, group_name_2, n_2, mean_age_2, t, F, r, d, d_var, co...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#Number of conditions and infants for both experimental and control conditions
num_cond <- meta_analysis_data %>%
  summarize(count = n())

num_infants_meta <- meta_analysis_data %>%
  summarize(num_infants = sum(n_1))

We have condition level data (including experimental and control conditions) from 107 conditions, with a total of 2494 infants.

#Effect size analysis is based on just experimental data
meta_analysis_data_exp <- meta_analysis_data %>%
  filter(exp_or_control == "experimental")

#Prepare data and contrast
meta_analysis_data_long <- meta_analysis_data_exp %>%
  pivot_longer(cols = c(x_1, x_2, x_3, SD_1, SD_2, SD_3), names_to = c(".value", "trial_type"), names_sep = "_") %>%
  mutate(trial_type = ifelse(trial_type == 1, "last_train", ifelse(trial_type == 2, "expected", "unexpected"))) %>%
  mutate(variance = SD^2/n_1) 

meta_analysis_data_long$trial_type <- as.factor(meta_analysis_data_long$trial_type)

4 Estimating size of PN and VOE effects

results <- rma.mv(x, variance, mods = ~trial_type, random = ~1|paper_expt_info, data = meta_analysis_data_long)

## Warning: 3 rows with NAs omitted from model fitting.

i2_from_q_df(results$QE, results$QEdf)

## [1] 91.5

effect_size_results <-data.frame(b = round(results$b, 5), SE = round(results$se, 3), z = round(results$zval, 3), p = ifelse(results$pval <= .001, "<.001", round(results$pval, 3)), `95% CI` = paste(round(results$ci.lb, 3), round(results$ci.ub, 3), sep = " - "))

rownames(effect_size_results) <- c("Expected", "Last train - Expected", "Unexpected - Expected")
colnames(effect_size_results) <- c("Estimate", "Standard Error", "z", "p", "95% CI")

#For PN value, negate last train - expected row.
effect_size_results%>%
  kable("html",align = 'clc',table.attr="id=effect_sizetable")%>%
  kable_styling("striped")

	Estimate	Standard Error	z	p	95% CI
Expected	13.84	0.985	14.05	<.001	11.906 - 15.767
Last train - Expected	-1.73	0.223	-7.78	<.001	-2.169 - -1.297
Unexpected - Expected	2.09	0.250	8.35	<.001	1.597 - 2.578

#tests whether the two differences are significantly different from each other
diff_check <- anova(results, X=c(0,-1,-1))
diff_check

## 
## Hypothesis:                                                    
## 1: -trial_typelast_train - trial_typeunexpected = 0 
## 
## Results:
##    estimate     se    zval   pval 
## 1:  -0.3545 0.4053 -0.8746 0.3818 
## 
## Test of Hypothesis:
## QM(df = 1) = 0.7649, p-val = 0.3818

We found that both the perceptual novelty and the violation of expectation effects were significantly greater than 0 (PN (first expected - last training trial): Mean difference = 1.733, standard error (SE) = 0.223, [1.297, 2.169], z = 7.783, p <.001; VOE (first unexpected - first expected trial): Mean difference = 2.088 seconds, standard error (SE) = 0.25, [1.597, 2.578], z = 8.347, p <.001). These two effect sizes were not significantly different from each other (p = 0.382).

4.1 Visualization: Barplot and Boxplot of data separated by trial type

#Values are weighted means from model estimating size of both effects
mean_eff <- results$beta[1]
mean_hab <- results$beta[1] + results$beta[2]
mean_ineff <-results$beta[1] + results$beta[3]

cil_eff <- results$ci.lb[1]
cil_hab <- results$ci.lb[1] + results$ci.lb[2]
cil_ineff <-results$ci.lb[1] + results$ci.lb[3]

ciu_eff <- results$ci.ub[1]
ciu_hab <- results$ci.ub[1] + results$ci.ub[2]
ciu_ineff <-results$ci.ub[1] + results$ci.ub[3]

trial_type = c("last_train", "expected", "unexpected")
means = c(mean_hab, mean_eff, mean_ineff)
cil = c(cil_hab, cil_eff, cil_ineff)
ciu = c(ciu_hab, ciu_eff, ciu_ineff)
mean_ci<-data.frame(trial_type, means, cil, ciu)

#Barplot
ggplot(mean_ci, aes(x=factor(trial_type, level=c('last_train', 'expected', 'unexpected')), y = means, fill=trial_type)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c('#00BA38', '#F8766D', '#619CFF')) +
  scale_fill_discrete(limits = c("last_train", "expected", "unexpected")) +
  geom_point(aes(x = trial_type, y = x, size = n_1), data = meta_analysis_data_long, alpha = .1)  +
  geom_errorbar(data = meta_analysis_data_long, width = .1, alpha = 0.1, inherit.aes = FALSE, aes(x = trial_type, ymin = x - SD/sqrt(n_1)*1.96, ymax = x + SD/sqrt(n_1)*1.96))+
  ylab("Looking Time (s)")+
  xlab("Trial Type")+
  coord_cartesian(ylim=c(0,20))+
  geom_line(data = meta_analysis_data_long, aes(x = trial_type, y = x, group = paper_expt_info), alpha = .1) + 
  geom_errorbar(data = mean_ci, width = .2, aes(ymin = cil, ymax = ciu), color = "black")+
  theme_cowplot(20) +
  theme(axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=0.5))

## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

## Warning: Removed 3 rows containing missing values (`geom_point()`).

## Warning: Removed 3 rows containing missing values (`geom_line()`).

#Boxplot
ggplot(meta_analysis_data_long, aes(x = trial_type, y = x)) +
  geom_boxplot() +
  scale_x_discrete(limits = c("last_train", "expected", "unexpected")) +
  geom_point(aes(x = trial_type, y = x, size = n_1, color = trial_type), data = meta_analysis_data_long, alpha = .2)  +
  geom_line(data = meta_analysis_data_long, aes(x = trial_type, y = x, group = paper_expt_info), alpha = .1) +
  ylab("Looking Time (s)")+
  xlab("Trial Type")+
  scale_color_manual(values=c('#00BA38', '#F8766D', '#619CFF'))

## Warning: Removed 3 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 3 rows containing missing values (`geom_point()`).

## Warning: Removed 3 rows containing missing values (`geom_line()`).

5 Forest and Density Plots

#get standard mean difference values for PN and VOE
smd_pn <- metafor::escalc(measure = "SMD", m1i = x_2, sd1i = SD_2, n1i = n_1, m2i = x_1, sd2i = SD_1, n2i = n_1, data = meta_analysis_data_exp)

smd_voe <- metafor::escalc(measure = "SMD", m1i = x_3, sd1i = SD_3, n1i = n_1, m2i = x_2, sd2i = SD_2, n2i = n_1, data = meta_analysis_data_exp)

#PN Forest plot
forest_plot_pn <- smd_pn %>%
  mutate(se_pn = sqrt(vi))%>%
  dplyr::select(paper_expt_info, yi, vi, se_pn, n_1)
forest_plot_pn$paper_expt_info=reorder(forest_plot_pn$paper_expt_info, forest_plot_pn$yi, FUN=mean)

no_mods_pn <- rma.mv(yi, vi, random = ~1|paper_expt_info, data = smd_pn)

## Warning: 1 row with NAs omitted from model fitting.

no_mods_pn

## 
## Multivariate Meta-Analysis Model (k = 75; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed           factor 
## sigma^2    0.2398  0.4896     75     no  paper_expt_info 
## 
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2421  0.0672  3.6041  0.0003  0.1104  0.3737  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

group_row_pn = matrix(c('Summary', no_mods_pn$b[1], no_mods_pn$se[1]^2, no_mods_pn$se[1], 1), nrow = 1)
group_row_pn = as.data.frame(group_row_pn)
names(group_row_pn) = names(forest_plot_pn)
forest_plot_pn <- rbind(forest_plot_pn, group_row_pn)
forest_plot_pn$yi = as.numeric(forest_plot_pn$yi)
forest_plot_pn$vi = as.numeric(forest_plot_pn$vi)
forest_plot_pn$se_pn = as.numeric(forest_plot_pn$se_pn)
forest_plot_pn$n_1 = as.numeric(forest_plot_pn$n_1)

PN_forest_plot <- ggplot(data = forest_plot_pn, aes(paper_expt_info, yi)) + 
  geom_point(aes(size = n_1)) +
  geom_point(data=subset(forest_plot_pn, paper_expt_info=='Summary'), color='black', shape=18, size=4)+
  scale_size(range = c(1, 3)) + 
  coord_flip() +
  geom_errorbar(width = .15, alpha = 1, aes(ymin = yi - 1.96*se_pn, ymax = yi + 1.96*se_pn)) +
  ggtitle("PN") +
  ylab("SMD") +
  xlab("Condition") +
  ylim(-3,3) +
  theme(plot.title = element_text(hjust = 0.5))

PN_forest_plot

## Warning: Removed 1 rows containing missing values (`geom_point()`).

#VOE forest plot
forest_plot_voe <- smd_voe %>%
  mutate(se_voe = sqrt(vi))%>%
  dplyr::select(paper_expt_info, yi, vi, se_voe, n_1)
forest_plot_voe$paper_expt_info=reorder(forest_plot_voe$paper_expt_info, forest_plot_voe$yi, FUN=mean)

no_mods_voe <- rma.mv(yi, vi, random = ~1|paper_expt_info, data = smd_voe)

## Warning: 1 row with NAs omitted from model fitting.

no_mods_voe

## 
## Multivariate Meta-Analysis Model (k = 75; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed           factor 
## sigma^2    0.0439  0.2096     75     no  paper_expt_info 
## 
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.0007
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2851  0.0421  6.7741  <.0001  0.2026  0.3676  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

group_row_voe = matrix(c('Summary', no_mods_voe$b[1], no_mods_voe$se[1]^2, no_mods_voe$se[1], 1), nrow = 1)
group_row_voe = as.data.frame(group_row_voe)
names(group_row_voe) = names(forest_plot_voe)
forest_plot_voe <- rbind(forest_plot_voe, group_row_voe)
forest_plot_voe$yi = as.numeric(forest_plot_voe$yi)
forest_plot_voe$vi = as.numeric(forest_plot_voe$vi)
forest_plot_voe$se_voe = as.numeric(forest_plot_voe$se_voe)
forest_plot_voe$n_1 = as.numeric(forest_plot_voe$n_1)

VOE_forest_plot <- ggplot(data = forest_plot_voe, aes(paper_expt_info, yi)) +
  geom_point(aes(size = n_1)) +
  geom_point(data=subset(forest_plot_voe, paper_expt_info=='Summary'), color='black', shape=18, size=4)+
  scale_size(range = c(1, 3)) + 
  coord_flip() +
  geom_errorbar(width = .15, alpha = 1, aes(ymin = yi - 1.96*se_voe, ymax = yi + 1.96*se_voe)) +
  ggtitle("VOE") +
  ylab("SMD") +
  xlab("Condition") +
  ylim(-3,3) +
  theme(plot.title = element_text(hjust = 0.5))

VOE_forest_plot

## Warning: Removed 1 rows containing missing values (`geom_point()`).

#Forest plot of both effects
#PN_forest_plot + VOE_forest_plot

#PN density plot

#Looking at distribution of PN effect sizes
num_cond_pn_observed <- sum(smd_pn$yi>0) #49 conditions where PN effect is numerically greater than 0

density_pn <- ggplot(aes(x = yi), data = smd_pn) +
  geom_density(fill = "grey50") +
  xlim(c(-2,2)) +
  ylim(c(-0.02,1.05))+
  xlab("Standard Mean Difference") +
  ylab("Density") +
  ggtitle("PN") +
  geom_point(x = as.numeric(group_row_pn$yi), y = 0, shape = 18, size = 3) +
  geom_errorbarh(height = 0.02, linewidth = 0.5, alpha = 1, aes(xmin = no_mods_pn$ci.lb, xmax = no_mods_pn$ci.ub, y = 0)) +
  theme(plot.title = element_text(hjust = 0.5))

#VOE density plot

#Looking at distribution of VOE effect sizes
num_cond_voe_observed <- sum(smd_voe$yi>0) #62 conditions where VOE effect is numerically greater than 0

density_voe <- ggplot(aes(x = yi), data = smd_voe) +
  geom_density(fill = "grey50") +
  xlim(c(-2,2)) +
  ylim(c(-0.02,1.05)) +
  xlab("Standard Mean Difference") +
  ylab("Density") +
  ggtitle("VOE") +
  geom_point(x = as.numeric(group_row_voe$yi), y = 0, shape = 18, size = 3) +
  geom_errorbarh(height = 0.02, linewidth = 0.5, alpha = 1, aes(xmin = no_mods_voe$ci.lb, xmax = no_mods_voe$ci.ub, y = 0)) +
  theme(plot.title = element_text(hjust = 0.5))

#Density plot both effects
density_pn + density_voe

## Warning: Removed 1 rows containing non-finite values (`stat_density()`).
## Removed 1 rows containing non-finite values (`stat_density()`).

6 Publication Bias tests

6.1 Funnel Plots and Egger’s Test

#Funnel plot of SMD PN
rma_pn_no_mods <- rma.mv(yi, vi, random = ~1|paper_expt_info, data = smd_pn)

## Warning: 1 row with NAs omitted from model fitting.

summary(rma_pn_no_mods)

## 
## Multivariate Meta-Analysis Model (k = 75; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc   
## -66.2537  132.5074  136.5074  141.1155  136.6764   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed           factor 
## sigma^2    0.2398  0.4896     75     no  paper_expt_info 
## 
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2421  0.0672  3.6041  0.0003  0.1104  0.3737  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

i2_from_q_df(rma_pn_no_mods$QE, rma_pn_no_mods$QEdf)

## [1] 73.3

funnel_pn <- metafor::funnel(rma_pn_no_mods, xlim = c(-2,2), ylim = c(0, 0.6))

#Egger's Test PN
pn_egger <- regtest(rma_pn_no_mods$yi, rma_pn_no_mods$vi)

#Funnel plot of SMD VOE
rma_voe_no_mods <- rma.mv(yi, vi, random = ~1|paper_expt_info, data = smd_voe)

## Warning: 1 row with NAs omitted from model fitting.

summary(rma_voe_no_mods)

## 
## Multivariate Meta-Analysis Model (k = 75; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc   
## -35.2516   70.5031   74.5031   79.1113   74.6721   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed           factor 
## sigma^2    0.0439  0.2096     75     no  paper_expt_info 
## 
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.0007
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2851  0.0421  6.7741  <.0001  0.2026  0.3676  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

i2_from_q_df(rma_voe_no_mods$QE, rma_voe_no_mods$QEdf)

## [1] 38

funnel_voe <- metafor::funnel(rma_voe_no_mods, xlim = c(-2,2), ylim = c(0, 0.6))

#Egger's Test VOE
voe_egger <- regtest(rma_voe_no_mods$yi, rma_voe_no_mods$vi)

Using Egger’s test for funnel plot asymmetry, we found that the distribution of effect sizes relative to study precision was not asymmetrical for the PN effect (b = 0.056, [-0.51, 0.622], p = 0.508), but was asymmetrical for the VOE effect (b = -0.49, [-0.807, -0.173], p<.001). This suggests some degree of publication bias for the VOE effect, which could take the form of selective reporting, the file drawer problem, and/or journals unwilling to publish null findings. In contrast, we found no evidence for publication bias based on the PN effect, which was not studied but was incidentally measured, in this literature.

6.2 Trim and Fill VOE

#found VOE to be asymmetrical - using trim and fill method to address asymmetry
rma_voe_no_random <-rma(yi, vi, data = smd_voe) #same as rma.mv when no moderators

## Warning: 1 study with NAs omitted from model fitting.

summary(rma_voe_no_random)

## 
## Random-Effects Model (k = 75; tau^2 estimator: REML)
## 
##   logLik  deviance       AIC       BIC      AICc   
## -35.2516   70.5031   74.5031   79.1113   74.6721   
## 
## tau^2 (estimated amount of total heterogeneity): 0.0439 (SE = 0.0208)
## tau (square root of estimated tau^2 value):      0.2096
## I^2 (total heterogeneity / total variability):   34.82%
## H^2 (total variability / sampling variability):  1.53
## 
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.0007
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2851  0.0421  6.7741  <.0001  0.2026  0.3676  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

trim_fill_voe <- trimfill(rma_voe_no_random) #Makes funnel plot symmetric by adding points
summary(trim_fill_voe)

## 
## Estimated number of missing studies on the left side: 18 (SE = 5.6752)
## 
## Random-Effects Model (k = 93; tau^2 estimator: REML)
## 
##   logLik  deviance       AIC       BIC      AICc   
## -67.8062  135.6123  139.6123  144.6559  139.7471   
## 
## tau^2 (estimated amount of total heterogeneity): 0.1013 (SE = 0.0291)
## tau (square root of estimated tau^2 value):      0.3183
## I^2 (total heterogeneity / total variability):   53.30%
## H^2 (total variability / sampling variability):  2.14
## 
## Test for Heterogeneity:
## Q(df = 92) = 204.6868, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.1662  0.0468  3.5541  0.0004  0.0746  0.2579  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

funnel_voe_trimfill <- metafor::funnel(trim_fill_voe, xlim = c(-2,2), ylim = c(0, 0.6))

We consider the implications of these results for the current and subsequent analyses. First, it is plausible that the above analysis overestimated the size of the VOE effect, relative to the size of the PN effect, because imprecise studies that show small VOE effects could be “missing” from our dataset. To examine the sensitivity of our results to this factor, we used the trim and fill method (Duval & Tweedie, 2000) to estimate the VOE effect size accounting for publication bias. This adjusted effect size (effect size = 0.166 standardized mean difference units, 95% CI [0.075, 0.258], z = 3.554, p < .001) was still similar, with overlapping confidence intervals, to the PN effect for which we found no evidence for publication bias (effect size = 0.242, 95% CI [0.11, 0.374], z = 3.604, p < .001). However, we caution against interpreting this estimate as a more “valid” estimate of the VOE effect size; rather, this shows that the VOE effect is robustly above 0, and as large as the PN effect, after considering the possibility of publication bias.

6.3 Selection Models

pn_weightfunct <- weightfunct(smd_pn$yi, smd_pn$vi)
pn_weightfunct

## 
## Unadjusted Model (k = 75):
## 
## tau^2 (estimated amount of total heterogeneity): 0.2350 (SE = 0.0553)
## tau (square root of estimated tau^2 value):  0.4848
## 
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val = 6.64e-25
## 
## Model Results:
## 
##           estimate std.error z-stat  p-val  ci.lb  ci.ub
## Intercept    0.242   0.06669  3.628 0.0003 0.1112 0.3727
## 
## Adjusted Model (k = 75):
## 
## tau^2 (estimated amount of total heterogeneity): 0.2652 (SE = 0.0706)
## tau (square root of estimated tau^2 value):  0.5150
## 
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val = 6.64e-25
## 
## Model Results:
## 
##               estimate std.error z-stat p-val   ci.lb  ci.ub
## Intercept       0.3261    0.1192  2.735 0.006 0.09243 0.5597
## 0.025 < p < 1   1.5299    0.6949  2.202 0.028 0.16799 2.8918
## 
## Likelihood Ratio Test:
## X^2(df = 1) = 0.884, p-val = 0.3
## There were  1 cases removed from your dataset due to the presence of missing data. To view the row numbers of these cases, use the attribute '$removed'.

voe_weightfunct <- weightfunct(smd_voe$yi, smd_voe$vi)
voe_weightfunct

## 
## Unadjusted Model (k = 75):
## 
## tau^2 (estimated amount of total heterogeneity): 0.0421 (SE = 0.0206)
## tau (square root of estimated tau^2 value):  0.2052
## 
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.000862
## 
## Model Results:
## 
##           estimate std.error z-stat p-val  ci.lb  ci.ub
## Intercept   0.2844   0.04262  6.673 3e-11 0.2009 0.3679
## 
## Adjusted Model (k = 75):
## 
## tau^2 (estimated amount of total heterogeneity): 0.0282 (SE = 0.0238)
## tau (square root of estimated tau^2 value):  0.1680
## 
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.000862
## 
## Model Results:
## 
##               estimate std.error z-stat  p-val    ci.lb  ci.ub
## Intercept       0.2379   0.06927  3.435 0.0006  0.10214 0.3737
## 0.025 < p < 1   0.6529   0.37604  1.736   0.08 -0.08415 1.3899
## 
## Likelihood Ratio Test:
## X^2(df = 1) = 0.544, p-val = 0.5
## There were  1 cases removed from your dataset due to the presence of missing data. To view the row numbers of these cases, use the attribute '$removed'.

We found no evidence for publication bias for either the VOE effect nor the PN effect using selection models (VOE: Likelihood Ratio Test, p = 0.3, PN: Likelihood Ratio Test, p = 0.3). We found the VOE effect was still significant (effect size = 0.238 standard mean difference units, 95% CI [0.102, 0.374], z = 3.435, p < .001)

7 Ratio vs Difference test

#Ratio of Means (log of ratio of means)
rom_pn <- metafor::escalc(measure = "ROM", m1i = x_2, sd1i = SD_2, n1i = n_1, m2i = x_1, sd2i = SD_1, n2i = n_1, data = meta_analysis_data_exp)

rom_voe <- metafor::escalc(measure = "ROM", m1i = x_3, sd1i = SD_3, n1i = n_1, m2i = x_2, sd2i = SD_2, n2i = n_1, data = meta_analysis_data_exp)

rma_pn_rom<- rma.mv(yi, vi, random = ~1|paper_expt_info, data = rom_pn)

## Warning: 1 row with NAs omitted from model fitting.

summary(rma_pn_rom)

## 
## Multivariate Meta-Analysis Model (k = 75; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc   
## -49.8399   99.6797  103.6797  108.2879  103.8487   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed           factor 
## sigma^2    0.1714  0.4140     75     no  paper_expt_info 
## 
## Test for Heterogeneity:
## Q(df = 74) = 394.7626, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2081  0.0547  3.8070  0.0001  0.1010  0.3153  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

rma_voe_rom<- rma.mv(yi, vi, random = ~1|paper_expt_info, data = rom_voe)

## Warning: 1 row with NAs omitted from model fitting.

summary(rma_voe_rom)

## 
## Multivariate Meta-Analysis Model (k = 75; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc   
##  -8.3744   16.7489   20.7489   25.3570   20.9179   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed           factor 
## sigma^2    0.0328  0.1811     75     no  paper_expt_info 
## 
## Test for Heterogeneity:
## Q(df = 74) = 141.8014, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2221  0.0313  7.1035  <.0001  0.1608  0.2834  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Comparisons of ROM vs SMD fits
rom_fit_pn <- fitstats(rma_pn_rom)
smd_fit_pn <- fitstats(rma_pn_no_mods)

rom_fit_voe <- fitstats(rma_voe_rom)
smd_fit_voe <- fitstats(rma_voe_no_mods)

We found that for both the violation-of-expectation and perceptual novelty effects, a model that expressed these condition-level effects as a log ratio of means fit the data better than a model that expressed these effects as a standardized mean difference (VOE: log ratio of means fit - [log likelihood = -8.374, deviance = 16.749, AIC = 20.749, BIC = 25.357] vs standard mean difference fit - [log likelihood = -35.252, deviance = 70.503, AIC = 74.503, BIC = 79.111]; PN: log ratio of means fit - [log likelihood = -49.84, deviance = 99.68, AIC = 103.68, BIC = 108.288] vs standard mean difference fit - [log likelihood = -66.254, deviance = 132.507, AIC = 136.507, BIC = 141.116])

8 Scatterplot PN vs VOE

pn_voe_in_sec <- meta_analysis_data_exp %>%
  mutate(PN = x_2 - x_1) %>%
  mutate(VOE = x_3 - x_2) %>%
  mutate(PN_sd = SD_2 + SD_1) %>%
  mutate(VOE_sd = SD_3 + SD_2)

scatter_pn_voe <- ggplot(data = pn_voe_in_sec, aes(x = VOE, y = PN)) +
  geom_point(aes(size = n_1), alpha = 0.5) +
  scale_size_continuous(range = c(1,4)) +
  geom_smooth(method = "lm")+
  geom_errorbar(width = .2, alpha = 0.1, inherit.aes = FALSE, aes(x = VOE, ymin = PN - PN_sd/sqrt(n_1), ymax = PN + PN_sd/sqrt(n_1))) +
  geom_errorbarh(alpha = 0.2, inherit.aes = FALSE, aes(y = PN, xmin = VOE - VOE_sd/sqrt(n_1), xmax = VOE + VOE_sd/sqrt(n_1))) +
  coord_cartesian(xlim = c(-15,30), ylim = c(-15,30)) +
  theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15))
scatter_pn_voe

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 1 rows containing missing values (`geom_point()`).

## Warning: Removed 1 rows containing missing values (`geom_errorbarh()`).

9 Excluded paper info

#Sanal-Hayes et al. (2022) means and sds
sanal_hayes <- read_csv('outputs_(3)/sanal-hayes_2022.csv') %>%
  group_by(expt_cond) %>%
  dplyr::summarize(mean_hab = mean(train4, na.rm = TRUE), sd_hab = sd(train4, na.rm = TRUE), mean_expected = mean(expected1, na.rm = TRUE), sd_expected = sd(expected1, na.rm = TRUE), mean_unexpected = mean(unexpected1, na.rm = TRUE), sd_unexpected = sd(unexpected1, na.rm = TRUE))

## Rows: 16 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): paper, expt_cond, training_type, order
## dbl (8): expt_num, subj, train1, train2, train3, train4, expected1, unexpected1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

sanal_hayes

## # A tibble: 2 × 7
##   expt_cond mean_hab sd_hab mean_expected sd_expected mean_unexpected
##   <chr>        <dbl>  <dbl>         <dbl>       <dbl>           <dbl>
## 1 2_large       2.25  0.999          3.42       1.01             2.90
## 2 2_small       2.27  0.473          2.47       0.531            2.76
## # ℹ 1 more variable: sd_unexpected <dbl>

10 Power analysis

no_mods_pn

## 
## Multivariate Meta-Analysis Model (k = 75; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed           factor 
## sigma^2    0.2398  0.4896     75     no  paper_expt_info 
## 
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2421  0.0672  3.6041  0.0003  0.1104  0.3737  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

no_mods_voe

## 
## Multivariate Meta-Analysis Model (k = 75; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed           factor 
## sigma^2    0.0439  0.2096     75     no  paper_expt_info 
## 
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.0007
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub      
##   0.2851  0.0421  6.7741  <.0001  0.2026  0.3676  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#PN
pwr.t.test(d = no_mods_pn$ci.lb, type = "paired", power = 0.8)$n #661

## [1] 646

pwr.t.test(d = no_mods_pn$b, type = "paired", power = 0.8)$n #139

## [1] 136

pwr.t.test(d = no_mods_pn$ci.ub, type = "paired", power = 0.8)$n #60

## [1] 58.2

#VOE
pwr.t.test(d = no_mods_voe$ci.lb, type = "paired", power = 0.8)$n #184

## [1] 193

pwr.t.test(d = no_mods_voe$b, type = "paired", power = 0.8)$n #96

## [1] 98.5

pwr.t.test(d = no_mods_voe$ci.ub, type = "paired", power = 0.8)$n #59

## [1] 60

1) Estimating PN and VOE effects using meta-analysis