In this document, using condition level data, we:
estimate the size of perceptual novelty (PN) and violation of expectation (VOE) in experimental conditions to see how the size of these effects compare to each other
visualize the effect sizes of the conditions using forest and density plots
investigate publication bias using funnel plots and Egger’s test, then account for publication bias using the trim and fill method and selection models.
determine whether these effects are better modeled as differences or as ratios.
visualize PN and VOE in a scatterplot, find excluded conditions’ means and standard deviations for each trial type, and run a power analysis to determine necessary sample size to get power = 0.8 and alpha = 0.05 for estimated size of PN and VOE effects.
options(scipen = 1, digits = 3)
library(pacman)
pacman::p_load(plyr,
tidyverse,
metafor,
lme4,
lmerTest,
cowplot,
easystats,
sjPlot,
kableExtra,
easystats,
patchwork,
weightr,
pwr)
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.6.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pwr_1.3-0 weightr_2.0.2 scales_1.2.1
## [4] patchwork_1.1.3 kableExtra_1.3.4 sjPlot_2.8.15
## [7] see_0.8.0 report_0.5.7 parameters_0.21.2
## [10] performance_0.10.8 modelbased_0.8.6 insight_0.19.6
## [13] effectsize_0.8.6 datawizard_0.9.0 correlation_0.8.4
## [16] bayestestR_0.13.1 easystats_0.6.0 cowplot_1.1.1
## [19] lmerTest_3.1-3 lme4_1.1-34 metafor_4.4-0
## [22] numDeriv_2016.8-1.1 metadat_1.2-0 Matrix_1.6-1.1
## [25] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.0
## [28] dplyr_1.1.3 purrr_1.0.2 readr_2.1.4
## [31] tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.4
## [34] tidyverse_2.0.0 plyr_1.8.9 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] sandwich_3.0-2 rlang_1.1.1 magrittr_2.0.3 multcomp_1.4-25
## [5] compiler_4.3.2 systemfonts_1.0.5 vctrs_0.6.4 rvest_1.0.3
## [9] pkgconfig_2.0.3 fastmap_1.1.1 backports_1.4.1 utf8_1.2.4
## [13] rmarkdown_2.25 tzdb_0.4.0 nloptr_2.0.3 xfun_0.40
## [17] cachem_1.0.8 jsonlite_1.8.7 sjmisc_2.8.9 ggeffects_1.3.2
## [21] broom_1.0.5 R6_2.5.1 bslib_0.5.1 stringi_1.7.12
## [25] boot_1.3-28.1 jquerylib_0.1.4 estimability_1.4.1 Rcpp_1.0.11
## [29] knitr_1.45 modelr_0.1.11 zoo_1.8-12 splines_4.3.2
## [33] timechange_0.2.0 tidyselect_1.2.0 rstudioapi_0.15.0 yaml_2.3.7
## [37] codetools_0.2-19 sjlabelled_1.2.0 lattice_0.21-9 withr_2.5.2
## [41] evaluate_0.22 survival_3.5-7 xml2_1.3.5 pillar_1.9.0
## [45] generics_0.1.3 mathjaxr_1.6-0 hms_1.1.3 munsell_0.5.0
## [49] minqa_1.2.6 glue_1.6.2 emmeans_1.8.9 tools_4.3.2
## [53] webshot_0.5.5 mvtnorm_1.2-3 grid_4.3.2 colorspace_2.1-0
## [57] nlme_3.1-163 cli_3.6.1 fansi_1.0.5 viridisLite_0.4.2
## [61] svglite_2.1.2 sjstats_0.18.2 gtable_0.3.4 sass_0.4.7
## [65] digest_0.6.33 TH.data_1.1-2 htmltools_0.5.6.1 lifecycle_1.0.3
## [69] httr_1.4.7 MASS_7.3-60
#Calculate measure of heterogeneity I^2 from Q and degrees of freedom
i2_from_q_df <- function(Q, df) {
return ((Q-df)/Q*100)
}
meta_analysis_data <- read_csv('processed_data/meta-analysis_data.csv') %>%
filter(study_ID != "sanal-hayes_2022") #comment out to replicate results with Sanal-Hayes (2022) found in SM
## Rows: 109 Columns: 66
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (34): study_ID, long_cite, short_cite, doi, peer_reviewed, coder, expt_c...
## dbl (18): expt_num, n_1, mean_age_1, sd_age_1, x_1, x_2, x_3, SD_1, SD_2, SD...
## lgl (14): group_name_1, group_name_2, n_2, mean_age_2, t, F, r, d, d_var, co...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Number of conditions and infants for both experimental and control conditions
num_cond <- meta_analysis_data %>%
summarize(count = n())
num_infants_meta <- meta_analysis_data %>%
summarize(num_infants = sum(n_1))
We have condition level data (including experimental and control conditions) from 107 conditions, with a total of 2494 infants.
#Effect size analysis is based on just experimental data
meta_analysis_data_exp <- meta_analysis_data %>%
filter(exp_or_control == "experimental")
#Prepare data and contrast
meta_analysis_data_long <- meta_analysis_data_exp %>%
pivot_longer(cols = c(x_1, x_2, x_3, SD_1, SD_2, SD_3), names_to = c(".value", "trial_type"), names_sep = "_") %>%
mutate(trial_type = ifelse(trial_type == 1, "last_train", ifelse(trial_type == 2, "expected", "unexpected"))) %>%
mutate(variance = SD^2/n_1)
meta_analysis_data_long$trial_type <- as.factor(meta_analysis_data_long$trial_type)
results <- rma.mv(x, variance, mods = ~trial_type, random = ~1|paper_expt_info, data = meta_analysis_data_long)
## Warning: 3 rows with NAs omitted from model fitting.
i2_from_q_df(results$QE, results$QEdf)
## [1] 91.5
effect_size_results <-data.frame(b = round(results$b, 5), SE = round(results$se, 3), z = round(results$zval, 3), p = ifelse(results$pval <= .001, "<.001", round(results$pval, 3)), `95% CI` = paste(round(results$ci.lb, 3), round(results$ci.ub, 3), sep = " - "))
rownames(effect_size_results) <- c("Expected", "Last train - Expected", "Unexpected - Expected")
colnames(effect_size_results) <- c("Estimate", "Standard Error", "z", "p", "95% CI")
#For PN value, negate last train - expected row.
effect_size_results%>%
kable("html",align = 'clc',table.attr="id=effect_sizetable")%>%
kable_styling("striped")
| Estimate | Standard Error | z | p | 95% CI | |
|---|---|---|---|---|---|
| Expected | 13.84 | 0.985 | 14.05 | <.001 | 11.906 - 15.767 |
| Last train - Expected | -1.73 | 0.223 | -7.78 | <.001 | -2.169 - -1.297 |
| Unexpected - Expected | 2.09 | 0.250 | 8.35 | <.001 | 1.597 - 2.578 |
#tests whether the two differences are significantly different from each other
diff_check <- anova(results, X=c(0,-1,-1))
diff_check
##
## Hypothesis:
## 1: -trial_typelast_train - trial_typeunexpected = 0
##
## Results:
## estimate se zval pval
## 1: -0.3545 0.4053 -0.8746 0.3818
##
## Test of Hypothesis:
## QM(df = 1) = 0.7649, p-val = 0.3818
We found that both the perceptual novelty and the violation of expectation effects were significantly greater than 0 (PN (first expected - last training trial): Mean difference = 1.733, standard error (SE) = 0.223, [1.297, 2.169], z = 7.783, p <.001; VOE (first unexpected - first expected trial): Mean difference = 2.088 seconds, standard error (SE) = 0.25, [1.597, 2.578], z = 8.347, p <.001). These two effect sizes were not significantly different from each other (p = 0.382).
#Values are weighted means from model estimating size of both effects
mean_eff <- results$beta[1]
mean_hab <- results$beta[1] + results$beta[2]
mean_ineff <-results$beta[1] + results$beta[3]
cil_eff <- results$ci.lb[1]
cil_hab <- results$ci.lb[1] + results$ci.lb[2]
cil_ineff <-results$ci.lb[1] + results$ci.lb[3]
ciu_eff <- results$ci.ub[1]
ciu_hab <- results$ci.ub[1] + results$ci.ub[2]
ciu_ineff <-results$ci.ub[1] + results$ci.ub[3]
trial_type = c("last_train", "expected", "unexpected")
means = c(mean_hab, mean_eff, mean_ineff)
cil = c(cil_hab, cil_eff, cil_ineff)
ciu = c(ciu_hab, ciu_eff, ciu_ineff)
mean_ci<-data.frame(trial_type, means, cil, ciu)
#Barplot
ggplot(mean_ci, aes(x=factor(trial_type, level=c('last_train', 'expected', 'unexpected')), y = means, fill=trial_type)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c('#00BA38', '#F8766D', '#619CFF')) +
scale_fill_discrete(limits = c("last_train", "expected", "unexpected")) +
geom_point(aes(x = trial_type, y = x, size = n_1), data = meta_analysis_data_long, alpha = .1) +
geom_errorbar(data = meta_analysis_data_long, width = .1, alpha = 0.1, inherit.aes = FALSE, aes(x = trial_type, ymin = x - SD/sqrt(n_1)*1.96, ymax = x + SD/sqrt(n_1)*1.96))+
ylab("Looking Time (s)")+
xlab("Trial Type")+
coord_cartesian(ylim=c(0,20))+
geom_line(data = meta_analysis_data_long, aes(x = trial_type, y = x, group = paper_expt_info), alpha = .1) +
geom_errorbar(data = mean_ci, width = .2, aes(ymin = cil, ymax = ciu), color = "black")+
theme_cowplot(20) +
theme(axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=0.5))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Warning: Removed 3 rows containing missing values (`geom_line()`).
#Boxplot
ggplot(meta_analysis_data_long, aes(x = trial_type, y = x)) +
geom_boxplot() +
scale_x_discrete(limits = c("last_train", "expected", "unexpected")) +
geom_point(aes(x = trial_type, y = x, size = n_1, color = trial_type), data = meta_analysis_data_long, alpha = .2) +
geom_line(data = meta_analysis_data_long, aes(x = trial_type, y = x, group = paper_expt_info), alpha = .1) +
ylab("Looking Time (s)")+
xlab("Trial Type")+
scale_color_manual(values=c('#00BA38', '#F8766D', '#619CFF'))
## Warning: Removed 3 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Warning: Removed 3 rows containing missing values (`geom_line()`).
#get standard mean difference values for PN and VOE
smd_pn <- metafor::escalc(measure = "SMD", m1i = x_2, sd1i = SD_2, n1i = n_1, m2i = x_1, sd2i = SD_1, n2i = n_1, data = meta_analysis_data_exp)
smd_voe <- metafor::escalc(measure = "SMD", m1i = x_3, sd1i = SD_3, n1i = n_1, m2i = x_2, sd2i = SD_2, n2i = n_1, data = meta_analysis_data_exp)
#PN Forest plot
forest_plot_pn <- smd_pn %>%
mutate(se_pn = sqrt(vi))%>%
dplyr::select(paper_expt_info, yi, vi, se_pn, n_1)
forest_plot_pn$paper_expt_info=reorder(forest_plot_pn$paper_expt_info, forest_plot_pn$yi, FUN=mean)
no_mods_pn <- rma.mv(yi, vi, random = ~1|paper_expt_info, data = smd_pn)
## Warning: 1 row with NAs omitted from model fitting.
no_mods_pn
##
## Multivariate Meta-Analysis Model (k = 75; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.2398 0.4896 75 no paper_expt_info
##
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2421 0.0672 3.6041 0.0003 0.1104 0.3737 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
group_row_pn = matrix(c('Summary', no_mods_pn$b[1], no_mods_pn$se[1]^2, no_mods_pn$se[1], 1), nrow = 1)
group_row_pn = as.data.frame(group_row_pn)
names(group_row_pn) = names(forest_plot_pn)
forest_plot_pn <- rbind(forest_plot_pn, group_row_pn)
forest_plot_pn$yi = as.numeric(forest_plot_pn$yi)
forest_plot_pn$vi = as.numeric(forest_plot_pn$vi)
forest_plot_pn$se_pn = as.numeric(forest_plot_pn$se_pn)
forest_plot_pn$n_1 = as.numeric(forest_plot_pn$n_1)
PN_forest_plot <- ggplot(data = forest_plot_pn, aes(paper_expt_info, yi)) +
geom_point(aes(size = n_1)) +
geom_point(data=subset(forest_plot_pn, paper_expt_info=='Summary'), color='black', shape=18, size=4)+
scale_size(range = c(1, 3)) +
coord_flip() +
geom_errorbar(width = .15, alpha = 1, aes(ymin = yi - 1.96*se_pn, ymax = yi + 1.96*se_pn)) +
ggtitle("PN") +
ylab("SMD") +
xlab("Condition") +
ylim(-3,3) +
theme(plot.title = element_text(hjust = 0.5))
PN_forest_plot
## Warning: Removed 1 rows containing missing values (`geom_point()`).
#VOE forest plot
forest_plot_voe <- smd_voe %>%
mutate(se_voe = sqrt(vi))%>%
dplyr::select(paper_expt_info, yi, vi, se_voe, n_1)
forest_plot_voe$paper_expt_info=reorder(forest_plot_voe$paper_expt_info, forest_plot_voe$yi, FUN=mean)
no_mods_voe <- rma.mv(yi, vi, random = ~1|paper_expt_info, data = smd_voe)
## Warning: 1 row with NAs omitted from model fitting.
no_mods_voe
##
## Multivariate Meta-Analysis Model (k = 75; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.0439 0.2096 75 no paper_expt_info
##
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.0007
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2851 0.0421 6.7741 <.0001 0.2026 0.3676 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
group_row_voe = matrix(c('Summary', no_mods_voe$b[1], no_mods_voe$se[1]^2, no_mods_voe$se[1], 1), nrow = 1)
group_row_voe = as.data.frame(group_row_voe)
names(group_row_voe) = names(forest_plot_voe)
forest_plot_voe <- rbind(forest_plot_voe, group_row_voe)
forest_plot_voe$yi = as.numeric(forest_plot_voe$yi)
forest_plot_voe$vi = as.numeric(forest_plot_voe$vi)
forest_plot_voe$se_voe = as.numeric(forest_plot_voe$se_voe)
forest_plot_voe$n_1 = as.numeric(forest_plot_voe$n_1)
VOE_forest_plot <- ggplot(data = forest_plot_voe, aes(paper_expt_info, yi)) +
geom_point(aes(size = n_1)) +
geom_point(data=subset(forest_plot_voe, paper_expt_info=='Summary'), color='black', shape=18, size=4)+
scale_size(range = c(1, 3)) +
coord_flip() +
geom_errorbar(width = .15, alpha = 1, aes(ymin = yi - 1.96*se_voe, ymax = yi + 1.96*se_voe)) +
ggtitle("VOE") +
ylab("SMD") +
xlab("Condition") +
ylim(-3,3) +
theme(plot.title = element_text(hjust = 0.5))
VOE_forest_plot
## Warning: Removed 1 rows containing missing values (`geom_point()`).
#Forest plot of both effects
#PN_forest_plot + VOE_forest_plot
#PN density plot
#Looking at distribution of PN effect sizes
num_cond_pn_observed <- sum(smd_pn$yi>0) #49 conditions where PN effect is numerically greater than 0
density_pn <- ggplot(aes(x = yi), data = smd_pn) +
geom_density(fill = "grey50") +
xlim(c(-2,2)) +
ylim(c(-0.02,1.05))+
xlab("Standard Mean Difference") +
ylab("Density") +
ggtitle("PN") +
geom_point(x = as.numeric(group_row_pn$yi), y = 0, shape = 18, size = 3) +
geom_errorbarh(height = 0.02, linewidth = 0.5, alpha = 1, aes(xmin = no_mods_pn$ci.lb, xmax = no_mods_pn$ci.ub, y = 0)) +
theme(plot.title = element_text(hjust = 0.5))
#VOE density plot
#Looking at distribution of VOE effect sizes
num_cond_voe_observed <- sum(smd_voe$yi>0) #62 conditions where VOE effect is numerically greater than 0
density_voe <- ggplot(aes(x = yi), data = smd_voe) +
geom_density(fill = "grey50") +
xlim(c(-2,2)) +
ylim(c(-0.02,1.05)) +
xlab("Standard Mean Difference") +
ylab("Density") +
ggtitle("VOE") +
geom_point(x = as.numeric(group_row_voe$yi), y = 0, shape = 18, size = 3) +
geom_errorbarh(height = 0.02, linewidth = 0.5, alpha = 1, aes(xmin = no_mods_voe$ci.lb, xmax = no_mods_voe$ci.ub, y = 0)) +
theme(plot.title = element_text(hjust = 0.5))
#Density plot both effects
density_pn + density_voe
## Warning: Removed 1 rows containing non-finite values (`stat_density()`).
## Removed 1 rows containing non-finite values (`stat_density()`).
#Funnel plot of SMD PN
rma_pn_no_mods <- rma.mv(yi, vi, random = ~1|paper_expt_info, data = smd_pn)
## Warning: 1 row with NAs omitted from model fitting.
summary(rma_pn_no_mods)
##
## Multivariate Meta-Analysis Model (k = 75; method: REML)
##
## logLik Deviance AIC BIC AICc
## -66.2537 132.5074 136.5074 141.1155 136.6764
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.2398 0.4896 75 no paper_expt_info
##
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2421 0.0672 3.6041 0.0003 0.1104 0.3737 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
i2_from_q_df(rma_pn_no_mods$QE, rma_pn_no_mods$QEdf)
## [1] 73.3
funnel_pn <- metafor::funnel(rma_pn_no_mods, xlim = c(-2,2), ylim = c(0, 0.6))
#Egger's Test PN
pn_egger <- regtest(rma_pn_no_mods$yi, rma_pn_no_mods$vi)
#Funnel plot of SMD VOE
rma_voe_no_mods <- rma.mv(yi, vi, random = ~1|paper_expt_info, data = smd_voe)
## Warning: 1 row with NAs omitted from model fitting.
summary(rma_voe_no_mods)
##
## Multivariate Meta-Analysis Model (k = 75; method: REML)
##
## logLik Deviance AIC BIC AICc
## -35.2516 70.5031 74.5031 79.1113 74.6721
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.0439 0.2096 75 no paper_expt_info
##
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.0007
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2851 0.0421 6.7741 <.0001 0.2026 0.3676 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
i2_from_q_df(rma_voe_no_mods$QE, rma_voe_no_mods$QEdf)
## [1] 38
funnel_voe <- metafor::funnel(rma_voe_no_mods, xlim = c(-2,2), ylim = c(0, 0.6))
#Egger's Test VOE
voe_egger <- regtest(rma_voe_no_mods$yi, rma_voe_no_mods$vi)
Using Egger’s test for funnel plot asymmetry, we found that the distribution of effect sizes relative to study precision was not asymmetrical for the PN effect (b = 0.056, [-0.51, 0.622], p = 0.508), but was asymmetrical for the VOE effect (b = -0.49, [-0.807, -0.173], p<.001). This suggests some degree of publication bias for the VOE effect, which could take the form of selective reporting, the file drawer problem, and/or journals unwilling to publish null findings. In contrast, we found no evidence for publication bias based on the PN effect, which was not studied but was incidentally measured, in this literature.
#found VOE to be asymmetrical - using trim and fill method to address asymmetry
rma_voe_no_random <-rma(yi, vi, data = smd_voe) #same as rma.mv when no moderators
## Warning: 1 study with NAs omitted from model fitting.
summary(rma_voe_no_random)
##
## Random-Effects Model (k = 75; tau^2 estimator: REML)
##
## logLik deviance AIC BIC AICc
## -35.2516 70.5031 74.5031 79.1113 74.6721
##
## tau^2 (estimated amount of total heterogeneity): 0.0439 (SE = 0.0208)
## tau (square root of estimated tau^2 value): 0.2096
## I^2 (total heterogeneity / total variability): 34.82%
## H^2 (total variability / sampling variability): 1.53
##
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.0007
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2851 0.0421 6.7741 <.0001 0.2026 0.3676 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
trim_fill_voe <- trimfill(rma_voe_no_random) #Makes funnel plot symmetric by adding points
summary(trim_fill_voe)
##
## Estimated number of missing studies on the left side: 18 (SE = 5.6752)
##
## Random-Effects Model (k = 93; tau^2 estimator: REML)
##
## logLik deviance AIC BIC AICc
## -67.8062 135.6123 139.6123 144.6559 139.7471
##
## tau^2 (estimated amount of total heterogeneity): 0.1013 (SE = 0.0291)
## tau (square root of estimated tau^2 value): 0.3183
## I^2 (total heterogeneity / total variability): 53.30%
## H^2 (total variability / sampling variability): 2.14
##
## Test for Heterogeneity:
## Q(df = 92) = 204.6868, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.1662 0.0468 3.5541 0.0004 0.0746 0.2579 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
funnel_voe_trimfill <- metafor::funnel(trim_fill_voe, xlim = c(-2,2), ylim = c(0, 0.6))
We consider the implications of these results for the current and subsequent analyses. First, it is plausible that the above analysis overestimated the size of the VOE effect, relative to the size of the PN effect, because imprecise studies that show small VOE effects could be “missing” from our dataset. To examine the sensitivity of our results to this factor, we used the trim and fill method (Duval & Tweedie, 2000) to estimate the VOE effect size accounting for publication bias. This adjusted effect size (effect size = 0.166 standardized mean difference units, 95% CI [0.075, 0.258], z = 3.554, p < .001) was still similar, with overlapping confidence intervals, to the PN effect for which we found no evidence for publication bias (effect size = 0.242, 95% CI [0.11, 0.374], z = 3.604, p < .001). However, we caution against interpreting this estimate as a more “valid” estimate of the VOE effect size; rather, this shows that the VOE effect is robustly above 0, and as large as the PN effect, after considering the possibility of publication bias.
pn_weightfunct <- weightfunct(smd_pn$yi, smd_pn$vi)
pn_weightfunct
##
## Unadjusted Model (k = 75):
##
## tau^2 (estimated amount of total heterogeneity): 0.2350 (SE = 0.0553)
## tau (square root of estimated tau^2 value): 0.4848
##
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val = 6.64e-25
##
## Model Results:
##
## estimate std.error z-stat p-val ci.lb ci.ub
## Intercept 0.242 0.06669 3.628 0.0003 0.1112 0.3727
##
## Adjusted Model (k = 75):
##
## tau^2 (estimated amount of total heterogeneity): 0.2652 (SE = 0.0706)
## tau (square root of estimated tau^2 value): 0.5150
##
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val = 6.64e-25
##
## Model Results:
##
## estimate std.error z-stat p-val ci.lb ci.ub
## Intercept 0.3261 0.1192 2.735 0.006 0.09243 0.5597
## 0.025 < p < 1 1.5299 0.6949 2.202 0.028 0.16799 2.8918
##
## Likelihood Ratio Test:
## X^2(df = 1) = 0.884, p-val = 0.3
## There were 1 cases removed from your dataset due to the presence of missing data. To view the row numbers of these cases, use the attribute '$removed'.
voe_weightfunct <- weightfunct(smd_voe$yi, smd_voe$vi)
voe_weightfunct
##
## Unadjusted Model (k = 75):
##
## tau^2 (estimated amount of total heterogeneity): 0.0421 (SE = 0.0206)
## tau (square root of estimated tau^2 value): 0.2052
##
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.000862
##
## Model Results:
##
## estimate std.error z-stat p-val ci.lb ci.ub
## Intercept 0.2844 0.04262 6.673 3e-11 0.2009 0.3679
##
## Adjusted Model (k = 75):
##
## tau^2 (estimated amount of total heterogeneity): 0.0282 (SE = 0.0238)
## tau (square root of estimated tau^2 value): 0.1680
##
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.000862
##
## Model Results:
##
## estimate std.error z-stat p-val ci.lb ci.ub
## Intercept 0.2379 0.06927 3.435 0.0006 0.10214 0.3737
## 0.025 < p < 1 0.6529 0.37604 1.736 0.08 -0.08415 1.3899
##
## Likelihood Ratio Test:
## X^2(df = 1) = 0.544, p-val = 0.5
## There were 1 cases removed from your dataset due to the presence of missing data. To view the row numbers of these cases, use the attribute '$removed'.
We found no evidence for publication bias for either the VOE effect nor the PN effect using selection models (VOE: Likelihood Ratio Test, p = 0.3, PN: Likelihood Ratio Test, p = 0.3). We found the VOE effect was still significant (effect size = 0.238 standard mean difference units, 95% CI [0.102, 0.374], z = 3.435, p < .001)
#Ratio of Means (log of ratio of means)
rom_pn <- metafor::escalc(measure = "ROM", m1i = x_2, sd1i = SD_2, n1i = n_1, m2i = x_1, sd2i = SD_1, n2i = n_1, data = meta_analysis_data_exp)
rom_voe <- metafor::escalc(measure = "ROM", m1i = x_3, sd1i = SD_3, n1i = n_1, m2i = x_2, sd2i = SD_2, n2i = n_1, data = meta_analysis_data_exp)
rma_pn_rom<- rma.mv(yi, vi, random = ~1|paper_expt_info, data = rom_pn)
## Warning: 1 row with NAs omitted from model fitting.
summary(rma_pn_rom)
##
## Multivariate Meta-Analysis Model (k = 75; method: REML)
##
## logLik Deviance AIC BIC AICc
## -49.8399 99.6797 103.6797 108.2879 103.8487
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.1714 0.4140 75 no paper_expt_info
##
## Test for Heterogeneity:
## Q(df = 74) = 394.7626, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2081 0.0547 3.8070 0.0001 0.1010 0.3153 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rma_voe_rom<- rma.mv(yi, vi, random = ~1|paper_expt_info, data = rom_voe)
## Warning: 1 row with NAs omitted from model fitting.
summary(rma_voe_rom)
##
## Multivariate Meta-Analysis Model (k = 75; method: REML)
##
## logLik Deviance AIC BIC AICc
## -8.3744 16.7489 20.7489 25.3570 20.9179
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.0328 0.1811 75 no paper_expt_info
##
## Test for Heterogeneity:
## Q(df = 74) = 141.8014, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2221 0.0313 7.1035 <.0001 0.1608 0.2834 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Comparisons of ROM vs SMD fits
rom_fit_pn <- fitstats(rma_pn_rom)
smd_fit_pn <- fitstats(rma_pn_no_mods)
rom_fit_voe <- fitstats(rma_voe_rom)
smd_fit_voe <- fitstats(rma_voe_no_mods)
We found that for both the violation-of-expectation and perceptual novelty effects, a model that expressed these condition-level effects as a log ratio of means fit the data better than a model that expressed these effects as a standardized mean difference (VOE: log ratio of means fit - [log likelihood = -8.374, deviance = 16.749, AIC = 20.749, BIC = 25.357] vs standard mean difference fit - [log likelihood = -35.252, deviance = 70.503, AIC = 74.503, BIC = 79.111]; PN: log ratio of means fit - [log likelihood = -49.84, deviance = 99.68, AIC = 103.68, BIC = 108.288] vs standard mean difference fit - [log likelihood = -66.254, deviance = 132.507, AIC = 136.507, BIC = 141.116])
pn_voe_in_sec <- meta_analysis_data_exp %>%
mutate(PN = x_2 - x_1) %>%
mutate(VOE = x_3 - x_2) %>%
mutate(PN_sd = SD_2 + SD_1) %>%
mutate(VOE_sd = SD_3 + SD_2)
scatter_pn_voe <- ggplot(data = pn_voe_in_sec, aes(x = VOE, y = PN)) +
geom_point(aes(size = n_1), alpha = 0.5) +
scale_size_continuous(range = c(1,4)) +
geom_smooth(method = "lm")+
geom_errorbar(width = .2, alpha = 0.1, inherit.aes = FALSE, aes(x = VOE, ymin = PN - PN_sd/sqrt(n_1), ymax = PN + PN_sd/sqrt(n_1))) +
geom_errorbarh(alpha = 0.2, inherit.aes = FALSE, aes(y = PN, xmin = VOE - VOE_sd/sqrt(n_1), xmax = VOE + VOE_sd/sqrt(n_1))) +
coord_cartesian(xlim = c(-15,30), ylim = c(-15,30)) +
theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15))
scatter_pn_voe
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_errorbarh()`).
#Sanal-Hayes et al. (2022) means and sds
sanal_hayes <- read_csv('outputs_(3)/sanal-hayes_2022.csv') %>%
group_by(expt_cond) %>%
dplyr::summarize(mean_hab = mean(train4, na.rm = TRUE), sd_hab = sd(train4, na.rm = TRUE), mean_expected = mean(expected1, na.rm = TRUE), sd_expected = sd(expected1, na.rm = TRUE), mean_unexpected = mean(unexpected1, na.rm = TRUE), sd_unexpected = sd(unexpected1, na.rm = TRUE))
## Rows: 16 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): paper, expt_cond, training_type, order
## dbl (8): expt_num, subj, train1, train2, train3, train4, expected1, unexpected1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sanal_hayes
## # A tibble: 2 × 7
## expt_cond mean_hab sd_hab mean_expected sd_expected mean_unexpected
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2_large 2.25 0.999 3.42 1.01 2.90
## 2 2_small 2.27 0.473 2.47 0.531 2.76
## # ℹ 1 more variable: sd_unexpected <dbl>
no_mods_pn
##
## Multivariate Meta-Analysis Model (k = 75; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.2398 0.4896 75 no paper_expt_info
##
## Test for Heterogeneity:
## Q(df = 74) = 276.8066, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2421 0.0672 3.6041 0.0003 0.1104 0.3737 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
no_mods_voe
##
## Multivariate Meta-Analysis Model (k = 75; method: REML)
##
## Variance Components:
##
## estim sqrt nlvls fixed factor
## sigma^2 0.0439 0.2096 75 no paper_expt_info
##
## Test for Heterogeneity:
## Q(df = 74) = 119.3246, p-val = 0.0007
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.2851 0.0421 6.7741 <.0001 0.2026 0.3676 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#PN
pwr.t.test(d = no_mods_pn$ci.lb, type = "paired", power = 0.8)$n #661
## [1] 646
pwr.t.test(d = no_mods_pn$b, type = "paired", power = 0.8)$n #139
## [1] 136
pwr.t.test(d = no_mods_pn$ci.ub, type = "paired", power = 0.8)$n #60
## [1] 58.2
#VOE
pwr.t.test(d = no_mods_voe$ci.lb, type = "paired", power = 0.8)$n #184
## [1] 193
pwr.t.test(d = no_mods_voe$b, type = "paired", power = 0.8)$n #96
## [1] 98.5
pwr.t.test(d = no_mods_voe$ci.ub, type = "paired", power = 0.8)$n #59
## [1] 60