1 Introduction
2 Sample sizes and exclusions
3 Publication Bias
4 Data availability/reporting standards
5 Individual meta-analysis power
- 5.1 What is the power to determine if the overall effect is greater than zero?
- 5.2 What is the power to determine if there’s a moderator (e.g. age)?
6 P-curve [Christina]
7 Trim-and-fill [Christina?]

1 Introduction

# load data
setwd('../dashboard/')
source('global.R')

## [1] "../data/infant_directed_speech_preference.csv"
## [1] "../data/label_advantage.csv"
## [1] "../data/mutual_exclusivity.csv"
## [1] "../data/phonemic_discrimination_native.csv"
## [1] "../data/phonemic_discrimination_nonnative.csv"
## [1] "../data/transitive_alternation.csv"
## [1] "../data/word_segmentation.csv"

We currently have 6 datasets included: IDS, label advantage, mutual exclusivity, phonemic discrimination native, phonemic discrimination non-native, and word segmentation.

2 Sample sizes and exclusions

2.1 How do n’s vary as a function of age?

all = all_data %>%
  mutate(n_total = ifelse(!is.na(n_2), n_1 + n_2, n_1)) %>%
  select(dataset, d_calc, 
         mean_age_1, n_total,
         n_excluded, num_trials, method)
 
ggplot(all, aes(x = mean_age_1, y = n_total, color = dataset)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~dataset, scales = "free") +
  theme_bw() +
  theme(legend.position="none")

2.2 Do n’s vary with d’s?

ns_ds = all_data %>%
  mutate(n_total = ifelse(!is.na(n_2), n_1 + n_2, n_1)) %>%
  select(dataset, d_calc, 
         mean_age_1, n_total) 

ggplot(ns_ds , aes(y = n_total, x = d_calc, color = dataset)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~dataset, scales = "free") +
  theme_bw() +
  xlab("Effect size") + 
  theme(legend.position="none")

2.3 Does the number of excluded infants affect effect sizes?

Some issues:

not here that different data sets use differently names columsn to indicate exclusions (“n_excluded”, “n_excluded_1”, “n_excluded_fussers”)
an additional complicaiton: are the exlcusions part of the n?
we don’t have num_trials for any, but this would be great to look at
what’s up with the n_excluded > n?

exclusions = all_data %>%
  filter(!is.na(n_excluded_1) | !is.na(n_excluded)) %>%
  mutate(n_excluded = ifelse(is.na(n_excluded), n_excluded_1,
                             n_excluded),
         n_total = ifelse(!is.na(n_2), n_1 + n_2, n_1)) %>%
  select(dataset, d_calc, 
         mean_age_1, n_total,
         n_excluded, num_trials, method) %>%
  mutate(prop_excluded = n_excluded/n_total) %>%
  filter(prop_excluded < 1)
 
ggplot(exclusions, aes(x = prop_excluded, y =d_calc ,
                       color = dataset)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("prop. excluded as a function of age") +
  xlab("proportion excluded") +
  ylab("d") +
  theme_bw()

2.4 Does proportion excluded vary as a function of age?

Looks like maybe increases.

ggplot(exclusions, aes(x = mean_age_1, y = prop_excluded,
                       color = dataset)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("prop. excluded as a function of age") +
  xlab("mean age (months)") +
  ylab("proportion excluded") +
  theme_bw()

3 Publication Bias

3.1 Do effect sizes vary as a function of year?

years.df = all_data %>%
  mutate(year = as.numeric(unlist(lapply(strsplit(unlist(all_data$unique_ID), "[^0-9]+"),  function(x) unlist(x)[2])))) 

#years.sig = rep('', length(unique(years.df$dataset)))
#for (i in 1:length(unique(years.df$dataset))) {
#  my_data = all_data[all_data$dataset ==  datasets[i],]
#  m = rma(d ~ year + mean_age_1, vi = d_var, slab = as.character(unique_ID), data = #my_data, method = "REML")
#  years.sig[i]  = ifelse(m$pval[2] <.05, "*", years.sig)
#}
#ann_text <- data.frame(sig = years.sig, lab = "Text",
#                       df = unique(years.df$dataset))

ggplot(years.df , aes(x = year, y = d_calc, color = dataset)) +
  geom_point() +
  #scale_colour_gradientn(colours = topo.colors(10)) +
  geom_smooth(method = "lm") +
  facet_wrap(~ dataset, scales = "free_y") +
  theme_bw() +
  #geom_text(data = ann_text,aes(label = sig), x = 2000, y = 1) +
  xlab("published year") +
  theme(legend.position="none")

rma(d_calc ~ year + mean_age_1 + dataset, vi = d_var_calc, data = years.df, method = "REML")

## 
## Mixed-Effects Model (k = 642; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.1920 (SE = 0.0146)
## tau (square root of estimated tau^2 value):             0.4382
## I^2 (residual heterogeneity / unaccounted variability): 80.99%
## H^2 (unaccounted variability / sampling variability):   5.26
## R^2 (amount of heterogeneity accounted for):            18.91%
## 
## Test for Residual Heterogeneity: 
## QE(df = 634) = 2655.4436, p-val < .0001
## 
## Test of Moderators (coefficient(s) 2,3,4,5,6,7,8): 
## QM(df = 7) = 141.3138, p-val < .0001
## 
## Model Results:
## 
##                                              estimate      se     zval
## intrcpt                                       23.6517  6.9990   3.3793
## year                                          -0.0116  0.0035  -3.2979
## mean_age_1                                     0.0010  0.0002   6.2815
## datasetLabel advantage in concept learning    -0.3898  0.1046  -3.7248
## datasetMutual exclusivity                     -0.4635  0.1490  -3.1101
## datasetPhonemic discrimination (native)       -0.0855  0.1038  -0.8239
## datasetPhonemic discrimination (non-native)   -0.0918  0.1206  -0.7612
## datasetWord segmentation                      -0.4844  0.0954  -5.0776
##                                                pval    ci.lb    ci.ub     
## intrcpt                                      0.0007   9.9339  37.3696  ***
## year                                         0.0010  -0.0184  -0.0047  ***
## mean_age_1                                   <.0001   0.0007   0.0013  ***
## datasetLabel advantage in concept learning   0.0002  -0.5949  -0.1847  ***
## datasetMutual exclusivity                    0.0019  -0.7556  -0.1714   **
## datasetPhonemic discrimination (native)      0.4100  -0.2889   0.1179     
## datasetPhonemic discrimination (non-native)  0.4466  -0.3281   0.1445     
## datasetWord segmentation                     <.0001  -0.6714  -0.2974  ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.2 Does age vary as a function of year?

ggplot(years.df , aes(x = year, y = mean_age_1, color = dataset)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ dataset, scales = "free_y") +
  theme_bw() +
  #geom_text(data = ann_text,aes(label = sig), x = 2000, y = 1) +
  xlab("published year")  +
    theme(legend.position="none")

3.3 Does effect size vary as a function of number of authors?

Not really enough data here. Controling for age, though, studies with more authors have bigger effect size.

all_data$short_cite2 = str_replace_all(all_data$short_cite, " ", "")
all_data$short_cite2 = str_replace_all(all_data$short_cite2, "&", ",")
all_data$num_authors = unlist(lapply(lapply(all_data$short_cite2, 
                                     function(x) unlist(strsplit(x, ',+'))), 
                              function(x) length(which(x != ""))))
all_data$num_authors =  as.numeric(ifelse(grepl("et al", all_data$short_cite), "NA",
                                          all_data$num_authors)) # deal with miss coded short_cites (et al.)
all_data$short_cite2 <- NULL

all_data %>%
  ggplot(aes(x = num_authors, y = d_calc, colour = dataset)) +
  geom_point() + 
  geom_smooth(method = "lm") +
  facet_wrap(~dataset, scales = "free_y") +
  theme_bw()+
  theme(legend.position="none")

rma(d_calc ~ num_authors + mean_age_1 + dataset, vi = d_var_calc, 
          data = all_data, method = "REML")

## 
## Mixed-Effects Model (k = 622; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0.2006 (SE = 0.0153)
## tau (square root of estimated tau^2 value):             0.4479
## I^2 (residual heterogeneity / unaccounted variability): 81.95%
## H^2 (unaccounted variability / sampling variability):   5.54
## R^2 (amount of heterogeneity accounted for):            14.26%
## 
## Test for Residual Heterogeneity: 
## QE(df = 614) = 2664.0054, p-val < .0001
## 
## Test of Moderators (coefficient(s) 2,3,4,5,6,7,8): 
## QM(df = 7) = 116.7815, p-val < .0001
## 
## Model Results:
## 
##                                              estimate      se     zval
## intrcpt                                        0.7701  0.1356   5.6788
## num_authors                                   -0.0056  0.0151  -0.3719
## mean_age_1                                     0.0009  0.0002   6.0108
## datasetLabel advantage in concept learning    -0.6876  0.1449  -4.7440
## datasetMutual exclusivity                     -0.7395  0.1815  -4.0737
## datasetPhonemic discrimination (native)       -0.4454  0.1418  -3.1403
## datasetPhonemic discrimination (non-native)   -0.4773  0.1560  -3.0603
## datasetWord segmentation                      -0.7963  0.1370  -5.8107
##                                                pval    ci.lb    ci.ub     
## intrcpt                                      <.0001   0.5043   1.0359  ***
## num_authors                                  0.7100  -0.0353   0.0240     
## mean_age_1                                   <.0001   0.0006   0.0012  ***
## datasetLabel advantage in concept learning   <.0001  -0.9717  -0.4035  ***
## datasetMutual exclusivity                    <.0001  -1.0954  -0.3837  ***
## datasetPhonemic discrimination (native)      0.0017  -0.7233  -0.1674   **
## datasetPhonemic discrimination (non-native)  0.0022  -0.7830  -0.1716   **
## datasetWord segmentation                     <.0001  -1.0649  -0.5277  ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4 Data availability/reporting standards

Here we’re intersted in describing the extent to which papers report all statistics that are desirable from the perspective of meta-analysis (i.e. t/F, M, SD, d)

counts = all_data %>%
            summarise(test_statistic = sum(!is.na(t) | !is.na(F) | !is.na(r)),
                      means = sum(!is.na(x_1)),
                      SD = sum(!is.na(SD_1)),
                      d = sum(!is.na(d_calc)),
                      g = sum(!is.na(g_calc)),
                      r = sum(!is.na(r_calc)),
                      age_range = sum(!is.na(age_range_1)),
                      gender = sum(!is.na(gender_1))) %>%
            gather("coded_variable", "n") %>%
            mutate(coded = "coded") %>%
            mutate(total = nrow(all_data))  %>%
            mutate(coded_variable = factor(coded_variable, levels = c("d", "g", "r", "means",
                                                                        "SD", "test_statistic",
                                                                        "age_range", "gender")))
counts = counts %>%
             mutate(n = total - n, 
                    coded = "uncoded")  %>%
             bind_rows(counts) %>%
             mutate(n_lab = ifelse(coded == "coded", n, "")) %>%
             arrange(coded)
ggplot(counts) + 
  geom_bar(aes(x = coded_variable, 
               y = n/total,
               fill = coded,
              order = coded), 
           stat = "identity") + 
  ylim(0,1) + 
  ylab("Number coded") + 
  xlab("Coded variable") + 
  ggtitle("All data") + 
   annotate("text", x = 1, y = .9, 
            label = paste("N =", counts$total[1]), size = 6) + 
  scale_fill_manual(values=c( "lightgreen", "grey")) + 
  geom_text(aes(label = n_lab, x = coded_variable, y = n/total -.06) )+
  theme_bw() + 
  theme(panel.border = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
       axis.line = element_line(colour = "black"),
       text = element_text(size=20))

By MA

counts = all_data %>%
            group_by(dataset) %>%
            summarise(test_statistic = sum(!is.na(t) | !is.na(F)),
                      means = sum(!is.na(x_1)),
                      SD = sum(!is.na(SD_1)),
                      d = sum(!is.na(d_calc)),
                      g = sum(!is.na(g_calc)),
                      r = sum(!is.na(r_calc)),
                      age_range = sum(!is.na(age_range_1)),
                      gender = sum(!is.na(gender_1)),
                      total = n()) %>%
            gather(coded_variable, n, -dataset, -total) %>%
            mutate(coded = "coded") %>%
            mutate(coded_variable = factor(coded_variable, levels = c("d", "g", "r", "means",
                                                                        "SD", "test_statistic",
                                                                        "age_range", "gender")))

counts = counts %>%
             mutate(n = total - n, 
                    coded = "uncoded")  %>%
             bind_rows(counts) %>%
             mutate(n_lab = ifelse(coded == "coded", n, "")) %>%
  arrange(coded)

ggplot(counts, aes(fill = coded)) + 
  geom_bar(aes(x = factor(coded_variable), 
               y = n/total),  ## FIX THIS
           stat = "identity",
           position = "fill") + 
  facet_wrap(~dataset, nrow=3, ncol=2) +
  ylim(0,1) + 
  ylab("Number coded") + 
  xlab("Coded variable") + 
  scale_fill_manual(values = c("lightgreen", "grey")) + 
  geom_text(aes(label = n_lab,
              x = coded_variable, 
              y = n/total -.06)) +
  theme_bw() + 
  theme(panel.border = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size=20),
        strip.background  = element_blank(), 
        axis.text.x = element_text(angle = 30, hjust = 1))

#annotate("text", x = 1, y = .9, 
           # label = paste("N =", totals), size = 6) + 
#totals_text = all_data %>%
      #      group_by(dataset) %>%
      #     summarise(total = n())%>%
      #     mutate(coded_variable = 1,
      #             lab = paste("N =", total))

5 Individual meta-analysis power

5.1 What is the power to determine if the overall effect is greater than zero?

Here I calculated the power to detect an effect using a random effect model (Hedges & Pigott, 2001; Valentine, Pigott, & Rothstein, 2010). Note that this information is redundant with confidence intervals on the estimated effect size, and the power in all of these MAs is essentially 1. Below, I ran a simulation sampling n papers from each meta-analysis, and calculating power. We need surprisingly few studies to get relatively high power.

calculate_ma_power <- function(sample_data) {
  ALPHA <-  .05
  NTAILS <-  2
  c_crit <-  qnorm(ALPHA/NTAILS, lower.tail = F)
  model <- rma(d_calc, vi = d_var_calc, data = sample_data,
               method = "REML", control = list(maxiter = 1000, stepadj = 0.5))
  lambda  <-  model$b[1] / model$se[1] # Valentine, Eq. [6]
  power <-  1 - pnorm(c_crit - lambda)
  power
}

one_sample <- function(ma_data, summary_function, n) {
   function(k) {
      do.call(summary_function, list(sample_n(ma_data, size = n, replace = F)))
   }
}

all_samples <- function(ma_data, summary_function, n, nboot) {
  sample_values <- 1:nboot %>%
    map(one_sample(ma_data, summary_function, n)) %>%
    unlist()
  data.frame(n = n,
             mean = mean(sample_values),
             ci_lower = ci_lower(sample_values),
             ci_upper = ci_upper(sample_values),
             row.names = NULL)
}

dataset_data <- function(ma_data, summary_function, nboot) {
  overall <- do.call(summary_function, list(ma_data))
  seq(1, nrow(ma_data), by = 1) %>%
    map(function(n) all_samples(ma_data, 
                                summary_function, n, nboot)) %>%
    bind_rows() %>%
    mutate(dataset = unique(ma_data$dataset),
           overall_est = overall)
}

power_data <- all_data %>%
  split(.$dataset) %>%
  map(function(ma_data) dataset_data(ma_data, 
                                     calculate_ma_power, 50)) %>%
  bind_rows()

ggplot(power_data, aes(x = n, y = mean, fill = dataset)) +
  geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), alpha = 0.5) +
  geom_hline(aes(yintercept = overall_est), linetype = "dashed") +
  geom_line( aes(x = n, y = mean)) +
  facet_wrap(~dataset) +
  xlab("Number of effect sizes") +
  ylab("Meta-analytic power") +
  xlim(0,150) +
  scale_fill_solarized(guide = FALSE) +
  theme_bw() +
  theme(text = element_text(family = "Open Sans"),
        panel.grid.minor = element_blank())

5.2 What is the power to determine if there’s a moderator (e.g. age)?

Not totally sure how to do this. Valentine et al. (2001) say it’s difficult without the raw data?

6 P-curve [Christina]

Corrects for selective reporting on the basis of significance (fewer insignificant results; Simonsohn, Nelson, & Simmons, 2014)

# calculate degrees of freedom based on Ns and design (within = n_1-1, between = n_1 + n_2 - 2
pcurve.data = 
  select(all_data, dataset, unique_ID, t, F, n_1, n_2, participant_design) %>%
  filter(!is.na(t)|!is.na(F)) %>%
  mutate(df = ifelse(participant_design == "between", n_1 + n_2-2, n_1-1))
         
#summary(pcurve.data$df)

#pcurve.df %>% 
#  filter(participant_design == "between") %>% 
#  data.frame()

# code from Simonsohn, Nelson, & Simmons (2014) appendix
loss = function(t_obs,df_obs,d_est) { 
  t_obs=abs(t_obs)
  p_obs=2*(1-pt(t_obs,df=df_obs)) 
  t.sig=subset(t_obs,p_obs<.05)
  df.sig=subset(df_obs,p_obs<.05)
  ncp_est=sqrt((df.sig+2)/4)*d_est 
  tc=qt(.975,df.sig) 
  power_est=1-pt(tc,df.sig,ncp_est)
  p_larger=pt(t.sig,df=df.sig,ncp=ncp_est)
  ppr=(p_larger-(1-power_est))/power_est 
  KSD=ks.test(ppr,punif)$statistic 
  return(KSD)
}

plotloss=function(t_obs,df_obs,dmin,dmax) { 
  loss.all=c()
  di=c()
  for (i in 0:((dmax-dmin)*100)) {
    d = dmin + i/100
    di = c(di,d)
    options(warn = -1)
    loss.all=c(loss.all,loss(df_obs=df_obs,t_obs=t_obs,d_est=d)) 
    options(warn = 0)
  }
    
  imin=match(min(loss.all),loss.all) 
  dstart=dmin+imin/100
  dhat=optimize(loss,c(dstart-.1, dstart+.1), df_obs=df_obs,t_obs=t_obs)

  plot(di,
       loss.all, xlab="Effect size\nCohen-d", 
       ylab="Loss (D stat in KS test)",
       ylim=c(0,1),
       main="How well does each effect size fit? (lower is better)")
  points(dhat$minimum,dhat$objective,pch=19,col="red",cex=2) 
  text(dhat$minimum,dhat$objective-.08,
       paste0("p-curve’s estimate of effect size:\nd=",round(dhat$minimum,3)),col="red") 
  return(dhat$minimum)
} 

#Example
#t_obs= c(1.7, 2.8, -3.1, 2.4) # include one p > .05 and one negative t-value to highlight how we treat those 
#df_obs=c(44, 75, 125, 200)
#plotloss(t_obs=t_obs,df_obs=df_obs,dmin=-1,dmax=1)

7 Trim-and-fill [Christina?]

Corrects for selective reporting on the basis of effect size (fewer small effects; Duval & Tweedie, 2000a, 2000b)

### fit random-effects model and plot

for (i in 1:length(datasets)) {
  my_data = all_data[all_data$dataset ==  datasets[i],]
  res <- rma(d, d_var, data = my_data)
  taf <- trimfill(res)
  funnel(taf, main = my_data$dataset[1])
}

res <- rma(d, d_var, data = all_data)
taf <- trimfill(res)
funnel(taf, main = "all data")

#regtest

Metalab Reproducibility Analyses

Molly Lewis and Christina Bergmann

2016-02-25