Metalab is a dataset of effect sizes across 13 different phenomena in experimental language acquisition research (n > 850 effect sizes). Here, I ask whether properties of the community structure of a literature are predictive of measures of bias in that literature.

Summary of key findings:



Web of Science Data

We merge the Metalab dataset with the Web of Science database based on DOIs. Meta-analyses (MAs) with 5 or fewer papers are excluded. This leaves us with 12 MAs. Note that we lose papers here in two ways: those that don’t have DOIs (e.g., conference papers, n = approx. 40) and those with DOIs that are not in Web of Science (n = approx. 50).

Literature Networks

We examine four different network-types using the web of science data:

Then for each MA (n = 12), we create a network of each of the four network-types. For each of those networks (N = 48), we quantify five properties of the network:

Plotted below are the network metrics for each of the 48 networks.

# Define network function.
#
# Builds one bibliometric network for a single set of papers and returns either
# a plotting data frame or a one-row summary of network statistics.
#
# Args:
#   my.paper.data:   data frame of papers with a `doi` column; it is left-joined
#                    to the global `wos` table on `wos$DI`.
#   this.analysis:   passed to biblioNetwork() as `analysis`.
#   this.network:    passed to biblioNetwork() as `network`.
#   type:            "plot"  -> ggnetwork() data frame for tidy plotting;
#                    "stats" -> one-row data.frame of network metrics.
#   remove.multiple: passed to igraph::simplify() as `remove.multiple`.
#   noloops:         passed to igraph::simplify() as `remove.loops`.
#
# Returns:
#   For type == "plot", the ggnetwork() data frame. For type == "stats", a
#   data.frame with columns analysis, network, Q, n.groups, closeness,
#   betweeness and degree. NULL (invisibly) when biblioNetwork() produces an
#   empty network or when `type` is neither "plot" nor "stats".
getGraph <- function (my.paper.data, 
                        this.analysis,
                        this.network,
                        type,
                        remove.multiple = FALSE,
                        noloops = FALSE) {
  # note: when remove.multiple = TRUE, clustering functions fails on some networks

  # get web of science data for domain
  this.wos <- left_join(my.paper.data, wos, by = c("doi" = "DI")) %>%
    as.data.frame() # necessary for biblioNetwork

  # field separator depends on the analysis / network type (bibliometrix_1_6)
  if (this.analysis == "co-citation") {
    sep <- ". "
  } else if (this.network == "authors") {
    sep <- ";"
  } else {
    sep <- "; "
  }

  # make the network
  this.net <- biblioNetwork(this.wos,
                            analysis = this.analysis,
                            network = this.network,
                            sep = sep)

  # An empty network has nothing to plot or summarise. Returning NULL here
  # (instead of falling through to undefined locals) lets callers drop it,
  # e.g. via bind_rows().
  if (length(this.net) == 0) {
    return(invisible(NULL))
  }

  # munge the network into a simplified undirected igraph object
  graph <- graph.adjacency(this.net, mode = "undirected")
  graph <- igraph::simplify(graph,
                            remove.multiple = remove.multiple,
                            remove.loops = noloops)

  if (type == "plot") {
    # make into df for tidy plotting
    return(ggnetwork(asNetwork(graph)))
  }

  if (type == "stats") {
    clustering <- cluster_leading_eigen(graph, options = list(maxiter = 1000000))
    mean.closeness <- mean(estimate_closeness(graph, cutoff = 100))
    mean.betweeness <- mean(estimate_betweenness(graph, cutoff = 100))
    mean.degree <- mean(igraph::degree(graph))

    return(data.frame(analysis = this.analysis,
                      network = this.network,
                      Q = round(clustering$modularity, 4),
                      n.groups = round(length(clustering), 4),
                      closeness = round(mean.closeness, 4),
                      betweeness = round(mean.betweeness, 4),
                      degree = mean.degree))
  }

  # unrecognised `type`: nothing to return
  invisible(NULL)
}
# Make all networks and get network measures 
# Each analysis / network label is repeated n.mas times so that it lines up
# element-by-element with the repeated list of per-MA paper tables below.
analyses_list <- rep(ANALYSES, each = n.mas)
networks_list <- rep(NETWORKS, each = n.mas)

# Per-MA paper tables (all columns as factors), repeated once per analysis.
dfs <- paper.data %>%
  mutate_each(funs(as.factor)) %>%
  split(.$short_name) %>%
  rep(length(ANALYSES))

args <- list(dfs, analyses_list, networks_list)

# One row of network statistics per (MA, analysis, network); the list names
# coming from split() become the `short_name` column.
modularity.data <- bind_rows(
  pmap(args, getGraph, type = "stats"),
  .id = "short_name"
)

# Long format: log-transform the centrality measures, stack the five metric
# columns, and fuse analysis + network into a single "analysis.network" label.
modularity.data.long <- modularity.data %>%
  mutate(closeness = log(closeness),
         betweeness = log(betweeness),
         degree = log(degree)) %>%
  gather("network.measure", "network.value", Q:degree) %>%
  unite(network, analysis, network, sep = ".")
# Bar chart of each network metric per meta-analysis, one facet per
# (metric, network type) combination with independent axes.
ggplot(modularity.data.long,
       aes(x = short_name, y = network.value, fill = short_name)) +
  geom_bar(stat = "identity") +
  facet_wrap(network.measure ~ network, ncol = 4, scales = "free") +
  ggtitle("Network summary statistics") +
  theme_bw() +
  theme(axis.text.x = element_blank()) 

Literature Bias

For each meta-analysis, we examine three aspects of the literature:

Plotted below are the bias estimates for each of the literatures in our dataset.

# overall ES data
all_mas <- read.csv("ES_data_for_networks2.csv")

# Keep the three bias measures per MA (absolute effect size, fail-safe N,
# log tau^2) and stack them into long format keyed by `bias.measure`.
ma.es <- all_mas %>%
  select(short_name, overall.d.age, fail_safe_n = fsn_string, tau2) %>%
  mutate(overall.d.age = abs(overall.d.age),
         tau2 = log(tau2)) %>%
  gather("bias.measure", "bias.value", -short_name)
# Bar chart of each bias measure, restricted to the meta-analyses that also
# appear in the network statistics.
ggplot(filter(ma.es, short_name %in% modularity.data.long$short_name),
       aes(x = short_name, y = bias.value, fill = short_name)) +
  geom_bar(stat = "identity") +
  facet_wrap(~bias.measure, scales = "free") +
  ggtitle("Bias summary statistics") +
  theme_bw() +
  theme(axis.text.x = element_blank()) 

Correlations between network properties and bias

Given properties of the networks and estimates of the scientific bias in each literature, we ask whether these two measures are correlated. Specifically, we predict the bias value (e.g. fail-safe-n) with the network value (e.g. modularity), controlling for the number of papers in the MA, and weighting the MAs by the number of studies in the literature. We fit a separate model for each bias-network pair, for each network type (e.g. co-citation references).

# Combine network metrics, bias measures and the per-MA counts in `ns`
# (joined on whatever columns the tables share).
net.es <- modularity.data.long %>%
  left_join(ma.es) %>%
  dplyr::as_data_frame() %>%
  left_join(ns)

# One weighted linear model per (network type, bias measure, network metric);
# keep only the coefficient on the network metric and flag it at p < .05.
corr.sigs <- net.es %>%
  filter(is.finite(network.value), is.finite(bias.value)) %>%
  group_by(network, bias.measure, network.measure) %>%
  do(tidy(lm(bias.value ~ n.papers + network.value,
             weights = .$n.papers,
             data = .))) %>%
  filter(term == "network.value") %>%
  mutate(sig.col = ifelse(p.value < .05, "sig", "nsig"),
         # this is a hack: placeholder x/y values so these rows can be drawn
         # by a layer that inherits the network.value / bias.value aesthetics
         network.value = Inf,
         bias.value = Inf) %>%
  select(estimate, sig.col, network.value, bias.value)
# Define plotting functions

# Draw the network of every meta-analysis in `d` for one analysis / network
# combination, one facet per meta-analysis.
#   d:        paper data with a `short_name` column identifying the MA
#   analysis: analysis type forwarded to getGraph()
#   network:  network type forwarded to getGraph()
# Returns a ggplot object titled "<analysis>.<network>".
network_plot <- function(d, analysis, network) {
  plot.title <- paste(analysis, network, sep = ".")

  # one plotting data frame per meta-analysis, stacked with an id column
  papers.by.ma <- split(d, d$short_name)
  layouts <- map(papers.by.ma, getGraph, analysis, network, "plot")
  layout.df <- bind_rows(layouts, .id = "short_name")

  ggplot(layout.df, aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_edges(color = "grey50") +
    geom_nodes(aes(color = short_name)) +
    facet_wrap(~ short_name) +
    ggtitle(plot.title) +
    theme_blank() +
    theme(legend.position = "none")
}

# Scatterplots of bias value against network value for one analysis / network
# combination, faceted by bias measure (rows) and network metric (columns).
#   d:        long data with columns network, network.value, bias.value,
#             n.papers, short_name, bias.measure and network.measure
#   corrs:    per-facet model results with columns network, sig.col,
#             bias.measure and network.measure (plus placeholder
#             network.value / bias.value columns for the inherited x/y)
#   analysis: analysis type, used with `network` to build the network label
#   network:  network type
# Returns a ggplot object titled "<analysis>.<network>"; facet backgrounds are
# tinted red where sig.col == "sig".
corr_plot <- function(d, corrs, analysis, network) {
  networkname <- paste(analysis, network, sep = ".")

  # NOTE: inside filter() `network` resolves to the data column, not to the
  # function argument of the same name.
  ggplot(filter(d, network == networkname),
         aes(x = network.value, y = bias.value)) +
    # full-panel rectangle coloured by significance of the facet's model
    geom_rect(data = filter(corrs, network == networkname),
              aes(fill = sig.col),
              xmin = -Inf, xmax = Inf,
              ymin = -Inf, ymax = Inf, alpha = 0.2) +
    geom_point(aes(size = n.papers, color = short_name)) +
    geom_smooth(method = "lm", color = "black") +
    ggtitle(networkname) +
    facet_grid(bias.measure ~ network.measure, scales = "free") +
    # Named values pin each colour to its label; with unnamed values a panel
    # set containing only "sig" would be drawn in the non-significant colour.
    scale_fill_manual(values = c(nsig = "grey99", sig = "red1")) +
    theme_bw() +
    theme(legend.position = "none")
}

Collaboration authors

Networks

# Select analysis/network pair 1 (an index into ANALYSES and NETWORKS, which
# are defined outside this section) and draw its per-MA networks.
p <- 1
network_plot(paper.data, ANALYSES[p], NETWORKS[p])

Models

# Bias vs. network-metric scatterplots for the pair currently selected by `p`.
corr_plot(net.es, corr.sigs, ANALYSES[p], NETWORKS[p])

Each point corresponds to a meta-analysis (MA). Size of the point corresponds to number of papers in MA. Red facets indicate models where the network measure is a significant predictor of the bias value (controlling for number of papers).

Coupling authors

Networks

# Select analysis/network pair 2 (an index into ANALYSES and NETWORKS, which
# are defined outside this section) and draw its per-MA networks.
p <- 2
network_plot(paper.data, ANALYSES[p], NETWORKS[p])

Models

# Bias vs. network-metric scatterplots for the pair currently selected by `p`.
corr_plot(net.es, corr.sigs, ANALYSES[p], NETWORKS[p])

Co-citation references

Networks

# Select analysis/network pair 3 (an index into ANALYSES and NETWORKS, which
# are defined outside this section) and draw its per-MA networks.
p <- 3
network_plot(paper.data, ANALYSES[p], NETWORKS[p])

Models

# Bias vs. network-metric scatterplots for the pair currently selected by `p`.
corr_plot(net.es, corr.sigs, ANALYSES[p], NETWORKS[p])

Co-occurrences keywords

Networks

# Select analysis/network pair 4 (an index into ANALYSES and NETWORKS, which
# are defined outside this section) and draw its per-MA networks.
p <- 4
network_plot(paper.data, ANALYSES[p], NETWORKS[p])

Models

# Bias vs. network-metric scatterplots for the pair currently selected by `p`.
corr_plot(net.es, corr.sigs, ANALYSES[p], NETWORKS[p])