Question: Are properties of the community structure of a literature predictive of effect size measures?

These analy



Parameters:

# Size threshold per meta-analysis (MA): an MA is kept only when it has MORE
# than this many papers (the filter further down uses a strict `>`).
MIN_NUM_PAPERS <- 9

# Analysis-network pairs of interest. The two vectors are parallel:
# ANALYSES[i] is always run together with NETWORKS[i].
ANALYSES <- c(
  "co-citation",
  "collaboration",
  "coupling",
  "co-occurrences"
)
NETWORKS <- c(
  "references",
  "authors",
  "authors",
  "keywords"
)

Read and process data

(1) Web of Science data

Get DOIs for each paper in the metalab dataset (obtained from: https://apps.crossref.org/SimpleTextQuery/).

# DOIs were collected by copy-pasting references, 50 at a time, into the
# Crossref search engine; dois.csv holds the result.
dois <- read.csv("dois.csv") %>%
  select(study_ID, doi) %>%
  filter(!is.na(doi), doi != "") %>%
  # Some entries are URLs rather than bare DOIs: drop everything up to and
  # including the first literal ".org/". The dot is escaped so that a bare DOI
  # that merely contains "<any char>org/" is left intact, and anything after
  # the prefix is kept whole even if ".org/" occurs again later on.
  mutate(doi = str_replace(as.character(doi), "^.*?\\.org/", ""))

Scrape data from web of science at paper level using bibliometrix package

## Articles extracted   100 
## Articles extracted   163

For each paper in a meta-analysis, merge in a doi. Then for each doi, get the mean d_calc, d_var_calc, n (average across conditions/experiments in the same paper). This is what we’re trying to predict.

# global.R is sourced for its side effects; it is expected to define all_data
# (effect-size level data) used below.
source("../../dashboard/global.R", chdir = TRUE) # all_data source

all_mas <- read.csv("ES_data_for_networks2.csv")

# One row per paper (doi) within a meta-analysis (short_name): effect size,
# its variance and sample size averaged over the paper's conditions.
paper.data <- all_data %>%
  select(short_name, study_ID, d_calc, d_var_calc, n_1, n_2) %>%
  inner_join(dois, by = "study_ID") %>%
  mutate(doi = tolower(doi)) %>%
  group_by(doi, short_name) %>%
  summarize(n = mean(c(n_1, n_2), na.rm = TRUE),
            d_calc = mean(d_calc),
            d_var_calc = mean(d_var_calc)) %>%
  # Drop the leftover grouping and keep doi explicitly; previously doi only
  # survived select() because it was still a grouping variable. The column
  # order is unchanged (doi first).
  ungroup() %>%
  select(doi, short_name, d_calc, d_var_calc, n) %>%
  left_join(all_mas %>% select(short_name, overall.d), by = "short_name")

Add in residuals

# Meta-analyses in which every effect size was collected with the same method.
# Distinct observed values are counted directly: levels(as.factor(x)) would
# also count unused factor levels when `method` is already a factor.
single_method_datasets <- all_data %>%
  group_by(short_name) %>%
  summarise(n_methods = length(unique(na.omit(as.character(method))))) %>%
  filter(n_methods == 1) %>%
  .[["short_name"]]

# get model fits
# For each meta-analysis, fit a meta-regression on mean_age and keep the
# standardized residual of every effect size, shifted by the model intercept.
all_data.resid <- lapply(as.character(datasets$short_name), function(this.name) {
  d <- filter(all_data, short_name == this.name)

  # The method moderator is currently disabled, so single- and multi-method
  # datasets are fit with the same model. To re-enable it, fit
  #   rma(d_calc ~ method + mean_age, vi = d_var_calc, data = d)
  # when !(this.name %in% single_method_datasets).
  full.model <- rma(d_calc ~ mean_age, vi = d_var_calc, data = d)

  # add in intercept term
  d$residual.d <- as.numeric(rstandard(full.model)$resid) + full.model$b[1]

  # The former inner_join(all_data) matched every row of d against itself
  # (on all shared columns) and could only duplicate rows, so it was dropped.
  select(d, short_name, study_ID, residual.d)
}) %>%
  bind_rows()

# Average the residual effect sizes per paper (doi) within each meta-analysis
# and attach them to the paper-level data built above.
paper.data <- all_data.resid %>%
  left_join(dois, by = "study_ID") %>%
  filter(!is.na(doi)) %>%
  # Lowercase BEFORE grouping (as in the first paper.data pipeline) so that
  # case variants of one DOI collapse into a single row instead of producing
  # duplicate keys in the join below.
  mutate(doi = tolower(doi)) %>%
  group_by(doi, short_name) %>%
  summarize(residual.d = mean(residual.d)) %>%
  ungroup() %>%
  inner_join(paper.data, by = c("doi", "short_name"))

Merge in web of science data for each doi.

Number of papers with dois in web of science in each MA:

# Per meta-analysis, count the papers whose DOI is also present in the Web of
# Science records (wos$DI).
ns <- summarize(
  group_by(
    inner_join(paper.data, wos, by = c("doi" = "DI")),
    short_name
  ),
  n.papers = n()
)

kable(ns)
short_name n.papers
catBias 7
gaze_following 13
idspref 13
inphondb-native 23
inphondb-nonnative 10
inworddb 50
labadv 13
mutex 16
phonotactics 2
pointing_concurrent 7
pointing_longitudinal 10
sounds 6
symbolism 5
word_recognition 6
# Keep only meta-analyses with more than MIN_NUM_PAPERS papers found in Web of
# Science. The join key is spelled out so the result does not depend on which
# other columns paper.data and ns happen to share.
paper.data <- paper.data %>%
  left_join(ns, by = "short_name") %>%
  filter(n.papers > MIN_NUM_PAPERS)

# Number of meta-analyses that survive the filter.
n.mas <- length(unique(paper.data$short_name))

MAs with 9 or fewer papers are excluded. This leaves us with 8 MAs. Note that we’re losing papers here in two ways - those that don’t have dois (e.g. conference papers, n = approx. 40) and those with dois that are not in web of science (n = approx. 50).

(2) Network data

# Build one bibliometric network for a single meta-analysis and return the
# degree and closeness of every node in it.
#
# Args:
#   my.paper.data:   paper-level data for one MA; needs a `doi` column, which
#                    is matched against wos$DI (wos is read from the global
#                    environment).
#   this.analysis:   value for the `analysis` argument of biblioNetwork().
#   this.network:    value for the `network` argument of biblioNetwork().
#   remove.multiple: passed to igraph::simplify() as `remove.multiple`.
#   noloops:         passed to igraph::simplify() as `remove.loops`.
#
# Returns:
#   A data frame with columns analysis, network, node_name, degree and
#   closeness (one row per node), or NULL when the network is empty or too
#   small for ggnetwork() to handle.
getNodeData <- function(my.paper.data,
                        this.analysis,
                        this.network,
                        remove.multiple = TRUE,
                        noloops = TRUE) {

  # FOR DEBUGGING
  # my.paper.data = filter(paper.data, short_name == unique(paper.data$short_name)[2])
  # this.analysis = "collaboration"
  # this.network = "authors"

  # get web of science data for domain
  this.wos <- left_join(my.paper.data, wos, by = c("doi" = "DI")) %>%
    as.data.frame() # necessary for biblioNetwork

  # Field separator handed to biblioNetwork(). These values were chosen for
  # bibliometrix 1.6; under 1.5 the rule was ". " for co-citation and "; "
  # for everything else.
  if (this.analysis == "co-citation") {
    sep <- ". "
  } else if (this.network == "authors") {
    sep <- ";"
  } else {
    sep <- "; "
  }

  # make the network
  this.net <- biblioNetwork(this.wos,
                            analysis = this.analysis,
                            network = this.network,
                            sep = sep)

  # Nothing to analyse. (The original condition, length(this.net > 0), had a
  # misplaced parenthesis, and the empty case only "worked" because the
  # undefined `gn` raised an error inside try().)
  if (length(this.net) == 0) {
    return(NULL)
  }

  # munge the network
  graph <- graph.adjacency(this.net, mode = "undirected")
  graph <- igraph::simplify(graph,
                            remove.multiple = remove.multiple,
                            remove.loops = noloops)
  gn <- asNetwork(graph)

  # ggnetwork() is used only as a probe: it fails on too-small networks,
  # which are skipped.
  cc <- try(ggnetwork(gn), silent = TRUE)
  if (inherits(cc, "try-error")) {
    return(NULL)
  }

  # Namespaced so other attached network packages cannot mask the igraph
  # versions (simplify() above is already namespaced for the same reason).
  node.degree <- igraph::degree(graph)
  data.frame(analysis = this.analysis,
             network = this.network,
             node_name = names(node.degree),
             degree = node.degree,
             closeness = igraph::closeness(graph),
             row.names = NULL)
}

Make four different networks out of each MA. Then get individual node degrees and closeness.

# One entry per (analysis/network pair, MA): each pair is repeated n.mas
# times, so the pair varies slowest and the MA fastest.
analyses_list <- rep(ANALYSES, each = n.mas)
networks_list <- rep(NETWORKS, each = n.mas)

# One data frame per MA, repeated once per analysis/network pair so that it
# lines up element-wise with analyses_list and networks_list.
dfs <- paper.data %>%
  # mutate_all() replaces the deprecated mutate_each(funs(...)).
  mutate_all(as.factor) %>%
  # drop = TRUE: if short_name was already a factor, levels of the excluded
  # MAs would otherwise yield empty data frames and misalign the lists.
  split(.$short_name, drop = TRUE) %>%
  rep(length(ANALYSES))

args <- list(dfs, analyses_list, networks_list)

dfs is a list of data frames containing length(ANALYSES) copies of each meta-analysis's paper-level data (each row is one paper, identified by its DOI), ordered so that it lines up element by element with analyses_list and networks_list.

Now, for each MA and network type, we create a network and return the degree and closeness of every node in that network. (The mean degree and log mean closeness for each paper are computed later, in getNodeMeans.)

# Call getNodeData() once per (MA data frame, analysis, network) triple and
# stack the per-node results; `.id` stores each list element's name (taken
# from the split on short_name) in a short_name column.
node.data <- pmap(args, getNodeData) %>%
  bind_rows(.id = "short_name")

Main Analyses

# Summarise the network position of one paper's nodes.
#
# Args:
#   one.paper.set: node names belonging to one paper (flattened with unlist()).
#   this.analysis, this.network: select which network's rows of the global
#     `node.data` to use.
#
# Returns:
#   An unnamed numeric vector of length 2: the mean degree (NAs removed) and
#   the log of the mean closeness of the matching nodes.
getNodeMeans <- function(one.paper.set, this.analysis, this.network) {
  wanted.nodes <- unlist(one.paper.set)

  matched.nodes <- filter(
    node.data,
    analysis == this.analysis,
    network == this.network,
    as.character(node_name) %in% wanted.nodes
  )

  c(
    mean(matched.nodes$degree, na.rm = TRUE),
    log(mean(matched.nodes$closeness))
  )
}

# Scatterplots (with a linear fit) of effect size against one node measure.
#
# Args:
#   d:  data frame with columns d_calc, residual.d, node.measure, node.value
#       and short_name.
#   iv: the node.measure value to plot on the x axis; also used as the title.
#
# Returns:
#   A ggplot object with one facet row per effect-size measure (d_calc,
#   residual.d) and one facet column per meta-analysis.
makeNodePlots <- function(d, iv) {
  d %>%
    filter(node.measure == iv) %>%
    select(d_calc, residual.d, node.measure, node.value, short_name) %>%
    # The two effect-size columns are the dependent variables. They are named
    # "dv" (the old "iv" label was wrong and collided with the `iv` argument)
    # and selected by name rather than by position.
    gather("dv", "dv.value", d_calc, residual.d) %>%
    na.omit() %>%
    ggplot(aes(x = node.value, y = dv.value)) +
    ggtitle(iv) +
    facet_grid(dv ~ short_name, scales = "free") +
    geom_point() +
    # A stray empty argument (", ,") was removed from this call.
    geom_smooth(method = "lm", se = FALSE)
}

co-citation and references

Two articles are linked when both are cited in a third article. This means

# Turn "DOI;" into "DOI " so that splitting the cited-reference field on ";"
# does not separate the DOI label from what follows it.
wos$CR <- str_replace_all(as.character(wos$CR), "DOI;", "DOI ")

# One character vector of cited references per WoS record.
fields <- strsplit(wos$CR, ";")
# Strip everything up to and including " DOI " from each reference ...
dois_long <- map(fields, str_replace_all, pattern = ".* DOI ", replacement = "")
# ... and keep only the entries that contain a "/".
dois.clean <- setNames(
  map(dois_long, function(refs) refs[str_detect(refs, "/")]),
  wos$DI
)

# For each paper: mean degree / log mean closeness of its cited references in
# the co-citation network, reshaped to long format and joined to paper.data.
md.cr.df_all <- dois.clean %>%
  map(getNodeMeans,
      this.analysis = "co-citation",
      this.network = "references") %>%
  bind_rows() %>%
  mutate(node.measure = c("mean.degree", "log.mean.closeness")) %>%
  gather("doi", "node.value", -node.measure) %>%
  left_join(paper.data)

makeNodePlots(md.cr.df_all, iv = "mean.degree")

makeNodePlots(md.cr.df_all, iv = "log.mean.closeness")

MODELS

# Fit a mixed model predicting one effect-size measure from one node measure
# and return the t value of the node-measure coefficient.
#
# Args:
#   iv:     node.measure value to use as predictor (character or factor).
#   dv:     name of the outcome column in this.d (character or factor),
#           e.g. "d_calc" or "residual.d".
#   this.d: long-format data frame with columns node.measure, node.value, n,
#           short_name and the outcome column named by `dv`.
#
# Returns:
#   A one-row data frame with columns dv, iv and t.
get_betas <- function(iv, dv, this.d) {
  # expand.grid() hands these over as factors; indexing with a factor would
  # use its integer codes, so convert to plain strings first.
  iv.name <- as.character(iv)
  dv.name <- as.character(dv)
  stopifnot(dv.name %in% names(this.d))

  d <- this.d %>%
    filter(node.measure == iv.name) %>%
    filter(!is.na(short_name))

  # Pick the outcome by NAME. The previous positional gather(c(-1:-4, -8))
  # silently depended on the exact column order of this.d.
  d$dv.value <- d[[dv.name]]

  model <- summary(lmer(dv.value ~ node.value + n + (1 | short_name), d))
  t.node.measure <- model$coefficients["node.value", "t value"]

  data.frame(dv = dv, iv = iv, t = t.node.measure)
}

# Every predictor (node measure) crossed with every outcome (effect size).
model.combos <- expand.grid(
  ivs = c("mean.degree", "log.mean.closeness"),
  dvs = c("d_calc", "residual.d")
)

# One model per combination; flag |t| > 2. map2_df() already row-binds the
# one-row results.
map2_df(model.combos$ivs, model.combos$dvs, get_betas,
        this.d = md.cr.df_all) %>%
  mutate(large = ifelse(abs(t) > 2, "*", "")) %>%
  kable()
dv iv t large
d_calc mean.degree -4.0097252 *
d_calc log.mean.closeness 1.4180078
residual.d mean.degree -2.3031564 *
residual.d log.mean.closeness 0.1550964

collaboration and authors

# Author names of each WoS record (AU field split on ";"), keyed by DOI.
authors <- setNames(strsplit(wos$AU, ";"), wos$DI)

# For each paper: mean degree / log mean closeness of its authors in the
# collaboration network, reshaped to long format and joined to paper.data.
md.ca.df_all <- authors %>%
  map(getNodeMeans,
      this.analysis = "collaboration",
      this.network = "authors") %>%
  bind_rows() %>%
  mutate(node.measure = c("mean.degree", "log.mean.closeness")) %>%
  gather("doi", "node.value", -node.measure) %>%
  left_join(paper.data)

makeNodePlots(md.ca.df_all, iv = "mean.degree")

makeNodePlots(md.ca.df_all, iv = "log.mean.closeness")

MODELS

# One model per predictor/outcome combination on the collaboration data;
# flag |t| > 2. map2_df() already row-binds the one-row results.
map2_df(model.combos$ivs, model.combos$dvs, get_betas,
        this.d = md.ca.df_all) %>%
  mutate(large = ifelse(abs(t) > 2, "*", "")) %>%
  kable()
dv iv t large
d_calc mean.degree 0.3558275
d_calc log.mean.closeness 1.8369987
residual.d mean.degree 0.1318775
residual.d log.mean.closeness 2.1361326 *

coupling and authors

Two articles are said to be bibliographically coupled if at least one author in a cited source appears in the bibliographies of both articles (Kessler, 1963).

# For each paper: mean degree / log mean closeness of its `authors` entries in
# the coupling network, reshaped to long format and joined to paper.data.
md.co.df_all <- authors %>%
  map(getNodeMeans,
      this.analysis = "coupling",
      this.network = "authors") %>%
  bind_rows() %>%
  mutate(node.measure = c("mean.degree", "log.mean.closeness")) %>%
  gather("doi", "node.value", -node.measure) %>%
  left_join(paper.data)

makeNodePlots(md.co.df_all, iv = "mean.degree")

makeNodePlots(md.co.df_all, iv = "log.mean.closeness")

MODELS

# One model per predictor/outcome combination on the coupling data;
# flag |t| > 2. map2_df() already row-binds the one-row results.
map2_df(model.combos$ivs, model.combos$dvs, get_betas,
        this.d = md.co.df_all) %>%
  mutate(large = ifelse(abs(t) > 2, "*", "")) %>%
  kable()
dv iv t large
d_calc mean.degree -2.6410704 *
d_calc log.mean.closeness 0.9207871
residual.d mean.degree -1.4190326
residual.d log.mean.closeness 1.7253676

co-occurrences and keywords

# Keywords of each WoS record (ID field split on ";"), keyed by DOI.
keywords <- setNames(strsplit(wos$ID, ";"), wos$DI)

# For each paper: mean degree / log mean closeness of its keywords in the
# co-occurrence network, reshaped to long format and joined to paper.data.
md.cok.df_all <- keywords %>%
  map(getNodeMeans,
      this.analysis = "co-occurrences",
      this.network = "keywords") %>%
  bind_rows() %>%
  mutate(node.measure = c("mean.degree", "log.mean.closeness")) %>%
  gather("doi", "node.value", -node.measure) %>%
  left_join(paper.data)

makeNodePlots(md.cok.df_all, iv = "mean.degree")

makeNodePlots(md.cok.df_all, iv = "log.mean.closeness")

MODELS

# One model per predictor/outcome combination on the keyword co-occurrence
# data; flag |t| > 2. map2_df() already row-binds the one-row results.
map2_df(model.combos$ivs, model.combos$dvs, get_betas,
        this.d = md.cok.df_all) %>%
  mutate(large = ifelse(abs(t) > 2, "*", "")) %>%
  kable()
dv iv t large
d_calc mean.degree 0.4312527
d_calc log.mean.closeness 4.1688149 *
residual.d mean.degree 0.7054020
residual.d log.mean.closeness 2.0978467 *

Summary

Closeness: “Closeness lies in the interval [0,1]: nodes with closeness approaching 1 are nodes with short distance from the other nodes, while nodes with low closeness are distant from the other nodes. For instance, if a node is directly connected to each other node, then its closeness is 1, while an isolated node has closeness equal to 1/n.”

  • mean degree is negatively related to d for co-citation and references (both d and resid d) - citations more networked, smaller effect size
  • closeness is positively related to d for collaboration and authors (just resid d) - authors more central, bigger effect size
  • mean degree is negatively related to d for coupling and authors (just d) - if authors co-occur in bibliographies, smaller effect size
  • closeness is positively related to d for co-occurrences and keywords (both d and resid d) - if a paper's keywords are central, bigger effect size