Metalab is a dataset of effect sizes across 13 different phenomena in experimental language acquisition research (n > 850 effect sizes). Here, I ask whether properties of the community structure of a literature are predictive of measures of bias in that literature.
Summary of key findings:
We merge the Metalab dataset with the Web of Science database based on DOIs. MAs with 5 or fewer papers are excluded. This leaves us with 12 MAs. Note that we’re losing papers here in two ways - those that don’t have DOIs (e.g. conference papers, n = approx. 40) and those with DOIs that are not in Web of Science (n = approx. 50).
We examine four different network-types using the Web of Science data:
Then for each MA (n = 12), we create a network of each of the four network-types. For each of those networks (N = 48), we quantify five properties of the network:
Plotted below are the network metrics for each of the 48 networks.
# Define network function.
#' Build one bibliometric network and return it for plotting or as statistics.
#'
#' Joins the supplied papers to the global Web of Science table `wos`
#' (matching `doi` to `DI`), builds the requested network with
#' `biblioNetwork()`, converts it to an undirected igraph graph and then
#' either lays it out for plotting or summarises it.
#'
#' @param my.paper.data Data frame of papers; must contain a `doi` column.
#' @param this.analysis Value passed to `biblioNetwork(analysis = )`,
#'   e.g. "co-citation".
#' @param this.network Value passed to `biblioNetwork(network = )`,
#'   e.g. "authors".
#' @param type Either "plot" or "stats".
#' @param remove.multiple Passed to `igraph::simplify(remove.multiple = )`.
#' @param noloops Passed to `igraph::simplify(remove.loops = )`.
#' @return For `type = "plot"`, the data frame produced by `ggnetwork()`.
#'   For `type = "stats"`, a one-row data.frame with columns `analysis`,
#'   `network`, `Q`, `n.groups`, `closeness`, `betweeness` and `degree`.
#'   `NULL` when the network is empty (too-small literatures) or when `type`
#'   is neither "plot" nor "stats"; `bind_rows()` silently drops such entries.
getGraph <- function(my.paper.data,
                     this.analysis,
                     this.network,
                     type,
                     remove.multiple = FALSE,
                     noloops = FALSE) {
  # note: when remove.multiple = TRUE, the clustering function fails on some
  # networks

  # get web of science data for domain (`wos` is read from the calling
  # environment, it is not an argument)
  this.wos <- left_join(my.paper.data, wos, by = c("doi" = "DI")) %>%
    as.data.frame() # necessary for biblioNetwork

  # field separator expected by biblioNetwork (bibliometrix_1_6)
  if (this.analysis == "co-citation") {
    sep <- ". "
  } else if (this.network == "authors") {
    sep <- ";"
  } else {
    sep <- "; "
  }

  # make the network
  this.net <- biblioNetwork(this.wos,
                            analysis = this.analysis,
                            network = this.network,
                            sep = sep)

  # gets rid of too-small networks: nothing to plot or to summarise
  if (length(this.net) == 0) {
    return(NULL)
  }

  # get network into useable form
  graph <- igraph::graph_from_adjacency_matrix(this.net, mode = "undirected")
  graph <- igraph::simplify(graph,
                            remove.multiple = remove.multiple,
                            remove.loops = noloops)

  if (type == "plot") {
    # make into df for tidy plotting
    return(ggnetwork(asNetwork(graph)))
  }

  if (type == "stats") {
    clustering <- cluster_leading_eigen(graph,
                                        options = list(maxiter = 1000000))
    # locals are prefixed so they do not shadow igraph's closeness()/degree()
    mean_closeness <- mean(estimate_closeness(graph, cutoff = 100))
    mean_betweenness <- mean(estimate_betweenness(graph, cutoff = 100))
    mean_degree <- mean(igraph::degree(graph))

    # column names (including the `betweeness` spelling) are relied on by the
    # downstream reshaping code, so they are kept as they were
    return(data.frame(analysis = this.analysis,
                      network = this.network,
                      Q = round(clustering$modularity, 4),
                      n.groups = length(clustering),
                      closeness = round(mean_closeness, 4),
                      betweeness = round(mean_betweenness, 4),
                      degree = mean_degree))
  }

  NULL
}
# Make all networks and get network measures
# Build the three parallel argument lists that pmap() walks over: one paper
# data frame, one analysis label and one network label per call to getGraph().
# assumes n.mas equals the number of distinct short_name values, so that the
# repeated list of data frames lines up with the labels -- TODO confirm
analyses_list <- rep(ANALYSES, each = n.mas)
networks_list <- rep(NETWORKS, each = n.mas)

# NOTE(review): mutate_each()/funs() are deprecated in current dplyr releases;
# kept here so the script keeps running on the dplyr version it was written for.
dfs <- paper.data %>%
  mutate_each(funs(as.factor)) %>%
  split(.$short_name) %>%
  rep(length(ANALYSES))

args <- list(dfs, analyses_list, networks_list)

# One row of network statistics per (meta-analysis, network type); the list
# names (short_name) become the `short_name` id column.
modularity.data <- args %>%
  pmap(getGraph, "stats") %>%
  bind_rows(.id = "short_name")

# Long format: one row per (meta-analysis, network type, network measure).
# The measure columns are selected by name rather than by position so that a
# change in column order cannot silently reshape the wrong columns.
modularity.data.long <- modularity.data %>%
  mutate(closeness = log(closeness),
         betweeness = log(betweeness),
         degree = log(degree)) %>%
  gather("network.measure", "network.value",
         Q, n.groups, closeness, betweeness, degree) %>%
  unite(network, analysis, network, sep = ".")

# Summary plot: one panel per (network measure, network type)
ggplot(modularity.data.long, aes(x = short_name, y = network.value)) +
  geom_bar(stat = "identity", aes(fill = short_name)) +
  facet_wrap(network.measure ~ network, ncol = 4, scales = "free") +
  ggtitle("Network summary statistics") +
  theme_bw() +
  theme(axis.text.x = element_blank())

For each meta-analysis, we examine three aspects of the literature:
Plotted below are the bias estimates for each of the literatures in our dataset.
all_mas <- read.csv("ES_data_for_networks2.csv") # overall ES data

# Long table of bias estimates: one row per (meta-analysis, bias measure).
# The effect size is taken in absolute value and tau2 is log-transformed.
# The measure columns are gathered by name rather than by position.
ma.es <- all_mas %>%
  select(short_name, overall.d.age, fsn_string, tau2) %>%
  mutate(overall.d.age = abs(overall.d.age),
         tau2 = log(tau2)) %>%
  rename(fail_safe_n = fsn_string) %>%
  gather("bias.measure", "bias.value", overall.d.age, fail_safe_n, tau2)

# Plot only the meta-analyses for which network statistics were computed
ma.es %>%
  filter(short_name %in% modularity.data.long$short_name) %>%
  ggplot(aes(x = short_name, y = bias.value)) +
  geom_bar(stat = "identity", aes(fill = short_name)) +
  facet_wrap(~bias.measure, scales = "free") +
  ggtitle("Bias summary statistics") +
  theme_bw() +
  theme(axis.text.x = element_blank())

Given properties of the networks and estimates of the scientific bias in each literature, we ask whether these two measures are correlated. Specifically, we predict the bias value (e.g. fail-safe-n) with the network value (e.g. modularity), controlling for the number of papers in the MA, and weighting the MAs by the number of studies in the literature. We fit a separate model for each bias-network pair, for each network type (e.g. co-citation references).
# Pair every network statistic with every bias estimate of the same
# meta-analysis. `short_name` is the only column the two long tables share,
# so the join key is spelled out instead of being guessed by left_join().
# NOTE(review): the join with `ns` still uses the implicit key because `ns`
# is defined elsewhere; presumably it carries short_name and n.papers.
# NOTE(review): as_data_frame() is deprecated in newer tibble releases in
# favour of as_tibble(); kept for compatibility with the original setup.
net.es <- left_join(modularity.data.long, ma.es, by = "short_name") %>%
  dplyr::as_data_frame() %>%
  left_join(ns)

# One weighted regression per (network type, bias measure, network measure):
# bias.value ~ n.papers + network.value, weighted by n.papers. Only the
# network.value coefficient is kept and flagged as significant at p < .05.
corr.sigs <- net.es %>%
  filter(is.finite(network.value) & is.finite(bias.value)) %>%
  group_by(network, bias.measure, network.measure) %>%
  do(tidy(lm(bias.value ~ n.papers + network.value,
             weights = .$n.papers, data = .))) %>%
  filter(term == "network.value") %>%
  mutate(sig.col = ifelse(p.value < .05, "sig", "nsig")) %>%
  select(estimate, sig.col) %>%
  # this is a hack: the geom_rect() layer in corr_plot() inherits the x/y
  # aesthetics (network.value, bias.value), so these columns must exist here
  mutate(network.value = Inf, bias.value = Inf)

# Define plotting functions
#' Draw the networks of every meta-analysis for one network type.
#'
#' @param d Paper data with a `short_name` column; it is split by that column
#'   and each piece is handed to `getGraph(..., "plot")`.
#' @param analysis Analysis label forwarded to `getGraph()`.
#' @param network Network label forwarded to `getGraph()`.
#' @return A ggplot object with one facet per meta-analysis, titled
#'   "<analysis>.<network>".
network_plot <- function(d, analysis, network) {
  plot_title <- paste(analysis, network, sep = ".")

  # one layout data frame per meta-analysis, stacked with its name as id
  papers_by_ma <- split(d, d$short_name)
  layouts <- map(papers_by_ma, getGraph, analysis, network, "plot")
  layout_df <- bind_rows(layouts, .id = "short_name")

  ggplot(layout_df, aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_edges(color = "grey50") +
    facet_wrap(~ short_name) +
    geom_nodes(aes(color = short_name)) +
    ggtitle(plot_title) +
    theme_blank() +
    theme(legend.position = "none")
}
#' Scatter plots of bias against network measures for one network type.
#'
#' Each panel (bias measure x network measure) shows the meta-analyses as
#' points with a linear fit; the panel background is shaded according to
#' whether the corresponding regression coefficient was significant.
#'
#' @param d Long table with columns `network`, `network.value`, `bias.value`,
#'   `n.papers`, `short_name`, `bias.measure` and `network.measure`.
#' @param corrs Table of regression results with columns `network`,
#'   `sig.col` ("sig" / "nsig") and the faceting columns.
#' @param analysis Analysis label; combined with `network` into the
#'   "<analysis>.<network>" name used to filter `d` and `corrs`.
#' @param network Network label.
#' @return A ggplot object.
corr_plot <- function(d, corrs, analysis, network) {
  networkname <- paste(analysis, network, sep = ".")

  # inside filter() `network` refers to the data column, not the argument
  ggplot(filter(d, network == networkname),
         aes(x = network.value, y = bias.value)) +
    geom_rect(data = filter(corrs, network == networkname),
              aes(fill = sig.col),
              xmin = -Inf, xmax = Inf,
              ymin = -Inf, ymax = Inf, alpha = 0.2) +
    geom_point(aes(size = n.papers, color = short_name)) +
    geom_smooth(method = "lm", color = "black") +
    ggtitle(networkname) +
    facet_grid(bias.measure ~ network.measure, scales = "free") +
    # named values: with an unnamed vector the colours are assigned by level
    # order, so a plot containing only "sig" panels would be drawn in grey
    scale_fill_manual(values = c(nsig = "grey99", sig = "red1")) +
    theme_bw() +
    theme(legend.position = "none")
}

p <- 3
# Network and correlation plots for the network type selected by the current p
network_plot(paper.data, ANALYSES[p], NETWORKS[p])
corr_plot(net.es, corr.sigs, ANALYSES[p], NETWORKS[p])

# Same pair of plots for the next network type
p <- 4
network_plot(paper.data, ANALYSES[p], NETWORKS[p])
corr_plot(net.es, corr.sigs, ANALYSES[p], NETWORKS[p])