## Get DOIs (obtained from https://apps.crossref.org/SimpleTextQuery/ by copy-pasting 50 at a time)
# Load the study_ID -> doi lookup table, keeping only rows that have a DOI.
dois <- read.csv("dois2.csv") %>%
  select(study_ID, doi) %>%
  filter(!is.na(doi) & doi != "") %>%
  # Some entries contain a prefix ending in ".org/" (e.g. a resolver URL).
  # Drop everything up to and including the first literal ".org/" so only the
  # bare DOI remains; entries without ".org/" pass through unchanged. The dot
  # is escaped so that strings such as "borg/" are not treated as a prefix, and
  # any later ".org/" inside the DOI itself is preserved.
  mutate(doi = str_replace(as.character(doi), "^.*?\\.org/", ""))

## Scrape data from web of science at paper level using bibliometrix package
# Build the search string for Web of Science: every DOI joined by " OR ".
# A vectorised paste() produces no leading separator and gives "" when there
# are no DOIs at all.
string <- paste(dois$doi, collapse = " OR ")
## search web of science-> save to marked list -> download marked list as .txt
# Read the exported records and convert them to a bibliometrix data frame.
D <- readFiles("savedrecs (3).txt")
M <- convert2df(D, dbsource = "isi", format = "plaintext") %>%
  # Normalise DI: lower-case it and turn the first "//" into "/".
  mutate(DI = str_replace(tolower(DI), "//", "/")) %>%
  arrange(DI) %>%
  filter(!is.na(DI)) %>%
  # Convert every column except AU, DE and ID to a factor.
  mutate_each(funs(as.factor), -AU, -DE, -ID)
## Articles extracted 100
## Articles extracted 163

# Extract the AU_CO field.
M <- metaTagExtraction(M, Field = "AU_CO", sep = ";")

## Get ES and ns at paper level
# Collapse all_data to one row per (lower-cased) DOI:
#   d_calc     - mean of d_calc, ignoring NAs
#   short_name - the first short_name found for that DOI
#   n          - sum of n_1 plus sum of n_2, ignoring NAs
paper.data <- all_data %>%
  inner_join(dois, by = "study_ID") %>%
  mutate(doi = tolower(doi)) %>%
  group_by(doi) %>%
  summarize(d_calc = mean(d_calc, na.rm = TRUE),
            short_name = short_name[1],
            n = sum(n_1, na.rm = TRUE) + sum(n_2, na.rm = TRUE))
# get web of science data for domain
# Count, per short_name, the papers whose DOI has a matching record in M.
ns <- inner_join(paper.data, M, by = c("doi" = "DI")) %>%
  group_by(short_name) %>%
  summarize(n.papers = n())
kable(ns)

## | short_name | n.papers |
## |---|---|
## | catBias | 7 |
## | gaze_following | 12 |
## | idspref | 13 |
## | inphondb-native | 23 |
## | inphondb-nonnative | 4 |
## | inworddb | 49 |
## | labadv | 13 |
## | mutex | 16 |
## | phonotactics | 2 |
## | pointing_concurrent | 4 |
## | pointing_longitudinal | 1 |
## | sounds | 6 |
## | symbolism | 5 |
## | word_recognition | 6 |
# Attach the paper counts from ns (joined on the shared columns), then keep
# only short_names with more than two papers, also dropping
# "pointing_longitudinal" explicitly.
paper.data <- left_join(paper.data, ns) %>%
  filter(n.papers > 2,
         short_name != "pointing_longitudinal")
n.mas <- length(unique(paper.data$short_name))

# Build the bibliometrix network matrix for one set of papers.
#
# my.paper.data  data frame with a `doi` column; joined to the global `M` on DI
# this.analysis  passed to biblioNetwork() as `analysis`
# this.network   passed to biblioNetwork() as `network`
#
# Returns the value of biblioNetwork().
getBiblioNetwork <- function(my.paper.data, this.analysis, this.network) {
  # get web of science data for domain
  my.M <- left_join(my.paper.data, M, by = c("doi" = "DI")) %>%
    as.data.frame() # necessary for biblioNetwork

  # co-citation uses ". " as the field separator, every other analysis "; "
  SEP <- if (this.analysis == "co-citation") ". " else "; "

  # make the network
  biblioNetwork(my.M,
                analysis = this.analysis,
                network = this.network,
                sep = SEP)
}

# Build a plottable (ggnetwork) data frame for one set of papers.
#
# my.paper.data    data frame with a `doi` column
# this.analysis    analysis type for biblioNetwork(), e.g. "co-citation"
# this.network     network type for biblioNetwork(), e.g. "references"
# remove.multiple  passed to igraph::simplify() as `remove.multiple`
# remove.isolates  if TRUE, vertices with degree 0 are dropped
# noloops          passed to igraph::simplify() as `remove.loops`
#
# Returns the result of ggnetwork(), or NULL when the network is empty or
# ggnetwork() fails on it.
getGraphData <- function(my.paper.data,
                         this.analysis,
                         this.network,
                         remove.multiple = FALSE,
                         remove.isolates = FALSE,
                         noloops = FALSE) {
  # FOR DEBUGGING
  # my.paper.data = filter(paper.data, short_name == unique(paper.data$short_name)[2])
  # this.analysis = "co-occurrences"
  # this.network = "keywords"

  this.net <- getBiblioNetwork(my.paper.data, this.analysis, this.network)

  # Nothing to plot: bail out before any graph object is needed.
  if (length(this.net) == 0) {
    return(NULL)
  }

  # munge the network
  bsk.network <- graph.adjacency(this.net, mode = "undirected")
  V(bsk.network)$id <- colnames(this.net)
  V(bsk.network)$size <- rep(5, length(V(bsk.network)))
  bsk.network <- igraph::simplify(bsk.network,
                                  remove.multiple = remove.multiple,
                                  remove.loops = noloops)
  if (isTRUE(remove.isolates)) {
    isolated <- which(igraph::degree(bsk.network, mode = "in") == 0)
    bsk.network <- igraph::delete.vertices(bsk.network, isolated)
  }

  # make into df for plotting
  gn <- asNetwork(bsk.network)

  # gets rid of too-small networks: a failing ggnetwork() call yields NULL.
  # The layout is computed once and that same result is returned.
  tryCatch(ggnetwork(gn), error = function(e) NULL)
}

# Compute network-level summary measures for one set of papers.
#
# my.paper.data    data frame with a `doi` column
# this.analysis    analysis type for biblioNetwork()
# this.network     network type for biblioNetwork()
# remove.multiple, remove.isolates, noloops
#                  accepted for signature parity with getGraphData(); unused
#
# Returns a one-row data frame with columns analysis, network, Q (modularity
# of the leading-eigenvector clustering), n.groups (number of clusters),
# closeness, betweeness and degree (means over vertices), or NULL when the
# network is empty.
getModularity <- function(my.paper.data,
                          this.analysis,
                          this.network,
                          remove.multiple = FALSE,
                          remove.isolates = FALSE,
                          noloops = FALSE) {
  this.net <- getBiblioNetwork(my.paper.data, this.analysis, this.network)

  if (length(this.net) == 0) {
    return(NULL)
  }

  # munge the network
  graph <- graph.adjacency(this.net, mode = "undirected")
  clustering <- cluster_leading_eigen(graph,
                                      options = list(maxiter = 1000000))
  mean.closeness <- mean(estimate_closeness(graph, cutoff = 100))
  mean.betweeness <- mean(estimate_betweenness(graph, cutoff = 100))
  mean.degree <- mean(igraph::degree(graph))

  # Column order matters: downstream code reshapes the measure columns by
  # position.
  data.frame(analysis = this.analysis,
             network = this.network,
             Q = round(clustering$modularity, 4),
             n.groups = length(clustering),
             closeness = round(mean.closeness, 4),
             betweeness = round(mean.betweeness, 4),
             degree = mean.degree)
}
# get network measures for all networks
# Four analysis/network pairings, each repeated n.mas times, so that the three
# argument lists line up element-wise for pmap().
analyses <- rep(
  c("co-citation", "coupling", "co-occurrences", "collaboration"),
  each = n.mas
)
networks <- rep(
  c("references", "authors", "keywords", "authors"),
  each = n.mas
)

# One data frame per short_name (all columns as factors), repeated four times.
dfs <- paper.data %>%
  mutate_each(funs(as.factor)) %>%
  split(.$short_name) %>%
  rep(times = 4)

args <- list(dfs, analyses, networks)
modularity.data <- bind_rows(pmap(args, getModularity), .id = "short_name")
# Long format: one row per short_name x network x measure. closeness,
# betweeness and degree are log-transformed first; Q and n.groups are not.
modularity.data.long <- modularity.data %>%
  mutate(closeness = log(closeness),
         betweeness = log(betweeness),
         degree = log(degree)) %>%
  # columns 4:8 of modularity.data, spelled out by name
  gather("network.measure", "network.value",
         Q, n.groups, closeness, betweeness, degree) %>%
  unite(network, analysis, network, sep = ".")
# One bar per short_name, one panel per measure x network with free scales;
# x-axis labels are hidden.
ggplot(modularity.data.long,
       aes(x = short_name, y = network.value, fill = short_name)) +
  geom_bar(stat = "identity") +
  facet_wrap(network.measure ~ network, ncol = 4, scales = "free") +
  theme_bw() +
  theme(axis.text.x = element_blank())
# Read the per-short_name effect-size / bias columns and reshape them to long
# format (one row per short_name x bias.measure).
ma.es <- read.csv("ES_data_for_networks.csv") %>%
  # mutate(d_var = ci_upper-ci_lower)%>%
  select(short_name, overall.d, overall.d.age,
         fsn_string, egg.random.z, stouffer.Z.pp) %>%
  # every selected column except short_name, i.e. columns 2:6
  gather("bias.measure", "bias.value", -short_name)
# filter(bias.measure %in% c("egg.random.z", "fsn_string"))

## Get model fits
# Combine the network measures with the bias measures and the paper counts;
# both joins use whatever columns the tables have in common.
d <- modularity.data.long %>%
  left_join(ma.es) %>%
  dplyr::as_data_frame() %>%
  left_join(ns)
# For each network x bias.measure x network.measure combination, fit
# bias.value ~ n.papers + network.value and keep the network.value term,
# labelled "sig" when p < .05 and "nsig" otherwise.
corrs <- d %>%
  filter(is.finite(network.value),
         is.finite(bias.value)) %>%
  group_by(network, bias.measure, network.measure) %>%
  # do(tidy(cor.test(.$bias.value, .$value, na.action = "na.pass"))) %>%
  do(tidy(lm(bias.value ~ n.papers + network.value, data = .))) %>%
  filter(term == "network.value") %>%
  mutate(sig.col = ifelse(p.value < .05, "sig", "nsig")) %>%
  select(estimate, sig.col) %>%
  # this is a hack -- presumably so corrs carries the x/y columns that the
  # scatter plots' inherited aes() expects; confirm before removing
  mutate(network.value = Inf, bias.value = Inf) %>%
  as.data.frame()

ANALYSIS <- "co-citation"
# Draw the reference network for the current ANALYSIS, one facet per
# short_name. Groups for which getGraphData() returns NULL drop out in
# bind_rows().
paper.data %>%
  split(.$short_name) %>%
  map(getGraphData,
      this.analysis = ANALYSIS,
      this.network = "references") %>%
  bind_rows(.id = "short_name") %>%
  ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = "grey50") +
  geom_nodes(aes(color = short_name)) +
  facet_wrap(~short_name) +
  ggtitle("references") +
  theme_blank() +
  theme(legend.position = "none")
# Bias measures against network measures for the co-citation reference
# networks: one point per short_name (sized by n.papers) with a linear fit,
# and each panel shaded by whether the network.value term in corrs is
# significant.
ggplot(filter(d, network == "co-citation.references"),
       aes(x = network.value, y = bias.value)) +
  geom_point(aes(size = n.papers, color = short_name)) +
  geom_smooth(method = "lm") +
  ggtitle("co-citation.references") +
  facet_grid(bias.measure ~ network.measure, scales = "free") +
  geom_rect(data = filter(corrs, network == "co-citation.references"),
            aes(fill = sig.col),
            xmin = -Inf, xmax = Inf,
            ymin = -Inf, ymax = Inf, alpha = 0.2) +
  # Name the values so each colour is tied to its label. Unnamed values are
  # assigned in level order, which would paint "sig" panels grey whenever no
  # "nsig" row is present.
  scale_fill_manual(values = c(nsig = "grey99", sig = "red1")) +
  theme_bw() +
  theme(legend.position = "none")
## Coupling references{.tabset}
### Networks
ANALYSIS <- "coupling"

# Draw the coupling reference network, one facet per short_name. Groups for
# which getGraphData() returns NULL drop out in bind_rows().
paper.data %>%
  split(.$short_name) %>%
  map(getGraphData,
      this.analysis = ANALYSIS,
      this.network = "references") %>%
  bind_rows(.id = "short_name") %>%
  ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = "grey50") +
  geom_nodes(aes(color = short_name)) +
  facet_wrap(~short_name) +
  ggtitle("references") +
  theme_blank() +
  theme(legend.position = "none")

ANALYSIS <- "co-occurrences"
# Draw the keyword network for the current ANALYSIS, one facet per
# short_name. Groups for which getGraphData() returns NULL drop out in
# bind_rows().
paper.data %>%
  split(.$short_name) %>%
  map(getGraphData,
      this.analysis = ANALYSIS,
      this.network = "keywords") %>%
  bind_rows(.id = "short_name") %>%
  ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = "grey50") +
  geom_nodes(aes(color = short_name)) +
  facet_wrap(~short_name) +
  ggtitle("keywords") +
  theme_blank() +
  theme(legend.position = "none")
# Bias measures against network measures for the keyword co-occurrence
# networks: one point per short_name (sized by n.papers) with a linear fit,
# and each panel shaded by whether the network.value term in corrs is
# significant.
ggplot(filter(d, network == "co-occurrences.keywords"),
       aes(x = network.value, y = bias.value)) +
  geom_point(aes(size = n.papers, color = short_name)) +
  geom_smooth(method = "lm") +
  ggtitle("co-occurrences.keywords") +
  facet_grid(bias.measure ~ network.measure, scales = "free") +
  geom_rect(data = filter(corrs, network == "co-occurrences.keywords"),
            aes(fill = sig.col),
            xmin = -Inf, xmax = Inf,
            ymin = -Inf, ymax = Inf, alpha = 0.2) +
  # Name the values so each colour is tied to its label. Unnamed values are
  # assigned in level order, which would paint "sig" panels grey whenever no
  # "nsig" row is present.
  scale_fill_manual(values = c(nsig = "grey99", sig = "red1")) +
  theme_bw() +
  theme(legend.position = "none")
## Paper-level analyses
# Compute per-vertex network measures for one set of papers.
#
# my.paper.data    data frame with a `doi` column; joined to the global `M`
#                  on DI
# this.analysis    analysis type for biblioNetwork(), e.g. "co-citation"
# this.network     network type for biblioNetwork(), e.g. "references"
# remove.multiple, remove.isolates, noloops
#                  accepted for signature parity with the other network
#                  helpers; unused here
#
# Returns a data frame with one row per vertex of the network and columns
# analysis, network, closeness, betweeness and degree, or NULL when the
# network is empty.
getModularity_paper <- function(my.paper.data,
                                this.analysis,
                                this.network,
                                remove.multiple = FALSE,
                                remove.isolates = FALSE,
                                noloops = FALSE) {
  # get web of science data for domain
  my.M <- left_join(my.paper.data, M, by = c("doi" = "DI")) %>%
    as.data.frame() # necessary for biblioNetwork

  # co-citation uses ". " as the field separator, every other analysis "; "
  SEP <- if (this.analysis == "co-citation") ". " else "; "

  # make the network
  this.net <- biblioNetwork(my.M,
                            analysis = this.analysis,
                            network = this.network,
                            sep = SEP)

  if (length(this.net) == 0) {
    return(NULL)
  }

  # munge the network
  graph <- graph.adjacency(this.net, mode = "undirected")
  data.frame(analysis = this.analysis,
             network = this.network,
             closeness = estimate_closeness(graph, cutoff = 100),
             betweeness = estimate_betweenness(graph, cutoff = 100),
             degree = igraph::degree(graph))
}
# One data frame per short_name (all columns as factors), repeated once for
# each of the four analysis/network pairings.
dfs <- paper.data %>%
  mutate_each(funs(as.factor)) %>%
  split(.$short_name) %>%
  rep(4)

# Each pairing is repeated once per data frame in a single copy of the split
# (length(dfs) / 4), so the three argument lists always have the same length
# no matter how many short_names survive the earlier filtering.
analyses <- rep(c("co-citation", "coupling", "co-occurrences", "collaboration"),
                each = length(dfs) / 4)
networks <- rep(c("references", "authors", "keywords", "authors"),
                each = length(dfs) / 4)
args <- list(dfs, analyses, networks)

# Per-vertex measures for every short_name x pairing; bind the per-network
# data frames into one table first, then keep the co-citation rows.
modularity.data <- args %>%
  pmap(getModularity_paper) %>%
  bind_rows(.id = "short_name") %>%
  filter(analysis == "co-citation")

# Run biblioAnalysis() separately for each short_name.
# NOTE(review): bind_rows() is applied to the biblioAnalysis() results here;
# confirm those results can actually be row-bound.
k <- M %>%
  left_join(paper.data, by = c("DI" = "doi")) %>%
  split(.$short_name) %>%
  map(biblioAnalysis) %>%
  bind_rows()
# NOTE(review): confirm that `summary` can be reached as bibliometrix::summary.
bibliometrix::summary(k, pause = FALSE)
# NOTE(review): `m` is not defined anywhere in the visible code -- was `k`
# (or `M`) intended as the object to summarise?
S <- summary(object = m, k = 10, pause = FALSE)

## TO DO: