library(tidyverse)
library(tosr)
library(bibliometrix)
library(lubridate)
library(igraph)
library(tidytext)
library(wordcloud)
library(rebus)
library(ggrepel) # improve donut visualization
This template is based on this paper:
https://revistas.ucm.es/index.php/REVE/article/view/75566/4564456557467
For a detailed explanation of how to use it, please watch this video.
wos_scopus_tos <-
  tosr::tosr_load("scopus_277.bib", # Load and merge the WoS and Scopus search results
                  "wos_savedrecs_18.txt")
[1] 2
Converting your scopus collection into a bibliographic dataframe
Done!
Generating affiliation field tag AU_UN from C1: Done!
Converting your wos collection into a bibliographic dataframe
Done!
Generating affiliation field tag AU_UN from C1: Done!
12 duplicated documents have been removed
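tosr_load() returns a list; the components used throughout this template are df (the merged WoS/Scopus dataframe), graph (the citation network), and nodes. An optional inspection, not in the original template:
str(wos_scopus_tos, max.level = 1) # List the top-level components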
tree_of_science <-
tosr::tosR("scopus_277.bib", # Create tree of science
"wos_savedrecs_18.txt")
[1] 2
Converting your scopus collection into a bibliographic dataframe
Done!
Generating affiliation field tag AU_UN from C1: Done!
Converting your wos collection into a bibliographic dataframe
Done!
Generating affiliation field tag AU_UN from C1: Done!
12 duplicated documents have been removed
Computing TOS SAP
Computing TOS subfields
wos <-
  bibliometrix::convert2df("wos_savedrecs_18.txt", # Create dataframe from WoS file
                           dbsource = "wos",
                           format = "plaintext")
Converting your wos collection into a bibliographic dataframe
Done!
Generating affiliation field tag AU_UN from C1: Done!
scopus <-
bibliometrix::convert2df("scopus_277.bib", # Create dataframe from scopus file
dbsource = "scopus",
format = "bibtex")
Converting your scopus collection into a bibliographic dataframe
Done!
Generating affiliation field tag AU_UN from C1: Done!
table_1 <-
  tibble(wos = nrow(wos), # Document counts per database
         scopus = nrow(scopus),
         total = nrow(wos_scopus_tos$df))
table_1
main_languages <-
wos_scopus_tos$df |>
select(LA) |>
separate_rows(LA, sep = "; ") |>
count(LA, sort = TRUE) |>
slice(1:5)
other_languages <-
  wos_scopus_tos$df |>
  select(LA) |>
  separate_rows(LA, sep = "; ") |>
  count(LA, sort = TRUE) |>
  slice(6:n()) |> # n() (not n) gives the number of rows; bare n raises a warning
  summarise(n = sum(n)) |>
  mutate(LA = "OTHERS") |>
  select(LA, n)
languages <-
main_languages |>
bind_rows(other_languages) |>
mutate(percentage = n / sum(n),
percentage = round(percentage,
digits = 2) ) |>
rename(language = LA) |>
select(language, percentage, count = n)
languages
df <- languages |>
rename(value = percentage, group = language) |>
mutate(value = value * 100) |>
select(value, group)
df2 <- df |>
  mutate(csum = rev(cumsum(rev(value))), # Cumulative sum from the bottom slice up
         pos = value / 2 + lead(csum, 1), # Slice midpoints for label placement
         pos = if_else(is.na(pos), value / 2, pos)) # The last slice has no lead()
ggplot(df, aes(x = 2, y = value, fill = fct_inorder(group))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  geom_label_repel(data = df2,
                   aes(y = pos, label = paste0(value, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  labs(title = "Languages") +
  guides(fill = guide_legend(title = "")) +
  theme_void() + # theme_void() must come before theme(), or it resets the title setting
  theme(plot.title = element_text(hjust = 0.5, size = 18)) +
  xlim(0.5, 2.5)
wos_annual_production <-
wos |>
select(PY) |>
count(PY, sort = TRUE) |>
na.omit() |>
filter(PY >= 2000,
PY < year(today())) |>
mutate(ref_type = "wos")
scopus_annual_production <-
scopus |>
select(PY) |>
count(PY, sort = TRUE) |>
na.omit() |>
filter(PY >= 2000,
PY < year(today())) |>
mutate(ref_type = "scopus")
total_annual_production <-
wos_scopus_tos$df |>
select(PY) |>
count(PY, sort = TRUE) |>
na.omit() |>
filter(PY >= 2000,
PY < year(today())) |>
mutate(ref_type = "total")
wos_scopus_total_annual_production <-
  wos_annual_production |>
  bind_rows(scopus_annual_production,
            total_annual_production)
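The three blocks above run the same pipeline on three dataframes; a compact equivalent, sketched under the assumption that each dataframe carries the PY field:
count_annual_production <- function(df, label) {
  df |>
    select(PY) |>
    count(PY, sort = TRUE) |>
    na.omit() |>
    filter(PY >= 2000,
           PY < year(today())) |>
    mutate(ref_type = label)
}
# bind_rows(count_annual_production(wos, "wos"),
#           count_annual_production(scopus, "scopus"),
#           count_annual_production(wos_scopus_tos$df, "total"))
# reproduces wos_scopus_total_annual_production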
figure_2_data <-
  wos_scopus_total_annual_production |>
  pivot_wider(names_from = ref_type,
              values_from = n,
              values_fill = 0) |> # Years missing from a database become 0, not NA
  arrange(desc(PY))
figure_2_data
wos_scopus_total_annual_production |>
ggplot(aes(x = PY, y = n, color = ref_type)) +
geom_line() +
labs(title = "Annual Scientific Production",
x = "years",
y = "papers") +
theme(plot.title = element_text(hjust = 0.5))
data_biblio_wos <- biblioAnalysis(wos)
wos_country <-
data_biblio_wos$Countries |>
data.frame() |>
mutate(database = "wos") |>
select(country = Tab, papers = Freq, database ) |>
arrange(desc(papers))
data_biblio_scopus <- biblioAnalysis(scopus)
scopus_country <-
data_biblio_scopus$Countries |>
data.frame() |>
mutate(database = "scopus") |>
select(country = Tab, papers = Freq, database ) |>
arrange(desc(papers))
data_biblio_total <- biblioAnalysis(wos_scopus_tos$df)
total_country <-
data_biblio_total$Countries |>
data.frame() |>
mutate(database = "total") |>
select(country = Tab, papers = Freq, database ) |>
arrange(desc(papers))
wos_scopus_total_country <-
wos_country |>
bind_rows(scopus_country,
total_country) |>
mutate(country = as.character(country)) |>
pivot_wider(names_from = database,
values_from = papers) |>
arrange(desc(total)) |>
slice(1:10) |>
mutate(percentage = total / (table_1 |> pull(total)),
percentage = round(percentage, digits = 2))
wos_scopus_total_country
wos_journal <-
wos |>
filter(str_detect(DT, "ARTICLE")) |>
select(journal = SO) |>
na.omit() |>
count(journal, sort = TRUE) |>
slice(1:20) |>
rename(publications = n) |>
mutate(database = "wos")
scopus_journal <-
scopus |>
filter(str_detect(DT, "ARTICLE")) |>
select(journal = SO) |>
na.omit() |>
count(journal, sort = TRUE) |>
slice(1:20) |>
rename(publications = n) |>
mutate(database = "scopus")
total_journal <-
wos_scopus_tos$df |>
filter(str_detect(DT, "ARTICLE")) |>
select(journal = SO) |>
na.omit() |>
count(journal, sort = TRUE) |>
slice(1:20) |>
rename(publications = n) |>
mutate(database = "total")
wos_scopus_total_journal <-
wos_journal |>
bind_rows(scopus_journal,
total_journal) |>
pivot_wider(names_from = database,
values_from = publications) |>
arrange(desc(total)) |>
slice(1:10) |>
  mutate(percentage = total / (table_1 |> pull(total)),
         percentage = round(percentage, digits = 2))
wos_scopus_total_journal
wos_scopus_country_collab_matrix <-
biblioNetwork(M = wos_scopus_tos$df,
analysis = "collaboration",
network = "countries")
plot_country_collab <-
  networkPlot(wos_scopus_country_collab_matrix,
              weighted = TRUE, n = 30,
              Title = "Country Collaboration Network",
              type = "fruchterman",
              size = TRUE,
              edgesize = 5,
              labelsize = 0.7)
wos_scopus_keyword_co_occurrence_matrix <-
biblioNetwork(M = wos_scopus_tos$df,
analysis = "co-occurrences",
network = "keywords",
sep = ";")
plot_net_co_occurrence <-
  networkPlot(wos_scopus_keyword_co_occurrence_matrix,
              weighted = TRUE, n = 30,
              Title = "Keyword Co-occurrence Network",
              type = "fruchterman",
              size = TRUE,
              edgesize = 5,
              labelsize = 0.7)
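bibliometrix also provides networkStat() for descriptive indices of these networks; an optional check, assuming default settings:
net_stat <- networkStat(wos_scopus_keyword_co_occurrence_matrix)
summary(net_stat, k = 10) # Print the main network statistics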
tree_of_science
Finding the clusters
nodes <- # Create a dataframe with the full names of the articles
tibble(name = V(wos_scopus_tos$graph)$name) |>
left_join(wos_scopus_tos$nodes,
by = c("name" = "ID_TOS"))
wos_scopus_citation_network_1 <- # Add the article names to the citation network
wos_scopus_tos$graph |>
igraph::set.vertex.attribute(name = "full_name",
index = V(wos_scopus_tos$graph)$name,
value = nodes$CITE)
nodes_1 <- # Create a dataframe with subfields (clusters)
tibble(name = V(wos_scopus_citation_network_1)$name,
cluster = V(wos_scopus_citation_network_1)$subfield,
full_name = V(wos_scopus_citation_network_1)$full_name)
nodes_2 <- # Renumber the clusters by size (1 = largest)
nodes_1 |>
count(cluster, sort = TRUE) |>
mutate(cluster_1 = row_number()) |>
select(cluster, cluster_1)
nodes_3 <-
  nodes_1 |>
  left_join(nodes_2, by = "cluster") |> # Explicit join key silences the console message
  rename(subfield = cluster_1) |>
  select(name, full_name, subfield)
edge_list <-
get.edgelist(wos_scopus_citation_network_1) |>
data.frame() |>
rename(Source = X1, Target = X2)
wos_scopus_citation_network <-
graph.data.frame(d = edge_list,
directed = TRUE,
vertices = nodes_3)
wos_scopus_citation_network |>
summary()
IGRAPH 4500b06 DN-- 2963 7642 --
+ attr: name (v/c), full_name (v/c), subfield (v/n)
Choosing clusters
We propose the tipping-point criterion to choose the number of clusters; see this paper:
https://www.nature.com/articles/s41598-021-85041-8
clusters <-
tibble(cluster = V(wos_scopus_citation_network)$subfield) |>
count(cluster, sort = TRUE)
clusters |>
ggplot(aes(x = reorder(cluster, n), y = n)) +
geom_point()
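The elbow can also be located numerically; a minimal sketch, assuming the tipping point is read as the largest drop between consecutive cluster sizes (the paper's exact criterion may differ):
cluster_sizes <- clusters |>
  pull(n) # Already in decreasing order from count(sort = TRUE)
# Position of the biggest drop = number of clusters to keep before the tipping point
which.max(-diff(cluster_sizes))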
Removing the clusters that were not chosen
wos_scopus_citation_network_clusters <-
  wos_scopus_citation_network |>
  delete.vertices(which(!V(wos_scopus_citation_network)$subfield %in% 1:4)) # Keep only clusters 1-4
wos_scopus_citation_network_clusters |>
summary()
IGRAPH 7df63b6 DN-- 1962 4612 --
+ attr: name (v/c), full_name (v/c), subfield (v/n)
pal <- brewer.pal(8, "Dark2") # Color palette for the wordclouds
nodes_full_data <-
tibble(name = V(wos_scopus_citation_network)$name,
cluster = V(wos_scopus_citation_network)$subfield,
full_name = V(wos_scopus_citation_network)$full_name)
cluster_1 <-
wos_scopus_citation_network |>
delete.vertices(which(V(wos_scopus_citation_network)$subfield != 1))
cluster_1_page_rank <-
cluster_1 |>
set.vertex.attribute(name = "page_rank",
value = page_rank(cluster_1)$vector)
cluster_1_df <-
tibble(name = V(cluster_1_page_rank)$name,
full_name = V(cluster_1_page_rank)$full_name,
page_rank = V(cluster_1_page_rank)$page_rank,
cluster = V(cluster_1_page_rank)$subfield)
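An optional check, not in the original template: the seminal articles of a cluster are the ones with the highest PageRank.
cluster_1_df |>
  arrange(desc(page_rank)) |>
  slice(1:10) |>
  select(full_name, page_rank)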
nodes_full_data |>
filter(cluster == 1) |>
select(full_name) |>
mutate(full_name = str_extract(full_name, SPC %R% # Regular expressions
one_or_more(WRD) %R%
SPC %R%
one_or_more(or(WRD, ANY_CHAR))),
full_name = str_remove(full_name, OPEN_PAREN %R%
repeated(DGT, 4) %R%
CLOSE_PAREN %R%
one_or_more(or(WRD,ANY_CHAR))),
full_name = str_trim(full_name)) |>
unnest_tokens(output = word, input = full_name) |> # Tokenization
  anti_join(stop_words, by = "word") |> # Removing stop words
  filter(word != "doi",
         !str_detect(word, "[0-9]")) |> # Drop tokens with digits (WoS artifacts)
  filter(!str_detect(word, # Generic domain words excluded from the cloud
                     "citation|research|analysis|science|scientometric|vulnerability")) |>
  count(word, sort = TRUE) |>
  with(wordcloud(word,
                 n,
                 random.order = FALSE,
                 max.words = 50,
                 colors = pal))
cluster_2 <-
wos_scopus_citation_network |>
delete.vertices(which(V(wos_scopus_citation_network)$subfield != 2))
cluster_2_page_rank <-
cluster_2 |>
set.vertex.attribute(name = "page_rank",
value = page_rank(cluster_2)$vector)
cluster_2_df <-
tibble(name = V(cluster_2_page_rank)$name,
full_name = V(cluster_2_page_rank)$full_name,
page_rank = V(cluster_2_page_rank)$page_rank,
cluster = V(cluster_2_page_rank)$subfield)
nodes_full_data |>
filter(cluster == 2) |>
select(full_name) |>
mutate(full_name = str_extract(full_name, SPC %R% # Regular expressions
one_or_more(WRD) %R%
SPC %R%
one_or_more(or(WRD, ANY_CHAR))),
full_name = str_remove(full_name, OPEN_PAREN %R%
repeated(DGT, 4) %R%
CLOSE_PAREN %R%
one_or_more(or(WRD,ANY_CHAR))),
full_name = str_trim(full_name)) |>
unnest_tokens(output = word, input = full_name) |>
  anti_join(stop_words, by = "word") |> # Removing stop words
  filter(word != "doi",
         !str_detect(word, "[0-9]")) |> # Drop tokens with digits (WoS artifacts)
  filter(!str_detect(word, # Generic domain words excluded from the cloud
                     "citation|research|analysis|science|scientometric|vulnerability")) |>
  count(word, sort = TRUE) |>
  with(wordcloud(word,
                 n,
                 random.order = FALSE,
                 max.words = 50,
                 colors = pal))
cluster_3 <-
wos_scopus_citation_network |>
delete.vertices(which(V(wos_scopus_citation_network)$subfield != 3))
cluster_3_page_rank <-
cluster_3 |>
set.vertex.attribute(name = "page_rank",
value = page_rank(cluster_3)$vector)
cluster_3_df <-
tibble(name = V(cluster_3_page_rank)$name,
full_name = V(cluster_3_page_rank)$full_name,
page_rank = V(cluster_3_page_rank)$page_rank,
cluster = V(cluster_3_page_rank)$subfield)
nodes_full_data |>
filter(cluster == 3) |>
select(full_name) |>
mutate(full_name = str_extract(full_name, SPC %R% # Regular expressions
one_or_more(WRD) %R%
SPC %R%
one_or_more(or(WRD, ANY_CHAR))),
full_name = str_remove(full_name, OPEN_PAREN %R%
repeated(DGT, 4) %R%
CLOSE_PAREN %R%
one_or_more(or(WRD,ANY_CHAR))),
full_name = str_trim(full_name)) |>
unnest_tokens(output = word, input = full_name) |>
  anti_join(stop_words, by = "word") |> # Removing stop words
  filter(word != "doi",
         !str_detect(word, "[0-9]")) |> # Drop tokens with digits (WoS artifacts)
  filter(!str_detect(word, # Generic domain words excluded from the cloud
                     "citation|research|analysis|science|scientometric|vulnerability")) |>
  count(word, sort = TRUE) |>
  with(wordcloud(word,
                 n,
                 random.order = FALSE,
                 max.words = 50,
                 colors = pal))
cluster_4 <-
wos_scopus_citation_network |>
delete.vertices(which(V(wos_scopus_citation_network)$subfield != 4))
cluster_4_page_rank <-
cluster_4 |>
set.vertex.attribute(name = "page_rank",
value = page_rank(cluster_4)$vector)
cluster_4_df <-
tibble(name = V(cluster_4_page_rank)$name,
full_name = V(cluster_4_page_rank)$full_name,
page_rank = V(cluster_4_page_rank)$page_rank,
cluster = V(cluster_4_page_rank)$subfield)
nodes_full_data |>
filter(cluster == 4) |>
select(full_name) |>
mutate(full_name = str_extract(full_name, SPC %R% # Regular expressions
one_or_more(WRD) %R%
SPC %R%
one_or_more(or(WRD, ANY_CHAR))),
full_name = str_remove(full_name, OPEN_PAREN %R%
repeated(DGT, 4) %R%
CLOSE_PAREN %R%
one_or_more(or(WRD,ANY_CHAR))),
full_name = str_trim(full_name)) |>
unnest_tokens(output = word, input = full_name) |>
  anti_join(stop_words, by = "word") |> # Removing stop words
  filter(word != "doi",
         !str_detect(word, "[0-9]")) |> # Drop tokens with digits (WoS artifacts)
  filter(!str_detect(word, # Generic domain words excluded from the cloud
                     "citation|research|analysis|science|scientometric|vulnerability")) |>
  count(word, sort = TRUE) |>
  with(wordcloud(word,
                 n,
                 random.order = FALSE,
                 max.words = 50,
                 colors = pal))
Warnings: ten words (competitive, entrepreneurial, entrepreneurship, perspective, framework, knowledge, development, practice, organization, technology) could not be fit on the page and were not plotted.
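The four wordcloud blocks above differ only in the cluster number; a sketch of a helper that wraps the same pipeline (it assumes pal, nodes_full_data, and the rebus patterns used above):
plot_cluster_wordcloud <- function(cluster_id) {
  nodes_full_data |>
    filter(cluster == cluster_id) |>
    select(full_name) |>
    mutate(full_name = str_extract(full_name, SPC %R%
                                     one_or_more(WRD) %R%
                                     SPC %R%
                                     one_or_more(or(WRD, ANY_CHAR))),
           full_name = str_remove(full_name, OPEN_PAREN %R%
                                    repeated(DGT, 4) %R%
                                    CLOSE_PAREN %R%
                                    one_or_more(or(WRD, ANY_CHAR))),
           full_name = str_trim(full_name)) |>
    unnest_tokens(output = word, input = full_name) |>
    anti_join(stop_words, by = "word") |>
    filter(word != "doi",
           !str_detect(word, "[0-9]"),
           !str_detect(word, "citation|research|analysis|science|scientometric|vulnerability")) |>
    count(word, sort = TRUE) |>
    with(wordcloud(word, n,
                   random.order = FALSE,
                   max.words = 50,
                   colors = pal))
}
# plot_cluster_wordcloud(1) reproduces the cluster 1 cloud; clusters 2-4 work the same way.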
write_csv(table_1, "table_1.csv") # Exporting table 1
write_csv(wos_scopus_total_country, "table_2.csv") # Exporting table 2
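The next export references wos_scopus_authors, which is never created in this section; a minimal sketch of one plausible definition (top authors from the merged dataframe, assuming the AU field is separated by ";"):
wos_scopus_authors <-
  wos_scopus_tos$df |>
  separate_rows(AU, sep = ";") |>
  count(AU, sort = TRUE) |>
  slice(1:10) |>
  rename(author = AU, papers = n)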
write_csv(wos_scopus_authors, "table_3.csv") # Exporting table 3
write_csv(wos_scopus_total_journal, "table_4.csv") # Exporting table 4
write_csv(languages, "figure_1.csv") # Exporting data figure 1
write_csv(figure_2_data, "figure_2.csv") # Exporting data figure 2
write.graph(wos_scopus_citation_network, "citation_network_full.graphml", "graphml") # Exporting graph
write.graph(wos_scopus_citation_network_clusters,
"wos_scopus_citation_network_clusters.graphml",
"graphml")
write_csv(tree_of_science, "tree_of_science.csv") # Exporting Tree of Science
write_csv(cluster_1_df, "cluster_1.csv") # Exporting cluster 1
write_csv(cluster_2_df, "cluster_2.csv") # Exporting cluster 2
write_csv(cluster_3_df, "cluster_3.csv") # Exporting cluster 3
write_csv(cluster_4_df, "cluster_4.csv") # Exporting cluster 4
write_csv(nodes_full_data, "nodes_full_data.csv") # Exporting all nodes