library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tosr)
library(bibliometrix)
## To cite bibliometrix in publications, please use:
##
## Aria, M. & Cuccurullo, C. (2017) bibliometrix: An R-tool for comprehensive science mapping analysis,
## Journal of Informetrics, 11(4), pp 959-975, Elsevier.
##
##
## https://www.bibliometrix.org
##
##
## For information and bug reports:
## - Send an email to info@bibliometrix.org
## - Write a post on https://github.com/massimoaria/bibliometrix/issues
##
## Help us to keep Bibliometrix free to download and use by contributing with a small donation to support our research team (https://bibliometrix.org/donate.html)
##
##
## To start with the shiny web-interface, please digit:
## biblioshiny()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:lubridate':
##
## %--%, union
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(tidytext)
library(wordcloud)
## Loading required package: RColorBrewer
library(rebus)
##
## Attaching package: 'rebus'
## The following objects are masked from 'package:igraph':
##
## %c%, graph
## The following object is masked from 'package:stringr':
##
## regex
## The following object is masked from 'package:ggplot2':
##
## alpha
library(ggrepel) # improve donut visualization
This template is based on the following paper:
https://revistas.ucm.es/index.php/REVE/article/view/75566/4564456557467
For a detailed explanation of how to use it, please watch this video.
# Load both search exports through tosr. The returned object is read later in
# this script through its `df`, `graph`, and `nodes` elements.
wos_scopus_tos <- tosr::tosr_load(
  "savedrecs.txt",
  "scopus.bib"
)
## [1] 2
##
## Converting your wos collection into a bibliographic dataframe
##
## Done!
##
##
## Generating affiliation field tag AU_UN from C1: Done!
##
##
## Converting your scopus collection into a bibliographic dataframe
##
## Done!
##
##
## Generating affiliation field tag AU_UN from C1: Done!
##
##
## 170 duplicated documents have been removed
# Build the Tree of Science table from the same two search exports.
tree_of_science <- tosr::tosR(
  "savedrecs.txt",
  "scopus.bib"
)
## [1] 2
##
## Converting your wos collection into a bibliographic dataframe
##
## Done!
##
##
## Generating affiliation field tag AU_UN from C1: Done!
##
##
## Converting your scopus collection into a bibliographic dataframe
##
## Done!
##
##
## Generating affiliation field tag AU_UN from C1: Done!
##
##
## 170 duplicated documents have been removed
## Computing TOS SAP
## Computing TOS subfields
# Parse the Web of Science BibTeX export into a bibliometrix data frame.
# NOTE(review): this reads "savedrecs.bib" while the tosr calls read
# "savedrecs.txt" - confirm both files come from the same search.
wos <- bibliometrix::convert2df(
  "savedrecs.bib",
  format = "bibtex",
  dbsource = "wos"
)
##
## Converting your wos collection into a bibliographic dataframe
##
## Done!
##
##
## Generating affiliation field tag AU_UN from C1: Done!
# Parse the Scopus BibTeX export into a bibliometrix data frame.
scopus <- bibliometrix::convert2df(
  "scopus.bib",
  format = "bibtex",
  dbsource = "scopus"
)
##
## Converting your scopus collection into a bibliographic dataframe
##
## Done!
##
##
## Generating affiliation field tag AU_UN from C1: Done!
# Table 1: number of records per source, counted by the length of the SR
# column, plus the size of the merged tosr data frame.
table_1 <- tibble(
  wos = length(wos[["SR"]]),
  scopus = length(scopus[["SR"]]),
  total = length(wos_scopus_tos$df[["SR"]])
)
table_1
## # A tibble: 1 × 3
## wos scopus total
## <int> <int> <int>
## 1 339 856 1025
# Five most frequent languages in the merged data. LA can hold several
# "; "-separated values per record, so it is split into one row per language
# before counting.
main_languages <-
  separate_rows(select(wos_scopus_tos$df, LA), LA, sep = "; ") |>
  count(LA, sort = TRUE) |>
  slice_head(n = 5)
# Collapse every language after the five most frequent into one "OTHERS" row.
#
# The previous `slice(6:n)` referred to the count column `n` rather than the
# number of rows, which raised "numerical expression has ... elements: only
# the first used" and only worked because out-of-range indices are ignored.
# Dropping the first five rows with a negative index states the intent
# directly and also behaves when there are fewer than six languages
# (the "OTHERS" count is then 0).
other_languages <-
  wos_scopus_tos$df |>
  separate_rows(LA, sep = "; ") |>
  count(LA, sort = TRUE) |>
  slice(-seq_len(5)) |>
  summarise(n = sum(n)) |>
  mutate(LA = "OTHERS") |>
  select(LA, n)
# Figure 1 data: top languages plus the "OTHERS" row, with each row's share
# of the summed counts rounded to two decimals.
languages <-
  bind_rows(main_languages, other_languages) |>
  mutate(percentage = round(n / sum(n), digits = 2)) |>
  select(language = LA, percentage, count = n)
languages
## # A tibble: 6 × 3
## language percentage count
## <chr> <dbl> <int>
## 1 ENGLISH 0.85 880
## 2 RUSSIAN 0.04 41
## 3 <NA> 0.03 30
## 4 GERMAN 0.02 25
## 5 FRENCH 0.02 18
## 6 OTHERS 0.04 38
# Plot input for the donut chart: share expressed in percent, one row per
# language group.
df <- languages |>
  mutate(value = percentage * 100) |>
  select(value, group = language)

# Label positions: `csum` is the reverse cumulative sum of `value`; each label
# sits half a slice above the next row's `csum`. The last row has no next
# row, so it falls back to half of its own value.
df2 <- df %>%
  mutate(
    csum = rev(cumsum(rev(value))),
    pos = value / 2 + lead(csum, 1),
    pos = if_else(is.na(pos), value / 2, pos)
  )
# Donut chart of language shares. `df` supplies the slices and `df2` the
# label positions.
#
# `theme_void()` is a complete theme: when it was added after `theme(...)` it
# replaced those settings, so the centred, size-18 title was lost. It is now
# applied first and the title tweak is layered on top. The element_blank()
# overrides for the panel background and axis elements were dropped as
# redundant: theme_void() already blanks those elements.
ggplot(df, aes(x = 2, y = value, fill = fct_inorder(group))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  geom_label_repel(
    data = df2,
    aes(y = pos, label = paste0(value, "%")),
    size = 4.5,
    nudge_x = 1,
    show.legend = FALSE
  ) +
  labs(title = "Languages") +
  guides(fill = guide_legend(title = "")) +
  theme_void() +
  theme(plot.title = element_text(hjust = 0.5, size = 18)) +
  # The x range starting at 0.5 leaves the empty centre of the donut.
  xlim(0.5, 2.5)
# Papers per publication year (PY) for each source, restricted to years from
# 2000 up to, but not including, the current calendar year. Rows with a
# missing value are dropped after counting.
wos_anual_production <-
  count(select(wos, PY), PY, sort = TRUE) |>
  na.omit() |>
  filter(PY >= 2000, PY < year(today())) |>
  mutate(ref_type = "wos")

scopus_anual_production <-
  count(select(scopus, PY), PY, sort = TRUE) |>
  na.omit() |>
  filter(PY >= 2000, PY < year(today())) |>
  mutate(ref_type = "scopus")

total_anual_production <-
  count(select(wos_scopus_tos$df, PY), PY, sort = TRUE) |>
  na.omit() |>
  filter(PY >= 2000, PY < year(today())) |>
  mutate(ref_type = "total")

# Long table: one row per (year, source).
wos_scopus_total_annual_production <-
  bind_rows(
    wos_anual_production,
    scopus_anual_production,
    total_anual_production
  )
# Figure 2 data: one row per year with a column per source, newest year first.
figure_2_data <-
  mutate(
    wos_scopus_total_annual_production,
    PY = replace_na(PY, replace = 0)
  ) |>
  pivot_wider(names_from = ref_type, values_from = n) |>
  arrange(desc(PY))
figure_2_data
## # A tibble: 21 × 4
## PY wos scopus total
## <dbl> <int> <int> <int>
## 1 2020 37 49 70
## 2 2019 39 67 85
## 3 2018 27 27 44
## 4 2017 28 44 59
## 5 2016 18 27 36
## 6 2015 26 37 51
## 7 2014 13 28 36
## 8 2013 11 36 43
## 9 2012 12 29 36
## 10 2011 10 25 29
## # … with 11 more rows
# Line chart of papers per year, one line per source.
ggplot(
  wos_scopus_total_annual_production,
  aes(x = PY, y = n, color = ref_type)
) +
  geom_line() +
  labs(
    title = "Annual Scientific Production",
    x = "years",
    y = "papers"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Per-source country tables taken from the `Countries` element of
# biblioAnalysis(); its `Tab` and `Freq` columns become `country` and `papers`.
data_biblio_wos <- biblioAnalysis(wos)
wos_country <-
  data.frame(data_biblio_wos$Countries) |>
  select(country = Tab, papers = Freq) |>
  mutate(database = "wos") |>
  arrange(desc(papers))

data_biblio_scopus <- biblioAnalysis(scopus)
scopus_country <-
  data.frame(data_biblio_scopus$Countries) |>
  select(country = Tab, papers = Freq) |>
  mutate(database = "scopus") |>
  arrange(desc(papers))

data_biblio_total <- biblioAnalysis(wos_scopus_tos$df)
total_country <-
  data.frame(data_biblio_total$Countries) |>
  select(country = Tab, papers = Freq) |>
  mutate(database = "total") |>
  arrange(desc(papers))

# Table 2: ten countries with the most papers in the merged data, with their
# share of the total record count from table_1.
wos_scopus_total_country <-
  bind_rows(wos_country, scopus_country, total_country) |>
  mutate(country = as.character(country)) |>
  pivot_wider(names_from = database, values_from = papers) |>
  arrange(desc(total)) |>
  slice_head(n = 10) |>
  mutate(percentage = round(total / pull(table_1, total), digits = 2))
wos_scopus_total_country
## # A tibble: 10 × 5
## country wos scopus total percentage
## <chr> <int> <int> <int> <dbl>
## 1 CHINA 64 81 120 0.12
## 2 USA 19 69 77 0.08
## 3 ITALY 30 27 50 0.05
## 4 INDIA 26 34 48 0.05
## 5 JAPAN 22 13 33 0.03
## 6 GERMANY 13 22 30 0.03
## 7 KOREA 17 11 23 0.02
## 8 IRAN 10 17 22 0.02
## 9 UNITED KINGDOM 10 22 22 0.02
## 10 FRANCE 7 18 22 0.02
# Publications per journal (SO) for records whose document type (DT) contains
# "ARTICLE", computed separately for each source.
#
# The counts are kept complete here. Previously each source was cut to its
# own top 20 before the join, so a journal outside one source's top 20 showed
# NA for that source even when it had publications there. The cut to the top
# ten now happens only once, after the sources have been joined.
wos_journal <-
  wos |>
  filter(str_detect(DT, "ARTICLE")) |>
  select(journal = SO) |>
  na.omit() |>
  count(journal, sort = TRUE) |>
  rename(publications = n) |>
  mutate(database = "wos")

scopus_journal <-
  scopus |>
  filter(str_detect(DT, "ARTICLE")) |>
  select(journal = SO) |>
  na.omit() |>
  count(journal, sort = TRUE) |>
  rename(publications = n) |>
  mutate(database = "scopus")

total_journal <-
  wos_scopus_tos$df |>
  filter(str_detect(DT, "ARTICLE")) |>
  select(journal = SO) |>
  na.omit() |>
  count(journal, sort = TRUE) |>
  rename(publications = n) |>
  mutate(database = "total")

# Table 4: ten journals with the most publications in the merged data.
# Journals missing from the merged data get NA in `total`, which
# arrange(desc()) sorts last. `percentage` is relative to the total record
# count in table_1.
wos_scopus_total_journal <-
  wos_journal |>
  bind_rows(scopus_journal, total_journal) |>
  pivot_wider(names_from = database, values_from = publications) |>
  arrange(desc(total)) |>
  slice_head(n = 10) |>
  mutate(
    percentage = total / (table_1 |> pull(total)),
    percentage = round(percentage, digits = 2)
  )
wos_scopus_total_journal
## # A tibble: 10 × 5
## journal wos scopus total percentage
## <chr> <int> <int> <int> <dbl>
## 1 JOURNAL OF FLUIDS ENGINEERING-TRANSACTIONS OF … 24 NA 24 0.02
## 2 WORLD PUMPS NA 25 24 0.02
## 3 BULLETIN OF THE TOMSK POLYTECHNIC UNIVERSITY-G… 11 NA 11 0.01
## 4 ENERGIES 11 5 11 0.01
## 5 PROCEEDINGS OF THE INSTITUTION OF MECHANICAL E… 11 NA 11 0.01
## 6 JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY 7 4 9 0.01
## 7 ENERGY 8 NA 8 0.01
## 8 RENEWABLE ENERGY 7 NA 8 0.01
## 9 JOURNAL OF ENGINEERING FOR GAS TURBINES AND PO… 7 NA 7 0.01
## 10 WEAR 7 NA 7 0.01
# Country collaboration network built from the merged data.
wos_scopus_country_collab_matrix <-
  biblioNetwork(
    M = wos_scopus_tos$df,
    analysis = "collaboration",
    network = "countries"
  )

# Plot the network with n = 30. `TRUE` replaces the reassignable shorthand `T`.
plot_country_collab <-
  networkPlot(
    wos_scopus_country_collab_matrix,
    weighted = TRUE,
    n = 30,
    Title = "Country Collaboration Network",
    type = "fruchterman",
    size = TRUE,
    edgesize = 5,
    labelsize = 0.7
  )
# Keyword co-occurrence network built from the merged data; keywords are
# split on ";".
wos_scopus_keyword_co_occurrence_matrix <-
  biblioNetwork(
    M = wos_scopus_tos$df,
    analysis = "co-occurrences",
    network = "keywords",
    sep = ";"
  )

# Plot the network with n = 30. `TRUE` replaces the reassignable shorthand `T`.
plot_net_co_occurrence <-
  networkPlot(
    wos_scopus_keyword_co_occurrence_matrix,
    weighted = TRUE,
    n = 30,
    Title = "Keyword Co-occurrence Network",
    type = "fruchterman",
    size = TRUE,
    edgesize = 5,
    labelsize = 0.7
  )
tree_of_science
## # A tibble: 81 × 2
## TOS cite
## <chr> <chr>
## 1 Root GULICH JF, 2010, CENTRIFUGAL PUMPS, SECOND EDITION, P1, DOI 10.1007/97…
## 2 Root MENTER FR, 1994, AIAA J, V32, P1598, DOI 10.2514/3.12149
## 3 Root ARNDT N, 1990, J TURBOMACH, V112, P98, DOI 10.1115/1.2927428
## 4 Root KAYA D, 2008, ENERG CONVERS MANAGE, V49, P1662, DOI 10.1016/J.ENCONMAN…
## 5 Root BRENNEN C. E., 1994, HYDRODYNAMICS PUMPS, P48
## 6 Root STEPANOFF A.J., 1957, CENTRIFUGAL AXIAL FL, V2ND ED.
## 7 Root LANGTHJEM MA, 2004, J FLUID STRUCT, V19, P369, DOI 10.1016/J.JFLUIDSTR…
## 8 Root SHOJAEEFARD MH, 2012, COMPUT FLUIDS, V60, P61, DOI 10.1016/J.COMPFLUID…
## 9 Root GUELICH JF, 1992, J VIB ACOUST, V114, P272, DOI 10.1115/1.2930257
## 10 Root DONG R, 1997, J TURBOMACH, V119, P506, DOI 10.1115/1.2841152
## # … with 71 more rows
Finding the clusters
# Look up the CITE text for every vertex of the tosr graph by matching the
# vertex name against ID_TOS in the tosr node table.
# NOTE(review): this assumes the join returns exactly one row per vertex, in
# vertex order - confirm ID_TOS is unique.
nodes <-
  left_join(
    tibble(name = V(wos_scopus_tos$graph)$name),
    wos_scopus_tos$nodes,
    by = c("name" = "ID_TOS")
  )

# Store that text on the graph as the vertex attribute "full_name".
wos_scopus_citation_network_1 <-
  igraph::set.vertex.attribute(
    wos_scopus_tos$graph,
    name = "full_name",
    index = V(wos_scopus_tos$graph)$name,
    value = nodes$CITE
  )
# One row per vertex: its name, its `subfield` attribute (used as the
# cluster id), and its full_name.
nodes_1 <-
  tibble(
    name = V(wos_scopus_citation_network_1)$name,
    cluster = V(wos_scopus_citation_network_1)$subfield,
    full_name = V(wos_scopus_citation_network_1)$full_name
  )

# Rank clusters by size: after the descending count, the row number becomes
# the new id, so cluster_1 == 1 is the largest cluster.
nodes_2 <-
  count(nodes_1, cluster, sort = TRUE) |>
  mutate(cluster_1 = row_number()) |>
  select(-n)
# Attach the size-ranked cluster id to every vertex and expose it as
# `subfield`. The join key is given explicitly so the join no longer relies
# on, or reports, an inferred key.
nodes_3 <-
  nodes_1 |>
  left_join(nodes_2, by = "cluster") |>
  select(name, full_name, subfield = cluster_1)
# Edge list of the annotated graph as a two-column data frame.
edge_list <-
  data.frame(get.edgelist(wos_scopus_citation_network_1)) |>
  rename(Source = X1, Target = X2)

# Rebuild a directed graph whose vertices carry name, full_name, and the
# size-ranked subfield from nodes_3.
wos_scopus_citation_network <-
  graph.data.frame(
    d = edge_list,
    vertices = nodes_3,
    directed = TRUE
  )

summary(wos_scopus_citation_network)
## IGRAPH f5ee4ea DN-- 1691 3315 --
## + attr: name (v/c), full_name (v/c), subfield (v/n)
Choosing clusters
We propose the tipping-point option for choosing the number of clusters. See this paper:
https://www.nature.com/articles/s41598-021-85041-8
# Number of vertices per subfield, largest first.
clusters <-
  count(
    tibble(cluster = V(wos_scopus_citation_network)$subfield),
    cluster,
    sort = TRUE
  )

# Dot plot of cluster sizes, with clusters ordered by size on the x axis.
ggplot(clusters, aes(x = reorder(cluster, n), y = n)) +
  geom_point()
Removing the clusters that were not chosen
# Keep only the vertices whose subfield is 1, 2, 3, or 4; every vertex that
# matches none of them is deleted.
wos_scopus_citation_network_clusters <-
  delete.vertices(
    wos_scopus_citation_network,
    which(
      !(V(wos_scopus_citation_network)$subfield == 1 |
        V(wos_scopus_citation_network)$subfield == 2 |
        V(wos_scopus_citation_network)$subfield == 3 |
        V(wos_scopus_citation_network)$subfield == 4)
    )
  )

summary(wos_scopus_citation_network_clusters)
## IGRAPH f83d229 DN-- 569 1028 --
## + attr: name (v/c), full_name (v/c), subfield (v/n)
# Colour palette passed to the word clouds below.
pal <- brewer.pal(8, "Dark2")

# Vertex table of the full citation network: name, subfield (as `cluster`),
# and full_name.
nodes_full_data <-
  tibble(
    name = V(wos_scopus_citation_network)$name,
    cluster = V(wos_scopus_citation_network)$subfield,
    full_name = V(wos_scopus_citation_network)$full_name
  )
# Sub-graph holding only the vertices of subfield 1.
cluster_1 <-
  delete.vertices(
    wos_scopus_citation_network,
    which(V(wos_scopus_citation_network)$subfield != 1)
  )

# Same sub-graph with each vertex's PageRank stored as "page_rank".
cluster_1_page_rank <-
  set.vertex.attribute(
    cluster_1,
    name = "page_rank",
    value = page_rank(cluster_1)$vector
  )

# Vertex table for export.
cluster_1_df <-
  tibble(
    name = V(cluster_1_page_rank)$name,
    full_name = V(cluster_1_page_rank)$full_name,
    page_rank = V(cluster_1_page_rank)$page_rank,
    cluster = V(cluster_1_page_rank)$subfield
  )

# Word cloud for subfield 1: cut a text fragment out of full_name with the
# rebus patterns, tokenise it, drop stop words, "doi", tokens containing
# digits, and tokens containing any of the listed terms, then plot the 50
# most frequent words.
nodes_full_data |>
  filter(cluster == 1) |>
  select(full_name) |>
  mutate(
    full_name = full_name |>
      str_extract(
        SPC %R% one_or_more(WRD) %R% SPC %R% one_or_more(or(WRD, ANY_CHAR))
      ) |>
      str_remove(
        OPEN_PAREN %R% repeated(DGT, 4) %R% CLOSE_PAREN %R%
          one_or_more(or(WRD, ANY_CHAR))
      ) |>
      str_trim()
  ) |>
  unnest_tokens(output = word, input = full_name) |>
  anti_join(stop_words) |>
  filter(
    word != "doi",
    !str_detect(word, "[0-9]")
  ) |>
  filter(
    !str_detect(word, "citation"),
    !str_detect(word, "research"),
    !str_detect(word, "analysis"),
    !str_detect(word, "science"),
    !str_detect(word, "scientometric"),
    !str_detect(word, "vulnerability")
  ) |>
  count(word, sort = TRUE) |>
  with(
    wordcloud(
      word,
      n,
      random.order = FALSE,
      max.words = 50,
      colors = pal
    )
  )
## Joining, by = "word"
# Sub-graph holding only the vertices of subfield 2.
cluster_2 <-
  delete.vertices(
    wos_scopus_citation_network,
    which(V(wos_scopus_citation_network)$subfield != 2)
  )

# Same sub-graph with each vertex's PageRank stored as "page_rank".
cluster_2_page_rank <-
  set.vertex.attribute(
    cluster_2,
    name = "page_rank",
    value = page_rank(cluster_2)$vector
  )

# Vertex table for export.
cluster_2_df <-
  tibble(
    name = V(cluster_2_page_rank)$name,
    full_name = V(cluster_2_page_rank)$full_name,
    page_rank = V(cluster_2_page_rank)$page_rank,
    cluster = V(cluster_2_page_rank)$subfield
  )

# Word cloud for subfield 2: cut a text fragment out of full_name with the
# rebus patterns, tokenise it, drop stop words, "doi", tokens containing
# digits, and tokens containing any of the listed terms, then plot the 50
# most frequent words.
nodes_full_data |>
  filter(cluster == 2) |>
  select(full_name) |>
  mutate(
    full_name = full_name |>
      str_extract(
        SPC %R% one_or_more(WRD) %R% SPC %R% one_or_more(or(WRD, ANY_CHAR))
      ) |>
      str_remove(
        OPEN_PAREN %R% repeated(DGT, 4) %R% CLOSE_PAREN %R%
          one_or_more(or(WRD, ANY_CHAR))
      ) |>
      str_trim()
  ) |>
  unnest_tokens(output = word, input = full_name) |>
  anti_join(stop_words) |>
  filter(
    word != "doi",
    !str_detect(word, "[0-9]")
  ) |>
  filter(
    !str_detect(word, "citation"),
    !str_detect(word, "research"),
    !str_detect(word, "analysis"),
    !str_detect(word, "science"),
    !str_detect(word, "scientometric"),
    !str_detect(word, "vulnerability")
  ) |>
  count(word, sort = TRUE) |>
  with(
    wordcloud(
      word,
      n,
      random.order = FALSE,
      max.words = 50,
      colors = pal
    )
  )
## Joining, by = "word"
# Sub-graph holding only the vertices of subfield 3.
cluster_3 <-
  delete.vertices(
    wos_scopus_citation_network,
    which(V(wos_scopus_citation_network)$subfield != 3)
  )

# Same sub-graph with each vertex's PageRank stored as "page_rank".
cluster_3_page_rank <-
  set.vertex.attribute(
    cluster_3,
    name = "page_rank",
    value = page_rank(cluster_3)$vector
  )

# Vertex table for export.
cluster_3_df <-
  tibble(
    name = V(cluster_3_page_rank)$name,
    full_name = V(cluster_3_page_rank)$full_name,
    page_rank = V(cluster_3_page_rank)$page_rank,
    cluster = V(cluster_3_page_rank)$subfield
  )

# Word cloud for subfield 3: cut a text fragment out of full_name with the
# rebus patterns, tokenise it, drop stop words, "doi", tokens containing
# digits, and tokens containing any of the listed terms, then plot the 50
# most frequent words.
nodes_full_data |>
  filter(cluster == 3) |>
  select(full_name) |>
  mutate(
    full_name = full_name |>
      str_extract(
        SPC %R% one_or_more(WRD) %R% SPC %R% one_or_more(or(WRD, ANY_CHAR))
      ) |>
      str_remove(
        OPEN_PAREN %R% repeated(DGT, 4) %R% CLOSE_PAREN %R%
          one_or_more(or(WRD, ANY_CHAR))
      ) |>
      str_trim()
  ) |>
  unnest_tokens(output = word, input = full_name) |>
  anti_join(stop_words) |>
  filter(
    word != "doi",
    !str_detect(word, "[0-9]")
  ) |>
  filter(
    !str_detect(word, "citation"),
    !str_detect(word, "research"),
    !str_detect(word, "analysis"),
    !str_detect(word, "science"),
    !str_detect(word, "scientometric"),
    !str_detect(word, "vulnerability")
  ) |>
  count(word, sort = TRUE) |>
  with(
    wordcloud(
      word,
      n,
      random.order = FALSE,
      max.words = 50,
      colors = pal
    )
  )
## Joining, by = "word"
### Cluster 4
# Sub-graph holding only the vertices of subfield 4.
cluster_4 <-
  delete.vertices(
    wos_scopus_citation_network,
    which(V(wos_scopus_citation_network)$subfield != 4)
  )

# Same sub-graph with each vertex's PageRank stored as "page_rank".
cluster_4_page_rank <-
  set.vertex.attribute(
    cluster_4,
    name = "page_rank",
    value = page_rank(cluster_4)$vector
  )

# Vertex table for export.
cluster_4_df <-
  tibble(
    name = V(cluster_4_page_rank)$name,
    full_name = V(cluster_4_page_rank)$full_name,
    page_rank = V(cluster_4_page_rank)$page_rank,
    cluster = V(cluster_4_page_rank)$subfield
  )

# Word cloud for subfield 4: cut a text fragment out of full_name with the
# rebus patterns, tokenise it, drop stop words, "doi", tokens containing
# digits, and tokens containing any of the listed terms, then plot the 50
# most frequent words.
nodes_full_data |>
  filter(cluster == 4) |>
  select(full_name) |>
  mutate(
    full_name = full_name |>
      str_extract(
        SPC %R% one_or_more(WRD) %R% SPC %R% one_or_more(or(WRD, ANY_CHAR))
      ) |>
      str_remove(
        OPEN_PAREN %R% repeated(DGT, 4) %R% CLOSE_PAREN %R%
          one_or_more(or(WRD, ANY_CHAR))
      ) |>
      str_trim()
  ) |>
  unnest_tokens(output = word, input = full_name) |>
  anti_join(stop_words) |>
  filter(
    word != "doi",
    !str_detect(word, "[0-9]")
  ) |>
  filter(
    !str_detect(word, "citation"),
    !str_detect(word, "research"),
    !str_detect(word, "analysis"),
    !str_detect(word, "science"),
    !str_detect(word, "scientometric"),
    !str_detect(word, "vulnerability")
  ) |>
  count(word, sort = TRUE) |>
  with(
    wordcloud(
      word,
      n,
      random.order = FALSE,
      max.words = 50,
      colors = pal
    )
  )
## Joining, by = "word"
# Export the tables, figure data, graphs, and cluster listings produced above.

write_csv(table_1, "table_1.csv") # Exporting table 1
# NOTE(review): "table_2_.csv" has a trailing underscore that the other table
# files do not; left unchanged in case something downstream reads that name.
write_csv(wos_scopus_total_country, "table_2_.csv") # Exporting table 2

# `wos_scopus_authors` is not created anywhere in the visible script, so an
# unconditional write stopped the run with "object not found" and none of the
# later exports were produced. Export it only when it exists.
if (exists("wos_scopus_authors")) {
  write_csv(wos_scopus_authors, "table_3.csv") # Exporting table 3
} else {
  warning(
    "`wos_scopus_authors` is not defined; skipping table_3.csv",
    call. = FALSE
  )
}

write_csv(wos_scopus_total_journal, "table_4.csv") # Exporting table 4
write_csv(languages, "figure_1.csv") # Exporting data figure 1
write_csv(figure_2_data, "figure_2.csv") # Exporting data figure 2

# Citation networks in GraphML format: full graph and the chosen clusters.
write.graph(
  wos_scopus_citation_network,
  "citation_network_full.graphml",
  "graphml"
)
write.graph(
  wos_scopus_citation_network_clusters,
  "wos_scopus_citation_network_clusters.graphml",
  "graphml"
)

write.csv(tree_of_science, "tree_of_science.csv") # Exporting Tree of Science
write.csv(cluster_1_df, "cluster_1.csv") # Exporting cluster 1
write.csv(cluster_2_df, "cluster_2.csv") # Exporting cluster 2
write.csv(cluster_3_df, "cluster_3.csv") # Exporting cluster 3
write.csv(cluster_4_df, "cluster_4.csv") # Exporting cluster 4
write.csv(nodes_full_data, "nodes_full_data.csv") # Exporting all nodes