INDUSTRIAL CENTRIFUGAL PUMPS

Creating the environment

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tosr)
library(bibliometrix)
## To cite bibliometrix in publications, please use:
## 
## Aria, M. & Cuccurullo, C. (2017) bibliometrix: An R-tool for comprehensive science mapping analysis, 
##                                  Journal of Informetrics, 11(4), pp 959-975, Elsevier.
##                         
## 
## https://www.bibliometrix.org
## 
##                         
## For information and bug reports:
##                         - Send an email to info@bibliometrix.org   
##                         - Write a post on https://github.com/massimoaria/bibliometrix/issues
##                         
## Help us to keep Bibliometrix free to download and use by contributing with a small donation to support our research team (https://bibliometrix.org/donate.html)
## 
##                         
## To start with the shiny web-interface, please digit:
## biblioshiny()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:lubridate':
## 
##     %--%, union
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(tidytext)
library(wordcloud)
## Loading required package: RColorBrewer
library(rebus)
## 
## Attaching package: 'rebus'
## The following objects are masked from 'package:igraph':
## 
##     %c%, graph
## The following object is masked from 'package:stringr':
## 
##     regex
## The following object is masked from 'package:ggplot2':
## 
##     alpha
library(ggrepel) # improve donut visualization

This template is based on this paper:

https://revistas.ucm.es/index.php/REVE/article/view/75566/4564456557467

For a detailed explanation of how to use it, please watch this video:

https://www.youtube.com/watch?v=jtKSifvNvTM

Getting the data

wos_scopus_tos <- 
  tosr::tosr_load("savedrecs.txt",
                  "scopus.bib") # Create data from searches)   
## [1] 2
## 
## Converting your wos collection into a bibliographic dataframe
## 
## Done!
## 
## 
## Generating affiliation field tag AU_UN from C1:  Done!
## 
## 
## Converting your scopus collection into a bibliographic dataframe
## 
## Done!
## 
## 
## Generating affiliation field tag AU_UN from C1:  Done!
## 
## 
##  170 duplicated documents have been removed
tree_of_science <- 
  tosr::tosR("savedrecs.txt",
             "scopus.bib") # Create data from searches   
## [1] 2
## 
## Converting your wos collection into a bibliographic dataframe
## 
## Done!
## 
## 
## Generating affiliation field tag AU_UN from C1:  Done!
## 
## 
## Converting your scopus collection into a bibliographic dataframe
## 
## Done!
## 
## 
## Generating affiliation field tag AU_UN from C1:  Done!
## 
## 
##  170 duplicated documents have been removed
## Computing TOS SAP
## Computing TOS subfields
wos <-
  bibliometrix::convert2df("savedrecs.bib", # Create dataframe from WoS file
                           dbsource = "wos",
                           format = "bibtex")
## 
## Converting your wos collection into a bibliographic dataframe
## 
## Done!
## 
## 
## Generating affiliation field tag AU_UN from C1:  Done!
scopus <- 
  bibliometrix::convert2df("scopus.bib", # Create dataframe from scopus file
                           dbsource = "scopus", 
                           format = "bibtex")
## 
## Converting your scopus collection into a bibliographic dataframe
## 
## Done!
## 
## 
## Generating affiliation field tag AU_UN from C1:  Done!

Table 1. Search Criteria

table_1 <- 
  tibble(wos = length(wos$SR), # Number of records per database and in the merged dataset
         scopus = length(scopus$SR), 
         total = length(wos_scopus_tos$df$SR))
table_1
## # A tibble: 1 × 3
##     wos scopus total
##   <int>  <int> <int>
## 1   339    856  1025

Figure 1. Languages

main_languages <- 
  wos_scopus_tos$df |> 
  select(LA) |> 
  separate_rows(LA, sep = "; ") |> 
  count(LA, sort = TRUE) |> 
  slice(1:5)

other_languages <- 
  wos_scopus_tos$df |> 
  separate_rows(LA, sep = "; ") |> 
  select(LA) |> 
  count(LA, sort = TRUE) |> 
  slice(6:n()) |> 
  summarise(n = sum(n)) |> 
  mutate(LA = "OTHERS") |> 
  select(LA, n)
languages <- 
  main_languages |> 
  bind_rows(other_languages) |> 
  mutate(percentage = n / sum(n),
         percentage = round(percentage, 
                            digits = 2) ) |> 
  rename(language = LA) |>
  select(language, percentage, count = n)

languages
## # A tibble: 6 × 3
##   language percentage count
##   <chr>         <dbl> <int>
## 1 ENGLISH        0.85   880
## 2 RUSSIAN        0.04    41
## 3 <NA>           0.03    30
## 4 GERMAN         0.02    25
## 5 FRENCH         0.02    18
## 6 OTHERS         0.04    38
df <- languages |> 
  rename(value = percentage, group = language) |>
  mutate(value = value * 100) |> 
  select(value, group)

df2 <- df |> 
  mutate(csum = rev(cumsum(rev(value))),          # running total from each slice to the last one
         pos = value/2 + lead(csum, 1),           # midpoint of each slice on the y axis
         pos = if_else(is.na(pos), value/2, pos)) # last slice has no lead(), use its own midpoint

ggplot(df, aes(x = 2, y = value, fill = fct_inorder(group))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  geom_label_repel(data = df2,
                   aes(y = pos, label = paste0(value, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  theme_void() + # complete theme first so it does not overwrite the title styling below
  theme(plot.title = element_text(hjust = 0.5, size = 18)) +
  labs(title = "Languages") +
  guides(fill = guide_legend(title = "")) +
  xlim(0.5, 2.5)

Figure 2. Scientific Production

wos_annual_production <- 
  wos |> 
  select(PY) |> 
  count(PY, sort = TRUE) |> 
  na.omit() |> 
  filter(PY >= 2000,
         PY < year(today())) |> 
  mutate(ref_type = "wos")

scopus_annual_production <- 
  scopus |> 
  select(PY) |> 
  count(PY, sort = TRUE) |> 
  na.omit() |> 
  filter(PY >= 2000,
         PY < year(today())) |>
  mutate(ref_type = "scopus")

total_annual_production <- 
  wos_scopus_tos$df |> 
  select(PY) |> 
  count(PY, sort = TRUE) |> 
  na.omit() |> 
  filter(PY >= 2000,
         PY < year(today())) |>
  mutate(ref_type = "total")

wos_scopus_total_annual_production <- 
  wos_annual_production |> 
  bind_rows(scopus_annual_production,
            total_annual_production) 

figure_2_data <- 
  wos_scopus_total_annual_production |> 
  mutate(PY = replace_na(PY, replace = 0)) |> 
  pivot_wider(names_from = ref_type, 
              values_from = n) |> 
  arrange(desc(PY))

figure_2_data 
## # A tibble: 21 × 4
##       PY   wos scopus total
##    <dbl> <int>  <int> <int>
##  1  2020    37     49    70
##  2  2019    39     67    85
##  3  2018    27     27    44
##  4  2017    28     44    59
##  5  2016    18     27    36
##  6  2015    26     37    51
##  7  2014    13     28    36
##  8  2013    11     36    43
##  9  2012    12     29    36
## 10  2011    10     25    29
## # … with 11 more rows
wos_scopus_total_annual_production |> 
  ggplot(aes(x = PY, y = n, color = ref_type)) +
  geom_line() +
  labs(title = "Annual Scientific Production", 
       x = "years",
       y = "papers") +
  theme(plot.title = element_text(hjust = 0.5)) 

Table 2. Country production

data_biblio_wos <- biblioAnalysis(wos)

wos_country <- 
  data_biblio_wos$Countries |> 
  data.frame() |> 
  mutate(database = "wos") |> 
  select(country = Tab, papers = Freq, database ) |> 
  arrange(desc(papers)) 

data_biblio_scopus <- biblioAnalysis(scopus)

scopus_country <- 
  data_biblio_scopus$Countries |> 
  data.frame() |> 
  mutate(database = "scopus") |> 
  select(country = Tab, papers = Freq, database ) |> 
  arrange(desc(papers)) 

data_biblio_total <- biblioAnalysis(wos_scopus_tos$df)

total_country <- 
  data_biblio_total$Countries |> 
  data.frame() |> 
  mutate(database = "total") |> 
  select(country = Tab, papers = Freq, database ) |> 
  arrange(desc(papers)) 

wos_scopus_total_country <- 
  wos_country |> 
  bind_rows(scopus_country, 
            total_country) |> 
  mutate(country = as.character(country)) |> 
  pivot_wider(names_from = database, 
              values_from = papers) |> 
  arrange(desc(total)) |> 
  slice(1:10) |> 
  mutate(percentage = total / (table_1 |> pull(total)),
         percentage = round(percentage, digits = 2))

wos_scopus_total_country
## # A tibble: 10 × 5
##    country          wos scopus total percentage
##    <chr>          <int>  <int> <int>      <dbl>
##  1 CHINA             64     81   120       0.12
##  2 USA               19     69    77       0.08
##  3 ITALY             30     27    50       0.05
##  4 INDIA             26     34    48       0.05
##  5 JAPAN             22     13    33       0.03
##  6 GERMANY           13     22    30       0.03
##  7 KOREA             17     11    23       0.02
##  8 IRAN              10     17    22       0.02
##  9 UNITED KINGDOM    10     22    22       0.02
## 10 FRANCE             7     18    22       0.02

Table 3. Author production

wos_authors <- 
  data_biblio_wos$Authors |> 
  data.frame() |> 
  rename(authors_wos = AU, papers_wos = Freq) |> 
  arrange(desc(papers_wos)) |> 
  slice(1:10) |> 
  mutate(database_wos = "wos")


scopus_authors <- 
  data_biblio_scopus$Authors |> 
  data.frame() |> 
  rename(authors_scopus = AU, papers_scopus = Freq) |> 
  arrange(desc(papers_scopus)) |> 
  slice(1:10) |> 
  mutate(database_scopus = "scopus")

total_authors <- 
  data_biblio_total$Authors |> 
  data.frame() |> 
  rename(authors_total = AU, 
         papers_total = Freq) |> 
  arrange(desc(papers_total)) |> 
  slice(1:10) |> 
  mutate(database_total = "total")

wos_scopus_authors <- 
  wos_authors |> 
  bind_cols(scopus_authors,
            total_authors)

wos_scopus_authors
##    authors_wos papers_wos database_wos authors_scopus papers_scopus
## 1        LIU H          7          wos          NA NA            69
## 2      CHOI YS          6          wos          MBA D             9
## 3         LI X          6          wos         YUAN S             8
## 4     PAVESI G          6          wos           LI X             7
## 5     YAMANE T          6          wos          LIU H             7
## 6   BIEBERLE A          5          wos         WANG W             7
## 7  CAVAZZINI G          5          wos         YUAN J             7
## 8     HAMPEL U          5          wos        AHOLA J             6
## 9       KIM JH          5          wos       AHONEN T             6
## 10        LI Y          5          wos         ANON A             6
##    database_scopus authors_total papers_total database_total
## 1           scopus          NA N           64          total
## 2           scopus         KIM J           14          total
## 3           scopus          LI X           11          total
## 4           scopus       ADDIE G           10          total
## 5           scopus         LIU H           10          total
## 6           scopus        YUAN S           10          total
## 7           scopus          LI W            9          total
## 8           scopus          LI Y            9          total
## 9           scopus         MBA D            9          total
## 10          scopus        WANG W            9          total

Table 4. Journal production

wos_journal <- 
  wos |> 
  filter(str_detect(DT, "ARTICLE")) |> 
  select(journal = SO) |> 
  na.omit() |> 
  count(journal, sort = TRUE) |> 
  slice(1:20) |> 
  rename(publications = n) |> 
  mutate(database = "wos")

scopus_journal <- 
  scopus |> 
  filter(str_detect(DT, "ARTICLE")) |> 
  select(journal = SO) |> 
  na.omit() |> 
  count(journal, sort = TRUE) |> 
  slice(1:20) |> 
  rename(publications = n) |> 
  mutate(database = "scopus")

total_journal <- 
  wos_scopus_tos$df |> 
  filter(str_detect(DT, "ARTICLE")) |> 
  select(journal = SO) |> 
  na.omit() |> 
  count(journal, sort = TRUE) |> 
  slice(1:20) |> 
  rename(publications = n) |> 
  mutate(database = "total")

wos_scopus_total_journal <- 
  wos_journal |> 
  bind_rows(scopus_journal, 
            total_journal) |> 
  pivot_wider(names_from = database, 
              values_from = publications) |> 
  arrange(desc(total)) |> 
  slice(1:10) |> 
  mutate(percentage = total / (table_1 |> pull(total)),
         percentage = round(percentage, digits = 2))


wos_scopus_total_journal
## # A tibble: 10 × 5
##    journal                                           wos scopus total percentage
##    <chr>                                           <int>  <int> <int>      <dbl>
##  1 JOURNAL OF FLUIDS ENGINEERING-TRANSACTIONS OF …    24     NA    24       0.02
##  2 WORLD PUMPS                                        NA     25    24       0.02
##  3 BULLETIN OF THE TOMSK POLYTECHNIC UNIVERSITY-G…    11     NA    11       0.01
##  4 ENERGIES                                           11      5    11       0.01
##  5 PROCEEDINGS OF THE INSTITUTION OF MECHANICAL E…    11     NA    11       0.01
##  6 JOURNAL OF MECHANICAL SCIENCE AND TECHNOLOGY        7      4     9       0.01
##  7 ENERGY                                              8     NA     8       0.01
##  8 RENEWABLE ENERGY                                    7     NA     8       0.01
##  9 JOURNAL OF ENGINEERING FOR GAS TURBINES AND PO…     7     NA     7       0.01
## 10 WEAR                                                7     NA     7       0.01

Figure 3. Co-citation network

Author Co-citation Network

wos_scopus_author_metatag <- 
  metaTagExtraction(wos_scopus_tos$df, Field = "CR_AU")

wos_scopus_author_co_citation_matrix <- 
  biblioNetwork(M = wos_scopus_author_metatag, 
                analysis = "co-citation", 
                network = "authors")

plot_net_author_co_citation <- 
  networkPlot(wos_scopus_author_co_citation_matrix, 
              weighted = TRUE, 
              n = 30, 
              Title = "Author Co-citation Network", 
              type = "fruchterman", 
              size = TRUE,
              edgesize = 5,
              labelsize = 0.7)

Author Collaboration Network

wos_scopus_author_collab_matrix <- 
  biblioNetwork(M = wos_scopus_tos$df, 
                analysis = "collaboration", 
                network = "authors")

plot_author_collab <- 
  networkPlot(NetMatrix = wos_scopus_author_collab_matrix, 
              weighted = TRUE, n = 30, 
              Title = "Author Collaboration Network", 
              type = "fruchterman", 
              size = TRUE,
              edgesize = 5,
              labelsize = 0.7)

Country Collaboration Network

wos_scopus_country_collab_matrix <- 
  biblioNetwork(M = wos_scopus_tos$df, 
                analysis = "collaboration", 
                network = "countries")

plot_country_collab <- 
  networkPlot(wos_scopus_country_collab_matrix, 
              weighted = TRUE, n = 30, 
              Title = "Country Collaboration Network", 
              type = "fruchterman", 
              size = TRUE,
              edgesize = 5,
              labelsize = 0.7)

Keyword Co-occurrence Network

wos_scopus_keyword_co_occurrence_matrix <- 
  biblioNetwork(M = wos_scopus_tos$df, 
                analysis = "co-occurrences", 
                network = "keywords", 
                sep = ";")

plot_net_co_occurrence <- 
  networkPlot(wos_scopus_keyword_co_occurrence_matrix, 
              weighted = TRUE, n = 30, 
              Title = "Keyword Co-occurrence Network", 
              type = "fruchterman", 
              size = TRUE,
              edgesize = 5,
              labelsize = 0.7)

Figure 4. Tree of Science

Tree of Science

tree_of_science
## # A tibble: 81 × 2
##    TOS   cite                                                                   
##    <chr> <chr>                                                                  
##  1 Root  GULICH JF, 2010, CENTRIFUGAL PUMPS, SECOND EDITION, P1, DOI 10.1007/97…
##  2 Root  MENTER FR, 1994, AIAA J, V32, P1598, DOI 10.2514/3.12149               
##  3 Root  ARNDT N, 1990, J TURBOMACH, V112, P98, DOI 10.1115/1.2927428           
##  4 Root  KAYA D, 2008, ENERG CONVERS MANAGE, V49, P1662, DOI 10.1016/J.ENCONMAN…
##  5 Root  BRENNEN C. E., 1994, HYDRODYNAMICS PUMPS, P48                          
##  6 Root  STEPANOFF A.J., 1957, CENTRIFUGAL AXIAL FL, V2ND ED.                   
##  7 Root  LANGTHJEM MA, 2004, J FLUID STRUCT, V19, P369, DOI 10.1016/J.JFLUIDSTR…
##  8 Root  SHOJAEEFARD MH, 2012, COMPUT FLUIDS, V60, P61, DOI 10.1016/J.COMPFLUID…
##  9 Root  GUELICH JF, 1992, J VIB ACOUST, V114, P272, DOI 10.1115/1.2930257      
## 10 Root  DONG R, 1997, J TURBOMACH, V119, P506, DOI 10.1115/1.2841152           
## # … with 71 more rows

Clustering analysis

Finding the clusters

nodes <-  # Create a dataframe with the full names of the articles
  tibble(name = V(wos_scopus_tos$graph)$name) |> 
  left_join(wos_scopus_tos$nodes, 
            by = c("name" = "ID_TOS"))

wos_scopus_citation_network_1 <- # Add the article names to the citation network
  wos_scopus_tos$graph |> 
  igraph::set.vertex.attribute(name = "full_name", 
                               index = V(wos_scopus_tos$graph)$name, 
                               value = nodes$CITE)

nodes_1 <- # Create a dataframe with subfields (clusters)
  tibble(name = V(wos_scopus_citation_network_1)$name,
         cluster = V(wos_scopus_citation_network_1)$subfield,
         full_name = V(wos_scopus_citation_network_1)$full_name)

nodes_2 <- # Renumber clusters by size (largest cluster = 1)
  nodes_1 |> 
  count(cluster, sort = TRUE) |> 
  mutate(cluster_1 = row_number()) |> 
  select(cluster, cluster_1)

nodes_3 <- 
  nodes_1 |> 
  left_join(nodes_2) |> 
  rename(subfield = cluster_1) |> 
  select(name, full_name, subfield)
## Joining, by = "cluster"
edge_list <- 
  get.edgelist(wos_scopus_citation_network_1) |> 
  data.frame() |> 
  rename(Source = X1, Target = X2)

wos_scopus_citation_network <- 
  graph.data.frame(d = edge_list, 
                   directed = TRUE, 
                   vertices = nodes_3)

wos_scopus_citation_network |> 
  summary()
## IGRAPH f5ee4ea DN-- 1691 3315 -- 
## + attr: name (v/c), full_name (v/c), subfield (v/n)

Choosing clusters

We proposed the tipping point approach to choose the number of clusters. See this paper:

https://www.nature.com/articles/s41598-021-85041-8

clusters <- 
  tibble(cluster = V(wos_scopus_citation_network)$subfield) |> 
  count(cluster, sort = TRUE)

clusters |> 
  ggplot(aes(x = reorder(cluster, n), y = n)) +
  geom_point() 
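
The short sketch below is only an illustration and is not part of the tosr workflow: it approximates the tipping point as the largest relative drop in cluster size, using the clusters tibble created above. The objects cluster_cutoff and chosen_clusters are hypothetical helpers introduced here.

# Illustrative sketch (hypothetical helpers): approximate the tipping point as the
# largest relative drop in cluster size; `clusters` is already sorted by size.
cluster_cutoff <- 
  clusters |> 
  mutate(rank = row_number(),
         drop = (n - lead(n)) / n) |>              # relative drop to the next cluster
  filter(rank < n()) |>                            # the smallest cluster has no next drop
  slice_max(drop, n = 1, with_ties = FALSE) |> 
  pull(rank)

chosen_clusters <- 
  clusters |> 
  slice(1:cluster_cutoff) |> 
  pull(cluster)

chosen_clusters # candidate clusters to keep; compare with the elbow in the plot

Whatever cut-off you pick, the next step keeps the chosen clusters explicitly (clusters 1 to 4 in this template).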

Removing the unselected clusters

wos_scopus_citation_network_clusters <- 
  wos_scopus_citation_network |> 
  delete.vertices(which(!(V(wos_scopus_citation_network)$subfield %in% 1:4))) # keep only clusters 1 to 4

wos_scopus_citation_network_clusters |> 
  summary()
## IGRAPH f83d229 DN-- 569 1028 -- 
## + attr: name (v/c), full_name (v/c), subfield (v/n)

Cluster 1

pal <- brewer.pal(8,"Dark2")

nodes_full_data <- 
  tibble(name = V(wos_scopus_citation_network)$name,
         cluster = V(wos_scopus_citation_network)$subfield,
         full_name = V(wos_scopus_citation_network)$full_name)

cluster_1 <- 
  wos_scopus_citation_network |> 
  delete.vertices(which(V(wos_scopus_citation_network)$subfield != 1))

cluster_1_page_rank <- 
  cluster_1 |> 
  set.vertex.attribute(name = "page_rank", 
                       value = page_rank(cluster_1)$vector)

cluster_1_df <- 
  tibble(name = V(cluster_1_page_rank)$name,
         full_name = V(cluster_1_page_rank)$full_name,
         page_rank = V(cluster_1_page_rank)$page_rank,
         cluster = V(cluster_1_page_rank)$subfield)

nodes_full_data |> 
  filter(cluster == 1) |> 
  select(full_name) |> 
  mutate(full_name = str_extract(full_name, SPC %R%  # Regular expressions 
                                   one_or_more(WRD) %R% 
                                   SPC %R% 
                                   one_or_more(or(WRD, ANY_CHAR))),
         full_name = str_remove(full_name, OPEN_PAREN %R% 
                                  repeated(DGT, 4) %R% 
                                  CLOSE_PAREN %R%
                                  one_or_more(or(WRD,ANY_CHAR))),
         full_name = str_trim(full_name))  |> 
  unnest_tokens(output = word, input = full_name) |> # Tokenization
  anti_join(stop_words) |>  # Removing stop words
  filter(word != "doi",
         !str_detect(word, "[0-9]")) |>  # WoS data
  filter(word == str_remove(word, pattern = "citation"),
         word == str_remove(word, pattern = "research"),  # Words removed
         word == str_remove(word, pattern = "analysis"), 
         word == str_remove(word, pattern = "science"),
         word == str_remove(word, pattern = "scientometric"),
         word == str_remove(word, pattern = "vulnerability")) |>
  count(word, sort = TRUE) |> 
  with(wordcloud(word, 
                 n, 
                 random.order = FALSE, 
                 max.words = 50, 
                 colors=pal))
## Joining, by = "word"

Cluster 2

cluster_2 <- 
  wos_scopus_citation_network |> 
  delete.vertices(which(V(wos_scopus_citation_network)$subfield != 2))

cluster_2_page_rank <- 
  cluster_2 |> 
  set.vertex.attribute(name = "page_rank", 
                       value = page_rank(cluster_2)$vector)

cluster_2_df <- 
  tibble(name = V(cluster_2_page_rank)$name,
         full_name = V(cluster_2_page_rank)$full_name,
         page_rank = V(cluster_2_page_rank)$page_rank,
         cluster = V(cluster_2_page_rank)$subfield)

nodes_full_data |> 
  filter(cluster == 2) |> 
  select(full_name) |> 
  mutate(full_name = str_extract(full_name, SPC %R%  # Regular expressions 
                                   one_or_more(WRD) %R% 
                                   SPC %R% 
                                   one_or_more(or(WRD, ANY_CHAR))),
         full_name = str_remove(full_name, OPEN_PAREN %R% 
                                  repeated(DGT, 4) %R% 
                                  CLOSE_PAREN %R%
                                  one_or_more(or(WRD,ANY_CHAR))),
         full_name = str_trim(full_name))  |> 
  unnest_tokens(output = word, input = full_name) |> 
  anti_join(stop_words) |>
  filter(word != "doi",
         !str_detect(word, "[0-9]")) |>  # WoS data
  filter(word == str_remove(word, pattern = "citation"),
         word == str_remove(word, pattern = "research"), 
         word == str_remove(word, pattern = "analysis"), 
         word == str_remove(word, pattern = "science"),
         word == str_remove(word, pattern = "scientometric"),
         word == str_remove(word, pattern = "vulnerability")) |>
  count(word, sort = TRUE) |> 
  with(wordcloud(word, 
                 n, 
                 random.order = FALSE, 
                 max.words = 50, 
                 colors=pal))
## Joining, by = "word"

Cluster 3

cluster_3 <- 
  wos_scopus_citation_network |> 
  delete.vertices(which(V(wos_scopus_citation_network)$subfield != 3))

cluster_3_page_rank <- 
  cluster_3 |> 
  set.vertex.attribute(name = "page_rank", 
                       value = page_rank(cluster_3)$vector)

cluster_3_df <- 
  tibble(name = V(cluster_3_page_rank)$name,
         full_name = V(cluster_3_page_rank)$full_name,
         page_rank = V(cluster_3_page_rank)$page_rank,
         cluster = V(cluster_3_page_rank)$subfield)

nodes_full_data |> 
  filter(cluster == 3) |> 
  select(full_name) |> 
  mutate(full_name = str_extract(full_name, SPC %R%  # Regular expressions 
                                   one_or_more(WRD) %R% 
                                   SPC %R% 
                                   one_or_more(or(WRD, ANY_CHAR))),
         full_name = str_remove(full_name, OPEN_PAREN %R% 
                                  repeated(DGT, 4) %R% 
                                  CLOSE_PAREN %R%
                                  one_or_more(or(WRD,ANY_CHAR))),
         full_name = str_trim(full_name))  |> 
  unnest_tokens(output = word, input = full_name) |> 
  anti_join(stop_words) |>
  filter(word != "doi",
         !str_detect(word, "[0-9]")) |>  # WoS data 
  filter(word == str_remove(word, pattern = "citation"),
         word == str_remove(word, pattern = "research"), 
         word == str_remove(word, pattern = "analysis"), 
         word == str_remove(word, pattern = "science"),
         word == str_remove(word, pattern = "scientometric"),
         word == str_remove(word, pattern = "vulnerability")) |>
  count(word, sort = TRUE) |> 
  with(wordcloud(word, 
                 n, 
                 random.order = FALSE, 
                 max.words = 50, 
                 colors=pal))
## Joining, by = "word"

Cluster 4

cluster_4 <- 
  wos_scopus_citation_network |> 
  delete.vertices(which(V(wos_scopus_citation_network)$subfield != 4))

cluster_4_page_rank <- 
  cluster_4 |> 
  set.vertex.attribute(name = "page_rank", 
                       value = page_rank(cluster_4)$vector)

cluster_4_df <- 
  tibble(name = V(cluster_4_page_rank)$name,
         full_name = V(cluster_4_page_rank)$full_name,
         page_rank = V(cluster_4_page_rank)$page_rank,
         cluster = V(cluster_4_page_rank)$subfield)

nodes_full_data |> 
  filter(cluster == 4) |> 
  select(full_name) |> 
  mutate(full_name = str_extract(full_name, SPC %R%  # Regular expressions 
                                   one_or_more(WRD) %R% 
                                   SPC %R% 
                                   one_or_more(or(WRD, ANY_CHAR))),
         full_name = str_remove(full_name, OPEN_PAREN %R% 
                                  repeated(DGT, 4) %R% 
                                  CLOSE_PAREN %R%
                                  one_or_more(or(WRD,ANY_CHAR))),
         full_name = str_trim(full_name))  |> 
  unnest_tokens(output = word, input = full_name) |> 
  anti_join(stop_words) |> 
  filter(word != "doi",
         !str_detect(word, "[0-9]")) |>  # WoS data
  filter(word == str_remove(word, pattern = "citation"),
         word == str_remove(word, pattern = "research"), 
         word == str_remove(word, pattern = "analysis"), 
         word == str_remove(word, pattern = "science"),
         word == str_remove(word, pattern = "scientometric"),
         word == str_remove(word, pattern = "vulnerability")) |>
  count(word, sort = TRUE) |> 
  with(wordcloud(word, 
                 n, 
                 random.order = FALSE, 
                 max.words = 50, 
                 colors=pal))
## Joining, by = "word"

Exporting files

write_csv(table_1, "table_1.csv") # Exporting table 1
write_csv(wos_scopus_total_country, "table_2.csv") # Exporting table 2
write_csv(wos_scopus_authors, "table_3.csv") # Exporting table 3
write_csv(wos_scopus_total_journal, "table_4.csv") # Exporting table 4


write_csv(languages, "figure_1.csv") # Exporting data figure 1 
write_csv(figure_2_data, "figure_2.csv") # Exporting data figure 2

write.graph(wos_scopus_citation_network, "citation_network_full.graphml", "graphml") # Exporting graph
write.graph(wos_scopus_citation_network_clusters, 
            "wos_scopus_citation_network_clusters.graphml", 
            "graphml")

write.csv(tree_of_science, "tree_of_science.csv") # Exporting Tree of Science

write.csv(cluster_1_df, "cluster_1.csv") # Exporting cluster 1
write.csv(cluster_2_df, "cluster_2.csv") # Exporting cluster 2
write.csv(cluster_3_df, "cluster_3.csv") # Exporting cluster 3
write.csv(cluster_4_df, "cluster_4.csv") # Exporting cluster 4

write.csv(nodes_full_data, "nodes_full_data.csv") # Exporting all nodes