Creating the environment

library(tidyverse)
library(tidygraph)
library(igraph)
library(bibliometrix)
library(tosr)
library(here)
library(lubridate)
# library(sjrdata)
library(openxlsx)
library(zoo)
library(RSQLite)
library(journalabbr)
library(ggraph)
library(openxlsx)
library(XML)
library(plyr)
library(readxl)
source("verbs.R")
# Register the "Times" families used by the plot themes below with the
# Windows graphics device. windowsFonts()/windowsFont() only exist on Windows
# builds of R, so the registration is skipped on other platforms instead of
# stopping the script with "could not find function".
if (.Platform$OS.type == "windows") {
  windowsFonts("Times" = windowsFont("Times"))
  windowsFonts("Times New Roman" = windowsFont("Times New Roman"))
}

#' Extract the largest connected component of a graph.
#'
#' @param graph An igraph graph object.
#' @return The subgraph induced by the vertices of the component with the
#'   most vertices (the first such component when sizes tie, as chosen by
#'   `which.max()`).
giant.component <- function(graph) {
  # components() / induced_subgraph() are the current names of the deprecated
  # clusters() / induced.subgraph().
  comp <- igraph::components(graph)
  largest <- which.max(comp$csize)
  igraph::induced_subgraph(graph, which(comp$membership == largest))
}

Getting the data

library(readxl)
library(httr)
url1 <- "https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ&exportFormat=xlsx"
# Download the workbook into a temporary .xlsx file; `tf` is the path that the
# read_excel() calls below read sheet by sheet.
tf <- tempfile(fileext = ".xlsx")
response <- httr::GET(url1, httr::write_disk(tf))
# Stop right here on an HTTP error (4xx/5xx) instead of failing later inside
# read_excel() on a file that is not a workbook.
httr::stop_for_status(response)
# Print the response summary, as the original top-level call did.
response

## Response [https://doc-08-0s-sheets.googleusercontent.com/export/mq6he3r7ig44qobar1fsg51390/v8ovao5o0md8pjl2e1rqo4rai0/1675786175000/107660325299273654627/*/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ?exportFormat=xlsx]
##   Date: 2023-02-07 16:09
##   Status: 200
##   Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
##   Size: 3.55 MB
## <ON DISK>  C:\Users\User\AppData\Local\Temp\RtmpsxaQUX\file2dd413d93ee0.xlsx

# Load every worksheet of the downloaded workbook (`tf`) by position.
# Sheet 9 is not read here.
wos_scopus <- readxl::read_excel(path = tf, sheet = 1L)
wos <- readxl::read_excel(path = tf, sheet = 2L)
scopus <- readxl::read_excel(path = tf, sheet = 3L)
reference_df <- readxl::read_excel(path = tf, sheet = 4L)
journal_df <- readxl::read_excel(path = tf, sheet = 5L)
author_df <- readxl::read_excel(path = tf, sheet = 6L)
TC_all <- readxl::read_excel(path = tf, sheet = 7L)
figure_1_data <- readxl::read_excel(path = tf, sheet = 8L)
table_2_country <- readxl::read_excel(path = tf, sheet = 10L)
figure_2_country_wos_scopus <- readxl::read_excel(path = tf, sheet = 11L)
# Sheet 12 is turned into an undirected graph whose nodes carry their Louvain
# community (as a factor) and their degree.
figure_2_country_wos_scopus_1 <-
  tidygraph::as_tbl_graph(
    readxl::read_excel(path = tf, sheet = 12L),
    directed = FALSE
  ) |>
  tidygraph::activate(nodes) |>
  dplyr::mutate(
    community = as.factor(tidygraph::group_louvain()),
    degree = tidygraph::centrality_degree()
  )
table_3_journal <- readxl::read_excel(path = tf, sheet = 13L)
table_4_authors <- readxl::read_excel(path = tf, sheet = 14L)
AU_CO_links <- readxl::read_excel(path = tf, sheet = 15L)
tos <- readxl::read_excel(path = tf, sheet = 16L)
edges_tos <- readxl::read_excel(path = tf, sheet = 17L)
nodes_tos <- readxl::read_excel(path = tf, sheet = 18L)
SO_edges <- readxl::read_excel(path = tf, sheet = 19L)
SO_nodes <- readxl::read_excel(path = tf, sheet = 20L)
AU_ego_edges <- readxl::read_excel(path = tf, sheet = 21L)
AU_ego_nodes <- readxl::read_excel(path = tf, sheet = 22L)

# wos_scopus <-
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=1744606135")
# 
# wos <-
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=627517954")  # create dataframe from wos file
# 
# scopus <-
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=226642650")
# 
# reference_df <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=530689611")
# 
# journal_df <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=1459119042")
# 
# author_df <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=2051021533")
# 
# TC_all <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=390660984")
# 
# figure_1_data <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=7617004")
# 
# table_2_country <- #table_2_country
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=2062723438")
# 
# figure_2_country_wos_scopus <- #figure_2_country_wos_scopus
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=1467986298")
# 
# figure_2_country_wos_scopus_1 <- #figure_2_country_wos_scopus_1
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=86607473") |> 
#   tidygraph::as_tbl_graph(directed = FALSE) |> 
#   activate(nodes) |> 
#   dplyr::mutate(community = tidygraph::group_louvain(),
#                 degree = tidygraph::centrality_degree(),
#                 community = as.factor(community))
# # 
# table_3_journal  <-  
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=557941275")
# # 
# table_4_authors  <- #table_4_authors
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=1892976712")
# # 
# AU_CO_links <-
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=906609028")
# 
# tos <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=1132617046")
# 
# edges_tos <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=1839422003")
# 
# nodes_tos <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=97139230")
# 
# SO_edges <-
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=474125292")
# 
# SO_nodes <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=195931613")
# 
# AU_ego_edges <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=1941108735")
# 
# AU_ego_nodes <- 
#   read_csv("https://docs.google.com/spreadsheets/d/1pweeV72mxXlND00drdtKP0J31nsTjDU1LMjpYen5cFQ/export?format=csv&gid=2014441040")

Results

Scientometric Analysis

3.1 Scientific Production

Figure 1a - Scopus + WoS

Combine charts using Python Matplotlib & Reticulate

library(reticulate)
# One-off environment setup, kept for reference (run on a fresh machine):
# conda_create("r-reticulate")
# conda_install("r-reticulate", "matplotlib")
# Python modules as R objects. Note that `plt` is bound to the top-level
# "matplotlib" module here, not to "matplotlib.pyplot".
plt <- reticulate::import("matplotlib")
np <- reticulate::import("numpy")

# Total citations (the y values of the citation series): coerce the column
# from double to integer.
TC_all[["TC_sum_all"]] <- as.integer(TC_all[["TC_sum_all"]])

# Figure 1: publications per year (line + two bar series) on the left axis and
# total citations on a twin right axis.
# NOTE(review): tpx/tpy, sx/sy, wx1/wy and tcx/tcy are not defined in this
# chunk; presumably they are the year / count sequences for total
# publications, Scopus, WoS and total citations -- confirm where they are set.
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter
# ax is the left-hand (Y1) axes
fig, ax = plt.subplots()
# Total publications as a red time series on Y1
ax.plot(tpx, tpy, color='r',marker='o', label='Total Publications')
ax.set_xlabel('Year')
ax.set_ylabel('Total Publications', color='r')
# Bar width shared by the Scopus and WoS bar series
barw = 0.5
ax.bar(sx, sy, color='g', label = 'Scopus', alpha = 0.5, width=barw)

## <BarContainer object of 23 artists>

ax.bar(wx1, wy, color='orange', label = 'WoS', alpha=0.8, width=barw)

## <BarContainer object of 23 artists>

# Y2 - total citations on a second y axis that shares the x axis with ax
twin_axes = ax.twinx()
twin_axes.plot(tcx, tcy, color = 'purple',marker='o', label='Total Citations')
twin_axes.set_ylabel('Total Citations', color='purple')
# Figure title
plt.title('Total Scientific Production vs. Total Citations')
# Legend for the Y2 (total citations) series
plt.legend(loc='center left')
# True or False to show / hide the background grid
ax.grid(False)
# Legend for the Y1 series
ax.legend(loc='upper left')
# The Y2 limit depends on the scale of tcy; it is set to 1100 here.
plt.ylim(0, 1100)  # IMPORTANT: change this Y2 upper limit when the data change

## (0.0, 1100.0)

# Write each value next to its point / bar, nudged slightly upwards
for i, label in enumerate(tcy):
  plt.annotate(label, (tcx[i], tcy[i] + 0.5), color='purple', size=8)

for i, label in enumerate(tpy):
  ax.annotate(label, (tpx[i], tpy[i] + 0.8), color='red', size=8)

for i, label in enumerate(wy):
  ax.annotate(label, (wx1[i], wy[i] + 0.1), color='brown', size=8)

for i, label in enumerate(sy):
  ax.annotate(label, (sx[i], sy[i] + 0.2),color='green', size=8)

# Put one x tick on every value of tpx
plt.xticks(tpx)

## ([<matplotlib.axis.XTick object at 0x000002040C3F62B0>, <matplotlib.axis.XTick object at 0x000002040C3F6100>, <matplotlib.axis.XTick object at 0x0000020409CAD3D0>, <matplotlib.axis.XTick object at 0x000002040C472070>, <matplotlib.axis.XTick object at 0x000002040C4726A0>, <matplotlib.axis.XTick object at 0x000002040C472DF0>, <matplotlib.axis.XTick object at 0x000002040C47A580>, <matplotlib.axis.XTick object at 0x000002040C47ACD0>, <matplotlib.axis.XTick object at 0x000002040C47ADC0>, <matplotlib.axis.XTick object at 0x000002040C472E20>, <matplotlib.axis.XTick object at 0x000002040C480340>, <matplotlib.axis.XTick object at 0x000002040C480B20>, <matplotlib.axis.XTick object at 0x000002040C4862B0>, <matplotlib.axis.XTick object at 0x000002040C486A00>, <matplotlib.axis.XTick object at 0x000002040C48D190>, <matplotlib.axis.XTick object at 0x000002040C486AC0>, <matplotlib.axis.XTick object at 0x000002040C480790>, <matplotlib.axis.XTick object at 0x000002040C48D250>, <matplotlib.axis.XTick object at 0x000002040C48DDF0>, <matplotlib.axis.XTick object at 0x000002040C494580>, <matplotlib.axis.XTick object at 0x000002040C494CD0>, <matplotlib.axis.XTick object at 0x000002040C49B460>, <matplotlib.axis.XTick object at 0x000002040C494670>], [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])

# Rotate the x tick labels by 70 degrees
fig.autofmt_xdate(rotation = 70)
# The Y1 ticks depend on the range of tpy
yticks = [0,10,20,30,40,50]  # IMPORTANT: list exactly the Y1 tick values you want
ax.set_yticks(yticks)
# Export the figure as SVG (disabled)
#plt.savefig("ScientificProd_4charts.svg")

plt.show()

# figure_1a <- 
# figure_1_data |> 
# pivot_longer(!PY, names_to = "ref_type", values_to = "n") |> 
# filter(ref_type != "total") |> 
# ggplot(aes(x = factor(PY), 
# y = n, 
# fill = ref_type)) +
# geom_bar(stat = "identity", 
# position = "dodge") +
# geom_text(aes(label = n),
# vjust = -0.3,
# position = position_dodge(0.9),
# size = 3,
# family = "Times") +
# scale_fill_manual(values = c("springgreen3",
# "orange3")) +
# theme(text = element_text(family = "Times",
# face = "bold",
# size =12),
# panel.background = element_rect(fill = "white"),
# legend.position = "bottom",
# legend.title = element_text(size = 0),
# axis.text.x = element_text(face = "bold", 
# angle = 45, 
# vjust = 0.5),
# axis.line = element_line(color = "black", 
# size = 0.2)) +
# labs(y = "Number of publications", 
# x = "Year") 
# 
# figure_1a

Figure 1b - Total production

# figure_1b <- 
#   figure_1_data |> 
#   ggplot(aes(x = PY, y = total)) +
#   geom_line(stat = "identity", color = "red") +
#   geom_point(stat = "identity", color = "red") +
#   scale_x_continuous(breaks = seq(2002, year(today()) - 1, by = 1)) +
#   geom_text(aes(label = total),
#             vjust = -0.3,
#             position = position_dodge(0.9),
#             size = 3,
#             family = "Times New Roman", 
#             color = "red") +
#   scale_fill_manual(values = c("springgreen3",
#                                              "orange3")) +
#                                                theme(text = element_text(family = "Times New Roman",
#                                                                          face = "bold",
#                                                                          size =12),
#                                                      panel.background = element_rect(fill = "white"),
#                                                      legend.position = "bottom",
#                                                      legend.title = element_text(size = 0),
#                                                      axis.text.x = element_text(face = "bold", 
#                                                                                 angle = 45, 
#                                                                                 vjust = 0.5),
#                                                      axis.line = element_line(color = "black", 
#                                                                               size = 0.2)) +
#   labs(y = "Number of total publications", 
#        x = "Year") 
# figure_1b

Figure 1c - Total Citations

# figure_1c <- 
#   TC_all |> 
#   ggplot(aes(x = PY , y = TC_sum_all)) +
#   geom_line(stat = "identity", color = "purple") +
#   geom_point(color = "purple") +
#   scale_x_continuous(breaks = seq(2002, year(today()) - 1 , by = 1)) +
#   geom_text(aes(label = TC_sum_all),
#             vjust = -0.3,
#             position = position_dodge(0.9),
#             size = 3,
#             family = "Times New Roman", 
#             color = "purple") +
#   scale_fill_manual(values = c("springgreen3",
#                                              "orange3")) +
#                                                theme(text = element_text(family = "Times New Roman",
#                                                                          face = "bold",
#                                                                          size =12),
#                                                      panel.background = element_rect(fill = "white"),
#                                                      legend.position = "bottom",
#                                                      legend.title = element_text(size = 0),
#                                                      axis.text.x = element_text(face = "bold", 
#                                                                                 angle = 45, 
#                                                                                 vjust = 0.5),
#                                                      axis.line = element_line(color = "black", 
#                                                                               size = 0.2)) +
#   labs(y = "Number of citations", 
#        x = "Year") 
# figure_1c

3.2 Country analysis

Table 2 - Country production

# Table 2: interactive country production table with copy/export buttons.
table_2_country |>
  DT::datatable(
    class = "cell-border stripe",
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    rownames = FALSE,
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )

Figure 2a - Country Collaboration

# figure_2 <- 
#   figure_2_country_wos_scopus_1 |>
#   ggraph(layout = "graphopt") +
#   geom_edge_link(aes(width = weight),
#                  colour = "lightgray") +
#   scale_edge_width(name = "Link strength") +
#   geom_node_point(aes(color = community, 
#                       size = degree)) +
#   geom_node_text(aes(label = name), repel = TRUE) +
#   scale_size(name = "Degree") +
#   # scale_color_binned(name = "Communities") +
#   theme_graph()
# 
# figure_2

# Figure 2a: country collaboration network.
# The edge column `n` is renamed to `weight` so it can drive the link width;
# node colour is the community and node size the degree.
figure_2a <-
  ggraph::ggraph(
    figure_2_country_wos_scopus_1 |>
      tidygraph::activate(edges) |>
      dplyr::rename(weight = n),
    layout = "graphopt"
  ) +
  ggraph::geom_edge_link(ggplot2::aes(width = weight), colour = "lightgray") +
  ggraph::scale_edge_width(name = "Link strength") +
  ggraph::geom_node_point(ggplot2::aes(color = community, size = degree)) +
  ggraph::geom_node_text(ggplot2::aes(label = name), repel = TRUE) +
  ggplot2::scale_size(name = "Degree") +
  # scale_color_binned(name = "Communities") +
  ggraph::theme_graph()

figure_2a

# country_collab_graphml_nodes <- 
#   figure_2_country_wos_scopus_1 |> 
#   activate(nodes) |> 
#   as_tibble() |> 
#   dplyr::rename(author = name) |> 
#   rownames_to_column("name")
# 
# country_collab_graphml_edges <- 
#   figure_2_country_wos_scopus_1 |> 
#   activate(edges) |> 
#   as_tibble() 
# 
# AU_CO_weighted_TM <- 
#   graph_from_data_frame(d = country_collab_graphml_edges, 
#                         directed = FALSE, 
#                         vertices = country_collab_graphml_nodes)
# 
# write_graph(AU_CO_weighted_TM, "AU_CO_weighted_TM.graphml", "graphml") # Export author co-citation graph

Figure 2b Clusters

# Figure 2b: size (number of countries) of the ten largest communities.
figure_2b <-
  figure_2_country_wos_scopus_1 |>
  activate(nodes) |>
  data.frame() |>
  # No group_by() here: count() on grouped data returns a grouped result, and
  # slice() would then run per community and never cut the list to ten rows.
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(seq_len(10)) |>
  ggplot(aes(x = reorder(community, n), y = n)) +
  geom_point(stat = "identity") +
  # group = 1 joins all points with one line although x is discrete
  geom_line(group = 1) +
  # geom_text(label = as.numeric(community),
  #           nudge_x = 0.5,
  #           nudge_y = 0.5,
  #           check_overlap = T) +
  labs(title = "Communities by size",
       x = "communities",
       y = "Countries") +
  theme(text = element_text(color = "black",
                            face = "bold",
                            family = "Times New Roman"),
        plot.title = element_text(size = 25),
        panel.background = element_rect(fill = "white"),
        axis.text.y = element_text(size = 15,
                                   colour = "black"),
        axis.text.x = element_text(size = 15,
                                   colour = "black"),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20)
        )

figure_2b

Figure 2c Longitudinal

# Create a dataframe with links 
# Links per year between different countries (self-links dropped), for
# 2002-2022, expressed as a share of the busiest year in that window.
figure_2c_edges <-
  figure_2_country_wos_scopus |>
  dplyr::filter(from != to) |>
  tidygraph::as_tbl_graph() |>
  tidygraph::activate(edges) |>
  tidygraph::as_tibble() |>
  # count() renames PY to `year` and tallies the rows in one step
  dplyr::count(year = PY) |>
  dplyr::filter(year >= 2002, year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)
# Create a data frame with author and year 
# Countries per year of first appearance in a collaboration link, for
# 2002-2022, expressed as a share of the busiest year in that window.
#
# Both link endpoints are taken from ONE edge table. After as_tbl_graph() the
# `from`/`to` columns are integer node indices that are only meaningful within
# the graph that produced them; the previous version took `from` from the
# self-link-filtered graph and `to` from an unfiltered one, so the same index
# could stand for two different countries.
figure_2c_nodes <- local({
  country_edges <-
    figure_2_country_wos_scopus |>
    dplyr::filter(from != to) |>
    tidygraph::as_tbl_graph() |>
    tidygraph::activate(edges) |>
    tidygraph::as_tibble()

  dplyr::bind_rows(
    dplyr::select(country_edges, CO = from, year = PY),
    dplyr::select(country_edges, CO = to, year = PY)
  ) |>
    unique() |>
    # keep the earliest year of every country
    dplyr::group_by(CO) |>
    dplyr::slice(which.min(year)) |>
    dplyr::ungroup() |>
    dplyr::count(year) |>
    dplyr::filter(year >= 2002, year <= 2022) |>
    dplyr::mutate(percentage = n / max(n)) |>
    dplyr::select(year, percentage)
})

# Figure 2c: yearly share of newly appearing countries ("nodes") and of
# collaboration links ("links") over 2002-2022.
figure_2c <-
  figure_2c_nodes |>
  # dplyr:: is explicit because plyr is attached after the tidyverse and masks
  # mutate().
  dplyr::mutate(type = "nodes",
                year = as.numeric(year)) |>
  dplyr::bind_rows(figure_2c_edges |>
                     dplyr::mutate(type = "links",
                                   year = as.numeric(year))) |>
  ggplot(aes(x = year,
             y = percentage,
             color = type)) +
  geom_point() +
  geom_line() +
  theme(legend.position = "right",
        text = element_text(color = "black",
                            face = "bold",
                            family = "Times"),
        plot.title = element_text(size = 25),
        panel.background = element_rect(fill = "white"),
        axis.text.y = element_text(size = 15,
                                   colour = "black"),
        axis.text.x = element_text(size = 15,
                                   colour = "black",
                                   angle = 45, vjust = 0.5
        ),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        # numeric size, like every other element (was the string "15")
        legend.text = element_text(size = 15),
        legend.title = element_blank()) +
  labs(title = "Nodes and links through time",
       y = "Percentage") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(2002, 2022, by = 1))

figure_2c

# library(patchwork)
#  figure_2c / figure_2b | figure_2a

# library(gridExtra)
# grid.arrange(figure_2c, 
#              figure_2b, 
#              figure_2a, 
#              ncol=3, 
#              widths=c(3,2, 2), 
#              heights=c(1,1, 1), 
#              layout_matrix = rbind(c(1,3, 3), c(2,3, 3)))

3.3 Journal Analysis

Table 3 Most productive journals

# Table 3: journals ordered by total output, shown as an interactive table
# with copy/export buttons.
table_3_journal |>
  # dplyr:: is explicit because plyr (attached after the tidyverse) masks both
  # arrange() and desc().
  dplyr::arrange(dplyr::desc(total)) |>
  DT::datatable(
    class = "cell-border stripe",
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    rownames = FALSE,
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )

Figure 3 Journal Citation Network

Creating the graph object

# Undirected journal citation network: one edge per (JI_main, JI_ref) pair,
# with the number of occurrences of the pair stored as `weight`.
journal_citation_graph_weighted_tbl_small <-
  journal_df |>
  dplyr::select(JI_main, JI_ref) |>
  dplyr::group_by(JI_main, JI_ref) |>
  dplyr::count(name = "weight") |>
  tidygraph::as_tbl_graph(directed = FALSE) |>
  # convert(to_simple) |>
  tidygraph::activate(nodes) |>
  # Keep only the nodes of weak component number 1
  # (presumably the largest component -- confirm against tidygraph's ordering).
  dplyr::mutate(components = tidygraph::group_components(type = "weak")) |>
  dplyr::filter(components == 1) |>
  dplyr::mutate(
    degree = tidygraph::centrality_degree(),
    community = tidygraph::group_louvain()
  ) |>
  dplyr::select(-components) |>
  dplyr::filter(degree >= 1)
# activate(edges) |>
# dplyr::filter(weight != 1)

# Ids of the (up to) ten communities with the most journals, largest first.
communities <-
  journal_citation_graph_weighted_tbl_small |>
  tidygraph::activate(nodes) |>
  data.frame() |>
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(seq_len(10)) |>
  dplyr::pull(community)

# Restrict the network to those communities.
journal_citation_graph_weighted_tbl_small_fig <-
  journal_citation_graph_weighted_tbl_small |>
  tidygraph::activate(nodes) |>
  dplyr::filter(community %in% communities)

Selecting nodes to show

# Names of the `n_top` highest-degree journals of one community.
#
# community_id: a community id as stored in the `community` node column.
# n_top:        how many journals to keep (10, as in the original code).
# graph:        the tbl_graph to search; defaults to the filtered figure graph.
# Returns a one-column data frame (`name`). Degree is recomputed after the
# community filter, so it only counts links inside that community.
top_journals_in_community <- function(
    community_id,
    n_top = 10,
    graph = journal_citation_graph_weighted_tbl_small_fig) {
  graph |>
    activate(nodes) |>
    dplyr::filter(community == community_id) |>
    dplyr::mutate(degree = centrality_degree()) |>
    dplyr::arrange(desc(degree)) |>
    dplyr::slice(seq_len(n_top)) |>
    data.frame() |>
    dplyr::select(name)
}

# One table per community, in order of community size (see `communities`).
jc_com_1 <- top_journals_in_community(communities[1])
jc_com_2 <- top_journals_in_community(communities[2])
jc_com_3 <- top_journals_in_community(communities[3])
jc_com_4 <- top_journals_in_community(communities[4])
jc_com_5 <- top_journals_in_community(communities[5])
jc_com_6 <- top_journals_in_community(communities[6])
jc_com_7 <- top_journals_in_community(communities[7])
jc_com_8 <- top_journals_in_community(communities[8])
jc_com_9 <- top_journals_in_community(communities[9])
jc_com_10 <- top_journals_in_community(communities[10])

# Only the three largest communities are shown in the figure; add
# jc_com_4 ... jc_com_10 to the call below to show more.
jc_com <-
  jc_com_1 |>
  bind_rows(jc_com_2,
            jc_com_3)

# Figure 3a: journal citation network limited to the journals selected in
# `jc_com`. Degree is recomputed on that subset and isolated journals
# (degree 0) are dropped before plotting.
figure_3a <-
  ggraph::ggraph(
    journal_citation_graph_weighted_tbl_small_fig |>
      tidygraph::activate(nodes) |>
      dplyr::filter(name %in% jc_com$name) |>
      dplyr::mutate(
        degree = tidygraph::centrality_degree(),
        community = factor(community)
      ) |>
      dplyr::filter(degree != 0),
    layout = "graphopt"
  ) +
  ggraph::geom_edge_link(ggplot2::aes(width = weight), colour = "lightgray") +
  ggraph::scale_edge_width(name = "Link strength") +
  ggraph::geom_node_point(ggplot2::aes(color = community, size = degree)) +
  ggraph::geom_node_text(ggplot2::aes(label = name), repel = TRUE) +
  ggplot2::scale_size(name = "Degree") +
  # scale_color_binned(name = "Communities") +
  ggraph::theme_graph()
figure_3a

Figure 3b clusters

# Figure 3b: number of journals in each of the ten largest communities.
figure_3b <-
  journal_citation_graph_weighted_tbl_small |>
  tidygraph::activate(nodes) |>
  data.frame() |>
  # count() only needs the community column, so no prior select() is required
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(seq_len(10)) |>
  ggplot2::ggplot(ggplot2::aes(x = reorder(community, n), y = n)) +
  ggplot2::geom_point(stat = "identity") +
  # group = 1 joins all points with one line although x is discrete
  ggplot2::geom_line(group = 1) +
  # geom_text(label = as.numeric(community),
  #           nudge_x = 0.5,
  #           nudge_y = 0.5,
  #           check_overlap = T) +
  ggplot2::labs(
    title = "Communities by size",
    x = "communities",
    y = "Journals"
  ) +
  ggplot2::theme(
    text = ggplot2::element_text(
      color = "black",
      face = "bold",
      family = "Times"
    ),
    plot.title = ggplot2::element_text(size = 25),
    panel.background = ggplot2::element_rect(fill = "white"),
    axis.text.y = ggplot2::element_text(size = 15, colour = "black"),
    axis.text.x = ggplot2::element_text(size = 15, colour = "black"),
    axis.title.x = ggplot2::element_text(size = 20),
    axis.title.y = ggplot2::element_text(size = 20)
  )
figure_3b

Figure 3c Longitudinal

# Create a dataframe with links 
# Citation links per year (PY_ref) between different journals, for 2002-2022,
# expressed as a share of the busiest year in that window.
figure_3c_edges <-
  journal_df |>
  dplyr::select(from = JI_main, to = JI_ref, PY = PY_ref) |>
  dplyr::filter(from != to) |>
  tidygraph::as_tbl_graph() |>
  tidygraph::activate(edges) |>
  tidygraph::as_tibble() |>
  # count() renames PY to `year` and tallies the rows in one step
  dplyr::count(year = PY) |>
  dplyr::filter(year >= 2002, year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)
# Create a data frame with author and year 
# Journals per year of first appearance in a citation link, for 2002-2022,
# expressed as a share of the busiest year in that window.
#
# Both link endpoints are taken from ONE edge table. After as_tbl_graph() the
# `from`/`to` columns are integer node indices that are only meaningful within
# the graph that produced them; the previous version took `from` from the
# self-link-filtered graph and `to` from an unfiltered one, so the same index
# could stand for two different journals.
figure_3c_nodes <- local({
  journal_edges <-
    journal_df |>
    dplyr::select(from = JI_main, to = JI_ref, PY = PY_ref) |>
    dplyr::filter(from != to) |>
    tidygraph::as_tbl_graph() |>
    tidygraph::activate(edges) |>
    tidygraph::as_tibble()

  dplyr::bind_rows(
    dplyr::select(journal_edges, CO = from, year = PY),
    dplyr::select(journal_edges, CO = to, year = PY)
  ) |>
    unique() |>
    # keep the earliest year of every journal
    dplyr::group_by(CO) |>
    dplyr::slice(which.min(year)) |>
    dplyr::ungroup() |>
    dplyr::count(year) |>
    dplyr::filter(year >= 2002, year <= 2022) |>
    dplyr::mutate(percentage = n / max(n)) |>
    dplyr::select(year, percentage)
})

Plotting figure 3c

# Figure 3c: yearly share of newly appearing journals ("nodes") and of
# citation links ("links") over 2002-2022.
figure_3c <-
  figure_3c_nodes |>
  # dplyr:: is explicit because plyr is attached after the tidyverse and masks
  # mutate().
  dplyr::mutate(type = "nodes") |>
  dplyr::bind_rows(figure_3c_edges |>
                     dplyr::mutate(type = "links")) |>
  ggplot(aes(x = year,
             y = percentage,
             color = type)) +
  geom_point() +
  geom_line() +
  theme(legend.position = "right",
        text = element_text(color = "black",
                            face = "bold",
                            family = "Times"),
        plot.title = element_text(size = 25),
        panel.background = element_rect(fill = "white"),
        axis.text.y = element_text(size = 15,
                                   colour = "black"),
        axis.text.x = element_text(size = 15,
                                   colour = "black",
                                   angle = 45, vjust = 0.5
        ),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        # numeric size, like every other element (was the string "15")
        legend.text = element_text(size = 15),
        legend.title = element_blank()) +
  labs(title = "Nodes and links through time",
       y = "Percentage") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(2002, 2022, by = 1))

figure_3c

# library(patchwork)
#  figure_3c / figure_3b | figure_3a

3.4 Author Analysis

Table 4

# Table 4: authors and their paper totals as an interactive table with
# copy/export buttons.
table_4_authors |>
  # native pipe throughout, like the rest of the file
  dplyr::select(authors_total, papers_total) |>
  DT::datatable(
    class = "cell-border stripe",
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    rownames = FALSE,
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )

Creating the ASN - graph object

# Author network that keeps the year (PY) on every edge. Only the nodes of
# weak component number 1 are kept; degree and Louvain community are then
# computed on that component.
author_network_time <-
  tidygraph::as_tbl_graph(author_df, directed = FALSE) |>
  tidygraph::activate(nodes) |>
  dplyr::mutate(components = tidygraph::group_components(type = "weak")) |>
  dplyr::filter(components == 1) |>
  dplyr::mutate(
    degree = tidygraph::centrality_degree(),
    community = as.factor(tidygraph::group_louvain())
  )

# Time-collapsed author network: one edge per (from, to) pair with the number
# of occurrences as `weight`. All components are kept here (the component
# filter below is left disabled).
author_network <-
  author_df |>
  dplyr::select(-PY) |>
  dplyr::group_by(from, to) |>
  dplyr::count(name = "weight") |>
  tidygraph::as_tbl_graph(directed = FALSE) |>
  tidygraph::activate(nodes) |>
  # dplyr::mutate(components = tidygraph::group_components(type = "weak")) |>
  # dplyr::filter(components == 1) |>
  dplyr::mutate(
    degree = tidygraph::centrality_degree(),
    community = as.factor(tidygraph::group_louvain())
  )

Figure 4a clusters

# Figure 4a: number of authors in each of the ten largest communities.
figure_4a <-
  author_network |>
  activate(nodes) |>
  data.frame() |>
  # sort = TRUE (as in figure_2b / figure_3b) so that the ten rows kept are
  # the ten largest communities rather than the first ten factor levels.
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(seq_len(10)) |>
  ggplot(aes(x = reorder(community, n), y = n)) +
  geom_point(stat = "identity") +
  # group = 1 joins all points with one line although x is discrete
  geom_line(group = 1) +
  # geom_text(label = as.numeric(community),
  #           nudge_x = 0.5,
  #           nudge_y = 0.5,
  #           check_overlap = T) +
  labs(title = "Communities by size",
       x = "communities",
       y = "Authors") +
  theme(text = element_text(color = "black",
                            face = "bold",
                            family = "Times"),
        plot.title = element_text(size = 25),
        panel.background = element_rect(fill = "white"),
        axis.text.y = element_text(size = 15,
                                   colour = "black"),
        axis.text.x = element_text(size = 15,
                                   colour = "black"),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20)
        )

figure_4a

Figure 4b Longitudinal

# Links per year: count the edges of `author_network_time` by their PY value,
# keep 2002-2022 and express each year relative to the busiest year.
fig_1c_edges <-
  author_network_time |>
  tidygraph::activate(edges) |>
  as_tibble() |>
  dplyr::count(year = PY) |>
  dplyr::filter(year >= 2002 & year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

# New authors per year: stack both endpoints of every edge, keep for each
# author the row with the smallest year, then count authors per year
# (2002-2022) and scale by the largest yearly count.
fig_1c_nodes <-
  dplyr::bind_rows(
    author_network_time |>
      tidygraph::activate(edges) |>
      as_tibble() |>
      dplyr::select(author = from, year = PY),
    author_network_time |>
      tidygraph::activate(edges) |>
      as_tibble() |>
      dplyr::select(author = to, year = PY)
  ) |>
  unique() |>
  dplyr::group_by(author) |>
  dplyr::slice(which.min(year)) |>
  dplyr::ungroup() |>
  dplyr::count(year) |>
  dplyr::filter(year >= 2002 & year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

Plotting Figure 4b

# Figure 4b: yearly share of new nodes (authors) and links, 2002-2022.
# `fig_1c_nodes` and `fig_1c_edges` are stacked with a `type` label so both
# series are drawn on the same axes and told apart by colour.
figure_4b <-
  fig_1c_nodes |>
  # plyr is attached after dplyr in this script, so a bare mutate() would
  # dispatch to plyr::mutate(); qualify it to get the dplyr verb.
  dplyr::mutate(type = "nodes") |>
  dplyr::bind_rows(
    fig_1c_edges |>
      dplyr::mutate(type = "links")
  ) |>
  ggplot(aes(x = year,
             y = percentage,
             color = type)) +
  geom_point() +
  geom_line() +
  theme(legend.position = "right",
        text = element_text(color = "black",
                            face = "bold",
                            family = "Times"),
        plot.title = element_text(size = 25),
        panel.background = element_rect(fill = "white"),
        axis.text.y = element_text(size = 15,
                                   colour = "black"),
        axis.text.x = element_text(size = 15,
                                   colour = "black",
                                   angle = 45,
                                   vjust = 0.5),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        # font size must be numeric (points), not a character string
        legend.text = element_text(size = 15),
        legend.title = element_blank()) +
  labs(title = "Nodes and links through time",
       y = "Percentage") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(2002, 2022, by = 1))

figure_4b

Keeping only the 10 highest-degree nodes within each of the first 6 clusters.

# Highest-degree members of a single author community.
#
# Keeps the nodes of `graph` whose `community` equals `community_id`,
# computes degree on the filtered graph (`degree_community`), sorts the nodes
# by it in decreasing order and returns the first `n_top` of them.
#
# graph        : tbl_graph with a node column `community`
# community_id : community label to keep
# n_top        : number of nodes to return (default 10)
# Returns a tbl_graph containing at most `n_top` nodes.
top_degree_in_community <- function(graph, community_id, n_top = 10L) {
  graph |>
    tidygraph::activate(nodes) |>
    # as.numeric() on a factor returns the internal level codes, not the
    # labels; converting through as.character() compares the actual label.
    dplyr::mutate(community = as.numeric(as.character(community))) |>
    dplyr::filter(community == community_id) |>
    dplyr::mutate(degree_community = tidygraph::centrality_degree()) |>
    dplyr::arrange(dplyr::desc(degree_community)) |>
    dplyr::slice(seq_len(n_top))
}

asn_TM_connected_1 <- top_degree_in_community(author_network, 1)
asn_TM_connected_2 <- top_degree_in_community(author_network, 2)
asn_TM_connected_3 <- top_degree_in_community(author_network, 3)
asn_TM_connected_4 <- top_degree_in_community(author_network, 4)
asn_TM_connected_5 <- top_degree_in_community(author_network, 5)
asn_TM_connected_6 <- top_degree_in_community(author_network, 6)

Saving the nodes we are going to show

# Returns the node table of `graph` reduced to its `name` column.
node_names_of <- function(graph) {
  dplyr::select(as_tibble(tidygraph::activate(graph, nodes)), name)
}

# One single-column tibble of author names per community sub-network.
nodes_community_1 <- node_names_of(asn_TM_connected_1)
nodes_community_2 <- node_names_of(asn_TM_connected_2)
nodes_community_3 <- node_names_of(asn_TM_connected_3)
nodes_community_4 <- node_names_of(asn_TM_connected_4)
nodes_community_5 <- node_names_of(asn_TM_connected_5)
nodes_community_6 <- node_names_of(asn_TM_connected_6)

# Names that go into the plotted network: communities 1 to 3 only.
nodes_selected_10 <-
  dplyr::bind_rows(
    nodes_community_1,
    nodes_community_2,
    nodes_community_3
    # nodes_community_4, nodes_community_5 and nodes_community_6 are built
    # above but are not part of the selection.
  )

Filtering selected nodes

# Sub-network restricted to the authors listed in `nodes_selected_10`.
# `degree` is recomputed on the reduced graph and replaces the value that was
# computed on the full `author_network`.
asn_selected_nodes <-
  author_network |>
  tidygraph::activate(nodes) |>
  dplyr::filter(name %in% nodes_selected_10[["name"]]) |>
  dplyr::mutate(degree = tidygraph::centrality_degree())

# dplyr::mutate(final_plot = tidygraph::group_components(type = "weak")) |> 
# dplyr::filter(final_plot == 1)

Figure 4c Author Network

# Figure 4c: network of the selected authors, laid out with "graphopt".
# Node colour encodes the community, node size the degree within the
# selected sub-network; every node is labelled with the author name.
figure_4c <-
  ggraph(asn_selected_nodes, layout = "graphopt") +
  geom_edge_link(
    width = 1,
    colour = "lightgray"
  ) +
  geom_node_point(
    aes(color = community, size = degree)
  ) +
  geom_node_text(
    aes(label = name),
    repel = TRUE
  ) +
  theme_graph()

figure_4c

# library(patchwork)
#  figure_4b / figure_4a | figure_4c

3.4.1 Ego top 10 authors

Merging the ego networks

# Ego networks of the top authors as a single graph.
# The graph is built from `AU_ego_edges`; node attributes from `AU_ego_nodes`
# are attached by matching the graph's node `name` to `id` (cast to character
# so both join keys have the same type). `component` is stored as character
# so it is mapped to a discrete colour scale in the plot below.
egos <-
  tidygraph::as_tbl_graph(x = AU_ego_edges) |>
  tidygraph::activate(nodes) |>
  dplyr::left_join(
    AU_ego_nodes |>
      # plyr is attached after dplyr in this script, so a bare mutate()
      # would dispatch to plyr::mutate(); qualify it to get the dplyr verb.
      dplyr::mutate(id = as.character(id)),
    by = c("name" = "id")
  ) |>
  dplyr::mutate(component = as.character(component))


# Plot: edge width shows link weight, node colour the component and node
# size the degree; nodes are labelled with the `Label` column.
egos |>
  ggraph(layout = "graphopt") +
  geom_edge_link(aes(width = weight),
                 colour = "lightgray") +
  scale_edge_width(name = "Link strength") +
  geom_node_point(aes(color = component,
                      size = degree)) +
  geom_node_text(aes(label = Label), repel = TRUE) +
  scale_size(name = "Degree") +
  # scale_color_binned(name = "Communities") +
  theme_graph()

Tree of Science

# Interactive table of the Tree of Science results (`tos`), with per-column
# filters on top and export buttons (copy, csv, excel, pdf, print).
tos |>
  DT::datatable(class = "cell-border stripe",
                # spelled out: the shorthand `F` is an ordinary variable
                # that can be reassigned
                rownames = FALSE,
                filter = "top",
                editable = FALSE,
                extensions = "Buttons",
                options = list(dom = "Bfrtip",
                               buttons = c("copy",
                                           "csv",
                                           "excel",
                                           "pdf",
                                           "print")))

Scientometric Analysis

Sebastian Robledo

1/10/2023

Creating the environment

Data getting

Results

Scientometric Analysis

3.1 Scientific Production

Figure 1a - Scopus + WoS

Figure 1b - Total production

Figure 1c - Total Citations

3.2 Country analysis

Table 2 - Country production

Figure 2a - Country Collaboration

Figure 2b Clusters

Figure 2c Longitudinal

3.3 Journal Analysis

Table 3 Most productive journals

Figure 3 Journal Citation Network

Figure 3b clusters

Figure 3c Longitudinal

3.4 Author Analysis

Table 4

Figure 4a clusters

Figure 4b Longitudinal

Figure 4c Author Network

3.4.1 Ego top 10 authors

Tree of Science