Creating the environment

library(tidyverse)
library(tidygraph)
library(igraph)
library(bibliometrix)
library(tosr)
library(here)
library(lubridate)
# library(sjrdata)
library(openxlsx)
library(zoo)
library(RSQLite)
library(journalabbr)
library(ggraph)
library(openxlsx)
library(XML)
library(plyr)
library(readxl)
source("verbs.R")
# Register the "Times" and "Times New Roman" font families referenced by the
# plot themes in this script. `windowsFonts()` only exists on Windows builds
# of R, so the registration is skipped elsewhere instead of aborting the run.
if (.Platform$OS.type == "windows") {
  grDevices::windowsFonts("Times" = grDevices::windowsFont("Times"))
  grDevices::windowsFonts(
    "Times New Roman" = grDevices::windowsFont("Times New Roman")
  )
}

# Return the largest connected component of `graph` as an induced subgraph.
#
# @param graph An igraph graph.
# @return The subgraph induced by the vertices of the component with the most
#   vertices (the first such component on ties). A graph without vertices is
#   returned unchanged.
giant.component <- function(graph) {
  if (igraph::vcount(graph) == 0) {
    return(graph)
  }
  # `components()` / `induced_subgraph()` are the current names of the
  # deprecated `clusters()` / `induced.subgraph()`.
  comps <- igraph::components(graph)
  largest <- which.max(comps$csize)
  igraph::induced_subgraph(graph, which(comps$membership == largest))
}

Data getting

library(readxl)
library(httr)
# Workbook holding the pre-computed sheets used throughout this report.
# NOTE(review): absolute, machine-specific path; adjust before running
# on another computer.
tf <- "C:/coreR/Paula_oct/Art_08/all_data_ariza_cultural_heritage_web_applications.xlsx"

# readxl guesses each column type from the first 1000 data rows by default.
# A column that is blank in those rows is guessed as logical, and text found
# further down (the city names reported by the "Expecting logical" warnings)
# is then dropped. Scanning up to Excel's maximum row count avoids that loss.
excel_max_rows <- 1048576
wos_scopus <- readxl::read_excel(tf, sheet = 1, guess_max = excel_max_rows)
wos <- readxl::read_excel(tf, sheet = 2, guess_max = excel_max_rows)
scopus <- readxl::read_excel(tf, sheet = 3, guess_max = excel_max_rows)
## Warning: Expecting logical in H1367 / R1367C8: got 'VALENCIA'
## Warning: Expecting logical in H1381 / R1381C8: got 'KUNMING'
## Warning: Expecting logical in H1409 / R1409C8: got 'CHANIA, CRETE'
## Warning: Expecting logical in H1410 / R1410C8: got 'MARRAKESH'
## Warning: Expecting logical in H1411 / R1411C8: got 'BARCELONA'
## Warning: Expecting logical in H1412 / R1412C8: got 'BUDAPEST'
## Warning: Expecting logical in H1413 / R1413C8: got 'ROME'
## Warning: Expecting logical in H1414 / R1414C8: got 'TRABZON'
## Warning: Expecting logical in H1416 / R1416C8: got 'COMO'
## Warning: Expecting logical in H1417 / R1417C8: got 'CHERBOURG'
## Warning: Expecting logical in H1418 / R1418C8: got 'HERAKLION, CRETE'
## Warning: Expecting logical in H1420 / R1420C8: got 'HERAKLION'
## Warning: Expecting logical in H1421 / R1421C8: got 'HERAKLION'
## Warning: Expecting logical in H1424 / R1424C8: got 'IOANNINA'
## Warning: Expecting logical in H1425 / R1425C8: got 'MINNEAPOLIS, MN'
## Warning: Expecting logical in H1427 / R1427C8: got 'FLORENCE'
## Warning: Expecting logical in H1428 / R1428C8: got 'SALVADOR'
## Warning: Expecting logical in H1429 / R1429C8: got 'PIRAEUS-ATHENS'
## Warning: Expecting logical in H1430 / R1430C8: got 'PIRAEUS-ATHENS'
## Warning: Expecting logical in H1431 / R1431C8: got 'VIENNA'
## Warning: Expecting logical in H1432 / R1432C8: got 'MARSEILLE'
## Warning: Expecting logical in H1433 / R1433C8: got 'MARSEILLE'
## Warning: Expecting logical in H1434 / R1434C8: got 'MARSEILLE'
## Warning: Expecting logical in H1435 / R1435C8: got 'MARSEILLE'
## Warning: Expecting logical in H1436 / R1436C8: got 'MARSEILLE'
## Warning: Expecting logical in H1437 / R1437C8: got 'MARSEILLE'
## Warning: Expecting logical in H1438 / R1438C8: got 'MARSEILLE'
## Warning: Expecting logical in H1441 / R1441C8: got 'SAN JOSE, CA'
## Warning: Expecting logical in H1442 / R1442C8: got 'LISBON'
## Warning: Expecting logical in H1443 / R1443C8: got 'NAPLES'
## Warning: Expecting logical in H1444 / R1444C8: got 'NAPLES'
## Warning: Expecting logical in H1447 / R1447C8: got 'MUNICH'
## Warning: Expecting logical in H1448 / R1448C8: got 'LISBON'
## Warning: Expecting logical in H1451 / R1451C8: got 'BARCELONA'
## Warning: Expecting logical in H1452 / R1452C8: got 'LAS VEGAS, NV'
## Warning: Expecting logical in H1453 / R1453C8: got 'CAMBRIDGE'
## Warning: Expecting logical in H1457 / R1457C8: got 'MOUNTAIN VIEW, CA'
## Warning: Expecting logical in H1458 / R1458C8: got 'BURLINGAME, CA'
## Warning: Expecting logical in H1467 / R1467C8: got 'HONG KONG'
## Warning: Expecting logical in H1469 / R1469C8: got 'NIAGARA FALLS, ON'
## Warning: Expecting logical in H1470 / R1470C8: got 'PUERTO DE LA CRUZ,
## TENERIFE'
## Warning: Expecting logical in H1471 / R1471C8: got 'PUERTO DE LA CRUZ,
## TENERIFE'
## Warning: Expecting logical in H1472 / R1472C8: got 'PUERTO DE LA CRUZ,
## TENERIFE'
## Warning: Expecting logical in H1473 / R1473C8: got 'PARIS'
## Warning: Expecting logical in H1475 / R1475C8: got 'LAS VEGAS, NV'
## Warning: Expecting logical in H1478 / R1478C8: got 'NARA'
## Warning: Expecting logical in H1479 / R1479C8: got 'MAUI, HI'
## Warning: Expecting logical in H1480 / R1480C8: got 'NARA'
## Warning: Expecting logical in H1481 / R1481C8: got 'ATLANTA, GA'
## Warning: Expecting logical in H1482 / R1482C8: got 'PITTSBURGH, PA'
## Warning: Expecting logical in H1483 / R1483C8: got 'IRVINE, CA'
## Warning: Expecting logical in H1484 / R1484C8: got 'ATLANTA, GA'
## Warning: Expecting logical in H1485 / R1485C8: got 'ATLANTA, GA'
## Warning: Expecting logical in H1486 / R1486C8: got 'ATLANTA, GA'
## Warning: Expecting logical in H1487 / R1487C8: got 'MILAN'
## Warning: Expecting logical in H1488 / R1488C8: got 'MILAN'
## Warning: Expecting logical in H1489 / R1489C8: got 'MILAN'
## Warning: Expecting logical in H1490 / R1490C8: got 'MILAN'
## Warning: Expecting logical in H1491 / R1491C8: got 'MILAN'
## Warning: Expecting logical in H1492 / R1492C8: got 'ROCHESTER, NY'
## Warning: Expecting logical in H1493 / R1493C8: got 'LIVERPOOL'
## Warning: Expecting logical in H1494 / R1494C8: got 'LIMASSOL'
## Warning: Expecting logical in H1495 / R1495C8: got 'LIMASSOL'
## Warning: Expecting logical in H1496 / R1496C8: got 'LIMASSOL'
## Warning: Expecting logical in H1497 / R1497C8: got 'MADRID'
## Warning: Expecting logical in H1498 / R1498C8: got 'MELBOURNE, VIC'
## Warning: Expecting logical in H1499 / R1499C8: got 'PALERMO'
## Warning: Expecting logical in H1500 / R1500C8: got 'BERLIN'
## Warning: Expecting logical in H1501 / R1501C8: got 'LAVAL'
## Warning: Expecting logical in H1502 / R1502C8: got 'SALVADOR DE BAHIA'
## Warning: Expecting logical in H1503 / R1503C8: got 'WASHINGTON, DC'
## Warning: Expecting logical in H1504 / R1504C8: got 'LONDON'
## Warning: Expecting logical in H1505 / R1505C8: got 'LONDON'
## Warning: Expecting logical in H1507 / R1507C8: got 'LYON'
## Warning: Expecting logical in H1508 / R1508C8: got 'XIAMEN'
## Warning: Expecting logical in H1509 / R1509C8: got 'MODENA'
## Warning: Expecting logical in H1511 / R1511C8: got 'VIENNA'
## Warning: Expecting logical in H1512 / R1512C8: got 'ST. LOUIS, MO'
## Warning: Expecting logical in H1513 / R1513C8: got 'PITTSBURGH, PA'
## Warning: Expecting logical in H1516 / R1516C8: got 'BASEL'
## Warning: Expecting logical in H1517 / R1517C8: got 'BASEL'
## Warning: Expecting logical in H1518 / R1518C8: got 'NOTTINGHAM'
## Warning: Expecting logical in H1519 / R1519C8: got 'HOBART, TAS'
## Warning: Expecting logical in H1520 / R1520C8: got 'RENDE, CS'
## Warning: Expecting logical in H1521 / R1521C8: got 'HONG KONG'
## Warning: Expecting logical in H1522 / R1522C8: got 'NOTTINGHAM'
## Warning: Expecting logical in H1525 / R1525C8: got 'SARAJEVO'
## Warning: Expecting logical in H1526 / R1526C8: got 'QUEENSTOWN'
## Warning: Expecting logical in H1527 / R1527C8: got 'QUEENSTOWN'
## Warning: Expecting logical in H1528 / R1528C8: got 'LISBON'
## Warning: Expecting logical in H1529 / R1529C8: got 'CHAVES'
## Warning: Expecting logical in H1531 / R1531C8: got 'ATHENS'
## Warning: Expecting logical in H1532 / R1532C8: got 'ABU DHABI'
## Warning: Expecting logical in H1535 / R1535C8: got 'TAIPEI'
## Warning: Expecting logical in H1537 / R1537C8: got 'IZMIR'
## Warning: Expecting logical in H1546 / R1546C8: got 'HERSONISSOS, HERAKLION,
## CRETE'
## Warning: Expecting logical in H1547 / R1547C8: got 'BRISBANE, QLD'
## Warning: Expecting logical in H1548 / R1548C8: got 'SEOUL'
## Warning: Expecting logical in H1549 / R1549C8: got 'SEOUL'
## Warning: Expecting logical in H1550 / R1550C8: got 'SEOUL'
## Warning: Expecting logical in H1551 / R1551C8: got 'SEOUL'
## Warning: Expecting logical in H1552 / R1552C8: got 'SEOUL'
## Warning: Expecting logical in H1554 / R1554C8: got 'LONDON'
## Warning: Expecting logical in H1556 / R1556C8: got 'SIERRE'
## Warning: Expecting logical in H1557 / R1557C8: got 'HERAKLION, CRETE'
## Warning: Expecting logical in H1559 / R1559C8: got 'BEIJING'
## Warning: Expecting logical in H1560 / R1560C8: got 'CANNES, LA BOCCA'
## Warning: Expecting logical in H1562 / R1562C8: got 'DOULIOU, YUNLIN'
## Warning: Expecting logical in H1563 / R1563C8: got 'CAIRO'
## Warning: Expecting logical in H1564 / R1564C8: got 'YOKOHAMA'
## Warning: Expecting logical in H1565 / R1565C8: got 'PORTO'
## Warning: Expecting logical in H1566 / R1566C8: got 'BERKELEY, CA'
## Warning: Expecting logical in H1568 / R1568C8: got 'VIENNA'
## Warning: Expecting logical in H1569 / R1569C8: got 'VIENNA'
## Warning: Expecting logical in H1570 / R1570C8: got 'HERAKLION, CRETE'
## Warning: Expecting logical in H1576 / R1576C8: got 'FUNCHAL, MADEIRA'
## Warning: Expecting logical in H1577 / R1577C8: got 'FUNCHAL, MADEIRA'
## Warning: Expecting logical in H1579 / R1579C8: got 'COPENHAGEN'
## Warning: Expecting logical in H1580 / R1580C8: got 'AMSTERDAM'
## Warning: Expecting logical in H1581 / R1581C8: got 'FUNCHAL, MADEIRA'
## Warning: Expecting logical in H1583 / R1583C8: got 'STANFORD, CA'
## Warning: Expecting logical in H1584 / R1584C8: got 'CHERBOURG-OCTEVILLE'
## Warning: Expecting logical in H1585 / R1585C8: got 'NANJING'
## Warning: Expecting logical in H1588 / R1588C8: got 'PORTO'
## Warning: Expecting logical in H1589 / R1589C8: got 'NARA'
## Warning: Expecting logical in H1590 / R1590C8: got 'VANCOUVER, BC'
## Warning: Expecting logical in H1596 / R1596C8: got 'DELFT'
## Warning: Expecting logical in H1597 / R1597C8: got 'VILAMOURA'
## Warning: Expecting logical in H1599 / R1599C8: got 'MONTREAL, QC'
## Warning: Expecting logical in H1600 / R1600C8: got 'PLZEN'
## Warning: Expecting logical in H1601 / R1601C8: got 'LIMASSOL'
## Warning: Expecting logical in H1602 / R1602C8: got 'KUALA LUMPUR'
## Warning: Expecting logical in H1605 / R1605C8: got 'TRENTO'
## Warning: Expecting logical in H1606 / R1606C8: got 'PARIS'
## Warning: Expecting logical in H1607 / R1607C8: got 'PARIS'
## Warning: Expecting logical in H1608 / R1608C8: got 'AMSTERDAM'
## Warning: Expecting logical in H1609 / R1609C8: got 'TRENTO'
## Warning: Expecting logical in H1612 / R1612C8: got 'SAN JOSE, CA'
## Warning: Expecting logical in H1613 / R1613C8: got 'LONDON'
## Warning: Expecting logical in H1614 / R1614C8: got 'LONDON'
## Warning: Expecting logical in H1620 / R1620C8: got 'MONTEREY, CA'
## Warning: Expecting logical in H1630 / R1630C8: got 'GLYFADA, NR ATHENS'
## Warning: Expecting logical in H1631 / R1631C8: got 'GLYFADA, NR ATHENS'
## Warning: Expecting logical in H1632 / R1632C8: got 'GLYFADA, NR ATHENS'
## Warning: Expecting logical in H1633 / R1633C8: got 'GLYFADA, NR ATHENS'
## Warning: Expecting logical in H1634 / R1634C8: got 'SAN JOSE, CA'
# Load the remaining sheets of the workbook at `tf`, addressed by position.
# Sheet 9 is not read here.
load_sheet <- function(index) {
  readxl::read_excel(tf, sheet = index)
}

reference_df <- load_sheet(4)
journal_df <- load_sheet(5)
author_df <- load_sheet(6)
TC_all <- load_sheet(7)
figure_1_data <- load_sheet(8)
table_2_country <- load_sheet(10)
figure_2_country_wos_scopus <- load_sheet(11)

# Sheet 12 is turned into an undirected graph whose nodes carry their Louvain
# community (as a factor) and their degree.
country_graph <- tidygraph::as_tbl_graph(load_sheet(12), directed = FALSE)
figure_2_country_wos_scopus_1 <-
  country_graph |>
  tidygraph::activate(nodes) |>
  dplyr::mutate(
    community = as.factor(tidygraph::group_louvain()),
    degree = tidygraph::centrality_degree()
  )

table_3_journal <- load_sheet(13)
table_4_authors <- load_sheet(14)
AU_CO_links <- load_sheet(15)
tos <- load_sheet(16)
edges_tos <- load_sheet(17)
nodes_tos <- load_sheet(18)
SO_edges <- load_sheet(19)
SO_nodes <- load_sheet(20)
AU_ego_edges <- load_sheet(21)
AU_ego_nodes <- load_sheet(22)

Summary of WoS and Scopus

# Table 1: number of records per source (WoS, Scopus, and the merged set),
# taken as the length of each table's `AU` column.
table_1 <-
  tibble::tibble(
    wos = length(wos$AU),
    scopus = length(scopus$AU),
    total = length(wos_scopus$AU)
  )

# Interactive table with copy / CSV / Excel / PDF / print export buttons.
table_1 |>
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable and can be reassigned
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )
# Document types in the merged data set: one row per type with its count and
# its share of all (record, type) pairs, shown as an interactive table.
wos_scopus |>
  tidyr::separate_rows(DT, sep = ";") |>
  # Splitting on ";" keeps any blank that follows the separator; trim it so
  # " ARTICLE" and "ARTICLE" are counted as the same type.
  dplyr::mutate(DT = trimws(DT)) |>
  dplyr::count(DT, sort = TRUE) |>
  dplyr::mutate(percentage = round(n / sum(n) * 100, digits = 2)) |>
  dplyr::rename(total = n) |>
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable and can be reassigned
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )

Results

Scientometric Analysis

3.1 Scientific Production

Figure 1a - Scopus + WoS

Combine charts using Python Matplotlib & Reticulate

library(reticulate)
# create a new environment
# conda_create("r-reticulate")
# install Matplotlib
# conda_install("r-reticulate", "matplotlib")
# import Matplotlib (it will be automatically discovered in "r-reticulate")
# Python modules made available to R through reticulate.
plt <- reticulate::import("matplotlib")
np <- reticulate::import("numpy")

# Convert `TC_sum_all` from double to integer.
TC_all[["TC_sum_all"]] <- as.integer(TC_all[["TC_sum_all"]])
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter
# Figure 1a: total publications (line) and per-database bars on the left
# axis, total citations (line) on a second y axis on the right.
# NOTE(review): tpx/tpy, sx/sy, wx1/wy and tcx/tcy are not defined in this
# chunk; presumably they are the x/y series prepared elsewhere. Confirm.
fig, ax = plt.subplots()

# Left axis: total publications as a time series.
ax.plot(tpx, tpy, color='r', marker='o', label='Total Publications')
ax.set_xlabel('Year')
ax.set_ylabel('Total Publications', color='r')

# Left axis: Scopus and WoS bars drawn with the same bar width.
barw = 0.5
ax.bar(sx, sy, color='g', label='Scopus', alpha=0.5, width=barw)
ax.bar(wx1, wy, color='orange', label='WoS', alpha=0.8, width=barw)

# Right axis (Y2): total citations.
twin_axes = ax.twinx()
twin_axes.plot(tcx, tcy, color='purple', marker='o', label='Total Citations')
twin_axes.set_ylabel('Total Citations', color='purple')

# Title, Y2 legend and Y2 limit go on the twin axes, which is the axes the
# pyplot state machine targets once twinx() has been called.
twin_axes.set_title('Total Scientific Production vs. Total Citations')
twin_axes.legend(loc='center left')
# No background grid on the left axes.
ax.grid(False)
# Legend for the left-axis series.
ax.legend(loc='upper left')
# Important: the Y2 upper limit depends on the scale of tcy; change it here.
twin_axes.set_ylim(0, 4000)

# Value labels: (target axes, x series, y series, vertical offset, colour).
annotation_specs = [
    (twin_axes, tcx, tcy, 0.5, 'purple'),
    (ax, tpx, tpy, 0.8, 'red'),
    (ax, wx1, wy, 0.1, 'brown'),
    (ax, sx, sy, 0.2, 'green'),
]
for target, xs, ys, lift, colour in annotation_specs:
    for i, value in enumerate(ys):
        target.annotate(value, (xs[i], ys[i] + lift), color=colour, size=8)

# One x tick per entry of tpx, with rotated labels.
twin_axes.set_xticks(tpx)
fig.autofmt_xdate(rotation=70)

# Important: the Y1 ticks depend on the scale of tpy; choose the values here.
yticks = list(range(0, 261, 20))
ax.set_yticks(yticks)

# Export the figure as SVG.
fig.savefig("paola_13.svg")

plt.show()

3.2 Country analysis

Table 2 - Country production

# Table 2: country production, shown as an interactive table with
# copy / CSV / Excel / PDF / print export buttons.
table_2_country |>
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable and can be reassigned
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )

Figure 2a - Country Collaboration

# Figure 2a: country collaboration network. Edge width follows the `Weight`
# edge column; node colour and size follow `community` and `degree`.
country_net <- tidygraph::activate(figure_2_country_wos_scopus_1, edges)

figure_2a <-
  ggraph::ggraph(country_net, layout = "graphopt") +
  ggraph::geom_edge_link(
    ggplot2::aes(width = Weight),
    colour = "lightgray"
  ) +
  ggraph::scale_edge_width(name = "Link strength") +
  ggraph::geom_node_point(ggplot2::aes(color = community, size = degree)) +
  ggraph::geom_node_text(ggplot2::aes(label = name), repel = TRUE) +
  ggplot2::scale_size(name = "Degree") +
  ggraph::theme_graph()

figure_2a

Figure 2b Clusters

# Figure 2b: size (number of countries) of the ten largest communities of the
# country collaboration network.
figure_2b <-
  figure_2_country_wos_scopus_1 |>
  tidygraph::activate(nodes) |>
  data.frame() |>
  # No group_by() before count(): counting grouped data keeps the grouping,
  # and slice() would then act inside each one-row group and keep every
  # community instead of the first ten.
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(1:10) |>
  ggplot2::ggplot(ggplot2::aes(x = reorder(community, n), y = n)) +
  ggplot2::geom_point(stat = "identity") +
  ggplot2::geom_line(group = 1) +
  ggplot2::labs(
    title = "Communities by size",
    x = "communities",
    y = "Countries"
  ) +
  ggplot2::theme(
    text = ggplot2::element_text(
      color = "black",
      face = "bold",
      family = "Times New Roman"
    ),
    plot.title = ggplot2::element_text(size = 25),
    panel.background = ggplot2::element_rect(fill = "white"),
    axis.text.y = ggplot2::element_text(size = 15, colour = "black"),
    axis.text.x = ggplot2::element_text(size = 15, colour = "black"),
    axis.title.x = ggplot2::element_text(size = 20),
    axis.title.y = ggplot2::element_text(size = 20)
  )

figure_2b

Figure 2c Longitudinal

# Links between two different countries (self-links dropped), as the edge
# table of a single graph. Building the graph once matters: as_tbl_graph()
# replaces country names by integer node ids, and ids taken from two
# differently filtered graphs do not refer to the same countries.
# NOTE(review): the earlier version took `to` ids from the unfiltered data;
# confirm that self-links should be excluded from the node series as well.
figure_2c_links <-
  figure_2_country_wos_scopus |>
  dplyr::filter(from != to) |>
  tidygraph::as_tbl_graph() |>
  tidygraph::activate(edges) |>
  tibble::as_tibble()

# Links per year (2002-2022), scaled by the busiest year.
figure_2c_edges <-
  figure_2c_links |>
  dplyr::select(year = PY) |>
  dplyr::count(year) |>
  dplyr::filter(year >= 2002, year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

# Countries per year of first appearance at either end of a link
# (2002-2022), scaled by the busiest year.
figure_2c_nodes <-
  dplyr::bind_rows(
    dplyr::select(figure_2c_links, CO = from, year = PY),
    dplyr::select(figure_2c_links, CO = to, year = PY)
  ) |>
  dplyr::distinct() |>
  dplyr::group_by(CO) |>
  dplyr::slice(which.min(year)) |> # earliest year per country
  dplyr::ungroup() |>
  dplyr::count(year) |>
  dplyr::filter(year >= 2002, year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)
# Figure 2c: the node and link series (`figure_2c_nodes`, `figure_2c_edges`)
# drawn together, one colour per series.
figure_2c_series <-
  dplyr::bind_rows(
    dplyr::mutate(figure_2c_nodes, type = "nodes", year = as.numeric(year)),
    dplyr::mutate(figure_2c_edges, type = "links", year = as.numeric(year))
  )

figure_2c <-
  ggplot2::ggplot(
    figure_2c_series,
    ggplot2::aes(x = year, y = percentage, color = type)
  ) +
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ggplot2::theme(
    legend.position = "right",
    text = ggplot2::element_text(
      color = "black",
      face = "bold",
      family = "Times"
    ),
    plot.title = ggplot2::element_text(size = 25),
    panel.background = ggplot2::element_rect(fill = "white"),
    axis.text.y = ggplot2::element_text(size = 15, colour = "black"),
    axis.text.x = ggplot2::element_text(
      size = 15,
      colour = "black",
      angle = 45,
      vjust = 0.5
    ),
    axis.title.x = ggplot2::element_text(size = 20),
    axis.title.y = ggplot2::element_text(size = 20),
    # Font size must be numeric; the string "15" only worked by coercion.
    legend.text = ggplot2::element_text(size = 15),
    legend.title = ggplot2::element_blank()
  ) +
  ggplot2::labs(
    title = "Nodes and links through time",
    y = "Percentage"
  ) +
  ggplot2::scale_y_continuous(labels = scales::percent) +
  ggplot2::scale_x_continuous(breaks = seq(2002, 2022, by = 1))

figure_2c

3.3 Journal Analysis

Table 3 Most productive journals

# Table 3: journals ordered by `total` (descending), shown as an interactive
# table. dplyr is named explicitly because plyr, attached later, masks
# `arrange()` and `desc()`.
table_3_journal |>
  dplyr::arrange(dplyr::desc(total)) |>
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable and can be reassigned
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )

Figure 3 Journal Citation Network

Creating the graph object

# Number of rows per (JI_main, JI_ref) journal pair, stored as `weight`.
journal_pair_weights <-
  dplyr::count(journal_df, JI_main, JI_ref, name = "weight")

# Undirected journal graph restricted to component 1, with each node's degree
# and Louvain community; nodes without any link are dropped.
journal_citation_graph_weighted_tbl_small <-
  journal_pair_weights |>
  tidygraph::as_tbl_graph(directed = FALSE) |>
  tidygraph::activate(nodes) |>
  dplyr::mutate(components = tidygraph::group_components(type = "weak")) |>
  dplyr::filter(components == 1) |>
  dplyr::mutate(
    degree = tidygraph::centrality_degree(),
    community = tidygraph::group_louvain()
  ) |>
  dplyr::select(-components) |>
  dplyr::filter(degree >= 1)

Selecting nodes to show

# Graph built from `SO_edges`; node attributes come from `SO_nodes` (matched
# on `id`), and the `Label` column replaces the original node name.
journal_net <-
  SO_edges |>
  tidygraph::as_tbl_graph() |>
  tidygraph::activate(nodes) |>
  tidygraph::left_join(SO_nodes, by = c("name" = "id")) |>
  tidygraph::select(-name) |>
  tidygraph::rename(name = Label)

# Figure 3a: edge width follows `weight`; node colour and size follow
# `community` and `degree`.
figure_3a_1 <-
  ggraph::ggraph(journal_net, layout = "graphopt") +
  ggraph::geom_edge_link(
    ggplot2::aes(width = weight),
    colour = "lightgray"
  ) +
  ggraph::scale_edge_width(name = "Link strength") +
  ggraph::geom_node_point(ggplot2::aes(color = community, size = degree)) +
  ggraph::geom_node_text(ggplot2::aes(label = name), repel = TRUE) +
  ggplot2::scale_size(name = "Degree") +
  ggraph::theme_graph()

figure_3a_1

Figure 3b clusters

# Node counts of the ten largest communities of the journal citation graph.
journal_community_sizes <-
  journal_citation_graph_weighted_tbl_small |>
  tidygraph::activate(nodes) |>
  data.frame() |>
  dplyr::select(community) |>
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(1:10)

# Shared look of the plot: bold black Times text on a white panel.
figure_3b_theme <-
  ggplot2::theme(
    text = ggplot2::element_text(
      color = "black",
      face = "bold",
      family = "Times"
    ),
    plot.title = ggplot2::element_text(size = 25),
    panel.background = ggplot2::element_rect(fill = "white"),
    axis.text.y = ggplot2::element_text(size = 15, colour = "black"),
    axis.text.x = ggplot2::element_text(size = 15, colour = "black"),
    axis.title.x = ggplot2::element_text(size = 20),
    axis.title.y = ggplot2::element_text(size = 20)
  )

# Figure 3b: communities on the x axis, ordered by size.
figure_3b <-
  ggplot2::ggplot(
    journal_community_sizes,
    ggplot2::aes(x = reorder(community, n), y = n)
  ) +
  ggplot2::geom_point(stat = "identity") +
  ggplot2::geom_line(group = 1) +
  ggplot2::labs(
    title = "Communities by size",
    x = "communities",
    y = "Journals"
  ) +
  figure_3b_theme
figure_3b

Figure 3c Longitudinal

# Citation links between two different journals (self-links dropped), as the
# edge table of a single graph. Building the graph once matters:
# as_tbl_graph() replaces journal names by integer node ids, and ids taken
# from two differently filtered graphs do not refer to the same journals.
# NOTE(review): the earlier version took `to` ids from the unfiltered data;
# confirm that self-links should be excluded from the node series as well.
figure_3c_links <-
  journal_df |>
  dplyr::select(from = JI_main, to = JI_ref, PY = PY_ref) |>
  dplyr::filter(from != to) |>
  tidygraph::as_tbl_graph() |>
  tidygraph::activate(edges) |>
  tibble::as_tibble()

# Links per year (2002-2022), scaled by the busiest year.
figure_3c_edges <-
  figure_3c_links |>
  dplyr::select(year = PY) |>
  dplyr::count(year) |>
  dplyr::filter(year >= 2002, year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

# Journals per year of first appearance at either end of a link
# (2002-2022), scaled by the busiest year.
figure_3c_nodes <-
  dplyr::bind_rows(
    dplyr::select(figure_3c_links, CO = from, year = PY),
    dplyr::select(figure_3c_links, CO = to, year = PY)
  ) |>
  dplyr::distinct() |>
  dplyr::group_by(CO) |>
  dplyr::slice(which.min(year)) |> # earliest year per journal
  dplyr::ungroup() |>
  dplyr::count(year) |>
  dplyr::filter(year >= 2002, year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

Plotting figure 3c

# Figure 3c: the node and link series (`figure_3c_nodes`, `figure_3c_edges`)
# drawn together, one colour per series.
figure_3c_series <-
  dplyr::bind_rows(
    dplyr::mutate(figure_3c_nodes, type = "nodes"),
    dplyr::mutate(figure_3c_edges, type = "links")
  )

figure_3c <-
  ggplot2::ggplot(
    figure_3c_series,
    ggplot2::aes(x = year, y = percentage, color = type)
  ) +
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ggplot2::theme(
    legend.position = "right",
    text = ggplot2::element_text(
      color = "black",
      face = "bold",
      family = "Times"
    ),
    plot.title = ggplot2::element_text(size = 25),
    panel.background = ggplot2::element_rect(fill = "white"),
    axis.text.y = ggplot2::element_text(size = 15, colour = "black"),
    axis.text.x = ggplot2::element_text(
      size = 15,
      colour = "black",
      angle = 45,
      vjust = 0.5
    ),
    axis.title.x = ggplot2::element_text(size = 20),
    axis.title.y = ggplot2::element_text(size = 20),
    # Font size must be numeric; the string "15" only worked by coercion.
    legend.text = ggplot2::element_text(size = 15),
    legend.title = ggplot2::element_blank()
  ) +
  ggplot2::labs(
    title = "Nodes and links through time",
    y = "Percentage"
  ) +
  ggplot2::scale_y_continuous(labels = scales::percent) +
  ggplot2::scale_x_continuous(breaks = seq(2002, 2022, by = 1))

figure_3c

3.4 Author Analysis

Table 4

# Table 4: the `authors_total` and `papers_total` columns, shown as an
# interactive table with copy / CSV / Excel / PDF / print export buttons.
table_4_authors |>
  dplyr::select(authors_total, papers_total) |>
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable and can be reassigned
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )

Creating the ASN - graph object

# Author graph that keeps every row of `author_df` as its own edge (so edge
# columns such as PY survive), restricted to component 1, with node degree
# and Louvain community.
author_graph_all_rows <- tidygraph::as_tbl_graph(author_df, directed = FALSE)
author_network_time <-
  author_graph_all_rows |>
  tidygraph::activate(nodes) |>
  dplyr::mutate(components = tidygraph::group_components(type = "weak")) |>
  dplyr::filter(components == 1) |>
  dplyr::mutate(
    degree = tidygraph::centrality_degree(),
    community = as.factor(tidygraph::group_louvain())
  )

# Author graph with one edge per (from, to) pair, weighted by the number of
# rows for that pair; all components are kept.
author_pair_weights <-
  author_df |>
  dplyr::select(-PY) |>
  dplyr::count(from, to, name = "weight")
author_network <-
  author_pair_weights |>
  tidygraph::as_tbl_graph(directed = FALSE) |>
  tidygraph::activate(nodes) |>
  dplyr::mutate(
    degree = tidygraph::centrality_degree(),
    community = as.factor(tidygraph::group_louvain())
  )

Figure 4a clusters

# Figure 4a: size (number of authors) of the ten largest communities of the
# author network.
figure_4a <-
  author_network |>
  tidygraph::activate(nodes) |>
  data.frame() |>
  # Sort by size before taking the first ten, as the other "Communities by
  # size" figures do; without it the rows follow the factor level order.
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(1:10) |>
  ggplot2::ggplot(ggplot2::aes(x = reorder(community, n), y = n)) +
  ggplot2::geom_point(stat = "identity") +
  ggplot2::geom_line(group = 1) +
  ggplot2::labs(
    title = "Communities by size",
    x = "communities",
    y = "Authors"
  ) +
  ggplot2::theme(
    text = ggplot2::element_text(
      color = "black",
      face = "bold",
      family = "Times"
    ),
    plot.title = ggplot2::element_text(size = 25),
    panel.background = ggplot2::element_rect(fill = "white"),
    axis.text.y = ggplot2::element_text(size = 15, colour = "black"),
    axis.text.x = ggplot2::element_text(size = 15, colour = "black"),
    axis.title.x = ggplot2::element_text(size = 20),
    axis.title.y = ggplot2::element_text(size = 20)
  )

figure_4a

Figure 4b Longitudinal

# Edge table of the author network; `from` / `to` are node ids, PY the year.
author_links <-
  author_network_time |>
  tidygraph::activate(edges) |>
  tibble::as_tibble()

# Links per year (2002-2022), scaled by the busiest year.
fig_1c_edges <-
  author_links |>
  dplyr::select(year = PY) |>
  dplyr::count(year) |>
  dplyr::filter(year >= 2002, year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

# Every (author, year) pair seen at either end of a link, without duplicates.
author_years <-
  unique(
    dplyr::bind_rows(
      dplyr::select(author_links, author = from, year = PY),
      dplyr::select(author_links, author = to, year = PY)
    )
  )

# Authors per year of first appearance (2002-2022), scaled by the busiest
# year.
fig_1c_nodes <-
  author_years |>
  dplyr::group_by(author) |>
  dplyr::slice(which.min(year)) |> # earliest year per author
  dplyr::ungroup() |>
  dplyr::count(year) |>
  dplyr::filter(year >= 2002, year <= 2022) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

plotting figure 4b

# Figure 4b: the node and link series (`fig_1c_nodes`, `fig_1c_edges`) drawn
# together, one colour per series.
figure_4b_series <-
  dplyr::bind_rows(
    dplyr::mutate(fig_1c_nodes, type = "nodes"),
    dplyr::mutate(fig_1c_edges, type = "links")
  )

figure_4b <-
  ggplot2::ggplot(
    figure_4b_series,
    ggplot2::aes(x = year, y = percentage, color = type)
  ) +
  ggplot2::geom_point() +
  ggplot2::geom_line() +
  ggplot2::theme(
    legend.position = "right",
    text = ggplot2::element_text(
      color = "black",
      face = "bold",
      family = "Times"
    ),
    plot.title = ggplot2::element_text(size = 25),
    panel.background = ggplot2::element_rect(fill = "white"),
    axis.text.y = ggplot2::element_text(size = 15, colour = "black"),
    axis.text.x = ggplot2::element_text(
      size = 15,
      colour = "black",
      angle = 45,
      vjust = 0.5
    ),
    axis.title.x = ggplot2::element_text(size = 20),
    axis.title.y = ggplot2::element_text(size = 20),
    # Font size must be numeric; the string "15" only worked by coercion.
    legend.text = ggplot2::element_text(size = 15),
    legend.title = ggplot2::element_blank()
  ) +
  ggplot2::labs(
    title = "Nodes and links through time",
    y = "Percentage"
  ) +
  ggplot2::scale_y_continuous(labels = scales::percent) +
  ggplot2::scale_x_continuous(breaks = seq(2002, 2022, by = 1))

figure_4b

Filtering only the top 10 nodes with the highest degree in each of the first 6 clusters.

# Keep the highest-degree nodes of a single community.
#
# graph:            tbl_graph whose nodes carry a `community` column.
# target_community: numeric id of the community to keep.
# max_nodes:        how many top-degree nodes to retain (default 10).
#
# Returns a tbl_graph with nodes active, a numeric `community` column and a
# `degree_community` column, sorted by decreasing `degree_community`.
# The degree is computed after the community filter, so it presumably counts
# only links between members of that community (confirm against tidygraph's
# node-filter semantics).
top_community_nodes <- function(graph, target_community, max_nodes = 10) {
  graph |>
    activate(nodes) |>
    dplyr::mutate(community = as.numeric(community)) |>
    dplyr::filter(community == target_community) |>
    dplyr::mutate(degree_community = centrality_degree()) |>
    dplyr::arrange(desc(degree_community)) |>
    # seq_len() stays well-defined even if max_nodes is 0.
    dplyr::slice(seq_len(max_nodes))
}

# Top 10 nodes by within-community degree for communities 1 to 6.
asn_TM_connected_1 <- top_community_nodes(author_network, 1)
asn_TM_connected_2 <- top_community_nodes(author_network, 2)
asn_TM_connected_3 <- top_community_nodes(author_network, 3)
asn_TM_connected_4 <- top_community_nodes(author_network, 4)
asn_TM_connected_5 <- top_community_nodes(author_network, 5)
asn_TM_connected_6 <- top_community_nodes(author_network, 6)

Saving the nodes we are going to show

# Node names of the retained top nodes, one single-column tibble per community.
nodes_community_1 <-
  dplyr::select(as_tibble(activate(asn_TM_connected_1, nodes)), name)
nodes_community_2 <-
  dplyr::select(as_tibble(activate(asn_TM_connected_2, nodes)), name)
nodes_community_3 <-
  dplyr::select(as_tibble(activate(asn_TM_connected_3, nodes)), name)

# Communities 4, 5 and 6 are currently left out: their extraction was
# disabled in the script. To include them, build nodes_community_4..6 from
# asn_TM_connected_4..6 in the same way and add them to the bind below.

# All selected names stacked in community order (1, 2, 3).
nodes_selected_10 <-
  bind_rows(nodes_community_1,
            nodes_community_2,
            nodes_community_3)

Filtering selected nodes

# Restrict the author network to the selected node names and compute each
# remaining node's degree on that reduced graph.
asn_selected_nodes <-
  activate(author_network, nodes) |>
  dplyr::filter(name %in% nodes_selected_10[["name"]]) |>
  dplyr::mutate(degree = centrality_degree())

# Disabled follow-up step (kept for reference): label weakly connected
# components and keep only the first one.
# dplyr::mutate(final_plot = tidygraph::group_components(type = "weak")) |>
# dplyr::filter(final_plot == 1)

Figure 4c Author Network

# Figure 4c: the selected authors laid out with "graphopt". Links are plain
# light-gray segments; point colour follows `community`, point size follows
# `degree`, and every node is labelled with its name (repelled to limit
# overlap).
figure_4c <-
  ggraph(asn_selected_nodes, layout = "graphopt") +
  geom_edge_link(colour = "lightgray",
                 width = 1) +
  geom_node_point(aes(color = community,
                      size = degree)) +
  geom_node_text(aes(label = name),
                 repel = TRUE) +
  theme_graph()

figure_4c

3.4.1 Ego top 10 authors

Merging ego networks

# Build the combined ego graph from the edge table and attach node attributes.
# Node names produced by as_tbl_graph() are matched against `id`, which is
# converted to character first so the join keys have the same type.
# `component` is stored as character afterwards (presumably so it is treated
# as a discrete colour in the plot; confirm).
egos <-
  tidygraph::as_tbl_graph(x = AU_ego_edges) |>
  tidygraph::activate(nodes) |>
  dplyr::left_join(
    # dplyr:: is explicit: plyr is attached after dplyr in this file, so a
    # bare mutate() would resolve to plyr's version.
    dplyr::mutate(AU_ego_nodes, id = as.character(id)),
    by = c("name" = "id")
  ) |>
  dplyr::mutate(component = as.character(component))


# Plot the merged ego graph: link width follows `weight`, point colour follows
# `component`, point size follows `degree`, and nodes are labelled with
# `Label`.
ggraph(egos, layout = "graphopt") +
  geom_edge_link(aes(width = weight), colour = "lightgray") +
  scale_edge_width(name = "Link strength") +
  geom_node_point(aes(color = component, size = degree)) +
  geom_node_text(aes(label = Label), repel = TRUE) +
  scale_size(name = "Degree") +
  # Disabled alternative colour scale, kept for reference:
  # scale_color_binned(name = "Communities") +
  theme_graph()

Tree of Science

# Render `tos` as a read-only DT table: bordered/striped cells, no row names,
# per-column filters on top, and the Buttons extension offering copy, CSV,
# Excel, PDF and print actions.
tos %>% 
  DT::datatable(class = "cell-border stripe", 
                # FALSE rather than F: F is an ordinary variable that can be
                # reassigned.
                rownames = FALSE, 
                filter = "top", 
                editable = FALSE, 
                extensions = "Buttons", 
                options = list(dom = "Bfrtip",
                               buttons = c("copy",
                                           "csv",
                                           "excel", 
                                           "pdf", 
                                           "print")))