title: “Scientometric Analysis”
author: “Sebastian Robledo”
date: “13/05/2023”
output:
html_document:
toc: TRUE
toc_float: TRUE
code_folding: hide

Creating the environment

library(tidyverse)
library(tidygraph)
library(igraph)
library(bibliometrix)
library(tosr)
library(here)
library(lubridate)
# library(sjrdata)
library(openxlsx)
library(zoo)
library(RSQLite)
library(journalabbr)
library(ggraph)
library(openxlsx)
library(XML)
library(plyr)
library(readxl)
source("verbs.R")
windowsFonts("Times" = windowsFont("Times"))
windowsFonts("Times New Roman" = windowsFont("Times New Roman"))

giant.component <- function(graph) {
  cl <- igraph::clusters(graph)
  igraph::induced.subgraph(graph, 
                           which(cl$membership == which.max(cl$csize)))
}

Data getting

library(readxl)
library(httr)
tf<-"C:/Users/david/OneDrive/Desktop/Gephi-Inkscape/Torres and Amaya/all_data_darwin_amaya.xlsx" 
wos_scopus <- readxl::read_excel(tf,sheet =  1)
wos <- readxl::read_excel(tf, sheet = 2)
## Warning: Expecting logical in AG1098 / R1098C33: got '494-ENFL'
## Warning: Expecting logical in AG1099 / R1099C33: got '1245-INOR'
## Warning: Expecting logical in AG1101 / R1101C33: got '21-PHYS'
## Warning: Expecting logical in AG1102 / R1102C33: got '155-PETR'
## Warning: Expecting logical in AG1103 / R1103C33: got '465-ENFL'
scopus <- readxl::read_excel(tf, sheet = 3)
reference_df <- readxl::read_excel(tf,sheet =  4)
journal_df <- readxl::read_excel(tf, sheet = 5)
author_df <- readxl::read_excel(tf, sheet = 6)
TC_all <- readxl::read_excel(tf,sheet =  7)
figure_1_data <- readxl::read_excel(tf, sheet = 8)
table_2_country <- readxl::read_excel(tf, sheet = 10)
figure_2_country_wos_scopus <- readxl::read_excel(tf, sheet = 11)
figure_2_country_wos_scopus_1 <-
  readxl::read_excel(tf, sheet = 12) |>
  tidygraph::as_tbl_graph(directed = FALSE) |>
  activate(nodes) |>
  dplyr::mutate(community = tidygraph::group_louvain(),
                degree = tidygraph::centrality_degree(),
                community = as.factor(community))
table_3_journal <- readxl::read_excel(tf, sheet = 13)
table_4_authors <- readxl::read_excel(tf, sheet = 14)
AU_CO_links <- readxl::read_excel(tf, sheet = 15)
tos <- readxl::read_excel(tf, sheet = 16)
edges_tos <- readxl::read_excel(tf, sheet = 17)
nodes_tos <- readxl::read_excel(tf, sheet = 18)
SO_edges <- readxl::read_excel(tf, sheet = 19)
SO_nodes <- readxl::read_excel(tf, sheet = 20)
AU_ego_edges <- readxl::read_excel(tf, sheet = 21)
AU_ego_nodes <- readxl::read_excel(tf, sheet = 22)

Summary of WoS and Scopus

table_1 <- 
  tibble(wos = length(wos$AU), # Create a dataframe with the values.
         scopus = length(scopus$AU), 
         total = length(wos_scopus$AU))
table_1 %>% 
  DT::datatable(class = "cell-border stripe", 
                rownames = F, 
                filter = "top", 
                editable = FALSE, 
                extensions = "Buttons", 
                options = list(dom = "Bfrtip",
                               buttons = c("copy",
                                           "csv",
                                           "excel", 
                                           "pdf", 
                                           "print")))
wos_scopus %>% 
  tidyr::separate_rows(DT, sep = ";") %>% 
  dplyr::count(DT, sort = TRUE)%>% 
  dplyr::mutate(percentage = n /sum(n),
                percentage = percentage * 100,
                percentage = round(percentage, digits = 2)) %>%
  dplyr::rename(total = n) %>% 
  DT::datatable(class = "cell-border stripe", 
                rownames = F, 
                filter = "top", 
                editable = FALSE, 
                extensions = "Buttons", 
                options = list(dom = "Bfrtip",
                               buttons = c("copy",
                                           "csv",
                                           "excel", 
                                           "pdf", 
                                           "print")))

Resutls

Scientometric Analysis

3.1 Scientific Production

Figure 1a - Scopus + WoS

Combine charts using Python Matplotlib & Reticulate

library(reticulate)
# py_install("matplotlib")
# create a new environment
# conda_create("r-reticulate")
# install Matplotlib
# conda_install("r-reticulate", "matplotlib")
# import Matplotlib (it will be automatically discovered in "r-reticulate")
plt <- import("matplotlib")
np <- import("numpy")
# From Double get integers 
# TC y
TC_all$TC_sum_all <- as.integer(TC_all$TC_sum_all)
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter
# ax=axes
fig, ax = plt.subplots()
# First plot Total Publications - time series
ax.plot(tpx, tpy, color='r',marker='o', label='Total Publications')
ax.set_xlabel('Year')
ax.set_ylabel('Total Publications', color='r')
# Customization for bar charts
barw = 0.5
ax.bar(sx, sy, color='g', label = 'Scopus', alpha = 0.5, width=barw)
## <BarContainer object of 23 artists>
ax.bar(wx1, wy, color='orange', label = 'WoS', alpha=0.8, width=barw)
# Y2 - Total citations
## <BarContainer object of 23 artists>
twin_axes = ax.twinx()
twin_axes.plot(tcx, tcy, color = 'purple',marker='o', label='Total Citations')
twin_axes.set_ylabel('Total Citations', color='purple')
# Customize
plt.title('Total Scientific Production vs. Total Citations')
# y2 Total Citation label location
plt.legend(loc='center left')
# True or False to get the grid at the background
ax.grid(False)
# y1 label location
ax.legend(loc='upper left')
# Y2 limit depends of tcy scale in this case 1400 improves label location
plt.ylim(0, 4000) #########  <-----Important--------- """"Change Y2 Coordinate"""""
# plt.annotate() customize numbers for each position
## (0.0, 4000.0)
for i, label in enumerate(tcy):
  plt.annotate(label, (tcx[i], tcy[i] + 0.5), color='purple', size=8)

for i, label in enumerate(tpy):
  ax.annotate(label, (tpx[i], tpy[i] + 0.8), color='red', size=8)

for i, label in enumerate(wy):
  ax.annotate(label, (wx1[i], wy[i] + 0.1), color='brown', size=8)

for i, label in enumerate(sy):
  ax.annotate(label, (sx[i], sy[i] + 0.2),color='green', size=8)

# Rotate x ticks
plt.xticks(tpx)
## ([<matplotlib.axis.XTick object at 0x000001E58A1F14F0>, <matplotlib.axis.XTick object at 0x000001E58A1F14C0>, <matplotlib.axis.XTick object at 0x000001E589800520>, <matplotlib.axis.XTick object at 0x000001E58A26BA00>, <matplotlib.axis.XTick object at 0x000001E58A274B80>, <matplotlib.axis.XTick object at 0x000001E58A27B670>, <matplotlib.axis.XTick object at 0x000001E58A27BEE0>, <matplotlib.axis.XTick object at 0x000001E58A27FC10>, <matplotlib.axis.XTick object at 0x000001E58A2743A0>, <matplotlib.axis.XTick object at 0x000001E58A288670>, <matplotlib.axis.XTick object at 0x000001E58A288EE0>, <matplotlib.axis.XTick object at 0x000001E58A28DC10>, <matplotlib.axis.XTick object at 0x000001E58A293700>, <matplotlib.axis.XTick object at 0x000001E58A288820>, <matplotlib.axis.XTick object at 0x000001E58A293F70>, <matplotlib.axis.XTick object at 0x000001E58A299A60>, <matplotlib.axis.XTick object at 0x000001E58A2A1550>, <matplotlib.axis.XTick object at 0x000001E58A2A1DC0>, <matplotlib.axis.XTick object at 0x000001E58A293970>, <matplotlib.axis.XTick object at 0x000001E58A2A5820>, <matplotlib.axis.XTick object at 0x000001E58A2AD310>, <matplotlib.axis.XTick object at 0x000001E58A2ADDC0>, <matplotlib.axis.XTick object at 0x000001E58A2B48B0>], [Text(2022.0, 0, '2022'), Text(2021.0, 0, '2021'), Text(2020.0, 0, '2020'), Text(2019.0, 0, '2019'), Text(2018.0, 0, '2018'), Text(2017.0, 0, '2017'), Text(2016.0, 0, '2016'), Text(2015.0, 0, '2015'), Text(2014.0, 0, '2014'), Text(2013.0, 0, '2013'), Text(2012.0, 0, '2012'), Text(2011.0, 0, '2011'), Text(2010.0, 0, '2010'), Text(2009.0, 0, '2009'), Text(2008.0, 0, '2008'), Text(2007.0, 0, '2007'), Text(2006.0, 0, '2006'), Text(2005.0, 0, '2005'), Text(2004.0, 0, '2004'), Text(2003.0, 0, '2003'), Text(2002.0, 0, '2002'), Text(2001.0, 0, '2001'), Text(2000.0, 0, '2000')])
fig.autofmt_xdate(rotation = 70)
# The Y1 ticks depends from tpy scale limits
yticks = [0,21,42,63,84,105,126,147,165,] ########## <-----Important---- Choose scale .. just specify which numbers you want
ax.set_yticks(yticks)
# Export Figure as SVG
plt.savefig("Torres&Amaya.svg")

plt.show()

3.2 Country analysis

Table 2 - Country production

table_2_country |>
  DT::datatable(class = "cell-border stripe", 
                rownames = F, 
                filter = "top", 
                editable = FALSE, 
                extensions = "Buttons", 
                options = list(dom = "Bfrtip",
                               buttons = c("copy",
                                           "csv",
                                           "excel", 
                                           "pdf", 
                                           "print")))

Figure 2a - Country Collaboration

figure_2a <- 
  figure_2_country_wos_scopus_1 |>
  activate(edges) |> 
  # tidygraph::rename(weight = n) |> 
  ggraph(layout = "graphopt") +
  geom_edge_link(aes(width = Weight),
                 colour = "lightgray") +
  scale_edge_width(name = "Link strength") +
  geom_node_point(aes(color = community, 
                      size = degree)) +
  geom_node_text(aes(label = name), repel = TRUE) +
  scale_size(name = "Degree") +
  # scale_color_binned(name = "Communities") +
  theme_graph()

figure_2a

Figure 2b Clusters

figure_2b <- 
  figure_2_country_wos_scopus_1 |> 
  activate(nodes) |> 
  data.frame() |> 
  group_by(community) |> 
  dplyr::count(community, sort = TRUE) |> 
  slice(1:10) |>  
  ggplot(aes(x = reorder(community, n), y = n)) +
  geom_point(stat = "identity") +
  geom_line(group = 1) + 
  # geom_text(label = as.numeric(community),
  #           nudge_x = 0.5,
  #           nudge_y = 0.5,
  #           check_overlap = T) +
  labs(title = "Communities by size", 
       x = "communities", 
       y = "Countries") +
  theme(text = element_text(color = "black",
                            face = "bold",
                            family = "Times New Roman"),
        plot.title = element_text(size = 25),
        panel.background = element_rect(fill = "white"), 
        axis.text.y = element_text(size = 15, 
                                   colour = "black"),
        axis.text.x = element_text(size = 15,
                                   colour = "black"),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20)
        ) 

figure_2b