library(conflicted)
## Warning: package 'conflicted' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
library(tidygraph)
## Warning: package 'tidygraph' was built under R version 4.4.3
library(igraph)
## Warning: package 'igraph' was built under R version 4.4.3
library(ggplot2)
library(bibliometrix)
## Warning: package 'bibliometrix' was built under R version 4.4.3
## Please note that our software is open source and available for use, distributed under the MIT license.
## When it is used in a publication, we ask that authors properly cite the following reference:
##
## Aria, M. & Cuccurullo, C. (2017) bibliometrix: An R-tool for comprehensive science mapping analysis,
## Journal of Informetrics, 11(4), pp 959-975, Elsevier.
##
## Failure to properly cite the software is considered a violation of the license.
##
## For information and bug reports:
## - Take a look at https://www.bibliometrix.org
## - Send an email to info@bibliometrix.org
## - Write a post on https://github.com/massimoaria/bibliometrix/issues
##
## Help us to keep Bibliometrix and Biblioshiny free to download and use by contributing with a small donation to support our research team (https://bibliometrix.org/donate.html)
##
##
## To start with the Biblioshiny app, please digit:
## biblioshiny()
library(tosr)
## Warning: package 'tosr' was built under R version 4.4.3
library(here)
## Warning: package 'here' was built under R version 4.4.3
## here() starts at C:/Users/user/Documents/AR_SMG
library(lubridate)
#library(sjrdata)
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.4.3
library(zoo)
## Warning: package 'zoo' was built under R version 4.4.3
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.4.3
library(plyr)
## Warning: package 'plyr' was built under R version 4.4.3
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
library(dplyr)
library(journalabbr)
## Warning: package 'journalabbr' was built under R version 4.4.3
library(ggraph)
## Warning: package 'ggraph' was built under R version 4.4.3
library(XML)
## Warning: package 'XML' was built under R version 4.4.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(svglite)
## Warning: package 'svglite' was built under R version 4.4.3
source("verbs.R")
#' Extract the largest connected component of a graph.
#'
#' @param graph An igraph graph object.
#' @return The subgraph induced by the vertices that belong to the component
#'   with the most vertices (the first such component if several tie).
giant.component <- function(graph) {
  # `components()` and `induced_subgraph()` are the current names of the
  # deprecated `clusters()` and `induced.subgraph()` aliases.
  comp <- igraph::components(graph)
  largest <- which.max(comp$csize)
  igraph::induced_subgraph(graph, which(comp$membership == largest))
}
# Table 1: record counts for each source and for the merged dataset.
# Each count is the length of the `AU` column of the corresponding data frame.
table_1 <-
  tibble(
    wos = length(wos$AU),
    scopus = length(scopus$AU),
    total = length(wos_scopus$AU)
  )

# Show the counts as an interactive table with export buttons.
table_1 %>%
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable; use the reserved constant
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )
# Document types: one row per type listed in `DT`, with its count and its
# share of all listed types (percentage rounded to two decimals).
wos_scopus %>%
  # `sep` is a regular expression. Absorbing the blanks around ";" prevents
  # "X" and " X" from being counted as two different document types.
  tidyr::separate_rows(DT, sep = "\\s*;\\s*") %>%
  dplyr::count(DT, sort = TRUE) %>%
  dplyr::mutate(percentage = round(n / sum(n) * 100, digits = 2)) %>%
  dplyr::rename(total = n) %>%
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable; use the reserved constant
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )
Combine charts using Python Matplotlib & Reticulate
library(reticulate)
numpy <- import("numpy")
matplotlib <- import("matplotlib")

# Range of publication years covered by the figure.
year_start <- 2002
year_end <- 2024
years_full <- seq(year_start, year_end)

# Years of the range that have no row in `figure_1_data` yet.
years_missing <- setdiff(years_full, figure_1_data$PY)

# Number of rows of `df` whose `PY` equals `yr` (rows with missing `PY` are
# not counted).
count_records <- function(df, yr) {
  sum(df$PY == yr, na.rm = TRUE)
}

# Sum of `TC` over the rows of `df` whose `PY` equals `yr`, ignoring NA.
sum_citations <- function(df, yr) {
  sum(df$TC[which(df$PY == yr)], na.rm = TRUE)
}

# Build every missing row first and bind once, instead of growing the data
# frame (and leaving scratch variables such as `year` behind) inside a loop.
missing_rows <- lapply(years_missing, function(yr) {
  data.frame(
    PY = yr,
    total = count_records(wos_scopus, yr),
    scopus = count_records(scopus, yr),
    wos = count_records(wos, yr)
  )
})
figure_1_data <- figure_1_data %>%
  dplyr::bind_rows(missing_rows) %>%
  dplyr::arrange(dplyr::desc(PY))

# Citations per publication year: the WoS sum plus the Scopus sum.
TC_all <- data.frame(
  PY = years_full,
  TC_sum_all = vapply(
    years_full,
    function(yr) sum_citations(wos, yr) + sum_citations(scopus, yr),
    numeric(1)
  )
)
total_citas <- sum(TC_all$TC_sum_all, na.rm = TRUE)

# The percentage is computed before the yearly sums are stored as integers.
TC_all <- TC_all %>%
  dplyr::mutate(
    TC_percentage = round(TC_sum_all / total_citas * 100, 2),
    TC_sum_all = as.integer(TC_sum_all)
  ) %>%
  dplyr::arrange(dplyr::desc(PY))
import matplotlib.pyplot as plt
# from matplotlib.ticker import FuncFormatter


def clean_integer_formatter(x, pos):
    """Return tick value `x` formatted as a plain integer string.

    `pos` is accepted but not used. The function is only referenced by the
    commented-out FuncFormatter lines further down.
    """
    return '{:d}'.format(int(x))


# NOTE(review): tpx/tpy, sx/sy, wx1/wy and tcx/tcy are defined outside this
# chunk; presumably the x/y series for total publications, Scopus, WoS and
# total citations. Confirm against the chunk that creates them.
fig, ax = plt.subplots(figsize=(11, 7.5))

# Left axis: total publications as a line, per-source counts as bars.
ax.plot(tpx, tpy, color='r', marker='o', label='Total Publications')
ax.set_xlabel('Year', fontsize=20)
ax.set_ylabel('Total Publications', color='r', fontsize=20)
barw = 0.5
ax.bar(sx, sy, color='g', label='Scopus', alpha=0.5, width=barw)
ax.bar(wx1, wy, color='orange', label='WoS', alpha=0.8, width=barw)

# Right axis: total citations on their own scale.
twin_axes = ax.twinx()
twin_axes.plot(tcx, tcy, color='purple', marker='o', label='Total Citations')
twin_axes.set_ylabel('Total Citations', color='purple', fontsize=20)
plt.title('Total Scientific Production vs. Total Citations', fontsize=23)
plt.legend(loc='center left', fontsize=15)
ax.grid(False)
ax.legend(loc='upper left', fontsize=15)
twin_axes.set_ylim(0, max(tcy)+100)
## (0.0, 5796.0)

# Write each value next to its point or bar, slightly above it.
for i, label in enumerate(tcy):
    twin_axes.annotate(label, (tcx[i], tcy[i] + 0.5), color='purple', size=13)
for i, label in enumerate(tpy):
    ax.annotate(label, (tpx[i], tpy[i] + 0.8), color='red', size=13)
for i, label in enumerate(wy):
    ax.annotate(label, (wx1[i], wy[i] + 0.1), color='brown', size=13)
for i, label in enumerate(sy):
    ax.annotate(label, (sx[i], sy[i] + 0.2), color='green', size=13)

# One tick per x value of the publications series, labelled as an integer.
ax.set_xticks(tpx)
ax.set_xticklabels([int(year) for year in tpx], fontsize=14, rotation=75)
# ax.yaxis.set_major_formatter(FuncFormatter(clean_integer_formatter))
# twin_axes.yaxis.set_major_formatter(FuncFormatter(clean_integer_formatter))
ax.tick_params(axis='y', labelsize=15)
twin_axes.tick_params(axis='y', labelsize=15)
plt.savefig("./f1/figura_1.svg")
plt.show()
# Table 2: show `table_2_country` as an interactive table with export buttons.
table_2_country |>
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable; use the reserved constant
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )
# Figure 2a: network drawn from `figure_2_country_wos_scopus_1` with the
# "graphopt" layout. Edge width maps to `Weight`, node colour to `community`,
# node size to `degree`, and nodes are labelled with `name`.
figure_2a <-
  ggraph(
    activate(figure_2_country_wos_scopus_1, edges),
    layout = "graphopt"
  ) +
  geom_edge_link(mapping = aes(width = Weight), colour = "lightgray") +
  geom_node_point(mapping = aes(color = community, size = degree)) +
  geom_node_text(mapping = aes(label = name), repel = TRUE) +
  scale_edge_width(name = "Link strength") +
  scale_size(name = "Degree") +
  # scale_color_binned(name = "Communities") +
  theme_graph()

figure_2a

# Save the figure as SVG.
ggsave(
  filename = "./figura_dos/figura_2a.svg",
  plot = figure_2a,
  device = "svg"
)
library(svglite)

# Figure 2b: number of nodes in each of the ten largest communities of
# `figure_2_country_wos_scopus_1`, ordered on the x axis by size.
figure_2b <-
  figure_2_country_wos_scopus_1 |>
  activate(nodes) |>
  data.frame() |>
  # Count on the ungrouped table. `count()` keeps the grouping of its input,
  # so a preceding `group_by(community)` made the slice below act inside each
  # one-row group, and the top-10 cut was never applied.
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(1:10) |>
  ggplot(aes(x = reorder(community, n), y = n)) +
  geom_point(stat = "identity") +
  geom_line(group = 1) +
  # geom_text(label = as.numeric(community),
  #           nudge_x = 0.5,
  #           nudge_y = 0.5,
  #           check_overlap = T) +
  labs(
    title = "Communities by size",
    x = "communities",
    y = "Countries"
  ) +
  theme(
    text = element_text(color = "black", face = "bold", family = "Times"),
    plot.title = element_text(size = 25),
    panel.background = element_rect(fill = "white"),
    axis.text.y = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(size = 15, colour = "black"),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20)
  )

figure_2b

# Save the figure as SVG.
ggsave("./figura_dos/figura_2b.svg",
  plot = figure_2b,
  device = "svg"
)
# Edge table of the country network without self-loops. After the graph
# conversion `from` and `to` are integer node ids, so the table is built once
# and shared: ids taken from two differently filtered graphs would not refer
# to the same nodes.
figure_2c_links <-
  figure_2_country_wos_scopus |>
  dplyr::filter(from != to) |>
  tidygraph::as_tbl_graph() |>
  tidygraph::activate(edges) |>
  tidygraph::as_tibble()

# Links per year within [year_start, year_end], relative to the year with the
# most links.
figure_2c_edges <-
  figure_2c_links |>
  dplyr::select(year = PY) |>
  dplyr::count(year) |>
  dplyr::filter(
    year >= year_start,
    year <= year_end
  ) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

# Nodes per year of first appearance: every node (as `from` or as `to`) is
# assigned the earliest year of its links, then nodes are counted per year
# within [year_start, year_end], relative to the year with the most new nodes.
figure_2c_nodes <-
  dplyr::bind_rows(
    dplyr::select(figure_2c_links, CO = from, year = PY),
    dplyr::select(figure_2c_links, CO = to, year = PY)
  ) |>
  unique() |>
  dplyr::group_by(CO) |>
  dplyr::slice(which.min(year)) |>
  dplyr::ungroup() |>
  dplyr::count(year) |>
  dplyr::filter(
    year >= year_start,
    year <= year_end
  ) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)
# Figure 2c: yearly series of `figure_2c_nodes` and `figure_2c_edges` drawn
# together, one colour per series. `percentage` is each year's count divided
# by the largest yearly count of its own series.
figure_2c <-
  figure_2c_nodes |>
  dplyr::mutate(
    type = "nodes",
    year = as.numeric(year)
  ) |>
  bind_rows(figure_2c_edges |>
    dplyr::mutate(
      type = "links",
      year = as.numeric(year)
    )) |>
  ggplot(aes(
    x = year,
    y = percentage,
    color = type
  )) +
  geom_point() +
  geom_line() +
  theme(
    legend.position = "right",
    text = element_text(color = "black", face = "bold", family = "Times"),
    plot.title = element_text(size = 25),
    panel.background = element_rect(fill = "white"),
    axis.text.y = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(
      size = 15,
      colour = "black",
      angle = 45, vjust = 0.5
    ),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    # Font size must be a number; the string "15" only worked through
    # implicit coercion.
    legend.text = element_text(size = 15),
    legend.title = element_blank()
  ) +
  labs(
    title = "Nodes and links through time",
    y = "Percentage"
  ) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(year_start, year_end, by = 1))

figure_2c

# Save the figure as SVG.
ggsave("./figura_dos/figura_2c.svg",
  plot = figure_2c,
  device = "svg"
)
library(dplyr)
library(stringr)

# Ten rows of `table_3_journal` with the largest `total`, journal names in
# lower case.
journals_sorted <- table_3_journal %>%
  dplyr::arrange(desc(total)) %>%
  head(10) %>%
  dplyr::mutate(journal = tolower(journal))
names_journals <- journals_sorted$journal

# Columns are renamed by position so that the journal name becomes `SO`.
colnames(journals_sorted) <- c("SO", "wos", "scopus", "total", "percentage")

# One (SO, SN) pair per top journal: the first `SN` found in `wos_scopus` for
# each lower-cased source title.
wos_scopus_sn <- wos_scopus %>%
  select(SO, SN) %>%
  dplyr::mutate(SO = tolower(SO)) %>%
  dplyr::filter(SO %in% names_journals) %>%
  distinct(SO, .keep_all = TRUE)

# Attach the per-journal counts to each (SO, SN) pair.
tabla_apariciones <- merge(
  x = wos_scopus_sn,
  y = journals_sorted,
  by = "SO",
  all.x = TRUE
)

# Scimago table prepared for matching: `Issn` becomes a list column of
# trimmed, upper-cased codes, `Title` is lower-cased, and missing SJR,
# quartile and H index values are shown as "--".
dataa <- data_scimago %>%
  dplyr::mutate(
    Issn = lapply(
      str_split(Issn, ","),
      function(codes) toupper(str_trim(codes))
    ),
    Title = tolower(Title),
    SJR = if_else(is.na(SJR), "--", as.character(SJR)),
    QUARTIL = if_else(is.na(QUARTIL), "--", as.character(QUARTIL)),
    `H index` = if_else(is.na(`H.index`), "--", as.character(`H.index`))
  )
# For every journal in `tabla_apariciones`, look up its Scimago metrics in
# `dataa`: first by ISSN, then by exact lower-cased title. A journal without
# any match gets one row with "--" placeholders. A journal with several
# matches gets one row per match.
#
# The work is done inside a function so that its temporaries stay local. The
# former loop assigned `wos`, `scopus` and `total` at top level and thereby
# overwrote the `wos` and `scopus` data frames.
resultados <- lapply(seq_len(nrow(tabla_apariciones)), function(i) {
  fila <- tabla_apariciones[i, ]
  nombre_revista <- fila$SO
  sn <- tolower(gsub("-", "", as.character(fila$SN)))
  # The codes in `dataa$Issn` are upper-cased, so compare with an upper-case
  # key. Otherwise an ISSN ending in the check character "X" never matches.
  clave_issn <- toupper(sn)

  if (is.na(clave_issn)) {
    # A missing ISSN must not match Scimago rows whose ISSN is also missing.
    coincidencias <- dataa[0, ]
  } else {
    coincidencias <- dataa %>%
      dplyr::filter(vapply(Issn, function(x) clave_issn %in% x, logical(1)))
  }
  if (nrow(coincidencias) == 0) {
    # No ISSN match: fall back to the journal title.
    coincidencias <- dataa %>%
      dplyr::filter(Title == nombre_revista)
  }

  if (nrow(coincidencias) > 0) {
    sjr <- coincidencias$SJR
    cuartil <- coincidencias$QUARTIL
    indice_h <- coincidencias$`H index`
  } else {
    sjr <- "--"
    cuartil <- "--"
    indice_h <- "--"
  }

  # The journal fields are recycled over the matches (one row per match).
  data.frame(
    Nombre_Revista = nombre_revista,
    SN = sn,
    WOS = fila$wos,
    SCOPUS = fila$scopus,
    Total = fila$total,
    `Impact factor` = sjr,
    QUARTIL = cuartil,
    `H index` = indice_h
  )
})
df_resultado <- do.call(rbind, resultados)
# Put the journal names in title case and order the table by `Total`,
# largest first.
df_resultado <- df_resultado |>
  dplyr::mutate(Nombre_Revista = str_to_title(Nombre_Revista)) |>
  dplyr::arrange(desc(Total))

# Show the result as an interactive table with export buttons.
df_resultado |>
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE,
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )
Creating the graph object
# Undirected journal graph: one edge per (JI_main, JI_ref) pair of
# `journal_df`, weighted by how many times the pair occurs. Only component
# number 1 from `group_components()` is kept; its nodes then receive their
# degree and a Louvain community, and nodes with degree below 1 are dropped.
journal_citation_graph_weighted_tbl_small <-
  journal_df |>
  dplyr::count(JI_main, JI_ref, name = "weight") |>
  as_tbl_graph(directed = FALSE) |>
  # convert(to_simple) |>
  activate(nodes) |>
  dplyr::mutate(components = tidygraph::group_components(type = "weak")) |>
  dplyr::filter(components == 1) |>
  dplyr::mutate(
    degree = centrality_degree(),
    community = tidygraph::group_louvain()
  ) |>
  dplyr::select(-components) |>
  dplyr::filter(degree >= 1)
Selecting nodes to show
# Figure 3a: network built from `SO_edges`. Node attributes come from
# `SO_nodes`, attached through an `id` column that is assigned in node order
# and dropped again after the join; `Label` is then used as the node name.
figure_3a_1 <-
  SO_edges |>
  tidygraph::as_tbl_graph() |>
  tidygraph::activate(nodes) |>
  tidygraph::mutate(id = SO_nodes$id) |>
  tidygraph::left_join(SO_nodes) |>
  tidygraph::select(-id) |>
  tidygraph::rename(name = Label) |>
  ggraph(layout = "graphopt") +
  geom_edge_link(mapping = aes(width = weight), colour = "lightgray") +
  geom_node_point(mapping = aes(color = as.factor(community), size = degree)) +
  geom_node_text(mapping = aes(label = name), repel = TRUE) +
  scale_edge_width(name = "Link strength") +
  scale_size(name = "Degree") +
  # Discrete colour scale (replaces an earlier scale_color_binned()).
  scale_color_discrete(name = "Communities") +
  theme_graph()

figure_3a_1

# Save the figure as SVG.
ggsave(
  filename = "./figura_tres/figura_3a_1.svg",
  plot = figure_3a_1,
  device = "svg"
)
# Figure 3b: number of nodes in each of the ten largest communities of
# `journal_citation_graph_weighted_tbl_small`, ordered on the x axis by size.
figure_3b <-
  journal_citation_graph_weighted_tbl_small |>
  activate(nodes) |>
  data.frame() |>
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice_head(n = 10) |>
  ggplot(mapping = aes(x = reorder(community, n), y = n)) +
  geom_point(stat = "identity") +
  geom_line(group = 1) +
  labs(
    title = "Communities by size",
    x = "communities",
    y = "Journals"
  ) +
  theme(
    text = element_text(color = "black", face = "bold", family = "Times"),
    plot.title = element_text(size = 25),
    panel.background = element_rect(fill = "white"),
    axis.text.y = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(size = 15, colour = "black"),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20)
  )

figure_3b

# Save the figure as SVG.
ggsave(
  filename = "./figura_tres/figura_3b.svg",
  plot = figure_3b,
  device = "svg"
)
# Edge table of the journal network without self-loops. After the graph
# conversion `from` and `to` are integer node ids, so the table is built once
# and shared: ids taken from two differently filtered graphs would not refer
# to the same nodes.
figure_3c_links <-
  journal_df |>
  dplyr::select(from = JI_main, to = JI_ref, PY = PY_ref) |>
  dplyr::filter(from != to) |>
  tidygraph::as_tbl_graph() |>
  tidygraph::activate(edges) |>
  tidygraph::as_tibble()

# Links per year within [year_start, year_end], relative to the year with the
# most links.
figure_3c_edges <-
  figure_3c_links |>
  dplyr::select(year = PY) |>
  dplyr::count(year) |>
  dplyr::filter(
    year >= year_start,
    year <= year_end
  ) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)

# Nodes per year of first appearance: every node (as `from` or as `to`) is
# assigned the earliest year of its links, then nodes are counted per year
# within [year_start, year_end], relative to the year with the most new nodes.
figure_3c_nodes <-
  dplyr::bind_rows(
    dplyr::select(figure_3c_links, CO = from, year = PY),
    dplyr::select(figure_3c_links, CO = to, year = PY)
  ) |>
  unique() |>
  dplyr::group_by(CO) |>
  dplyr::slice(which.min(year)) |>
  dplyr::ungroup() |>
  dplyr::count(year) |>
  dplyr::filter(
    year >= year_start,
    year <= year_end
  ) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage)
Plotting figure 3c
# Figure 3c: yearly series of `figure_3c_nodes` and `figure_3c_edges` drawn
# together, one colour per series. `percentage` is each year's count divided
# by the largest yearly count of its own series.
figure_3c <-
  figure_3c_nodes |>
  dplyr::mutate(type = "nodes") |>
  bind_rows(figure_3c_edges |>
    dplyr::mutate(type = "links")) |>
  ggplot(aes(
    x = year,
    y = percentage,
    color = type
  )) +
  geom_point() +
  geom_line() +
  theme(
    legend.position = "right",
    text = element_text(color = "black", face = "bold", family = "Times"),
    plot.title = element_text(size = 25),
    panel.background = element_rect(fill = "white"),
    axis.text.y = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(
      size = 15,
      colour = "black",
      angle = 60, vjust = 0.5
    ),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    # Font size must be a number; the string "15" only worked through
    # implicit coercion.
    legend.text = element_text(size = 15),
    legend.title = element_blank()
  ) +
  labs(
    title = "Nodes and links through time",
    y = "Percentage"
  ) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(year_start, year_end, by = 1))

figure_3c

# Save the figure as SVG.
ggsave("./figura_tres/figura_3c.svg",
  plot = figure_3c,
  device = "svg"
)
# Show `tos` as an interactive table with export buttons.
tos %>%
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE, # `F` is an ordinary variable; use the reserved constant
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )
# Ego network graph built from `AU_ego_edges`. Node attributes come from
# `AU_ego_nodes`, attached through an `id` column assigned in node order;
# `component` is stored as text.
egos <-
  AU_ego_edges %>%
  tidygraph::as_tbl_graph() %>%
  tidygraph::activate(nodes) %>%
  tidygraph::mutate(id = AU_ego_nodes$id) %>%
  dplyr::left_join(
    AU_ego_nodes %>%
      tidygraph::mutate(id = as.character(id)),
    # State the key instead of relying on the natural join, which picked
    # `id` and printed a message about it.
    by = "id"
  ) %>%
  dplyr::mutate(component = as.character(component))
## Joining with `by = join_by(id)`
# Ego network figure: edge width maps to `weight`, node colour to
# `component`, node size to `degree`, and nodes are labelled with `Label`.
# The plot is stored in a variable so that it can be passed to ggsave()
# explicitly instead of depending on the last plot that was displayed.
figure_egos <-
  egos |>
  ggraph(layout = "graphopt") +
  geom_edge_link(aes(width = weight),
    colour = "lightgray"
  ) +
  scale_edge_width(name = "Link strength") +
  geom_node_point(aes(
    color = component,
    size = degree
  )) +
  geom_node_text(aes(label = Label), repel = TRUE) +
  scale_size(name = "Degree") +
  # scale_color_binned(name = "Communities") +
  theme_graph()
figure_egos
## Warning: ggrepel: 569 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave("./figura_cuatro/egos_network.svg",
  plot = figure_egos,
  device = "svg"
)
## Saving 7 x 5 in image
## Warning: ggrepel: 566 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps