library(tidyverse)
library(tidygraph)
library(tidytext)
library(igraph)
library(bibliometrix)
library(tosr)
library(here)
library(lubridate)
library(sjrdata)
library(openxlsx)
library(zoo)
library(RSQLite)
library(journalabbr)
library(ggraph)
library(openxlsx)
library(XML)
library(plyr)
source("verbs.R")
library(RColorBrewer)
library(wordcloud)
# library(ggthemes)
# library(extrafont)
# library(remotes)
# remotes::install_version("Rttf2pt1", version = "1.3.8")
# extrafont::font_import()
# Extract the largest connected component of a graph.
#
# @param graph An igraph graph object.
# @return The subgraph induced by the vertices that belong to the biggest
#   component (the first one when several components tie for the largest size).
giant.component <- function(graph) {
  # components() / induced_subgraph() are the current igraph names for the
  # deprecated clusters() / induced.subgraph() aliases.
  comp <- igraph::components(graph)
  igraph::induced_subgraph(
    graph,
    which(comp$membership == which.max(comp$csize))
  )
}
# Main records sheet; rows whose author field (AU) is missing are dropped.
wos_scopus <-
  "https://docs.google.com/spreadsheets/d/1nwszUhhHBzJS1WA0aP5lVMZY_lEVxjT4QO1l3bKiVtE/export?format=csv&gid=1535289313" |>
  read_csv() |>
  filter(!is.na(AU))

# Sheet read as-is into `ToS`; it is filtered on its `ToS` column further down.
ToS <-
  "https://docs.google.com/spreadsheets/d/1nwszUhhHBzJS1WA0aP5lVMZY_lEVxjT4QO1l3bKiVtE/export?format=csv&gid=2132528010" |>
  read_csv()
# wos <-
# read_csv("https://docs.google.com/spreadsheets/d/1PGRAZv6FLhBQD7dct0mIZ_pa4LNBw05Mk-WtfI3Vjxk/export?format=csv&gid=849275704") # create dataframe from wos file
# scopus <-
# read_csv("https://docs.google.com/spreadsheets/d/1PGRAZv6FLhBQD7dct0mIZ_pa4LNBw05Mk-WtfI3Vjxk/export?format=csv&gid=560156967")
# Sheet read as-is into `reference_df` (no filtering applied here).
reference_df <-
  "https://docs.google.com/spreadsheets/d/1nwszUhhHBzJS1WA0aP5lVMZY_lEVxjT4QO1l3bKiVtE/export?format=csv&gid=270496836" |>
  read_csv()
# journal_df <-
# read_csv("https://docs.google.com/spreadsheets/d/1PGRAZv6FLhBQD7dct0mIZ_pa4LNBw05Mk-WtfI3Vjxk/export?format=csv&gid=686001397")
# Sheet read as-is into `AU_links` (no filtering applied here).
AU_links <-
  "https://docs.google.com/spreadsheets/d/1nwszUhhHBzJS1WA0aP5lVMZY_lEVxjT4QO1l3bKiVtE/export?format=csv&gid=640727835" |>
  read_csv()
# figure_1_data <- # ok
# read_csv("https://docs.google.com/spreadsheets/d/1nwszUhhHBzJS1WA0aP5lVMZY_lEVxjT4QO1l3bKiVtE/export?format=csv&gid=1442849006")
# TC_all <-
# read_csv("https://docs.google.com/spreadsheets/d/1PGRAZv6FLhBQD7dct0mIZ_pa4LNBw05Mk-WtfI3Vjxk/export?format=csv&gid=1096651366")
# table_2 <-
# read_csv("https://docs.google.com/spreadsheets/d/1PGRAZv6FLhBQD7dct0mIZ_pa4LNBw05Mk-WtfI3Vjxk/export?format=csv&gid=1805154497")
# figure_2_countries_wos_scopus_1 <-
# read_csv("https://docs.google.com/spreadsheets/d/1PGRAZv6FLhBQD7dct0mIZ_pa4LNBw05Mk-WtfI3Vjxk/export?format=csv&gid=388025682") |>
# tidygraph::as_tbl_graph(directed = FALSE) |>
# activate(nodes) |>
# dplyr::mutate(community = tidygraph::group_louvain(),
# degree = tidygraph::centrality_degree(),
# community = as.factor(community))
# figure_2_AU_CO_time <-
# read_csv("https://docs.google.com/spreadsheets/d/1PGRAZv6FLhBQD7dct0mIZ_pa4LNBw05Mk-WtfI3Vjxk/export?format=csv&gid=774818804")
# table_3_journal <- # table_3_journal
# read_csv("https://docs.google.com/spreadsheets/d/1kkuLjeWK44WfTbPfsVsvoiSksRDoLHXY/export?format=csv&gid=1076593220")
# wos_scopus_authors <-
# read_csv("https://docs.google.com/spreadsheets/d/1kkuLjeWK44WfTbPfsVsvoiSksRDoLHXY/export?format=csv&gid=1994870037")
# Sheet read as-is into `AU_CO_df`.
AU_CO_df <-
  "https://docs.google.com/spreadsheets/d/1nwszUhhHBzJS1WA0aP5lVMZY_lEVxjT4QO1l3bKiVtE/export?format=csv&gid=1136105302" |>
  read_csv()

# Link table with `from`, `to` and `PY` columns (used by the network code
# further down).
AU_CO_links <-
  "https://docs.google.com/spreadsheets/d/1nwszUhhHBzJS1WA0aP5lVMZY_lEVxjT4QO1l3bKiVtE/export?format=csv&gid=1134821228" |>
  read_csv()

# Journal link table; rows whose JI_ref contains four consecutive digits are
# discarded.
SO_links <-
  "https://docs.google.com/spreadsheets/d/1nwszUhhHBzJS1WA0aP5lVMZY_lEVxjT4QO1l3bKiVtE/export?format=csv&gid=1871024023" |>
  read_csv() |>
  dplyr::filter(!stringr::str_detect(JI_ref, ".*[0-9]{4}"))
# Qualitative palette shared by every word cloud below.
pal <- brewer.pal(8, "Dark2")

# Draw a word cloud from the title (TI) words of one branch.
#
# The three branch plots previously repeated the same pipeline verbatim; the
# shared steps live here so the branches can only differ in their arguments.
#
# @param branch_label Value of the `ToS` column that selects the branch,
#   e.g. "rama 1".
# @param min_freq Minimum word count forwarded to wordcloud(min.freq = );
#   3 is wordcloud()'s own default.
# @param max_words Maximum number of words drawn.
# @param data Data frame with `ToS` and `TI` columns.
# @param palette Colours forwarded to wordcloud(colors = ).
# @return Called for its plotting side effect; returns its result invisibly.
plot_branch_wordcloud <- function(branch_label,
                                  min_freq = 3,
                                  max_words = 50,
                                  data = ToS,
                                  palette = pal) {
  # Words removed in addition to the generic `stop_words` list.
  topic_words <- c("coastal", "management", "zone", "program", "u.s")

  word_counts <-
    data |>
    dplyr::filter(ToS == .env$branch_label) |>
    dplyr::select(TI) |>
    unnest_tokens(output = word, input = TI) |>
    # Explicit key: avoids the "Joining, by" message and accidental extra keys.
    dplyr::anti_join(stop_words, by = "word") |>
    dplyr::count(word, sort = TRUE) |>
    # The original `word != ...` chain also dropped NA words; keep that.
    dplyr::filter(!is.na(word), !word %in% topic_words)

  invisible(
    with(
      word_counts,
      wordcloud(word,
                n,
                random.order = FALSE,
                min.freq = min_freq,
                max.words = max_words,
                colors = palette)
    )
  )
}

### Branch 1
plot_branch_wordcloud("rama 1")

### Branch 2
plot_branch_wordcloud("rama 2", min_freq = 2)

### Branch 3
plot_branch_wordcloud("rama 3", min_freq = 2)
# Converting data from Google sheet file
# One row per year between the first and last publication year (PY) found in
# `wos_scopus`; joined against yearly summaries to fill missing years.
# na.rm = TRUE: a single missing PY would otherwise turn both bounds into NA
# and make the `:` sequence fail.
range_tbl <-
  tibble(PY = min(wos_scopus$PY, na.rm = TRUE):max(wos_scopus$PY, na.rm = TRUE))
# Number of records per publication year, with zero-filled gaps.
# Only rows that have a `ref_type` value are counted.
total_anual_production <-
  wos_scopus |>
  tidyr::drop_na(ref_type) |>
  dplyr::count(PY, sort = TRUE) |>
  # Drops the row that counts records without a year.
  na.omit() |>
  # dplyr::filter(PY >= 2000,
  #               PY < year(today())) |>
  # dplyr::arrange(desc(PY))
  # Explicit key: no "Joining, by" message, and the join cannot silently pick
  # up extra shared columns if `range_tbl` ever changes.
  dplyr::right_join(range_tbl, by = "PY") |>
  tidyr::replace_na(list(n = 0)) |>
  dplyr::mutate(ref_type = "total")
# Interactive table of the yearly production with export buttons.
DT::datatable(
  total_anual_production,
  class = "cell-border stripe",
  rownames = FALSE,
  filter = "top",
  editable = FALSE,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)
# Figure 1a: bar chart of records per year, each bar labelled with its count.
figure_1a <-
  ggplot(
    data = total_anual_production,
    mapping = aes(x = factor(PY), y = n)
  ) +
  geom_bar(
    stat = "identity",
    position = "dodge",
    color = "springgreen3",
    fill = "springgreen3"
  ) +
  # Count printed just above each bar.
  geom_text(
    aes(label = n),
    vjust = -0.3,
    position = position_dodge(0.9),
    size = 3,
    family = "Times"
  ) +
  # scale_fill_manual(values = "springgreen3") +
  theme(
    # text = element_text(family = "Serif",
    #                     face = "bold",
    #                     size =12),
    panel.background = element_rect(fill = "white"),
    legend.position = "bottom",
    legend.title = element_text(size = 0),
    axis.text.x = element_text(face = "bold", angle = 45, vjust = 0.5),
    axis.line = element_line(color = "black", size = 0.2)
  ) +
  labs(
    y = "Number of publications",
    x = "Year"
  )

figure_1a
# Creating data
# Total citations (TC) per publication year, with zero-filled gaps.
TC_all <-
  wos_scopus |>
  # Records without a year cannot be placed on the time axis.
  dplyr::filter(!is.na(PY)) |>
  dplyr::group_by(PY) |>
  # na.rm = TRUE: previously one missing TC made the whole year NA; the year
  # was then dropped by na.omit() and re-added as 0 by the join below, i.e.
  # silently reported as having no citations at all.
  dplyr::summarise(TC_sum = sum(TC, na.rm = TRUE)) |>
  # Namespaced on purpose: plyr is attached after dplyr in this script and
  # masks arrange()/desc().
  dplyr::arrange(dplyr::desc(PY)) |>
  dplyr::right_join(range_tbl, by = "PY") |>
  tidyr::replace_na(list(TC_sum = 0))
# Interactive table of the yearly citation totals with export buttons.
DT::datatable(
  TC_all,
  class = "cell-border stripe",
  rownames = FALSE,
  filter = "top",
  editable = FALSE,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)
# Figure
# Figure 1c: line chart of total citations per year, each point labelled with
# its value.
figure_1c <-
  ggplot(
    data = TC_all,
    mapping = aes(x = PY, y = TC_sum)
  ) +
  geom_line(stat = "identity", color = "purple") +
  geom_point(color = "purple") +
  # One tick per year, from 1974 up to the current calendar year.
  scale_x_continuous(breaks = seq(1974, year(today()), by = 1)) +
  geom_text(
    aes(label = TC_sum),
    vjust = -0.3,
    position = position_dodge(0.9),
    size = 3,
    family = "Times",
    color = "purple"
  ) +
  scale_fill_manual(values = c("springgreen3", "orange3")) +
  theme(
    text = element_text(family = "Times", face = "bold", size = 12),
    panel.background = element_rect(fill = "white"),
    legend.position = "bottom",
    legend.title = element_text(size = 0),
    axis.text.x = element_text(face = "bold", angle = 45, vjust = 0.5),
    axis.line = element_line(color = "black", size = 0.2)
  ) +
  labs(
    y = "Number of citations",
    x = "Year"
  )

figure_1c
# One row per (record, country): AU_CO holds ";"-separated values that are
# split into rows, then duplicates and rows without a country are removed.
wos_scopus_countries <-
  wos_scopus |>
  select(SR, AU_CO, TC, quartile) |>
  separate_rows(AU_CO, sep = ";") |>
  unique() |>
  dplyr::filter(!is.na(AU_CO))
# wos_scopus_countries_journals <-
# wos_scopus_countries |>
# left_join(wos_scopus |>
# select(SR, SO, PY),
# by = "SR")
# Table 2a: rows per country and each country's share (in %, two decimals),
# most productive first.
table_2a_production <-
  wos_scopus_countries |>
  dplyr::count(AU_CO, name = "count_co") |>
  dplyr::mutate(
    percentage_co = count_co / sum(count_co) * 100,
    percentage_co = round(percentage_co, digits = 2)
  ) |>
  dplyr::arrange(desc(count_co))
# Table 2b: summed citations (TC) per country and each country's share of the
# total (in %), most cited first.
table_2b_citation <-
  wos_scopus_countries |>
  select(AU_CO, TC) |>
  separate_rows(AU_CO, sep = ";") |>
  dplyr::group_by(AU_CO) |>
  dplyr::summarise(citation = sum(TC)) |>
  dplyr::mutate(percentage_ci = citation / sum(citation) * 100) |>
  dplyr::arrange(desc(citation))
# Table 2c: per country, number of rows in each quartile (Q1..Q4), one column
# per quartile; a quartile with no rows for a country is reported as 0.
table_2c_quality <-
  wos_scopus_countries |>
  dplyr::group_by(AU_CO) |>
  dplyr::count(quartile, sort = TRUE) |>
  pivot_wider(names_from = quartile, values_from = n) |>
  dplyr::select(AU_CO, Q1, Q2, Q3, Q4) |>
  dplyr::mutate(
    dplyr::across(c(Q1, Q2, Q3, Q4), \(quartile_n) replace_na(quartile_n, 0))
  )
# Table 2: production, citation and quartile summaries joined per country.
# Countries with a missing value in any joined column are dropped.
# `no_category` is the number of rows not assigned to Q1..Q4.
table_2 <-
  table_2a_production |>
  dplyr::left_join(table_2b_citation, by = "AU_CO") |>
  dplyr::left_join(table_2c_quality, by = "AU_CO") |>
  tidyr::drop_na() |>
  # Namespaced on purpose: plyr is attached after dplyr in this script, so a
  # bare mutate() would dispatch to plyr::mutate().
  dplyr::mutate(
    percentage_ci = round(percentage_ci, digits = 2),
    no_category = count_co - (Q1 + Q2 + Q3 + Q4)
  )
# Interactive version of table 2 with export buttons.
DT::datatable(
  table_2,
  class = "cell-border stripe",
  rownames = FALSE,
  filter = "top",
  editable = FALSE,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)
# Undirected, weighted network built from `AU_CO_links`: the weight of a link
# is the number of rows sharing the same from/to pair, self-links are removed,
# and each node gets its Louvain community (as a factor) and its degree.
edgelist_countries_weighted <-
  AU_CO_links |>
  dplyr::select(from, to) |>
  dplyr::group_by(from, to) |>
  dplyr::count(from, to) |>
  dplyr::filter(from != to) |>
  dplyr::rename(weight = n) |>
  tidygraph::as_tbl_graph(directed = FALSE) |>
  activate(nodes) |>
  # edgelist_wos_scopus_countries_weighted_properties |>
  dplyr::mutate(
    community = tidygraph::group_louvain(),
    degree = tidygraph::centrality_degree(),
    community = as.factor(community)
  )
# Figure 2a: network plot; edge width encodes the link weight, node colour the
# community and node size the degree.
figure_2a_graph <-
  ggraph(edgelist_countries_weighted, layout = "graphopt") +
  geom_edge_link(aes(width = weight), colour = "lightgray") +
  scale_edge_width(name = "Link strength") +
  geom_node_point(aes(color = community, size = degree)) +
  geom_node_text(aes(label = name), repel = TRUE) +
  scale_size(name = "Degree") +
  # scale_color_binned(name = "Communities") +
  theme_graph()

figure_2a_graph
# Figure 2b: size (number of nodes) of the ten largest communities of the
# country network.
figure_2b_cluster <-
  edgelist_countries_weighted |>
  activate(nodes) |>
  data.frame() |>
  # Counted on ungrouped data: the previous group_by(community) left the
  # result grouped, so slice(1:10) ran inside every one-row group and never
  # limited the plot to the ten largest communities.
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice_head(n = 10) |>
  ggplot(aes(x = fct_reorder(community, n), y = n)) +
  geom_point(stat = "identity") +
  geom_line(group = 1) +
  # geom_text(label = as.numeric(community),
  #           nudge_x = 0.5,
  #           nudge_y = 0.5,
  #           check_overlap = T) +
  labs(
    title = "Communities by size",
    x = "communities",
    y = "Countries"
  ) +
  # Community size printed above each point.
  geom_text(
    aes(label = n),
    vjust = -0.3,
    position = position_dodge(0.9),
    size = 5,
    family = "Times"
    # color = "purple"
  ) +
  theme(
    text = element_text(color = "black",
                        face = "bold",
                        family = "Times New Roman"),
    plot.title = element_text(size = 25),
    panel.background = element_rect(fill = "white"),
    axis.text.y = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(size = 15, colour = "black"),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20)
  )

figure_2b_cluster
# One row per year between the first and last PY found in `AU_CO_links`.
# na.rm = TRUE: a single missing PY would otherwise turn both bounds into NA
# and make the `:` sequence fail.
range_tbl_fig_2 <-
  tibble(PY = min(AU_CO_links$PY, na.rm = TRUE):max(AU_CO_links$PY, na.rm = TRUE))
# Links per year (self-links excluded), restricted to 2000-2020 and expressed
# relative to the busiest year; years without links get 0.
figure_2c_cluster_edges <-
  AU_CO_links |>
  dplyr::filter(from != to) |>
  tidygraph::as_tbl_graph() |>
  activate(edges) |>
  as_tibble() |>
  dplyr::select(year = PY) |>
  dplyr::count(year) |>
  dplyr::filter(
    year >= 2000,
    year <= 2020
  ) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage) |>
  dplyr::right_join(
    range_tbl_fig_2,
    by = c("year" = "PY")
  ) |>
  tidyr::replace_na(list(percentage = 0))
# Number of nodes entering the network per year (a node is counted in the
# first year it appears on either end of a link), restricted to 2000-2020 and
# expressed relative to the busiest year; years without new nodes get 0.
#
# Both link endpoints are taken from ONE edge table. Previously the `from`
# ids came from a graph built without self-links and the `to` ids from a graph
# built with them; tidygraph numbers nodes per graph, so the two id sets were
# not guaranteed to refer to the same nodes.
figure_2c_cluster_nodes <-
  AU_CO_links |>
  dplyr::filter(from != to) |>
  tidygraph::as_tbl_graph() |>
  activate(edges) |>
  as_tibble() |>
  dplyr::select(from, to, year = PY) |>
  # Stack both endpoints of every link into a single node-id column.
  tidyr::pivot_longer(
    cols = c(from, to),
    names_to = "endpoint",
    values_to = "CO"
  ) |>
  dplyr::distinct(CO, year) |>
  # Keep the first year in which each node appears.
  dplyr::group_by(CO) |>
  dplyr::slice(which.min(year)) |>
  dplyr::ungroup() |>
  dplyr::count(year) |>
  dplyr::filter(
    year >= 2000,
    year <= 2020
  ) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage) |>
  dplyr::right_join(
    range_tbl_fig_2,
    by = c("year" = "PY")
  ) |>
  tidyr::replace_na(list(percentage = 0))
# Figure 2c: yearly share of new nodes and of links, drawn as two lines.
figure_2c_longitudinal <-
  figure_2c_cluster_nodes |>
  # Namespaced on purpose: plyr is attached after dplyr in this script, so a
  # bare mutate() would dispatch to plyr::mutate().
  dplyr::mutate(type = "nodes") |>
  dplyr::bind_rows(
    figure_2c_cluster_edges |>
      dplyr::mutate(type = "links")
  ) |>
  ggplot(aes(x = year,
             y = percentage,
             color = type)) +
  geom_point() +
  geom_line() +
  theme(
    legend.position = "right",
    text = element_text(color = "black",
                        face = "bold",
                        family = "Times New Roman"),
    plot.title = element_text(size = 25),
    panel.background = element_rect(fill = "white"),
    axis.text.y = element_text(size = 15,
                               colour = "black"),
    axis.text.x = element_text(size = 15,
                               colour = "black",
                               angle = 45,
                               vjust = 0.5),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    # Font size must be numeric; it was previously the string "15".
    legend.text = element_text(size = 15),
    legend.title = element_blank()
  ) +
  labs(title = "Nodes and links through time",
       y = "Percentage") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(1999, 2022, by = 1))

figure_2c_longitudinal
# Interactive table with the number of records per journal (SO), most
# productive journal first.
wos_scopus |>
  dplyr::select(journal = SO) |>
  na.omit() |>
  dplyr::group_by(journal) |>
  dplyr::count(journal, sort = TRUE) |>
  dplyr::rename(publications = n) |>
  dplyr::arrange(desc(publications)) |>
  DT::datatable(
    class = "cell-border stripe",
    rownames = FALSE,
    filter = "top",
    editable = FALSE,
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("copy", "csv", "excel", "pdf", "print")
    )
  )
# Creating the graph object
# Undirected journal network: link weight = number of rows sharing the same
# JI_main/JI_ref pair. Only component 1 is kept, then nodes with degree > 1,
# and Louvain communities are computed on what remains.
journal_citation_graph_weighted_tbl_small <-
  SO_links |>
  dplyr::select(JI_main, JI_ref) |>
  dplyr::group_by(JI_main, JI_ref) |>
  dplyr::count() |>
  dplyr::rename(weight = n) |>
  as_tbl_graph(directed = FALSE) |>
  # convert(to_simple) |>
  activate(nodes) |>
  dplyr::mutate(components = tidygraph::group_components(type = "weak")) |>
  dplyr::filter(components == 1) |>
  # Nodes are still the active table here.
  dplyr::mutate(degree = centrality_degree()) |>
  dplyr::filter(degree > 1) |>
  dplyr::mutate(community = tidygraph::group_louvain()) |>
  dplyr::select(-components)
# dplyr::filter(degree >= 1)
# activate(edges) |>
# dplyr::filter(weight != 1)
# Ids of the (up to) ten communities with the most nodes, largest first.
communities <-
  journal_citation_graph_weighted_tbl_small |>
  activate(nodes) |>
  data.frame() |>
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(1:10) |>
  dplyr::pull(community)
# Keep only the nodes that belong to one of the communities selected above.
journal_citation_graph_weighted_tbl_small_fig <-
  dplyr::filter(
    activate(journal_citation_graph_weighted_tbl_small, nodes),
    community %in% communities
  )
# Selecting nodes to show
# Names of the highest-degree nodes of one community.
#
# The ten `jc_com_*` objects below used to be ten verbatim copies of this
# pipeline that differed only in the community index.
#
# @param graph A tbl_graph whose nodes carry `name` and `community` columns.
# @param community_id Community id to keep. An NA id (fewer communities than
#   requested) yields an empty result, as before.
# @param n_top Maximum number of nodes returned.
# @return A one-column data frame (`name`), ordered by decreasing degree.
#   Degree is recomputed inside the community subgraph.
top_community_nodes <- function(graph, community_id, n_top = 10) {
  graph |>
    activate(nodes) |>
    # `community` is the node column; `community_id` is the function argument.
    dplyr::filter(community == .env$community_id) |>
    dplyr::mutate(degree = centrality_degree()) |>
    dplyr::arrange(dplyr::desc(degree)) |>
    dplyr::slice(seq_len(n_top)) |>
    data.frame() |>
    dplyr::select(name)
}

jc_com_1 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[1])
jc_com_2 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[2])
jc_com_3 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[3])
jc_com_4 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[4])
jc_com_5 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[5])
jc_com_6 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[6])
jc_com_7 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[7])
jc_com_8 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[8])
jc_com_9 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[9])
jc_com_10 <-
  top_community_nodes(journal_citation_graph_weighted_tbl_small_fig,
                      communities[10])

# Only the five largest communities are shown in the figure.
jc_com <-
  jc_com_1 |>
  bind_rows(jc_com_2,
            jc_com_3,
            jc_com_4,
            jc_com_5
            # jc_com_6,
            # jc_com_7,
            # jc_com_8,
            # jc_com_9,
            # jc_com_10
  )
# Figure 3a: network of the selected nodes only. Degree is recomputed on the
# reduced graph and nodes left without any link are removed before plotting.
figure_3a_graph <-
  journal_citation_graph_weighted_tbl_small_fig |>
  activate(nodes) |>
  dplyr::filter(name %in% jc_com$name) |>
  dplyr::mutate(
    degree = centrality_degree(),
    community = factor(community)
  ) |>
  dplyr::filter(degree != 0) |>
  ggraph(layout = "graphopt") +
  geom_edge_link(aes(width = weight), colour = "lightgray") +
  scale_edge_width(name = "Link strength") +
  geom_node_point(aes(color = community, size = degree)) +
  # geom_node_text(aes(label = name), repel = TRUE) +
  scale_size(name = "Degree") +
  # scale_color_binned(name = "Communities") +
  theme_graph()

figure_3a_graph
# figure_3a_data <-
# journal_citation_graph_weighted_tbl_small_fig |>
# activate(nodes) |>
# dplyr::filter(name %in% jc_com$name) |>
# dplyr::mutate(degree = centrality_degree(),
# community = factor(community)) |>
# dplyr::filter(degree != 0)
#
# figure_3a_data_graphml_nodes <-
# figure_3a_data |>
# activate(nodes) |>
# as_tibble() |>
# dplyr::rename(author = name) |>
# tibble::rownames_to_column("name")
#
# figure_3a_data_graphml_edges <-
# figure_3a_data |>
# activate(edges) |>
# as_tibble()
#
# figure_3a_data_graphml <-
# graph_from_data_frame(d = figure_3a_data_graphml_edges,
# directed = FALSE,
# vertices = figure_3a_data_graphml_nodes)
# write_graph(figure_3a_data_graphml, "data/figure_3a_data_graphml.graphml", "graphml")
# journal_citation_graph_weighted_tbl_small_fig |>
# activate(nodes) |>
# dplyr::filter(name %in% jc_com$name) |>
# dplyr::mutate(degree = centrality_degree(),
# #community = factor(community)
# ) |>
# dplyr::filter(degree != 0) |>
# tidygraph::as.igraph() |>
# igraph::simplify() |>
# write_graph("data/figure_3a_data_graphml.graphml", "graphml")
# Figure 3b: size (number of nodes) of the ten largest communities of the
# journal network, each point labelled with its size.
figure_3b_clusters <-
  journal_citation_graph_weighted_tbl_small |>
  activate(nodes) |>
  data.frame() |>
  dplyr::select(community) |>
  dplyr::count(community, sort = TRUE) |>
  dplyr::slice(1:10) |>
  ggplot(aes(x = reorder(community, n), y = n)) +
  geom_point(stat = "identity") +
  geom_line(group = 1) +
  # geom_text(label = as.numeric(community),
  #           nudge_x = 0.5,
  #           nudge_y = 0.5,
  #           check_overlap = T) +
  labs(
    title = "Communities by size",
    x = "communities",
    y = "Journals"
  ) +
  theme(
    text = element_text(color = "black",
                        face = "bold",
                        family = "Times New Roman"),
    plot.title = element_text(size = 25),
    panel.background = element_rect(fill = "white"),
    axis.text.y = element_text(size = 15, colour = "black"),
    axis.text.x = element_text(size = 15, colour = "black"),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20)
  ) +
  # Community size printed above each point.
  geom_text(
    aes(label = n),
    vjust = -0.3,
    position = position_dodge(0.9),
    size = 5,
    family = "Times"
    # color = "purple"
  )

figure_3b_clusters
# One row per year between the first and last PY_main found in `SO_links`.
# na.rm = TRUE: a single missing PY_main would otherwise turn both bounds into
# NA and make the `:` sequence fail.
range_tbl_fig_3 <-
  tibble(
    PY_main = min(SO_links$PY_main, na.rm = TRUE):max(SO_links$PY_main, na.rm = TRUE)
  )
# Journal links per year (self-links excluded), expressed relative to the
# busiest year; years without links get 0.
figure_3c_cluster_edges <-
  SO_links |>
  dplyr::filter(JI_main != JI_ref) |>
  tidygraph::as_tbl_graph() |>
  activate(edges) |>
  as_tibble() |>
  dplyr::select(year = PY_main) |>
  dplyr::count(year) |>
  # dplyr::filter(year >= 2000,
  #               year <= 2020) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage) |>
  dplyr::right_join(
    range_tbl_fig_3,
    by = c("year" = "PY_main")
  ) |>
  tidyr::replace_na(list(percentage = 0))
# Number of journal nodes entering the network per year (a node is counted in
# the first year it appears on either end of a link), expressed relative to
# the busiest year; years without new nodes get 0.
#
# Fix: the second half of the node list used to be taken from `AU_CO_links`
# (the country link table, with its own node numbering and its `PY` column),
# a copy-paste leftover from the figure 2c code. Both endpoints now come from
# the same journal edge table.
figure_3c_cluster_nodes <-
  SO_links |>
  dplyr::filter(JI_main != JI_ref) |>
  tidygraph::as_tbl_graph() |>
  activate(edges) |>
  as_tibble() |>
  dplyr::select(from, to, year = PY_main) |>
  # Stack both endpoints of every link into a single node-id column.
  tidyr::pivot_longer(
    cols = c(from, to),
    names_to = "endpoint",
    values_to = "CO"
  ) |>
  dplyr::distinct(CO, year) |>
  # Keep the first year in which each node appears.
  dplyr::group_by(CO) |>
  dplyr::slice(which.min(year)) |>
  dplyr::ungroup() |>
  dplyr::count(year) |>
  # dplyr::filter(year >= 2000,
  #               year <= 2020) |>
  dplyr::mutate(percentage = n / max(n)) |>
  dplyr::select(year, percentage) |>
  dplyr::right_join(
    range_tbl_fig_3,
    by = c("year" = "PY_main")
  ) |>
  tidyr::replace_na(list(percentage = 0))
# Figure 3c: yearly share of new journal nodes and of journal links from 2000
# onwards, drawn as two lines.
figure_3_longitudinal <-
  figure_3c_cluster_nodes |>
  # Namespaced on purpose: plyr is attached after dplyr in this script, so a
  # bare mutate() would dispatch to plyr::mutate().
  dplyr::mutate(type = "nodes") |>
  dplyr::filter(year >= 2000) |>
  dplyr::bind_rows(
    figure_3c_cluster_edges |>
      dplyr::filter(year >= 2000) |>
      dplyr::mutate(type = "links")
  ) |>
  ggplot(aes(x = year,
             y = percentage,
             color = type)) +
  geom_point() +
  geom_line() +
  theme(
    legend.position = "right",
    text = element_text(color = "black",
                        face = "bold",
                        family = "Times New Roman"),
    plot.title = element_text(size = 25),
    panel.background = element_rect(fill = "white"),
    axis.text.y = element_text(size = 15,
                               colour = "black"),
    axis.text.x = element_text(size = 15,
                               colour = "black",
                               angle = 45,
                               vjust = 0.5),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    # Font size must be numeric; it was previously the string "15".
    legend.text = element_text(size = 15),
    legend.title = element_blank()
  ) +
  labs(title = "Nodes and links through time",
       y = "Percentage") +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(breaks = seq(2000, 2022, by = 1))

figure_3_longitudinal