The current preprint is available here.




Loading and cleaning beacon data

# Libraries
pacman::p_load(tidyverse, countrycode, sf, state, htmltools, htmlwidgets, urltools, janitor, DT, RColorBrewer, plotly, readxl, rvest)

# raw beacon data
df <- read_csv(here::here("data/beacon-public.csv")) 

# ancillary data and vectors for cleaning and plotting
iso_to_name <- read_csv(here::here("data/country-iso3166-to-name.csv"))

canada.states <- 
  c(
    "Alberta", "British Columbia", "Labrador", "Manitoba", "New Brunswick", "Newfoundland", "Nova Scotia", "Nunavut", "North West Terr.", "Ontario", "Prince Edward Is.", "Québec (Province)", "Saskatchewan", "Yukon"
  )

# world map shapefile
shapefile <-
  read_sf(here::here("data/TM_WORLD_BORDERS_SIMPL-0.3.shp")) %>%
  clean_names() %>%
  filter(name %in% c("Western Sahara", "Morocco")) %>% # merge Western Sahara into Morocco to resolve boundary issues
  mutate(name = if_else(name == "Western Sahara", "Morocco", name)) %>%
  arrange(name) %>%
  group_by(name) %>%
  summarize(
    geometry = st_union(geometry),
    lat = first(lat),
    lon = first(lon),
    region = first(region),
    subregion = first(subregion)
  ) %>%
  st_cast("MULTIPOLYGON") %>% 
  bind_rows(
    read_sf(here::here("data/TM_WORLD_BORDERS_SIMPL-0.3.shp")) %>%
      clean_names() %>%
      filter(!(name %in% c("Western Sahara", "Morocco")))
  )

# Cleaning OAI URLs
df <- 
  df %>% 
  mutate(oai_url = if_else(str_detect(oai_url, "^//"), str_c("http:", oai_url), oai_url))
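
For example, a protocol-relative endpoint is normalised by prepending the scheme; a quick check with a hypothetical URL:

# hypothetical input beginning with "//"
str_c("http:", "//journals.example.org/index/oai")
#> "http://journals.example.org/index/oai"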


# Getting domain, tld, and merging in country names
df <-  
  bind_cols(
    df,
    df %>% pull(oai_url) %>% domain() %>% tld_extract()
  ) %>% 
  mutate(country_consolidated = str_to_lower(country_consolidated) %>% str_trim()) %>% 
  left_join(iso_to_name, by = c("country_consolidated" = "tld"))
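
As a quick illustration of the urltools step (hypothetical URL; tld_extract() returns a data frame with domain and tld columns, which bind_cols() appends to df):

# hypothetical example of the domain/TLD extraction above
domain("http://journals.example.ac.id/index.php/jss/oai")
#> "journals.example.ac.id"
tld_extract(domain("http://journals.example.ac.id/index.php/jss/oai"))
#> one row: domain = "journals.example.ac.id", tld = "id"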


# Cleaning country names
df <-
  df %>% 
  mutate(
    country = country_clean,
    country = if_else(country == "Washington (State)", "United States", country),
    country = if_else(str_detect(country, "United States|New York|District of Columbia"), "United States", country),
    country = if_else(country %in% state.name, "United States", country),
    country = if_else(country %in% canada.states, "Canada", country),
    country = if_else(str_detect(country, "China"), "China", country),
    country = if_else(str_detect(country, "Armenia"), "Armenia", country),
    country = if_else(str_detect(country, "Georgia"), "Georgia", country),
    country = if_else(str_detect(country, "Australia|New South Wales|Queensland|Victoria"), "Australia", country),
    country = if_else(str_detect(country, "England|British"), "United Kingdom", country),
    country = if_else(str_detect(country, "Russia|Soviet Union"), "Russia", country),
    country = if_else(str_detect(country, "Palestine"), "Palestine", country),
    country = if_else(country == "Korea (South)", "South Korea", country)
  ) %>% 
  select(-country_clean, -country_marc, -country_issn, -country_tld, -country_ip)


# Mapping countries to continents
df <-
  df %>%
  mutate(continent = countrycode(country, origin = "country.name", destination = "continent"))

# Filter to active OJS journals
df <-
  df %>% 
  filter(application == "ojs", record_count_2020 >= 5) %>% 
  distinct(oai_url, repository_name, set_spec, .keep_all = T)

Total active journals using OJS (JUOJS):

df %>% count()

Total articles published in active JUOJS:

df %>% summarise(total = sum(record_count_2020, na.rm = T))

Average articles published in active JUOJS:

df %>% summarise(average = mean(record_count_2020, na.rm = T))

Total distinct ISSNs:

df %>% 
  mutate(
    issn = str_extract(issn, "[^\n]+") # correcting for multiple reported issns
  ) %>% 
  distinct(issn) %>% 
  drop_na(issn) %>% 
  count()
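
The str_extract() call keeps only the first reported ISSN, since some journals report several separated by newlines; for example (made-up values):

# "[^\n]+" matches everything up to the first newline
str_extract("1234-5678\n8765-4321", "[^\n]+")
#> "1234-5678"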



JUOJS Global Presence in 2020

# global df
df_world <-
  df %>%
  drop_na(country) %>%
  count(country, name = "total")

labels <- function(x) {
  if_else(x < 500, as.character(x), "500+")
}
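
A quick check of the label helper against the colourbar breaks used below:

labels(seq(0, 500, 100))
#> "0" "100" "200" "300" "400" "500+"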

shapefile %>% 
  clean_names() %>%
  rename(country = name) %>%
  mutate(
    country = if_else(country == "Libyan Arab Jamahiriya", "Libya", country),
    country = if_else(country == "United Republic of Tanzania", "Tanzania", country),
    country = if_else(country == "Cote d'Ivoire", "Côte d'Ivoire", country),
    country = if_else(country == "Congo", "Republic of the Congo", country),
    country = if_else(country == "Viet Nam", "Vietnam", country),
    country = if_else(str_detect(country, "Iran"), "Iran", country),
    country = if_else(str_detect(country, "Korea, Republic of"), "South Korea", country),
    country = if_else(str_detect(country, "Korea, Democratic People's Republic of"), "North Korea", country),
    country = if_else(str_detect(country, "Surinam"), "Surinam", country)
  ) %>%
  left_join(df_world, by = "country") %>%
  filter(total > 0 | area > 1000) %>% # keep countries with journals, plus larger countries for map context
  filter(country != "Antarctica") %>% 
  mutate(
    total = replace_na(total, 0),
    total = pmin(total, 500)
  ) %>%
  ggplot() +
  geom_sf(aes(fill = total), size = 0.1, color = "gray", show.legend = T) +
  scale_fill_gradientn(
    breaks = seq(0, 500, 100),
    labels = labels,
    colors = RColorBrewer::brewer.pal(n = 9, name = "Blues")
  ) +
  guides(
    fill =
      guide_colorbar(
        barheight = 0.5,
        barwidth = 15,
        title = "Journals",
        title.vjust = 1
      )
  ) +
  theme_void() +
  theme(legend.position = "bottom")




JUOJS Growth (2010-20)

# recompute from the raw beacon data: a journal counts as active in a
# given year if it published at least five records that year
read_csv(here::here("data/beacon-public.csv")) %>% 
  select(context_name, record_count_2010:record_count_2020) %>% 
  pivot_longer(cols = starts_with("record_count")) %>% 
  mutate(name = parse_number(name)) %>% 
  filter(value >= 5) %>% 
  count(name) %>%
  mutate(name = as.integer(name)) %>% 
  ggplot(aes(name, n)) +
  geom_line() +
  geom_point(size = 2) +
  theme_classic() +
  scale_x_continuous(breaks = seq(2010, 2020, 1)) +
  scale_y_continuous(breaks = seq(0, 25000, 5000)) +
  theme(
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 10),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    x = "Year",
    y = "Journals"
  )




Assessing overlaps

Web of Science

Total overlap:

df_wos <-
  read_table(here::here("data/overlaps/wos.txt"), na = c("NULL", "NA", "")) %>% 
  clean_names() %>% 
  remove_empty() %>% 
  filter(str_detect(issn, "^[0-9]{4}-[0-9]{3}[0-9xX]$")) %>% 
  distinct(issn) %>% 
  drop_na(issn)

# total overlap
df %>% 
  mutate(
    issn = str_extract(issn, "[^\n]+")
  ) %>%
  inner_join(df_wos, by = "issn") %>% 
  count()

Top 10 countries in overlap:

# top 10 countries in overlap
df %>% 
  mutate(
    issn = str_extract(issn, "[^\n]+")
  ) %>%
  inner_join(df_wos, by = "issn") %>% 
  count(country) %>% 
  arrange(-n) %>% 
  head(10) %>% 
  mutate(country = fct_inorder(country) %>% fct_rev()) %>% 
  ggplot(aes(country, n)) +
  geom_col(fill = "#0072B2") +
  theme_classic() +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 18),
    axis.ticks = element_blank()
  ) +
  coord_flip() +
  labs(
    x = "Country", y = "Number of journals"
  )

Scopus

Total overlap:

# Scopus data
df_scopus <-
  read_excel(here::here("data/overlaps/scopus.xlsx")) %>%
  clean_names() %>% 
  remove_empty() %>%
  transmute(
    issn = str_replace(print_issn, "-", ""),
    e_issn = str_replace(e_issn, "-", "")
  ) %>%
  distinct() %>% 
  remove_empty()

# join A (issn to issn)
df_join_a <-
  df %>%
  drop_na(issn) %>% 
  transmute(
    country,
    issn = str_extract(issn, "[^\n]+"),
    issn = str_replace(issn, "-", "")
  ) %>%
  distinct(issn, .keep_all = T) %>% 
  inner_join(df_scopus %>% select(-e_issn), by = "issn")

# join B (issn to e-issn)
df_join_b <-
  df %>%
  drop_na(issn) %>% 
  transmute(
    country,
    issn = str_extract(issn, "[^\n]+"),
    issn = str_replace(issn, "-", "")
  ) %>%
  distinct(issn, .keep_all = T) %>% 
  inner_join(df_scopus %>% select(-issn), by = c("issn" = "e_issn"))

bind_rows(df_join_a, df_join_b) %>% 
  distinct() %>% 
  count()
# 1646/41957
# 1646/22809
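
The same ISSN normalisation (first reported value, hyphen stripped, one row per ISSN) recurs in the Dimensions joins below; a small helper could factor it out. A sketch only; beacon_issns is a hypothetical name not used elsewhere in this notebook:

# hypothetical helper: one row per normalised beacon ISSN, keeping country
beacon_issns <- function(df) {
  df %>%
    drop_na(issn) %>%
    transmute(
      country,
      issn = str_extract(issn, "[^\n]+"), # first reported ISSN
      issn = str_replace(issn, "-", "")   # drop the hyphen
    ) %>%
    distinct(issn, .keep_all = TRUE)
}

# e.g. beacon_issns(df) %>% inner_join(df_scopus %>% select(-e_issn), by = "issn")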

Top 10 countries in overlap:

bind_rows(df_join_a, df_join_b) %>% 
  distinct() %>%
  drop_na(country) %>% 
  count(country) %>% 
  arrange(-n) %>% 
  head(10) %>% 
  mutate(country = fct_inorder(country) %>% fct_rev()) %>% 
  ggplot(aes(country, n)) +
  geom_col(fill = "#0072B2") +
  theme_classic() +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 18),
    axis.ticks = element_blank()
  ) +
  coord_flip() +
  labs(
    x = "Country", y = "Number of journals"
  )

Dimensions

Total overlap:

# Dimensions data
df_dimensions <-
  read_excel(here::here("data/overlaps/dimensions.xlsx")) %>%
  clean_names() %>% 
  remove_empty() %>% 
  transmute(
    issn = str_replace(issn_print, "-", "") %>% na_if("NULL"),
    e_issn = str_replace(issn_e, "-", "") %>% na_if("NULL")
  ) %>%
  distinct() %>% 
  remove_empty()


# join A (issn to issn)
df_join_a <-
  df %>%
  drop_na(issn) %>% 
  transmute(
    country,
    issn = str_extract(issn, "[^\n]+"),
    issn = str_replace(issn, "-", "")
  ) %>%
  distinct(issn, .keep_all = T) %>% 
  inner_join(df_dimensions %>% select(-e_issn), by = "issn")

# join B (issn to e-issn)
df_join_b <-
  df %>%
  drop_na(issn) %>% 
  transmute(
    country,
    issn = str_extract(issn, "[^\n]+"),
    issn = str_replace(issn, "-", "")
  ) %>%
  distinct(issn, .keep_all = T) %>% 
  inner_join(df_dimensions %>% select(-issn), by = c("issn" = "e_issn"))

bind_rows(df_join_a, df_join_b) %>% 
  distinct() %>% 
  count()
#12435/22809
#12435/72990

Top 10 countries in overlap:

bind_rows(df_join_a, df_join_b) %>% 
  distinct() %>%
  drop_na(country) %>% 
  count(country) %>% 
  arrange(-n) %>% 
  head(10) %>% 
  mutate(country = fct_inorder(country) %>% fct_rev()) %>% 
  ggplot(aes(country, n)) +
  geom_col(fill = "#0072B2") +
  theme_classic() +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 18),
    axis.ticks = element_blank()
  ) +
  coord_flip() +
  labs(
    x = "Country", y = "Number of journals"
  )

EBSCOhost

Total overlap:

# EBSCOhost data
df_ebsco <-
  read_excel(here::here("data/overlaps/ebscohost.xls")) %>%
  clean_names() %>% 
  remove_empty() %>% 
  distinct(issn) %>% 
  drop_na(issn)

df %>% 
  drop_na(issn) %>%
  transmute(
    country,
    issn = str_extract(issn, "[^\n]+")
  ) %>%
  inner_join(df_ebsco, by = "issn") %>% 
  count()

Top 10 countries in overlap:

df %>% 
  drop_na(issn) %>%
  transmute(
    country,
    issn = str_extract(issn, "[^\n]+")
  ) %>%
  inner_join(df_ebsco, by = "issn") %>%
  drop_na(country) %>% 
  count(country) %>% 
  arrange(-n) %>% 
  head(10) %>% 
  mutate(country = fct_inorder(country) %>% fct_rev()) %>% 
  ggplot(aes(country, n)) +
  geom_col(fill = "#0072B2") +
  coord_flip() +
  theme_classic() +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 10),
    axis.ticks = element_blank()
  ) +
  labs(
    x = "Country", y = "Number of journals"
  )

Google Scholar

# Read in python processed google scholar URLs
## first iteration using journal URLs ##

df_gscholar <- 
  read_csv(here::here("scripts/gscholar_urls_mapped.csv")) %>% 
  select(-result_json) %>% 
  mutate(n_results = parse_integer(n_results))

df_gscholar <-
  df %>%
  separate(oai_url, c("url1", "url2"), "\n") %>% 
  remove_empty() %>% 
  rename(oai_url = url1) %>% 
  filter(!str_detect(oai_url, "\\?page=")) %>% 
  transmute(
    country, total_record_count, context_name,
    url = str_replace(oai_url, "index/oai", set_spec),
    url = str_replace(url, "^https://", ""),
    url = str_replace(url, "^http://", ""),
    url = str_replace(url, "^www108.", ""),
    url = str_replace(url, "^www5.", ""),
    url = str_replace(url, "^www3.", ""),
    url = str_replace(url, "^www2.", ""),
    url = str_replace(url, "^www.", ""),
  ) %>%
  bind_rows(
    df %>%
      separate(oai_url, c("url1", "url2"), "\n") %>% 
      remove_empty() %>% 
      rename(oai_url = url1) %>% 
      filter(str_detect(oai_url, "\\?page=")) %>% 
      transmute(
        country, total_record_count, context_name,
        url = str_replace(oai_url, "oai$", set_spec),
        url = str_replace(url, "^https://", ""),
        url = str_replace(url, "^http://", ""),
        url = str_replace(url, "^www108.", ""),
        url = str_replace(url, "^www5.", ""),
        url = str_replace(url, "^www3.", ""),
        url = str_replace(url, "^www2.", ""),
        url = str_replace(url, "^www.", ""),
      ), .
  ) %>% 
  distinct() %>% 
  inner_join(df_gscholar, by = "url")


# bind_cols(
#     df_gscholar,
#     df_gscholar %>% pull(url) %>% domain() %>% tld_extract()
#   ) %>%
#   left_join(read_csv(here::here("scripts/gscholar_citations.csv")), by = "url") %>% 
#   group_by(domain) %>% 
#   summarise(
#     n_results = sum(n_results, na.rm = T),
#     n_citations = sum(n_citations, na.rm = T)
#   ) %>% 
#   filter(n_results > 0) %>% 
#   select(-n_results) %>% 
#   write_csv(here::here("data/scholar_present_domains_set1.csv"))

Total overlap:

# loading all domains
domains_in_scholar <-
  bind_rows(
    read_csv(here::here("data/scholar_present_domains_set1.csv")),
    read_csv(here::here("data/scholar_present_domains_set2.csv")),
  ) %>% 
  distinct()

# total overlap
bind_cols(
    df_gscholar,
    df_gscholar %>% pull(url) %>% domain() %>% tld_extract()
  ) %>% 
  inner_join(domains_in_scholar, by = "domain") %>%
  count()
#22679/25671

# domains_in_scholar %>% summary()
# domains_in_scholar %>% filter(n_citations < 1)
# 552/8548 domains have zero citations on their first Scholar page
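
Expressed as proportions of the counts recorded above:

# 22679 of 25671 matched journal URLs resolve to a domain indexed in Scholar;
# 552 of 8548 domains show no citations on their first results page
c(22679 / 25671, 552 / 8548)
#> 0.883 0.065 (roughly 88% and 6% respectively)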

Top 10 countries in overlap:

# top 10 countries in overlap
bind_cols(
    df_gscholar,
    df_gscholar %>% pull(url) %>% domain() %>% tld_extract()
  ) %>% 
  inner_join(domains_in_scholar, by = "domain") %>% 
  count(country) %>% 
  arrange(-n) %>% 
  head(10) %>% 
  mutate(country = fct_inorder(country) %>% fct_rev()) %>% 
  ggplot(aes(country, n)) +
  geom_col(fill = "#0072B2") +
  scale_y_continuous(breaks = scales::breaks_width(1000)) +
  theme_classic() +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 10),
    axis.ticks = element_blank()
  ) +
  coord_flip() +
  labs(x = "Country", y = "Total journals")

Number of citations on first Scholar page:

# number of citations on first gscholar page
domains_in_scholar %>%
  mutate(n_citations = pmin(n_citations, 500)) %>% 
  ggplot(aes(n_citations)) +
  geom_histogram(binwidth = 20, fill = "#0072B2") +
  scale_x_continuous(
    breaks = seq(0, 500, 100),
    labels = c("0", "100", "200", "300", "400", "500+")
  ) +
  labs(
    x = "Total citations on first page",
    y = "Number of journal domains"
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    axis.ticks = element_blank()
  )

# ggsave("scholar_citations.png")

Journals with non-zero citations on first page:

# alter threshold to see proportions as needed (e.g. 0, 1000, 10000)
threshold <- 0

domains_in_scholar %>% arrange(-n_citations) %>% filter(n_citations > threshold) %>% count()

Summary of citations on first page:

domains_in_scholar %>% summary()
##     domain           n_citations     
##  Length:8548        Min.   :    0.0  
##  Class :character   1st Qu.:   11.0  
##  Mode  :character   Median :   50.0  
##                     Mean   :  358.8  
##                     3rd Qu.:  185.0  
##                     Max.   :84656.0

Latindex

Total overlap:

# Latindex
df_latindex <-
  read_excel(here::here("data/overlaps/latindex.xlsx")) %>%
  clean_names() %>% 
  remove_empty() %>% 
  select(issn, e_issn, online = en_linea) %>% 
  distinct()
  

# join A (issn to issn)
df_join_a <-
  df %>%
  drop_na(issn) %>% 
  transmute(
    country,
    issn = str_extract(issn, "[^\n]+")
  ) %>%
  distinct(issn, .keep_all = T) %>% 
  inner_join(df_latindex %>% select(-e_issn), by = "issn")

# join B (issn to e-issn)
df_join_b <-
  df %>%
  drop_na(issn) %>% 
  transmute(
    country,
    issn = str_extract(issn, "[^\n]+")
  ) %>%
  distinct(issn, .keep_all = T) %>% 
  inner_join(df_latindex %>% select(-issn), by = c("issn" = "e_issn"))

bind_rows(df_join_a, df_join_b) %>% 
  distinct() %>%
  arrange(country, issn, -online) %>% 
  distinct(country, issn) %>% 
  count()
# 4208/24486
# 4208/6319

Top 10 countries in overlap:

bind_rows(df_join_a, df_join_b) %>% 
  distinct() %>%
  arrange(country, issn, -online) %>% 
  distinct(country, issn) %>%
  drop_na(country) %>% 
  count(country) %>% 
  arrange(-n) %>% 
  head(10) %>% 
  mutate(country = fct_inorder(country) %>% fct_rev()) %>% 
  ggplot(aes(country, n)) +
  geom_col(fill = "#0072B2") +
  coord_flip() +
  labs(
    x = "Country", y = "Number of journals"
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 18),
    axis.ticks = element_blank()
  )

JUOJS in Latin American countries:

# JUOJS in Latin American countries
df_latam <- read_csv(here::here("data/latindex_countries.csv")) 

df %>% 
  inner_join(df_latam, by = "country") %>% 
  count()



Citation analysis

Top 100 cited journal domains

df_lang_disc <-
  read_csv(here::here("data/OJS_languages_disciplines.csv")) %>%
  select(journal_url, language, discipline, journal_name = context_name) %>% 
  remove_empty()

ojs_domains <-
  df %>% 
  mutate(domain = str_remove(domain, "^www\\.")) %>% 
  distinct(domain) %>% 
  pull(domain) %>% 
  suffix_extract() %>% 
  transmute(domain = str_c(domain, suffix, sep = ".")) %>% 
  distinct(domain) %>% 
  pull(domain)
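
suffix_extract() splits a hostname against the public-suffix list, so recombining domain and suffix yields the registrable domain; for example (hypothetical host):

suffix_extract("journals.example.ac.id")
#> host = "journals.example.ac.id", subdomain = "journals",
#> domain = "example", suffix = "ac.id"
#> so str_c(domain, suffix, sep = ".") gives "example.ac.id"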


bind_cols(
  df_lang_disc,
  df_lang_disc %>% pull(journal_url) %>% domain() %>% tld_extract()
) %>% 
  transmute(
    domain = str_replace(domain, "^https://", ""),
    domain = str_replace(domain, "^http://", ""),
    domain = str_replace(domain, "^www108.", ""),
    domain = str_replace(domain, "^www5.", ""),
    domain = str_replace(domain, "^www3.", ""),
    domain = str_replace(domain, "^www2.", ""),
    domain = str_replace(domain, "^www.", ""),
    language, discipline
  ) %>%
  distinct(domain, .keep_all = T) %>% 
  inner_join(domains_in_scholar, by = "domain") %>% 
  arrange(desc(n_citations)) %>% 
  mutate(domain = str_c('<a href="http://', domain, '" target="_blank">', domain, '</a>')) %>%
  select(`Journal Domain` = domain, `Language` = language, `Discipline` = discipline, `Scholar First Page Citations` = n_citations) %>% 
  head(100) %>% 
  DT::datatable(escape = F, options = list(pageLength = 10))



Analysing citations for the Journal of Statistical Software (English)

  • The journal domain is hosted here.
  • The top citations for this journal’s most cited article are here.

process_html <- function(html_content) {
  df_mini <-
    tibble(
      article = html_content %>% 
        html_nodes(".gs_ri .gs_rt a") %>% 
        html_text(),
      domain = html_content %>% 
        html_nodes(".gs_ri .gs_rt a") %>% 
        html_attr("href") %>% 
        domain() %>%
        suffix_extract() %>% 
        transmute(domain = str_c(domain, suffix, sep = ".")) %>% 
        pull(domain),
      # citations = html_content %>% 
      #   html_nodes(".gs_ri .gs_fl a") %>% 
      #   html_text() %>% 
      #   str_subset("^Cited by ") %>% 
      #   parse_number()
    ) %>% 
    mutate(
      domain_in_ojs = if_else(domain %in% ojs_domains, "Yes", "No"),
      domain = if_else(domain == "google.com", "books.google.com", domain)  
    ) %>% 
    # arrange(desc(citations)) %>% 
    clean_names(case = "title") %>% 
    rename(`JUOJS Domain` = `Domain in Ojs`, `Article/Book` = `Article`) 
  
  return(df_mini)
}
html_content1 <- read_html("https://scholar.google.com/scholar?cites=3069375885583311800&as_sdt=2005&sciodt=0,5&hl=en&inst=574688794952177237")
html_content2 <- read_html("https://scholar.google.com/scholar?start=10&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")
html_content3 <- read_html("https://scholar.google.com/scholar?start=20&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")
html_content4 <- read_html("https://scholar.google.com/scholar?start=30&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")
html_content5 <- read_html("https://scholar.google.com/scholar?start=40&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")
html_content6 <- read_html("https://scholar.google.com/scholar?start=50&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")
html_content7 <- read_html("https://scholar.google.com/scholar?start=60&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")
html_content8 <- read_html("https://scholar.google.com/scholar?start=70&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")
html_content9 <- read_html("https://scholar.google.com/scholar?start=80&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")
html_content10 <- read_html("https://scholar.google.com/scholar?start=90&hl=en&as_sdt=2005&sciodt=0,5&cites=3069375885583311800&scipsc=&inst=5746887945952177237")

bind_rows(
  `1` = process_html(html_content1),
  `2` = process_html(html_content2),
  `3` = process_html(html_content3),
  `4` = process_html(html_content4),
  `5` = process_html(html_content5),
  `6` = process_html(html_content6),
  `7` = process_html(html_content7),
  `8` = process_html(html_content8),
  `9` = process_html(html_content9),
  `10` = process_html(html_content10),
  .id = "Page"
) %>%
  write_csv(here::here("data/citation_samples/english.csv"))
read_csv(here::here("data/citation_samples/english.csv")) %>% 
  select(-any_of("Citations")) %>%  # Citations column may be absent if the CSV is regenerated
  DT::datatable()  

JUOJS vs non-JUOJS domains among citations:

read_csv(here::here("data/citation_samples/english.csv")) %>% 
  count(`JUOJS Domain`, name = "Count")



Analysing citations for the UNY Journal (Indonesian)

  • The journal domain is hosted here.
  • The top citations for this journal’s most cited article are here.

html_content1 <- read_html("https://scholar.google.com/scholar?cites=6422621221779132632&as_sdt=2005&sciodt=0,5&hl=en&inst=5746887945952177237")
html_content2 <- read_html("https://scholar.google.com/scholar?start=10&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")
html_content3 <- read_html("https://scholar.google.com/scholar?start=20&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")
html_content4 <- read_html("https://scholar.google.com/scholar?start=30&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")
html_content5 <- read_html("https://scholar.google.com/scholar?start=40&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")
html_content6 <- read_html("https://scholar.google.com/scholar?start=50&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")
html_content7 <- read_html("https://scholar.google.com/scholar?start=60&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")
html_content8 <- read_html("https://scholar.google.com/scholar?start=70&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")
html_content9 <- read_html("https://scholar.google.com/scholar?start=80&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")
html_content10 <- read_html("https://scholar.google.com/scholar?start=90&hl=en&as_sdt=2005&sciodt=0,5&cites=6422621221779132632&scipsc=&inst=5746887945952177237")

bind_rows(
  `1` = process_html(html_content1),
  `2` = process_html(html_content2),
  `3` = process_html(html_content3),
  `4` = process_html(html_content4),
  `5` = process_html(html_content5),
  `6` = process_html(html_content6),
  `7` = process_html(html_content7),
  `8` = process_html(html_content8),
  `9` = process_html(html_content9),
  `10` = process_html(html_content10),
  .id = "Page"
) %>%
  write_csv(here::here("data/citation_samples/indonesian.csv"))
read_csv(here::here("data/citation_samples/indonesian.csv")) %>% 
  DT::datatable()  

JUOJS vs non-JUOJS domains among citations:

read_csv(here::here("data/citation_samples/indonesian.csv")) %>% 
  count(`JUOJS Domain`, name = "Count")



Analysing citations for Portal de Periodicos UFSC (Portuguese)

  • The journal domain is hosted here.
  • The top citations for this journal’s most cited article are here.

html_content1 <- read_html("https://scholar.google.com/scholar?cites=12229533173691059082&as_sdt=2005&sciodt=0,5&hl=en&inst=5746887945952177237")
html_content2 <- read_html("https://scholar.google.com/scholar?start=10&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")
html_content3 <- read_html("https://scholar.google.com/scholar?start=20&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")
html_content4 <- read_html("https://scholar.google.com/scholar?start=30&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")
html_content5 <- read_html("https://scholar.google.com/scholar?start=40&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")
html_content6 <- read_html("https://scholar.google.com/scholar?start=50&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")
html_content7 <- read_html("https://scholar.google.com/scholar?start=60&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")
html_content8 <- read_html("https://scholar.google.com/scholar?start=70&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")
html_content9 <- read_html("https://scholar.google.com/scholar?start=80&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")
html_content10 <- read_html("https://scholar.google.com/scholar?start=90&hl=en&as_sdt=2005&sciodt=0,5&cites=12229533173691059082&scipsc=&inst=5746887945952177237")

bind_rows(
  `1` = process_html(html_content1),
  `2` = process_html(html_content2),
  `3` = process_html(html_content3),
  `4` = process_html(html_content4),
  `5` = process_html(html_content5),
  `6` = process_html(html_content6),
  `7` = process_html(html_content7),
  `8` = process_html(html_content8),
  `9` = process_html(html_content9),
  `10` = process_html(html_content10),
  .id = "Page"
) %>%
  write_csv(here::here("data/citation_samples/portuguese.csv"))
read_csv(here::here("data/citation_samples/portuguese.csv")) %>% 
  DT::datatable()  

JUOJS vs non-JUOJS domains among citations:

read_csv(here::here("data/citation_samples/portuguese.csv")) %>% 
  count(`JUOJS Domain`, name = "Count")