China is Redrawing the Map of World Science

By several metrics, China is redrawing the map of world science:

R code for collecting the data and creating the visualizations:

library(rvest)
library(tidyverse)


#=======================================================
#     Research and development spending by country
#=======================================================


# Source page: Wikipedia's list of countries by R&D spending.
spending_link <- "https://en.wikipedia.org/wiki/List_of_countries_by_research_and_development_spending"

# Download the page, parse every HTML table, and keep the third one
# (selected by position, so this may need adjusting if the page changes).
# The Year and Source columns are dropped, the remaining four columns get
# short names, and the text columns are converted to numbers.
df_spending <- read_html(spending_link) %>%
  html_table(fill = TRUE) %>%
  .[[3]] %>%
  select(-Year, -Source) %>%
  setNames(c("country", "expOn_RD", "percent_GDP", "per_capital_RD")) %>%
  mutate(
    expOn_RD = as.numeric(expOn_RD),
    # Strip the percent sign before converting.
    percent_GDP = as.numeric(str_replace_all(percent_GDP, "%", "")),
    # Strip commas (presumably thousands separators) before converting.
    per_capital_RD = as.numeric(str_replace_all(per_capital_RD, "\\,", ""))
  )

#===============================
#     Total Research Papers
#===============================

# Source page: SCImago country rankings.
total_paper_link <- "https://www.scimagojr.com/countryrank.php?fbclid=IwAR2BP_UlTzsowsNXuVL44keV9pJPJ2T3-Xn156CWYrz51BTm4XR3w9CqQa4"

# Parse every HTML table on the page and keep the first one.
df_total_research_paper <- html_table(
  read_html(total_paper_link),
  fill = TRUE
)[[1]]

# Discard the two leading columns; only the remaining ones are used below.
df_total_research_paper <- df_total_research_paper[, -seq_len(2)]

# Remove hyphens and spaces from the column names so they can be used as
# bare names in later dplyr/ggplot2 calls.
names(df_total_research_paper) <- str_replace_all(
  names(df_total_research_paper),
  "-| ",
  ""
)

#===============================
#   Technical Journal Papers
#===============================

# Source page: WorldAtlas article on countries publishing the most
# scholarly articles.
stem_paper_link <- "https://www.worldatlas.com/articles/20-countries-publishing-the-most-scholarly-articles.html"

# Parse every HTML table on the page and keep the first one.
df_stem <- html_table(read_html(stem_paper_link), fill = TRUE)[[1]]

# Give the three columns short, code-friendly names.
names(df_stem) <- c("rank", "country", "paper_num_stem")

# Keep only the digits of the paper count, then convert it to numeric.
df_stem <- mutate(
  df_stem,
  paper_num_stem = as.numeric(str_replace_all(paper_num_stem, "[^0-9]", ""))
)

#=====================
#  Create our theme
#=====================

library(hrbrthemes)

# Colors selected (only the first one is used as the bar fill/outline below):
my_colors <- c("#8C3F4D", "#3E606F")

# Horizontal bar chart of the first 20 rows of the SCImago table, with the
# document counts expressed in thousands.
df_total_research_paper %>%
  # Take the first 20 rows as delivered by the source table, then sort
  # ascending so that coord_flip() puts the largest bar at the top.
  slice(1:20) %>%
  arrange(Documents) %>%
  # Freeze the current row order as the factor level order for the axis.
  mutate(Country = factor(Country, levels = Country)) %>%
  # Convert to thousands. The digits argument belongs inside round(); it was
  # previously passed to mutate(), which added a stray constant column
  # named `0`.
  mutate(Documents = round(Documents / 1000, 0)) %>%
  ggplot(aes(Country, Documents)) +
  geom_col(fill = my_colors[1], color = my_colors[1]) +
  coord_flip() +
  # hjust > 1 places each value label just inside the end of its bar.
  geom_text(aes(label = Documents), color = "white", hjust = 1.1, size = 5) +
  theme_modern_rc(plot_title_size = 22, caption_size = 10,
                  axis_text_size = 14, axis_title_size = 12,  plot_margin = margin(35, 35, 35, 35)) +
  theme(panel.grid.major.y = element_blank()) +
  theme(panel.grid.minor.x = element_blank()) +
  scale_y_continuous(labels = scales::comma, expand = c(0.01, 0)) +
  labs(title = "The Top 20 Countries by Number of Research Papers Published",
       x = NULL, y = "Number of Research Papers",
       caption = "Data Source: https://www.scimagojr.com",
       subtitle = "China is ranked country second behind the United States with 5901 (in thousands)  papers published in all fields.\nThis number does not include Hong Kong and Macau territories.")



# Horizontal bar chart of scientific/technical paper counts per country,
# expressed in thousands.
df_stem %>%
  # Sort ascending so that coord_flip() puts the largest bar at the top.
  arrange(paper_num_stem) %>%
  # Freeze the current row order as the factor level order for the axis.
  mutate(country = factor(country, levels = country)) %>%
  # Convert to thousands. The digits argument belongs inside round(); it was
  # previously passed to mutate(), which added a stray constant column
  # named `0`.
  mutate(paper_num_stem = round(paper_num_stem / 1000, 0)) %>%
  ggplot(aes(country, paper_num_stem)) +
  geom_col(fill = my_colors[1], color = my_colors[1]) +
  coord_flip() +
  # hjust > 1 places each value label just inside the end of its bar.
  geom_text(aes(label = paper_num_stem), color = "white", hjust = 1.1, size = 5) +
  theme_modern_rc(plot_title_size = 22, caption_size = 10,
                  axis_text_size = 14, axis_title_size = 12,  plot_margin = margin(35, 35, 35, 35)) +
  theme(panel.grid.major.y = element_blank()) +
  theme(panel.grid.minor.x = element_blank()) +
  scale_y_continuous(labels = scales::comma, expand = c(0.01, 0)) +
  labs(title = "The Top 20 Countries by Number of Scientific and Technical Papers Published",
       x = NULL, y = "Number of Research Papers",
       caption = "Data Source: https://www.worldatlas.com",
       subtitle = "China is ranked country second behind the United States with 401 (in thousands) scientific/technical papers published.\nThis number does not include Hong Kong and Macau territories.")



# Reduce df_spending to the first 20 rows (after removing the aggregate
# "European Union" row), sort ascending by expenditure, and freeze that order
# as factor levels so the flipped chart shows the largest bar at the top.
# Note: this overwrites df_spending with the reduced table.
df_spending <- df_spending %>%
  filter(country != "European Union") %>%
  slice(1:20) %>%
  arrange(expOn_RD) %>%
  mutate(country = factor(country, levels = country))

# Horizontal bar chart of R&D expenditure per country.
ggplot(df_spending, aes(country, expOn_RD)) +
  geom_col(fill = my_colors[1], color = my_colors[1]) +
  coord_flip() +
  # Rows 16-20 (the largest values after the ascending sort) get their label
  # inside the bar; rows 1-15 get it just past the end of the bar.
  geom_text(
    data = slice(df_spending, 16:20),
    aes(label = expOn_RD),
    color = "white", hjust = 1.1, size = 5
  ) +
  geom_text(
    data = slice(df_spending, 1:15),
    aes(label = expOn_RD),
    color = "white", hjust = -0.1, size = 5
  ) +
  theme_modern_rc(
    plot_title_size = 22,
    caption_size = 10,
    axis_text_size = 14,
    axis_title_size = 12,
    plot_margin = margin(35, 35, 35, 35)
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  scale_y_continuous(labels = scales::comma, expand = c(0.01, 0)) +
  labs(
    title = "The Top 20 Countries by Research and Development Spending",
    subtitle = "Global spending on R&D has reached a record high of almost US$ 1.7 trillion. About 10 countries account for 80% of spending.\nChina is ranked country second behind the United States with R&D spending of 451.9 Billions Dollars.\nThis number does not include Hong Kong and Macau territories.",
    caption = "Data Source: http://uis.unesco.org/apps/visualisations/research-and-development-spending/",
    x = NULL,
    y = "Expenditures on R&D"
  )
