GDP Sector Composition

R for Killing Pneumonia

Nguyen Chi Dung

# NOTE: the global environment is deliberately not cleared here. Calling
# `rm(list = ls())` would delete the caller's objects whenever this script is
# sourced; start a fresh R session instead if a clean workspace is required.
library(tidyverse)
library(magrittr)
library(scales)
library(rvest)
library(stringi)

# Wikipedia page holding the sector-composition tables.
link <- "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_sector_composition"


# Download the page, pull the table addressed by the XPath below, keep the
# country name plus the three sector-share columns, and transliterate the
# country names to plain ASCII. Intermediate objects live inside local() so
# only `link` and `gdp` are created in the calling environment.
gdp <- local({
  page <- read_html(link)
  table_nodes <- html_nodes(
    page,
    xpath = '//*[@id="mw-content-text"]/div/table[5]'
  )
  sector_tbl <- as.data.frame(html_table(table_nodes))
  sector_tbl <- select(
    sector_tbl,
    con = Country.Economy, Agri., Indus., Serv.
  )
  mutate(sector_tbl, con = stri_trans_general(con, "Latin-ASCII"))
})

# Helper that extracts the percentage value from a string:
library(stringr)
#' Convert percentage strings to proportions
#'
#' Extracts the first numeric token from each element of `x` (for example
#' "12.3" from "12.3%") and divides it by 100.
#'
#' @param x A character vector (or anything `as.character()` can coerce)
#'   holding values such as "12.3%", "45%" or "7.25".
#' @return A numeric vector the same length as `x`. Elements that contain no
#'   number (including `NA`) yield `NA`.
get_num <- function(x) {
  x <- as.character(x)

  # Locate the first number in each string: optional sign, optional integer
  # part, optional decimal point, then at least one digit.
  hit <- regexpr("-?[0-9]*\\.?[0-9]+", x)
  found <- !is.na(hit) & hit > 0L

  # regmatches() returns matches only for the elements that matched, so they
  # are written back into the positions flagged by `found`.
  pct <- rep(NA_real_, length(x))
  pct[found] <- as.numeric(regmatches(x, hit))

  pct / 100
}

# Apply the helper to the scraped table.

# Countries whose axis labels are highlighted in the chart.
my_note <- c("China", "Japan", "United States")

# Sector shares as proportions, plus the axis-label colour for each country.
gdp %<>% mutate(
  agri_per = get_num(Agri.),
  indus_per = get_num(Indus.),
  ser_per = get_num(Serv.),
  col = case_when(con %in% my_note ~ "#8B2323", TRUE ~ "gray50")
)

# Drop repeated countries and the "World" aggregate, build the service-share
# label, and order the country factor by service share so the chart is sorted.
# sprintf() gives every label exactly one decimal place (e.g. "43.0 %"),
# instead of patching individual whole-number values by hand.
gdp %<>%
  filter(!duplicated(con), con != "World") %>%
  mutate(label = sprintf("%.1f %%", ser_per * 100)) %>%
  arrange(ser_per) %>%
  mutate(con = factor(con, levels = con))



# Custom plot theme. First register the font that the theme refers to.
library(extrafont)
font_import(pattern = "OfficinaSansITCMedium.ttf", prompt = FALSE)
# The "win" device is specific to Windows, so only register fonts for it
# there; on other platforms this step is skipped.
# NOTE(review): off Windows the font must be available to the graphics device
# by other means -- confirm on the target machine.
if (.Platform$OS.type == "windows") {
  extrafont::loadfonts(device = "win")
}

#' Theme used for the GDP sector-composition chart
#'
#' Builds on `theme_minimal()`: all grid lines are removed except the major
#' vertical (x) lines, axis ticks are hidden, and text sizes, colours and the
#' font family are set.
#'
#' @param ... Optional extra `theme()` settings. They are applied last, so
#'   they override the defaults defined here. Previously these arguments were
#'   accepted but ignored.
#' @return A ggplot2 theme object that can be added to a plot with `+`.
my_theme <- function(...) {
  theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      # Re-enable only the vertical guide lines.
      panel.grid.major.x = element_line(color = "gray", linetype = 8),
      axis.ticks = element_blank(),
      axis.text.x = element_text(size = 12, color = "gray50"),
      plot.subtitle = element_text(color = "gray50", size = 12),
      plot.caption = element_text(size = 12, color = "grey50"),
      text = element_text(family = "OfficinaSansITC", size = 18)
    ) +
    # Caller-supplied overrides; a no-op when nothing is passed.
    theme(...)
}


# One horizontal bar per country, drawn as three consecutive thick segments:
# service share, then industrial share, then the remainder up to 100 %.
ggplot(data = gdp) +
  # Service: from 0 to the service share.
  geom_segment(
    mapping = aes(
      x = 0, xend = ser_per,
      y = con, yend = con,
      color = "Service"
    ),
    size = 7
  ) +
  # Industrial: stacked directly after the service share.
  geom_segment(
    mapping = aes(
      x = ser_per, xend = ser_per + indus_per,
      y = con, yend = con,
      color = "Industrial"
    ),
    size = 7
  ) +
  # Agricultural: fills the rest of the bar up to 1.
  geom_segment(
    mapping = aes(
      x = ser_per + indus_per, xend = 1,
      y = con, yend = con,
      color = "Agricultural"
    ),
    size = 7
  ) +
  # Service-share label printed in white at the left edge of each bar.
  geom_text(
    mapping = aes(x = 0, y = con, label = label),
    hjust = -0.3,
    color = "white"
  ) +
  scale_color_manual(
    name = "",
    values = c('#e41a1c','#377eb8', '#4daf4a')
  ) +
  scale_x_continuous(
    labels = scales::percent,
    limits = c(0, 1.03),
    expand = c(0, 0)
  ) +
  my_theme() +
  # Plot-specific tweaks on top of the shared theme; the y-axis label colours
  # come from the `col` column of `gdp`.
  theme(
    legend.position = "top",
    legend.text = element_text(size = 11, color = "grey50"),
    axis.text.y = element_text(color = gdp$col, size = 12)
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "GDP Sector Composition by 36 Countries Selected in 2015",
    subtitle = "The figures are based on nominal GDP and GDP (PPP) estimates and sector composition ratios provided by\nthe CIA World Factbook at market or government official exchange rates.",
    caption = "Data Source: https://en.wikipedia.org/wiki/List_of_countries_by_GDP_sector_composition"
  )