# NOTE(review): this removes every object from the global environment of the
# calling session before the script runs; confirm that is intended.
rm(list = ls())
library(tidyverse)
library(magrittr)
library(scales)
library(rvest)
library(stringi)
# Wikipedia page that holds the GDP sector composition tables.
link <- "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_sector_composition"
# Download the page, pull one table out of it and keep the country name plus
# the three sector-share columns (still as text at this point).
gdp <- link %>%
read_html() %>%
# The table is picked by position (5th table under the content div).
# NOTE(review): a positional XPath breaks if the page layout changes - verify.
html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[5]') %>%
html_table() %>%
as.data.frame() %>%
# Rename Country.Economy to `con`; keep Agri., Indus. and Serv. as they are.
select(con = Country.Economy, Agri., Indus., Serv.) %>%
# Apply the "Latin-ASCII" transliteration to the country names.
mutate(con = stri_trans_general(con, "Latin-ASCII"))
# Helper function that extracts the percentage values:
library(stringr)
# Convert percentage strings such as "43.5%" into proportions (0.435).
#
# Args:
#   x: Vector of percentages, normally character. A "%" sign, surrounding
#      whitespace and bracketed footnote markers such as "[1]" are tolerated.
#
# Returns:
#   Numeric vector with the same length as `x`. Entries that cannot be parsed
#   as a number become NA (with R's usual coercion warning).
get_num <- function(x) {
  # Strip the percent sign and footnote markers, then parse the whole number
  # in one step. Parsing the integer and fractional parts separately is only
  # right for exactly one decimal digit, yields a zero-length result when no
  # value contains a decimal point, and required a global scratch variable.
  cleaned <- gsub("%|\\[[^]]*\\]", "", as.character(x))
  as.numeric(cleaned) / 100
}
# Apply the function:
# Countries whose axis labels get the highlight color below.
my_note <- c("China", "Japan", "United States")
# Update `gdp` in place (%<>% is magrittr's assignment pipe): convert the
# three sector columns with get_num() and pick an axis-label color per
# country ("#8B2323" for the countries in my_note, "gray50" otherwise).
gdp %<>% mutate(agri_per = get_num(Agri.),
indus_per = get_num(Indus.),
ser_per = get_num(Serv.),
col = case_when(con %in% my_note ~ "#8B2323", TRUE ~ "gray50"))
# Drop repeated country rows and the "World" row, build the text label shown
# on each bar, and order the rows by service share.
gdp %<>%
filter(!duplicated(con), con != "World") %>%
# Label is the service share times 100 followed by " %", e.g. "43 %".
mutate(label = paste(ser_per*100, "%", sep = " ")) %>%
# NOTE(review): hard-coded patch for the single label "43 %"; presumably
# meant to show one decimal place consistently - confirm, since other whole
# numbers are not affected by this rule.
mutate(label = case_when(label == "43 %" ~ "43.0 %", TRUE ~ label)) %>%
arrange(ser_per) %>%
# Fix the factor levels to the sorted row order (ascending ser_per).
mutate(con = factor(con, levels = con))
# Define a custom theme function:
# Font setup through the extrafont package.
library(extrafont)
# Import font files whose name matches the pattern, without asking for
# confirmation.
# NOTE(review): this runs on every execution of the script; presumably it only
# needs to happen once per machine - confirm.
font_import(pattern = "OfficinaSansITCMedium.ttf", prompt = FALSE)
# Load the imported fonts for the "win" device.
# NOTE(review): device = "win" looks Windows-specific; verify on other
# platforms.
extrafont::loadfonts(device = "win")
# Build the custom plot theme used by this script.
#
# Starts from theme_minimal(), removes minor and major grid lines except the
# vertical major lines (gray, linetype 8), removes axis ticks, and sets gray
# 12-pt x-axis text, subtitle and caption on top of an 18-pt
# "OfficinaSansITC" base text.
#
# Args:
#   ...: Optional theme settings, passed on to ggplot2::theme(). They are
#        added last, so they override the defaults defined here.
#
# Returns:
#   A ggplot2 theme object that can be added to a plot with `+`.
my_theme <- function(...) {
  theme_minimal() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.major.x = element_line(color = "gray", linetype = 8),
      axis.ticks = element_blank(),
      axis.text.x = element_text(size = 12, color = "gray50"),
      plot.subtitle = element_text(color = "gray50", size = 12),
      plot.caption = element_text(size = 12, color = "grey50"),
      text = element_text(family = "OfficinaSansITC", size = 18)
    ) +
    # `...` was accepted but silently ignored before. It is forwarded in a
    # separate theme() call so a caller-supplied setting cannot clash with the
    # named defaults above; with no arguments this adds nothing.
    theme(...)
}
# Draw one horizontal bar per country, split into three consecutive segments:
# service from 0 to ser_per, industrial from ser_per to ser_per + indus_per,
# and agricultural from there to 1.
gdp %>%
ggplot() +
geom_segment(aes(x = 0, xend = ser_per,
y = con, yend = con, color = "Service"), size = 7) +
geom_segment(aes(x = ser_per, xend = ser_per + indus_per,
y = con, yend = con, color = "Industrial"), size = 7) +
# The last segment always ends at 1 rather than at the sum of the three shares.
geom_segment(aes(x = ser_per + indus_per, xend = 1,
y = con, yend = con, color = "Agricultural"), size = 7) +
my_theme() +
# The color values are unnamed.
# NOTE(review): presumably they are matched to the keys in alphabetical order
# (Agricultural, Industrial, Service) - confirm against the rendered legend.
scale_color_manual(values = c('#e41a1c','#377eb8', '#4daf4a'), name = "") +
theme(legend.position = "top") +
theme(legend.text = element_text(size = 11, color = "grey50")) +
# Per-country label colors come from gdp$col, in the row order of `gdp`.
theme(axis.text.y = element_text(color = gdp$col, size = 12)) +
# No padding around the x axis; the upper limit sits slightly above 100%.
scale_x_continuous(expand = c(0, 0),
limits = c(0, 1.03),
labels = scales::percent) +
# White service-share label anchored at x = 0 and nudged right via hjust.
geom_text(aes(x = 0, y = con, label = label), hjust = -0.3, color = "white") +
# NOTE(review): the country count and year in the title are hard-coded text;
# verify them against the scraped data.
labs(x = NULL, y = NULL,
title = "GDP Sector Composition by 36 Countries Selected in 2015",
subtitle = "The figures are based on nominal GDP and GDP (PPP) estimates and sector composition ratios provided by\nthe CIA World Factbook at market or government official exchange rates.",
caption = "Data Source: https://en.wikipedia.org/wiki/List_of_countries_by_GDP_sector_composition")