Bài viết trình bày 30 quốc gia có chiều cao trung bình thấp nhất thế giới; đứng cuối bảng là Indonesia với chiều cao trung bình của nam và nữ lần lượt là 1.58 m và 1.47 m. Đáng chú ý, Việt Nam là quốc gia thấp thứ 2 thế giới với chiều cao trung bình của nam và nữ lần lượt là 162.1 cm và 152.2 cm (theo World Population Review).
# NOTE: `rm(list = ls())` was removed on purpose. It only deletes the objects
# of the global environment (attached packages and options stay as they are)
# and it silently wipes the workspace of anyone who source()s this script.
# Restart the R session to run the script from a clean state instead.
# Load some R packages:
library(tidyverse)
library(extrafont)
library(ggthemes)
library(grid)
library(rvest)
# Download the World Population Review page, take the first HTML table on it,
# keep its first three columns and give them short, fixed names.
Dataheight <- read_html(
  "http://worldpopulationreview.com/countries/average-height-by-country/"
) %>%
  html_table() %>%
  .[[1]] %>%
  select(1:3) %>%
  setNames(c("country", "h_male", "h_female"))
#----------------
# Prepare data:
#----------------
# Coerce both height columns to numeric. `as.numeric()` turns strings that do
# not parse as numbers into NA; rows with NA heights are dropped when the
# per-sex tables are built below.
# `mutate(across(...))` replaces the superseded `mutate_at()`.
Dataheight <- Dataheight %>%
  mutate(across(all_of(c("h_male", "h_female")), as.numeric))
# Male heights only: drop rows without a male height and build a text label
# with exactly one decimal place.
# sprintf("%.1f") pads whole numbers with ".0" (160 -> "160.0"). Appending
# ".0" to every label that is not 5 characters long, as was done before,
# corrupts values outside 100-999.9 (99.5 would become "99.5.0").
o_male <- Dataheight %>%
  select(country, h_male) %>%
  filter(!is.na(h_male)) %>%
  mutate(label_male = sprintf("%.1f", round(h_male, 1)))
# Female heights only: drop rows without a female height and build a text
# label with exactly one decimal place.
# sprintf("%.1f") pads whole numbers with ".0" (150 -> "150.0"). Appending
# ".0" to every label that is not 5 characters long, as was done before,
# corrupts values outside 100-999.9 (99.5 would become "99.5.0").
o_female <- Dataheight %>%
  select(country, h_female) %>%
  filter(!is.na(h_female)) %>%
  mutate(label_female = sprintf("%.1f", round(h_female, 1)))
# Combine both sexes: keep the countries present in both tables, sort them by
# ascending male height and freeze that order as the factor levels of
# `country`.
#   end_point     - male height minus 3.3; end of the dotted guide line drawn
#                   in the plot below.
#   country_color - "red" for Vietnam, "white" otherwise (not referenced by
#                   the plotting code visible in this script).
finaldata <- o_female %>%
  inner_join(o_male, by = "country") %>%
  arrange(h_male) %>%
  mutate(
    country = factor(country, levels = country),
    end_point = h_male - 3.3,
    country_color = case_when(country == "Vietnam" ~ "red", TRUE ~ "white")
  )
#-----------------------
# Data Visualization
#-----------------------
library(hrbrthemes)
library(showtext)
# Use the Google font "Ubuntu Condensed" for the plot text. It is registered
# under the short family name kept in `my_font`, which the plot refers to.
showtext_auto()
my_font <- "ubu"
font_add_google(name = "Ubuntu Condensed", family = my_font)
# Dumbbell chart of female vs. male average height for the 30 countries with
# the smallest male height (ties kept), one row per country.
# NOTE(review): theme_econodist() appears to come from the ggeconodist
# package, which no library() call visible in this script attaches - confirm
# that it is available before running.
finaldata %>%
  # slice_min() is the current replacement for the superseded top_n(-30, ...).
  slice_min(h_male, n = 30, with_ties = TRUE) %>%
  ggplot(aes(x = country)) +
  # Connector between the female and the male point of each country.
  geom_segment(
    aes(y = h_female, yend = h_male, xend = country),
    color = "grey30"
  ) +
  geom_point(aes(y = h_male, color = "Male"), size = 4) +
  geom_point(aes(y = h_female, color = "Female"), size = 4) +
  # Value labels: female to the left of its point, male to the right.
  geom_text(
    aes(y = h_female, label = label_female),
    hjust = 1.4, color = "black", size = 4, family = my_font
  ) +
  geom_text(
    aes(y = h_male, label = label_male),
    hjust = -0.4, color = "black", size = 4, family = my_font
  ) +
  # Dotted guide line from y = 139 up to `end_point`.
  geom_segment(
    aes(y = 139, yend = end_point, xend = country),
    color = "gray80", linetype = 3
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(138, 180), expand = c(0, 0)) +
  # Named values tie each colour to its legend key explicitly instead of
  # relying on the alphabetical order of the keys.
  scale_color_manual(
    name = "",
    values = c(Female = "#0693e3", Male = "#eb144c")
  ) +
  labs(
    x = NULL, y = NULL,
    title = "Average Height By 30 Shortest Countries 2021",
    subtitle = "Unit of measurement: centimeters",
    caption = "Data Source: http://worldpopulationreview.com"
  ) +
  theme_econodist() +
  # Single theme() call; the former `legend.position = c(0.93, 0.5)` was dead
  # code because `legend.position = "top"` overrode it on the next line.
  theme(
    axis.text.y = element_text(size = 9),
    axis.text.x = element_blank(),
    legend.position = "top",
    legend.text = element_text(
      color = "black", size = 12, face = "italic", family = my_font
    ),
    plot.margin = unit(c(1, 1.5, 1, 1), "cm"),
    plot.title = element_text(family = my_font, size = 22),
    plot.subtitle = element_text(size = 12, color = "black", face = "italic"),
    plot.caption = element_text(size = 10, face = "italic"),
    panel.grid = element_blank()
  )