Dữ liệu được lấy từ https://population.un.org/wpp/Download/Standard/CSV/.
Code được viết dựa theo bài chia sẻ của anh Nguyễn Chí Dũng: https://rpubs.com/chidungkt/505486. Khác biệt về định dạng dữ liệu cũng gây ra không ít khó khăn. Tuy nhiên, “no pain, no gain”.
Dân số trẻ và mất cân bằng giới tính. Nam giới nhiều hơn nữ giới ở nhiều nhóm tuổi dưới 50. Nam giới chết sớm hơn nên sau tuổi 50 số nam giảm nhanh hơn số nữ.
# Setup ----
# Run this script in a fresh R session. The workspace is deliberately not
# cleared with rm(list = ls()): that call deletes the caller's objects but
# leaves attached packages and options untouched, so it does not give a clean
# state anyway.
library(tidyverse)
library(ggthemes)
library(rvest)
library(magrittr)
library(extrafont)

# Data for Figure 1 ----
# Read the population table and keep the 2019 rows for Viet Nam, reduced to
# the age group and the male / female counts. The result is assigned once,
# left to right, instead of overwriting `df` with a trailing `->`.
df <- read.csv("E:/Python/Data/W2019Pop.csv") %>%
  filter(Time == 2019, Location == "Viet Nam") %>%
  select(AgeGrp, PopMale, PopFemale)

# An empty selection would only produce a blank chart further down, so fail
# here with a message that names the filter.
if (nrow(df) == 0) {
  stop('No rows found for Time == 2019 and Location == "Viet Nam".',
       call. = FALSE)
}

# Five-year age groups in ascending order; this order drives the axis order.
age_levels <- c(
  "0-4", "5-9", "10-14", "15-19", "20-24", "25-29",
  "30-34", "35-39", "40-44", "45-49", "50-54",
  "55-59", "60-64", "65-69", "70-74", "75-79",
  "80-84", "85-89", "90-94", "95-99", "100+"
)

# factor() turns any label missing from `age_levels` into NA without saying
# so; report such labels instead of letting them vanish silently.
unknown_groups <- setdiff(unique(as.character(df$AgeGrp)), age_levels)
if (length(unknown_groups) > 0) {
  warning("AgeGrp value(s) not in the expected levels, set to NA: ",
          paste(unknown_groups, collapse = ", "), call. = FALSE)
}

df$AgeGrp <- factor(df$AgeGrp, levels = age_levels)
# Figure 1: population pyramid ----
# Colours are keyed by the source column name, and the legend breaks/labels
# are spelled out, so the colour and label of each bar no longer depend on the
# alphabetical order of the reshaped Gender values.
my_colors <- c(PopFemale = "blue", PopMale = "red")
my_font <- "Arial"

df %>%
  # Negate the male counts so their bars extend to the left of zero.
  mutate(PopMale = -1 * PopMale) %>%
  # Long format: one row per age group and gender column.
  pivot_longer(-AgeGrp, names_to = "Gender", values_to = "Value") %>%
  ggplot(aes(AgeGrp, Value, fill = Gender)) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(-5000, 5000, 1000),
    limits = c(-5000, 5000),
    # The axis treats Value as thousands (a break at 5000 is labelled "5M");
    # labels are derived from the breaks and shown unsigned on both sides.
    labels = function(x) paste0(abs(x) / 1000, "M")
  ) +
  theme_minimal() +
  scale_fill_manual(
    values = my_colors,
    name = "",
    breaks = c("PopFemale", "PopMale"),
    labels = c("Female", "Male")
  ) +
  # Reverse the legend so "Male" is listed first.
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
    # favour of `linewidth`; rename once older ggplot2 need not be supported.
    panel.grid.major.x = element_line(
      linetype = "dotted", size = 0.2, color = "grey40"
    ),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    plot.title = element_text(family = my_font, size = 20),
    plot.subtitle = element_text(family = my_font, size = 13, color = "gray40"),
    plot.caption = element_text(
      family = my_font, size = 12, colour = "grey40", face = "italic"
    ),
    plot.margin = unit(c(1.2, 1.2, 1.2, 1.2), "cm"),
    axis.text = element_text(size = 10, family = my_font),
    legend.text = element_text(
      size = 12, face = "bold", color = "grey30", family = my_font
    )
  ) +
  labs(
    x = "Age in years",
    y = "Population in millions (M)",
    title = "Vietnam Population Pyramid in 2019",
    subtitle = "By Age and Gender",
    caption = "Data Source: https://population.un.org/wpp/Download/Standard/CSV/"
  )
Figure 2
# Setup for Figure 2 ----
# The workspace is deliberately not cleared with rm(list = ls()): every object
# this section uses is (re)assigned below before it is read. The packages are
# attached again so the section can also be run on its own.
library(tidyverse)
library(ggthemes)
library(rvest)
library(magrittr)
library(extrafont)

# Data for Figure 2 ----
# `df` holds the raw table; `df2` holds the 2019 rows for Viet Nam, reduced to
# the age group and the male / female counts. `df2` is assigned exactly once
# (the earlier form assigned it with both `<-` and a trailing `->`).
df <- read.csv("E:/Python/Data/W2019Pop.csv")

df2 <- df %>%
  filter(Time == 2019, Location == "Viet Nam") %>%
  select(AgeGrp, PopMale, PopFemale)

# An empty selection would only produce a blank chart further down, so fail
# here with a message that names the filter.
if (nrow(df2) == 0) {
  stop('No rows found for Time == 2019 and Location == "Viet Nam".',
       call. = FALSE)
}

# Five-year age groups in ascending order; this order drives the axis order.
age_levels <- c(
  "0-4", "5-9", "10-14", "15-19", "20-24", "25-29",
  "30-34", "35-39", "40-44", "45-49", "50-54",
  "55-59", "60-64", "65-69", "70-74", "75-79",
  "80-84", "85-89", "90-94", "95-99", "100+"
)

# factor() turns any label missing from `age_levels` into NA without saying
# so; report such labels instead of letting them vanish silently.
unknown_groups <- setdiff(unique(as.character(df2$AgeGrp)), age_levels)
if (length(unknown_groups) > 0) {
  warning("AgeGrp value(s) not in the expected levels, set to NA: ",
          paste(unknown_groups, collapse = ", "), call. = FALSE)
}

df2$AgeGrp <- factor(df2$AgeGrp, levels = age_levels)
# Figure 2: side-by-side bars per age group ----
# Colours are keyed by the source column name, and the legend breaks/labels
# are spelled out, so the colour and label of each bar no longer depend on the
# alphabetical order of the reshaped Gender values.
my_colors <- c(PopFemale = "#2E74C0", PopMale = "#CB454A")
my_font <- "Arial"

df2 %>%
  # Long format: one row per age group and gender column.
  pivot_longer(-AgeGrp, names_to = "Gender", values_to = "Value") %>%
  ggplot(aes(AgeGrp, Value, fill = Gender)) +
  # geom_col() draws the values as given; bars of one age group sit side by
  # side and get a black outline.
  geom_col(color = "black", position = position_dodge()) +
  scale_y_continuous(
    breaks = seq(0, 5000, 1000),
    limits = c(0, 5000),
    # The axis treats Value as thousands (a break at 5000 is labelled "5M");
    # labels are derived from the breaks.
    labels = function(x) paste0(x / 1000, "M")
  ) +
  theme_minimal() +
  scale_fill_manual(
    values = my_colors,
    name = "",
    breaks = c("PopFemale", "PopMale"),
    labels = c("Female", "Male")
  ) +
  # Reverse the legend so "Male" is listed first.
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
    # favour of `linewidth`; rename once older ggplot2 need not be supported.
    panel.grid.major.y = element_line(
      linetype = "dotted", size = 0.2, color = "grey40"
    ),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    plot.title = element_text(family = my_font, size = 28),
    plot.subtitle = element_text(family = my_font, size = 13, color = "gray40"),
    plot.caption = element_text(
      family = my_font, size = 12, colour = "grey40", face = "italic"
    ),
    plot.margin = unit(c(1.2, 1.2, 1.2, 1.2), "cm"),
    axis.text = element_text(size = 8, family = my_font),
    legend.text = element_text(
      size = 12, face = "bold", color = "grey30", family = my_font
    )
  ) +
  labs(
    x = "Age in years",
    y = "Population in millions (M)",
    title = "Vietnam Population in 2019",
    subtitle = "By Age and Gender",
    caption = "Data Source: https://population.un.org/wpp/Download/Standard/CSV/"
  )