# Clear workspace:
# rm(list = ls())
# # Load some packages:
library(rvest)
library(tidyverse)
library(magrittr)
#
# # A function collect Vietnam population data for both sexes with input is a given year:
#
# get_population_by_year <- function(year_selected) {
# base_url <- "https://www.census.gov/data-tools/demo/idb/region.php?N=%20Results%20&T=15&A=separate&RT=0&Y="
# link <- paste0(base_url, year_selected, "&R=-1&C=VM")
#
# link %>%
# read_html() %>%
# html_table(fill = TRUE) %>%
# .[[1]] -> pop_df
#
# names(pop_df) <- str_replace_all(names(pop_df), " ", "_")
# return(pop_df)
#
# }
#
# # Get data from 2000 to 2027:
#
# pop_data <- lapply(2000:2027, get_population_by_year)
#
# # Convert pop_data to data frame:
#
# pop_data_df <- do.call("bind_rows", pop_data)
# Data Preprocessing:
# saveRDS(pop_final,'D:\\VUDT\\8.TRAINING\\Chart\\pop_final.RDS')
pop_final <- readRDS('D:\\VUDT\\8.TRAINING\\Chart\\pop_final.RDS')
# pop_final <- pop_data_df %>%
# select(Year, Age, Male_Population, Female_Population) %>%
# filter(Age %in% as.character(20:35)) %>%
# mutate(Age = as.integer(Age)) %>%
# mutate_if(is.character, function(x) {str_replace_all(x, "\\,", "") %>% as.integer()}) %>%
# select(-Age) %>%
# gather(Gender, n_pop, -Year) %>%
# group_by(Year, Gender) %>%
# summarise(total_pop = sum(n_pop)) %>%
# ungroup()
pop_final_wide <- pop_final %>%
spread(key = "Gender", value = "total_pop") %>%
mutate(n_gap = Male_Population - Female_Population,
max_gap_gender = case_when(n_gap == max(n_gap) ~ "Yes", TRUE ~ "No"),
max_female_pop = case_when(Female_Population == max(Female_Population) ~ "Yes", TRUE ~ "No"),
max_male_pop = case_when(Male_Population == max(Male_Population) ~ "Yes", TRUE ~ "No"),
label_text = case_when(Year == 2000 ~ "2000", Year == 2027 ~ "2027", Year == 2018 ~ "2018", TRUE ~ ""),
label_color = case_when(Year == 2018 ~ "firebrick", TRUE ~ "gray60"),
rate_fa = n_gap / Male_Population)
upper_num <- pop_final_wide %>%
filter(Year == 2018) %>%
pull(Female_Population) / 1000000
low_num <- pop_final_wide %>%
pull(Female_Population) %>%
min() / 1000000
gender_pop_gap <- pop_final_wide %>%
filter(Year == 2018) %>%
pull(n_gap)
# install.packages('extrafont')
library(extrafont)
my_font <- "OfficinaSansITC"
ggplot() +
geom_segment(data = pop_final_wide %>% mutate(Female_Population = Female_Population / 1000000, Male_Population = Male_Population / 1000000),
aes(x = Year, xend = Year, y = Female_Population, yend = Male_Population),
size = 0.7,
color = "gray70") +
geom_segment(data = pop_final_wide %>% mutate(Female_Population = Female_Population / 1000000, Male_Population = Male_Population / 1000000),
aes(x = 2018, xend = 2018, y = low_num, yend = upper_num - 0.008*upper_num),
color = "firebrick",
arrow = arrow(length = unit(0.2, "cm")),
size = 0.5, alpha = 0.4) +
geom_point(data = pop_final %>% mutate(total_pop = total_pop / 1000000), aes(Year, total_pop, color = Gender), size = 4) +
annotate("text", label = "Trong n\u0103m 2018, c\u00F3 773.765\n \u0111a\u0300n \u00F4ng \u0111\u00F4\u0323c th\u00E2n.",
family = my_font,
x = 2010,
y = 12.1,
color = "gray20",
size = 4.5, hjust = 0, vjust = 1) +
scale_color_manual(name = "", values = c("#3E606F", "#8C3F4D"), labels = c("N\u01B0\u0303", "Nam")) +
scale_x_continuous(breaks = seq(2000, 2027, 1), labels = pop_final_wide$label_text) +
scale_y_continuous(labels = c("11", "12", "13", "14")) +
theme(plot.background = element_rect(fill = "#EFF2F4", color = NA)) +
theme(panel.background = element_rect(fill = "#EFF2F4", color = NA)) +
theme(legend.background = element_rect(fill = "#EFF2F4", color = NA)) +
theme(legend.text = element_text(family = my_font, size = 10, face = "bold", color = "gray20")) +
theme(plot.margin = unit(c(1, 1, 1, 1), "cm")) +
theme(axis.text.x = element_text(face = "bold", size = 14, color = pop_final_wide$label_color, family = my_font)) +
theme(axis.text.y = element_text(color = "gray30", face = "bold", size = 14, family = my_font)) +
theme(plot.title = element_text(family = my_font, size = 20, color = "gray20")) +
theme(plot.subtitle = element_text(family = my_font, size = 14, color = "gray30")) +
theme(plot.caption = element_text(family = my_font, size = 14, color = "gray50")) +
theme(axis.ticks = element_blank()) +
theme(panel.grid.major.x = element_blank()) +
theme(panel.grid.minor.x = element_blank()) +
theme(legend.position = "top") +
theme(axis.title.y = element_text(family = my_font, size = 12, colour = "gray20")) +
labs(x = NULL, y = "D\u00E2n s\u1ED1 trong \u0111\u1ED9 tu\u1ED5i k\u1EBFt h\u00F4n\n (Tri\u00EA\u0323u ng\u01B0\u01A1\u0300i)",
title = "T\u1EA1i sao h\u00E0ng tri\u1EC7u \u0111\u00E0n \u00F4ng Vi\u1EC7t Nam\n ph\u1EA3i s\u1ED1ng \u0111\u1ED9c th\u00E2n trong t\u01B0\u01A1ng lai?",
subtitle = "Ta\u0323i Vi\u00EA\u0323t Nam t\u01B0\u0300 nh\u1EEFng n\u0103m 2000, kho\u1EA3ng c\u00E1ch gi\u1EDBi t\u00EDnh \u1EDF nh\u00F3m tu\u1ED5i\n k\u1EBFt h\u00F4n (t\u1EEB 20 \u0111\u1EBFn 35) \u0111\u00E3 t\u0103ng l\u00EAn nhanh cho\u0301ng\r s\u00F4\u0301 l\u01B0\u01A1\u0323ng\r nam gi\u01A1\u0301i\n v\u01B0\u1EE3t tr\u1ED9i so v\u1EDBi n\u01B0\u0303 gi\u01A1\u0301i\r\n",
caption = "Ngu\u00F4\u0300n: https://www.census.gov") -> a3
a3
---
title: "Ế đang là xu thế"
a!uthor: "RChart"
date: "May 27, 2019"
output:
  html_document: 
    code_download: true
    code_folding: hide
    highlight: pygments
    # number_sections: yes
    theme: "flatly"
    toc: TRUE
    toc_float: TRUE
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,warning = F,error = F,message = F)
```



```{r}

             # Clear workspace: 
             # rm(list = ls())
             
             # # Load some packages: 
             library(rvest)
             library(tidyverse)
             library(magrittr)
             # 
             # # A function collect Vietnam population data for both sexes with input is a given year: 
             # 
             # get_population_by_year <- function(year_selected) {
             #   base_url <- "https://www.census.gov/data-tools/demo/idb/region.php?N=%20Results%20&T=15&A=separate&RT=0&Y="
             #   link <- paste0(base_url, year_selected, "&R=-1&C=VM")
             #   
             #   link %>% 
             #     read_html() %>% 
             #     html_table(fill = TRUE) %>% 
             #     .[[1]] -> pop_df
             #   
             #   names(pop_df) <- str_replace_all(names(pop_df), " ", "_")
             #   return(pop_df)
             #   
             # }
             # 
             # # Get data from 2000 to 2027: 
             # 
             # pop_data <- lapply(2000:2027, get_population_by_year)
             # 
             # # Convert pop_data to data frame: 
             # 
             # pop_data_df <- do.call("bind_rows", pop_data)             

                                       
             # Data Preprocessing: 
             # saveRDS(pop_final,'D:\\VUDT\\8.TRAINING\\Chart\\pop_final.RDS')
             pop_final <- readRDS('D:\\VUDT\\8.TRAINING\\Chart\\pop_final.RDS')               
             # pop_final <- pop_data_df %>% 
             #   select(Year, Age, Male_Population, Female_Population) %>% 
             #   filter(Age %in% as.character(20:35)) %>% 
             #   mutate(Age = as.integer(Age)) %>% 
             #   mutate_if(is.character, function(x) {str_replace_all(x, "\\,", "") %>% as.integer()}) %>% 
             #   select(-Age) %>% 
             #   gather(Gender, n_pop, -Year) %>% 
             #   group_by(Year, Gender) %>% 
             #   summarise(total_pop = sum(n_pop)) %>% 
             #   ungroup()
             
             
             pop_final_wide <- pop_final %>% 
               spread(key = "Gender", value = "total_pop") %>% 
               mutate(n_gap = Male_Population - Female_Population, 
                      max_gap_gender = case_when(n_gap == max(n_gap) ~ "Yes", TRUE ~ "No"), 
                      max_female_pop = case_when(Female_Population == max(Female_Population) ~ "Yes", TRUE ~ "No"), 
                      max_male_pop = case_when(Male_Population == max(Male_Population) ~ "Yes", TRUE ~ "No"), 
                      label_text = case_when(Year == 2000 ~ "2000", Year == 2027 ~ "2027", Year == 2018 ~ "2018", TRUE ~ ""), 
                      label_color = case_when(Year == 2018 ~ "firebrick", TRUE ~ "gray60"), 
                      rate_fa = n_gap / Male_Population)
             
             
             upper_num <- pop_final_wide %>% 
               filter(Year == 2018) %>% 
               pull(Female_Population) / 1000000
             
             low_num <- pop_final_wide %>% 
               pull(Female_Population) %>% 
               min() / 1000000
             
             
             gender_pop_gap <- pop_final_wide %>% 
               filter(Year == 2018) %>% 
               pull(n_gap) 
             
             
             
             # install.packages('extrafont')
             library(extrafont)
             my_font <- "OfficinaSansITC"
             
             ggplot() + 
               geom_segment(data = pop_final_wide %>% mutate(Female_Population = Female_Population / 1000000, Male_Population = Male_Population / 1000000),
                            aes(x = Year, xend = Year, y = Female_Population, yend = Male_Population),
                            size = 0.7, 
                            color = "gray70") +
               
               geom_segment(data = pop_final_wide %>% mutate(Female_Population = Female_Population / 1000000, Male_Population = Male_Population / 1000000),
                            aes(x = 2018, xend = 2018, y = low_num, yend = upper_num - 0.008*upper_num), 
                            color = "firebrick", 
                            arrow = arrow(length = unit(0.2, "cm")), 
                            size = 0.5, alpha = 0.4) + 
               
               geom_point(data = pop_final %>% mutate(total_pop = total_pop / 1000000), aes(Year, total_pop, color = Gender), size = 4) + 
               
               annotate("text", label = "Trong n\u0103m 2018, c\u00F3 773.765\n \u0111a\u0300n \u00F4ng \u0111\u00F4\u0323c th\u00E2n.", 
                        family = my_font, 
                        x = 2010,
                        y = 12.1, 
                        color = "gray20", 
                        size = 4.5, hjust = 0, vjust = 1) + 
               
               scale_color_manual(name = "", values = c("#3E606F", "#8C3F4D"), labels = c("N\u01B0\u0303", "Nam")) + 
               scale_x_continuous(breaks = seq(2000, 2027, 1), labels = pop_final_wide$label_text) + 
               scale_y_continuous(labels = c("11", "12", "13", "14")) + 
               theme(plot.background = element_rect(fill = "#EFF2F4", color = NA)) + 
               theme(panel.background = element_rect(fill = "#EFF2F4", color = NA)) +
               theme(legend.background = element_rect(fill = "#EFF2F4", color = NA)) + 
               theme(legend.text = element_text(family = my_font, size = 10, face = "bold", color = "gray20")) + 
               theme(plot.margin = unit(c(1, 1, 1, 1), "cm")) + 
               theme(axis.text.x = element_text(face = "bold", size = 14, color = pop_final_wide$label_color, family = my_font)) +
               theme(axis.text.y = element_text(color = "gray30", face = "bold", size = 14, family = my_font)) +
               theme(plot.title = element_text(family = my_font, size = 20, color = "gray20")) + 
               theme(plot.subtitle = element_text(family = my_font, size = 14, color = "gray30")) + 
               theme(plot.caption = element_text(family = my_font, size = 14, color = "gray50")) + 
               theme(axis.ticks = element_blank()) + 
               theme(panel.grid.major.x = element_blank()) + 
               theme(panel.grid.minor.x = element_blank()) + 
               theme(legend.position = "top") +
               theme(axis.title.y = element_text(family = my_font, size = 12, colour = "gray20")) + 
               
               labs(x = NULL, y = "D\u00E2n s\u1ED1 trong \u0111\u1ED9 tu\u1ED5i k\u1EBFt h\u00F4n\n (Tri\u00EA\u0323u ng\u01B0\u01A1\u0300i)", 
                    title = "T\u1EA1i sao h\u00E0ng tri\u1EC7u \u0111\u00E0n \u00F4ng Vi\u1EC7t Nam\n ph\u1EA3i s\u1ED1ng \u0111\u1ED9c th\u00E2n trong t\u01B0\u01A1ng lai?", 
                    subtitle = "Ta\u0323i Vi\u00EA\u0323t Nam t\u01B0\u0300 nh\u1EEFng n\u0103m 2000, kho\u1EA3ng c\u00E1ch gi\u1EDBi t\u00EDnh \u1EDF nh\u00F3m tu\u1ED5i\n k\u1EBFt h\u00F4n (t\u1EEB 20 \u0111\u1EBFn 35) \u0111\u00E3 t\u0103ng l\u00EAn nhanh cho\u0301ng\r s\u00F4\u0301 l\u01B0\u01A1\u0323ng\r nam gi\u01A1\u0301i\n v\u01B0\u1EE3t tr\u1ED9i so v\u1EDBi n\u01B0\u0303 gi\u01A1\u0301i\r\n", 
                    caption = "Ngu\u00F4\u0300n: https://www.census.gov")       -> a3      
             
             
a3
             

```

