1 Administrative Map of Vietnam

1.1 Vietnam Map - Whole country

# Clear workspace

# Load package
library(tidyverse)

# Get geo-spatial data for Vietnam 
link1 <- "https://raw.githubusercontent.com/nguyenduy1133/data/main/Dia_phan_Tinh_cap_nhat.geojson?fbclid=IwAR1cPIIswZ9y8ZvsmW8ioqh57malxnlrBr2L3EjYRfMPiDjCqu4kCA2PUxQ"

link2 <- "https://data.opendevelopmentmekong.net/dataset/999c96d8-fae0-4b82-9a2b-e481f6f50e12/resource/2818c2c5-e9c3-440b-a9b8-3029d7298065/download/diaphantinhenglish.geojson?fbclid=IwAR1coUVLkuEoJRsgaH81q6ocz1nVeGBirqpKRBN8WWxXQIJREUL1buFi1eE"

vn_spatial <- sf::st_read(link2)
## Reading layer `OGRGeoJSON' from data source 
##   `https://data.opendevelopmentmekong.net/dataset/999c96d8-fae0-4b82-9a2b-e481f6f50e12/resource/2818c2c5-e9c3-440b-a9b8-3029d7298065/download/diaphantinhenglish.geojson?fbclid=IwAR1coUVLkuEoJRsgaH81q6ocz1nVeGBirqpKRBN8WWxXQIJREUL1buFi1eE' 
##   using driver `GeoJSON'
## Simple feature collection with 63 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 102.1421 ymin: 6.953306 xmax: 116.9473 ymax: 23.3939
## Geodetic CRS:  WGS 84
# remane 
vn_spatial %>% mutate(Name = case_when(Name == "Dak Lak" ~ "Dac Lac", TRUE ~ Name)) -> vn_spatial
ggplot() + 
  geom_sf(data = vn_spatial) + 
  labs(title = "Vietnam Map - Version 1")

ggplot() + 
  geom_sf(data = vn_spatial, aes(fill = Name), show.legend = FALSE) + 
  labs(title = "Vietnam Map - Version 2")

1.2 Vietnam provice / region

library(rvest)

# Collect region from wiki

provinces <- "https://en.wikipedia.org/wiki/Provinces_of_Vietnam"

provinces %>% 
  read_html() %>% 
  html_nodes(xpath = '//*[@id="mw-content-text"]/div[1]/table[6]') %>% 
  html_table() %>% 
  .[[1]] -> provinces_vn
# Function extracts data in table: 

extract_table <- function(i) {
  
  provinces_vn %>% 
    slice(i) %>% 
    pull(3) %>% # column 3
    str_split("\n", simplify = TRUE) %>% 
    str_replace_all("†", "") %>% 
    str_squish() %>% 
    as.vector() -> province_names
  
  provinces_vn %>% 
    slice(i) %>% 
    pull(1) %>% # column 1
    str_split("\\(", simplify = TRUE) %>% 
    str_split("\\,", simplify = TRUE) %>% 
    str_replace_all("Vietnam", "") %>% 
    as.vector() %>% 
    str_squish() -> region
  
  provinces_vn %>% 
    slice(i) %>% 
    pull(2) %>% # column 2
    str_split("\\(", simplify = TRUE) %>% 
    str_replace_all("\\)", "") %>% 
    str_replace_all("Vietnam", "") %>% 
    as.vector() %>% 
    str_squish() -> sub_region 
  
  tibble(province = province_names, region_vn = region[2], region_en = region[1], 
         sub_region_vn = sub_region[2], sub_region_en = sub_region[1]) -> df_final
  
  return(df_final)
  
}
# Use the function: 

lapply(1:nrow(provinces_vn), extract_table) -> province_region

do.call("bind_rows", province_region) -> province_region_vietnam

# Rename for some provinnces: 

library(stringi)

province_region_vietnam %>% 
  mutate(province_latin = stri_trans_general(province, "Latin-ASCII")) %>% 
  mutate(province_latin = case_when(province_latin == "Thua Thien-Hue" ~ "Thua Thien - Hue", 
                                    province_latin == "Ba Ria-Vung Tau" ~ "Ba Ria - Vung Tau", 
                                    province_latin == "Ho Chi Minh City" ~ "TP. Ho Chi Minh", 
                                    TRUE ~ province_latin)) -> province_region_vietnam

1.3 Join Geo-spatial and Region Datasets and mapping

# Combine the two datasets: 

full_join(vn_spatial, province_region_vietnam, by = c("Name" = "province_latin")) -> vn_spatial_region

# Administrative map of Vietnam: 

ggplot() + 
  geom_sf(data = vn_spatial_region, aes(fill = sub_region_en)) + 
  labs(title = "Administrative Regions of Viet Nam")

# Elegant map of Vietnam: 

my_colors <- c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00','#ffff33','#a65628','#f781bf')
bgr_color <- "azure"

library(extrafont)
my_font <- "Roboto Condensed"
my_font
## [1] "Roboto Condensed"
ggplot() + 
  geom_sf(data = vn_spatial_region, aes(fill = sub_region_en)) + 
  scale_fill_manual(values = my_colors, name = "Region:") + 
  theme(legend.position = c(0.82, 0.83)) + 
  # theme(text = element_text(family = my_font)) + 
  theme(legend.key.size = unit(0.5, "cm"), legend.key.width = unit(0.5, "cm") ) + 
  # theme(legend.text = element_text(family = my_font, color = "grey20")) + 
  theme(panel.background = element_rect(fill = bgr_color)) +
  theme(plot.background = element_rect(fill = bgr_color)) + 
  theme(legend.background = element_rect(fill = bgr_color)) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  theme(plot.title = element_text(size = 16)) + 
  labs(title = "Administrative Regions of Viet Nam", 
       caption = "Data Source: https://data.opendevelopmentmekong.net")

1.4 Population by provinces visualisation

1.4.1 Get population

# Extract all links 
all_links <- "https://www.citypopulation.de/Vietnam.html"
pg <- read_html(all_links)
m <- html_nodes(pg, "a")
k <- html_attr(m, "href")
all_links_communes_level <-  str_c("https://www.citypopulation.de/en/vietnam/", k[-c(1:6)])
# Function for getting popolation by commune level
get_pop_data <- function(link) {
  
  link %>% 
    read_html() %>% 
    html_nodes(xpath = '//*[@id="tl"]') %>% 
    html_table(fill = TRUE) %>% 
    .[[1]] -> df
  
  df %>%   
    select(1:3) -> df
  
  names(df) <- c("Name", "Status", "Pop")
  
  df %>% 
    mutate(Name_Latin = stringi::stri_trans_general(Name, "Latin-ASCII"), 
           Pop = str_replace_all(Pop, "[^0-9]", "") %>% as.integer()) %>% 
    return()
  
}

# Use the function: 

lapply(all_links_communes_level, get_pop_data) -> pop_cummunes_list

do.call("bind_rows", pop_cummunes_list) -> pop_cummunes_df

# Show some observations: 

library(knitr)

pop_cummunes_df %>% 
  select(-1) %>% 
  filter(!str_detect(Status, "District")) %>% 
  head() %>% 
  kable(caption = "Table 1: Population by Commune")
Table 1: Population by Commune
Status Pop Name_Latin
Ward 18499 An Thoi
Ward 18307 Binh Thuy
Ward 11745 Bui Huu Nghia
Ward 16450 Long Hoa
Ward 15232 Long Tuyen
Ward 10773 Thoi An Dong

1.4.2 Get Geo-spatial data with function raster::getData()

# Population by district level: 

commune_df_pop <- pop_cummunes_df %>% filter(str_detect(Status, "District"))

# Get geo-spatial data by district level: 

vietnam_dis <- raster::getData("GADM", country = "Vietnam", level = 2)

1.4.3 Map

# Convert to data frame: 

vietnam_df <- vietnam_dis %>% fortify(region = "NAME_2")

vietnam_df %>% 
  mutate(dis_names = stri_trans_general(id, "Latin-ASCII")) -> vietnam_df

# Join the two data set: 

full_join(vietnam_df, commune_df_pop, by = c("dis_names" = "Name_Latin")) -> df_pop_dis_vn

df_pop_dis_vn %>% mutate(Pop = Pop / 1000) -> df_pop_dis_vn
# Choropleth map: 
special_provinces <- c("Khanh Hoa", "Da Nang")
library(viridis)

ggplot() +
  geom_sf(data = vn_spatial_region %>% filter(Name %in% special_provinces)) +  
  geom_polygon(data = df_pop_dis_vn, aes(fill = Pop, x = long, y = lat, group = group), color = "grey80") + 
  theme(legend.position = c(0.8, 0.9)) + 
  # theme(text = element_text(family = my_font)) + 
  theme(legend.key.size = unit(0.5, "cm"), legend.key.width = unit(0.5, "cm") ) + 
  # theme(legend.text = element_text(family = my_font, color = "grey20")) + 
  # theme(legend.title = element_text(family = my_font, color = "black", size = 11)) + 
  theme(panel.background = element_rect(fill = bgr_color)) +
  theme(plot.background = element_rect(fill = bgr_color)) + 
  theme(legend.background = element_rect(fill = bgr_color)) + 
  theme(axis.text = element_blank()) + 
  theme(axis.title = element_blank()) + 
  theme(axis.ticks = element_blank()) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  theme(plot.title = element_text(size = 16)) + 
  labs(title = "Population Density (in thousands) by District for Vietnam", caption = "Data Source: https://www.citypopulation.de/Vietnam.html") + 
  annotate("text", x = 112, y = 18, label = "Paracel Islands\n(Da Nang City)", family = my_font, size = 3, fontface = "italic") +  
  annotate("text", x = 115.5, y = 12.5, label = "Spratly Islands\n(Khanh Hoa)", family = my_font, size = 3, fontface = "italic") + 
  scale_fill_viridis(direction = -1, 
                     option = "C", 
                     name = "Density:", 
                     guide = guide_colourbar(direction = "horizontal",
                                             barheight = unit(3, units = "mm"),
                                             barwidth = unit(30, units = "mm"),
                                             title.hjust = 0.5,
                                             label.hjust = 0.5, 
                                             limits = c(0, 800), 
                                             title.position = "top"))
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"

2 District and Commune Level

nghean <- vietnam_dis[vietnam_dis$NAME_1 == "Nghệ An", ]

nghean_df <- nghean %>% 
  fortify(region = "NAME_2") %>% 
  mutate(id = stri_trans_general(id, "Latin-ASCII"))

ggplot() +
  geom_polygon(data = nghean_df,
               aes(fill = id, x = long, y = lat, group = group), 
               color = "gray69") + 
  # theme(text = element_text(family = my_font)) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  labs(title = "Nghe An Province by District", 
       caption = "Data Source: http://diva-gis.org/gdata")