# Clear workspace
# Load package
library(tidyverse)
# Get geo-spatial data for Vietnam
link1 <- "https://raw.githubusercontent.com/nguyenduy1133/data/main/Dia_phan_Tinh_cap_nhat.geojson?fbclid=IwAR1cPIIswZ9y8ZvsmW8ioqh57malxnlrBr2L3EjYRfMPiDjCqu4kCA2PUxQ"
link2 <- "https://data.opendevelopmentmekong.net/dataset/999c96d8-fae0-4b82-9a2b-e481f6f50e12/resource/2818c2c5-e9c3-440b-a9b8-3029d7298065/download/diaphantinhenglish.geojson?fbclid=IwAR1coUVLkuEoJRsgaH81q6ocz1nVeGBirqpKRBN8WWxXQIJREUL1buFi1eE"
vn_spatial <- sf::st_read(link2)## Reading layer `OGRGeoJSON' from data source
## `https://data.opendevelopmentmekong.net/dataset/999c96d8-fae0-4b82-9a2b-e481f6f50e12/resource/2818c2c5-e9c3-440b-a9b8-3029d7298065/download/diaphantinhenglish.geojson?fbclid=IwAR1coUVLkuEoJRsgaH81q6ocz1nVeGBirqpKRBN8WWxXQIJREUL1buFi1eE'
## using driver `GeoJSON'
## Simple feature collection with 63 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 102.1421 ymin: 6.953306 xmax: 116.9473 ymax: 23.3939
## Geodetic CRS: WGS 84
# remane
vn_spatial %>% mutate(Name = case_when(Name == "Dak Lak" ~ "Dac Lac", TRUE ~ Name)) -> vn_spatialggplot() +
geom_sf(data = vn_spatial) +
labs(title = "Vietnam Map - Version 1")ggplot() +
geom_sf(data = vn_spatial, aes(fill = Name), show.legend = FALSE) +
labs(title = "Vietnam Map - Version 2")library(rvest)
# Collect region from wiki
provinces <- "https://en.wikipedia.org/wiki/Provinces_of_Vietnam"
provinces %>%
read_html() %>%
html_nodes(xpath = '//*[@id="mw-content-text"]/div[1]/table[6]') %>%
html_table() %>%
.[[1]] -> provinces_vn# Function extracts data in table:
extract_table <- function(i) {
provinces_vn %>%
slice(i) %>%
pull(3) %>% # column 3
str_split("\n", simplify = TRUE) %>%
str_replace_all("†", "") %>%
str_squish() %>%
as.vector() -> province_names
provinces_vn %>%
slice(i) %>%
pull(1) %>% # column 1
str_split("\\(", simplify = TRUE) %>%
str_split("\\,", simplify = TRUE) %>%
str_replace_all("Vietnam", "") %>%
as.vector() %>%
str_squish() -> region
provinces_vn %>%
slice(i) %>%
pull(2) %>% # column 2
str_split("\\(", simplify = TRUE) %>%
str_replace_all("\\)", "") %>%
str_replace_all("Vietnam", "") %>%
as.vector() %>%
str_squish() -> sub_region
tibble(province = province_names, region_vn = region[2], region_en = region[1],
sub_region_vn = sub_region[2], sub_region_en = sub_region[1]) -> df_final
return(df_final)
}# Use the function:
lapply(1:nrow(provinces_vn), extract_table) -> province_region
do.call("bind_rows", province_region) -> province_region_vietnam
# Rename for some provinnces:
library(stringi)
province_region_vietnam %>%
mutate(province_latin = stri_trans_general(province, "Latin-ASCII")) %>%
mutate(province_latin = case_when(province_latin == "Thua Thien-Hue" ~ "Thua Thien - Hue",
province_latin == "Ba Ria-Vung Tau" ~ "Ba Ria - Vung Tau",
province_latin == "Ho Chi Minh City" ~ "TP. Ho Chi Minh",
TRUE ~ province_latin)) -> province_region_vietnam# Combine the two datasets:
full_join(vn_spatial, province_region_vietnam, by = c("Name" = "province_latin")) -> vn_spatial_region
# Administrative map of Vietnam:
ggplot() +
geom_sf(data = vn_spatial_region, aes(fill = sub_region_en)) +
labs(title = "Administrative Regions of Viet Nam")# Elegant map of Vietnam:
my_colors <- c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00','#ffff33','#a65628','#f781bf')
bgr_color <- "azure"
library(extrafont)
my_font <- "Roboto Condensed"
my_font## [1] "Roboto Condensed"
ggplot() +
geom_sf(data = vn_spatial_region, aes(fill = sub_region_en)) +
scale_fill_manual(values = my_colors, name = "Region:") +
theme(legend.position = c(0.82, 0.83)) +
# theme(text = element_text(family = my_font)) +
theme(legend.key.size = unit(0.5, "cm"), legend.key.width = unit(0.5, "cm") ) +
# theme(legend.text = element_text(family = my_font, color = "grey20")) +
theme(panel.background = element_rect(fill = bgr_color)) +
theme(plot.background = element_rect(fill = bgr_color)) +
theme(legend.background = element_rect(fill = bgr_color)) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
theme(plot.title = element_text(size = 16)) +
labs(title = "Administrative Regions of Viet Nam",
caption = "Data Source: https://data.opendevelopmentmekong.net")# Extract all links
all_links <- "https://www.citypopulation.de/Vietnam.html"
pg <- read_html(all_links)
m <- html_nodes(pg, "a")
k <- html_attr(m, "href")
all_links_communes_level <- str_c("https://www.citypopulation.de/en/vietnam/", k[-c(1:6)])# Function for getting popolation by commune level
get_pop_data <- function(link) {
link %>%
read_html() %>%
html_nodes(xpath = '//*[@id="tl"]') %>%
html_table(fill = TRUE) %>%
.[[1]] -> df
df %>%
select(1:3) -> df
names(df) <- c("Name", "Status", "Pop")
df %>%
mutate(Name_Latin = stringi::stri_trans_general(Name, "Latin-ASCII"),
Pop = str_replace_all(Pop, "[^0-9]", "") %>% as.integer()) %>%
return()
}
# Use the function:
lapply(all_links_communes_level, get_pop_data) -> pop_cummunes_list
do.call("bind_rows", pop_cummunes_list) -> pop_cummunes_df
# Show some observations:
library(knitr)
pop_cummunes_df %>%
select(-1) %>%
filter(!str_detect(Status, "District")) %>%
head() %>%
kable(caption = "Table 1: Population by Commune")| Status | Pop | Name_Latin |
|---|---|---|
| Ward | 18499 | An Thoi |
| Ward | 18307 | Binh Thuy |
| Ward | 11745 | Bui Huu Nghia |
| Ward | 16450 | Long Hoa |
| Ward | 15232 | Long Tuyen |
| Ward | 10773 | Thoi An Dong |
# Population by district level:
commune_df_pop <- pop_cummunes_df %>% filter(str_detect(Status, "District"))
# Get geo-spatial data by district level:
vietnam_dis <- raster::getData("GADM", country = "Vietnam", level = 2)# Convert to data frame:
vietnam_df <- vietnam_dis %>% fortify(region = "NAME_2")
vietnam_df %>%
mutate(dis_names = stri_trans_general(id, "Latin-ASCII")) -> vietnam_df
# Join the two data set:
full_join(vietnam_df, commune_df_pop, by = c("dis_names" = "Name_Latin")) -> df_pop_dis_vn
df_pop_dis_vn %>% mutate(Pop = Pop / 1000) -> df_pop_dis_vn# Choropleth map:
special_provinces <- c("Khanh Hoa", "Da Nang")
library(viridis)
ggplot() +
geom_sf(data = vn_spatial_region %>% filter(Name %in% special_provinces)) +
geom_polygon(data = df_pop_dis_vn, aes(fill = Pop, x = long, y = lat, group = group), color = "grey80") +
theme(legend.position = c(0.8, 0.9)) +
# theme(text = element_text(family = my_font)) +
theme(legend.key.size = unit(0.5, "cm"), legend.key.width = unit(0.5, "cm") ) +
# theme(legend.text = element_text(family = my_font, color = "grey20")) +
# theme(legend.title = element_text(family = my_font, color = "black", size = 11)) +
theme(panel.background = element_rect(fill = bgr_color)) +
theme(plot.background = element_rect(fill = bgr_color)) +
theme(legend.background = element_rect(fill = bgr_color)) +
theme(axis.text = element_blank()) +
theme(axis.title = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
theme(plot.title = element_text(size = 16)) +
labs(title = "Population Density (in thousands) by District for Vietnam", caption = "Data Source: https://www.citypopulation.de/Vietnam.html") +
annotate("text", x = 112, y = 18, label = "Paracel Islands\n(Da Nang City)", family = my_font, size = 3, fontface = "italic") +
annotate("text", x = 115.5, y = 12.5, label = "Spratly Islands\n(Khanh Hoa)", family = my_font, size = 3, fontface = "italic") +
scale_fill_viridis(direction = -1,
option = "C",
name = "Density:",
guide = guide_colourbar(direction = "horizontal",
barheight = unit(3, units = "mm"),
barwidth = unit(30, units = "mm"),
title.hjust = 0.5,
label.hjust = 0.5,
limits = c(0, 800),
title.position = "top"))## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : no
## font could be found for family "Roboto Condensed"
nghean <- vietnam_dis[vietnam_dis$NAME_1 == "Nghệ An", ]
nghean_df <- nghean %>%
fortify(region = "NAME_2") %>%
mutate(id = stri_trans_general(id, "Latin-ASCII"))
ggplot() +
geom_polygon(data = nghean_df,
aes(fill = id, x = long, y = lat, group = group),
color = "gray69") +
# theme(text = element_text(family = my_font)) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
labs(title = "Nghe An Province by District",
caption = "Data Source: http://diva-gis.org/gdata")