Motivations
Báo cáo PCI 2018 khá đẹp nhưng vẫn còn vài thiếu sót như không nhất quán trong trình bày, lúc thì thừa lúc thì thiếu và sử dụng màu sắc, legend không tốt. Chúng ta có thể cải tiến, ví dụ, Bar Plot ở trang 26 của báo cáo này.
R Codes
# Reference: http://pci2018.pcivietnam.vn/uploads/2019/ho-so-63-tinh-vie.pdf
# Data Source: http://pci2018.pcivietnam.vn/
# cpi_colors <- c("#892890", "#034EA2", "#4792CF", "#8ED8F8", "#BAE1D1")
# Packages used by the whole script. The workspace is intentionally NOT
# cleared here: a script should not delete objects it does not own.
library(viridis)
library(tidyverse)
library(readxl)
library(extrafont)

# Workbook holding the index data; kept in one variable so the script can be
# repointed on another machine without editing the pipeline.
pci_xlsx_path <- "C:\\Users\\Zbook\\Downloads\\du-lieu-pci-2018.xlsx"

# Raw table: first three columns of sheet 1, first 63 rows, renamed to
# Province / Rank / Score. `df_cpi` is not modified by the plotting pipeline
# below, so its original province names stay available to later steps.
df_cpi <- read_excel(pci_xlsx_path, sheet = 1) %>%
  select(1:3) %>%
  slice(1:63) %>%
  setNames(c("Province", "Rank", "Score"))

# Plot-ready table for the bar chart:
#   Province  - "<name> <zero-padded rank>", a factor ordered by descending
#               rank so rank 1 ends up at the top after coord_flip()
#   my_colors - rating tier, with levels fixed explicitly (best tier first)
#               instead of being derived from the row order of the data
#   label     - score printed with exactly two decimals
df_ploting2 <- df_cpi %>%
  mutate(
    Province = if_else(
      str_detect(Province, "BRVT"),
      "Bà Rịa - Vũng Tàu",
      Province
    ),
    fake_rank = str_pad(as.character(Rank), width = 2, pad = "0"),
    Province = paste(Province, fake_rank),
    # Conditions are tested in order, so each upper bound is enough and no
    # rank can fall into a gap between two tiers.
    my_colors = case_when(
      Rank <= 2 ~ "Excellent",
      Rank <= 9 ~ "Good",
      Rank <= 41 ~ "Fair",
      Rank <= 61 ~ "Mediocre",
      TRUE ~ "Poor"
    )
  ) %>%
  arrange(desc(Rank)) %>%
  mutate(
    Province = factor(Province, levels = unique(Province)),
    my_colors = factor(
      my_colors,
      levels = c("Excellent", "Good", "Fair", "Mediocre", "Poor")
    ),
    # sprintf() pads to two decimals for any magnitude; a missing score stays
    # NA rather than becoming the string "NA".
    label = if_else(is.na(Score), NA_character_, sprintf("%.2f", Score))
  )
#-------------
# Bar Plot
#-------------

# Font family applied to the text elements of the figures.
my_font <- "Roboto Condensed"

# Horizontal bar chart: one bar per province (order given by the Province
# factor), filled by rating tier, with the formatted score printed just past
# the end of each bar. The x-axis text is hidden because every bar is labelled.
# NOTE(review): the data-source URL refers to the index as "PCI"; the "CPI"
# wording in the title is reproduced unchanged.
p1 <- ggplot(
  data = df_ploting2,
  mapping = aes(x = Province, y = Score, fill = my_colors)
) +
  geom_col(width = 0.85) +
  geom_text(mapping = aes(label = label), hjust = -0.1, size = 3) +
  coord_flip() +
  scale_y_continuous(expand = c(0.001, 0), limits = c(0, 80)) +
  scale_fill_viridis(option = "D", discrete = TRUE, name = "") +
  labs(
    title = "Vietnam CPI Index 2018",
    subtitle = "R Used for Data Visualization",
    caption = "Data Source: http://pci2018.pcivietnam.vn",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(family = my_font, size = 8, colour = "black"),
    plot.title = element_text(
      family = my_font, face = "bold", size = 22, colour = "grey20"
    ),
    plot.subtitle = element_text(
      family = my_font, size = 13, colour = "gray40"
    ),
    plot.caption = element_text(
      family = my_font, face = "italic", size = 11, colour = "grey40"
    ),
    legend.text = element_text(family = my_font, size = 10),
    plot.margin = unit(c(1, 1, 1, 2), "cm")
  )
#--------------
# Mapping
#--------------

# Get geospatial data for Viet Nam (GADM, administrative level 1).
# getData() is called through its namespace so raster is never attached and
# therefore cannot mask dplyr::select(); no detach() is needed afterwards.
# NOTE(review): newer raster releases deprecate getData() in favour of the
# geodata package - confirm against the installed version.
vietnam_province <- raster::getData("GADM", country = "Vietnam", level = 1)

# Convert the polygons to a data frame of coordinates, grouped by NAME_1.
vietnam_df <- vietnam_province %>% fortify(region = "NAME_1")

library(stringi)

# Build the join key: the region id transliterated to plain ASCII, with three
# provinces rewritten to the abbreviations used as keys on the data side.
vietnam_df <- vietnam_df %>%
  mutate(
    id_prov = stri_trans_general(id, "Latin-ASCII"),
    id_prov = case_when(
      str_detect(id_prov, "Ba Ria") ~ "BRVT",
      str_detect(id_prov, "Ho Chi Minh") ~ "TP.HCM",
      str_detect(id_prov, "Thua Thien Hue") ~ "TT-Hue",
      TRUE ~ id_prov
    )
  )

# Same transliteration on the data side so both tables share `id_prov`.
df_cpi <- df_cpi %>%
  mutate(id_prov = stri_trans_general(Province, "Latin-ASCII"))

# A province without a matching polygon would otherwise be joined silently as
# a row of NA coordinates and simply be absent from the map; report it.
missing_polygons <- df_cpi$Province[!df_cpi$id_prov %in% vietnam_df$id_prov]
if (length(missing_polygons) > 0) {
  warning(
    "No map polygon matched for: ",
    paste(missing_polygons, collapse = ", "),
    call. = FALSE
  )
}

# Join data sets (every row of df_cpi is kept):
df_cpi_mapping <- right_join(vietnam_df, df_cpi, by = "id_prov")
# Choropleth of the provincial scores, stored as p2 and then drawn next to the
# bar chart. Axes, grid and borders are blanked so only the map, the titles
# and the colour bar remain.
p2 <- ggplot() +
  geom_polygon(
    data = df_cpi_mapping,
    mapping = aes(x = long, y = lat, group = group, fill = Score),
    colour = "white"
  ) +
  # NOTE(review): lat0/lat1 of 30 and 40 lie north of Viet Nam's latitude
  # range - confirm these projection parameters are intended.
  coord_map("albers", lat0 = 30, lat1 = 40) +
  scale_fill_viridis(
    name = "CPI Index",
    option = "D",
    direction = -1,
    guide = guide_colourbar(
      title.position = "top",
      title.hjust = 0.5,
      label.hjust = 0.5,
      direction = "horizontal",
      barwidth = unit(40, units = "mm"),
      barheight = unit(3, units = "mm")
    )
  ) +
  labs(
    title = "Vietnam CPI Index 2018",
    subtitle = "Vietnam's Paracel and Spratly Islands\nare not shown in this map.",
    caption = "Data Source: http://pci2018.pcivietnam.vn"
  ) +
  theme(
    # Strip every axis / panel decoration.
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_blank(),
    # Plain white backgrounds.
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    legend.background = element_rect(fill = "white", colour = NA),
    # Text styling.
    plot.title = element_text(
      family = my_font, face = "bold", size = 22, colour = "grey20"
    ),
    plot.subtitle = element_text(
      family = my_font, size = 13, colour = "gray40"
    ),
    plot.caption = element_text(
      family = my_font, face = "italic", size = 11, colour = "grey40"
    ),
    legend.text = element_text(family = my_font, size = 12, colour = "grey40"),
    legend.title = element_text(family = my_font, size = 12, colour = "grey20"),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    # Legend placed inside the panel (alternative: legend.position = "top").
    legend.position = c(0.3, 0.5)
  )

# Map on the left, bar chart on the right.
gridExtra::grid.arrange(p2, p1, ncol = 2)

---
title: "Mapping Vietnam Provincial Competitiveness Index 2018"
author: "Nguyen Chi Dung"
subtitle: "Daily Graph Series"
output:
  html_document:
    code_download: yes
    code_folding: hide
    highlight: zenburn
    theme: flatly
    toc: yes
    toc_float: yes
  word_document:
    toc: yes
---

```{r setup,include=FALSE}
# Defaults for every chunk: show the code, hide warnings and messages, and
# set fig.retina to 2.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.retina = 2
)
```

# Motivations

Báo cáo [PCI 2018](http://pci2018.pcivietnam.vn/uploads/2019/ho-so-63-tinh-vie.pdf) khá đẹp nhưng vẫn còn vài thiếu sót như không nhất quán trong trình bày, lúc thì thừa lúc thì thiếu và sử dụng màu sắc, legend không tốt. Chúng ta có thể cải tiến, ví dụ, Bar Plot ở trang 26 của báo cáo này.

# R Codes

```{r, eval=FALSE}



# Reference: http://pci2018.pcivietnam.vn/uploads/2019/ho-so-63-tinh-vie.pdf
# Data Source: http://pci2018.pcivietnam.vn/


# cpi_colors <- c("#892890", "#034EA2", "#4792CF", "#8ED8F8", "#BAE1D1")

# Packages used by the whole script. The workspace is intentionally NOT
# cleared here: a script should not delete objects it does not own.
library(viridis)
library(tidyverse)
library(readxl)
library(extrafont)

# Workbook holding the index data; kept in one variable so the script can be
# repointed on another machine without editing the pipeline.
pci_xlsx_path <- "C:\\Users\\Zbook\\Downloads\\du-lieu-pci-2018.xlsx"

# Raw table: first three columns of sheet 1, first 63 rows, renamed to
# Province / Rank / Score. `df_cpi` is not modified by the plotting pipeline
# below, so its original province names stay available to later steps.
df_cpi <- read_excel(pci_xlsx_path, sheet = 1) %>%
  select(1:3) %>%
  slice(1:63) %>%
  setNames(c("Province", "Rank", "Score"))

# Plot-ready table for the bar chart:
#   Province  - "<name> <zero-padded rank>", a factor ordered by descending
#               rank so rank 1 ends up at the top after coord_flip()
#   my_colors - rating tier, with levels fixed explicitly (best tier first)
#               instead of being derived from the row order of the data
#   label     - score printed with exactly two decimals
df_ploting2 <- df_cpi %>%
  mutate(
    Province = if_else(
      str_detect(Province, "BRVT"),
      "Bà Rịa - Vũng Tàu",
      Province
    ),
    fake_rank = str_pad(as.character(Rank), width = 2, pad = "0"),
    Province = paste(Province, fake_rank),
    # Conditions are tested in order, so each upper bound is enough and no
    # rank can fall into a gap between two tiers.
    my_colors = case_when(
      Rank <= 2 ~ "Excellent",
      Rank <= 9 ~ "Good",
      Rank <= 41 ~ "Fair",
      Rank <= 61 ~ "Mediocre",
      TRUE ~ "Poor"
    )
  ) %>%
  arrange(desc(Rank)) %>%
  mutate(
    Province = factor(Province, levels = unique(Province)),
    my_colors = factor(
      my_colors,
      levels = c("Excellent", "Good", "Fair", "Mediocre", "Poor")
    ),
    # sprintf() pads to two decimals for any magnitude; a missing score stays
    # NA rather than becoming the string "NA".
    label = if_else(is.na(Score), NA_character_, sprintf("%.2f", Score))
  )


#-------------
#  Bar Plot
#-------------

# Font family applied to the text elements of the figures.
my_font <- "Roboto Condensed"

# Horizontal bar chart: one bar per province (order given by the Province
# factor), filled by rating tier, with the formatted score printed just past
# the end of each bar. The x-axis text is hidden because every bar is labelled.
# NOTE(review): the data-source URL refers to the index as "PCI"; the "CPI"
# wording in the title is reproduced unchanged.
p1 <- ggplot(
  data = df_ploting2,
  mapping = aes(x = Province, y = Score, fill = my_colors)
) +
  geom_col(width = 0.85) +
  geom_text(mapping = aes(label = label), hjust = -0.1, size = 3) +
  coord_flip() +
  scale_y_continuous(expand = c(0.001, 0), limits = c(0, 80)) +
  scale_fill_viridis(option = "D", discrete = TRUE, name = "") +
  labs(
    title = "Vietnam CPI Index 2018",
    subtitle = "R Used for Data Visualization",
    caption = "Data Source: http://pci2018.pcivietnam.vn",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(family = my_font, size = 8, colour = "black"),
    plot.title = element_text(
      family = my_font, face = "bold", size = 22, colour = "grey20"
    ),
    plot.subtitle = element_text(
      family = my_font, size = 13, colour = "gray40"
    ),
    plot.caption = element_text(
      family = my_font, face = "italic", size = 11, colour = "grey40"
    ),
    legend.text = element_text(family = my_font, size = 10),
    plot.margin = unit(c(1, 1, 1, 2), "cm")
  )

#--------------
#  Mapping
#--------------

# Get geospatial data for Viet Nam (GADM, administrative level 1).
# getData() is called through its namespace so raster is never attached and
# therefore cannot mask dplyr::select(); no detach() is needed afterwards.
# NOTE(review): newer raster releases deprecate getData() in favour of the
# geodata package - confirm against the installed version.
vietnam_province <- raster::getData("GADM", country = "Vietnam", level = 1)

# Convert the polygons to a data frame of coordinates, grouped by NAME_1.
vietnam_df <- vietnam_province %>% fortify(region = "NAME_1")

library(stringi)

# Build the join key: the region id transliterated to plain ASCII, with three
# provinces rewritten to the abbreviations used as keys on the data side.
vietnam_df <- vietnam_df %>%
  mutate(
    id_prov = stri_trans_general(id, "Latin-ASCII"),
    id_prov = case_when(
      str_detect(id_prov, "Ba Ria") ~ "BRVT",
      str_detect(id_prov, "Ho Chi Minh") ~ "TP.HCM",
      str_detect(id_prov, "Thua Thien Hue") ~ "TT-Hue",
      TRUE ~ id_prov
    )
  )

# Same transliteration on the data side so both tables share `id_prov`.
df_cpi <- df_cpi %>%
  mutate(id_prov = stri_trans_general(Province, "Latin-ASCII"))

# A province without a matching polygon would otherwise be joined silently as
# a row of NA coordinates and simply be absent from the map; report it.
missing_polygons <- df_cpi$Province[!df_cpi$id_prov %in% vietnam_df$id_prov]
if (length(missing_polygons) > 0) {
  warning(
    "No map polygon matched for: ",
    paste(missing_polygons, collapse = ", "),
    call. = FALSE
  )
}

# Join data sets (every row of df_cpi is kept):
df_cpi_mapping <- right_join(vietnam_df, df_cpi, by = "id_prov")

# Choropleth of the provincial scores, stored as p2 and then drawn next to the
# bar chart. Axes, grid and borders are blanked so only the map, the titles
# and the colour bar remain.
p2 <- ggplot() +
  geom_polygon(
    data = df_cpi_mapping,
    mapping = aes(x = long, y = lat, group = group, fill = Score),
    colour = "white"
  ) +
  # NOTE(review): lat0/lat1 of 30 and 40 lie north of Viet Nam's latitude
  # range - confirm these projection parameters are intended.
  coord_map("albers", lat0 = 30, lat1 = 40) +
  scale_fill_viridis(
    name = "CPI Index",
    option = "D",
    direction = -1,
    guide = guide_colourbar(
      title.position = "top",
      title.hjust = 0.5,
      label.hjust = 0.5,
      direction = "horizontal",
      barwidth = unit(40, units = "mm"),
      barheight = unit(3, units = "mm")
    )
  ) +
  labs(
    title = "Vietnam CPI Index 2018",
    subtitle = "Vietnam's Paracel and Spratly Islands\nare not shown in this map.",
    caption = "Data Source: http://pci2018.pcivietnam.vn"
  ) +
  theme(
    # Strip every axis / panel decoration.
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_blank(),
    # Plain white backgrounds.
    plot.background = element_rect(fill = "white", colour = NA),
    panel.background = element_rect(fill = "white", colour = NA),
    legend.background = element_rect(fill = "white", colour = NA),
    # Text styling.
    plot.title = element_text(
      family = my_font, face = "bold", size = 22, colour = "grey20"
    ),
    plot.subtitle = element_text(
      family = my_font, size = 13, colour = "gray40"
    ),
    plot.caption = element_text(
      family = my_font, face = "italic", size = 11, colour = "grey40"
    ),
    legend.text = element_text(family = my_font, size = 12, colour = "grey40"),
    legend.title = element_text(family = my_font, size = 12, colour = "grey20"),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    # Legend placed inside the panel (alternative: legend.position = "top").
    legend.position = c(0.3, 0.5)
  )

# Map on the left, bar chart on the right.
gridExtra::grid.arrange(p2, p1, ncol = 2)
```


![](C:\\Users\\Zbook\\Desktop\\pic\\map_cpi.jpg)