Motivations

Báo cáo PCI 2018 được trình bày khá đẹp nhưng vẫn còn một vài thiếu sót: cách trình bày không nhất quán (chỗ thì thừa, chỗ thì thiếu), màu sắc và chú giải (legend) sử dụng chưa tốt. Chúng ta có thể cải tiến, ví dụ, biểu đồ cột (Bar Plot) ở trang 26 của báo cáo này.

R Code

# Reference: http://pci2018.pcivietnam.vn/uploads/2019/ho-so-63-tinh-vie.pdf
# Data Source: http://pci2018.pcivietnam.vn/


# cpi_colors <- c("#892890", "#034EA2", "#4792CF", "#8ED8F8", "#BAE1D1")

# NOTE: the original `rm(list = ls())` was dropped on purpose. It only removes
# objects from the global environment (attached packages and options stay), so
# it does not give a clean session, and it destroys the workspace of anyone
# who sources this script. Restart R for a clean run instead.

library(viridis)
library(tidyverse)
library(readxl)
library(extrafont)

# Location of the PCI 2018 workbook. Edit this single line to run the script
# on another machine.
pci_path <- "C:\\Users\\Zbook\\Downloads\\du-lieu-pci-2018.xlsx"

# Read the first sheet, keep the first three columns (renamed by position to
# Province / Rank / Score) and the first 63 rows.
# NOTE(review): presumably one row per province with numeric Rank and Score
# columns; confirm against the workbook layout.
df_cpi <- read_excel(pci_path, sheet = 1) %>%
  select(Province = 1, Rank = 2, Score = 3) %>%
  slice_head(n = 63)



# Build the data frame behind the bar plot (`df_ploting2`).
#
# Columns added to `df_cpi`:
#   fake_rank  zero-padded rank ("01", "02", ...), appended to the province
#              name so every axis label carries its rank
#   my_colors  performance tier derived from the rank, as an ordered set of
#              levels from "Excellent" to "Poor"
#   label      score rendered with exactly two decimals for the bar labels
#
# `Province` ends up as a factor whose levels follow descending rank, so that
# after coord_flip() the best-ranked province is drawn at the top.
df_ploting2 <- df_cpi %>%
  mutate(
    # Spell out the abbreviated province name used in the workbook.
    Province = if_else(
      str_detect(Province, "BRVT"), "Bà Rịa - Vũng Tàu", Province
    ),
    fake_rank = str_pad(as.character(Rank), width = 2, pad = "0"),
    Province = paste(Province, fake_rank),
    my_colors = case_when(
      Rank <= 2 ~ "Excellent",
      between(Rank, 3, 9) ~ "Good",
      between(Rank, 10, 41) ~ "Fair",
      between(Rank, 42, 61) ~ "Mediocre",
      TRUE ~ "Poor"
    ),
    # Levels are listed explicitly instead of being derived from the order in
    # which tiers happen to appear: the legend order stays correct even when a
    # tier is absent from the data.
    my_colors = factor(
      my_colors,
      levels = c("Excellent", "Good", "Fair", "Mediocre", "Poor")
    ),
    # Fixed two-decimal formatting works for any magnitude; padding zeros by
    # string length only worked for scores with two integer digits.
    # A missing score stays NA rather than becoming the text "NA".
    label = if_else(
      is.na(Score), NA_character_, sprintf("%.2f", round(Score, 2))
    )
  ) %>%
  arrange(desc(Rank)) %>%
  # Freeze the current row order as the factor order used on the axis.
  mutate(Province = fct_inorder(Province))


#-------------
#  Bar Plot 
#-------------


# Typeface used for the text elements of the charts.
my_font <- "Roboto Condensed"

# Horizontal bar chart: one bar per `Province` level with height `Score`,
# filled by tier (`my_colors`) and annotated with the formatted score.
p1 <- ggplot(df_ploting2, aes(Province, Score, fill = my_colors)) +
  geom_col(width = 0.85) +
  # A negative hjust places the score just past the end of its bar.
  geom_text(aes(label = label), size = 3, hjust = -0.1) +
  coord_flip() +
  # The upper limit of 80 leaves room for the score labels.
  scale_y_continuous(limits = c(0, 80), expand = c(0.001, 0)) +
  scale_fill_viridis(discrete = TRUE, name = "", option = "D") +
  labs(
    x = NULL,
    y = NULL,
    title = "Vietnam CPI Index 2018",
    subtitle = "R Used for Data Visualization",
    caption = "Data Source: http://pci2018.pcivietnam.vn"
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 8, family = my_font, color = "black"),
    plot.title = element_text(
      family = my_font, color = "grey20", size = 22, face = "bold"
    ),
    plot.subtitle = element_text(family = my_font, size = 13, color = "gray40"),
    plot.caption = element_text(
      family = my_font, size = 11, color = "grey40", face = "italic"
    ),
    legend.text = element_text(family = my_font, size = 10),
    plot.margin = unit(c(1, 1, 1, 2), "cm")
  )

#--------------
#  Mapping
#--------------


# Get geospatial data for Viet Nam: 

# Download level-1 administrative boundaries for Vietnam from GADM.
# getData() is called through the raster namespace so that raster is never
# attached; the script previously attached it and detached it again right
# after this call.
# NOTE(review): recent raster releases deprecate getData() in favour of the
# geodata package; confirm it still works with the installed version.
vietnam_province <- raster::getData("GADM", country = "Vietnam", level = 1)

# Flatten the polygons into a data frame, grouped by the NAME_1 attribute.
vietnam_df <- fortify(vietnam_province, region = "NAME_1")


library(stringi)

# Build an ASCII join key from the polygon id, then rewrite three names to the
# abbreviations matched on the PCI side.
vietnam_df <- vietnam_df %>%
  mutate(
    id_prov = stri_trans_general(id, "Latin-ASCII"),
    id_prov = case_when(
      str_detect(id_prov, "Ba Ria") ~ "BRVT",
      str_detect(id_prov, "Ho Chi Minh") ~ "TP.HCM",
      str_detect(id_prov, "Thua Thien Hue") ~ "TT-Hue",
      TRUE ~ id_prov
    )
  )

# Same ASCII transliteration on the PCI side.
df_cpi <- df_cpi %>%
  mutate(id_prov = stri_trans_general(Province, "Latin-ASCII"))

# The join key is built from free-text names, so report any name that exists
# on one side only instead of letting it vanish from the map unnoticed.
missing_from_map <- setdiff(df_cpi$id_prov, vietnam_df$id_prov)
missing_from_pci <- setdiff(vietnam_df$id_prov, df_cpi$id_prov)
if (length(missing_from_map) > 0 || length(missing_from_pci) > 0) {
  warning(
    "Province names that did not match between the PCI table and the map: ",
    paste(c(missing_from_map, missing_from_pci), collapse = ", "),
    call. = FALSE
  )
}

# Join data sets. A left join keeps every polygon: a province without a PCI
# row gets an NA score instead of being dropped from the data. When all names
# match, the result is the same as the former right join.
df_cpi_mapping <- left_join(vietnam_df, df_cpi, by = "id_prov")

# Choropleth map: province polygons filled by PCI score, drawn on a blank
# white canvas with the colour bar placed inside the plotting area.
# NOTE(review): the Albers parameters 30/40 lie north of Vietnam's latitude
# range; confirm they are intended.
p2 <- ggplot() +
  geom_polygon(
    data = df_cpi_mapping,
    mapping = aes(long, lat, group = group, fill = Score),
    color = "white"
  ) +
  coord_map("albers", lat0 = 30, lat1 = 40) +
  # Reversed viridis scale with a compact horizontal colour bar.
  scale_fill_viridis(
    direction = -1,
    option = "D",
    name = "CPI Index",
    guide = guide_colourbar(
      direction = "horizontal",
      barheight = unit(3, units = "mm"),
      barwidth = unit(40, units = "mm"),
      title.hjust = 0.5,
      label.hjust = 0.5,
      title.position = "top"
    )
  ) +
  labs(
    title = "Vietnam CPI Index 2018",
    subtitle = "Vietnam's Paracel and Spratly Islands\nare not shown in this map.",
    caption = "Data Source: http://pci2018.pcivietnam.vn"
  ) +
  theme(
    # Strip axes, grid and borders; keep plain white backgrounds.
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_blank(),
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA),
    legend.background = element_rect(fill = "white", color = NA),
    # Typography.
    plot.title = element_text(
      family = my_font, color = "grey20", size = 22, face = "bold"
    ),
    plot.subtitle = element_text(family = my_font, size = 13, color = "gray40"),
    plot.caption = element_text(
      family = my_font, size = 11, color = "grey40", face = "italic"
    ),
    legend.text = element_text(family = my_font, color = "grey40", size = 12),
    legend.title = element_text(family = my_font, color = "grey20", size = 12),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    # Legend sits inside the panel; "top" is the alternative tried earlier.
    legend.position = c(0.3, 0.5)
  )


# Final figure: map on the left, bar chart on the right.
gridExtra::grid.arrange(p2, p1, ncol = 2)

