Load the required packages

library(dplyr)
library(tidyverse)
library(stringr)
library(stringi)
library(rvest)
library(readxl)
library(ggplot2)
library(viridis)

Task 1

Make the PCI plot complete by:

  1. Combine the province names and their ranks to create the labels for the y-axis

  2. Customize the order of the legend levels

# Read the 2018 PCI workbook; n_max = 64 caps the number of rows that are read.
pci_data <- read_excel("D:/2020/R course/data_day12/du-lieu-pci-2018.xlsx", n_max = 64)

# Store the ranking column ("Xếp hạng") as text so it can be pasted into
# axis labels later on.
pci_data$Rank <- as.character(pci_data$`Xếp hạng`)

# Columns 1 and 3 are renamed by position; the rest of the script treats them
# as the province name and the PCI score.
names(pci_data)[c(1, 3)] <- c("Province", "PCI")

# Keep only the three working columns and transliterate the province names
# to plain ASCII (ICU "Latin-ASCII" transform).
pci_data <- pci_data %>%
  select(Province, PCI, Rank) %>%
  mutate(Province = stri_trans_general(Province, "Latin-ASCII"))

# Build the table behind the bar chart:
#   TP.Rank  - axis label of the form "<Province> <two-digit rank>"
#   category - PCI performance band
#   PCI.2    - PCI formatted with two decimals, used as the bar label
# pci_data was already reduced to Province / PCI / Rank with ASCII province
# names right after the Excel import, so that step is not repeated here.
pci_data_final <- pci_data %>%
  # Left-pad single-character ranks with "0" ("7" -> "07"); others are kept.
  mutate(Rank1 = if_else(str_length(Rank) == 1, paste0("0", Rank), Rank)) %>%
  # unite() replaces Province and Rank1 with the combined TP.Rank column.
  unite("TP.Rank", Province, Rank1, sep = " ") %>%
  mutate(
    # case_when() tries the conditions top to bottom, so each band only
    # needs its lower bound; anything left over (including a missing PCI)
    # falls into "Fairly low".
    category = case_when(
      PCI >= 70 ~ "Very good",
      PCI >= 65 ~ "Good",
      PCI >= 63 ~ "Fairly good",
      PCI >= 60 ~ "Average",
      TRUE ~ "Fairly low"
    ),
    PCI.2 = format(round(PCI, 2), nsmall = 2)
  )

# Convert the band labels to a factor. The level order (best band first)
# is what fixes the order of the entries in the plot legend.
pci_data_final <- pci_data_final %>%
  mutate(
    category_factored = factor(
      category,
      levels = c("Very good", "Good", "Fairly good", "Average", "Fairly low")
    )
  )

# Five-colour manual palette.
# NOTE(review): my_color is not referenced by the code visible in this file
# (the bar chart uses the viridis fill scale) - confirm whether it is needed.
my_color <- c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00")

# Horizontal bar chart of the PCI score per province.
# Bars are ordered by PCI, filled by performance band, and annotated with
# the two-decimal score.
pci_bar <- pci_data_final %>%
  ggplot(aes(x = reorder(TP.Rank, PCI), y = PCI, fill = category_factored)) +
  geom_col(width = 0.7) +
  # A negative hjust pushes the label just past the end of its bar.
  geom_text(aes(label = PCI.2), size = 2.5, hjust = -0.2) +
  # No padding around the value axis; the upper limit of 80 leaves room for
  # the text labels. `limits` is spelled out in full rather than relying on
  # partial argument matching.
  scale_y_continuous(expand = c(0, 0), limits = c(0, 80)) +
  scale_fill_viridis(discrete = TRUE) +
  # Flip so the province labels run down the vertical axis.
  coord_flip() +
  labs(
    x = NULL,
    y = "PCI",
    title = "Provincial Competitiveness Index (PCI) 2018",
    caption = "http://pci2018.pcivietnam.vn/"
  ) +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_line(colour = "black"),
    plot.caption = element_text(face = "italic"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

# Display the bar chart.
pci_bar

Task 2

Reproduce the figure that combines two ways of visualizing PCI (i.e., a choropleth map and a bar plot).

# Fetch the GADM level-1 boundaries for Vietnam.
vietnam_province <- raster::getData("GADM", country = "Vietnam", level = 1)

# Convert the polygons to a data frame with NAME_1 as the region key, then
# transliterate the resulting `id` column to plain ASCII so it can be
# compared with the (already transliterated) PCI province names.
vietnam_df <- vietnam_province %>%
  fortify(region = "NAME_1") %>%
  mutate(id = stri_trans_general(id, "Latin-ASCII"))

# Names that appear in one source but not in the other.
prov_diff <- base::setdiff(pci_data$Province, unique(vietnam_df$id))
prov_diff_1 <- base::setdiff(unique(vietnam_df$id), pci_data$Province)

# Replace the mismatching PCI names with their map spellings.
# NOTE(review): the pairing below is purely positional (1 -> 2, 2 -> 1,
# 3 -> 3) and depends on the order in which setdiff() returns the names;
# re-check it whenever either data source changes.
pci_data_map <- pci_data %>%
  mutate(
    Province = case_when(
      Province == prov_diff[1] ~ prov_diff_1[2],
      Province == prov_diff[2] ~ prov_diff_1[1],
      Province == prov_diff[3] ~ prov_diff_1[3],
      TRUE ~ Province
    )
  )

# Sanity check: every map region should now have a matching PCI row.
base::setdiff(unique(vietnam_df$id), pci_data_map$Province)
## character(0)

# Attach the PCI values to the polygon vertices.
df_pci <- pci_data_map %>%
  full_join(vietnam_df, by = c("Province" = "id"))

# Choropleth map of Vietnam with each province polygon filled by its PCI.
pci_map <- ggplot() +
  geom_polygon(
    data = df_pci,
    aes(x = long, y = lat, group = group, fill = PCI)
  ) +
  scale_fill_viridis() +
  labs(
    x = NULL,
    y = NULL,
    # The legend title is set here. element_text("PCI") would pass "PCI" as
    # the font family (its first argument), not as the title text.
    fill = "PCI",
    title = "Provincial Competitiveness Index (PCI) 2018",
    caption = "http://pci2018.pcivietnam.vn/"
  ) +
  theme_minimal() +
  # Strip axes, grid and panel decoration so that only the map remains.
  theme(
    legend.position = "left",
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    plot.caption = element_text(face = "italic"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

To create a single plot from the two plots, we can use:

plot_grid() in the cowplot package:

# Map on the left, bar chart on the right; the NULL entry is an empty
# spacer column between them (relative widths 1 : 0.3 : 1.5).
cowplot::plot_grid(
  pci_map, NULL, pci_bar,
  ncol = 3,
  rel_widths = c(1, 0.3, 1.5)
)

or ggarrange() in ggpubr:

# Same two plots side by side, with the widths split 40% / 60%.
ggpubr::ggarrange(
  pci_map, pci_bar,
  widths = c(0.4, 0.6)
)

or grid.arrange() in the gridExtra package:

# Two-column layout with the default (equal) column widths.
gridExtra::grid.arrange(
  pci_map, pci_bar,
  ncol = 2
)