# Load required packages
library(dplyr)
library(tidyverse)
library(stringr)
library(stringi)
library(rvest)
library(readxl)
library(ggplot2)
library(viridis)

# Make the PCI plot complete by:
#   - combining the province names and their ranks to create the labels for
#     the y-axis
#   - customizing the order of the legend levels
# Read the PCI 2018 workbook (at most 64 data rows) and reduce it to the three
# columns used downstream: province name, PCI score and rank.
pci_data <- read_excel(
  "D:/2020/R course/data_day12/du-lieu-pci-2018.xlsx",
  n_max = 64
)

# Keep the rank (the "Xếp hạng" column) as text so it can later be pasted into
# the axis labels.
pci_data$Rank <- as.character(pci_data[["Xếp hạng"]])

# The province and score columns are renamed by position.
# NOTE(review): assumes column 1 holds the province name and column 3 the PCI
# score -- confirm against the workbook layout.
names(pci_data)[c(1, 3)] <- c("Province", "PCI")

# Transliterate the province names with the "Latin-ASCII" transform so they
# can be compared against ASCII-only names later in the script.
pci_data <- pci_data %>%
  select(Province, PCI, Rank) %>%
  mutate(Province = stri_trans_general(Province, "Latin-ASCII"))
# Build the table behind the bar chart. Columns produced, in order:
#   TP.Rank           - province name and zero-padded rank joined by a space
#                       (replaces the Province and Rank1 columns)
#   PCI, Rank         - carried over from pci_data
#   category          - PCI score band
#   PCI.2             - PCI rounded to two decimals and formatted as text
#   category_factored - category as a factor with an explicit level order
#
# pci_data already holds only Province/PCI/Rank with transliterated names, so
# no further selection or transliteration is needed here.
pci_data_final <- pci_data %>%
  mutate(
    # Left-pad one-character ranks with "0" (e.g. "1" -> "01").
    Rank1 = case_when(
      str_count(Rank) == 1 ~ paste0("0", Rank),
      TRUE ~ Rank
    )
  ) %>%
  unite("TP.Rank", Province, Rank1, sep = " ") %>%
  mutate(
    # case_when() stops at the first matching condition, so each band only
    # needs its lower bound. Rows matching none of the bounds (including a
    # missing PCI) fall through to "Fairly low".
    category = case_when(
      PCI >= 70 ~ "Very good",
      PCI >= 65 ~ "Good",
      PCI >= 63 ~ "Fairly good",
      PCI >= 60 ~ "Average",
      TRUE ~ "Fairly low"
    ),
    PCI.2 = format(round(PCI, 2), nsmall = 2),
    # Explicit levels fix the order in which the bands appear in the legend.
    category_factored = factor(
      category,
      levels = c("Very good", "Good", "Fairly good", "Average", "Fairly low")
    )
  )
# Five-colour palette.
# NOTE(review): not referenced by the bar chart below, which takes its fill
# colours from scale_fill_viridis() -- confirm whether it is used elsewhere.
my_color <- c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00')

# Bar chart of PCI per province: bars ordered by PCI, filled by score band,
# labelled with the two-decimal PCI value, and flipped so the province labels
# run along the vertical axis.
pci_bar <- pci_data_final %>%
  ggplot(aes(x = reorder(TP.Rank, PCI), y = PCI, fill = category_factored)) +
  geom_col(width = 0.7) +
  theme_bw() +
  xlab(NULL) +
  ylab("PCI") +
  # `limits` is spelled out in full instead of relying on partial argument
  # matching of `limit`.
  scale_y_continuous(expand = c(0, 0), limits = c(0, 80)) +
  labs(
    title = "Provincial Competitiveness Index (PCI) 2018",
    caption = "http://pci2018.pcivietnam.vn/"
  ) +
  scale_fill_viridis(discrete = TRUE) +
  theme(
    legend.title = element_blank(),
    axis.ticks = element_blank(),
    plot.caption = element_text(face = "italic"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  ) +
  # Negative hjust places the value label just past the end of each bar.
  geom_text(aes(label = PCI.2), size = 2.5, hjust = -0.2) +
  coord_flip()

pci_bar

# Reproduce the figure that combines two ways of visualizing PCI
# (i.e. a choropleth map and a bar plot).
# Fetch the level-1 administrative boundaries of Vietnam from GADM.
# NOTE(review): recent raster releases reportedly deprecate getData() in
# favour of the geodata package -- confirm against the installed version.
vietnam_province <- raster::getData("GADM", country = "Vietnam", level = 1)

# Flatten the polygons into a data frame keyed by NAME_1 (exposed as `id`),
# then transliterate `id` the same way as pci_data$Province so the two can be
# matched.
vietnam_df <- vietnam_province %>%
  fortify(region = "NAME_1") %>%
  mutate(id = stri_trans_general(id, "Latin-ASCII"))

# Names present on one side only: prov_diff = in the PCI table but not in the
# map; prov_diff_1 = in the map but not in the PCI table.
prov_diff <- base::setdiff(pci_data$Province, unique(vietnam_df$id))
prov_diff_1 <- base::setdiff(unique(vietnam_df$id), pci_data$Province)

# Replace each unmatched PCI name with its map spelling.
# NOTE(review): the pairing is purely positional (1 -> 2, 2 -> 1, 3 -> 3) and
# therefore depends on the order setdiff() happens to return -- verify the
# pairs by inspecting prov_diff and prov_diff_1.
pci_data_map <- pci_data %>%
  mutate(
    Province = case_when(
      Province == prov_diff[1] ~ prov_diff_1[2],
      Province == prov_diff[2] ~ prov_diff_1[1],
      Province == prov_diff[3] ~ prov_diff_1[3],
      TRUE ~ Province
    )
  )

# Sanity check: every map region should now have a PCI row.
base::setdiff(unique(vietnam_df$id), pci_data_map$Province) ## character(0)

# One row per polygon vertex, carrying the PCI columns of its province.
df_pci <- full_join(pci_data_map, vietnam_df, by = c("Province" = "id"))
# Choropleth map: one polygon per province, filled by its PCI score on a
# continuous viridis scale, with axes, grid and panel decoration removed.
pci_map <- ggplot() +
  geom_polygon(
    data = df_pci,
    aes(x = long, y = lat, group = group, fill = PCI)
  ) +
  xlab(NULL) +
  ylab(NULL) +
  # The legend title is set here. element_text("PCI") would pass "PCI" as the
  # font family rather than as the title text.
  labs(
    title = "Provincial Competitiveness Index (PCI) 2018",
    caption = "http://pci2018.pcivietnam.vn/",
    fill = "PCI"
  ) +
  scale_fill_viridis() +
  theme_minimal() +
  theme(
    legend.position = "left",
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    plot.caption = element_text(face = "italic"),
    axis.line = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

# To create a single plot from the two plots, we can use:
#   - plot_grid() in the cowplot package,
#   - ggarrange() in the ggpubr package, or
#   - grid.arrange() in the gridExtra package.