# Data Visualization (GSO, Part 4)
#
# Nguyen Chi Dung

#=======================================
#      Mapping / Choropleth Maps 
#=======================================


#---------------------------------------
#     Mapping (bản đồ hành chính)
#---------------------------------------

# Download Vietnam's province-level (GADM level 1) boundaries.
# getData() is called through the raster namespace, so the package never has
# to be attached with library() and then removed again with detach().
vietnam_province <- raster::getData("GADM", country = "Vietnam", level = 1)

# Check what kind of object was returned:
library(tidyverse)
class(vietnam_province)

## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

# Convert it to a plain data frame, using NAME_1 as the region identifier:
vietnam_df <- fortify(vietnam_province, region = "NAME_1")

# Preview the first rows:
head(vietnam_df)

##       long      lat order  hole piece       id      group
## 1 105.1169 10.95687     1 FALSE     1 An Giang An Giang.1
## 2 105.1173 10.95274     2 FALSE     1 An Giang An Giang.1
## 3 105.1177 10.94941     3 FALSE     1 An Giang An Giang.1
## 4 105.1177 10.94706     4 FALSE     1 An Giang An Giang.1
## 5 105.1165 10.94386     5 FALSE     1 An Giang An Giang.1
## 6 105.1153 10.94174     6 FALSE     1 An Giang An Giang.1

# Distinct region identifiers, i.e. the provinces:
province <- unique(vietnam_df$id)
province

##  [1] "An Giang"          "B<U+1EA1>c Liêu"   "B<U+1EAF>c Giang" 
##  [4] "B<U+1EAF>c K<U+1EA1>n" "B<U+1EAF>c Ninh"   "B<U+1EBF>n Tre"   
##  [7] "Bà R<U+1ECB>a - Vung Tàu" "Bình Ð<U+1ECB>nh"  "Bình Duong"       
## [10] "Bình Phu<U+1EDB>c" "Bình Thu<U+1EAD>n" "C<U+1EA7>n Tho"   
## [13] "Cà Mau"            "Cao B<U+1EB1>ng"   "Ð<U+1EAF>k L<U+1EAF>k"
## [16] "Ð<U+1ED3>ng Nai"   "Ð<U+1ED3>ng Tháp"  "Ðà N<U+1EB5>ng"   
## [19] "Ðak Nông"          "Ði<U+1EC7>n Biên"  "Gia Lai"          
## [22] "H<U+1EA3>i Duong"  "H<U+1EA3>i Phòng"  "H<U+1EAD>u Giang" 
## [25] "H<U+1ED3> Chí Minh city" "Hà Giang"          "Hà N<U+1ED9>i"    
## [28] "Hà Nam"            "Hà Tinh"           "Hòa Bình"         
## [31] "Hung Yên"          "Khánh Hòa"         "Kiên Giang"       
## [34] "Kon Tum"           "L<U+1EA1>ng Son"   "Lai Châu"         
## [37] "Lâm Ð<U+1ED3>ng"   "Lào Cai"           "Long An"          
## [40] "Nam Ð<U+1ECB>nh"   "Ngh<U+1EC7> An"    "Ninh Bình"        
## [43] "Ninh Thu<U+1EAD>n" "Phú Th<U+1ECD>"    "Phú Yên"          
## [46] "Qu<U+1EA3>ng Bình" "Qu<U+1EA3>ng Nam"  "Qu<U+1EA3>ng Ngãi"
## [49] "Qu<U+1EA3>ng Ninh" "Qu<U+1EA3>ng Tr<U+1ECB>" "Sóc Trang"        
## [52] "Son La"            "Tây Ninh"          "Th<U+1EEB>a Thiên - Hu<U+1EBF>"
## [55] "Thái Bình"         "Thái Nguyên"       "Thanh Hóa"        
## [58] "Ti<U+1EC1>n Giang" "Trà Vinh"          "Tuyên Quang"      
## [61] "Vinh Long"         "Vinh Phúc"         "Yên Bái"

# Map of Vietnam:
theme_set(theme_minimal())

m1 <- ggplot() +
  geom_polygon(
    data = vietnam_df,
    mapping = aes(x = long, y = lat, group = group)
  )

m1

# Adjust the aspect ratio:
m1 + coord_fixed(ratio = 1)

m1 + coord_equal()

# Administrative map down to province level (style 1):

ggplot() +
  geom_polygon(
    data = vietnam_df,
    mapping = aes(x = long, y = lat, group = group),
    fill = "gray80",
    color = "blue"
  ) +
  coord_equal()

# Suppose we want to mark the position of Ha Noi (https://www.google.com):

ha_noi <- data.frame(lat = 21.040002, long = 105.834388)

ggplot() +
  geom_polygon(
    data = vietnam_df,
    mapping = aes(x = long, y = lat, group = group),
    fill = "gray80",
    color = "blue"
  ) +
  coord_equal() +
  geom_point(
    data = ha_noi,
    mapping = aes(x = long, y = lat),
    color = "red",
    size = 3
  )

# Another style: outlines only, no fill.
ggplot() +
  geom_polygon(
    data = vietnam_df,
    mapping = aes(x = long, y = lat, group = group),
    fill = NA,
    color = "red"
  ) +
  coord_equal()

# Fill each province with its own colour to tell them apart:
ggplot() +
  geom_polygon(
    data = vietnam_df,
    mapping = aes(x = long, y = lat, group = group, fill = id),
    show.legend = FALSE
  ) +
  coord_equal()

# Add the borders between provinces (approach 1: polygon outline colour):
ggplot() +
  geom_polygon(
    data = vietnam_df,
    mapping = aes(x = long, y = lat, group = group, fill = id),
    show.legend = FALSE,
    color = "grey50"
  ) +
  coord_equal()

# Approach 2: draw the borders as a separate path layer on top of the fill.
ggplot() +
  geom_polygon(
    data = vietnam_df,
    mapping = aes(x = long, y = lat, group = group, fill = id),
    show.legend = FALSE
  ) +
  geom_path(
    data = vietnam_df,
    mapping = aes(x = long, y = lat, group = group),
    color = "grey50",
    size = 0.1
  ) +
  coord_equal()

#---------------------------
#     Choropleth Maps 
#---------------------------
# https://rpubs.com/chidungkt/388218
# https://rpubs.com/chidungkt/388184
# https://rpubs.com/chidungkt/388254 

# Poverty data (taken from http://www.gso.gov.vn/default_en.aspx?tabid=783):

poverty <- read.csv(
  file = "D:\\GSO_R_Course\\data_for_visualization\\E11.35.csv",
  sep = ";"
)

# Preview:
head(poverty)

##                    ï..Poverty.rate.by.province.by.Cities..provincies.and.Year
## Cities, provincies                                                       2015
##  WHOLE COUNTRY                                                            7.0
## Red River Delta                                                           3.2
## Ha Noi                                                                    1.8
## Ha Tay                                                                     ..
## Vinh Phuc                                                                 3.7

# Copy the row names into an id column, transliterated to Latin-ASCII:
library(stringi)
library(magrittr)

poverty <- poverty %>%
  mutate(id = stri_trans_general(rownames(.), "Latin-ASCII"))
head(poverty)

##   ï..Poverty.rate.by.province.by.Cities..provincies.and.Year
## 1                                                       2015
## 2                                                        7.0
## 3                                                        3.2
## 4                                                        1.8
## 5                                                         ..
## 6                                                        3.7
##                   id
## 1 Cities, provincies
## 2      WHOLE COUNTRY
## 3    Red River Delta
## 4             Ha Noi
## 5             Ha Tay
## 6          Vinh Phuc

# Rename the two columns:
names(poverty) <- c("poverty", "id")

# Drop the first three rows:
poverty <- slice(poverty, -(1:3))
str(poverty)

## 'data.frame':    69 obs. of  2 variables:
##  $ poverty: Factor w/ 57 levels "..","0.0","0.7",..: 5 1 34 22 38 32 23 32 39 43 ...
##  $ id     : chr  "Ha Noi" "Ha Tay" "Vinh Phuc" "Bac Ninh" ...

# Compare with the province names used by the map:
head(province)

## [1] "An Giang"  "B<U+1EA1>c Liêu" "B<U+1EAF>c Giang" "B<U+1EAF>c K<U+1EA1>n" "B<U+1EAF>c Ninh" "B<U+1EBF>n Tre"

# Transliterate the map ids to Latin-ASCII as well:
vietnam_df <- mutate(vietnam_df, id = stri_trans_general(id, "Latin-ASCII"))

# Province names of Vietnam after transliteration:
province <- unique(vietnam_df$id)
head(province)

## [1] "An Giang"  "Bac Lieu"  "Bac Giang" "Bac Kan"   "Bac Ninh"  "Ben Tre"

# Ids that occur in the poverty table but not on the map:
setdiff(poverty$id, province)

##  [1] "Ha Tay"                                        
##  [2] "Northern midlands and mountain areas"          
##  [3] "Northern Central area and Central coastal area"
##  [4] "Thua Thien-Hue"                                
##  [5] "Quang  Nam"                                    
##  [6] "Quang  Ngai"                                   
##  [7] "Khanh  Hoa"                                    
##  [8] "Ninh  Thuan"                                   
##  [9] "Central Highlands"                             
## [10] "South East"                                    
## [11] "Mekong River Delta"                            
## [12] "Kien  Giang"

# Some ids therefore need to be renamed so they match the map.
# Runs of spaces are collapsed to a single space, which covers "Quang  Nam",
# "Quang  Ngai", "Khanh  Hoa", "Ninh  Thuan" and "Kien  Giang" without listing
# each one by hand; only the hyphen spelling of Thua Thien - Hue is recoded
# explicitly.
poverty <- poverty %>%
  mutate(
    id = gsub(" +", " ", id),
    id = if_else(id == "Thua Thien-Hue", "Thua Thien - Hue", id)
  )

# Check again which ids still have no counterpart on the map.
# Plain `<-` is used: `->>` starts its lookup in the enclosing environments
# and is not meant for ordinary top-level assignment.
khac_biet <- setdiff(poverty$id, province)
khac_biet

## [1] "Ha Tay"                                        
## [2] "Northern midlands and mountain areas"          
## [3] "Northern Central area and Central coastal area"
## [4] "Central Highlands"                             
## [5] "South East"                                    
## [6] "Mekong River Delta"

# Keep only the rows whose id exists on the map:
poverty <- filter(poverty, !id %in% khac_biet)
dim(poverty)

## [1] 63  2

# Convert the poverty column to numeric. It goes through character first
# because str() reported the column as a factor; entries that are not numbers
# become NA.
poverty <- mutate(poverty, rate = as.numeric(as.character(poverty)))

# Join the rates onto the polygon data:
vietnam_df_poverty <- right_join(vietnam_df, poverty, by = "id")

# Draft poverty-rate map. Assigned with `<-` rather than the global
# right-assignment `->>`; at top level the resulting binding is the same.
m1 <- ggplot() +
  geom_polygon(
    data = vietnam_df_poverty,
    mapping = aes(x = long, y = lat, group = group, fill = rate),
    color = "white"
  ) +
  coord_equal() +
  labs(
    title = "Poverty Rate in Vietnam by Province",
    subtitle = "Note: Data Is Not Available for\nVietnam's Paracel and Spratly Islands",
    caption = "Data Source: General Statistics Office Of Vietnam"
  )

m1

# Refine the theme: bold dark text, no axis decorations, a faint major grid
# and the same "#f5f5f2" fill behind plot, panel and legend.

m1 +
  theme(
    text = element_text(color = "#22211d", face = "bold"),
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "#ebebe5", size = 0.2),
    plot.background = element_rect(fill = "#f5f5f2", color = NA),
    panel.background = element_rect(fill = "#f5f5f2", color = NA),
    legend.background = element_rect(fill = "#f5f5f2", color = NA),
    panel.border = element_blank()
  )

# In that case it is better to write a function that can be reused:

# Theme for maps: bold dark text, no axis decorations, a faint major grid and
# "#f5f5f2" backgrounds for plot, panel and legend.
#
# Arguments:
#   ... Optional theme elements, forwarded to a final theme() call so a
#       caller can override or extend the defaults. Previously these were
#       accepted but silently ignored.
# Returns: a ggplot2 theme object to be added to a plot with `+`. Called with
#   no arguments it sets the same elements as before.
my_theme_for_map <- function(...) {
  theme(
    text = element_text(color = "#22211d", face = "bold"),
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "#ebebe5", size = 0.2),
    plot.background = element_rect(fill = "#f5f5f2", color = NA),
    panel.background = element_rect(fill = "#f5f5f2", color = NA),
    legend.background = element_rect(fill = "#f5f5f2", color = NA),
    panel.border = element_blank()
  ) +
    # Added as a separate theme() so an element named in `...` overrides the
    # default instead of clashing with it as a duplicated argument.
    theme(...)
}

# So (assigned with `<-` instead of the global right-assignment `->>`):
m2 <- m1 + my_theme_for_map()
m2

# Further refinement:

library(viridis)
# The legend title is passed as `name =` explicitly rather than as a trailing
# unnamed argument, matching the two variants below.
m2 +
  scale_fill_viridis(name = "Poverty Rate", direction = -1, option = "A") +
  theme(legend.position = c(0.2, 0.5))

# Or another style: a thin horizontal colour bar above the map.
m2 +
  scale_fill_viridis(
    direction = -1,
    option = "B",
    name = "Poverty Rate",
    guide = guide_colourbar(
      direction = "horizontal",
      barheight = unit(2, units = "mm"),
      barwidth = unit(50, units = "mm"),
      draw.ulim = FALSE, # spelled out: `F` is an ordinary, reassignable variable
      title.hjust = 0.5,
      label.hjust = 0.5,
      title.position = "top"
    )
  ) +
  theme(legend.position = "top")

# Same idea with option "D" and the legend placed inside the plot area.
m2 +
  scale_fill_viridis(
    direction = -1,
    option = "D",
    name = "Poverty Rate",
    guide = guide_colourbar(
      direction = "horizontal",
      barheight = unit(3, units = "mm"),
      barwidth = unit(50, units = "mm"),
      draw.ulim = FALSE,
      title.hjust = 0.5,
      label.hjust = 0.5,
      title.position = "top"
    )
  ) +
  theme(legend.position = c(0.3, 0.45))

# Nếu cần tham khảo thêm về: 
# 1. cách sử dụng màu sắc tại https://rpubs.com/chidungkt/388254
# 2. Bản đồ cấp xã - hoặc huyện tại https://rpubs.com/chidungkt/388184

#==============================================
#    Lollipop Chart / Cleveland’s Dot
#==============================================

#----------------------------
#      Cleveland’s Dot
#----------------------------

# Load the data (source: http://www.gso.gov.vn/default_en.aspx?tabid=783):

library(readxl)
income <- read_excel(
  path = "D:\\GSO_R_Course\\data_for_visualization\\E11.24.xlsx"
)

# Row 3 of the sheet holds the variable names. Based on that, build
# "<measure>_<year>" names: the six measures repeated for each of four years.
col_names <- c(
  "province",
  paste(
    rep(c("gen", "q1", "q2", "q3", "q4", "q5"), times = 4),
    rep(c("2010", "2012", "2014", "2016"), each = 6),
    sep = "_"
  )
)

# Rename the columns:
names(income) <- col_names

# Drop rows containing NA:
income <- na.omit(income)
# Next, remove the rows that are not needed (manual but easy to follow);
# print the names first to find their positions:
income$province

##  [1] "WHOLE COUNTRY"                                 
##  [2] "Red River Delta"                               
##  [3] "Ha Noi"                                        
##  [4] "Vinh Phuc"                                     
##  [5] "Bac Ninh"                                      
##  [6] "Quang Ninh"                                    
##  [7] "Hai Duong"                                     
##  [8] "Hai Phong"                                     
##  [9] "Hung Yen"                                      
## [10] "Thai Binh"                                     
## [11] "Ha Nam"                                        
## [12] "Nam Dinh"                                      
## [13] "Ninh Binh"                                     
## [14] "Northern midlands and mountain areas"          
## [15] "Ha Giang"                                      
## [16] "Cao Bang"                                      
## [17] "Bac Kan"                                       
## [18] "Tuyen Quang"                                   
## [19] "Lao Cai"                                       
## [20] "Yen Bai"                                       
## [21] "Thai Nguyen"                                   
## [22] "Lang Son"                                      
## [23] "Bac Giang"                                     
## [24] "Phu Tho"                                       
## [25] "Dien Bien"                                     
## [26] "Lai Chau"                                      
## [27] "Son La"                                        
## [28] "Hoa Binh"                                      
## [29] "Northern Central area and Central coastal area"
## [30] "Thanh Hoa"                                     
## [31] "Nghe An"                                       
## [32] "Ha Tinh"                                       
## [33] "Quang Binh"                                    
## [34] "Quang Tri"                                     
## [35] "Thua Thien-Hue"                                
## [36] "Da Nang"                                       
## [37] "Quang  Nam"                                    
## [38] "Quang  Ngai"                                   
## [39] "Binh Dinh"                                     
## [40] "Phu Yen"                                       
## [41] "Khanh  Hoa"                                    
## [42] "Ninh  Thuan"                                   
## [43] "Binh Thuan"                                    
## [44] "Central Highlands"                             
## [45] "Kon Tum"                                       
## [46] "Gia Lai"                                       
## [47] "Dak Lak"                                       
## [48] "Dak Nong"                                      
## [49] "Lam Dong"                                      
## [50] "South East"                                    
## [51] "Binh Phuoc"                                    
## [52] "Tay Ninh"                                      
## [53] "Binh Duong"                                    
## [54] "Dong Nai"                                      
## [55] "Ba Ria - Vung Tau"                             
## [56] "Ho Chi Minh city"                              
## [57] "Mekong River Delta"                            
## [58] "Long An"                                       
## [59] "Tien Giang"                                    
## [60] "Ben Tre"                                       
## [61] "Tra Vinh"                                      
## [62] "Vinh Long"                                     
## [63] "Dong Thap"                                     
## [64] "An Giang"                                      
## [65] "Kien  Giang"                                   
## [66] "Can Tho"                                       
## [67] "Hau Giang"                                     
## [68] "Soc Trang"                                     
## [69] "Bac Lieu"                                      
## [70] "Ca Mau"

# Remove rows 1, 2, 14, 29, 44, 50 and 57 of the list printed above, i.e. the
# national total and the six regional aggregates.
income <- slice(income, -c(1, 2, 14, 29, 44, 50, 57))

# Take the two years 2010 and 2016, compute the relative growth `per`, flag
# each province as above/below the mean growth, and sort by growth so the
# factor levels follow that order.
df_1016 <- income %>%
  select(province, gen_2016, gen_2010) %>%
  mutate(
    gen_2010 = as.numeric(gen_2010),
    gen_2016 = as.numeric(gen_2016),
    province = stri_trans_general(province, "Latin-ASCII"),
    per = gen_2016 / gen_2010 - 1,
    above = if_else(per >= mean(per), "Above Average", "Below Average")
  ) %>%
  arrange(per) %>%
  mutate(province = factor(province, levels = province))


# Draft plot. It shows that Hai Phong, Bac Ninh and
# Thai Nguyen are the three fastest-growing provinces.
# The title names 63 provinces: at this point df_1016 is built from every
# province row, and the 30-province subset is only created further down.

p <- df_1016 %>%
  ggplot(aes(province, per)) +
  # One stem per province, from 0 to its growth value.
  geom_segment(
    aes(x = province, xend = province, y = 0, yend = per),
    color = "#0e668b",
    size = 1.2
  ) +
  geom_point(size = 4, color = "#0e668b") +
  coord_flip() +
  labs(
    x = NULL,
    y = NULL,
    title = "Monthly Average Income Growth at Current Prices\nfrom 2010 to 2016 for 63 Provinces of Vietnam",
    caption = "Data Source: General Statistics Office Of Vietnam"
  )

p

# Refinement: start from theme_bw(), use a "#f7f7f7" background, keep only the
# major x grid lines and remove ticks and the panel border.
p +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#f7f7f7"),
    panel.background = element_rect(fill = "#f7f7f7"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(),
    axis.ticks = element_blank(),
    panel.border = element_blank()
  )

# Better written as a function:

# Theme for the dot plots: theme_bw() with "#f7f7f7" plot and panel
# backgrounds, only the major x grid lines, no ticks and no panel border.
#
# Arguments:
#   ... Optional theme elements, forwarded to a final theme() call so a
#       caller can override or extend the defaults. Previously these were
#       accepted but silently ignored.
# Returns: a ggplot2 theme object to be added to a plot with `+`. Called with
#   no arguments it sets the same elements as before.
my_theme <- function(...) {
  theme_bw() +
    theme(
      plot.background = element_rect(fill = "#f7f7f7"),
      panel.background = element_rect(fill = "#f7f7f7"),
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.major.x = element_line(),
      axis.ticks = element_blank(),
      panel.border = element_blank()
    ) +
    # Separate theme() call so an element named in `...` overrides the default
    # instead of clashing with it as a duplicated argument.
    theme(...)
}

# Apply the function:
p + my_theme()

# For illustration, suppose only 30 provinces are selected (the first 30 rows):

df_1016 <- income %>%
  select(province, gen_2016, gen_2010) %>%
  slice(1:30) %>%
  mutate(
    gen_2010 = as.numeric(gen_2010),
    gen_2016 = as.numeric(gen_2016),
    province = stri_trans_general(province, "Latin-ASCII"),
    per = gen_2016 / gen_2010 - 1,
    above = if_else(per >= mean(per), "Above Average", "Below Average")
  ) %>%
  arrange(per) %>%
  # Freeze the sorted order in the factor levels so the plot keeps it.
  mutate(province = factor(province, levels = province))


# Draw and polish in one go:

library(scales)

# Assigned with `<-` instead of the global right-assignment `->>`; at top
# level the resulting binding is the same.
g <- df_1016 %>%
  ggplot(aes(province, per)) +
  geom_segment(
    aes(x = province, xend = province, y = 0, yend = per),
    color = "#0e668b",
    size = 1.2
  ) +
  geom_point(size = 4, color = "#0e668b") +
  coord_flip() +
  labs(
    x = NULL,
    y = NULL,
    title = "Monthly Average Income Growth at Current Prices\nfrom 2010 to 2016 for 30 Provinces Selected",
    caption = "Data Source: General Statistics Office Of Vietnam"
  ) +
  my_theme() +
  # Show the growth ratio as a percentage.
  scale_y_continuous(labels = percent)

g

# Adjust the province labels:
g + theme(axis.text.y = element_text(face = "bold", color = "#0e668b", size = 9))

# Highlight the five provinces with the fastest growth by redrawing their
# stems and points in orange on top of the base plot.
# (`<-` replaces the global right-assignment `->>` throughout this section.)
g1 <- g +
  geom_segment(
    data = df_1016 %>% top_n(5, per),
    aes(x = province, xend = province, y = 0, yend = per),
    color = "orange",
    size = 1.2
  ) +
  geom_point(
    data = df_1016 %>% top_n(5, per),
    size = 4,
    color = "orange"
  )

g1

# Or additionally mark the five at the bottom of the table in purple:

g2 <- g1 +
  geom_segment(
    data = df_1016 %>% top_n(5, -per),
    aes(x = province, xend = province, y = 0, yend = per),
    color = "purple",
    size = 1.2
  ) +
  geom_point(
    data = df_1016 %>% top_n(5, -per),
    size = 4,
    color = "purple"
  )

g2

# Further adjustment: fixed breaks and limits with no padding on the value
# axis. This replaces the y scale that g already carries.
g3 <- g2 +
  scale_y_continuous(
    expand = c(0, 0),
    labels = percent,
    breaks = seq(0, 1.75, by = 0.25),
    limits = c(0, 1.8)
  )

g3

# Colour the two groups (above / below average growth).
# Assigned with `<-` instead of the global right-assignment `->>`.
g4 <- df_1016 %>%
  ggplot(aes(province, per, color = above)) +
  geom_segment(
    aes(x = province, xend = province, y = 0, yend = per),
    size = 1.2
  ) +
  geom_point(size = 4) +
  # Unnamed values are matched to the levels of `above` in order.
  scale_color_manual(values = c("orange", "#0e668b")) +
  coord_flip() +
  my_theme() +
  theme(
    legend.position = "none",
    axis.text.y = element_text(face = "bold", size = 8)
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    labels = percent_format(),
    breaks = seq(0, 1.75, by = 0.25),
    limits = c(0, 1.8)
  )

g4

# Further adjustment: add one arrow and one label per group.
# The arrows are drawn with annotate("segment") instead of geom_segment()
# with constant aesthetics: a geom layer inherits the plot data and draws the
# same arrow once for every row, whereas annotate() draws it exactly once.
# Assigned with `<-` instead of the global right-assignment `->>`.

g5 <- g4 +
  annotate(
    "segment",
    x = "Vinh Phuc", xend = "Quang Binh",
    y = 1.45, yend = 1.45,
    arrow = arrow(length = unit(0.2, "cm")),
    color = "orange",
    size = 1
  ) +
  annotate(
    "segment",
    x = "Lao Cai", xend = "Quang Tri",
    y = 1.45, yend = 1.45,
    arrow = arrow(length = unit(0.2, "cm")),
    color = "#0e668b",
    size = 1
  ) +
  annotate(
    "text",
    x = "Vinh Phuc",
    y = 1.5,
    label = "Above Average",
    color = "orange",
    size = 4,
    hjust = 0.1,
    vjust = -1
  ) +
  annotate(
    "text",
    x = "Lao Cai",
    y = 1.5,
    label = "Below Average",
    color = "#0e668b",
    size = 4,
    hjust = 0.1,
    vjust = 2
  )

g5

# One more adjustment: title, subtitle and caption.
g5 +
  labs(
    x = NULL,
    y = NULL,
    title = "Monthly Average Income Growth at Current Prices from 2010 to 2016 for\n30 Provinces Selected.",
    subtitle = "According to a forecast by PricewaterhouseCoopers in February 2017, Vietnam may be the fastest-growing\nof the world's economies, with a potential annual GDP growth rate of about 5.1%,\nwhich would make its economy the 20th-largest in the world by 2050.",
    caption = "Data Source: General Statistics Office Of Vietnam"
  )

#-----------------------------
#       Lollipop Chart 
#-----------------------------

# Sort by gen_2016 and keep that order in the province factor levels:

income <- income %>%
  select(province, gen_2010, gen_2016) %>%
  mutate(
    gen_2010 = as.numeric(gen_2010),
    gen_2016 = as.numeric(gen_2016)
  ) %>%
  arrange(gen_2016) %>%
  mutate(province = factor(province, levels = province))


# Approach 1: a segment from the 2010 to the 2016 income for each province,
# with one point per year. The constant colour keys "i_love" and "you" only
# exist to produce a legend; the scale relabels them as 2010 and 2016.
# Assigned with `<-` instead of the global right-assignment `->>`.

p <- income %>%
  ggplot(aes(x = province)) +
  geom_segment(
    aes(y = gen_2010, yend = gen_2016, x = province, xend = province),
    color = "gray40",
    size = 1
  ) +
  geom_point(aes(x = province, y = gen_2010, color = "i_love"), size = 3.5) +
  geom_point(aes(x = province, y = gen_2016, color = "you"), size = 3.5) +
  coord_flip() +
  scale_color_manual(
    name = "Monthly Income:",
    labels = c(2010, 2016),
    values = c("#FFB5C5", "#EE3A8C")
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Monthly Average Income Growth at Current Prices\nfrom 2010 to 2016 for 63 Provinces of Vietnam.",
    subtitle = "The data draw on GSO's compilation of internationally comparable statistics about\nglobal development and the quality of Vietnam people's lives.",
    caption = "Data Source: General Statistics Office Of Vietnam"
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    breaks = seq(500, 6000, by = 500),
    limits = c(500, 5700)
  )
p

# Adjust the look:
library(extrafont)
# The "win" font device is specific to Windows, so the fonts are registered
# only there; elsewhere the call is skipped so the rest of the script can run.
# NOTE(review): on other platforms the "Georgia" family used below may need to
# be registered another way — confirm.
if (.Platform$OS.type == "windows") {
  extrafont::loadfonts(device = "win")
}

p +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "white"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    text = element_text(family = "Georgia", size = 13, color = "black"),
    plot.subtitle = element_text(color = "gray20", size = 10, face = "italic"),
    legend.title = element_text(size = 10, color = "gray20"),
    legend.position = "top"
  )

# Wrap the theme in a function:

# Theme for the lollipop charts: theme_bw() on a white background with only
# the major x grid lines, no ticks or panel border, Georgia text, an italic
# subtitle and the legend on top.
#
# Arguments:
#   ... Optional theme elements, forwarded to a final theme() call so a
#       caller can override or extend the defaults. Previously these were
#       accepted but silently ignored.
# Returns: a ggplot2 theme object to be added to a plot with `+`. Called with
#   no arguments it sets the same elements as before.
my_theme_for_lollipop <- function(...) {
  theme_bw() +
    theme(
      plot.background = element_rect(fill = "white"),
      panel.grid.minor = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.major.x = element_line(),
      axis.ticks = element_blank(),
      panel.border = element_blank(),
      text = element_text(family = "Georgia", size = 13, color = "black"),
      plot.subtitle = element_text(color = "gray20", size = 10, face = "italic"),
      legend.title = element_text(size = 10, color = "gray20"),
      legend.position = "top"
    ) +
    # Separate theme() call so an element named in `...` overrides the default
    # instead of clashing with it as a duplicated argument.
    theme(...)
}

# An improved version: a dashed guide line runs from the axis start (500) to
# each province's 2010 value, followed by the solid 2010-to-2016 segment.

income %>%
  select(province, gen_2010, gen_2016) %>%
  mutate(
    gen_2010 = as.numeric(gen_2010),
    gen_2016 = as.numeric(gen_2016)
  ) %>%
  arrange(gen_2016) %>%
  mutate(province = factor(province, levels = province)) %>%
  # Constant start value for the dashed guide segments.
  mutate(th = 500) %>%
  ggplot(aes(province)) +
  geom_segment(
    aes(y = th, yend = gen_2010, x = province, xend = province),
    color = "gray80",
    linetype = 2
  ) +
  geom_segment(
    aes(y = gen_2010, yend = gen_2016, x = province, xend = province),
    color = "gray40",
    size = 1
  ) +
  geom_point(aes(x = province, y = gen_2010, color = "a"), size = 3.5) +
  geom_point(aes(x = province, y = gen_2016, color = "b"), size = 3.5) +
  coord_flip() +
  my_theme_for_lollipop() +
  scale_color_manual(
    name = "Monthly Income:",
    labels = c(2010, 2016),
    values = c("#FFB5C5", "#EE3A8C")
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Monthly Average Income Growth at Current Prices from\n2010 to 2016 for 63 Provinces of Vietnam.",
    subtitle = "The data draw on GSO's compilation of internationally comparable statistics about\nglobal development and the quality of Vietnam people's lives.",
    caption = "Data Source: General Statistics Office Of Vietnam"
  ) +
  scale_y_continuous(
    breaks = seq(500, 5500, by = 500),
    expand = c(0, 0),
    limits = c(500, 5700)
  )