# Data Visualization (GSO, Part 3)
#
# Nguyen Chi Dung

#--------------------------------------------------------------------------------
#                Bubble Chart
# Tham khảo trang 38: https://www.amazon.com/Great-Escape-Health-Origins-Inequality/dp/0691165629
#--------------------------------------------------------------------------------

# Load the package and download the data from the World Bank:
library(WDI)
mydf <- WDI(
  indicator = c(
    "SP.POP.TOTL",
    # Life expectancy:
    "SP.DYN.LE00.IN",
    # GDP per capita:
    "NY.GDP.PCAP.PP.CD"
  ),
  country = "all",
  start = 2016,
  end = 2017
)

# Quick look at the first rows:
head(mydf, n = 6)

##   iso2c                                     country year SP.POP.TOTL
## 1    1A                                  Arab World 2016   406452690
## 2    1A                                  Arab World 2017          NA
## 3    1W                                       World 2016  7444027227
## 4    1W                                       World 2017          NA
## 5    4E East Asia & Pacific (excluding high income) 2016  2053299126
## 6    4E East Asia & Pacific (excluding high income) 2017          NA
##   SP.DYN.LE00.IN NY.GDP.PCAP.PP.CD
## 1       71.19846          16726.72
## 2             NA                NA
## 3       72.03526          16216.93
## 4             NA                NA
## 5       74.49253          13839.97
## 6             NA                NA

# Give the indicator columns short names and keep only the 2016 rows:
library(tidyverse)
library(magrittr)

mydf_small <- mydf %>%
  rename(
    pop = SP.POP.TOTL,
    life = SP.DYN.LE00.IN,
    gdp = NY.GDP.PCAP.PP.CD
  ) %>%
  filter(year == 2016)

# Class of every column:
sapply(mydf_small, class)

##       iso2c     country        year         pop        life         gdp 
## "character" "character"   "numeric"   "numeric"   "numeric"   "numeric"

# Fetch the country-group classification data (takes roughly a minute,
# depending on the network):
d <- WDIcache()

# Note that `d` is a list:
str(d)

## List of 2
##  $ series : chr [1:16695, 1:5] "5.1.16_ETH.TOTA.AID.USAID" "5.1.15_MOZ.TOTA.AID.USAID" "5.1.15_ETH.TOTA.AID.UNICEF" "5.1.14_MOZ.TOTA.AID.UNICEF" ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:5] "indicator" "name" "description" "sourceDatabase" ...
##  $ country: chr [1:304, 1:9] "ABW" "AFG" "AFR" "AGO" ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:9] "iso3c" "iso2c" "country" "region" ...

# Work with the second element of the list, converted to a data frame:
d2 <- as.data.frame(d[[2]])
colnames(d2)

## [1] "iso3c"     "iso2c"     "country"   "region"    "capital"   "longitude"
## [7] "latitude"  "income"    "lending"

# Quick look at the distinct income categories:
unique(d2$income)

## [1] High income         Low income          Aggregates         
## [4] Lower middle income Upper middle income
## 5 Levels: Aggregates High income Low income ... Upper middle income

# Keep only the needed columns, turn any factor column into character, and
# drop the rows whose income category is "Aggregates":
income_group <- d2 %>%
  select(iso2c, region, income) %>%
  mutate_if(is.factor, as.character) %>%
  filter(income != "Aggregates")

# Join the two data frames and drop every row that contains an NA:

total_df <- mydf_small %>%
  right_join(income_group, by = "iso2c") %>%
  na.omit()

# Income groups: number of rows of total_df in each income category
# (base R table of the `income` column).
total_df$income %>% table()

## .
##         High income          Low income Lower middle income 
##                  51                  27                  51 
## Upper middle income 
##                  48

# The same tally with dplyr: group by income, then count the rows per group.
total_df %>% 
  group_by(income) %>% 
  count()

## # A tibble: 4 x 2
## # Groups:   income [4]
##   income                  n
##   <chr>               <int>
## 1 High income            51
## 2 Low income             27
## 3 Lower middle income    51
## 4 Upper middle income    48

# Rough first sketch (theme_minimal becomes the default theme from here on):
theme_set(theme_minimal())
ggplot(total_df, aes(x = gdp, y = life)) +
  geom_point()

# Check the spread of GDP per capita:
range(total_df$gdp)

## [1]    698.7067 127480.4825

# Given that range, keep only gdp < 75000 and set explicit axis breaks:
library(scales)

ggplot(filter(total_df, gdp < 75000), aes(x = gdp, y = life)) +
  geom_point() +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  scale_x_continuous(labels = dollar, breaks = seq(0, 75000, 10000))

# Additionally show population size (point size) and income group (colour):

total_df %>%
  filter(gdp < 75000) %>%
  ggplot() +
  geom_point(aes(x = gdp, y = life, size = pop, color = income)) +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  scale_x_continuous(labels = dollar, breaks = seq(0, 75000, 10000))

# A more polished version:

total_df %>%
  filter(gdp < 75000) %>%
  ggplot(aes(gdp, life, size = pop, color = income)) +
  geom_point(alpha = 0.3) +
  scale_x_continuous(breaks = seq(0, 75000, 10000), labels = dollar) +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  # Widen the size range so the bubbles differ visibly by population.
  scale_size(range = c(1, 30)) +
  scale_color_discrete(name = "Income Group:") +
  # Hide the size legend ("none" replaces the deprecated `FALSE`).
  guides(size = "none") +
  # Put the colour legend inside the panel, on a tinted box.
  theme(
    legend.position = c(0.859, 0.26),
    legend.title = element_text(size = 12, face = "bold"),
    legend.background = element_rect(fill = "#fff7ec", size = 0.2, color = "grey70")
  ) +
  labs(x = "GDP per capita",
       y = "Life expectancy",
       title = "The relationship between Life Expectancy and GDP per capita in 2016",
       subtitle = "According to WHO definitions, Life Expectancy at birth reflects the overall mortality level of\na population and it is defined as the average number of years that a newborn is\nexpected to live if current mortality rates continue to apply. ",
       caption =  "Data Source: The World Bank")

# Or, as on page 38 of A. Deaton's book, with a fitted curve:

total_df %>%
  filter(gdp < 75000) %>%
  ggplot(aes(gdp, life, size = pop, color = income)) +
  # One curve for all countries (group = 1): life expectancy regressed on
  # log(GDP per capita). The x axis is linear here, so log(x) is log(gdp).
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ log(x),
              color = "grey40", alpha = 0.1, se = FALSE) +
  geom_point(alpha = 0.3) +
  scale_x_continuous(breaks = seq(0, 75000, 10000), labels = dollar) +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  scale_size(range = c(1, 30)) +
  scale_color_discrete(name = "Income Group:") +
  # Hide the size legend ("none" replaces the deprecated `FALSE`).
  guides(size = "none") +
  theme(
    legend.position = c(0.859, 0.26),
    legend.title = element_text(size = 12, face = "bold"),
    legend.background = element_rect(fill = "#fff7ec", size = 0.2, color = "grey70")
  ) +
  labs(x = "GDP per capita",
       y = "Life expectancy",
       title = "The relationship between Life Expectancy and GDP per capita in 2016",
       subtitle = "According to WHO definitions, Life Expectancy at birth reflects the overall mortality level of\na population and it is defined as the average number of years that a newborn is\nexpected to live if current mortality rates continue to apply. ",
       caption =  "Data Source: The World Bank")

# Another version, with a log10 x axis:


total_df %>%
  filter(gdp < 75000) %>%
  ggplot(aes(gdp, life, size = pop, color = income)) +
  # scale_x_log10() transforms x before the smoother is fitted, so the model
  # already sees log10(gdp): `y ~ x` is the linear-in-log fit. Writing
  # `y ~ log(x)` here would regress on log(log10(gdp)) instead.
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ x,
              color = "grey40", alpha = 0.1, se = FALSE) +
  geom_point(alpha = 0.3) +
  scale_x_log10(labels = dollar) +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  scale_size(range = c(1, 30)) +
  scale_color_discrete(name = "Income Group:") +
  # Hide the size legend ("none" replaces the deprecated `FALSE`).
  guides(size = "none") +
  theme(
    legend.position = c(0.15, 0.819),
    legend.title = element_text(size = 12, face = "bold"),
    legend.background = element_rect(fill = "#fff7ec", size = 0.2, color = "grey70")
  ) +
  labs(x = "GDP per capita",
       y = "Life expectancy",
       title = "The relationship between Life Expectancy and GDP per capita in 2016",
       subtitle = "According to WHO definitions, Life Expectancy at birth reflects the overall mortality level of\na population and it is defined as the average number of years that a newborn is\nexpected to live if current mortality rates continue to apply. ",
       caption =  "Data Source: The World Bank")

# Or yet another style: log10 x axis with explicit breaks:
total_df %>%
  filter(gdp < 75000) %>%
  ggplot(aes(gdp, life, size = pop, color = income)) +
  # scale_x_log10() transforms x before the smoother is fitted, so the model
  # already sees log10(gdp): `y ~ x` is the linear-in-log fit. Writing
  # `y ~ log(x)` here would regress on log(log10(gdp)) instead.
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ x,
              color = "grey40", alpha = 0.1, se = FALSE) +
  geom_point(alpha = 0.3) +
  # Breaks start at 20000: a break at 0 has no position on a log axis.
  scale_x_log10(labels = dollar, breaks = seq(20000, 80000, 20000)) +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  scale_size(range = c(1, 30)) +
  scale_color_discrete(name = "Income Group:") +
  # Hide the size legend ("none" replaces the deprecated `FALSE`).
  guides(size = "none") +
  theme(
    legend.position = c(0.15, 0.819),
    legend.title = element_text(size = 12, face = "bold"),
    legend.background = element_rect(fill = "#fff7ec", size = 0.2, color = "grey70")
  ) +
  labs(x = "GDP per capita",
       y = "Life expectancy",
       title = "The relationship between Life Expectancy and GDP per capita in 2016",
       subtitle = "According to WHO definitions, Life Expectancy at birth reflects the overall mortality level of\na population and it is defined as the average number of years that a newborn is\nexpected to live if current mortality rates continue to apply. ",
       caption =  "Data Source: The World Bank")

# Emphasise a handful of countries by labelling them:

my_country <- c("Vietnam", "China", "India", "Thailand", "Malaysia",
                "Germany", "Japan", "Nigeria", "Indonesia")

library(ggrepel)

total_df %>%
  filter(gdp < 75000) %>%
  ggplot(aes(gdp, life, size = pop, color = income)) +
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ log(x),
              color = "grey40", alpha = 0.1, se = FALSE) +
  geom_point(alpha = 0.3) +
  # Label only the selected countries. The constant colour and size given
  # here take the place of the colour/size mappings set in ggplot().
  geom_text_repel(data = total_df %>% filter(country %in% my_country),
                  aes(label = country), color = "gray20", size = 3.5, force = 19) +
  scale_x_continuous(breaks = seq(0, 75000, 10000), labels = dollar) +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  scale_size(range = c(1, 30)) +
  scale_color_discrete(name = "Income Group:") +
  # Hide the size legend ("none" replaces the deprecated `FALSE`).
  guides(size = "none") +
  theme(
    legend.position = c(0.83, 0.22),
    legend.title = element_text(size = 12, face = "bold"),
    legend.background = element_rect(fill = "#fff7ec", size = 0.2, color = "grey70")
  ) +
  labs(x = "GDP per capita",
       y = "Life expectancy",
       title = "The relationship between Life Expectancy and GDP per capita in 2016",
       subtitle = "According to WHO definitions, Life Expectancy at birth reflects the overall mortality level of\na population and it is defined as the average number of years that a newborn is\nexpected to live if current mortality rates continue to apply. ",
       caption =  "Data Source: The World Bank")

# Switch to a different theme, for example:
library(ggthemes)

total_df %>%
  filter(gdp < 75000) %>%
  ggplot(aes(gdp, life, size = pop, color = income)) +
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ log(x),
              color = "grey40", alpha = 0.1, se = FALSE) +
  geom_point(alpha = 0.3) +
  geom_text_repel(data = total_df %>% filter(country %in% my_country),
                  aes(label = country), color = "gray20", size = 3.5, force = 19) +
  scale_x_continuous(breaks = seq(0, 75000, 10000), labels = dollar) +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  scale_size(range = c(1, 30)) +
  scale_color_discrete(name = "Income Group:") +
  # Hide the size legend ("none" replaces the deprecated `FALSE`).
  guides(size = "none") +
  labs(x = "GDP per capita",
       y = "Life expectancy",
       title = "The relationship between Life Expectancy and GDP per capita in 2016",
       subtitle = "According to WHO definitions, Life Expectancy at birth reflects the overall mortality level of\na population and it is defined as the average number of years that a newborn is\nexpected to live if current mortality rates continue to apply. ",
       caption =  "Data Source: The World Bank") +
  # The complete theme goes first; the legend tweaks after it override its
  # settings.
  theme_fivethirtyeight() +
  theme(
    legend.title = element_text(size = 12, face = "bold"),
    legend.position = c(0.882, 0.27),
    legend.direction = "vertical",
    legend.background = element_rect(fill = "#fff7ec", size = 0.2, color = "grey70")
  )

#-------------------------------------
#            Line Plot
#-------------------------------------

# Simple version:
economics %>%
  ggplot(aes(date, psavert)) +
  geom_line()

# Highlight one point in time: the month with the highest saving rate.

# Take the highlighted row and the wording of the caption from the data, so
# the text cannot disagree with the marked point (the caption used to
# hard-code a year).
peak_saving <- economics %>% slice(which.max(psavert))
peak_label <- sprintf(
  "The highest saving rate was %.0f%% in %s.",
  peak_saving$psavert, format(peak_saving$date, "%Y")
)

economics %>%
  ggplot(aes(date, psavert)) +
  geom_line() +
  geom_point(data = peak_saving,
             aes(date, psavert), color = "red", size = 3) +
  annotate("text",
           label = peak_label,
           x = as.Date("1978-01-01"), y = 17.3,
           size = 4, hjust = 0, vjust = 1)

# Improvement: store the saving rate as a proportion (0.01 * percent value).

# Plain `<-` assignment; `->>` is a global assignment and is not needed at
# the top level of a script.
my_economics <- economics %>%
  mutate(psavert = 0.01 * psavert)

# Take the highlighted row and the wording of the caption from the data, so
# the text cannot disagree with the marked point (the caption used to
# hard-code a year).
peak_saving <- my_economics %>% slice(which.max(psavert))
peak_label <- sprintf(
  "The highest saving rate was %.0f%% in %s.",
  100 * peak_saving$psavert, format(peak_saving$date, "%Y")
)

p <- my_economics %>%
  ggplot(aes(date, psavert)) +
  geom_line() +
  geom_point(data = peak_saving,
             aes(date, psavert), color = "red", size = 3) +
  annotate("text",
           label = peak_label,
           x = as.Date("1978-01-01"), y = 0.173,
           size = 4, hjust = 0, vjust = 1, color = "grey40")

p

# Improve again — first check the range of the rescaled saving rate:
range(my_economics$psavert)

## [1] 0.019 0.170

# Show the y axis as percentages with evenly spaced breaks:
p +
  scale_y_continuous(breaks = seq(0.015, 0.20, 0.025), labels = percent)

# Improve further: add an arrow from the caption towards the marked point.

# Take the highlighted row and the wording of the caption from the data, so
# the text cannot disagree with the marked point (the caption used to
# hard-code a year).
peak_saving <- my_economics %>% slice(which.max(psavert))
peak_label <- sprintf(
  "The highest saving rate was %.0f%% in %s.",
  100 * peak_saving$psavert, format(peak_saving$date, "%Y")
)

my_economics %>%
  ggplot(aes(date, psavert)) +
  geom_line() +
  scale_y_continuous(labels = percent, breaks = seq(0.015, 0.18, 0.025)) +
  geom_point(data = peak_saving,
             aes(date, psavert), color = "red", size = 3) +
  annotate("text",
           label = peak_label,
           x = as.Date("1978-01-01"), y = 0.173,
           size = 4, hjust = 0, vjust = 1, color = "grey40") +
  # curvature = 0 draws a straight arrow from the caption to the left.
  annotate("curve",
           curvature = 0,
           x = as.Date("1978-01-01"),
           xend = as.Date("1976-01-01"),
           y = 0.17,
           yend = 0.17,
           arrow = arrow(angle = 20, length = unit(.2, "cm")), size = .6)

# Another way with the same visual result, which is very handy: the
# aesthetics are mapped in the layer instead of in ggplot().

# Take the highlighted row and the wording of the caption from the data, so
# the text cannot disagree with the marked point (the caption used to
# hard-code a year).
peak_saving <- my_economics %>% slice(which.max(psavert))
peak_label <- sprintf(
  "The highest saving rate was %.0f%% in %s.",
  100 * peak_saving$psavert, format(peak_saving$date, "%Y")
)

p <- my_economics %>%
  ggplot() +
  geom_line(aes(date, psavert)) +
  scale_y_continuous(labels = percent, breaks = seq(0.015, 0.18, 0.025)) +
  geom_point(data = peak_saving,
             aes(date, psavert), color = "red", size = 3) +
  annotate("text",
           label = peak_label,
           x = as.Date("1978-01-01"), y = 0.173,
           size = 4, hjust = 0, vjust = 1, color = "grey40") +
  # curvature = 0 draws a straight arrow from the caption to the left.
  annotate("curve",
           curvature = 0,
           x = as.Date("1978-01-01"),
           xend = as.Date("1976-01-01"),
           y = 0.17,
           yend = 0.17,
           arrow = arrow(angle = 20, length = unit(.2, "cm")), size = .6)

p

# Unemployment in the United States:
p1 <- ggplot(economics) +
  geom_line(aes(x = date, y = unemploy))

p1

# The presidential terms data set:

data("presidential")
head(presidential)

## # A tibble: 6 x 4
##   name       start      end        party     
##   <chr>      <date>     <date>     <chr>     
## 1 Eisenhower 1953-01-20 1961-01-20 Republican
## 2 Kennedy    1961-01-20 1963-11-22 Democratic
## 3 Johnson    1963-11-22 1969-01-20 Democratic
## 4 Nixon      1969-01-20 1974-08-09 Republican
## 5 Ford       1974-08-09 1977-01-20 Republican
## 6 Carter     1977-01-20 1981-01-20 Democratic

# Shade the background by the party in office during each term:
p1 + geom_rect(data = presidential,
               mapping = aes(xmin = start, xmax = end, fill = party),
               alpha = 0.2, ymin = -Inf, ymax = Inf)

# Improvement: only use the terms that start inside the period covered by
# `economics`:

p1 + geom_rect(data = filter(presidential,
                             start >= min(economics$date),
                             start <= max(economics$date)),
               mapping = aes(xmin = start, xmax = end, fill = party),
               alpha = 0.2, ymin = -Inf, ymax = Inf)

# Or, alternatively: keep the terms that start inside the period covered by
# `economics`, and start the series at the first of those terms.
small_pre <- presidential %>%
  filter(start >= min(economics$date),
         start <= max(economics$date))

# Plain `<-` assignment; `->>` is a global assignment and is not needed at
# the top level of a script.
p2 <- economics %>%
  filter(date >= min(small_pre$start)) %>%
  ggplot() +
  geom_line(aes(date, unemploy)) +
  # One full-height rectangle per term, filled by party.
  geom_rect(aes(xmin = start, xmax = end, fill = party),
            ymin = -Inf, ymax = Inf, alpha = 0.2,
            data = small_pre)


# Custom fill colours. Naming the values ties each colour to a party instead
# of relying on the alphabetical order of the party names.
# NOTE(review): this keeps the mapping the unnamed vector produced
# (Democratic = red, Republican = blue) — confirm that is the intended way
# round.
party_colors <- c(Democratic = "red", Republican = "blue")

p2 +
  scale_fill_manual(values = party_colors, name = "Party") +
  scale_y_continuous(breaks = seq(2000, 16000, 2000)) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank())

# Mark the start of every term with a vertical line:
p2 +
  scale_fill_manual(values = party_colors, name = "Party") +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank()) +
  geom_vline(aes(xintercept = as.numeric(start)),
             data = small_pre, colour = "blue")

# Final version: legend on top, president names near the bottom of each term.
# NOTE(review): the title says "from 1967", but p2 only plots dates from the
# first term in small_pre onwards — confirm the intended start year.
p2 +
  scale_fill_manual(values = party_colors, name = "Party") +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        legend.position = "top") +
  geom_vline(aes(xintercept = as.numeric(start)),
             data = small_pre, colour = "gray40") +
  geom_text(aes(x = start, y = 1000, label = name),
            data = small_pre, size = 3,
            vjust = -1, hjust = 0, nudge_x = 50) +
  labs(x = "Date", y = "Number of unemployed in thousands",
       title = "Number of unemployed in the United States by Political Party from 1967 to 2015",
       subtitle = "A person is defined as unemployed in the United States if they are jobless, but have looked for work\nin the last four weeks and are available for work. People who are neither employed nor defined as\nunemployed are not included in the labor force calculation.",
       caption = "Source: Bureau of Labor Statistics")

# Or design it in a different style: a line with a lightly filled area below.
# NOTE(review): ggplot2 documents `uempmed` as the median duration of
# unemployment in weeks, so `uempmed / 100` shown as a percentage is not an
# unemployment rate — confirm the intended series.
economics %>%
  ggplot(aes(x = date, y = uempmed / 100)) +
  geom_line(color = "cyan") +
  geom_area(fill = "cyan", alpha = 0.1) +
  scale_y_continuous(labels = percent) +
  labs(title = "Official Unemployment Rate in the United States from 1967 to 2015",
       subtitle = "The official unemployment rate is known as U3. It defines unemployed people as those who\nare willing and available to work, and who have actively sought work within the past four weeks.\nThose with temporary, part-time or full-time jobs are considered employed, as are those who\nperform at least 15 hours of unpaid family work.",
       caption = "Source: Bureau of Labor Statistics",
       x = "Date", y = "Unemployment Rate")

# Use a preferred font:

library(extrafont)
# Importing scans the system fonts, can take minutes and asks for
# confirmation, and only has to happen once. Skip it when the font used
# below is already in extrafont's font table.
if (!"Georgia" %in% fonts()) {
  font_import()
}

## Importing fonts may take a few minutes, depending on the number of fonts and the speed of the system.
## Continue? [y/n]

# The "win" device is only available on Windows, so register the fonts with
# it only there.
if (.Platform$OS.type == "windows") {
  extrafont::loadfonts(device = "win")
}

# Same chart with customised fonts and grid lines.
# NOTE(review): ggplot2 documents `uempmed` as the median duration of
# unemployment in weeks, so `uempmed / 100` shown as a percentage is not an
# unemployment rate — confirm the intended series.
economics %>%
  ggplot(aes(x = date, y = uempmed / 100)) +
  geom_line(color = "cyan") +
  geom_area(fill = "cyan", alpha = 0.1) +
  scale_y_continuous(labels = percent) +
  labs(title = "Official Unemployment Rate in the United States from 1967 to 2015",
       subtitle = "The official unemployment rate is known as U3. It defines unemployed people as those who\nare willing and available to work, and who have actively sought work within the past four weeks.\nThose with temporary, part-time or full-time jobs are considered employed, as are those who\nperform at least 15 hours of unpaid family work.",
       caption = "Source: Bureau of Labor Statistics",
       x = "Date", y = "Unemployment Rate") +
  theme(
    # Font family, colour and size for the plot text:
    text = element_text(family = "Georgia", color = "grey10", size = 15),
    # Caption in italics:
    plot.caption = element_text(face = "italic"),
    # Smaller, lighter subtitle:
    plot.subtitle = element_text(color = "gray40", size = 12),
    # Position and style of the "Date" axis title:
    axis.title.x = element_text(hjust = 0, face = "bold", size = 11, color = "grey20"),
    # Likewise for the y-axis title:
    axis.title.y = element_text(hjust = 1, face = "bold", size = 11, color = "grey20"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

# Variant with a dark panel background and matching major grid lines.
economics %>%
  ggplot(aes(x = date, y = uempmed / 100)) +
  geom_line(color = "cyan") +
  geom_area(fill = "cyan", alpha = 0.1) +
  scale_y_continuous(labels = percent) +
  labs(title = "Official Unemployment Rate in the United States from 1967 to 2015",
       subtitle = "The official unemployment rate is known as U3. It defines unemployed people as those who\nare willing and available to work, and who have actively sought work within the past four weeks.\nThose with temporary, part-time or full-time jobs are considered employed, as are those who\nperform at least 15 hours of unpaid family work.",
       caption = "Source: Bureau of Labor Statistics",
       x = "Date", y = "Unemployment Rate") +
  theme(
    # Font family, colour and size for the plot text:
    text = element_text(family = "Georgia", color = "grey10", size = 15),
    # Caption in italics:
    plot.caption = element_text(face = "italic"),
    # Smaller, lighter subtitle:
    plot.subtitle = element_text(color = "gray40", size = 12),
    # Position and style of the "Date" axis title:
    axis.title.x = element_text(hjust = 0, face = "bold", size = 11, color = "grey20"),
    # Likewise for the y-axis title:
    axis.title.y = element_text(hjust = 1, face = "bold", size = 11, color = "grey20"),
    panel.grid.major = element_line(color = "#4d5566"),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.background = element_rect(fill = "#444B5A")
  )

# Wrap the theme settings in a reusable function.
#
# my_theme() returns a ggplot2 theme object to add to a plot with `+`:
# "Georgia" text, italic caption, grey subtitle, bold axis titles, and a dark
# panel background with matching major grid lines and no minor grid lines.
#
# Arguments:
#   ...  Optional extra settings passed on to theme(). They are applied last,
#        so they override the defaults above, e.g.
#        my_theme(legend.position = "top"). With no arguments the result is
#        the same set of defaults as before.
#
# The "Georgia" family has to be available to the graphics device in use.

my_theme <- function(...) {
  theme(
    text = element_text(family = "Georgia", color = "grey10", size = 15),
    plot.caption = element_text(face = "italic"),
    plot.subtitle = element_text(color = "gray40", size = 12),
    axis.title.x = element_text(hjust = 0, face = "bold", size = 11, color = "grey20"),
    axis.title.y = element_text(hjust = 1, face = "bold", size = 11, color = "grey20"),
    panel.grid.major = element_line(color = "#4d5566"),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.background = element_rect(fill = "#444B5A")
  ) +
    # Caller-supplied settings go last so they win over the defaults.
    theme(...)
}

# Which means the previous chart can now be written as:
# NOTE(review): ggplot2 documents `uempmed` as the median duration of
# unemployment in weeks, so `uempmed / 100` shown as a percentage is not an
# unemployment rate — confirm the intended series.


economics %>%
  ggplot(aes(x = date, y = uempmed / 100)) +
  geom_line(color = "cyan") +
  geom_area(fill = "cyan", alpha = 0.1) +
  scale_y_continuous(labels = percent) +
  labs(title = "Official Unemployment Rate in the United States from 1967 to 2015",
       subtitle = "The official unemployment rate is known as U3. It defines unemployed people as those who\nare willing and available to work, and who have actively sought work within the past four weeks.\nThose with temporary, part-time or full-time jobs are considered employed, as are those who\nperform at least 15 hours of unpaid family work.",
       caption = "Source: Bureau of Labor Statistics",
       x = "Date", y = "Unemployment Rate") +
  my_theme()

# Showing two time series: first reshape them to long format, with the
# series name in `a` and its value in `b`.

economics %>%
  select(date, psavert, uempmed) %>%
  gather(key = "a", value = "b", -date)

## # A tibble: 1,148 x 3
##    date       a           b
##    <date>     <chr>   <dbl>
##  1 1967-07-01 psavert  12.5
##  2 1967-08-01 psavert  12.5
##  3 1967-09-01 psavert  11.7
##  4 1967-10-01 psavert  12.5
##  5 1967-11-01 psavert  12.5
##  6 1967-12-01 psavert  12.1
##  7 1968-01-01 psavert  11.7
##  8 1968-02-01 psavert  12.2
##  9 1968-03-01 psavert  11.6
## 10 1968-04-01 psavert  12.2
## # ... with 1,138 more rows

# Long-format copy of the two series, shared by the four plots below.
econ_long <- economics %>%
  select(date, psavert, uempmed) %>%
  gather(key = "a", value = "b", -date)

# One line per series, coloured by series name.
series_lines <- ggplot(econ_long, aes(x = date, y = b, color = a)) +
  geom_line(size = 1)

# Both series in one panel with hand-picked colours:
series_lines +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

# (The same plot is drawn a second time, as in the original script.)
series_lines +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

# One panel per series, shared axes:
series_lines +
  facet_wrap(~ a) +
  my_theme()

# One panel per series, each with its own axis ranges:
series_lines +
  facet_wrap(~ a, scales = "free") +
  my_theme()

#---------------------------------
#         Area Graph
#---------------------------------


# Long-format copy of the two series, shared by the four area plots below.
econ_long <- economics %>%
  select(date, psavert, uempmed) %>%
  gather(key = "a", value = "b", -date)

# Default area plot, outline and fill coloured by series:
ggplot(econ_long, aes(x = date, y = b, color = a, fill = a)) +
  geom_area(alpha = 0.5)

# Same plot with a dodge position adjustment instead of the default:
ggplot(econ_long, aes(x = date, y = b, color = a, fill = a)) +
  geom_area(alpha = 0.5, position = position_dodge(0.8))

# Dodged areas with hand-picked outline and fill colours:
ggplot(econ_long, aes(x = date, y = b, color = a, fill = a)) +
  geom_area(alpha = 0.5, position = position_dodge(0.8)) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800")) +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

# One panel per series: cyan line over a faint cyan area, custom theme:
ggplot(econ_long, aes(x = date, y = b)) +
  geom_line(color = "cyan") +
  geom_area(fill = "cyan", alpha = 0.1) +
  facet_wrap(~ a, scales = "free") +
  my_theme()

# Polish it up: readable series names, percent axis, legend on top.
# NOTE(review): ggplot2 documents `uempmed` as the median duration of
# unemployment in weeks, so dividing it by 100 and labelling it as a percent
# "Unemployment" rate looks wrong — confirm the intended series.

economics %>%
  select(date, psavert, uempmed) %>%
  gather(key = "a", value = "b", -date) %>%
  mutate(
    a = if_else(a == "psavert", "Saving", "Unemployment"),
    b = b / 100
  ) %>%
  ggplot(aes(x = date, y = b, color = a, fill = a)) +
  geom_area(alpha = 0.5, position = position_dodge(0.8)) +
  scale_fill_manual(name = "", values = c("#00AFBB", "#E7B800")) +
  scale_color_manual(name = "", values = c("#00AFBB", "#E7B800")) +
  scale_y_continuous(labels = percent) +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    text = element_text(family = "Garamond", color = "grey10", size = 13, face = "bold")
  ) +
  labs(title = "Official Unemployment and Saving Rate in the United States from 1967 to 2015",
       caption = "Source: Bureau of Labor Statistics",
       x = NULL, y = NULL)

# Case study (see http://datatopics.worldbank.org/sdgatlas/SDG-13-climate-action.html):

# Download three World Bank series for Vietnam, 2005-2015, and give the
# indicator columns short names (the reshape to long form happens in the
# plotting code).
data <- WDI(
  indicator = c("NE.IMP.GNFS.CD", "TM.VAL.MRCH.CD.WT", "TM.VAL.MRCH.WL.CD"),
  country = "VN",
  start = 2005,
  end = 2015
) %>%
  rename(
    IMP = NE.IMP.GNFS.CD,
    MRW = TM.VAL.MRCH.CD.WT,
    MAC = TM.VAL.MRCH.WL.CD
  )


#-----------------------------------------------------------------------------
#        Using a custom font
#  1. First download a font you like, for example from
#     http://www.megafonts.net/view/officinasanitc-extrabold_96211.
#  2. Install the new font (a .ttf file).
#  3. Make sure the installed file is in the Windows font folder. When no
#     `paths` argument is given, font_import() searches the system font
#     directories, so copying the file into R's working directory as well is
#     not required.
#-----------------------------------------------------------------------------

# No setwd() here: the hard-coded path only exists on one machine, and
# font_import() without `paths` does not look in the working directory.
font_import(pattern = "OfficinaSansITCMedium.ttf", prompt = FALSE)

# Fonts imported in the current session still have to be registered with the
# graphics device; the earlier loadfonts() call ran before this import.
if (.Platform$OS.type == "windows") {
  loadfonts(device = "win")
}

# Plot: stacked areas of the three series, in billions, with text labels
# drawn directly on the areas instead of a legend.

data %>%
  select(year, IMP, MRW, MAC) %>%
  gather(key = "Item", value = "b", -year) %>%
  mutate(b = round(b / 1e9)) %>%
  ggplot(aes(x = year, y = b, fill = Item)) +
  geom_area(stat = "identity", show.legend = FALSE) +
  scale_fill_manual(values = c("#9C9C9C", "#FF82AB", "#FF1493")) +
  scale_x_continuous(breaks = seq(2005, 2015, by = 1),
                     limits = c(2005, 2015.1),
                     expand = c(0.01, 0)) +
  labs(title = "Imports of Goods and Services in Vietnam: 2005 - 2015",
       subtitle = "Note: Unit in billion dollar and numbers adjusted for inflation rate and price of 2015",
       caption = "Data Source: The World Bank",
       x = NULL,
       y = NULL) +
  theme(
    plot.background = element_rect(fill = "white"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(size = 0.8),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    text = element_text(family = "OfficinaSansITC", size = 18, color = "black"),
    plot.subtitle = element_text(color = "gray40", size = 15),
    # Shift the caption towards the left:
    plot.caption = element_text(color = "gray40", size = 13, family = "OfficinaSansITC", hjust = -0.02),
    legend.position = "top",
    # Size and colour of the tick labels on the x and y axes:
    axis.text.x = element_text(size = 13, color = "gray40"),
    axis.text.y = element_text(size = 13, color = "gray40")
  ) +
  annotate("text", x = 2011, y = 80, label = "Industrial machines\nfrom US and EU",
           family = "OfficinaSansITC", size = 5,
           color = "white", hjust = 0, vjust = 1) +
  annotate("text", x = 2011, y = 190, label = "Raw industrial materials\n from China",
           family = "OfficinaSansITC", size = 5,
           color = "white", hjust = 0, vjust = 1) +
  annotate("text", x = 2009.2, y = 370, label = "Consumer goods from\nall countries",
           family = "OfficinaSansITC", size = 5,
           color = "gray30", hjust = 0, vjust = 1)