1. Gapminder

library(e1071)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggplot2)
library(ggthemes)
library(gapminder)

# Load the `gapminder` dataset (from the gapminder package attached above)
# into the workspace.
data("gapminder")

1.1. Lấy các giá trị theo yêu cầu

# Mean life expectancy of Vietnam for each survey year.
# The filter values must match the dataset's spelling exactly ("Asia",
# "Vietnam"); a misspelled value matches no rows and yields an empty result.
gapminder %>%
  filter(continent == "Asia",
         country == "Vietnam") %>%
  group_by(year) %>%
  summarise(mean_lifeExp = mean(lifeExp))
## # A tibble: 0 × 2
## # ℹ 2 variables: year <int>, mean(lifeExp) <dbl>
# Mean life expectancy per continent in 2007, sorted from highest to lowest.
# arrange() orders the rows; desc() makes the order descending.
filter(gapminder, year == 2007) %>%
  group_by(continent) %>%
  summarise(mean_lifeExp = mean(lifeExp)) %>%
  arrange(desc(mean_lifeExp))
## # A tibble: 5 × 2
##   continent mean_lifeExp
##   <fct>            <dbl>
## 1 Oceania           80.7
## 2 Europe            77.6
## 3 Americas          73.6
## 4 Asia              70.7
## 5 Africa            54.8
# The three countries with the highest total GDP in 2007,
# where total GDP = GDP per capita * population.
filter(gapminder, year == 2007) %>%
  mutate(totalGDP = gdpPercap * pop) %>%
  arrange(desc(totalGDP)) %>%
  head(3)
## # A tibble: 3 × 7
##   country       continent  year lifeExp        pop gdpPercap totalGDP
##   <fct>         <fct>     <int>   <dbl>      <int>     <dbl>    <dbl>
## 1 United States Americas   2007    78.2  301139947    42952.  1.29e13
## 2 China         Asia       2007    73.0 1318683096     4959.  6.54e12
## 3 Japan         Asia       2007    82.6  127467972    31656.  4.04e12

1.2. Trực quan dữ liệu - Biểu đồ tĩnh

# Keep only the 2007 observations; they are used below to look at the
# relationship between life expectancy and GDP per capita in that year.
gapminder2007 <- filter(gapminder, year == 2007)

1.2.1. Biểu đồ geom_point

# 2007 only: GDP per capita vs. life expectancy, coloured by continent.
ggplot(gapminder2007, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point()

# All years, with GDP per capita on a log10 x axis.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10()

# 2007 only, log10 x axis, point size mapped to population.
ggplot(
  gapminder2007,
  aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  scale_x_log10()

# All years, log10 x axis, with a LOESS smoother.
# `size = pop` is mapped on the point layer only: geom_smooth() cannot use a
# per-row size, so a global mapping makes ggplot2 drop the aesthetic with a
# warning and triggers the size/linewidth deprecation warning for lines.
# `color` stays global, so one smoothed curve is fitted per continent.
# The formula is spelled out to avoid the "using formula" message.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point(aes(size = pop)) +
  scale_x_log10() +
  geom_smooth(method = "loess", formula = y ~ x)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: size
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# All years, log10 x axis, with a single LOESS smoother over all points.
# Both `color` and `size` are mapped on the point layer only, so the smoother
# is not split by continent and no aesthetic is dropped with a warning.
# The formula is spelled out to avoid the "using formula" message.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent, size = pop)) +
  scale_x_log10() +
  geom_smooth(method = "loess", formula = y ~ x)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: size
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# Final scatter plot: points coloured by continent, one overall LOESS
# smoother, axis labels, a title and the Economist theme from ggthemes.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  scale_x_log10() +
  geom_smooth(method = "loess") +
  labs(
    title = "Association between GDP and Life Expectancy",
    x = "Log GDP per Capita",
    y = "Life Expectancy"
  ) +
  theme_economist()
## `geom_smooth()` using formula = 'y ~ x'

1.2.2. Biểu đồ geom_histogram

# Histogram of 2007 GDP per capita using the default binning.
ggplot(gapminder2007, aes(x = gdpPercap)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of 2007 GDP per capita with 20 bins, a fill colour and white
# bar outlines.
ggplot(data = gapminder2007, mapping = aes(x = gdpPercap)) +
  geom_histogram(bins = 20, fill = "indianred", color = "white")

# Skewness of 2007 GDP per capita
# (positive -> skewed to the right, negative -> skewed to the left)
skewness(gapminder2007$gdpPercap)
## [1] 1.198456
# Kurtosis of 2007 GDP per capita
# (positive -> more peaked, negative -> flatter)
# NOTE(review): presumably e1071's excess kurtosis (normal = 0) - confirm
kurtosis(gapminder2007$gdpPercap)
## [1] 0.2496718

1.2.3. Biểu đồ geom_bar

# Asia: the 2007 rows for Asian countries.
asia2007 <- filter(gapminder2007, continent == "Asia")

# Compare the life expectancy of Asian countries in 2007, one bar each.
# geom_col() draws bars whose heights are the y values themselves.
ggplot(asia2007, aes(x = country, y = lifeExp)) +
  geom_col(width = 0.9)

# Flip the axes and colour the bars by country (legend hidden).
ggplot(asia2007, aes(x = country, y = lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none")

# Order the bars by life expectancy.
ggplot(
  asia2007,
  aes(x = reorder(country, lifeExp), y = lifeExp, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none")

# Same chart with the order reversed.
ggplot(
  asia2007,
  aes(x = reorder(country, -lifeExp), y = lifeExp, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none")

# Combine charts: life expectancy and GDP per capita of Asian countries in
# 2007, each as a flipped, ordered bar chart, arranged side by side.
graph1 <- ggplot(
  data = asia2007,
  mapping = aes(x = reorder(country, -lifeExp), y = lifeExp, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "Life Expectancy of Asia in 2007")

graph2 <- ggplot(
  data = asia2007,
  mapping = aes(x = reorder(country, -gdpPercap), y = gdpPercap, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "GDP Per Capita of Asia in 2007")

# Place the two charts in one row with two columns.
grid.arrange(graph1, graph2, ncol = 2)

# Europe: the 2007 rows for European countries.
europe2007 <- gapminder2007 %>%
  filter(continent == "Europe")

# Life expectancy and GDP per capita of European countries in 2007, each as
# a flipped, ordered bar chart, arranged side by side.
graph3 <- ggplot(
  data = europe2007,
  mapping = aes(x = reorder(country, -lifeExp), y = lifeExp, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "Life Expectancy of Europe in 2007")

graph4 <- ggplot(
  data = europe2007,
  mapping = aes(x = reorder(country, -gdpPercap), y = gdpPercap, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "GDP Per Capita of Europe in 2007")

# Place the two charts in one row with two columns.
grid.arrange(graph3, graph4, ncol = 2)

1.2.4. Xử lý Overlapping

# Baseline chart: country names sit on the x axis.
ggplot(asia2007, aes(x = country, y = lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  theme(legend.position = "none")

# Option 1: flip the axes so the country names run along the vertical axis.
ggplot(asia2007, aes(x = country, y = lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none")

# Option 2: keep the orientation and rotate the x-axis labels by 45 degrees.
ggplot(asia2007, aes(x = country, y = lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

1.2.5. Biểu đồ kết hợp

# Combined chart: a blue point at each Asian country's 2007 life expectancy
# plus a red segment running from x = 40 to that value; countries are
# ordered by life expectancy.
ggplot(asia2007, aes(x = lifeExp, y = reorder(country, lifeExp))) +
  geom_point(size = 2, color = "blue") +
  geom_segment(
    aes(
      x = 40, xend = lifeExp,
      y = reorder(country, lifeExp), yend = reorder(country, lifeExp)
    ),
    color = "red"
  ) +
  scale_x_continuous(limits = c(40, 90), breaks = seq(40, 90, 5)) +
  labs(x = "Life Expectancy", y = "Country", subtitle = "Gapminder Data 2007") +
  theme_minimal() +
  # Remove all grid lines on top of the minimal theme.
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

1.3. Trực quan dữ liệu - Biểu đồ động

1.3.1. Plotly

# Build the same point-and-segment chart as a ggplot object, then hand it to
# ggplotly() to turn it into an interactive plotly widget.
asia20007_dynamic <- ggplot(
  asia2007,
  aes(x = lifeExp, y = reorder(country, lifeExp))
) +
  geom_point(size = 2, color = "blue") +
  geom_segment(
    aes(
      x = 40, xend = lifeExp,
      y = reorder(country, lifeExp), yend = reorder(country, lifeExp)
    ),
    color = "red"
  ) +
  scale_x_continuous(limits = c(40, 90), breaks = seq(40, 90, 5)) +
  labs(x = "Life Expectancy", y = "Country", subtitle = "Gapminder Data 2007") +
  theme_minimal() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggplotly(asia20007_dynamic)

1.3.2. Highcharter

# Data: reload `gapminder` explicitly from the gapminder package.
data(gapminder, package = "gapminder")
# Per-column summary of the dataset.
summary(gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 
# Data for a chart of mean GDP per capita (gdpPercap) of each continent in
# each year: compute the mean per (year, continent), rounded to a whole number.
# .groups = "drop" returns an ungrouped tibble, so no leftover `year` grouping
# leaks into later steps and summarise() emits no grouping message.
data <- gapminder %>%
  group_by(year, continent) %>%
  summarise(gdp_per_cap = round(mean(gdpPercap), 0), .groups = "drop")
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# Preview the first ten rows of the per-year, per-continent means.
head(data, 10)
## # A tibble: 10 × 3
## # Groups:   year [2]
##     year continent gdp_per_cap
##    <int> <fct>           <dbl>
##  1  1952 Africa           1253
##  2  1952 Americas         4079
##  3  1952 Asia             5195
##  4  1952 Europe           5661
##  5  1952 Oceania         10298
##  6  1957 Africa           1385
##  7  1957 Americas         4616
##  8  1957 Asia             5788
##  9  1957 Europe           6963
## 10  1957 Oceania         11599
# Reshape from long to wide: one row per year, one column per continent
# holding that continent's mean GDP per capita.
# pivot_wider() is the current tidyr replacement for the superseded spread();
# ungroup() first so the wide table carries no leftover grouping.
data_new <- data %>%
  ungroup() %>%
  pivot_wider(names_from = continent, values_from = gdp_per_cap)
data_new %>% head(10)
## # A tibble: 10 × 6
## # Groups:   year [10]
##     year Africa Americas  Asia Europe Oceania
##    <int>  <dbl>    <dbl> <dbl>  <dbl>   <dbl>
##  1  1952   1253     4079  5195   5661   10298
##  2  1957   1385     4616  5788   6963   11599
##  3  1962   1598     4902  5729   8365   12696
##  4  1967   2050     5668  5971  10144   14495
##  5  1972   2340     6491  8187  12480   16417
##  6  1977   2586     7352  7791  14284   17284
##  7  1982   2482     7507  7434  15618   18555
##  8  1987   2283     7793  7608  17214   20448
##  9  1992   2282     8045  8640  17062   20894
## 10  1997   2379     8889  9834  19077   24024
# Build the chart: years as x-axis categories, then one series per continent
# (mean GDP per capita), then the series colours.
# Reduce() adds the series one at a time, in the listed order, starting from
# the chart that already has its x axis.
h <- Reduce(
  function(chart, continent_name) {
    hc_add_series(
      chart,
      name = continent_name,
      data = data_new[[continent_name]]
    )
  },
  c("Africa", "Americas", "Asia", "Europe", "Oceania"),
  highchart() %>% hc_xAxis(categories = data_new$year)
) %>%
  hc_colors(c("darkgreen", "darkred", "steelblue", "gray", "orange"))
h
# Customise the chart: title, subtitle, credits, legend and tooltip.
h1 <- h %>%
  # Title: the series are continent-level averages of GDP per capita.
  hc_title(
    text = "Average GDP per capita by Continent",
    margin = 20,
    align = "left",
    style = list(color = "black", fontWeight = "bold")
  ) %>%
  # Subtitle
  hc_subtitle(
    text = "1952 to 2007",
    align = "left"
  ) %>%
  # Credits line (caption) linking to the data source.
  # TRUE is spelled out because `T` is an ordinary, reassignable variable.
  hc_credits(
    enabled = TRUE,
    text = "Gapminder Data",
    href = "https://www.gapminder.org"
  ) %>%
  # Legend: vertical list aligned to the top right.
  hc_legend(
    align = "right",
    verticalAlign = "top",
    layout = "vertical",
    x = 0,
    y = 100
  ) %>%
  # Tooltip shared across all series, with a crosshair for comparing values.
  hc_tooltip(
    crosshairs = TRUE,
    backgroundColor = "#FCFFC5",
    shared = TRUE,
    borderWidth = 4
  )
h1