1.1. Lấy các giá trị theo yêu cầu
# Mean life expectancy of Vietnam for each survey year.
# The filter values must match the factor levels stored in gapminder exactly:
# the continent is "Asia" and the country is "Vietnam". The earlier spellings
# ("Aria" / "VietNam") matched no rows, so the result was an empty tibble.
gapminder %>%
  filter(
    continent == "Asia",
    country == "Vietnam"
  ) %>%
  group_by(year) %>%
  # Name the summary column so it does not print as `mean(lifeExp)`.
  summarise(mean_lifeExp = mean(lifeExp))
# NOTE(review): the printed output was an empty 0 x 2 tibble and is now stale.
# Expect one row per survey year (each country has 12 rows in gapminder);
# re-knit the document to refresh the printed result.
# Mean life expectancy per continent in 2007, highest first.
# arrange() sorts rows; negating the numeric column gives descending order.
filter(gapminder, year == 2007) %>%
  group_by(continent) %>%
  summarise(mean_lifeExp = mean(lifeExp)) %>%
  arrange(-mean_lifeExp)
## # A tibble: 5 × 2
## continent mean_lifeExp
## <fct> <dbl>
## 1 Oceania 80.7
## 2 Europe 77.6
## 3 Americas 73.6
## 4 Asia 70.7
## 5 Africa 54.8
# Three countries with the largest total GDP in 2007.
# Total GDP is reconstructed as GDP per capita multiplied by population.
filter(gapminder, year == 2007) %>%
  mutate(totalGDP = gdpPercap * pop) %>%
  arrange(-totalGDP) %>%
  head(3)
## # A tibble: 3 × 7
## country continent year lifeExp pop gdpPercap totalGDP
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 United States Americas 2007 78.2 301139947 42952. 1.29e13
## 2 China Asia 2007 73.0 1318683096 4959. 6.54e12
## 3 Japan Asia 2007 82.6 127467972 31656. 4.04e12
1.2. Trực quan dữ liệu _ Biểu đồ tĩnh
# Relationship between life expectancy and GDP per capita in 2007:
# keep only the 2007 rows; this subset is reused by the plots below.
gapminder2007 <- filter(gapminder, year == 2007)
1.2.1. Biểu đồ geom_point
# 2007 only: life expectancy against GDP per capita, coloured by continent.
gapminder2007 %>%
  ggplot(aes(gdpPercap, lifeExp, colour = continent)) +
  geom_point()

# All years, with GDP per capita on a log10 axis.
gapminder %>%
  ggplot(aes(gdpPercap, lifeExp, colour = continent)) +
  scale_x_log10() +
  geom_point()

# 2007 only, log10 axis, point size mapped to population.
gapminder2007 %>%
  ggplot(aes(gdpPercap, lifeExp, colour = continent, size = pop)) +
  scale_x_log10() +
  geom_point()

# All years: points sized by population, plus one LOESS curve per continent
# (colour is mapped globally, so geom_smooth() inherits the grouping).
# `size = pop` is mapped on the point layer only: when it was a global
# aesthetic, geom_smooth() inherited it too, which raised the "size aesthetic
# for lines was deprecated" and "aesthetics were dropped" warnings.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(aes(size = pop)) +
  scale_x_log10() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

# All years: points coloured by continent and sized by population, with a
# single overall LOESS curve. Both `color` and `size` are mapped on the point
# layer only, so geom_smooth() sees just x and y; a globally mapped `size` was
# inherited by the smoother and produced the "aesthetics were dropped" warning.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent, size = pop)) +
  scale_x_log10() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

# Presentation version: points coloured by continent, one overall LOESS curve,
# log10 x axis, axis labels, a title, and the Economist theme.
gapminder %>%
  ggplot(aes(gdpPercap, lifeExp)) +
  geom_point(aes(colour = continent)) +
  geom_smooth(method = "loess") +
  scale_x_log10() +
  labs(
    title = "Association between GDP and Life Expectancy",
    x = "Log GDP per Capita",
    y = "Life Expectancy"
  ) +
  theme_economist()
## `geom_smooth()` using formula = 'y ~ x'

1.2.2. Biểu đồ geom_histogram
# Distribution of GDP per capita in 2007, using the default binning.
gapminder2007 %>%
  ggplot(aes(x = gdpPercap)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with 20 bins, a red fill and white bar outlines.
gapminder2007 %>%
  ggplot(aes(x = gdpPercap)) +
  geom_histogram(bins = 20, colour = "white", fill = "indianred")

# NOTE(review): this chunk repeats the previous plot unchanged.
gapminder2007 %>%
  ggplot(aes(x = gdpPercap)) +
  geom_histogram(bins = 20, colour = "white", fill = "indianred")

# Skewness (positive -> skewed to the right, negative -> the opposite)
with(gapminder2007, skewness(gdpPercap))
## [1] 1.198456
# Kurtosis (positive -> more peaked, negative -> the opposite)
with(gapminder2007, kurtosis(gdpPercap))
## [1] 0.2496718
1.2.3. Biểu đồ geom_bar
# Asia: keep only the Asian countries from the 2007 subset.
asia2007 <- filter(gapminder2007, continent == "Asia")
# Compare life expectancy across Asian countries in 2007.
# geom_col() draws bars whose heights are the y values themselves.
asia2007 %>%
  ggplot(aes(country, lifeExp)) +
  geom_col(width = 0.9)

# Flip the axes and colour each bar by country (legend hidden).
asia2007 %>%
  ggplot(aes(country, lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  theme(legend.position = "none") +
  coord_flip()

# Order the bars by life expectancy.
asia2007 %>%
  ggplot(aes(reorder(country, lifeExp), lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  theme(legend.position = "none") +
  coord_flip()

# Order the bars the other way round.
asia2007 %>%
  ggplot(aes(reorder(country, -lifeExp), lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  theme(legend.position = "none") +
  coord_flip()

# Combine charts: Asian life expectancy and GDP per capita in 2007, side by
# side. Each chart is a flipped bar chart with the legend hidden.
# Fixed the axis label typo "Expectncy" -> "Expectancy".
graph1 <- ggplot(
  data = asia2007,
  mapping = aes(x = reorder(country, -lifeExp), y = lifeExp, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "Life Expectancy of Asia on 2007")
graph2 <- ggplot(
  data = asia2007,
  mapping = aes(x = reorder(country, -gdpPercap), y = gdpPercap, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "GDP Per Capita of Asia on 2007")
# Lay the two charts out in a single row of two columns.
grid.arrange(graph1, graph2, ncol = 2)

# Europe: same pair of charts (life expectancy and GDP per capita in 2007).
# Fixed the axis label typo "Expectncy" -> "Expectancy".
europe2007 <- gapminder2007 %>%
  filter(continent == "Europe")
graph3 <- ggplot(
  data = europe2007,
  mapping = aes(x = reorder(country, -lifeExp), y = lifeExp, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "Life Expectancy of Europe on 2007")
graph4 <- ggplot(
  data = europe2007,
  mapping = aes(x = reorder(country, -gdpPercap), y = gdpPercap, fill = country)
) +
  geom_col(width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "GDP Per Capita of Europe on 2007")
# Lay the two charts out in a single row of two columns.
grid.arrange(graph3, graph4, ncol = 2)

1.2.4. Xử lý Overlapping
# Baseline: country names on the x axis overlap one another.
asia2007 %>%
  ggplot(aes(country, lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  theme(legend.position = "none")

# Option 1: flip the axes so the names are listed vertically.
asia2007 %>%
  ggplot(aes(country, lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  theme(legend.position = "none") +
  coord_flip()

# Option 2: rotate the x-axis labels by 45 degrees.
asia2007 %>%
  ggplot(aes(country, lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

1.2.5. Biểu đồ kết hợp
# Lollipop chart: a blue point per country at its life expectancy, joined by a
# red segment to the x = 40 baseline; countries are ordered by life expectancy.
asia2007 %>%
  ggplot(aes(x = lifeExp, y = reorder(country, lifeExp))) +
  geom_point(size = 2, colour = "blue") +
  geom_segment(
    aes(
      x = 40,
      xend = lifeExp,
      y = reorder(country, lifeExp),
      yend = reorder(country, lifeExp)
    ),
    colour = "red"
  ) +
  scale_x_continuous(limits = c(40, 90), breaks = seq(40, 90, 5)) +
  labs(subtitle = "Gapminder Data 2007", x = "Life Expectancy", y = "Country") +
  theme_minimal() +
  # Remove all grid lines (must come after theme_minimal() to take effect).
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())

1.3. Trực quan dữ liệu _ Biểu đồ động
1.3.1. Plotly
# Build the same lollipop chart, store it, then hand it to ggplotly() to get
# an interactive version.
asia20007_dynamic <- asia2007 %>%
  ggplot(aes(x = lifeExp, y = reorder(country, lifeExp))) +
  geom_point(size = 2, colour = "blue") +
  geom_segment(
    aes(
      x = 40,
      xend = lifeExp,
      y = reorder(country, lifeExp),
      yend = reorder(country, lifeExp)
    ),
    colour = "red"
  ) +
  scale_x_continuous(limits = c(40, 90), breaks = seq(40, 90, 5)) +
  labs(subtitle = "Gapminder Data 2007", x = "Life Expectancy", y = "Country") +
  theme_minimal() +
  # Remove all grid lines (must come after theme_minimal() to take effect).
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
ggplotly(asia20007_dynamic)
1.3.2. Highcharter
# Load the dataset from the gapminder package.
data("gapminder", package = "gapminder")
# Per-column summary of the data.
summary(gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
# Goal: chart the mean GDP per capita (gdpPercap) of each continent by year.
# Step 1: mean GDP per capita per (year, continent), rounded to whole units.
# `.groups = "drop"` returns an ungrouped tibble, so no residual grouping by
# `year` leaks into later steps and summarise() emits no grouping message.
data <- gapminder %>%
  group_by(year, continent) %>%
  summarise(gdp_per_cap = round(mean(gdpPercap), 0), .groups = "drop")
data %>% head(10)
## # A tibble: 10 × 3
## year continent gdp_per_cap
## <int> <fct> <dbl>
## 1 1952 Africa 1253
## 2 1952 Americas 4079
## 3 1952 Asia 5195
## 4 1952 Europe 5661
## 5 1952 Oceania 10298
## 6 1957 Africa 1385
## 7 1957 Americas 4616
## 8 1957 Asia 5788
## 9 1957 Europe 6963
## 10 1957 Oceania 11599
# Step 2: reshape from long to wide, one column per continent.
# pivot_wider() replaces the superseded spread().
data_new <- data %>%
  pivot_wider(names_from = continent, values_from = gdp_per_cap)
data_new %>% head(10)
## # A tibble: 10 × 6
## year Africa Americas Asia Europe Oceania
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1952 1253 4079 5195 5661 10298
## 2 1957 1385 4616 5788 6963 11599
## 3 1962 1598 4902 5729 8365 12696
## 4 1967 2050 5668 5971 10144 14495
## 5 1972 2340 6491 8187 12480 16417
## 6 1977 2586 7352 7791 14284 17284
## 7 1982 2482 7507 7434 15618 18555
## 8 1987 2283 7793 7608 17214 20448
## 9 1992 2282 8045 8640 17062 20894
## 10 1997 2379 8889 9834 19077 24024
# Build the chart: start from an empty highchart whose x axis lists the years,
# then add one line series per continent column of `data_new`, in order.
h <- Reduce(
  function(chart, continent_name) {
    hc_add_series(
      chart,
      name = continent_name,
      data = data_new[[continent_name]]
    )
  },
  c("Africa", "Americas", "Asia", "Europe", "Oceania"),
  hc_xAxis(highchart(), categories = data_new$year)
)
# One colour per series, in the same order as the series were added.
h <- hc_colors(h, c("darkgreen", "darkred", "steelblue", "gray", "orange"))
h
# Customise the chart: title, subtitle, credits, legend and tooltip.
h1 <- h %>%
  # Title. The series are continents and the measure is GDP per capita, so the
  # previous wording ("per capital by Country") was corrected.
  hc_title(
    text = "Average GDP per capita by Continent",
    margin = 20,
    align = "left",
    style = list(color = "black", fontWeight = "bold")
  ) %>%
  # Subtitle: the period covered by the data.
  hc_subtitle(
    text = "1952 to 2007",
    align = "left"
  ) %>%
  # Credits (caption) linking to the data source. TRUE is spelled out because
  # `T` is an ordinary variable that can be reassigned.
  hc_credits(
    enabled = TRUE,
    text = "Gapminder Data",
    # Gapminder's site lives on the .org domain.
    href = "https://www.gapminder.org"
  ) %>%
  # Legend: vertical list in the top-right corner, shifted down by 100.
  hc_legend(
    align = "right",
    verticalAlign = "top",
    layout = "vertical",
    x = 0,
    y = 100
  ) %>%
  # Tooltip shared across all series, with a crosshair line for comparison.
  hc_tooltip(
    crosshairs = TRUE,
    backgroundColor = "#FCFFC5",
    shared = TRUE,
    borderWidth = 4
  )
h1