Bài tập 1:

Cho ma trận A được biểu diễn như sau:

# a) Build the 3 x 3 matrix A one row at a time, then form A^2 and A^3 by
#    repeated matrix multiplication. The printed A^3 is the zero matrix.
A <- rbind(
  c(1, 1, 3),
  c(5, 2, 6),
  c(-2, -1, -3)
)
print(A)
##      [,1] [,2] [,3]
## [1,]    1    1    3
## [2,]    5    2    6
## [3,]   -2   -1   -3
A2 <- A %*% A
print(A2)
##      [,1] [,2] [,3]
## [1,]    0    0    0
## [2,]    3    3    9
## [3,]   -1   -1   -3
A3 <- A2 %*% A
print(A3)
##      [,1] [,2] [,3]
## [1,]    0    0    0
## [2,]    0    0    0
## [3,]    0    0    0
# b) Overwrite the third column with the row-wise sum of columns 1 and 2.
#    NOTE(review): confirm against the exercise statement that columns 1 and 2
#    are the ones meant to be summed.
A[, 3] <- rowSums(A[, 1:2])
print(A)
##      [,1] [,2] [,3]
## [1,]    1    1    2
## [2,]    5    2    7
## [3,]   -2   -1   -3

Bài tập 2:

# Print the integers 0 to 4, then the 5 x 5 table whose (r, c) entry is
# x[r] + x[c].
x <- seq.int(from = 0L, to = 4L)
print(x)
## [1] 0 1 2 3 4
print(outer(x, x, FUN = `+`))
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    1    2    3    4
## [2,]    1    2    3    4    5
## [3,]    2    3    4    5    6
## [4,]    3    4    5    6    7
## [5,]    4    5    6    7    8

Bài tập 3:

# Rows of the 5 x 5 coefficient matrix of the linear system A x = y.
c1 <- c(1, 2, 3, 4, 5)
c2 <- c(2, 1, 2, 3, 4)
c3 <- c(3, 2, 1, 2, 3)
c4 <- c(4, 3, 2, 1, 2)
c5 <- c(5, 4, 3, 2, 1)

# Filling column-wise puts c<k> in column k; transposing turns it into row k.
A <- t(matrix(c(c1, c2, c3, c4, c5), ncol = length(c1)))
print(A)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
## [2,]    2    1    2    3    4
## [3,]    3    2    1    2    3
## [4,]    4    3    2    1    2
## [5,]    5    4    3    2    1
# Right-hand side of the system.
y <- c(7, -1, -3, 5, 17)

# Solve A x = y for x.
x <- solve(a = A, b = y)
print(x)
## [1] -2  3  5  2 -4

Bài tập 4:

# Double sum over i = 1..20 and j = 1..5 of i^4 / (3 + j).
# Dividing the two vectors directly recycles `j` along `i`, pairing every i
# with a single j (20 terms in total). outer() evaluates all 20 x 5
# (i, j) combinations, and sum() then adds every entry of that matrix.
i <- 1:20
j <- 1:5
double_sum <- sum(outer(i^4, 3 + j, FUN = "/"))
print(double_sum)
## [1] 639215.3

Bài tập 5:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggplot2)
library(gapminder)
data("gapminder")

1.

# Keep only the Vietnam rows of gapminder and preview the first six.
vietnamdata <- filter(gapminder, country == "Vietnam")
head(vietnamdata)
## # A tibble: 6 × 6
##   country continent  year lifeExp      pop gdpPercap
##   <fct>   <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Vietnam Asia       1952    40.4 26246839      605.
## 2 Vietnam Asia       1957    42.9 28998543      676.
## 3 Vietnam Asia       1962    45.4 33796140      772.
## 4 Vietnam Asia       1967    47.8 39463910      637.
## 5 Vietnam Asia       1972    50.3 44655014      700.
## 6 Vietnam Asia       1977    55.8 50533506      714.

2.

# Mean of lifeExp over all Vietnam rows, returned as a one-row tibble.
summarise(
  filter(gapminder, country == "Vietnam"),
  mean_lifeExp = mean(lifeExp)
)
## # A tibble: 1 × 1
##   mean_lifeExp
##          <dbl>
## 1         57.5

3.

# Total GDP (gdpPercap * pop) per country in 2007, compared between Asia and
# Europe. The data preparation and the chart are the same for both
# continents, so each is written once as a helper.

# Rows of gapminder for one continent in 2007, with a totalGDP column added.
total_gdp_2007 <- function(continent_name) {
  gapminder %>%
    filter(continent == continent_name, year == 2007) %>%
    mutate(totalGDP = gdpPercap * pop)
}

# Bar chart of totalGDP per country with the axes flipped. Countries are
# ordered by descending totalGDP on the (pre-flip) x axis and the fill
# legend is hidden. `axis_label` is the label of the totalGDP axis.
plot_total_gdp <- function(gdp_data, axis_label) {
  ggplot(
    data = gdp_data,
    mapping = aes(
      x = reorder(country, -totalGDP),
      y = totalGDP,
      fill = country
    )
  ) +
    geom_bar(stat = "identity", width = 0.9) +
    coord_flip() +
    theme(legend.position = "none") +
    labs(x = "", y = axis_label)
}

# Asia, 2007, total GDP
asia2007 <- total_gdp_2007("Asia")
graph1 <- plot_total_gdp(asia2007, "Total GDP of Asia on 2007")

# Europe, 2007, total GDP
europe2007 <- total_gdp_2007("Europe")
graph2 <- plot_total_gdp(europe2007, "Total GDP of Europe on 2007")

# Comparison: both charts side by side
grid.arrange(graph1, graph2, ncol = 2)

Bài tập 6:

library(ggplot2)
library(dplyr)
library(scales)
library(gapminder)

1. Tổng quan dữ liệu

# Các biến với tên và thông tin tổng quát
str(gapminder)
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num [1:1704] 28.8 30.3 32 34 36.1 ...
##  $ pop      : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
# Thông tin tổng quan về các biến
summary(gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 
# Thông tin theo châu lục và thời gian
table(gapminder$continent, gapminder$year)
##           
##            1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
##   Africa     52   52   52   52   52   52   52   52   52   52   52   52
##   Americas   25   25   25   25   25   25   25   25   25   25   25   25
##   Asia       33   33   33   33   33   33   33   33   33   33   33   33
##   Europe     30   30   30   30   30   30   30   30   30   30   30   30
##   Oceania     2    2    2    2    2    2    2    2    2    2    2    2
with(gapminder, {table(continent, year)})
##           year
## continent  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
##   Africa     52   52   52   52   52   52   52   52   52   52   52   52
##   Americas   25   25   25   25   25   25   25   25   25   25   25   25
##   Asia       33   33   33   33   33   33   33   33   33   33   33   33
##   Europe     30   30   30   30   30   30   30   30   30   30   30   30
##   Oceania     2    2    2    2    2    2    2    2    2    2    2    2

2. 1D : Biểu đồ thanh cho các biến rời rạc

# Bar chart of the number of rows per continent
ggplot(gapminder, aes(x = continent)) +
  geom_bar()

# Add colour: one fill per continent
ggplot(gapminder, aes(x = continent, fill = continent)) +
  geom_bar()

# Convert row counts to numbers of countries (every country contributes 12
# rows, one per surveyed year), relabel the y axis, drop the default legend
# and keep the plot in a variable.
# after_stat(count) and guides(fill = "none") replace the deprecated
# `..count..` notation and `guides(fill = FALSE)`.
mybar <- ggplot(gapminder, aes(x = continent, fill = continent)) +
  geom_bar(aes(y = after_stat(count) / 12)) +
  labs(y = "Number of countries") +
  guides(fill = "none")
# Draw the counts on a square-root scale
mybar + coord_trans(y = "sqrt")

# Swap the axes
mybar + coord_flip()

# Map the bars to polar coordinates, giving a coxcomb chart
mybar + coord_polar()

3. 1D : Đồ thị mật độ cho các biến liên tục

# Compute and draw a smoothed frequency distribution of lifeExp
ggplot(data = gapminder, aes(x = lifeExp)) +
  geom_density()

# Thicker outline, a fill colour, and partial transparency for the fill.
# `linewidth` replaces the deprecated `size` argument for line thickness.
ggplot(data = gapminder, aes(x = lifeExp)) +
  geom_density(linewidth = 1.5, fill = "pink", alpha = 0.3)

# Density curve with a histogram drawn on the density scale on top of it.
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(data = gapminder, aes(x = lifeExp)) +
  geom_density(linewidth = 1.5, fill = "pink", alpha = 0.5) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 4,
    color = "black",
    fill = "lightblue",
    alpha = 0.5
  )

# Differences between continents
ggplot(data = gapminder, aes(x = lifeExp, fill = continent)) +
  geom_density(alpha = 0.3)

# Box plots of lifeExp per continent
gap1 <- ggplot(
  data = gapminder,
  aes(x = continent, y = lifeExp, fill = continent)
)
gap1 + geom_boxplot(outlier.size = 2)

## Tự làm

# Violin plot of lifeExp per continent with the legend removed and the axes
# flipped. guides(fill = "none") replaces the deprecated guides(fill = FALSE).
gap2 <- ggplot(
  data = gapminder,
  aes(x = continent, y = lifeExp, fill = continent)
)
gap2 +
  guides(fill = "none") +
  coord_flip() +
  geom_violin()

# Ordering effect: reorder the continent factor levels by median lifeExp.
# The printed rows are unchanged; only the level order of `continent` differs.
gap_by_median <- mutate(
  gapminder,
  continent = reorder(continent, lifeExp, FUN = median)
)
gap_by_median
## # A tibble: 1,704 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # ℹ 1,694 more rows
# Box plots drawn from the reordered data, so the continents appear in order
# of median lifeExp.
ggplot(gap_by_median, aes(x = continent, y = lifeExp, fill = continent)) +
  geom_boxplot(outlier.size = 2)

# The same density plot for GDP per capita
gdp_plot <- ggplot(data = gapminder, mapping = aes(x = gdpPercap))
gdp_plot + geom_density()

## Tự làm

## Do it yourself: density and box plots of log10(gdpPercap) per continent
log_gdp_density <- ggplot(
  data = gapminder,
  mapping = aes(x = log10(gdpPercap), fill = continent)
)
log_gdp_density + geom_density(alpha = 0.3)

gap3 <- ggplot(
  data = gapminder,
  mapping = aes(x = continent, y = log10(gdpPercap), fill = continent)
)
gap3 + geom_boxplot(outlier.size = 2)

4. 1.5D : Biểu đồ chuỗi thời gian

# One lifeExp-over-time line per country
country_lines <- ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, group = country)
)
country_lines + geom_line()

# Group the data by country and measure the spread of lifeExp, then keep the
# 8 countries with the largest IQR and list them by decreasing standard
# deviation. slice_max() names the ranking column explicitly; the superseded
# top_n(8) picked the last column (IQR) implicitly.
gapminder %>%
  group_by(country) %>%
  summarise(sd = sd(lifeExp), IQR = IQR(lifeExp)) %>%
  slice_max(IQR, n = 8) %>%
  arrange(desc(sd))
## # A tibble: 8 × 3
##   country               sd   IQR
##   <fct>              <dbl> <dbl>
## 1 Oman                14.1  25.5
## 2 Vietnam             12.2  21.2
## 3 Saudi Arabia        11.9  20.3
## 4 Indonesia           11.5  18.4
## 5 Libya               11.4  19.8
## 6 Yemen, Rep.         11.0  19.7
## 7 West Bank and Gaza  11.0  19.3
## 8 Tunisia             10.7  19.1
# Median lifeExp for every continent / year pair (first six rows shown).
# .groups = "drop" returns an ungrouped tibble instead of leaving it grouped
# by continent.
gapminder %>%
  group_by(continent, year) %>%
  summarise(lifeExp = median(lifeExp), .groups = "drop") %>%
  head()
## # A tibble: 6 × 3
##   continent  year lifeExp
##   <fct>     <int>   <dbl>
## 1 Africa     1952    38.8
## 2 Africa     1957    40.6
## 3 Africa     1962    42.6
## 4 Africa     1967    44.7
## 5 Africa     1972    47.0
## 6 Africa     1977    49.3
# Median lifeExp per continent over time, drawn as lines with points.
# `linewidth` replaces the deprecated `size` argument for line thickness, and
# .groups = "drop" returns an ungrouped summary.
gapminder %>%
  group_by(continent, year) %>%
  summarise(lifeExp = median(lifeExp), .groups = "drop") %>%
  ggplot(aes(x = year, y = lifeExp, color = continent)) +
  geom_line(linewidth = 1) +
  geom_point(size = 1.5)

# Keep the same summary in a variable and fit one linear trend per continent.
gapyear <- gapminder %>%
  group_by(continent, year) %>%
  summarise(lifeExp = median(lifeExp), .groups = "drop")
ggplot(gapyear, aes(x = year, y = lifeExp, color = continent)) +
  geom_point(size = 1.5) +
  geom_smooth(aes(fill = continent), method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

5. 2D: Biểu đồ phân tán

# Base plot of lifeExp against gdpPercap; no layers have been added yet.
plt <- ggplot(gapminder, aes(x = gdpPercap, y = lifeExp))
plt

# Plain scatter plot
plt + geom_point()

# Points coloured by continent; reused for the smoothed variants below
plt_by_continent <- plt + geom_point(aes(color = continent))
plt_by_continent

# Add a single loess smoother over all points
plt_by_continent +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

# Same plot with a log10 x axis
plt_by_continent +
  geom_smooth(method = "loess") +
  scale_x_log10()
## `geom_smooth()` using formula = 'y ~ x'

library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Wrap gapminder in a crosstalk SharedData object, passing ~continent as the
# second argument of the constructor.
g <- crosstalk::SharedData$new(gapminder, ~continent)

# Scatter plot on a log10 x axis with point size mapped to pop and one linear
# fit per continent. ggplot2 itself reports `ids` as an unknown aesthetic
# (see the warning below).
gg <- ggplot(
  g,
  aes(x = gdpPercap, y = lifeExp, color = continent, frame = year)
) +
  geom_point(aes(size = pop, ids = country)) +
  geom_smooth(se = FALSE, method = "lm") +
  scale_x_log10()
## Warning in geom_point(aes(size = pop, ids = country)): Ignoring unknown
## aesthetics: ids
# Convert to a plotly widget and highlight on hover.
highlight(ggplotly(gg), "plotly_hover")
## `geom_smooth()` using formula = 'y ~ x'
## Setting the `off` event (i.e., 'plotly_doubleclick') to match the `on` event (i.e., 'plotly_hover'). You can change this default via the `highlight()` function.