Chuyển sang sử dụng bộ dữ liệu diamonds (nạp bằng data(diamonds)).
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## Warning: package 'lubridate' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the diamonds data set.
data("diamonds")

# Scatter plot of price against carat.
diamonds |>
  ggplot(aes(x = carat, y = price)) +
  geom_point(na.rm = TRUE, color = "red") +
  xlab("carat") +
  ylab("price")

# Scatter plot with a linear fit. The points are drawn first so that the
# fitted line is rendered on top of them instead of being hidden underneath.
diamonds |>
  ggplot(aes(x = carat, y = price)) +
  geom_point(color = "pink") +
  geom_smooth(formula = y ~ x, method = "lm", color = "orange") +
  labs(title = "Đồ Thị Dạng Scatter", x = "trọng lượng", y = "giá")

# Scatter plot with a line layer over the same carat/price mapping.
diamonds |>
  ggplot(aes(x = carat, y = price)) +
  geom_point(color = "yellow") +
  geom_line(color = "purple")

# Scatter plot where the point shape encodes the cut of each diamond.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
diamonds |>
  ggplot(aes(x = carat, y = price)) +
  geom_point(aes(shape = cut), na.rm = TRUE, color = "red", size = 3) +
  xlab("trọng lượng") +
  ylab("giá")
## Warning: Using shapes for an ordinal variable is not advised
# One panel per cut: points coloured by cut plus a per-panel linear fit.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
diamonds |>
  ggplot(aes(x = carat, y = price)) +
  geom_point(aes(color = cut), na.rm = TRUE) +
  geom_smooth(formula = y ~ x, method = "lm", na.rm = TRUE) +
  facet_grid(. ~ cut) +
  xlab("trọng lượng") +
  ylab("giá")
# Number of diamonds in each color grade.
diamonds |>
  ggplot(aes(x = color)) +
  geom_bar(fill = "orange")

# Distribution of price, stacked by cut. price is continuous, so it is binned
# with geom_histogram(); geom_bar() would draw one hairline bar per distinct
# price value. The bin width of 500 is a readability choice, not a requirement.
diamonds |>
  ggplot(aes(x = price)) +
  geom_histogram(aes(fill = cut), binwidth = 500)

# Number of diamonds per cut, with one side-by-side bar per clarity grade.
diamonds |>
  ggplot(aes(x = cut)) +
  geom_bar(aes(fill = clarity), position = "dodge")
library(tidyverse)
Bảng tổng hợp dạng dài (long) về giá trị trung bình của các tập dữ liệu:
# Stack every column except y1 and y3 into name/value pairs: the former
# column name goes to Variable and the cell content goes to Value.
anscombe_long <- anscombe |>
  pivot_longer(
    cols = -c(y1, y3),
    names_to = "Variable",
    values_to = "Value"
  )

# Keep only the rows that came from x1, then average Value per distinct y1.
x1_avgD <- anscombe_long |>
  filter(Variable == "x1") |>
  group_by(y1) |>
  summarise(Average_x1 = mean(Value, na.rm = TRUE))

print(x1_avgD)
## # A tibble: 11 × 2
## y1 Average_x1
## <dbl> <dbl>
## 1 4.26 4
## 2 4.82 7
## 3 5.68 5
## 4 6.95 8
## 5 7.24 6
## 6 7.58 13
## 7 8.04 10
## 8 8.33 11
## 9 8.81 9
## 10 9.96 14
## 11 10.8 12
Tương tự như bảng dạng dài (long), ta tạo bảng dạng rộng (wide) bằng lệnh pivot_wider:
library(tidyr)
# Mean of x1 for each distinct y1, then spread so that every y1 value becomes
# its own column holding the matching mean.
wider_x1y1 <- anscombe |>
  group_by(y1) |>
  summarise(mean_x1 = mean(x1, na.rm = TRUE)) |>
  pivot_wider(
    names_from = "y1",
    values_from = "mean_x1"
  )

print(wider_x1y1)
## # A tibble: 1 × 11
## `4.26` `4.82` `5.68` `6.95` `7.24` `7.58` `8.04` `8.33` `8.81` `9.96` `10.84`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 7 5 8 6 13 10 11 9 14 12
Tiếp tục bài tập tuần 2 với bộ dữ liệu đã dùng ở tuần 1:
# Load the built-in anscombe data set and keep a working copy named t.
data("anscombe")
t <- anscombe

# Name the eight columns explicitly: x1..x4 followed by y1..y4.
names(t) <- c(paste0("x", 1:4), paste0("y", 1:4))

# Element-wise comparison: TRUE wherever a cell equals 14.
t == 14
## x1 x2 x3 x4 y1 y2 y3 y4
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Variance-covariance matrix across all columns of anscombe
# (variances on the diagonal, pairwise covariances elsewhere).
cov(anscombe)
## x1 x2 x3 x4 y1 y2 y3 y4
## x1 11.000 11.000 11.000 -5.500 5.501000 5.500000 5.49700 -2.115000
## x2 11.000 11.000 11.000 -5.500 5.501000 5.500000 5.49700 -2.115000
## x3 11.000 11.000 11.000 -5.500 5.501000 5.500000 5.49700 -2.115000
## x4 -5.500 -5.500 -5.500 11.000 -3.565000 -4.841000 -2.32100 5.499000
## y1 5.501 5.501 5.501 -3.565 4.127269 3.095609 1.93343 -2.017731
## y2 5.500 5.500 5.500 -4.841 3.095609 4.127629 2.42524 -1.972351
## y3 5.497 5.497 5.497 -2.321 1.933430 2.425240 4.12262 -0.641000
## y4 -2.115 -2.115 -2.115 5.499 -2.017731 -1.972351 -0.64100 4.123249
# anscombe has no `hp` column, so anscombe$hp is NULL and sd() of it is NA.
# Report the standard deviation of every column instead; vapply() guarantees
# one numeric value per column.
vapply(anscombe, sd, numeric(1))
## x1 x2 x3 x4 y1 y2 y3 y4
## 3.316625 3.316625 3.316625 3.316625 2.031568 2.031657 2.030424 2.030579
# Per-column summary (min, quartiles, median, mean, max) of the working copy t.
summary(object = t)
## x1 x2 x3 x4 y1
## Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8 Min. : 4.260
## 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8 1st Qu.: 6.315
## Median : 9.0 Median : 9.0 Median : 9.0 Median : 8 Median : 7.580
## Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9 Mean : 7.501
## 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8 3rd Qu.: 8.570
## Max. :14.0 Max. :14.0 Max. :14.0 Max. :19 Max. :10.840
## y2 y3 y4
## Min. :3.100 Min. : 5.39 Min. : 5.250
## 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
## Median :8.140 Median : 7.11 Median : 7.040
## Mean :7.501 Mean : 7.50 Mean : 7.501
## 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
## Max. :9.260 Max. :12.74 Max. :12.500
# Print the full anscombe data frame.
print(anscombe)

# Reload the built-in data set and reset the working copy t to it.
data("anscombe")
t <- anscombe

# Compact overview of t: dimensions, column types and leading values.
str(object = t)
## 'data.frame': 11 obs. of 8 variables:
## $ x1: num 10 8 13 9 11 14 6 4 12 7 ...
## $ x2: num 10 8 13 9 11 14 6 4 12 7 ...
## $ x3: num 10 8 13 9 11 14 6 4 12 7 ...
## $ x4: num 8 8 8 8 8 8 8 19 8 8 ...
## $ y1: num 8.04 6.95 7.58 8.81 8.33 ...
## $ y2: num 9.14 8.14 8.74 8.77 9.26 8.1 6.13 3.1 9.13 7.26 ...
## $ y3: num 7.46 6.77 12.74 7.11 7.81 ...
## $ y4: num 6.58 5.76 7.71 8.84 8.47 7.04 5.25 12.5 5.56 7.91 ...
# First six rows of t.
head(t, n = 6L)
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
# Last six rows of t.
tail(t, n = 6L)
## x1 x2 x3 x4 y1 y2 y3 y4
## 6 14 14 14 8 9.96 8.10 8.84 7.04
## 7 6 6 6 8 7.24 6.13 6.08 5.25
## 8 4 4 4 19 4.26 3.10 5.39 12.50
## 9 12 12 12 8 10.84 9.13 8.15 5.56
## 10 7 7 7 8 4.82 7.26 6.42 7.91
## 11 5 5 5 8 5.68 4.74 5.73 6.89
# Pull the y1 column of t out as a plain vector.
Y1 <- t[["y1"]]
# Flag the observations greater than 8.
Y1 > 8
## [1] TRUE FALSE FALSE TRUE TRUE TRUE FALSE FALSE TRUE FALSE FALSE
# Flag the observations strictly between 7 and 9.
(7 < Y1) & (Y1 < 9)
## [1] TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE FALSE FALSE
# Split Y1 into three equal-width intervals and count the values in each.
table(cut(Y1, breaks = 3))
##
## (4.25,6.45] (6.45,8.65] (8.65,10.8]
## 3 5 3
# Single cell of t: row 3, column 3.
t[3L, 3L]
## [1] 13