Thao tác trên dataset
## 'data.frame': 31 obs. of 3 variables:
## $ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
## $ Height: num 70 65 63 72 81 83 66 75 80 75 ...
## $ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## [1] 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3 19.1
## [16] 22.2 33.8 27.4 25.7 24.9 34.5 31.7 36.3 38.3 42.6 55.4 55.7 58.3 51.5 51.0
## [31] 77.0
## [1] 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3 19.1 22.2 33.8 27.4
## [16] 25.7 24.9 34.5 31.7 36.3 38.3 42.6
## G H V
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
## [1] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9]
## [7] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9]
## [13] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (26.9,43.6] (26.9,43.6]
## [19] (10.1,26.9] (10.1,26.9] (26.9,43.6] (26.9,43.6] (26.9,43.6] (26.9,43.6]
## [25] (26.9,43.6] (43.6,60.3] (43.6,60.3] (43.6,60.3] (43.6,60.3] (43.6,60.3]
## [31] (60.3,77.1]
## Levels: (10.1,26.9] (26.9,43.6] (43.6,60.3] (60.3,77.1]
##
## (10.1,26.9] (26.9,43.6] (43.6,60.3] (60.3,77.1]
## 18 7 5 1
Bộ dữ liệu iris (thông số về hoa diên vĩ)
## Warning: package 'utf8' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## [1] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (5.2,6.1] (4.3,5.2]
## [8] (4.3,5.2] (4.3,5.2] (4.3,5.2] (5.2,6.1] (4.3,5.2] (4.3,5.2] (4.3,5.2]
## [15] (5.2,6.1] (5.2,6.1] (5.2,6.1] (4.3,5.2] (5.2,6.1] (4.3,5.2] (5.2,6.1]
## [22] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2]
## [29] (4.3,5.2] (4.3,5.2] (4.3,5.2] (5.2,6.1] (4.3,5.2] (5.2,6.1] (4.3,5.2]
## [36] (4.3,5.2] (5.2,6.1] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2]
## [43] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (4.3,5.2] (5.2,6.1]
## [50] (4.3,5.2] (6.1,7] (6.1,7] (6.1,7] (5.2,6.1] (6.1,7] (5.2,6.1]
## [57] (6.1,7] (4.3,5.2] (6.1,7] (4.3,5.2] (4.3,5.2] (5.2,6.1] (5.2,6.1]
## [64] (5.2,6.1] (5.2,6.1] (6.1,7] (5.2,6.1] (5.2,6.1] (6.1,7] (5.2,6.1]
## [71] (5.2,6.1] (5.2,6.1] (6.1,7] (5.2,6.1] (6.1,7] (6.1,7] (6.1,7]
## [78] (6.1,7] (5.2,6.1] (5.2,6.1] (5.2,6.1] (5.2,6.1] (5.2,6.1] (5.2,6.1]
## [85] (5.2,6.1] (5.2,6.1] (6.1,7] (6.1,7] (5.2,6.1] (5.2,6.1] (5.2,6.1]
## [92] (5.2,6.1] (5.2,6.1] (4.3,5.2] (5.2,6.1] (5.2,6.1] (5.2,6.1] (6.1,7]
## [99] (4.3,5.2] (5.2,6.1] (6.1,7] (5.2,6.1] (7,7.9] (6.1,7] (6.1,7]
## [106] (7,7.9] (4.3,5.2] (7,7.9] (6.1,7] (7,7.9] (6.1,7] (6.1,7]
## [113] (6.1,7] (5.2,6.1] (5.2,6.1] (6.1,7] (6.1,7] (7,7.9] (7,7.9]
## [120] (5.2,6.1] (6.1,7] (5.2,6.1] (7,7.9] (6.1,7] (6.1,7] (7,7.9]
## [127] (6.1,7] (5.2,6.1] (6.1,7] (7,7.9] (7,7.9] (7,7.9] (6.1,7]
## [134] (6.1,7] (5.2,6.1] (7,7.9] (6.1,7] (6.1,7] (5.2,6.1] (6.1,7]
## [141] (6.1,7] (6.1,7] (5.2,6.1] (6.1,7] (6.1,7] (6.1,7] (6.1,7]
## [148] (6.1,7] (6.1,7] (5.2,6.1]
## Levels: (4.3,5.2] (5.2,6.1] (6.1,7] (7,7.9]
# Split iris by species: the setosa rows, and every other row.
# (`<-` assigns, `==` / `!=` compare.)
setosa <- iris[iris$Species == "setosa", ]
notsetosa <- iris[iris$Species != "setosa", ]

# Summary statistics of sepal length; e.g. the 1st quartile of 5.1 means
# that 25% of the observations are below 5.1.
summary(iris$Sepal.Length)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.300 5.100 5.800 5.843 6.400 7.900

# Bin sepal length into three labelled classes using hand-picked break
# points; `labels` optionally names the resulting intervals.
# NOTE(review): the intervals are open on the left and the last break is 7.8,
# so the minimum (4.3) and the maximum (7.9) fall outside every bin and are
# coded NA (the frequency table sums to 148 of 150) - confirm this is intended.
iris[["Slcoded"]] <- cut(
  x = iris$Sepal.Length,
  breaks = c(4.3, 4.8, 7.2, 7.8),
  labels = c("Ngắn", "Vừa", "Dài")
)

# Frequency table of the coded variable, to be commented on afterwards.
table(iris$Slcoded)
##
## Ngắn Vừa Dài
## 15 126 7
## Warning in Ops.factor(iris$Slcoded, summary(iris$Sepal.Length)): '/' not
## meaningful for factors
## < table of extent 0 >
## [1] 5.843333
## [1] 0.6856935
## [1] 0.8280661
## [1] 876.5
# Arbitrary quantile: the value below which 31% of the sepal lengths fall,
# as opposed to the special 25% / 50% / 75% cut points.
quantile(iris$Sepal.Length, probs = 0.31)
## 31%
## 5.4
##
## setosa versicolor virginica
## 50 50 50
## Group.1 x
## 1 setosa 5.006
## 2 versicolor 5.936
## 3 virginica 6.588
# Standard deviation of sepal length for each species.
# The grouping list must contain the grouping variable (Species). Grouping by
# Sepal.Length itself puts identical values in every group, so every sd comes
# out as 0 (or NA for a group with a single observation).
aggregate(iris$Sepal.Length, list(iris$Species), FUN = sd)
## Group.1 x
## 1 setosa 0.3524897
## 2 versicolor 0.5161711
## 3 virginica 0.6358796
## Group.1 x
## 1 Ngắn 4.633333
## 2 Vừa 5.886508
## 3 Dài 7.585714
## Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max.
## 1 setosa 4.300 4.800 5.000 5.006 5.200 5.800
## 2 versicolor 4.900 5.600 5.900 5.936 6.300 7.000
## 3 virginica 4.900 6.225 6.500 6.588 6.900 7.900
## # A tibble: 150 × 6
## # Groups: Species [3]
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Slcoded
## <dbl> <dbl> <dbl> <dbl> <fct> <fct>
## 1 5.1 3.5 1.4 0.2 setosa Vừa
## 2 4.9 3 1.4 0.2 setosa Vừa
## 3 4.7 3.2 1.3 0.2 setosa Ngắn
## 4 4.6 3.1 1.5 0.2 setosa Ngắn
## 5 5 3.6 1.4 0.2 setosa Vừa
## 6 5.4 3.9 1.7 0.4 setosa Vừa
## 7 4.6 3.4 1.4 0.3 setosa Ngắn
## 8 5 3.4 1.5 0.2 setosa Vừa
## 9 4.4 2.9 1.4 0.2 setosa Ngắn
## 10 4.9 3.1 1.5 0.1 setosa Vừa
## # ℹ 140 more rows
## # A tibble: 150 × 6
## # Groups: Species [3]
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Slcoded
## <dbl> <dbl> <dbl> <dbl> <fct> <fct>
## 1 5.1 3.5 1.4 0.2 setosa Vừa
## 2 4.9 3 1.4 0.2 setosa Vừa
## 3 4.7 3.2 1.3 0.2 setosa Ngắn
## 4 4.6 3.1 1.5 0.2 setosa Ngắn
## 5 5 3.6 1.4 0.2 setosa Vừa
## 6 5.4 3.9 1.7 0.4 setosa Vừa
## 7 4.6 3.4 1.4 0.3 setosa Ngắn
## 8 5 3.4 1.5 0.2 setosa Vừa
## 9 4.4 2.9 1.4 0.2 setosa Ngắn
## 10 4.9 3.1 1.5 0.1 setosa Vừa
## # ℹ 140 more rows
## # A tibble: 35 × 2
## Sepal.Length n
## <dbl> <dbl>
## 1 4.3 NA
## 2 4.4 0
## 3 4.5 NA
## 4 4.6 0
## 5 4.7 0
## 6 4.8 0
## 7 4.9 0
## 8 5 0
## 9 5.1 0
## 10 5.2 0
## # ℹ 25 more rows
# Helper vectors: ten copies of 2, and the ten consecutive values 2, 3, ..., 11.
# rep() states "constant vector" directly, and `length.out` is spelled out
# instead of relying on partial matching of `length`.
tg <- rep(2, 10)
tg1 <- seq(2, length.out = 10)

# Three simulated samples of size 10, each stored with a group id `tg`.
# The draws stay in the original order (rpois, rnorm, rnorm) so the random
# number stream, and therefore the values, are unchanged for a given seed.
d1 <- data.frame(d = rpois(10, 6), tg = rep(1, 10))
d2 <- data.frame(d = rnorm(10, 6, 2), tg = rep(2, 10))
d3 <- data.frame(d = rnorm(10, 15, 2), tg = rep(3, 10))

# Place the three samples side by side.
# NOTE(review): all three frames share the column names d / tg, so the result
# has duplicated column names; if the goal is one long table keyed by `tg`,
# rbind() may be what is wanted - confirm.
cbind(d1, d2, d3)
## d tg d tg d tg
## 1 4 1 7.257990 2 14.31439 3
## 2 7 1 6.304555 2 12.32181 3
## 3 8 1 5.674464 2 14.66067 3
## 4 7 1 2.949578 2 15.56399 3
## 5 3 1 6.004711 2 15.58772 3
## 6 6 1 5.703548 2 14.89291 3
## 7 3 1 1.786781 2 14.24094 3
## 8 7 1 4.030775 2 12.23104 3
## 9 1 1 6.337933 2 19.01558 3
## 10 7 1 4.519477 2 14.50878 3
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species Slcoded
## setosa :50 Ngắn: 15
## versicolor:50 Vừa :126
## virginica :50 Dài : 7
## NA's: 2
##
##
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.2 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
data("billboard")
bill <- billboard

# pivot_longer(): reshape the table from 'wide' to 'long'. The many wk*
# columns collapse into one `week` column (the former column name) and one
# `rank` column (the former cell value).

# m1 keeps every week/rank pair, including the missing ranks.
m1 <- bill |>
  pivot_longer(
    cols = starts_with("wk"),
    names_to = "week",
    values_to = "rank"
  )

# m2 keeps only the ranked weeks: rows whose rank is NA (song not ranked
# that week) are dropped.
m2 <- bill |>
  pivot_longer(
    cols = starts_with("wk"),
    names_to = "week",
    values_to = "rank",
    values_drop_na = TRUE
  )

# m3 is m2 with `week` reduced to its numeric part by parse_number()
# (the "wk" prefix is discarded).
m3 <- m2 |>
  mutate(week = parse_number(week))

# aa: a constant column of 1s, sized from m3 itself rather than from another
# table so it can never get out of step with the rows it is attached to.
m3$aa <- rep(1, nrow(m3))

# mm: m3 plus a new column bb = rank + aa. Inside mutate() the columns are
# referenced by bare name; no `m3$` prefix is needed.
mm <- mutate(m3, bb = rank + aa)

# Homework: reshape 'long' back to 'wide'.
## Warning: package 'lme4' was built under R version 4.2.3
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## S1WantCurse S1WantScold S1WantShout S2WantCurse S2WantScold S2WantShout
## 316 316 316 316 316 316
## S3WantCurse S3WantScold S3WantShout S4wantCurse S4WantScold S4WantShout
## 316 316 316 316 316 316
## S1DoCurse S1DoScold S1DoShout S2DoCurse S2DoScold S2DoShout
## 316 316 316 316 316 316
## S3DoCurse S3DoScold S3DoShout S4DoCurse S4DoScold S4DoShout
## 316 316 316 316 316 316
## [1] "S1WantCurse" "S1WantScold" "S1WantShout" "S2WantCurse" "S2WantScold"
## [6] "S2WantShout" "S3WantCurse" "S3WantScold" "S3WantShout" "S4wantCurse"
## [11] "S4WantScold" "S4WantShout" "S1DoCurse" "S1DoScold" "S1DoShout"
## [16] "S2DoCurse" "S2DoScold" "S2DoShout" "S3DoCurse" "S3DoScold"
## [21] "S3DoShout" "S4DoCurse" "S4DoScold" "S4DoShout"