Bài thực hành tuần 1

Thao tác trên bộ dữ liệu trees

# Load the built-in `trees` data set and work on a copy named `d`,
# so the original object is left untouched.
data(trees)
d <- trees

# Inspect the structure (dimensions and column types) before renaming.
str(d)
## 'data.frame':    31 obs. of  3 variables:
##  $ Girth : num  8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
##  $ Height: num  70 65 63 72 81 83 66 75 80 75 ...
##  $ Volume: num  10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
# Shorten the column names: Girth -> G, Height -> H, Volume -> V.
names(d) <- c("G", "H", "V")

Tới đây ta đã nạp bộ dữ liệu trees, xem cấu trúc của nó bằng str() và đổi tên ba cột thành G (Girth), H (Height) và V (Volume).

# Preview the first six rows of the renamed data frame.
head(d, n = 6L)
##      G  H    V
## 1  8.3 70 10.3
## 2  8.6 65 10.3
## 3  8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
# Pull the volume column out as a plain numeric vector, then print it.
Vol <- d[["V"]]
Vol
##  [1] 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3 19.1
## [16] 22.2 33.8 27.4 25.7 24.9 34.5 31.7 36.3 38.3 42.6 55.4 55.7 58.3 51.5 51.0
## [31] 77.0
# Keep only the volumes strictly between 15 and 50 (both bounds exclusive).
Vol15 <- Vol[(Vol > 15) & (Vol < 50)]
Vol15
##  [1] 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3 19.1 22.2 33.8 27.4
## [16] 25.7 24.9 34.5 31.7 36.3 38.3 42.6
# Select whole rows of `d` whose volume is above 15 OR whose height is
# below 60; all columns are kept.
# NOTE(review): in the printed result every row already has V > 15, so the
# height clause adds no rows here - confirm that OR (not AND) is intended.
Voll <- d[d[["V"]] > 15 | d[["H"]] < 60, ]
Voll
##       G  H    V
## 4  10.5 72 16.4
## 5  10.7 81 18.8
## 6  10.8 83 19.7
## 7  11.0 66 15.6
## 8  11.0 75 18.2
## 9  11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
# Add derived columns to `d`: natural logarithms of volume and height, and
# "Tong" (Vietnamese for "total"), the row-wise sum of G, H and V.
d[["logV"]] <- log(d[["V"]])
d[["logH"]] <- log(d[["H"]])
d[["Tong"]] <- d[["G"]] + d[["H"]] + d[["V"]]

# Bin the volumes into 4 intervals; with a single number for `breaks`,
# cut() divides the range of the data into that many equal-length pieces
# and returns a factor of interval labels.
cut(Vol, breaks = 4)
##  [1] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9]
##  [7] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9]
## [13] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (26.9,43.6] (26.9,43.6]
## [19] (10.1,26.9] (10.1,26.9] (26.9,43.6] (26.9,43.6] (26.9,43.6] (26.9,43.6]
## [25] (26.9,43.6] (43.6,60.3] (43.6,60.3] (43.6,60.3] (43.6,60.3] (43.6,60.3]
## [31] (60.3,77.1]
## Levels: (10.1,26.9] (26.9,43.6] (43.6,60.3] (60.3,77.1]
# Count how many trees fall into each volume interval.
table(cut(Vol, breaks = 4))
## 
## (10.1,26.9] (26.9,43.6] (43.6,60.3] (60.3,77.1] 
##          18           7           5           1

Bộ dữ liệu Iris

# Load the built-in `iris` data set, then bin the petal lengths into
# 4 intervals; the result is a factor of interval labels, one per flower.
data("iris")
cut(iris[["Petal.Length"]], breaks = 4)
##   [1] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##   [6] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [11] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [16] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [21] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [26] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [31] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [36] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [41] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [46] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
##  [51] (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43] 
##  [56] (3.95,5.43]  (3.95,5.43]  (2.48,3.95]  (3.95,5.43]  (2.48,3.95] 
##  [61] (2.48,3.95]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (2.48,3.95] 
##  [66] (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (2.48,3.95] 
##  [71] (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43] 
##  [76] (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (2.48,3.95] 
##  [81] (2.48,3.95]  (2.48,3.95]  (2.48,3.95]  (3.95,5.43]  (3.95,5.43] 
##  [86] (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43] 
##  [91] (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (2.48,3.95]  (3.95,5.43] 
##  [96] (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (2.48,3.95]  (3.95,5.43] 
## [101] (5.43,6.91]  (3.95,5.43]  (5.43,6.91]  (5.43,6.91]  (5.43,6.91] 
## [106] (5.43,6.91]  (3.95,5.43]  (5.43,6.91]  (5.43,6.91]  (5.43,6.91] 
## [111] (3.95,5.43]  (3.95,5.43]  (5.43,6.91]  (3.95,5.43]  (3.95,5.43] 
## [116] (3.95,5.43]  (5.43,6.91]  (5.43,6.91]  (5.43,6.91]  (3.95,5.43] 
## [121] (5.43,6.91]  (3.95,5.43]  (5.43,6.91]  (3.95,5.43]  (5.43,6.91] 
## [126] (5.43,6.91]  (3.95,5.43]  (3.95,5.43]  (5.43,6.91]  (5.43,6.91] 
## [131] (5.43,6.91]  (5.43,6.91]  (5.43,6.91]  (3.95,5.43]  (5.43,6.91] 
## [136] (5.43,6.91]  (5.43,6.91]  (5.43,6.91]  (3.95,5.43]  (3.95,5.43] 
## [141] (5.43,6.91]  (3.95,5.43]  (3.95,5.43]  (5.43,6.91]  (5.43,6.91] 
## [146] (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43]  (3.95,5.43] 
## Levels: (0.994,2.48] (2.48,3.95] (3.95,5.43] (5.43,6.91]
# Frequency of flowers in each petal-length interval.
table(cut(iris[["Petal.Length"]], breaks = 4))
## 
## (0.994,2.48]  (2.48,3.95]  (3.95,5.43]  (5.43,6.91] 
##           50           11           61           28
# Cross-tabulate the petal-length intervals (rows) against species (columns).
table(cut(iris[["Petal.Length"]], breaks = 4), iris[["Species"]])
##               
##                setosa versicolor virginica
##   (0.994,2.48]     50          0         0
##   (2.48,3.95]       0         11         0
##   (3.95,5.43]       0         39        22
##   (5.43,6.91]       0          0        28