# TUẦN 2 (Week 2) ----
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.0
## Warning: package 'tibble' was built under R version 4.3.0
## Warning: package 'tidyr' was built under R version 4.3.0
## Warning: package 'purrr' was built under R version 4.3.0
## Warning: package 'dplyr' was built under R version 4.3.0
## Warning: package 'stringr' was built under R version 4.3.0
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the built-in iris measurements and keep a copy of them in `tmp`.
data(iris)
tmp <- iris
# Bin petal length into four equal-width intervals; the resulting factor is
# only printed, not stored.
cut(iris[["Petal.Length"]], breaks = 4)
## [1] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [6] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [11] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [16] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [21] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [26] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [31] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [36] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [41] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [46] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [51] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43]
## [56] (3.95,5.43] (3.95,5.43] (2.48,3.95] (3.95,5.43] (2.48,3.95]
## [61] (2.48,3.95] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95]
## [66] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95]
## [71] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43]
## [76] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95]
## [81] (2.48,3.95] (2.48,3.95] (2.48,3.95] (3.95,5.43] (3.95,5.43]
## [86] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43]
## [91] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95] (3.95,5.43]
## [96] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95] (3.95,5.43]
## [101] (5.43,6.91] (3.95,5.43] (5.43,6.91] (5.43,6.91] (5.43,6.91]
## [106] (5.43,6.91] (3.95,5.43] (5.43,6.91] (5.43,6.91] (5.43,6.91]
## [111] (3.95,5.43] (3.95,5.43] (5.43,6.91] (3.95,5.43] (3.95,5.43]
## [116] (3.95,5.43] (5.43,6.91] (5.43,6.91] (5.43,6.91] (3.95,5.43]
## [121] (5.43,6.91] (3.95,5.43] (5.43,6.91] (3.95,5.43] (5.43,6.91]
## [126] (5.43,6.91] (3.95,5.43] (3.95,5.43] (5.43,6.91] (5.43,6.91]
## [131] (5.43,6.91] (5.43,6.91] (5.43,6.91] (3.95,5.43] (5.43,6.91]
## [136] (5.43,6.91] (5.43,6.91] (5.43,6.91] (3.95,5.43] (3.95,5.43]
## [141] (5.43,6.91] (3.95,5.43] (3.95,5.43] (5.43,6.91] (5.43,6.91]
## [146] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43]
## Levels: (0.994,2.48] (2.48,3.95] (3.95,5.43] (5.43,6.91]
# Cross-tabulate the four petal-length bins (rows) against species (columns).
# The table is built once, stored in `x` with `<-`, and reused below instead
# of being computed a second time.
x <- table(cut(iris$Petal.Length, breaks = 4), iris$Species)
x
##
## setosa versicolor virginica
## (0.994,2.48] 50 0 0
## (2.48,3.95] 0 11 0
## (3.95,5.43] 0 39 22
## (5.43,6.91] 0 0 28
# Each cell as a share of the grand total (no margin given, so all cells
# together sum to 1).
prop.table(x)
##
## setosa versicolor virginica
## (0.994,2.48] 0.33333333 0.00000000 0.00000000
## (2.48,3.95] 0.00000000 0.07333333 0.00000000
## (3.95,5.43] 0.00000000 0.26000000 0.14666667
## (5.43,6.91] 0.00000000 0.00000000 0.18666667
# Split the flowers into the setosa rows and all remaining rows.
setosa <- iris[iris$Species == "setosa", ]
notsetosa <- iris[iris$Species != "setosa", ]
# Code sepal length into three labelled classes. The intervals are
# right-closed: (4.29, 4.8], (4.8, 7.2] and (7.2, 7.911].
# The labels are presumably Vietnamese for short / medium / long - confirm.
iris$SLcoded <- cut(
  iris$Sepal.Length,
  breaks = c(4.29, 4.8, 7.2, 7.911),
  labels = c("ngan", "vua", "dai")
)
# Number of flowers in each class.
table(iris$SLcoded)
##
## ngan vua dai
## 16 126 8
# Descriptive statistics for sepal length over all flowers:
# five-number summary plus mean, variance, standard deviation and total.
with(iris, summary(Sepal.Length))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.300 5.100 5.800 5.843 6.400 7.900
with(iris, var(Sepal.Length))
## [1] 0.6856935
with(iris, sd(Sepal.Length))
## [1] 0.8280661
with(iris, sum(Sepal.Length))
## [1] 876.5
# 30th percentile: about 30% of the sepal lengths lie at or below 5.27.
with(iris, quantile(Sepal.Length, probs = 0.3))
## 30%
## 5.27
# Grouped summaries of sepal length. The grouping list is unnamed, so the
# result labels the group column `Group.1` and the value column `x`.

# Mean sepal length per species.
aggregate(x = iris$Sepal.Length, by = list(iris$Species), FUN = mean)
## Group.1 x
## 1 setosa 5.006
## 2 versicolor 5.936
## 3 virginica 6.588
# Standard deviation of sepal length per species.
aggregate(x = iris$Sepal.Length, by = list(iris$Species), FUN = sd)
## Group.1 x
## 1 setosa 0.3524897
## 2 versicolor 0.5161711
## 3 virginica 0.6358796
# Mean sepal length within each `SLcoded` length class.
aggregate(x = iris$Sepal.Length, by = list(iris$SLcoded), FUN = mean)
## Group.1 x
## 1 ngan 4.612500
## 2 vua 5.886508
## 3 dai 7.625000
# Per-species mean of sepal length again, this time as a dplyr pipeline;
# the result is a tibble with one row per species and the mean in column `m`.
iris %>%
  group_by(Species) %>%
  summarise(m = mean(Sepal.Length))
## # A tibble: 3 × 2
## Species m
## <fct> <dbl>
## 1 setosa 5.01
## 2 versicolor 5.94
## 3 virginica 6.59
# Simulate data ----
# No seed is set, so every run draws different values; the printed output
# below comes from one particular run.

# Ten draws from a Poisson distribution with mean 2.
poi2 <- rpois(10, 2)
poi2
## [1] 2 3 1 5 0 3 1 1 3 2
is.vector(poi2)
## [1] TRUE
# Wrap the counts in a data frame; the single column is named `poi2`.
poi2 <- as.data.frame(poi2)
# Constant label vectors of length 10. `rep()` states the intent directly;
# the earlier `seq(2, length = 10)` assignment to `tg` was overwritten
# immediately and has been dropped.
tg <- rep(2, 10)
th <- rep(1, 10)
poi2$th <- th
# Three simulated groups of ten values each, tagged with a group id in `tg`.
# The random draws happen in the same order as before (d1, d2, d3).
# `rep(k, 10)` replaces `seq(k, k, length = 10)`, which relied on partial
# matching of the `length.out` argument to build a constant vector.
# NOTE(review): group 1 is drawn from a Poisson distribution while groups 2
# and 3 are normal - confirm this mix is intended.
d1 <- data.frame(d1 = rpois(10, 6), tg = rep(1, 10))
d2 <- data.frame(d2 = rnorm(10, 6, 2), tg = rep(2, 10))
d3 <- data.frame(d3 = rnorm(10, 15, 2), tg = rep(3, 10))
# Give the value column the shared name `dn` so the frames can be stacked.
d12 <- rename(d1, dn = d1)
d22 <- rename(d2, dn = d2)
d32 <- rename(d3, dn = d3)
# Stack the groups into one long table: 30 rows, columns `dn` and `tg`.
tonghop <- rbind(d12, d22, d32)
# TUẦN 1 (Week 1) ----
# Work on a copy `d` of the built-in `trees` data and inspect its structure.
data("trees")
d <- trees
str(d)
## 'data.frame': 31 obs. of 3 variables:
## $ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
## $ Height: num 70 65 63 72 81 83 66 75 80 75 ...
## $ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
# Shorten the column names: Girth -> G, Height -> H, Volume -> V.
colnames(d) <- c("G", "H", "V")
head(d)
## G H V
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
# Pull the volume column out as a plain numeric vector.
vol <- d[["V"]]
vol
## [1] 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3 19.1
## [16] 22.2 33.8 27.4 25.7 24.9 34.5 31.7 36.3 38.3 42.6 55.4 55.7 58.3 51.5 51.0
## [31] 77.0
# Volumes strictly between 15 and 50 (element-wise `&` builds the mask).
vol15 <- vol[vol > 15 & vol < 50]
vol15
## [1] 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3 19.1 22.2 33.8 27.4
## [16] 25.7 24.9 34.5 31.7 36.3 38.3 42.6
# Rows whose volume exceeds 15 OR whose height is below 60.
# NOTE(review): no printed height is below 60, so the height condition adds
# no rows here; if "volume > 15 AND height < 60" was meant, `&` is needed.
vol15h60 <- d[d$V > 15 | d$H < 60, ]
vol15h60
## G H V
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
# Derived columns: natural logs of volume and height, the sine of volume,
# and `tong`, the row-wise sum G + H + V.
d[["logV"]] <- log(d[["V"]])
d[["logH"]] <- log(d[["H"]])
d[["sinV"]] <- sin(d[["V"]])
d[["tong"]] <- with(d, G + H + V)
# Bin the volumes into four equal-width intervals.
cut(vol, breaks = 4)
## [1] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9]
## [7] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9]
## [13] (10.1,26.9] (10.1,26.9] (10.1,26.9] (10.1,26.9] (26.9,43.6] (26.9,43.6]
## [19] (10.1,26.9] (10.1,26.9] (26.9,43.6] (26.9,43.6] (26.9,43.6] (26.9,43.6]
## [25] (26.9,43.6] (43.6,60.3] (43.6,60.3] (43.6,60.3] (43.6,60.3] (43.6,60.3]
## [31] (60.3,77.1]
## Levels: (10.1,26.9] (26.9,43.6] (43.6,60.3] (60.3,77.1]
# Count how many trees fall into each volume bin.
table(cut(vol, breaks = 4))
##
## (10.1,26.9] (26.9,43.6] (43.6,60.3] (60.3,77.1]
## 18 7 5 1
# Start the iris exercises from the built-in dataset.
# `rm(list = ls())` was removed: it deletes every object in the caller's
# workspace when the script is sourced, yet leaves attached packages and
# options untouched, so it never yields a truly clean session. Restart R if a
# clean slate is needed. `data("iris")` alone reloads the original dataset.
data("iris")
# Bin petal length into four equal-width intervals (printed only).
cut(iris$Petal.Length, breaks = 4)
## [1] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [6] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [11] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [16] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [21] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [26] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [31] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [36] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [41] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [46] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48] (0.994,2.48]
## [51] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43]
## [56] (3.95,5.43] (3.95,5.43] (2.48,3.95] (3.95,5.43] (2.48,3.95]
## [61] (2.48,3.95] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95]
## [66] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95]
## [71] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43]
## [76] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95]
## [81] (2.48,3.95] (2.48,3.95] (2.48,3.95] (3.95,5.43] (3.95,5.43]
## [86] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43]
## [91] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95] (3.95,5.43]
## [96] (3.95,5.43] (3.95,5.43] (3.95,5.43] (2.48,3.95] (3.95,5.43]
## [101] (5.43,6.91] (3.95,5.43] (5.43,6.91] (5.43,6.91] (5.43,6.91]
## [106] (5.43,6.91] (3.95,5.43] (5.43,6.91] (5.43,6.91] (5.43,6.91]
## [111] (3.95,5.43] (3.95,5.43] (5.43,6.91] (3.95,5.43] (3.95,5.43]
## [116] (3.95,5.43] (5.43,6.91] (5.43,6.91] (5.43,6.91] (3.95,5.43]
## [121] (5.43,6.91] (3.95,5.43] (5.43,6.91] (3.95,5.43] (5.43,6.91]
## [126] (5.43,6.91] (3.95,5.43] (3.95,5.43] (5.43,6.91] (5.43,6.91]
## [131] (5.43,6.91] (5.43,6.91] (5.43,6.91] (3.95,5.43] (5.43,6.91]
## [136] (5.43,6.91] (5.43,6.91] (5.43,6.91] (3.95,5.43] (3.95,5.43]
## [141] (5.43,6.91] (3.95,5.43] (3.95,5.43] (5.43,6.91] (5.43,6.91]
## [146] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43] (3.95,5.43]
## Levels: (0.994,2.48] (2.48,3.95] (3.95,5.43] (5.43,6.91]
# Cross-tabulate the four petal-length bins (rows) against species (columns).
# The table is built once, stored in `x` with `<-`, and reused below instead
# of being computed a second time.
x <- table(cut(iris$Petal.Length, breaks = 4), iris$Species)
x
##
## setosa versicolor virginica
## (0.994,2.48] 50 0 0
## (2.48,3.95] 0 11 0
## (3.95,5.43] 0 39 22
## (5.43,6.91] 0 0 28
# Each cell as a share of the grand total (no margin given, so all cells
# together sum to 1).
prop.table(x)
##
## setosa versicolor virginica
## (0.994,2.48] 0.33333333 0.00000000 0.00000000
## (2.48,3.95] 0.00000000 0.07333333 0.00000000
## (3.95,5.43] 0.00000000 0.26000000 0.14666667
## (5.43,6.91] 0.00000000 0.00000000 0.18666667
# Split iris by species: the setosa rows and all remaining rows.
setosa <- iris[iris$Species == "setosa", ]
# The complement gets its own name; assigning it to `setosa` again would
# overwrite the subset created on the previous line.
notsetosa <- iris[iris$Species != "setosa", ]