# Matrix ----
# Concatenate vectors with c(): append one value, then join two vectors.
a <- c(1, 2)
a <- c(a, 3)  # c() flattens its arguments, so this appends 3 to a
a
## [1] 1 2 3
b <- c(4, 5)
d <- c(a, b)  # the same call joins two whole vectors end to end
d
## [1] 1 2 3 4 5
# Build a score matrix out of three per-person vectors.
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)
# Joined together they form one length-6 vector.
c(kevin, marry, jerry)
## [1] 85 73 72 64 59 66
# With only nrow given, matrix() fills column by column, so each person's
# two scores end up split across rows.
matrix(c(kevin, marry, jerry), nrow = 3)
## [,1] [,2]
## [1,] 85 64
## [2,] 73 59
## [3,] 72 66
?matrix
## starting httpd help server ... done
# byrow = TRUE fills row by row instead: one row per person.
matrix(c(kevin, marry, jerry), nrow = 3, byrow = TRUE)
## [,1] [,2]
## [1,] 85 73
## [2,] 72 64
## [3,] 59 66
# Create matrices with byrow = FALSE (the default) and byrow = TRUE.
1:9
## [1] 1 2 3 4 5 6 7 8 9
# Nine values cannot fill a 4 x 4 matrix evenly: the values are recycled
# (see the output below) and R emits a warning.
matrix(1:9, ncol = 4, nrow = 4)
## Warning in matrix(1:9, ncol = 4, nrow = 4): data length [9] is not a
## sub-multiple or multiple of the number of rows [4]
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 4
## [2,] 2 6 1 5
## [3,] 3 7 2 6
## [4,] 4 8 3 7
# Default fill order: down the columns.
matrix(1:9, nrow = 3)
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# Row-wise fill, kept in m for the indexing examples that follow.
m <- matrix(1:9, nrow = 3, byrow = TRUE)
# Select matrix elements by position: whole row, whole column, single cell.
m[1, ]
## [1] 1 2 3
m[, 1]
## [1] 1 4 7
m[2, 2]
## [1] 5
# Attach row names and column names to a matrix.
mat <- matrix(c(kevin, marry, jerry), nrow = 3, byrow = TRUE)
mat
## [,1] [,2]
## [1,] 85 73
## [2,] 72 64
## [3,] 59 66
rownames(mat) <- c("kevin", "marry", "jerry")
mat
## [,1] [,2]
## kevin 85 73
## marry 72 64
## jerry 59 66
colnames(mat) <- c("first", "second")
mat
## first second
## kevin 85 73
## marry 72 64
## jerry 59 66
# Once names exist they can be used in place of positions.
mat["kevin", ]
## first second
## 85 73
mat[, "first"]
## kevin marry jerry
## 85 72 59
mat["marry", "second"]
## [1] 64
# The names can also be supplied at construction time through dimnames:
# a list holding the row names first, then the column names.
mat <- matrix(
  c(kevin, marry, jerry),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("kevin", "marry", "jerry"),
    c("first", "second")
  )
)
# Basic shape information for a matrix.
dim(mat)
## [1] 3 2
nrow(mat)
## [1] 3
ncol(mat)
## [1] 2
# Filter a matrix three equivalent ways: by position, by logical mask,
# and by name.
# -- one row
mat[1, ]
## first second
## 85 73
mat[c(TRUE, FALSE, FALSE), ]
## first second
## 85 73
mat["kevin", ]
## first second
## 85 73
# -- one column
mat[, 1]
## kevin marry jerry
## 85 72 59
mat[, c(TRUE, FALSE)]
## kevin marry jerry
## 85 72 59
mat[, "first"]
## kevin marry jerry
## 85 72 59
# -- several rows
mat[c(2, 3), ]
## first second
## marry 72 64
## jerry 59 66
mat[2:3, ]
## first second
## marry 72 64
## jerry 59 66
mat[c(FALSE, TRUE, TRUE), ]
## first second
## marry 72 64
## jerry 59 66
mat[c("marry", "jerry"), ]
## first second
## marry 72 64
## jerry 59 66
# -- a single cell
mat[2, 1]
## [1] 72
mat["marry", "first"]
## [1] 72
# Row bind: append a new row; it arrives unnamed, so name it afterwards.
mat2 <- rbind(mat, c(78, 63))
rownames(mat2)[4] <- "sam"
mat2
## first second
## kevin 85 73
## marry 72 64
## jerry 59 66
## sam 78 63
# Column bind: append a new column of scores.
mat3 <- cbind(mat, c(82, 77, 70))
mat3
## first second
## kevin 85 73 82
## marry 72 64 77
## jerry 59 66 70
# The added column has an empty name until one is assigned.
colnames(mat3)[3]
## [1] ""
colnames(mat3)[3] <- "third"
mat3
## first second third
## kevin 85 73 82
## marry 72 64 77
## jerry 59 66 70
# Matrix computation: the arithmetic operators work element by element.
m1 <- matrix(1:4, byrow = TRUE, nrow = 2)
m1
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
m2 <- matrix(5:8, byrow = TRUE, nrow = 2)
m2
## [,1] [,2]
## [1,] 5 6
## [2,] 7 8
m1 + m2
## [,1] [,2]
## [1,] 6 8
## [2,] 10 12
m1 - m2
## [,1] [,2]
## [1,] -4 -4
## [2,] -4 -4
m1 * m2
## [,1] [,2]
## [1,] 5 12
## [2,] 21 32
m1 / m2
## [,1] [,2]
## [1,] 0.2000000 0.3333333
## [2,] 0.4285714 0.5000000
mat2
## first second
## kevin 85 73
## marry 72 64
## jerry 59 66
## sam 78 63
# Totals across each row and down each column.
rowSums(mat2)
## kevin marry jerry sam
## 158 136 125 141
colSums(mat2)
## first second
## 294 266
?rowSums
# Product of matrix: `*` multiplies matching elements, whereas `%*%` is the
# linear-algebra matrix product (rows of m1 times columns of m2).
m1 * m2
## [,1] [,2]
## [1,] 5 12
## [2,] 21 32
m1 %*% m2
## [,1] [,2]
## [1,] 19 22
## [2,] 43 50
# 9 x 1: a single column holding 1..9
m1 <- matrix(1:9, nrow = 9)
m1
## [,1]
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## [6,] 6
## [7,] 7
## [8,] 8
## [9,] 9
# 1 x 9: a single row holding 1..9
m2 <- matrix(1:9, ncol = 9)
m2
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 1 2 3 4 5 6 7 8 9
# (9 x 1) %*% (1 x 9) = 9 x 9, which here is the multiplication table
m1 %*% m2
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 1 2 3 4 5 6 7 8 9
## [2,] 2 4 6 8 10 12 14 16 18
## [3,] 3 6 9 12 15 18 21 24 27
## [4,] 4 8 12 16 20 24 28 32 36
## [5,] 5 10 15 20 25 30 35 40 45
## [6,] 6 12 18 24 30 36 42 48 54
## [7,] 7 14 21 28 35 42 49 56 63
## [8,] 8 16 24 32 40 48 56 64 72
## [9,] 9 18 27 36 45 54 63 72 81
## Exercise: weighted score, 40% of the first column plus 60% of the second
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)
mat <- matrix(c(kevin, marry, jerry), nrow = 3, byrow = TRUE)
colnames(mat) <- c("first", "second")
rownames(mat) <- c("kevin", "marry", "jerry")
# method 1: weight each column and add, selecting columns by position
# or by name
mat[, 1] * 0.4 + mat[, 2] * 0.6
## kevin marry jerry
## 77.8 67.2 63.2
mat[, "first"] * 0.4 + mat[, "second"] * 0.6
## kevin marry jerry
## 77.8 67.2 63.2
# method 2: matrix product with a weight column, (3 x 2) %*% (2 x 1) = 3 x 1
mat %*% matrix(c(0.4, 0.6), nrow = 2)
## [,1]
## kevin 77.8
## marry 67.2
## jerry 63.2
# Factor ----
## Start from a plain character vector
weather <- c("sunny", "rainy", "cloudy", "rainy", "cloudy")
weather
## [1] "sunny" "rainy" "cloudy" "rainy" "cloudy"
class(weather)
## [1] "character"
## Turn the vector into a factor: distinct values become the levels
weather_category <- factor(weather)
weather_category
## [1] sunny rainy cloudy rainy cloudy
## Levels: cloudy rainy sunny
class(weather_category)
## [1] "factor"
## Use levels() to examine the categories
levels(weather_category)
## [1] "cloudy" "rainy" "sunny"
temperature <- c("Low", "High", "High", "Medium", "Low", "Medium")
# Create an ordered factor. The levels are listed from lowest to highest.
# The argument is spelled out as `ordered`; the abbreviation `order` only
# works through partial argument matching.
temperature_category <- factor(
  temperature,
  ordered = TRUE,
  levels = c("Low", "Medium", "High")
)
temperature
## [1] "Low" "High" "High" "Medium" "Low" "Medium"
# Plain strings compare alphabetically, so "High" sorts below "Low".
temperature[2] > temperature[1]
## [1] FALSE
temperature[1] > temperature[2]
## [1] TRUE
temperature_category
## [1] Low High High Medium Low Medium
## Levels: Low < Medium < High
# The ordered factor compares by level order instead: High > Low, and
# Medium is not greater than High.
temperature_category[3] > temperature_category[1]
## [1] TRUE
temperature_category[4] > temperature_category[3]
## [1] FALSE
levels(temperature_category)
## [1] "Low" "Medium" "High"
# Replace the level names of a factor with new labels.
temperature <- c("Low", "High", "High", "Medium", "Low", "Medium")
temperature_category <- factor(temperature)
temperature_category
## [1] Low High High Medium Low Medium
## Levels: High Low Medium
levels(temperature_category)
## [1] "High" "Low" "Medium"
# Assignment is positional: the new labels must follow the current level
# order (High, Low, Medium), not the order of the data.
levels(temperature_category) <- c("H", "L", "M")
temperature_category
## [1] L H H M L M
## Levels: H L M
# Data Frame ----
# Three vectors of different types describing the same five days.
days <- c("mon", "tue", "wed", "thu", "fri")
temp <- c(22.2, 21, 23, 24.3, 25)
rain <- c(TRUE, TRUE, FALSE, FALSE, TRUE)
class(days)
## [1] "character"
class(temp)
## [1] "numeric"
class(rain)
## [1] "logical"
# A vector holds a single type, so mixing the three coerces every value
# to character.
d <- c(days, temp, rain)
d
## [1] "mon" "tue" "wed" "thu" "fri" "22.2" "21" "23"
## [9] "24.3" "25" "TRUE" "TRUE" "FALSE" "FALSE" "TRUE"
class(d)
## [1] "character"
# The same vector arranged as a 5-row matrix.
mat <- matrix(c(days, temp, rain), nrow = 5)
class(mat)
## [1] "matrix"
# NOTE: output recorded on R < 4.0; R >= 4.0 prints "matrix" "array".
# A data frame keeps one type per column. stringsAsFactors is set
# explicitly because its default changed to FALSE in R 4.0; the str() and
# summary() output recorded below shows `days` as a factor.
df <- data.frame(days, temp, rain, stringsAsFactors = TRUE)
class(df)
## [1] "data.frame"
# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable with Rscript / source().
if (interactive()) {
  View(df)
}
class(df)
## [1] "data.frame"
str(df)
## 'data.frame': 5 obs. of 3 variables:
## $ days: Factor w/ 5 levels "fri","mon","thu",..: 2 4 5 3 1
## $ temp: num 22.2 21 23 24.3 25
## $ rain: logi TRUE TRUE FALSE FALSE TRUE
summary(df)
## days temp rain
## fri:1 Min. :21.0 Mode :logical
## mon:1 1st Qu.:22.2 FALSE:2
## thu:1 Median :23.0 TRUE :3
## tue:1 Mean :23.1
## wed:1 3rd Qu.:24.3
## Max. :25.0
# Called with no arguments, data() lists the available data sets.
data()
## outbreaks dataset
### https://cran.r-project.org/web/packages/outbreaks/index.html
### http://www.repidemicsconsortium.org/
# Load the iris data set and inspect its structure.
data(iris)
# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable with Rscript / source().
if (interactive()) {
  View(iris)
}
class(iris)
## [1] "data.frame"
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# First rows of the data frame: six by default, or an explicit count.
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
head(iris, n = 10)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
?head
# Last rows of the data frame; the original row numbers are kept.
tail(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
tail(iris, n = 10)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Index a data frame like a matrix: [rows, columns].
# Rows 1-3, every column.
iris[1:3, ]
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
# Rows 1-3 of one column, chosen by position and then by name; a single
# column comes back as a plain vector.
iris[1:3, 1]
## [1] 5.1 4.9 4.7
iris[1:3, "Sepal.Length"]
## [1] 5.1 4.9 4.7
# Two ways to show the first six rows of the first two columns.
head(iris[, 1:2])
## Sepal.Length Sepal.Width
## 1 5.1 3.5
## 2 4.9 3.0
## 3 4.7 3.2
## 4 4.6 3.1
## 5 5.0 3.6
## 6 5.4 3.9
iris[1:6, 1:2]
## Sepal.Length Sepal.Width
## 1 5.1 3.5
## 2 4.9 3.0
## 3 4.7 3.2
## 4 4.6 3.1
## 5 5.0 3.6
## 6 5.4 3.9
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# `$` extracts one column as a vector.
head(iris$Sepal.Length)
## [1] 5.1 4.9 4.7 4.6 5.0 5.4
# First five rows of the two sepal columns, selected by name.
five.sepal.iris <- iris[1:5, c("Sepal.Length", "Sepal.Width")]
# Keep only the setosa rows, using a logical condition as the row index.
setosa.data <- iris[iris$Species == "setosa", 1:5]
head(setosa.data)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# which() turns the condition into row positions, which index the same rows.
which(iris$Species == "setosa")
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50
setosa.data2 <- iris[which(iris$Species == "setosa"), 1:5]
# Smallest and largest sepal lengths.
head(sort(iris$Sepal.Length))
## [1] 4.3 4.4 4.4 4.4 4.5 4.6
head(sort(iris$Sepal.Length, decreasing = TRUE))
## [1] 7.9 7.7 7.7 7.7 7.7 7.6
a <- c(3, 1, 2, 7, 9, 4)
# sort() returns the values themselves in ascending order
sort(a)
## [1] 1 2 3 4 7 9
# order() returns the positions (indices) that would put a in that order
order(a)
## [1] 2 3 1 6 4 5
# Sort whole rows: order() on one column gives the row positions, which
# are then used as the row index. Ascending by sepal length first.
head(iris[order(iris$Sepal.Length), ])
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 14 4.3 3.0 1.1 0.1 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 4 4.6 3.1 1.5 0.2 setosa
# Same idea, largest sepal length first.
head(iris[order(iris$Sepal.Length, decreasing = TRUE), ])
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 132 7.9 3.8 6.4 2.0 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 106 7.6 3.0 6.6 2.1 virginica