# save & load ----

# Workspace persistence demo: switch to a fixed folder, dump every object of
# the global workspace into an .RData file, then read that file back.

# Earlier variant, left disabled: saves straight to an absolute path.
#save.image("C:/Users/nc20/Desktop/0313.RData")
# show the current working directory
getwd()
## [1] "C:/Users/nc20/Documents"
# change the working directory so the relative file names below resolve there
# NOTE(review): this path belongs to one specific Windows account; setwd()
# stops with an error on any machine where the folder does not exist.
setwd('C:/Users/nc20/Desktop/')

# save.image() writes all objects of the current workspace to the file
save.image("0313_2.RData")

# load() restores the saved objects under their original names, replacing
# any objects of the same name that already exist
load("0313_2.RData")

# Save Model ----

# Fit y1 ~ x1 on the built-in anscombe data set, plot the fit, and show that
# the model object still predicts after a save()/load() round trip.
data("anscombe")
fit <- lm(formula = y1 ~ x1, data = anscombe)
fit
## 
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
## 
## Coefficients:
## (Intercept)           x1  
##      3.0001       0.5001

# scatter plot of the raw points with the fitted line drawn on top in red
plot(y1 ~ x1, data = anscombe)
abline(fit, col = "red")

# prediction from the freshly fitted model
predict(fit, newdata = data.frame(x1 = 16))
##        1 
## 11.00155

# write the model object to disk, then read it back under the same name
save(fit, file = "model.RData")
load(file = "model.RData")

# prediction from the reloaded model
predict(fit, newdata = data.frame(x1 = 18))
##        1 
## 12.00173

# Homework 1 ----

# Body-mass index for three people, then several equivalent ways of picking
# out everyone whose BMI lies outside the band [18.5, 24).

# `<-` assigns; a single number is already a length-one vector, so both
# assignments below store the same value
height <- 180
height <- c(180)

# mixing numbers with a string coerces every element to character
x <- c(1, 2, 3, "hello")
class(x)
## [1] "character"

# c() builds vectors
names_vec <- c("Brian", "Toby", "Sherry")
height_vec <- c(180, 169, 173)
weight_vec <- c(73, 87, 43)

# BMI = weight / height^2; heights are divided by 100 first
# (presumably converting cm to m)
height_m <- height_vec / 100
bmi_vec <- weight_vec / height_m^2

# label each value with its owner through the `names<-` replacement function
names(bmi_vec) <- names_vec

# logical subsetting keeps the elements whose condition is TRUE
bmi_vec[bmi_vec < 18.5]
##   Sherry 
## 14.36734
bmi_vec[bmi_vec >= 24]
##     Toby 
## 30.46112

# combine conditions element-wise with | (or) and & (and)
result <- bmi_vec[(bmi_vec < 18.5) | (bmi_vec >= 24)]
names(result)
## [1] "Toby"   "Sherry"

# the same selection expressed as "not inside the band"
in_normal_range <- (bmi_vec >= 18.5) & (bmi_vec < 24)
!in_normal_range
##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE
bmi_vec[!in_normal_range]
##     Toby   Sherry 
## 30.46112 14.36734

# which() turns the logical mask into the positions of its TRUE entries
bmi_vec[which(!in_normal_range)]
##     Toby   Sherry 
## 30.46112 14.36734

# Matrix ----

# c() also concatenates: existing vectors can be extended or joined
a <- c(1, 2)
a <- c(a, 3)   # append one value to `a`
a
## [1] 1 2 3
b <- c(4, 5)
d <- c(a, b)   # join two vectors end to end
d
## [1] 1 2 3 4 5
# Build a matrix from three per-student score vectors.
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)

# all six scores in one flat vector
all_scores <- c(kevin, marry, jerry)
all_scores
## [1] 85 73 72 64 59 66

# by default matrix() fills column by column, which splits each student's
# pair of scores across rows
matrix(all_scores, nrow = 3)
##      [,1] [,2]
## [1,]   85   64
## [2,]   73   59
## [3,]   72   66
?matrix
## starting httpd help server ... done

# byrow = TRUE fills row by row, giving one row per student
matrix(all_scores, nrow = 3, byrow = TRUE)
##      [,1] [,2]
## [1,]   85   73
## [2,]   72   64
## [3,]   59   66
# create matrices with byrow = FALSE (the default) and byrow = TRUE
1:9
## [1] 1 2 3 4 5 6 7 8 9

# nine values cannot fill a 4 x 4 matrix: R recycles the data and warns
matrix(1:9, ncol = 4, nrow = 4)
## Warning in matrix(1:9, ncol = 4, nrow = 4): data length [9] is not a
## sub-multiple or multiple of the number of rows [4]
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9    4
## [2,]    2    6    1    5
## [3,]    3    7    2    6
## [4,]    4    8    3    7

# column-wise fill (default)
matrix(1:9, nrow = 3)
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

# row-wise fill
m <- matrix(1:9, byrow = TRUE, nrow = 3)

# select by position: m[row, column]; an empty slot means "all"
m[1, ]
## [1] 1 2 3
m[, 1]
## [1] 1 4 7
m[2, 2]
## [1] 5
# Attach row and column names to the score matrix, then index by name.
mat <- matrix(c(kevin, marry, jerry), nrow = 3, byrow = TRUE)
mat
##      [,1] [,2]
## [1,]   85   73
## [2,]   72   64
## [3,]   59   66

# one row per student
student_names <- c("kevin", "marry", "jerry")
rownames(mat) <- student_names
mat
##       [,1] [,2]
## kevin   85   73
## marry   72   64
## jerry   59   66

# one column per exam
exam_names <- c("first", "second")
colnames(mat) <- exam_names
mat
##       first second
## kevin    85     73
## marry    72     64
## jerry    59     66

# names can be used anywhere a position can
mat["kevin", ]
##  first second 
##     85     73
mat[, "first"]
## kevin marry jerry 
##    85    72    59
mat["marry", "second"]
## [1] 64

# the same named matrix built in a single call through `dimnames`
mat <- matrix(
  c(kevin, marry, jerry),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(student_names, exam_names)
)

# size of the matrix: both dimensions, then rows and columns separately
dim(mat)
## [1] 3 2
nrow(mat)
## [1] 3
ncol(mat)
## [1] 2

# Three interchangeable ways to select: position, logical mask, name.

# -- one row
mat[1, ]
##  first second 
##     85     73
mat[c(TRUE, FALSE, FALSE), ]
##  first second 
##     85     73
mat["kevin", ]
##  first second 
##     85     73

# -- one column
mat[, 1]
## kevin marry jerry 
##    85    72    59
mat[, c(TRUE, FALSE)]
## kevin marry jerry 
##    85    72    59
mat[, "first"]
## kevin marry jerry 
##    85    72    59

# -- several rows (the result stays a matrix)
mat[c(2, 3), ]
##       first second
## marry    72     64
## jerry    59     66
mat[2:3, ]
##       first second
## marry    72     64
## jerry    59     66
mat[c(FALSE, TRUE, TRUE), ]
##       first second
## marry    72     64
## jerry    59     66
mat[c("marry", "jerry"), ]
##       first second
## marry    72     64
## jerry    59     66

# -- a single cell
mat[2, 1]
## [1] 72
mat["marry", "first"]
## [1] 72
# row bind: add a fourth student; naming the argument names the new row
mat2 <- rbind(mat, sam = c(78, 63))
mat2
##       first second
## kevin    85     73
## marry    72     64
## jerry    59     66
## sam      78     63

# column bind: add a third exam; an unnamed vector yields an empty column name
mat3 <- cbind(mat, c(82, 77, 70))
mat3
##       first second   
## kevin    85     73 82
## marry    72     64 77
## jerry    59     66 70
colnames(mat3)[3]
## [1] ""

# fill in the missing column name
colnames(mat3)[3] <- "third"
mat3
##       first second third
## kevin    85     73    82
## marry    72     64    77
## jerry    59     66    70
# Arithmetic operators on two equally sized matrices work element by element.
m1 <- matrix(1:4, nrow = 2, byrow = TRUE)
m1
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
m2 <- matrix(5:8, nrow = 2, byrow = TRUE)
m2
##      [,1] [,2]
## [1,]    5    6
## [2,]    7    8

# element-wise sum
m1 + m2
##      [,1] [,2]
## [1,]    6    8
## [2,]   10   12

# element-wise difference
m1 - m2
##      [,1] [,2]
## [1,]   -4   -4
## [2,]   -4   -4

# element-wise product (not the matrix product)
m1 * m2
##      [,1] [,2]
## [1,]    5   12
## [2,]   21   32

# element-wise quotient
m1 / m2
##           [,1]      [,2]
## [1,] 0.2000000 0.3333333
## [2,] 0.4285714 0.5000000
# totals per student (rows) and per exam (columns) of the four-student matrix
mat2
##       first second
## kevin    85     73
## marry    72     64
## jerry    59     66
## sam      78     63
rowSums(mat2)
## kevin marry jerry   sam 
##   158   136   125   141
colSums(mat2)
##  first second 
##    294    266
?rowSums

# `*` multiplies element by element, whereas `%*%` is the matrix product
m1 * m2
##      [,1] [,2]
## [1,]    5   12
## [2,]   21   32
m1 %*% m2
##      [,1] [,2]
## [1,]   19   22
## [2,]   43   50
# A 9 x 1 column times a 1 x 9 row yields the 9 x 9 multiplication table.

# 9 x 1 column
m1 <- matrix(1:9, ncol = 1)
m1
##       [,1]
##  [1,]    1
##  [2,]    2
##  [3,]    3
##  [4,]    4
##  [5,]    5
##  [6,]    6
##  [7,]    7
##  [8,]    8
##  [9,]    9

# 1 x 9 row
m2 <- matrix(1:9, nrow = 1)
m2
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,]    1    2    3    4    5    6    7    8    9

# (9 x 1) %*% (1 x 9) = 9 x 9
m1 %*% m2
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
##  [1,]    1    2    3    4    5    6    7    8    9
##  [2,]    2    4    6    8   10   12   14   16   18
##  [3,]    3    6    9   12   15   18   21   24   27
##  [4,]    4    8   12   16   20   24   28   32   36
##  [5,]    5   10   15   20   25   30   35   40   45
##  [6,]    6   12   18   24   30   36   42   48   54
##  [7,]    7   14   21   28   35   42   49   56   63
##  [8,]    8   16   24   32   40   48   56   64   72
##  [9,]    9   18   27   36   45   54   63   72   81
## Exercise
# Weighted final score per student: 40% first exam, 60% second exam.
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)

# one row per student, one column per exam, named in the same call
mat <- matrix(
  c(kevin, marry, jerry),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c("kevin", "marry", "jerry"),
    c("first", "second")
  )
)

# method 1: weight each column and add, selecting by position or by name
mat[, 1] * 0.4 + mat[, 2] * 0.6
## kevin marry jerry 
##  77.8  67.2  63.2
mat[, "first"] * 0.4 + mat[, "second"] * 0.6
## kevin marry jerry 
##  77.8  67.2  63.2

# method 2: matrix product, (3 x 2) %*% (2 x 1) = 3 x 1
exam_weights <- matrix(c(0.4, 0.6), ncol = 1)
mat %*% exam_weights
##       [,1]
## kevin 77.8
## marry 67.2
## jerry 63.2

# Factor ----

## start from a plain character vector
weather <- c("sunny", "rainy", "cloudy", "rainy", "cloudy")
weather
## [1] "sunny"  "rainy"  "cloudy" "rainy"  "cloudy"
class(weather)
## [1] "character"

## convert it to a factor: the distinct values become the levels
weather_category <- factor(x = weather)
weather_category
## [1] sunny  rainy  cloudy rainy  cloudy
## Levels: cloudy rainy sunny
class(weather_category)
## [1] "factor"

## levels() lists the categories
levels(weather_category)
## [1] "cloudy" "rainy"  "sunny"
# Ordered factor demo: plain strings compare alphabetically, whereas an
# ordered factor compares by the rank given in `levels`.
temperature <- c("Low", "High", "High", "Medium", "Low", "Medium")

# create an ordered factor; `levels` fixes the ranking Low < Medium < High.
# The argument is spelled `ordered` in full rather than relying on partial
# matching of an abbreviated name.
temperature_category <- factor(
  temperature,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)

temperature
## [1] "Low"    "High"   "High"   "Medium" "Low"    "Medium"

# character comparison is alphabetical, so "High" sorts before "Low"
temperature[2] > temperature[1]
## [1] FALSE
temperature[1] > temperature[2]
## [1] TRUE

temperature_category
## [1] Low    High   High   Medium Low    Medium
## Levels: Low < Medium < High

# ordered-factor comparison follows the level ranking instead
temperature_category[3] > temperature_category[1]
## [1] TRUE
temperature_category[4] > temperature_category[3]
## [1] FALSE
levels(temperature_category)
## [1] "Low"    "Medium" "High"
# Relabel the levels of a factor.
temperature <- c("Low", "High", "High", "Medium", "Low", "Medium")

# without explicit `levels` the levels come out in alphabetical order
temperature_category <- factor(x = temperature)
temperature_category
## [1] Low    High   High   Medium Low    Medium
## Levels: High Low Medium
levels(temperature_category)
## [1] "High"   "Low"    "Medium"

# new labels are matched to the existing levels by position:
# High -> H, Low -> L, Medium -> M
levels(temperature_category) <- c("H", "L", "M")
temperature_category
## [1] L H H M L M
## Levels: H L M

# Data Frame ----

# Data frame basics: a vector or a matrix holds a single type, while a data
# frame keeps one type per column.
days <- c("mon", "tue", "wed", "thu", "fri")
temp <- c(22.2, 21, 23, 24.3, 25)
rain <- c(TRUE, TRUE, FALSE, FALSE, TRUE)

class(days)
## [1] "character"
class(temp)
## [1] "numeric"
class(rain)
## [1] "logical"

# c() coerces everything to the most general type, here character
d <- c(days, temp, rain)
d
##  [1] "mon"   "tue"   "wed"   "thu"   "fri"   "22.2"  "21"    "23"   
##  [9] "24.3"  "25"    "TRUE"  "TRUE"  "FALSE" "FALSE" "TRUE"
class(d)
## [1] "character"

# a matrix is likewise limited to one type
# (recorded on R < 4.0; R >= 4.0 prints "matrix" "array")
mat <- matrix(c(days, temp, rain), nrow = 5)
class(mat)
## [1] "matrix"

# data.frame() keeps each column's own type. `stringsAsFactors = TRUE` is
# explicit so that `days` becomes a factor, as in the recorded output below,
# on every R version (the default changed to FALSE in R 4.0.0).
df <- data.frame(days, temp, rain, stringsAsFactors = TRUE)
class(df)
## [1] "data.frame"

# View() opens a spreadsheet-style viewer and fails when none is available,
# so only call it in an interactive session
if (interactive()) {
  View(df)
}

class(df)
## [1] "data.frame"
str(df)
## 'data.frame':    5 obs. of  3 variables:
##  $ days: Factor w/ 5 levels "fri","mon","thu",..: 2 4 5 3 1
##  $ temp: num  22.2 21 23 24.3 25
##  $ rain: logi  TRUE TRUE FALSE FALSE TRUE
summary(df)
##   days        temp         rain        
##  fri:1   Min.   :21.0   Mode :logical  
##  mon:1   1st Qu.:22.2   FALSE:2        
##  thu:1   Median :23.0   TRUE :3        
##  tue:1   Mean   :23.1                  
##  wed:1   3rd Qu.:24.3                  
##          Max.   :25.0

# list the data sets shipped with the attached packages
data()

## outbreaks dataset
### https://cran.r-project.org/web/packages/outbreaks/index.html
### http://www.repidemicsconsortium.org/

# First look at the built-in iris data set: class, structure, summary.
data(iris)

# View() opens a spreadsheet-style viewer and fails when none is available,
# so only call it in an interactive session
if (interactive()) {
  View(iris)
}

class(iris)
## [1] "data.frame"

# str() shows the type and the first values of every column
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# summary() gives quartiles for numeric columns and counts for factors
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# head() shows the first rows of a data frame (six unless `n` says otherwise)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
head(iris, n = 10)
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1           5.1         3.5          1.4         0.2  setosa
## 2           4.9         3.0          1.4         0.2  setosa
## 3           4.7         3.2          1.3         0.2  setosa
## 4           4.6         3.1          1.5         0.2  setosa
## 5           5.0         3.6          1.4         0.2  setosa
## 6           5.4         3.9          1.7         0.4  setosa
## 7           4.6         3.4          1.4         0.3  setosa
## 8           5.0         3.4          1.5         0.2  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 10          4.9         3.1          1.5         0.1  setosa
?head

# tail() is the counterpart for the last rows
tail(iris)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
tail(iris, n = 10)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 141          6.7         3.1          5.6         2.4 virginica
## 142          6.9         3.1          5.1         2.3 virginica
## 143          5.8         2.7          5.1         1.9 virginica
## 144          6.8         3.2          5.9         2.3 virginica
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
# Data frames are indexed like matrices: df[rows, columns].

# first three rows, all columns
iris[1:3, ]
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa

# a single column comes back as a plain vector, by position or by name
iris[1:3, 1]
## [1] 5.1 4.9 4.7
iris[1:3, "Sepal.Length"]
## [1] 5.1 4.9 4.7

# first two columns: all rows trimmed with head(), or rows picked directly
head(iris[, 1:2])
##   Sepal.Length Sepal.Width
## 1          5.1         3.5
## 2          4.9         3.0
## 3          4.7         3.2
## 4          4.6         3.1
## 5          5.0         3.6
## 6          5.4         3.9
iris[1:6, 1:2]
##   Sepal.Length Sepal.Width
## 1          5.1         3.5
## 2          4.9         3.0
## 3          4.7         3.2
## 4          4.6         3.1
## 5          5.0         3.6
## 6          5.4         3.9

# column names, as listed by str(), can be used with `$`
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
head(iris$Sepal.Length)
## [1] 5.1 4.9 4.7 4.6 5.0 5.4
# first five rows of the two sepal measurements
sepal_columns <- c("Sepal.Length", "Sepal.Width")
five.sepal.iris <- iris[1:5, sepal_columns]

# keep only the setosa rows, using a logical mask over the rows
is_setosa <- iris$Species == "setosa"
setosa.data <- iris[is_setosa, 1:5]
head(setosa.data)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# which() converts the mask into row positions; they select the same rows
which(is_setosa)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50
setosa.data2 <- iris[which(is_setosa), 1:5]

# sort() returns the values themselves in order (ascending by default)
head(sort(iris$Sepal.Length))
## [1] 4.3 4.4 4.4 4.4 4.5 4.6
head(sort(iris$Sepal.Length, decreasing = TRUE))
## [1] 7.9 7.7 7.7 7.7 7.7 7.6

a <- c(3, 1, 2, 7, 9, 4)
# sorted values
sort(a)
## [1] 1 2 3 4 7 9
# positions of the values in sorted order
order(a)
## [1] 2 3 1 6 4 5

# using those positions as row indices sorts a whole data frame by a column
rows_ascending <- order(iris$Sepal.Length)
head(iris[rows_ascending, ])
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 14          4.3         3.0          1.1         0.1  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 39          4.4         3.0          1.3         0.2  setosa
## 43          4.4         3.2          1.3         0.2  setosa
## 42          4.5         2.3          1.3         0.3  setosa
## 4           4.6         3.1          1.5         0.2  setosa
rows_descending <- order(iris$Sepal.Length, decreasing = TRUE)
head(iris[rows_descending, ])
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 132          7.9         3.8          6.4         2.0 virginica
## 118          7.7         3.8          6.7         2.2 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
## 106          7.6         3.0          6.6         2.1 virginica