# What is R? - https://zh.wikipedia.org/wiki/R%E8%AF%AD%E8%A8%80
# R vs Python? - https://www.datacamp.com/community/tutorials/r-or-python-for-data-analysis - https://www.kdnuggets.com/2019/05/poll-top-data-science-machine-learning-platforms.html
# Kaggle - https://www.kaggle.com/
# UCI dataset - https://archive.ics.uci.edu/ml/datasets.html
# 電子檔講義 (course handout, PDF) - https://www.dropbox.com/s/bhqmpndutktw401/201806%20-%20R%E8%AA%9E%E8%A8%80.pdf?dl=0
# Load the built-in Anscombe example data set.
data("anscombe")
# Draw a scatter plot of y1 against x1 from that data set.
plot(y1 ~ x1, data = anscombe)
# Fit a linear regression of y1 on x1 and store the model in lmfit.
lmfit <- lm(y1 ~ x1, data = anscombe)
# Overlay the fitted regression line on the scatter plot, in red.
abline(lmfit, col = "red")
# Looking up documentation.
# Show the help index of a whole package.
help(package="base")
# Open the help page of a single function; the pkg::name form pins the package.
?base::sum
?sum
# Search the installed help pages for a term; ?? is the shorthand for
# help.search().
help.search("sum")
??sum
# List the available demos.
demo()
# List the available built-in data sets.
data()
# List the objects that currently exist in the workspace.
ls()
## [1] "anscombe" "lmfit"
# Remove objects from the workspace.
# NOTE: called with no arguments, as here, rm() removes nothing; pass object
# names (e.g. rm(lmfit)) to actually delete them.
rm()
x <- c(1, 2, 3)
# Inspect the class (data type) of x.
class(x)
## [1] "numeric"
# Inspect the internal structure of x.
str(x)
## num [1:3] 1 2 3
# Arithmetic operators: addition, subtraction, multiplication, division,
# exponentiation and modulo.
3 + 8
## [1] 11
3 - 8
## [1] -5
3 * 8
## [1] 24
3 / 8
## [1] 0.375
3^2
## [1] 9
11 %% 2
## [1] 1
# Comparison operators return logical values.
3 < 4
## [1] TRUE
2 == 5
## [1] FALSE
# T is the built-in shorthand for TRUE.
T == TRUE
## [1] TRUE
# Three equivalent ways of binding the value 3 to the name a.
a = 3            # `=` works at top level
a <- 3           # `<-` is the conventional form (keyboard shortcut: Alt + -)
assign("a", 3)   # assign() takes the variable name as a string
a / 2
## [1] 1.5
# Overwrite a with half of its previous value, then print it.
a <- a / 2
a
## [1] 1.5
# Constructors for vectors of a given length, filled with the type's default.
character(5)  # character vector of length 5
## [1] "" "" "" "" ""
numeric(5)    # numeric vector of length 5
## [1] 0 0 0 0 0
logical(5)    # logical vector of length 5
## [1] FALSE FALSE FALSE FALSE FALSE
# Arithmetic on two vectors of equal length works element by element.
x <- c(1, 2, 3, 7)
y <- c(2, 3, 5, 1)
x + y
## [1] 3 5 8 8
x * y
## [1] 2 6 15 7
x - y
## [1] -1 -1 -2 6
x / y
## [1] 0.5000000 0.6666667 0.6000000 7.0000000
# Recycling: the shorter operand is repeated to match the longer one.
x <- c(1, 2, 3, 7)
x + 10
## [1] 11 12 13 17
x + c(10)
## [1] 11 12 13 17
x + c(1, 2)
## [1] 2 4 4 9
x + c(1, 2, 1, 2)
## [1] 2 4 4 9
# Comparison is element-wise as well.
x == c(1, 99, 3, 4)
## [1] TRUE FALSE TRUE FALSE
# c() combines its arguments into a single vector.
c(1, 2, 3)
## [1] 1 2 3
# Mixed types are coerced to one common type; with a string present,
# everything becomes character. TRUE is written out in full because T is an
# ordinary variable that can be reassigned.
c(2, TRUE, 3+0i, "one")
## [1] "2" "TRUE" "3+0i" "one"
# Without the string, the common type is complex.
c(2, TRUE, 3+0i)
## [1] 2+0i 1+0i 3+0i
# Nested c() calls are flattened into one vector.
c(c(1, 2, 3, 4), c(5))
## [1] 1 2 3 4 5
# A missing value (NA) propagates through sum() by default.
x <- c(1, 2, 3, 4, NA)
sum(x)
## [1] NA
# Drop missing values before summing. TRUE is written out in full because T is
# an ordinary variable that can be reassigned.
sum(x, na.rm = TRUE)
## [1] 10
# The same result by filtering out the NA entries explicitly.
x <- c(1, 2, 3, 4, NA)
is.na(x)
## [1] FALSE FALSE FALSE FALSE TRUE
sum(x[!is.na(x)])
## [1] 10
# A plain numeric vector of heights.
height_vec <- c(180, 169, 173)
height_vec
## [1] 180 169 173
# Attach a name to every element.
names(height_vec) <- c("Brian", "Toby", "Sherry")
height_vec
## Brian Toby Sherry
## 180 169 173
# The names can equally come from another vector.
name_vec <- c("Brian", "Toby", "Sherry")
names(height_vec) <- name_vec
# Comparisons and arithmetic keep the element names.
height_vec > 175
## Brian Toby Sherry
## TRUE FALSE FALSE
height_vec / 100
## Brian Toby Sherry
## 1.80 1.69 1.73
# Element-wise OR / AND of two conditions.
height_vec > 175 | height_vec < 170
## Brian Toby Sherry
## TRUE TRUE FALSE
height_vec < 175 & height_vec > 170
## Brian Toby Sherry
## FALSE FALSE TRUE
# Indexing in R starts at 1.
height_vec[1]  # by position
## Brian
## 180
height_vec["Brian"]  # by element name
## Brian
## 180
height_vec[height_vec > 175]  # by condition (logical vector)
## Brian
## 180
# Heights (cm) and weights (kg) of three people.
h <- c(180, 169, 173)
w <- c(73, 87, 43)
# BMI = weight in kg divided by the square of height in metres.
bmi <- w / ((h / 100)^2)
names(bmi) <- c("Brian", "Toby", "Sherry")
# Flag everyone whose BMI is below 18.5 or at least 24.
bmi < 18.5 | bmi >= 24
## Brian Toby Sherry
## FALSE TRUE TRUE
# Use the same condition to select those people's BMI values.
bmi[bmi < 18.5 | bmi >= 24]
## Toby Sherry
## 30.46112 14.36734
# Integer sequences with the colon operator and seq().
1:20
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
seq(1, 20)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
20:1
## [1] 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1
?seq
# Step through the range in increments of 2.
seq(from = 1, to = 20, by = 2)
## [1] 1 3 5 7 9 11 13 15 17 19
# Ask for exactly two evenly spaced values. The argument is spelled out as
# length.out instead of relying on partial matching of `length`.
seq(from = 1, to = 20, length.out = 2)
## [1] 1 20
# Repeat a value a number of times.
rep(1, 5)
## [1] 1 1 1 1 1
?rep
# times: repeat the whole vector.
rep(x = c(1, 2), times = 5)
## [1] 1 2 1 2 1 2 1 2 1 2
# A vector for times gives one repeat count per element.
rep(x = c(1, 2), times = c(1, 2))
## [1] 1 2 2
# each: repeat every element before moving to the next one.
rep(x = c(1, 2), each = 5)
## [1] 1 1 1 1 1 2 2 2 2 2
# length.out: recycle until the result has the requested length.
rep(x = c(1, 2), length.out = 5)
## [1] 1 2 1 2 1
rep_len(x = c(1, 2), length.out = 5)
## [1] 1 2 1 2 1
# paste() joins its arguments into one string, separated by a space by default.
paste("the", "big", "bang", "theory")
## [1] "the big bang theory"
# sep sets the separator placed between the arguments.
paste("big", "bang", sep = "-")
## [1] "big-bang"
# The joined result is a single string, i.e. a vector of length 1.
length(paste("the", "big", "bang", "theory"))
## [1] 1
paste("big", "bang", sep = "")
## [1] "bigbang"
paste("big", "bang", sep = ";")
## [1] "big;bang"
# Vector arguments are pasted element by element.
paste(c("big", "bang"), 1:2)
## [1] "big 1" "bang 2"
# collapse then joins the resulting elements into one string.
paste(c("big", "bang"), 1:2, collapse = "+")
## [1] "big 1+bang 2"
# The shorter vector is recycled; collapse still yields a single string.
length(paste(c("big", "bang"), 1:4, collapse = "+"))
## [1] 1
# Fill a 3-row matrix row by row.
matrix(1:9, nrow = 3, byrow = TRUE)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
# Without byrow the values are filled column by column (the default).
matrix(1:9, nrow = 3)
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# Two exam scores for each of three students.
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)
# One row per student, one column per exam.
mat <- matrix(c(kevin, marry, jerry), nrow = 3, byrow = TRUE)
colnames(mat) <- c("first", "second")
rownames(mat) <- c("kevin", "marry", "jerry")
mat
## first second
## kevin 85 73
## marry 72 64
## jerry 59 66
# Dimensions of the matrix (rows, columns).
dim(mat)
## [1] 3 2
# Number of rows.
nrow(mat)
## [1] 3
# Number of columns.
ncol(mat)
## [1] 2
# Transpose of the matrix.
t(mat)
## kevin marry jerry
## first 85 72 59
## second 73 64 66
# Select the first row.
mat[1, ]
## first second
## 85 73
# Select the first column.
mat[, 1]
## kevin marry jerry
## 85 72 59
# Select the first and second rows.
mat[1:2, ]
## first second
## kevin 85 73
## marry 72 64
# Select the scores of kevin and jerry by row name.
mat[c("kevin", "jerry"), ]
## first second
## kevin 85 73
## jerry 59 66
# Select the first-exam scores of kevin and jerry.
mat[c("kevin", "jerry"), "first"]
## kevin jerry
## 85 59
# Find who failed (scored below 60 on) the first exam.
# drop = FALSE keeps the result a matrix, so the student's row name survives
# even when only one row matches; the default would return a bare, unnamed 59.
mat[mat[, "first"] < 60, "first", drop = FALSE]
## first
## jerry 59
# Append a row and then a column to the score matrix.
# New student: add a row, then name that last row.
mat2 <- rbind(mat, c(78, 63))
rownames(mat2)[nrow(mat2)] <- "sam"
mat2
## first second
## kevin 85 73
## marry 72 64
## jerry 59 66
## sam 78 63
# New exam: add a column, then name that last column.
mat3 <- cbind(mat2, c(82, 77, 70, 64))
colnames(mat3)[ncol(mat3)] <- "third"
mat3
## first second third
## kevin 85 73 82
## marry 72 64 77
## jerry 59 66 70
## sam 78 63 64
# Average score per student (row) and per exam (column).
rowMeans(mat3)
## kevin marry jerry sam
## 80.00000 71.00000 65.00000 68.33333
colMeans(mat3)
## first second third
## 73.50 66.50 73.25
# Matrix arithmetic.
m1 <- matrix(1:4, nrow = 2, byrow = TRUE)
m2 <- matrix(5:8, nrow = 2, byrow = TRUE)
# +, -, * and / all work element by element.
m1 + m2
## [,1] [,2]
## [1,] 6 8
## [2,] 10 12
m1 - m2
## [,1] [,2]
## [1,] -4 -4
## [2,] -4 -4
m1 * m2
## [,1] [,2]
## [1,] 5 12
## [2,] 21 32
m1 / m2
## [,1] [,2]
## [1,] 0.2000000 0.3333333
## [2,] 0.4285714 0.5000000
# %*% is the true matrix product.
m1 %*% m2
## [,1] [,2]
## [1,] 19 22
## [2,] 43 50
# Factor basics: turn a character vector into a categorical variable.
weather <- c("sunny", "rainy", "cloudy", "rainy", "cloudy")
weather_category <- factor(weather)
weather_category
## [1] sunny rainy cloudy rainy cloudy
## Levels: cloudy rainy sunny
# The source vector is character; the converted one is a factor.
class(weather)
## [1] "character"
class(weather_category)
## [1] "factor"
# The distinct categories of the factor.
levels(weather_category)
## [1] "cloudy" "rainy" "sunny"
# Ordered factor: the levels carry a ranking, so elements can be compared.
temperature <- c("Low", "High", "High", "Medium", "Low", "Medium")
# The argument is spelled out as `ordered` instead of relying on partial
# matching of `order`; levels are listed from lowest to highest.
temperature_category <- factor(
  temperature,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
temperature_category
## [1] Low High High Medium Low Medium
## Levels: Low < Medium < High
# High > Low
temperature_category[3] > temperature_category[1]
## [1] TRUE
# Medium > High
temperature_category[4] > temperature_category[3]
## [1] FALSE
# Rename the levels of a factor.
weather <- c("s", "r", "c", "r", "c")
weather_factor <- factor(weather)
# The new names replace the existing levels in their current order.
levels(weather_factor) <- c("cloudy", "rainy", "sunny")
weather_factor
## [1] sunny rainy cloudy rainy cloudy
## Levels: cloudy rainy sunny
# Type priority when c() mixes types: the result takes the most general type
# present.
# With a string present, everything becomes character.
c("string", 1+2i, 5.5, TRUE)
## [1] "string" "1+2i" "5.5" "TRUE"
# Complex, numeric and logical values become complex.
c(1+2i, 5.5, TRUE)
## [1] 1.0+2i 5.5+0i 1.0+0i
# Numeric and logical values become numeric.
c(5.5, TRUE, FALSE)
## [1] 5.5 1.0 0.0