# R Basic ----
# Load the built-in `anscombe` data set and draw y1 against x1
# using the formula interface of plot().
data(anscombe)
plot(y1 ~ x1, data = anscombe)

# Fit a linear model of y1 on x1 and keep it in `fit`.
fit <- lm(y1 ~ x1, data = anscombe)
# Assign two numbers and add them.
a <- 3
b <- 2
a + b
## [1] 5
# Numerics ----
# Basic arithmetic operators; the echoed result follows each expression.
3 + 8
## [1] 11
3 - 8
## [1] -5
3 * 8
## [1] 24
11 / 2
## [1] 5.5
# `^` is exponentiation.
2 ^ 10
## [1] 1024
# `%%` is the modulo (remainder) operator.
11 %% 2
## [1] 1
# Both `<-` and `=` assign at top level; `<-` is the conventional form.
a <- 3
b = 2
a + b
## [1] 5
# Three basic value types: numeric, character and logical.
numer <- 17.8
char <- 'hello world'
logic <- TRUE
# class() reports the type of a value.
class(logic)
## [1] "logical"
card_length <- 3
# card_width first holds a character string; it is overwritten with a
# number before being used in the multiplication below.
card_width <- '5 inches'
card_width <- 5
card_length * card_width
## [1] 15
# Multiply RRP by Exchange and keep the product in NTD.
RRP <- 35.99
Exchange <- 31.74
NTD <- RRP * Exchange
NTD
## [1] 1142.323
#ntd
# Vector ----
# c() combines values into a vector.
height_vec <- c(180,169,173)
name_vec <- c('Brian', 'Toby', 'Sherry')
class(height_vec)
## [1] "numeric"
# A vector holds a single type: adding one string turns every element
# into character, as the output below shows.
height2_vec <- c(180,169,173, '177')
class(height2_vec)
## [1] "character"
height2_vec
## [1] "180" "169" "173" "177"
# Arithmetic on two vectors of equal length works element by element.
x <- c(1,2,3,7)
y <- c(2,3,5,1)
x + y
## [1] 3 5 8 8
x - y
## [1] -1 -1 -2 6
x * y
## [1] 2 6 15 7
x / y
## [1] 0.5000000 0.6666667 0.6000000 7.0000000
# Recycling: a shorter operand is repeated to the length of the longer
# one, so each pair of expressions below gives the same result.
x <- c(1,2,3,7)
x + 5
## [1] 6 7 8 12
x + c(5)
## [1] 6 7 8 12
x + c(5,5,5,5)
## [1] 6 7 8 12
x + c(1,2)
## [1] 2 4 4 9
x + c(1,2,1,2)
## [1] 2 4 4 9
# Length 3 does not divide length 4: the values are still recycled,
# but R emits a warning.
x + c(1,2,3)
## Warning in x + c(1, 2, 3): longer object length is not a multiple of
## shorter object length
## [1] 2 4 6 8
x + c(1,2,3,1)
## [1] 2 4 6 8
# `from:to` and seq(from, to) both produce the integers 1 to 20.
x <- 1:20
x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
y <- seq(1,20)
y
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
# Two equivalent ways to open the help page for seq().
?seq
help(seq)
# Called without arguments, seq() returns 1.
seq()
## [1] 1
# The third positional argument is the step size.
seq(1,20,2)
## [1] 1 3 5 7 9 11 13 15 17 19
seq(1,3.5,0.5)
## [1] 1.0 1.5 2.0 2.5 3.0 3.5
# length.out fixes how many equally spaced values are produced.
seq(1,20, length.out = 2)
## [1] 1 20
seq(1,20, length.out = 3)
## [1] 1.0 10.5 20.0
# `len` reaches `length.out` through partial argument matching;
# spelling the argument out in full is clearer in real code.
seq(1,20, len = 2)
## [1] 1 20
# sum() adds up all elements of a vector.
x <- c(1,2,3,5,7)
sum(x)
## [1] 18
?sum
# Several arguments may be given; all their elements are added.
sum(3,5,c(1,2))
## [1] 11
# NA is R's missing-value marker.
NA
## [1] NA
# A single NA makes the whole sum NA ...
sum(3,5,NA)
## [1] NA
# ... unless na.rm = TRUE removes missing values first.
sum(3,5,NA, na.rm= TRUE)
## [1] 8
# Give each element a name, first from an inline vector ...
height_vec <- c(180, 169,173)
names(height_vec) <- c('Brian', 'Toby', 'Sherry')
height_vec
## Brian Toby Sherry
## 180 169 173
# ... then from a separately stored vector of names (same result).
names_vec <- c('Brian', 'Toby', 'Sherry')
names(height_vec) <- names_vec
height_vec
## Brian Toby Sherry
## 180 169 173
# Comparison operators work element by element and return a logical
# vector that keeps the element names.
height_vec > 175
## Brian Toby Sherry
## TRUE FALSE FALSE
height_vec < 175
## Brian Toby Sherry
## FALSE TRUE TRUE
height_vec >= 175
## Brian Toby Sherry
## TRUE FALSE FALSE
height_vec <= 175
## Brian Toby Sherry
## FALSE TRUE TRUE
height_vec == 180
## Brian Toby Sherry
## TRUE FALSE FALSE
height_vec != 180
## Brian Toby Sherry
## FALSE TRUE TRUE
# A logical index keeps the elements at the TRUE positions.
height_vec[c(TRUE, FALSE, FALSE)]
## Brian
## 180
# A numeric index selects by position; positions start at 1.
height_vec[c(1)]
## Brian
## 180
height_vec[1]
## Brian
## 180
height_vec[c(1,3)]
## Brian Sherry
## 180 173
# A comparison can be used directly as the index to filter by value.
height_vec[height_vec > 175]
## Brian
## 180
# `|` is element-wise OR and `&` is element-wise AND.
height_vec[height_vec > 175 | height_vec < 170 ]
## Brian Toby
## 180 169
height_vec[height_vec > 170 & height_vec < 175 ]
## Sherry
## 173
# Heights, weights and the names of three people.
height_vec <- c(180, 169, 173)
weight_vec <- c(73, 87, 43)
names_vec <- c('Brian', 'Toby', 'Sherry')
# weight / (height / 100)^2, labelled with the person names in one step.
bmi_vec <- setNames(weight_vec / (height_vec / 100) ^ 2, names_vec)
bmi_vec
## Brian Toby Sherry
## 22.53086 30.46112 14.36734
# TRUE for every value that is not inside the interval [18.5, 24).
!(bmi_vec >= 18.5 & bmi_vec < 24)
## Brian Toby Sherry
## FALSE TRUE TRUE
# Keep only those values.
bmi_vec[!(bmi_vec >= 18.5 & bmi_vec < 24)]
## Toby Sherry
## 30.46112 14.36734
# Matrix ----
# The values that will fill the matrix.
1:9
## [1] 1 2 3 4 5 6 7 8 9
?matrix
# byrow = TRUE fills the matrix row by row, as the output shows.
matrix(1:9, nrow= 3, byrow=TRUE)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
# c() also concatenates existing vectors into one longer vector.
a <- c(1,2,3)
c(a, c(4,5,6))
## [1] 1 2 3 4 5 6
# Two scores for each of three students.
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)
c(kevin, marry, jerry)
## [1] 85 73 72 64 59 66
# One row per student; row and column labels are supplied at construction.
mat <- matrix(
  c(kevin, marry, jerry),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(c('kevin', 'marry', 'jerry'), c('first', 'second'))
)
mat
## first second
## kevin 85 73
## marry 72 64
## jerry 59 66
# Same data with capitalised row labels.
mat2 <- matrix(
  c(kevin, marry, jerry),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    c('Kevin', 'Marry', 'Jerry'),
    c('first', 'second')
  )
)
mat2
## first second
## Kevin 85 73
## Marry 72 64
## Jerry 59 66
# Matrix dimensions: dim() gives rows then columns.
dim(mat2)
## [1] 3 2
nrow(mat2)
## [1] 3
ncol(mat2)
## [1] 2
# Vector indexing for comparison: one position, several, and a range.
a <- c(1,2,3,4,5,6)
a[1]
## [1] 1
a[c(1,3)]
## [1] 1 3
a[1:3]
## [1] 1 2 3
# Matrix indexing is [row, column]; an empty slot selects everything
# along that dimension.
mat2[1, ]
## first second
## 85 73
mat2[ ,1]
## Kevin Marry Jerry
## 85 72 59
mat2[2:3,]
## first second
## Marry 72 64
## Jerry 59 66
# A single cell: row 2, column 1.
mat2[2,1]
## [1] 72
mat2
## first second
## Kevin 85 73
## Marry 72 64
## Jerry 59 66
# Append a fourth row; naming the argument labels the new row 'sam'.
mat3 <- rbind(mat2, sam = c(78, 63))
nrow(mat3)
## [1] 4
mat3
## first second
## Kevin 85 73
## Marry 72 64
## Jerry 59 66
## sam 78 63
# Append a third column; naming the argument labels the new column 'third'.
mat4 <- cbind(mat2, third = c(82, 77, 70))
mat4
## first second third
## Kevin 85 73 82
## Marry 72 64 77
## Jerry 59 66 70
# Totals per row and per column of mat2.
rowSums(mat2)
## Kevin Marry Jerry
## 158 136 125
colSums(mat2)
## first second
## 216 203
# Two 2x2 matrices filled row by row.
m1 <- matrix(1:4, byrow=TRUE, nrow = 2)
m2 <- matrix(5:8, byrow=TRUE, nrow = 2)
# +, -, * and / operate element by element.
m1 + m2
## [,1] [,2]
## [1,] 6 8
## [2,] 10 12
m1 - m2
## [,1] [,2]
## [1,] -4 -4
## [2,] -4 -4
m1 * m2
## [,1] [,2]
## [1,] 5 12
## [2,] 21 32
m1 / m2
## [,1] [,2]
## [1,] 0.2000000 0.3333333
## [2,] 0.4285714 0.5000000
# %*% is the matrix product.
m1 %*% m2
## [,1] [,2]
## [1,] 19 22
## [2,] 43 50
# A 9x1 column times a 1x9 row gives the 9x9 multiplication table.
m1 <-matrix(1:9, nrow = 9)
m2 <-matrix(1:9, nrow = 1)
m1 %*% m2
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 1 2 3 4 5 6 7 8 9
## [2,] 2 4 6 8 10 12 14 16 18
## [3,] 3 6 9 12 15 18 21 24 27
## [4,] 4 8 12 16 20 24 28 32 36
## [5,] 5 10 15 20 25 30 35 40 45
## [6,] 6 12 18 24 30 36 42 48 54
## [7,] 7 14 21 28 35 42 49 56 63
## [8,] 8 16 24 32 40 48 56 64 72
## [9,] 9 18 27 36 45 54 63 72 81
# Rebuild the score matrix with labels set at construction.
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)
mat <- matrix(
  c(kevin, marry, jerry),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(c('Kevin', 'Marry', 'Jerry'), c('first', 'second'))
)
# Approach 1: weight each column (0.4 and 0.6) and add them.
mat[, 'first'] * 0.4 + mat[, 'second'] * 0.6
## Kevin Marry Jerry
## 77.8 67.2 63.2
# Approach 2: the same weighting as a matrix product.
mat %*% c(0.4, 0.6)
## [,1]
## Kevin 77.8
## Marry 67.2
## Jerry 63.2
# Factor ----
# A character vector of categories ...
weather <- c('sunny', 'rainy', 'cloudy', 'rainy', 'cloudy')
class(weather)
## [1] "character"
# ... converted to a factor; the levels come out in alphabetical order.
weather_category <- factor(weather)
levels(weather_category)
## [1] "cloudy" "rainy" "sunny"
# An unordered factor has no ranking, so `>` yields NA with a warning.
weather_category[1] > weather_category[3]
## Warning in Ops.factor(weather_category[1], weather_category[3]): '>' not
## meaningful for factors
## [1] NA
# Build an ordered factor: `levels` lists the categories from lowest to
# highest and `ordered = TRUE` makes comparisons such as `>` meaningful.
# The argument is spelled out in full instead of relying on partial
# matching of `order` to `ordered`.
temperature <- c('Low', 'High', 'High', 'Medium', 'Low', 'Medium')
temperature_category <- factor(
  temperature,
  levels = c('Low', 'Medium', 'High'),
  ordered = TRUE
)
temperature_category
## [1] Low High High Medium Low Medium
## Levels: Low < Medium < High
# With an ordered factor, comparisons follow the level order.
temperature_category[3] > temperature_category[1]
## [1] TRUE
temperature_category[4] > temperature_category[3]
## [1] FALSE
levels(temperature_category)
## [1] "Low" "Medium" "High"
# Assigning to levels() renames the levels in place, keeping their order.
levels(temperature_category) <- c('L', 'M', 'H')
temperature_category
## [1] L H H M L M
## Levels: L < M < H
# Data Frame ----
# Three vectors of equal length with different types.
days <- c('mon','tue','wed','thu','fri')
temp <- c(22.2,21,23,24.3,25)
rain <- c(TRUE, TRUE, FALSE, FALSE, TRUE)
class(days)
class(temp)
class(rain)
# data.frame() combines them as columns; column names default to the
# variable names.
df <- data.frame(days, rain, temp)
df
# Column names can also be given explicitly.
df2 <- data.frame('D' = days, 'R'=rain, 'T'=temp)
df2
# Inspect the class, the structure and per-column summaries.
class(df)
str(df)
summary(df)
# List the available data sets, then load `iris`.
data()
data(iris)
# NOTE(review): View() opens a viewer window; presumably intended for
# interactive sessions only -- confirm before running as a script.
View(iris)
# head() shows the first rows; the second argument sets how many.
head(iris)
#?head
head(iris, 3)
# For comparison, the Python-style selection of the first row:
#df[1,:]
# The R form uses [row, column] with an empty slot meaning "all".
iris[1,]
# Columns can be picked by position or by name.
iris[1:3,1]
iris[1:3,'Sepal.Length']
head(iris[,1:2])
# `$` extracts one column as a vector.
head(iris$Sepal.Length)
iris[c(1:5),c('Sepal.Length', 'Sepal.Width')]
# A logical test on a column can be used as the row index.
iris$Species == 'setosa'
setosa.data <- iris[iris$Species == 'setosa', 1:5]
str(setosa.data)
# which() turns the logical test into row positions first.
head(iris[which(iris$Species == 'setosa'), ])
head(iris)
# sort() returns the sorted values themselves.
sort(iris$Sepal.Length, decreasing = TRUE)
# order() returns the row positions that would sort the column, which can
# then reorder the whole data frame. The result is stored as `sepal_order`
# rather than `rank`, so the base function rank() is not masked.
sepal_order <- order(iris$Sepal.Length, decreasing = TRUE)
head(iris[sepal_order, ])
# sort() gives sorted values; order() gives the positions that sort the
# vector, so a[order(a)] reproduces sort(a).
a <- c(180,169,173,182,177)
sort(a)
order(a)
a[order(a)]
# TW2330 Analysis ----
# Download the CSV into a data frame, keeping text columns as character
# rather than converting them to factors.
tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)
?read.csv
# Preview the first rows.
head(tw2330)
## Date Open High Low Close Adj.Close
## 1 2000-01-04 69.649002 69.649002 68.475197 69.649002 40.966045
## 2 2000-01-05 69.649002 71.214104 68.866302 71.214104 41.886593
## 3 2000-01-06 70.822899 71.214104 69.649002 69.649002 40.966045
## 4 2000-01-07 67.301300 68.475197 66.518600 67.692497 39.815269
## 5 2000-01-10 69.649002 70.431396 68.475197 70.040199 41.196129
## 6 2000-01-11 70.822899 71.605202 68.475197 68.866302 40.505665
## Volume
## 1 200662321971
## 2 402466776297
## 3 197545701266
## 4 235270327441
## 5 276171665217
## 6 277769524211
# Inspect the column types; the output below shows every column as chr.
str(tw2330)
## 'data.frame': 4532 obs. of 7 variables:
## $ Date : chr "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
## $ Open : chr "69.649002" "69.649002" "70.822899" "67.301300" ...
## $ High : chr "69.649002" "71.214104" "71.214104" "68.475197" ...
## $ Low : chr "68.475197" "68.866302" "69.649002" "66.518600" ...
## $ Close : chr "69.649002" "71.214104" "69.649002" "67.692497" ...
## $ Adj.Close: chr "40.966045" "41.886593" "40.966045" "39.815269" ...
## $ Volume : chr "200662321971" "402466776297" "197545701266" "235270327441" ...
# Convert the text columns needed for the analysis to proper types.
tw2330$Date <- as.Date(tw2330$Date)
# Entries that are not valid numbers become NA, hence the warning below.
tw2330$Close <- as.numeric(tw2330$Close)
## Warning: NAs introduced by coercion
# Keep the rows dated within 2017. The bounds are explicit Date objects so
# the comparison does not depend on implicit character-to-Date conversion.
df <- tw2330[
  tw2330$Date >= as.Date('2017-01-01') & tw2330$Date < as.Date('2018-01-01'),
]
# Aggregates need na.rm = TRUE because Close contains NA values.
min(df$Close, na.rm = TRUE)
## [1] 179.5
max(df$Close, na.rm = TRUE)
## [1] 244
mean(df$Close, na.rm = TRUE)
## [1] 210.1502
# summary() has no na.rm argument: it sets NA values aside on its own and
# reports how many there were in the "NA's" column.
summary(df$Close)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 179.5 191.5 212.5 210.2 222.0 244.0 2
# The three rows with the lowest Close: sort ascending, take rows 1-3.
df[order(df$Close), ][1:3,]
## Date Open High Low Close Adj.Close Volume
## 4296 2017-01-16 180.000000 180.500000 179.000000 179.5 173.709686 30756000
## 4299 2017-01-19 179.500000 181.000000 179.500000 180.5 174.677429 24627000
## 4297 2017-01-17 180.500000 181.000000 179.500000 181.0 175.161301 13159000
# The three rows with the highest Close: sort descending, take rows 1-3.
df[order(df$Close, decreasing =TRUE), ][1:3,]
## Date Open High Low Close Adj.Close Volume
## 4493 2017-11-07 242.000000 244.000000 241.500000 244 244.000000 20200261
## 4506 2017-11-24 242.000000 244.500000 241.500000 244 244.000000 11154453
## 4487 2017-10-30 242.000000 245.000000 241.500000 243 243.000000 27784990
# Check the types after conversion: Date and Close are now typed, while
# the other columns are still chr (see output below).
str(df)
## 'data.frame': 245 obs. of 7 variables:
## $ Date : Date, format: "2017-01-03" "2017-01-04" ...
## $ Open : chr "181.500000" "183.000000" "182.000000" "184.000000" ...
## $ High : chr "183.500000" "184.000000" "183.500000" "184.500000" ...
## $ Low : chr "181.000000" "181.500000" "181.500000" "183.500000" ...
## $ Close : num 183 183 184 184 184 ...
## $ Adj.Close: chr "177.096786" "177.096786" "177.580658" "178.064514" ...
## $ Volume : chr "22630000" "24369000" "20979000" "22443000" ...
# Line chart (type = 'l') of Close against Date for the filtered rows.
plot(df$Date, df$Close, type = 'l')

# Download the same file again to start over from the raw data.
tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)
str(tw2330)
## 'data.frame': 4532 obs. of 7 variables:
## $ Date : chr "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
## $ Open : chr "69.649002" "69.649002" "70.822899" "67.301300" ...
## $ High : chr "69.649002" "71.214104" "71.214104" "68.475197" ...
## $ Low : chr "68.475197" "68.866302" "69.649002" "66.518600" ...
## $ Close : chr "69.649002" "71.214104" "69.649002" "67.692497" ...
## $ Adj.Close: chr "40.966045" "41.886593" "40.966045" "39.815269" ...
## $ Volume : chr "200662321971" "402466776297" "197545701266" "235270327441" ...
# Same type conversions as before, this time without filtering by date.
tw2330$Date <- as.Date(tw2330$Date)
# Non-numeric entries become NA, which triggers the warning below.
tw2330$Close <- as.numeric(tw2330$Close)
## Warning: NAs introduced by coercion
df <- tw2330
# Per-column summary of the whole data frame.
summary(df)
## Date Open High
## Min. :2000-01-04 Length:4532 Length:4532
## 1st Qu.:2004-05-06 Class :character Class :character
## Median :2008-11-04 Mode :character Mode :character
## Mean :2008-11-23
## 3rd Qu.:2013-06-03
## Max. :2018-01-02
##
## Low Close Adj.Close Volume
## Length:4532 Min. : 27.46 Length:4532 Length:4532
## Class :character 1st Qu.: 50.69 Class :character Class :character
## Mode :character Median : 63.13 Mode :character Mode :character
## Mean : 81.56
## 3rd Qu.:100.50
## Max. :244.00
## NA's :176
# Line chart of Close against Date over all rows.
plot(df$Date, df$Close, type = 'l')

# List ----
# A list can hold elements of different types under names.
item <- list(thing = 'hat', size = 8.25)
item
## $thing
## [1] "hat"
##
## $size
## [1] 8.25
# An element may itself be a vector; `$` extracts it by name.
test <- list(name = 'Toby', score = c(87, 57, 72))
test$score
## [1] 87 57 72
test$score[2]
## [1] 57
# Unnamed list of two vectors; `[[ ]]` extracts an element by position.
li <- list(c(3,5,12), c(2,4,5, 8, 10))
li[[1]]
## [1] 3 5 12
li[[2]]
## [1] 2 4 5 8 10
# lapply() applies a function to every list element and returns a list.
lapply(li, sum)
## [[1]]
## [1] 20
##
## [[2]]
## [1] 29
lapply(li, mean)
## [[1]]
## [1] 6.666667
##
## [[2]]
## [1] 5.8
lapply(li, max)
## [[1]]
## [1] 12
##
## [[2]]
## [1] 10
# An anonymous function can be supplied; here it takes the first value.
lapply(li, function(e) e[1])
## [[1]]
## [1] 3
##
## [[2]]
## [1] 2
# sapply() does the same but simplifies the result to a vector here.
sapply(li, function(e) e[1])
## [1] 3 2
# Read and Write Data ----
# Read a '|'-separated text file.
# NOTE(review): the variable name `match` is also the name of a base R
# function; consider a different name.
match<- read.table('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/match.txt', sep = '|')
# Read a comma-separated file, keeping text columns as character.
tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)
class(tw2330)
## [1] "data.frame"
str(tw2330)
## 'data.frame': 4532 obs. of 7 variables:
## $ Date : chr "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
## $ Open : chr "69.649002" "69.649002" "70.822899" "67.301300" ...
## $ High : chr "69.649002" "71.214104" "71.214104" "68.475197" ...
## $ Low : chr "68.475197" "68.866302" "69.649002" "66.518600" ...
## $ Close : chr "69.649002" "71.214104" "69.649002" "67.692497" ...
## $ Adj.Close: chr "40.966045" "41.886593" "40.966045" "39.815269" ...
## $ Volume : chr "200662321971" "402466776297" "197545701266" "235270327441" ...
# Write the data frame to the working directory, once tab-separated and
# once as CSV.
write.table(tw2330, file= '2330.test.tab',sep = '\t')
write.csv(tw2330, file= '2330.test.csv')
# Read the '|'-separated file again, this time with readr.
library(readr)
## Warning: package 'readr' was built under R version 3.2.5
# col_names = FALSE: no header row is read, so the columns are named
# X1, X2, X3 (see the parsed column specification below).
# trim_ws = TRUE asks for surrounding whitespace to be trimmed.
match <- read_delim("https://raw.githubusercontent.com/ywchiu/fubonr/master/data/match.txt",
"|", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_integer()
## )
#
#View(match)
# Flow Control ----
# Two-way branch: pick the message with if/else, then print it once.
a <- 2
print(if (a > 3) 'a > 3' else 'a <= 3')
## [1] "a <= 3"
# Three-way branch: the conditions are tested in order and the first
# match decides which message is printed.
a <- 2
print(
  if (a > 3) {
    'a > 3'
  } else if (a == 3) {
    'a = 3'
  } else {
    'a < 3'
  }
)
## [1] "a < 3"
# Print the integers 1 to 10, one per iteration.
for (i in seq_len(10)) print(i)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# Add up 1 + 2 + ... + 100 with an explicit loop.
s <- 0
for (i in seq_len(100)) s <- s + i
s
## [1] 5050
# The vectorised built-in gives the same total.
sum(1:100)
## [1] 5050
# Three ways to iterate over a vector.
x <- c('sunny','rainy','cloudy','rainy', 'cloudy')
# 1. By index, using 1:length(x). If x were empty this would run over
#    c(1, 0); seq_along(x) below avoids that.
for (i in 1:length(x)){
print(x[i])
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"
# 2. By index, using seq_along(x).
for( i in seq_along(x)){
print(x[i])
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"
# 3. Directly over the elements.
for (letter in x){
print(letter)
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"
# Add up 0 + 1 + ... + 100 with a counter; the loop ends as soon as the
# counter has passed 100.
s <- 0
cnt <- 0
repeat {
  if (cnt > 100) break
  s <- s + cnt
  cnt <- cnt + 1
}
s
## [1] 5050
# Print the page address for pages 1 to 10 by appending the page number
# to the base address with no separator.
url <- 'http://www.appledaily.com.tw/realtimenews/section/new/'
for (i in 1:10) {
  print(paste0(url, i))
}
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/1"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/2"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/3"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/4"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/5"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/6"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/7"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/8"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/9"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/10"
# Function ----
#' Add two values.
#'
#' @param a First operand (default 2).
#' @param b Second operand (default 3).
#' @return The result of `a + b`.
f <- function(a = 2, b = 3) a + b
# No arguments: both defaults are used.
f()
## [1] 5
# Arguments passed by name ...
f(a = 6, b = 4)
## [1] 10
# ... and by position.
f(6, 4)
## [1] 10
?head
#' Add two values, handing the result back with an explicit return().
#'
#' @param a First operand (default 2).
#' @param b Second operand (default 3).
#' @return The result of `a + b`.
f2 <- function(a = 2, b = 3) {
  total <- a + b
  return(total)
}
f2(7, 9)
## [1] 16
# Arguments are evaluated lazily: `b` has no default but is never used in
# the body, so calling f(3) without it still works.
f <- function(a, b){
a * 2
}
f(3)
## [1] 6
# Here the body does use `b`, so f(3) would fail because `b` is missing;
# the call is left commented out for that reason.
f <- function(a, b){
a + b
}
#f(3)
url <- 'https://raw.githubusercontent.com/ywchiu/fuboni/master/data/cnn.txt'
#' Count how often each space-separated word occurs in a text source.
#'
#' @param url Address or path of a text file, as accepted by file().
#' @return A one-dimensional table of word counts, sorted from the most to
#'   the least frequent word.
WordCount <- function(url){
  con <- file(url)
  # Release the connection even when reading fails.
  on.exit(close(con), add = TRUE)
  news <- readLines(con)
  tokens <- unlist(strsplit(news, ' ', fixed = TRUE))
  # Consecutive spaces produce empty strings; they are not words.
  sort(table(tokens[nzchar(tokens)]), decreasing = TRUE)
}
# One-time setup if the package is not installed yet:
#install.packages('wordcloud2')
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 3.2.5
# Count the words of the text at `url` and draw them as a word cloud
# with the 'pentagon' shape.
wc <- WordCount(url)
wordcloud2(wc, shape = 'pentagon')