# R Basic ----

data(anscombe)
plot(y1 ~ x1, data = anscombe)

fit <- lm(y1 ~ x1, data = anscombe)


a <- 3
b <- 2
a + b
## [1] 5

# Numerics ----

3 + 8
## [1] 11
3 - 8
## [1] -5
3 * 8
## [1] 24
11 / 2
## [1] 5.5
2 ^ 10
## [1] 1024
11 %% 2
## [1] 1
a <- 3
b =  2
a +  b
## [1] 5
numer <- 17.8
char  <- 'hello world'
logic <- TRUE

class(logic)
## [1] "logical"
card_length  <-  3
card_width   <- '5 inches'
card_width   <- 5
card_length * card_width
## [1] 15
RRP  <- 35.99
Exchange <- 31.74
NTD <- RRP * Exchange
NTD
## [1] 1142.323
#ntd

# Vector ----

height_vec <- c(180,169,173)
name_vec   <- c('Brian', 'Toby', 'Sherry')
class(height_vec)
## [1] "numeric"
height2_vec <- c(180,169,173, '177')
class(height2_vec)
## [1] "character"
height2_vec
## [1] "180" "169" "173" "177"
x <- c(1,2,3,7)
y <- c(2,3,5,1)
x + y
## [1] 3 5 8 8
x - y
## [1] -1 -1 -2  6
x * y
## [1]  2  6 15  7
x / y
## [1] 0.5000000 0.6666667 0.6000000 7.0000000
x <- c(1,2,3,7) 
x + 5
## [1]  6  7  8 12
x + c(5)
## [1]  6  7  8 12
x + c(5,5,5,5)
## [1]  6  7  8 12
x + c(1,2)
## [1] 2 4 4 9
x + c(1,2,1,2)
## [1] 2 4 4 9
x + c(1,2,3)
## Warning in x + c(1, 2, 3): longer object length is not a multiple of
## shorter object length
## [1] 2 4 6 8
x + c(1,2,3,1)
## [1] 2 4 6 8
x <- 1:20
x
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
y <- seq(1,20)
y
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
?seq
help(seq)
seq()
## [1] 1
seq(1,20,2)
##  [1]  1  3  5  7  9 11 13 15 17 19
seq(1,3.5,0.5)
## [1] 1.0 1.5 2.0 2.5 3.0 3.5
seq(1,20, length.out = 2)
## [1]  1 20
seq(1,20, length.out = 3)
## [1]  1.0 10.5 20.0
seq(1,20, len = 2)
## [1]  1 20
x <- c(1,2,3,5,7)
sum(x)
## [1] 18
?sum

sum(3,5,c(1,2))
## [1] 11
NA
## [1] NA
sum(3,5,NA)
## [1] NA
sum(3,5,NA, na.rm= TRUE)
## [1] 8
height_vec <- c(180, 169,173)
names(height_vec) <- c('Brian', 'Toby', 'Sherry')
height_vec
##  Brian   Toby Sherry 
##    180    169    173
names_vec <- c('Brian', 'Toby', 'Sherry')
names(height_vec) <- names_vec
height_vec
##  Brian   Toby Sherry 
##    180    169    173
height_vec >  175
##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE
height_vec <  175
##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE
height_vec >= 175
##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE
height_vec <= 175
##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE
height_vec == 180  
##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE
height_vec != 180
##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE
height_vec[c(TRUE, FALSE, FALSE)]
## Brian 
##   180
height_vec[c(1)]
## Brian 
##   180
height_vec[1]
## Brian 
##   180
height_vec[c(1,3)]
##  Brian Sherry 
##    180    173
height_vec[height_vec > 175]
## Brian 
##   180
height_vec[height_vec > 175 | height_vec < 170 ]
## Brian  Toby 
##   180   169
height_vec[height_vec > 170 & height_vec < 175 ]
## Sherry 
##    173
# Heights, weights and names of three people, in matching order.
names_vec  <- c('Brian', 'Toby', 'Sherry')
height_vec <- c(180, 169, 173)
weight_vec <- c(73, 87, 43)

# BMI = weight / height^2, with height divided by 100 first
# (presumably converting cm to m -- confirm units); the result is
# labelled with each person's name.
bmi_vec <- setNames(weight_vec / (height_vec / 100) ^ 2, names_vec)
bmi_vec
##    Brian     Toby   Sherry 
## 22.53086 30.46112 14.36734
bmi_vec < 18.5 | bmi_vec >= 24
##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE
bmi_vec[bmi_vec < 18.5 | bmi_vec >= 24]
##     Toby   Sherry 
## 30.46112 14.36734

# Matrix ----

1:9
## [1] 1 2 3 4 5 6 7 8 9
?matrix
matrix(1:9, nrow= 3, byrow=TRUE)
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9
a <- c(1,2,3)
c(a, c(4,5,6))
## [1] 1 2 3 4 5 6
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)

c(kevin, marry, jerry)
## [1] 85 73 72 64 59 66
mat <- matrix(c(kevin, marry, jerry), nrow = 3, byrow = TRUE)

colnames(mat) <- c('first', 'second')
rownames(mat) <- c('kevin', 'marry', 'jerry')
mat
##       first second
## kevin    85     73
## marry    72     64
## jerry    59     66
mat2 <- matrix(c(kevin, marry, jerry), nrow = 3, byrow=TRUE, dimnames = list(c('Kevin', 'Marry', 'Jerry'), c('first', 'second')))
mat2
##       first second
## Kevin    85     73
## Marry    72     64
## Jerry    59     66
dim(mat2)
## [1] 3 2
nrow(mat2)
## [1] 3
ncol(mat2)
## [1] 2
a <- c(1,2,3,4,5,6)
a[1]
## [1] 1
a[c(1,3)]
## [1] 1 3
a[1:3]
## [1] 1 2 3
mat2[1, ]
##  first second 
##     85     73
mat2[ ,1]
## Kevin Marry Jerry 
##    85    72    59
mat2[2:3,]
##       first second
## Marry    72     64
## Jerry    59     66
mat2[2,1]
## [1] 72
mat2
##       first second
## Kevin    85     73
## Marry    72     64
## Jerry    59     66
mat3 <- rbind(mat2, c(78, 63))
rownames(mat3)[4] <- 'sam'
nrow(mat3)
## [1] 4
mat3
##       first second
## Kevin    85     73
## Marry    72     64
## Jerry    59     66
## sam      78     63
mat4 <- cbind(mat2, c(82,77,70))
colnames(mat4)[3] <- 'third'
mat4
##       first second third
## Kevin    85     73    82
## Marry    72     64    77
## Jerry    59     66    70
rowSums(mat2)
## Kevin Marry Jerry 
##   158   136   125
colSums(mat2)
##  first second 
##    216    203
m1 <- matrix(1:4, byrow=TRUE, nrow = 2)
m2 <- matrix(5:8, byrow=TRUE, nrow = 2)
m1 + m2
##      [,1] [,2]
## [1,]    6    8
## [2,]   10   12
m1 - m2
##      [,1] [,2]
## [1,]   -4   -4
## [2,]   -4   -4
m1 * m2
##      [,1] [,2]
## [1,]    5   12
## [2,]   21   32
m1 / m2
##           [,1]      [,2]
## [1,] 0.2000000 0.3333333
## [2,] 0.4285714 0.5000000
m1 %*% m2
##      [,1] [,2]
## [1,]   19   22
## [2,]   43   50
m1 <-matrix(1:9, nrow = 9)
m2 <-matrix(1:9, nrow = 1)
m1 %*% m2
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
##  [1,]    1    2    3    4    5    6    7    8    9
##  [2,]    2    4    6    8   10   12   14   16   18
##  [3,]    3    6    9   12   15   18   21   24   27
##  [4,]    4    8   12   16   20   24   28   32   36
##  [5,]    5   10   15   20   25   30   35   40   45
##  [6,]    6   12   18   24   30   36   42   48   54
##  [7,]    7   14   21   28   35   42   49   56   63
##  [8,]    8   16   24   32   40   48   56   64   72
##  [9,]    9   18   27   36   45   54   63   72   81
kevin <-  c(85, 73)
marry <-  c(72, 64)
jerry <-  c(59, 66)
mat <- matrix(c(kevin, marry, jerry), nrow = 3, byrow=TRUE)

colnames(mat) <- c('first', 'second')
rownames(mat) <- c('Kevin', 'Marry', 'Jerry')

# method 1
mat[  , 1 ] * 0.4 + mat[ , 2] * 0.6
## Kevin Marry Jerry 
##  77.8  67.2  63.2
# method 2
mat %*% c(0.4, 0.6)
##       [,1]
## Kevin 77.8
## Marry 67.2
## Jerry 63.2

# Factor ----

weather <- c('sunny', 'rainy', 'cloudy', 'rainy', 'cloudy')

class(weather)
## [1] "character"
weather_category <- factor(weather)

levels(weather_category)
## [1] "cloudy" "rainy"  "sunny"
weather_category[1] > weather_category[3] 
## Warning in Ops.factor(weather_category[1], weather_category[3]): '>' not
## meaningful for factors
## [1] NA
temperature <- c('Low', 'High', 'High', 'Medium', 'Low', 'Medium')
# `ordered = TRUE` is spelled out in full instead of relying on partial
# argument matching of `order`. It makes the factor ordered, so its values
# can be compared with < and >, ranked as listed in `levels`.
temperature_category <- factor(temperature, ordered = TRUE,
                               levels = c('Low', 'Medium', 'High'))
temperature_category
## [1] Low    High   High   Medium Low    Medium
## Levels: Low < Medium < High
temperature_category[3] > temperature_category[1]
## [1] TRUE
temperature_category[4] > temperature_category[3]
## [1] FALSE
levels(temperature_category)
## [1] "Low"    "Medium" "High"
levels(temperature_category) <- c('L', 'M', 'H')
temperature_category
## [1] L H H M L M
## Levels: L < M < H

# Data Frame ----


days <- c('mon','tue','wed','thu','fri')
temp <- c(22.2,21,23,24.3,25)
rain <- c(TRUE, TRUE, FALSE, FALSE, TRUE)

class(days)
class(temp)
class(rain)

df <- data.frame(days, rain, temp)
df

df2 <- data.frame('D' = days, 'R'=rain, 'T'=temp)
df2

class(df)
str(df)
summary(df)

# data() with no arguments lists the available datasets and View() opens a
# spreadsheet-style viewer. Both are browsing aids, and View() needs a GUI,
# so they only run in an interactive session; this keeps the script usable
# from Rscript or knitr.
if (interactive()) {
  data()
  View(iris)
}
data(iris)
# head() shows the first six rows by default; the second argument sets
# how many rows to show.
head(iris)
#?head
head(iris, 3)

# python
#df[1,:]

# R
iris[1,]
iris[1:3,1]
iris[1:3,'Sepal.Length'] 
head(iris[,1:2])

head(iris$Sepal.Length)

iris[c(1:5),c('Sepal.Length', 'Sepal.Width')]

iris$Species == 'setosa'
setosa.data <- iris[iris$Species == 'setosa', 1:5]
str(setosa.data)

head(iris[which(iris$Species == 'setosa'), ])


head(iris)

# sort() returns the values themselves, largest first.
sort(iris$Sepal.Length, decreasing = TRUE)

# order() returns the row indices that would sort the column, which is what
# is needed to reorder the whole data frame. The index vector is named
# `row_order` so that it does not mask the base function rank().
row_order <- order(iris$Sepal.Length, decreasing = TRUE)
head(iris[row_order, ])


a <- c(180,169,173,182,177)
sort(a)
order(a)
a[order(a)]

# TW2330 Analysis ----

tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)
?read.csv
head(tw2330)
##         Date      Open      High       Low     Close Adj.Close
## 1 2000-01-04 69.649002 69.649002 68.475197 69.649002 40.966045
## 2 2000-01-05 69.649002 71.214104 68.866302 71.214104 41.886593
## 3 2000-01-06 70.822899 71.214104 69.649002 69.649002 40.966045
## 4 2000-01-07 67.301300 68.475197 66.518600 67.692497 39.815269
## 5 2000-01-10 69.649002 70.431396 68.475197 70.040199 41.196129
## 6 2000-01-11 70.822899 71.605202 68.475197 68.866302 40.505665
##         Volume
## 1 200662321971
## 2 402466776297
## 3 197545701266
## 4 235270327441
## 5 276171665217
## 6 277769524211
str(tw2330)
## 'data.frame':    4532 obs. of  7 variables:
##  $ Date     : chr  "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
##  $ Open     : chr  "69.649002" "69.649002" "70.822899" "67.301300" ...
##  $ High     : chr  "69.649002" "71.214104" "71.214104" "68.475197" ...
##  $ Low      : chr  "68.475197" "68.866302" "69.649002" "66.518600" ...
##  $ Close    : chr  "69.649002" "71.214104" "69.649002" "67.692497" ...
##  $ Adj.Close: chr  "40.966045" "41.886593" "40.966045" "39.815269" ...
##  $ Volume   : chr  "200662321971" "402466776297" "197545701266" "235270327441" ...
tw2330$Date <- as.Date(tw2330$Date)
tw2330$Close <- as.numeric(tw2330$Close)
## Warning: NAs introduced by coercion
df <- tw2330[ tw2330$Date >= '2017-01-01' & tw2330$Date < '2018-01-01' ,  ]
min(df$Close, na.rm=TRUE)
## [1] 179.5
max(df$Close, na.rm=TRUE)
## [1] 244
mean(df$Close, na.rm=TRUE)
## [1] 210.1502
# summary() does not use `na.rm`: it silently ignores the argument and
# reports missing values in a separate "NA's" entry, so it is dropped here.
summary(df$Close)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   179.5   191.5   212.5   210.2   222.0   244.0       2
df[order(df$Close), ][1:3,]
##            Date       Open       High        Low Close  Adj.Close   Volume
## 4296 2017-01-16 180.000000 180.500000 179.000000 179.5 173.709686 30756000
## 4299 2017-01-19 179.500000 181.000000 179.500000 180.5 174.677429 24627000
## 4297 2017-01-17 180.500000 181.000000 179.500000 181.0 175.161301 13159000
df[order(df$Close, decreasing =TRUE), ][1:3,]
##            Date       Open       High        Low Close  Adj.Close   Volume
## 4493 2017-11-07 242.000000 244.000000 241.500000   244 244.000000 20200261
## 4506 2017-11-24 242.000000 244.500000 241.500000   244 244.000000 11154453
## 4487 2017-10-30 242.000000 245.000000 241.500000   243 243.000000 27784990
str(df)
## 'data.frame':    245 obs. of  7 variables:
##  $ Date     : Date, format: "2017-01-03" "2017-01-04" ...
##  $ Open     : chr  "181.500000" "183.000000" "182.000000" "184.000000" ...
##  $ High     : chr  "183.500000" "184.000000" "183.500000" "184.500000" ...
##  $ Low      : chr  "181.000000" "181.500000" "181.500000" "183.500000" ...
##  $ Close    : num  183 183 184 184 184 ...
##  $ Adj.Close: chr  "177.096786" "177.096786" "177.580658" "178.064514" ...
##  $ Volume   : chr  "22630000" "24369000" "20979000" "22443000" ...
plot(df$Date, df$Close, type = 'l')

tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)
str(tw2330)
## 'data.frame':    4532 obs. of  7 variables:
##  $ Date     : chr  "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
##  $ Open     : chr  "69.649002" "69.649002" "70.822899" "67.301300" ...
##  $ High     : chr  "69.649002" "71.214104" "71.214104" "68.475197" ...
##  $ Low      : chr  "68.475197" "68.866302" "69.649002" "66.518600" ...
##  $ Close    : chr  "69.649002" "71.214104" "69.649002" "67.692497" ...
##  $ Adj.Close: chr  "40.966045" "41.886593" "40.966045" "39.815269" ...
##  $ Volume   : chr  "200662321971" "402466776297" "197545701266" "235270327441" ...
tw2330$Date <- as.Date(tw2330$Date)
tw2330$Close <- as.numeric(tw2330$Close)
## Warning: NAs introduced by coercion
df <- tw2330

summary(df)
##       Date                Open               High          
##  Min.   :2000-01-04   Length:4532        Length:4532       
##  1st Qu.:2004-05-06   Class :character   Class :character  
##  Median :2008-11-04   Mode  :character   Mode  :character  
##  Mean   :2008-11-23                                        
##  3rd Qu.:2013-06-03                                        
##  Max.   :2018-01-02                                        
##                                                            
##      Low                Close         Adj.Close            Volume         
##  Length:4532        Min.   : 27.46   Length:4532        Length:4532       
##  Class :character   1st Qu.: 50.69   Class :character   Class :character  
##  Mode  :character   Median : 63.13   Mode  :character   Mode  :character  
##                     Mean   : 81.56                                        
##                     3rd Qu.:100.50                                        
##                     Max.   :244.00                                        
##                     NA's   :176
plot(df$Date, df$Close, type = 'l')

# List ----

item <- list(thing = 'hat', size = 8.25)
item
## $thing
## [1] "hat"
## 
## $size
## [1] 8.25
test <- list(name = 'Toby', score = c(87, 57, 72))
test$score
## [1] 87 57 72
test$score[2]
## [1] 57
li <- list(c(3,5,12), c(2,4,5, 8, 10))
li[[1]]
## [1]  3  5 12
li[[2]]
## [1]  2  4  5  8 10
lapply(li, sum)
## [[1]]
## [1] 20
## 
## [[2]]
## [1] 29
lapply(li, mean)
## [[1]]
## [1] 6.666667
## 
## [[2]]
## [1] 5.8
lapply(li, max)
## [[1]]
## [1] 12
## 
## [[2]]
## [1] 10
lapply(li, function(e) e[1])
## [[1]]
## [1] 3
## 
## [[2]]
## [1] 2
sapply(li, function(e) e[1])
## [1] 3 2

# Read and Write Data ----

match<- read.table('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/match.txt', sep = '|')

tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)

class(tw2330)
## [1] "data.frame"
str(tw2330)
## 'data.frame':    4532 obs. of  7 variables:
##  $ Date     : chr  "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
##  $ Open     : chr  "69.649002" "69.649002" "70.822899" "67.301300" ...
##  $ High     : chr  "69.649002" "71.214104" "71.214104" "68.475197" ...
##  $ Low      : chr  "68.475197" "68.866302" "69.649002" "66.518600" ...
##  $ Close    : chr  "69.649002" "71.214104" "69.649002" "67.692497" ...
##  $ Adj.Close: chr  "40.966045" "41.886593" "40.966045" "39.815269" ...
##  $ Volume   : chr  "200662321971" "402466776297" "197545701266" "235270327441" ...
write.table(tw2330, file= '2330.test.tab',sep = '\t')
write.csv(tw2330, file= '2330.test.csv')

library(readr)
## Warning: package 'readr' was built under R version 3.2.5
match <- read_delim("https://raw.githubusercontent.com/ywchiu/fubonr/master/data/match.txt", 
    "|", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )
#
#View(match)

# Flow Control ----

a <- 2
if (a > 3){
  print('a > 3')
}else{
  print('a <= 3')
}
## [1] "a <= 3"
a <- 2
if (a > 3){
  print('a > 3')
}else if(a == 3){
  print('a = 3')
}else{
  print('a < 3')
}
## [1] "a < 3"
for(i in 1:10){
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# for loop method: add the integers 1 to 100 to a running total, one per
# iteration.
s <- 0
for (i in seq_len(100)) {
  s <- s + i  # `<-` for assignment, consistent with the rest of the script
}
s
## [1] 5050
# built in function
sum(1:100)
## [1] 5050
x <- c('sunny','rainy','cloudy','rainy', 'cloudy')
for (i in 1:length(x)){
  print(x[i])
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"
for( i in seq_along(x)){
  print(x[i])
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"
for (letter in x){
  print(letter)
}
## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"
s   <- 0
cnt <- 0
while(cnt <= 100){
  s   <-  s + cnt
  cnt <- cnt + 1
}
s
## [1] 5050
url <- 'http://www.appledaily.com.tw/realtimenews/section/new/'
# Print the base URL with each page number from 1 to 10 appended.
# paste0() joins its arguments with no separator, so it replaces
# paste(..., sep = '').
for (i in seq_len(10)) {
  print(paste0(url, i))
}
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/1"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/2"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/3"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/4"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/5"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/6"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/7"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/8"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/9"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/10"

# Function ----

# Add two values. Both arguments have defaults (a = 2, b = 3), so the
# function can be called with no arguments, with named arguments, or
# positionally. The value of the last expression is the return value.
f <- function(a = 2, b = 3) {
  total <- a + b
  total
}

f()
## [1] 5
f(a = 6, b = 4)
## [1] 10
f(6,4)
## [1] 10
?head

# Add two values (defaults a = 2, b = 3) and hand the result back with an
# explicit return() call.
f2 <- function(a = 2, b = 3) {
  result <- a + b
  return(result)
}

f2(7,9)
## [1] 16
# Double `a`. `b` is declared but never used in the body; because R
# evaluates arguments lazily, calling the function without `b` still works.
f <- function(a, b) {
  doubled <- a * 2
  doubled
}
f(3)
## [1] 6
# Add `a` and `b`. Neither argument has a default and `b` is used in the
# body, so both must be supplied; calling with `a` alone raises a
# missing-argument error.
f <- function(a, b) {
  total <- a + b
  total
}
#f(3)

url <- 'https://raw.githubusercontent.com/ywchiu/fuboni/master/data/cnn.txt'

# Count how often each space-separated token occurs in a text file.
#
# Args:
#   url: Path or URL of a plain-text file; passed to file().
#
# Returns:
#   A one-dimensional table of token counts, sorted from most to least
#   frequent, with the tokens as names.
WordCount <- function(url) {
  con <- file(url)
  # Release the connection even if readLines() fails part-way through.
  on.exit(close(con), add = TRUE)
  news <- readLines(con)

  tokens <- unlist(strsplit(news, ' '))
  # Consecutive or leading spaces yield empty strings; they are not words,
  # so they are dropped before counting.
  counts <- table(tokens[nzchar(tokens)])

  as.table(sort(counts, decreasing = TRUE))
}

#install.packages('wordcloud2')
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 3.2.5
wc <- WordCount(url)
wordcloud2(wc, shape = 'pentagon')