Demo20180102

R Basic

# Load the built-in `anscombe` data set into the workspace.
data(anscombe)
# Scatter plot of y1 against x1, using the formula interface.
plot(y1 ~ x1, data = anscombe)

# Fit a linear regression of y1 on x1 and keep the model object in `fit`.
fit <- lm(y1 ~ x1, data = anscombe)


a <- 3
b <- 2
a + b

## [1] 5

Numerics

3 + 8

## [1] 11

3 - 8

## [1] -5

3 * 8

## [1] 24

11 / 2

## [1] 5.5

2 ^ 10

## [1] 1024

11 %% 2

## [1] 1

a <- 3
b =  2
a +  b

## [1] 5

numer <- 17.8
char  <- 'hello world'
logic <- TRUE

class(logic)

## [1] "logical"

card_length  <-  3
card_width   <- '5 inches'
card_width   <- 5
card_length * card_width

## [1] 15

RRP  <- 35.99
Exchange <- 31.74
NTD <- RRP * Exchange
NTD

## [1] 1142.323

#ntd

Vector

height_vec <- c(180,169,173)
name_vec   <- c('Brian', 'Toby', 'Sherry')
class(height_vec)

## [1] "numeric"

height2_vec <- c(180,169,173, '177')
class(height2_vec)

## [1] "character"

height2_vec

## [1] "180" "169" "173" "177"

x <- c(1,2,3,7)
y <- c(2,3,5,1)
x + y

## [1] 3 5 8 8

x - y

## [1] -1 -1 -2  6

x * y

## [1]  2  6 15  7

x / y

## [1] 0.5000000 0.6666667 0.6000000 7.0000000

x <- c(1,2,3,7) 
x + 5

## [1]  6  7  8 12

x + c(5)

## [1]  6  7  8 12

x + c(5,5,5,5)

## [1]  6  7  8 12

x + c(1,2)

## [1] 2 4 4 9

x + c(1,2,1,2)

## [1] 2 4 4 9

x + c(1,2,3)

## Warning in x + c(1, 2, 3): longer object length is not a multiple of
## shorter object length

## [1] 2 4 6 8

x + c(1,2,3,1)

## [1] 2 4 6 8

x <- 1:20
x

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20

y <- seq(1,20)
y

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20

?seq
help(seq)
seq()

## [1] 1

seq(1,20,2)

##  [1]  1  3  5  7  9 11 13 15 17 19

seq(1,3.5,0.5)

## [1] 1.0 1.5 2.0 2.5 3.0 3.5

seq(1,20, length.out = 2)

## [1]  1 20

seq(1,20, length.out = 3)

## [1]  1.0 10.5 20.0

seq(1,20, len = 2)

## [1]  1 20

x <- c(1,2,3,5,7)
sum(x)

## [1] 18

?sum

sum(3,5,c(1,2))

## [1] 11

NA

## [1] NA

sum(3,5,NA)

## [1] NA

sum(3,5,NA, na.rm= TRUE)

## [1] 8

height_vec <- c(180, 169,173)
names(height_vec) <- c('Brian', 'Toby', 'Sherry')
height_vec

##  Brian   Toby Sherry 
##    180    169    173

names_vec <- c('Brian', 'Toby', 'Sherry')
names(height_vec) <- names_vec
height_vec

##  Brian   Toby Sherry 
##    180    169    173

height_vec >  175

##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE

height_vec <  175

##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE

height_vec >= 175

##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE

height_vec <= 175

##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE

height_vec == 180

##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE

height_vec != 180

##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE

height_vec[c(TRUE, FALSE, FALSE)]

## Brian 
##   180

height_vec[c(1)]

## Brian 
##   180

height_vec[1]

## Brian 
##   180

height_vec[c(1,3)]

##  Brian Sherry 
##    180    173

height_vec[height_vec > 175]

## Brian 
##   180

height_vec[height_vec > 175 | height_vec < 170 ]

## Brian  Toby 
##   180   169

height_vec[height_vec > 170 & height_vec < 175 ]

## Sherry 
##    173

# Parallel vectors: one height, weight and name per person.
height_vec <- c(180, 169, 173)
weight_vec <- c(73, 87, 43)
names_vec  <- c('Brian', 'Toby', 'Sherry')

# BMI = weight / height^2. Heights are divided by 100 first
# (presumably centimetres converted to metres).
height_scaled <- height_vec / 100
bmi_vec <- setNames(weight_vec / height_scaled ^ 2, names_vec)
bmi_vec

##    Brian     Toby   Sherry 
## 22.53086 30.46112 14.36734

bmi_vec < 18.5 | bmi_vec >= 24

##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE

bmi_vec[bmi_vec < 18.5 | bmi_vec >= 24]

##     Toby   Sherry 
## 30.46112 14.36734

Matrix

1:9

## [1] 1 2 3 4 5 6 7 8 9

?matrix
matrix(1:9, nrow= 3, byrow=TRUE)

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9

a <- c(1,2,3)
c(a, c(4,5,6))

## [1] 1 2 3 4 5 6

kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)

c(kevin, marry, jerry)

## [1] 85 73 72 64 59 66

mat <- matrix(c(kevin, marry, jerry), nrow = 3, byrow = TRUE)

colnames(mat) <- c('first', 'second')
rownames(mat) <- c('kevin', 'marry', 'jerry')
mat

##       first second
## kevin    85     73
## marry    72     64
## jerry    59     66

mat2 <- matrix(c(kevin, marry, jerry), nrow = 3, byrow=TRUE, dimnames = list(c('Kevin', 'Marry', 'Jerry'), c('first', 'second')))
mat2

##       first second
## Kevin    85     73
## Marry    72     64
## Jerry    59     66

dim(mat2)

## [1] 3 2

nrow(mat2)

## [1] 3

ncol(mat2)

## [1] 2

a <- c(1,2,3,4,5,6)
a[1]

## [1] 1

a[c(1,3)]

## [1] 1 3

a[1:3]

## [1] 1 2 3

mat2[1, ]

##  first second 
##     85     73

mat2[ ,1]

## Kevin Marry Jerry 
##    85    72    59

mat2[2:3,]

##       first second
## Marry    72     64
## Jerry    59     66

mat2[2,1]

## [1] 72

mat2

##       first second
## Kevin    85     73
## Marry    72     64
## Jerry    59     66

mat3 <- rbind(mat2, c(78, 63))
rownames(mat3)[4] <- 'sam'
nrow(mat3)

## [1] 4

mat3

##       first second
## Kevin    85     73
## Marry    72     64
## Jerry    59     66
## sam      78     63

mat4 <- cbind(mat2, c(82,77,70))
colnames(mat4)[3] <- 'third'
mat4

##       first second third
## Kevin    85     73    82
## Marry    72     64    77
## Jerry    59     66    70

rowSums(mat2)

## Kevin Marry Jerry 
##   158   136   125

colSums(mat2)

##  first second 
##    216    203

m1 <- matrix(1:4, byrow=TRUE, nrow = 2)
m2 <- matrix(5:8, byrow=TRUE, nrow = 2)
m1 + m2

##      [,1] [,2]
## [1,]    6    8
## [2,]   10   12

m1 - m2

##      [,1] [,2]
## [1,]   -4   -4
## [2,]   -4   -4

m1 * m2

##      [,1] [,2]
## [1,]    5   12
## [2,]   21   32

m1 / m2

##           [,1]      [,2]
## [1,] 0.2000000 0.3333333
## [2,] 0.4285714 0.5000000

m1 %*% m2

##      [,1] [,2]
## [1,]   19   22
## [2,]   43   50

m1 <-matrix(1:9, nrow = 9)
m2 <-matrix(1:9, nrow = 1)
m1 %*% m2

##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
##  [1,]    1    2    3    4    5    6    7    8    9
##  [2,]    2    4    6    8   10   12   14   16   18
##  [3,]    3    6    9   12   15   18   21   24   27
##  [4,]    4    8   12   16   20   24   28   32   36
##  [5,]    5   10   15   20   25   30   35   40   45
##  [6,]    6   12   18   24   30   36   42   48   54
##  [7,]    7   14   21   28   35   42   49   56   63
##  [8,]    8   16   24   32   40   48   56   64   72
##  [9,]    9   18   27   36   45   54   63   72   81

kevin <-  c(85, 73)
marry <-  c(72, 64)
jerry <-  c(59, 66)
mat <- matrix(c(kevin, marry, jerry), nrow = 3, byrow=TRUE)

colnames(mat) <- c('first', 'second')
rownames(mat) <- c('Kevin', 'Marry', 'Jerry')

# method 1
mat[  , 1 ] * 0.4 + mat[ , 2] * 0.6

## Kevin Marry Jerry 
##  77.8  67.2  63.2

# method 2
mat %*% c(0.4, 0.6)

##       [,1]
## Kevin 77.8
## Marry 67.2
## Jerry 63.2

Factor

weather <- c('sunny', 'rainy', 'cloudy', 'rainy', 'cloudy')

class(weather)

## [1] "character"

weather_category <- factor(weather)

levels(weather_category)

## [1] "cloudy" "rainy"  "sunny"

weather_category[1] > weather_category[3]

## Warning in Ops.factor(weather_category[1], weather_category[3]): '>' not
## meaningful for factors

## [1] NA

# Build an ordered factor so that its values can be compared with < and >.
# The argument is spelled out as `ordered`; the abbreviated `order = TRUE`
# only works through partial argument matching.
temperature <- c('Low', 'High', 'High', 'Medium', 'Low', 'Medium')
temperature_category <- factor(temperature, ordered = TRUE, levels = c('Low', 'Medium', 'High'))
temperature_category

## [1] Low    High   High   Medium Low    Medium
## Levels: Low < Medium < High

temperature_category[3] > temperature_category[1]

## [1] TRUE

temperature_category[4] > temperature_category[3]

## [1] FALSE

levels(temperature_category)

## [1] "Low"    "Medium" "High"

levels(temperature_category) <- c('L', 'M', 'H')
temperature_category

## [1] L H H M L M
## Levels: L < M < H

Data Frame


# Three parallel vectors, one entry per weekday.
days <- c('mon','tue','wed','thu','fri')
temp <- c(22.2,21,23,24.3,25)
rain <- c(TRUE, TRUE, FALSE, FALSE, TRUE)

# Inspect the type of each vector before combining them.
class(days)
class(temp)
class(rain)

# Combine the vectors column-wise; column names come from the variable names.
df <- data.frame(days, rain, temp)
df

# Same data, but with explicit column names D, R and T.
df2 <- data.frame('D' = days, 'R'=rain, 'T'=temp)
df2

# Object class, compact structure, and per-column summary of the data frame.
class(df)
str(df)
summary(df)

# Browse the bundled data sets and load `iris`.
# `data()` with no arguments and `View()` open a separate listing / viewer,
# so they run only in an interactive session; in a batch run they are skipped
# instead of failing or blocking.
if (interactive()) {
  data()
}
data(iris)
if (interactive()) {
  View(iris)
}
# First six rows (the default), then only the first three.
head(iris)
#?head
head(iris, 3)

# Python-style row selection, shown only for comparison (not R code):
#df[1,:]

# R: data frames are indexed as [rows, columns]; an empty slot means "all".
# First row, all columns.
iris[1,]
# Rows 1-3 of the first column, selected by position and then by name.
iris[1:3,1]
iris[1:3,'Sepal.Length'] 
# All rows of the first two columns (only the head is printed).
head(iris[,1:2])

# `$` extracts a single column as a vector.
head(iris$Sepal.Length)

# Rows 1-5 of two columns selected by name.
iris[c(1:5),c('Sepal.Length', 'Sepal.Width')]

# Logical vector marking the setosa rows ...
iris$Species == 'setosa'
# ... which can be used directly as a row filter (columns 1-5 kept).
setosa.data <- iris[iris$Species == 'setosa', 1:5]
str(setosa.data)

# Same filter, using which() to turn the logical mask into row positions.
head(iris[which(iris$Species == 'setosa'), ])


head(iris)

# Sorted values only: the largest sepal lengths first.
sort(iris$Sepal.Length, decreasing = TRUE)

# order() returns row positions (a permutation), not ranks, so the result is
# stored as `row_order`; this also avoids masking base::rank().
row_order <- order(iris$Sepal.Length, decreasing = TRUE)
head(iris[row_order, ])


# sort() vs order() on a small vector.
a <- c(180,169,173,182,177)
# sort() returns the values themselves in ascending order.
sort(a)
# order() returns the positions that would put `a` in ascending order.
order(a)
# Indexing `a` by those positions therefore gives the same result as sort(a).
a[order(a)]

TW2330 Analysis

tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)
?read.csv
head(tw2330)

##         Date      Open      High       Low     Close Adj.Close
## 1 2000-01-04 69.649002 69.649002 68.475197 69.649002 40.966045
## 2 2000-01-05 69.649002 71.214104 68.866302 71.214104 41.886593
## 3 2000-01-06 70.822899 71.214104 69.649002 69.649002 40.966045
## 4 2000-01-07 67.301300 68.475197 66.518600 67.692497 39.815269
## 5 2000-01-10 69.649002 70.431396 68.475197 70.040199 41.196129
## 6 2000-01-11 70.822899 71.605202 68.475197 68.866302 40.505665
##         Volume
## 1 200662321971
## 2 402466776297
## 3 197545701266
## 4 235270327441
## 5 276171665217
## 6 277769524211

str(tw2330)

## 'data.frame':    4532 obs. of  7 variables:
##  $ Date     : chr  "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
##  $ Open     : chr  "69.649002" "69.649002" "70.822899" "67.301300" ...
##  $ High     : chr  "69.649002" "71.214104" "71.214104" "68.475197" ...
##  $ Low      : chr  "68.475197" "68.866302" "69.649002" "66.518600" ...
##  $ Close    : chr  "69.649002" "71.214104" "69.649002" "67.692497" ...
##  $ Adj.Close: chr  "40.966045" "41.886593" "40.966045" "39.815269" ...
##  $ Volume   : chr  "200662321971" "402466776297" "197545701266" "235270327441" ...

tw2330$Date <- as.Date(tw2330$Date)
tw2330$Close <- as.numeric(tw2330$Close)

## Warning: NAs introduced by coercion

df <- tw2330[ tw2330$Date >= '2017-01-01' & tw2330$Date < '2018-01-01' ,  ]
min(df$Close, na.rm=TRUE)

## [1] 179.5

max(df$Close, na.rm=TRUE)

## [1] 244

mean(df$Close, na.rm=TRUE)

## [1] 210.1502

# summary() has no `na.rm` argument; missing values are counted and reported
# in a separate "NA's" entry, so no extra argument is passed.
summary(df$Close)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   179.5   191.5   212.5   210.2   222.0   244.0       2

df[order(df$Close), ][1:3,]

##            Date       Open       High        Low Close  Adj.Close   Volume
## 4296 2017-01-16 180.000000 180.500000 179.000000 179.5 173.709686 30756000
## 4299 2017-01-19 179.500000 181.000000 179.500000 180.5 174.677429 24627000
## 4297 2017-01-17 180.500000 181.000000 179.500000 181.0 175.161301 13159000

df[order(df$Close, decreasing =TRUE), ][1:3,]

##            Date       Open       High        Low Close  Adj.Close   Volume
## 4493 2017-11-07 242.000000 244.000000 241.500000   244 244.000000 20200261
## 4506 2017-11-24 242.000000 244.500000 241.500000   244 244.000000 11154453
## 4487 2017-10-30 242.000000 245.000000 241.500000   243 243.000000 27784990

str(df)

## 'data.frame':    245 obs. of  7 variables:
##  $ Date     : Date, format: "2017-01-03" "2017-01-04" ...
##  $ Open     : chr  "181.500000" "183.000000" "182.000000" "184.000000" ...
##  $ High     : chr  "183.500000" "184.000000" "183.500000" "184.500000" ...
##  $ Low      : chr  "181.000000" "181.500000" "181.500000" "183.500000" ...
##  $ Close    : num  183 183 184 184 184 ...
##  $ Adj.Close: chr  "177.096786" "177.096786" "177.580658" "178.064514" ...
##  $ Volume   : chr  "22630000" "24369000" "20979000" "22443000" ...

plot(df$Date, df$Close, type = 'l')

tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)
str(tw2330)

## 'data.frame':    4532 obs. of  7 variables:
##  $ Date     : chr  "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
##  $ Open     : chr  "69.649002" "69.649002" "70.822899" "67.301300" ...
##  $ High     : chr  "69.649002" "71.214104" "71.214104" "68.475197" ...
##  $ Low      : chr  "68.475197" "68.866302" "69.649002" "66.518600" ...
##  $ Close    : chr  "69.649002" "71.214104" "69.649002" "67.692497" ...
##  $ Adj.Close: chr  "40.966045" "41.886593" "40.966045" "39.815269" ...
##  $ Volume   : chr  "200662321971" "402466776297" "197545701266" "235270327441" ...

tw2330$Date <- as.Date(tw2330$Date)
tw2330$Close <- as.numeric(tw2330$Close)

## Warning: NAs introduced by coercion

df <- tw2330

summary(df)

##       Date                Open               High          
##  Min.   :2000-01-04   Length:4532        Length:4532       
##  1st Qu.:2004-05-06   Class :character   Class :character  
##  Median :2008-11-04   Mode  :character   Mode  :character  
##  Mean   :2008-11-23                                        
##  3rd Qu.:2013-06-03                                        
##  Max.   :2018-01-02                                        
##                                                            
##      Low                Close         Adj.Close            Volume         
##  Length:4532        Min.   : 27.46   Length:4532        Length:4532       
##  Class :character   1st Qu.: 50.69   Class :character   Class :character  
##  Mode  :character   Median : 63.13   Mode  :character   Mode  :character  
##                     Mean   : 81.56                                        
##                     3rd Qu.:100.50                                        
##                     Max.   :244.00                                        
##                     NA's   :176

plot(df$Date, df$Close, type = 'l')

List

item <- list(thing = 'hat', size = 8.25)
item

## $thing
## [1] "hat"
## 
## $size
## [1] 8.25

test <- list(name = 'Toby', score = c(87, 57, 72))
test$score

## [1] 87 57 72

test$score[2]

## [1] 57

li <- list(c(3,5,12), c(2,4,5, 8, 10))
li[[1]]

## [1]  3  5 12

li[[2]]

## [1]  2  4  5  8 10

lapply(li, sum)

## [[1]]
## [1] 20
## 
## [[2]]
## [1] 29

lapply(li, mean)

## [[1]]
## [1] 6.666667
## 
## [[2]]
## [1] 5.8

lapply(li, max)

## [[1]]
## [1] 12
## 
## [[2]]
## [1] 10

lapply(li, function(e) e[1])

## [[1]]
## [1] 3
## 
## [[2]]
## [1] 2

sapply(li, function(e) e[1])

## [1] 3 2

Read and Write Data

match<- read.table('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/match.txt', sep = '|')

tw2330 <- read.csv('https://raw.githubusercontent.com/ywchiu/fuboni/master/data/2330.TW.csv',stringsAsFactors =FALSE)

class(tw2330)

## [1] "data.frame"

str(tw2330)

## 'data.frame':    4532 obs. of  7 variables:
##  $ Date     : chr  "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
##  $ Open     : chr  "69.649002" "69.649002" "70.822899" "67.301300" ...
##  $ High     : chr  "69.649002" "71.214104" "71.214104" "68.475197" ...
##  $ Low      : chr  "68.475197" "68.866302" "69.649002" "66.518600" ...
##  $ Close    : chr  "69.649002" "71.214104" "69.649002" "67.692497" ...
##  $ Adj.Close: chr  "40.966045" "41.886593" "40.966045" "39.815269" ...
##  $ Volume   : chr  "200662321971" "402466776297" "197545701266" "235270327441" ...

write.table(tw2330, file= '2330.test.tab',sep = '\t')
write.csv(tw2330, file= '2330.test.csv')

library(readr)

## Warning: package 'readr' was built under R version 3.2.5

match <- read_delim("https://raw.githubusercontent.com/ywchiu/fubonr/master/data/match.txt", 
    "|", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)

## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_integer()
## )

#
#View(match)

Flow Control

a <- 2
# Report on which side of 3 the value of `a` falls.
if (a <= 3) {
  print('a <= 3')
} else {
  print('a > 3')
}

## [1] "a <= 3"

a <- 2
if (a > 3){
  print('a > 3')
}else if(a == 3){
  print('a = 3')
}else{
  print('a < 3')
}

## [1] "a < 3"

for(i in 1:10){
  print(i)
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10

# for loop method
# Accumulate 1 + 2 + ... + 100 one term at a time; `<-` is used for the
# update so assignment is consistent with the rest of the script.
s <- 0
for (i in 1:100) {
  s <- s + i
}
s

## [1] 5050

# built in function
sum(1:100)

## [1] 5050

# Index-based iteration: visit positions 1..length(x) and print each element.
x <- c('sunny','rainy','cloudy','rainy', 'cloudy')
# Note: 1:length(x) counts down as c(1, 0) when x is empty; the seq_along()
# loop that follows avoids this.
for (i in 1:length(x)){
  print(x[i])
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

for( i in seq_along(x)){
  print(x[i])
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

for (letter in x){
  print(letter)
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

s   <- 0
cnt <- 0
while(cnt <= 100){
  s   <-  s + cnt
  cnt <- cnt + 1
}
s

## [1] 5050

url <- 'http://www.appledaily.com.tw/realtimenews/section/new/'
# Print the address of listing pages 1 to 10.
# paste0() concatenates without a separator, which is what
# paste(..., sep = '') was spelling out by hand.
for (i in seq_len(10)) {
  print(paste0(url, i))
}

## [1] "http://www.appledaily.com.tw/realtimenews/section/new/1"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/2"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/3"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/4"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/5"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/6"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/7"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/8"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/9"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/10"

Function

# Add two values.
#   a: first addend (default 2)
#   b: second addend (default 3)
# Returns a + b.
f <- function(a = 2, b = 3) {
  total <- a + b
  total
}

f()

## [1] 5

f(a = 6, b = 4)

## [1] 10

f(6,4)

## [1] 10

?head

# Same addition as above, but handing the result back with an explicit return().
#   a: first addend (default 2)
#   b: second addend (default 3)
f2 <- function(a = 2, b = 3) {
  result <- a + b
  return(result)
}

f2(7,9)

## [1] 16

# Double `a`. `b` is declared but never used in the body, so a call that
# omits it (e.g. f(3)) still succeeds.
f <- function(a, b) {
  doubled <- a * 2
  doubled
}
f(3)

## [1] 6

# Add `a` and `b`. Neither argument has a default and both are used,
# so both must be supplied by the caller.
f <- function(a, b) {
  total <- a + b
  total
}
#f(3)

url <- 'https://raw.githubusercontent.com/ywchiu/fuboni/master/data/cnn.txt'

# Count how often each space-separated word occurs in a text resource.
#
# Args:
#   url: Path or URL of a plain-text file; it is passed to file().
#
# Returns:
#   A one-dimensional table of word counts, sorted with the most frequent
#   word first.
WordCount <- function(url){
  con <- file(url)
  # Release the connection even if readLines() fails part-way.
  on.exit(close(con), add = TRUE)
  news <- readLines(con)
  words <- unlist(strsplit(news, ' '))
  # Consecutive spaces yield empty tokens; they are not words, so drop them.
  tb <- table(words[nzchar(words)])
  swd <- sort(tb, decreasing = TRUE)
  as.table(swd)
}

#install.packages('wordcloud2')
library(wordcloud2)

## Warning: package 'wordcloud2' was built under R version 3.2.5

wc <- WordCount(url)
wordcloud2(wc, shape = 'pentagon')

Demo20180102

David Chiu

1/2/2018

R Basic

Numerics

Vector

Matrix

Factor

Data Frame

TW2330 Analysis

List

Read and Write Data

Flow Control

Function