p54 example

# Exam scores per student, given as c(first exam, second exam).
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)
# One row per student, one column per exam; row and column names are
# attached while the matrix is built.
mat <- matrix(
  c(kevin, marry, jerry),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(c("kevin", "marry", "jerry"), c("first", "second"))
)

# Weighted final score: 0.4 * first exam + 0.6 * second exam.
final <- mat %*% c(0.4, 0.6)
final
##       [,1]
## kevin 77.8
## marry 67.2
## jerry 63.2
# `final` is a one-column matrix without a column name, so the appended
# column is unnamed until it is renamed below.
cbind(mat, final)
##       first second     
## kevin    85     73 77.8
## marry    72     64 67.2
## jerry    59     66 63.2
mat2 <- cbind(mat, final)
mat2
##       first second     
## kevin    85     73 77.8
## marry    72     64 67.2
## jerry    59     66 63.2
# Name the last (newly appended) column.
colnames(mat2)[ncol(mat2)] <- "final"
mat2
##       first second final
## kevin    85     73  77.8
## marry    72     64  67.2
## jerry    59     66  63.2

Dataframe

# Three parallel vectors, one element per person.
name <- c("Joe", "Bob", "Vicky")
age <- c(28, 26, 34)
gender <- c("Male", "Male", "Female")
# Combine the vectors column-wise into a data frame.
df <- data.frame(name = name, age = age, gender = gender)
class(df)
## [1] "data.frame"
# NOTE(review): the Factor columns in the recorded output below look like the
# pre-4.0 default (stringsAsFactors = TRUE); newer R would show chr columns.
str(object = df)
## 'data.frame':    3 obs. of  3 variables:
##  $ name  : Factor w/ 3 levels "Bob","Joe","Vicky": 2 1 3
##  $ age   : num  28 26 34
##  $ gender: Factor w/ 2 levels "Female","Male": 2 2 1
summary(object = df)
##     name        age           gender 
##  Bob  :1   Min.   :26.00   Female:1  
##  Joe  :1   1st Qu.:27.00   Male  :2  
##  Vicky:1   Median :28.00             
##            Mean   :29.33             
##            3rd Qu.:31.00             
##            Max.   :34.00
# Load the built-in iris data set into the workspace.
data("iris")
# First six rows.
head(iris, n = 6)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Last six rows.
tail(iris, n = 6)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
# Last ten rows.
tail(iris, n = 10)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 141          6.7         3.1          5.6         2.4 virginica
## 142          6.9         3.1          5.1         2.3 virginica
## 143          5.8         2.7          5.1         1.9 virginica
## 144          6.8         3.2          5.9         2.3 virginica
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
# Compact overview of column types and leading values.
str(object = iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Take the first three rows (all columns).
iris[1:3, ]
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
# Take the first column of the first three rows.
iris[1:3, 1]
## [1] 5.1 4.9 4.7
# Take the Sepal.Length column of the first three rows.
iris[1:3, "Sepal.Length"]
## [1] 5.1 4.9 4.7
# First two columns, first six rows.
head(iris[, 1:2])
##   Sepal.Length Sepal.Width
## 1          5.1         3.5
## 2          4.9         3.0
## 3          4.7         3.2
## 4          4.6         3.1
## 5          5.0         3.6
## 6          5.4         3.9
# Extract a column with `$`, then index into the resulting vector.
iris$Sepal.Length[1:3]
## [1] 5.1 4.9 4.7
# First five records, keeping only sepal length and sepal width.
Five.Sepal.iris <- iris[1:5, c("Sepal.Length", "Sepal.Width")]
# Rows can also be filtered with a logical condition.
setosa.data <- iris[iris$Species == "setosa", 1:5]
str(setosa.data)
## 'data.frame':    50 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# which() returns the positions at which the condition is TRUE.
which(iris$Species == "setosa")
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50
# order() gives the row permutation used here to sort by Sepal.Length,
# largest first.
iris[order(iris$Sepal.Length, decreasing = TRUE), ]
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 132          7.9         3.8          6.4         2.0  virginica
## 118          7.7         3.8          6.7         2.2  virginica
## 119          7.7         2.6          6.9         2.3  virginica
## 123          7.7         2.8          6.7         2.0  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 106          7.6         3.0          6.6         2.1  virginica
## 131          7.4         2.8          6.1         1.9  virginica
## 108          7.3         2.9          6.3         1.8  virginica
## 110          7.2         3.6          6.1         2.5  virginica
## 126          7.2         3.2          6.0         1.8  virginica
## 130          7.2         3.0          5.8         1.6  virginica
## 103          7.1         3.0          5.9         2.1  virginica
## 51           7.0         3.2          4.7         1.4 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
## 121          6.9         3.2          5.7         2.3  virginica
## 140          6.9         3.1          5.4         2.1  virginica
## 142          6.9         3.1          5.1         2.3  virginica
## 77           6.8         2.8          4.8         1.4 versicolor
## 113          6.8         3.0          5.5         2.1  virginica
## 144          6.8         3.2          5.9         2.3  virginica
## 66           6.7         3.1          4.4         1.4 versicolor
## 78           6.7         3.0          5.0         1.7 versicolor
## 87           6.7         3.1          4.7         1.5 versicolor
## 109          6.7         2.5          5.8         1.8  virginica
## 125          6.7         3.3          5.7         2.1  virginica
## 141          6.7         3.1          5.6         2.4  virginica
## 145          6.7         3.3          5.7         2.5  virginica
## 146          6.7         3.0          5.2         2.3  virginica
## 59           6.6         2.9          4.6         1.3 versicolor
## 76           6.6         3.0          4.4         1.4 versicolor
## 55           6.5         2.8          4.6         1.5 versicolor
## 105          6.5         3.0          5.8         2.2  virginica
## 111          6.5         3.2          5.1         2.0  virginica
## 117          6.5         3.0          5.5         1.8  virginica
## 148          6.5         3.0          5.2         2.0  virginica
## 52           6.4         3.2          4.5         1.5 versicolor
## 75           6.4         2.9          4.3         1.3 versicolor
## 112          6.4         2.7          5.3         1.9  virginica
## 116          6.4         3.2          5.3         2.3  virginica
## 129          6.4         2.8          5.6         2.1  virginica
## 133          6.4         2.8          5.6         2.2  virginica
## 138          6.4         3.1          5.5         1.8  virginica
## 57           6.3         3.3          4.7         1.6 versicolor
## 73           6.3         2.5          4.9         1.5 versicolor
## 88           6.3         2.3          4.4         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 104          6.3         2.9          5.6         1.8  virginica
## 124          6.3         2.7          4.9         1.8  virginica
## 134          6.3         2.8          5.1         1.5  virginica
## 137          6.3         3.4          5.6         2.4  virginica
## 147          6.3         2.5          5.0         1.9  virginica
## 69           6.2         2.2          4.5         1.5 versicolor
## 98           6.2         2.9          4.3         1.3 versicolor
## 127          6.2         2.8          4.8         1.8  virginica
## 149          6.2         3.4          5.4         2.3  virginica
## 64           6.1         2.9          4.7         1.4 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 74           6.1         2.8          4.7         1.2 versicolor
## 92           6.1         3.0          4.6         1.4 versicolor
## 128          6.1         3.0          4.9         1.8  virginica
## 135          6.1         2.6          5.6         1.4  virginica
## 63           6.0         2.2          4.0         1.0 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 86           6.0         3.4          4.5         1.6 versicolor
## 120          6.0         2.2          5.0         1.5  virginica
## 139          6.0         3.0          4.8         1.8  virginica
## 62           5.9         3.0          4.2         1.5 versicolor
## 71           5.9         3.2          4.8         1.8 versicolor
## 150          5.9         3.0          5.1         1.8  virginica
## 15           5.8         4.0          1.2         0.2     setosa
## 68           5.8         2.7          4.1         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 102          5.8         2.7          5.1         1.9  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 143          5.8         2.7          5.1         1.9  virginica
## 16           5.7         4.4          1.5         0.4     setosa
## 19           5.7         3.8          1.7         0.3     setosa
## 56           5.7         2.8          4.5         1.3 versicolor
## 80           5.7         2.6          3.5         1.0 versicolor
## 96           5.7         3.0          4.2         1.2 versicolor
## 97           5.7         2.9          4.2         1.3 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 114          5.7         2.5          5.0         2.0  virginica
## 65           5.6         2.9          3.6         1.3 versicolor
## 67           5.6         3.0          4.5         1.5 versicolor
## 70           5.6         2.5          3.9         1.1 versicolor
## 89           5.6         3.0          4.1         1.3 versicolor
## 95           5.6         2.7          4.2         1.3 versicolor
## 122          5.6         2.8          4.9         2.0  virginica
## 34           5.5         4.2          1.4         0.2     setosa
## 37           5.5         3.5          1.3         0.2     setosa
## 54           5.5         2.3          4.0         1.3 versicolor
## 81           5.5         2.4          3.8         1.1 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 90           5.5         2.5          4.0         1.3 versicolor
## 91           5.5         2.6          4.4         1.2 versicolor
## 6            5.4         3.9          1.7         0.4     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 17           5.4         3.9          1.3         0.4     setosa
## 21           5.4         3.4          1.7         0.2     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 85           5.4         3.0          4.5         1.5 versicolor
## 49           5.3         3.7          1.5         0.2     setosa
## 28           5.2         3.5          1.5         0.2     setosa
## 29           5.2         3.4          1.4         0.2     setosa
## 33           5.2         4.1          1.5         0.1     setosa
## 60           5.2         2.7          3.9         1.4 versicolor
## 1            5.1         3.5          1.4         0.2     setosa
## 18           5.1         3.5          1.4         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 22           5.1         3.7          1.5         0.4     setosa
## 24           5.1         3.3          1.7         0.5     setosa
## 40           5.1         3.4          1.5         0.2     setosa
## 45           5.1         3.8          1.9         0.4     setosa
## 47           5.1         3.8          1.6         0.2     setosa
## 99           5.1         2.5          3.0         1.1 versicolor
## 5            5.0         3.6          1.4         0.2     setosa
## 8            5.0         3.4          1.5         0.2     setosa
## 26           5.0         3.0          1.6         0.2     setosa
## 27           5.0         3.4          1.6         0.4     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 41           5.0         3.5          1.3         0.3     setosa
## 44           5.0         3.5          1.6         0.6     setosa
## 50           5.0         3.3          1.4         0.2     setosa
## 61           5.0         2.0          3.5         1.0 versicolor
## 94           5.0         2.3          3.3         1.0 versicolor
## 2            4.9         3.0          1.4         0.2     setosa
## 10           4.9         3.1          1.5         0.1     setosa
## 35           4.9         3.1          1.5         0.2     setosa
## 38           4.9         3.6          1.4         0.1     setosa
## 58           4.9         2.4          3.3         1.0 versicolor
## 107          4.9         2.5          4.5         1.7  virginica
## 12           4.8         3.4          1.6         0.2     setosa
## 13           4.8         3.0          1.4         0.1     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 31           4.8         3.1          1.6         0.2     setosa
## 46           4.8         3.0          1.4         0.3     setosa
## 3            4.7         3.2          1.3         0.2     setosa
## 30           4.7         3.2          1.6         0.2     setosa
## 4            4.6         3.1          1.5         0.2     setosa
## 7            4.6         3.4          1.4         0.3     setosa
## 23           4.6         3.6          1.0         0.2     setosa
## 48           4.6         3.2          1.4         0.2     setosa
## 42           4.5         2.3          1.3         0.3     setosa
## 9            4.4         2.9          1.4         0.2     setosa
## 39           4.4         3.0          1.3         0.2     setosa
## 43           4.4         3.2          1.3         0.2     setosa
## 14           4.3         3.0          1.1         0.1     setosa
# sort() returns the sorted values themselves (not row positions), largest first.
sort(iris[["Sepal.Length"]], decreasing = TRUE)
##   [1] 7.9 7.7 7.7 7.7 7.7 7.6 7.4 7.3 7.2 7.2 7.2 7.1 7.0 6.9 6.9 6.9 6.9
##  [18] 6.8 6.8 6.8 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.6 6.6 6.5 6.5 6.5 6.5
##  [35] 6.5 6.4 6.4 6.4 6.4 6.4 6.4 6.4 6.3 6.3 6.3 6.3 6.3 6.3 6.3 6.3 6.3
##  [52] 6.2 6.2 6.2 6.2 6.1 6.1 6.1 6.1 6.1 6.1 6.0 6.0 6.0 6.0 6.0 6.0 5.9
##  [69] 5.9 5.9 5.8 5.8 5.8 5.8 5.8 5.8 5.8 5.7 5.7 5.7 5.7 5.7 5.7 5.7 5.7
##  [86] 5.6 5.6 5.6 5.6 5.6 5.6 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.4 5.4 5.4 5.4
## [103] 5.4 5.4 5.3 5.2 5.2 5.2 5.2 5.1 5.1 5.1 5.1 5.1 5.1 5.1 5.1 5.1 5.0
## [120] 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 4.9 4.9 4.9 4.9 4.9 4.9 4.8 4.8
## [137] 4.8 4.8 4.8 4.7 4.7 4.6 4.6 4.6 4.6 4.5 4.4 4.4 4.4 4.3

File read and write

# getwd()
# The relative paths below are resolved against this working directory.
setwd("~/lecture/riii")
# setwd("__your_working_directory_path__")
# Comma-separated file whose first line holds the column names.
tw2330 <- read.csv(file = "data/2330.csv", header = TRUE)
#tw2330 = read.csv('https://github.com/YuHsuanLin/riii/raw/master/data/2330.csv')
# Pipe-delimited text file without a header row.
test.data <- read.table(file = "data/match.txt", header = FALSE, sep = "|")

p70

setwd("~/lecture/riii")
tw2330 <- read.csv(file = "./data/2330.csv", header = TRUE)
str(tw2330)
## 'data.frame':    1801 obs. of  6 variables:
##  $ Date  : Factor w/ 1801 levels "2011-01-03","2011-01-04",..: 1801 1800 1799 1798 1797 1796 1795 1794 1793 1792 ...
##  $ Open  : num  224 225 225 226 225 ...
##  $ High  : num  228 226 226 226 228 ...
##  $ Low   : num  222 221 221 224 225 ...
##  $ Close : num  227 224 222 225 227 ...
##  $ Volume: int  6448117 7619247 10731921 10535437 9272078 16080436 29507056 7758149 10130508 10232257 ...
# Convert the Date column to Date objects so it can be compared chronologically.
tw2330$Date <- as.Date(tw2330$Date)

# Keep only the rows dated on or after 2017-01-01 and before 2018-01-01.
tw2330_2017 <- tw2330[
  tw2330$Date >= "2017-01-01" & tw2330$Date < "2018-01-01",
]
# Highest closing price within that year.
max(tw2330_2017$Close)
## [1] 244
# The same rows, ordered by closing price from highest to lowest.
ordered_stock <- tw2330_2017[order(tw2330_2017$Close, decreasing = TRUE), ]

List

# A list can hold elements of different types under different names.
item <- list(thing = "hat", size = 8.25)
item[["thing"]]
## [1] "hat"
item[["size"]]
## [1] 8.25
# A list element can itself be a whole data frame.
flower <- list(title = "iris dataset", data = iris)
class(flower)
## [1] "list"
class(flower[["data"]])
## [1] "data.frame"
flower[["data"]][1, "Sepal.Width"]
## [1] 3.5
# Unnamed list of two numeric vectors; `[[` extracts an element by position.
li <- list(c(1, 2, 3, 4), c(5, 6, 7, 8))
li[[1]]
## [1] 1 2 3 4

Joining two data frames

# Combine two data frames with merge()

# Six customers (ids 1-6) and the product attached to each id.
df1 <- data.frame(
  CustomerId = 1:6,
  Product = rep(c("Toaster", "Radio"), each = 3)
)
# State information exists for customers 2, 4 and 6 only.
df2 <- data.frame(
  CustomerId = c(2, 4, 6),
  State = rep(c("Alabama", "Ohio"), times = c(2, 1))
)

#Inner join:
merge(x = df1, y = df2, by = "CustomerId")
##   CustomerId Product   State
## 1          2 Toaster Alabama
## 2          4   Radio Alabama
## 3          6   Radio    Ohio
#Outer join: 
merge(x = df1, y = df2, by = "CustomerId", all = TRUE)
##   CustomerId Product   State
## 1          1 Toaster    <NA>
## 2          2 Toaster Alabama
## 3          3 Toaster    <NA>
## 4          4   Radio Alabama
## 5          5   Radio    <NA>
## 6          6   Radio    Ohio
#Left outer: 
merge(x = df1, y = df2, by = "CustomerId", all.x = TRUE)
##   CustomerId Product   State
## 1          1 Toaster    <NA>
## 2          2 Toaster Alabama
## 3          3 Toaster    <NA>
## 4          4   Radio Alabama
## 5          5   Radio    <NA>
## 6          6   Radio    Ohio
#Right outer: 
merge(x = df1, y = df2, by = "CustomerId", all.y = TRUE)
##   CustomerId Product   State
## 1          2 Toaster Alabama
## 2          4   Radio Alabama
## 3          6   Radio    Ohio
#Cross join: 
merge(x = df1, y = df2, by = NULL)
##    CustomerId.x Product CustomerId.y   State
## 1             1 Toaster            2 Alabama
## 2             2 Toaster            2 Alabama
## 3             3 Toaster            2 Alabama
## 4             4   Radio            2 Alabama
## 5             5   Radio            2 Alabama
## 6             6   Radio            2 Alabama
## 7             1 Toaster            4 Alabama
## 8             2 Toaster            4 Alabama
## 9             3 Toaster            4 Alabama
## 10            4   Radio            4 Alabama
## 11            5   Radio            4 Alabama
## 12            6   Radio            4 Alabama
## 13            1 Toaster            6    Ohio
## 14            2 Toaster            6    Ohio
## 15            3 Toaster            6    Ohio
## 16            4   Radio            6    Ohio
## 17            5   Radio            6    Ohio
## 18            6   Radio            6    Ohio

readr package

#install.packages("tidyverse")
#install.packages("readr")
library('tidyverse')
## ─ Attaching packages ─────────────────── tidyverse 1.2.1 ─
## ✔ ggplot2 3.1.0       ✔ purrr   0.2.5  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.1       ✔ stringr 1.3.1  
## ✔ readr   1.3.1       ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## ─ Conflicts ──────────────────── tidyverse_conflicts() ─
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
setwd("~/lecture/riii")
# No col_types supplied: readr reports the column specification it used
# (shown in the message below).
stock <- read_csv(file = "data/2330.csv", col_names = TRUE)
## Parsed with column specification:
## cols(
##   Date = col_date(format = ""),
##   Open = col_double(),
##   High = col_double(),
##   Low = col_double(),
##   Close = col_double(),
##   Volume = col_double()
## )
# Same file again, this time passing the column specification explicitly;
# the result is printed rather than stored.
read_csv(
  file = "data/2330.csv",
  col_names = TRUE,
  col_types = cols(
    Date = col_date(format = ""),
    Open = col_double(),
    High = col_double(),
    Low = col_double(),
    Close = col_double(),
    Volume = col_double()
  )
)
## # A tibble: 1,801 x 6
##    Date        Open  High   Low Close   Volume
##    <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>
##  1 2018-04-30  224.  228   222.  227   6448117
##  2 2018-04-27  225   226.  221   224.  7619247
##  3 2018-04-26  225   226.  221   222  10731921
##  4 2018-04-25  226.  226   224   225  10535437
##  5 2018-04-24  225   228.  225   227   9272078
##  6 2018-04-23  226.  228.  225   226. 16080436
##  7 2018-04-20  228   232.  228   229  29507056
##  8 2018-04-19  242   244.  241   244.  7758149
##  9 2018-04-18  240.  242.  236.  238  10130508
## 10 2018-04-17  243   244.  238.  238  10232257
## # … with 1,791 more rows

Read Excel file

#install.packages("readxl")
library("readxl")
setwd("~/lecture/riii/")
# Read the workbook into a data frame-like object (path is relative to the
# working directory set above).
FinancialReport <- read_excel(path = "./data/FinancialReport.xlsx")
#View(FinancialReport)
# Per-column summary statistics.
summary(object = FinancialReport)
##       年度           股本         財報評分          收盤       
##  Min.   :1999   Min.   : 767   Min.   :59.00   Min.   : 42.60  
##  1st Qu.:2003   1st Qu.:2027   1st Qu.:89.00   1st Qu.: 62.50  
##  Median :2007   Median :2583   Median :92.00   Median : 71.00  
##  Mean   :2007   Mean   :2249   Mean   :88.24   Mean   : 83.75  
##  3rd Qu.:2011   3rd Qu.:2592   3rd Qu.:94.00   3rd Qu.: 97.00  
##  Max.   :2015   Max.   :2643   Max.   :96.00   Max.   :167.00  
##       平均             漲跌            漲跌__1          營業收入   
##  Min.   : 52.40   Min.   :-88.500   Min.   :-53.00   Min.   : 731  
##  1st Qu.: 56.40   1st Qu.: -5.500   1st Qu.: -8.10   1st Qu.:2030  
##  Median : 67.40   Median :  6.500   Median :  8.80   Median :3174  
##  Mean   : 82.29   Mean   :  4.235   Mean   : 11.77   Mean   :3576  
##  3rd Qu.:104.00   3rd Qu.: 20.100   3rd Qu.: 28.00   3rd Qu.:4271  
##  Max.   :147.00   Max.   : 96.000   Max.   :135.00   Max.   :8435  
##     營業毛利       營業利益       業外損益         稅後淨利   
##  Min.   : 315   Min.   : 128   Min.   :-43.70   Min.   : 145  
##  1st Qu.: 765   1st Qu.: 613   1st Qu.:  4.97   1st Qu.: 651  
##  Median :1417   Median :1044   Median : 35.00   Median : 999  
##  Mean   :1639   Mean   :1238   Mean   : 50.67   Mean   :1179  
##  3rd Qu.:2071   3rd Qu.:1592   3rd Qu.: 62.10   3rd Qu.:1616  
##  Max.   :4104   Max.   :3200   Max.   :304.00   Max.   :3066  
##       ROA             EPS        
##  Min.   : 3.93   Min.   : 0.830  
##  1st Qu.:15.50   1st Qu.: 3.450  
##  Median :18.40   Median : 4.140  
##  Mean   :17.15   Mean   : 4.969  
##  3rd Qu.:19.40   3rd Qu.: 6.240  
##  Max.   :24.70   Max.   :11.820

Read JSON

setwd("~/lecture/riii/")
library("jsonlite")
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
# Parse the JSON file (path relative to the working directory set above).
json_data <- fromJSON(txt = "./data/rent.json")
## Warning: JSON string contains (illegal) UTF8 byte-order-mark!
# Convert to a tibble for compact printing.
# NOTE(review): as_tibble() is presumably provided by the tidyverse attached
# earlier in the file; confirm it is loaded before this point.
json_data <- as_tibble(json_data)
head(json_data)
## # A tibble: 6 x 11
##   縣市  經管單位 用途限制 實際用途 每月租金 租期屆滿 建物面積 構造 
##   <chr> <chr>    <chr>    <chr>       <dbl> <chr>    <chr>    <chr>
## 1 臺北市… 台北所   辦公或住宅或法… 商店        22900 105.12.… 190      木石磚造…
## 2 臺北市… 台北所   辦公或住宅或法… 辦公室     187000 107.6.7… 252      磚造 
## 3 臺北市… 台北所   辦公或住宅或法… 商店        56899 105.2.3… 132      加強磚造…
## 4 臺北市… 台北所   住宅或法律許可… 商店        31850 107.7.2… 72       加強磚造…
## 5 臺北市… 臺北所   辦公或住宅或法… 商業        31860 105.12.… 171      磚造 
## 6 臺北市… 臺北所   辦公或住宅或法… 商業        34000 106.2.2… 145.1    RC加強…
## # … with 3 more variables: 總樓層數 <chr>, 建物現況 <chr>, 房屋座落 <chr>

Read XML

#install.packages("XML")
library(XML)
#url <- 'http://opendata.epa.gov.tw/ws/Data/ATM00698/?$format=xml'
#weather <- xmlToDataFrame(url)
#View(weather)
#str(weather)
#weather[ weather$SiteName == '臺北',  c('DataCreationDate','Temperature')   ]

Flow Control

# Two-way branch in block form.
x <- 5
if (x > 3) {
  print("x > 3")
} else {
  print("x <= 3")
}
## [1] "x > 3"
# The same branch written on a single line.
if (x > 3) print("x > 3") else print("x <= 3")
## [1] "x > 3"
# ifelse() is the vectorised form: it returns a value instead of running a branch.
test <- ifelse(x > 3, "x > 3", "x <= 3")
test
## [1] "x > 3"
# Applied to a whole column, ifelse() labels every row at once.
data(iris)
iris$new_species <- factor(
  ifelse(iris$Species == "setosa", "IsSetosa", "NotSetosa")
)
str(iris)
## 'data.frame':    150 obs. of  6 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ new_species : Factor w/ 2 levels "IsSetosa","NotSetosa": 1 1 1 1 1 1 1 1 1 1 ...
# Three-way branch. The final else is reached only when x is neither greater
# than nor equal to 3, so its message reads "x < 3".
x <- 5
if (x > 3) {
  print("x > 3")
} else if (x == 3) {
  print("x == 3")
} else {
  print("x < 3")
}
## [1] "x > 3"
# Print the integers 1 through 10, one per line.
for (i in 1:10) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# Accumulate 1 + 2 + ... + 100 with a for loop. The accumulator is named
# `total` rather than `sum` so that it does not mask base::sum().
total <- 0
for (i in 1:100) {
  total <- total + i
}
total
## [1] 5050
# Vectorised equivalent of the loop above.
sum(1:100)
## [1] 5050
# Nested loops visit the matrix row by row; seq_len() yields an empty
# sequence (instead of c(1, 0)) should a dimension ever be zero.
mat <- matrix(1:9, byrow = TRUE, nrow = 3)
for (i in seq_len(nrow(mat))) {
  for (j in seq_len(ncol(mat))) {
    print(mat[i, j])
  }
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
# The same accumulation written as a while loop over cnt = 0, 1, ..., 100.
total <- 0
cnt <- 0
while (cnt <= 100) {
  total <- total + cnt
  cnt <- cnt + 1
}
total
## [1] 5050

p86

# Build a 9 x 9 multiplication table whose cells are strings such as
# "3 * 4 = 12". The matrix is allocated as character up front, so no
# throw-away numeric matrix is created and no implicit numeric-to-character
# coercion happens on the first assignment.
mat <- matrix(data = "", nrow = 9, ncol = 9)
for (i in seq_len(nrow(mat))) {
  for (j in seq_len(ncol(mat))) {
    # Alternative cell contents (the first one needs a numeric matrix):
    #mat[i,j] = i * j;
    #mat[i,j] = paste(i,"*",j,"= ",i*j)
    mat[i, j] <- sprintf("%s * %s = %s", i, j, i * j)
  }
}
mat
##       [,1]        [,2]         [,3]         [,4]         [,5]        
##  [1,] "1 * 1 = 1" "1 * 2 = 2"  "1 * 3 = 3"  "1 * 4 = 4"  "1 * 5 = 5" 
##  [2,] "2 * 1 = 2" "2 * 2 = 4"  "2 * 3 = 6"  "2 * 4 = 8"  "2 * 5 = 10"
##  [3,] "3 * 1 = 3" "3 * 2 = 6"  "3 * 3 = 9"  "3 * 4 = 12" "3 * 5 = 15"
##  [4,] "4 * 1 = 4" "4 * 2 = 8"  "4 * 3 = 12" "4 * 4 = 16" "4 * 5 = 20"
##  [5,] "5 * 1 = 5" "5 * 2 = 10" "5 * 3 = 15" "5 * 4 = 20" "5 * 5 = 25"
##  [6,] "6 * 1 = 6" "6 * 2 = 12" "6 * 3 = 18" "6 * 4 = 24" "6 * 5 = 30"
##  [7,] "7 * 1 = 7" "7 * 2 = 14" "7 * 3 = 21" "7 * 4 = 28" "7 * 5 = 35"
##  [8,] "8 * 1 = 8" "8 * 2 = 16" "8 * 3 = 24" "8 * 4 = 32" "8 * 5 = 40"
##  [9,] "9 * 1 = 9" "9 * 2 = 18" "9 * 3 = 27" "9 * 4 = 36" "9 * 5 = 45"
##       [,6]         [,7]         [,8]         [,9]        
##  [1,] "1 * 6 = 6"  "1 * 7 = 7"  "1 * 8 = 8"  "1 * 9 = 9" 
##  [2,] "2 * 6 = 12" "2 * 7 = 14" "2 * 8 = 16" "2 * 9 = 18"
##  [3,] "3 * 6 = 18" "3 * 7 = 21" "3 * 8 = 24" "3 * 9 = 27"
##  [4,] "4 * 6 = 24" "4 * 7 = 28" "4 * 8 = 32" "4 * 9 = 36"
##  [5,] "5 * 6 = 30" "5 * 7 = 35" "5 * 8 = 40" "5 * 9 = 45"
##  [6,] "6 * 6 = 36" "6 * 7 = 42" "6 * 8 = 48" "6 * 9 = 54"
##  [7,] "7 * 6 = 42" "7 * 7 = 49" "7 * 8 = 56" "7 * 9 = 63"
##  [8,] "8 * 6 = 48" "8 * 7 = 56" "8 * 8 = 64" "8 * 9 = 72"
##  [9,] "9 * 6 = 54" "9 * 7 = 63" "9 * 8 = 72" "9 * 9 = 81"
# A vector of 81 ones.
rep(1, times = 9^2)
##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [71] 1 1 1 1 1 1 1 1 1 1 1
# Multiplication table via matrix algebra: a 9 x 1 column times a 1 x 9 row
# gives the 9 x 9 matrix of all pairwise products.
mat1 <- matrix(1:9, ncol = 1)
mat2 <- t(mat1)
mat <- mat1 %*% mat2
mat
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
##  [1,]    1    2    3    4    5    6    7    8    9
##  [2,]    2    4    6    8   10   12   14   16   18
##  [3,]    3    6    9   12   15   18   21   24   27
##  [4,]    4    8   12   16   20   24   28   32   36
##  [5,]    5   10   15   20   25   30   35   40   45
##  [6,]    6   12   18   24   30   36   42   48   54
##  [7,]    7   14   21   28   35   42   49   56   63
##  [8,]    8   16   24   32   40   48   56   64   72
##  [9,]    9   18   27   36   45   54   63   72   81

Function

# Return `a` plus 3. The value of the last expression is the function's result.
addThree <- function(a) {
  a + 3
}
addThree(3)
## [1] 6
# With a default argument: `a` is 3 when the caller omits it.
addThree_2 <- function(a = 3) {
  a + 3
}
addThree_2()
## [1] 6
# A trailing semicolon at the end of a statement is optional in R; the
# function still returns the value of its last expression.
addThree_3 <- function(a = 3) {
  a + 3;
}
addThree_3()
## [1] 6
# Lazy evaluation: `a` has no default, yet f2() can be called without it
# because the body never uses `a` (or `c`); only `b` is evaluated.
f2 <- function(a, b = 2, c = NULL) {
  b + 1
}
f2()
## [1] 3
# Local variable: the assignment inside f3() creates a `b` local to the call
# and leaves the top-level `b` untouched.
b <- 3
f3 <- function() {
  b <- 2
  b
}
f3()
## [1] 2

lapply sapply

x <- list(c(1, 2, 3, 4), c(5, 6, 7, 8))
## lapply() returns its results as a list
lapply(X = x, FUN = sum)
## [[1]]
## [1] 10
## 
## [[2]]
## [1] 26
class(lapply(X = x, FUN = sum))
## [1] "list"
## sapply() returns a simplified result (vector, matrix)
sapply(X = x, FUN = sum)
## [1] 10 26
## here sapply() gives the same values as unlist() applied to the lapply() result
unlist(lapply(X = x, FUN = sum))
## [1] 10 26
class(sapply(X = x, FUN = sum))
## [1] "numeric"
# Apply a named function to every list element.
lapply(X = x, FUN = addThree)
## [[1]]
## [1] 4 5 6 7
## 
## [[2]]
## [1]  8  9 10 11
## Anonymous function: the same operation defined inline
lapply(X = x, FUN = function(e) e + 3)
## [[1]]
## [1] 4 5 6 7
## 
## [[2]]
## [1]  8  9 10 11
# Two 2 x 2 integer matrices, each built from its rows.
m1 <- rbind(1:2, 3:4)
m2 <- rbind(5:6, 7:8)
li <- list(m1, m2)
# First row of every matrix in the list.
lapply(X = li, FUN = function(e) e[1, ])
## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 5 6
# Mean over all cells of every matrix in the list.
lapply(X = li, FUN = mean)
## [[1]]
## [1] 2.5
## 
## [[2]]
## [1] 6.5

apply tapply

# apply() runs a function over one margin of a matrix:
# margin 1 = rows, margin 2 = columns.
m <- matrix(1:4, byrow = TRUE, nrow = 2)
apply(m, 1, sum) # rowsums
## [1] 3 7
apply(m, 2, sum) # colsums
## [1] 4 6
rowmeans <- apply(m, 1, mean)
colmeans <- apply(m, 2, mean)

# tapply() splits the values of `x` by a grouping vector and applies the
# function to each group. The grouping vector is named `group` rather than
# `t` so that it does not mask base::t().
x <- c(80, 70, 59, 88, 72, 57)
group <- c(1, 1, 2, 1, 1, 2)
tapply(x, group, mean)
##    1    2 
## 77.5 58.0
data(iris)

# Mean sepal length per species.
tapply(iris$Sepal.Length, iris$Species, mean)
##     setosa versicolor  virginica 
##      5.006      5.936      6.588
# Several statistics per species: the function returns a named list for
# each group.
tapply(iris$Sepal.Length, iris$Species, function(e) {
  list(avg = mean(e), md = median(e), s_d = sd(e))
})
## $setosa
## $setosa$avg
## [1] 5.006
## 
## $setosa$md
## [1] 5
## 
## $setosa$s_d
## [1] 0.3524897
## 
## 
## $versicolor
## $versicolor$avg
## [1] 5.936
## 
## $versicolor$md
## [1] 5.9
## 
## $versicolor$s_d
## [1] 0.5161711
## 
## 
## $virginica
## $virginica$avg
## [1] 6.588
## 
## $virginica$md
## [1] 6.5
## 
## $virginica$s_d
## [1] 0.6358796
# For each of the first four columns of iris, compute the mean per species.
s <- lapply(names(iris)[1:4], function(e) {
  tapply(iris[[e]], iris$Species, mean)
})

# The result carries no element names yet, so names(s) is NULL ...
names(s)
## NULL
# ... until the column names are attached explicitly.
names(s) <- names(iris)[1:4]

探索性資料分析

表格

#import data
#getwd()
setwd("~/lecture/riii")
# Restore the objects saved in the .Rdata file; the code below expects it to
# provide a data frame named `cdc`.
load(file = "./Statistics/cdc.Rdata")

getwd()
## [1] "/Users/YorkLin/lecture/riii"
str(object = cdc)
## 'data.frame':    20000 obs. of  9 variables:
##  $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
##  $ exerany : num  0 0 1 1 0 1 1 0 0 1 ...
##  $ hlthplan: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ smoke100: num  0 1 1 0 0 0 0 0 1 0 ...
##  $ height  : num  70 64 60 66 61 64 71 67 65 70 ...
##  $ weight  : int  175 125 105 132 150 114 194 170 150 180 ...
##  $ wtdesire: int  175 115 105 124 130 114 185 160 130 170 ...
##  $ age     : int  77 33 49 42 55 55 31 45 27 44 ...
##  $ gender  : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
head(cdc)
##     genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1      good       0        1        0     70    175      175  77      m
## 2      good       0        1        1     64    125      115  33      f
## 3      good       1        1        1     60    105      105  49      f
## 4      good       1        1        0     66    132      124  42      f
## 5 very good       0        1        0     61    150      130  55      f
## 6 very good       1        1        0     64    114      114  55      f
names(cdc)
## [1] "genhlth"  "exerany"  "hlthplan" "smoke100" "height"   "weight"  
## [7] "wtdesire" "age"      "gender"
# Convert data types: the three 0/1 indicator columns become factors.
cdc[c("exerany", "hlthplan", "smoke100")] <-
  lapply(cdc[c("exerany", "hlthplan", "smoke100")], as.factor)

str(cdc)
## 'data.frame':    20000 obs. of  9 variables:
##  $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
##  $ exerany : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 2 1 1 2 ...
##  $ hlthplan: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ smoke100: Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
##  $ height  : num  70 64 60 66 61 64 71 67 65 70 ...
##  $ weight  : int  175 125 105 132 150 114 194 170 150 180 ...
##  $ wtdesire: int  175 115 105 124 130 114 185 160 130 170 ...
##  $ age     : int  77 33 49 42 55 55 31 45 27 44 ...
##  $ gender  : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...
# One-way frequency table
table(cdc$exerany)
## 
##     0     1 
##  5086 14914
# Relative proportions
table(cdc$exerany) / length(cdc$exerany)
## 
##      0      1 
## 0.2543 0.7457
# The same proportions formatted as percentage strings.
paste(table(cdc$exerany) / nrow(cdc) * 100, "%")
## [1] "25.43 %" "74.57 %"
# Two-way frequency table
table(cdc$gender, cdc$exerany)
##    
##        0    1
##   m 2149 7420
##   f 2937 7494
# Proportions within each gender (each row of the table is divided by its own
# total). apply() returns one column per processed row, so the genders end up
# as columns in the printed result.
apply(table(cdc$gender, cdc$exerany), 1, function(e) e / sum(e))
##    
##             m         f
##   0 0.2245794 0.2815646
##   1 0.7754206 0.7184354

表格(續)

# Three or more dimensions: one gender x genhlth table per exerany level.
table(cdc$gender, cdc$genhlth, cdc$exerany)
## , ,  = 0
## 
##    
##     excellent very good good fair poor
##   m       335       606  723  340  145
##   f       427       746 1008  517  239
## 
## , ,  = 1
## 
##    
##     excellent very good good fair poor
##   m      1963      2776 1999  544  138
##   f      1932      2844 1945  618  155
# Tabulating continuous data: one cell per distinct value.
table(cdc$height)
## 
##   48   49   50   51   52   53   54   55   56   57   58   59   60   61   62 
##    2    1    1    2    2    7    3    4   17   20   51  170  613  594 1272 
##   63   64   65   66   67   68   69   70   71   72   73   74   75   76   77 
## 1368 1662 1568 1843 1671 1505 1380 1500 1296 1393  784  605  321  189   80 
##   78   79   80   81   82   83   84   93 
##   43   15   10    3    2    1    1    1
summary(cdc$height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   48.00   64.00   67.00   67.18   70.00   93.00
# cut() bins the heights into intervals of width 5 between 45 and 95.
head(cut(cdc$height, breaks = seq(45, 95, by = 5)))
## [1] (65,70] (60,65] (55,60] (65,70] (60,65] (60,65]
## 10 Levels: (45,50] (50,55] (55,60] (60,65] (65,70] (70,75] ... (90,95]
# Intervals include the upper bound and exclude the lower bound.
table(cut(cdc$height, breaks = seq(45, 95, by = 5), right = TRUE))
## 
## (45,50] (50,55] (55,60] (60,65] (65,70] (70,75] (75,80] (80,85] (85,90] 
##       4      18     871    6464    7899    4399     337       7       0 
## (90,95] 
##       1
# Intervals include the lower bound and exclude the upper bound.
table(cut(cdc$height, breaks = seq(45, 95, by = 5), right = FALSE))
## 
## [45,50) [50,55) [55,60) [60,65) [65,70) [70,75) [75,80) [80,85) [85,90) 
##       3      15     262    5509    7967    5578     648      17       0 
## [90,95) 
##       1