p54 example

# Exam scores (first, second) for each student.
kevin = c(85, 73)
marry = c(72, 64)
jerry = c(59, 66)
# rbind() stacks the vectors as rows and takes the row names from the
# variable names; only the column names still need to be set.
mat = rbind(kevin, marry, jerry)
colnames(mat) = c('first', 'second')

# Weighted final score: 40% of the first exam plus 60% of the second.
final = mat %*% c(0.4, 0.6)
final

##       [,1]
## kevin 77.8
## marry 67.2
## jerry 63.2

cbind(mat,final)

##       first second     
## kevin    85     73 77.8
## marry    72     64 67.2
## jerry    59     66 63.2

mat2 = cbind(mat,final)
mat2

##       first second     
## kevin    85     73 77.8
## marry    72     64 67.2
## jerry    59     66 63.2

colnames(mat2)[ncol(mat2)] = 'final'
mat2

##       first second final
## kevin    85     73  77.8
## marry    72     64  67.2
## jerry    59     66  63.2

Dataframe

# Three parallel vectors, one element per person.
name <- c("Joe", "Bob", "Vicky")
age <- c(28, 26, 34)
gender <- c("Male","Male","Female")
# stringsAsFactors is set explicitly: its default changed from TRUE to FALSE
# in R 4.0.0, and the str()/summary() output shown for df lists name and
# gender as factors.
df <- data.frame(name, age, gender, stringsAsFactors = TRUE)
class(df)

## [1] "data.frame"

str(df)

## 'data.frame':    3 obs. of  3 variables:
##  $ name  : Factor w/ 3 levels "Bob","Joe","Vicky": 2 1 3
##  $ age   : num  28 26 34
##  $ gender: Factor w/ 2 levels "Female","Male": 2 2 1

summary(df)

##     name        age           gender 
##  Bob  :1   Min.   :26.00   Female:1  
##  Joe  :1   1st Qu.:27.00   Male  :2  
##  Vicky:1   Median :28.00             
##            Mean   :29.33             
##            3rd Qu.:31.00             
##            Max.   :34.00

data(iris)
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

tail(iris)

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

tail(iris, 10)

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 141          6.7         3.1          5.6         2.4 virginica
## 142          6.9         3.1          5.1         2.3 virginica
## 143          5.8         2.7          5.1         1.9 virginica
## 144          6.8         3.2          5.9         2.3 virginica
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

#取前三列資料
iris[1:3,]

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa

#取前三列第一行的資料
iris[1:3,1]

## [1] 5.1 4.9 4.7

#取前三列Sepal.Length欄位的資料
iris[1:3,"Sepal.Length"]

## [1] 5.1 4.9 4.7

head(iris[,1:2])

##   Sepal.Length Sepal.Width
## 1          5.1         3.5
## 2          4.9         3.0
## 3          4.7         3.2
## 4          4.6         3.1
## 5          5.0         3.6
## 6          5.4         3.9

iris$"Sepal.Length"[1:3]

## [1] 5.1 4.9 4.7

# Take the first five rows, keeping only the sepal length and width columns
Five.Sepal.iris = head(iris[c("Sepal.Length", "Sepal.Width")], n = 5)
# Rows can also be filtered with a logical condition
setosa.data = subset(iris, Species == "setosa", select = 1:5)
str(setosa.data)

## 'data.frame':    50 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

#使用which 做資料篩選
which(iris$Species=="setosa")

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50

#用order做資料排序
iris[order(iris$Sepal.Length, decreasing = TRUE),]

##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 132          7.9         3.8          6.4         2.0  virginica
## 118          7.7         3.8          6.7         2.2  virginica
## 119          7.7         2.6          6.9         2.3  virginica
## 123          7.7         2.8          6.7         2.0  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 106          7.6         3.0          6.6         2.1  virginica
## 131          7.4         2.8          6.1         1.9  virginica
## 108          7.3         2.9          6.3         1.8  virginica
## 110          7.2         3.6          6.1         2.5  virginica
## 126          7.2         3.2          6.0         1.8  virginica
## 130          7.2         3.0          5.8         1.6  virginica
## 103          7.1         3.0          5.9         2.1  virginica
## 51           7.0         3.2          4.7         1.4 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
## 121          6.9         3.2          5.7         2.3  virginica
## 140          6.9         3.1          5.4         2.1  virginica
## 142          6.9         3.1          5.1         2.3  virginica
## 77           6.8         2.8          4.8         1.4 versicolor
## 113          6.8         3.0          5.5         2.1  virginica
## 144          6.8         3.2          5.9         2.3  virginica
## 66           6.7         3.1          4.4         1.4 versicolor
## 78           6.7         3.0          5.0         1.7 versicolor
## 87           6.7         3.1          4.7         1.5 versicolor
## 109          6.7         2.5          5.8         1.8  virginica
## 125          6.7         3.3          5.7         2.1  virginica
## 141          6.7         3.1          5.6         2.4  virginica
## 145          6.7         3.3          5.7         2.5  virginica
## 146          6.7         3.0          5.2         2.3  virginica
## 59           6.6         2.9          4.6         1.3 versicolor
## 76           6.6         3.0          4.4         1.4 versicolor
## 55           6.5         2.8          4.6         1.5 versicolor
## 105          6.5         3.0          5.8         2.2  virginica
## 111          6.5         3.2          5.1         2.0  virginica
## 117          6.5         3.0          5.5         1.8  virginica
## 148          6.5         3.0          5.2         2.0  virginica
## 52           6.4         3.2          4.5         1.5 versicolor
## 75           6.4         2.9          4.3         1.3 versicolor
## 112          6.4         2.7          5.3         1.9  virginica
## 116          6.4         3.2          5.3         2.3  virginica
## 129          6.4         2.8          5.6         2.1  virginica
## 133          6.4         2.8          5.6         2.2  virginica
## 138          6.4         3.1          5.5         1.8  virginica
## 57           6.3         3.3          4.7         1.6 versicolor
## 73           6.3         2.5          4.9         1.5 versicolor
## 88           6.3         2.3          4.4         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 104          6.3         2.9          5.6         1.8  virginica
## 124          6.3         2.7          4.9         1.8  virginica
## 134          6.3         2.8          5.1         1.5  virginica
## 137          6.3         3.4          5.6         2.4  virginica
## 147          6.3         2.5          5.0         1.9  virginica
## 69           6.2         2.2          4.5         1.5 versicolor
## 98           6.2         2.9          4.3         1.3 versicolor
## 127          6.2         2.8          4.8         1.8  virginica
## 149          6.2         3.4          5.4         2.3  virginica
## 64           6.1         2.9          4.7         1.4 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 74           6.1         2.8          4.7         1.2 versicolor
## 92           6.1         3.0          4.6         1.4 versicolor
## 128          6.1         3.0          4.9         1.8  virginica
## 135          6.1         2.6          5.6         1.4  virginica
## 63           6.0         2.2          4.0         1.0 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 86           6.0         3.4          4.5         1.6 versicolor
## 120          6.0         2.2          5.0         1.5  virginica
## 139          6.0         3.0          4.8         1.8  virginica
## 62           5.9         3.0          4.2         1.5 versicolor
## 71           5.9         3.2          4.8         1.8 versicolor
## 150          5.9         3.0          5.1         1.8  virginica
## 15           5.8         4.0          1.2         0.2     setosa
## 68           5.8         2.7          4.1         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 102          5.8         2.7          5.1         1.9  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 143          5.8         2.7          5.1         1.9  virginica
## 16           5.7         4.4          1.5         0.4     setosa
## 19           5.7         3.8          1.7         0.3     setosa
## 56           5.7         2.8          4.5         1.3 versicolor
## 80           5.7         2.6          3.5         1.0 versicolor
## 96           5.7         3.0          4.2         1.2 versicolor
## 97           5.7         2.9          4.2         1.3 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 114          5.7         2.5          5.0         2.0  virginica
## 65           5.6         2.9          3.6         1.3 versicolor
## 67           5.6         3.0          4.5         1.5 versicolor
## 70           5.6         2.5          3.9         1.1 versicolor
## 89           5.6         3.0          4.1         1.3 versicolor
## 95           5.6         2.7          4.2         1.3 versicolor
## 122          5.6         2.8          4.9         2.0  virginica
## 34           5.5         4.2          1.4         0.2     setosa
## 37           5.5         3.5          1.3         0.2     setosa
## 54           5.5         2.3          4.0         1.3 versicolor
## 81           5.5         2.4          3.8         1.1 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 90           5.5         2.5          4.0         1.3 versicolor
## 91           5.5         2.6          4.4         1.2 versicolor
## 6            5.4         3.9          1.7         0.4     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 17           5.4         3.9          1.3         0.4     setosa
## 21           5.4         3.4          1.7         0.2     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 85           5.4         3.0          4.5         1.5 versicolor
## 49           5.3         3.7          1.5         0.2     setosa
## 28           5.2         3.5          1.5         0.2     setosa
## 29           5.2         3.4          1.4         0.2     setosa
## 33           5.2         4.1          1.5         0.1     setosa
## 60           5.2         2.7          3.9         1.4 versicolor
## 1            5.1         3.5          1.4         0.2     setosa
## 18           5.1         3.5          1.4         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 22           5.1         3.7          1.5         0.4     setosa
## 24           5.1         3.3          1.7         0.5     setosa
## 40           5.1         3.4          1.5         0.2     setosa
## 45           5.1         3.8          1.9         0.4     setosa
## 47           5.1         3.8          1.6         0.2     setosa
## 99           5.1         2.5          3.0         1.1 versicolor
## 5            5.0         3.6          1.4         0.2     setosa
## 8            5.0         3.4          1.5         0.2     setosa
## 26           5.0         3.0          1.6         0.2     setosa
## 27           5.0         3.4          1.6         0.4     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 41           5.0         3.5          1.3         0.3     setosa
## 44           5.0         3.5          1.6         0.6     setosa
## 50           5.0         3.3          1.4         0.2     setosa
## 61           5.0         2.0          3.5         1.0 versicolor
## 94           5.0         2.3          3.3         1.0 versicolor
## 2            4.9         3.0          1.4         0.2     setosa
## 10           4.9         3.1          1.5         0.1     setosa
## 35           4.9         3.1          1.5         0.2     setosa
## 38           4.9         3.6          1.4         0.1     setosa
## 58           4.9         2.4          3.3         1.0 versicolor
## 107          4.9         2.5          4.5         1.7  virginica
## 12           4.8         3.4          1.6         0.2     setosa
## 13           4.8         3.0          1.4         0.1     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 31           4.8         3.1          1.6         0.2     setosa
## 46           4.8         3.0          1.4         0.3     setosa
## 3            4.7         3.2          1.3         0.2     setosa
## 30           4.7         3.2          1.6         0.2     setosa
## 4            4.6         3.1          1.5         0.2     setosa
## 7            4.6         3.4          1.4         0.3     setosa
## 23           4.6         3.6          1.0         0.2     setosa
## 48           4.6         3.2          1.4         0.2     setosa
## 42           4.5         2.3          1.3         0.3     setosa
## 9            4.4         2.9          1.4         0.2     setosa
## 39           4.4         3.0          1.3         0.2     setosa
## 43           4.4         3.2          1.3         0.2     setosa
## 14           4.3         3.0          1.1         0.1     setosa

sort(iris$Sepal.Length, decreasing = TRUE)

##   [1] 7.9 7.7 7.7 7.7 7.7 7.6 7.4 7.3 7.2 7.2 7.2 7.1 7.0 6.9 6.9 6.9 6.9
##  [18] 6.8 6.8 6.8 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.6 6.6 6.5 6.5 6.5 6.5
##  [35] 6.5 6.4 6.4 6.4 6.4 6.4 6.4 6.4 6.3 6.3 6.3 6.3 6.3 6.3 6.3 6.3 6.3
##  [52] 6.2 6.2 6.2 6.2 6.1 6.1 6.1 6.1 6.1 6.1 6.0 6.0 6.0 6.0 6.0 6.0 5.9
##  [69] 5.9 5.9 5.8 5.8 5.8 5.8 5.8 5.8 5.8 5.7 5.7 5.7 5.7 5.7 5.7 5.7 5.7
##  [86] 5.6 5.6 5.6 5.6 5.6 5.6 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.4 5.4 5.4 5.4
## [103] 5.4 5.4 5.3 5.2 5.2 5.2 5.2 5.1 5.1 5.1 5.1 5.1 5.1 5.1 5.1 5.1 5.0
## [120] 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 4.9 4.9 4.9 4.9 4.9 4.9 4.8 4.8
## [137] 4.8 4.8 4.8 4.7 4.7 4.6 4.6 4.6 4.6 4.5 4.4 4.4 4.4 4.3

File read and write

# getwd()
# The relative paths below are resolved against this working directory.
setwd('~/lecture/riii')
# setwd("__your_working_directory_path__")
# Comma-separated file whose first line holds the column names.
tw2330 = read.csv(file = "data/2330.csv", header = TRUE)
#tw2330 = read.csv('https://github.com/YuHsuanLin/riii/raw/master/data/2330.csv')
# Pipe-delimited text file without a header line.
test.data = read.table(file = "data/match.txt", sep = "|", header = FALSE)

p70

setwd('~/lecture/riii')
tw2330 = read.csv("./data/2330.csv", header=TRUE)
str(tw2330)

## 'data.frame':    1801 obs. of  6 variables:
##  $ Date  : Factor w/ 1801 levels "2011-01-03","2011-01-04",..: 1801 1800 1799 1798 1797 1796 1795 1794 1793 1792 ...
##  $ Open  : num  224 225 225 226 225 ...
##  $ High  : num  228 226 226 226 228 ...
##  $ Low   : num  222 221 221 224 225 ...
##  $ Close : num  227 224 222 225 227 ...
##  $ Volume: int  6448117 7619247 10731921 10535437 9272078 16080436 29507056 7758149 10130508 10232257 ...

# Parse the Date column into Date objects so it can be compared chronologically.
tw2330$Date = as.Date(tw2330$Date)

# Keep only the 2017 rows (start inclusive, end exclusive). The bounds are
# converted explicitly instead of relying on implicit character-to-Date coercion.
tw2330_2017 = tw2330[(tw2330$Date >= as.Date('2017-01-01') & tw2330$Date < as.Date('2018-01-01')),]
max(tw2330_2017$Close)

## [1] 244

# Sort the 2017 rows by closing price, highest first.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ordered_stock = tw2330_2017[order(tw2330_2017$Close, decreasing = TRUE),]

List

item= list(thing='hat',size=8.25)
item$thing

## [1] "hat"

item$size

## [1] 8.25

flower= list(title="iris dataset", data= iris)
class(flower)

## [1] "list"

class(flower$data)

## [1] "data.frame"

flower$data[1,"Sepal.Width"]

## [1] 3.5

li = list(c(1,2,3,4),c(5,6,7,8))
li[[1]]

## [1] 1 2 3 4

two dataframes join

# Combine two data frames with merge()

df1 = data.frame(CustomerId = 1:6,
                 Product = rep(c("Toaster", "Radio"), each = 3))
df2 = data.frame(CustomerId = c(2, 4, 6),
                 State = rep(c("Alabama", "Ohio"), times = c(2, 1)))

# Inner join: keep only the CustomerId values present in both data frames
merge(df1, df2, by = "CustomerId")

##   CustomerId Product   State
## 1          2 Toaster Alabama
## 2          4   Radio Alabama
## 3          6   Radio    Ohio

#Outer join: 
merge(x = df1, y = df2, by = "CustomerId", all = TRUE)

##   CustomerId Product   State
## 1          1 Toaster    <NA>
## 2          2 Toaster Alabama
## 3          3 Toaster    <NA>
## 4          4   Radio Alabama
## 5          5   Radio    <NA>
## 6          6   Radio    Ohio

#Left outer: 
merge(x = df1, y = df2, by = "CustomerId", all.x = TRUE)

##   CustomerId Product   State
## 1          1 Toaster    <NA>
## 2          2 Toaster Alabama
## 3          3 Toaster    <NA>
## 4          4   Radio Alabama
## 5          5   Radio    <NA>
## 6          6   Radio    Ohio

#Right outer: 
merge(x = df1, y = df2, by = "CustomerId", all.y = TRUE)

##   CustomerId Product   State
## 1          2 Toaster Alabama
## 2          4   Radio Alabama
## 3          6   Radio    Ohio

#Cross join: 
merge(x = df1, y = df2, by = NULL)

##    CustomerId.x Product CustomerId.y   State
## 1             1 Toaster            2 Alabama
## 2             2 Toaster            2 Alabama
## 3             3 Toaster            2 Alabama
## 4             4   Radio            2 Alabama
## 5             5   Radio            2 Alabama
## 6             6   Radio            2 Alabama
## 7             1 Toaster            4 Alabama
## 8             2 Toaster            4 Alabama
## 9             3 Toaster            4 Alabama
## 10            4   Radio            4 Alabama
## 11            5   Radio            4 Alabama
## 12            6   Radio            4 Alabama
## 13            1 Toaster            6    Ohio
## 14            2 Toaster            6    Ohio
## 15            3 Toaster            6    Ohio
## 16            4   Radio            6    Ohio
## 17            5   Radio            6    Ohio
## 18            6   Radio            6    Ohio

readr package

#install.packages("tidyverse")
#install.packages("readr")
library('tidyverse')

## ─ Attaching packages ─────────────────── tidyverse 1.2.1 ─

## ✔ ggplot2 3.1.0       ✔ purrr   0.2.5  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.1       ✔ stringr 1.3.1  
## ✔ readr   1.3.1       ✔ forcats 0.3.0

## Warning: package 'tibble' was built under R version 3.5.2

## Warning: package 'dplyr' was built under R version 3.5.2

## ─ Conflicts ──────────────────── tidyverse_conflicts() ─
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

# The relative path below is resolved against this working directory.
setwd('~/lecture/riii')
# Read the CSV with readr, taking the column names from the first row.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
stock = read_csv('data/2330.csv',col_names=TRUE)

## Parsed with column specification:
## cols(
##   Date = col_date(format = ""),
##   Open = col_double(),
##   High = col_double(),
##   Low = col_double(),
##   Close = col_double(),
##   Volume = col_double()
## )

# Same read with the column types declared up front instead of being guessed.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
read_csv('data/2330.csv',col_names=TRUE,col_types = cols(
  Date = col_date(format = ""),
  Open = col_double(),
  High = col_double(),
  Low = col_double(),
  Close = col_double(),
  Volume = col_double()
))

## # A tibble: 1,801 x 6
##    Date        Open  High   Low Close   Volume
##    <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>
##  1 2018-04-30  224.  228   222.  227   6448117
##  2 2018-04-27  225   226.  221   224.  7619247
##  3 2018-04-26  225   226.  221   222  10731921
##  4 2018-04-25  226.  226   224   225  10535437
##  5 2018-04-24  225   228.  225   227   9272078
##  6 2018-04-23  226.  228.  225   226. 16080436
##  7 2018-04-20  228   232.  228   229  29507056
##  8 2018-04-19  242   244.  241   244.  7758149
##  9 2018-04-18  240.  242.  236.  238  10130508
## 10 2018-04-17  243   244.  238.  238  10232257
## # … with 1,791 more rows

read excel file

# One-time setup: uncomment to install readxl.
#install.packages("readxl")
library(readxl)
# The relative path below is resolved against this working directory.
setwd('~/lecture/riii/')
# Read the workbook; no sheet argument is passed.
FinancialReport <- read_excel(path = "./data/FinancialReport.xlsx")
#View(FinancialReport)
# Per-column summary statistics.
summary(object = FinancialReport)

##       年度           股本         財報評分          收盤       
##  Min.   :1999   Min.   : 767   Min.   :59.00   Min.   : 42.60  
##  1st Qu.:2003   1st Qu.:2027   1st Qu.:89.00   1st Qu.: 62.50  
##  Median :2007   Median :2583   Median :92.00   Median : 71.00  
##  Mean   :2007   Mean   :2249   Mean   :88.24   Mean   : 83.75  
##  3rd Qu.:2011   3rd Qu.:2592   3rd Qu.:94.00   3rd Qu.: 97.00  
##  Max.   :2015   Max.   :2643   Max.   :96.00   Max.   :167.00  
##       平均             漲跌            漲跌__1          營業收入   
##  Min.   : 52.40   Min.   :-88.500   Min.   :-53.00   Min.   : 731  
##  1st Qu.: 56.40   1st Qu.: -5.500   1st Qu.: -8.10   1st Qu.:2030  
##  Median : 67.40   Median :  6.500   Median :  8.80   Median :3174  
##  Mean   : 82.29   Mean   :  4.235   Mean   : 11.77   Mean   :3576  
##  3rd Qu.:104.00   3rd Qu.: 20.100   3rd Qu.: 28.00   3rd Qu.:4271  
##  Max.   :147.00   Max.   : 96.000   Max.   :135.00   Max.   :8435  
##     營業毛利       營業利益       業外損益         稅後淨利   
##  Min.   : 315   Min.   : 128   Min.   :-43.70   Min.   : 145  
##  1st Qu.: 765   1st Qu.: 613   1st Qu.:  4.97   1st Qu.: 651  
##  Median :1417   Median :1044   Median : 35.00   Median : 999  
##  Mean   :1639   Mean   :1238   Mean   : 50.67   Mean   :1179  
##  3rd Qu.:2071   3rd Qu.:1592   3rd Qu.: 62.10   3rd Qu.:1616  
##  Max.   :4104   Max.   :3200   Max.   :304.00   Max.   :3066  
##       ROA             EPS        
##  Min.   : 3.93   Min.   : 0.830  
##  1st Qu.:15.50   1st Qu.: 3.450  
##  Median :18.40   Median : 4.140  
##  Mean   :17.15   Mean   : 4.969  
##  3rd Qu.:19.40   3rd Qu.: 6.240  
##  Max.   :24.70   Max.   :11.820

read json

setwd('~/lecture/riii/')
library(jsonlite)

## 
## Attaching package: 'jsonlite'

## The following object is masked from 'package:purrr':
## 
##     flatten

json_data<- fromJSON('./data/rent.json')

## Warning: JSON string contains (illegal) UTF8 byte-order-mark!

json_data <- as_tibble(json_data)
head(json_data)

## # A tibble: 6 x 11
##   縣市  經管單位 用途限制 實際用途 每月租金 租期屆滿 建物面積 構造 
##   <chr> <chr>    <chr>    <chr>       <dbl> <chr>    <chr>    <chr>
## 1 臺北市… 台北所   辦公或住宅或法… 商店        22900 105.12.… 190      木石磚造…
## 2 臺北市… 台北所   辦公或住宅或法… 辦公室     187000 107.6.7… 252      磚造 
## 3 臺北市… 台北所   辦公或住宅或法… 商店        56899 105.2.3… 132      加強磚造…
## 4 臺北市… 台北所   住宅或法律許可… 商店        31850 107.7.2… 72       加強磚造…
## 5 臺北市… 臺北所   辦公或住宅或法… 商業        31860 105.12.… 171      磚造 
## 6 臺北市… 臺北所   辦公或住宅或法… 商業        34000 106.2.2… 145.1    RC加強…
## # … with 3 more variables: 總樓層數 <chr>, 建物現況 <chr>, 房屋座落 <chr>

read xml

#install.packages("XML")
library(XML)
#url <- 'http://opendata.epa.gov.tw/ws/Data/ATM00698/?$format=xml'
#weather <- xmlToDataFrame(url)
#View(weather)
#str(weather)
#weather[ weather$SiteName == '臺北',  c('DataCreationDate','Temperature')   ]

Flow Control

# Two-way branch on a scalar, written with the test inverted.
x = 5
if (x <= 3) {
  print("x <= 3")
} else {
  print("x > 3")
}

## [1] "x > 3"

if(x>3) print("x > 3") else print("x <= 3")

## [1] "x > 3"

test = ifelse(x>3,"x > 3","x <= 3")
test

## [1] "x > 3"

# Load the built-in iris data set, then add a two-level factor column that
# flags whether each row is a setosa.
data(iris)
iris[["new_species"]] = factor(ifelse(iris$Species != "setosa", "NotSetosa", "IsSetosa"))
str(iris)

## 'data.frame':    150 obs. of  6 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ new_species : Factor w/ 2 levels "IsSetosa","NotSetosa": 1 1 1 1 1 1 1 1 1 1 ...

# Three-way branch on x: greater than, equal to, or less than 3.
x = 5
if (x > 3) {
  print("x > 3")
} else if (x == 3) {
  print("x == 3")
} else {
  # x == 3 was handled above, so this branch is reached only when x < 3.
  print("x < 3")
}

## [1] "x > 3"

# Print the integers 1 through 10, one per line.
for (i in seq_len(10)) print(i)

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10

# Accumulate 1 + 2 + ... + 100 with an explicit loop.
# The accumulator shares its name with the base function sum(); calls such as
# sum(1:100) still work because R looks for a function when a name is called.
sum = 0
for (i in seq_len(100)) {
  sum = i + sum
}
sum

## [1] 5050

sum(1:100)

## [1] 5050

# Build a 3 x 3 matrix holding 1..9 row by row, then print every cell in
# row-major order.
mat = t(matrix(1:9, ncol = 3))
for (i in seq_len(nrow(mat))) {
  for (j in seq_len(ncol(mat))) {
    print(mat[i, j])
  }
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9

# Add 0 + 1 + ... + 100 with a while loop; the exit test sits inside the body.
sum = 0
cnt = 0
while (TRUE) {
  if (cnt > 100) break
  sum = sum + cnt
  cnt = cnt + 1
}
sum

## [1] 5050

p86

# Build a 9 x 9 multiplication table whose cells hold strings such as
# "3 * 4 = 12".
# The matrix is created as character from the start because every cell
# receives a string. The earlier all-ones matrix was overwritten before it
# was ever used, so it is dropped.
mat = matrix(data = "", nrow = 9, ncol = 9)
for (i in seq_len(nrow(mat))) {
  for (j in seq_len(ncol(mat))) {
    # Alternative cell contents (the first one needs a numeric matrix):
    #mat[i,j] = i * j;
    #mat[i,j] = paste(i,"*",j,"= ",i*j)
    mat[i, j] = sprintf("%s * %s = %s", i, j, i * j)
  }
}
mat

##       [,1]        [,2]         [,3]         [,4]         [,5]        
##  [1,] "1 * 1 = 1" "1 * 2 = 2"  "1 * 3 = 3"  "1 * 4 = 4"  "1 * 5 = 5" 
##  [2,] "2 * 1 = 2" "2 * 2 = 4"  "2 * 3 = 6"  "2 * 4 = 8"  "2 * 5 = 10"
##  [3,] "3 * 1 = 3" "3 * 2 = 6"  "3 * 3 = 9"  "3 * 4 = 12" "3 * 5 = 15"
##  [4,] "4 * 1 = 4" "4 * 2 = 8"  "4 * 3 = 12" "4 * 4 = 16" "4 * 5 = 20"
##  [5,] "5 * 1 = 5" "5 * 2 = 10" "5 * 3 = 15" "5 * 4 = 20" "5 * 5 = 25"
##  [6,] "6 * 1 = 6" "6 * 2 = 12" "6 * 3 = 18" "6 * 4 = 24" "6 * 5 = 30"
##  [7,] "7 * 1 = 7" "7 * 2 = 14" "7 * 3 = 21" "7 * 4 = 28" "7 * 5 = 35"
##  [8,] "8 * 1 = 8" "8 * 2 = 16" "8 * 3 = 24" "8 * 4 = 32" "8 * 5 = 40"
##  [9,] "9 * 1 = 9" "9 * 2 = 18" "9 * 3 = 27" "9 * 4 = 36" "9 * 5 = 45"
##       [,6]         [,7]         [,8]         [,9]        
##  [1,] "1 * 6 = 6"  "1 * 7 = 7"  "1 * 8 = 8"  "1 * 9 = 9" 
##  [2,] "2 * 6 = 12" "2 * 7 = 14" "2 * 8 = 16" "2 * 9 = 18"
##  [3,] "3 * 6 = 18" "3 * 7 = 21" "3 * 8 = 24" "3 * 9 = 27"
##  [4,] "4 * 6 = 24" "4 * 7 = 28" "4 * 8 = 32" "4 * 9 = 36"
##  [5,] "5 * 6 = 30" "5 * 7 = 35" "5 * 8 = 40" "5 * 9 = 45"
##  [6,] "6 * 6 = 36" "6 * 7 = 42" "6 * 8 = 48" "6 * 9 = 54"
##  [7,] "7 * 6 = 42" "7 * 7 = 49" "7 * 8 = 56" "7 * 9 = 63"
##  [8,] "8 * 6 = 48" "8 * 7 = 56" "8 * 8 = 64" "8 * 9 = 72"
##  [9,] "9 * 6 = 54" "9 * 7 = 63" "9 * 8 = 72" "9 * 9 = 81"

rep(1,9^2)

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [71] 1 1 1 1 1 1 1 1 1 1 1

# The same multiplication table via matrix algebra: a 9 x 1 column times a
# 1 x 9 row yields every pairwise product.
mat1 = matrix(1:9, ncol = 1)
mat2 = t(mat1)
mat = mat1 %*% mat2
mat

##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
##  [1,]    1    2    3    4    5    6    7    8    9
##  [2,]    2    4    6    8   10   12   14   16   18
##  [3,]    3    6    9   12   15   18   21   24   27
##  [4,]    4    8   12   16   20   24   28   32   36
##  [5,]    5   10   15   20   25   30   35   40   45
##  [6,]    6   12   18   24   30   36   42   48   54
##  [7,]    7   14   21   28   35   42   49   56   63
##  [8,]    8   16   24   32   40   48   56   64   72
##  [9,]    9   18   27   36   45   54   63   72   81

Function

# Return `a` increased by 3; works elementwise on numeric vectors.
addThree = function(a) {
    increased = a + 3
    return(increased)
}
addThree(3)

## [1] 6

# Same as addThree, but `a` falls back to 3 when the caller omits it.
addThree_2 = function(a = 3) {
    return(3 + a)
}
addThree_2()

## [1] 6

# A trailing semicolon at the end of a statement is optional.
# The value of the last evaluated expression is returned without return().
addThree_3 = function(a = 3) {
    increased = a + 3;
    increased;
}
addThree_3()

## [1] 6

# Lazy evaluation: the body only ever touches `b`, so `a` (which has no
# default) and `c` are never evaluated and f2() can be called with no arguments.
f2 = function(a, b = 2, c = NULL) {
   b + 1
}
f2()

## [1] 3

# Local scope: assigning to b inside f3 creates a variable local to the call
# and leaves the global b untouched.
b = 3
f3 = function() {
    b = 2
    b
}
f3()

## [1] 2

lapply sapply

x = list(as.numeric(1:4), as.numeric(5:8))
## lapply() returns its results as a list
lapply(X = x, FUN = sum)

## [[1]]
## [1] 10
## 
## [[2]]
## [1] 26

class(lapply(x, sum))

## [1] "list"

## 回傳簡化的結果(vector,matrix)
sapply(x, sum)

## [1] 10 26

## sapply gives the same result as calling unlist() on the lapply output
unlist(lapply(x, sum))

## [1] 10 26

class(sapply(x, sum))

## [1] "numeric"

lapply(x,addThree)

## [[1]]
## [1] 4 5 6 7
## 
## [[2]]
## [1]  8  9 10 11

## 匿名函式
lapply(x,function(e){e+3})

## [[1]]
## [1] 4 5 6 7
## 
## [[2]]
## [1]  8  9 10 11

# Two 2 x 2 integer matrices, each assembled from its rows.
m1 = rbind(1:2, 3:4)
m2 = rbind(5:6, 7:8)
li = list(m1, m2)
# Pull the first row out of every matrix in the list
lapply(li, function(mx) mx[1, ])

## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 5 6

lapply(li, mean)

## [[1]]
## [1] 2.5
## 
## [[2]]
## [1] 6.5

apply tapply

m = matrix(1:4, byrow=TRUE, nrow=2)
apply(m, 1, sum) # rowsums

## [1] 3 7

apply(m, 2, sum) # colsums

## [1] 4 6

rowmeans = apply(m, 1, mean)
colmeans = apply(m, 2, mean)

# Six values and a parallel vector of group codes; tapply() applies mean()
# to the values of each group.
x = c(80, 70, 59, 88, 72, 57)
t = rep(c(1, 1, 2), times = 2)
tapply(X = x, INDEX = t, FUN = mean)

##    1    2 
## 77.5 58.0

data(iris)

tapply(iris$Sepal.Length, iris$Species, mean)

##     setosa versicolor  virginica 
##      5.006      5.936      6.588

# Per-species summary of sepal length: mean, median and standard deviation,
# returned as one named list per species.
tapply(
  iris$Sepal.Length,
  iris$Species,
  function(len) list(avg = mean(len), md = median(len), s_d = sd(len))
)

## $setosa
## $setosa$avg
## [1] 5.006
## 
## $setosa$md
## [1] 5
## 
## $setosa$s_d
## [1] 0.3524897
## 
## 
## $versicolor
## $versicolor$avg
## [1] 5.936
## 
## $versicolor$md
## [1] 5.9
## 
## $versicolor$s_d
## [1] 0.5161711
## 
## 
## $virginica
## $virginica$avg
## [1] 6.588
## 
## $virginica$md
## [1] 6.5
## 
## $virginica$s_d
## [1] 0.6358796

# For each of the first four columns, compute the mean within every species.
s = lapply(names(iris)[1:4], function(col) tapply(iris[[col]], iris$Species, mean))

# The result has no names yet
names(s)

## NULL

names(s) = names(iris[1:4])

探索性資料分析

表格

#import data
#getwd()
setwd("~/lecture/riii")
load("./Statistics/cdc.Rdata")

getwd()

## [1] "/Users/YorkLin/lecture/riii"

str(cdc)

## 'data.frame':    20000 obs. of  9 variables:
##  $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
##  $ exerany : num  0 0 1 1 0 1 1 0 0 1 ...
##  $ hlthplan: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ smoke100: num  0 1 1 0 0 0 0 0 1 0 ...
##  $ height  : num  70 64 60 66 61 64 71 67 65 70 ...
##  $ weight  : int  175 125 105 132 150 114 194 170 150 180 ...
##  $ wtdesire: int  175 115 105 124 130 114 185 160 130 170 ...
##  $ age     : int  77 33 49 42 55 55 31 45 27 44 ...
##  $ gender  : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...

head(cdc)

##     genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1      good       0        1        0     70    175      175  77      m
## 2      good       0        1        1     64    125      115  33      f
## 3      good       1        1        1     60    105      105  49      f
## 4      good       1        1        0     66    132      124  42      f
## 5 very good       0        1        0     61    150      130  55      f
## 6 very good       1        1        0     64    114      114  55      f

names(cdc)

## [1] "genhlth"  "exerany"  "hlthplan" "smoke100" "height"   "weight"  
## [7] "wtdesire" "age"      "gender"

# Convert data types: turn three columns of cdc into factors in one step
cdc[c("exerany", "hlthplan", "smoke100")] =
  lapply(cdc[c("exerany", "hlthplan", "smoke100")], as.factor)

str(cdc)

## 'data.frame':    20000 obs. of  9 variables:
##  $ genhlth : Factor w/ 5 levels "excellent","very good",..: 3 3 3 3 2 2 2 2 3 3 ...
##  $ exerany : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 2 1 1 2 ...
##  $ hlthplan: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ smoke100: Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
##  $ height  : num  70 64 60 66 61 64 71 67 65 70 ...
##  $ weight  : int  175 125 105 132 150 114 194 170 150 180 ...
##  $ wtdesire: int  175 115 105 124 130 114 185 160 130 170 ...
##  $ age     : int  77 33 49 42 55 55 31 45 27 44 ...
##  $ gender  : Factor w/ 2 levels "m","f": 1 2 2 2 2 2 1 1 2 1 ...

#一維次數分配表
table(cdc$exerany)

## 
##     0     1 
##  5086 14914

#相對比例
table(cdc$exerany) / length(cdc$exerany)

## 
##      0      1 
## 0.2543 0.7457

paste(table(cdc$exerany) / nrow(cdc) * 100, '%')

## [1] "25.43 %" "74.57 %"

#二維次數分配表
table(cdc$gender,cdc$exerany)

##    
##        0    1
##   m 2149 7420
##   f 2937 7494

apply(table(cdc$gender,cdc$exerany),1,function(e){e/sum(e)})

##    
##             m         f
##   0 0.2245794 0.2815646
##   1 0.7754206 0.7184354

表格(續)

#三維以上
table(cdc$gender,cdc$genhlth,cdc$exerany)

## , ,  = 0
## 
##    
##     excellent very good good fair poor
##   m       335       606  723  340  145
##   f       427       746 1008  517  239
## 
## , ,  = 1
## 
##    
##     excellent very good good fair poor
##   m      1963      2776 1999  544  138
##   f      1932      2844 1945  618  155

#連續型資料作表
table(cdc$height)

## 
##   48   49   50   51   52   53   54   55   56   57   58   59   60   61   62 
##    2    1    1    2    2    7    3    4   17   20   51  170  613  594 1272 
##   63   64   65   66   67   68   69   70   71   72   73   74   75   76   77 
## 1368 1662 1568 1843 1671 1505 1380 1500 1296 1393  784  605  321  189   80 
##   78   79   80   81   82   83   84   93 
##   43   15   10    3    2    1    1    1

summary(cdc$height)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   48.00   64.00   67.00   67.18   70.00   93.00

head(cut(cdc$height,seq(45,95,by=5)))

## [1] (65,70] (60,65] (55,60] (65,70] (60,65] (60,65]
## 10 Levels: (45,50] (50,55] (55,60] (60,65] (65,70] (70,75] ... (90,95]

# Intervals include the upper bound and exclude the lower bound.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
table(cut(cdc$height,seq(45,95,by=5),right=TRUE))

## 
## (45,50] (50,55] (55,60] (60,65] (65,70] (70,75] (75,80] (80,85] (85,90] 
##       4      18     871    6464    7899    4399     337       7       0 
## (90,95] 
##       1

# Intervals include the lower bound and exclude the upper bound.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
table(cut(cdc$height,seq(45,95,by=5),right=FALSE))

## 
## [45,50) [50,55) [55,60) [60,65) [65,70) [70,75) [75,80) [80,85) [85,90) 
##       3      15     262    5509    7967    5578     648      17       0 
## [90,95) 
##       1

R_basic2

York Lin

2019年09月24日

p54 example

Dataframe

File read and write

p70

List

two dataframes join

readr package

read excel file

read json

read xml

Flow Control

p86

Function

lapply sapply

apply tapply

探索性資料分析

表格

表格(續)