用R做簡單迴歸分析

# Simple linear regression on the built-in anscombe data set:
# fit y1 as a linear function of x1, draw the scatter plot of the same
# two columns, then overlay the fitted line in red.
data(anscombe)
lmfit <- lm(y1 ~ x1, data = anscombe)
plot(y1 ~ x1, data = anscombe)
abline(lmfit, col = "red")

數學運算

# 數字相加
3 + 8

## [1] 11

# 數字相減
3 - 8

## [1] -5

# 數字相乘
5 * 5

## [1] 25

# 數字相除
11 / 2

## [1] 5.5

# 指數
2^10

## [1] 1024

# 取餘數
11%%2

## [1] 1

設定變數

a <- 3
a = 3

b <- 5
a + b

## [1] 8

基礎資料型態

# 數值型態
numer <- 17.8
# 字串型態
char <- "hello world"
# 布林邏輯
logic <- TRUE

# 使用class 檢查資料型態
class(numer)

## [1] "numeric"

不同型態資料做運算

card_length <- 3
class(card_length)

## [1] "numeric"

#card_width <- "5 inches"
card_width = 5
class(card_width)

## [1] "numeric"

card_length * card_width

## [1] 15

計算一本書的價錢

RRP <- 35.99
Exchange = 31.74
#rrp
NTD <- RRP * Exchange
NTD

## [1] 1142.323

weight1 <- 75
height1 <- 185

weight2 <- 72
height2 <- 160


height_vec <- c(180, 169, 173)
names_vec <-c("Brian", "Toby", "Sherry")

# 兩個向量進行數學運算
x <- c(1,2,3,7) 
y <- c(2,3,5,1)

x+y

## [1] 3 5 8 8

x*y

## [1]  2  6 15  7

x - y

## [1] -1 -1 -2  6

x/y

## [1] 0.5000000 0.6666667 0.6000000 7.0000000

x + 3

## [1]  4  5  6 10

x + c(3)

## [1]  4  5  6 10

x + c(3,3,3,3)

## [1]  4  5  6 10

x + c(3,4)

## [1]  4  6  6 11

x + c(3,4,3,4)

## [1]  4  6  6 11

x <- 1:20 
x

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20

x <- seq(1,20)
x

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20

seq

?seq

## starting httpd help server ...

##  done

seq(1,20,2)

##  [1]  1  3  5  7  9 11 13 15 17 19

seq(1,3.5, by =0.5)

## [1] 1.0 1.5 2.0 2.5 3.0 3.5

seq(1,10,length=2)

## [1]  1 10

# 透過sum 將向量資料作加總
x <- c(1,2,3,5,7) 
sum(x)

## [1] 18

# 查詢該如何使用sum函式
?sum
help(sum)

x <- 1:100
sum(x)

## [1] 5050

指定名稱

height_vec = c(180, 169, 173)
names(height_vec)

## NULL

names(height_vec) <- c("Brian", "Toby", "Sherry")

names(height_vec)

## [1] "Brian"  "Toby"   "Sherry"

height_vec

##  Brian   Toby Sherry 
##    180    169    173

names_vec <- c("Brian", "Toby", "Sherry")
names(height_vec) <- names_vec

判斷向量內容是否符合條件

height_vec > 175

##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE

height_vec < 175

##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE

height_vec >= 175

##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE

height_vec <= 175

##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE

height_vec == 180

##  Brian   Toby Sherry 
##   TRUE  FALSE  FALSE

height_vec != 180

##  Brian   Toby Sherry 
##  FALSE   TRUE   TRUE

height_vec[height_vec > 175]

## Brian 
##   180

height_vec[2] #取得第二個人的身高

## Toby 
##  169

height_vec[c(2,3)] # 取得二跟三

##   Toby Sherry 
##    169    173

2:3

## [1] 2 3

height_vec[2:3] # 取得二跟三

##   Toby Sherry 
##    169    173

# 使用& 取得交集
height_vec[height_vec < 180 & height_vec > 170 ]

## Sherry 
##    173

# 使用 | 取得聯集
height_vec[height_vec >= 180 | height_vec < 170 ]

## Brian  Toby 
##   180   169

使用向量計算BMI

# Three people, with one height and one weight value each.
names_vec <- c("Brian", "Toby", "Sherry")
height_vec <- c(180, 169, 173)
weight_vec <- c(73, 87, 43)

# BMI = weight / height^2. Heights are divided by 100 before squaring
# (presumably a cm -> m conversion); the result is labelled with the names.
bmi_vec <- setNames(weight_vec / (height_vec / 100)^2, names_vec)

# Show only the entries that fall outside the band 18.5 <= BMI < 24.
bmi_vec[!(bmi_vec >= 18.5 & bmi_vec < 24)]

##     Toby   Sherry 
## 30.46112 14.36734

rbind(bmi_vec, height_vec, weight_vec)

##                Brian      Toby    Sherry
## bmi_vec     22.53086  30.46112  14.36734
## height_vec 180.00000 169.00000 173.00000
## weight_vec  73.00000  87.00000  43.00000

cbind(bmi_vec, height_vec, weight_vec)

##         bmi_vec height_vec weight_vec
## Brian  22.53086        180         73
## Toby   30.46112        169         87
## Sherry 14.36734        173         43

?matrix
1:9

## [1] 1 2 3 4 5 6 7 8 9

matrix(1:9, nrow=3)

##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

matrix(1:9, nrow=1)

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,]    1    2    3    4    5    6    7    8    9

matrix(1:9, nrow=3, byrow=TRUE)

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9

kevin <- c(85,73)
marry <- c(72,64)
jerry <- c(59,66)

mat <- matrix(c(kevin, marry, jerry), nrow=3, byrow= TRUE)


colnames(mat) <- c('first', 'second')
rownames(mat) <-  c('kevin', 'marry', 'jerry')

mat2 <- matrix(c(kevin, marry, jerry), nrow=3, byrow=TRUE, 
dimnames=list(c('kevin', 'marry', 'jerry'),c('first', 'second')))


dim(mat2)

## [1] 3 2

nrow(mat2)

## [1] 3

ncol(mat2)

## [1] 2

mat2[      1 ,  ]

##  first second 
##     85     73

mat2[        , 1 ]

## kevin marry jerry 
##    85    72    59

mat2[  c(2,3),   ]

##       first second
## marry    72     64
## jerry    59     66

mat2[  2:3   ,   ]

##       first second
## marry    72     64
## jerry    59     66

mat2[  2:3   ,  1 ]

## marry jerry 
##    72    59

新增列與行

mat3 <- rbind(mat2, c(78,63))
mat3

##       first second
## kevin    85     73
## marry    72     64
## jerry    59     66
##          78     63

rownames(mat3)

## [1] "kevin" "marry" "jerry" ""

rownames(mat3)[4]

## [1] ""

rownames(mat3)[4]  <- 'sam'

mat4 <- cbind(mat2,c(82,77,70))
mat4

##       first second   
## kevin    85     73 82
## marry    72     64 77
## jerry    59     66 70

colnames(mat4)

## [1] "first"  "second" ""

colnames(mat4)[3] = 'third'
mat4

##       first second third
## kevin    85     73    82
## marry    72     64    77
## jerry    59     66    70

mat2

##       first second
## kevin    85     73
## marry    72     64
## jerry    59     66

rowSums(mat2)

## kevin marry jerry 
##   158   136   125

colSums(mat2)

##  first second 
##    216    203

矩陣運算

#矩陣宣告
m1 <- matrix(1:4, byrow=TRUE, nrow=2)
m2 <- matrix(5:8, byrow=TRUE, nrow=2)

#矩陣運算
m1 + m2

##      [,1] [,2]
## [1,]    6    8
## [2,]   10   12

m1 - m2

##      [,1] [,2]
## [1,]   -4   -4
## [2,]   -4   -4

m1 * m2

##      [,1] [,2]
## [1,]    5   12
## [2,]   21   32

m1 / m2

##           [,1]      [,2]
## [1,] 0.2000000 0.3333333
## [2,] 0.4285714 0.5000000

m1 %*% m2

##      [,1] [,2]
## [1,]   19   22
## [2,]   43   50

使用矩陣計算考試成績

kevin <- c(85,73)
marry <- c(72,64)
jerry <- c(59,66)
mat <- matrix(c(kevin, marry, jerry), nrow=3, byrow= TRUE)
mat

##      [,1] [,2]
## [1,]   85   73
## [2,]   72   64
## [3,]   59   66

weighted_score <- mat[,1] * 0.4 + mat[,2] * 0.6

weighted_score2 <- mat %*% c(0.4,0.6)

建立Data Frame

# Build three parallel vectors: day label, temperature and rain flag.
days <- c("mon", "tue", "wed", "thu", "fri")
temp <- c(22.2, 21, 23, 24.3, 25)
rain <- c(TRUE, TRUE, FALSE, FALSE, TRUE)
# A matrix can hold only one type, so c() coerces every value to character.
# byrow = TRUE is required so that each of the 3 rows holds one whole vector;
# the default column-wise fill would interleave days, temperatures and flags.
mat <- matrix(c(days, temp, rain), nrow = 3, byrow = TRUE)
# Build the data frame from the vectors; each column keeps its own type.
# stringsAsFactors = TRUE is spelled out because R >= 4.0 no longer converts
# strings to factors by default, and the str()/summary() output recorded
# further down shows `days` as a factor.
df <- data.frame(days, temp, rain, stringsAsFactors = TRUE)

df

##   days temp  rain
## 1  mon 22.2  TRUE
## 2  tue 21.0  TRUE
## 3  wed 23.0 FALSE
## 4  thu 24.3 FALSE
## 5  fri 25.0  TRUE

class(df)

## [1] "data.frame"

# 檢視資料形態
class(df)

## [1] "data.frame"

# 檢視架構
str(df)

## 'data.frame':    5 obs. of  3 variables:
##  $ days: Factor w/ 5 levels "fri","mon","thu",..: 2 4 5 3 1
##  $ temp: num  22.2 21 23 24.3 25
##  $ rain: logi  TRUE TRUE FALSE FALSE TRUE

# 檢視資料摘要
summary(df)

##   days        temp         rain        
##  fri:1   Min.   :21.0   Mode :logical  
##  mon:1   1st Qu.:22.2   FALSE:2        
##  thu:1   Median :23.0   TRUE :3        
##  tue:1   Mean   :23.1   NA's :0        
##  wed:1   3rd Qu.:24.3                  
##          Max.   :25.0

使用R 內建的資料集

#表列資料集
data()

#使用資料集
data(iris)
View(iris)


#觀察讀取到的資料集型態
class(iris)

## [1] "data.frame"

str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# 觀看資料集的前幾筆資料與後幾筆資料
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

head(iris, 10)

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1           5.1         3.5          1.4         0.2  setosa
## 2           4.9         3.0          1.4         0.2  setosa
## 3           4.7         3.2          1.3         0.2  setosa
## 4           4.6         3.1          1.5         0.2  setosa
## 5           5.0         3.6          1.4         0.2  setosa
## 6           5.4         3.9          1.7         0.4  setosa
## 7           4.6         3.4          1.4         0.3  setosa
## 8           5.0         3.4          1.5         0.2  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 10          4.9         3.1          1.5         0.1  setosa

tail(iris)

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

tail(iris,3)

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

#取前三列資料
iris[1:3,]

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa

#取前三列第一行的資料
iris[1:3,1]

## [1] 5.1 4.9 4.7

#也可以用欄位名稱取值
iris[1:3,"Sepal.Length"]

## [1] 5.1 4.9 4.7

#取前兩行資料
iris[,1:2]

##     Sepal.Length Sepal.Width
## 1            5.1         3.5
## 2            4.9         3.0
## 3            4.7         3.2
## 4            4.6         3.1
## 5            5.0         3.6
## 6            5.4         3.9
## 7            4.6         3.4
## 8            5.0         3.4
## 9            4.4         2.9
## 10           4.9         3.1
## 11           5.4         3.7
## 12           4.8         3.4
## 13           4.8         3.0
## 14           4.3         3.0
## 15           5.8         4.0
## 16           5.7         4.4
## 17           5.4         3.9
## 18           5.1         3.5
## 19           5.7         3.8
## 20           5.1         3.8
## 21           5.4         3.4
## 22           5.1         3.7
## 23           4.6         3.6
## 24           5.1         3.3
## 25           4.8         3.4
## 26           5.0         3.0
## 27           5.0         3.4
## 28           5.2         3.5
## 29           5.2         3.4
## 30           4.7         3.2
## 31           4.8         3.1
## 32           5.4         3.4
## 33           5.2         4.1
## 34           5.5         4.2
## 35           4.9         3.1
## 36           5.0         3.2
## 37           5.5         3.5
## 38           4.9         3.6
## 39           4.4         3.0
## 40           5.1         3.4
## 41           5.0         3.5
## 42           4.5         2.3
## 43           4.4         3.2
## 44           5.0         3.5
## 45           5.1         3.8
## 46           4.8         3.0
## 47           5.1         3.8
## 48           4.6         3.2
## 49           5.3         3.7
## 50           5.0         3.3
## 51           7.0         3.2
## 52           6.4         3.2
## 53           6.9         3.1
## 54           5.5         2.3
## 55           6.5         2.8
## 56           5.7         2.8
## 57           6.3         3.3
## 58           4.9         2.4
## 59           6.6         2.9
## 60           5.2         2.7
## 61           5.0         2.0
## 62           5.9         3.0
## 63           6.0         2.2
## 64           6.1         2.9
## 65           5.6         2.9
## 66           6.7         3.1
## 67           5.6         3.0
## 68           5.8         2.7
## 69           6.2         2.2
## 70           5.6         2.5
## 71           5.9         3.2
## 72           6.1         2.8
## 73           6.3         2.5
## 74           6.1         2.8
## 75           6.4         2.9
## 76           6.6         3.0
## 77           6.8         2.8
## 78           6.7         3.0
## 79           6.0         2.9
## 80           5.7         2.6
## 81           5.5         2.4
## 82           5.5         2.4
## 83           5.8         2.7
## 84           6.0         2.7
## 85           5.4         3.0
## 86           6.0         3.4
## 87           6.7         3.1
## 88           6.3         2.3
## 89           5.6         3.0
## 90           5.5         2.5
## 91           5.5         2.6
## 92           6.1         3.0
## 93           5.8         2.6
## 94           5.0         2.3
## 95           5.6         2.7
## 96           5.7         3.0
## 97           5.7         2.9
## 98           6.2         2.9
## 99           5.1         2.5
## 100          5.7         2.8
## 101          6.3         3.3
## 102          5.8         2.7
## 103          7.1         3.0
## 104          6.3         2.9
## 105          6.5         3.0
## 106          7.6         3.0
## 107          4.9         2.5
## 108          7.3         2.9
## 109          6.7         2.5
## 110          7.2         3.6
## 111          6.5         3.2
## 112          6.4         2.7
## 113          6.8         3.0
## 114          5.7         2.5
## 115          5.8         2.8
## 116          6.4         3.2
## 117          6.5         3.0
## 118          7.7         3.8
## 119          7.7         2.6
## 120          6.0         2.2
## 121          6.9         3.2
## 122          5.6         2.8
## 123          7.7         2.8
## 124          6.3         2.7
## 125          6.7         3.3
## 126          7.2         3.2
## 127          6.2         2.8
## 128          6.1         3.0
## 129          6.4         2.8
## 130          7.2         3.0
## 131          7.4         2.8
## 132          7.9         3.8
## 133          6.4         2.8
## 134          6.3         2.8
## 135          6.1         2.6
## 136          7.7         3.0
## 137          6.3         3.4
## 138          6.4         3.1
## 139          6.0         3.0
## 140          6.9         3.1
## 141          6.7         3.1
## 142          6.9         3.1
## 143          5.8         2.7
## 144          6.8         3.2
## 145          6.7         3.3
## 146          6.7         3.0
## 147          6.3         2.5
## 148          6.5         3.0
## 149          6.2         3.4
## 150          5.9         3.0

#取特定欄位向量值
iris$"Sepal.Length"

##   [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
##  [18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
##  [35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
##  [52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
##  [69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
##  [86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8
## [103] 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7
## [120] 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7
## [137] 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9

iris[1:3,][1:2, 1:2]$Sepal.Length

## [1] 5.1 4.9

#取前五筆包含length 及 width 的資料
five.Sepal.iris <- iris[1:5, c("Sepal.Length", "Sepal.Width")]

five.iris <- iris[1:5,]
five.iris$Sepal.Length

## [1] 5.1 4.9 4.7 4.6 5.0

five.iris$Sepal.Length >= 5

## [1]  TRUE FALSE FALSE FALSE  TRUE

five.iris[five.iris$Sepal.Length >= 5, ]

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa

which(five.iris$Sepal.Length >= 5)

## [1] 1 5

five.iris[which(five.iris$Sepal.Length >= 5), ]

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa

#可以用條件做篩選
setosa.data <- iris[iris$Species=="setosa",1:5]

#使用which 做資料篩選
which(iris$Species=="setosa")

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50

比較sort 與 order

a <- c(1,5,2,7,4,6)
sort(a)

## [1] 1 2 4 5 6 7

order(a)

## [1] 1 3 5 2 6 4

head(sort(iris$Sepal.Length, decreasing = TRUE))

## [1] 7.9 7.7 7.7 7.7 7.7 7.6

head(iris[order(iris$Sepal.Length, decreasing = TRUE),])

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 132          7.9         3.8          6.4         2.0 virginica
## 118          7.7         3.8          6.7         2.2 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
## 106          7.6         3.0          6.6         2.1 virginica

Explore 2330

# NOTE(review): `tw2330` is not created anywhere in the visible source, so
# these lines fail unless it has been loaded beforehand. The calls below use
# it as a data frame with `Close` and `Date` columns -- presumably price data
# for ticker 2330; confirm the data source and add the loading step.
# Per-column summary statistics.
summary(tw2330)
# First rows after sorting by Close in ascending order (smallest Close values).
head(tw2330[order(tw2330$Close),])
# Last rows of the same ordering (largest Close values).
tail(tw2330[order(tw2330$Close),])
# Inspect the column types.
str(tw2330)
# Convert the Date column to class Date before plotting.
tw2330$Date <- as.Date(tw2330$Date)
# Plot Close against Date.
plot(Close ~ Date, data = tw2330)

階層 (Factor)

Weather <- c("sunny","rainy", "cloudy", "rainy", "cloudy")
str(Weather)

##  chr [1:5] "sunny" "rainy" "cloudy" "rainy" "cloudy"

weather_category <- factor(Weather)
weather_category

## [1] sunny  rainy  cloudy rainy  cloudy
## Levels: cloudy rainy sunny

levels(weather_category)

## [1] "cloudy" "rainy"  "sunny"

# Small data frame used to show the difference between numeric and factor
# columns. stringsAsFactors = TRUE is passed explicitly: R >= 4.0 keeps
# strings as character by default, while the str()/summary() output recorded
# after this code shows names_vec as a factor.
names_vec <- c("a", "b", "c")
scores_vec <- c(77, 56, 66)
gender_vec <- c(1, 0, 1)
df <- data.frame(names_vec, scores_vec, gender_vec, stringsAsFactors = TRUE)
str(df)

## 'data.frame':    3 obs. of  3 variables:
##  $ names_vec : Factor w/ 3 levels "a","b","c": 1 2 3
##  $ scores_vec: num  77 56 66
##  $ gender_vec: num  1 0 1

summary(df)

##  names_vec   scores_vec      gender_vec    
##  a:1       Min.   :56.00   Min.   :0.0000  
##  b:1       1st Qu.:61.00   1st Qu.:0.5000  
##  c:1       Median :66.00   Median :1.0000  
##            Mean   :66.33   Mean   :0.6667  
##            3rd Qu.:71.50   3rd Qu.:1.0000  
##            Max.   :77.00   Max.   :1.0000

df$gender_vec <- as.factor(df$gender_vec)
str(df)

## 'data.frame':    3 obs. of  3 variables:
##  $ names_vec : Factor w/ 3 levels "a","b","c": 1 2 3
##  $ scores_vec: num  77 56 66
##  $ gender_vec: Factor w/ 2 levels "0","1": 2 1 2

summary(df)

##  names_vec   scores_vec    gender_vec
##  a:1       Min.   :56.00   0:1       
##  b:1       1st Qu.:61.00   1:2       
##  c:1       Median :66.00             
##            Mean   :66.33             
##            3rd Qu.:71.50             
##            Max.   :77.00

temperature <- c("Low", "High", "High", "Medium", "Low", "Medium")

temperature_category <- factor(temperature, order = TRUE, levels = c("Low", "Medium", "High"))

temperature_category

## [1] Low    High   High   Medium Low    Medium
## Levels: Low < Medium < High

temperature_category[3] > temperature_category[1]

## [1] TRUE

temperature_category[4] > temperature_category[3]

## [1] FALSE

levels(temperature_category)

## [1] "Low"    "Medium" "High"

Lists

item <- list(thing="hat", size=8.25) 
item

## $thing
## [1] "hat"
## 
## $size
## [1] 8.25

item$thing

## [1] "hat"

item$size * 3

## [1] 24.75

test <- list(name="Toby", score = c(87,57,72)) 
test$score

## [1] 87 57 72

test$score[2]

## [1] 57

#沒有名字的清單


li <- list(c(1,2,3), c(2,3,4,5))
li

## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] 2 3 4 5

li[[1]]

## [1] 1 2 3

li[[1]][2]

## [1] 2

li <- list(c(3,5,12), c(2,4,5,8,10))
li

## [[1]]
## [1]  3  5 12
## 
## [[2]]
## [1]  2  4  5  8 10

#使用lapply將函式套用到list 上
lapply(li, sum)

## [[1]]
## [1] 20
## 
## [[2]]
## [1] 29

lapply(li, mean)

## [[1]]
## [1] 6.666667
## 
## [[2]]
## [1] 5.8

lapply(li, length)

## [[1]]
## [1] 3
## 
## [[2]]
## [1] 5

# sapply = simplified lapply
sapply(li, length)

## [1] 3 5

流程控制(Flow Control)

x = 2
if(x > 3){
  print('x>3');
}else{
  print('x<=3');
}

## [1] "x<=3"

# Multi-way branch. Conditions are tested from top to bottom and only the
# first TRUE branch runs, so the exact match (x == 3) must come before any
# broader test that would also accept 3. With x > 3 and x == 3 handled, the
# final else covers exactly the remaining case x < 3.
x <- 3
if (x > 3) {
  print("x > 3")
} else if (x == 3) {
  print("x == 3")
} else {
  print("x < 3")
}

## [1] "x == 3"

for(i in 1:10){
     print (i);
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10

s = 0
for(i in 1:100){
     s = s + i;
}
s

## [1] 5050

sum(1:100)

## [1] 5050

x <- c("sunny","rainy", "cloudy", "rainy", "cloudy")
1:length(x)

## [1] 1 2 3 4 5

# Index-based loop that prints every element of x.
# Caveat: 1:length(x) evaluates to c(1, 0) when x is empty, so the body would
# still run twice; the seq_along(x) form shown next in this file avoids that.
for(i in 1:length(x)){
  #print(i)
  print(x[i])
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

seq_along(x)

## [1] 1 2 3 4 5

for(i in seq_along(x)){
  #print(i)
  print(x[i])
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

for (letter in x){
  print(letter);
}

## [1] "sunny"
## [1] "rainy"
## [1] "cloudy"
## [1] "rainy"
## [1] "cloudy"

使用while 迴圈

# Sum the integers 0..100 with a while loop: `cnt` is the running counter,
# `s` the running total. The loop ends once cnt exceeds 100.
cnt <- 0
s <- 0
while (cnt <= 100) {
  s <- s + cnt
  cnt <- cnt + 1
}
s

## [1] 5050

範例: 產生多筆頁面連結

?paste

paste('hello', 'world')

## [1] "hello world"

paste('hello', 'world', sep = '')

## [1] "helloworld"

# Print the links of pages 1 to 20 by appending the page number to a fixed
# URL prefix (no separator between prefix and number).
for (page in seq_len(20)) {
  print(paste0("http://www.appledaily.com.tw/realtimenews/section/new/", page))
}

## [1] "http://www.appledaily.com.tw/realtimenews/section/new/1"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/2"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/3"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/4"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/5"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/6"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/7"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/8"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/9"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/10"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/11"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/12"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/13"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/14"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/15"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/16"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/17"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/18"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/19"
## [1] "http://www.appledaily.com.tw/realtimenews/section/new/20"

函式 (Function)

# addnum(a, b): adds 3 to `a`, doubles `b`, and returns the sum of the two
# adjusted values, i.e. (a + 3) + (b * 2).
# The assignments inside the body rebind only the function's local copies of
# the arguments; variables named a and b in the calling environment are left
# unchanged.
addnum <- function(a, b){
  a = a + 3
  b = b * 2
  a + b
}
addnum

## function(a, b){
##   a = a + 3
##   b = b * 2
##   a + b
## }

addnum(3,5)

## [1] 16

a = 3
b = 5
addnum(a,b)

## [1] 16

## [1] 3

## [1] 5

# addnum(a, b = 2): returns (a + 3) + (b * 2).
# `b` is optional and defaults to 2, so addnum(3) yields 10. Arguments may
# also be supplied by name in any order.
addnum <- function(a, b = 2) {
  shifted <- a + 3
  doubled <- b * 2
  shifted + doubled
}
addnum(3)

## [1] 10

addnum(a = 3)

## [1] 10

#addnum(b = 3)
addnum(b = 3, a = 2)

## [1] 11

# f(a, b): returns twice `a`.
# `b` is never used in the body; because R evaluates arguments lazily, the
# function can be called without it, e.g. f(3).
f <- function(a, b) {
  doubled <- a * 2
  doubled
}
f(3)

## [1] 6

# f(a, b): prints a + b (print() hands the printed value back invisibly).
# Both arguments are used here, so calling f(3) without `b` raises an error.
f <- function(a, b) {
  total <- a + b
  print(total)
}
#f(3)

範例: 撰寫函式計算文章詞頻

a = 'sum returns the sum of all the values present in its arguments'

?strsplit
strsplit(a,split= ' ')

## [[1]]
##  [1] "sum"       "returns"   "the"       "sum"       "of"       
##  [6] "all"       "the"       "values"    "present"   "in"       
## [11] "its"       "arguments"

?table
b = c(1,2,2,1,2,1)
table(b)

## b
## 1 2 
## 3 3

# wordcount(article, split = " "): count how often each token occurs in a text.
#
# Args:
#   article: character vector holding the text. Every element is tokenised
#            and the counts are pooled into one table.
#   split:   delimiter handed to strsplit(), interpreted as a regular
#            expression. The default, a single space, keeps the original
#            behaviour. Pass "[[:space:]]+" to break on newlines and tabs as
#            well; otherwise tokens separated only by line breaks stay glued
#            together (e.g. "counterparts.\n\nAir").
# Returns:
#   A one-dimensional table of token counts, headed "article.split".
wordcount <- function(article, split = " ") {
  # unlist() flattens the list returned by strsplit() so all tokens are
  # tabulated as one vector. The variable name is kept on purpose: table()
  # uses it as the header of the printed result.
  article.split <- unlist(strsplit(article, split = split), use.names = FALSE)
  table(article.split)
}

wordcount(a)

## article.split
##       all arguments        in       its        of   present   returns 
##         1         1         1         1         1         1         1 
##       sum       the    values 
##         2         2         1

xi = "President Barack Obama is embarking upon a final bout of delicate overseas diplomacy before his successor is elected in November, arriving in Asia Saturday for meetings with some of his most nettlesome counterparts.

Air Force One touched down in Hangzhou, the host city of this year's Group of 20 meetings, just after 2 p.m. local time (2 a.m. ET). Obama's first stop was a formal bilateral session with the summit's host, Chinese President Xi Jinping"

wordcount(xi)

## article.split
##                   (2                    2                   20 
##                    1                    1                    1 
##                    a                 a.m.                after 
##                    2                    1                    1 
##             arriving                 Asia               Barack 
##                    1                    1                    1 
##               before            bilateral                 bout 
##                    1                    1                    1 
##              Chinese                 city counterparts.\n\nAir 
##                    1                    1                    1 
##             delicate            diplomacy                 down 
##                    1                    1                    1 
##              elected            embarking                 ET). 
##                    1                    1                    1 
##                final                first                  for 
##                    1                    1                    1 
##                Force               formal                Group 
##                    1                    1                    1 
##            Hangzhou,                  his                 host 
##                    1                    2                    1 
##                host,                   in                   is 
##                    1                    3                    2 
##              Jinping                 just                local 
##                    1                    1                    1 
##             meetings            meetings,                 most 
##                    1                    1                    1 
##           nettlesome            November,                Obama 
##                    1                    1                    1 
##              Obama's                   of                  One 
##                    1                    4                    1 
##             overseas                 p.m.            President 
##                    1                    1                    2 
##             Saturday              session                 some 
##                    1                    1                    1 
##                 stop            successor             summit's 
##                    1                    1                    1 
##                  the                 this                 time 
##                    2                    1                    1 
##              touched                 upon                  was 
##                    1                    1                    1 
##                 with                   Xi               year's 
##                    2                    1                    1

Demo20160903

David Chiu

2016年9月3日

用R做簡單迴歸分析

數學運算

設定變數

基礎資料型態

不同型態資料做運算

計算一本書的價錢

seq

指定名稱

判斷向量內容是否符合條件

使用向量計算BMI

新增列與行

矩陣運算

使用矩陣計算考試成績

建立Data Frame

使用R 內建的資料集

比較sort 與 order

Explore 2330

階層 (Factor)

Lists

流程控制(Flow Control)

使用while 迴圈

範例: 產生多筆頁面連結

函式 (Function)

範例: 撰寫函式計算文章詞頻