功能函式的操作

R的型別與參數有很大的彈性,透過傳入函式處理傳入資料

範例1

產生1000 個隨機變數,計算開根號

xs <- runif(1e3)

#一般程式語言透過 loop 開根號, 順便透過 system.time 比較處理效率
loopSqrt <- function (xs){
        #透過 c()產生一個空 vector 儲存計算結果
        res <- c()
        #針對每個傳入變數 透過 sqrt 開根號,將結果 透過 c() 透過 vector 回傳結果
        for(x in xs) {
                #將每次計算結果 附加到 res 的 vector 中
                res <- c(res, sqrt(x))
        }
        return (res)

}
system.time( sqrt.xs.1 <- loopSqrt(xs))
##    user  system elapsed 
##   0.003   0.001   0.006
#印出計算結果的前幾筆資料
head(sqrt.xs.1, 5 )
## [1] 0.9097993 0.8107128 0.7657996 0.6060091 0.2087795

R透過 sapply 函式處理的方式 將1000 個變數開根號

#透過 sapply將 xs透過 sqrt函式進行資料處理
system.time(sqrt.xs.2 <-  sapply(xs, sqrt))
##    user  system elapsed 
##   0.001   0.000   0.001
head(sqrt.xs.2, 5)
## [1] 0.9097993 0.8107128 0.7657996 0.6060091 0.2087795

範例 計算移動平均

rollmean <- function(x, n) {
  out <- rep(NA, length(x))
  
  offset <- trunc(n / 2)
  for (i in (offset + 1):(length(x) - n + offset + 1)) {
    out[i] <- mean(x[(i - offset):(i + offset - 1)])
  }
  out
}

#當平均個數不為2的倍數,只為取偶數個數計算移動平均,修正rollmean處理
myrollmean <- function(x, n) {
        out <- rep(NA, length(x))
        
        offset <- trunc(n / 2)
        #取個數除以2的餘數
        res_off <- n %% 2
        #產生 length(x) - n 個數的移動平均個數,起始位置修正 offset + 個數除以2的餘數
        for (i in (offset + res_off):(length(x) - n + offset + res_off)) {
                from <- (i - offset +1 - res_off)
                to <- (i + offset)
                #印出計算移動平均的起迄位置進行除錯
                #print(paste("[", i, "] from ", from, " to " , to , sep=""))
                out[i] <- mean(x[from: to])
        }
        out
} 
  
x <- seq(1, 3, length = 1e2) + runif(1e2)
plot(x,  type="b",pch=20, col="black", lwd=1)
lines(myrollmean(x, 5), col = "blue", lwd = 2)
lines(myrollmean(x, 20), col = "red", lwd = 2)

範例:計算加權平均數

透過lapply 針對xs 每個清單 的 10個元素依照 ws加權,計算加權平均數

#隨機產生亂數
xs <- replicate(5, runif(10), simplify=FALSE)
#隨機產生加權變量
ws <- replicate(5, rpois(10, 5) +1, simplify=FALSE)
#透過匿名函式處理 lapply傳入的資料 
wmean <- unlist(lapply(seq_along(xs), function(i) {
        weighted.mean(xs[[i]], ws[[i]])
}))
wmean
## [1] 0.4312876 0.3712385 0.3120263 0.5116387 0.4016649

透過mapply 計算加權平均 使用相同函式,使用多重輸入值計算結果:Map / mapply 函式 後面的輸入參數,需要相同陣列長度

mean.map <- mapply(weighted.mean, xs, ws) 
mean.map
## [1] 0.4312876 0.3712385 0.3120263 0.5116387 0.4016649

測試使用不同參數名稱使用 mapply檢視處理結果

#  透過mapply times= 1:4 逐一呼叫 rep(times, list(x = 42))
mapply(rep, times = 1:4, MoreArgs = list(x = 42), SIMPLIFY = FALSE)
## [[1]]
## [1] 42
## 
## [[2]]
## [1] 42 42
## 
## [[3]]
## [1] 42 42 42
## 
## [[4]]
## [1] 42 42 42 42
mapply(rep, x = 1:4, MoreArgs = list(times = 6), SIMPLIFY = FALSE)
## [[1]]
## [1] 1 1 1 1 1 1
## 
## [[2]]
## [1] 2 2 2 2 2 2
## 
## [[3]]
## [1] 3 3 3 3 3 3
## 
## [[4]]
## [1] 4 4 4 4 4 4
mapply(rep, 1:4, MoreArgs = list(6), SIMPLIFY = FALSE)
## [[1]]
## [1] 1 1 1 1 1 1
## 
## [[2]]
## [1] 2 2 2 2 2 2
## 
## [[3]]
## [1] 3 3 3 3 3 3
## 
## [[4]]
## [1] 4 4 4 4 4 4

透過 Sweep 函式 將每列數值進行運算

( x <- matrix(rnorm(20,0,10), nrow = 4) )
##            [,1]      [,2]       [,3]      [,4]       [,5]
## [1,]   2.006544  6.451559  -3.081156  5.400328   7.575752
## [2,]  14.301472 -1.469460 -12.614696 -2.163271  -2.922400
## [3,] -13.644186 10.294094   5.977293 -3.047047 -12.697640
## [4,]  -5.414440 10.258010  -5.789888  7.379702   1.028952
#取得每行(row)的最小值
(xx <- apply(x, 1, min) )
## [1]  -3.081156 -12.614696 -13.644186  -5.789888
#將每行數值修正最小值 讓最小值歸零
( x1 <- sweep(x, 1, xx, `-`) )
##            [,1]      [,2]     [,3]      [,4]       [,5]
## [1,]  5.0877004  9.532715  0.00000  8.481484 10.6569079
## [2,] 26.9161678 11.145236  0.00000 10.451425  9.6922965
## [3,]  0.0000000 23.938280 19.62148 10.597139  0.9465455
## [4,]  0.3754479 16.047898  0.00000 13.169590  6.8188399
#將每行數值修正最大值 將最大值正規化為一
( xx1 <- apply(x1, 1, max) )
## [1] 10.65691 26.91617 23.93828 16.04790
( x2 <- sweep(x1, 1, xx1 , `/`) )
##            [,1]      [,2]      [,3]      [,4]       [,5]
## [1,] 0.47740869 0.8945104 0.0000000 0.7958673 1.00000000
## [2,] 1.00000000 0.4140722 0.0000000 0.3882954 0.36009199
## [3,] 0.00000000 1.0000000 0.8196696 0.4426859 0.03954108
## [4,] 0.02339546 1.0000000 0.0000000 0.8206427 0.42490550

outer以 多個向量 作為輸入,並以矩陣或陣列作為輸出

outer (1:3, 1:10, "*")
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]    1    2    3    4    5    6    7    8    9    10
## [2,]    2    4    6    8   10   12   14   16   18    20
## [3,]    3    6    9   12   15   18   21   24   27    30

tapply() 將傳入資料透過分類資料分群後再套用函式處理資料

(pulse <- round(rnorm(22, 70, 10/3)) + rep(c(0,5), c(10,12)))
##  [1] 70 72 68 71 69 71 72 70 79 73 73 75 80 78 75 75 75 68 77 82 77 72
(group <- rep(c("A","B"), c(10,12)))
##  [1] "A" "A" "A" "A" "A" "A" "A" "A" "A" "A" "B" "B" "B" "B" "B" "B" "B"
## [18] "B" "B" "B" "B" "B"
tapply(pulse, group, length)
##  A  B 
## 10 12
tapply(pulse, group, mean)
##        A        B 
## 71.50000 75.58333

tapply = split + sppply

tapply2 <- function(x, group, f, ... ,simplify = TRUE) {
        pieces <- split(x, group)
        sapply(pieces, f, simplify = simplify)
}
 
tapply2(pulse, group, mean)
##        A        B 
## 71.50000 75.58333

透過 Reduce 函式 將 vector 結果 彙整成單一數值

Reduce(`+`, 1:3) 
## [1] 6
Reduce(sum, 1:3) 
## [1] 6
Reduce2 <- function(f, x) {
        out <- x[[1]]
        for(i in seq(2, length(x))) {
                out <- f(out, x[[i]])
        }
        out
}

Reduce2(sum,  1:3) 
## [1] 6
Reduce2(`+`, 1:3) 
## [1] 6
`+`(3,2)
## [1] 5

Reduce()

reduces a vector, x, to a single value by recursively calling a function, f, two arguments at a time

(l <- replicate(5, sample(1:10, 15, replace = T), simplify =  FALSE) )
## [[1]]
##  [1]  9 10  7  6  2  8  4  7  2  4  4  8  8  8  1
## 
## [[2]]
##  [1]  5  2  1  2 10  3 10  5  9  4  9  7 10  6  1
## 
## [[3]]
##  [1]  2  6  4  9  5  2  3  3  9 10 10  5  2 10  8
## 
## [[4]]
##  [1]  5  4  5  9  2  5 10  4 10  5  3  8  3  8 10
## 
## [[5]]
##  [1]  1  7  2  9 10  7  9  2  2  1  3  4  2  7  9
Reduce(intersect, l)
## [1]  9 10  2  4
intersect(intersect(l[[1]], l[[2]]),l[[3]])
## [1]  9 10  6  2  4
out <- intersect(l[[1]], l[[2]])
for(i in 3:5) {
        out <- intersect(out, l[[i]])
}
out
## [1]  9 10  2  4

Predicate functionals

df <- data.frame(x = 1:3, y= c("a","b","c"))
where <- function(f, x) {
        vapply(x, f, logical(1))
}
where(is.factor, df)
##     x     y 
## FALSE  TRUE
(filter.df <- Filter(is.factor, df))
##   y
## 1 a
## 2 b
## 3 c
str(filter.df)
## 'data.frame':    3 obs. of  1 variable:
##  $ y: Factor w/ 3 levels "a","b","c": 1 2 3
(find.df <- Find(is.factor, df))
## [1] a b c
## Levels: a b c
str(find.df)
##  Factor w/ 3 levels "a","b","c": 1 2 3
position.df <- Position(is.factor, df)
str(position.df)
##  int 2

透過特殊循環,進行資料前處理

  1. 依照欄位名稱定義數值轉換處理函式
trans <- list(
        disp = function(x) x * 0.0163871,
        am = function(x) factor(x, labels = c("auto","manual"))
)
  1. 會使用迴圈,依照欄位名稱定義轉換函式,修改資料框內容
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
for(var in names(trans)){
        mtcars[[var]] <- trans[[var]](mtcars[[var]])
}
head(mtcars)
##                    mpg cyl     disp  hp drat    wt  qsec vs     am gear
## Mazda RX4         21.0   6 2.621936 110 3.90 2.620 16.46  0 manual    4
## Mazda RX4 Wag     21.0   6 2.621936 110 3.90 2.875 17.02  0 manual    4
## Datsun 710        22.8   4 1.769807  93 3.85 2.320 18.61  1 manual    4
## Hornet 4 Drive    21.4   6 4.227872 110 3.08 3.215 19.44  1   auto    3
## Hornet Sportabout 18.7   8 5.899356 175 3.15 3.440 17.02  0   auto    3
## Valiant           18.1   6 3.687098 105 2.76 3.460 20.22  1   auto    3
##                   carb
## Mazda RX4            4
## Mazda RX4 Wag        4
## Datsun 710           1
## Hornet 4 Drive       1
## Hornet Sportabout    2
## Valiant              1
  1. 改使用lapply 透過 匿名函數去呼叫轉換函式,資料框內容不會改變
data(mtcars)
lapply(names(trans), function(x) {
        mtcars[[x]] <- trans[[x]](mtcars[[x]]) 
})
## [[1]]
##  [1] 2.621936 2.621936 1.769807 4.227872 5.899356 3.687098 5.899356
##  [8] 2.403988 2.307304 2.746478 2.746478 4.519562 4.519562 4.519562
## [15] 7.734711 7.538066 7.210324 1.289665 1.240503 1.165123 1.968091
## [22] 5.211098 4.981678 5.735485 6.554840 1.294581 1.971368 1.558413
## [29] 5.751872 2.376130 4.932517 1.982839
## 
## [[2]]
##  [1] manual manual manual auto   auto   auto   auto   auto   auto   auto  
## [11] auto   auto   auto   auto   auto   auto   auto   manual manual manual
## [21] auto   auto   auto   auto   auto   manual manual manual manual manual
## [31] manual manual
## Levels: auto manual
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
  1. 透過 <<-全域變數設值,才能用 lapply匿名函式改變資料框內容
lapply(names(trans), function(x) {
        mtcars[[x]] <<- trans[[x]](mtcars[[x]]) 
})
## [[1]]
##  [1] 2.621936 2.621936 1.769807 4.227872 5.899356 3.687098 5.899356
##  [8] 2.403988 2.307304 2.746478 2.746478 4.519562 4.519562 4.519562
## [15] 7.734711 7.538066 7.210324 1.289665 1.240503 1.165123 1.968091
## [22] 5.211098 4.981678 5.735485 6.554840 1.294581 1.971368 1.558413
## [29] 5.751872 2.376130 4.932517 1.982839
## 
## [[2]]
##  [1] manual manual manual auto   auto   auto   auto   auto   auto   auto  
## [11] auto   auto   auto   auto   auto   auto   auto   manual manual manual
## [21] auto   auto   auto   auto   auto   manual manual manual manual manual
## [31] manual manual
## Levels: auto manual
head(mtcars)
##                    mpg cyl     disp  hp drat    wt  qsec vs     am gear
## Mazda RX4         21.0   6 2.621936 110 3.90 2.620 16.46  0 manual    4
## Mazda RX4 Wag     21.0   6 2.621936 110 3.90 2.875 17.02  0 manual    4
## Datsun 710        22.8   4 1.769807  93 3.85 2.320 18.61  1 manual    4
## Hornet 4 Drive    21.4   6 4.227872 110 3.08 3.215 19.44  1   auto    3
## Hornet Sportabout 18.7   8 5.899356 175 3.15 3.440 17.02  0   auto    3
## Valiant           18.1   6 3.687098 105 2.76 3.460 20.22  1   auto    3
##                   carb
## Mazda RX4            4
## Mazda RX4 Wag        4
## Datsun 710           1
## Hornet 4 Drive       1
## Hornet Sportabout    2
## Valiant              1

Non-standard evaluation (NSE)

substitute取得expression,通常用來繪製圖表的座標表示

x <- seq(0, 2*pi, length.out = 100)
plot(x, sin(x))

plot(x, (function(x){sin(x)})(x) )

透過 substitute取得傳入參數的表示式

f <- function(x) {
        substitute(x)
}
f(1:10)
## 1:10
f(10)
## [1] 10
x <- 10
f(x)
## x
y<- 13
f(x+y^2)
## x + y^2
substitute(mean(1:10))
## mean(1:10)
str(substitute(mean(1:10)))
##  language mean(1:10)
deparse("mean(1:10)")
## [1] "\"mean(1:10)\""
str(deparse(substitute(mean(1:10))))
##  chr "mean(1:10)"
quote(mean(1:10))
## mean(1:10)
str(quote(mean(1:10)))
##  language mean(1:10)

透過Eval 運算透過subsitute或quote取得表示式的結果,可以指定表示式的運算環境

eval(substitute(mean(1:10)))
## [1] 5.5
x <- 10
e <- new.env()
e$x <- 20
eval(quote(x))
## [1] 10
eval(quote(x), e)
## [1] 20
#透過 substitute 與 eval 實作 subset
subset2 <- function(x, condition){
        condition_call <- substitute(condition)
        r <- eval(condition_call, x)
        x[r, ]
}
subset2(mtcars, cyl==6)
##                 mpg cyl     disp  hp drat    wt  qsec vs     am gear carb
## Mazda RX4      21.0   6 2.621936 110 3.90 2.620 16.46  0 manual    4    4
## Mazda RX4 Wag  21.0   6 2.621936 110 3.90 2.875 17.02  0 manual    4    4
## Hornet 4 Drive 21.4   6 4.227872 110 3.08 3.215 19.44  1   auto    3    1
## Valiant        18.1   6 3.687098 105 2.76 3.460 20.22  1   auto    3    1
## Merc 280       19.2   6 2.746478 123 3.92 3.440 18.30  1   auto    4    4
## Merc 280C      17.8   6 2.746478 123 3.92 3.440 18.90  1   auto    4    4
## Ferrari Dino   19.7   6 2.376130 175 3.62 2.770 15.50  0 manual    5    6