Efficiency Tips for R Loop

今天與大家簡介 R 迴圈效能，總共有五個要點：
1. 建議盡量用 R 的向量化計算；
2. 避免使用 data.frame[,]，建議盡量用 vector[]，若不行則使用 matrix[,]；
3. 避免用名稱進行搜尋或迴圈；
4. 避免在迴圈中用 c()、cbind()、rbind() 逐步擴增物件；
5. 迴圈前先做框架：預先建立一個全部是 NA 或 FALSE、大小固定的 matrix 或 vector，再於迴圈中逐一填值。

Case 1

目標：產生向量 1:10000

# Number of elements to generate; the other Case 1 snippets reuse `a`.
a <- 10000

# Method 1: start from an empty result and append one value per iteration
# with c(). Every append copies the whole vector, which is why this is the
# slow baseline the other methods are compared against.
t1 <- c()
system.time({
  for (i in seq_len(a)) {
    t1 <- c(t1, i)
  }
})

##    user  system elapsed 
##   0.140   0.000   0.139

# Method 2: start from a zero-length vector and let it grow on demand by
# assigning to index i, one past the current end, on every iteration.
t1 <- logical(0)
system.time({
  for (i in seq_len(a)) {
    t1[i] <- i
  }
})

##    user  system elapsed 
##   0.096   0.000   0.097

# Method 3: preallocate the result at its final length, then fill it in place.
# The buffer is created as an integer vector of NA values rather than with
# vector(length = a), which yields a logical vector that R has to convert to
# integer on the first assignment. NA also makes any slot the loop failed to
# fill stand out, instead of silently reading as FALSE/0.
t1 <- rep(NA_integer_, a)
system.time(
  # seq_len(a) is the safe form of 1:a (it is empty when a is 0).
  for (i in seq_len(a)) {
    t1[i] <- i
  }
)

##    user  system elapsed 
##   0.012   0.000   0.009

Case 2

目標：依序串接 i:(2i)（i = 1, 2, 3, …），結果開頭為：1 2 2 3 4 3 4 5 6 4 5 6 7 8 5 6 7 8 9 10

# Number of chunks to concatenate; the other Case 2 snippets reuse `a`.
a <- 1300

# Method 1: append the chunk i, i + 1, ..., 2 * i to the result with c() on
# every iteration. The result is re-copied each time, so this is the slow
# baseline for Case 2.
t1 <- c()
system.time({
  for (i in seq_len(a)) {
    t1 <- c(t1, i:(2L * i))
  }
})

##    user  system elapsed 
##   1.504   0.016   1.521

# Method 2: no preallocation, but write each chunk into an explicit index
# range instead of calling c(). `endInd` tracks the last position written so
# far; assigning past the current end makes the vector grow.
t1 <- NULL
endInd <- 0
system.time({
  for (i in seq_len(a)) {
    a1 <- i:(2L * i)
    startInd <- endInd + 1
    # Same value as startInd + length(a1) - 1.
    endInd <- endInd + length(a1)
    t1[startInd:endInd] <- a1
  }
})

##    user  system elapsed 
##   0.924   0.016   0.939

# Method 3: preallocate the result at its exact final length, then write each
# chunk into its slice in place.
# Iteration i contributes i:(i + i), i.e. i + 1 values, so the final length is
# sum(seq_len(a) + 1). Sizing the buffer exactly (instead of a hard-coded
# 1000000) leaves no unused padding at the tail of `t1`, so the result matches
# the other two methods, and the preallocation stays valid if `a` is changed.
t1 <- rep(NA_integer_, sum(seq_len(a) + 1))
endInd <- 0
system.time(
  for (i in seq_len(a)) {
    a1 <- i:(i + i)
    # Slice [startInd, endInd] is where this chunk belongs in the result.
    startInd <- endInd + 1
    endInd <- startInd + length(a1) - 1
    t1[startInd:endInd] <- a1
  }
)

##    user  system elapsed 
##   0.008   0.000   0.009

Case 3

# Show `letters`, the 26-element character vector used as the row content
# in the Case 3 snippets.
print(letters)

##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"

# Number of rows to build; the other Case 3 snippets reuse `a`.
a <- 1000

# Baseline: grow the matrix by one row per iteration with rbind(). Each call
# builds a new, larger matrix from the previous one.
t1 <- c()
system.time({
  for (i in seq_len(a)) {
    t1 <- rbind(t1, letters)
  }
})

##    user  system elapsed 
##   0.072   0.004   0.077

# Preallocated variant: create the full a-by-26 matrix once, then overwrite
# one row per iteration.
# The buffer is typed as character (NA_character_) because the rows written
# are character; a plain NA matrix is logical and would be converted to
# character on the first row assignment. The column count comes from
# `letters` itself rather than a hard-coded 26.
t1 <- matrix(NA_character_, nrow = a, ncol = length(letters))
system.time(
  for (i in seq_len(a)) {
    t1[i, ] <- letters
  }
)

##    user  system elapsed 
##   0.000   0.000   0.002

# Loop-free variant: repeat `letters` a times and lay the values out row by
# row in a 26-column matrix. Only the timing is reported; the matrix itself
# is not stored.
system.time({
  matrix(rep(letters, times = a), ncol = 26, byrow = TRUE)
})

##    user  system elapsed 
##       0       0       0

Case 4

# Case 4 input: the CSV file that every reader below is timed on.
path <- "/home/leongkaon/Documents/Text_mining/result/result_20170328/AB30Weight1990.csv"
# File size converted from bytes to MiB; the author noted it as about 5MB.
file.size(path) / 1024^2

## [1] 5.08975

# Base R read.csv(). Author's inline note: 2.290 (presumably elapsed seconds
# from an earlier run).
system.time({
  read.csv(file = path)
})

##    user  system elapsed 
##   2.308   0.004   2.310

# Base R read.table() with a header row and comma separator. Author's inline
# note: 2.283 (presumably elapsed seconds from an earlier run).
system.time({
  read.table(file = path, sep = ",", header = TRUE)
})

##    user  system elapsed 
##   2.184   0.000   2.186

# readr's read_csv(). Author's inline note: 0.095 (presumably elapsed seconds
# from an earlier run).
system.time({
  readr::read_csv(file = path)
})

##    user  system elapsed 
##   0.104   0.004   0.106

# data.table's fread(). Author's inline note: 0.039 (presumably elapsed
# seconds from an earlier run).
system.time({
  data.table::fread(path)
})

##    user  system elapsed 
##   0.096   0.000   0.093

Reference:

Efficiency Tips for Basic R Loop, Svetlana Eden, 2012

Speed up the loop operation in R, Stackoverflow

Efficiency Tips for R Loop

梁家安

11th April 2017

Case 1

1:10000

Case 2

1 2 2 3 4 3 4 5 6 4 5 6 7 8 5 6 7 8 9 10

Case 3

Case 4

Reference: