今天與大家簡介R迴圈效能,總共有五個要點:
1. 建議盡量用R的向量計算;
2. 避免使用data.frame[,], 建議盡量用vector[], 若不行則使用matrix[,];
3. 避免用名稱進行搜尋或迴圈;
4. 避免在迴圈中用c(),cbind(),rbind()逐步擴增物件;
5. 迴圈前先做框架,做一個全部是NA或FALSE的matrix或vector


Case 1

目標: 產生向量 1:10000

# Goal: build the integer vector 1..a in three different ways and time each.
a <- 10000

# Method 1: start from NULL and append one element per iteration with c().
t1 <- NULL
system.time({
  for (i in seq_len(a)) {
    t1 <- c(t1, i)
  }
})
##    user  system elapsed 
##   0.140   0.000   0.139

# Method 2: start from a zero-length vector and assign past its end by index.
t1 <- logical(0)
system.time({
  for (i in seq_len(a)) {
    t1[i] <- i
  }
})
##    user  system elapsed 
##   0.096   0.000   0.097

# Method 3: allocate all `a` slots up front, then fill them by index.
t1 <- logical(a)
system.time({
  for (i in seq_len(a)) {
    t1[i] <- i
  }
})
##    user  system elapsed 
##   0.012   0.000   0.009



Case 2

目標: 對 i = 1, 2, ..., a 依序串接 i:(i+i), 例如 a = 5 時結果為 1 2 2 3 4 3 4 5 6 4 5 6 7 8 5 6 7 8 9 10

# Goal: concatenate the runs i:(i + i) for i = 1..a in three different ways
# and time each. Every run has i + 1 elements.
a <- 1300

# Method 1: grow the result with c() on every iteration.
t1 <- NULL
system.time(
  for (i in seq_len(a)) {
    t1 <- c(t1, i:(i + i))
  }
)
##    user  system elapsed 
##   1.504   0.016   1.521

# Method 2: no preallocation; write each run into the index range just past
# the current end, so the vector is still extended on every iteration.
t1 <- NULL
endInd <- 0
system.time(
  for (i in seq_len(a)) {
    a1 <- i:(i + i)
    startInd <- endInd + 1
    endInd <- startInd + length(a1) - 1
    t1[startInd:endInd] <- a1
  }
)
##    user  system elapsed 
##   0.924   0.016   0.939

# Method 3: preallocate, then fill by index range.
# The required length is derived from `a` (iteration i contributes i + 1
# elements) instead of a hard-coded upper bound, so the result has no unused
# trailing slots and matches methods 1 and 2 exactly. An integer vector is
# allocated because the values stored are integers.
totalLen <- sum(seq_len(a) + 1L)
t1 <- integer(totalLen)
endInd <- 0
system.time(
  for (i in seq_len(a)) {
    a1 <- i:(i + i)
    startInd <- endInd + 1
    endInd <- startInd + length(a1) - 1
    t1[startInd:endInd] <- a1
  }
)
##    user  system elapsed 
##   0.008   0.000   0.009



Case 3

# Show the 26 lowercase letters used as the content of every matrix row below.
print(letters)
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
a <- 1000

# Approach 1: begin with NULL and stack one more row per iteration via rbind().
t1 <- NULL
system.time({
  for (i in seq_len(a)) {
    t1 <- rbind(t1, letters)
  }
})
##    user  system elapsed 
##   0.072   0.004   0.077

# Approach 2: create the a-by-26 matrix of NA first, then overwrite row by row.
t1 <- matrix(NA, nrow = a, ncol = 26)
system.time({
  for (i in seq_len(a)) {
    t1[i, ] <- letters
  }
})
##    user  system elapsed 
##   0.000   0.000   0.002

# Approach 3: no loop; repeat `letters` a times and lay the values out by row.
# The result is only timed, not assigned.
system.time(
  matrix(rep(letters, times = a), ncol = 26, byrow = TRUE)
)
##    user  system elapsed 
##       0       0       0



Case 4

# Time four ways of reading the same CSV file.
# The trailing number on each call is the author's own note; the `##` lines
# are the recorded console output.

path <- '/home/leongkaon/Documents/Text_mining/result/result_20170328/AB30Weight1990.csv'

# File size converted from bytes to MB.
file.size(path) / 1024^2                                # 5MB
## [1] 5.08975

# Base R: read.csv()
system.time(
  read.csv(path)
)                                                       # 2.290
##    user  system elapsed 
##   2.308   0.004   2.310

# Base R: read.table() with the header and separator given explicitly.
system.time(
  read.table(path, sep = ",", header = TRUE)
)                                                       # 2.283
##    user  system elapsed 
##   2.184   0.000   2.186

# readr package
system.time(
  readr::read_csv(path)
)                                                       # 0.095
##    user  system elapsed 
##   0.104   0.004   0.106

# data.table package
system.time(
  data.table::fread(path)
)                                                       # 0.039
##    user  system elapsed 
##   0.096   0.000   0.093


Reference:

Efficiency Tips for Basic R Loop, Svetlana Eden, 2012

Speed up the loop operation in R, Stack Overflow