今天與大家簡介R迴圈效能,總共有五個要點:
1. 建議盡量用R的向量計算;
2. 避免使用data.frame[,],建議盡量用vector[],若不行則使用matrix[,];
3. 避免用名稱進行搜尋或迴圈;
4. 避免在迴圈中用c()、cbind()、rbind()逐步擴增物件;
5. 迴圈前先做框架(預先配置,preallocate),也就是先建立一個全部是NA或FALSE的matrix或vector,再於迴圈中逐一填值。
# Benchmark: store one scalar per iteration, three different ways.
# The "##" lines are the author's recorded timings and are indicative only.
a <- 10000

# method 1: rebuild the result with c() on every iteration
t1 <- NULL
system.time({
  for (i in seq_len(a)) {
    t1 <- c(t1, i)
  }
})
## user system elapsed
## 0.140 0.000 0.139

# method 2: start from an empty vector and assign one past its end each time
t1 <- logical(0)
system.time({
  for (i in seq_len(a)) {
    t1[i] <- i
  }
})
## user system elapsed
## 0.096 0.000 0.097

# method 3: allocate all `a` slots before the loop, then fill them by index
t1 <- logical(a)
system.time({
  for (i in seq_len(a)) {
    t1[i] <- i
  }
})
## user system elapsed
## 0.012 0.000 0.009
# Benchmark: append a variable-length chunk per iteration, three ways.
# Iteration i contributes the i + 1 integers i, i + 1, ..., 2 * i.
# The "##" lines are the author's recorded timings from the original run and
# are indicative only.
a <- 1300

# method 1: rebuild the result with c() on every iteration
t1 <- NULL
system.time(
  for (i in seq_len(a)) {
    t1 <- c(t1, i:(i + i))
  }
)
## user system elapsed
## 1.504 0.016 1.521

# method 2: track the write position and assign past the current end
t1 <- NULL
endInd <- 0
system.time(
  for (i in seq_len(a)) {
    a1 <- i:(i + i)
    startInd <- endInd + 1
    endInd <- startInd + length(a1) - 1
    t1[startInd:endInd] <- a1
  }
)
## user system elapsed
## 0.924 0.016 0.939

# method 3: preallocate, then fill slices in place.
# The buffer is sized exactly from `a` (sum of the per-iteration chunk
# lengths i + 1) instead of a hard-coded 1000000, so the result carries no
# trailing padding and matches what methods 1 and 2 produce.
total_len <- sum(seq_len(a) + 1L)
t1 <- integer(total_len)
endInd <- 0
system.time(
  for (i in seq_len(a)) {
    a1 <- i:(i + i)
    startInd <- endInd + 1
    endInd <- startInd + length(a1) - 1
    t1[startInd:endInd] <- a1
  }
)
## user system elapsed
## 0.008 0.000 0.009
# Benchmark: build a matrix whose every row is `letters`, three ways.
# The "##" lines are the author's recorded output and are indicative only.
print(letters)
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
a <- 1000

# method 1: stack one more row with rbind() on every iteration
t1 <- NULL
system.time({
  for (i in seq_len(a)) {
    t1 <- rbind(t1, letters)
  }
})
## user system elapsed
## 0.072 0.004 0.077

# method 2: allocate an all-NA matrix with `a` rows first, then fill row by row
t1 <- matrix(NA, nrow = a, ncol = 26)
system.time({
  for (i in seq_len(a)) {
    t1[i, ] <- letters
  }
})
## user system elapsed
## 0.000 0.000 0.002

# method 3: no loop at all; repeat `letters` `a` times and lay it out by row
# (the result is only timed, not stored)
system.time(
  matrix(rep(letters, times = a), ncol = 26, byrow = TRUE)
)
## user system elapsed
## 0 0 0
# Benchmark: time four CSV readers on the same file.
# The path points at a file on the author's machine; the "##" lines and the
# trailing numbers are the author's recorded output and are indicative only.
path <- '/home/leongkaon/Documents/Text_mining/result/result_20170328/AB30Weight1990.csv'

# file size in MiB
file.size(path) / (1024 * 1024) # 5MB
## [1] 5.08975

# base R: read.csv()
system.time(read.csv(path)) # 2.290
## user system elapsed
## 2.308 0.004 2.310

# base R: read.table() with a header row and comma separator
system.time(read.table(file = path, sep = ",", header = TRUE)) # 2.283
## user system elapsed
## 2.184 0.000 2.186

# readr package
system.time(readr::read_csv(path)) # 0.095
## user system elapsed
## 0.104 0.004 0.106

# data.table package
system.time(data.table::fread(path)) # 0.039
## user system elapsed
## 0.096 0.000 0.093