# Benchmark input: 10,000 upper-case letters sampled with replacement.
x <- sample(x = LETTERS, size = 10000, replace = TRUE)
# Method 1: plain 'for' loop.
# For every position i, look at the elements strictly after i and keep the
# first one that is "A", "B" or "C" (NA when there is none). The vector is
# re-sliced on every iteration on purpose: this is the naive baseline whose
# result the final equality check uses as its target.
start <- Sys.time()
# Typed NA keeps the result a character vector even if nothing ever matches.
result1 <- rep(NA_character_, length(x))
# seq_along() rather than 1:length(x): the latter yields c(1, 0) for empty x
# and would grow result1 to length 1.
for (i in seq_along(x)) {
  tmp <- x[-seq_len(i)] # everything after position i
  result1[i] <- tmp[tmp %in% c("A", "B", "C")][1]
}
end <- Sys.time()
time1 <- end - start
# Method 2: the same forward scan, written in C++ through Rcpp.
# nextABC() walks forward from i + 1 and stops at the first "A"/"B"/"C".
# The function is compiled before the clock starts, so only the call and the
# NA clean-up are timed.
library(Rcpp)
cppFunction('CharacterVector nextABC(CharacterVector x) {
int n = x.size();
CharacterVector result(n);
for(int i = 0; i < n; i++) {
for(int j = i+1; j < n; j++) {
if(x[j] == "A" || x[j] == "B" || x[j] == "C") {
result[i] = x[j];
break;
}
}
}
return result;
}')
start <- Sys.time()
result2 <- nextABC(x)
# Positions the C++ loop never assigned come back as ""; turn those into NA
# so the result lines up with the other methods.
is.na(result2) <- result2 == ""
end <- Sys.time()
time2 <- end - start
# Method 3: per-index lookup over the positions of "A"/"B"/"C"
# (vapply(), the type-stable form of sapply()).
start <- Sys.time()
# Positions in x that hold one of the target letters.
goods <- which(x %in% c("A", "B", "C"))
# For each i pick the smallest target position strictly greater than i.
# length(x) + 1 is the fallback when none is left: indexing x one past its
# end yields NA. vapply() with numeric(1) guarantees a numeric index vector
# even for empty x, where sapply() would return list() and break x[nexti].
nexti <- vapply(
  seq_along(x),
  function(i) min(goods[goods > i], length(x) + 1),
  numeric(1)
)
result3 <- x[nexti]
end <- Sys.time()
time3 <- end - start
# Method 4: dplyr/tidyr pipeline.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
start <- Sys.time()
df <- as.data.frame(x, stringsAsFactors = FALSE)
# Keep only the target letters (everything else becomes NA), shift the column
# up by one with lead() so a position never sees its own value, then fill the
# gaps upwards so each row receives the next target letter strictly after it.
# Without the shift, rows that are themselves "A"/"B"/"C" would report their
# own letter, which disagrees with the other methods.
result4 <-
  df %>%
  mutate(new = lead(ifelse(x %in% c("A", "B", "C"), x, NA_character_))) %>%
  fill(new, .direction = "up") %>%
  pull(new)
end <- Sys.time()
time4 <- end - start
# Method 5: fully vectorised lookup with cumsum().
start <- Sys.time()
# TRUE where x holds one of the target letters.
abc <- x %in% c("A", "B", "C")
# cumsum(abc)[i] counts the targets at positions 1..i (i included), so adding
# one gives the rank of the first target strictly after i. Counting only the
# targets before i would make a target position select itself.
qq <- cumsum(abc) + 1L
# which(abc)[qq] is NA once no later target exists, and x[NA] is NA.
result5 <- x[which(abc)[qq]]
end <- Sys.time()
time5 <- end - start
time1 # 'for' loop
## Time difference of 2.819571 secs
time2 # Rcpp
## Time difference of 0.008967876 secs
time3 # sapply
## Time difference of 0.1276851 secs
time4 # dplyr
## Time difference of 0.08501935 secs
time5 # cumsum
## Time difference of 0.003102779 secs
# all.equal() compares only its first two arguments (target, current); any
# further arguments are passed on as options, so a single five-argument call
# would check result1 against result2 and nothing else. Compare every method
# with the 'for' loop reference individually instead.
vapply(
  list(
    result2 = result2,
    result3 = result3,
    result4 = result4,
    result5 = result5
  ),
  function(res) isTRUE(all.equal(result1, res)),
  logical(1)
)
# Every entry should be TRUE; a FALSE names a method whose output differs
# from result1.