# The data frame contains 10,000,000 rows and three columns of random integers from 1 to 100.
library(dplyr)
# Draw three columns of 10,000,000 integers each from 1..100. Sampling is
# done with replacement, so any value may appear more than once in a column.
col1 <- sample(seq_len(100), size = 1e7, replace = TRUE)
col2 <- sample(seq_len(100), size = 1e7, replace = TRUE)
col3 <- sample(seq_len(100), size = 1e7, replace = TRUE)

# Unnamed arguments are deparsed, so the columns are named col1, col2, col3.
df <- data.frame(col1, col2, col3)
head(df)
# Benchmark 1: classify every row of `df` with dplyr::case_when().
#
# `ELSE` is a readability alias for TRUE; used as the last case_when()
# condition it acts as the catch-all branch.
ELSE <- TRUE

start.time <- Sys.time()
# mutate() already evaluates its expressions against the columns of the piped
# data frame, so no explicit `.` argument or with() wrapper is needed.
# Conditions are tried in order and the first one that holds wins:
#   1 -> col1 > 20 and col2 < 50 and col3 > 20
#   2 -> col1 < 15, or (col2 > 50 and col3 > 30)
#   3 -> everything else
df2 <- df %>%
  mutate(
    result = case_when(
      col1 > 20 & col2 < 50 & col3 > 20 ~ 1,
      col1 < 15 | (col2 > 50 & col3 > 30) ~ 2,
      ELSE ~ 3
    )
  )
end.time <- Sys.time()

# Elapsed wall-clock time of the case_when() step only.
time.taken <- end.time - start.time
head(df2)
time.taken
## Time difference of 1.565432 secs
# Benchmark 2: the same classification as a nested base-R ifelse().
df3 <- df

start.time <- Sys.time()
# Evaluate the conditions against df3's own columns via with(), rather than
# against free-standing global vectors that merely share the column names;
# the result then stays correct even if those globals are changed or removed.
#   1 -> col1 > 20 and col2 < 50 and col3 > 20
#   2 -> col1 < 15, or (col2 > 50 and col3 > 30)
#   3 -> everything else
df3$result <- with(
  df3,
  ifelse(
    col1 > 20 & col2 < 50 & col3 > 20,
    1,
    ifelse(col1 < 15 | (col2 > 50 & col3 > 30), 2, 3)
  )
)
end.time <- Sys.time()

# Elapsed wall-clock time of the ifelse() step only.
time.taken <- end.time - start.time
head(df3)
time.taken
## Time difference of 1.96998 secs