Row-by-row?

Row-by-row calculations can be a bit slow in R…

## Toy input: column V3 packs one or more values into a ";"-delimited string.
## The same data frame is bound to two names because the AM_* functions read
## `mydf` while the J_* functions read `inDF`.
mydf <- inDF <- data.frame(
    V1 = c("a", "c", "d", "f"),
    V2 = c("ha", "hb", "hc", "hd"),
    V3 = c("1;2;3", "4", "5;6", "7"),
    V4 = c("A", "B", "C", "D"),
    row.names = c("1", "2", "3", "4"),
    stringsAsFactors = FALSE
)
mydf
##   V1 V2    V3 V4
## 1  a ha 1;2;3  A
## 2  c hb     4  B
## 3  d hc   5;6  C
## 4  f hd     7  D

Our benchmarking functions:

## Expand the global data frame `mydf` so that every ";"-separated value in
## column V3 gets its own row; the other columns are repeated alongside it.
##
## Takes no arguments (reads `mydf` from the enclosing environment) and
## returns the expanded data frame with V3 as a character column.
AM_manual <- function() {
    tokens <- strsplit(mydf$V3, ";", fixed = TRUE)
    ## lengths() always returns an integer vector, whereas sapply(x, length)
    ## returns an empty list when there is nothing to iterate over.
    expanded <- mydf[rep(seq_along(tokens), lengths(tokens)), ]
    ## unlist() of an empty list is NULL, and assigning NULL to a column would
    ## delete it; as.character() turns that into character(0) instead.
    expanded$V3 <- as.character(unlist(tokens, use.names = FALSE))
    expanded
}
library(splitstackshape)
## Loading required package: data.table
## splitstackshape-based variant: asks concat.split.multiple() to split column
## V3 of the global `mydf` on ";" in "long" direction, keeps only rows without
## any missing value, and returns the rows ordered by V1.
AM_csm <- function() {
    long <- concat.split.multiple(mydf, split.cols = "V3", seps = ";", direction = "long")
    ## Dropping incomplete rows may not be strictly required.
    long <- long[complete.cases(long), ]
    long[order(long$V1), ]
}

## Row-by-row variant that builds a fresh data frame per input row.
##
## Reads the global data frame `inDF`, splits it into one-row pieces, expands
## each piece into one row per ";"-separated value of V3, and row-binds the
## pieces. Takes no arguments; returns a data frame with columns V1, V2, V3, V4.
J_df <- function() {
    ## Nothing to expand for a zero-row input: return an empty frame with the
    ## same column layout as the normal result.
    if (nrow(inDF) == 0L) {
        return(inDF[0L, c("V1", "V2", "V3", "V4"), drop = FALSE])
    }
    ## seq_len() gives exactly one group per row.
    spl <- split(inDF, seq_len(nrow(inDF)))
    transformed <- lapply(spl, function(x) {
        ## The length-one V1/V2/V4 values are recycled to the number of V3 tokens.
        data.frame(V1 = x$V1, V2 = x$V2, V3 = strsplit(x$V3, ";")[[1]], V4 = x$V4)
    })
    ## Return the combined frame as the function value rather than binding it
    ## to an unused local, which made the result invisible.
    do.call(rbind, transformed)
}

## Row-by-row variant that reuses each one-row slice via cbind().
##
## Reads the global data frame `inDF`, splits it into one-row pieces, binds
## the V1/V2/V4 columns of each piece to the ";"-separated values of its V3
## entry, and row-binds the pieces. Takes no arguments; returns a data frame
## with columns in the order V1, V2, V4, V3.
J_cbind <- function() {
    ## Nothing to expand for a zero-row input: return an empty frame with the
    ## same column layout as the normal result.
    if (nrow(inDF) == 0L) {
        return(inDF[0L, c("V1", "V2", "V4", "V3"), drop = FALSE])
    }
    ## seq_len() gives exactly one group per row.
    spl <- split(inDF, seq_len(nrow(inDF)))
    do.call(rbind, lapply(spl, function(x) {
        ## row.names = NULL discards the slice's own row name, which would
        ## otherwise be duplicated when the single row is recycled.
        cbind(x[, c("V1", "V2", "V4")], V3 = strsplit(x[, "V3"], ";", fixed = TRUE)[[1]],
            row.names = NULL)
    }))
}

The benchmarking:

library(microbenchmark)
## Time all four approaches on the four-row `mydf`/`inDF`. No `times`
## argument is passed; the recorded output below reports neval = 100.
microbenchmark(AM_manual(), AM_csm(), J_df(), J_cbind())
## Unit: microseconds
##         expr   min      lq  median      uq   max neval
##  AM_manual()   278   305.1   319.1   338.9  3413   100
##     AM_csm() 11804 11947.2 12040.0 13434.9 26721   100
##       J_df()  9484  9625.2  9708.9  9835.8 12432   100
##    J_cbind()  4822  4930.5  4973.4  5048.0  6796   100

concat.split.multiple appears to be the slowest, but at roughly 12,000 microseconds (about 12 milliseconds) per call on this four-row input, I don't think most people would bat an eye. Now, let's make things a little bit more interesting and see how well the approaches fare on a larger data frame:

## Keep a copy of the four-row data frame before `mydf` is overwritten below.
backup <- mydf

## 4,000 rows: stack 1,000 copies of the four-row frame and rebind both
## `mydf` and `inDF`, the globals that the benchmark functions read.
mydf <- inDF <- do.call(rbind, replicate(1000, mydf, simplify = FALSE))

## The two row-by-row versions are timed with a single run each.
system.time(J_df())
##    user  system elapsed 
##  10.936   0.016  11.064
system.time(J_cbind())
##    user  system elapsed 
##   5.631   0.016   5.823

## The two AM_* versions are timed with 20 repetitions each.
microbenchmark(AM_manual(), AM_csm(), times = 20)
## Unit: milliseconds
##         expr    min     lq median     uq    max neval
##  AM_manual()  11.14  11.57  14.38  15.69  70.89    20
##     AM_csm() 241.29 248.04 258.63 292.78 392.48    20