Filtering a large number of rows

library(data.table)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:data.table':
## 
##     last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(microbenchmark)
# Reproducible benchmark data: 1e7 rows, each with a person id drawn (with
# replacement) from 1:10000 and a uniform random value.
set.seed(1)
mydf <- data.frame(
    person = sample(x = 10000, size = 1e+07, replace = TRUE),
    value = runif(n = 1e+07)
)
# The same rows as a data.table, used by the data.table variant below.
DT <- data.table(mydf)

# Shared by every benchmarked function:
# A holds the distinct person ids that occur in the data, and B is a random
# selection of 10% of them (rounded up, drawn without replacement).
A <- unique(mydf[["person"]])
B <- sample(x = A, size = ceiling(0.1 * length(A)), replace = FALSE)

# Base R, logical-mask variant: keep the rows of the global `mydf` whose
# `person` value occurs in the global vector `B`. Returns a data.frame.
fun1a <- function() {
    # One TRUE/FALSE flag per row of mydf.
    keep_mask <- mydf$person %in% B
    mydf[keep_mask, ]
}
# Base R, integer-index variant: same selection as the logical-mask version,
# but the mask is first converted to row positions with which().
# Returns a data.frame.
fun1b <- function() {
    # Positions of the rows whose person id is a member of B.
    hit_rows <- which(mydf$person %in% B)
    mydf[hit_rows, ]
}
# dplyr variant: keep the rows of the global `mydf` whose `person` column
# value occurs in the global vector `B`.
fun2 <- function() {
    # Namespace-qualified so the call cannot resolve to stats::filter.
    dplyr::filter(.data = mydf, person %in% B)
}
# data.table variant: keep the rows of the global data.table `DT` whose
# `person` column value occurs in the global vector `B`.
fun3 <- function() {
    # `person` is looked up as a column of DT inside the brackets; which()
    # turns the membership flags into row positions.
    DT[which(person %in% B), ]
}

# Time each filtering strategy, 20 evaluations per expression.
microbenchmark(
    fun1a(),
    fun1b(),
    fun2(),
    fun3(),
    times = 20
)
## Unit: milliseconds
##     expr    min     lq median     uq    max neval
##  fun1a() 1752.0 1791.8 1799.8 1828.0 2230.9    20
##  fun1b()  711.7  744.9  774.6  811.9 1022.1    20
##   fun2()  673.4  692.2  718.5  750.4  981.0    20
##   fun3()  647.7  659.9  668.4  710.6  744.3    20

# Check that the base and dplyr variants select the same data.
# Only column contents are compared: fun1b() keeps the original row names of
# mydf, and the dplyr result is not guaranteed to carry the same ones, so a
# default all.equal() can report an attribute mismatch even when every value
# agrees.
all.equal(fun1b(), fun2(), check.attributes = FALSE)
## [1] TRUE

# Check that the base and data.table variants select the same data.
# Only column contents are compared: fun3() returns a data.table, whose class
# and row names differ from the data.frame returned by fun1b(), so a default
# all.equal() can report attribute mismatches even when every value agrees.
all.equal(fun1b(), fun3(), check.attributes = FALSE)
## [1] TRUE