---
title: "Speed-up tidySummarizedExperiment with plyxp"
author: "Stefano Mangiola"
contributors:
  - Michael Love
  - Justin Landis
output:
  prettydoc::html_pretty:
    theme: cayman
    toc: yes
    toc_depth: 2
    number_sections: yes
    fig_caption: yes
    df_print: paged
vignette: >
  %\VignetteIndexEntry{Speed-up tidySummarizedExperiment with plyxp}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---
1 Motivation and design principles
This benchmark supports ongoing work to improve mutate() execution paths, including exploring plyxp-backed execution for mixed-slot operations (see issue: Attempt using plyxp for some cases in tidySummarizedExperiment). The current proposal is grounded in three principles:
- Decompose operation series: break mutate(a=..., b=..., c=...) into single operations for simpler handling and clearer routing. Reference implementation in R/mutate.R (decomposition step) at L146.
- Analyze scope: infer whether each expression targets colData, rowData, assays, or a mix (noting that the current analyser is likely over-engineered and could be simplified). See L149.
- Route mixed operations via plyxp: when an expression touches multiple slots, prefer the plyxp path for correctness and performance. See L155.
These design choices aim to preserve dimnames, avoid unnecessary tibble round-trips, and provide predictable performance across simple and mixed-slot scenarios.
2 Overview
This vignette benchmarks a set of mutate() scenarios on two branches of the repository, by explicitly checking out the branches via git worktree, loading each branch’s code with devtools::load_all(), running the same scenarios multiple times, and comparing the runtimes with ggplot boxplots.
- Branch A: origin/master
- Branch B: origin/query-to-slot-routines
Requirements: git, devtools, airway, dplyr, SummarizedExperiment, ggplot2, rlang, microbenchmark, future, future.apply.
3 Setup helper functions
suppressPackageStartupMessages({
library(ggplot2)
library(dplyr)
library(SummarizedExperiment)
library(rlang)
library(devtools)
library(airway)
library(microbenchmark)
library(future.apply)
})
#> Warning: package 'GenomeInfoDb' was built under R version 4.5.1
#> Warning: package 'future' was built under R version 4.5.1
# Create (or recreate) a detached git worktree for `branch_ref` at
# `worktree_dir`.
#
# Args:
#   branch_ref:   git ref to check out, e.g. "origin/master".
#   worktree_dir: directory in which the worktree is created.
#
# Any worktree already registered at `worktree_dir` is force-removed first.
# Stops with an error if `git worktree add` exits with a non-zero status.
# Returns NULL invisibly.
ensure_worktree <- function(branch_ref, worktree_dir) {
  # Each git command is run on its own through system2(). Embedding
  # shQuote()d paths inside a single-quoted `sh -c '...'` string does not
  # work: the inner quotes terminate the outer ones, leaving paths that
  # contain spaces or shell metacharacters effectively unquoted.
  run_git <- function(args, quiet = FALSE) {
    sink_to <- if (quiet) FALSE else ""
    system2("git", args, stdout = sink_to, stderr = sink_to)
  }
  dir_arg <- shQuote(worktree_dir)

  # Best effort: a failed fetch does not abort the setup.
  run_git(c("fetch", "origin", "--prune"))
  # Best effort: removal is expected to fail when no worktree exists yet,
  # so its output is discarded and its status ignored.
  run_git(c("worktree", "remove", "-f", dir_arg), quiet = TRUE)

  status <- run_git(
    c("worktree", "add", "--detach", dir_arg, shQuote(branch_ref))
  )
  if (status != 0) {
    stop(sprintf("Failed to create worktree for %s", branch_ref))
  }
  invisible(NULL)
}
# Load the package source found in `worktree_dir` into the current session
# via devtools::load_all(), silencing its messages.
#
# Args:
#   worktree_dir: path to a checked-out copy of the package.
#
# Stops with an error if devtools is not installed.
load_branch_code <- function(worktree_dir) {
  has_devtools <- requireNamespace("devtools", quietly = TRUE)
  if (!has_devtools) {
    stop("Please install devtools to run this vignette.")
  }
  suppressMessages(
    devtools::load_all(worktree_dir, quiet = TRUE)
  )
}
# Build the benchmark input: the first 200 rows of the `airway` dataset.
#
# Returns the subsetted object produced by `airway[1:200, ]`.
create_airway_test_se <- function() {
  # Attaching airway also brings in the packages it depends on, which are
  # needed for subsetting the dataset object.
  suppressPackageStartupMessages(library(airway))
  # data() writes into .GlobalEnv by default; load into a private environment
  # instead so calling this helper does not create or overwrite a global
  # `airway` object.
  data_env <- new.env(parent = emptyenv())
  utils::data("airway", package = "airway", envir = data_env)
  se <- data_env[["airway"]]
  se[seq_len(200L), ]
}
# Return the named list of benchmark scenarios.
#
# Each element is a quosure wrapping one pipeline applied to an object named
# `se`; the list names are used as scenario labels in the results.
benchmark_scenarios <- function() {
  list(
    # Mutate benchmarks
    coldata_simple_assignment = quo({
      se %>% mutate(new_dex = dex)
    }),
    coldata_arithmetic = quo({
      se %>% mutate(avgLength_plus_5 = avgLength + 5)
    }),
    coldata_concat = quo({
      se %>% mutate(sample_info = paste(cell, dex, SampleName, sep = "_"))
    }),
    coldata_grouped_mean = quo({
      se %>%
        group_by(dex) %>%
        mutate(avgLength_group_mean = mean(avgLength)) %>%
        ungroup()
    }),
    assay_simple_assignment = quo({
      se %>% mutate(counts_copy = counts)
    }),
    assay_plus_one = quo({
      se %>% mutate(counts_plus_1 = counts + 1)
    }),
    assay_log = quo({
      se %>% mutate(log_counts_manual = log2(counts + 1))
    }),
    complex_conditional_coldata = quo({
      se %>%
        mutate(length_group = ifelse(avgLength > mean(avgLength), "longer", "shorter"))
    }),
    complex_nested = quo({
      se %>%
        mutate(complex_category = ifelse(dex == "trt" & avgLength > mean(avgLength), "treated_long", ifelse(dex == "untrt", "untreated", "other")))
    }),
    mixed_assay_coldata = quo({
      se %>% mutate(new_counts = counts * avgLength)
    }),
    multiple_simple_assay = quo({
      se %>% mutate(normalized_counts = counts / 1000, sqrt_counts = sqrt(counts))
    }),
    chained_mutates = quo({
      se %>%
        mutate(tmp = avgLength * 2) %>%
        mutate(flag = ifelse(tmp > mean(tmp), 1, 0))
    }),

    # Filter benchmarks (scoped and non-rectangular)
    filter_coldata_simple = quo({
      se %>% filter(dex == "trt")
    }),
    filter_coldata_numeric = quo({
      se %>% filter(avgLength > median(avgLength))
    }),
    filter_assay_nonrect = quo({
      se %>% filter(counts > 0)
    }),

    # Select benchmarks (covering colData-only, rowData-only, assays-only, mixed)
    select_coldata_simple = quo({
      se %>% select(.sample, dex)
    }),
    select_rowdata_simple = quo({
      se %>% select(.feature)
    }),
    select_assay_only = quo({
      se %>% select(counts)
    }),
    select_mixed_keys_counts = quo({
      se %>% select(.sample, .feature, counts)
    }),
    select_coldata_wide = quo({
      se %>% select(.sample, dex, avgLength, SampleName)
    })
  )
}
# Time a single scenario with microbenchmark.
#
# Args:
#   expr_quo: a quosure whose expression refers to an object named `se`.
#   reps:     number of timed evaluations.
#
# Returns a numeric vector of elapsed times in milliseconds, one per
# evaluation.
run_one <- function(expr_quo, reps = 5L) {
  # Build the input once, outside the timed expression.
  se_base <- create_airway_test_se()
  mb <- microbenchmark::microbenchmark(
    # A quosure is evaluated in the environment where it was created, so a
    # variable assigned in this function's frame is not visible to it.
    # Supplying `se` through the data mask guarantees every scenario runs on
    # `se_base` rather than on whatever `se` happens to be reachable from the
    # quosure's own environment.
    rlang::eval_tidy(expr_quo, data = list(se = se_base)),
    times = reps,
    control = list(warmup = 2L)
  )
  # microbenchmark returns nanoseconds; convert to milliseconds
  as.numeric(mb$time) / 1e6
}
# Run every benchmark scenario sequentially and collect the timings.
#
# Args:
#   branch_label: label stored in the `branch` column of the result.
#   reps:         number of timed evaluations per scenario.
#
# Returns one data frame with columns `branch`, `scenario`, `replicate` and
# `elapsed_ms`, holding one row per timed evaluation.
run_all_scenarios <- function(branch_label, reps = 7L) {
  scenarios <- benchmark_scenarios()
  per_scenario <- lapply(names(scenarios), function(scenario_name) {
    timings_ms <- run_one(scenarios[[scenario_name]], reps = reps)
    data.frame(
      branch = branch_label,
      scenario = scenario_name,
      replicate = seq_along(timings_ms),
      elapsed_ms = timings_ms,
      stringsAsFactors = FALSE
    )
  })
  bind_rows(per_scenario)
}
# Parallel version: run each scenario through future.apply on a multisession
# plan.
#
# Args:
#   branch_label: label stored in the `branch` column of the result.
#   reps:         number of timed evaluations per scenario.
#   workers:      number of multisession workers.
#   initializer:  optional zero-argument function called before each scenario
#                 is run (e.g. to load the branch code); NULL to skip.
#
# Returns one data frame with columns `branch`, `scenario`, `replicate` and
# `elapsed_ms`. The future plan that was active on entry is restored on exit.
run_all_scenarios_parallel <- function(branch_label, reps = 20L, workers = 1L, initializer = NULL) {
  # Fail early with a clear message instead of inside a worker.
  if (!is.null(initializer) && !is.function(initializer)) {
    stop("`initializer` must be NULL or a function.", call. = FALSE)
  }
  scenarios <- benchmark_scenarios()
  nms <- names(scenarios)
  old_plan <- future::plan()
  on.exit(future::plan(old_plan), add = TRUE)
  future::plan(future::multisession, workers = workers)
  res <- future.apply::future_lapply(nms, function(nm) {
    if (!is.null(initializer)) initializer()
    tms <- run_one(scenarios[[nm]], reps = reps)
    data.frame(
      branch = branch_label,
      scenario = nm,
      replicate = seq_along(tms),
      elapsed_ms = tms,
      stringsAsFactors = FALSE
    )
  }, future.seed = TRUE)
  dplyr::bind_rows(res)
}
4 Prepare explicit worktrees for both branches
# Worktree locations, placed next to the current working directory.
wt_a <- normalizePath(
  "../tidySummarizedExperiment.__bench_master__",
  mustWork = FALSE
)
wt_b <- normalizePath(
  "../tidySummarizedExperiment.__bench_query_to_slot__",
  mustWork = FALSE
)

# Git refs under comparison.
branch_a_ref <- "origin/master"
branch_b_ref <- "origin/query-to-slot-routines"

# Check out branch A into its worktree.
ensure_worktree(branch_a_ref, wt_a)
ensure_worktree(branch_b_ref, wt_b)
5 Run benchmarks on both branches
# Branch A: load the code in this session, then benchmark on parallel workers
load_branch_code(wt_a)
#> Warning: package 'ttservice' was built under R version 4.5.1
# Passed as `initializer` so the branch A code is also loaded where the
# scenarios are executed.
init_a <- function() {
  suppressMessages(devtools::load_all(wt_a, quiet = TRUE))
}
res_a <- run_all_scenarios_parallel(
  branch_label = "master",
  reps = 20L,
  workers = 8L,
  initializer = init_a
)
#> Warning: package ‘future’ was built under R version 4.5.1
#> Warning: package ‘GenomeInfoDb’ was built under R version 4.5.1
#> Warning: package ‘ttservice’ was built under R version 4.5.1
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning: package ‘future’ was built under R version 4.5.1
#> Warning: package ‘GenomeInfoDb’ was built under R version 4.5.1
#> Warning: package ‘ttservice’ was built under R version 4.5.1
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning: package ‘future’ was built under R version 4.5.1
#> Warning: package ‘GenomeInfoDb’ was built under R version 4.5.1
#> Warning: package ‘ttservice’ was built under R version 4.5.1
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning: package ‘future’ was built under R version 4.5.1
#> Warning: package ‘GenomeInfoDb’ was built under R version 4.5.1
#> Warning: package ‘ttservice’ was built under R version 4.5.1
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning: package ‘future’ was built under R version 4.5.1
#> Warning: package ‘GenomeInfoDb’ was built under R version 4.5.1
#> Warning: package ‘ttservice’ was built under R version 4.5.1
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning: package ‘future’ was built under R version 4.5.1
#> Warning: package ‘GenomeInfoDb’ was built under R version 4.5.1
#> Warning: package ‘ttservice’ was built under R version 4.5.1
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> Warning: package ‘future’ was built under R version 4.5.1
#> Warning: package ‘GenomeInfoDb’ was built under R version 4.5.1
#> Warning: package ‘ttservice’ was built under R version 4.5.1
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> Warning: package ‘future’ was built under R version 4.5.1
#> Warning: package ‘GenomeInfoDb’ was built under R version 4.5.1
#> Warning: package ‘ttservice’ was built under R version 4.5.1
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
# Branch B: load the code in this session, then benchmark on parallel workers
load_branch_code(wt_b)
# Passed as `initializer` so the branch B code is also loaded where the
# scenarios are executed.
init_b <- function() suppressMessages(devtools::load_all(wt_b, quiet = TRUE))
# Use the same worker count as the branch A run (workers = 8L) so both
# branches are timed under the same machine load; a different degree of
# parallelism would bias the comparison.
res_b <- run_all_scenarios_parallel(branch_label = "query-to-slot-routines", reps = 20L, workers = 8L, initializer = init_b)
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: A data frame is returned for independent data analysis.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> tidySummarizedExperiment says: The resulting data frame is not rectangular (all genes for all samples), a tibble is returned for independent data analysis.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> Warning in microbenchmark::microbenchmark(eval_tidy(expr_quo), times = reps, :
#> Could not measure overhead. Your clock might lack precision.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
# Stack the timings from both branches into one table and label every run
# with the verb its scenario exercises. The label comes from the prefix of
# the scenario name; anything that is neither a filter nor a select scenario
# is counted as a mutate scenario.
results <- dplyr::mutate(
  dplyr::bind_rows(res_a, res_b),
  operation = dplyr::case_when(
    grepl("^filter", scenario) ~ "filter",
    grepl("^select", scenario) ~ "select",
    .default = "mutate"
  )
)
# Median runtime (ms) for every branch/scenario pair ...
median_runtimes <- dplyr::summarise(
  dplyr::group_by(results, branch, scenario),
  median_ms = median(elapsed_ms),
  .groups = "drop"
)
# ... reshaped to one row per scenario with one column per branch, so the
# two branches can be compared side by side.
summary_table <- tidyr::pivot_wider(
  median_runtimes,
  names_from = branch,
  values_from = median_ms
)
summary_table

6 Visualise with ggplot boxplots
# Shared dodge width so the jittered points line up with their boxplots.
dodge_w <- 0.7

# Runtime distribution per scenario, coloured by branch, one facet row per
# operation type (filter / select / mutate).
ggplot(results, aes(x = scenario, y = elapsed_ms, fill = branch)) +
  # Outliers are hidden on the boxes because every run is drawn as a point
  # by the next layer.
  geom_boxplot(
    position = position_dodge(width = dodge_w),
    width = 0.7,
    outlier.shape = NA
  ) +
  # Add jittered points aligned with the dodged boxplots
  geom_point(
    position = position_jitterdodge(
      jitter.width = 0.1,
      jitter.height = 0,
      dodge.width = dodge_w
    ),
    alpha = 0.6,
    size = 0.5
  ) +
  # Runtimes are shown on a log10 scale.
  scale_y_log10() +
  # Flip so scenario names read horizontally and elapsed time runs along
  # the horizontal axis.
  coord_flip() +
  facet_grid(operation ~ ., scales = "free_y", space = "free_y") +
  # Log tick marks along the bottom edge of each panel.
  annotation_logticks(sides = "b") +
  labs(
    title = "Mutate, Filter, and Select benchmark across branches",
    x = "Scenario",
    y = "Elapsed (ms)"
  ) +
  theme_bw() +
  # Angle the tick labels on the horizontal axis
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Record the R and package versions used for this benchmark run.
sessionInfo()
#> R version 4.5.0 (2025-04-11)
#> Platform: x86_64-apple-darwin20
#> Running under: macOS Sonoma 14.6.1
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> time zone: Australia/Adelaide
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] tidySummarizedExperiment_1.19.7 tidyr_1.3.1
#> [3] testthat_3.2.3 ttservice_0.5.3
#> [5] future.apply_1.20.0 future_1.67.0
#> [7] microbenchmark_1.5.0 airway_1.28.0
#> [9] devtools_2.4.5 usethis_3.1.0
#> [11] rlang_1.1.6 SummarizedExperiment_1.38.1
#> [13] Biobase_2.68.0 GenomicRanges_1.60.0
#> [15] GenomeInfoDb_1.44.2 IRanges_2.42.0
#> [17] S4Vectors_0.46.0 BiocGenerics_0.54.0
#> [19] generics_0.1.4 MatrixGenerics_1.20.0
#> [21] matrixStats_1.5.0 dplyr_1.1.4
#> [23] ggplot2_3.5.2
#>
#> loaded via a namespace (and not attached):
#> [1] tidyselect_1.2.1 viridisLite_0.4.2 farver_2.1.2
#> [4] S7_0.2.0 lazyeval_0.2.2 fastmap_1.2.0
#> [7] promises_1.3.3 plyxp_1.2.7 digest_0.6.37
#> [10] mime_0.13 lifecycle_1.0.4 ellipsis_0.3.2
#> [13] magrittr_2.0.3 compiler_4.5.0 sass_0.4.10
#> [16] tools_4.5.0 yaml_2.3.10 data.table_1.17.8
#> [19] knitr_1.50 S4Arrays_1.8.1 htmlwidgets_1.6.4
#> [22] pkgbuild_1.4.8 DelayedArray_0.34.1 RColorBrewer_1.1-3
#> [25] pkgload_1.4.0 abind_1.4-8 miniUI_0.1.2
#> [28] withr_3.0.2 purrr_1.1.0 desc_1.4.3
#> [31] grid_4.5.0 fansi_1.0.6 urlchecker_1.0.1
#> [34] profvis_0.4.0 xtable_1.8-4 globals_0.18.0
#> [37] scales_1.4.0 cli_3.6.5 rmarkdown_2.29
#> [40] crayon_1.5.3 remotes_2.5.0 rstudioapi_0.17.1
#> [43] httr_1.4.7 sessioninfo_1.2.3 cachem_1.1.0
#> [46] stringr_1.5.1 parallel_4.5.0 XVector_0.48.0
#> [49] vctrs_0.6.5 Matrix_1.7-3 jsonlite_2.0.0
#> [52] listenv_0.9.1 plotly_4.11.0 jquerylib_0.1.4
#> [55] glue_1.8.0 parallelly_1.45.1 codetools_0.2-20
#> [58] stringi_1.8.7 gtable_0.3.6 later_1.4.4
#> [61] UCSC.utils_1.4.0 tibble_3.3.0 pillar_1.11.0
#> [64] brio_1.1.5 htmltools_0.5.8.1 GenomeInfoDbData_1.2.14
#> [67] R6_2.6.1 rprojroot_2.1.0 evaluate_1.0.5
#> [70] shiny_1.11.1 lattice_0.22-7 memoise_2.0.1
#> [73] httpuv_1.6.16 bslib_0.9.0 Rcpp_1.1.0
#> [76] SparseArray_1.8.1 xfun_0.53 fs_1.6.6
#> [79] prettydoc_0.4.1 pkgconfig_2.0.3