library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# C++ sources and headers found by listing the src directory.
files <- fs::dir_ls("src", regexp = "[.]cpp$|[.]h$")

# One row per distinct <boost/...> header that is #included anywhere in
# `files`, with the number of occurrences (`n`), sorted by descending count
# and numbered with a `rowid` column.
boost_includes <-
  tibble(path = files) %>%
  mutate(
    # Raw lines of every file.
    data = map(path, brio::read_lines),
    # Captured header path of each matching #include line; lines that do
    # not match yield NA and are dropped.
    include = map(
      data,
      function(lines) {
        hits <- str_match(lines, "#include +[<](boost/[^>]+)>")
        na.omit(hits[, 2])
      }
    )
  ) %>%
  unnest(include) %>%
  count(include, sort = TRUE) %>%
  rowid_to_column()
# Header subsets to measure, expressed as vectors of `boost_includes` row ids.
# `use` holds the subset sizes to enumerate: here size 1 (each header on its
# own) and size `total` (all headers together). Each resulting subset gets a
# sequential `comb_id`.
all_combs <-
  # Alternative choices of subset sizes, kept for experimentation:
  #tibble(total = nrow(boost_includes), use = seq.int(total, total)) %>%
  #tibble(total = nrow(boost_includes), use = seq.int(total, total - 3)) %>%
  #tibble(total = nrow(boost_includes), use = c(1, seq.int(total, total - 3))) %>%
  tibble(total = nrow(boost_includes), use = c(1, total)) %>%
  mutate(
    combs = map2(
      total,
      use,
      function(pool_size, subset_size) {
        suppressWarnings(
          combinat::combn(pool_size, subset_size, simplify = FALSE)
        )
      }
    )
  ) %>%
  # One row per subset; `combs` stays a list column of id vectors.
  unnest(combs) %>%
  mutate(comb_id = seq_len(n()))
# Measure the footprint of a set of Boost headers plus whatever `bcp` copies
# for them.
#
# `includes` is a character vector of header paths relative to the Boost
# include directory (e.g. "boost/shared_ptr.hpp"). The function runs the
# `bcp` command-line tool in `--scan` mode on those headers, writing into a
# temporary directory that is deleted when the function exits.
# NOTE(review): presumably `bcp` is Boost's copy utility and the output
# directory ends up holding the dependency closure of the scanned headers;
# confirm against the bcp documentation.
#
# Returns a one-row tibble with
#   size  - total bytes of the scanned headers plus everything under the
#           output directory,
#   count - number of scanned headers plus number of entries under the
#           output directory.
# Signals an error if `bcp` exits with a non-zero status.
bh_size <- function(includes) {
  # Scratch directory for bcp's output; cleaned up on exit, even on error.
  out_dir <- fs::file_temp()
  fs::dir_create(out_dir)
  withr::defer(fs::dir_delete(out_dir))

  boost_dir <- "../../bh/inst/include"
  scan <- fs::path(boost_dir, includes)

  # Quote every path so that spaces or shell metacharacters in the temp
  # directory or header names cannot break (or alter) the command line.
  command <- paste(
    "bcp",
    paste0("--boost=", shQuote(boost_dir)),
    "--scan",
    paste(shQuote(as.character(scan)), collapse = " "),
    shQuote(as.character(out_dir))
  )
  bcp_result <- system(command, intern = TRUE, ignore.stderr = TRUE)

  # With intern = TRUE the "status" attribute is only set when the command
  # exits non-zero, so a missing attribute means success.
  status <- attr(bcp_result, "status")
  if (!is.null(status) && status != 0) {
    stop("bcp exited with status ", status, call. = FALSE)
  }

  input <- fs::file_info(scan)
  output <- fs::dir_info(out_dir, recurse = TRUE)

  tibble(
    size = sum(input$size) + sum(output$size),
    count = length(input$size) + length(output$size)
  )
}
# Build a short human-readable label for a subset of Boost headers.
#
# x            - character vector of selected header paths.
# all_includes - the full set of header paths the subset was drawn from.
#                Defaults to the `include` column of the global
#                `boost_includes` table, so existing one-argument calls
#                behave as before; passing it explicitly removes the
#                dependency on that global.
#
# Returns a single string:
#   * the selected headers, comma-separated, when they make up at most half
#     of `all_includes`;
#   * "all" when the subset has as many elements as `all_includes`;
#   * otherwise the headers that are NOT selected, each prefixed with "-".
# Every literal "boost/" is removed from the label.
summarize_include <- function(x, all_includes = boost_includes$include) {
  if (length(x) * 2 <= length(all_includes)) {
    out <- paste(x, collapse = ", ")
  } else if (length(x) == length(all_includes)) {
    out <- "all"
  } else {
    # Listing the few excluded headers is shorter than listing the many
    # included ones.
    out <- paste0("-", setdiff(all_includes, x), collapse = ", ")
  }
  # The prefix is a literal string, so match it as fixed text.
  gsub("boost/", "", out, fixed = TRUE)
}
# For every header subset in `all_combs`, look up the header paths behind its
# row ids, then compute the subset's footprint and a readable label.
size_results <-
  all_combs %>%
  # Expand each subset to one row per member row id.
  unnest(combs) %>%
  left_join(boost_includes, by = c("combs" = "rowid")) %>%
  select(comb_id, include) %>%
  group_by(comb_id) %>%
  summarize(
    # Order matters: `size` must be computed from the original header paths
    # before `include` is replaced by its label.
    size = bh_size(include),
    include = summarize_include(include)
  ) %>%
  ungroup()

# Show the results ordered by the `size` column.
arrange(size_results, size)
## # A tibble: 13 x 3
## comb_id size$size $count include
## <int> <fs::bytes> <int> <chr>
## 1 11 401.01K 91 limits.hpp
## 2 2 401.27K 93 noncopyable.hpp
## 3 8 419.07K 92 cstdint.hpp
## 4 5 422.49K 103 scoped_ptr.hpp
## 5 7 633.54K 119 bind.hpp
## 6 1 1.25M 212 shared_ptr.hpp
## 7 3 1.52M 219 container/stable_vector.hpp
## 8 12 11.81M 1079 range/algorithm_ext/for_each.hpp
## 9 4 13.08M 1231 ptr_container/ptr_vector.hpp
## 10 6 13.67M 1320 algorithm/string.hpp
## 11 9 15.25M 1639 date_time/gregorian/gregorian.hpp
## 12 10 15.72M 1750 date_time/posix_time/posix_time.hpp
## 13 13 17.55M 1994 all