library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# All C++ sources (*.cpp) and headers (*.h) found directly under src/.
files <- fs::dir_ls("src", regexp = "[.]cpp$|[.]h$")

# One row per distinct Boost header, with the number of `#include <boost/...>`
# lines that reference it (`n`, most frequent first) and a 1-based `rowid`.
boost_includes <-
  tibble(path = files) %>%
  mutate(
    data = map(path, brio::read_lines),
    include = map(data, function(lines) {
      # Column 2 of the match matrix is the captured header path; lines
      # without a Boost include yield NA and are dropped.
      matched <- str_match(lines, "#include +[<](boost/[^>]+)>")
      na.omit(matched[, 2])
    })
  ) %>%
  unnest(include) %>%
  count(include, sort = TRUE) %>%
  rowid_to_column()

# Header subsets to measure, expressed as combinations of `boost_includes`
# row ids. `use` lists the subset sizes to enumerate; currently size 1 (each
# header on its own) and size `total` (all headers together).
#
# Alternative choices of `use` that were tried:
#   tibble(total = nrow(boost_includes), use = seq.int(total, total)) %>%
#   tibble(total = nrow(boost_includes), use = seq.int(total, total - 3)) %>%
#   tibble(total = nrow(boost_includes), use = c(1, seq.int(total, total - 3))) %>%
all_combs <-
  tibble(total = nrow(boost_includes), use = c(1, total)) %>%
  mutate(
    combs = map2(total, use, function(n, k) {
      # NOTE(review): warnings from combinat::combn() are silenced here;
      # which warnings are expected is not visible from this script.
      suppressWarnings(combinat::combn(n, k, simplify = FALSE))
    })
  ) %>%
  # One row per combination (still a list-column of row ids), then number them.
  unnest(combs) %>%
  mutate(comb_id = row_number())

# Measure the footprint of a set of Boost headers using the `bcp` tool.
#
# Runs `bcp --scan` on the given headers (resolved against the BH include
# directory) with a throwaway output directory, then adds up the scanned
# headers themselves and everything bcp wrote to that directory.
#
# Arguments:
#   includes  Character vector of header paths relative to the Boost include
#             root, e.g. "boost/shared_ptr.hpp".
#
# Returns a one-row tibble with columns
#   size   total bytes of the scanned headers plus all entries bcp produced
#   count  number of scanned headers plus number of entries bcp produced
#
# Stops with an error if bcp exits with a non-zero status.
bh_size <- function(includes) {
  # Scratch directory for bcp output; removed again when the function exits.
  out_dir <- fs::file_temp()
  fs::dir_create(out_dir)
  withr::defer(fs::dir_delete(out_dir))

  boost_dir <- "../../bh/inst/include"

  scan <- fs::path(boost_dir, includes)

  # Quote every path so that spaces or shell metacharacters (e.g. in the
  # temporary directory) cannot split or alter the command line.
  command <- paste0(
    "bcp --boost=", shQuote(boost_dir), " --scan ",
    paste(shQuote(as.character(scan)), collapse = " "),
    " ", shQuote(as.character(out_dir))
  )

  bcp_result <- system(command, intern = TRUE, ignore.stderr = TRUE)

  # With intern = TRUE the "status" attribute is only set for a non-zero exit
  # code, so a missing attribute means success.
  status <- attr(bcp_result, "status")
  if (!is.null(status) && status != 0) {
    stop("bcp failed with exit status ", status, call. = FALSE)
  }

  input <- fs::file_info(scan)
  output <- fs::dir_info(out_dir, recurse = TRUE)

  tibble(
    size = sum(input$size) + sum(output$size),
    count = length(input$size) + length(output$size)
  )
}

# Build a short human-readable label for a subset of Boost headers.
#
# Arguments:
#   x             Character vector with the selected header paths.
#   all_includes  Character vector with the full set of header paths the
#                 selection was drawn from. Defaults to the `include` column
#                 of the script-level `boost_includes` table.
#
# Returns a single string:
#   * the selected headers, comma separated, when they make up at most half
#     of `all_includes`;
#   * "all" when `x` has as many elements as `all_includes`;
#   * otherwise the headers that are missing from `x`, each prefixed with "-".
# Every literal "boost/" is removed from the label.
summarize_include <- function(x, all_includes = boost_includes$include) {
  n_all <- length(all_includes)

  if (length(x) * 2 <= n_all) {
    out <- paste(x, collapse = ", ")
  } else if (length(x) == n_all) {
    out <- "all"
  } else {
    # Listing what was left out is shorter than listing what was kept.
    out <- paste0("-", setdiff(all_includes, x), collapse = ", ")
  }

  # "boost/" is a literal prefix, so match it as fixed text, not as a regex.
  gsub("boost/", "", out, fixed = TRUE)
}

# For every combination: expand its row ids to header paths, measure the
# combined footprint with bh_size() and attach a readable label.
size_results <-
  all_combs %>%
  unnest(combs) %>%
  # `combs` now holds a single row id per row; match it to the header table.
  left_join(boost_includes, by = c("combs" = "rowid")) %>%
  select(comb_id, include) %>%
  group_by(comb_id) %>%
  # `size` must be computed first: it reads the per-group `include` paths
  # before `include` is replaced by the one-string label.
  summarize(size = bh_size(include), include = summarize_include(include)) %>%
  ungroup()

# Show the results ordered by footprint, smallest first.
arrange(size_results, size)
## # A tibble: 13 x 3
##    comb_id   size$size $count include                            
##      <int> <fs::bytes>  <int> <chr>                              
##  1      11     401.01K     91 limits.hpp                         
##  2       2     401.27K     93 noncopyable.hpp                    
##  3       8     419.07K     92 cstdint.hpp                        
##  4       5     422.49K    103 scoped_ptr.hpp                     
##  5       7     633.54K    119 bind.hpp                           
##  6       1       1.25M    212 shared_ptr.hpp                     
##  7       3       1.52M    219 container/stable_vector.hpp        
##  8      12      11.81M   1079 range/algorithm_ext/for_each.hpp   
##  9       4      13.08M   1231 ptr_container/ptr_vector.hpp       
## 10       6      13.67M   1320 algorithm/string.hpp               
## 11       9      15.25M   1639 date_time/gregorian/gregorian.hpp  
## 12      10      15.72M   1750 date_time/posix_time/posix_time.hpp
## 13      13      17.55M   1994 all