This notebook is only used to create the figures and tables. Results are discussed here.
Load the data generated by “TODO insert link to code that generated data”
# Per-dataset category summary, ordered so that datasets with the most
# multi-category columns come first.
n_categories <- read_tsv("./n_categories.tsv")
n_categories <- arrange(n_categories, desc(n_col_two_or_more_category))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## exp_url = col_character(),
## n_col_one_category = col_double(),
## n_col_two_or_more_category = col_double(),
## largest_category_n = col_double(),
## largest_category_label = col_character()
## )
# Rewrite the dev host name in each dataset URL to cellxgene.cziscience.com.
# fixed = TRUE matches the host name literally; interpreted as a regular
# expression, every "." in it would match any character.
n_categories$exp_url <- gsub(
  "cellxgene.dev.single-cell.czi.technology",
  "cellxgene.cziscience.com",
  n_categories$exp_url,
  fixed = TRUE
)
# Counts of metadata columns per number of categories, in ascending order of
# the number of categories.
category_counts <- read_tsv("./category_counts.tsv")
category_counts <- arrange(category_counts, n_categories)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## n_categories = col_double(),
## counts = col_double()
## )
# Keep only the rows for columns with more than one category.
# dplyr::filter() drops rows where the comparison is NA, whereas logical `[`
# indexing would return an all-NA row for each of them. `.data$` pins the
# reference to the column, since a data frame named n_categories also exists.
category_counts <- dplyr::filter(category_counts, .data$n_categories > 1)
Histogram of the number of columns that have a given number of categories. A column is a cell metadata column (e.g. “Sex”) and a category is one of the values that column can take (e.g. “Male”, “Female”, “Unknown”)
# Bar chart of how many cell metadata columns have each number of categories.
# Only the first 80 rows of category_counts are drawn; the subtitle reports
# the maximum over all rows.
ggplot(
  head(category_counts, 80),
  aes(x = as.factor(n_categories), y = counts)
) +
  # geom_col() is the direct form of geom_bar(stat = "identity")
  geom_col() +
  labs(
    title = "Histogram of category counts per cell metadata column",
    # paste() already inserts a space, so the label carries no trailing one
    subtitle = paste(
      "Max number of categories is",
      max(category_counts$n_categories)
    ),
    x = "Number of categories in column",
    y = "Cell metadata column counts"
  ) +
  theme_bw()
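Table: each row is a dataset. The columns are the dataset URL (exp_url), the number of metadata columns with one category, the number of metadata columns with more than one category, the number of categories in the column that has the most categories, and the label of that column.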
# Render the first `n` rows of a per-dataset summary as a hover-styled table
# with human-readable column headers.
#
# datasets: data frame with the columns n_col_one_category,
#           n_col_two_or_more_category, largest_category_label and
#           largest_category_n.
# n:        number of rows to show (default 40).
# Returns the styled table produced by kable_paper().
format_dataset_table <- function(datasets, n = 40) {
  datasets %>%
    dplyr::rename(
      `Number of columns w/ 1 cat` = n_col_one_category,
      `Number of columns w/ > 1 cat` = n_col_two_or_more_category,
      `Column with highest cats` = largest_category_label,
      `N cats for column with highest cat n` = largest_category_n
    ) %>%
    head(n) %>%
    kbl() %>%
    kable_paper("hover", full_width = TRUE)
}

# Datasets in the current order of n_categories.
format_dataset_table(n_categories)

# Datasets ranked by the size of their largest category column.
n_categories %>%
  arrange(-largest_category_n) %>%
  format_dataset_table()