Samples filtered out because they had too few cells

# Samples that are present in `metadata` but have no rows in `all_data`,
# with the number of metadata rows for each sample/batch pair reported as
# `cell_count` (presumably one metadata row per cell -- confirm).
metadata |>
  anti_join(distinct(all_data, sample)) |>
  count(sample, batch, name = "cell_count")

## Joining, by = "sample"

## # A tibble: 9 × 3
##   sample             batch cell_count
##   <glue>             <int>      <int>
## 1 CB218T01X__batch11    11         16
## 2 CB229T03X__batch10    10          1
## 3 CB232T02X__batch13    13          1
## 4 DR034T01X__batch4      4          8
## 5 DR034T07X__batch4      4         10
## 6 DR062T01X__batch5      5          1
## 7 DR062T07X__batch5      5         32
## 8 DR065T01X__batch14    14        148
## 9 DR071T01X__batch7      7        126

Samples per batch

# Number of distinct samples in each batch. `metadata` is attached to
# `all_data` with a natural join on the columns the two tables share.
all_data |>
  left_join(metadata) |>
  distinct(batch, sample) |>
  count(batch, name = "sample_count")

## Joining, by = c(".cell", "donor", "sample")

## # A tibble: 12 × 2
##    batch sample_count
##    <int>        <int>
##  1     3           19
##  2     4           17
##  3     5           17
##  4     6           18
##  5     7           18
##  6     8           19
##  7     9           19
##  8    10           18
##  9    11           18
## 10    12           19
## 11    13           18
## 12    14           18

Samples shared across batches

# For every `single_cell_rna_id`, count the number of distinct batches it
# occurs in; `sort = TRUE` lists the most widely shared identifiers first.
metadata |>
  distinct(single_cell_rna_id, batch) |>
  count(single_cell_rna_id, sort = TRUE)

## # A tibble: 200 × 2
##    single_cell_rna_id     n
##    <chr>              <int>
##  1 <NA>                  12
##  2 P001T01X              11
##  3 S018T01X               2
##  4 V005T01X               2
##  5 V006T01X               2
##  6 V007T01X               2
##  7 V008T01X               2
##  8 V009T01X               2
##  9 CB113T02X              1
## 10 CB115T01X              1
## # … with 190 more rows

Cells per sample

# Bar chart of the number of metadata rows (cells) per sample, one panel per
# batch, drawn on a square-root y axis. The red horizontal line marks the
# per-batch median, consistent with the median reference line of the
# "RNA per sample" plot.
metadata |>
  mutate(batch = factor(batch)) |>
  count(sample, batch) |>
  # `med` is the median of the per-sample counts within each batch
  with_groups(batch, ~ .x |> mutate(med = median(n))) |>
  ggplot(aes(fct_reorder(sample, n, .desc = TRUE), n)) +
  geom_bar(stat = "identity") +
  geom_hline(aes(yintercept = med), color = "red") +
  # Panels are ordered from the highest to the lowest `med`
  facet_wrap(~ fct_reorder(batch, med, .desc = TRUE), scales = "free_x") +
  ylab("Cell count") +
  scale_y_sqrt() +
  theme_multipanel +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 0.5))

RNA per sample

# Bar chart of the total RNA count (sum of `nCount_RNA`) per sample, one
# panel per batch. The red horizontal line marks the per-batch median of
# those totals.
all_data |>
  left_join(metadata) |>
  mutate(batch = factor(batch)) |>
  # One row per batch/sample pair holding the summed RNA count
  with_groups(c(batch, sample), ~ .x |> summarise(sum_RNA = sum(nCount_RNA))) |>
  with_groups(batch, ~ .x |> mutate(med = median(sum_RNA))) |>
  ggplot(aes(fct_reorder(sample, sum_RNA, .desc = TRUE), sum_RNA)) +
  geom_bar(stat = "identity") +
  geom_hline(aes(yintercept = med), color = "red") +
  # Panels are ordered from the highest to the lowest `med`
  facet_wrap(~ fct_reorder(batch, med, .desc = TRUE), scales = "free_x") +
  # The y axis shows summed RNA counts, not cell counts
  ylab("Total RNA count") +
  # scale_y_log10() +
  theme_multipanel +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 0.5))

## Joining, by = c(".cell", "donor", "sample")
## `summarise()` has grouped output by 'batch'. You can override using the
## `.groups` argument.

RNA count per cell per sample

# Box plot of the per-row `nCount_RNA` values for every sample on a log10
# y axis. Samples are ordered along x with `fct_reorder()` and boxes are
# filled by batch; outliers are drawn as "." points.
all_data |>
  left_join(metadata) |>
  ggplot(aes(x = fct_reorder(sample, nCount_RNA), y = nCount_RNA)) +
  geom_boxplot(mapping = aes(fill = factor(batch)), outlier.shape = ".") +
  scale_y_log10() +
  theme_multipanel

## Joining, by = c(".cell", "donor", "sample")

Gene count per cell per sample

# Box plot of the per-row `nFeature_RNA` values for every sample on a log10
# y axis. Samples are ordered along x with `fct_reorder()` and boxes are
# filled by batch; outliers are drawn as "." points.
all_data |>
  left_join(metadata) |>
  ggplot(aes(x = fct_reorder(sample, nFeature_RNA), y = nFeature_RNA)) +
  geom_boxplot(mapping = aes(fill = factor(batch)), outlier.shape = ".") +
  scale_y_log10() +
  theme_multipanel

## Joining, by = c(".cell", "donor", "sample")

Gene count vs RNA count; they should be positively correlated

# Scatter plot with one point per batch/sample pair: median `nCount_RNA` on x
# against median `nFeature_RNA` on y, both on log10 axes, coloured by batch.
all_data |>
  left_join(metadata) |>
  with_groups(
    c(batch, sample),
    ~ summarise(
      .x,
      nCount_RNA_median = median(nCount_RNA),
      nFeature_RNA_median = median(nFeature_RNA)
    )
  ) |>
  ggplot(aes(x = nCount_RNA_median, y = nFeature_RNA_median)) +
  geom_point(mapping = aes(color = factor(batch))) +
  scale_x_log10() +
  scale_y_log10() +
  theme_multipanel

## Joining, by = c(".cell", "donor", "sample")
## `summarise()` has grouped output by 'batch'. You can override using the
## `.groups` argument.