Samples filtered out because they had too few cells
# Rows of `metadata` whose sample has no match in `all_data`, tallied per
# sample and batch. The tally is labelled `cell_count`; this presumes
# `metadata` holds one row per cell -- TODO confirm.
metadata |>
  anti_join(distinct(all_data, sample)) |>
  count(sample, batch, name = "cell_count")
## Joining, by = "sample"
## # A tibble: 9 × 3
## sample batch cell_count
## <glue> <int> <int>
## 1 CB218T01X__batch11 11 16
## 2 CB229T03X__batch10 10 1
## 3 CB232T02X__batch13 13 1
## 4 DR034T01X__batch4 4 8
## 5 DR034T07X__batch4 4 10
## 6 DR062T01X__batch5 5 1
## 7 DR062T07X__batch5 5 32
## 8 DR065T01X__batch14 14 148
## 9 DR071T01X__batch7 7 126
Samples per batch
# Number of distinct samples in each batch, restricted to the rows present in
# `all_data` (batch is brought in from `metadata` by the join).
all_data |>
  left_join(metadata) |>
  distinct(batch, sample) |>
  count(batch, name = "sample_count")
## Joining, by = c(".cell", "donor", "sample")
## # A tibble: 12 × 2
## batch sample_count
## <int> <int>
## 1 3 19
## 2 4 17
## 3 5 17
## 4 6 18
## 5 7 18
## 6 8 19
## 7 9 19
## 8 10 18
## 9 11 18
## 10 12 19
## 11 13 18
## 12 14 18
Samples shared across batches
# For each single_cell_rna_id, the number of distinct batches it appears in,
# most widely shared first. Missing ids (NA) are tallied as a group of their
# own, so an NA row can appear at the top of the table.
metadata |>
  distinct(single_cell_rna_id, batch) |>
  count(single_cell_rna_id, sort = TRUE)
## # A tibble: 200 × 2
## single_cell_rna_id n
## <chr> <int>
## 1 <NA> 12
## 2 P001T01X 11
## 3 S018T01X 2
## 4 V005T01X 2
## 5 V006T01X 2
## 6 V007T01X 2
## 7 V008T01X 2
## 8 V009T01X 2
## 9 CB113T02X 1
## 10 CB115T01X 1
## # … with 190 more rows
Cells per sample
# Bar chart of the number of `metadata` rows per sample, one facet per batch.
# The red line marks the per-batch median of those counts; facets are ordered
# by that median (largest first) and bars by count (largest first).
metadata |>
  mutate(batch = factor(batch)) |>
  count(sample, batch) |>
  # `med` is the per-batch median, matching the name and the RNA-per-sample
  # plot (the earlier version computed a mean here).
  with_groups(batch, ~ .x |> mutate(med = median(n))) |>
  ggplot(aes(fct_reorder(sample, n, .desc = TRUE), n)) +
  geom_col() +
  geom_hline(aes(yintercept = med), color = "red") +
  facet_wrap(~ fct_reorder(batch, med, .desc = TRUE), scales = "free_x") +
  ylab("Cell count") +
  # Square-root axis keeps small samples visible next to large ones.
  scale_y_sqrt() +
  theme_multipanel +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 0.5))
RNA per sample
# Bar chart of the summed nCount_RNA per sample, one facet per batch.
# The red line marks the per-batch median of the per-sample totals; facets are
# ordered by that median (largest first) and bars by total (largest first).
all_data |>
  left_join(metadata) |>
  mutate(batch = factor(batch)) |>
  with_groups(c(batch, sample), ~ .x |> summarise(sum_RNA = sum(nCount_RNA))) |>
  with_groups(batch, ~ .x |> mutate(med = median(sum_RNA))) |>
  ggplot(aes(fct_reorder(sample, sum_RNA, .desc = TRUE), sum_RNA)) +
  geom_col() +
  geom_hline(aes(yintercept = med), color = "red") +
  facet_wrap(~ fct_reorder(batch, med, .desc = TRUE), scales = "free_x") +
  # The y value is the summed RNA count, not a number of cells.
  ylab("Total RNA count") +
  # Log scale intentionally left disabled:
  # scale_y_log10() +
  theme_multipanel +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 0.5))
## Joining, by = c(".cell", "donor", "sample")
## `summarise()` has grouped output by 'batch'. You can override using the
## `.groups` argument.
RNA count per cell per sample
# Box plot of nCount_RNA for every sample on a log10 y-axis, filled by batch.
# Samples are ordered along x by fct_reorder()'s default summary (the median)
# of nCount_RNA; outliers are drawn as small dots.
all_data |>
  left_join(metadata) |>
  ggplot(
    aes(
      x = fct_reorder(sample, nCount_RNA),
      y = nCount_RNA,
      fill = factor(batch)
    )
  ) +
  geom_boxplot(outlier.shape = ".") +
  scale_y_log10() +
  theme_multipanel
## Joining, by = c(".cell", "donor", "sample")
Gene count per cell per sample
# Box plot of nFeature_RNA for every sample on a log10 y-axis, filled by batch.
# Samples are ordered along x by fct_reorder()'s default summary (the median)
# of nFeature_RNA; outliers are drawn as small dots.
all_data |>
  left_join(metadata) |>
  ggplot(
    aes(
      x = fct_reorder(sample, nFeature_RNA),
      y = nFeature_RNA,
      fill = factor(batch)
    )
  ) +
  geom_boxplot(outlier.shape = ".") +
  scale_y_log10() +
  theme_multipanel
## Joining, by = c(".cell", "donor", "sample")
Gene count vs RNA count (the two should be positively correlated)
# Scatter plot with one point per (batch, sample): median nCount_RNA on x
# against median nFeature_RNA on y, both on log10 axes, coloured by batch.
all_data |>
  left_join(metadata) |>
  with_groups(
    c(batch, sample),
    \(grouped) summarise(
      grouped,
      nCount_RNA_median = median(nCount_RNA),
      nFeature_RNA_median = median(nFeature_RNA)
    )
  ) |>
  ggplot(aes(x = nCount_RNA_median, y = nFeature_RNA_median)) +
  geom_point(aes(color = factor(batch))) +
  scale_x_log10() +
  scale_y_log10() +
  theme_multipanel
## Joining, by = c(".cell", "donor", "sample")
## `summarise()` has grouped output by 'batch'. You can override using the
## `.groups` argument.