Cell QC

Previous thresholds were:

	lib_size_high	expression_high	lib_size_low	expression_low	mt_pct	total
Cells filtered	1561	96	0	116	5618	7291
Threshold	17346.98	6293.816	156.1676	108.9684	31.10665	NA

Click to expand plots

Violin plots

Split by sample

plotColData(sce, x = "Sample", y = "subsets_mt_percent") +
  scale_y_log10() + ggtitle("mt genes") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +  scale_color_manual(values = cols)

## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis

## Warning: Removed 6 rows containing non-finite values (stat_ydensity).

## Warning: Removed 6 rows containing missing values (geom_point).

plotColData(sce, x = "Sample", y = "detected") +
  scale_y_log10() + ggtitle("Detected genes log scale") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +  scale_color_manual(values = cols)

plotColData(sce, x = "Sample", y = "sum") +
  scale_y_log10() + ggtitle("Total count log scale") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +  scale_color_manual(values = cols)

Split by sample and cluster, facet wrap

# detected
plotColData(sce, x = "Sample", y = "detected",  colour_by = "genotype", other_fields = "clusters_named") +
  scale_y_log10(breaks = scales::pretty_breaks(n = 12)) + 
  ggtitle("Detected genes log scale") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +           scale_color_manual(values = col_wt_ko)  + facet_wrap(~clusters_named)

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.

# sum
plotColData(sce, x = "Sample", y = "sum",  colour_by = "genotype", other_fields = "clusters_named") +
 scale_y_log10(breaks = scales::pretty_breaks(n = 12))+
  ggtitle("Total count log scale") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + 
  scale_color_manual(values = col_wt_ko)+ facet_wrap(~clusters_named)

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.

#mt
plotColData(sce, x = "Sample", y = "subsets_mt_percent",  colour_by = "genotype", other_fields = "clusters_named") +
 scale_y_log10(breaks = scales::pretty_breaks(n = 12))+
  ggtitle("Mt. percentage") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + 
  scale_color_manual(values = col_wt_ko)+ facet_wrap(~clusters_named)

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 6 rows containing non-finite values (stat_ydensity).

## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.

## Warning: Removed 6 rows containing missing values (geom_point).

hist(
  sce$detected,
  breaks = 100
)

hist(
  sce$sum,
  breaks = 100
)

hist(
  sce$subsets_mt_percent,
  breaks = 100
)

Scatter plots

plotColData(sce, x = "sum", y = "subsets_mt_percent", colour_by = "genotype", other_fields = "clusters_named") + facet_wrap(~clusters_named) + 
  scale_color_manual(values = col_wt_ko)

plotColData(sce, x = "sum", y = "detected", colour_by = "genotype", other_fields = "clusters_named") + facet_wrap(~clusters_named) + 
  scale_color_manual(values = col_wt_ko)

plotColData(sce, x = "sum", y = "subsets_mt_percent", colour_by = "Sample", other_fields = "clusters_named") + facet_wrap(~clusters_named) + 
  scale_color_manual(values = cols)

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

plotColData(sce, x = "sum", y = "detected", colour_by = "Sample", other_fields = "clusters_named") + facet_wrap(~clusters_named) + 
  scale_color_manual(values = cols)

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Sample WT3_2 and WT2_1 are lower quality. When deciding the thresholds they will be ignored.

We set the thresholds for each celltype:

Sum

OPCs 6000 -na

oligo 6000- exept olito 3 -

Astrocyte 4000

Microglia and immune 3000

vascular 3000

Ependymocytes&NRPs 2000

mature_neurons 2000

Gran&Mono 2000

ChP_epithelial 4000

all 15%

# create vector with same levels as the clusters_named to replace by thresholds for each
sce$clusters_named <- droplevels(sce$clusters_named)
sce$threshold_umi <- (sce$clusters_named)
#to get the order:
#dput(levels(sce$threshold_umi))
#c("Astro1", "Astro2", "Oligo1", "Astro3", "Vascular", "OPC", "Microglia1", "Immune", "Oligo2", "mNeurons", "Ependymocytes&NRPs", "Gran&Mono", "Oligo3", "ChP epithelia")

# replace 
levels(sce$threshold_umi) <- c(4000, 4000, 6000, 4000, 3000, 6000, 
3000, 3000, 6000, 2000, 2000, 
2000, 0, 4000)


# transform the factor into numeric
sce$threshold_umi <- as.numeric(as.character(sce$threshold_umi))

# set thresholds
discard_umi <- sce$sum < sce$threshold_umi
discard_mt <- sce$subsets_mt_percent > 15
discard <- discard_umi | discard_mt

# subset
sce <- sce[,!discard]

We discard 2666 cells due to umi counts, and 1122 cells for high mt percentatge.

in total we discard 3063 cells

Second QC

Nadine Bestard

2022-12-16

Set-up

Cell QC

Violin plots

Scatter plots

Gene QC