library(here) # for reproducible paths
library(SingleCellExperiment)
library(scater) # For qcs
library(scran) # for normalisation, feature selection
library(ggplot2) # To add titles to plots
library(batchelor) # Batch correction
library(patchwork) # agregate plots
library(pals) # for palettes with large n
project <- "fire-mice"
sce <- readRDS(here("processed", project, "sce_clusterQC.RDS"))
source(here("src/colours.R"))
levels(sce$genotype) <- levels(c("WT", "HET", "KO"))
The object has 23719 genes and 51194 cells.
Previous thresholds were: detected genes: 429 - 7111 sum umi: 462 - 19024 mt: 14.37%
Split by sample
plotColData(sce, x = "Sample", y = "subsets_mt_percent") +
scale_y_log10() + ggtitle("mt genes") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + scale_color_manual(values = cols)
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).
## Warning: Removed 6 rows containing missing values (geom_point).
plotColData(sce, x = "Sample", y = "detected") +
scale_y_log10() + ggtitle("Detected genes log scale") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + scale_color_manual(values = cols)
plotColData(sce, x = "Sample", y = "sum") +
scale_y_log10() + ggtitle("Total count log scale") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + scale_color_manual(values = cols)
Split by cell type
#mt
plotColData(sce, x = "cluster_names", y = "subsets_mt_percent", colour_by = "Sample") +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
ggtitle("mt genes") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
scale_color_manual(values = cols)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
#detected
plotColData(sce, x = "cluster_names", y = "detected", colour_by = "Sample") +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
ggtitle("Detected genes") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
scale_color_manual(values = cols)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
#sum
plotColData(sce, x = "cluster_names", y = "sum", colour_by = "Sample") +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
ggtitle("Total count") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
scale_color_manual(values = cols)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
Split by cell type, log scales
plotColData(sce, x = "cluster_names", y = "detected", colour_by = "Sample") +
scale_y_log10(breaks = scales::pretty_breaks(n = 12)) +
ggtitle("Detected genes log scale") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + scale_color_manual(values = cols)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
plotColData(sce, x = "cluster_names", y = "sum", colour_by = "Sample") +
scale_y_log10(breaks = scales::pretty_breaks(n = 12))+
ggtitle("Total count log scale") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
scale_color_manual(values = cols)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
Split by sample type, facet wrap
# detected
plotColData(sce, x = "Sample", y = "detected", colour_by = "genotype", other_fields = "cluster_names") +
scale_y_log10(breaks = scales::pretty_breaks(n = 12)) +
ggtitle("Detected genes log scale") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + scale_color_manual(values = col_wt_het_ko) + facet_wrap(~cluster_names)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
# sum
plotColData(sce, x = "Sample", y = "sum", colour_by = "genotype", other_fields = "cluster_names") +
scale_y_log10(breaks = scales::pretty_breaks(n = 12))+
ggtitle("Total count log scale") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
scale_color_manual(values = col_wt_het_ko)+ facet_wrap(~cluster_names)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
hist(
sce$detected,
breaks = 100
)
hist(
sce$sum,
breaks = 100
)
hist(
sce$subsets_mt_percent,
breaks = 100
)
plotColData(sce, x = "sum", y = "subsets_mt_percent", colour_by = "genotype", other_fields = "cluster_names") + facet_wrap(~cluster_names) +
scale_color_manual(values = col_wt_het_ko)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
plotColData(sce, x = "sum", y = "detected", colour_by = "genotype", other_fields = "cluster_names") + facet_wrap(~cluster_names) +
scale_color_manual(values = col_wt_het_ko)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
plotColData(sce, x = "sum", y = "subsets_mt_percent", colour_by = "Sample", other_fields = "cluster_names") + facet_wrap(~cluster_names) +
scale_color_manual(values = cols)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
plotColData(sce, x = "sum", y = "detected", colour_by = "Sample", other_fields = "cluster_names") + facet_wrap(~cluster_names) +
scale_color_manual(values = cols)
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
In the preliminary analysis we only deleted genes that had less than 2 cells expressing it, now we filter all genes with less than 10 cells expressing it
# at least 10 cells should express the gene
keep_feature <- rowSums(counts(sce) > 0) > 10
sce <- sce[keep_feature,]
## Normalise by deconvolution ---
if(!file.exists(here("processed", project, "sce_norm_02.RDS"))){
# For reproducibility
set.seed(100)
# Quick clustering to pool samples together and deal with 0 counts
quick_clusters <- quickCluster(sce)
# Calculate size factors
sce <-
computeSumFactors(sce, cluster = quick_clusters, min.mean = 0.1)
# Check that there are not negative size factors
summary(sizeFactors(sce))
# Apply size factors and log transform them
sce <- logNormCounts(sce)
# save object
saveRDS(sce, here("processed", project, "sce_norm_02.RDS"))
}
After the quality control the variable genes need to be selected again to better represent the variance in this cleaned dataset. We follow the same methods than for our first feature selection, selecting here for the top 2000 genes.
For visualisation, reduce to 2 dimensions. Non linear reductions.