library(readr)
## Warning: package 'readr' was built under R version 4.1.2
library(reshape2)
library(ggplot2)
# Load the combined Cell Ranger metrics table. The "Sample" column is used as
# the row names so that every remaining column is a metric.
summaries <- read.csv(
  "U:/Datastore/CMVM/scs/groups/jpriller-GROUP/scRNAseq/outs/CellRanger/combined_metrics_summaries.csv",
  row.names = "Sample"
)
# keep the sample names
samples <- row.names(summaries)
# Remove the commas (presumably thousands separators) and % signs, then convert
# every column to numeric.
# lapply() is used instead of sapply() on purpose: with a single-sample table
# sapply() simplifies to a plain vector and data.frame() would then build one
# long column instead of one column per metric.
summaries <- data.frame(lapply(summaries, function(x) {
  # "[,%]" matches exactly one comma or one percent sign.
  as.numeric(gsub("[,%]", "", x))
}))
# Row names were reset to 1..n above, so store the sample names as a column.
summaries$Sample <- samples
head(summaries)
## Estimated.Number.of.Cells Mean.Reads.per.Cell Median.Genes.per.Cell
## 1 9619 27881 2455
## 2 4690 58287 2597
## 3 9524 26579 2590
## 4 6493 39553 2505
## 5 7811 32412 2286
## 6 6651 42718 2638
## Number.of.Reads Valid.Barcodes Sequencing.Saturation Q30.Bases.in.Barcode
## 1 268191837 97.2 49.3 95.9
## 2 273370387 97.7 69.3 95.9
## 3 253141535 97.5 43.4 95.9
## 4 256819713 97.4 57.2 95.9
## 5 253172925 97.5 55.8 95.9
## 6 284117623 97.5 56.4 95.8
## Q30.Bases.in.RNA.Read Q30.Bases.in.UMI Reads.Mapped.to.Genome
## 1 92.3 92.6 94.8
## 2 92.0 95.6 95.2
## 3 92.1 95.6 95.5
## 4 92.2 95.6 95.4
## 5 91.9 95.6 95.6
## 6 91.8 95.5 95.2
## Reads.Mapped.Confidently.to.Genome
## 1 92.8
## 2 93.1
## 3 93.5
## 4 93.5
## 5 93.6
## 6 93.2
## Reads.Mapped.Confidently.to.Intergenic.Regions
## 1 3.7
## 2 4.0
## 3 3.6
## 4 3.9
## 5 3.8
## 6 3.8
## Reads.Mapped.Confidently.to.Intronic.Regions
## 1 29.7
## 2 29.6
## 3 30.4
## 4 29.3
## 5 30.0
## 6 29.8
## Reads.Mapped.Confidently.to.Exonic.Regions
## 1 59.4
## 2 59.5
## 3 59.6
## 4 60.3
## 5 59.8
## 6 59.6
## Reads.Mapped.Confidently.to.Transcriptome Reads.Mapped.Antisense.to.Gene
## 1 55.4 2.3
## 2 55.6 2.2
## 3 55.7 2.2
## 4 56.2 2.4
## 5 55.7 2.4
## 6 55.5 2.4
## Fraction.Reads.in.Cells Total.Genes.Detected Median.UMI.Counts.per.Cell
## 1 86.5 21369 5741
## 2 81.6 20758 6861
## 3 87.9 21436 5895
## 4 84.0 21073 6294
## 5 86.1 21354 5483
## 6 82.4 21183 6517
## Sample
## 1 EKDL2651
## 2 EKDL2652
## 3 EKDL2653
## 4 EKDL2654
## 5 EKDL2655
## 6 EKDL2656
# Number of cells
# Bar chart of the estimated number of cells for each sample. The salmon
# horizontal line marks the mean of that metric across all samples.
ggplot(summaries, aes(Sample, Estimated.Number.of.Cells)) +
  geom_col() +
  geom_hline(
    yintercept = mean(summaries$Estimated.Number.of.Cells),
    colour = "salmon",
    size = 1
  ) +
  labs(x = "sample", y = "Number of Cells") +
  theme_minimal() +
  # Rotate the sample names so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Bar chart of the mean reads per cell for each sample. The salmon horizontal
# line marks the mean of that metric across all samples.
ggplot(summaries, aes(Sample, Mean.Reads.per.Cell)) +
  geom_col() +
  geom_hline(
    yintercept = mean(summaries$Mean.Reads.per.Cell),
    colour = "salmon",
    size = 1
  ) +
  labs(x = "sample", y = "Mean reads per cell") +
  theme_minimal() +
  # Rotate the sample names so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Overview of every metric at once: reshape to long format (one row per
# sample/metric pair), then draw one bar-chart panel per metric.
summaries_melt <- melt(
  summaries,
  id.vars = "Sample",
  variable.name = "metric"
)
ggplot(summaries_melt, aes(Sample, value)) +
  geom_col() +
  # Free scales because the metrics have very different ranges.
  facet_wrap(~ metric, scales = "free")

# Optional, currently disabled: rotate the x-axis labels by appending
#   + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))