library(readr)
## Warning: package 'readr' was built under R version 4.1.2
library(reshape2)
library(ggplot2)
# Load the combined Cell Ranger metrics table; the "Sample" column supplies the
# row names.
summaries <- read.csv(
  "U:/Datastore/CMVM/scs/groups/jpriller-GROUP/scRNAseq/outs/CellRanger/combined_metrics_summaries.csv",
  row.names = "Sample"
)
# keep the sample names
samples <- row.names(summaries)
# Remove the commas and % signs, then convert every column to numeric.
# lapply() with `summaries[] <-` keeps the data-frame shape even when the table
# has a single sample (sapply() would simplify one row to a plain vector and
# break the Sample assignment below). The character class "[,%]" replaces the
# previous pattern ",||%", which contained an accidental empty alternative.
summaries[] <- lapply(summaries, function(column) {
  as.numeric(gsub("[,%]", "", column))
})
# The sample names move into their own column, so reset to default integer
# row names.
row.names(summaries) <- NULL
summaries$Sample <- samples
head(summaries)
##   Estimated.Number.of.Cells Mean.Reads.per.Cell Median.Genes.per.Cell
## 1                      9619               27881                  2455
## 2                      4690               58287                  2597
## 3                      9524               26579                  2590
## 4                      6493               39553                  2505
## 5                      7811               32412                  2286
## 6                      6651               42718                  2638
##   Number.of.Reads Valid.Barcodes Sequencing.Saturation Q30.Bases.in.Barcode
## 1       268191837           97.2                  49.3                 95.9
## 2       273370387           97.7                  69.3                 95.9
## 3       253141535           97.5                  43.4                 95.9
## 4       256819713           97.4                  57.2                 95.9
## 5       253172925           97.5                  55.8                 95.9
## 6       284117623           97.5                  56.4                 95.8
##   Q30.Bases.in.RNA.Read Q30.Bases.in.UMI Reads.Mapped.to.Genome
## 1                  92.3             92.6                   94.8
## 2                  92.0             95.6                   95.2
## 3                  92.1             95.6                   95.5
## 4                  92.2             95.6                   95.4
## 5                  91.9             95.6                   95.6
## 6                  91.8             95.5                   95.2
##   Reads.Mapped.Confidently.to.Genome
## 1                               92.8
## 2                               93.1
## 3                               93.5
## 4                               93.5
## 5                               93.6
## 6                               93.2
##   Reads.Mapped.Confidently.to.Intergenic.Regions
## 1                                            3.7
## 2                                            4.0
## 3                                            3.6
## 4                                            3.9
## 5                                            3.8
## 6                                            3.8
##   Reads.Mapped.Confidently.to.Intronic.Regions
## 1                                         29.7
## 2                                         29.6
## 3                                         30.4
## 4                                         29.3
## 5                                         30.0
## 6                                         29.8
##   Reads.Mapped.Confidently.to.Exonic.Regions
## 1                                       59.4
## 2                                       59.5
## 3                                       59.6
## 4                                       60.3
## 5                                       59.8
## 6                                       59.6
##   Reads.Mapped.Confidently.to.Transcriptome Reads.Mapped.Antisense.to.Gene
## 1                                      55.4                            2.3
## 2                                      55.6                            2.2
## 3                                      55.7                            2.2
## 4                                      56.2                            2.4
## 5                                      55.7                            2.4
## 6                                      55.5                            2.4
##   Fraction.Reads.in.Cells Total.Genes.Detected Median.UMI.Counts.per.Cell
## 1                    86.5                21369                       5741
## 2                    81.6                20758                       6861
## 3                    87.9                21436                       5895
## 4                    84.0                21073                       6294
## 5                    86.1                21354                       5483
## 6                    82.4                21183                       6517
##     Sample
## 1 EKDL2651
## 2 EKDL2652
## 3 EKDL2653
## 4 EKDL2654
## 5 EKDL2655
## 6 EKDL2656

# Number of cells ----

# Estimated cell count per sample drawn as bars, with the mean across samples
# overlaid as a salmon horizontal reference line.
ggplot(summaries, aes(Sample, Estimated.Number.of.Cells)) +
  geom_col() +
  # scale_fill_manual(values = mycoloursP[4:5]) +
  geom_hline(
    yintercept = mean(summaries$Estimated.Number.of.Cells),
    colour = "salmon",
    size = 1
  ) +
  labs(x = "sample", y = "Number of Cells") +
  theme_minimal() +
  # Rotate the sample labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Mean reads per cell for each sample drawn as bars, with the mean across
# samples overlaid as a salmon horizontal reference line.
ggplot(summaries, aes(Sample, Mean.Reads.per.Cell)) +
  geom_col() +
  geom_hline(
    yintercept = mean(summaries$Mean.Reads.per.Cell),
    colour = "salmon",
    size = 1
  ) +
  labs(x = "sample", y = "Mean reads per cell") +
  theme_minimal() +
  # Rotate the sample labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# try to plot them all
# Reshape to long format: Sample stays as the identifier and every other
# column is stacked into metric/value pairs.
summaries_melt <- melt(
  summaries,
  id.vars = "Sample",
  variable.name = "metric"
)

# One bar-chart panel per metric, each panel with its own free axis scales.
ggplot(summaries_melt, aes(Sample, value)) +
  geom_col() +
  facet_wrap(~ metric, scales = "free") # +
  # theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))