# Leftover text that looks like former chunk-header options, kept for reference:
# , include=TRUE, warning=FALSE, echo=TRUE, error=FALSE

# Point knitr's root directory at the folder two levels above the current
# working directory, so the relative data paths used later resolve from there.
knitr::opts_knit$set(
  root.dir = normalizePath("../../")
)

# Defaults applied to every chunk in this document.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  error = FALSE
)

# fastqcr provides the qc_* functions; tidyverse provides %>% and the dplyr verbs.
library(fastqcr)
library(tidyverse)
Load QC report Data
# Directories containing the FastQC reports for the two sequencing rounds.
# Both are relative paths.
# First round:
qcdir1 <- "largedata/fqqc/BM-MB-1st/"
# Second round:
qcdir2 <- "largedata/fqqc/BM-MB-2nd/"
Aggregating Reports
QC reports for the first round of pollen sequencing
The aggregated report contains 10 modules and 7 columns for each FASTQ file. The column names are:
- sample: sample names
- module: fastqc modules
- status: fastqc module status for each sample
- tot.seq: total sequences (i.e.: the number of reads)
- seq.length: sequence length
- pct.gc: percentage of GC content
- pct.dup: percentage of duplicate reads
QC reports for the second round of pollen sequencing
Bind the data together and get the WARN and FAIL modules:
Column names:
- module: fastqc modules
- nb_samples: the number of samples tested
- nb_pass, nb_fail, nb_warn: the number of samples that passed, failed and warned, respectively.
- failed, warned: the name of samples that failed and warned, respectively.
# Aggregate the FastQC reports of each sequencing round. `qc1` and `qc2` are
# used below but are not assigned anywhere in the visible part of this file,
# so they are built here from the two report directories.
qc1 <- qc_aggregate(qcdir1, progressbar = FALSE)
qc2 <- qc_aggregate(qcdir2, progressbar = FALSE)

# Stack both rounds into a single table.
# NOTE(review): rbind() is kept on purpose rather than switching to
# dplyr::bind_rows(); presumably summary() below relies on the class of the
# aggregated object -- confirm before changing.
qc <- rbind(qc1, qc2)

# Show every sample/module pair whose status is WARN or FAIL, ordered by sample.
qc %>%
  select(sample, module, status) %>%
  filter(status %in% c("WARN", "FAIL")) %>%
  arrange(sample)

## Summary of qc
summary(qc)
General Statistics
Column names:
- pct.dup: the percentage of duplicate reads,
- pct.gc: the percentage of GC content,
- tot.seq: total sequences, i.e. the number of reads, and
- seq.length: sequence length, i.e. the length of the reads.
# General statistics computed from the aggregated report. `st` is used below
# but is not assigned anywhere in the visible part of this file.
st <- qc_stats(qc)

# Mean of the pct.dup column. The column is converted with as.numeric() first;
# presumably it is not stored as a number -- confirm.
mean(as.numeric(st$pct.dup))
[1] 45.9713
Note: there should be 24 files, but `BW350-1_L5_I20006.R2.clean.fastq.gz` failed to unzip.
Inspecting Problems
Per module problems:
# Failures in the aggregated report, organised by module.
qc %>% qc_fails("module")
# Alternatives left disabled:
# qc %>% qc_warns("module")
# qc %>% qc_problems("module")
Per sample problems:
# Alternatives left disabled:
# qc %>% qc_warns("sample")
# qc %>% qc_problems("sample")
# Failures in the aggregated report, organised by sample.
qc %>% qc_fails("sample")
Building an HTML Report
# Build the HTML report for the first-round report directory.
qc_report(
  qc.path = qcdir1,
  result.file = "largedata/fqqc",
  experiment = "Pollen BM"
)

# Read a single report: the third entry returned by list.files() for qcdir1.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): this overwrites the aggregated `qc` object built earlier.
qc <- qc_read(list.files(qcdir1, full.names = TRUE)[3])
qc_plot(qc, "Summary")
qc_plot(qc, "Basic Statistics")
qc_plot(qc, "Kmer Content")
# NOTE(review): this module name uses underscores and lower case, unlike the
# other calls -- confirm qc_plot() accepts this spelling.
qc_plot(qc, "per_tile_sequence_quality")
qc_plot(qc, "Per sequence quality scores")
qc_plot(qc, "Per base sequence content")
qc_plot(qc, "Per base N content")

# Demo file
qc.file <- system.file("fastqc_results", "S1_fastqc.zip", package = "fastqcr")

# Read all modules
qc <- qc_read(qc.file)
names(qc)

# Plot per sequence GC content
qc_plot(qc, "Per sequence GC content")

# Per base sequence quality
# NOTE(review): this module name mixes an underscore with spaces -- confirm
# qc_plot() accepts this spelling.
qc_plot(qc, "per_base sequence quality")

# Per sequence quality scores
qc_plot(qc, "Per sequence quality scores")

# Per base sequence content
qc_plot(qc, "Per base sequence content")

# Sequence duplication levels
qc_plot(qc, "Sequence duplication levels")
LS0tCnRpdGxlOiAiUUMgcmVwb3J0cyBmb3IgdGhlIHBvbGxlbiBzZXEgZGF0YSIKYXV0aG9yOiAiSmlubGlhbmcgWWFuZyIKZGF0ZTogIjQvMjEvMjAxNyIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBodG1sX2RvY3VtZW50OiBkZWZhdWx0CiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0Ci0tLQoKYGBge3Igc2V0dXB9CiMgLCBpbmNsdWRlPVRSVUUsIHdhcm5pbmc9RkFMU0UsIGVjaG89VFJVRSwgZXJyb3I9RkFMU0UKa25pdHI6Om9wdHNfa25pdCRzZXQocm9vdC5kaXI9bm9ybWFsaXplUGF0aCgnLi4vLi4vJykpCmtuaXRyOjpvcHRzX2NodW5rJHNldCh3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFLCBlcnJvcj1GQUxTRSwgZWNobz1UUlVFKQpsaWJyYXJ5KGZhc3RxY3IpCmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCiMjIExvYWQgUUMgcmVwb3J0IERhdGEKCmBgYHtyIGxvYWQgZGF0YSB9CnFjZGlyMSA8LSAibGFyZ2VkYXRhL2ZxcWMvQk0tTUItMXN0LyIKcWNkaXIyIDwtICJsYXJnZWRhdGEvZnFxYy9CTS1NQi0ybmQvIgpgYGAKCiMjIEFnZ3JlZ2F0aW5nIFJlcG9ydHMKCiMjIyBRQyByZXBvcnRzIGZvciB0aGUgZmlyc3Qgcm91bmQgb2YgcG9sbGVuIHNlcXVlbmNpbmcKClRoZXkgd2lsbCByZXBvcnQgMTAgbW9kdWxlcyBhbmQgNyBjb2x1bW5zIGZvciBlYWNoIGZxIGZpbGUuIFRoZSBjb2x1bW4gbmFtZXMgYXJlOgotIHNhbXBsZTogc2FtcGxlIG5hbWVzICAKLSBtb2R1bGU6IGZhc3RxYyBtb2R1bGVzICAKLSBzdGF0dXM6IGZhc3RxYyBtb2R1bGUgc3RhdHVzIGZvciBlYWNoIHNhbXBsZSAgCi0gdG90LnNlcTogdG90YWwgc2VxdWVuY2VzIChpLmUuOiB0aGUgbnVtYmVyIG9mIHJlYWRzKSAgCi0gc2VxLmxlbmd0aDogc2VxdWVuY2UgbGVuZ3RoICAKLSBwY3QuZ2M6IHBlcmNlbnRhZ2Ugb2YgR0MgY29udGVudCAgCi0gcGN0LmR1cDogcGVyY2VudGFnZSBvZiBkdXBsaWNhdGUgcmVhZHMgIAoKYGBge3IgcG9sbGVuMX0KcWMxIDwtIHFjX2FnZ3JlZ2F0ZShxY2RpcjEsIHByb2dyZXNzYmFyPUZBTFNFKQojIEFnZ3JlZ2F0aW5nIEZhc3RRQyByZXBvcnRzOgojIGh0dHBzOi8vZ2l0aHViLmNvbS9rYXNzYW1iYXJhL2Zhc3RxY3IKcWMxCgpgYGAKCiMjIyBRQyByZXBvcnRzIGZvciB0aGUgMm5kIHJvdW5kIG9mIHBvbGxlbiBzZXF1ZW5jaW5nCmBgYHtyIHBvbGxlbjJ9CnFjMiA8LSBxY19hZ2dyZWdhdGUocWNkaXIyLCBwcm9ncmVzc2Jhcj1GQUxTRSkKcWMyCgpgYGAKCiMjIyBCaW5kaW5nIGRhdGEgdG9nZXRoZXIgYW5kIGdldCBgV0FSTmAgYW5kIGBGQUlMYCBtb2R1bGVzOgoKQ29sdW1uIG5hbWVzOgoKLSBtb2R1bGU6IGZhc3RxYyBtb2R1bGVzICAKLSBuYl9zYW1wbGVzOiB0aGUgbnVtYmVyIG9mIHNhbXBsZXMgdGVzdGVkICAKLSBuYl9wYXNzLCBuYl9mYWlsLCBuYl93YXJuOiB0aGUgbnVtYmVyIG9mIHNhbXBsZXMgdGhhdCBwYXNzZWQsIGZhaWxlZCBhbmQgd2FybmVkLCByZXNwZWN0aXZlbHkuICAKLSBmYWlsZWQs
IHdhcm5lZDogdGhlIG5hbWUgb2Ygc2FtcGxlcyB0aGF0IGZhaWxlZCBhbmQgd2FybmVkLCByZXNwZWN0aXZlbHkuICAKCmBgYHtyIHFjMTJ9CnFjIDwtIHJiaW5kKHFjMSwgcWMyKQpxYyAlPiUKICAgIHNlbGVjdChzYW1wbGUsIG1vZHVsZSwgc3RhdHVzKSAlPiUgICAgCiAgICBmaWx0ZXIoc3RhdHVzICVpbiUgYygiV0FSTiIsICJGQUlMIikpICU+JQogICAgYXJyYW5nZShzYW1wbGUpCgojIyBTdW1tYXJ5IG9mIHFjCnN1bW1hcnkocWMpCgpgYGAKCiMjIEdlbmVyYWwgU3RhdGlzdGljcwoKQ29sdW1uIG5hbWVzOgoKLSBwY3QuZHVwOiB0aGUgcGVyY2VudGFnZSBvZiBkdXBsaWNhdGUgcmVhZHMsICAKLSBwY3QuZ2M6IHRoZSBwZXJjZW50YWdlIG9mIEdDIGNvbnRlbnQsICAKLSB0b3Quc2VxOiB0b3RhbCBzZXF1ZW5jZXMgb3IgdGhlIG51bWJlciBvZiByZWFkcyBhbmQgIAotIHNlcS5sZW5ndGg6IHNlcXVlbmNlIGxlbmd0aCBvciB0aGUgbGVuZ3RoIG9mIHJlYWRzLiAgCgpgYGB7ciBkZXB0aH0KCnN0IDwtIHFjX3N0YXRzKHFjKQoKIyMjIG92ZXJhbGwgY29udmVyYWdlCnN1bShhcy5udW1lcmljKHN0JHRvdC5zZXEpKSoxMDAvKDIzMDAqMTBeNikKIyMjIG1lYW4gZHVwbGljYXRpb24KbWVhbihhcy5udW1lcmljKHN0JHBjdC5kdXApKQoKc3QKYGBgCgpOb3RlLCB3ZSBzaG91bGQgaGF2ZSAyNCBmaWxlcywgYnV0IGBCVzM1MC0xX0w1X0kyMDAwNi5SMi5jbGVhbi5mYXN0cS5nemAgZmFpbGVkIHRvIHVuemlwLgoKCiMjIEluc3BlY3RpbmcgUHJvYmxlbXMKCiMjIyBQZXIgbW9kdWxlIHByb2JsZW1zOgpgYGB7cn0KcWNfZmFpbHMocWMsICJtb2R1bGUiKQojcWNfd2FybnMocWMsICJtb2R1bGUiKQojcWNfcHJvYmxlbXMocWMsICJtb2R1bGUiKQpgYGAKCgojIyMgUGVyIHNhbXBsZSBwcm9ibGVtcwoKYGBge3J9CiNxY193YXJucyhxYywgInNhbXBsZSIpCiNxY19wcm9ibGVtcyhxYywgInNhbXBsZSIpCnFjX2ZhaWxzKHFjLCAic2FtcGxlIikKYGBgCgoKIyBCdWlsZGluZyBhbiBIVE1MIFJlcG9ydAoKYGBge3IsIGV2YWw9RkFMU0V9CgpxY19yZXBvcnQocWMucGF0aD1xY2RpcjEsIHJlc3VsdC5maWxlID0gImxhcmdlZGF0YS9mcXFjIiwKICAgICAgICAgIGV4cGVyaW1lbnQgPSAiUG9sbGVuIEJNIikKCgpxYyA8LSBxY19yZWFkKGxpc3QuZmlsZXMocWNkaXIxLCBmdWxsLm5hbWVzID0gVClbM10pCnFjX3Bsb3QocWMsICJTdW1tYXJ5IikKcWNfcGxvdChxYywgIkJhc2ljIFN0YXRpc3RpY3MiKQpxY19wbG90KHFjLCAiS21lciBDb250ZW50IikKCnFjX3Bsb3QocWMsICJwZXJfdGlsZV9zZXF1ZW5jZV9xdWFsaXR5IikKCnFjX3Bsb3QocWMsICJQZXIgc2VxdWVuY2UgcXVhbGl0eSBzY29yZXMiKQoKcWNfcGxvdChxYywgIlBlciBiYXNlIHNlcXVlbmNlIGNvbnRlbnQiKQoKcWNfcGxvdChxYywgIlBlciBiYXNlIE4gY29udGVudCIpCgoKIyBEZW1vIGZpbGUKcWMuZmlsZSA8LSBzeXN0ZW0uZmlsZSgiZmFzdHFjX3Jlc3VsdHMiLCAiUzFfZmFz
dHFjLnppcCIsICBwYWNrYWdlID0gImZhc3RxY3IiKQoKIyBSZWFkIGFsbCBtb2R1bGVzCnFjIDwtIHFjX3JlYWQocWMuZmlsZSkKbmFtZXMocWMpCgojIFBsb3QgcGVyIHNlcXVlbmNlIEdDIGNvbnRlbnQKcWNfcGxvdChxYywgIlBlciBzZXF1ZW5jZSBHQyBjb250ZW50IikKCiMgUGVyIGJhc2Ugc2VxdWVuY2UgcXVhbGl0eQpxY19wbG90KHFjLCAicGVyX2Jhc2Ugc2VxdWVuY2UgcXVhbGl0eSIpCgojIFBlciBzZXF1ZW5jZSBxdWFsaXR5IHNjb3JlcwpxY19wbG90KHFjLCAiUGVyIHNlcXVlbmNlIHF1YWxpdHkgc2NvcmVzIikKCiMgUGVyIGJhc2Ugc2VxdWVuY2UgY29udGVudApxY19wbG90KHFjLCAiUGVyIGJhc2Ugc2VxdWVuY2UgY29udGVudCIpCgojIFNlcXVlbmNlIGR1cGxpY2F0aW9uIGxldmVscwpxY19wbG90KHFjLCAiU2VxdWVuY2UgZHVwbGljYXRpb24gbGV2ZWxzIikKCgpgYGAKCg==