# Dx.R extracts copy number and mutation metrics (e.g., Tumor Mutation Burden (TMB) and COSMIC Mutational Signatures (mutSig)) from the PureCN.R output.
# Generate a BED file with the callable status of regions in the tumor BAM by
# running the CallableLoci tool (-T) from GenomeAnalysisTK.jar against the
# hg38 reference. A summary table is written next to the BED file.
# Expansions are quoted so paths containing spaces or glob characters are
# passed to the tool as single, unmodified arguments.
java -jar GenomeAnalysisTK.jar \
  -T CallableLoci \
  -R hg38.fasta \
  -I:tumor "$BAM_TUMOR" \
  --summary "$OUT/${SAMPLEID}_table.txt" \
  -o "$OUT/${SAMPLEID}_callable_status.bed" \
  --minDepth 30
# Keep only the lines of the callable-status BED file that contain the string
# CALLABLE, writing them to a new "_filtered" BED file.
# Paths are quoted so spaces or glob characters in $OUT / $SAMPLEID do not
# split the input argument or make the redirect target ambiguous.
grep CALLABLE "$OUT/${SAMPLEID}_callable_status.bed" > \
  "$OUT/${SAMPLEID}_callable_status_filtered.bed"
# Restrict the mutation burden calculation to coding sequences: run
# FilterCallableLoci.R (hg38) on the filtered callable-regions BED file and
# write the result to a "_cds" BED file.
# Script and file paths are quoted to survive spaces or glob characters.
Rscript "$PURECN/FilterCallableLoci.R" \
  --genome hg38 \
  --infile "$OUT/${SAMPLEID}_callable_status_filtered.bed" \
  --outfile "$OUT/${SAMPLEID}_callable_status_filtered_cds.bed"
# Run Dx.R to extract copy number and mutation metrics (e.g. tumor mutation
# burden and, via --signatures, mutational signatures) from the PureCN.R
# result file (.rds) of this sample.
#   --callable  the CDS-restricted callable regions written by
#               FilterCallableLoci.R
#   --exclude   a BED file of simple repeats (presumably regions to leave out
#               of the mutation burden calculation; confirm in the Dx.R help)
# Script and file paths are quoted to survive spaces or glob characters.
Rscript "$PURECN/Dx.R" \
  --out "$OUT/$SAMPLEID/${SAMPLEID}" \
  --rds "$OUT/$SAMPLEID/${SAMPLEID}.rds" \
  --callable "$OUT/${SAMPLEID}_callable_status_filtered_cds.bed" \
  --exclude hg38_simpleRepeats.bed \
  --signatures
# BenchmarkTumorOnly.R summarizes the germline vs. somatic classification accuracy and the AUC of the posterior probabilities.
# Inputs: this sample's PureCN.R result file (--rds) and the VCF given via
# --vcf (named *_matching_mutect.vcf; presumably MuTect calls made with the
# matched normal, used as the reference classification; confirm).
# Results are written using the --out prefix.
# Script and file paths are quoted to survive spaces or glob characters.
Rscript "$PURECN/BenchmarkTumorOnly.R" \
  --out "$OUT/$SAMPLEID/${SAMPLEID}" \
  --rds "$OUT/$SAMPLEID/${SAMPLEID}.rds" \
  --vcf "$OUT/${SAMPLEID}_matching_mutect.vcf"