Example code to look at the effect of different SNR cuts

## Example file that shows how to use pbbamr with an alignment dataset to calculate values at different metrics
suppressMessages({
  library(pbbamr)
  library(dplyr)
  library(ggplot2) })
aln_file = "/pbi/analysis/smrtlink/release/smrtsuite/userdata/jobs_root/001/001804/tasks/pbalign.tasks.consolidate_alignments-0/combined.alignmentset.xml"
# Load the data and the SNR values
d = loadPBI(aln_file,  loadSNR = TRUE)
# Calculate the Min SNR for each entry
d$minSNR = apply(d[, grep('snr', colnames(d))], 1, min)
# Calculate the accuracy
d$aLength = d$tend - d$tstart
d$Acc = 1 - (d$mismatches + d$inserts + d$dels) / d$aLength

### NOW CALCULATE METRICS ABOVE DIFFERENT MIN SNR CUT-OFS
# Function to calculate summary statistics over some value
statsAboveValue <- function(snrCut) {
  tmp = d[d$minSNR >= snrCut,]
  tmp %>% summarize(snrCut = snrCut,
                    TotalSubreads = n(),
                    TotalBases = sum(aLength),
                    MedianAcc = median(Acc, na.rm = TRUE),
                    MeanAcc = mean(Acc, na.rm = TRUE))
}
# Create a range of SNR cut-offs and calculate the metrics for each one
snrCuts = seq(min(d$minSNR), max(d$minSNR), 0.1)
cutStats = do.call(rbind, lapply(snrCuts, statsAboveValue))
# Example table
head(cutStats)
##     snrCut TotalSubreads TotalBases MedianAcc   MeanAcc
## 1 4.000109         32696   90910322 0.8370457 0.8200473
## 2 4.100109         31754   88497277 0.8382353 0.8207168
## 3 4.200109         30553   85163696 0.8395610 0.8214312
## 4 4.300109         29347   81902984 0.8405831 0.8220588
## 5 4.400109         28116   78610246 0.8416611 0.8227688
## 6 4.500109         26787   74855500 0.8424731 0.8233327
# Now let's look at one example plot
ggplot(cutStats, aes(x=snrCut, y=TotalSubreads)) + geom_point() + geom_line() + 
  labs(x="Min SNR Value", y="Total Subreads", title = "Yield vs. SNR Cut") +
  theme_bw()