Find out variants showing low GATK-SB while high SB in RealignBAQ

Load data into R

# only return those while sequencing genotype is heterzygous,forward
# depth>=10 and reverse depth>=10, and GWAS genotype is not missing (0/0)
my.read.table <- function(x) {
    d <- read.table(x, header = 1, sep = "\t")
    rownames(d) <- paste(d[, 1], d[, 2], sep = "_")  #use chr and postion as rownames
    # forward depth and reverse depth filter
    forwardindex <- (d[, 5] + d[, 6]) >= 10
    reverseindex <- (d[, 7] + d[, 8]) >= 10
    seqGeno <- as.character(d[, 13])
    # sequence genotype heterzygous
    seqHeterIndex <- sapply(strsplit(seqGeno, "/"), function(x) {
        x[1] != x[2]
    })
    # gwas genotype no missing
    gwasNoMissingIndex <- d[, 14] != "0/0"
    subd <- d[forwardindex & seqHeterIndex & reverseindex & gwasNoMissingIndex, 
        ]
    # Sort subd by GATK.SB, lowest to highest
    subd <- subd[order(subd[, "GATK.SB"]), ]
}


load.dat <- function(method = "realignment2") {
    files <- list.files(path = paste("/2TB/projects/StrandBias/SureSelect/SB/", 
        method, "/", sep = ""), pattern = "BAQ20.*_depth5_SB.txt", full.names = TRUE)
    datas = lapply(files, my.read.table)
    names(datas) <- gsub("-Sanger.*", "", gsub(".*?BAQ20_", "", files, perl = T), 
        perl = T)
    datas
}

method <- "realignment2"
data <- load.dat(method = method)

Print out data

Now for each sample, print out top 100 variants showing lowest GATK-SB value

suppressPackageStartupMessages(library(googleVis))
# Foreach sample print out the first 100 items of the table
for (i in 1:length(data)) {
    cat("Sample Name: <b>", names(data)[i], "</b>", sep = " ")
    n <- min(100, nrow(data[[i]]))
    PopTable <- gvisTable(head(data[[i]], n = n), options = list(width = 1800, 
        height = 400, page = "enable"))

    print(PopTable, "chart")
}

Sample Name: 1055QC0003 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0004 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0005 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0006 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0007 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0008 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0009 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0011 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0012 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0013 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0014 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0016 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0017 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0018 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0020 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0021 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0022 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0024 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0025 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0026 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0028 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>

Sample Name: 1055QC0030 <!– Table generated in R 2.14.1 by googleVis 0.2.16 package –>