Read Depth

library(RColorBrewer)
library(ggplot2)
setwd("~/Documents/Dog/Dog_Run2.1/")
bowtie_Run2 <-  read.table("summary_metrics.txt",
                            header = T,
                            sep = "\t",
                            stringsAsFactors = F)

bowtie_Run2$SAMPLE <- gsub(".*_D", "D", bowtie_Run2$SAMPLE)
bowtie_Run2$SAMPLE <- gsub("run2_S.*", "Replicate", bowtie_Run2$SAMPLE)

### Original Run2
bowtie_run2_original <- read.table("../Dog_Run2/summary_metrics_unmasked.txt",
                                   header = T,
                                   sep = "\t",
                                   stringsAsFactors = F)
bowtie_run2_original$SAMPLE <- gsub(".*_D", "D", bowtie_run2_original$SAMPLE)
bowtie_run2_original$SAMPLE <- gsub("_S.*", "_Original", bowtie_run2_original$SAMPLE)

## convert percentage to 100 
bowtie_Run2[,grep("PCT", colnames(bowtie_Run2))] <- bowtie_Run2[,grep("PCT", colnames(bowtie_Run2))] * 100
bowtie_run2_original[,grep("PCT", colnames(bowtie_run2_original))] <- bowtie_run2_original[,grep("PCT", colnames(bowtie_run2_original))] * 100


## Merge to create Summary Column
summary <- rbind(bowtie_Run2, bowtie_run2_original)

colors <- function(numDoubles) {
  if (numDoubles > 12) {
    diff <- numDoubles - 12
    doubles <- brewer.pal(12, "Paired")
    doubles <- c(doubles, brewer.pal(diff, "Paired"))
  }
  cols <- doubles
  return(cols)
}



plot_reads_metric <- function(datafile, metric, double = FALSE, title, bowtie = FALSE) {
  heading <- paste(title, deparse(substitute(datafile)))
  datafile[[metric]] <- datafile[[metric]] / 1000000
  if (bowtie == TRUE) {
    datafile[[metric]] <- datafile[[metric]] / 2
  }
  if (double == TRUE) {
    cols = allSample.cols <- rep(brewer.pal((nrow(datafile)/2), "Set3"), each=2)
  }
  else {
    if (nrow(datafile) > 12) {
      cols = colors(numDoubles = 24)
      # diff = nrow(datafile) - 12 
      # cols = allSample.cols <- rep(brewer.pal(12, "Set3"), each=1)
      # cols = c(rep(brewer.pal(diff, "Set3"), each=1), cols)
    } else {
      cols = allSample.cols <- rep(brewer.pal(nrow(datafile), "Set3"), each=1)
    }
  }
  ggplot(data = datafile,
         aes_string(x = "SAMPLE",
                    y = metric,
                    label = metric)) +
    geom_bar(stat = "identity", fill = cols, alpha = 0.8)  +
    ylab("Millions of Reads") +
    ggtitle(heading) +
    geom_text(hjust = 1.5, size = 3) +
    coord_flip()
}
plot_reads_metric(datafile = summary, metric = "TOTAL_READS", title = "Total Reads PAIRS", bowtie = TRUE)

plot_reads_metric(datafile = summary, metric = "PF_READS_ALIGNED", title = "Total Reads Aligned PAIRS", double = FALSE, bowtie=TRUE)

Correlation

library(edgeR)
library(tidyverse)
### Create a matrix
counts_original  <- read.table("peaks_counts_original.txt", header=T, sep="\t", row.names=1)

### counts original
colnames(counts_original) <- gsub("...BAM.", "", colnames(counts_original))
colnames(counts_original) <- gsub(".*02_", "", colnames(counts_original))
colnames(counts_original) <- gsub("_.*.bam", "", colnames(counts_original))
counts_original <- counts_original[,-1 * grep("50", colnames(counts_original))]
colnames(counts_original) <- paste0(colnames(counts_original), "_Original")


### counts replicate
counts_replicate <- read.table("peaks_counts_replicate.txt", header=T, sep="\t", row.names=1)
colnames(counts_replicate) <- gsub("...BAM.", "", colnames(counts_replicate))
colnames(counts_replicate) <- gsub(".*02_", "", colnames(counts_replicate))
colnames(counts_replicate) <- gsub("_run2.*.bam", "", colnames(counts_replicate))
colnames(counts_replicate) <- paste0(colnames(counts_replicate), "_Replicate")


counts <- cbind(counts_original, counts_replicate)

### Create summary table
### New Data
samples <- read.table("../DE_Individual/sampleinfo_s1_s12.txt", header=T, sep="\t")
samples$exp.id <- gsub("s", "Dog", samples$exp.id)
samples <- samples %>% separate("Sex", c("Fixed", "Sex"), " ") 
dogPre <- gsub("_Original", "", colnames(counts))
dogPre <- gsub("_Replicate", "", dogPre)
matchingIndex <- na.omit(match(dogPre, samples$exp.id))


### Filter out genes with low read counts ("unexpressed" genes) -- 
unex_filter <- which(rowSums(cpm(counts)) < 20)
counts <- counts[-1 * unex_filter,]

############################################
### Filter low sum counts ##################
############################################
filter <- which(rowSums(counts) <= 100)
countData <- counts[-1 * filter,]                              

sortColumns <- function(tbl) {
    dogNames <- gsub("Dog", "", colnames(countData))
    dogNames <- gsub("_.*", "", dogNames)
    dogNames <- as.numeric(dogNames)
    dogOrder <- order(dogNames)
    tbl      <- tbl[,dogOrder]
    return(tbl)
}
countData <- sortColumns(countData)

### Print a correlation plot so that we can see how the samples correlate
source("../DE_Individual/myImagePlot.R")
myImagePlot(cor(cpm(countData)),
            title = "Mapped Reads Pearson",
            label=TRUE)

myImagePlot(cor(cpm(countData), method="spearman"),
            title = "Mapped Reads Spearman",
            label=TRUE)

# ### Correlation Plot w/o dog 8
# countDataMinus8 <- countData[,-1 *grep("Dog8", colnames(countData))]
# myImagePlot(cor(cpm(countDataMinus8)),
#             title = "Mapped Reads Pearson",
#             label=TRUE)
# 
# myImagePlot(cor(cpm(countDataMinus8), method="spearman"),
#             title = "Mapped Reads Spearman",
#             label=TRUE)

MDS Plot

## MDS Plot
allSample.cols <- rep(brewer.pal(12, "Set3"), each=2)
Group <- gsub("_.*", "", colnames(countData))
y <- DGEList(countData, group=Group)
plotMDS(y,
        col = allSample.cols,
        main = "MDS Plot")

# ### Pairs Plots
# 
# nrow(countData)
# pairs(countData[sample(1:nrow(countData), size = 1000, replace = TRUE),])