Read Depth
library(RColorBrewer)
library(ggplot2)
setwd("~/Documents/Dog/Dog_Run2.1/")
bowtie_Run2 <- read.table("summary_metrics.txt",
header = T,
sep = "\t",
stringsAsFactors = F)
bowtie_Run2$SAMPLE <- gsub(".*_D", "D", bowtie_Run2$SAMPLE)
bowtie_Run2$SAMPLE <- gsub("run2_S.*", "Replicate", bowtie_Run2$SAMPLE)
### Original Run2
bowtie_run2_original <- read.table("../Dog_Run2/summary_metrics_unmasked.txt",
header = T,
sep = "\t",
stringsAsFactors = F)
bowtie_run2_original$SAMPLE <- gsub(".*_D", "D", bowtie_run2_original$SAMPLE)
bowtie_run2_original$SAMPLE <- gsub("_S.*", "_Original", bowtie_run2_original$SAMPLE)
## convert percentage to 100
bowtie_Run2[,grep("PCT", colnames(bowtie_Run2))] <- bowtie_Run2[,grep("PCT", colnames(bowtie_Run2))] * 100
bowtie_run2_original[,grep("PCT", colnames(bowtie_run2_original))] <- bowtie_run2_original[,grep("PCT", colnames(bowtie_run2_original))] * 100
## Merge to create Summary Column
summary <- rbind(bowtie_Run2, bowtie_run2_original)
colors <- function(numDoubles) {
if (numDoubles > 12) {
diff <- numDoubles - 12
doubles <- brewer.pal(12, "Paired")
doubles <- c(doubles, brewer.pal(diff, "Paired"))
}
cols <- doubles
return(cols)
}
plot_reads_metric <- function(datafile, metric, double = FALSE, title, bowtie = FALSE) {
heading <- paste(title, deparse(substitute(datafile)))
datafile[[metric]] <- datafile[[metric]] / 1000000
if (bowtie == TRUE) {
datafile[[metric]] <- datafile[[metric]] / 2
}
if (double == TRUE) {
cols = allSample.cols <- rep(brewer.pal((nrow(datafile)/2), "Set3"), each=2)
}
else {
if (nrow(datafile) > 12) {
cols = colors(numDoubles = 24)
# diff = nrow(datafile) - 12
# cols = allSample.cols <- rep(brewer.pal(12, "Set3"), each=1)
# cols = c(rep(brewer.pal(diff, "Set3"), each=1), cols)
} else {
cols = allSample.cols <- rep(brewer.pal(nrow(datafile), "Set3"), each=1)
}
}
ggplot(data = datafile,
aes_string(x = "SAMPLE",
y = metric,
label = metric)) +
geom_bar(stat = "identity", fill = cols, alpha = 0.8) +
ylab("Millions of Reads") +
ggtitle(heading) +
geom_text(hjust = 1.5, size = 3) +
coord_flip()
}
plot_reads_metric(datafile = summary, metric = "TOTAL_READS", title = "Total Reads PAIRS", bowtie = TRUE)

plot_reads_metric(datafile = summary, metric = "PF_READS_ALIGNED", title = "Total Reads Aligned PAIRS", double = FALSE, bowtie=TRUE)

Correlation
library(edgeR)
library(tidyverse)
### Create a matrix
counts_original <- read.table("peaks_counts_original.txt", header=T, sep="\t", row.names=1)
### counts original
colnames(counts_original) <- gsub("...BAM.", "", colnames(counts_original))
colnames(counts_original) <- gsub(".*02_", "", colnames(counts_original))
colnames(counts_original) <- gsub("_.*.bam", "", colnames(counts_original))
counts_original <- counts_original[,-1 * grep("50", colnames(counts_original))]
colnames(counts_original) <- paste0(colnames(counts_original), "_Original")
### counts replicate
counts_replicate <- read.table("peaks_counts_replicate.txt", header=T, sep="\t", row.names=1)
colnames(counts_replicate) <- gsub("...BAM.", "", colnames(counts_replicate))
colnames(counts_replicate) <- gsub(".*02_", "", colnames(counts_replicate))
colnames(counts_replicate) <- gsub("_run2.*.bam", "", colnames(counts_replicate))
colnames(counts_replicate) <- paste0(colnames(counts_replicate), "_Replicate")
counts <- cbind(counts_original, counts_replicate)
### Create summary table
### New Data
samples <- read.table("../DE_Individual/sampleinfo_s1_s12.txt", header=T, sep="\t")
samples$exp.id <- gsub("s", "Dog", samples$exp.id)
samples <- samples %>% separate("Sex", c("Fixed", "Sex"), " ")
dogPre <- gsub("_Original", "", colnames(counts))
dogPre <- gsub("_Replicate", "", dogPre)
matchingIndex <- na.omit(match(dogPre, samples$exp.id))
### Filter out genes with low read counts ("unexpressed" genes) --
unex_filter <- which(rowSums(cpm(counts)) < 20)
counts <- counts[-1 * unex_filter,]
############################################
### Filter low sum counts ##################
############################################
filter <- which(rowSums(counts) <= 100)
countData <- counts[-1 * filter,]
sortColumns <- function(tbl) {
dogNames <- gsub("Dog", "", colnames(countData))
dogNames <- gsub("_.*", "", dogNames)
dogNames <- as.numeric(dogNames)
dogOrder <- order(dogNames)
tbl <- tbl[,dogOrder]
return(tbl)
}
countData <- sortColumns(countData)
### Print a correlation plot so that we can see how the samples correlate
source("../DE_Individual/myImagePlot.R")
myImagePlot(cor(cpm(countData)),
title = "Mapped Reads Pearson",
label=TRUE)

myImagePlot(cor(cpm(countData), method="spearman"),
title = "Mapped Reads Spearman",
label=TRUE)

# ### Correlation Plot w/o dog 8
# countDataMinus8 <- countData[,-1 *grep("Dog8", colnames(countData))]
# myImagePlot(cor(cpm(countDataMinus8)),
# title = "Mapped Reads Pearson",
# label=TRUE)
#
# myImagePlot(cor(cpm(countDataMinus8), method="spearman"),
# title = "Mapped Reads Spearman",
# label=TRUE)
MDS Plot
## MDS Plot
allSample.cols <- rep(brewer.pal(12, "Set3"), each=2)
Group <- gsub("_.*", "", colnames(countData))
y <- DGEList(countData, group=Group)
plotMDS(y,
col = allSample.cols,
main = "MDS Plot")

# ### Pairs Plots
#
# nrow(countData)
# pairs(countData[sample(1:nrow(countData), size = 1000, replace = TRUE),])