I’m looking at differences in DMRs when we change the coverage value cutoffs for the Day 135 samples. I don’t think I’m going to actually run this notebook, as it’s largely cut and paste code from a previous analysis. The only real changes are output file names, to avoid overwriting results and the actual cutoff values.
Cutoff values are located in lines 63 and 64 of the analysis code, that look like and are located in the all(x > 3) section. Changing the numeric value changes the cutoff.
meth.db3 <- meth.db2[apply(meth.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 3)), ]
totcov.db3 <- totcov.db2[apply(totcov.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 3)), ]
The thing you came here for! The different number of DMRs found at each cutoff. I’ll upload the graphs to scaphapoda also
3x |
709 |
5x |
304 |
7x |
136 |
10x |
63 |
Let the cutting and pasting begin! I’ll go 7x, 5x, and 3x in order.
7x Coverage Code
setwd("~/Documents/Day135/Day135Cutoff/")
samples.vec <- c("EPI-151", "EPI-152", "EPI-153", "EPI-154", "EPI-159", "EPI-160", "EPI-161", "EPI-162", "EPI-167", "EPI-168", "EPI-169", "EPI-170")
library(readr)
setwd("~/Documents/Day135/Day135Cutoff/CGoutput")
data <- list.files(path = ".", pattern = "*.output")
print(data)
db <- read_delim("~/Documents/Day135/Day135Cutoff/CGoutput/EPI-151_CG.output", "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
db <- as.data.table(db)
db$EPI103Meth <- rowSums(cbind(db[,4], db[,7]), na.rm = TRUE)
db$EPI103TotCov <- rowSums(cbind(db[,5], db[,8]), na.rm = TRUE)
meth.db <- db[,c(1,2,3,10)]
totcov.db <- db[,c(1,2,3,11)]
for(i in 2:length(data)) {
temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/CGoutput/", data[i]), "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
temp <- as.data.table(temp)
temp$Meth <- rowSums(cbind(temp[,4], temp[,7]), na.rm = TRUE)
temp$TotCov <- rowSums(cbind(temp[,5], temp[,8]), na.rm = TRUE)
meth.db <- merge(meth.db, temp[,c(1,2,3,10)], by = names(meth.db[,c(1,2,3)]), all = TRUE)
colnames(meth.db)[ncol(meth.db)] <- paste0(substr(data[i], 1, 7), "Meth")
meth.db <- as.data.table(meth.db)
totcov.db <- merge(totcov.db, temp[,c(1,2,3,11)], by = names(totcov.db[,c(1,2,3)]), all = TRUE)
colnames(totcov.db)[ncol(totcov.db)] <- paste0(substr(data[i], 1, 7), "TotCov")
totcov.db <- as.data.table(totcov.db)
}
setwd("~/Documents/Day135/Day135Cutoff/macau")
meth.db2 <- meth.db[complete.cases(meth.db[,c(4:15)]),]
totcov.db2 <- totcov.db[complete.cases(totcov.db[,c(4:15)]),]
meth.db3 <- meth.db2[apply(meth.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 7)), ]
totcov.db3 <- totcov.db2[apply(totcov.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 7)), ]
meth.db4 <- as.data.table(cbind(paste0(meth.db3$`#CHROM`, "-", meth.db3$POS), meth.db3[,4:ncol(meth.db3)]))
colnames(meth.db4)[1] <- "Site"
totcov.db4 <- as.data.table(cbind(paste0(totcov.db3$`#CHROM`, "-", totcov.db3$POS), totcov.db3[,4:ncol(totcov.db3)]))
colnames(totcov.db4)[1] <- "Site"
totcov.db5 <- totcov.db4[totcov.db4$Site %in% meth.db4$Site,]
nrow(meth.db4)
nrow(totcov.db5)
length(totcov.db5$Site %in% meth.db4$Site)
write.table(meth.db4, file = "sevenx.macau.meth.txt", sep = " ", quote = FALSE, row.names = FALSE)
write.table(totcov.db5, file = "sevenx.macau.totalcov.txt", sep = " ", quote = FALSE, row.names = FALSE)
system("head macau.meth.txt")
system("head macau.totalcov.txt")
setwd("~/Documents/Day135/Day135Cutoff/macau/")
system("/home/shared/macau/macau -g sevenx.macau.meth.csv -t sevenx.macau.totalcov.csv -p Predictor.csv -k day135-relate-finished.csv -o sevenx")
setwd("~/Documents/Day135/Day135Cutoff/macau/")
result_assoc <- read_delim("~/Documents/Day135/Day135Cutoff/macau/output/sevenx.assoc.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
sig.results <- result_assoc[result_assoc$pvalue <= 0.05,]
for(i in 1:nrow(sig.results)) {
temp <- strsplit(sig.results$id[i], "-")
sig.results[i,13] <- temp[[1]][1]
sig.results[i,14] <- temp[[1]][2]
}
colnames(sig.results)[c(13,14)] <- c("Scaffold", "Loc")
bed.test <- as.data.frame(cbind(sig.results[,13], (sig.results[,14]), (sig.results[,14]), sig.results$beta))
colnames(bed.test)[4] <- "beta"
write.table(bed.test, file = "sevenx.DiffMethRegions.bed", sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
setwd("~/Documents/Day135/Day135Cutoff/")
list.files(path = ".")
library(data.table)
library(readr)
DiffMethRegions <- read_delim("~/Documents/Day135/Day135Cutoff/macau/sevenx.DiffMethRegions.bed",
"\t", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
DiffMethRegions$Loc <- paste0(DiffMethRegions$X1, "-", DiffMethRegions$X2)
head(DiffMethRegions)
sample.files <- list.files(path = "~/Documents/Day135/Day135Cutoff/", pattern = "*.bedgraph")
print(sample.files)
EPI_103_percmeth_bedgraph <- read_delim("~/Documents/Day135/Day135Cutoff/EPI-151-percmeth.bedgraph",
"\t", escape_double = FALSE, col_names = FALSE,
col_types = cols(X4 = col_double()),
trim_ws = TRUE)
EPI_103_percmeth_bedgraph <- as.data.table(EPI_103_percmeth_bedgraph)
head(EPI_103_percmeth_bedgraph)
print(EPI_103_percmeth_bedgraph[7,])
EPI_103_DMRs <- EPI_103_percmeth_bedgraph[EPI_103_percmeth_bedgraph$X1 %in% DiffMethRegions$X1,]
EPI_103_DMRs$Loc <- paste0(EPI_103_DMRs$X1, "-", EPI_103_DMRs$X2)
head(EPI_103_DMRs)
EPI_103_DMRs <- EPI_103_DMRs[EPI_103_DMRs$Loc %in% DiffMethRegions$Loc,]
head(EPI_103_DMRs)
EPI_DMRs <- as.data.table(cbind(EPI_103_DMRs$Loc, EPI_103_DMRs$X4))
colnames(EPI_DMRs)[1] <- "DMRLoc"
colnames(EPI_DMRs)[2] <- "EPI_151"
head(EPI_DMRs)
for(i in 2:length(sample.files)) {
temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/", sample.files[i]),
"\t", escape_double = FALSE, col_names = FALSE,
col_types = cols(X4 = col_double()),
trim_ws = TRUE)
temp <- as.data.table(temp)
temp_DMRs <- temp[temp$X1 %in% DiffMethRegions$X1,]
temp_DMRs$Loc <- paste0(temp_DMRs$X1, "-", temp_DMRs$X2)
temp_DMRs <- temp_DMRs[temp_DMRs$Loc %in% DiffMethRegions$Loc,]
EPI_DMRs$temp <- temp_DMRs$X4
colnames(EPI_DMRs)[ncol(EPI_DMRs)] <- substr(sample.files[i], 1, 7)
}
head(EPI_DMRs)
## This is here because for osme reason EPI_103 gets brught in as characters as opposed to doubles. Need to figure out why, but this fixes it for now
EPI_DMRs$EPI_151 <- as.numeric(EPI_DMRs$EPI_151)
treatment.means <- as.data.frame(EPI_DMRs$DMRLoc)
treatment.means$Ambient <- apply(cbind(EPI_DMRs$`EPI-151`, EPI_DMRs$`EPI-152`, EPI_DMRs$`EPI-153`, EPI_DMRs$`EPI-154`) ,MARGIN = 1, FUN = mean)
treatment.means$Low <- apply(cbind(EPI_DMRs$`EPI-159`, EPI_DMRs$`EPI-160`, EPI_DMRs$`EPI-161`, EPI_DMRs$`EPI-162`) ,MARGIN = 1, FUN = mean)
treatment.means$SuperLow <- apply(cbind(EPI_DMRs$`EPI-167`, EPI_DMRs$`EPI-168`, EPI_DMRs$`EPI-169`, EPI_DMRs$`EPI-170`) ,MARGIN = 1, FUN = mean)
head(treatment.means)
for(i in 1:nrow(treatment.means)) {
boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,4], EPI_DMRs[i,5])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")
boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")
boxplot(t(cbind(EPI_DMRs[i,10], EPI_DMRs[i,11], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")
axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")
title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))
}
pdf(file = "sevenx-DMR-Box-Plots.pdf")
for(i in 1:nrow(treatment.means)) {
boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,10], EPI_DMRs[i,11])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")
boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")
boxplot(t(cbind(EPI_DMRs[i,4], EPI_DMRs[i,5], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")
axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")
title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))
}
dev.off()
5x Coverage Code
setwd("~/Documents/Day135/Day135Cutoff/")
samples.vec <- c("EPI-151", "EPI-152", "EPI-153", "EPI-154", "EPI-159", "EPI-160", "EPI-161", "EPI-162", "EPI-167", "EPI-168", "EPI-169", "EPI-170")
library(readr)
setwd("~/Documents/Day135/Day135Cutoff/CGoutput")
data <- list.files(path = ".", pattern = "*.output")
print(data)
db <- read_delim("~/Documents/Day135/Day135Cutoff/CGoutput/EPI-151_CG.output", "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
db <- as.data.table(db)
db$EPI103Meth <- rowSums(cbind(db[,4], db[,7]), na.rm = TRUE)
db$EPI103TotCov <- rowSums(cbind(db[,5], db[,8]), na.rm = TRUE)
meth.db <- db[,c(1,2,3,10)]
totcov.db <- db[,c(1,2,3,11)]
for(i in 2:length(data)) {
temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/CGoutput/", data[i]), "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
temp <- as.data.table(temp)
temp$Meth <- rowSums(cbind(temp[,4], temp[,7]), na.rm = TRUE)
temp$TotCov <- rowSums(cbind(temp[,5], temp[,8]), na.rm = TRUE)
meth.db <- merge(meth.db, temp[,c(1,2,3,10)], by = names(meth.db[,c(1,2,3)]), all = TRUE)
colnames(meth.db)[ncol(meth.db)] <- paste0(substr(data[i], 1, 7), "Meth")
meth.db <- as.data.table(meth.db)
totcov.db <- merge(totcov.db, temp[,c(1,2,3,11)], by = names(totcov.db[,c(1,2,3)]), all = TRUE)
colnames(totcov.db)[ncol(totcov.db)] <- paste0(substr(data[i], 1, 7), "TotCov")
totcov.db <- as.data.table(totcov.db)
}
setwd("~/Documents/Day135/Day135Cutoff/macau")
meth.db2 <- meth.db[complete.cases(meth.db[,c(4:15)]),]
totcov.db2 <- totcov.db[complete.cases(totcov.db[,c(4:15)]),]
meth.db3 <- meth.db2[apply(meth.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 5)), ]
totcov.db3 <- totcov.db2[apply(totcov.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 5)), ]
meth.db4 <- as.data.table(cbind(paste0(meth.db3$`#CHROM`, "-", meth.db3$POS), meth.db3[,4:ncol(meth.db3)]))
colnames(meth.db4)[1] <- "Site"
totcov.db4 <- as.data.table(cbind(paste0(totcov.db3$`#CHROM`, "-", totcov.db3$POS), totcov.db3[,4:ncol(totcov.db3)]))
colnames(totcov.db4)[1] <- "Site"
totcov.db5 <- totcov.db4[totcov.db4$Site %in% meth.db4$Site,]
nrow(meth.db4)
nrow(totcov.db5)
length(totcov.db5$Site %in% meth.db4$Site)
write.table(meth.db4, file = "fivex.macau.meth.txt", sep = " ", quote = FALSE, row.names = FALSE)
write.table(totcov.db5, file = "fivex.macau.totalcov.txt", sep = " ", quote = FALSE, row.names = FALSE)
system("head fivex.macau.meth.txt")
system("head fivex.macau.totalcov.txt")
setwd("~/Documents/Day135/Day135Cutoff/macau/")
system("/home/shared/macau/macau -g fivex.macau.meth.txt -t fivex.macau.totalcov.txt -p Predictor.csv -k day135-relate-finished.csv -o threex")
setwd("~/Documents/Day135/Day135Cutoff/macau/")
result_assoc <- read_delim("~/Documents/Day135/Day135Cutoff/macau/output/sevenx.assoc.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
sig.results <- result_assoc[result_assoc$pvalue <= 0.05,]
for(i in 1:nrow(sig.results)) {
temp <- strsplit(sig.results$id[i], "-")
sig.results[i,13] <- temp[[1]][1]
sig.results[i,14] <- temp[[1]][2]
}
colnames(sig.results)[c(13,14)] <- c("Scaffold", "Loc")
bed.test <- as.data.frame(cbind(sig.results[,13], (sig.results[,14]), (sig.results[,14]), sig.results$beta))
colnames(bed.test)[4] <- "beta"
write.table(bed.test, file = "fivex.DiffMethRegions.bed", sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
setwd("~/Documents/Day135/Day135Cutoff/")
list.files(path = ".")
library(data.table)
library(readr)
DiffMethRegions <- read_delim("~/Documents/Day135/Day135Cutoff/macau/fivex.DiffMethRegions.bed",
"\t", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
DiffMethRegions$Loc <- paste0(DiffMethRegions$X1, "-", DiffMethRegions$X2)
head(DiffMethRegions)
sample.files <- list.files(path = "~/Documents/Day135/Day135Cutoff/", pattern = "*.bedgraph")
print(sample.files)
EPI_103_percmeth_bedgraph <- read_delim("~/Documents/Day135/Day135Cutoff/EPI-151-percmeth.bedgraph",
"\t", escape_double = FALSE, col_names = FALSE,
col_types = cols(X4 = col_double()),
trim_ws = TRUE)
EPI_103_percmeth_bedgraph <- as.data.table(EPI_103_percmeth_bedgraph)
head(EPI_103_percmeth_bedgraph)
print(EPI_103_percmeth_bedgraph[7,])
EPI_103_DMRs <- EPI_103_percmeth_bedgraph[EPI_103_percmeth_bedgraph$X1 %in% DiffMethRegions$X1,]
EPI_103_DMRs$Loc <- paste0(EPI_103_DMRs$X1, "-", EPI_103_DMRs$X2)
head(EPI_103_DMRs)
EPI_103_DMRs <- EPI_103_DMRs[EPI_103_DMRs$Loc %in% DiffMethRegions$Loc,]
head(EPI_103_DMRs)
EPI_DMRs <- as.data.table(cbind(EPI_103_DMRs$Loc, EPI_103_DMRs$X4))
colnames(EPI_DMRs)[1] <- "DMRLoc"
colnames(EPI_DMRs)[2] <- "EPI_151"
head(EPI_DMRs)
for(i in 2:length(sample.files)) {
temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/", sample.files[i]),
"\t", escape_double = FALSE, col_names = FALSE,
col_types = cols(X4 = col_double()),
trim_ws = TRUE)
temp <- as.data.table(temp)
temp_DMRs <- temp[temp$X1 %in% DiffMethRegions$X1,]
temp_DMRs$Loc <- paste0(temp_DMRs$X1, "-", temp_DMRs$X2)
temp_DMRs <- temp_DMRs[temp_DMRs$Loc %in% DiffMethRegions$Loc,]
EPI_DMRs$temp <- temp_DMRs$X4
colnames(EPI_DMRs)[ncol(EPI_DMRs)] <- substr(sample.files[i], 1, 7)
}
head(EPI_DMRs)
## This is here because for osme reason EPI_103 gets brught in as characters as opposed to doubles. Need to figure out why, but this fixes it for now
EPI_DMRs$EPI_151 <- as.numeric(EPI_DMRs$EPI_151)
treatment.means <- as.data.frame(EPI_DMRs$DMRLoc)
treatment.means$Ambient <- apply(cbind(EPI_DMRs$`EPI-151`, EPI_DMRs$`EPI-152`, EPI_DMRs$`EPI-153`, EPI_DMRs$`EPI-154`) ,MARGIN = 1, FUN = mean)
treatment.means$Low <- apply(cbind(EPI_DMRs$`EPI-159`, EPI_DMRs$`EPI-160`, EPI_DMRs$`EPI-161`, EPI_DMRs$`EPI-162`) ,MARGIN = 1, FUN = mean)
treatment.means$SuperLow <- apply(cbind(EPI_DMRs$`EPI-167`, EPI_DMRs$`EPI-168`, EPI_DMRs$`EPI-169`, EPI_DMRs$`EPI-170`) ,MARGIN = 1, FUN = mean)
head(treatment.means)
for(i in 1:nrow(treatment.means)) {
boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,4], EPI_DMRs[i,5])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")
boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")
boxplot(t(cbind(EPI_DMRs[i,10], EPI_DMRs[i,11], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")
axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")
title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))
}
pdf(file = "fivex-DMR-Box-Plots.pdf")
for(i in 1:nrow(treatment.means)) {
boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,10], EPI_DMRs[i,11])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")
boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")
boxplot(t(cbind(EPI_DMRs[i,4], EPI_DMRs[i,5], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")
axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")
title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))
}
dev.off()
3x Coverage Code
setwd("~/Documents/Day135/Day135Cutoff/")
samples.vec <- c("EPI-151", "EPI-152", "EPI-153", "EPI-154", "EPI-159", "EPI-160", "EPI-161", "EPI-162", "EPI-167", "EPI-168", "EPI-169", "EPI-170")
library(readr)
setwd("~/Documents/Day135/Day135Cutoff/CGoutput")
data <- list.files(path = ".", pattern = "*.output")
print(data)
db <- read_delim("~/Documents/Day135/Day135Cutoff/CGoutput/EPI-151_CG.output", "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
db <- as.data.table(db)
db$EPI103Meth <- rowSums(cbind(db[,4], db[,7]), na.rm = TRUE)
db$EPI103TotCov <- rowSums(cbind(db[,5], db[,8]), na.rm = TRUE)
meth.db <- db[,c(1,2,3,10)]
totcov.db <- db[,c(1,2,3,11)]
for(i in 2:length(data)) {
temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/CGoutput/", data[i]), "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
temp <- as.data.table(temp)
temp$Meth <- rowSums(cbind(temp[,4], temp[,7]), na.rm = TRUE)
temp$TotCov <- rowSums(cbind(temp[,5], temp[,8]), na.rm = TRUE)
meth.db <- merge(meth.db, temp[,c(1,2,3,10)], by = names(meth.db[,c(1,2,3)]), all = TRUE)
colnames(meth.db)[ncol(meth.db)] <- paste0(substr(data[i], 1, 7), "Meth")
meth.db <- as.data.table(meth.db)
totcov.db <- merge(totcov.db, temp[,c(1,2,3,11)], by = names(totcov.db[,c(1,2,3)]), all = TRUE)
colnames(totcov.db)[ncol(totcov.db)] <- paste0(substr(data[i], 1, 7), "TotCov")
totcov.db <- as.data.table(totcov.db)
}
setwd("~/Documents/Day135/Day135Cutoff/macau")
meth.db2 <- meth.db[complete.cases(meth.db[,c(4:15)]),]
totcov.db2 <- totcov.db[complete.cases(totcov.db[,c(4:15)]),]
meth.db3 <- meth.db2[apply(meth.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 3)), ]
totcov.db3 <- totcov.db2[apply(totcov.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 3)), ]
meth.db4 <- as.data.table(cbind(paste0(meth.db3$`#CHROM`, "-", meth.db3$POS), meth.db3[,4:ncol(meth.db3)]))
colnames(meth.db4)[1] <- "Site"
totcov.db4 <- as.data.table(cbind(paste0(totcov.db3$`#CHROM`, "-", totcov.db3$POS), totcov.db3[,4:ncol(totcov.db3)]))
colnames(totcov.db4)[1] <- "Site"
totcov.db5 <- totcov.db4[totcov.db4$Site %in% meth.db4$Site,]
nrow(meth.db4)
nrow(totcov.db5)
length(totcov.db5$Site %in% meth.db4$Site)
write.table(meth.db4, file = "threex.macau.meth.txt", sep = " ", quote = FALSE, row.names = FALSE)
write.table(totcov.db5, file = "threex.macau.totalcov.txt", sep = " ", quote = FALSE, row.names = FALSE)
system("head threex.macau.meth.txt")
system("head threex.macau.totalcov.txt")
setwd("~/Documents/Day135/Day135Cutoff/macau/")
system("/home/shared/macau/macau -g threex.macau.meth.txt -t threex.macau.totalcov.txt -p Predictor.csv -k day135-relate-finished.csv -o threex")
setwd("~/Documents/Day135/Day135Cutoff/macau/")
threex_assoc <- read_delim("~/Documents/Day135/Day135Cutoff/macau/output/threex.assoc.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
sig.results <- threex_assoc[threex_assoc$pvalue <= 0.05,]
for(i in 1:nrow(sig.results)) {
temp <- strsplit(sig.results$id[i], "-")
sig.results[i,13] <- temp[[1]][1]
sig.results[i,14] <- temp[[1]][2]
}
colnames(sig.results)[c(13,14)] <- c("Scaffold", "Loc")
bed.test <- as.data.frame(cbind(sig.results[,13], (sig.results[,14]), (sig.results[,14]), sig.results$beta))
colnames(bed.test)[4] <- "beta"
write.table(bed.test, file = "threex.DiffMethRegions.bed", sep = ",", quote = FALSE, col.names = FALSE, row.names = FALSE)
setwd("~/Documents/Day135/Day135Cutoff/")
list.files(path = ".")
library(data.table)
library(readr)
DiffMethRegions <- read_delim("~/Documents/Day135/Day135Cutoff/threex.DiffMethRegions.bed",
",", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
DiffMethRegions$Loc <- paste0(DiffMethRegions$X1, "-", DiffMethRegions$X2)
head(DiffMethRegions)
sample.files <- list.files(path = "~/Documents/Day135/Day135Cutoff/", pattern = "*.bedgraph")
print(sample.files)
EPI_103_percmeth_bedgraph <- read_delim("~/Documents/Day135/Day135Cutoff/EPI-151-percmeth.bedgraph",
"\t", escape_double = FALSE, col_names = FALSE,
col_types = cols(X4 = col_double()),
trim_ws = TRUE)
EPI_103_percmeth_bedgraph <- as.data.table(EPI_103_percmeth_bedgraph)
head(EPI_103_percmeth_bedgraph)
print(EPI_103_percmeth_bedgraph[7,])
EPI_103_DMRs <- EPI_103_percmeth_bedgraph[EPI_103_percmeth_bedgraph$X1 %in% DiffMethRegions$X1,]
EPI_103_DMRs$Loc <- paste0(EPI_103_DMRs$X1, "-", EPI_103_DMRs$X2)
head(EPI_103_DMRs)
EPI_103_DMRs <- EPI_103_DMRs[EPI_103_DMRs$Loc %in% DiffMethRegions$Loc,]
head(EPI_103_DMRs)
EPI_DMRs <- as.data.table(cbind(EPI_103_DMRs$Loc, EPI_103_DMRs$X4))
colnames(EPI_DMRs)[1] <- "DMRLoc"
colnames(EPI_DMRs)[2] <- "EPI_151"
head(EPI_DMRs)
for(i in 2:length(sample.files)) {
temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/", sample.files[i]),
"\t", escape_double = FALSE, col_names = FALSE,
col_types = cols(X4 = col_double()),
trim_ws = TRUE)
temp <- as.data.table(temp)
temp_DMRs <- temp[temp$X1 %in% DiffMethRegions$X1,]
temp_DMRs$Loc <- paste0(temp_DMRs$X1, "-", temp_DMRs$X2)
temp_DMRs <- temp_DMRs[temp_DMRs$Loc %in% DiffMethRegions$Loc,]
EPI_DMRs$temp <- temp_DMRs$X4
colnames(EPI_DMRs)[ncol(EPI_DMRs)] <- substr(sample.files[i], 1, 7)
}
head(EPI_DMRs)
## This is here because for osme reason EPI_103 gets brught in as characters as opposed to doubles. Need to figure out why, but this fixes it for now
EPI_DMRs$EPI_151 <- as.numeric(EPI_DMRs$EPI_151)
treatment.means <- as.data.frame(EPI_DMRs$DMRLoc)
treatment.means$Ambient <- apply(cbind(EPI_DMRs$`EPI-151`, EPI_DMRs$`EPI-152`, EPI_DMRs$`EPI-153`, EPI_DMRs$`EPI-154`) ,MARGIN = 1, FUN = mean)
treatment.means$Low <- apply(cbind(EPI_DMRs$`EPI-159`, EPI_DMRs$`EPI-160`, EPI_DMRs$`EPI-161`, EPI_DMRs$`EPI-162`) ,MARGIN = 1, FUN = mean)
treatment.means$SuperLow <- apply(cbind(EPI_DMRs$`EPI-167`, EPI_DMRs$`EPI-168`, EPI_DMRs$`EPI-169`, EPI_DMRs$`EPI-170`) ,MARGIN = 1, FUN = mean)
head(treatment.means)
# for(i in 1:nrow(treatment.means)) {
#
#
# boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,4], EPI_DMRs[i,5])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")
#
# boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")
#
# boxplot(t(cbind(EPI_DMRs[i,10], EPI_DMRs[i,11], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")
#
# axis(side = 1, at = 2, labels = "Low")
# axis(side = 1, at = 3, labels = "Super Low")
#
# title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))
#
#
# }
pdf(file = "threex-DMR-Box-Plots.pdf")
for(i in 1:nrow(treatment.means)) {
boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,10], EPI_DMRs[i,11])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")
boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")
boxplot(t(cbind(EPI_DMRs[i,4], EPI_DMRs[i,5], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")
axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")
title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))
}
dev.off()
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
---
title: "Day 135 cutoff differences."
output: html_notebook
---

I'm looking at differences in DMRs when we change the coverage value cutoffs for the Day 135 samples. I don't think I'm going to actually run this notebook, as it's largely cut and paste code from a previous analysis. The only real changes are output file names, to avoid overwriting results and the actual cutoff values.

Cutoff values are located in lines 63 and 64 of the analysis code, that look like and are located in the all(x > 3) section. Changing the numeric value changes the cutoff. 



```{r}

meth.db3 <- meth.db2[apply(meth.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 3)), ]
totcov.db3 <- totcov.db2[apply(totcov.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 3)), ]

```


The thing you came here for! The different number of DMRs found at each cutoff. I'll upload the graphs to scaphapoda also

| Coverage Cutoff | # of DMRs For Day 135 Samples |
|-----------------|-------------------------------|
| 3x              | 709                           |
| 5x              | 304                           |
| 7x              | 136                           |
| 10x             | 63                            |

  
  
  
  


Let the cutting and pasting begin! I'll go 7x, 5x, and 3x in order.

***
7x Coverage Code  
---
  

```{r}
setwd("~/Documents/Day135/Day135Cutoff/")

samples.vec <- c("EPI-151", "EPI-152", "EPI-153", "EPI-154", "EPI-159", "EPI-160", "EPI-161", "EPI-162", "EPI-167", "EPI-168", "EPI-169", "EPI-170")

```


```{r}

library(readr)
setwd("~/Documents/Day135/Day135Cutoff/CGoutput")

data <- list.files(path = ".", pattern = "*.output")
print(data)

db <- read_delim("~/Documents/Day135/Day135Cutoff/CGoutput/EPI-151_CG.output", "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")

db <- as.data.table(db)

db$EPI103Meth <- rowSums(cbind(db[,4], db[,7]), na.rm = TRUE)
db$EPI103TotCov <- rowSums(cbind(db[,5], db[,8]), na.rm = TRUE)
meth.db <- db[,c(1,2,3,10)]
totcov.db <- db[,c(1,2,3,11)]

for(i in 2:length(data))   {
  
  temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/CGoutput/", data[i]), "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
  temp <- as.data.table(temp)
  temp$Meth <- rowSums(cbind(temp[,4], temp[,7]), na.rm = TRUE)
  temp$TotCov <- rowSums(cbind(temp[,5], temp[,8]), na.rm = TRUE)

  meth.db <- merge(meth.db, temp[,c(1,2,3,10)], by = names(meth.db[,c(1,2,3)]), all = TRUE)
  colnames(meth.db)[ncol(meth.db)] <- paste0(substr(data[i], 1, 7), "Meth")
  meth.db <- as.data.table(meth.db)
  
  totcov.db <- merge(totcov.db, temp[,c(1,2,3,11)], by = names(totcov.db[,c(1,2,3)]), all = TRUE)
  colnames(totcov.db)[ncol(totcov.db)] <- paste0(substr(data[i], 1, 7), "TotCov")
  totcov.db <- as.data.table(totcov.db)
}



```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau")
meth.db2 <- meth.db[complete.cases(meth.db[,c(4:15)]),]

totcov.db2 <- totcov.db[complete.cases(totcov.db[,c(4:15)]),]

meth.db3 <- meth.db2[apply(meth.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 7)), ]
totcov.db3 <- totcov.db2[apply(totcov.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 7)), ]

meth.db4 <- as.data.table(cbind(paste0(meth.db3$`#CHROM`, "-", meth.db3$POS), meth.db3[,4:ncol(meth.db3)]))
colnames(meth.db4)[1] <- "Site"
totcov.db4 <- as.data.table(cbind(paste0(totcov.db3$`#CHROM`, "-", totcov.db3$POS), totcov.db3[,4:ncol(totcov.db3)]))
colnames(totcov.db4)[1] <- "Site"

totcov.db5 <- totcov.db4[totcov.db4$Site %in% meth.db4$Site,]

nrow(meth.db4)
nrow(totcov.db5)
length(totcov.db5$Site %in% meth.db4$Site)

write.table(meth.db4, file = "sevenx.macau.meth.txt", sep = " ", quote = FALSE, row.names = FALSE)
write.table(totcov.db5, file = "sevenx.macau.totalcov.txt", sep = " ", quote = FALSE, row.names = FALSE)

system("head macau.meth.txt")
system("head macau.totalcov.txt")
```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau/")
system("/home/shared/macau/macau -g sevenx.macau.meth.csv -t sevenx.macau.totalcov.csv -p Predictor.csv -k day135-relate-finished.csv -o sevenx")

```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau/")
result_assoc <- read_delim("~/Documents/Day135/Day135Cutoff/macau/output/sevenx.assoc.txt", "\t", escape_double = FALSE, trim_ws = TRUE)

sig.results <- result_assoc[result_assoc$pvalue <= 0.05,]

 for(i in 1:nrow(sig.results))   {
   temp <- strsplit(sig.results$id[i], "-") 
   sig.results[i,13] <- temp[[1]][1]
   sig.results[i,14] <- temp[[1]][2]
   
 }
 
 colnames(sig.results)[c(13,14)] <- c("Scaffold", "Loc")
 
 bed.test <- as.data.frame(cbind(sig.results[,13], (sig.results[,14]), (sig.results[,14]), sig.results$beta))
 colnames(bed.test)[4] <- "beta"
 write.table(bed.test, file = "sevenx.DiffMethRegions.bed", sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)

```

```{r}
 setwd("~/Documents/Day135/Day135Cutoff/")
 
 list.files(path = ".")
 

```

```{r}
 library(data.table)
 library(readr)
 DiffMethRegions <- read_delim("~/Documents/Day135/Day135Cutoff/macau/sevenx.DiffMethRegions.bed", 
     "\t", escape_double = FALSE, col_names = FALSE, 
     trim_ws = TRUE)
 
 DiffMethRegions$Loc <- paste0(DiffMethRegions$X1, "-", DiffMethRegions$X2)
 
 head(DiffMethRegions)
 
 sample.files <- list.files(path = "~/Documents/Day135/Day135Cutoff/", pattern = "*.bedgraph")
 
 print(sample.files)

```

```{r}

 EPI_103_percmeth_bedgraph <- read_delim("~/Documents/Day135/Day135Cutoff/EPI-151-percmeth.bedgraph", 
     "\t", escape_double = FALSE, col_names = FALSE, 
     col_types = cols(X4 = col_double()), 
     trim_ws = TRUE)
 
 EPI_103_percmeth_bedgraph <- as.data.table(EPI_103_percmeth_bedgraph)
 
 head(EPI_103_percmeth_bedgraph)
 
 print(EPI_103_percmeth_bedgraph[7,])

```

```{r}

EPI_103_DMRs <- EPI_103_percmeth_bedgraph[EPI_103_percmeth_bedgraph$X1 %in% DiffMethRegions$X1,]


EPI_103_DMRs$Loc <- paste0(EPI_103_DMRs$X1, "-", EPI_103_DMRs$X2)


head(EPI_103_DMRs)

EPI_103_DMRs <- EPI_103_DMRs[EPI_103_DMRs$Loc %in% DiffMethRegions$Loc,]

head(EPI_103_DMRs)

EPI_DMRs <- as.data.table(cbind(EPI_103_DMRs$Loc, EPI_103_DMRs$X4))



colnames(EPI_DMRs)[1] <- "DMRLoc"
colnames(EPI_DMRs)[2] <- "EPI_151"
head(EPI_DMRs)

```

```{r}

for(i in 2:length(sample.files))   {

  temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/", sample.files[i]),
    "\t", escape_double = FALSE, col_names = FALSE,
    col_types = cols(X4 = col_double()),
    trim_ws = TRUE)

  temp <- as.data.table(temp)

  temp_DMRs <- temp[temp$X1 %in% DiffMethRegions$X1,]


  temp_DMRs$Loc <- paste0(temp_DMRs$X1, "-", temp_DMRs$X2)


  temp_DMRs <- temp_DMRs[temp_DMRs$Loc %in% DiffMethRegions$Loc,]

  EPI_DMRs$temp <- temp_DMRs$X4

  colnames(EPI_DMRs)[ncol(EPI_DMRs)] <- substr(sample.files[i], 1, 7)


}

head(EPI_DMRs)

## This is here because for osme reason EPI_103 gets brught in as characters as opposed to doubles. Need to figure out why, but this fixes it for now
EPI_DMRs$EPI_151 <- as.numeric(EPI_DMRs$EPI_151)

```

```{r}

treatment.means <- as.data.frame(EPI_DMRs$DMRLoc)

treatment.means$Ambient <- apply(cbind(EPI_DMRs$`EPI-151`, EPI_DMRs$`EPI-152`, EPI_DMRs$`EPI-153`, EPI_DMRs$`EPI-154`) ,MARGIN = 1, FUN = mean)

treatment.means$Low <- apply(cbind(EPI_DMRs$`EPI-159`, EPI_DMRs$`EPI-160`, EPI_DMRs$`EPI-161`, EPI_DMRs$`EPI-162`) ,MARGIN = 1, FUN = mean)

treatment.means$SuperLow <- apply(cbind(EPI_DMRs$`EPI-167`, EPI_DMRs$`EPI-168`, EPI_DMRs$`EPI-169`, EPI_DMRs$`EPI-170`) ,MARGIN = 1, FUN = mean)

head(treatment.means)
```

```{r}



 for(i in 1:nrow(treatment.means))   {
   

  boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,4], EPI_DMRs[i,5])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")

boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")

boxplot(t(cbind(EPI_DMRs[i,10], EPI_DMRs[i,11], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")

axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")

title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))


}


pdf(file = "sevenx-DMR-Box-Plots.pdf")

for(i in 1:nrow(treatment.means))   {


  boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,10], EPI_DMRs[i,11])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")

boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")

boxplot(t(cbind(EPI_DMRs[i,4], EPI_DMRs[i,5], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")

axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")

title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))


}

dev.off()

```


***
5x Coverage Code  
---

```{r}
setwd("~/Documents/Day135/Day135Cutoff/")

samples.vec <- c("EPI-151", "EPI-152", "EPI-153", "EPI-154", "EPI-159", "EPI-160", "EPI-161", "EPI-162", "EPI-167", "EPI-168", "EPI-169", "EPI-170")

```


```{r}

library(readr)
setwd("~/Documents/Day135/Day135Cutoff/CGoutput")

data <- list.files(path = ".", pattern = "*.output")
print(data)

db <- read_delim("~/Documents/Day135/Day135Cutoff/CGoutput/EPI-151_CG.output", "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")

db <- as.data.table(db)

db$EPI103Meth <- rowSums(cbind(db[,4], db[,7]), na.rm = TRUE)
db$EPI103TotCov <- rowSums(cbind(db[,5], db[,8]), na.rm = TRUE)
meth.db <- db[,c(1,2,3,10)]
totcov.db <- db[,c(1,2,3,11)]

for(i in 2:length(data))   {
  
  temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/CGoutput/", data[i]), "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
  temp <- as.data.table(temp)
  temp$Meth <- rowSums(cbind(temp[,4], temp[,7]), na.rm = TRUE)
  temp$TotCov <- rowSums(cbind(temp[,5], temp[,8]), na.rm = TRUE)

  meth.db <- merge(meth.db, temp[,c(1,2,3,10)], by = names(meth.db[,c(1,2,3)]), all = TRUE)
  colnames(meth.db)[ncol(meth.db)] <- paste0(substr(data[i], 1, 7), "Meth")
  meth.db <- as.data.table(meth.db)
  
  totcov.db <- merge(totcov.db, temp[,c(1,2,3,11)], by = names(totcov.db[,c(1,2,3)]), all = TRUE)
  colnames(totcov.db)[ncol(totcov.db)] <- paste0(substr(data[i], 1, 7), "TotCov")
  totcov.db <- as.data.table(totcov.db)
}



```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau")
meth.db2 <- meth.db[complete.cases(meth.db[,c(4:15)]),]

totcov.db2 <- totcov.db[complete.cases(totcov.db[,c(4:15)]),]

meth.db3 <- meth.db2[apply(meth.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 5)), ]
totcov.db3 <- totcov.db2[apply(totcov.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 5)), ]

meth.db4 <- as.data.table(cbind(paste0(meth.db3$`#CHROM`, "-", meth.db3$POS), meth.db3[,4:ncol(meth.db3)]))
colnames(meth.db4)[1] <- "Site"
totcov.db4 <- as.data.table(cbind(paste0(totcov.db3$`#CHROM`, "-", totcov.db3$POS), totcov.db3[,4:ncol(totcov.db3)]))
colnames(totcov.db4)[1] <- "Site"

totcov.db5 <- totcov.db4[totcov.db4$Site %in% meth.db4$Site,]

nrow(meth.db4)
nrow(totcov.db5)
length(totcov.db5$Site %in% meth.db4$Site)

write.table(meth.db4, file = "fivex.macau.meth.txt", sep = " ", quote = FALSE, row.names = FALSE)
write.table(totcov.db5, file = "fivex.macau.totalcov.txt", sep = " ", quote = FALSE, row.names = FALSE)

system("head fivex.macau.meth.txt")
system("head fivex.macau.totalcov.txt")
```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau/")
system("/home/shared/macau/macau -g fivex.macau.meth.txt -t fivex.macau.totalcov.txt -p Predictor.csv -k day135-relate-finished.csv -o threex")

```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau/")
result_assoc <- read_delim("~/Documents/Day135/Day135Cutoff/macau/output/sevenx.assoc.txt", "\t", escape_double = FALSE, trim_ws = TRUE)

sig.results <- result_assoc[result_assoc$pvalue <= 0.05,]

 for(i in 1:nrow(sig.results))   {
   temp <- strsplit(sig.results$id[i], "-") 
   sig.results[i,13] <- temp[[1]][1]
   sig.results[i,14] <- temp[[1]][2]
   
 }
 
 colnames(sig.results)[c(13,14)] <- c("Scaffold", "Loc")
 
 bed.test <- as.data.frame(cbind(sig.results[,13], (sig.results[,14]), (sig.results[,14]), sig.results$beta))
 colnames(bed.test)[4] <- "beta"
 write.table(bed.test, file = "fivex.DiffMethRegions.bed", sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)

```

```{r}
 setwd("~/Documents/Day135/Day135Cutoff/")
 
 list.files(path = ".")
 

```

```{r}
 library(data.table)
 library(readr)
 DiffMethRegions <- read_delim("~/Documents/Day135/Day135Cutoff/macau/fivex.DiffMethRegions.bed", 
     "\t", escape_double = FALSE, col_names = FALSE, 
     trim_ws = TRUE)
 
 DiffMethRegions$Loc <- paste0(DiffMethRegions$X1, "-", DiffMethRegions$X2)
 
 head(DiffMethRegions)
 
 sample.files <- list.files(path = "~/Documents/Day135/Day135Cutoff/", pattern = "*.bedgraph")
 
 print(sample.files)

```

```{r}

 EPI_103_percmeth_bedgraph <- read_delim("~/Documents/Day135/Day135Cutoff/EPI-151-percmeth.bedgraph", 
     "\t", escape_double = FALSE, col_names = FALSE, 
     col_types = cols(X4 = col_double()), 
     trim_ws = TRUE)
 
 EPI_103_percmeth_bedgraph <- as.data.table(EPI_103_percmeth_bedgraph)
 
 head(EPI_103_percmeth_bedgraph)
 
 print(EPI_103_percmeth_bedgraph[7,])

```

```{r}

EPI_103_DMRs <- EPI_103_percmeth_bedgraph[EPI_103_percmeth_bedgraph$X1 %in% DiffMethRegions$X1,]


EPI_103_DMRs$Loc <- paste0(EPI_103_DMRs$X1, "-", EPI_103_DMRs$X2)


head(EPI_103_DMRs)

EPI_103_DMRs <- EPI_103_DMRs[EPI_103_DMRs$Loc %in% DiffMethRegions$Loc,]

head(EPI_103_DMRs)

EPI_DMRs <- as.data.table(cbind(EPI_103_DMRs$Loc, EPI_103_DMRs$X4))



colnames(EPI_DMRs)[1] <- "DMRLoc"
colnames(EPI_DMRs)[2] <- "EPI_151"
head(EPI_DMRs)

```

```{r}

for(i in 2:length(sample.files))   {

  temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/", sample.files[i]),
    "\t", escape_double = FALSE, col_names = FALSE,
    col_types = cols(X4 = col_double()),
    trim_ws = TRUE)

  temp <- as.data.table(temp)

  temp_DMRs <- temp[temp$X1 %in% DiffMethRegions$X1,]


  temp_DMRs$Loc <- paste0(temp_DMRs$X1, "-", temp_DMRs$X2)


  temp_DMRs <- temp_DMRs[temp_DMRs$Loc %in% DiffMethRegions$Loc,]

  EPI_DMRs$temp <- temp_DMRs$X4

  colnames(EPI_DMRs)[ncol(EPI_DMRs)] <- substr(sample.files[i], 1, 7)


}

head(EPI_DMRs)

## This is here because for osme reason EPI_103 gets brught in as characters as opposed to doubles. Need to figure out why, but this fixes it for now
EPI_DMRs$EPI_151 <- as.numeric(EPI_DMRs$EPI_151)

```

```{r}

treatment.means <- as.data.frame(EPI_DMRs$DMRLoc)

treatment.means$Ambient <- apply(cbind(EPI_DMRs$`EPI-151`, EPI_DMRs$`EPI-152`, EPI_DMRs$`EPI-153`, EPI_DMRs$`EPI-154`) ,MARGIN = 1, FUN = mean)

treatment.means$Low <- apply(cbind(EPI_DMRs$`EPI-159`, EPI_DMRs$`EPI-160`, EPI_DMRs$`EPI-161`, EPI_DMRs$`EPI-162`) ,MARGIN = 1, FUN = mean)

treatment.means$SuperLow <- apply(cbind(EPI_DMRs$`EPI-167`, EPI_DMRs$`EPI-168`, EPI_DMRs$`EPI-169`, EPI_DMRs$`EPI-170`) ,MARGIN = 1, FUN = mean)

head(treatment.means)
```

```{r}



 for(i in 1:nrow(treatment.means))   {
   

  boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,4], EPI_DMRs[i,5])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")

boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")

boxplot(t(cbind(EPI_DMRs[i,10], EPI_DMRs[i,11], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")

axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")

title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))


}


pdf(file = "fivex-DMR-Box-Plots.pdf")

for(i in 1:nrow(treatment.means))   {


  boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,10], EPI_DMRs[i,11])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")

boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")

boxplot(t(cbind(EPI_DMRs[i,4], EPI_DMRs[i,5], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")

axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")

title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))


}

dev.off()

```

***
3x Coverage Code  
---

```{r}
setwd("~/Documents/Day135/Day135Cutoff/")

samples.vec <- c("EPI-151", "EPI-152", "EPI-153", "EPI-154", "EPI-159", "EPI-160", "EPI-161", "EPI-162", "EPI-167", "EPI-168", "EPI-169", "EPI-170")

```


```{r}

library(readr)
setwd("~/Documents/Day135/Day135Cutoff/CGoutput")

data <- list.files(path = ".", pattern = "*.output")
print(data)

db <- read_delim("~/Documents/Day135/Day135Cutoff/CGoutput/EPI-151_CG.output", "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")

db <- as.data.table(db)

db$EPI103Meth <- rowSums(cbind(db[,4], db[,7]), na.rm = TRUE)
db$EPI103TotCov <- rowSums(cbind(db[,5], db[,8]), na.rm = TRUE)
meth.db <- db[,c(1,2,3,10)]
totcov.db <- db[,c(1,2,3,11)]

for(i in 2:length(data))   {
  
  temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/CGoutput/", data[i]), "\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
  temp <- as.data.table(temp)
  temp$Meth <- rowSums(cbind(temp[,4], temp[,7]), na.rm = TRUE)
  temp$TotCov <- rowSums(cbind(temp[,5], temp[,8]), na.rm = TRUE)

  meth.db <- merge(meth.db, temp[,c(1,2,3,10)], by = names(meth.db[,c(1,2,3)]), all = TRUE)
  colnames(meth.db)[ncol(meth.db)] <- paste0(substr(data[i], 1, 7), "Meth")
  meth.db <- as.data.table(meth.db)
  
  totcov.db <- merge(totcov.db, temp[,c(1,2,3,11)], by = names(totcov.db[,c(1,2,3)]), all = TRUE)
  colnames(totcov.db)[ncol(totcov.db)] <- paste0(substr(data[i], 1, 7), "TotCov")
  totcov.db <- as.data.table(totcov.db)
}



```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau")
meth.db2 <- meth.db[complete.cases(meth.db[,c(4:15)]),]

totcov.db2 <- totcov.db[complete.cases(totcov.db[,c(4:15)]),]

meth.db3 <- meth.db2[apply(meth.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 3)), ]
totcov.db3 <- totcov.db2[apply(totcov.db2[, c(4:15)], MARGIN = 1, function(x) all(x > 3)), ]

meth.db4 <- as.data.table(cbind(paste0(meth.db3$`#CHROM`, "-", meth.db3$POS), meth.db3[,4:ncol(meth.db3)]))
colnames(meth.db4)[1] <- "Site"
totcov.db4 <- as.data.table(cbind(paste0(totcov.db3$`#CHROM`, "-", totcov.db3$POS), totcov.db3[,4:ncol(totcov.db3)]))
colnames(totcov.db4)[1] <- "Site"

totcov.db5 <- totcov.db4[totcov.db4$Site %in% meth.db4$Site,]

nrow(meth.db4)
nrow(totcov.db5)
length(totcov.db5$Site %in% meth.db4$Site)

write.table(meth.db4, file = "threex.macau.meth.txt", sep = " ", quote = FALSE, row.names = FALSE)
write.table(totcov.db5, file = "threex.macau.totalcov.txt", sep = " ", quote = FALSE, row.names = FALSE)

system("head threex.macau.meth.txt")
system("head threex.macau.totalcov.txt")
```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau/")
system("/home/shared/macau/macau -g threex.macau.meth.txt -t threex.macau.totalcov.txt -p Predictor.csv -k day135-relate-finished.csv -o threex")

```

```{r}
setwd("~/Documents/Day135/Day135Cutoff/macau/")

threex_assoc <- read_delim("~/Documents/Day135/Day135Cutoff/macau/output/threex.assoc.txt", 
    "\t", escape_double = FALSE, trim_ws = TRUE)

sig.results <- threex_assoc[threex_assoc$pvalue <= 0.05,]

 for(i in 1:nrow(sig.results))   {
   temp <- strsplit(sig.results$id[i], "-") 
   sig.results[i,13] <- temp[[1]][1]
   sig.results[i,14] <- temp[[1]][2]
   
 }
 
 colnames(sig.results)[c(13,14)] <- c("Scaffold", "Loc")
 
 bed.test <- as.data.frame(cbind(sig.results[,13], (sig.results[,14]), (sig.results[,14]), sig.results$beta))
 colnames(bed.test)[4] <- "beta"
 write.table(bed.test, file = "threex.DiffMethRegions.bed", sep = ",", quote = FALSE, col.names = FALSE, row.names = FALSE)

```


```{r}
 
setwd("~/Documents/Day135/Day135Cutoff/")
 
 list.files(path = ".")
 
```

```{r}
 library(data.table)
 library(readr)
 DiffMethRegions <- read_delim("~/Documents/Day135/Day135Cutoff/threex.DiffMethRegions.bed", 
     ",", escape_double = FALSE, col_names = FALSE, 
     trim_ws = TRUE)
 
 DiffMethRegions$Loc <- paste0(DiffMethRegions$X1, "-", DiffMethRegions$X2)
 
 head(DiffMethRegions)
 
 sample.files <- list.files(path = "~/Documents/Day135/Day135Cutoff/", pattern = "*.bedgraph")
 
 print(sample.files)

```

```{r}

 EPI_103_percmeth_bedgraph <- read_delim("~/Documents/Day135/Day135Cutoff/EPI-151-percmeth.bedgraph", 
     "\t", escape_double = FALSE, col_names = FALSE, 
     col_types = cols(X4 = col_double()), 
     trim_ws = TRUE)
 
 EPI_103_percmeth_bedgraph <- as.data.table(EPI_103_percmeth_bedgraph)
 
 head(EPI_103_percmeth_bedgraph)
 
 print(EPI_103_percmeth_bedgraph[7,])

```

```{r}

EPI_103_DMRs <- EPI_103_percmeth_bedgraph[EPI_103_percmeth_bedgraph$X1 %in% DiffMethRegions$X1,]


EPI_103_DMRs$Loc <- paste0(EPI_103_DMRs$X1, "-", EPI_103_DMRs$X2)


head(EPI_103_DMRs)

EPI_103_DMRs <- EPI_103_DMRs[EPI_103_DMRs$Loc %in% DiffMethRegions$Loc,]

head(EPI_103_DMRs)

EPI_DMRs <- as.data.table(cbind(EPI_103_DMRs$Loc, EPI_103_DMRs$X4))



colnames(EPI_DMRs)[1] <- "DMRLoc"
colnames(EPI_DMRs)[2] <- "EPI_151"
head(EPI_DMRs)

```

```{r}

for(i in 2:length(sample.files))   {

  temp <- read_delim(paste0("~/Documents/Day135/Day135Cutoff/", sample.files[i]),
    "\t", escape_double = FALSE, col_names = FALSE,
    col_types = cols(X4 = col_double()),
    trim_ws = TRUE)

  temp <- as.data.table(temp)

  temp_DMRs <- temp[temp$X1 %in% DiffMethRegions$X1,]


  temp_DMRs$Loc <- paste0(temp_DMRs$X1, "-", temp_DMRs$X2)


  temp_DMRs <- temp_DMRs[temp_DMRs$Loc %in% DiffMethRegions$Loc,]

  EPI_DMRs$temp <- temp_DMRs$X4

  colnames(EPI_DMRs)[ncol(EPI_DMRs)] <- substr(sample.files[i], 1, 7)


}

head(EPI_DMRs)

## This is here because for osme reason EPI_103 gets brught in as characters as opposed to doubles. Need to figure out why, but this fixes it for now
EPI_DMRs$EPI_151 <- as.numeric(EPI_DMRs$EPI_151)

```

```{r}

treatment.means <- as.data.frame(EPI_DMRs$DMRLoc)

treatment.means$Ambient <- apply(cbind(EPI_DMRs$`EPI-151`, EPI_DMRs$`EPI-152`, EPI_DMRs$`EPI-153`, EPI_DMRs$`EPI-154`) ,MARGIN = 1, FUN = mean)

treatment.means$Low <- apply(cbind(EPI_DMRs$`EPI-159`, EPI_DMRs$`EPI-160`, EPI_DMRs$`EPI-161`, EPI_DMRs$`EPI-162`) ,MARGIN = 1, FUN = mean)

treatment.means$SuperLow <- apply(cbind(EPI_DMRs$`EPI-167`, EPI_DMRs$`EPI-168`, EPI_DMRs$`EPI-169`, EPI_DMRs$`EPI-170`) ,MARGIN = 1, FUN = mean)

head(treatment.means)

```

```{r}



#  for(i in 1:nrow(treatment.means))   {
#    
# 
#   boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,4], EPI_DMRs[i,5])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")
# 
# boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")
# 
# boxplot(t(cbind(EPI_DMRs[i,10], EPI_DMRs[i,11], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")
# 
# axis(side = 1, at = 2, labels = "Low")
# axis(side = 1, at = 3, labels = "Super Low")
# 
# title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))
# 
# 
# }


pdf(file = "threex-DMR-Box-Plots.pdf")

for(i in 1:nrow(treatment.means))   {


  boxplot(t(cbind(EPI_DMRs[i,6], EPI_DMRs[i,7], EPI_DMRs[i,10], EPI_DMRs[i,11])),xlim = c(0.5, 3.5), ylim = c(min(EPI_DMRs[i,2:13]), max(EPI_DMRs[i,2:13])), boxfill = "blue", names = "Ambient", show.names = TRUE, ylab = "Proportion of reads methylated")

boxplot(t(cbind(EPI_DMRs[i,2], EPI_DMRs[i,3], EPI_DMRs[i,8], EPI_DMRs[i,9])), xaxt = "n", add = TRUE, at = 2, boxfill = "green")

boxplot(t(cbind(EPI_DMRs[i,4], EPI_DMRs[i,5], EPI_DMRs[i,12], EPI_DMRs[i,13])), xaxt = "n", add = TRUE, at = 3, boxfill = "red")

axis(side = 1, at = 2, labels = "Low")
axis(side = 1, at = 3, labels = "Super Low")

title(paste0("Proportion of Methylated reads for DMR \n located at ", EPI_DMRs[i,1]))


}

dev.off()

```


Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).
