Steven wanted to start looking at the methylome of the geoduck, so the script below is for aggregating all samples percent methylation information into a single (large) file. Since Emu is currently being strangled by PECAN, I’m doing this on my laptop with files I either had from other projects, or grabbed from Owl.

First the Day 10 stuff. Set my working directory and grab all the file names.

setwd("~/Documents/RobertsLab/DMRGraphs")
list.files(path = ".")
 [1] "DiffMethRegions.bed"           "DMR-Box-Plots.pdf"             "EPI-103-percmeth.bedgraph.txt" "EPI-104-percmeth.bedgraph.txt"
 [5] "EPI-111-percmeth.bedgraph.txt" "EPI-113-percmeth.bedgraph.txt" "EPI-119-percmeth.bedgraph.txt" "EPI-120-percmeth.bedgraph.txt"
 [9] "EPI-127-percmeth.bedgraph.txt" "EPI-128-percmeth.bedgraph.txt" "EPI-135-percmeth.bedgraph.txt" "EPI-136-percmeth.bedgraph.txt"
[13] "EPI-143-percmeth.bedgraph.txt" "EPI-145-percmeth.bedgraph.txt" "graphs.nb.html"                "graphs.Rmd"                   
[17] "rsconnect"                    

Start my fence with the first file, read it in and save it to my methCounts data table.

EPI_103_percmeth_bedgraph <- read_delim("~/Documents/RobertsLab/DMRGraphs/EPI-103-percmeth.bedgraph.txt", 
    "\t", escape_double = FALSE, col_names = FALSE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)
EPI_103_percmeth_bedgraph <- as.data.table(EPI_103_percmeth_bedgraph)
head(EPI_103_percmeth_bedgraph)
print(EPI_103_percmeth_bedgraph[7,])

Now I iterate over all the other files in the Day10 directory, merging them with the previous version of methCounts. I do two different merges, one that includes all loci, and one that includes only loci shared between samples. Minimum coverage to be included is 5x.


methCounts.all <- methCounts
methCounts.short <- methCounts


for(i in 2:length(sample.files))   {
  
  temp <- read_delim(paste0("~/Documents/RobertsLab/DMRGraphs/", sample.files[i]), 
    "\t", escape_double = FALSE, col_names = FALSE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)
  
  temp2 <- as.data.table(paste0(temp$X1, "-", temp$X2))
  temp2$X2 <- temp$X4
  colnames(temp2)[1] <- "loc"
  colnames(temp2)[2] <- substr(sample.files[i], 1, 7)
  
  methCounts.all <- merge(methCounts.all, temp2, by = "loc", all = TRUE)
  methCounts.short <- merge(methCounts.short, temp2, by = "loc")
}

Next I make a column of row means, for average methylation across sample animals.

Next I write the files to disk.


write.table(methCounts.all, file = "Day10-methcounts-all.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

write.table(methCounts.short, file = "Day10-methcounts-short.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

I do the same as above, but with Day 135 files.

Then Day 145 files.

Then I merge all the files into a single file and write it to disk.

---
title: "Aggregating methylation ratio by loci"
output: html_notebook
---

Steven wanted to start looking at the methylome of the geoduck, so the script below is for aggregating all samples percent methylation information into a single (large) file. Since Emu is currently being strangled by PECAN, I'm doing this on my laptop with files I either had from other projects, or grabbed from Owl.

First the Day 10 stuff. Set my working directory and grab all the file names.

```{r}
library(data.table)

setwd("~/Documents/RobertsLab/DMRGraphs")

sample.files <- list.files(path = ".", pattern = "bedgraph.txt")


```

Start my fence with the first file, read it in and save it to my methCounts data table.

```{r}

EPI_103_percmeth_bedgraph <- read_delim("~/Documents/RobertsLab/DMRGraphs/EPI-103-percmeth.bedgraph.txt", 
    "\t", escape_double = FALSE, col_names = FALSE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)

EPI_103_percmeth_bedgraph <- as.data.table(EPI_103_percmeth_bedgraph)

head(EPI_103_percmeth_bedgraph)

print(EPI_103_percmeth_bedgraph[7,])



methCounts <-  as.data.table(paste0(EPI_103_percmeth_bedgraph$X1, "-", EPI_103_percmeth_bedgraph$X2))
methCounts$EPI_103 <- EPI_103_percmeth_bedgraph$X4
colnames(methCounts)[1] <- "loc"

```



Now I iterate over all the other files in the Day10 directory, merging them with the previous version of methCounts. I do two different merges, one that includes all loci, and one that includes only loci shared between samples. Minimum coverage to be included is 5x. 

```{r}

methCounts.all <- methCounts
methCounts.short <- methCounts


for(i in 2:length(sample.files))   {
  
  temp <- read_delim(paste0("~/Documents/RobertsLab/DMRGraphs/", sample.files[i]), 
    "\t", escape_double = FALSE, col_names = FALSE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)
  
  temp2 <- as.data.table(paste0(temp$X1, "-", temp$X2))
  temp2$X2 <- temp$X4
  colnames(temp2)[1] <- "loc"
  colnames(temp2)[2] <- substr(sample.files[i], 1, 7)
  
  methCounts.all <- merge(methCounts.all, temp2, by = "loc", all = TRUE)
  methCounts.short <- merge(methCounts.short, temp2, by = "loc")
}

```

```{r}

head(methCounts.all)
head(methCounts.short)

```

Next I make a column of row means, for average methylation across sample animals.

```{r}

methCounts.all$Means <- rowMeans(methCounts.all[,2:13], na.rm = TRUE)
methCounts.short$Means <- rowMeans(methCounts.short[,2:13], na.rm = TRUE)
```

Next I write the files to disk. 

```{r}

write.table(methCounts.all, file = "Day10-methcounts-all.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

write.table(methCounts.short, file = "Day10-methcounts-short.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

```


I do the same as above, but with Day 135 files.

```{r}

setwd("~/Documents/RobertsLab/Day135")

sample.files <- list.files(path = ".", pattern = "bedgraph")

EPI_151_percmeth_bedgraph <- read_delim("~/Documents/RobertsLab/Day135/EPI-151-percmeth.bedgraph", 
    "\t", escape_double = FALSE, col_names = FALSE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)

EPI_151_percmeth_bedgraph <- as.data.table(EPI_151_percmeth_bedgraph)

head(EPI_151_percmeth_bedgraph)


Day135.methCounts <-  as.data.table(paste0(EPI_151_percmeth_bedgraph$X1, "-", EPI_151_percmeth_bedgraph$X2))
Day135.methCounts$EPI_151 <- EPI_151_percmeth_bedgraph$X4
colnames(Day135.methCounts)[1] <- "loc"

Day135.methCounts.all <- Day135.methCounts
Day135.methCounts.short <- Day135.methCounts


for(i in 2:length(sample.files))   {
  
  temp <- read_delim(paste0("~/Documents/RobertsLab/Day135/", sample.files[i]), 
    "\t", escape_double = FALSE, col_names = FALSE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)
  
  temp2 <- as.data.table(paste0(temp$X1, "-", temp$X2))
  temp2$X2 <- temp$X4
  colnames(temp2)[1] <- "loc"
  colnames(temp2)[2] <- substr(sample.files[i], 1, 7)
  
  Day135.methCounts.all <- merge(Day135.methCounts.all, temp2, by = "loc", all = TRUE)
  Day135.methCounts.short <- merge(Day135.methCounts.short, temp2, by = "loc")
}

Day135.methCounts.all$Means <- rowMeans(Day135.methCounts.all[,2:13], na.rm = TRUE)
Day135.methCounts.short$Means <- rowMeans(Day135.methCounts.short[,2:13], na.rm = TRUE)

write.table(Day135.methCounts.all, file = "Day135-methcounts-all.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

write.table(Day135.methCounts.short, file = "Day135-methcounts-short.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

```


Then Day 145 files.

```{r}

setwd("~/Documents/RobertsLab/Methylome/Day145")

sample.files <- list.files(path = ".", pattern = "bedgraph")

EPI_175_percmeth_bedgraph <- read_delim("~/Documents/RobertsLab/Methylome/Day145/EPI-175_percmeth.bedgraph", 
    "\t", escape_double = FALSE, col_names = FALSE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)

EPI_175_percmeth_bedgraph <- as.data.table(EPI_175_percmeth_bedgraph)

head(EPI_175_percmeth_bedgraph)


Day145.methCounts <-  as.data.table(paste0(EPI_175_percmeth_bedgraph$X1, "-", EPI_175_percmeth_bedgraph$X2))
Day145.methCounts$EPI_175 <- EPI_175_percmeth_bedgraph$X4
colnames(Day145.methCounts)[1] <- "loc"

Day145.methCounts.all <- Day145.methCounts
Day145.methCounts.short <- Day145.methCounts


for(i in 2:length(sample.files))   {
  
  temp <- read_delim(paste0("~/Documents/RobertsLab/Methylome/Day145/", sample.files[i]), 
    "\t", escape_double = FALSE, col_names = FALSE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)
  
  temp2 <- as.data.table(paste0(temp$X1, "-", temp$X2))
  temp2$X2 <- temp$X4
  colnames(temp2)[1] <- "loc"
  colnames(temp2)[2] <- substr(sample.files[i], 1, 7)
  
  Day145.methCounts.all <- merge(Day145.methCounts.all, temp2, by = "loc", all = TRUE)
  Day145.methCounts.short <- merge(Day145.methCounts.short, temp2, by = "loc")
}

Day145.methCounts.all$Means <- rowMeans(Day145.methCounts.all[,2:13], na.rm = TRUE)
Day145.methCounts.short$Means <- rowMeans(Day145.methCounts.short[,2:13], na.rm = TRUE)

write.table(Day145.methCounts.all, file = "Day145-methcounts-all.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

write.table(Day145.methCounts.short, file = "Day145-methcounts-short.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

```


Then I merge all the files into a single file and write it to disk. 


```{r}
setwd("~/Documents/RobertsLab/Methylome/")

Day10 <- read_delim("~/Documents/RobertsLab/Methylome/Day10-methcounts-all.csv", 
    ",", escape_double = FALSE, col_names = TRUE, 
    col_types = cols(X4 = col_double()), 
    trim_ws = TRUE)

Day135 <- read_delim("~/Documents/RobertsLab/Methylome/Day135-methcounts-all.csv", 
    ",", escape_double = FALSE, col_names = TRUE, 
    trim_ws = TRUE)

Day145 <- read_delim("~/Documents/RobertsLab/Methylome/Day145-methcounts-all.csv", 
    ",", escape_double = FALSE, col_names = TRUE, 
    trim_ws = TRUE)

Day10 <- Day10[,1:13]
Day135 <- Day135[,1:13]
Day145 <- Day145[,1:25]

Day10_135 <- merge(Day10, Day135, by = "loc", all = TRUE)
Day10_135_145 <- merge(Day10_135, Day145, by = "loc", all = TRUE)

Day10_135_145$Means <- rowMeans(Day10_135_145[,2:50], na.rm = TRUE)

write.table(Day10_135_145, file = "Day10_135_145-percent-methylation.csv", quote = FALSE, sep = ",", col.names = TRUE, row.names = FALSE)

```




```{r}



```

