setwd("~/Documents/Bismark-Mapping-Eff/Galore")
file.names.list <- list.files("~/Documents/Bismark-Mapping-Eff/Galore/", pattern = "*.fq")
for(i in 1:length(file.names.list)) {
system(paste0("/home/shared/bsmap/BSMAP-master/bsmap -a ~/Documents/Bismark-Mapping-Eff/Galore/", file.names.list[i], " -d ~/Documents/Bismark-Mapping-Eff/Bowtie1/Ostrea_lurida-Scaff-10k.fa -o ~/Documents/Bismark-Mapping-Eff/Galore/", file.names.list[i], ".sam"))
}
Now to run everything through Methratio, the BSMap methylation extractor. I tried running a BSMap generated file through bismark methylation extractor, but it generated an error due to unexpected characters in the file.
meth.files.list <- list.files(path = "~/Documents/Bismark-Mapping-Eff/Galore/", pattern = "*.sam")
for(i in 1:length(meth.files.list)) {
system(paste0("python /home/shared/bsmap/BSMAP-master/methratio.py -u -z -d ~/Documents/Bismark-Mapping-Eff/Bowtie2/Ostrea_lurida-Scaff-10k.fa -o ~/Documents/Bismark-Mapping-Eff/Galore/methratio-out/methratio-out-", meth.files.list[i], ".txt ~/Documents/Bismark-Mapping-Eff/Galore/", meth.files.list[i]))
}
Now we move on to Methylkit, a native R package.
library(readr)
setwd("~/Documents/Bismark-Mapping-Eff/Galore/methratio-out")
file.list2 <- list.files(path = ".", pattern = "*.txt")
for(i in 1:length(file.list2)) {
temp.df <- read_delim(file.list2[i], "\t", escape_double = FALSE, trim_ws = TRUE)
temp.df2 <- temp.df[temp.df$context == 'CG', ]
temp.df2$strand <- gsub("-", "R", temp.df2$strand)
temp.df2$strand <- gsub("\\+", "F", temp.df2$strand)
write.table(temp.df2, paste0("CPG_", file.list2[i]), sep = "\t", col.names = FALSE)
}
file.list3 <- list("CPG_methratio-out-zr1394_1_trimmed.fq.sam.txt", "CPG_methratio-out-zr1394_2_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_3_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_4_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_5_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_6_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_7_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_8_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_9_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_10_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_11_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_12_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_13_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_14_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_15_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_16_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_17_trimmed.fq.sam.txt",
"CPG_methratio-out-zr1394_18_trimmed.fq.sam.txt")
sample.id <- list("hc1_2B",
"hc1_4B",
"hc2_15B",
"hc2_17",
"hc3_1",
"hc3_5",
"hc3_7",
"hc3_10",
"hc3_11",
"ss2_9B",
"ss2_14B",
"ss2_18B",
"ss3_3B",
"ss3_14B",
"ss3_15B",
"ss3_16B",
"ss3_20",
"ss3_18")
treatment <- c(rep(0, 9), rep(1,9))
setwd("~/Documents/Bismark-Mapping-Eff/Galore/methratio-out")
The working directory was changed to /home/sean/Documents/Bismark-Mapping-Eff/Galore/methratio-out inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the the working directory for notebook chunks.
testObj <- methRead(file.list3, sample.id = sample.id, treatment = treatment, assembly = "10K", mincov = 3, header = TRUE, context = "CpG", resolution = "base", pipeline = list(fraction = TRUE, chr.col = 2, start.col = 3, end.col = 3, coverage.col = 7, strand.col = 4, freqC.col = 6))
Read 88.9% of 1293128 rows
Read 1293128 rows and 13 (of 13) columns from 0.076 GB file in 00:00:03
filteredObj <- filterByCoverage(testObj, lo.count = 3,
low.perc = NULL, hi.count = NULL, hi.perc = 95)
for(i in 1:18) {
getMethylationStats(filteredObj[[i]], plot = T, both.strands = F)
}
for(i in 1:18) {
getCoverageStats(filteredObj[[i]], plot = T, both.strands = F)
}
meth <- unite(filteredObj, destrand = F)
getCorrelation(meth, plot = F)
hc1_2B hc1_4B hc2_15B hc2_17 hc3_1 hc3_5
hc1_2B 1.0000000 0.5825763 0.6082526 0.6020948 0.5676813 0.5843891
hc1_4B 0.5825763 1.0000000 0.5882878 0.5967915 0.5540952 0.5401394
hc2_15B 0.6082526 0.5882878 1.0000000 0.6163110 0.5679949 0.5552069
hc2_17 0.6020948 0.5967915 0.6163110 1.0000000 0.5600040 0.5595264
hc3_1 0.5676813 0.5540952 0.5679949 0.5600040 1.0000000 0.5338735
hc3_5 0.5843891 0.5401394 0.5552069 0.5595264 0.5338735 1.0000000
hc3_7 0.6134484 0.6036028 0.6288240 0.6274563 0.5893121 0.5565610
hc3_10 0.5985582 0.5659941 0.5877461 0.6053112 0.5365359 0.5553464
hc3_11 0.6241623 0.5930966 0.6210848 0.6174785 0.5785888 0.5752752
ss2_9B 0.5532374 0.5261933 0.5509772 0.5604499 0.5043559 0.5083168
ss2_14B 0.5799536 0.5368552 0.5781300 0.5769332 0.5196690 0.5158186
ss2_18B 0.5848756 0.5741283 0.5770714 0.5756921 0.5633135 0.5464680
ss3_3B 0.6363807 0.6080465 0.6361529 0.6462354 0.5694096 0.5696659
ss3_14B 0.6425583 0.6048014 0.6425629 0.6445563 0.5759141 0.5754698
ss3_15B 0.6295079 0.5950700 0.6242719 0.6387126 0.5547503 0.5624581
ss3_16B 0.6297294 0.5896160 0.6294621 0.6337805 0.5588463 0.5608976
ss3_20 0.5892393 0.5576405 0.5833442 0.6005111 0.5297110 0.5318637
ss3_18 0.6049379 0.5858013 0.6039849 0.6156422 0.5663073 0.5612884
hc3_7 hc3_10 hc3_11 ss2_9B ss2_14B ss2_18B
hc1_2B 0.6134484 0.5985582 0.6241623 0.5532374 0.5799536 0.5848756
hc1_4B 0.6036028 0.5659941 0.5930966 0.5261933 0.5368552 0.5741283
hc2_15B 0.6288240 0.5877461 0.6210848 0.5509772 0.5781300 0.5770714
hc2_17 0.6274563 0.6053112 0.6174785 0.5604499 0.5769332 0.5756921
hc3_1 0.5893121 0.5365359 0.5785888 0.5043559 0.5196690 0.5633135
hc3_5 0.5565610 0.5553464 0.5752752 0.5083168 0.5158186 0.5464680
hc3_7 1.0000000 0.5885631 0.6305158 0.5603417 0.5804083 0.5866063
hc3_10 0.5885631 1.0000000 0.6052017 0.5459048 0.5650789 0.5665386
hc3_11 0.6305158 0.6052017 1.0000000 0.5804804 0.5900805 0.6135835
ss2_9B 0.5603417 0.5459048 0.5804804 1.0000000 0.5929223 0.5552835
ss2_14B 0.5804083 0.5650789 0.5900805 0.5929223 1.0000000 0.5533827
ss2_18B 0.5866063 0.5665386 0.6135835 0.5552835 0.5533827 1.0000000
ss3_3B 0.6481039 0.6278208 0.6616854 0.6377490 0.6585786 0.6399737
ss3_14B 0.6495731 0.6344468 0.6644168 0.6263041 0.6633209 0.6314432
ss3_15B 0.6363063 0.6257283 0.6436521 0.6116900 0.6504572 0.5966592
ss3_16B 0.6340741 0.6224361 0.6500979 0.6227101 0.6715535 0.6087491
ss3_20 0.5950028 0.5883223 0.6102587 0.5807754 0.5940179 0.5738258
ss3_18 0.6169326 0.6001961 0.6345106 0.5883566 0.5910109 0.6307574
ss3_3B ss3_14B ss3_15B ss3_16B ss3_20 ss3_18
hc1_2B 0.6363807 0.6425583 0.6295079 0.6297294 0.5892393 0.6049379
hc1_4B 0.6080465 0.6048014 0.5950700 0.5896160 0.5576405 0.5858013
hc2_15B 0.6361529 0.6425629 0.6242719 0.6294621 0.5833442 0.6039849
hc2_17 0.6462354 0.6445563 0.6387126 0.6337805 0.6005111 0.6156422
hc3_1 0.5694096 0.5759141 0.5547503 0.5588463 0.5297110 0.5663073
hc3_5 0.5696659 0.5754698 0.5624581 0.5608976 0.5318637 0.5612884
hc3_7 0.6481039 0.6495731 0.6363063 0.6340741 0.5950028 0.6169326
hc3_10 0.6278208 0.6344468 0.6257283 0.6224361 0.5883223 0.6001961
hc3_11 0.6616854 0.6644168 0.6436521 0.6500979 0.6102587 0.6345106
ss2_9B 0.6377490 0.6263041 0.6116900 0.6227101 0.5807754 0.5883566
ss2_14B 0.6585786 0.6633209 0.6504572 0.6715535 0.5940179 0.5910109
ss2_18B 0.6399737 0.6314432 0.5966592 0.6087491 0.5738258 0.6307574
ss3_3B 1.0000000 0.7317786 0.6931595 0.7258108 0.6490109 0.6834266
ss3_14B 0.7317786 1.0000000 0.7027543 0.7339790 0.6480324 0.6742292
ss3_15B 0.6931595 0.7027543 1.0000000 0.7000359 0.6472395 0.6384625
ss3_16B 0.7258108 0.7339790 0.7000359 1.0000000 0.6436348 0.6543238
ss3_20 0.6490109 0.6480324 0.6472395 0.6436348 1.0000000 0.6095460
ss3_18 0.6834266 0.6742292 0.6384625 0.6543238 0.6095460 1.0000000
CpGDendro <- clusterSamples(meth, dist = "correlation", method = "ward", plot = T)
The "ward" method has been renamed to "ward.D"; note new "ward.D2"
PCA <- PCASamples(meth, scale = T, center = T)