I needed to make some bedgraphs for Steven, and since the code I’d used previously was buried in a larger section, I decided to break it out into it’s own little script.
Housekeeping stuff, loading needed packages, setting working directory, and getting all of the CG output files from Bismark into a vector to iterate over.
library(readr)
library(data.table)
setwd("~/Documents/Genewiz_hdd/Day10/CGoutput/")
files <- list.files(".", pattern = "*.output")
print(files)
Loops over the file names reading in the data, removing all rows that contain an NA, saving location data (I subtract 1 because IGV is apparently 0 indexed, so when you don’t, everything is shifted one to the right), combining the two strand data for methylation and total coverage, and then setting a 5x coverage requirement to be included.
I then save the file as a bedgraph, and use sed -i to append the minimum header required to count as a bedgraph type=bedGraph.
for(i in 1:length(files)) {
temp <- read_delim(paste0("~/Documents/Genewiz_hdd/Day10/CGoutput/", files[i]),
"\t", escape_double = FALSE, trim_ws = TRUE, na = ".")
temp <- na.omit(temp)
temp.2 <- as.data.table(cbind(temp$`#CHROM`))
temp.2$sloc <-temp$POS - 1
temp.2$eloc <- temp$POS - 1
temp.2$meth <- temp$`Watson METH` + temp$`Crick METH`
temp.2$totcov <- temp$`Watson COVERAGE` + temp$`Crick COVERAGE`
temp.3 <- temp.2[temp.2$totcov > 5,]
temp.final <- as.data.table(cbind(temp.3$V1, temp.3$sloc, temp.3$eloc, (temp.3$meth / temp.3$totcov)))
filename <- paste0(substr(files[i], 1, 7), "_percmeth.bedgraph")
write.table(temp.final, file = filename, quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)
system(paste0("sed -i 'itype=bedGraph' ", filename))
}
LS0tCnRpdGxlOiAiR2VuZXJhdGluZyBEYXkgMTAgZ2Vub21pYyBmZWF0dXJlIGZpbGVzLiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKSSBuZWVkZWQgdG8gbWFrZSBzb21lIGJlZGdyYXBocyBmb3IgU3RldmVuLCBhbmQgc2luY2UgdGhlIGNvZGUgSSdkIHVzZWQgcHJldmlvdXNseSB3YXMgYnVyaWVkIGluIGEgbGFyZ2VyIHNlY3Rpb24sIEkgZGVjaWRlZCB0byBicmVhayBpdCBvdXQgaW50byBpdCdzIG93biBsaXR0bGUgc2NyaXB0LiAKCkhvdXNla2VlcGluZyBzdHVmZiwgbG9hZGluZyBuZWVkZWQgcGFja2FnZXMsIHNldHRpbmcgd29ya2luZyBkaXJlY3RvcnksIGFuZCBnZXR0aW5nIGFsbCBvZiB0aGUgQ0cgb3V0cHV0IGZpbGVzIGZyb20gQmlzbWFyayBpbnRvIGEgdmVjdG9yIHRvIGl0ZXJhdGUgb3Zlci4KCmBgYHtyfQoKbGlicmFyeShyZWFkcikKbGlicmFyeShkYXRhLnRhYmxlKQoKc2V0d2QoIn4vRG9jdW1lbnRzL0dlbmV3aXpfaGRkL0RheTEwL0NHb3V0cHV0LyIpCgpmaWxlcyA8LSBsaXN0LmZpbGVzKCIuIiwgcGF0dGVybiA9ICIqLm91dHB1dCIpCgpwcmludChmaWxlcykKYGBgCgpMb29wcyBvdmVyIHRoZSBmaWxlIG5hbWVzIHJlYWRpbmcgaW4gdGhlIGRhdGEsIHJlbW92aW5nIGFsbCByb3dzIHRoYXQgY29udGFpbiBhbiBOQSwgc2F2aW5nIGxvY2F0aW9uIGRhdGEgKEkgc3VidHJhY3QgMSBiZWNhdXNlIElHViBpcyBhcHBhcmVudGx5IDAgaW5kZXhlZCwgc28gd2hlbiB5b3UgZG9uJ3QsIGV2ZXJ5dGhpbmcgaXMgc2hpZnRlZCBvbmUgdG8gdGhlIHJpZ2h0KSwgY29tYmluaW5nIHRoZSB0d28gc3RyYW5kIGRhdGEgZm9yIG1ldGh5bGF0aW9uIGFuZCB0b3RhbCBjb3ZlcmFnZSwgYW5kIHRoZW4gc2V0dGluZyBhIDV4IGNvdmVyYWdlIHJlcXVpcmVtZW50IHRvIGJlIGluY2x1ZGVkLiAKCkkgdGhlbiBzYXZlIHRoZSBmaWxlIGFzIGEgYmVkZ3JhcGgsIGFuZCB1c2Ugc2VkIC1pIHRvIGFwcGVuZCB0aGUgbWluaW11bSBoZWFkZXIgcmVxdWlyZWQgdG8gY291bnQgYXMgYSBiZWRncmFwaCB0eXBlPWJlZEdyYXBoLgoKYGBge3J9Cgpmb3IoaSBpbiAxOmxlbmd0aChmaWxlcykpICAgewogIAogIHRlbXAgPC0gcmVhZF9kZWxpbShwYXN0ZTAoIn4vRG9jdW1lbnRzL0dlbmV3aXpfaGRkL0RheTEwL0NHb3V0cHV0LyIsIGZpbGVzW2ldKSwgCiAgICAiXHQiLCBlc2NhcGVfZG91YmxlID0gRkFMU0UsIHRyaW1fd3MgPSBUUlVFLCBuYSA9ICIuIikKICAKICB0ZW1wIDwtIG5hLm9taXQodGVtcCkKICAKICB0ZW1wLjIgPC0gYXMuZGF0YS50YWJsZShjYmluZCh0ZW1wJGAjQ0hST01gKSkKICB0ZW1wLjIkc2xvYyA8LXRlbXAkUE9TIC0gMQogIHRlbXAuMiRlbG9jIDwtIHRlbXAkUE9TIC0gMQogIAogIHRlbXAuMiRtZXRoIDwtIHRlbXAkYFdhdHNvbiBNRVRIYCArIHRlbXAkYENyaWNrIE1FVEhgCiAgdGVtcC4yJHRvdGNvdiA8LSB0ZW1wJGBXYXRzb24gQ09WRVJBR0VgICsgdGVtcCRgQ3JpY2sgQ09WRVJBR0VgICAKICB0ZW1wLjMgPC0gdGVtcC4yW3RlbXAuMiR0b3Rjb3YgPiA1LF0gICAgCgogIHRlbXAuZmluYWwgPC0gYXMuZGF0YS50YWJsZShjYmluZCh0ZW1wLjMkVjEsIHRlbXAuMyRzbG9jLCB0ZW1wLjMkZWxvYywgKHRlbXAuMyRtZXRoIC8gdGVtcC4zJHRvdGNvdikpKSAgCiAgCiAgCiAgZmlsZW5hbWUgPC0gcGFzdGUwKHN1YnN0cihmaWxlc1tpXSwgMSwgNyksICJfcGVyY21ldGguYmVkZ3JhcGgiKQogIAogIHdyaXRlLnRhYmxlKHRlbXAuZmluYWwsIGZpbGUgPSBmaWxlbmFtZSwgcXVvdGUgPSBGQUxTRSwgc2VwID0gIlx0IiwgY29sLm5hbWVzID0gRkFMU0UsIHJvdy5uYW1lcyA9IEZBTFNFKQogIHN5c3RlbShwYXN0ZTAoInNlZCAtaSAnaXR5cGU9YmVkR3JhcGgnICIsIGZpbGVuYW1lKSkKfQoKYGBgCg==