So Steven noticed an interesting thing, DMRs that are significant in the 10x cutoff aren’t present in the 5x coverage. I’m thinking this is a modeling effect, but I wanted to check to make sure my data subsetting was actually correct first.
I read in the methylation count data I use in Macau for each of the cutoffs.
library(readr)
macau_meth10x <- read_delim("~/Documents/Genewiz_hdd/Day10/macau/macau.meth10x.txt", " ", escape_double = FALSE, trim_ws = TRUE)
macau_meth7x <- read_csv("~/Documents/Genewiz_hdd/Day10/macauv2/macau.meth7x.txt")
macau_meth5x <- read_csv("~/Documents/Genewiz_hdd/Day10/macauv2/macau.meth5x.txt")
macau_meth3x <- read_csv("~/Documents/Genewiz_hdd/Day10/macauv2/macau.meth3x.txt")
Then I check to see how many rows I’m dealing with, this is equivilant to the number of potential loci that meet our data cutoffs.
nrow(macau_meth10x)
[1] 1977
nrow(macau_meth7x)
[1] 4250
nrow(macau_meth5x)
[1] 8972
nrow(macau_meth3x)
[1] 23284
Then I check to see that all of my 10x coverage sites are in the 7x, 5x, and 3x sites.
The code asks what is the length of the vector returned by looking at which sites co-occur in both the 7x and 10x data sets.
This should equal the length of the smaller dataset, if they are all present in the larger data set, and it does.
nrow(macau_meth10x)
[1] 1977
length(which(macau_meth7x$Site %in% macau_meth10x$Site == TRUE))
[1] 1977
length(which(macau_meth5x$Site %in% macau_meth10x$Site == TRUE))
[1] 1977
length(which(macau_meth3x$Site %in% macau_meth10x$Site == TRUE))
[1] 1977
Now I make sure that all of the 7x are in the 5x and 3x sets, and they are.
nrow(macau_meth7x)
[1] 4250
length(which(macau_meth5x$Site %in% macau_meth7x$Site == TRUE))
[1] 4250
length(which(macau_meth3x$Site %in% macau_meth7x$Site == TRUE))
[1] 4250
And that the 5x is in the 3x set. And it is.
nrow(macau_meth5x)
[1] 8972
length(which(macau_meth3x$Site %in% macau_meth5x$Site == TRUE))
[1] 8972
Next. Lets make sure that something didn’t get wonky with the count values themselves.
test <- as.data.frame(macau_meth10x$Site)
test$EPI103_10x <- macau_meth10x$`EPI-103`
test$EPI103_5x <- macau_meth5x$EPI103Meth[which(macau_meth5x$Site %in% macau_meth10x$Site == TRUE)]
test$EPI104_10x <- macau_meth10x$`EPI-104`
test$EPI104_5x <- macau_meth5x$`EPI-104Meth`[which(macau_meth5x$Site %in% macau_meth10x$Site == TRUE)]
test$EPI111_10x <- macau_meth10x$`EPI-111`
test$EPI111_5x <- macau_meth5x$`EPI-111Meth`[which(macau_meth5x$Site %in% macau_meth10x$Site == TRUE)]
head(test)
It looks like count values also match between a subset of samples at the 10x and 5x range
What does this tell us? That it’s likely a modeling effect. Next I’ll look at the outputs from Macau for each coverage level, and see what the P-values for individual sites look like, maybe it’s just an effect of borderline close p-values getting cut off in different runs.
TenX_results <- read_delim("~/Documents/Genewiz_hdd/Day10/macau/output/result.assoc.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
SevenX_assoc <- read_delim("~/Documents/Genewiz_hdd/Day10/macauv2/output/7x.assoc.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
FiveX_assoc <- read_delim("~/Documents/Genewiz_hdd/Day10/macauv2/output/5x.assoc.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
ThreeX_assoc <- read_delim("~/Documents/Genewiz_hdd/Day10/macauv2/output/3x.assoc.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
Lets check to make sure that all of the loci deemed significant by the 10X test are still located in the 7x, 5x, and 3x files.
TenX.sig.results <- TenX_results[TenX_results$pvalue <= 0.05,]
length(which(TenX.sig.results$id %in% SevenX_assoc$id == TRUE))
[1] 41
length(which(TenX.sig.results$id %in% FiveX_assoc$id == TRUE))
[1] 41
length(which(TenX.sig.results$id %in% ThreeX_assoc$id == TRUE))
[1] 41
They are. Now lets look at p-values.
TenvFive.p <- as.data.frame(TenX.sig.results$id)
TenvFive.p$TenXp <- TenX.sig.results$pvalue
TenvFive.p$FiveXp <- FiveX_assoc$pvalue[which(TenX.sig.results$id %in% FiveX_assoc$id == TRUE)]
print(TenvFive.p)
TenvThree.p <- as.data.frame(TenX.sig.results$id)
TenvThree.p$TenXp <- TenX.sig.results$pvalue
TenvThree.p$ThreeXp <- ThreeX_assoc$pvalue[which(TenX.sig.results$id %in% ThreeX_assoc$id == TRUE)]
print(TenvThree.p)
So it looks like all of the DMR sites identified in the 10x coverage run still exist in the other runs, it’s just an effect of the model being run that removes them from the DMR list.
LS0tCnRpdGxlOiAiV2hlcmUnZCBteSBETVIncyBnbz8iCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KClNvIFN0ZXZlbiBub3RpY2VkIGFuIGludGVyZXN0aW5nIHRoaW5nLCBETVJzIHRoYXQgYXJlIHNpZ25pZmljYW50IGluIHRoZSAxMHggY3V0b2ZmIGFyZW4ndCBwcmVzZW50IGluIHRoZSA1eCBjb3ZlcmFnZS4gSSdtIHRoaW5raW5nIHRoaXMgaXMgYSBtb2RlbGluZyBlZmZlY3QsIGJ1dCBJIHdhbnRlZCB0byBjaGVjayB0byBtYWtlIHN1cmUgbXkgZGF0YSBzdWJzZXR0aW5nIHdhcyBhY3R1YWxseSBjb3JyZWN0IGZpcnN0LgoKSSByZWFkIGluIHRoZSBtZXRoeWxhdGlvbiBjb3VudCBkYXRhIEkgdXNlIGluIE1hY2F1IGZvciBlYWNoIG9mIHRoZSBjdXRvZmZzLgoKYGBge3J9CgpsaWJyYXJ5KHJlYWRyKQptYWNhdV9tZXRoMTB4IDwtIHJlYWRfZGVsaW0oIn4vRG9jdW1lbnRzL0dlbmV3aXpfaGRkL0RheTEwL21hY2F1L21hY2F1Lm1ldGgxMHgudHh0IiwgIiAiLCBlc2NhcGVfZG91YmxlID0gRkFMU0UsIHRyaW1fd3MgPSBUUlVFKQoKbWFjYXVfbWV0aDd4IDwtIHJlYWRfY3N2KCJ+L0RvY3VtZW50cy9HZW5ld2l6X2hkZC9EYXkxMC9tYWNhdXYyL21hY2F1Lm1ldGg3eC50eHQiKQoKbWFjYXVfbWV0aDV4IDwtIHJlYWRfY3N2KCJ+L0RvY3VtZW50cy9HZW5ld2l6X2hkZC9EYXkxMC9tYWNhdXYyL21hY2F1Lm1ldGg1eC50eHQiKQoKbWFjYXVfbWV0aDN4IDwtIHJlYWRfY3N2KCJ+L0RvY3VtZW50cy9HZW5ld2l6X2hkZC9EYXkxMC9tYWNhdXYyL21hY2F1Lm1ldGgzeC50eHQiKQoKYGBgCgpUaGVuIEkgY2hlY2sgdG8gc2VlIGhvdyBtYW55IHJvd3MgSSdtIGRlYWxpbmcgd2l0aCwgdGhpcyBpcyBlcXVpdmlsYW50IHRvIHRoZSBudW1iZXIgb2YgcG90ZW50aWFsIGxvY2kgdGhhdCBtZWV0IG91ciBkYXRhIGN1dG9mZnMuCgpgYGB7cn0KCm5yb3cobWFjYXVfbWV0aDEweCkKbnJvdyhtYWNhdV9tZXRoN3gpCm5yb3cobWFjYXVfbWV0aDV4KQpucm93KG1hY2F1X21ldGgzeCkKCmBgYAoKVGhlbiBJIGNoZWNrIHRvIHNlZSB0aGF0IGFsbCBvZiBteSAxMHggY292ZXJhZ2Ugc2l0ZXMgYXJlIGluIHRoZSA3eCwgNXgsIGFuZCAzeCBzaXRlcy4gCgpUaGUgY29kZSBhc2tzIHdoYXQgaXMgdGhlIGxlbmd0aCBvZiB0aGUgdmVjdG9yIHJldHVybmVkIGJ5IGxvb2tpbmcgYXQgd2hpY2ggc2l0ZXMgY28tb2NjdXIgaW4gYm90aCB0aGUgN3ggYW5kIDEweCBkYXRhIHNldHMuIAoKVGhpcyBzaG91bGQgZXF1YWwgdGhlIGxlbmd0aCBvZiB0aGUgc21hbGxlciBkYXRhc2V0LCBpZiB0aGV5IGFyZSBhbGwgcHJlc2VudCBpbiB0aGUgbGFyZ2VyIGRhdGEgc2V0LCBhbmQgaXQgZG9lcy4gCgpgYGB7cn0KbnJvdyhtYWNhdV9tZXRoMTB4KQoKbGVuZ3RoKHdoaWNoKG1hY2F1X21ldGg3eCRTaXRlICVpbiUgbWFjYXVfbWV0aDEweCRTaXRlID09IFRSVUUpKQpsZW5ndGgod2hpY2gobWFjYXVfbWV0aDV4JFNpdGUgJWluJSBtYWNhdV9tZXRoMTB4JFNpdGUgPT0gVFJVRSkpCmxlbmd0aCh3aGljaChtYWNhdV9tZXRoM3gkU2l0ZSAlaW4lIG1hY2F1X21ldGgxMHgkU2l0ZSA9PSBUUlVFKSkKCgpgYGAKCk5vdyBJIG1ha2Ugc3VyZSB0aGF0IGFsbCBvZiB0aGUgN3ggYXJlIGluIHRoZSA1eCBhbmQgM3ggc2V0cywgYW5kIHRoZXkgYXJlLgoKYGBge3J9Cm5yb3cobWFjYXVfbWV0aDd4KQoKbGVuZ3RoKHdoaWNoKG1hY2F1X21ldGg1eCRTaXRlICVpbiUgbWFjYXVfbWV0aDd4JFNpdGUgPT0gVFJVRSkpCmxlbmd0aCh3aGljaChtYWNhdV9tZXRoM3gkU2l0ZSAlaW4lIG1hY2F1X21ldGg3eCRTaXRlID09IFRSVUUpKQpgYGAKCkFuZCB0aGF0IHRoZSA1eCBpcyBpbiB0aGUgM3ggc2V0LiBBbmQgaXQgaXMuCgpgYGB7cn0KbnJvdyhtYWNhdV9tZXRoNXgpCgpsZW5ndGgod2hpY2gobWFjYXVfbWV0aDN4JFNpdGUgJWluJSBtYWNhdV9tZXRoNXgkU2l0ZSA9PSBUUlVFKSkKYGBgCgpOZXh0LiBMZXRzIG1ha2Ugc3VyZSB0aGF0IHNvbWV0aGluZyBkaWRuJ3QgZ2V0IHdvbmt5IHdpdGggdGhlIGNvdW50IHZhbHVlcyB0aGVtc2VsdmVzLgoKYGBge3J9Cgp0ZXN0IDwtIGFzLmRhdGEuZnJhbWUobWFjYXVfbWV0aDEweCRTaXRlKQoKdGVzdCRFUEkxMDNfMTB4IDwtIG1hY2F1X21ldGgxMHgkYEVQSS0xMDNgCnRlc3QkRVBJMTAzXzV4IDwtIG1hY2F1X21ldGg1eCRFUEkxMDNNZXRoW3doaWNoKG1hY2F1X21ldGg1eCRTaXRlICVpbiUgbWFjYXVfbWV0aDEweCRTaXRlID09IFRSVUUpXQoKdGVzdCRFUEkxMDRfMTB4IDwtIG1hY2F1X21ldGgxMHgkYEVQSS0xMDRgCnRlc3QkRVBJMTA0XzV4IDwtIG1hY2F1X21ldGg1eCRgRVBJLTEwNE1ldGhgW3doaWNoKG1hY2F1X21ldGg1eCRTaXRlICVpbiUgbWFjYXVfbWV0aDEweCRTaXRlID09IFRSVUUpXQoKdGVzdCRFUEkxMTFfMTB4IDwtIG1hY2F1X21ldGgxMHgkYEVQSS0xMTFgCnRlc3QkRVBJMTExXzV4IDwtIG1hY2F1X21ldGg1eCRgRVBJLTExMU1ldGhgW3doaWNoKG1hY2F1X21ldGg1eCRTaXRlICVpbiUgbWFjYXVfbWV0aDEweCRTaXRlID09IFRSVUUpXQoKCmhlYWQodGVzdCkKCmBgYAoKSXQgbG9va3MgbGlrZSBjb3VudCB2YWx1ZXMgYWxzbyBtYXRjaCBiZXR3ZWVuIGEgc3Vic2V0IG9mIHNhbXBsZXMgYXQgdGhlIDEweCBhbmQgNXggcmFuZ2UgCgoKV2hhdCBkb2VzIHRoaXMgdGVsbCB1cz8gVGhhdCBpdCdzIGxpa2VseSBhIG1vZGVsaW5nIGVmZmVjdC4gTmV4dCBJJ2xsIGxvb2sgYXQgdGhlIG91dHB1dHMgZnJvbSBNYWNhdSBmb3IgZWFjaCBjb3ZlcmFnZSBsZXZlbCwgYW5kIHNlZSB3aGF0IHRoZSBQLXZhbHVlcyBmb3IgaW5kaXZpZHVhbCBzaXRlcyBsb29rIGxpa2UsIG1heWJlIGl0J3MganVzdCBhbiBlZmZlY3Qgb2YgYm9yZGVybGluZSBjbG9zZSBwLXZhbHVlcyBnZXR0aW5nIGN1dCBvZmYgaW4gZGlmZmVyZW50IHJ1bnMuCgoKYGBge3J9CgpUZW5YX3Jlc3VsdHMgPC0gcmVhZF9kZWxpbSgifi9Eb2N1bWVudHMvR2VuZXdpel9oZGQvRGF5MTAvbWFjYXUvb3V0cHV0L3Jlc3VsdC5hc3NvYy50eHQiLCAKICAgICJcdCIsIGVzY2FwZV9kb3VibGUgPSBGQUxTRSwgdHJpbV93cyA9IFRSVUUpCgpTZXZlblhfYXNzb2MgPC0gcmVhZF9kZWxpbSgifi9Eb2N1bWVudHMvR2VuZXdpel9oZGQvRGF5MTAvbWFjYXV2Mi9vdXRwdXQvN3guYXNzb2MudHh0IiwgCiAgICAiXHQiLCBlc2NhcGVfZG91YmxlID0gRkFMU0UsIHRyaW1fd3MgPSBUUlVFKQoKRml2ZVhfYXNzb2MgPC0gcmVhZF9kZWxpbSgifi9Eb2N1bWVudHMvR2VuZXdpel9oZGQvRGF5MTAvbWFjYXV2Mi9vdXRwdXQvNXguYXNzb2MudHh0IiwgCiAgICAiXHQiLCBlc2NhcGVfZG91YmxlID0gRkFMU0UsIHRyaW1fd3MgPSBUUlVFKQoKVGhyZWVYX2Fzc29jIDwtIHJlYWRfZGVsaW0oIn4vRG9jdW1lbnRzL0dlbmV3aXpfaGRkL0RheTEwL21hY2F1djIvb3V0cHV0LzN4LmFzc29jLnR4dCIsIAogICAgIlx0IiwgZXNjYXBlX2RvdWJsZSA9IEZBTFNFLCB0cmltX3dzID0gVFJVRSkKYGBgCgpMZXRzIGNoZWNrIHRvIG1ha2Ugc3VyZSB0aGF0IGFsbCBvZiB0aGUgbG9jaSBkZWVtZWQgc2lnbmlmaWNhbnQgYnkgdGhlIDEwWCB0ZXN0IGFyZSBzdGlsbCBsb2NhdGVkIGluIHRoZSA3eCwgNXgsIGFuZCAzeCBmaWxlcy4KYGBge3J9CgpUZW5YLnNpZy5yZXN1bHRzIDwtIFRlblhfcmVzdWx0c1tUZW5YX3Jlc3VsdHMkcHZhbHVlIDw9IDAuMDUsXQoKbGVuZ3RoKHdoaWNoKFRlblguc2lnLnJlc3VsdHMkaWQgJWluJSBTZXZlblhfYXNzb2MkaWQgPT0gVFJVRSkpCmxlbmd0aCh3aGljaChUZW5YLnNpZy5yZXN1bHRzJGlkICVpbiUgRml2ZVhfYXNzb2MkaWQgPT0gVFJVRSkpCmxlbmd0aCh3aGljaChUZW5YLnNpZy5yZXN1bHRzJGlkICVpbiUgVGhyZWVYX2Fzc29jJGlkID09IFRSVUUpKQoKCmBgYAoKVGhleSBhcmUuIE5vdyBsZXRzIGxvb2sgYXQgcC12YWx1ZXMuCgpgYGB7cn0KClRlbnZTZXZlbi5wIDwtIGFzLmRhdGEuZnJhbWUoVGVuWC5zaWcucmVzdWx0cyRpZCkKClRlbnZTZXZlbi5wJFRlblhwIDwtIFRlblguc2lnLnJlc3VsdHMkcHZhbHVlCgpUZW52U2V2ZW4ucCRTZXZlblhwIDwtIFNldmVuWF9hc3NvYyRwdmFsdWVbd2hpY2goVGVuWC5zaWcucmVzdWx0cyRpZCAlaW4lIFNldmVuWF9hc3NvYyRpZCA9PSBUUlVFKV0KCnByaW50KFRlbnZTZXZlbi5wKQoKCmBgYAoKYGBge3J9CgpUZW52Rml2ZS5wIDwtIGFzLmRhdGEuZnJhbWUoVGVuWC5zaWcucmVzdWx0cyRpZCkKClRlbnZGaXZlLnAkVGVuWHAgPC0gVGVuWC5zaWcucmVzdWx0cyRwdmFsdWUKClRlbnZGaXZlLnAkRml2ZVhwIDwtIEZpdmVYX2Fzc29jJHB2YWx1ZVt3aGljaChUZW5YLnNpZy5yZXN1bHRzJGlkICVpbiUgRml2ZVhfYXNzb2MkaWQgPT0gVFJVRSldCgpwcmludChUZW52Rml2ZS5wKQoKCmBgYAoKYGBge3J9CgpUZW52VGhyZWUucCA8LSBhcy5kYXRhLmZyYW1lKFRlblguc2lnLnJlc3VsdHMkaWQpCgpUZW52VGhyZWUucCRUZW5YcCA8LSBUZW5YLnNpZy5yZXN1bHRzJHB2YWx1ZQoKVGVudlRocmVlLnAkVGhyZWVYcCA8LSBUaHJlZVhfYXNzb2MkcHZhbHVlW3doaWNoKFRlblguc2lnLnJlc3VsdHMkaWQgJWluJSBUaHJlZVhfYXNzb2MkaWQgPT0gVFJVRSldCgpwcmludChUZW52VGhyZWUucCkKCgpgYGAKClNvIGl0IGxvb2tzIGxpa2UgYWxsIG9mIHRoZSBETVIgc2l0ZXMgaWRlbnRpZmllZCBpbiB0aGUgMTB4IGNvdmVyYWdlIHJ1biBzdGlsbCBleGlzdCBpbiB0aGUgb3RoZXIgcnVucywgaXQncyBqdXN0IGFuIGVmZmVjdCBvZiB0aGUgbW9kZWwgYmVpbmcgcnVuIHRoYXQgcmVtb3ZlcyB0aGVtIGZyb20gdGhlIERNUiBsaXN0LiA=