library(forestplot)
library(readr)
data <- read_delim("~/Documents/Roberts Lab/geoduck transcriptome/Geoduck-transcriptome_v3_bigtable_v08.tab.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
Parsed with column specification:
cols(
.default = col_character(),
length = col_integer(),
GC_content = col_double(),
`CpG_o/e` = col_double(),
male_unique_count = col_integer(),
female_unique_count = col_integer(),
Evalue = col_double(),
`evalue-Gigaton` = col_double(),
`evalue-Ruphibase` = col_double(),
`evalue-Sigenae` = col_double(),
Dheilly_Cluster = col_integer()
)
See spec(...) for full column specifications.
GOslim_bin <- read_delim("~/Documents/Roberts Lab/geoduck transcriptome/GOslim-bin.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
Parsed with column specification:
cols(
Column1 = col_character(),
GOSlim_bin = col_character()
)
data_merged <- merge(data, GOslim_bin, by.x = "ContigID", by.y = "Column1", all = FALSE)
#install.packages("mixtools")
#install.packages("mclust")
library("mixtools")
library("mclust")
data_cpg <- data_merged[,4]
data_cpg <- data_cpg[data_cpg >= 0.001 & data_cpg <= 1.5]
model <- normalmixEM(data_cpg, k = 2)
number of iterations= 325
plot(model, which = 2, col2 = c("blue", "red"), xlab2 = " ", main2 = "Panopea generosa", font.main = 3)
lines(density(data_cpg), lty=2, lwd=2)

model.mclust <- Mclust(data_cpg)
summary(model.mclust)
----------------------------------------------------
Gaussian finite mixture model fitted by EM algorithm
----------------------------------------------------
Mclust V (univariate, unequal variance) model with 2 components:
log.likelihood n df BIC ICL
2034.472 19442 5 4019.568 -154.6012
Clustering table:
1 2
14565 4877
model.mclust.1 <- Mclust(data_cpg, G = 1)
unimodal <- logLik(model.mclust.1)
bimodal <- logLik(model.mclust)
difference <- bimodal[1] - unimodal[1]
p.val <- 1- pchisq(difference, df = 3)
data_merged$GOSlim_bin <- as.factor(data_merged$GOSlim_bin)
GOSlim_types <- levels(data_merged$GOSlim_bin)
for(i in 1:length(GOSlim_types)) {
hist(data_cpg[data_merged$GOSlim_bin == GOSlim_types[i]], main = GOSlim_types[i], xlab = "CpG O/E", xlim = range(0.01, 1.5), breaks = 15)
}














LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7cn0KbGlicmFyeShmb3Jlc3RwbG90KQpsaWJyYXJ5KHJlYWRyKQoKZGF0YSA8LSByZWFkX2RlbGltKCJ+L0RvY3VtZW50cy9Sb2JlcnRzIExhYi9nZW9kdWNrIHRyYW5zY3JpcHRvbWUvR2VvZHVjay10cmFuc2NyaXB0b21lX3YzX2JpZ3RhYmxlX3YwOC50YWIudHh0IiwgIlx0IiwgZXNjYXBlX2RvdWJsZSA9IEZBTFNFLCB0cmltX3dzID0gVFJVRSkKCkdPc2xpbV9iaW4gPC0gcmVhZF9kZWxpbSgifi9Eb2N1bWVudHMvUm9iZXJ0cyBMYWIvZ2VvZHVjayB0cmFuc2NyaXB0b21lL0dPc2xpbS1iaW4udHh0IiwgCiAgICAiXHQiLCBlc2NhcGVfZG91YmxlID0gRkFMU0UsIHRyaW1fd3MgPSBUUlVFKQpgYGAKCgpgYGB7cn0KCmRhdGFfbWVyZ2VkIDwtIG1lcmdlKGRhdGEsIEdPc2xpbV9iaW4sIGJ5LnggPSAiQ29udGlnSUQiLCBieS55ID0gIkNvbHVtbjEiLCBhbGwgPSBGQUxTRSkKYGBgCgpgYGB7cn0KI2luc3RhbGwucGFja2FnZXMoIm1peHRvb2xzIikKI2luc3RhbGwucGFja2FnZXMoIm1jbHVzdCIpCmxpYnJhcnkoIm1peHRvb2xzIikKbGlicmFyeSgibWNsdXN0IikKCmRhdGFfY3BnIDwtIGRhdGFfbWVyZ2VkWyw0XQpkYXRhX2NwZyA8LSBkYXRhX2NwZ1tkYXRhX2NwZyA+PSAwLjAwMSAmIGRhdGFfY3BnIDw9IDEuNV0KCm1vZGVsIDwtIG5vcm1hbG1peEVNKGRhdGFfY3BnLCBrID0gMikKCnBsb3QobW9kZWwsIHdoaWNoID0gMiwgY29sMiA9IGMoImJsdWUiLCAicmVkIiksIHhsYWIyID0gIiAiLCBtYWluMiA9ICJQYW5vcGVhIGdlbmVyb3NhIiwgZm9udC5tYWluID0gMykKCmxpbmVzKGRlbnNpdHkoZGF0YV9jcGcpLCBsdHk9MiwgbHdkPTIpCgoKbW9kZWwubWNsdXN0IDwtIE1jbHVzdChkYXRhX2NwZykKc3VtbWFyeShtb2RlbC5tY2x1c3QpCgptb2RlbC5tY2x1c3QuMSA8LSBNY2x1c3QoZGF0YV9jcGcsIEcgPSAxKQp1bmltb2RhbCA8LSBsb2dMaWsobW9kZWwubWNsdXN0LjEpCmJpbW9kYWwgPC0gbG9nTGlrKG1vZGVsLm1jbHVzdCkKZGlmZmVyZW5jZSA8LSBiaW1vZGFsWzFdIC0gdW5pbW9kYWxbMV0KCnAudmFsIDwtIDEtIHBjaGlzcShkaWZmZXJlbmNlLCBkZiA9IDMpCmRhdGFfbWVyZ2VkJEdPU2xpbV9iaW4gPC0gYXMuZmFjdG9yKGRhdGFfbWVyZ2VkJEdPU2xpbV9iaW4pCkdPU2xpbV90eXBlcyA8LSBsZXZlbHMoZGF0YV9tZXJnZWQkR09TbGltX2JpbikKCmBgYAoKYGBge3J9Cgpmb3IoaSBpbiAxOmxlbmd0aChHT1NsaW1fdHlwZXMpKSAgIHsKICBoaXN0KGRhdGFfY3BnW2RhdGFfbWVyZ2VkJEdPU2xpbV9iaW4gPT0gR09TbGltX3R5cGVzW2ldXSwgbWFpbiA9IEdPU2xpbV90eXBlc1tpXSwgeGxhYiA9ICJDcEcgTy9FIiwgeGxpbSA9IHJhbmdlKDAuMDEsIDEuNSksIGJyZWFrcyA9IDE1KQp9CgoKYGBgCgo=