Within each window we calculated the average methlyation across all the CpG sites in that window for each individual and correlated that average to the gene expression.
library(stats)
load("slide.RDATA")
getAggCor <- function(subset, geneExp) {
agg1 <- aggregate(subset$per_methyl, by = list(subset$sample), FUN = "mean")
rows <- match(agg1$Group.1, geneExp$ID)
geneOrd <- geneExp$P_Cing[rows]
return(cor(geneOrd, agg1$x))
}
data = read.csv("new_compiled_data.csv")
data <- data[, -1]
data <- data[order(data$sample), ]
sex <- c(rep("f", 1585), rep("m", length(1586:length(data$sample))))
data$sex <- sex
stepSize = 1
windowSize = 650
index <- vector()
ccM <- vector()
ccF <- vector()
for (i in seq(from = range(data$site)[1], to = range(data$site)[2], by = stepSize)) {
subsetM <- data[data$site >= i & data$site < (i + windowSize) & data$sex ==
"m", ]
subsetF <- data[data$site >= i & data$site < (i + windowSize) & data$sex ==
"f", ]
if (nrow(subsetM) == 0) {
(ccM <- c(ccM, 0))
(ccF <- c(ccF, 0))
} else {
ccM <- c(ccM, getAggCor(subsetM, geneExp))
ccF <- c(ccF, getAggCor(subsetF, geneExp))
}
index <- c(index, i)
}
plot(index, ccM, type = "n", ylim = c(-0.8, 0.8), xlab = "lower limit of the window",
ylab = "correlation coefficent of the average percentage methylation and protein abundance",
main = "Window size 650")
lines(index, ccM, lwd = 2, col = "blue")
lines(index, ccF, col = "red", lwd = 2)
abline(v = 3590, lty = 3, col = 8, lwd = 3)
abline(v = 4581, lty = 3, col = 8, lwd = 3)
abline(h = 0, lwd = 0.2)