Sliding Window

This creates a sliding window with a certain step size and window range.

Within each window we calculated the average methlyation across all the CpG sites in that window for each individual and correlated that average to the gene expression.

library(stats)
load("slide.RDATA")
getAggCor <- function(subset, geneExp) {
    agg1 <- aggregate(subset$per_methyl, by = list(subset$sample), FUN = "mean")
    rows <- match(agg1$Group.1, geneExp$ID)
    geneOrd <- geneExp$P_Cing[rows]
    return(cor(geneOrd, agg1$x))
}
data = read.csv("new_compiled_data.csv")
data <- data[, -1]
data <- data[order(data$sample), ]
sex <- c(rep("f", 1585), rep("m", length(1586:length(data$sample))))
data$sex <- sex
stepSize = 1
windowSize = 650
index <- vector()
ccM <- vector()
ccF <- vector()
for (i in seq(from = range(data$site)[1], to = range(data$site)[2], by = stepSize)) {
    subsetM <- data[data$site >= i & data$site < (i + windowSize) & data$sex == 
        "m", ]
    subsetF <- data[data$site >= i & data$site < (i + windowSize) & data$sex == 
        "f", ]
    if (nrow(subsetM) == 0) {
        (ccM <- c(ccM, 0))
        (ccF <- c(ccF, 0))
    } else {
        ccM <- c(ccM, getAggCor(subsetM, geneExp))
        ccF <- c(ccF, getAggCor(subsetF, geneExp))
    }
    index <- c(index, i)
}
plot(index, ccM, type = "n", ylim = c(-0.8, 0.8), xlab = "lower limit of the window", 
    ylab = "correlation coefficent of the average percentage methylation and protein abundance", 
    main = "Window size 650")
lines(index, ccM, lwd = 2, col = "blue")
lines(index, ccF, col = "red", lwd = 2)
abline(v = 3590, lty = 3, col = 8, lwd = 3)
abline(v = 4581, lty = 3, col = 8, lwd = 3)
abline(h = 0, lwd = 0.2)

plot of chunk unnamed-chunk-3