library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(gbdmr)
Load the functiotions in the R package gbdmr
source("src_samebeta_ver2.R")
## Loading required package: miscTools
##
## Attaching package: 'miscTools'
## The following object is masked from 'package:Biobase':
##
## rowMedians
## The following objects are masked from 'package:MatrixGenerics':
##
## colMedians, rowMedians
## The following objects are masked from 'package:matrixStats':
##
## colMedians, rowMedians
##
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
##
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
# load your data and the data is a matrix where columns are samples and rows correspond to the sites
load('beta.Rdata')
beta <- as.matrix(beta)
cor.threshold=0.5 # the threshold to define the adjacent correlation
#construct an annotation data frame
data(list="IlluminaHumanMethylation450kanno.ilmn12.hg19")
data(Locations)
data(Other)
annotation <- cbind(as.data.frame(Locations), as.data.frame(Other))
# add the annotation to the stats object
common <- intersect(rownames(beta), rownames(annotation))
length(common)
## [1] 1000
dim(beta)
## [1] 1000 296
annotation <- annotation[match(common, rownames(annotation)),]
beta <- beta[match(common, rownames(beta)),]
t.beta <- t(beta)
chr=annotation$chr
pos=annotation$pos
Indexes <- split(1:length(chr),chr)
clusterIDs=rep(NA,length(chr))
LAST=0
for (i in seq(along=Indexes)){
Index <- Indexes[[i]]
temp <- pos[Index]
Index <- Index[order(temp)]
x=colCors(t.beta[,Index[-length(Index)]],t.beta[,Index[-1]])
y <- as.numeric(x <= cor.threshold)
z <- cumsum(c(1, y))
clusterIDs[Index] <- z + LAST
LAST <- max(z) + LAST
}
#clusterIDs # which cluster the cpg belongs
sum(table(clusterIDs))
## [1] 1000
table(table(clusterIDs))
##
## 1 2 3 4 5 6 7 12
## 743 54 26 9 2 1 1 1
# Data
blocksize <- c(1, 2, 3, 4, 5, 6, 7, 12)
counts <- c(743, 54, 26, 9, 2, 1, 1, 1)
# Create a data frame
df <- data.frame(value = rep(blocksize, counts))
# Plot histogram
hist(df$value, breaks = seq(0.5, max(df$value) + 0.5, by = 1), col = "skyblue", main = "Histogram Plot", xlab = "Size", ylab = "Frequency")