library(circlize)
library(dplyr)
library(gplots)
library(ggplot2)
library(genefilter)
library(RColorBrewer)
library(dendextend)

# Load the combined chromatin-state matrix ----
# Set the working directory to the folder holding the combined matrix state
# file from chromStatesMatrixforClustering_particularStates.pl output.
setwd("~/Desktop/PRAD_ChromHMM_Tile/")

# Chromatin state for analysis -- can be changed for each chromatin state.
i <- 8

# Read in the combined matrix state file for state `i`. The first column
# (the genomic window, e.g. "chr1:1000000-1009999") becomes the row names.
dat <- read.table(
  paste0("CombinedMatrix-", i, "-10000bps.txt"),
  header = FALSE,
  sep = "\t",
  quote = "",
  row.names = 1,
  # na.strings takes a character vector. The logical FALSE used previously is
  # coerced to the string "FALSE", so a literal "NA" field was not treated as
  # missing; use the documented default instead.
  na.strings = "NA",
  stringsAsFactors = FALSE
)

# The file has no header row: name the eight sample columns explicitly
# (four untreated samples followed by four resistant samples).
colnames(dat) <- c(
  "P02Untreated", "PR7Untreated", "P10Untreated", "P13Untreated",
  "P05Resistant", "PR6Resistant", "P08Resistant", "PR14Resistant"
)

# Quick look at the first rows; recorded output from a previous run follows.
head(dat)
## P02Untreated PR7Untreated P10Untreated P13Untreated
## chr1:1000000-1009999 0 6 1 1
## chr1:100000000-100009999 0 1 0 0
## chr1:100010000-100019999 1 2 0 1
## chr1:100020000-100029999 0 0 0 0
## chr1:100030000-100039999 0 1 0 0
## chr1:100040000-100049999 1 2 0 0
## P05Resistant PR6Resistant P08Resistant PR14Resistant
## chr1:1000000-1009999 2 0 0 3
## chr1:100000000-100009999 1 0 0 0
## chr1:100010000-100019999 2 0 0 1
## chr1:100020000-100029999 1 0 0 0
## chr1:100030000-100039999 1 0 0 0
## chr1:100040000-100049999 0 0 0 0
# Sample type labels for the plot, in the column order of `dat`:
# four untreated samples followed by four resistant samples.
mutation.type <- factor(rep(c("Untreated", "Resistant"), each = 4))

# Filter out low-variability regions with genefilter::varFilter().
# NOTE(review): with varFilter's defaults, var.cutoff = 0.75 should be a
# quantile of the per-row IQR (rows below it are dropped) -- confirm against
# the genefilter documentation.
dat.state1 <- dat %>%
  as.matrix() %>%
  varFilter(var.cutoff = 0.75)

# Dimensions after filtering; a previous run printed: [1] 7467 8
print(dim(dat.state1))
# Hierarchical clustering of samples and dendrogram plots ----
# Transpose so that rows are samples and columns are genomic windows:
# dist() computes distances between rows, so this clusters the samples.
dat.state1 <- t(dat.state1)

# log2(x + 1) keeps zero counts finite; scale() then centres and scales
# each column (i.e. each window) across the samples.
dat.state2 <- log2(dat.state1 + 1)
dat.state2 <- scale(dat.state2)

d <- dist(dat.state2, method = "euclidean")
hc <- hclust(d)
dhc <- as.dendrogram(hc)

# Colour the leaf labels by sample type. Look colours up by level *name*:
# subsetting a named vector with a factor uses the factor's integer codes,
# which only agrees with the names when colorCodes happens to be written in
# level order. order.dendrogram() then reorders the colours to leaf order.
colorCodes <- c(Resistant = "red", Untreated = "blue")
leaf_colors <- colorCodes[as.character(mutation.type)]
labels_colors(dhc) <- leaf_colors[order.dendrogram(dhc)]

# Colour the branches of the two top-level clusters.
dhc <- color_branches(dhc, k = 2)

# Standard dendrogram.
par(cex = 0.7, mar = c(5, 3, 2, 2))
plot(dhc)

# Circular dendrogram.
par(cex = 0.7)
circlize_dendrogram(dhc)
# Note that the echo = FALSE parameter was added to the code chunk to prevent
# printing of the R code that generated the plot.