Unsupervised Dendrogram Analysis based on combinatorial chromatin states in Untreated and Resistant PRAD Patient Tumors

Christopher Terranova, May 2019, based on code from Ayush Raman

Load necessary packages

library(circlize)
library(dplyr)
library(gplots)
library(ggplot2)
library(genefilter)
library(RColorBrewer)
library(dendextend)

Identification of differential chromatin state regions

# Directory holding the combined state matrices written by
# chromStatesMatrixforClustering_particularStates.pl.
# NOTE(review): setwd() changes the working directory for the rest of the
# session; it is kept here because the relative file name below depends on it.
  setwd("~/Desktop/PRAD_ChromHMM_Tile/")

# chromatin state to analyse -- change this value to run a different state
  i <- 8

# Read the combined matrix for the selected state. The file has no header line
# and its first column supplies the row names (region IDs).
# NOTE(review): na.strings is documented as a character vector; confirm that
# passing FALSE does what was intended.
  dat <- read.table(paste0("CombinedMatrix-", i, "-10000bps.txt"),
                    header = FALSE, sep = "\t", quote = "",
                    row.names = 1, na.strings = FALSE, stringsAsFactors = FALSE)

# Sample labels, assumed to be in the same order as the columns of the file.
  sample.names <- c("P02Untreated","PR7Untreated","P10Untreated","P13Untreated",
                    "P05Resistant","PR6Resistant","P08Resistant","PR14Resistant")

# Fail fast if the matrix does not have exactly one column per expected sample,
# rather than attaching labels to the wrong number of columns.
  stopifnot(ncol(dat) == length(sample.names))
  colnames(dat) <- sample.names
  head(dat)
##                          P02Untreated PR7Untreated P10Untreated P13Untreated
## chr1:1000000-1009999                0            6            1            1
## chr1:100000000-100009999            0            1            0            0
## chr1:100010000-100019999            1            2            0            1
## chr1:100020000-100029999            0            0            0            0
## chr1:100030000-100039999            0            1            0            0
## chr1:100040000-100049999            1            2            0            0
##                          P05Resistant PR6Resistant P08Resistant PR14Resistant
## chr1:1000000-1009999                2            0            0             3
## chr1:100000000-100009999            1            0            0             0
## chr1:100010000-100019999            2            0            0             1
## chr1:100020000-100029999            1            0            0             0
## chr1:100030000-100039999            1            0            0             0
## chr1:100040000-100049999            0            0            0             0
# group label for each sample column: four Untreated followed by four Resistant
  mutation.type <- factor(rep(c("Untreated", "Resistant"), each = 4))
  
# drop low-variability regions with genefilter's varFilter (var.cutoff = 0.75),
# then report how many regions x samples remain
  dat.state1 <- genefilter::varFilter(as.matrix(dat), var.cutoff = 0.75)
  print(dim(dat.state1))
## [1] 7467    8
# Hierarchical clustering of the samples.
# Transpose so that each row is a sample and each column a region.
  dat.state1 <- t(dat.state1)
# log2(x + 1) transform, then centre and scale every column
  dat.state2 <- scale(log2(dat.state1 + 1))
# Euclidean distance between rows, clustered with hclust's default
# ("complete") linkage
  d <- dist(dat.state2, method = "euclidean")
  hc <- hclust(d, method = "complete")

Dendrogram displaying top variable regions based on active enhancer chromatin state in Untreated and Resistant Tumors

  dhc <- as.dendrogram(hc)  # dendrogram form of the hclust result, for styling
  colorCodes <- c(Resistant = "red", Untreated = "blue")
# Look up each label colour by group NAME. Indexing with the factor itself
# would use its integer codes, which only give the right colours while
# colorCodes happens to be listed in factor-level order.
  labels_colors(dhc) <- colorCodes[as.character(mutation.type)][order.dendrogram(dhc)]
# colour the branches by cutting the tree into two clusters
  dhc <- color_branches(dhc, k = 2)
  par(cex = 0.7, mar = c(5, 3, 2, 2))
  plot(dhc)

# same dendrogram drawn in a circular layout
  par(cex = 0.7)
  circlize_dendrogram(dhc)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.