Christopher Terranova May 2019

Load necessary programs

library(DiffBind)
library(ComplexHeatmap)
library(dplyr)
library(genefilter)
library(RColorBrewer)

Unsupervised Heatmap of H3K27ac genomic loci in Untreated vs Resistant Tumors

#Set working directory
setwd("/Volumes/Terranova2/PROSTATE_12-19-16/PRAD_FINAL_Analysis_June2018/PRAD_22RV1_ENZ_MENIN/04aln_downsample/")

#create a dba.object#
H3K27ac <- dba(sampleSheet = "PRAD_22RV1_H3K27ac_DiffBind_menin.csv", minOverlap = 2)

#count reads in binding site intervals: Scoring metric can be changed and does not influence differential analysis#
H3K27ac_counts_RPKM <- dba.count(H3K27ac, minOverlap = 2, score = DBA_SCORE_RPKM, fragmentSize = 200)

## from dba count object
reads.RPKM <- dba.peakset(H3K27ac_counts_RPKM, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
name <- "RPKM"

## function
subset_most_variable_rows<- function(x, center, scale, top_n){
  log2.counts<- t(scale(t(log2(x+1)),center= center,scale= scale))
  rv<- rowVars(log2.counts)
  ## select the top n most variable regions for clustering
  idx<- order(-rv)[1:top_n]
  log2.counts[idx,]
}

## Top regions
rownames(reads.RPKM) <- paste(reads.RPKM$CHR,":",reads.RPKM$START,"-",reads.RPKM$END,sep = "")
reads.RPKM.mat <- reads.RPKM[, 4:ncol(reads.RPKM)] %>% as.matrix()
ind <- which(reads.RPKM.mat <  0)
reads.RPKM.mat[ind] <- 0
hmcols <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
all.peaks <- subset_most_variable_rows(reads.RPKM.mat, TRUE, FALSE, 1000) #1000

## Features and annotation
feature.mat <- data.frame(H3K27ac_counts_RPKM$class)
Mut.subtype <- factor(unlist(feature.mat["ID",]))
annot <- data.frame(Mutation = Mut.subtype) 

## Colors and its assignment
mut.cols <- brewer.pal(7, "Set2")[4:5]
Mut.cols.assigned <- setNames(mut.cols, unique(levels(annot$Mutation)))

Unsupervised Heatmap analysis

heat.annot <- HeatmapAnnotation(df = annot, col = list(Mutation = Mut.cols.assigned),
                                show_legend = TRUE, annotation_legend_param = list(title_gp = gpar(fontsize = 14,fontface = "bold"), 
                                                                                   labels_gp = gpar(fontsize = 14, fontface = "bold")))
Heatmap(all.peaks, show_row_names = FALSE, show_column_names = TRUE, row_dend_reorder = TRUE, 
        column_dend_reorder = TRUE, clustering_distance_rows = "pearson",
        clustering_distance_columns = "euclidean", clustering_method_rows = "complete", 
        column_names_gp = gpar(fontsize = 16, fontface = "bold"), 
        clustering_method_columns = "complete", top_annotation = heat.annot,
        heatmap_legend_param = list(title = "RPKM", title_gp = gpar(fontsize = 14, fontface = "bold"), 
                                    labels_gp = gpar(fontsize = 14, fontface = "bold")))

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.