DiffBind Analysis for Unsupervised Heatmap from whole crypts in fed vs fasted mice H4K12bhb

Christopher Terranova May 2019

Load necessary programs

library(DiffBind)
library(ComplexHeatmap)
library(dplyr)
library(genefilter)
library(RColorBrewer)

Unsupervised Heatmap analysis of H4K12bhb genomic loci in fed vs fasted mice

#set working directory and place sample.csv file in directory
setwd("/Volumes/Terranova2/KS_SMINT_FINAL_Analysis_May2018/KS-SmINT_FED_FASTED_REFED_Fastq/KS_Intestine_ChIP-seq_FINAL_BigWig_20Million_Reads_e5__02-01-18/04aln_downsample/")

#create a dba.object#

H4K12bhb <- dba(sampleSheet = "H4K12bhb_singlemark.csv", minOverlap = 2)

#count reads in binding site intervals: Scoring metric can be changed#

H4K12bhb_RPKM <- dba.count(H4K12bhb, minOverlap = 2, score = DBA_SCORE_RPKM, fragmentSize = 200)

## function
subset_most_variable_rows<- function(x, center, scale, top_n){
  log2.counts<- t(scale(t(log2(x+1)),center= center,scale= scale))
  rv<- rowVars(log2.counts)
  ## select the top n most variable regions for clustering
  idx<- order(-rv)[1:top_n]
  log2.counts[idx,]
}

## from dba count object
reads.RPKM <- dba.peakset(H4K12bhb_RPKM, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
name <- "RPKM"

## Top regions
rownames(reads.RPKM) <- paste(reads.RPKM$CHR,":",reads.RPKM$START,"-",reads.RPKM$END,sep = "")
reads.RPKM.mat <- reads.RPKM[, 4:ncol(reads.RPKM)] %>% as.matrix()
ind <- which(reads.RPKM.mat <  0)
reads.RPKM.mat[ind] <- 0
hmcols <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
all.peaks <- subset_most_variable_rows(reads.RPKM.mat, TRUE, FALSE, 1000) #1000

## Features and annotation
feature.mat <- data.frame(H4K12bhb_RPKM$class)
Diet.subtype <- factor(unlist(feature.mat["Tissue",]))
annot <- data.frame(Diet = Diet.subtype) 

## Colors and its assignment
Diet.cols <- brewer.pal(7, "Accent")[5:6]
Diet.cols.assigned <- setNames(Diet.cols, unique(levels(annot$Diet)))

Unsupervised Heatmap analysis

You can also embed plots, for example:

## Heatmap
heat.annot <- HeatmapAnnotation(df = annot, col = list(Diet = Diet.cols.assigned),
                                show_legend = TRUE, annotation_legend_param = list(title_gp = gpar(fontsize = 14,fontface = "bold"), 
                                                                                   labels_gp = gpar(fontsize = 14, fontface = "bold")))
Heatmap(all.peaks, show_row_names = FALSE, show_column_names = TRUE, row_dend_reorder = TRUE, 
        column_dend_reorder = TRUE, clustering_distance_rows = "pearson",
        clustering_distance_columns = "euclidean", clustering_method_rows = "complete", 
        column_names_gp = gpar(fontsize = 16, fontface = "bold"), 
        clustering_method_columns = "complete", top_annotation = heat.annot,
        heatmap_legend_param = list(title = "RPKM", title_gp = gpar(fontsize = 14, fontface = "bold"), 
                                    labels_gp = gpar(fontsize = 14, fontface = "bold")))

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.