DiffBind Analysis for Unsupervised Heatmaps from whole crypts in fed vs fasted mice

Christopher Terranova May 2019

Load necessary programs

library(DiffBind)
library(ComplexHeatmap)
library(dplyr)
library(genefilter)
library(RColorBrewer)

Unsupervised Heatmap analysis of H3K9bhb genomic loci in fed vs fasted mice

#set working directory and place sample.csv file in directory
setwd("/Volumes/Terranova2/KS_SMINT_FINAL_Analysis_May2018/KS-SmINT_FED_FASTED_REFED_Fastq/KS_Intestine_ChIP-seq_FINAL_BigWig_20Million_Reads_e5__02-01-18/04aln_downsample/")

#create a dba.object#

H3K9bhb <- dba(sampleSheet = "H3K9bhb_singlemark.csv", minOverlap = 2)

#count reads in binding site intervals: Scoring metric can be changed#

H3K9bhb_RPKM <- dba.count(H3K9bhb, minOverlap = 2, score = DBA_SCORE_RPKM, fragmentSize = 200)

## function
subset_most_variable_rows<- function(x, center, scale, top_n){
  log2.counts<- t(scale(t(log2(x+1)),center= center,scale= scale))
  rv<- rowVars(log2.counts)
  ## select the top n most variable regions for clustering
  idx<- order(-rv)[1:top_n]
  log2.counts[idx,]
}

## from dba count object
reads.RPKM <- dba.peakset(H3K9bhb_RPKM, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
name <- "RPKM"

## Top regions
rownames(reads.RPKM) <- paste(reads.RPKM$CHR,":",reads.RPKM$START,"-",reads.RPKM$END,sep = "")
reads.RPKM.mat <- reads.RPKM[, 4:ncol(reads.RPKM)] %>% as.matrix()
ind <- which(reads.RPKM.mat <  0)
reads.RPKM.mat[ind] <- 0
hmcols <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
all.peaks <- subset_most_variable_rows(reads.RPKM.mat, TRUE, FALSE, 1000) #1000

## Features and annotation
feature.mat <- data.frame(H3K9bhb_RPKM$class)
Diet.subtype <- factor(unlist(feature.mat["Tissue",]))
annot <- data.frame(Diet = Diet.subtype) 

## Colors and its assignment
Diet.cols <- brewer.pal(7, "Accent")[5:6]
Diet.cols.assigned <- setNames(Diet.cols, unique(levels(annot$Diet)))

Unsupervised Heatmap analysis

You can also embed plots, for example:

## Heatmap
heat.annot <- HeatmapAnnotation(df = annot, col = list(Diet = Diet.cols.assigned),
                                show_legend = TRUE, annotation_legend_param = list(title_gp = gpar(fontsize = 14,fontface = "bold"), 
                                                                                   labels_gp = gpar(fontsize = 14, fontface = "bold")))
Heatmap(all.peaks, show_row_names = FALSE, show_column_names = TRUE, row_dend_reorder = TRUE, 
        column_dend_reorder = TRUE, clustering_distance_rows = "pearson",
        clustering_distance_columns = "euclidean", clustering_method_rows = "complete", 
        column_names_gp = gpar(fontsize = 16, fontface = "bold"), 
        clustering_method_columns = "complete", top_annotation = heat.annot,
        heatmap_legend_param = list(title = "RPKM", title_gp = gpar(fontsize = 14, fontface = "bold"), 
                                    labels_gp = gpar(fontsize = 14, fontface = "bold")))

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.