library(DiffBind)
library(ComplexHeatmap)
library(dplyr)
library(genefilter)
library(RColorBrewer)#set working directory and place sample.csv file in directory
setwd("/Volumes/Terranova2/KS_SMINT_FINAL_Analysis_May2018/KS-SmINT_FED_FASTED_REFED_Fastq/KS_Intestine_ChIP-seq_FINAL_BigWig_20Million_Reads_e5__02-01-18/04aln_downsample/")
#create a dba.object#
H4K12bhb <- dba(sampleSheet = "H4K12bhb_singlemark.csv", minOverlap = 2)
#count reads in binding site intervals: Scoring metric can be changed#
H4K12bhb_RPKM <- dba.count(H4K12bhb, minOverlap = 2, score = DBA_SCORE_RPKM, fragmentSize = 200)
## function
subset_most_variable_rows<- function(x, center, scale, top_n){
log2.counts<- t(scale(t(log2(x+1)),center= center,scale= scale))
rv<- rowVars(log2.counts)
## select the top n most variable regions for clustering
idx<- order(-rv)[1:top_n]
log2.counts[idx,]
}
## from dba count object
reads.RPKM <- dba.peakset(H4K12bhb_RPKM, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
name <- "RPKM"
## Top regions
rownames(reads.RPKM) <- paste(reads.RPKM$CHR,":",reads.RPKM$START,"-",reads.RPKM$END,sep = "")
reads.RPKM.mat <- reads.RPKM[, 4:ncol(reads.RPKM)] %>% as.matrix()
ind <- which(reads.RPKM.mat < 0)
reads.RPKM.mat[ind] <- 0
hmcols <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
all.peaks <- subset_most_variable_rows(reads.RPKM.mat, TRUE, FALSE, 1000) #1000
## Features and annotation
feature.mat <- data.frame(H4K12bhb_RPKM$class)
Diet.subtype <- factor(unlist(feature.mat["Tissue",]))
annot <- data.frame(Diet = Diet.subtype)
## Colors and its assignment
Diet.cols <- brewer.pal(7, "Accent")[5:6]
Diet.cols.assigned <- setNames(Diet.cols, unique(levels(annot$Diet)))You can also embed plots, for example:
## Heatmap
heat.annot <- HeatmapAnnotation(df = annot, col = list(Diet = Diet.cols.assigned),
show_legend = TRUE, annotation_legend_param = list(title_gp = gpar(fontsize = 14,fontface = "bold"),
labels_gp = gpar(fontsize = 14, fontface = "bold")))
Heatmap(all.peaks, show_row_names = FALSE, show_column_names = TRUE, row_dend_reorder = TRUE,
column_dend_reorder = TRUE, clustering_distance_rows = "pearson",
clustering_distance_columns = "euclidean", clustering_method_rows = "complete",
column_names_gp = gpar(fontsize = 16, fontface = "bold"),
clustering_method_columns = "complete", top_annotation = heat.annot,
heatmap_legend_param = list(title = "RPKM", title_gp = gpar(fontsize = 14, fontface = "bold"),
labels_gp = gpar(fontsize = 14, fontface = "bold")))Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.