Unsupervised MDS analysis based on combinatorial chromatin states in metastatic melanoma tumors

Christopher Terranova May 2019 based on code from Ayush Raman

Load necessary programs

library(circlize)
library(ComplexHeatmap)
library(pheatmap)
library(dplyr)
library(gplots)
library(ggplot2)
library(genefilter)
library(RColorBrewer)
library(reshape2)
library(vegan)

Identification of most variable chromatin state regions

# set working directory to combined matrix state file from chromStatesMatrixforClustering_particularStates.pl output
  setwd("/TCGA_ChromHMM/tiled_segment/")

# set chromatin state for analysis -- can be changed for each chromatin state
  i = 6

# read in combined matrix state file from chromStatesMatrixforClustering_particularStates.pl output
  dat <- read.table(paste("CombinedMatrix-",i,"-10000bps.txt",sep =""),header = FALSE,sep = "\t",quote = "",
                    row.names = 1, na.strings = FALSE, stringsAsFactors = FALSE)
  colnames(dat) <- c("BRAF-M137","BRAF-M233","BRAF-M263","BRAF-M275",
                     "BRAF-M305","BRAF-M306","BRAF-M357","BRAF-M409",
                     "BRAF-M527","BRAF-M721","BRAF-M749","BRAF-M762",
                     "BRAF-M807","RAS-M028","RAS-M399","RAS-M642",
                     "RAS-M852","WT-M035","WT-M822","WT-M857")
  head(dat)
##                          BRAF-M137 BRAF-M233 BRAF-M263 BRAF-M275 BRAF-M305
## chr1:1000000-1009999             0         1         0         0         0
## chr1:100250000-100259999         0         0         0         0         0
## chr1:100260000-100269999         0         0         0         0         0
## chr1:100270000-100279999         0         0         0         0         0
## chr1:100990000-100999999         0         0         0         0         0
## chr1:1010000-1019999             0         0         0         0         0
##                          BRAF-M306 BRAF-M357 BRAF-M409 BRAF-M527 BRAF-M721
## chr1:1000000-1009999             0         0         0         3         0
## chr1:100250000-100259999         0         0         0         0         0
## chr1:100260000-100269999         0         0         0         0         0
## chr1:100270000-100279999         0         0         0         0         0
## chr1:100990000-100999999         0         0         0         0         0
## chr1:1010000-1019999             0         0         0         0         0
##                          BRAF-M749 BRAF-M762 BRAF-M807 RAS-M028 RAS-M399
## chr1:1000000-1009999             0         0         0        8        0
## chr1:100250000-100259999         0         0         0        0        0
## chr1:100260000-100269999         0         0         0        0        0
## chr1:100270000-100279999         0         0         0        0        0
## chr1:100990000-100999999         0         0         0        0        0
## chr1:1010000-1019999             0         0         0        2        0
##                          RAS-M642 RAS-M852 WT-M035 WT-M822 WT-M857
## chr1:1000000-1009999            0        0       4       0       0
## chr1:100250000-100259999        0        0       0       2       0
## chr1:100260000-100269999        0        0       0       2       0
## chr1:100270000-100279999        0        0       0       2       0
## chr1:100990000-100999999        0        0       0       2       0
## chr1:1010000-1019999            0        0       1       0       0
# sample type labels for plot
  mutation.type <- factor(c(rep("BRAF",13), rep("RAS",4), rep("WT", 3)))
  RNA.type <- factor(c("Keratin", "Immune", "Immune","Immune","Immune","Immune","Immune","Immune",
                       "MITF-low","Keratin","Immune","MITF-low","Immune","MITF-low","Immune","Immune",
                       "Keratin","Keratin","Keratin", "Immune"))
  Methylation.type <- factor(c("CpG", "Normal", "Hyper","Normal", "Normal","Hyper","Hyper","CpG",
                               "Hyper","Hypo","Hyper","Normal","Hypo","Hypo","CpG","Hypo",
                               "CpG","Hyper","Hyper","CpG"))
  
# filtering low variable regions -- Change be changed for RNA-expression or DNA-methylation 
  dat.state1 <- varFilter(as.matrix(dat), var.cutoff = 0.5)
  mut.gene <- factor(paste(RNA.type, sep = "-"))
  print(dim(dat.state1))
## [1] 3922   20

MDS plot based on bivalent polycomb chromatin state in metastatic melanoma

  d <- dist(t(dat.state1), method = "binary")
  mds.plot <- cmdscale(d = d, eig = T, k = 2)
  mdsDist <- data.frame(genotypes = mutation.type, x = mds.plot$points[,1], y = mds.plot$points[,2],
                        subtype = RNA.type)
  print(ggplot(mdsDist, aes(x = x, y = y, color = genotypes, shape = subtype)) + geom_point(size = 8) +
          ylab("MDS Coordinate 2") + xlab("MDS Coordinate 1") + theme_grey() +
          theme(legend.text = element_text(size = 18, face = "bold"),
                legend.title = element_text(size = 18, colour = "black", face = "bold"),
                axis.title = element_text(size = 18, face = "bold"),
                axis.text.x = element_text(size = 18, face = "bold", color = "black"),
                axis.text.y = element_text(size = 18, face = "bold", color = "black"),
                plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))) 

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.