setwd("/Users/Daniela/Desktop/Math/third_lab/pyscenic")
library(Seurat)
## Attaching SeuratObject
library(data.table)
library(MASS)
library(circlize)
## ========================================
## circlize version 0.4.13
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
library(rjson)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RColorBrewer)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.6.2
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
seurat = load(file = "harmony_BT474.rda")
binary = read.csv("binarization.csv",row.names = 1)
colnames(binary) = gsub(pattern = "...$",replacement = "", x = colnames(binary))
seurat_meta = AddMetaData(hnc, binary)
#AUC scores split by RNA_snn_res.0.5
Idents(seurat_meta) = 'RNA_snn_res.0.5'
data_to_write_out = as.data.frame(as.matrix(seurat_meta@meta.data))
#10 cluster contrast
K = rbind("c0 - others" = c( 9, -1, -1, -1, -1, -1, -1, -1, -1, -1),
"c1 - others" = c( -1, 9, -1, -1, -1, -1, -1, -1, -1, -1),
"c2 - others" = c( -1, -1, 9, -1, -1, -1, -1, -1, -1, -1),
"c3 - others" = c( -1, -1, -1, 9, -1, -1, -1, -1, -1, -1),
"c4 - others" = c( -1, -1, -1, -1, 9, -1, -1, -1, -1, -1),
"c5 - others" = c( -1, -1, -1, -1, -1, 9, -1, -1, -1, -1),
"c6 - others" = c( -1, -1, -1, -1, -1, -1, 9, -1, -1, -1),
"c7 - others" = c( -1, -1, -1, -1, -1, -1, -1, 9, -1, -1),
"c8 - others" = c( -1, -1, -1, -1, -1, -1, -1, -1, 9, -1),
"c9 - others" = c( -1, -1, -1, -1, -1, -1, -1, -1, -1, 9))
#COMBINE 4 HEATMAPS
#type 1------------------------------------
#check how many clusters: type 1 has no cluster c9
data_to_write_out1 = data_to_write_out[data_to_write_out$type=="1.DMSO",]
data_to_write_out1$clusters = as.factor(data_to_write_out1$RNA_snn_res.0.5)
levels(data_to_write_out1$clusters) = c("c0","c1","c2","c3","c4","c5","c6","c7","c8")
#330 x 10
#cluster 0
result1 = vector("list",331)
#9 cluster contrast
K1 = rbind("c0 - others" = c( 8, -1, -1, -1, -1, -1, -1, -1, -1),
"c1 - others" = c( -1, 8, -1, -1, -1, -1, -1, -1, -1),
"c2 - others" = c( -1, -1, 8, -1, -1, -1, -1, -1, -1),
"c3 - others" = c( -1, -1, -1, 8, -1, -1, -1, -1, -1),
"c4 - others" = c( -1, -1, -1, -1, 8, -1, -1, -1, -1),
"c5 - others" = c( -1, -1, -1, -1, -1, 8, -1, -1, -1),
"c6 - others" = c( -1, -1, -1, -1, -1, -1, 8, -1, -1),
"c7 - others" = c( -1, -1, -1, -1, -1, -1, -1, 8, -1),
"c8 - others" = c( -1, -1, -1, -1, -1, -1, -1, -1, 8))
for(i in 1:331){
g1 = glm(as.numeric(data_to_write_out1[,26+i]) ~ clusters,
data= data_to_write_out1, family = "binomial") #GLM with contrast
mult1 = glht(g1, linfct = mcp(clusters = K1))
s1 = summary(mult1, test=adjusted("hochberg"))
result1[[i]] = s1$test$tstat
}
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
#convert to dataframe
df1 = data.frame(matrix(unlist(result1), nrow=length(result1), byrow=TRUE))
colnames(df1) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9")
all1 = colnames(data_to_write_out1)
rownames(df1) = all1[27:357]
#target genes
# Give the input file name to the function.
regulons = fromJSON(file = "regulons.json")
genes = lengths(regulons)
for(i in 1:331){
rownames(df1)[i] = paste0(rownames(df1)[i]," (",genes[i]," genes)")
}
#type 2------------------------------------
#type 2 has no cluster c9
data_to_write_out2 = data_to_write_out[data_to_write_out$type=="2.fullvestrant",]
data_to_write_out2$clusters = as.factor(data_to_write_out2$RNA_snn_res.0.5)
levels(data_to_write_out2$clusters) = c("c0","c1","c2","c3","c4","c5","c6","c7","c8")
#330 x 10
#cluster 0
result2 = vector("list",331)
K2 = rbind("c0 - others" = c( 8, -1, -1, -1, -1, -1, -1, -1, -1),
"c1 - others" = c( -1, 8, -1, -1, -1, -1, -1, -1, -1),
"c2 - others" = c( -1, -1, 8, -1, -1, -1, -1, -1, -1),
"c3 - others" = c( -1, -1, -1, 8, -1, -1, -1, -1, -1),
"c4 - others" = c( -1, -1, -1, -1, 8, -1, -1, -1, -1),
"c5 - others" = c( -1, -1, -1, -1, -1, 8, -1, -1, -1),
"c6 - others" = c( -1, -1, -1, -1, -1, -1, 8, -1, -1),
"c7 - others" = c( -1, -1, -1, -1, -1, -1, -1, 8, -1),
"c8 - others" = c( -1, -1, -1, -1, -1, -1, -1, -1, 8))
for(i in 1:331){
g2 = glm(as.numeric(data_to_write_out2[,26+i]) ~ clusters,
data= data_to_write_out2, family = "binomial") #GLM with contrast
mult2 = glht(g2, linfct = mcp(clusters = K2))
s2 = summary(mult2, test=adjusted("hochberg"))
result2[[i]] = s2$test$tstat
}
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
#convert to dataframe
df2 = data.frame(matrix(unlist(result2), nrow=length(result2), byrow=TRUE))
colnames(df2) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9")
all2 = colnames(data_to_write_out2)
rownames(df2) = all2[27:357]
#target genes
# Give the input file name to the function.
regulons = fromJSON(file = "regulons.json")
genes = lengths(regulons)
for(i in 1:331){
rownames(df2)[i] = paste0(rownames(df2)[i]," (",genes[i]," genes)")
}
#type 3------------------------------------
#c3 has all 10 clusters
data_to_write_out3 = data_to_write_out[data_to_write_out$type=="3.neratinib",]
data_to_write_out3$clusters = as.factor(data_to_write_out3$RNA_snn_res.0.5)
levels(data_to_write_out3$clusters) = c("c0","c1","c2","c3","c4","c5","c6","c7","c8","c9")
#330 x 10
#cluster 0
result3 = vector("list",331)
for(i in 1:331){
g3 = glm(as.numeric(data_to_write_out3[,26+i]) ~ clusters,
data= data_to_write_out3, family = "binomial") #GLM with contrast
mult3 = glht(g3, linfct = mcp(clusters = K))
s3 = summary(mult3, test=adjusted("hochberg"))
result3[[i]] = s3$test$tstat
}
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
#convert to dataframe
df3 = data.frame(matrix(unlist(result3), nrow=length(result3), byrow=TRUE))
colnames(df3) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c10")
all3 = colnames(data_to_write_out3)
rownames(df3) = all3[27:357]
#target genes
# Give the input file name to the function.
regulons = fromJSON(file = "regulons.json")
genes = lengths(regulons)
for(i in 1:331){
rownames(df3)[i] = paste0(rownames(df3)[i]," (",genes[i]," genes)")
}
#type 4------------------------------------
#type 4 has all 10 clusters
data_to_write_out4 = data_to_write_out[data_to_write_out$type=="4.both",]
data_to_write_out4$clusters = as.factor(data_to_write_out4$RNA_snn_res.0.5)
levels(data_to_write_out4$clusters) = c("c0","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10")
#330 x 10
#cluster 0
result4 = vector("list",331)
for(i in 1:331){
g4 = glm(as.numeric(data_to_write_out4[,26+i]) ~ clusters,
data= data_to_write_out4, family = "binomial") #GLM with contrast
mult4 = glht(g4, linfct = mcp(clusters = K))
s4 = summary(mult4, test=adjusted("hochberg"))
result4[[i]] = s4$test$tstat
}
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
#convert to dataframe
df4 = data.frame(matrix(unlist(result4), nrow=length(result4), byrow=TRUE))
colnames(df4) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c10")
all4 = colnames(data_to_write_out4)
rownames(df4) = all4[27:357]
#target genes
# Give the input file name to the function.
regulons = fromJSON(file = "regulons.json")
genes = lengths(regulons)
for(i in 1:331){
rownames(df4)[i] = paste0(rownames(df4)[i]," (",genes[i]," genes)")
}
#filter rows
df1$type = "type1"
df2$type = "type2"
zeros = rep(0,331)
df1$c10 = zeros
df2$c10 = zeros
df3$type = "type3"
df4$type = "type4"
df_new = rbind(df1,df2,df3,df4)
colnames(df_new) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9","type","c10")
df_new = df_new %>% filter(abs(c1) > 3 | abs(c2)> 3 | abs(c3) > 3 | abs(c4) > 3 | abs(c5) > 3 | abs(c6) > 3 | abs(c7) > 3 | abs(c8) > 3 | abs(c9) > 3 | abs(c10) > 3)
final = rownames(df_new[1:44,])
#COMBINE 4 HEATMAPS ---------------------
#type 1------------------------------------
#check how many clusters: type 1 has no cluster c9
data_to_write_out1 = data_to_write_out[data_to_write_out$type=="1.DMSO",]
data_to_write_out1$clusters = as.factor(data_to_write_out1$RNA_snn_res.0.5)
levels(data_to_write_out1$clusters) = c("c0","c1","c2","c3","c4","c5","c6","c7","c8")
#330 x 10
#cluster 0
result1 = vector("list",331)
K1 = rbind("c0 - others" = c( 8, -1, -1, -1, -1, -1, -1, -1, -1),
"c1 - others" = c( -1, 8, -1, -1, -1, -1, -1, -1, -1),
"c2 - others" = c( -1, -1, 8, -1, -1, -1, -1, -1, -1),
"c3 - others" = c( -1, -1, -1, 8, -1, -1, -1, -1, -1),
"c4 - others" = c( -1, -1, -1, -1, 8, -1, -1, -1, -1),
"c5 - others" = c( -1, -1, -1, -1, -1, 8, -1, -1, -1),
"c6 - others" = c( -1, -1, -1, -1, -1, -1, 8, -1, -1),
"c7 - others" = c( -1, -1, -1, -1, -1, -1, -1, 8, -1),
"c8 - others" = c( -1, -1, -1, -1, -1, -1, -1, -1, 8))
for(i in 1:331){
g1 = glm(as.numeric(data_to_write_out1[,26+i]) ~ clusters,
data= data_to_write_out1, family = "binomial") #GLM with contrast
mult1 = glht(g1, linfct = mcp(clusters = K1))
s1 = summary(mult1, test=adjusted("hochberg"))
result1[[i]] = s1$test$tstat
}
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
#convert to dataframe
df1 = data.frame(matrix(unlist(result1), nrow=length(result1), byrow=TRUE))
colnames(df1) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9")
all1 = colnames(data_to_write_out1)
rownames(df1) = all1[27:357]
#target genes
# Give the input file name to the function.
regulons = fromJSON(file = "regulons.json")
genes = lengths(regulons)
for(i in 1:331){
rownames(df1)[i] = paste0(rownames(df1)[i]," (",genes[i]," genes)")
}
#heatmap
df1 = df1[rownames(df1) %in% final,]
hoch_mat1 = as.matrix(df1)
#same for all legends
col_fun = colorRamp2(c(-10, 0, 10), c("blue", "white", "red"))
cn = c(1:9)
c1 = Heatmap(hoch_mat1, column_order = sort(colnames(hoch_mat1)), row_order = sort(rownames(hoch_mat1)),show_column_names = FALSE, show_column_dend = FALSE, show_row_dend = F,
top_annotation = HeatmapAnnotation(
text = anno_text(cn, rot = 0, just = "center"),
annotation_height = max_text_width(cn)
), column_title = "DMSO",
rect_gp = gpar(col = "white", lwd = 1),
heatmap_legend_param = list(
col_fun = col_fun, title=c("Z score"),
show_heatmap_legend = T)
)
#type 2------------------------------------
#type 2 has no cluster c9
data_to_write_out2 = data_to_write_out[data_to_write_out$type=="2.fullvestrant",]
data_to_write_out2$clusters = as.factor(data_to_write_out2$RNA_snn_res.0.5)
levels(data_to_write_out2$clusters) = c("c0","c1","c2","c3","c4","c5","c6","c7","c8")
#330 x 10
#cluster 0
result2 = vector("list",331)
K2 = rbind("c0 - others" = c( 8, -1, -1, -1, -1, -1, -1, -1, -1),
"c1 - others" = c( -1, 8, -1, -1, -1, -1, -1, -1, -1),
"c2 - others" = c( -1, -1, 8, -1, -1, -1, -1, -1, -1),
"c3 - others" = c( -1, -1, -1, 8, -1, -1, -1, -1, -1),
"c4 - others" = c( -1, -1, -1, -1, 8, -1, -1, -1, -1),
"c5 - others" = c( -1, -1, -1, -1, -1, 8, -1, -1, -1),
"c6 - others" = c( -1, -1, -1, -1, -1, -1, 8, -1, -1),
"c7 - others" = c( -1, -1, -1, -1, -1, -1, -1, 8, -1),
"c8 - others" = c( -1, -1, -1, -1, -1, -1, -1, -1, 8))
for(i in 1:331){
g2 = glm(as.numeric(data_to_write_out2[,26+i]) ~ clusters,
data= data_to_write_out2, family = "binomial") #GLM with contrast
mult2 = glht(g2, linfct = mcp(clusters = K2))
s2 = summary(mult2, test=adjusted("hochberg"))
result2[[i]] = s2$test$tstat
}
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
#convert to dataframe
df2 = data.frame(matrix(unlist(result2), nrow=length(result2), byrow=TRUE))
colnames(df2) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9")
all2 = colnames(data_to_write_out2)
rownames(df2) = all2[27:357]
#target genes
# Give the input file name to the function.
regulons = fromJSON(file = "regulons.json")
genes = lengths(regulons)
for(i in 1:331){
rownames(df2)[i] = paste0(rownames(df2)[i]," (",genes[i]," genes)")
}
#heatmap
df2 = df2[rownames(df2) %in% final,]
df2 = df2[,1:9]
hoch_mat2 = as.matrix(df2)
cn = c(1:9)
c2 = Heatmap(hoch_mat2, show_column_names = FALSE, column_order = sort(colnames(hoch_mat2)), row_order = sort(rownames(hoch_mat2)),show_column_dend = FALSE, show_row_dend = F,
top_annotation = HeatmapAnnotation(
text = anno_text(cn, rot = 0, just = "center"),
annotation_height = max_text_width(cn)
), column_title = "Fullvestrant",
rect_gp = gpar(col = "white", lwd = 1),
show_heatmap_legend = F,
heatmap_legend_param = list(
col_fun = col_fun, title=c("Z score"))
)
#type 3------------------------------------
#c3 has all 10 clusters
data_to_write_out3 = data_to_write_out[data_to_write_out$type=="3.neratinib",]
data_to_write_out3$clusters = as.factor(data_to_write_out3$RNA_snn_res.0.5)
levels(data_to_write_out3$clusters) = c("c0","c1","c2","c3","c4","c5","c6","c7","c8","c9")
#330 x 10
#cluster 0
result3 = vector("list",331)
for(i in 1:331){
g3 = glm(as.numeric(data_to_write_out3[,26+i]) ~ clusters,
data= data_to_write_out3, family = "binomial") #GLM with contrast
mult3 = glht(g3, linfct = mcp(clusters = K))
s3 = summary(mult3, test=adjusted("hochberg"))
result3[[i]] = s3$test$tstat
}
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
#convert to dataframe
df3 = data.frame(matrix(unlist(result3), nrow=length(result3), byrow=TRUE))
colnames(df3) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c99")
all3 = colnames(data_to_write_out3)
rownames(df3) = all3[27:357]
#target genes
# Give the input file name to the function.
regulons = fromJSON(file = "regulons.json")
genes = lengths(regulons)
for(i in 1:331){
rownames(df3)[i] = paste0(rownames(df3)[i]," (",genes[i]," genes)")
}
#heatmap
df3 = df3[rownames(df3) %in% final,]
hoch_mat3 = as.matrix(df3)
cn = c(1:10)
c3 = Heatmap(hoch_mat3, show_column_names = FALSE, column_order = sort(colnames(hoch_mat3)), row_order = sort(rownames(hoch_mat3)),show_column_dend = FALSE, show_row_dend = F,
top_annotation = HeatmapAnnotation(
text = anno_text(cn, rot = 0, just = "center"),
annotation_height = max_text_width(cn)
), column_title = "Neratinib",
rect_gp = gpar(col = "white", lwd = 1),
show_heatmap_legend = F,
heatmap_legend_param = list(
col_fun = col_fun, title=c("Z score"))
)
#type 4------------------------------------
#type 4 has all 10 clusters
data_to_write_out4 = data_to_write_out[data_to_write_out$type=="4.both",]
data_to_write_out4$clusters = as.factor(data_to_write_out4$RNA_snn_res.0.5)
levels(data_to_write_out4$clusters) = c("c0","c1","c2","c3","c4","c5","c6","c7","c8","c9","c99")
#c99 is 10, used for ordering
#330 x 10
#cluster 0
result4 = vector("list",331)
for(i in 1:331){
g4 = glm(as.numeric(data_to_write_out4[,26+i]) ~ clusters,
data= data_to_write_out4, family = "binomial") #GLM with contrast
mult4 = glht(g4, linfct = mcp(clusters = K))
s4 = summary(mult4, test=adjusted("hochberg"))
result4[[i]] = s4$test$tstat
}
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
#convert to dataframe
df4 = data.frame(matrix(unlist(result4), nrow=length(result4), byrow=TRUE))
colnames(df4) = c("c1","c2","c3","c4","c5","c6","c7","c8","c9","c99")
all4 = colnames(data_to_write_out4)
rownames(df4) = all4[27:357]
#target genes
# Give the input file name to the function.
regulons = fromJSON(file = "regulons.json")
genes = lengths(regulons)
for(i in 1:331){
rownames(df4)[i] = paste0(rownames(df4)[i]," (",genes[i]," genes)")
}
#heatmap
df4 = df4[rownames(df4) %in% final,]
hoch_mat4 = as.matrix(df4)
cn = c(1:10)
c4 = Heatmap(hoch_mat4, show_column_names = FALSE, column_order = sort(colnames(hoch_mat4)), row_order = sort(rownames(hoch_mat4)),show_column_dend = FALSE, show_row_dend = F,
top_annotation = HeatmapAnnotation(
text = anno_text(cn, rot = 0, just = "center"),
annotation_height = max_text_width(cn)
), column_title = "Both",
rect_gp = gpar(col = "white", lwd = 1),
show_heatmap_legend = F,
heatmap_legend_param = list(
col_fun = col_fun, title=c("Z score"))
)
#combine
c5 = c1 + c2 + c3 + c4
#plot
c5

#8, 50
pdf("heatmap_demo.pdf", width = 12, height = 10)
draw(c5, show_heatmap_legend = TRUE)
dev.off()
## quartz_off_screen
## 2