suppressMessages(library(data.table))
suppressMessages(library(ggplot2))
suppressMessages(library(dplyr))
# suppressMessages(library(xlsx))
suppressMessages(library(pheatmap))
suppressMessages(library(RColorBrewer))
set.seed(42)
fl.sources <- list.files("../../scripts/utils/", full.names = T)
tmp <- sapply(fl.sources,source)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## annotation file
group_sorted <- read.csv('group_file_for_heatmap.csv',header = T, row.names = 1)
mycolors <- list(subsets = c("#A6A6A6", "#FF0000", "#0007F4", "#FFA75C", "#00AAFE",
"#009051", "#FEB5B5", "#E6CFFF", "#8AAA75"))
names(mycolors$subsets) <- unique(group_sorted$Group)
breaksList = seq(-2, 2, by = 0.1)
# function
get_heatmap <- function(df,size=3,...){
g2 <- group_sorted[colnames(df),]
annotation_col = data.frame(subsets = g2$Group)
rownames(annotation_col) = colnames(df)
p1 <- pheatmap(df,
fontsize = size, angle_col = 90,
cellwidth = 2*size, cellheight = size,
cluster_cols = F,
annotation_col = annotation_col,
clustering_distance_cols = 'correlation',
clustering_distance_rows = 'correlation',
clustering_method = 'average',
# annotation_row = annotation_row,
breaks = breaksList,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList)),
annotation_names_row = F, annotation_names_col = F,
annotation_colors = mycolors, border_color = NA, ...)
return(p1)
}
Data <- read.csv('pr_original.csv', row.names = 1)#### default scale
#### zscore
Data_normed <- zscore(Data)
#### scale to [0,1]
Data_normed_2 <- scaleData(Data)
#### scale to [-2,2]
tmp <- apply(Data_normed,1:2, function(x) min(x, 2))
Data_normed_3 <- as.data.frame(apply(tmp,1:2, function(x) max(x, -2)))
Non-specific TFs are those TFs with mean PageRank ranked as top10% across 9 cell states and coefficients of variation (CV) less than 0.5
tmp <- mostConserveTFs(highlyRankedTFs(Data,0.1),0.5)
housekeeping <- rownames(tmp)
writeLines(housekeeping, 'housekeeping_top01_CV05.txt')
p <- get_heatmap(Data_normed_3[housekeeping,], cluster_rows=F, size = 5)
# write to file
pdf(paste0('hp_',length(housekeeping),'_non_specific_TFs.pdf'))
print(p)
dev.off()
## quartz_off_screen
## 2
First we divided the samples into two groups: target group and background group. Target group included all the samples belonging to the cell state of interest and the background group comprised the remaining samples. Unpaired t-test was used to calculate the P-value. A P-value cutoff of 0.05 and log2 fold change cutoff of 0.5 were used for calling cell state-specific TFs. Then we kept those TFs with non-zero expression values in more than 20% of the cells in the target group.
# load meta file
group_sorted <- read.table('group_file_v2.txt',header = T, row.names = 1)
group_sorted$oldName <- rownames(group_sorted)
rownames(group_sorted) <- group_sorted$newName
## import rna-seq data
rna_seq <- read.csv('rna_expr_TFs.csv', row.names = 1)
n <- which(group_sorted[["Group"]]=="TexTerm")
# identification
tfs <- lapply(unique(group_sorted$Group),
function(x) {
pr <- Data[!(row.names(Data) %in% housekeeping),]
n <- which(group_sorted[["Group"]]==x)
x <- unpairedtTest(pr,n,p=0.05,d=0.5)
t <- rownames(x)
t <- t[unlist(lapply(t, function(x) sum(rna_seq[x, n]>0) >= ceiling(0.2*length(n))))]
})
names(tfs) <- unique(group_sorted$Group)
From top to bottom, they’re Naive, MP, TE, TCM, TEM, TRM, TexProg, TexInt, and TexTerm.
ps <- lapply(names(tfs), function(x) {
annotation_row <- data.frame("subsets"=rep(x,length(tfs[[x]])))
rownames(annotation_row) <- tfs[[x]]
p <-get_heatmap(df=Data_normed_3[rownames(annotation_row),], cluster_rows=F, annotation_row=annotation_row, size = 5)
pdf(paste0('hp_',nrow(annotation_row),'_',x,'_TFs.pdf'))
print(p)
dev.off()
})
Multi-state specific TFs are identified in multiple cell states. Single-state specific TFs are only identified in single cell state.
getRowAnnotation <- function(TFs){
tmp <- unlist(TFs)
redundants <- unname(tmp[duplicated(tmp)])
uniques <- unname(tmp[!duplicated(tmp)])
tmp2 <- tibble("subsets" = "multi","TFs"=unique(redundants))
annotation_row <- tibble("subsets" = unlist(lapply(1:length(TFs), function(x) {rep(names(TFs[x]), length(TFs[[x]]))})),
"TFs" = unlist(TFs)) %>%
filter(TFs %in% setdiff(uniques, redundants)) %>%
bind_rows(tmp2) %>%
tibble::column_to_rownames("TFs")
return(annotation_row)
}
annotation_row <- getRowAnnotation(tfs)
breaksList = seq(-2, 2, by = 0.1)
annotation_row %>% filter(subsets=="multi") %>% rownames() %>% writeLines('multi_TFs.txt')
annotation_row %>% filter(subsets!="multi") %>% rownames() %>% writeLines('single_TFs.txt')
df_multi <- Data_normed_3[readLines('multi_TFs.txt'),]
p1 <- get_heatmap(df_multi, size=5)
## write to file
pdf(paste0('hp_',nrow(df_multi),'_multi_TFs.pdf'), width = 8, height=12)
print(p1)
dev.off()
## quartz_off_screen
## 2
df_sgl <- Data_normed_3[readLines('single_TFs.txt'),]
# add gap between cell state groups
annotation_row_single <- annotation_row |> filter(subsets!='multi')
last_element_per_group <- aggregate(rownames(annotation_row_single), by=list(annotation_row_single$subsets), FUN=tail, n=1) |> pull(x)
gaps <- sort(unlist(lapply(last_element_per_group, function(x) which(x==rownames(df_sgl)))))
p1 <- get_heatmap(df_sgl, size=5, gaps_row = gaps, cluster_rows=F, annotation_row = annotation_row_single)
## write to file
pdf(paste0('hp_',nrow(df_sgl),'_single_TFs.pdf'), width = 8, height=12)
print(p1)
dev.off()
## quartz_off_screen
## 2
As described in Extended Data Fig.3a, we first curated a candiate list including TFs in TexTerm and TRM-TFs identified in cell-state-specific step. Then we performed unpaired t-test. The target group is TexTerm/TRM while the background is TRM/TexTerm.
# manually select background samples
group2 <- group_sorted[grepl("TRM|TexTerm",group_sorted$Group),]
candidates <- union(tfs[['TexTerm']], tfs[['TRM']])
multi_state_tfs <- intersect(tfs[['TexTerm']], tfs[['TRM']])
tfs2 <- lapply(unique(group2$Group),
function(x) {
pr <- Data[candidates,rownames(group2)]
n <- which(group2[["Group"]]==x)
x <- unpairedtTest(pr,n,p=0.05,d=0.5)
rownames(x)
})
names(tfs2) <- unique(group2$Group)
common_tfs <- union(setdiff(candidates, unlist(tfs2)),multi_state_tfs)
tex_tfs <- setdiff(tfs2[['TexTerm']], common_tfs)
trm_tfs <- setdiff(tfs2[['TRM']], common_tfs)
TFs that most active in TEXterm
# reorganize TFs by the p-value significance
t <- rev(intersect(candidates, tex_tfs))
p1 <- bubble(t,log2(rna_seq+1)[t,rownames(group2)],Data_normed_3[t,rownames(group2)], text.size = 8)
plot(p1)
TFs that most active in TRM
# reorganize TFs by the p-value significance
t <- rev(intersect(candidates, trm_tfs))
p2 <- bubble(t,log2(rna_seq+1)[t,rownames(group2)],Data_normed_3[t,rownames(group2)], text.size = 8)
plot(p2)
TFs that active in TRM and TEXterm
t <- rev(intersect(candidates, common_tfs))
p3 <- bubble(t,log2(rna_seq+1)[t,rownames(group2)],Data_normed_3[t,rownames(group2)], text.size = 8)
plot(p3)
## save to file
pdf(paste0('bb_',length(tex_tfs),'_TEX_TFs.pdf'), width = 5, height=8)
plot(p1)
dev.off()
## quartz_off_screen
## 2
pdf(paste0('bb_',length(trm_tfs),'_TRM_TFs.pdf'), width = 4.5, height=6)
plot(p2)
dev.off()
## quartz_off_screen
## 2
pdf(paste0('bb_',length(common_tfs),'_common_TFs.pdf'), width = 4.5, height=8)
plot(p3)
dev.off()
## quartz_off_screen
## 2
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] factoextra_1.0.7 ggpubr_0.6.0 RColorBrewer_1.1-3 pheatmap_1.0.12
## [5] dplyr_1.1.4 ggplot2_3.5.1 data.table_1.16.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.5 jsonlite_1.8.8 highr_0.11 compiler_4.4.1
## [5] ggsignif_0.6.4 Rcpp_1.0.12 tidyselect_1.2.1 tidyr_1.3.1
## [9] jquerylib_0.1.4 scales_1.3.0 yaml_2.3.8 fastmap_1.2.0
## [13] R6_2.5.1 labeling_0.4.3 generics_0.1.3 knitr_1.47
## [17] backports_1.5.0 ggrepel_0.9.6 tibble_3.2.1 car_3.1-2
## [21] munsell_0.5.1 bslib_0.7.0 pillar_1.9.0 rlang_1.1.4
## [25] utf8_1.2.4 cachem_1.1.0 broom_1.0.6 xfun_0.45
## [29] sass_0.4.9 cli_3.6.3 withr_3.0.0 magrittr_2.0.3
## [33] digest_0.6.36 rstudioapi_0.16.0 lifecycle_1.0.4 vctrs_0.6.5
## [37] rstatix_0.7.2 evaluate_0.24.0 glue_1.7.0 farver_2.1.2
## [41] abind_1.4-8 carData_3.0-5 fansi_1.0.6 colorspace_2.1-0
## [45] rmarkdown_2.27 purrr_1.0.2 tools_4.4.1 pkgconfig_2.0.3
## [49] htmltools_0.5.8.1