suppressMessages(library(data.table))
suppressMessages(library(ggplot2))
suppressMessages(library(dplyr))
suppressMessages(library(xlsx))
suppressMessages(library(pheatmap))
suppressMessages(library(RColorBrewer))
set.seed(42)
fl.sources <- list.files("../../scripts/utils/", full.names = T)
tmp <- sapply(fl.sources,source)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## annotation file
group_sorted <- read.csv('group_file_for_heatmap.csv',header = T, row.names = 1)
mycolors <- list(subsets = c("#A6A6A6", "#FF0000", "#0007F4", "#FFA75C", "#00AAFE",
"#009051", "#FEB5B5", "#E6CFFF", "#8AAA75"))
names(mycolors$subsets) <- unique(group_sorted$Group)
breaksList = seq(-2, 2, by = 0.1)
# function
get_heatmap <- function(df,size=3,...){
g2 <- group_sorted[colnames(df),]
annotation_col = data.frame(subsets = g2$Group)
rownames(annotation_col) = colnames(df)
p1 <- pheatmap(df,
fontsize = size, angle_col = 90,
cellwidth = 2*size, cellheight = size,
cluster_cols = F,
annotation_col = annotation_col,
clustering_distance_cols = 'correlation',
clustering_distance_rows = 'correlation',
clustering_method = 'average',
# annotation_row = annotation_row,
breaks = breaksList,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList)),
annotation_names_row = F, annotation_names_col = F,
annotation_colors = mycolors, border_color = NA, ...)
return(p1)
}
Data <- read.csv('pr_original.csv', row.names = 1)#### default scale
#### zscore
Data_normed <- zscore(Data)
#### scale to [0,1]
Data_normed_2 <- scaleData(Data)
#### scale to [-2,2]
tmp <- apply(Data_normed,1:2, function(x) min(x, 2))
Data_normed_3 <- as.data.frame(apply(tmp,1:2, function(x) max(x, -2)))
Non-specific TFs are those TFs with mean PageRank ranked as top10% across 9 cell states and coefficients of variation (CV) less than 0.5
tmp <- mostConserveTFs(highlyRankedTFs(Data,0.1),0.5)
housekeeping <- rownames(tmp)
writeLines(housekeeping, 'housekeeping_top01_CV05.txt')
get_heatmap(Data_normed_3[housekeeping,], cluster_rows=F, size = 5)
First we divided the samples into two groups: target group and background group. Target group included all the samples belonging to the cell state of interest and the background group comprised the remaining samples. Unpaired t-test was used to calculate the P-value. A P-value cutoff of 0.05 and log2 fold change cutoff of 0.5 were used for calling cell state-specific TFs.
# load meta file
group_sorted <- read.table('group_file_v2.txt',header = T, row.names = 1)
group_sorted$oldName <- rownames(group_sorted)
rownames(group_sorted) <- group_sorted$newName
n <- which(group_sorted[["Group"]]=="TexTerm")
# identification
tfs <- lapply(unique(group_sorted$Group),
function(x) {
pr <- Data[!(row.names(Data) %in% housekeeping),]
n <- which(group_sorted[["Group"]]==x)
x <- unpairedtTest(pr,n,p=0.05,d=0.5)
rownames(x)
})
names(tfs) <- unique(group_sorted$Group)
From top to bottom, they’re Naive, MP, TE, TCM, TEM, TRM, TexProg, TexInt, and TexTerm.
ps <- lapply(names(tfs), function(x) {
annotation_row <- data.frame("subsets"=rep(x,length(tfs[[x]])))
rownames(annotation_row) <- tfs[[x]]
get_heatmap(df=Data_normed_3[rownames(annotation_row),], cluster_rows=F, annotation_row=annotation_row, size = 5)
})
Multi-taskers are TFs appearing in multiple cell states. Single-taskers are TFs only identified in single cell state.
getRowAnnotation <- function(TFs){
tmp <- unlist(TFs)
redundants <- unname(tmp[duplicated(tmp)])
uniques <- unname(tmp[!duplicated(tmp)])
tmp2 <- tibble("subsets" = "MultiTasker","TFs"=unique(redundants))
annotation_row <- tibble("subsets" = unlist(lapply(1:length(TFs), function(x) {rep(names(TFs[x]), length(TFs[[x]]))})),
"TFs" = unlist(TFs)) %>%
filter(TFs %in% setdiff(uniques, redundants)) %>%
bind_rows(tmp2) %>%
tibble::column_to_rownames("TFs")
return(annotation_row)
}
annotation_row <- getRowAnnotation(tfs)
breaksList = seq(-2, 2, by = 0.1)
annotation_row %>% filter(subsets=="MultiTasker") %>% rownames() %>% writeLines('MultiTasker_TFs.txt')
annotation_row %>% filter(subsets!="MultiTasker") %>% rownames() %>% writeLines('SingleTasker_TFs.txt')
Heatmap for multi-taskers:
df_multi <- Data_normed_3[readLines('MultiTasker_TFs.txt'),]
p1 <- get_heatmap(df_multi, size=5)
Heatmap for single-taskers:
df_sgl <- Data_normed_3[readLines('SingleTasker_TFs.txt'),]
p1 <- get_heatmap(df_sgl, size=5)
As described in Extended Data Fig.3a, we first curated a candiate list including TFs in TexTerm and TRM-TFs identified in cell-state-specific step. Then we performed unpaired t-test. The target group is TexTerm/TRM while the background is TRM/TexTerm.
# manually select background samples
group2 <- group_sorted[grepl("TRM|TexTerm",group_sorted$Group),]
candidates <- union(tfs[['TexTerm']], tfs[['TRM']])
tfs2 <- lapply(unique(group2$Group),
function(x) {
pr <- Data[candidates,rownames(group2)]
n <- which(group2[["Group"]]==x)
x <- unpairedtTest(pr,n,p=0.05,d=0.5)
rownames(x)
})
names(tfs2) <- unique(group2$Group)
common_tfs <- setdiff(candidates, unlist(tfs2))
## import rna-seq data
rna_seq <- read.csv('rna_expr_TFs.csv', row.names = 1)
TFs that most active in TEXterm
plot(bubble(tfs2[['TexTerm']],
log2(rna_seq+1)[tfs2[['TexTerm']],rownames(group2)],
Data_normed_3[tfs2[['TexTerm']],rownames(group2)]
))
TFs that most active in TRM
plot(bubble(tfs2[['TRM']],
log2(rna_seq+1)[tfs2[['TRM']],rownames(group2)],
Data_normed_3[tfs2[['TRM']],rownames(group2)]
))
TFs that active in TRM and TEXterm
plot(bubble(common_tfs,
log2(rna_seq+1)[common_tfs,rownames(group2)],
Data_normed_3[common_tfs,rownames(group2)]
))
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] factoextra_1.0.7 ggpubr_0.6.0 RColorBrewer_1.1-3 pheatmap_1.0.12
## [5] xlsx_0.6.5 dplyr_1.1.2 ggplot2_3.4.2 data.table_1.14.8
##
## loaded via a namespace (and not attached):
## [1] xlsxjars_0.6.1 sass_0.4.6 utf8_1.2.3 generics_0.1.3
## [5] tidyr_1.3.0 rstatix_0.7.2 digest_0.6.31 magrittr_2.0.3
## [9] evaluate_0.21 fastmap_1.1.1 jsonlite_1.8.7 ggrepel_0.9.3
## [13] backports_1.4.1 purrr_1.0.1 fansi_1.0.4 scales_1.2.1
## [17] jquerylib_0.1.4 abind_1.4-5 cli_3.6.1 rlang_1.1.1
## [21] munsell_0.5.0 withr_2.5.0 cachem_1.0.8 yaml_2.3.7
## [25] tools_4.3.1 ggsignif_0.6.4 colorspace_2.1-0 broom_1.0.5
## [29] vctrs_0.6.3 R6_2.5.1 lifecycle_1.0.3 car_3.1-2
## [33] pkgconfig_2.0.3 rJava_1.0-6 pillar_1.9.0 bslib_0.5.0
## [37] gtable_0.3.3 glue_1.6.2 Rcpp_1.0.10 highr_0.10
## [41] xfun_0.39 tibble_3.2.1 tidyselect_1.2.0 rstudioapi_0.14
## [45] knitr_1.43 farver_2.1.1 htmltools_0.5.5 labeling_0.4.2
## [49] rmarkdown_2.23 carData_3.0-5 compiler_4.3.1