1 import packages and functions

suppressMessages(library(data.table))
suppressMessages(library(ggplot2))
suppressMessages(library(dplyr))
# suppressMessages(library(xlsx))
suppressMessages(library(pheatmap))
suppressMessages(library(RColorBrewer))


set.seed(42)
fl.sources <- list.files("../../scripts/utils/", full.names = T)
tmp <- sapply(fl.sources,source)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

2 set up plot parameters

## annotation file
group_sorted <- read.csv('group_file_for_heatmap.csv',header = T, row.names = 1)
mycolors <- list(subsets = c("#A6A6A6", "#FF0000", "#0007F4", "#FFA75C", "#00AAFE", 
                             "#009051", "#FEB5B5", "#E6CFFF", "#8AAA75"))
names(mycolors$subsets) <- unique(group_sorted$Group)
breaksList = seq(-2, 2, by = 0.1)

# function
get_heatmap <- function(df,size=3,...){
  g2 <- group_sorted[colnames(df),]
  annotation_col = data.frame(subsets = g2$Group) 
  rownames(annotation_col) = colnames(df)
  p1 <- pheatmap(df, 
                 fontsize = size, angle_col = 90, 
                 cellwidth = 2*size, cellheight = size,
                 cluster_cols = F, 
                 annotation_col = annotation_col,
                 clustering_distance_cols = 'correlation', 
                 clustering_distance_rows = 'correlation', 
                 clustering_method = 'average',
                 # annotation_row = annotation_row,
                 breaks = breaksList,
                 color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList)),
                 annotation_names_row = F, annotation_names_col = F,
                 annotation_colors = mycolors, border_color = NA, ...)
  return(p1)
}

3 import pagerank scores

Data <- read.csv('pr_original.csv', row.names = 1)#### default scale
#### zscore
Data_normed <- zscore(Data)

#### scale to [0,1]
Data_normed_2 <- scaleData(Data)

#### scale to [-2,2]
tmp <- apply(Data_normed,1:2, function(x) min(x, 2))
Data_normed_3 <- as.data.frame(apply(tmp,1:2, function(x) max(x, -2)))

4 identify non-specific TFs

Non-specific TFs are those TFs with mean PageRank ranked as top10% across 9 cell states and coefficients of variation (CV) less than 0.5

tmp <- mostConserveTFs(highlyRankedTFs(Data,0.1),0.5) 
housekeeping <- rownames(tmp)
writeLines(housekeeping, 'housekeeping_top01_CV05.txt')

4.1 heatmap of non-specific TFs

p <- get_heatmap(Data_normed_3[housekeeping,], cluster_rows=F, size = 5)

# write to file
pdf(paste0('hp_',length(housekeeping),'_non_specific_TFs.pdf'))
print(p)
dev.off()

## quartz_off_screen 
##                 2

5 identify cell state-specific TFs

First we divided the samples into two groups: target group and background group. Target group included all the samples belonging to the cell state of interest and the background group comprised the remaining samples. Unpaired t-test was used to calculate the P-value. A P-value cutoff of 0.05 and log2 fold change cutoff of 0.5 were used for calling cell state-specific TFs. Then we kept those TFs with non-zero expression values in more than 20% of the cells in the target group.

# load meta file
group_sorted <- read.table('group_file_v2.txt',header = T, row.names = 1)
group_sorted$oldName <- rownames(group_sorted)
rownames(group_sorted) <- group_sorted$newName

## import rna-seq data
rna_seq <- read.csv('rna_expr_TFs.csv', row.names = 1)

n <- which(group_sorted[["Group"]]=="TexTerm")
# identification
tfs <- lapply(unique(group_sorted$Group),
            function(x) {
             pr <- Data[!(row.names(Data) %in% housekeeping),]
             n <- which(group_sorted[["Group"]]==x)
             x <- unpairedtTest(pr,n,p=0.05,d=0.5)
             t <- rownames(x)
             t <- t[unlist(lapply(t, function(x) sum(rna_seq[x, n]>0) >= ceiling(0.2*length(n))))]
             
            }) 
names(tfs) <- unique(group_sorted$Group)

5.1 heatmap of cell state-specific TFs

From top to bottom, they’re Naive, MP, TE, TCM, TEM, TRM, TexProg, TexInt, and TexTerm.

ps <- lapply(names(tfs), function(x) {
  annotation_row <- data.frame("subsets"=rep(x,length(tfs[[x]])))
  rownames(annotation_row) <- tfs[[x]]
  p <-get_heatmap(df=Data_normed_3[rownames(annotation_row),], cluster_rows=F, annotation_row=annotation_row, size = 5) 
  pdf(paste0('hp_',nrow(annotation_row),'_',x,'_TFs.pdf'))
  print(p)
  dev.off()
})

6 identify multi-states specific TFs and single-state specific TFs

Multi-state specific TFs are identified in multiple cell states. Single-state specific TFs are only identified in single cell state.

getRowAnnotation <- function(TFs){
  tmp <- unlist(TFs)
  redundants <- unname(tmp[duplicated(tmp)])
  uniques <- unname(tmp[!duplicated(tmp)])
  tmp2 <- tibble("subsets" = "multi","TFs"=unique(redundants))
  annotation_row <- tibble("subsets" = unlist(lapply(1:length(TFs), function(x) {rep(names(TFs[x]), length(TFs[[x]]))})),
                           "TFs" = unlist(TFs)) %>% 
    filter(TFs %in% setdiff(uniques, redundants)) %>%
    bind_rows(tmp2) %>%
    tibble::column_to_rownames("TFs")
  return(annotation_row)
  
}
annotation_row <- getRowAnnotation(tfs)

breaksList = seq(-2, 2, by = 0.1)
annotation_row %>% filter(subsets=="multi") %>% rownames() %>% writeLines('multi_TFs.txt')
annotation_row %>% filter(subsets!="multi") %>% rownames() %>% writeLines('single_TFs.txt')

6.1 heatmap of multi-state specific TFs

df_multi <- Data_normed_3[readLines('multi_TFs.txt'),]
p1 <- get_heatmap(df_multi, size=5)

## write to file
pdf(paste0('hp_',nrow(df_multi),'_multi_TFs.pdf'), width = 8, height=12)
print(p1)
dev.off()

## quartz_off_screen 
##                 2

6.2 heatmap of single-state specific TFs

df_sgl <- Data_normed_3[readLines('single_TFs.txt'),]

# add gap between cell state groups
annotation_row_single <- annotation_row |> filter(subsets!='multi')
last_element_per_group <- aggregate(rownames(annotation_row_single), by=list(annotation_row_single$subsets), FUN=tail, n=1) |> pull(x)
gaps <- sort(unlist(lapply(last_element_per_group, function(x) which(x==rownames(df_sgl)))))

p1 <- get_heatmap(df_sgl, size=5, gaps_row = gaps, cluster_rows=F, annotation_row = annotation_row_single)

## write to file
pdf(paste0('hp_',nrow(df_sgl),'_single_TFs.pdf'), width = 8, height=12)
print(p1)
dev.off()

## quartz_off_screen 
##                 2

7 identification of TRM-specific and TEXterm-specific TFs

As described in Extended Data Fig.3a, we first curated a candiate list including TFs in TexTerm and TRM-TFs identified in cell-state-specific step. Then we performed unpaired t-test. The target group is TexTerm/TRM while the background is TRM/TexTerm.

# manually select background samples
group2 <- group_sorted[grepl("TRM|TexTerm",group_sorted$Group),]
candidates <- union(tfs[['TexTerm']], tfs[['TRM']])
multi_state_tfs <- intersect(tfs[['TexTerm']], tfs[['TRM']])
tfs2 <- lapply(unique(group2$Group),
            function(x) {
             pr <- Data[candidates,rownames(group2)]
             n <- which(group2[["Group"]]==x)
             x <- unpairedtTest(pr,n,p=0.05,d=0.5)
             rownames(x)
            })
names(tfs2) <- unique(group2$Group)

common_tfs <- union(setdiff(candidates, unlist(tfs2)),multi_state_tfs)
tex_tfs <- setdiff(tfs2[['TexTerm']], common_tfs)
trm_tfs <- setdiff(tfs2[['TRM']], common_tfs)

7.1 bubble visualization for TRM and TEXterm TFs

TFs that most active in TEXterm

# reorganize TFs by the p-value significance
t <- rev(intersect(candidates, tex_tfs))

p1 <- bubble(t,log2(rna_seq+1)[t,rownames(group2)],Data_normed_3[t,rownames(group2)], text.size = 8)
plot(p1)

TFs that most active in TRM

# reorganize TFs by the p-value significance
t <- rev(intersect(candidates, trm_tfs))

p2 <- bubble(t,log2(rna_seq+1)[t,rownames(group2)],Data_normed_3[t,rownames(group2)], text.size = 8)
plot(p2)

TFs that active in TRM and TEXterm

t <- rev(intersect(candidates, common_tfs))

p3 <- bubble(t,log2(rna_seq+1)[t,rownames(group2)],Data_normed_3[t,rownames(group2)], text.size = 8)
plot(p3)

## save to file
pdf(paste0('bb_',length(tex_tfs),'_TEX_TFs.pdf'), width = 5, height=8)
plot(p1)
dev.off()

## quartz_off_screen 
##                 2

pdf(paste0('bb_',length(trm_tfs),'_TRM_TFs.pdf'), width = 4.5, height=6)
plot(p2)
dev.off()

## quartz_off_screen 
##                 2

pdf(paste0('bb_',length(common_tfs),'_common_TFs.pdf'), width = 4.5, height=8)
plot(p3)
dev.off()

## quartz_off_screen 
##                 2

8 session info

sessionInfo()

## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] factoextra_1.0.7   ggpubr_0.6.0       RColorBrewer_1.1-3 pheatmap_1.0.12   
## [5] dplyr_1.1.4        ggplot2_3.5.1      data.table_1.16.0 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.5      jsonlite_1.8.8    highr_0.11        compiler_4.4.1   
##  [5] ggsignif_0.6.4    Rcpp_1.0.12       tidyselect_1.2.1  tidyr_1.3.1      
##  [9] jquerylib_0.1.4   scales_1.3.0      yaml_2.3.8        fastmap_1.2.0    
## [13] R6_2.5.1          labeling_0.4.3    generics_0.1.3    knitr_1.47       
## [17] backports_1.5.0   ggrepel_0.9.6     tibble_3.2.1      car_3.1-2        
## [21] munsell_0.5.1     bslib_0.7.0       pillar_1.9.0      rlang_1.1.4      
## [25] utf8_1.2.4        cachem_1.1.0      broom_1.0.6       xfun_0.45        
## [29] sass_0.4.9        cli_3.6.3         withr_3.0.0       magrittr_2.0.3   
## [33] digest_0.6.36     rstudioapi_0.16.0 lifecycle_1.0.4   vctrs_0.6.5      
## [37] rstatix_0.7.2     evaluate_0.24.0   glue_1.7.0        farver_2.1.2     
## [41] abind_1.4-8       carData_3.0-5     fansi_1.0.6       colorspace_2.1-0 
## [45] rmarkdown_2.27    purrr_1.0.2       tools_4.4.1       pkgconfig_2.0.3  
## [49] htmltools_0.5.8.1

visualization of TF activity

Cong

2024-09-07