1 import packages and functions

suppressMessages(library(data.table))
suppressMessages(library(ggplot2))
suppressMessages(library(dplyr))
suppressMessages(library(xlsx))
suppressMessages(library(pheatmap))
suppressMessages(library(RColorBrewer))


set.seed(42)
fl.sources <- list.files("../../scripts/utils/", full.names = T)
tmp <- sapply(fl.sources,source)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

2 set up plot parameters

## annotation file
group_sorted <- read.csv('group_file_for_heatmap.csv',header = T, row.names = 1)
mycolors <- list(subsets = c("#A6A6A6", "#FF0000", "#0007F4", "#FFA75C", "#00AAFE", 
                             "#009051", "#FEB5B5", "#E6CFFF", "#8AAA75"))
names(mycolors$subsets) <- unique(group_sorted$Group)
breaksList = seq(-2, 2, by = 0.1)

# function
get_heatmap <- function(df,size=3,...){
  g2 <- group_sorted[colnames(df),]
  annotation_col = data.frame(subsets = g2$Group) 
  rownames(annotation_col) = colnames(df)
  p1 <- pheatmap(df, 
                 fontsize = size, angle_col = 90, 
                 cellwidth = 2*size, cellheight = size,
                 cluster_cols = F, 
                 annotation_col = annotation_col,
                 clustering_distance_cols = 'correlation', 
                 clustering_distance_rows = 'correlation', 
                 clustering_method = 'average',
                 # annotation_row = annotation_row,
                 breaks = breaksList,
                 color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList)),
                 annotation_names_row = F, annotation_names_col = F,
                 annotation_colors = mycolors, border_color = NA, ...)
  return(p1)
}

3 import pagerank scores

Data <- read.csv('pr_original.csv', row.names = 1)#### default scale
#### zscore
Data_normed <- zscore(Data)

#### scale to [0,1]
Data_normed_2 <- scaleData(Data)

#### scale to [-2,2]
tmp <- apply(Data_normed,1:2, function(x) min(x, 2))
Data_normed_3 <- as.data.frame(apply(tmp,1:2, function(x) max(x, -2)))

4 identify non-specific TFs

Non-specific TFs are those TFs with mean PageRank ranked as top10% across 9 cell states and coefficients of variation (CV) less than 0.5

tmp <- mostConserveTFs(highlyRankedTFs(Data,0.1),0.5) 
housekeeping <- rownames(tmp)
writeLines(housekeeping, 'housekeeping_top01_CV05.txt')

4.1 heatmap of non-specific TFs

get_heatmap(Data_normed_3[housekeeping,], cluster_rows=F, size = 5)

5 identify cell state-specific TFs

First we divided the samples into two groups: target group and background group. Target group included all the samples belonging to the cell state of interest and the background group comprised the remaining samples. Unpaired t-test was used to calculate the P-value. A P-value cutoff of 0.05 and log2 fold change cutoff of 0.5 were used for calling cell state-specific TFs.

# load meta file
group_sorted <- read.table('group_file_v2.txt',header = T, row.names = 1)
group_sorted$oldName <- rownames(group_sorted)
rownames(group_sorted) <- group_sorted$newName

n <- which(group_sorted[["Group"]]=="TexTerm")
# identification
tfs <- lapply(unique(group_sorted$Group),
            function(x) {
             pr <- Data[!(row.names(Data) %in% housekeeping),]
             n <- which(group_sorted[["Group"]]==x)
             x <- unpairedtTest(pr,n,p=0.05,d=0.5)
             rownames(x)
            }) 
names(tfs) <- unique(group_sorted$Group)

5.1 heatmap of cell state-specific TFs

From top to bottom, they’re Naive, MP, TE, TCM, TEM, TRM, TexProg, TexInt, and TexTerm.

ps <- lapply(names(tfs), function(x) {
  annotation_row <- data.frame("subsets"=rep(x,length(tfs[[x]])))
  rownames(annotation_row) <- tfs[[x]]
  get_heatmap(df=Data_normed_3[rownames(annotation_row),], cluster_rows=F, annotation_row=annotation_row, size = 5) 
})

6 identify multi-taskers and single-taskers

Multi-taskers are TFs appearing in multiple cell states. Single-taskers are TFs only identified in single cell state.

getRowAnnotation <- function(TFs){
  tmp <- unlist(TFs)
  redundants <- unname(tmp[duplicated(tmp)])
  uniques <- unname(tmp[!duplicated(tmp)])
  tmp2 <- tibble("subsets" = "MultiTasker","TFs"=unique(redundants))
  annotation_row <- tibble("subsets" = unlist(lapply(1:length(TFs), function(x) {rep(names(TFs[x]), length(TFs[[x]]))})),
                           "TFs" = unlist(TFs)) %>% 
    filter(TFs %in% setdiff(uniques, redundants)) %>%
    bind_rows(tmp2) %>%
    tibble::column_to_rownames("TFs")
  return(annotation_row)
  
}
annotation_row <- getRowAnnotation(tfs)

breaksList = seq(-2, 2, by = 0.1)
annotation_row %>% filter(subsets=="MultiTasker") %>% rownames() %>% writeLines('MultiTasker_TFs.txt')
annotation_row %>% filter(subsets!="MultiTasker") %>% rownames() %>% writeLines('SingleTasker_TFs.txt')

6.1 heatmap of multi-taskers and single-taskers

Heatmap for multi-taskers:

df_multi <- Data_normed_3[readLines('MultiTasker_TFs.txt'),]
p1 <- get_heatmap(df_multi, size=5)

Heatmap for single-taskers:

df_sgl <- Data_normed_3[readLines('SingleTasker_TFs.txt'),]
p1 <- get_heatmap(df_sgl, size=5)

7 identification of TRM-specific and TEXterm-specific TFs

As described in Extended Data Fig.3a, we first curated a candiate list including TFs in TexTerm and TRM-TFs identified in cell-state-specific step. Then we performed unpaired t-test. The target group is TexTerm/TRM while the background is TRM/TexTerm.

# manually select background samples
group2 <- group_sorted[grepl("TRM|TexTerm",group_sorted$Group),]
candidates <- union(tfs[['TexTerm']], tfs[['TRM']])
tfs2 <- lapply(unique(group2$Group),
            function(x) {
             pr <- Data[candidates,rownames(group2)]
             n <- which(group2[["Group"]]==x)
             x <- unpairedtTest(pr,n,p=0.05,d=0.5)
             rownames(x)
            })
names(tfs2) <- unique(group2$Group)
common_tfs <- setdiff(candidates, unlist(tfs2))

7.1 bubble visualization for TRM and TEXterm TFs

## import rna-seq data
rna_seq <- read.csv('rna_expr_TFs.csv', row.names = 1)

TFs that most active in TEXterm

plot(bubble(tfs2[['TexTerm']],
            log2(rna_seq+1)[tfs2[['TexTerm']],rownames(group2)],
            Data_normed_3[tfs2[['TexTerm']],rownames(group2)]
))

TFs that most active in TRM

plot(bubble(tfs2[['TRM']],
            log2(rna_seq+1)[tfs2[['TRM']],rownames(group2)],
            Data_normed_3[tfs2[['TRM']],rownames(group2)]
))

TFs that active in TRM and TEXterm

plot(bubble(common_tfs,
            log2(rna_seq+1)[common_tfs,rownames(group2)],
            Data_normed_3[common_tfs,rownames(group2)]
))

8 session info

sessionInfo()

## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] factoextra_1.0.7   ggpubr_0.6.0       RColorBrewer_1.1-3 pheatmap_1.0.12   
## [5] xlsx_0.6.5         dplyr_1.1.2        ggplot2_3.4.2      data.table_1.14.8 
## 
## loaded via a namespace (and not attached):
##  [1] xlsxjars_0.6.1   sass_0.4.6       utf8_1.2.3       generics_0.1.3  
##  [5] tidyr_1.3.0      rstatix_0.7.2    digest_0.6.31    magrittr_2.0.3  
##  [9] evaluate_0.21    fastmap_1.1.1    jsonlite_1.8.7   ggrepel_0.9.3   
## [13] backports_1.4.1  purrr_1.0.1      fansi_1.0.4      scales_1.2.1    
## [17] jquerylib_0.1.4  abind_1.4-5      cli_3.6.1        rlang_1.1.1     
## [21] munsell_0.5.0    withr_2.5.0      cachem_1.0.8     yaml_2.3.7      
## [25] tools_4.3.1      ggsignif_0.6.4   colorspace_2.1-0 broom_1.0.5     
## [29] vctrs_0.6.3      R6_2.5.1         lifecycle_1.0.3  car_3.1-2       
## [33] pkgconfig_2.0.3  rJava_1.0-6      pillar_1.9.0     bslib_0.5.0     
## [37] gtable_0.3.3     glue_1.6.2       Rcpp_1.0.10      highr_0.10      
## [41] xfun_0.39        tibble_3.2.1     tidyselect_1.2.0 rstudioapi_0.14 
## [45] knitr_1.43       farver_2.1.1     htmltools_0.5.5  labeling_0.4.2  
## [49] rmarkdown_2.23   carData_3.0-5    compiler_4.3.1

visualization of TF activity

Cong

2024-09-07