1 import packages and functions

suppressMessages(library(pheatmap))
suppressMessages(library(dplyr))
## Warning: package 'dplyr' was built under R version 4.0.5
suppressMessages(library(RColorBrewer))

2 load data first

df_fc <- read.csv('log2FC_btw_TEX_and_TRM_mean_edge_weight_subset_TFs_v2_top500_genes.csv', row.names = 1)
df_fc = df_fc[apply(df_fc,MARGIN = 1, FUN = function(x) length(unique(x)) > 1),apply(df_fc,MARGIN = 2, FUN = function(x) length(unique(x)) > 1)] # filter out rows and columns with all zeros
knitr::kable(df_fc[1:6,1:6], caption = 'log2 fold change of edge weight betweeen TEXterm and TRM') %>% kableExtra::kable_styling(latex_options = 'scale_down') 
log2 fold change of edge weight betweeen TEXterm and TRM
Ahr Arid3a Arnt Arntl Atf1 Atf2
0610005C13RIK -17.11308 -17.9507140 0.0000000 -18.1611137 0.0000000 -14.573647
0610007P14RIK -17.02543 -19.4012452 -17.9343811 0.0000000 -18.5015384 1.880424
0610009B22RIK -16.59115 -18.1571970 19.6159249 19.5648057 1.0726713 4.799144
0610009E02RIK -16.68274 -18.3419988 -17.7398062 0.0000000 -18.9743612 -17.026008
0610009L18RIK -16.53229 -0.1199366 0.7064989 0.3416398 -0.7550526 4.642163
0610009O20RIK -16.72654 0.6963692 19.1582437 19.6540559 19.0787844 0.000000

3 Kmeans clustering

Following the script of PCA+Kmeans clustering, we selected k as 16

set.seed(123)
k = 16
kmeans_model <- kmeans(df_fc, centers = k)
## Warning: did not converge in 10 iterations

4 plot heatmap

p <- pheatmap::pheatmap(kmeans_model$centers,fontsize=5, cellheight=5, cellwidth=5,
              show_colnames = T, show_rownames = T, 
              border_color = NA,
              cluster_cols = T,
#               annotation_col = annotation_col, 
              annotation_colors = mycolors,
              color = colorRampPalette(rev(brewer.pal(n = 7, name = "BrBG")))(20))

5 extract Kmeans member

output = as.data.frame(kmeans_model$cluster) %>% tibble::rownames_to_column('gene')
names(output) = c('gene','kmeans_cluster')

# # save to file
# write.csv(output, 'kmeans_cluster_k_16_log2FC_btw_TEX_and_TRM_mean_edge_weight_top500_subset_TFs_v2_v2_ordered_by_group.csv') 

6 pathway analysis

This step takes around 20 min to run.

source("../../scripts/utils/enrich_all.R")
source("../../scripts/utils/utils.R")

suppressMessages(library(org.Mm.eg.db))
suppressMessages(library(clusterProfiler))
suppressMessages(library(enrichplot))

# lapply(unique(output$kmeans_cluster), function(x){
#   gene <- output %>% dplyr::filter(kmeans_cluster==x) %>% pull(gene) %>% firstup()
#   enrich_all(gene = gene, prefix=paste0('k',x),is.plot = F, pvalue = 0.1)
# })

7 selected pathway visualization

## select genesets for visualization
df2 <- read.csv('selected_cluster_enrich_pathways_p0.001.csv',row.names = 1, check.names = F)
annotation_col <- data.frame(clusters=c(7,11,16,3,5,8,9,12,15,2,10,13,1,14,4,6), group=c(rep('TEX',5), rep('TRM',3), rep('mix',8))) %>% tibble::column_to_rownames('clusters')
mycolors <- list(group = c(brewer.pal(n = 7, name = "BrBG")[1],brewer.pal(n = 7, name = "BrBG")[7],'#808080'))
names(mycolors$group) <- c('TEX','TRM','mix')
size <- 5
p1 <- pheatmap(df2, fontsize = size, show_rownames = T,
               angle_col = 90, show_colnames = T,
               cellwidth = 3*size, cellheight = size,
               cluster_rows = T, cluster_cols = T,
               clustering_distance_rows = 'correlation',
               clustering_method = 'average',
               treeheight_row = size, treeheight_col = size,
               border_color = NA,
               annotation_col = annotation_col,
               annotation_colors = mycolors,
               color = colorRampPalette(c("#1984c5", "#22a7f0", "#63bff0", "#a7d5ed", "#e2e2e2", "#e1a692", "#de6e56", "#e14b31", "#c23728"))(50)
)

8 session info

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22631)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] enrichplot_1.8.1       clusterProfiler_3.16.1 org.Mm.eg.db_3.11.4   
##  [4] AnnotationDbi_1.50.3   IRanges_2.22.2         S4Vectors_0.26.1      
##  [7] Biobase_2.48.0         BiocGenerics_0.34.0    RColorBrewer_1.1-2    
## [10] dplyr_1.0.7            pheatmap_1.0.12       
## 
## loaded via a namespace (and not attached):
##  [1] bit64_4.0.5         progress_1.2.2      webshot_0.5.2      
##  [4] httr_1.4.2          tools_4.0.2         bslib_0.2.4        
##  [7] utf8_1.1.4          R6_2.5.0            DBI_1.1.1          
## [10] colorspace_2.0-0    prettyunits_1.1.1   tidyselect_1.1.1   
## [13] gridExtra_2.3       bit_4.0.4           compiler_4.0.2     
## [16] rvest_1.0.0         scatterpie_0.1.6    xml2_1.3.2         
## [19] triebeard_0.3.0     sass_0.3.1          scales_1.1.1       
## [22] ggridges_0.5.3      systemfonts_1.0.1   stringr_1.4.0      
## [25] digest_0.6.27       rmarkdown_2.8       DOSE_3.14.0        
## [28] svglite_2.0.0       pkgconfig_2.0.3     htmltools_0.5.1.1  
## [31] fastmap_1.1.0       highr_0.9           rlang_0.4.10       
## [34] rstudioapi_0.13     RSQLite_2.2.3       gridGraphics_0.5-1 
## [37] jquerylib_0.1.4     generics_0.1.0      farver_2.0.3       
## [40] jsonlite_1.7.2      BiocParallel_1.22.0 GOSemSim_2.14.2    
## [43] magrittr_2.0.1      ggplotify_0.0.6     kableExtra_1.3.4   
## [46] GO.db_3.11.4        Matrix_1.3-3        Rcpp_1.0.6         
## [49] munsell_0.5.0       fansi_0.4.2         viridis_0.6.0      
## [52] lifecycle_1.0.0     stringi_1.5.3       yaml_2.2.1         
## [55] ggraph_2.0.5        MASS_7.3-51.6       plyr_1.8.6         
## [58] qvalue_2.20.0       grid_4.0.2          blob_1.2.1         
## [61] ggrepel_0.9.1       DO.db_2.9           crayon_1.4.1       
## [64] lattice_0.20-41     cowplot_1.1.1       graphlayouts_0.7.1 
## [67] splines_4.0.2       hms_1.0.0           knitr_1.33         
## [70] pillar_1.6.0        fgsea_1.14.0        igraph_1.2.6       
## [73] reshape2_1.4.4      fastmatch_1.1-0     glue_1.4.2         
## [76] evaluate_0.14       downloader_0.4      BiocManager_1.30.13
## [79] data.table_1.14.0   urltools_1.7.3      vctrs_0.3.8        
## [82] tweenr_1.0.1        gtable_0.3.0        purrr_0.3.4        
## [85] polyclip_1.10-0     tidyr_1.1.3         assertthat_0.2.1   
## [88] cachem_1.0.4        ggplot2_3.3.3       xfun_0.21          
## [91] ggforce_0.3.2       europepmc_0.4       tidygraph_1.2.0    
## [94] viridisLite_0.4.0   tibble_3.1.2        rvcheck_0.1.8      
## [97] memoise_2.0.0       ellipsis_0.3.2