suppressMessages(library(pheatmap))
suppressMessages(library(dplyr))
## Warning: package 'dplyr' was built under R version 4.0.5
suppressMessages(library(RColorBrewer))
df_fc <- read.csv('log2FC_btw_TEX_and_TRM_mean_edge_weight_subset_TFs_v2_top500_genes.csv', row.names = 1)
df_fc = df_fc[apply(df_fc,MARGIN = 1, FUN = function(x) length(unique(x)) > 1),apply(df_fc,MARGIN = 2, FUN = function(x) length(unique(x)) > 1)] # filter out rows and columns with all zeros
knitr::kable(df_fc[1:6,1:6], caption = 'log2 fold change of edge weight betweeen TEXterm and TRM') %>% kableExtra::kable_styling(latex_options = 'scale_down')
| Ahr | Arid3a | Arnt | Arntl | Atf1 | Atf2 | |
|---|---|---|---|---|---|---|
| 0610005C13RIK | -17.11308 | -17.9507140 | 0.0000000 | -18.1611137 | 0.0000000 | -14.573647 |
| 0610007P14RIK | -17.02543 | -19.4012452 | -17.9343811 | 0.0000000 | -18.5015384 | 1.880424 |
| 0610009B22RIK | -16.59115 | -18.1571970 | 19.6159249 | 19.5648057 | 1.0726713 | 4.799144 |
| 0610009E02RIK | -16.68274 | -18.3419988 | -17.7398062 | 0.0000000 | -18.9743612 | -17.026008 |
| 0610009L18RIK | -16.53229 | -0.1199366 | 0.7064989 | 0.3416398 | -0.7550526 | 4.642163 |
| 0610009O20RIK | -16.72654 | 0.6963692 | 19.1582437 | 19.6540559 | 19.0787844 | 0.000000 |
Following the script of PCA+Kmeans clustering, we selected k as 16
set.seed(123)
k = 16
kmeans_model <- kmeans(df_fc, centers = k)
## Warning: did not converge in 10 iterations
p <- pheatmap::pheatmap(kmeans_model$centers,fontsize=5, cellheight=5, cellwidth=5,
show_colnames = T, show_rownames = T,
border_color = NA,
cluster_cols = T,
# annotation_col = annotation_col,
annotation_colors = mycolors,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "BrBG")))(20))
output = as.data.frame(kmeans_model$cluster) %>% tibble::rownames_to_column('gene')
names(output) = c('gene','kmeans_cluster')
# # save to file
# write.csv(output, 'kmeans_cluster_k_16_log2FC_btw_TEX_and_TRM_mean_edge_weight_top500_subset_TFs_v2_v2_ordered_by_group.csv')
This step takes around 20 min to run.
source("../../scripts/utils/enrich_all.R")
source("../../scripts/utils/utils.R")
suppressMessages(library(org.Mm.eg.db))
suppressMessages(library(clusterProfiler))
suppressMessages(library(enrichplot))
# lapply(unique(output$kmeans_cluster), function(x){
# gene <- output %>% dplyr::filter(kmeans_cluster==x) %>% pull(gene) %>% firstup()
# enrich_all(gene = gene, prefix=paste0('k',x),is.plot = F, pvalue = 0.1)
# })
## select genesets for visualization
df2 <- read.csv('selected_cluster_enrich_pathways_p0.001.csv',row.names = 1, check.names = F)
annotation_col <- data.frame(clusters=c(7,11,16,3,5,8,9,12,15,2,10,13,1,14,4,6), group=c(rep('TEX',5), rep('TRM',3), rep('mix',8))) %>% tibble::column_to_rownames('clusters')
mycolors <- list(group = c(brewer.pal(n = 7, name = "BrBG")[1],brewer.pal(n = 7, name = "BrBG")[7],'#808080'))
names(mycolors$group) <- c('TEX','TRM','mix')
size <- 5
p1 <- pheatmap(df2, fontsize = size, show_rownames = T,
angle_col = 90, show_colnames = T,
cellwidth = 3*size, cellheight = size,
cluster_rows = T, cluster_cols = T,
clustering_distance_rows = 'correlation',
clustering_method = 'average',
treeheight_row = size, treeheight_col = size,
border_color = NA,
annotation_col = annotation_col,
annotation_colors = mycolors,
color = colorRampPalette(c("#1984c5", "#22a7f0", "#63bff0", "#a7d5ed", "#e2e2e2", "#e1a692", "#de6e56", "#e14b31", "#c23728"))(50)
)
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22631)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] enrichplot_1.8.1 clusterProfiler_3.16.1 org.Mm.eg.db_3.11.4
## [4] AnnotationDbi_1.50.3 IRanges_2.22.2 S4Vectors_0.26.1
## [7] Biobase_2.48.0 BiocGenerics_0.34.0 RColorBrewer_1.1-2
## [10] dplyr_1.0.7 pheatmap_1.0.12
##
## loaded via a namespace (and not attached):
## [1] bit64_4.0.5 progress_1.2.2 webshot_0.5.2
## [4] httr_1.4.2 tools_4.0.2 bslib_0.2.4
## [7] utf8_1.1.4 R6_2.5.0 DBI_1.1.1
## [10] colorspace_2.0-0 prettyunits_1.1.1 tidyselect_1.1.1
## [13] gridExtra_2.3 bit_4.0.4 compiler_4.0.2
## [16] rvest_1.0.0 scatterpie_0.1.6 xml2_1.3.2
## [19] triebeard_0.3.0 sass_0.3.1 scales_1.1.1
## [22] ggridges_0.5.3 systemfonts_1.0.1 stringr_1.4.0
## [25] digest_0.6.27 rmarkdown_2.8 DOSE_3.14.0
## [28] svglite_2.0.0 pkgconfig_2.0.3 htmltools_0.5.1.1
## [31] fastmap_1.1.0 highr_0.9 rlang_0.4.10
## [34] rstudioapi_0.13 RSQLite_2.2.3 gridGraphics_0.5-1
## [37] jquerylib_0.1.4 generics_0.1.0 farver_2.0.3
## [40] jsonlite_1.7.2 BiocParallel_1.22.0 GOSemSim_2.14.2
## [43] magrittr_2.0.1 ggplotify_0.0.6 kableExtra_1.3.4
## [46] GO.db_3.11.4 Matrix_1.3-3 Rcpp_1.0.6
## [49] munsell_0.5.0 fansi_0.4.2 viridis_0.6.0
## [52] lifecycle_1.0.0 stringi_1.5.3 yaml_2.2.1
## [55] ggraph_2.0.5 MASS_7.3-51.6 plyr_1.8.6
## [58] qvalue_2.20.0 grid_4.0.2 blob_1.2.1
## [61] ggrepel_0.9.1 DO.db_2.9 crayon_1.4.1
## [64] lattice_0.20-41 cowplot_1.1.1 graphlayouts_0.7.1
## [67] splines_4.0.2 hms_1.0.0 knitr_1.33
## [70] pillar_1.6.0 fgsea_1.14.0 igraph_1.2.6
## [73] reshape2_1.4.4 fastmatch_1.1-0 glue_1.4.2
## [76] evaluate_0.14 downloader_0.4 BiocManager_1.30.13
## [79] data.table_1.14.0 urltools_1.7.3 vctrs_0.3.8
## [82] tweenr_1.0.1 gtable_0.3.0 purrr_0.3.4
## [85] polyclip_1.10-0 tidyr_1.1.3 assertthat_0.2.1
## [88] cachem_1.0.4 ggplot2_3.3.3 xfun_0.21
## [91] ggforce_0.3.2 europepmc_0.4 tidygraph_1.2.0
## [94] viridisLite_0.4.0 tibble_3.1.2 rvcheck_0.1.8
## [97] memoise_2.0.0 ellipsis_0.3.2