Functions
# Load the shared helper functions from the "DEG_functions" repository, pinned to version 0.2.45.
# NOTE(review): the argument is spelled `repositoy`; presumably this matches the helper's own signature - confirm.
source_from_github(repositoy = "DEG_functions",version = "0.2.45")
ℹ SHA-1 hash of file is 645c7d8e9571eb9caed4b7baf4b058f6a2c051cc
Data
# Build the gene-set list: MSigDB category "H" followed by category "C2" /
# subcategory "CP:KEGG" (both for Homo sapiens), plus the custom HIF target set.
genesets <- append(
  msigdb_download("Homo sapiens", category = "H"),
  msigdb_download("Homo sapiens", category = "C2", subcategory = "CP:KEGG")
)
genesets[["HIF_targets"]] <- hif_targets
from cnmf import cNMF
import pickle
# Load the cNMF object from its pickle file.
# The context manager closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code - only load files from a trusted source.
with open('./Data/cnmf/cnmf_objects/models_2Kvargenes_all_K_cnmf_obj.pckl', 'rb') as f:
    cnmf_obj = pickle.load(f)
K selection plot
# Embed the pre-rendered K-selection image written by the cNMF run.
plot_path <- "/sci/labs/yotamd/lab_share/avishai.wizel/R_projects/EGFR/Data/cNMF/cNMF_models_Varnorm_Harmony_2Kvargenes_all_K/cNMF_models_Varnorm_Harmony_2Kvargenes_all_K.k_selection.png"
knitr::include_graphics(plot_path)

# Number of programs (K) and density threshold used for the consensus step below.
k = 5
density_threshold = 0.1
# Run the consensus step for this K / threshold, with show_clustering enabled.
cnmf_obj.consensus(k=k, density_threshold=density_threshold,show_clustering=True)
# Load the results for the same K / threshold; only the first two returned values are kept.
usage_norm, gep_scores, _, _ = cnmf_obj.load_results(K=k, density_threshold=density_threshold)
# Copy the Python-side results into R through the `py` object, keeping a
# K = 5 specific copy and a generic working copy of each.
gep_scores5_xeno <- py$gep_scores
usage_norm5_xeno <- py$usage_norm
gep_scores <- gep_scores5_xeno
usage_norm <- usage_norm5_xeno
NMF usage

Programs
GSEA

# For every column of gep_scores: rank the genes by score, run hypeR_fgsea
# against the gene sets and show the dot plot in its own tab.
for (col in seq_along(gep_scores)) {
  # Named score vector for this column, highest score first.
  ranked_vec <- sort(
    setNames(gep_scores[, col], rownames(gep_scores)),
    decreasing = TRUE
  )
  hyp_obj <- hypeR_fgsea(ranked_vec, genesets)
  print_tab(
    hyp_dots(hyp_obj, title = paste("program", col)) + aes(size = nes),
    title = paste0("gep", col)
  )
}
gep1

gep2

gep3

Warning in fgsea::fgseaMultilevel(stats = signature, pathways =
gsets.obj$genesets, : There were 1 pathways for which P-values were not
calculated properly due to unbalanced (positive and negative) gene-level
statistic values. For such pathways pval, padj, NES, log2err are set to
NA. You can try to increase the value of the argument nPermSimple (for
example set it nPermSimple = 10000)

gep4

Warning in fgsea::fgseaMultilevel(stats = signature, pathways =
gsets.obj$genesets, : There were 2 pathways for which P-values were not
calculated properly due to unbalanced (positive and negative) gene-level
statistic values. For such pathways pval, padj, NES, log2err are set to
NA. You can try to increase the value of the argument nPermSimple (for
example set it nPermSimple = 10000)

gep5

NA
# Main pathways chosen for each of the first four programs.
programs_main_pathways <- list(
  gep1 = "HALLMARK_INTERFERON_ALPHA_RESPONSE",
  gep2 = c(
    "HALLMARK_TNFA_SIGNALING_VIA_NFKB",
    "KEGG_ASTHMA",
    "KEGG_TOLL_LIKE_RECEPTOR_SIGNALING_PATHWAY"
  ),
  gep3 = c("HALLMARK_HYPOXIA", "HIF_targets"),
  gep4 = "HALLMARK_E2F_TARGETS"
)
# Pick 2000 variable features and fetch their values from the 'counts' slot.
xeno <- FindVariableFeatures(object = xeno, nfeatures = 2000)
xeno_vargenes <- VariableFeatures(object = xeno)
xeno_expression <- FetchData(object = xeno, vars = xeno_vargenes, slot = "counts")
# Columns in which the number of zero entries equals the number of rows,
# i.e. columns that are zero everywhere.
all_0_genes <- colnames(xeno_expression)[
  colSums(xeno_expression == 0, na.rm = TRUE) == nrow(xeno_expression)
]
# Remove those all-zero genes from the variable-gene list.
xeno_vargenes <- xeno_vargenes[!xeno_vargenes %in% all_0_genes]
calculate score for
Xeno
import numpy as np
import scanpy as sc
# Bring the R-side gene list and expression table into Python through `r`.
xeno_vargenes = r.xeno_vargenes
xeno_expression = r.xeno_expression
# compute_tpm and get_usage_from_score are helpers defined outside this chunk.
tpm = compute_tpm(xeno_expression)
usage_by_calc = get_usage_from_score(
    counts=xeno_expression,
    tpm=tpm,
    genes=xeno_vargenes,
    cnmf_obj=cnmf_obj,
    k=5,
)
/sci/labs/yotamd/lab_share/avishai.wizel/python_envs/miniconda/envs/cnmf_env_6/bin/python3.7:7: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
/sci/labs/yotamd/lab_share/avishai.wizel/python_envs/miniconda/envs/cnmf_env_6/bin/python3.7:8: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
# Take the computed usage table from Python and label its five columns
# with the program names used in the rest of the analysis.
xeno_5_metagenes <- py$usage_by_calc
all_metagenes <- xeno_5_metagenes
colnames(all_metagenes) <- c(
  "IFNa",
  "immune_response",
  "hypoxia",
  "cell_cycle",
  "unknown"
)
programs
expression
# Add each metagene column to xeno's metadata under its program name, then
# plot all of them.
# seq_len() keeps the loop empty when there are no columns (1:0 would run twice),
# and colnames() matches how the names were assigned above.
for (i in seq_len(ncol(all_metagenes))) {
  # drop = FALSE keeps a one-column table (with its row names) for AddMetaData.
  metagene_metadata <- all_metagenes[, i, drop = FALSE]
  xeno <- AddMetaData(
    object = xeno,
    metadata = metagene_metadata,
    col.name = colnames(all_metagenes)[i]
  )
}
FeaturePlot(object = xeno, features = colnames(all_metagenes), ncol = 3)

NA
NA
Programs dotplot
# Dot plot of every program score, grouped by treatment, with relabelled legends.
DotPlot(
  object = xeno,
  features = colnames(all_metagenes),
  group.by = "treatment",
  scale = TRUE
) +
  guides(
    size = guide_legend(title = "Cells expressing (%)"),
    color = guide_colorbar(title = "Average Score")
  )

# Rename the "unknown" program to "resistant_program", both in xeno's
# metadata (copy, then delete the old column) and in the all_metagenes table.
xeno$resistant_program <- xeno$unknown
xeno$unknown <- NULL
all_metagenes <- dplyr::rename(all_metagenes, resistant_program = unknown)
NMF
programs regulation
# Compare the first four programs between NT and OSI with a Wilcoxon test.
metagenes_mean_compare(
  dataset = xeno,
  time.point_var = "treatment",
  prefix = "model",
  patient.ident_var = "orig.ident",
  pre_on = c("NT", "OSI"),
  test = "wilcox.test",
  programs = colnames(all_metagenes)[1:4],
  without_split = FALSE
)
IFNa per patient

immune_response per patient

hypoxia per patient

cell_cycle per patient

NA
LE
genes programs UMAP
# Print le_genes.
# NOTE(review): the rendered output right below is NULL, so le_genes appears to be unset at this point - confirm.
le_genes
NULL
LE
genes programs regulation
# Same NT-vs-OSI comparison, run on the "<pathway>_le" variables of every
# main pathway listed in programs_main_pathways.
metagenes_mean_compare(
  dataset = xeno,
  time.point_var = "treatment",
  prefix = "model",
  patient.ident_var = "orig.ident",
  pre_on = c("NT", "OSI"),
  test = "wilcox.test",
  programs = paste0(unlist(programs_main_pathways), "_le"),
  without_split = FALSE
)
HALLMARK_INTERFERON_ALPHA_RESPONSE_le per
patient

HALLMARK_TNFA_SIGNALING_VIA_NFKB_le per
patient

KEGG_ASTHMA_le per patient

KEGG_TOLL_LIKE_RECEPTOR_SIGNALING_PATHWAY_le per
patient

HALLMARK_HYPOXIA_le per patient

HIF_targets_le per patient

HALLMARK_E2F_TARGETS_le per patient

NA

Top program 2 genes
expression correlation
# Names of the 200 genes with the highest program-2 score.
top_ot <- rownames(
  head(
    gep_scores[order(gep_scores[, 2], decreasing = TRUE), 2, drop = FALSE],
    200
  )
)
num_of_clusters <- 7
# plot_genes_cor is a helper defined outside this chunk; its result is reused below.
annotation <- plot_genes_cor(
  dataset = xeno,
  num_of_clusters = num_of_clusters,
  geneIds = top_ot,
  height = 3
)
program
2 all clusters expression
# For every gene cluster of program 2: run a hypergeometric enrichment on the
# cluster's genes, score the genes per cell, store the score as "cluster<N>"
# and show the enrichment dot plot next to the feature plot.
#
# The rest of this file reads the cluster table as annotation[["myannotation"]],
# while this loop piped `annotation` itself into filter(). Accept both shapes so
# the loop works whichever one plot_genes_cor() returned.
cluster_annotation <- if (is.data.frame(annotation)) {
  annotation
} else {
  annotation[["myannotation"]]
}
for (chosen_clusters in seq_len(num_of_clusters)) {
  # Genes assigned to this cluster.
  chosen_genes <- cluster_annotation %>%
    dplyr::filter(cluster == chosen_clusters) %>%
    rownames()
  # print(chosen_genes)
  # NOTE(review): xeno_5_gep_scores is not defined in the visible part of the
  # file (gep_scores5_xeno is) - confirm the background object.
  hyp_obj <- hypeR(
    chosen_genes,
    genesets,
    test = "hypergeometric",
    fdr = 1,
    plotting = FALSE,
    background = rownames(xeno_5_gep_scores)
  )
  scoresAndIndices <- getPathwayScores(xeno@assays$RNA@data, chosen_genes)
  xeno <- AddMetaData(
    xeno,
    scoresAndIndices$pathwayScores,
    paste0("cluster", chosen_clusters)
  )
  print_tab(
    plt =
      hyp_dots(hyp_obj, size_by = "none", title = paste0("cluster", chosen_clusters)) +
        FeaturePlot(object = xeno, features = paste0("cluster", chosen_clusters)),
    title = chosen_clusters
  )
}
Correlation of
clusters
# Correlate the program-2 score with every "cluster<N>" score.
# Program 2 was added to the metadata under the name "immune_response" (the
# column names given to all_metagenes), so a "TNFa" column only exists if it
# was created elsewhere. Use it when present, otherwise fall back to
# "immune_response" instead of failing on a missing column.
program2_col <- if ("TNFa" %in% colnames(xeno@meta.data)) {
  "TNFa"
} else {
  "immune_response"
}
for (chosen_clusters in seq_len(num_of_clusters)) {
  cor_res <- cor(
    xeno@meta.data[[program2_col]],
    xeno[[paste0("cluster", chosen_clusters)]]
  )
  print(paste("correlation of TNFa program to", paste0("cluster", chosen_clusters), ":", cor_res))
}
# Label for each of the seven program-2 gene clusters: a gene-set name where
# one was chosen, otherwise the plain "cluster<N>" name.
clusters_idents <- c(
  "cluster1",
  "KEGG_OXIDATIVE_PHOSPHORYLATION",
  "HALLMARK_TNFA_SIGNALING_VIA_NFKB",
  "KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION",
  "HALLMARK_P53_PATHWAY",
  "cluster6",
  "cluster7"
)
program 2 intersected
genes
# For every cluster: when it carries a gene-set label, keep only the cluster
# genes that belong to that gene set and name the score "<gene set>_cluster";
# otherwise keep all cluster genes under the plain "cluster<N>" name.
# Each score is added to xeno's metadata and its name is collected.
programs_of_cluster <- c()
for (chosen_clusters in 1:num_of_clusters) {
  # Genes assigned to this cluster.
  chosen_genes <- rownames(
    dplyr::filter(annotation[["myannotation"]], cluster == chosen_clusters)
  )
  pathway_name <- clusters_idents[chosen_clusters]
  if (!startsWith(x = pathway_name, prefix = "cluster")) {
    chosen_genes <- intersect(chosen_genes, genesets[[pathway_name]])
    pathway_name <- paste0(pathway_name, "_cluster")
  }
  programs_of_cluster <- c(programs_of_cluster, pathway_name)
  print(pathway_name)
  print(chosen_genes)
  cat("\n")
  scoresAndIndices <- getPathwayScores(xeno@assays$RNA@data, chosen_genes)
  xeno <- AddMetaData(xeno, scoresAndIndices$pathwayScores, pathway_name)
}
program
2 intersected pathway regulation
# NT-vs-OSI comparison for the cluster scores collected in programs_of_cluster.
metagenes_mean_compare(
  dataset = xeno,
  time.point_var = "treatment",
  prefix = "model",
  patient.ident_var = "orig.ident",
  pre_on = c("NT", "OSI"),
  test = "wilcox.test",
  programs = programs_of_cluster,
  without_split = FALSE
)
program 2 significant
plot
#' Per-patient test results for pre- vs on-treatment program values.
#'
#' For every entry of `programs`, fetches that variable from `dataset`, runs
#' `compare_means()` of the variable against `time.point_var` separately for
#' each value of `patient.ident_var`, and keeps only the rows comparing
#' `pre_on[1]` (group1) with `pre_on[2]` (group2).
#'
#' @param dataset Object passed to `FetchData()`.
#' @param programs Names of the variables to test, one at a time.
#' @param patient.ident_var Name (string) of the patient/model identifier variable.
#' @param prefix Text pasted in front of every identifier value.
#' @param pre_on Length-2 vector: the two groups to compare, in order.
#' @param test Method name forwarded to `compare_means(method = )`.
#' @param time.point_var Name (string) of the time-point variable.
#' @return The row-bound `compare_means()` results of all programs; an empty
#'   `data.frame()` when `programs` is empty.
signf_plot_pre_vs_on <- function(dataset, programs, patient.ident_var, prefix, pre_on, test, time.point_var) {
  # One result table per program, bound once at the end instead of growing
  # a data frame inside the loop.
  results <- vector("list", length(programs))
  for (idx in seq_along(programs)) {
    metegene <- programs[[idx]]
    # Per-cell value of this program (row sum over the single fetched column).
    genes_by_tp <- as.data.frame(rowSums(FetchData(object = dataset, vars = metegene)))
    names(genes_by_tp)[1] <- metegene
    # Add identifier and time-point columns.
    genes_by_tp <- cbind(
      genes_by_tp,
      FetchData(object = dataset, vars = c(patient.ident_var, time.point_var))
    )
    # Prefix every identifier. The column is addressed by the string held in
    # patient.ident_var, so it is the right column even when the caller passed
    # a variable rather than a literal string (ensym() would have used the
    # caller's variable name as the column name).
    genes_by_tp_forPlot <- genes_by_tp
    genes_by_tp_forPlot[[patient.ident_var]] <- paste(prefix, genes_by_tp[, patient.ident_var])
    fm <- as.formula(paste(metegene, "~", time.point_var))
    stat.test <- compare_means(
      formula = fm,
      data = genes_by_tp_forPlot,
      method = test,
      group.by = patient.ident_var
    )
    # Keep the pre vs on treatment comparison only.
    results[[idx]] <- dplyr::filter(stat.test, group1 == pre_on[1] & group2 == pre_on[2])
  }
  # Leading empty data.frame keeps the original result for empty `programs`.
  do.call(rbind, c(list(data.frame()), results))
}
# Collect the NT-vs-OSI test results of every program in programs_of_cluster.
# (The leftover undebug() call was dropped: it only emits a warning when the
# function is not currently flagged for debugging.)
final_df <- signf_plot_pre_vs_on(
  dataset = xeno,
  time.point_var = "treatment",
  prefix = "model",
  patient.ident_var = "orig.ident",
  pre_on = c("NT", "OSI"),
  test = "wilcox.test",
  programs = programs_of_cluster
)
# Reshape to one row per orig.ident and one column per `.y.` value, filled
# with p.adj, then move orig.ident into the row names.
final_df <- reshape2::dcast(final_df, orig.ident ~ .y., value.var = "p.adj") %>%
  column_to_rownames("orig.ident")
sig_heatmap(all_patients_result = final_df, title = "ad")
Top program 3 genes
expression correlation
# Names of the 200 genes with the highest program-3 score.
# The selected column now matches the ordering column (it was a copy-pasted 2);
# only the row names are used, so the resulting gene list is unchanged.
top_hypoxia <- gep_scores[order(gep_scores[, 3], decreasing = TRUE), 3, drop = FALSE] %>%
  head(200) %>%
  rownames()
num_of_clusters <- 4
# plot_genes_cor is a helper defined outside this chunk; its result is reused below.
annotation <- plot_genes_cor(
  dataset = xeno,
  hallmark_name = NULL,
  num_of_clusters = num_of_clusters,
  geneIds = top_hypoxia
)
program
3 all clusters expression
# For every gene cluster: hypergeometric enrichment of the cluster's genes,
# per-cell score stored as "cluster<N>", and a tab showing the enrichment dot
# plot combined with the feature plot.
for (chosen_clusters in 1:num_of_clusters) {
  # Genes assigned to this cluster.
  chosen_genes <- rownames(
    dplyr::filter(annotation[["myannotation"]], cluster == chosen_clusters)
  )
  # print(chosen_genes)
  # NOTE(review): genesets_env and xeno_5_gep_scores are not defined in the
  # visible part of the file - confirm they exist at this point.
  hyp_obj <- hypeR(
    chosen_genes,
    genesets_env,
    test = "hypergeometric",
    fdr = 1,
    plotting = FALSE,
    background = rownames(xeno_5_gep_scores)
  )
  scoresAndIndices <- getPathwayScores(xeno@assays$RNA@data, chosen_genes)
  xeno <- AddMetaData(
    xeno,
    scoresAndIndices$pathwayScores,
    paste0("cluster", chosen_clusters)
  )
  print_tab(
    plt =
      hyp_dots(hyp_obj, size_by = "none", title = paste0("cluster", chosen_clusters)) +
        FeaturePlot(object = xeno, features = paste0("cluster", chosen_clusters)),
    title = chosen_clusters
  )
}
Correlation of
clusters
# Print the correlation between the hypoxia program score and each
# "cluster<N>" score.
for (chosen_clusters in 1:num_of_clusters) {
  cor_res <- cor(
    xeno$hypoxia,
    xeno[[paste0("cluster", chosen_clusters)]]
  )
  print(
    paste("correlation of hypoxia program to", paste0("cluster", chosen_clusters), ":", cor_res)
  )
}
# Label for each of the four gene clusters: a gene-set name where one was
# chosen, otherwise the plain "cluster<N>" name.
clusters_idents <- c(
  "HALLMARK_HYPOXIA",
  "HIF_targets",
  "cluster3",
  "cluster4"
)
# For every cluster: when it carries a gene-set label, keep only the cluster
# genes that belong to that gene set and name the score "<gene set>_cluster";
# otherwise keep all cluster genes under the plain "cluster<N>" name.
# Each score is added to xeno's metadata and its name is collected.
programs_of_cluster <- c()
for (chosen_clusters in 1:num_of_clusters) {
  # Genes assigned to this cluster.
  chosen_genes <- rownames(
    dplyr::filter(annotation[["myannotation"]], cluster == chosen_clusters)
  )
  pathway_name <- clusters_idents[chosen_clusters]
  if (!startsWith(x = pathway_name, prefix = "cluster")) {
    chosen_genes <- intersect(chosen_genes, genesets[[pathway_name]])
    pathway_name <- paste0(pathway_name, "_cluster")
  }
  programs_of_cluster <- c(programs_of_cluster, pathway_name)
  print(pathway_name)
  print(chosen_genes)
  cat("\n")
  scoresAndIndices <- getPathwayScores(xeno@assays$RNA@data, chosen_genes)
  xeno <- AddMetaData(xeno, scoresAndIndices$pathwayScores, pathway_name)
}
program
3 intersected pathway regulation
# NT-vs-OSI comparison for the cluster scores collected in programs_of_cluster.
metagenes_mean_compare(
  dataset = xeno,
  time.point_var = "treatment",
  prefix = "model",
  patient.ident_var = "orig.ident",
  pre_on = c("NT", "OSI"),
  test = "wilcox.test",
  programs = programs_of_cluster,
  without_split = FALSE
)
Top program 4 genes
expression correlation
# Names of the 200 genes with the highest program-4 score.
# The selected column now matches the ordering column (it was a copy-pasted 2);
# only the row names are used, so the resulting gene list is unchanged.
top_cc <- gep_scores[order(gep_scores[, 4], decreasing = TRUE), 4, drop = FALSE] %>%
  head(200) %>%
  rownames()
num_of_clusters <- 4
# plot_genes_cor is a helper defined outside this chunk; its result is reused below.
annotation <- plot_genes_cor(
  dataset = xeno,
  hallmark_name = NULL,
  num_of_clusters = num_of_clusters,
  geneIds = top_cc
)
program
4 all clusters expression
# For every gene cluster: hypergeometric enrichment of the cluster's genes,
# per-cell score stored as "cluster<N>", and a tab showing the enrichment dot
# plot combined with the feature plot.
for (chosen_clusters in 1:num_of_clusters) {
  # Genes assigned to this cluster.
  chosen_genes <- rownames(
    dplyr::filter(annotation[["myannotation"]], cluster == chosen_clusters)
  )
  # print(chosen_genes)
  # NOTE(review): genesets_env and xeno_5_gep_scores are not defined in the
  # visible part of the file - confirm they exist at this point.
  hyp_obj <- hypeR(
    chosen_genes,
    genesets_env,
    test = "hypergeometric",
    fdr = 1,
    plotting = FALSE,
    background = rownames(xeno_5_gep_scores)
  )
  scoresAndIndices <- getPathwayScores(xeno@assays$RNA@data, chosen_genes)
  xeno <- AddMetaData(
    xeno,
    scoresAndIndices$pathwayScores,
    paste0("cluster", chosen_clusters)
  )
  print_tab(
    plt =
      hyp_dots(hyp_obj, size_by = "none", title = paste0("cluster", chosen_clusters)) +
        FeaturePlot(object = xeno, features = paste0("cluster", chosen_clusters)),
    title = chosen_clusters
  )
}
