lung = readRDS("./Data/lung_cancercells_withTP_onlyPatients.rds")
lung_patients = lung$patient.ident %>% unique() %>% as.character()
lung_patients_filtered = lung_patients[!(lung_patients %in% c("X1055new","X1099"))] # remove patients with less than 100 malignant cells
lung = subset(x = lung,subset = patient.ident %in% lung_patients_filtered)
suffix ="xeno_genes_0-5sigma_2-7theta_100iter_26_9"
from cnmf import cNMF
suffix = r.suffix
import pickle
f = open('./Data/cnmf/cnmf_objects/patients_' + suffix + '_cnmf_obj.pckl', 'rb')
cnmf_obj = pickle.load(f)
f.close()
library(stringi)
library(reticulate)
source_from_github(repositoy = "DEG_functions",version = "0.2.47")
source_from_github(repositoy = "cNMF_functions",version = "0.4.02",script_name = "cnmf_functions_V3.R")
source_from_github(repositoy = "sc_general_functions",version = "0.1.28",script_name = "functions.R")
genesets <- msigdb_download("Homo sapiens",category="H") %>% append( msigdb_download("Homo sapiens",category="C2",subcategory = "CP"))
genesets[["HIF_targets"]] = hif_targets
genesets_go <- msigdb_gsets("Homo sapiens", "C5", "GO:BP", clean=TRUE)
DimPlot(lung,group.by = "patient.ident",shuffle = T)+DimPlot(lung,group.by = "time.point",shuffle = T )
plot_path = paste0("/sci/labs/yotamd/lab_share/avishai.wizel/R_projects/EGFR/Data/cnmf/cNMF_patients_Varnorm_Harmony_",suffix,"/cNMF_patients_Varnorm_Harmony_",suffix,".k_selection.png")
knitr::include_graphics(plot_path)
cnmf_obj.consensus(k=3, density_threshold=0.1,show_clustering=True)
cnmf_obj.consensus(k=6, density_threshold=0.1,show_clustering=True)
cnmf_obj.consensus(k=7, density_threshold=0.1,show_clustering=True)
cnmf_obj.consensus(k=8, density_threshold=0.1,show_clustering=True)
density_threshold = 0.1
usage_norm3, gep_scores3, gep_tpm3, topgenes = cnmf_obj.load_results(K=3, density_threshold=density_threshold)
usage_norm6, gep_scores6, gep_tpm6, topgenes = cnmf_obj.load_results(K=6, density_threshold=density_threshold)
usage_norm7, gep_scores7, gep_tpm7, topgenes = cnmf_obj.load_results(K=7, density_threshold=density_threshold)
usage_norm8, gep_scores8, gep_tpm8, topgenes = cnmf_obj.load_results(K=8, density_threshold=density_threshold)
gep_scores3 = py$gep_scores3
gep_scores6 = py$gep_scores6
gep_scores7 = py$gep_scores7
gep_scores8 = py$gep_scores8
gep_tpm8 = py$gep_tpm8
top_genes = py$topgenes
for (col in seq_along(gep_scores8)) {
ranked_vec = gep_scores8[,col] %>% setNames(rownames(gep_scores8)) %>% sort(decreasing = TRUE)
hyp_obj <- hypeR_fgsea(ranked_vec, genesets,up_only = T)
print_tab(hyp_dots(hyp_obj,title = paste("program",col))+ aes(size=nes),title = paste0("gep",col))
}
Warning in fgsea::fgseaMultilevel(stats = signature, pathways = gsets.obj$genesets, : There were 1 pathways for which P-values were not calculated properly due to unbalanced (positive and negative) gene-level statistic values. For such pathways pval, padj, NES, log2err are set to NA. You can try to increase the value of the argument nPermSimple (for example set it nPermSimple = 10000) ## gep6 {.unnumbered }
NA
programs_main_pathways = list(gep1 = 1:2, gep2 = 1:3,gep3 = 1:4)
# get expression with genes in cnmf input
genes = rownames(gep_scores8)
lung_expression = t(as.matrix(GetAssayData(lung,slot='data')))
lung_expression = 2**lung_expression #convert from log2(tpm+1) to tpm
lung_expression = lung_expression-1
lung_expression = lung_expression[,genes] %>% as.data.frame()
all_0_genes = colnames(lung_expression)[colSums(lung_expression==0, na.rm=TRUE)==nrow(lung_expression)] #delete rows that have all 0
genes = genes[!genes %in% all_0_genes]
lung_expression = lung_expression[,!colnames(lung_expression) %in% all_0_genes]
gc(verbose = F)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 16160973 863.1 23764145 1269.2 23764145 1269.2 Vcells 835781914 6376.6 2506002394 19119.3 1899752628 14494.0
lung_expression = r.lung_expression
genes = r.genes
usage_by_calc = get_usage_from_score(counts=lung_expression,tpm=lung_expression,genes=genes,cnmf_obj=cnmf_obj,k=8,sumTo1=True)
/sci/labs/yotamd/lab_share/avishai.wizel/python_envs/miniconda/envs/cnmf_env_6/bin/python3.7:7: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
/sci/labs/yotamd/lab_share/avishai.wizel/python_envs/miniconda/envs/cnmf_env_6/bin/python3.7:8: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
usage_by_calc = py$usage_by_calc
colnames(usage_by_calc) = c("autoimmune","TNFa.NFkB", "hypoxia","unknown2", "cell_cycle1", "cell_cycle2","INFg","unknown")
# colnames(usage_by_calc) = paste0("gep",1:8)
#add each metagene to metadata
for (i in 1:ncol(usage_by_calc )) {
metagene_metadata = usage_by_calc [,i,drop=F]
lung = AddMetaData(object = lung,metadata = metagene_metadata,col.name = colnames(usage_by_calc)[i])
}
FeaturePlot(object = lung,features = colnames(usage_by_calc),ncol = 3)
metagenes_mean_compare(dataset = lung,time.point_var = "time.point",prefix = "patient",patient.ident_var = "patient.ident",pre_on = c("pre-treatment","on-treatment"),test = "wilcox.test",programs = colnames(usage_by_calc)[1:5],without_split = F)
NA
DotPlot.2(object = lung, features = colnames(usage_by_calc)[1:5],group.by = 'time.point',scale = T)+
guides(size = guide_legend(title = "Cells expressing (%)"),color = guide_colorbar(title = "Average Score"))
all_pathways = list()
for (pathway in colnames(usage_by_calc)) {
data = FetchData(object = lung,vars = c(pathway, "time.point", "patient.ident"))
all_patients = list()
for (patient in unique(lung$patient.ident)) {
mean1 = data %>% filter(patient.ident == patient, time.point == "pre-treatment") %>% pull(1) %>% mean()
mean2 = data %>% filter(patient.ident == patient, time.point == "on-treatment") %>% pull(1) %>% mean()
fc =(mean1+1) / (mean2+1)
all_patients[[patient]] = fc
}
all_pathways[[pathway]] = all_patients
}
mat = as.data.frame(lapply(all_pathways, unlist))
mat = log2(t(mat) %>% as.data.frame())
breaks <- c(seq(-1,1,by=0.05))
colors_for_plot <- colorRampPalette(colors = c("blue", "white", "red"))(n = length(breaks)-1); colors_for_plot[20:21] = "white"
pheatmap::pheatmap(mat,color = colors_for_plot,breaks = breaks,display_numbers = T,main = "log2(FC) pre/on")
ranked_vec = gep_scores8[,2] %>% setNames(rownames(gep_scores8)) %>% sort(decreasing = TRUE)
hyp_obj <- hypeR_fgsea(ranked_vec, genesets)
le_genes_tnfa = hyp_obj$data %>% filter(label == "HALLMARK_TNFA_SIGNALING_VIA_NFKB") %>% pull("le") %>% strsplit(",") %>% unlist()
ranked_vec = gep_scores8[,3] %>% setNames(rownames(gep_scores8)) %>% sort(decreasing = TRUE)
hyp_obj <- hypeR_fgsea(ranked_vec, genesets)
le_genes_hif = hyp_obj$data %>% filter(label == "HIF_targets") %>% pull("le") %>% strsplit(",") %>% unlist()
gene_list = list(autoimmune_genes = genesets$KEGG_AUTOIMMUNE_THYROID_DISEASE, TNFa_genes = genesets$HALLMARK_TNFA_SIGNALING_VIA_NFKB, hif_targets = hif_targets,E2F_genes = genesets$HALLMARK_E2F_TARGETS,gep2_top = gep_scores8 %>% arrange(desc(gep_scores8[2])) %>% rownames() %>% head(50),TNFa_le = le_genes_tnfa,hif_le = le_genes_hif,autoimmune_le = le_genes_autoimmune)
# lung = ScaleData(object = lung,features = unlist(gene_list))
for (i in seq_along(gene_list)) {
genes = gene_list[[i]]
name = names(gene_list)[i]
scores = FetchData(object = lung,vars = genes,slot = "data") %>% 2^. %>% magrittr::subtract(1) %>% rowMeans() #use TPM
# scores = FetchData(object = lung,vars = genes,slot = "data") %>% rowMeans() #use log TPM
# scores = expression[,genes] %>% rowMeans() #use TPM
lung %<>% AddMetaData(metadata = scores,col.name = name)
}
Warning in FetchData.Seurat(object = lung, vars = genes, slot = "data") :
The following requested variables were not found: HLA-DRB3, HLA-DRB4, IFNA10, IFNA14, IFNA16, IFNA17, IFNA4, IFNA6, IFNA7, IFNA8
Warning in FetchData.Seurat(object = lung, vars = genes, slot = "data") :
The following requested variables were not found: CCN1
Warning in FetchData.Seurat(object = lung, vars = genes, slot = "data") :
The following requested variables were not found: AC114803.1, BNIP3P1, OVOL1-AS1, BICDL2, LDHAP5, AL158201.1, NLRP3P1, GSX1, AL109946.1
Warning in FetchData.Seurat(object = lung, vars = genes, slot = "data") :
The following requested variables were not found: CNOT9, H2AX, H2AZ1, JPT1, MRE11
cor(lung$TNFa.NFkB, lung$gep2_top)
[1] 0.7171286
cor(lung$TNFa.NFkB, lung$TNFa_le)
[1] 0.3031179
cor(lung$TNFa.NFkB, lung$TNFa_genes)
[1] 0.1956391
cat ("\n")
cor(lung$hypoxia, lung$hif_targets)
[1] 0.5026576
cor(lung$hypoxia, lung$hif_le)
[1] 0.4716419
all_pathways = list()
for (pathway in names(gene_list)) {
data = FetchData(object = lung,vars = c(pathway, "time.point", "patient.ident"))
all_patients = list()
for (patient in unique(lung$patient.ident)) {
mean1 = data %>% filter(patient.ident == patient, time.point == "pre-treatment") %>% pull(1) %>% mean()
mean2 = data %>% filter(patient.ident == patient, time.point == "on-treatment") %>% pull(1) %>% mean()
fc =(mean1+1) / (mean2+1)
all_patients[[patient]] = fc
}
all_pathways[[pathway]] = all_patients
}
a = as.data.frame(lapply(all_pathways, unlist))
a = log2(t(a) %>% as.data.frame())
breaks <- c(seq(-1,1,by=0.05))
colors_for_plot <- colorRampPalette(colors = c("blue", "white", "red"))(n = length(breaks)-1); colors_for_plot[20:21] = "white"
pheatmap::pheatmap(a,color = colors_for_plot,breaks = breaks,display_numbers = T,main = "log2(FC) pre/on")