1 Functions

library(stringi)
library(reticulate)
source_from_github(repositoy = "DEG_functions",version = "0.2.24")
source_from_github(repositoy = "cNMF_functions",version = "0.3.91",script_name = "cnmf_function_Harmony.R")

2 Data

xeno = readRDS("./Data/10x_xeno_1000.Rds")
lung = readRDS("./Data/lung_cancercells_withTP_onlyPatients.rds")
lung_patients = lung$patient.ident %>% unique() %>% as.character()
lung_patients_filtered = lung_patients[!(lung_patients %in% c("X1055new","X1099"))] # remove patients with less than 100 malignant cells
lung = subset(x = lung,subset = patient.ident %in% lung_patients_filtered)

3 Models 2K vargenes

import pickle
from cnmf import cNMF
f = open('./Data/cnmf/cnmf_objects/models_2Kvargenes_cnmf_obj.pckl', 'rb')
cnmf_obj = pickle.load(f)
f.close()

4 K selection plot

plot_path = paste0("/sci/labs/yotamd/lab_share/avishai.wizel/R_projects/EGFR/Data/cnmf/cNMF_models_Varnorm_Harmony_2Kvargenes/cNMF_models_Varnorm_Harmony_2Kvargenes.k_selection.png")
knitr::include_graphics(plot_path)
selected_k = 5
density_threshold = 0.1
# cnmf_obj.consensus(k=selected_k, density_threshold=density_threshold)
usage_norm, gep_scores, gep_tpm, topgenes = cnmf_obj.load_results(K=selected_k, density_threshold=density_threshold)
xeno_5_gep_scores = py$gep_scores

genesets <- msigdb_download("Homo sapiens",category="H") %>% append( msigdb_download("Homo sapiens",category="C2",subcategory = "CP:KEGG"))
genesets[["HIF_targets"]] = hif_targets
genesets = gsets$new(genesets, name="my genesets", version="v1.0")

ranked_list = list()
  for (col in (xeno_5_gep_scores)) {
   lst = col %>% setNames(rownames(xeno_5_gep_scores)) %>% sort(decreasing = TRUE) 
   ranked_list  %<>% append(list(lst))
  }
  names(ranked_list) = paste0("gep",1:ncol(xeno_5_gep_scores))
  
  
  hyp_obj <- hypeR(ranked_list, genesets, test="kstest", fdr=0.05, plotting=F,background = rownames(xeno_5_gep_scores))
  print(
    hyp_dots(hyp_obj,size_by = "significance",abrv = 100,merge = F)
  )
$gep1

$gep2

$gep3

$gep4

$gep5

xeno = FindVariableFeatures(object = xeno,nfeatures = 2000)
xeno_vargenes = VariableFeatures(object = xeno)

xeno_expression = FetchData(object = xeno,vars = xeno_vargenes,slot='counts')
all_0_genes = colnames(xeno_expression)[colSums(xeno_expression==0, na.rm=TRUE)==nrow(xeno_expression)] #delete rows that have all 0
xeno_vargenes = xeno_vargenes[!xeno_vargenes %in% all_0_genes]

5 calculate score for Xeno

import numpy as np
import scanpy as sc
xeno_expression = r.xeno_expression
xeno_vargenes = r.xeno_vargenes
tpm =  compute_tpm(xeno_expression)
usage_by_calc = get_usage_from_score(counts=xeno_expression,tpm=tpm,genes=xeno_vargenes, cnmf_obj=cnmf_obj,k=5)
/sci/labs/yotamd/lab_share/avishai.wizel/python_envs/miniconda/envs/cnmf_env_6/bin/python3.7:7: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
/sci/labs/yotamd/lab_share/avishai.wizel/python_envs/miniconda/envs/cnmf_env_6/bin/python3.7:8: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
all_metagenes = py$usage_by_calc
colnames(all_metagenes) = c("IFNa","immune_response", "hypoxia","cell_cycle","unknown")

6 programs expression


#add each metagene to metadata
for (i  in 1:ncol(all_metagenes)) {
  metagene_metadata = all_metagenes[,i,drop=F]
  xeno = AddMetaData(object = xeno,metadata = metagene_metadata,col.name = names(all_metagenes)[i])
}

print_tab(plt = FeaturePlot(object = xeno,features = colnames(all_metagenes),ncol = 3),title = "umap expression")

umap expression

NA

DotPlot(object = xeno, features =  colnames(all_metagenes),group.by  = 'treatment')

metagenes_mean_compare(dataset = xeno,time.point_var = "treatment",prefix = "model",patient.ident_var = "orig.ident",pre_on = c("NT","OSI"),test = "wilcox.test",programs = colnames(all_metagenes)[1:3])
LS0tCnRpdGxlOiAnYHIgcnN0dWRpb2FwaTo6Z2V0U291cmNlRWRpdG9yQ29udGV4dCgpJHBhdGggJT4lIGJhc2VuYW1lKCkgJT4lIGdzdWIocGF0dGVybiA9ICJcXC5SbWQiLHJlcGxhY2VtZW50ID0gIiIpYCcgCmF1dGhvcjogIkF2aXNoYWkgV2l6ZWwiCmRhdGU6ICdgciBTeXMudGltZSgpYCcKb3V0cHV0OiAKICBodG1sX25vdGVib29rOiAKICAgIGNvZGVfZm9sZGluZzogaGlkZQogICAgdG9jOiB5ZXMKICAgIHRvY19jb2xsYXBzZTogeWVzCiAgICB0b2NfZmxvYXQ6IAogICAgICBjb2xsYXBzZWQ6IEZBTFNFCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIHRvY19kZXB0aDogMQplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQotLS0KCgoKIyBGdW5jdGlvbnMKCmBgYHtyIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkoc3RyaW5naSkKbGlicmFyeShyZXRpY3VsYXRlKQpzb3VyY2VfZnJvbV9naXRodWIocmVwb3NpdG95ID0gIkRFR19mdW5jdGlvbnMiLHZlcnNpb24gPSAiMC4yLjI0IikKc291cmNlX2Zyb21fZ2l0aHViKHJlcG9zaXRveSA9ICJjTk1GX2Z1bmN0aW9ucyIsdmVyc2lvbiA9ICIwLjMuOTEiLHNjcmlwdF9uYW1lID0gImNubWZfZnVuY3Rpb25fSGFybW9ueS5SIikKCmBgYAoKIyBEYXRhCgpgYGB7ciByZWFkX2RhdGF9Cnhlbm8gPSByZWFkUkRTKCIuL0RhdGEvMTB4X3hlbm9fMTAwMC5SZHMiKQpsdW5nID0gcmVhZFJEUygiLi9EYXRhL2x1bmdfY2FuY2VyY2VsbHNfd2l0aFRQX29ubHlQYXRpZW50cy5yZHMiKQpsdW5nX3BhdGllbnRzID0gbHVuZyRwYXRpZW50LmlkZW50ICU+JSB1bmlxdWUoKSAlPiUgYXMuY2hhcmFjdGVyKCkKbHVuZ19wYXRpZW50c19maWx0ZXJlZCA9IGx1bmdfcGF0aWVudHNbIShsdW5nX3BhdGllbnRzICVpbiUgYygiWDEwNTVuZXciLCJYMTA5OSIpKV0gIyByZW1vdmUgcGF0aWVudHMgd2l0aCBsZXNzIHRoYW4gMTAwIG1hbGlnbmFudCBjZWxscwpsdW5nID0gc3Vic2V0KHggPSBsdW5nLHN1YnNldCA9IHBhdGllbnQuaWRlbnQgJWluJSBsdW5nX3BhdGllbnRzX2ZpbHRlcmVkKQpgYGAKCiMgTW9kZWxzIDJLIHZhcmdlbmVzIAoKYGBge3B5dGhvbn0KaW1wb3J0IHBpY2tsZQpmcm9tIGNubWYgaW1wb3J0IGNOTUYKZiA9IG9wZW4oJy4vRGF0YS9jbm1mL2NubWZfb2JqZWN0cy9tb2RlbHNfMkt2YXJnZW5lc19jbm1mX29iai5wY2tsJywgJ3JiJykKY25tZl9vYmogPSBwaWNrbGUubG9hZChmKQpmLmNsb3NlKCkKYGBgCgojIEsgc2VsZWN0aW9uIHBsb3QKYGBge3IgZmlnLmhlaWdodD0yLCBmaWcud2lkdGg9Mn0KcGxvdF9wYXRoID0gcGFzdGUwKCIvc2NpL2xhYnMveW90YW1kL2xhYl9zaGFyZS9hdmlzaGFpLndpemVsL1JfcHJvamVjdHMvRUdGUi9EYXRhL2NubWYvY05NRl9tb2RlbHNfVmFybm9ybV9IYXJtb255XzJLdmFyZ2VuZXMvY05NRl9tb2RlbHNfVmFybm9ybV9IYXJtb255XzJLdmFyZ2VuZXMua19zZWxlY3Rpb24ucG5nIikKa25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MocGxvdF9wYXRoKQpgYGAKCmBgYHtweXRob259CnNlbGVjdGVkX2sgPSA1CmRlbnNpdHlfdGhyZXNob2xkID0gMC4xCiMgY25tZl9vYmouY29uc2Vuc3VzKGs9c2VsZWN0ZWRfaywgZGVuc2l0eV90aHJlc2hvbGQ9ZGVuc2l0eV90aHJlc2hvbGQpCnVzYWdlX25vcm0sIGdlcF9zY29yZXMsIGdlcF90cG0sIHRvcGdlbmVzID0gY25tZl9vYmoubG9hZF9yZXN1bHRzKEs9c2VsZWN0ZWRfaywgZGVuc2l0eV90aHJlc2hvbGQ9ZGVuc2l0eV90aHJlc2hvbGQpCmBgYAoKYGBge3J9Cnhlbm9fNV9nZXBfc2NvcmVzID0gcHkkZ2VwX3Njb3JlcwpgYGAKCmBgYHtyfQoKZ2VuZXNldHMgPC0gbXNpZ2RiX2Rvd25sb2FkKCJIb21vIHNhcGllbnMiLGNhdGVnb3J5PSJIIikgJT4lIGFwcGVuZCggbXNpZ2RiX2Rvd25sb2FkKCJIb21vIHNhcGllbnMiLGNhdGVnb3J5PSJDMiIsc3ViY2F0ZWdvcnkgPSAiQ1A6S0VHRyIpKQpnZW5lc2V0c1tbIkhJRl90YXJnZXRzIl1dID0gaGlmX3RhcmdldHMKZ2VuZXNldHMgPSBnc2V0cyRuZXcoZ2VuZXNldHMsIG5hbWU9Im15IGdlbmVzZXRzIiwgdmVyc2lvbj0idjEuMCIpCgpyYW5rZWRfbGlzdCA9IGxpc3QoKQogIGZvciAoY29sIGluICh4ZW5vXzVfZ2VwX3Njb3JlcykpIHsKICAgbHN0ID0gY29sICU+JSBzZXROYW1lcyhyb3duYW1lcyh4ZW5vXzVfZ2VwX3Njb3JlcykpICU+JSBzb3J0KGRlY3JlYXNpbmcgPSBUUlVFKSAKICAgcmFua2VkX2xpc3QgICU8PiUgYXBwZW5kKGxpc3QobHN0KSkKICB9CiAgbmFtZXMocmFua2VkX2xpc3QpID0gcGFzdGUwKCJnZXAiLDE6bmNvbCh4ZW5vXzVfZ2VwX3Njb3JlcykpCiAgCiAgCiAgaHlwX29iaiA8LSBoeXBlUihyYW5rZWRfbGlzdCwgZ2VuZXNldHMsIHRlc3Q9ImtzdGVzdCIsIGZkcj0wLjA1LCBwbG90dGluZz1GLGJhY2tncm91bmQgPSByb3duYW1lcyh4ZW5vXzVfZ2VwX3Njb3JlcykpCiAgcHJpbnQoCiAgICBoeXBfZG90cyhoeXBfb2JqLHNpemVfYnkgPSAic2lnbmlmaWNhbmNlIixhYnJ2ID0gMTAwLG1lcmdlID0gRikKICApCgoKYGBgCgpgYGB7cn0KeGVubyA9IEZpbmRWYXJpYWJsZUZlYXR1cmVzKG9iamVjdCA9IHhlbm8sbmZlYXR1cmVzID0gMjAwMCkKeGVub192YXJnZW5lcyA9IFZhcmlhYmxlRmVhdHVyZXMob2JqZWN0ID0geGVubykKCnhlbm9fZXhwcmVzc2lvbiA9IEZldGNoRGF0YShvYmplY3QgPSB4ZW5vLHZhcnMgPSB4ZW5vX3ZhcmdlbmVzLHNsb3Q9J2NvdW50cycpCmFsbF8wX2dlbmVzID0gY29sbmFtZXMoeGVub19leHByZXNzaW9uKVtjb2xTdW1zKHhlbm9fZXhwcmVzc2lvbj09MCwgbmEucm09VFJVRSk9PW5yb3coeGVub19leHByZXNzaW9uKV0gI2RlbGV0ZSByb3dzIHRoYXQgaGF2ZSBhbGwgMAp4ZW5vX3ZhcmdlbmVzID0geGVub192YXJnZW5lc1sheGVub192YXJnZW5lcyAlaW4lIGFsbF8wX2dlbmVzXQpgYGAKCiMgY2FsY3VsYXRlIHNjb3JlIGZvciBYZW5vCmBgYHtweXRob259CmltcG9ydCBudW1weSBhcyBucAppbXBvcnQgc2NhbnB5IGFzIHNjCnhlbm9fZXhwcmVzc2lvbiA9IHIueGVub19leHByZXNzaW9uCnhlbm9fdmFyZ2VuZXMgPSByLnhlbm9fdmFyZ2VuZXMKdHBtID0gIGNvbXB1dGVfdHBtKHhlbm9fZXhwcmVzc2lvbikKdXNhZ2VfYnlfY2FsYyA9IGdldF91c2FnZV9mcm9tX3Njb3JlKGNvdW50cz14ZW5vX2V4cHJlc3Npb24sdHBtPXRwbSxnZW5lcz14ZW5vX3ZhcmdlbmVzLCBjbm1mX29iaj1jbm1mX29iaixrPTUpCmBgYAoKCmBgYHtyfQphbGxfbWV0YWdlbmVzID0gcHkkdXNhZ2VfYnlfY2FsYwpjb2xuYW1lcyhhbGxfbWV0YWdlbmVzKSA9IGMoIklGTmEiLCJpbW11bmVfcmVzcG9uc2UiLCAiaHlwb3hpYSIsImNlbGxfY3ljbGUiLCJ1bmtub3duIikKYGBgCgoKIyBwcm9ncmFtcyBleHByZXNzaW9uIHsudGFic2V0fQpgYGB7ciBlY2hvPVRSVUUsIGZpZy5oZWlnaHQ9NywgZmlnLndpZHRoPTEyLCByZXN1bHRzPSdhc2lzJ30KCiNhZGQgZWFjaCBtZXRhZ2VuZSB0byBtZXRhZGF0YQpmb3IgKGkgIGluIDE6bmNvbChhbGxfbWV0YWdlbmVzKSkgewogIG1ldGFnZW5lX21ldGFkYXRhID0gYWxsX21ldGFnZW5lc1ssaSxkcm9wPUZdCiAgeGVubyA9IEFkZE1ldGFEYXRhKG9iamVjdCA9IHhlbm8sbWV0YWRhdGEgPSBtZXRhZ2VuZV9tZXRhZGF0YSxjb2wubmFtZSA9IG5hbWVzKGFsbF9tZXRhZ2VuZXMpW2ldKQp9CgpwcmludF90YWIocGx0ID0gRmVhdHVyZVBsb3Qob2JqZWN0ID0geGVubyxmZWF0dXJlcyA9IGNvbG5hbWVzKGFsbF9tZXRhZ2VuZXMpLG5jb2wgPSAzKSx0aXRsZSA9ICJ1bWFwIGV4cHJlc3Npb24iKQoKCmBgYAoKCmBgYHtyIGZpZy53aWR0aD04fQpEb3RQbG90KG9iamVjdCA9IHhlbm8sIGZlYXR1cmVzID0gIGNvbG5hbWVzKGFsbF9tZXRhZ2VuZXMpLGdyb3VwLmJ5ICA9ICd0cmVhdG1lbnQnKQpgYGAKCmBgYHtyIGVjaG89VFJVRSwgIHJlc3VsdHM9J2FzaXMnfQptZXRhZ2VuZXNfbWVhbl9jb21wYXJlKGRhdGFzZXQgPSB4ZW5vLHRpbWUucG9pbnRfdmFyID0gInRyZWF0bWVudCIscHJlZml4ID0gIm1vZGVsIixwYXRpZW50LmlkZW50X3ZhciA9ICJvcmlnLmlkZW50IixwcmVfb24gPSBjKCJOVCIsIk9TSSIpLHRlc3QgPSAid2lsY294LnRlc3QiLHByb2dyYW1zID0gY29sbmFtZXMoYWxsX21ldGFnZW5lcylbMTozXSkKCmBgYAo8c2NyaXB0IHNyYz0iaHR0cHM6Ly9oeXBvdGhlcy5pcy9lbWJlZC5qcyIgYXN5bmM+PC9zY3JpcHQ+Cgo=