Data

xeno = readRDS("./Data/10x_xeno_1000.Rds")
by_expression = T #calculate metagenes by multiplie expression in genes coef, or by cnmf usage
suffix = "2Kvargenes"
suffix = r.suffix
import pickle
from cnmf import cNMF
f = open('./Data/cnmf/cnmf_objects/models_' + suffix + '_cnmf_obj.pckl', 'rb')
cnmf_obj = pickle.load(f)
f.close()

Functions

library(stringi)
library(reticulate)
source_from_github(repositoy = "DEG_functions",version = "0.2.24")
source_from_github(repositoy = "cNMF_functions",version = "0.3.63",script_name = "cnmf_function_Harmony.R")
selected_k = 3
suffix = paste(suffix,paste0(selected_k,"nmfK"),sep="_")
print(suffix)
[1] "2Kvargenes_3nmfK"
selected_k = int(r.selected_k)
density_threshold = 0.1
# cnmf_obj.consensus(k=selected_k, density_threshold=density_threshold)
usage_norm, gep_scores, gep_tpm, topgenes = cnmf_obj.load_results(K=selected_k, density_threshold=density_threshold)
gep_scores= py$gep_scores

hypoxia_genes = msigdbr(species = "Homo sapiens", category = "H") %>% filter(grepl('HALLMARK_HYPOXIA', gs_name)) %>% pull("gene_symbol") 
tnfa_genes = msigdbr(species = "Homo sapiens", category = "H") %>% filter(grepl('HALLMARK_TNFA_SIGNALING_VIA_NFKB', gs_name)) %>% pull("gene_symbol") 

cell_cycle_genes = msigdbr(species = "Homo sapiens", category = "H") %>% filter(grepl('HALLMARK_G2M_CHECKPOINT', gs_name)) %>% pull("gene_symbol") 

hallmark_genes = list(hypoxia_genes,tnfa_genes,cell_cycle_genes)
if (by_expression){
  message('By expression')
  
  no_neg <- function(x) {
  x = x + abs(min(x))
  x
  }
  gep_scores_norm= gep_scores
gep_scores_norm = apply(gep_scores_norm, MARGIN = 2, FUN = no_neg)%>% as.data.frame()
gep_scores_norm = sum2one(gep_scores_norm)

# gep_scores_norm = apply(gep_scores, MARGIN = 2, FUN = min_max_normalize)%>% as.data.frame()
# gep_scores_norm = sum2one(gep_scores_norm)
  all_metagenes = expression_mult(gep_scores = gep_scores_norm,dataset = xeno,top_genes = T,max_genes = F,z_score  = F,hallmark_genes = NULL)

}else{
    message('By usage matrix')

  all_metagenes= py$usage_norm}
By expression
gep_scores_norm = gep_scores
gep_scores_norm = apply(gep_scores_norm, MARGIN = 2, FUN = no_neg)%>% as.data.frame()
# 
# gep_scores_norm = apply(gep_scores, MARGIN = 2, FUN = min_max_normalize)%>% as.data.frame()
gep_scores_norm = sum2one(gep_scores_norm)
# gep_scores_norm = scale(gep_scores_norm)

all_metagenes = expression_inversion(gep_scores = gep_scores_norm %>% as.matrix(),dataset = xeno) %>% t() %>% as.data.frame()
# gep_scores_norm = apply(gep_scores, MARGIN = 2, FUN = no_neg)%>% as.data.frame()

all_metagenes = apply(all_metagenes, MARGIN = 2, FUN = min_max_normalize)%>% as.data.frame()
# all_metagenes = sum2one(all_metagenes)
#Make metagene names
for (i in 1:ncol(all_metagenes)) {
  colnames(all_metagenes)[i] = "metagene." %>% paste0(i)
}

#add each metagene to metadata
for (i in 1:ncol(all_metagenes)) {
  metage_metadata = all_metagenes %>% dplyr::select(i)
  xeno = AddMetaData(object = xeno,metadata = metage_metadata)
}

FeaturePlot(object = xeno,features = colnames(all_metagenes))

Enrichment analysis by top 200 genes of each program

hif_targets_set = data.frame(gs_name = "hif_targets",gene_symbol = hif_targets)
gep_scores = py$gep_scores
plt_list = list()
for (i in 1:ncol(gep_scores)) {
  top_genes = gep_scores  %>%  arrange(desc(gep_scores[i])) #sort by score a
  top = head(rownames(top_genes),200) #take top top_genes_num
  res = genes_vec_enrichment(genes = top,background = rownames(gep_scores),homer = T,title = 
                    i,silent = T,return_all = T,custom_pathways  = hif_targets_set)
   
  plt_list[[i]] = res$plt
}
gridExtra::grid.arrange(grobs = plt_list)

Enrichment analysis by selecting genes using “max” method

Enriched in time point

larger_by = 1.5
xeno = program_assignment(dataset = xeno,larger_by = larger_by,program_names = colnames(all_metagenes))

Percentages

p = cell_percentage(dataset = xeno,time.point_var = "treatment",by_program = T)
print_tab(plt = p,title = "by program")

by program

p = cell_percentage(dataset = xeno,time.point_var = "treatment",by_tp = T)
print_tab(plt = p,title = "by timepoint")

by timepoint

NA

xeno = SetIdent(object = xeno,value = "program.assignment")
xeno  = RenameIdents(object = xeno,"metagene.1" = "Hypoxia")
xeno  = RenameIdents(object = xeno,"metagene.2" = "TNFa")
xeno  = RenameIdents(object = xeno,"metagene.3" = "cell_cycle")

xeno$program.assignment = Idents(object = xeno)

UMAPS

# DimPlot(xeno,group.by = "program.assignment",cols = c("red","green","grey"))
print_tab(plt = 
            DimPlot(xeno,group.by = "program.assignment",cols = c(Hypoxia = "red",TNFa = "green",cell_cycle = "blue","NA" = "grey"))
          ,title = "orig.ident")

orig.ident

print_tab(plt = 
              DimPlot(xeno,group.by = "orig.ident")
          ,title = "program.assignment")

program.assignment

print_tab(plt = 
            DimPlot(xeno,group.by = "treatment")
          ,title = "treatment")

treatment

# DimPlot(xeno,group.by = "program.assignment",cols = c("red","darkgreen","blue","yellow","black","grey"))

Score regulation


i = 1
for (metegene in c("metagene.1","metagene.2")) {

  genes_by_tp = FetchData(object = xeno,vars = metegene) %>% rowSums() %>% as.data.frame() #mean expression
  names(genes_by_tp)[1] = "Metagene_mean"
  
  genes_by_tp = cbind(genes_by_tp,FetchData(object = xeno,vars = c("orig.ident","treatment"))) # add vars
  
  genes_by_tp_forPlot =  genes_by_tp %>% mutate(orig.ident = paste("model",orig.ident)) #add "model" before model num
  my_comparisons = list( c("NT", "OSI") )
  plt = ggplot(genes_by_tp_forPlot, aes(x=treatment, y=Metagene_mean)) +
    geom_boxplot() +
    scale_y_continuous(limits = c(0, max(genes_by_tp_forPlot[1])*1.1))+ #scale axis
    facet_wrap(~orig.ident, nrow = 2, strip.position = "top")+
    stat_compare_means(comparisons = my_comparisons,method = "wilcox.test") + # Add pairwise comparisons p-value
  ylab(paste(metegene,"mean"))

  
  print_tab(plt = plt,title = c(metegene,"per patient"))

  plt = ggplot(genes_by_tp_forPlot, aes(x=treatment, y=Metagene_mean)) +
    geom_boxplot() +
    scale_y_continuous(limits = c(0, max(genes_by_tp_forPlot[1])*1.1))+ #scale axis
    stat_compare_means(comparisons = my_comparisons,method = "wilcox.test") + # Add pairwise comparisons p-value
  ylab(paste(metegene,"mean"))
  
  print_tab(plt = plt,title = metegene)
    i = i+1
}

metagene.1 per patient

metagene.1

metagene.2 per patient

metagene.2

NA

---
title: "Xeno_cnmf_analysis"
author: "Avishai Wizel"
date: '`r Sys.time()`'
output: 
  html_notebook: 
    code_folding: hide
---
## Data
```{r}
xeno = readRDS("./Data/10x_xeno_1000.Rds")
```



```{r}
by_expression = T #calculate metagenes by multiplie expression in genes coef, or by cnmf usage
suffix = "2Kvargenes"
```

```{python}
suffix = r.suffix
import pickle
from cnmf import cNMF
f = open('./Data/cnmf/cnmf_objects/models_' + suffix + '_cnmf_obj.pckl', 'rb')
cnmf_obj = pickle.load(f)
f.close()
```

## Functions

```{r}
library(stringi)
library(reticulate)
source_from_github(repositoy = "DEG_functions",version = "0.2.24")
source_from_github(repositoy = "cNMF_functions",version = "0.3.63",script_name = "cnmf_function_Harmony.R")
```
<!-- ## K selection plot -->
<!-- ![Caption for the picture.](/sci/labs/yotamd/lab_share/avishai.wizel/R_projects/EGFR/Data/cnmf/cNMF_patients_Varnorm_Harmony_2Kvargenes/cNMF_patients_Varnorm_Harmony_2Kvargenes.k_selection.png) -->


```{r}
selected_k = 3
suffix = paste(suffix,paste0(selected_k,"nmfK"),sep="_")
print(suffix)
```


```{python}
selected_k = int(r.selected_k)
density_threshold = 0.1
# cnmf_obj.consensus(k=selected_k, density_threshold=density_threshold)
usage_norm, gep_scores, gep_tpm, topgenes = cnmf_obj.load_results(K=selected_k, density_threshold=density_threshold)
```

```{r}
gep_scores= py$gep_scores
```

```{r}

hypoxia_genes = msigdbr(species = "Homo sapiens", category = "H") %>% filter(grepl('HALLMARK_HYPOXIA', gs_name)) %>% pull("gene_symbol") 
tnfa_genes = msigdbr(species = "Homo sapiens", category = "H") %>% filter(grepl('HALLMARK_TNFA_SIGNALING_VIA_NFKB', gs_name)) %>% pull("gene_symbol") 

cell_cycle_genes = msigdbr(species = "Homo sapiens", category = "H") %>% filter(grepl('HALLMARK_G2M_CHECKPOINT', gs_name)) %>% pull("gene_symbol") 

hallmark_genes = list(hypoxia_genes,tnfa_genes,cell_cycle_genes)
if (by_expression){
  message('By expression')
  
  no_neg <- function(x) {
  x = x + abs(min(x))
  x
  }
  gep_scores_norm= gep_scores
gep_scores_norm = apply(gep_scores_norm, MARGIN = 2, FUN = no_neg)%>% as.data.frame()
gep_scores_norm = sum2one(gep_scores_norm)

# gep_scores_norm = apply(gep_scores, MARGIN = 2, FUN = min_max_normalize)%>% as.data.frame()
# gep_scores_norm = sum2one(gep_scores_norm)
  all_metagenes = expression_mult(gep_scores = gep_scores_norm,dataset = xeno,top_genes = T,max_genes = F,z_score  = F,hallmark_genes = NULL)

}else{
    message('By usage matrix')

  all_metagenes= py$usage_norm}
```
```{r}
gep_scores_norm = gep_scores
gep_scores_norm = apply(gep_scores_norm, MARGIN = 2, FUN = no_neg)%>% as.data.frame()
# 
# gep_scores_norm = apply(gep_scores, MARGIN = 2, FUN = min_max_normalize)%>% as.data.frame()
gep_scores_norm = sum2one(gep_scores_norm)
# gep_scores_norm = scale(gep_scores_norm)

all_metagenes = expression_inversion(gep_scores = gep_scores_norm %>% as.matrix(),dataset = xeno) %>% t() %>% as.data.frame()
# gep_scores_norm = apply(gep_scores, MARGIN = 2, FUN = no_neg)%>% as.data.frame()

all_metagenes = apply(all_metagenes, MARGIN = 2, FUN = min_max_normalize)%>% as.data.frame()
# all_metagenes = sum2one(all_metagenes)

```


```{r fig.height=10, fig.width=10}
#Make metagene names
for (i in 1:ncol(all_metagenes)) {
  colnames(all_metagenes)[i] = "metagene." %>% paste0(i)
}

#add each metagene to metadata
for (i in 1:ncol(all_metagenes)) {
  metage_metadata = all_metagenes %>% dplyr::select(i)
  xeno = AddMetaData(object = xeno,metadata = metage_metadata)
}

FeaturePlot(object = xeno,features = colnames(all_metagenes))

```




## Enrichment analysis by top 200 genes of each program

```{r fig.height=8, fig.width=8, results='hide'}
hif_targets_set = data.frame(gs_name = "hif_targets",gene_symbol = hif_targets)
gep_scores = py$gep_scores
plt_list = list()
for (i in 1:ncol(gep_scores)) {
  top_genes = gep_scores  %>%  arrange(desc(gep_scores[i])) #sort by score a
  top = head(rownames(top_genes),200) #take top top_genes_num
  res = genes_vec_enrichment(genes = top,background = rownames(gep_scores),homer = T,title = 
                    i,silent = T,return_all = T,custom_pathways  = hif_targets_set)
   
  plt_list[[i]] = res$plt
}
gridExtra::grid.arrange(grobs = plt_list)
```

## Enrichment analysis by selecting genes using "max" method
```{r fig.height=7, fig.width=7, results='hide'}
# gep_scores = py$gep_scores
plt_list = list()

library(NMF)
top_features = extractFeatures(object = gep_scores %>% data.matrix(),method ="max")
for (i in 1:length(top_features)) {
  top_features[[i]]= rownames(gep_scores)[top_features[[i]]]
}

for (i in 1:ncol(gep_scores)) {
top = top_features[i] %>% unlist()
  try({ 
 res = genes_vec_enrichment(genes = top,background = rownames(gep_scores),homer = T,title = 
                    i,silent = T,return_all = T)
      plt_list[[i]] = res$plt
    }, silent=TRUE)
}
plt_list = Filter(Negate(is.null), plt_list) #remove null plots


gridExtra::grid.arrange(grobs = plt_list)

```





## Enriched in time point
```{r}
larger_by = 1.5
xeno = program_assignment(dataset = xeno,larger_by = larger_by,program_names = colnames(all_metagenes))
```   

# Percentages {.tabset}


```{r echo=TRUE, results='asis'}
p = cell_percentage(dataset = xeno,time.point_var = "treatment",by_program = T)
print_tab(plt = p,title = "by program")
p = cell_percentage(dataset = xeno,time.point_var = "treatment",by_tp = T)
print_tab(plt = p,title = "by timepoint")


```


```{r}
xeno = SetIdent(object = xeno,value = "program.assignment")
xeno  = RenameIdents(object = xeno,"metagene.1" = "Hypoxia")
xeno  = RenameIdents(object = xeno,"metagene.2" = "TNFa")
xeno  = RenameIdents(object = xeno,"metagene.3" = "cell_cycle")

xeno$program.assignment = Idents(object = xeno)
```

# UMAPS {.tabset}
```{r echo=TRUE, results='asis'}
# DimPlot(xeno,group.by = "program.assignment",cols = c("red","green","grey"))
print_tab(plt = 
            DimPlot(xeno,group.by = "program.assignment",cols = c(Hypoxia = "red",TNFa = "green",cell_cycle = "blue","NA" = "grey"))
          ,title = "orig.ident")
print_tab(plt = 
              DimPlot(xeno,group.by = "orig.ident")
          ,title = "program.assignment")
print_tab(plt = 
            DimPlot(xeno,group.by = "treatment")
          ,title = "treatment")

# DimPlot(xeno,group.by = "program.assignment",cols = c("red","darkgreen","blue","yellow","black","grey"))
```

# Score regulation {.tabset}
```{r echo=TRUE, results='asis'}

i = 1
for (metegene in c("metagene.1","metagene.2")) {

  genes_by_tp = FetchData(object = xeno,vars = metegene) %>% rowSums() %>% as.data.frame() #mean expression
  names(genes_by_tp)[1] = "Metagene_mean"
  
  genes_by_tp = cbind(genes_by_tp,FetchData(object = xeno,vars = c("orig.ident","treatment"))) # add vars
  
  genes_by_tp_forPlot =  genes_by_tp %>% mutate(orig.ident = paste("model",orig.ident)) #add "model" before model num
  my_comparisons = list( c("NT", "OSI") )
  plt = ggplot(genes_by_tp_forPlot, aes(x=treatment, y=Metagene_mean)) +
    geom_boxplot() +
    scale_y_continuous(limits = c(0, max(genes_by_tp_forPlot[1])*1.1))+ #scale axis
    facet_wrap(~orig.ident, nrow = 2, strip.position = "top")+
    stat_compare_means(comparisons = my_comparisons,method = "wilcox.test") + # Add pairwise comparisons p-value
  ylab(paste(metegene,"mean"))

  
  print_tab(plt = plt,title = c(metegene,"per patient"))

  plt = ggplot(genes_by_tp_forPlot, aes(x=treatment, y=Metagene_mean)) +
    geom_boxplot() +
    scale_y_continuous(limits = c(0, max(genes_by_tp_forPlot[1])*1.1))+ #scale axis
    stat_compare_means(comparisons = my_comparisons,method = "wilcox.test") + # Add pairwise comparisons p-value
  ylab(paste(metegene,"mean"))
  
  print_tab(plt = plt,title = metegene)
    i = i+1
}

```
