1 Functions

library(GSEABase)
filter_pathways <- function(pathway,dataset) {
  genes_in_data = pathway@geneIds %in% rownames(dataset) %>% sum()
  pathway@setName = paste( pathway@setName,genes_in_data,sep = "_genes:") #if genes in data are less than threshold, mark them
  pathway
}

calculatePathwayScores <- function(pathwayToScore, countsMatrix)
{
  pathwayName <- pathwayToScore@setName
  print(pathwayName)
  pathwayGenes <- pathwayToScore@geneIds
  pathwayScoresObject <- getPathwayScores(countsMatrix, pathwayGenes) 
  suppressWarnings(
    if(is.na(pathwayScoresObject)){return (NA)}
  )
  return(pathwayScoresObject$pathwayScores)
}

sipsic_pipeline <- function(geneset, dataset,thresholds) {

  pathwayScoreLists <- lapply(X = geneset@.Data, calculatePathwayScores, dataset %>% as.matrix() %>% Matrix(sparse = T))
  pathwayScoreLists <- pathwayScoreLists[!is.na(pathwayScoreLists)] #remove NA pathways
  pathwayScoresMatrix <- as.data.frame(do.call("rbind", pathwayScoreLists))
  for(currPathwayIndex in 1:length(pathwayScoreLists)){
    rownames(pathwayScoresMatrix)[currPathwayIndex] <- geneset@.Data[[currPathwayIndex]]@setName
  }
  
  seurat_assay <- CreateAssayObject(counts = pathwayScoresMatrix)
  return(seurat_assay)
}

2 Data

GBMSeurat_cancer = readRDS("./Data/GSM3828673_10X_GBM_seurat_cancer.RDS")
genesets_h <- getGmt("./Data/msigdb_pathways/h.all.v7.0.symbols.pluscc.gmt")
genesets_cp <- getGmt("./Data/msigdb_pathways/c2.cp.v2023.2.Hs.symbols.gmt")
genesets_kegg <- getGmt("./Data/msigdb_pathways/c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt")
genesets_pid<- getGmt("./Data/msigdb_pathways/c2.cp.pid.v2023.2.Hs.symbols.gmt")

genesets_test = GeneSetCollection(genesets_h[10:13])
genesets_h_cp = GeneSetCollection(c(genesets_h,genesets_cp))
genesets_h_kegg = GeneSetCollection(c(genesets_h,genesets_kegg))
genesets_h_pid = GeneSetCollection(c(genesets_h,genesets_pid))
GBMSeurat_cancer@assays$RNA <- data.frame()
saveRDS(GBMSeurat_cancer,file = "./Data/GSM3828673_10X_GBM_seurat_cancer_msigdb_pathwyas.RDS")

3 original Louvain algorithm

print_tab(all_p_patient,title = "patients barplot")

patients barplot

print_tab(all_p_cellType,title = "cancer type barplot")

cancer type barplot

print_tab(all_p_umap,title = "UMAPs")

UMAPs

NA

4 Louvain algorithm with multilevel refinement

all_p

5 SLM algorithm

all_p

6 Leiden algorithm

all_p

---
title: '`r rstudioapi::getSourceEditorContext()$path %>% basename() %>% gsub(pattern = "\\.Rmd",replacement = "")`' 
author: "Avishai Wizel"
date: '`r Sys.time()`'
output: 
  html_notebook: 
    code_folding: hide
    toc: yes
    toc_collapse: yes
    toc_float: 
      collapsed: FALSE
    number_sections: true
    toc_depth: 1
---



# Functions

```{r warning=FALSE}
library(GSEABase)
filter_pathways <- function(pathway,dataset) {
  genes_in_data = pathway@geneIds %in% rownames(dataset) %>% sum()
  pathway@setName = paste( pathway@setName,genes_in_data,sep = "_genes:") #if genes in data are less than threshold, mark them
  pathway
}

calculatePathwayScores <- function(pathwayToScore, countsMatrix)
{
  pathwayName <- pathwayToScore@setName
  print(pathwayName)
  pathwayGenes <- pathwayToScore@geneIds
  pathwayScoresObject <- getPathwayScores(countsMatrix, pathwayGenes) 
  suppressWarnings(
    if(is.na(pathwayScoresObject)){return (NA)}
  )
  return(pathwayScoresObject$pathwayScores)
}

sipsic_pipeline <- function(geneset, dataset,thresholds) {

  pathwayScoreLists <- lapply(X = geneset@.Data, calculatePathwayScores, dataset %>% as.matrix() %>% Matrix(sparse = T))
  pathwayScoreLists <- pathwayScoreLists[!is.na(pathwayScoreLists)] #remove NA pathways
  pathwayScoresMatrix <- as.data.frame(do.call("rbind", pathwayScoreLists))
  for(currPathwayIndex in 1:length(pathwayScoreLists)){
    rownames(pathwayScoresMatrix)[currPathwayIndex] <- geneset@.Data[[currPathwayIndex]]@setName
  }
  
  seurat_assay <- CreateAssayObject(counts = pathwayScoresMatrix)
  return(seurat_assay)
}

```

# Data
```{r}
GBMSeurat_cancer = readRDS("./Data/GSM3828673_10X_GBM_seurat_cancer.RDS")
genesets_h <- getGmt("./Data/msigdb_pathways/h.all.v7.0.symbols.pluscc.gmt")
genesets_cp <- getGmt("./Data/msigdb_pathways/c2.cp.v2023.2.Hs.symbols.gmt")
genesets_kegg <- getGmt("./Data/msigdb_pathways/c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt")
genesets_pid<- getGmt("./Data/msigdb_pathways/c2.cp.pid.v2023.2.Hs.symbols.gmt")

genesets_test = GeneSetCollection(genesets_h[10:13])
genesets_h_cp = GeneSetCollection(c(genesets_h,genesets_cp))
genesets_h_kegg = GeneSetCollection(c(genesets_h,genesets_kegg))
genesets_h_pid = GeneSetCollection(c(genesets_h,genesets_pid))
```

```{r include=FALSE}
genesets_options = list(genesets_cp=genesets_cp,genesets_h = genesets_h, genesets_kegg = genesets_kegg, genesets_pid = genesets_pid)
filter_genes_options = c(5,10)

#sipsic for each geneset
for (i in seq_along(genesets_options)) { 
  geneset = genesets_options[[i]]
  geneset_name = names(genesets_options)[i]
  print(geneset_name)
  new_assay = sipsic_pipeline(geneset = geneset,dataset = GBMSeurat_cancer@assays$RNA@counts,threshold = genes_threshold)
  GBMSeurat_cancer[[geneset_name]] = new_assay
}

# Add hallmark to the other genesets
new_assay = rbind(GBMSeurat_cancer@assays$genesets_cp@counts,GBMSeurat_cancer@assays$genesets_h@counts) %>% CreateAssayObject
GBMSeurat_cancer[["genesets_h_cp"]] = new_assay

new_assay = rbind(GBMSeurat_cancer@assays$genesets_kegg@counts,GBMSeurat_cancer@assays$genesets_h@counts) %>% CreateAssayObject
GBMSeurat_cancer[["genesets_h_kegg"]] = new_assay

new_assay = rbind(GBMSeurat_cancer@assays$genesets_pid@counts,GBMSeurat_cancer@assays$genesets_h@counts) %>% CreateAssayObject
GBMSeurat_cancer[["genesets_h_pid"]] = new_assay


genesets_options_with_h = list(genesets_h_cp=genesets_h_cp,genesets_h = genesets_h, genesets_h_kegg = genesets_h_kegg, genesets_h_pid = genesets_h_pid)

# for each geneset, filter pathway that are below threshold
for (i in seq_along(genesets_options_with_h)) {
  geneset = genesets_options_with_h[[i]]
  geneset_name = names(genesets_options_with_h)[i]
  geneset = geneset[rownames(GBMSeurat_cancer[[geneset_name]]) %>% gsub(pattern = "-",replacement = "_")] #update geneset pathway to be like after sipsic filtering
  for (genes_threshold in filter_genes_options) {
    geneset_with_numGenes <- lapply(X = geneset@.Data, filter_pathways, dataset) %>% GeneSetCollection() # add num of existing genes
    num_of_genes = names(geneset_with_numGenes) %>% gsub(pattern = ".*_genes:",replacement = "") %>% as.numeric() # get vector of num of exist genes
    seurat_assay = GBMSeurat_cancer[[geneset_name]]@counts[num_of_genes>genes_threshold, ] %>% CreateAssayObject() #filter rows based on num_of_genes
    GBMSeurat_cancer[[paste(geneset_name,genes_threshold,sep = "_")]] = seurat_assay #add new assay
  }
}  

```

```{r}
GBMSeurat_cancer@assays$RNA <- data.frame()
saveRDS(GBMSeurat_cancer,file = "./Data/GSM3828673_10X_GBM_seurat_cancer_msigdb_pathwyas.RDS")
```

# original Louvain algorithm {.tabset}
```{r include=FALSE}
all_runs = names(genesets_options_with_h) %>% c(as.vector(outer(names(genesets_options_with_h), filter_genes_options, paste, sep="_"))  ) # vector of all combinations
patient_plt_list <- list() 
cellType_plt_list <- list() 
umap_plt_list <- list() 

runs_res = c(0.61,0.9,0.61,0.72,0.61,0.9,0.6598,0.7,0.6222,0.9,0.7,0.72)
names(runs_res) = all_runs
for (run in all_runs) {
  DefaultAssay(GBMSeurat_cancer) = run
  GBMSeurat_cancer  %<>%    RunPCA(features = rownames(GBMSeurat_cancer)) %>% FindNeighbors(dims = 1:15) %>% FindClusters(resolution = runs_res[[run]])

  clusters_and_scores = FetchData(object = GBMSeurat_cancer,vars= c("orig.ident","seurat_clusters")) %>%  group_by(seurat_clusters,orig.ident) %>%  
    summarise(n_cells = n(), .groups = "drop_last")%>% mutate(per =  100 *n_cells/sum(n_cells))
  
  colors = RColorBrewer::brewer.pal(9, "Paired")
  p =  ggplot(data=clusters_and_scores, aes(x=seurat_clusters, y=per, fill=factor(orig.ident))) +
    geom_bar(stat="identity")+theme_minimal() + scale_fill_manual(values = colors,name  = "Patient")+ 
    labs(title = run)+
    ylab("% from cluster")
  
  patient_plt_list[[run]] <- p  # add each plot into plot list
  
  
  # cancer types stacked barplot
  
   clusters_and_scores = FetchData(object = GBMSeurat_cancer,vars= c("cancer_type","seurat_clusters")) %>%  group_by(seurat_clusters,cancer_type) %>%
    summarise(n_cells = n(), .groups = "drop_last")%>% mutate(per =  100 *n_cells/sum(n_cells))
  
  integration_score = FetchData(object = GBMSeurat_cancer,vars= c("cancer_type","seurat_clusters"))%>%
    mutate(cancer_type = cancer_type %>% gsub(pattern ="MesLike1|MesLike2",replacement = "MesLike"))%>%  #union subtypes
    mutate(cancer_type = cancer_type %>% gsub(pattern = "NPCLike1|NPCLike2",replacement = "NPCLike")) %>% #union subtypes
    group_by(seurat_clusters,cancer_type) %>% 
    summarise(n_cells = n(), .groups = "drop_last")%>% #count by cluster
    mutate(per =  100 *n_cells/sum(n_cells)) %>%   #calc percentages
    group_by(seurat_clusters) %>% #for each cluster
    filter(n_cells == max(n_cells)) %>% #get max cell type
    pull(per) %>% mean() %>% round(digits = 2)
  
  
  cell_types_levels <-c( "MesLike1", "MesLike2", "NPCLike1", "NPCLike2", "OPCLike","ACLike")
  colors = RColorBrewer::brewer.pal(6, "Paired"); colors[5] = "orange"
  p2 = ggplot(data=clusters_and_scores, aes(x=seurat_clusters, y=per, fill=factor(cancer_type, levels = cell_types_levels))) +
    geom_bar(stat="identity")+theme_minimal() + scale_fill_manual(values = colors,name  = "Cancer type")+ 
    labs(title = run,subtitle = "integration score=" %s+% integration_score)+
    ylab("% from cluster")
  cellType_plt_list[[run]] <- p2  # add each plot into plot list
  GBMSeurat_cancer <- RunUMAP(GBMSeurat_cancer, dims = 1:15)
  p3 = DimPlot(GBMSeurat_cancer, reduction = "umap",group.by = c("orig.ident"))
    umap_plt_list[[run]] <- p3  # add each plot into plot list

}

all_p_patient = ggarrange(plotlist = patient_plt_list,common.legend = T)
all_p_cellType = ggarrange(plotlist = cellType_plt_list,common.legend = T)
all_p_umap = ggarrange(plotlist = umap_plt_list,common.legend = T)


```


```{r fig.height=10, fig.width=14,results='asis'}
print_tab(all_p_patient,title = "patients barplot")
print_tab(all_p_cellType,title = "cancer type barplot")

print_tab(all_p_umap,title = "UMAPs")

```

#  Louvain algorithm with multilevel refinement
```{r include=FALSE}
all_runs = names(genesets_options_with_h) %>% c(as.vector(outer(names(genesets_options_with_h), filter_genes_options, paste, sep="_"))  )
myplots <- list() 
for (run in all_runs) {
  DefaultAssay(GBMSeurat_cancer) = run
  GBMSeurat_cancer  %<>%  FindClusters(resolution = runs_res[[run]],algorithm =2)
  
  clusters_and_scores = FetchData(object = GBMSeurat_cancer,vars= c("orig.ident","seurat_clusters")) %>%  group_by(seurat_clusters,orig.ident) %>%  
    summarise(n_cells = n(), .groups = "drop_last")%>% mutate(per =  100 *n_cells/sum(n_cells))
  
  colors = RColorBrewer::brewer.pal(9, "Paired")
  p = ggplot(data=clusters_and_scores, aes(x=seurat_clusters, y=per, fill=factor(orig.ident))) +
    geom_bar(stat="identity")+theme_minimal() + scale_fill_manual(values = colors,name  = "Patient")+ 
    labs(title = run)+
    ylab("% from cluster")
  myplots[[run]] <- p  # add each plot into plot list

}

all_p = ggarrange(plotlist = myplots,common.legend = T)
```


```{r fig.height=10, fig.width=14}
all_p
```

# SLM algorithm
```{r include=FALSE}
all_runs = names(genesets_options_with_h) %>% c(as.vector(outer(names(genesets_options_with_h), filter_genes_options, paste, sep="_"))  )
myplots <- list() 
for (run in all_runs) {
  DefaultAssay(GBMSeurat_cancer) = run
  GBMSeurat_cancer  %<>%  FindClusters(resolution = runs_res[[run]],algorithm = 3)
  
  clusters_and_scores = FetchData(object = GBMSeurat_cancer,vars= c("orig.ident","seurat_clusters")) %>%  group_by(seurat_clusters,orig.ident) %>%  
    summarise(n_cells = n(), .groups = "drop_last")%>% mutate(per =  100 *n_cells/sum(n_cells))
  
  colors = RColorBrewer::brewer.pal(9, "Paired")
  p = ggplot(data=clusters_and_scores, aes(x=seurat_clusters, y=per, fill=factor(orig.ident))) +
    geom_bar(stat="identity")+theme_minimal() + scale_fill_manual(values = colors,name  = "Patient")+ 
    labs(title = run)+
    ylab("% from cluster")
  myplots[[run]] <- p  # add each plot into plot list

}

all_p = ggarrange(plotlist = myplots,common.legend = T)
```


```{r fig.height=10, fig.width=14}
all_p
```

# Leiden algorithm

```{r include=FALSE}
runs_res = runs_res-0.07
all_runs = names(genesets_options_with_h) %>% c(as.vector(outer(names(genesets_options_with_h), filter_genes_options, paste, sep="_"))  )
myplots <- list() 
for (run in all_runs) {
  DefaultAssay(GBMSeurat_cancer) = run
  GBMSeurat_cancer %<>% FindClusters(resolution = runs_res[[run]],algorithm = 4)
  
  clusters_and_scores = FetchData(object = GBMSeurat_cancer,vars= c("orig.ident","seurat_clusters")) %>%  group_by(seurat_clusters,orig.ident) %>%  
    summarise(n_cells = n(), .groups = "drop_last")%>% mutate(per =  100 *n_cells/sum(n_cells))
  
  colors = RColorBrewer::brewer.pal(9, "Paired")
  p = ggplot(data=clusters_and_scores, aes(x=seurat_clusters, y=per, fill=factor(orig.ident))) +
    geom_bar(stat="identity")+theme_minimal() + scale_fill_manual(values = colors,name  = "Patient")+ 
    labs(title = run)+
    ylab("% from cluster")
  myplots[[run]] <- p  # add each plot into plot list

}

all_p = ggarrange(plotlist = myplots,common.legend = T)
```


```{r fig.height=10, fig.width=14}
all_p
```

<script src="https://hypothes.is/embed.js" async></script>

