Data
library(GSEABase)
GBMSeurat_cancer = readRDS("./Data/GSM3828673_10X_GBM_seurat_cancer.RDS")
metaModuleGenes <- read.table(file = "./Data/MetaModuleGenes_CellPaper.tsv", header = TRUE)
# Creating genesets for the meta module markers
MesLike2GeneSet <- GeneSet(metaModuleGenes[,1])
MesLike1GeneSet <- GeneSet(metaModuleGenes[,2])
ACLikeGeneSet <- GeneSet(metaModuleGenes[!is.na(metaModuleGenes[,3]), 3])
OPCLikeGeneSet <- GeneSet(metaModuleGenes[,4])
NPCLike1GeneSet <- GeneSet(metaModuleGenes[,5])
NPCLike2GeneSet <- GeneSet(metaModuleGenes[,6])
MesLike1GeneSet@setName <- "MesLike1"
MesLike2GeneSet@setName <- "MesLike2"
ACLikeGeneSet@setName <- "ACLike"
OPCLikeGeneSet@setName <- "OPCLike"
NPCLike1GeneSet@setName <- "NPCLike1"
NPCLike2GeneSet@setName <- "NPCLike2"
# split the dataset into a list of two seurat objects (stim and CTRL)
ifnb.list <- SplitObject(GBMSeurat_cancer, split.by = "orig.ident")
# normalize and identify variable features for each dataset independently
ifnb.list <- lapply(X = ifnb.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
# select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = ifnb.list)
immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features)
# this command creates an 'integrated' data assay
GBM.combined <- IntegrateData(anchorset = immune.anchors)
GBM.combined <- ScaleData(GBM.combined, verbose = FALSE)
GBM.combined <- RunPCA(GBM.combined, npcs = 30, verbose = FALSE)
ElbowPlot(GBM.combined,ndims = 30)
GBM.combined <- RunUMAP(GBM.combined, reduction = "pca", dims = 1:15)
GBM.combined <- FindNeighbors(GBM.combined, reduction = "pca", dims = 1:15)
GBM.combined <- FindClusters(GBM.combined, resolution = 0.8)
UMAP
after integration
print_tab(DimPlot(GBM.combined, reduction = "umap", group.by = "orig.ident"),title = "by patient")
by patient

print_tab(DimPlot(GBM.combined, reduction = "umap",label = T),title = "by cluster")
by cluster

NA
cancer_markers = list(
MesLike1 = MesLike1GeneSet@geneIds,
MesLike2 = MesLike2GeneSet@geneIds,
ACLike = ACLikeGeneSet@geneIds,
OPCLike = OPCLikeGeneSet@geneIds,
NPCLike1 = NPCLike1GeneSet@geneIds,
NPCLike2 = NPCLike2GeneSet@geneIds
)
GBMSeurat_cancer <- AddModuleScore(
object = GBMSeurat_cancer,
features = cancer_markers,ctrl = 50,name = "cancer_markers")
Warning: The following features are not present in the object: ERO1L, not searching for symbol synonyms
Warning: The following features are not present in the object: PPAP2B, not searching for symbol synonyms
Warning: The following features are not present in the object: SOX2-OT, LPPR1, not searching for symbol synonyms
Warning: The following features are not present in the object: CD24, GPR56, not searching for symbol synonyms
Warning: The following features are not present in the object: CD24, HMP19, LOC150568, not searching for symbol synonyms
GBM.combined[["MesLike1"]] = GBMSeurat_cancer$cancer_markers1
GBM.combined[["MesLike2"]] = GBMSeurat_cancer$cancer_markers2
GBM.combined[["ACLike"]] = GBMSeurat_cancer$cancer_markers3
GBM.combined[["OPCLike"]] = GBMSeurat_cancer$cancer_markers4
GBM.combined[["NPCLike1"]] = GBMSeurat_cancer$cancer_markers5
GBM.combined[["NPCLike2"]] = GBMSeurat_cancer$cancer_markers6
cancer_scores = FetchData(object = GBM.combined,vars = c("MesLike1","MesLike2","ACLike","OPCLike","NPCLike1","NPCLike2"))
cancer_scores$assignment<- colnames(cancer_scores)[max.col(cancer_scores, ties.method = "first")]
GBM.combined %<>% AddMetaData(metadata = cancer_scores$assignment,col.name = "cancer_type")
DimPlot(GBM.combined,group.by = "cancer_type",label = T)

DimPlot(GBM.combined,group.by = "seurat_clusters",label = T)

Feature
plots
# Testing for clusters enriched in markers of the different malignant meta-modules
print_tab(FeaturePlot(GBM.combined, features = "MesLike1"),title = "MESLike1Scores")
MESLike1Scores

print_tab(FeaturePlot(GBM.combined, features = "MesLike2"),title = "MESLike2Scores")
MESLike2Scores

print_tab(FeaturePlot(GBM.combined, features = "ACLike"),title = "ACLikeScores")
ACLikeScores

print_tab(FeaturePlot(GBM.combined, features = "OPCLike"),title = "OPCLikeScores")
OPCLikeScores

print_tab(FeaturePlot(GBM.combined, features = "NPCLike1"),title = "NPCLike1Scores")
NPCLike1Scores

print_tab(FeaturePlot(GBM.combined, features = "NPCLike2"),title = "NPCLike2Scores")
NPCLike2Scores

NA
pdf(file = "./Figures/Seurat/UMAP_orig.ident.pdf", onefile = FALSE)
DimPlot(GBM.combined, reduction = "umap", group.by = "orig.ident")
dev.off()
pdf(file = "./Figures/Seurat/UMAP_clustering.pdf", onefile = FALSE)
DimPlot(GBM.combined, reduction = "umap")
dev.off()
pdf(file = "./Figures/Seurat/macrophageScore.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "macrophageScore"))
dev.off()
pdf(file = "./Figures/Seurat/TCellScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "TCellScores"))
dev.off()
pdf(file = "./Figures/Seurat/oligodendrocyteScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "oligodendrocyteScores"))
dev.off()
pdf(file = "./Figures/Seurat/oligodendrocyteScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "oligodendrocyteScores"))
dev.off()
pdf(file = "./Figures/Seurat/MESLike1Scores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "MESLike1Scores"))
dev.off()
pdf(file = "./Figures/Seurat/MESLike2Scores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "MESLike2Scores"))
dev.off()
pdf(file = "./Figures/Seurat/ACLikeScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "ACLikeScores"))
dev.off()
pdf(file = "./Figures/Seurat/OPCLikeScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "OPCLikeScores"))
dev.off()
pdf(file = "./Figures/Seurat/NPCLike1Scores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "NPCLike1Scores"))
dev.off()
pdf(file = "./Figures/Seurat/NPCLike2Scores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "NPCLike2Scores"))
dev.off()
Combine cancer
subtypes
GBM.combined$cancer_type = GBM.combined$cancer_type %>% gsub(pattern = "MesLike1|MesLike2",replacement = "MesLike")%>% gsub(pattern = "NPCLike1|NPCLike2",replacement = "NPCLike")
clusters_and_scores = FetchData(object = GBM.combined,vars= c("cancer_type","seurat_clusters")) %>% group_by(seurat_clusters,cancer_type) %>% summarise(n_cells = n())%>% mutate(per = 100 *n_cells/sum(n_cells))
`summarise()` has grouped output by 'seurat_clusters'. You can override using the `.groups` argument.
integration_score = clusters_and_scores %>% group_by(seurat_clusters) %>% filter(n_cells == max(n_cells)) %>% pull(per) %>% mean() %>% round(digits = 2)
v_factor_levels <-c( "MesLike", "NPCLike", "OPCLike","ACLike")
colors = RColorBrewer::brewer.pal(6, "Paired")[c(2,4,5,6)]; colors[3] = "orange"
p4 = ggplot(data=clusters_and_scores, aes(x=seurat_clusters, y=per, fill=factor(cancer_type, levels = v_factor_levels))) +
geom_bar(stat="identity")+theme_minimal() + scale_fill_manual(values = colors,name = "Cancer type")+ labs(title = "SiPSiC",subtitle = "integration score=" %s+% integration_score %s+% "%")+ylab("% from cluster")
p4

library(cluster, quietly = TRUE)
dist.matrix <- dist(x = Embeddings(object = GBM.combined[["umap"]]))
clusters <- GBM.combined$cancer_type
sil <- silhouette(x = as.numeric(x = as.factor(x = clusters)), dist = dist.matrix)
summary(sil)
Silhouette of 10000 units in 4 clusters from silhouette.default(x = as.numeric(x = as.factor(x = clusters)), from dist = dist.matrix) :
Cluster sizes and average silhouette widths:
2842 4447 2521 190
-0.08230674 -0.05689546 0.20320946 0.22385726
Individual silhouette widths:
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.669558 -0.240717 0.068159 0.006789 0.231651 0.521249
GBM.combined$sil = sil[,3]
VlnPlot(object = GBM.combined,features = "sil",group.by = "cancer_type")

ggarrange(p1,p2,p3,common.legend = T)
# Measuring the percentage of cells of each malignant meta module that are in a single cluster
clusterAssignments <- Idents(GBM.combined)
ScoresAndClusters <- cbind(clusterAssignments, MacrophageScores = macrophageScores$pathwayScores, OligodecdrocytesScores = oligodendrocyteScores$pathwayScores,
MESLike1Scores = MesLike1Scores$pathwayScores, MESLike2Scores = MesLike2Scores$pathwayScores,
ACLikeScores = ACLikeScores$pathwayScores, OPCLikeScores = OPCLikeScores$pathwayScores,
NPCLike1Scores = NPCLike1Scores$pathwayScores, NPCLike2Scores = NPCLike2Scores$pathwayScores,
TCellScores = TCellScores$pathwayScores, as.data.frame(GBM.combined@meta.data$orig.ident))
colnames(ScoresAndClusters)[colnames(ScoresAndClusters) == "GBM.combined@meta.data$orig.ident"] <- "Patient"
# Checking the proportions of the different patients in each cluster
clusterByPatientIDs <- ScoresAndClusters[,c("clusterAssignments", "Patient")]
clusterByPatientIDs <- clusterByPatientIDs[order(clusterByPatientIDs$clusterAssignments),]
for(currCluster in 0:max(as.numeric(as.character(clusterByPatientIDs[,"clusterAssignments"]))))
{
currClusterCells <- clusterByPatientIDs[clusterByPatientIDs[,"clusterAssignments"] == currCluster,"Patient"]
cat("\n", "Cluster number: ", currCluster, " Total Cells: ", sum(clusterByPatientIDs[,"clusterAssignments"] == currCluster), "\n")
print(table(currClusterCells))
}
NPCThreshold <- ACLikeThreshold <- OPCLikeThreshold <- MESLikesThreshold <- 0.13
OligodendrocyteThreshold <- 1.5
MacrophageThreshold <- TCellThreshold <- 0.05
macrophageOverThresh <- ScoresAndClusters[ScoresAndClusters[,"MacrophageScores"] > MacrophageThreshold,c("clusterAssignments", "MacrophageScores")]
print("Macrophage cells: Total over threshold = ")
print(nrow(macrophageOverThresh))
print(table(macrophageOverThresh[,"clusterAssignments"]))
cat("\n\n")
TCellsOverThresh <- ScoresAndClusters[ScoresAndClusters[,"TCellScores"] > TCellThreshold,c("clusterAssignments", "TCellScores")]
print("T cells: Total over threshold = ")
print(nrow(TCellsOverThresh))
print(table(TCellsOverThresh[,"clusterAssignments"]))
cat("\n\n")
OligodendrocytesOverThresh <- ScoresAndClusters[ScoresAndClusters[,"OligodecdrocytesScores"] > OligodendrocyteThreshold,c("clusterAssignments", "OligodecdrocytesScores")]
print("Oligodendrocyte cells: Total over threshold = ")
print(nrow(OligodendrocytesOverThresh))
print(table(OligodendrocytesOverThresh[,"clusterAssignments"]))
cat("\n\n")
NPCs1OverThresh <- ScoresAndClusters[ScoresAndClusters[,"NPCLike1Scores"] > NPCThreshold,c("clusterAssignments", "NPCLike1Scores")]
print("NPC1 cells: Total over threshold = ")
print(nrow(NPCs1OverThresh))
print(table(NPCs1OverThresh[,"clusterAssignments"]))
cat("\n\n")
NPCs2OverThresh <- ScoresAndClusters[ScoresAndClusters[,"NPCLike2Scores"] > NPCThreshold,c("clusterAssignments", "NPCLike2Scores")]
print("NPC2 cells: Total over threshold = ")
print(nrow(NPCs2OverThresh))
print(table(NPCs2OverThresh[,"clusterAssignments"]))
cat("\n\n")
ACLikesOverThresh <- ScoresAndClusters[ScoresAndClusters[,"ACLikeScores"] > ACLikeThreshold,c("clusterAssignments", "ACLikeScores")]
print("ACLike cells: Total over threshold = ")
print(nrow(ACLikesOverThresh))
print(table(ACLikesOverThresh[,"clusterAssignments"]))
cat("\n\n")
OPCLikesOverThresh <- ScoresAndClusters[ScoresAndClusters[,"OPCLikeScores"] > OPCLikeThreshold,c("clusterAssignments", "OPCLikeScores")]
print("OPCLike cells: Total over threshold = ")
print(nrow(OPCLikesOverThresh))
print(table(OPCLikesOverThresh[,"clusterAssignments"]))
cat("\n\n")
MES1OverThresh <- ScoresAndClusters[ScoresAndClusters[,"MESLike1Scores"] > MESLikesThreshold,c("clusterAssignments", "MESLike1Scores")]
print("MES1 cells: Total over threshold = ")
print(nrow(MES1OverThresh))
print(table(MES1OverThresh[,"clusterAssignments"]))
cat("\n\n")
MES2OverThresh <- ScoresAndClusters[ScoresAndClusters[,"MESLike2Scores"] > MESLikesThreshold,c("clusterAssignments", "MESLike2Scores")]
print("MES2 cells: Total over threshold = ")
print(nrow(MES2OverThresh))
print(table(MES2OverThresh[,"clusterAssignments"]))
cat("\n\n")
# Checking the distribution of each patient's malignant cells across clusters
isCellMalignant <- ScoresAndClusters[,"NPCLike1Scores"] > NPCThreshold |
ScoresAndClusters[,"NPCLike2Scores"] > NPCThreshold |
ScoresAndClusters[,"ACLikeScores"] > ACLikeThreshold |
ScoresAndClusters[,"OPCLikeScores"] > OPCLikeThreshold |
ScoresAndClusters[,"MESLike1Scores"] > MESLikesThreshold |
ScoresAndClusters[,"MESLike2Scores"] > MESLikesThreshold
malignantCellsOnly <- ScoresAndClusters[isCellMalignant,]
for (currPatient in levels(malignantCellsOnly$Patient))
{
currPatientMaligCells <- malignantCellsOnly[malignantCellsOnly[,"Patient"] == currPatient,]
cat("\n", "Patient number: ", currPatient, "\nTotal number of cells: ", nrow(currPatientMaligCells), "\n")
print(table(currPatientMaligCells[,"clusterAssignments"]))
# Finding the NPC-Like cells of this patient only
currPatientNPCLike1 <- currPatientMaligCells[currPatientMaligCells[,"NPCLike1Scores"] > NPCThreshold,]
currPatientNPCLike2 <- currPatientMaligCells[currPatientMaligCells[,"NPCLike2Scores"] > NPCThreshold,]
cat("\n", "NPC-Like1 cells in this cluster:\n")
cat("Total number of NPC-Like1: ", nrow(currPatientNPCLike1))
print(table(currPatientNPCLike1[, "clusterAssignments"]))
cat("\n\n", "NPC-Like2 cells in this cluster:\n")
cat("Total number of NPC-Like2: ", nrow(currPatientNPCLike2))
print(table(currPatientNPCLike2[, "clusterAssignments"]))
cat("\n\n")
}
clusters_assignment_results = data.frame(cluster = 0:9)
clusters_assignment_results[,"Macrophages"] = table(macrophageOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"TCellS"] = table(TCellsOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"Oligodecdrocytes"] = table(OligodendrocytesOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"NPC1"] = table(NPCs1OverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"NPC2"] = table(NPCs2OverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"ACLike"] = table(ACLikesOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"OPCLike"] = table(OPCLikesOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"MES1"] = table(MES1OverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"MES2"] = table(MES2OverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
df2 = reshape2::melt(clusters_assignment_results, id.vars = c("cluster"), variable.name = "cell_type", value.name = "n_cells")
ggplot(data=df2, aes(x=cluster, y=n_cells, fill=cell_type)) +
geom_bar(stat="identity")+theme_minimal() + scale_x_continuous(breaks=0:13)+ scale_fill_brewer(palette="Paired")
---
title: '`r rstudioapi::getSourceEditorContext()$path %>% basename() %>% gsub(pattern = "\\.Rmd",replacement = "")`' 
author: "Avishai Wizel"
date: '`r Sys.time()`'
output: 
  html_notebook: 
    code_folding: hide
    toc: yes
    toc_collapse: yes
    toc_float: 
      collapsed: FALSE
    number_sections: true
    toc_depth: 1
    self_contained: true
---



# Functions

```{r warning=FALSE}
```

# Data

```{r}
library(GSEABase)
GBMSeurat_cancer = readRDS("./Data/GSM3828673_10X_GBM_seurat_cancer.RDS")
metaModuleGenes <- read.table(file = "./Data/MetaModuleGenes_CellPaper.tsv", header = TRUE)

# Creating genesets for the meta module markers
MesLike2GeneSet <- GeneSet(metaModuleGenes[,1])
MesLike1GeneSet <- GeneSet(metaModuleGenes[,2])
ACLikeGeneSet <- GeneSet(metaModuleGenes[!is.na(metaModuleGenes[,3]), 3])
OPCLikeGeneSet <- GeneSet(metaModuleGenes[,4])
NPCLike1GeneSet <- GeneSet(metaModuleGenes[,5])
NPCLike2GeneSet <- GeneSet(metaModuleGenes[,6])

MesLike1GeneSet@setName <- "MesLike1"
MesLike2GeneSet@setName <- "MesLike2"
ACLikeGeneSet@setName <- "ACLike"
OPCLikeGeneSet@setName <- "OPCLike"
NPCLike1GeneSet@setName <- "NPCLike1"
NPCLike2GeneSet@setName <- "NPCLike2"
```



```{r}

# split the dataset into a list of two seurat objects (stim and CTRL)
ifnb.list <- SplitObject(GBMSeurat_cancer, split.by = "orig.ident")
# normalize and identify variable features for each dataset independently
ifnb.list <- lapply(X = ifnb.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
# select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = ifnb.list)
immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features)
# this command creates an 'integrated' data assay
GBM.combined <- IntegrateData(anchorset = immune.anchors)
GBM.combined <- ScaleData(GBM.combined, verbose = FALSE)
GBM.combined <- RunPCA(GBM.combined, npcs = 30, verbose = FALSE)
ElbowPlot(GBM.combined,ndims = 30)
GBM.combined <- RunUMAP(GBM.combined, reduction = "pca", dims = 1:15)
GBM.combined <- FindNeighbors(GBM.combined, reduction = "pca", dims = 1:15)
GBM.combined <- FindClusters(GBM.combined, resolution = 0.8)
```

# UMAP after integration {.tabset}
```{r results='asis'}
print_tab(DimPlot(GBM.combined, reduction = "umap", group.by = "orig.ident"),title = "by patient")
print_tab(DimPlot(GBM.combined, reduction = "umap",label = T),title = "by cluster")
```


```{r}
cancer_markers = list(
  MesLike1 = MesLike1GeneSet@geneIds,
  MesLike2 = MesLike2GeneSet@geneIds,
  ACLike = ACLikeGeneSet@geneIds,
  OPCLike = OPCLikeGeneSet@geneIds,
  NPCLike1 = NPCLike1GeneSet@geneIds,
  NPCLike2 = NPCLike2GeneSet@geneIds
)

GBMSeurat_cancer <- AddModuleScore(
  object = GBMSeurat_cancer,
  features = cancer_markers,ctrl = 50,name = "cancer_markers")

GBM.combined[["MesLike1"]] = GBMSeurat_cancer$cancer_markers1
GBM.combined[["MesLike2"]] = GBMSeurat_cancer$cancer_markers2
GBM.combined[["ACLike"]] = GBMSeurat_cancer$cancer_markers3
GBM.combined[["OPCLike"]] = GBMSeurat_cancer$cancer_markers4
GBM.combined[["NPCLike1"]] = GBMSeurat_cancer$cancer_markers5
GBM.combined[["NPCLike2"]] = GBMSeurat_cancer$cancer_markers6


cancer_scores  = FetchData(object = GBM.combined,vars = c("MesLike1","MesLike2","ACLike","OPCLike","NPCLike1","NPCLike2"))
cancer_scores$assignment<- colnames(cancer_scores)[max.col(cancer_scores, ties.method = "first")]

GBM.combined %<>% AddMetaData(metadata = cancer_scores$assignment,col.name = "cancer_type")
```

```{r}
DimPlot(GBM.combined,group.by = "cancer_type",label = T)
DimPlot(GBM.combined,group.by = "seurat_clusters",label = T)

```






# Feature plots {.tabset}
```{r results='asis'}


# Testing for clusters enriched in markers of the different malignant meta-modules
print_tab(FeaturePlot(GBM.combined, features = "MesLike1"),title = "MESLike1Scores")
print_tab(FeaturePlot(GBM.combined, features = "MesLike2"),title = "MESLike2Scores")
print_tab(FeaturePlot(GBM.combined, features = "ACLike"),title = "ACLikeScores")
print_tab(FeaturePlot(GBM.combined, features = "OPCLike"),title = "OPCLikeScores")
print_tab(FeaturePlot(GBM.combined, features = "NPCLike1"),title = "NPCLike1Scores")
print_tab(FeaturePlot(GBM.combined, features = "NPCLike2"),title = "NPCLike2Scores")


```


```{r}
pdf(file = "./Figures/Seurat/UMAP_orig.ident.pdf", onefile = FALSE)
DimPlot(GBM.combined, reduction = "umap", group.by = "orig.ident")
dev.off()


pdf(file = "./Figures/Seurat/UMAP_clustering.pdf", onefile = FALSE)
DimPlot(GBM.combined, reduction = "umap")
dev.off()


pdf(file = "./Figures/Seurat/macrophageScore.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "macrophageScore"))

dev.off()

pdf(file = "./Figures/Seurat/TCellScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "TCellScores"))

dev.off()
pdf(file = "./Figures/Seurat/oligodendrocyteScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "oligodendrocyteScores"))

dev.off()
pdf(file = "./Figures/Seurat/oligodendrocyteScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "oligodendrocyteScores"))

dev.off()
pdf(file = "./Figures/Seurat/MESLike1Scores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "MESLike1Scores"))

dev.off()
pdf(file = "./Figures/Seurat/MESLike2Scores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "MESLike2Scores"))

dev.off()
pdf(file = "./Figures/Seurat/ACLikeScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "ACLikeScores"))

dev.off()
pdf(file = "./Figures/Seurat/OPCLikeScores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "OPCLikeScores"))

dev.off()


pdf(file = "./Figures/Seurat/NPCLike1Scores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "NPCLike1Scores"))

dev.off()

pdf(file = "./Figures/Seurat/NPCLike2Scores.pdf", onefile = FALSE)
print(FeaturePlot(GBM.combined, features = "NPCLike2Scores"))

dev.off()



```

# Analysis


```{r}
clusters_and_scores = FetchData(object = GBM.combined,vars= c("cancer_type","seurat_clusters")) %>%  group_by(seurat_clusters,cancer_type) %>%  summarise(n_cells = n())%>% mutate(per =  100 *n_cells/sum(n_cells))

integration_score = clusters_and_scores %>%  group_by(seurat_clusters) %>% filter(n_cells == max(n_cells)) %>% pull(per) %>% mean() %>% round(digits = 2)

v_factor_levels <-c( "MesLike1", "MesLike2", "NPCLike1", "NPCLike2", "OPCLike","ACLike")
colors = RColorBrewer::brewer.pal(6, "Paired"); colors[5] = "orange"
p3 = ggplot(data=clusters_and_scores, aes(x=seurat_clusters, y=per, fill=factor(cancer_type, levels = v_factor_levels))) +
  geom_bar(stat="identity")+theme_minimal() + scale_fill_manual(values = colors,name  = "Cancer type")+ labs(title = "Seurat",subtitle = "integration score=" %s+% integration_score %s+% "%")+ylab("% from cluster")
p3
```




# silhouette score
```{r}
library(cluster, quietly = TRUE)
dist.matrix <- dist(x = Embeddings(object = GBM.combined[["umap"]]))
clusters <- GBM.combined$cancer_type
sil <- silhouette(x = as.numeric(x = as.factor(x = clusters)), dist = dist.matrix)
summary(sil)
GBM.combined$sil = sil[,3]
VlnPlot(object = GBM.combined,features = "sil",group.by = "cancer_type")
```

# Combine cancer subtypes
```{r}
GBM.combined$cancer_type = GBM.combined$cancer_type %>% gsub(pattern = "MesLike1|MesLike2",replacement = "MesLike")%>% gsub(pattern = "NPCLike1|NPCLike2",replacement = "NPCLike")
```

```{r}
clusters_and_scores = FetchData(object = GBM.combined,vars= c("cancer_type","seurat_clusters")) %>%  group_by(seurat_clusters,cancer_type) %>%  summarise(n_cells = n())%>% mutate(per =  100 *n_cells/sum(n_cells))

integration_score = clusters_and_scores %>%  group_by(seurat_clusters) %>% filter(n_cells == max(n_cells)) %>% pull(per) %>% mean() %>% round(digits = 2)

v_factor_levels <-c( "MesLike", "NPCLike", "OPCLike","ACLike")
colors = RColorBrewer::brewer.pal(6, "Paired")[c(2,4,5,6)]; colors[3] = "orange"
p4 = ggplot(data=clusters_and_scores, aes(x=seurat_clusters, y=per, fill=factor(cancer_type, levels = v_factor_levels))) +
  geom_bar(stat="identity")+theme_minimal() + scale_fill_manual(values = colors,name  = "Cancer type")+ labs(title = "SiPSiC",subtitle = "integration score=" %s+% integration_score %s+% "%")+ylab("% from cluster")
p4
```

```{r}
library(cluster, quietly = TRUE)
dist.matrix <- dist(x = Embeddings(object = GBM.combined[["umap"]]))
clusters <- GBM.combined$cancer_type
sil <- silhouette(x = as.numeric(x = as.factor(x = clusters)), dist = dist.matrix)
summary(sil)
GBM.combined$sil = sil[,3]
VlnPlot(object = GBM.combined,features = "sil",group.by = "cancer_type")
```


```{r fig.height=7, fig.width=10}
ggarrange(p1,p2,p3,common.legend = T)
```


```{r}

# Measuring the percentage of cells of each malignant meta module that are in a single cluster
clusterAssignments <- Idents(GBM.combined)
ScoresAndClusters <- cbind(clusterAssignments, MacrophageScores = macrophageScores$pathwayScores, OligodecdrocytesScores = oligodendrocyteScores$pathwayScores,
                           MESLike1Scores = MesLike1Scores$pathwayScores, MESLike2Scores = MesLike2Scores$pathwayScores, 
                           ACLikeScores = ACLikeScores$pathwayScores, OPCLikeScores = OPCLikeScores$pathwayScores, 
                           NPCLike1Scores = NPCLike1Scores$pathwayScores, NPCLike2Scores = NPCLike2Scores$pathwayScores, 
                           TCellScores = TCellScores$pathwayScores, as.data.frame(GBM.combined@meta.data$orig.ident))
colnames(ScoresAndClusters)[colnames(ScoresAndClusters) == "GBM.combined@meta.data$orig.ident"] <- "Patient"

# Checking the proportions of the different patients in each cluster
clusterByPatientIDs <- ScoresAndClusters[,c("clusterAssignments", "Patient")]
clusterByPatientIDs <- clusterByPatientIDs[order(clusterByPatientIDs$clusterAssignments),]
for(currCluster in 0:max(as.numeric(as.character(clusterByPatientIDs[,"clusterAssignments"]))))
{
  currClusterCells <- clusterByPatientIDs[clusterByPatientIDs[,"clusterAssignments"] == currCluster,"Patient"]
  cat("\n", "Cluster number: ", currCluster, " Total Cells: ", sum(clusterByPatientIDs[,"clusterAssignments"] == currCluster), "\n")
  print(table(currClusterCells))
}


NPCThreshold <- ACLikeThreshold <- OPCLikeThreshold <- MESLikesThreshold <- 0.13
OligodendrocyteThreshold <- 1.5
MacrophageThreshold <- TCellThreshold <- 0.05

macrophageOverThresh <- ScoresAndClusters[ScoresAndClusters[,"MacrophageScores"] > MacrophageThreshold,c("clusterAssignments", "MacrophageScores")]
print("Macrophage cells: Total over threshold = ")
print(nrow(macrophageOverThresh))
print(table(macrophageOverThresh[,"clusterAssignments"]))
cat("\n\n")

TCellsOverThresh <- ScoresAndClusters[ScoresAndClusters[,"TCellScores"] > TCellThreshold,c("clusterAssignments", "TCellScores")]
print("T cells: Total over threshold = ")
print(nrow(TCellsOverThresh))
print(table(TCellsOverThresh[,"clusterAssignments"]))
cat("\n\n")

OligodendrocytesOverThresh <- ScoresAndClusters[ScoresAndClusters[,"OligodecdrocytesScores"] > OligodendrocyteThreshold,c("clusterAssignments", "OligodecdrocytesScores")]
print("Oligodendrocyte cells: Total over threshold = ")
print(nrow(OligodendrocytesOverThresh))
print(table(OligodendrocytesOverThresh[,"clusterAssignments"]))
cat("\n\n")

NPCs1OverThresh <- ScoresAndClusters[ScoresAndClusters[,"NPCLike1Scores"] > NPCThreshold,c("clusterAssignments", "NPCLike1Scores")]
print("NPC1 cells: Total over threshold = ")
print(nrow(NPCs1OverThresh))
print(table(NPCs1OverThresh[,"clusterAssignments"]))
cat("\n\n")

NPCs2OverThresh <- ScoresAndClusters[ScoresAndClusters[,"NPCLike2Scores"] > NPCThreshold,c("clusterAssignments", "NPCLike2Scores")]
print("NPC2 cells: Total over threshold = ")
print(nrow(NPCs2OverThresh))
print(table(NPCs2OverThresh[,"clusterAssignments"]))
cat("\n\n")

ACLikesOverThresh <- ScoresAndClusters[ScoresAndClusters[,"ACLikeScores"] > ACLikeThreshold,c("clusterAssignments", "ACLikeScores")]
print("ACLike cells: Total over threshold = ")
print(nrow(ACLikesOverThresh))
print(table(ACLikesOverThresh[,"clusterAssignments"]))
cat("\n\n")

OPCLikesOverThresh <- ScoresAndClusters[ScoresAndClusters[,"OPCLikeScores"] > OPCLikeThreshold,c("clusterAssignments", "OPCLikeScores")]
print("OPCLike cells: Total over threshold = ")
print(nrow(OPCLikesOverThresh))
print(table(OPCLikesOverThresh[,"clusterAssignments"]))
cat("\n\n")

MES1OverThresh <- ScoresAndClusters[ScoresAndClusters[,"MESLike1Scores"] > MESLikesThreshold,c("clusterAssignments", "MESLike1Scores")]
print("MES1 cells: Total over threshold = ")
print(nrow(MES1OverThresh))
print(table(MES1OverThresh[,"clusterAssignments"]))
cat("\n\n")

MES2OverThresh <- ScoresAndClusters[ScoresAndClusters[,"MESLike2Scores"] > MESLikesThreshold,c("clusterAssignments", "MESLike2Scores")]
print("MES2 cells: Total over threshold = ")
print(nrow(MES2OverThresh))
print(table(MES2OverThresh[,"clusterAssignments"]))
cat("\n\n")

# Checking the distribution of each patient's malignant cells across clusters

isCellMalignant <- ScoresAndClusters[,"NPCLike1Scores"] > NPCThreshold |
                   ScoresAndClusters[,"NPCLike2Scores"] > NPCThreshold |
                   ScoresAndClusters[,"ACLikeScores"] > ACLikeThreshold |
                   ScoresAndClusters[,"OPCLikeScores"] > OPCLikeThreshold |
                   ScoresAndClusters[,"MESLike1Scores"] > MESLikesThreshold |
                   ScoresAndClusters[,"MESLike2Scores"] > MESLikesThreshold
malignantCellsOnly <- ScoresAndClusters[isCellMalignant,]

for (currPatient in levels(malignantCellsOnly$Patient))
{
  currPatientMaligCells <- malignantCellsOnly[malignantCellsOnly[,"Patient"] == currPatient,]
  cat("\n", "Patient number: ", currPatient, "\nTotal number of cells: ", nrow(currPatientMaligCells), "\n")
  print(table(currPatientMaligCells[,"clusterAssignments"]))
  
  # Finding the NPC-Like cells of this patient only
  currPatientNPCLike1 <- currPatientMaligCells[currPatientMaligCells[,"NPCLike1Scores"] > NPCThreshold,]
  currPatientNPCLike2 <- currPatientMaligCells[currPatientMaligCells[,"NPCLike2Scores"] > NPCThreshold,]
  
  cat("\n", "NPC-Like1 cells in this cluster:\n")
  cat("Total number of NPC-Like1: ", nrow(currPatientNPCLike1))
  print(table(currPatientNPCLike1[, "clusterAssignments"]))
  
  cat("\n\n", "NPC-Like2 cells in this cluster:\n")
  cat("Total number of NPC-Like2: ", nrow(currPatientNPCLike2))
  print(table(currPatientNPCLike2[, "clusterAssignments"]))
  
  cat("\n\n")
}
```

```{r}
clusters_assignment_results = data.frame(cluster = 0:9)
clusters_assignment_results[,"Macrophages"] = table(macrophageOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"TCellS"] = table(TCellsOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"Oligodecdrocytes"] = table(OligodendrocytesOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"NPC1"] = table(NPCs1OverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)

clusters_assignment_results[,"NPC2"] = table(NPCs2OverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"ACLike"] = table(ACLikesOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"OPCLike"] = table(OPCLikesOverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)

clusters_assignment_results[,"MES1"] = table(MES1OverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)
clusters_assignment_results[,"MES2"] = table(MES2OverThresh[,"clusterAssignments"]) %>% as.data.frame() %>% pull(Freq)

```

```{r}
df2 = reshape2::melt(clusters_assignment_results, id.vars = c("cluster"), variable.name = "cell_type", value.name = "n_cells")
ggplot(data=df2, aes(x=cluster, y=n_cells, fill=cell_type)) +
  geom_bar(stat="identity")+theme_minimal() + scale_x_continuous(breaks=0:13)+ scale_fill_brewer(palette="Paired")
```


# Session info
```{r}
session_info()
```

<script src="https://hypothes.is/embed.js" async></script>

