library(stringi)
source_from_github(repositoy = "DEG_functions",version = "0.2.45")
ℹ SHA-1 hash of file is 645c7d8e9571eb9caed4b7baf4b058f6a2c051cc
Welcome to enrichR
Checking connection ...
Enrichr ... Connection is Live!
FlyEnrichr ... Connection is available!
WormEnrichr ... Connection is available!
YeastEnrichr ... Connection is available!
FishEnrichr ... Connection is available!
source_from_github(repositoy = "cNMF_functions",version = "0.4.0",script_name = "cnmf_functions_V3.R")
ℹ SHA-1 hash of file is 7b39dfd215cf7e1d29ee976ecc6bfa0e3f1532f6
Loading required package: reticulate
source_from_github(repositoy = "sc_general_functions",version = "0.1.28",script_name = "functions.R")
ℹ SHA-1 hash of file is 737683e4e0a82ffa22769bbd4a0842a271fe63fc
Loading required package: RColorBrewer
library("readxl")
acc_all = readRDS(file = "./Data/acc_tpm_nCount_mito_no146_15k_alldata.rds")
acc_cancer_pri = readRDS(file = "./Data/acc_cancer_no146_primaryonly15k_cancercells.rds")
acc_cancer = readRDS(file = "./Data/acc_tpm_nCount_mito_no146_15k_cancercells.rds")
neuronal_signatures <- read_excel("./Data/Neuronal Signatures.xlsx",col_names =F)
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
neuronal_signatures %<>% t() %>% as.data.frame() %>% janitor::row_to_names(1) %>% filter(!row_number() == 1)
rownames(neuronal_signatures) <- NULL
colnames(neuronal_signatures) %<>% gsub(replacement = "", pattern = "_\\d.*") #remove any _numbers
colnames(neuronal_signatures) %<>% gsub(replacement = "", pattern = "\\(.*") #rename "(" to the end
colnames(neuronal_signatures) %<>% gsub(replacement = "_", pattern = " ") #rename all spaces
neuronal_pathways <- read_excel("./Data/Pathway analysis Gene sets.xlsx",col_names =F)
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
• `` -> `...33`
• `` -> `...34`
• `` -> `...35`
• `` -> `...36`
• `` -> `...37`
• `` -> `...38`
• `` -> `...39`
• `` -> `...40`
• `` -> `...41`
• `` -> `...42`
• `` -> `...43`
• `` -> `...44`
• `` -> `...45`
• `` -> `...46`
• `` -> `...47`
• `` -> `...48`
• `` -> `...49`
• `` -> `...50`
neuronal_pathways %<>% t() %>% as.data.frame() %>% janitor::row_to_names(1) %>% filter(!row_number() == 1) %>% as.list()
neuronal_pathways = lapply(neuronal_pathways, na.omit)
neuronal_pathways = lapply(neuronal_pathways, as.character)
DimPlot(acc_all)
Warning in grSoftVersion() :
unable to load shared object '/usr/local/lib/R/modules//R_X11.so':
libXt.so.6: cannot open shared object file: No such file or directory
for (neural_name in colnames(neuronal_signatures)) {
genes = neuronal_signatures[,neural_name,drop=T]
pathways_scores = getPathwayScores(acc_all@assays$RNA@data,pathwayGenes = genes)
acc_all %<>% AddMetaData(metadata = pathways_scores$pathwayScores,col.name = neural_name)
}
FeaturePlot(object = acc_all,features = colnames(neuronal_signatures))
for (neural_name in colnames(neuronal_signatures)) {
genes = neuronal_signatures[,neural_name,drop=T]
anontation = plot_genes_cor(dataset = acc_all,geneIds = genes,num_of_clusters = 3,show_rownames =T,title = neural_name)
correlated_genes = anontation %>% filter(cluster == 1) %>% rownames()
pathways_scores = getPathwayScores(acc_all@assays$RNA@data,pathwayGenes = correlated_genes)
acc_all %<>% AddMetaData(metadata = pathways_scores$pathwayScores,col.name = paste0(neural_name,"_cor"))
}
NA
VlnPlot(object = acc_all,features = "CSF3",group.by = "patient.ident")
DimPlot(acc_cancer)
acc_cancer <- RunPCA(acc_cancer, features = VariableFeatures(object = acc_cancer),verbose = F)
ElbowPlot(acc_cancer)
acc_cancer <- FindNeighbors(acc_cancer, dims = 1:10,verbose = F) %>% FindClusters(resolution = 0.5) %>% RunUMAP(dims = 1:10,verbose = F)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 951
Number of edges: 28888
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8456
Number of communities: 7
Elapsed time: 0 seconds
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
DimPlot(acc_cancer,group.by = "patient.ident")
for (pathway_name in names(neuronal_pathways)) {
genes = neuronal_pathways[[pathway_name]]
pathways_scores = getPathwayScores(acc_cancer@assays$RNA@data,pathwayGenes = genes)
acc_cancer %<>% AddMetaData(metadata = pathways_scores$pathwayScores,col.name = pathway_name)
}
names(neuronal_pathways)
[1] "GOBP_NOREPINEPHRINE_SECRETION" "GOBP_NOREPINEPHRINE_TRANSPORT"
[3] "GOBP_NORADRENERGIC_NEURON_DEVELOPMENT" "GOBP_NORADRENERGIC_NEURON_DIFFERENTIATION"
[5] "GOBP_AUTONOMIC_NERVOUS_SYSTEM_DEVELOPMENT" "GOBP_PARASYMPATHETIC_NERVOUS_SYSTEM_DEVELOPMENT"
[7] "GOBP_ADRENERGIC_RECEPTOR_SIGNALING_PATHWAY" "GOMF_BETA_2_ADRENERGIC_RECEPTOR_BINDING"
FeaturePlot(object = acc_cancer,features = names(neuronal_pathways))
VlnPlot(object = acc_cancer,features = names(neuronal_pathways),group.by = "patient.ident")
volcano_plot<- function(de_genes, top_genes_text=0, title = "" ,show_gene_names = NULL, ident1 = "",
ident2 = "" , fdr_cutoff = 0.05 , log2fc_cutoff = 0.6, max_names = 10,
return_de_genes = F, show_graph = T, show_legend = T) {
library(ggrepel,quietly = T)
library(dplyr,quietly = T)
names_for_label = c(paste(ident2,"down genes"),paste(ident2,"up genes"))
#color genes if there are over/under/same expressed
de_genes$diffexpressed <- "Same"
de_genes$avg_log2FC[!is.finite(de_genes$avg_log2FC)] <- 1000 #remove infinite values
# #calculate fdr from genes that has logFC > clac_fdr_from_logfc (clac_fdr_from_logfc = 0 -> all genes)
# filtered_markers = filter(de_genes, abs(avg_log2FC) > clac_fdr_from_logfc) #take genes with logFC > clac_fdr_from_logfc
# filtered_markers$fdr <-p.adjust(p = filtered_markers$p_val ,method = "fdr") #calc fdr
# de_genes$fdr = 1
# filtered_markers_indexes = which(rownames(de_genes) %in% rownames(filtered_markers))
# de_genes[filtered_markers_indexes, "fdr"] = filtered_markers$fdr
de_genes$fdr = p.adjust(p = de_genes$p_val ,method = "fdr")
# if log2Foldchange > 0.6 and pvalue < 0.05, set as "UP"
de_genes$diffexpressed[de_genes$avg_log2FC > log2fc_cutoff & de_genes$fdr < fdr_cutoff] <- names_for_label[1]
# if log2Foldchange < -0.6 and pvalue < 0.05, set as "DOWN"
de_genes$diffexpressed[de_genes$avg_log2FC < -log2fc_cutoff & de_genes$fdr < fdr_cutoff] <- names_for_label[2]
#set name labels for highest genes
de_genes$delabel <- NA
de_genes$delabel[0:top_genes_text] <- rownames(de_genes)[0:top_genes_text]
#set name labels for significant genes
down_genes = de_genes$diffexpressed == names_for_label[1]
up_genes = de_genes$diffexpressed == names_for_label[2]
last_gene_to_show_down = which(de_genes$diffexpressed == names_for_label[1])[max_names]
last_gene_to_show_up = which(de_genes$diffexpressed == names_for_label[2])[max_names]
down_genes[last_gene_to_show_down:length(down_genes)] = F
up_genes[last_gene_to_show_up:length(up_genes)] = F
de_genes$delabel[up_genes] <- rownames(de_genes)[up_genes]
de_genes$delabel[down_genes] <- rownames(de_genes)[down_genes]
#Show genes that specify in show_gene_names
if (!is.null(show_gene_names)){
de_genes_index = match(show_gene_names,rownames(de_genes)) #indexes of de_genes that in show_gene_names
de_genes_index <- de_genes_index[!is.na(de_genes_index)] #remove NA
show_gene_names_index = show_gene_names %in% rownames(de_genes) #indexes of show_gene_names that in de_genes
de_genes$delabel[de_genes_index] = show_gene_names [show_gene_names_index]
}
#colors for diff exp genes
cols <- structure(c("green4", "red", "grey"), .Names = c(names_for_label[2],names_for_label[1], "Same"))
title = paste(title,"Volcano plot- ", ident1,"vs", ident2)
p = ggplot(data=de_genes, aes(x=avg_log2FC, y=-log10(p_val), col=diffexpressed, label=delabel)) +
geom_point() +
theme_minimal() +
scale_color_manual(values=cols) +
geom_text_repel(na.rm = T,box.padding = 1,max.overlaps = Inf,color = "blue") +
xlab(paste("avg_log2FC (Positive = up in", ident1,")"))+
ylab("Significance")+
scale_y_continuous(labels = function(x) {parse(text = paste0("10^-",x))})+
guides(col=guide_legend(title=paste0("Significant DEG\n(FDR<",fdr_cutoff," ,abs(log2fc) >", log2fc_cutoff,")")))+
{if(show_legend == F) theme(legend.position="none")}+
{if(show_legend) ggtitle(title) }+
theme(axis.text = element_text( color="black", size=12),axis.title = element_text( color="black", size=12))
if(show_graph == T){print(p)}
if (return_de_genes == T){ return(de_genes)}
else {return (p)}
}
acc_cancer_pri = SetIdent(object = acc_cancer_pri,value = "patient.ident")
markers = FindMarkers(object = acc_cancer_pri,ident.1 = "ACC2",features = VariableFeatures(acc_cancer_pri),densify = T)
| | 0 % ~calculating
|+ | 1 % ~08s
|++ | 2 % ~08s
|++ | 3 % ~08s
|+++ | 4 % ~08s
|+++ | 5 % ~07s
|++++ | 6 % ~07s
|++++ | 7 % ~07s
|+++++ | 8 % ~07s
|+++++ | 9 % ~07s
|++++++ | 10% ~07s
|++++++ | 11% ~07s
|+++++++ | 12% ~07s
|+++++++ | 13% ~06s
|++++++++ | 14% ~06s
|++++++++ | 15% ~06s
|+++++++++ | 16% ~06s
|+++++++++ | 17% ~06s
|++++++++++ | 18% ~06s
|++++++++++ | 19% ~06s
|+++++++++++ | 20% ~06s
|+++++++++++ | 21% ~06s
|++++++++++++ | 22% ~05s
|++++++++++++ | 23% ~05s
|+++++++++++++ | 24% ~05s
|+++++++++++++ | 25% ~05s
|++++++++++++++ | 26% ~05s
|++++++++++++++ | 27% ~05s
|+++++++++++++++ | 28% ~05s
|+++++++++++++++ | 29% ~05s
|++++++++++++++++ | 30% ~05s
|++++++++++++++++ | 31% ~05s
|+++++++++++++++++ | 32% ~05s
|+++++++++++++++++ | 33% ~04s
|++++++++++++++++++ | 34% ~04s
|++++++++++++++++++ | 35% ~04s
|+++++++++++++++++++ | 36% ~04s
|+++++++++++++++++++ | 37% ~04s
|++++++++++++++++++++ | 38% ~04s
|++++++++++++++++++++ | 39% ~04s
|+++++++++++++++++++++ | 40% ~04s
|+++++++++++++++++++++ | 41% ~04s
|++++++++++++++++++++++ | 42% ~04s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++ | 44% ~04s
|+++++++++++++++++++++++ | 45% ~04s
|++++++++++++++++++++++++ | 46% ~04s
|++++++++++++++++++++++++ | 47% ~04s
|+++++++++++++++++++++++++ | 48% ~03s
|+++++++++++++++++++++++++ | 49% ~03s
|++++++++++++++++++++++++++ | 51% ~03s
|++++++++++++++++++++++++++ | 52% ~03s
|+++++++++++++++++++++++++++ | 53% ~03s
|+++++++++++++++++++++++++++ | 54% ~03s
|++++++++++++++++++++++++++++ | 55% ~03s
|++++++++++++++++++++++++++++ | 56% ~03s
|+++++++++++++++++++++++++++++ | 57% ~03s
|+++++++++++++++++++++++++++++ | 58% ~03s
|++++++++++++++++++++++++++++++ | 59% ~03s
|++++++++++++++++++++++++++++++ | 60% ~03s
|+++++++++++++++++++++++++++++++ | 61% ~03s
|+++++++++++++++++++++++++++++++ | 62% ~03s
|++++++++++++++++++++++++++++++++ | 63% ~02s
|++++++++++++++++++++++++++++++++ | 64% ~02s
|+++++++++++++++++++++++++++++++++ | 65% ~02s
|+++++++++++++++++++++++++++++++++ | 66% ~02s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++ | 68% ~02s
|+++++++++++++++++++++++++++++++++++ | 69% ~02s
|+++++++++++++++++++++++++++++++++++ | 70% ~02s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|++++++++++++++++++++++++++++++++++++ | 72% ~02s
|+++++++++++++++++++++++++++++++++++++ | 73% ~02s
|+++++++++++++++++++++++++++++++++++++ | 74% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++ | 76% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s
volcano_plot(de_genes = markers,max_names = 10,title = "markers",ident1 = "ACC2",ident2 = "PNI patients",show_graph = F)
up = up in ACC2
library(hypeR)
genesets <- msigdb_download("Homo sapiens",category="H") %>% append( msigdb_download("Homo sapiens",category="C2",subcategory = "CP"))
all_genes = markers %>% arrange(desc(avg_log2FC)) %>% select("avg_log2FC")
ranked_list <- setNames(all_genes$avg_log2FC, rownames(all_genes))
hyp_obj <- hypeR_fgsea(signature = ranked_list,genesets = genesets,up_only = F)
hyp_dots(hyp_obj)
$up
$dn
genesets <- msigdb_download("Homo sapiens",category="H") %>% append( msigdb_download("Homo sapiens",category="C5",subcategory = "GO:BP"))
all_genes = markers %>% arrange(desc(avg_log2FC)) %>% select("avg_log2FC")
ranked_list <- setNames(all_genes$avg_log2FC, rownames(all_genes))
hyp_obj <- hypeR_fgsea(signature = ranked_list,genesets = genesets,up_only = F)
Warning in fgsea::fgseaMultilevel(stats = signature, pathways = gsets.obj$genesets, :
For some of the pathways the P-values were likely overestimated. For such pathways log2err is set to NA.
hyp_dots(hyp_obj)
$up
$dn
lum_score = FetchData(acc_cancer_pri,"luminal_over_myo")
lum_score %<>% mutate (lum_or_myo = case_when(
luminal_over_myo > 1 ~ "luminal",
luminal_over_myo < (-1) ~ "myo",
TRUE ~ "unknowkn"))
for (pathway_name in names(neuronal_pathways)) {
genes = neuronal_pathways[[pathway_name]]
pathways_scores = getPathwayScores(acc_cancer_pri@assays$RNA@data,pathwayGenes = genes)
acc_cancer_pri %<>% AddMetaData(metadata = pathways_scores$pathwayScores,col.name = pathway_name)
}
FeaturePlot(object = acc_cancer_pri,features = names(neuronal_pathways))
library(ggpubr)
for (pathway in names(neuronal_pathways)) {
data = FetchData(object = acc_cancer_pri,vars = c("lum_or_myo",pathway))
p = ggboxplot(data, x = "lum_or_myo", y =pathway,
palette = "jco",
add = "jitter")+
stat_compare_means(method = "wilcox.test",comparisons = list(c("luminal","myo")))+ stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("score")+ggtitle(pathway)
print_tab(p,title = pathway)
}
NA