In this Document I aim to analyse the differences between the clusters. I will focus on the clusters that have been spotted of interest from sex_age_tissue_diff.Rmd
# libraries
library(Seurat) # for scrnaseq analyisis
## Attaching SeuratObject
library(here) # for reproducible paths
## here() starts at C:/Users/nbestard/OneDrive/Luise_HCA
library(dplyr) # manipulated df (rename)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # plots
library(patchwork) # for adding customs to grid-composed Seurat plots
# load last object from sex_age_tissue_diff_03.Rmd
srt_all <- readRDS(here("processed", "srt_anno_01.RDS"))
Idents(srt_all) <- srt_all$clusters_named
Visually check by plotting the data by colouring it for mitochondrial count or UMI/ gene count
Idents(srt_all) <- srt_all$clusters_named
FeaturePlot(srt_all, features = "detected") + labs(title="Detected gene number")
FeaturePlot(srt_all, features = "sum" ) + labs(title="Total UMI counts")
# Wich mito is true? Total_mt_pc, high_subsets_mito_percent, percent.mt, $total_percent_mito
FeaturePlot(srt_all, features = "total_percent_mito") +
labs(title="Mitochondiral percentage content")
To see the top genes for the clusters of interest, first calculate for each gene the expression for an ‘average’ single cell in each cluster
# Create an object only with the average expression
if(!file.exists(here("processed","srt_average_expression.RDS"))){
# Calculate average expression
average_expresion <- AverageExpression(srt_all, assays="RNA", group.by = "clusters_named")
# also save to a seurat object to facilitate plotting
srt_average_expresion <- AverageExpression(srt_all, assays="RNA", return.seurat = TRUE, group.by = "clusters_named")
# Making sure the clusters are sorted as the srt_all
Idents(srt_average_expresion) <-
factor(srt_average_expresion$clusters_named,
levels = levels(srt_all$clusters_named)
)
srt_average_expresion$clusters_named <- Idents(srt_average_expresion)
saveRDS(srt_average_expresion, here("processed","srt_average_expression.RDS"))
write.csv(average_expresion, here("outs", "average_expression.csv"))
} else{
srt_average_expresion <- readRDS(here("processed","srt_average_expression.RDS"))
}
In previous steps we only scaled the variable features, now we are interested in scaling all the genes, as some non highly variable genes may come up from a DE analysis, and for visualisation using scaled genes can help.
srt_all <- ScaleData(srt_all, features = rownames(srt_all))
## Centering and scaling data matrix
For all CellTypes, differences between the subgroups.
if(!file.exists(here("outs", "de_Stromal_all.csv"))) {
# Create a separate seurato object for each cell type
srt_celltype_list <- SplitObject(srt_all, split.by = "celltype")
names_celltype <- names(srt_celltype_list)
# find markers for each celltype clustering
mapply(
FUN = function(srt, name) {
if (!name %in% c("OPC", "Oligo", "Immune")){
# Find markers
markers <- FindAllMarkers(srt, test.use = "MAST", only.pos = TRUE)
# Select markers of interest
top10_markers <- markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
top100_markers <- markers %>% group_by(cluster) %>% top_n(n = 100, wt = avg_log2FC)
# Plot markers of interest
top10_plot <- DoHeatmap(srt, features = top10_markers$gene, size = 4) + NoLegend() +
ggtitle("Top 10 differentially expressed")
# Save the objects
write.csv(markers, here("outs", paste0("de_", name, "_all.csv")))
write.csv(top100_markers, here("outs", paste0("de_", name, "_top100.csv")))
pdf(here("outs", paste0("de_", name, "_top10.pdf")))
print(top10_plot)
dev.off()
}
},
srt_celltype_list, #seurat objects
names_celltype # name of the celltype
)
}
Add some pairwise comparisons
# Function for pariwise comparisons
pairwise_mark <- function(seur_obj,
int_cols,
clust_id_list,
only_pos = TRUE,
min_pct = 0.25,
logfc_threshold = 0.25,
fil_pct_1 = 0.25,
fil_pct_2 = 0.1,
save_dir = getwd(),
test_use = "MAST"){
for(k in 1:length(int_cols)){
Idents(seur_obj) <- int_cols[k]
for(i in 1: length(clust_id_list)){
clust_mark <- FindMarkers(seur_obj,
ident.1 = clust_id_list[[i]][[1]],
ident.2 = clust_id_list[[i]][[2]],
min.pct = min_pct,
test.use = test_use)
clust_mark$cluster <- clust_id_list[[i]][[1]]
clust_mark$comp_to_clust <- clust_id_list[[i]][[2]]
# I modify the write csv compared with luise function to match the ones already done
write.csv(clust_mark,
paste(save_dir,
"/",
"de_pairwise_",
# int_cols[k],
clust_id_list[[i]][[1]],
"_",
clust_id_list[[i]][[2]],
".csv", sep = "" ))
}
}
}
# if not run yet do the comparisons
if(!file.exists(here("outs", "de_pairwise_Astrocyte_1_Astrocyte_2.csv"))) {
clust_id_list <- list(list("Astrocyte_1", "Astrocyte_2"),
list("Neuron_In_1", "Neuron_In_2"),
list("Neuron_In_2", "Neuron_In_3"),
list("Neuron_In_1", "Neuron_In_3"),
list("Neuron_Ex_1", "Neuron_Ex_2"),
list("Neuron_Ex_2", "Neuron_Ex_3"),
list("Neuron_Ex_1", "Neuron_Ex_3"))
pairwise_mark(srt_all,
int_cols = "clusters_named",
save_dir = here("outs"),
clust_id_list = clust_id_list)
}
With a minimum de averarage log fold change of 1.2, present in more than 25 % of cells in the cluster of interest and less than 60 % in the others. Then taking the top 10 of each group.
gen_mark_list <-function(file_dir = getwd(),
pattern = "*.csv",
avg_log = 1.2,
pct_1 = 0.25,
pct_2 = 0.6,
pairwise = FALSE
){
temp = list.files(path = file_dir,
pattern= pattern)
myfiles = lapply(paste(file_dir, temp, sep = "/"), read.csv)
for(i in 1:length(myfiles)){
dat <- myfiles[[i]]
av_log_fil <- subset(dat, dat$avg_log2FC > avg_log &
dat$pct.1 > pct_1 &
dat$pct.2 < pct_2)
if(pairwise == TRUE){
top10 <- av_log_fil %>% top_n(10, avg_log2FC)
top10$gene <- top10$X
}else{
av_log_fil$cluster <- as.character(av_log_fil$cluster)
top10 <- av_log_fil %>% group_by(cluster) %>%
top_n(10, avg_log2FC)
}
if(i ==1){
fil_genes <- top10
}else{
fil_genes <- rbind(fil_genes, top10)
}
fil_genes <- fil_genes[!duplicated(fil_genes$gene),]
}
return(fil_genes)
}
fil_genes <- gen_mark_list(file_dir = here("outs"), pattern = "all.*csv")
fil_genes_pw <- gen_mark_list(file_dir = here("outs"), pattern = "de_pairwise_.*csv", pairwise = TRUE)
int_genes <- c(fil_genes$gene, fil_genes_pw$gene)
int_genes <- unique(int_genes)
This heatmap does summarise all the DE done so far (filtered): first the celltype to celltype, then in each celltype each group against the others and then, at the end, the pairwise comparisons.
hm_av <- DoHeatmap(object = srt_average_expresion,
features = int_genes,
label = TRUE,
group.by = "clusters_named",
# group.colors = mycoloursP[6:40],
draw.lines = F)
## Warning in DoHeatmap(object = srt_average_expresion, features = int_genes, : The
## following features were omitted as they were not found in the scale.data slot
## for the RNA assay: NA
hm_av
Plot the 4 astrocyte clusters alone
srt_astro <- subset(srt_all, idents = c("Astrocyte_3", "Astrocyte_1", "Astrocyte_2", "Astrocyte_4"))
DimPlot(srt_astro)
Plot the the Astrocytes and Oligodendrocyte markers to visualize this subpopulation
FeaturePlot(srt_astro, features = c("GJA1","AQP4", "GLUL", "SOX9", "NDRG2", "GFAP", "ALDH1A1", "ALDH1L1", "VIM"))
# Oligodendrocytes
FeaturePlot(srt_astro, features = c("PLP1", "CNP", "MAG", "MOG", "MOBP", "MBP", "SOX10" ), ncol = 3) +
plot_annotation(title = "Oligodendrocytes")
, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3480225/
We confirmed the induction of astrocyte reactive gliosis in two complementary brain injury models: focal ischemic stroke produced by transient MCAO and neuroinflammation induced by systemic LPS injection.
FeaturePlot(srt_astro, features = c("GFAP", "VIM", "NES", "SERPINA3", "LCN2", "FGL2", "TAPBP"))
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: SERPINA3, LCN2
established markers of reactive astrocytes: GFAP“,”VIM“,”NES“, Vimentin should be very low in control”Lcn2 and Serpina3n could not be detected in healthy brain sections ", here either MCAO reactive astrocytes and LPS reactive astrocytes express differing levels of extracellular binding/adhesion/modification genes: FGL2 enes within the antigen presentation pathway and peptide processing and MHC association
These reactive markers do not seem to collocalise with any of the Astrocyte cellstates.
These markers are chosen from the list in the heatmap above, trying to spot genes that are only expressed in the group of interest and not in others.
VlnPlot(srt_astro, features= c("LUZP2","TRPM3","SHISA6","SLC6A11", "RGS6"))
FeaturePlot(srt_all, features = c("LUZP2","TRPM3","SHISA6","SLC6A11", "RGS6"))
VlnPlot(srt_all,features = c("LUZP2","TRPM3","SHISA6","SLC6A11", "RGS6"))
SLC6A11 is a very clear marker for Astrocyte 1, however it is only present in a subpopulation. SHISA6 better represents the whole Asctrocyte 1 state, eventhough it is also slighly expressed in neurons.
VlnPlot(srt_astro, features= c("ADGRV1", "AL589740.1", "EMID1", "EPHB1"), ncol = 4)
FeaturePlot(srt_all, features = c("ADGRV1", "AL589740.1", "EMID1", "EPHB1"))
VlnPlot(srt_all,features = c("ADGRV1", "EMID1", "EPHB1"), ncol = 3)
Astroycte 3 contains a subpopulation of Astro-Oligos
VlnPlot(srt_astro, features= c("MBP","CHI3L1", "MTURN", "PAQR6", "RPL37A"))
FeaturePlot(srt_all, features = c("MBP","CHI3L1", "MTURN", "PAQR6", "RPL37A"))
“DNAH9” This gene encodes a ciliary outer dynein arm protein and is a member of the dynein heavy chain family. It is a microtubule-dependent motor ATPase and has been reported to be involved in the movement of respiratory cilia. This gene encodes a member of the cilia- and flagella-associated protein family. Cilia And Flagella Associated Protein 43
VlnPlot(srt_astro, features= c("DNAH9",
"SPAG17",
"DNAH11",
"CFAP43",
"DNAH9"))
FeaturePlot(srt_all, features =c("DNAH9",
"SPAG17",
"DNAH11",
"CFAP43",
"DNAH9"))
The differences in the microglia clusters are dispersed, and seem driven by many genes.
srt_micro <- subset(srt_all, subset = celltype == "Microglia-Macrophages")
DimPlot(srt_micro)
VlnPlot(srt_micro,
features =
# DE with the other ex_neurons
c( "APOE", "GPNMB","RANBP2", "ZNF331", "HIF1A",
# from all markers heatmap
"CXCR4")
)
FeaturePlot(srt_all, features = c("APOE", "GPNMB", "RANBP2", "ZNF331", "HIF1A", "CXCR4"), ncol = 3)
VlnPlot(srt_all, features = c("APOE", "GPNMB", "RANBP2", "ZNF331", "HIF1A", "CXCR4"))
The best overall marker is probly “CXCR4”, eventhough it is not present in all microglia-macrophages 1 cells.
VlnPlot(srt_micro,
features = c( "XACT", "AC008691.1", "P2RY12", "AP003481.1", "NAV3")
)
FeaturePlot(srt_all, features = c( "XACT", "AC008691.1", "P2RY12", "AP003481.1", "NAV3"), ncol = 3)
VlnPlot(srt_all, features = c( "XACT", "AC008691.1", "P2RY12", "AP003481.1", "NAV3"))
XACT is the best marker for microglia macrophages 2.
Plot the 3 Excitatory neurons clusters alone
srt_neu_ex <- subset(srt_all, idents = c("Neuron_Ex_1","Neuron_Ex_2","Neuron_Ex_3" ))
DimPlot(srt_neu_ex)
VlnPlot(srt_neu_ex,
features =
# DE with the other ex_neurons
c( "HS3ST4", "ZFHX3","KIAA1217", "ZNF385D",
# from all markers heatmap
"SGCZ", "MGAT4C", "XKR4",
# From pairwise with other ex_neurons
"TLE4", "SEMA3E"
))
FeaturePlot(srt_all, features = c("HS3ST4", "ZFHX3", "KIAA1217", "ZNF385D", "SGCZ", "MGAT4C", "XKR4", "TLE4", "SEMA3E"))
VlnPlot(srt_all, features = c("HS3ST4", "ZFHX3", "KIAA1217", "ZNF385D", "SGCZ", "MGAT4C", "XKR4", "TLE4", "SEMA3E"))
HS3ST4 is the best marker for this excitatory neuron state, it is also present in microglia/macrophages. Some markers such as SGCZ and MGAT4 are only present in a sub-population from Neuron_Ex_1.
VlnPlot(srt_neu_ex,
features =
# DE with the other ex_neurons
c("TENM2", "FAM19A1", "SYN3",
# from all markers heatmap
"CTNNA2", "PTPRK","ADGRL2", "CNTN5")
)
FeaturePlot(srt_all, features = c("TENM2", "FAM19A1", "SYN3", "CTNNA2", "PTPRK", "ADGRL2", "CNTN5"))
VlnPlot(srt_all, features = c("TENM2", "FAM19A1", "SYN3", "CTNNA2", "PTPRK", "ADGRL2", "CNTN5"))
FAM19A1 and SYN3 are the markers that best differentiate Neuron ex 2 with the other excitatory neurons. CNTN5 is less expressed in other cell types, but is slightly expressed by excitatory 1 too.
These neurons are more abundant in BA4 than the other tissues. The markers from this group are harder to find, it does not seem to express much genes that are not expressed by the other ex. neurons. Some of the slightly more expressed genes could be related with stress.Such as “CLU”, the protein encoded by this gene is a secreted chaperone, and mitochondrial genes.
VlnPlot(srt_neu_ex,
features =
# DE with the other ex_neurons
c("MT-ATP8", "CLU", "EEF1A2",
# from all markers heatmap
"ETH1", "ALDOA"),
ncol = 2
)
## Warning in FetchData(object = object, vars = features, slot = slot): The
## following requested variables were not found: ETH1
FeaturePlot(srt_all, features = c("MT-ATP8", "CLU", "EEF1A2", "ETH1", "ALDOA"))
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: ETH1
VlnPlot(srt_all,
features = c("MT-ATP8", "CLU", "EEF1A2", "ETH1", "ALDOA"),
ncol = 2)
## Warning in FetchData(object = object, vars = features, slot = slot): The
## following requested variables were not found: ETH1
Expression plot with granulate cells marker genes
# Searching for markers for Granulate cells
FeaturePlot(srt_all, features = c("CDH15", "CALB2", "RBFOX3", "RELN"))
Should I add Neuron_RELN+_1 (only in CB) vs other Neurons?
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252
## [2] LC_CTYPE=English_United Kingdom.1252
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.1 ggplot2_3.3.3 dplyr_1.0.5 here_1.0.1
## [5] SeuratObject_4.0.0 Seurat_4.0.0
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-0 deldir_0.2-10
## [4] ellipsis_0.3.1 ggridges_0.5.3 rprojroot_2.0.2
## [7] spatstat.data_2.0-0 leiden_0.3.7 listenv_0.8.0
## [10] farver_2.1.0 ggrepel_0.9.1 fansi_0.4.2
## [13] codetools_0.2-18 splines_4.0.4 knitr_1.31
## [16] polyclip_1.10-0 jsonlite_1.7.2 ica_1.0-2
## [19] cluster_2.1.1 png_0.1-7 uwot_0.1.10
## [22] shiny_1.6.0 sctransform_0.3.2 compiler_4.0.4
## [25] httr_1.4.2 assertthat_0.2.1 Matrix_1.3-2
## [28] fastmap_1.1.0 lazyeval_0.2.2 later_1.1.0.1
## [31] htmltools_0.5.1.1 tools_4.0.4 igraph_1.2.6
## [34] gtable_0.3.0 glue_1.4.2 RANN_2.6.1
## [37] reshape2_1.4.4 Rcpp_1.0.6 spatstat_1.64-1
## [40] scattermore_0.7 jquerylib_0.1.3 vctrs_0.3.6
## [43] nlme_3.1-152 lmtest_0.9-38 xfun_0.21
## [46] stringr_1.4.0 globals_0.14.0 mime_0.10
## [49] miniUI_0.1.1.1 lifecycle_1.0.0 irlba_2.3.3
## [52] goftest_1.2-2 future_1.21.0 MASS_7.3-53.1
## [55] zoo_1.8-9 scales_1.1.1 promises_1.2.0.1
## [58] spatstat.utils_2.1-0 parallel_4.0.4 RColorBrewer_1.1-2
## [61] yaml_2.2.1 reticulate_1.18 pbapply_1.4-3
## [64] gridExtra_2.3 sass_0.3.1 rpart_4.1-15
## [67] stringi_1.5.3 highr_0.8 rlang_0.4.10
## [70] pkgconfig_2.0.3 matrixStats_0.58.0 evaluate_0.14
## [73] lattice_0.20-41 ROCR_1.0-11 purrr_0.3.4
## [76] tensor_1.5 htmlwidgets_1.5.3 labeling_0.4.2
## [79] cowplot_1.1.1 tidyselect_1.1.0 parallelly_1.24.0
## [82] RcppAnnoy_0.0.18 plyr_1.8.6 magrittr_2.0.1
## [85] R6_2.5.0 generics_0.1.0 DBI_1.1.1
## [88] pillar_1.5.1 withr_2.4.1 mgcv_1.8-34
## [91] fitdistrplus_1.1-3 survival_3.2-10 abind_1.4-5
## [94] tibble_3.1.0 future.apply_1.7.0 crayon_1.4.1
## [97] KernSmooth_2.23-18 utf8_1.2.1 plotly_4.9.3
## [100] rmarkdown_2.7 grid_4.0.4 data.table_1.14.0
## [103] digest_0.6.27 xtable_1.8-4 tidyr_1.1.3
## [106] httpuv_1.5.5 munsell_0.5.0 viridisLite_0.3.0
## [109] bslib_0.2.4