library(Seurat)
library(tidyverse)
ggtitle() function.# read in dataset taken from the lung cancer paper
seurat_lung_mouse <- readRDS(file = "/Users/cathal.king/Desktop/SAGC_workshop/seurat_lung_mouse.rds")
# observe data
seurat_lung_mouse
## An object of class Seurat
## 28205 features across 17549 samples within 1 assay
## Active assay: originalexp (28205 features, 0 variable features)
## 1 dimensional reduction calculated: SPRING
# check dimensions of dataset
dim(seurat_lung_mouse)
## [1] 28205 17549
# Plot the UMAP plot as shown in Figure 1D of the paper.
DimPlot(object = seurat_lung_mouse, reduction = "SPRING", group.by = "Major.cell.type", label = T)
?NormalizeData) and observe the
effects on the counts.Normalisation of gene counts.
# normalize with the log normalise method
seurat_lung_mouse <- Seurat::NormalizeData(seurat_lung_mouse, normalization.method = "LogNormalize")
# alternate methods for normalisation are possible and relevant to certain use cases
seurat_lung_mouse in the RStudio console.FindVariableFeatures will use n number of genes
(features) as set in the parameter nFeatures =. Ideally, we
want to capture as much biological variation as possible (more genes)
while at the same time ensuring our computer can handle and process the
data in a reasonable time. 2000 is standard but change this
value to see the difference in compute time.RunPCA will find the most PC’s in the data and is an
essential part of generating a UMAP plot. The number of PC’s or
dimensions that is used for the remaining part of the analysis is not as
important as other parameters. View what the ideal number of the PCs
might be for your data with the ElbowPlot() function.FindClusters() will identify clusters of cells in the
dataset. The resolution = parameter is important in
defining how many clusters are found in the data and therefore
represented on the UMAP plot. It will not change the positions of the
cells on the UMAP plot. Increase this value if you expect there to be
more clusters present in the data. The algorithm =
parameter will select the algorithm used in finding clusters. The
Louvain algorithm is used as standard here but the
Leiden algorithm is also possible to implement in Seurat with
the addition of other dependency packages.Cell Clustering.
# standard Seurat clustering work-flow
seurat_lung_mouse <- FindVariableFeatures(seurat_lung_mouse, nfeatures = 2000)
seurat_lung_mouse <- ScaleData(seurat_lung_mouse)
## Centering and scaling data matrix
seurat_lung_mouse <- RunPCA(seurat_lung_mouse, verbose = F)
ElbowPlot(seurat_lung_mouse)
seurat_lung_mouse <- FindNeighbors(seurat_lung_mouse, dims = 1:30, reduction = "pca")
## Computing nearest neighbor graph
## Computing SNN
seurat_lung_mouse <- FindClusters(seurat_lung_mouse, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 17549
## Number of edges: 693646
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9423
## Number of communities: 21
## Elapsed time: 1 seconds
RunUMAP will calculate the UMAP embedding which is used
to later plot the UMAP plot.seurat_lung_mouse@reductions$n.neighboursparameter controls the number of
neighbouring points used in local approximations. Change this
n.neighbours parameter in the RunUMAP function
and compare the results.Dimensionality reduction.
# Calculate UMAP embedding
seurat_lung_mouse <- RunUMAP(object = seurat_lung_mouse,
reduction = "pca",
dims = 1:30,
# n.neighbors = 10,
reduction.name = "New_UMAP",
verbose = F)
# plot UMAP from what we calculated above
p <- DimPlot(object = seurat_lung_mouse,
reduction = "New_UMAP",
group.by = "Major.cell.type",
label = T,
pt.size = 0.5) +
NoLegend() +
ggtitle("Seurat UMAP")
# UMAP from paper
q <- DimPlot(object = seurat_lung_mouse,
reduction = "SPRING",
group.by = "Major.cell.type",
label = T) +
NoLegend() +
ggtitle("Figure 1D UMAP")
# plot 2 UMAP's side-by-side for comparison
p|q
FindClusters()
function to find more or less clusters in the data.## plot cell clusters on the UMAP
DimPlot(object = seurat_lung_mouse,
reduction = "New_UMAP",
group.by = "seurat_clusters",
label = T,
pt.size = 0.5) +
NoLegend()
shuffle and order to work around this if
needed.dittoSeq can help to present a UMAP plot with more colour
blind friendly options.dittoSeq can also make the plot interactive. Re-plot
and hover your mouse over each point on the UMAP to better understand
the data.A UMAP representation of cell clusters.
## opacity
DimPlot(object = seurat_lung_mouse,
reduction = "SPRING",
group.by = "Major.cell.type",
label = T,
pt.size = 0.5)
## Warning: Removed 1610 rows containing missing values (`geom_point()`).
## order and shuffle parameter
j <- DimPlot(object = seurat_lung_mouse,
reduction = "New_UMAP",
group.by = "Major.cell.type",
label = T,
pt.size = 0.5,
# order = NULL,
shuffle = T)
j
## CVD
library(dittoSeq)
#
dittoDimPlot(object = seurat_lung_mouse,
reduction.use = "SPRING",
var = "Major.cell.type",
opacity = 0.5,
do.label = T,
labels.size = 3) + ggtitle("My UMAP Representation")
## Warning: Removed 1610 rows containing missing values (`geom_point()`).
#
dittoDimPlot(object = seurat_lung_mouse,
reduction.use = "SPRING",
var = "Major.cell.type",
opacity = 0.5,
do.hover = T)
FindMarkers identifies positive and
negative markers of a single cluster (specified in ident.1), compared to
all other cells. FindAllMarkers automates this process for
all clusters, but you can also test groups of clusters vs. each other,
or against all cells.VlnPlot() and the
FeaturePlot() functions.write.csv()) and open it in Excel.Differentially expressed genes per cluster
# find all markers of cluster 2
cluster2.markers <- FindMarkers(seurat_lung_mouse, ident.1 = 2, min.pct = 0.25)
head(cluster2.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## 4930402H24Rik 0 2.486358 0.583 0.099 0
## Actg1 0 -1.680312 0.583 0.862 0
## Alox5ap 0 -3.312858 0.043 0.513 0
## Anxa2 0 -2.620054 0.037 0.481 0
## Bank1 0 2.074459 0.266 0.025 0
#
# # find all markers distinguishing cluster 5 from clusters 0 and 3
cluster5.markers <- FindMarkers(seurat_lung_mouse, ident.1 = 5, ident.2 = c(0, 3), min.pct = 0.25)
head(cluster5.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Cd74 4.635817e-232 4.395628 0.340 0.039 1.307532e-227
## S100a11 1.002689e-182 -1.903234 0.150 0.747 2.828083e-178
## S100a6 3.157390e-170 -1.893960 0.140 0.716 8.905418e-166
## Selplg 6.151238e-169 -2.004332 0.089 0.673 1.734957e-164
## Fos 7.315773e-166 -2.075319 0.178 0.724 2.063414e-161
# # find markers for every cluster compared to all remaining cells, report only the positive ones
seurat_lung_mouse.markers <- FindAllMarkers(seurat_lung_mouse, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
seurat_lung_mouse.markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)
## # A tibble: 42 × 7
## # Groups: cluster [21]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 3.41 0.787 0.111 0 0 Retnlg
## 2 0 2.80 0.549 0.082 0 0 Mmp8
## 3 0 3.14 0.74 0.098 0 1 Clec4n
## 4 0 2.98 0.745 0.138 0 1 Ier3
## 5 0 3.73 0.947 0.063 0 2 Cd79a
## 6 0 3.29 0.587 0.035 0 2 Fcmr
## 7 0 3.19 0.547 0.03 0 3 Thy1
## 8 0 2.75 0.337 0.025 0 3 Lef1
## 9 0 3.99 0.825 0.101 0 4 Plac8
## 10 0 2.98 0.411 0.024 0 4 Fn1
## # ℹ 32 more rows
## Export that table to a file and open in Excel for example.
#write.csv(x = seurat_lung_mouse.markers, file = "/PATH/seurat_lung_mouse.markers.csv")
## visualise DE
VlnPlot(seurat_lung_mouse, features = c("Cd74", "Trp53"))
# featureplot DE
FeaturePlot(seurat_lung_mouse, features = c("Cd74", "Trp53"), reduction = "SPRING")
10x Visium Spatial Spots
Spatial Transcriptomics Image
# read in
spatial_seurat <- readRDS("/Users/cathal.king/Documents/Projects/DL/Visium_Mouse/Seurat/Visium_mouse_Re_seurat_A1.rds")
# add name in idents
spatial_seurat@meta.data$orig.ident <- "A1"
# plot spatial image
SpatialPlot(spatial_seurat,
pt.size.factor = 2.5,
alpha = 1) +
NoLegend() +
ggtitle('Spatial Transcriptomics mouse colon') +
theme(plot.title = element_text(hjust = 0.5))
# QC metrics
spatial_seurat <- PercentageFeatureSet(spatial_seurat, "^mt-",
col.name = "percent_mito")
# plot
SpatialFeaturePlot(spatial_seurat,
features = c("nFeature_Spatial", "nCount_Spatial", "percent_mito"),
pt.size.factor = 2.8)
SCTransform method of
normalisation for ST data.# Normalise and process data
spatial_seurat <- SCTransform(spatial_seurat, assay = "Spatial", verbose = FALSE)
spatial_seurat <- RunPCA(spatial_seurat, assay = "SCT", verbose = FALSE)
spatial_seurat <- FindNeighbors(spatial_seurat, reduction = "pca", dims = 1:30)
spatial_seurat <- FindClusters(spatial_seurat, verbose = FALSE, algorithm = 1)
spatial_seurat <- RunUMAP(spatial_seurat, reduction = "pca", dims = 1:30)
## UMAP plot
p1 <- DimPlot(spatial_seurat, reduction = "umap", label = TRUE, pt.size = 3, label.size = 7) + ggtitle("UMAP representation of Spot Clusters for A1")+
theme(plot.title = element_text(hjust = 0.5))
p2 <- SpatialDimPlot(spatial_seurat, label = TRUE, label.size = 3.6, pt.size.factor = 3) + ggtitle("Seurat Clusters A1")+
theme(plot.title = element_text(hjust = 0.5))
p1 + p2
# Plot chosen Genes on spatial image
SpatialFeaturePlot(spatial_seurat,
features = c("Cd8a", "Cd81", "Cd14", "Lag3"),
pt.size.factor = 2.8,
max.cutoff = 2) # min.cutoff = 0, max.cutoff = 20
# A better colour scheme
color.panel = c("#73CA6F",
# "#AF75CE",
"#C5916F",
# "#ABC0D4",
"#ECEAC5",
"#C49CA4",
"#C66FA0",
# "#706EC4",
"#DDCDE3",
# "#7395C3",
# "#ACA1DC",
"#E5C8BB",
# "#C96C69",
"#FFFF66",
"#99ff99",
"#C1AE76",
"#C4E6E3",
"#C3D37B",
"#BCE9BD",
"#7ABB9B")
# observe clusters clearer
SpatialDimPlot(spatial_seurat, label = TRUE, label.size = 3.6, pt.size.factor = 3, image.alpha = 0.6) +
ggtitle("Seurat Clusters overlayed on Spatial image") +
theme(plot.title = element_text(hjust = 0.5))
# findmarkers will find markers between spots in ident_1 and all other spots in the dataset (not in that cluster).
Idents(spatial_seurat) <- "seurat_clusters"
# DEG's between cluster 1 and 4
#spatial_seurat <- FindMarkers(object = spatial_seurat, ident.1 = 1, ident.2 = 4)
# This loop runs the FindMarkers function on all of the clusters.
lapply(
levels(spatial_seurat[["seurat_clusters"]][[1]]),
function(x)FindMarkers(spatial_seurat,ident.1 = x,min.pct = 0.25)
) -> cluster.markers.A1
#
sapply(0:(length(cluster.markers.A1)-1),function(x) {
cluster.markers.A1[[x+1]]$gene <<- rownames(cluster.markers.A1[[x+1]])
cluster.markers.A1[[x+1]]$cluster <<- x
})
## [1] 0 1 2 3 4 5 6
#
as_tibble(do.call(rbind,cluster.markers.A1)) %>% arrange(p_val_adj) -> cluster.markers.A1
cluster.markers.A1 # export
## # A tibble: 17,504 × 7
## p_val avg_log2FC pct.1 pct.2 p_val_adj gene cluster
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <int>
## 1 8.16e-59 1.75 1 1 1.36e-54 Acta2 0
## 2 8.99e-59 2.43 1 1 1.50e-54 Tagln 0
## 3 4.04e-58 2.25 1 1 6.75e-54 Actg2 0
## 4 8.03e-58 1.75 1 1 1.34e-53 Tpm2 0
## 5 2.13e-57 1.93 1 1 3.57e-53 Cnn1 0
## 6 3.30e-57 1.73 1 1 5.52e-53 Myl9 0
## 7 3.78e-57 1.66 1 1 6.32e-53 Csrp1 0
## 8 4.14e-57 1.32 1 1 6.92e-53 Flna 0
## 9 4.69e-56 1.71 1 0.994 7.84e-52 Ckb 0
## 10 8.41e-55 0.713 1 1 1.41e-50 mt-Nd4 0
## # ℹ 17,494 more rows
#
# write.csv(x = cluster.markers.A1, file = "/PATH/spatial.cluster.markers.csv", row.names = F)
#
spatial_seurat <- FindSpatiallyVariableFeatures(spatial_seurat,
assay = "SCT",
features = VariableFeatures(spatial_seurat)[1:1000],
selection.method = "moransi")
## Computing Moran's I
#
top.features <- head(SpatiallyVariableFeatures(spatial_seurat, selection.method = "moransi"), 6)
#
SpatialFeaturePlot(spatial_seurat,
features = top.features,
ncol = 3,
alpha = c(0.1, 1),
pt.size.factor = 2.5)
#SpatialDimPlot(spatial_seurat, interactive = TRUE)
Remove uninteresting sources of variation with integration.
#read in
A1_seu <- readRDS("/Users/cathal.king/Documents/Projects/DL/Visium_Mouse/Seurat/Visium_mouse_Re_seurat_A1.rds")
C1_seu <- readRDS("/Users/cathal.king/Documents/Projects/DL/Visium_Mouse/Seurat/Visium_mouse_Re_seurat_C1.rds")
# rename orig.ident
A1_seu@meta.data$orig.ident <- "A1"
C1_seu@meta.data$orig.ident <- "C1"
# merge
spat.list.merged <- merge(A1_seu, C1_seu)
# plot un-integrated first
# Run the standard workflow for visualization and clustering
spat.list.merged <- FindVariableFeatures(object = spat.list.merged, nfeatures = 2000)
spat.list.merged <- ScaleData(spat.list.merged, verbose = FALSE)
spat.list.merged <- RunPCA(spat.list.merged, npcs = 30, verbose = FALSE)
spat.list.merged <- RunUMAP(spat.list.merged, reduction = "pca", dims = 1:30)
spat.list.merged <- FindNeighbors(spat.list.merged, reduction = "pca", dims = 1:30)
spat.list.merged <- FindClusters(spat.list.merged, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 859
## Number of edges: 24686
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8464
## Number of communities: 8
## Elapsed time: 0 seconds
# plot
DimPlot(spat.list.merged, reduction = "umap", group.by = "orig.ident", label = T,
pt.size = 1.5, label.size = 6)
# now this time integrate samples
spat.list.merged <- merge(A1_seu, C1_seu)
# split the dataset into a list of two seurat objects (stim and CTRL)
spat.list.merged <- SplitObject(spat.list.merged, split.by = "orig.ident")
# normalize and identify variable features for each dataset independently
spat.list.merged <- lapply(X = spat.list.merged, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})
# select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = spat.list.merged)
#
immune.anchors <- FindIntegrationAnchors(object.list = spat.list.merged, anchor.features = features)
# this command creates an 'integrated' data assay
immune.combined <- IntegrateData(anchorset = immune.anchors)
# specify that we will perform downstream analysis on the corrected data note that the
# original unmodified data still resides in the 'RNA' assay
DefaultAssay(immune.combined) <- "integrated"
# Run the standard workflow for visualization and clustering
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)
immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30)
immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30)
immune.combined <- FindClusters(immune.combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 859
## Number of edges: 29313
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8443
## Number of communities: 7
## Elapsed time: 0 seconds
# Visualization
DimPlot(immune.combined, reduction = "umap", group.by = "orig.ident", pt.size = 1.5, label.size = 6)
DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE)