Research Question: What are the spatial distributions of distinct cell populations and their gene expression patterns in the mouse small intestine?
Objectives:
Health Status of the Intestine
Disease-Related Biomarkers
Immunological State
Nutrient Absorption
Microbiome Biomarkers
Are there microbiome-associated biomarkers in the intestine, and what is their spatial distribution?
Importing libraries
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(arrow)
##
## Attaching package: 'arrow'
##
## The following object is masked from 'package:lubridate':
##
## duration
##
## The following object is masked from 'package:utils':
##
## timestamp
set.seed(12345)
object <-Load10X_Spatial(data.dir = "C:/Users/iyand/Downloads/spatial 2/", bin.size = 8)
Assays(object)
## [1] "Spatial.008um"
view(object@meta.data)
VlnPlot(object, features = c("nFeature_Spatial.008um", "nCount_Spatial.008um"), pt.size = 0, raster = FALSE)
## Warning: Default search for "data" layer in "Spatial.008um" assay yielded no
## results; utilizing "counts" layer instead.
SpatialFeaturePlot(object, features = "nCount_Spatial.008um")
Observation:
Filtering of low quality Reads
object <- subset(object, subset = nFeature_Spatial.008um > 200 & nFeature_Spatial.008um < 2500)
## Warning: Not validating Centroids objects
## Not validating Centroids objects
## Warning: Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Warning: Not validating Seurat objects
VlnPlot(object = object, features = c("nFeature_Spatial.008um", "nCount_Spatial.008um"), pt.size = 0, raster = FALSE)
## Warning: Default search for "data" layer in "Spatial.008um" assay yielded no
## results; utilizing "counts" layer instead.
Normalization, finding Variable feature and Running PCA
object <- NormalizeData(object)
## Normalizing layer: counts
object <- FindVariableFeatures(object)
## Finding variable features for layer counts
object <- ScaleData(object)
## Centering and scaling data matrix
object <- RunPCA(object)
## PC_ 1
## Positive: Ighm, Lyz1, Defa21, Apoe, Itln1, Igkc, Ighd, Ang4, Cd52, AY761184
## Cd79a, Olfm4, Mptx2, Coro1a, Clu, Cd79b, Spink4, C3, Cd24a, Hist1h2ap
## Ptp4a3, Mmp7, Ptprc, H2-Ob, Laptm5, Igfbp7, Tagln, Mfge8, Ly6e, Cd37
## Negative: Apoa1, Fabp2, Selenop, Ace2, Slc15a1, Clca4b, Sis, Slc34a2, Cubn, Cdhr2
## Slc6a8, Slc6a19, Apob, Dpp4, Tmigd1, Mep1a, Ace, Slc13a2, Fabp6, H2-Q2
## Slc27a4, Ggt1, Mxd1, Sectm1b, Xpnpep2, Enpp7, Pdzk1, Alpi, Mpp1, Rbp2
## PC_ 2
## Positive: Ighm, Ighd, Cd52, Coro1a, Cd79a, Ptprc, Clu, Cd79b, H2-Ob, Laptm5
## Cd37, Rac2, Arhgdib, Cd53, Cd19, Srgn, Apoe, Tbc1d10c, Selenop, Ets1
## Ms4a1, Sell, Mfge8, Igkc, H2-Eb2, Shisa5, Fcer2a, Pax5, Tnfrsf13c, Ptpn6
## Negative: Spink4, Lgals2, Defa21, Itln1, Lyz1, Muc2, Olfm4, Ang4, AY761184, Agr2
## Mptx2, Dmbt1, Mmp7, Cps1, Clca3b, Tff3, Clps, Pycard, Ppp1r1b, Reg4
## Slc12a2, Oat, Tspan1, Gvin1, Kcnq1, Krt18, Stmn1, Car8, Tmprss2, Pnliprp2
## PC_ 3
## Positive: Acta2, Actg2, Tpm2, Myh11, Tagln, Myl9, Des, Cald1, Mylk, Cnn1
## Flna, Rgs5, Pgm5, Sparcl1, Dmpk, Csrp1, Synm, Tpm1, Synpo2, Col3a1
## Sparc, Col4a1, Igfbp7, Atp2b4, Pcp4, Fn1, Smtn, Uchl1, Cpe, Rbpms
## Negative: Lgals2, Dmbt1, Ighd, Hist1h2ap, Pycard, Cd79a, Ighm, Tff3, Cd79b, H2-Ob
## Agr2, Cps1, Hist1h1e, Cd37, Ms4a1, Mki67, Ppp1r1b, Cd19, Clu, Spib
## Muc2, Fcgbp, Coro1a, Stmn1, Cd52, Tnfrsf13c, Clca3b, Pax5, Cotl1, H2-Eb2
## PC_ 4
## Positive: Actg2, Uchl1, Des, Tpm2, Myl9, Mylk, Hand2, Cpe, Synm, Pgm5
## Myh11, Pcp4, Dmpk, Cnn1, Pcsk1n, Sncg, Snhg11, Scg2, Acta2, Ptprn
## Stmn2, Tpm1, Prph, Snap25, Psd, Ighd, Tagln, Rgs5, Flna, Fxyd6
## Negative: Igha, Jchain, Igkc, Adamdec1, Iglc1, Igfbp3, Col3a1, Apoe, Tmem176b, Bgn
## Tmem176a, Cxcl14, Col6a1, Plpp3, Igfbp7, Col6a2, Col6a4, Gpx3, Tnc, Bmp5
## Col4a1, Col6a3, Ecm1, C1qa, Npnt, Col1a1, Csf1r, C1qb, Itih5, Igkv4-72
## PC_ 5
## Positive: AY761184, Ang4, Mptx2, Defa21, Lyz1, Itln1, Clps, Mmp7, Olfm4, Pnliprp2
## Defa26, Reg4, Cd24a, Habp2, Nupr1, Apoc2, Spink4, Apoa4, Clca4b, Ada
## Clca4a, Samd5, Apoc3, Rbp2, Car8, Defa35, Ly6m, Pmp22, Slc6a19, Creb3l3
## Negative: Fabp6, Pycard, Dmbt1, Car4, Mt1, Oat, Ms4a18, Zg16, Cyp4f40, Lgals2
## Mt2, Apol10a, Fcgbp, Mal2, Ppp1r1b, Clca1, Gpx1, Reg3a, Hkdc1, Akr1c19
## Fzd5, Mgam2-ps, Tff3, Sis, Bmp3, Scin, Suox, Gsdmc4, Fgfbp1, Nr1h4
PC_1: Immune-metabolic axis - Positive: Immune-related genes (e.g., Ighm, Lyz1) - Negative: Absorption and metabolism-related genes (e.g., Fabp2, Apoa1)
PC_2: Immune defense vs. secretory processes - Positive: Lymphoid immune genes (e.g., Cd79a, Ptprc) - Negative: Secretory epithelial genes (e.g., Muc2, Defa21)
PC_3: Smooth muscle and extracellular matrix vs. immune proliferation - Positive: Smooth muscle and extracellular matrix genes (e.g., Acta2, Col3a1) - Negative: Immune proliferation genes (e.g., Mki67, Ighd)
PC_4: Neuronal and smooth muscle vs. extracellular matrix and immune signaling - Positive: Neuronal and smooth muscle genes (e.g., Uchl1, Actg2) - Negative: Extracellular matrix and immune-related genes
PC_5: Antimicrobial and lipid metabolism vs. absorption and epithelial defense - Positive: Antimicrobial and lipid metabolism genes (e.g., Defa21, Apoa4) - Negative: Absorption and epithelial defense genes (e.g., Fabp6, Reg3a)
object <- RunUMAP(object, dims= 1: 30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:16:14 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:16:14 Read 210280 rows and found 30 numeric columns
## 16:16:14 Using Annoy for neighbor search, n_neighbors = 30
## 16:16:14 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:16:44 Writing NN index file to temp file C:\Users\iyand\AppData\Local\Temp\RtmpCItp4z\file550c7c924a22
## 16:16:44 Searching Annoy index using 1 thread, search_k = 3000
## 16:18:28 Annoy recall = 100%
## 16:18:29 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:18:41 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:18:55 Commencing optimization for 200 epochs, with 9839076 positive edges
## 16:22:25 Optimization finished
object <- FindNeighbors(object, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
object <- FindClusters(object, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 210280
## Number of edges: 5906202
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9326
## Number of communities: 13
## Elapsed time: 200 seconds
## 3 singletons identified. 10 final clusters.
DimPlot(object, group.by = "seurat_clusters", label = TRUE)
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
cluster annotation and finding marker for each cluster
marker <- FindAllMarkers(object, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
head(marker)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## Jchain 0 4.140756 0.625 0.133 0 0 Jchain
## Igha 0 4.268982 0.842 0.377 0 0 Igha
## Igkc 0 3.805956 0.851 0.392 0 0 Igkc
## Apoe 0 2.168477 0.592 0.155 0 0 Apoe
## Adamdec1 0 3.646689 0.499 0.071 0 0 Adamdec1
## Col3a1 0 3.180548 0.470 0.075 0 0 Col3a1
top_markers <- marker %>%
group_by(cluster) %>%
slice_max(order_by = avg_log2FC, n = 10)
# View the results
print(top_markers, n=100)
## # A tibble: 100 × 7
## # Groups: cluster [10]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 4.69 0.482 0.133 0 0 Iglc1
## 2 0 4.27 0.842 0.377 0 0 Igha
## 3 0 4.14 0.625 0.133 0 0 Jchain
## 4 0 3.81 0.851 0.392 0 0 Igkc
## 5 0 3.65 0.499 0.071 0 0 Adamdec1
## 6 0 3.18 0.47 0.075 0 0 Col3a1
## 7 0 2.86 0.273 0.044 0 0 Bgn
## 8 0 2.74 0.297 0.055 0 0 Col1a1
## 9 0 2.60 0.319 0.056 0 0 Col4a1
## 10 0 2.51 0.252 0.051 0 0 Col1a2
## 11 0 3.16 0.572 0.077 0 1 Olfm4
## 12 0 3.11 0.347 0.048 0 1 Clps
## 13 0 3.03 0.543 0.106 0 1 AY761184
## 14 0 3.02 0.72 0.197 0 1 Defa21
## 15 0 2.98 0.724 0.206 0 1 Lyz1
## 16 0 2.97 0.565 0.117 0 1 Ang4
## 17 0 2.92 0.433 0.064 0 1 Mmp7
## 18 0 2.89 0.461 0.068 0 1 Clca3b
## 19 0 2.88 0.498 0.099 0 1 Mptx2
## 20 0 2.82 0.703 0.186 0 1 Itln1
## 21 0 2.90 0.261 0.039 0 2 Ms4a18
## 22 0 2.16 0.344 0.086 0 2 Car4
## 23 0 2.02 0.261 0.07 0 2 Cyp4f40
## 24 0 2.01 0.945 0.447 0 2 Lypd8
## 25 0 1.95 0.96 0.49 0 2 Krt19
## 26 0 1.86 0.996 0.704 0 2 Reg3b
## 27 0 1.78 0.893 0.432 0 2 Fabp6
## 28 0 1.74 0.253 0.081 0 2 Mal2
## 29 0 1.61 0.981 0.656 0 2 Reg3g
## 30 0 1.49 0.446 0.173 0 2 Oat
## 31 0 5.11 0.435 0.016 0 3 Lct
## 32 0 4.37 0.614 0.04 0 3 Enpp7
## 33 0 3.89 0.255 0.019 0 3 Fam151a
## 34 0 3.83 0.322 0.032 0 3 Apoc3
## 35 0 3.62 0.267 0.023 0 3 Apoa4
## 36 0 3.60 0.389 0.037 0 3 Asah2
## 37 0 3.07 0.469 0.072 0 3 Rbp2
## 38 0 2.96 0.339 0.048 0 3 Mme
## 39 0 2.86 0.991 0.417 0 3 Fabp2
## 40 0 2.81 0.769 0.165 0 3 Slc15a1
## 41 0 3.73 0.38 0.037 0 4 Saa1
## 42 0 3.48 0.786 0.115 0 4 Tmigd1
## 43 0 3.37 0.622 0.086 0 4 Slc10a2
## 44 0 2.98 0.382 0.06 0 4 Slc13a1
## 45 0 2.72 0.289 0.048 0 4 Sprr2a3
## 46 0 2.64 0.509 0.091 0 4 Pmp22
## 47 0 2.46 0.296 0.062 0 4 Neu1
## 48 0 2.42 0.466 0.1 0 4 Sectm1b
## 49 0 2.41 0.657 0.155 0 4 Slc34a2
## 50 0 2.35 0.296 0.066 0 4 Abcg2
## 51 0 8.45 0.546 0.003 0 5 Ighd
## 52 0 7.16 0.293 0.003 0 5 H2-Ob
## 53 0 6.44 0.427 0.008 0 5 Cd79a
## 54 0 6.39 0.45 0.008 0 5 Clu
## 55 0 6.31 0.741 0.025 0 5 Ighm
## 56 0 6.28 0.307 0.006 0 5 Cd37
## 57 0 6.25 0.389 0.008 0 5 Cd79b
## 58 0 5.36 0.258 0.009 0 5 Ccl21a
## 59 0 4.92 0.265 0.013 0 5 Tbc1d10c
## 60 0 4.73 0.312 0.017 0 5 Arhgdib
## 61 0 5.93 0.339 0.01 0 6 Synm
## 62 0 5.83 0.261 0.008 0 6 Cpe
## 63 0 5.78 0.382 0.013 0 6 Pgm5
## 64 0 5.74 0.804 0.05 0 6 Actg2
## 65 0 5.59 0.746 0.046 0 6 Des
## 66 0 5.53 0.827 0.065 0 6 Tpm2
## 67 0 5.46 0.425 0.018 0 6 Cnn1
## 68 0 5.40 0.391 0.018 0 6 Dmpk
## 69 0 5.33 0.754 0.054 0 6 Myl9
## 70 0 5.31 0.826 0.07 0 6 Myh11
## 71 0 7.40 0.479 0.004 0 7 Sh2d6
## 72 0 5.85 0.313 0.005 0 7 Rgs13
## 73 0 4.78 0.444 0.018 0 7 Hck
## 74 0 3.35 0.29 0.033 0 7 Fabp1
## 75 0 3.16 0.561 0.075 0 7 Kctd12
## 76 0 2.96 0.416 0.061 0 7 Tm4sf4
## 77 0 2.23 0.646 0.154 0 7 Cd24a
## 78 0 2.23 0.48 0.12 0 7 Krt18
## 79 0 2.02 0.453 0.119 0 7 Atp2a3
## 80 0 1.60 0.514 0.196 0 7 Eppk1
## 81 0 7.00 0.332 0.003 0 8 Chga
## 82 0 6.70 0.581 0.008 0 8 Chgb
## 83 0 3.75 0.256 0.034 0 8 Sct
## 84 2.66e-269 1.25 0.34 0.157 5.08e-265 8 Olfm4
## 85 2.91e-212 1.11 0.286 0.131 5.55e-208 8 Clca3b
## 86 4.31e-144 0.976 0.25 0.126 8.21e-140 8 Mlxipl
## 87 4.97e-177 0.925 0.303 0.152 9.47e-173 8 Cps1
## 88 3.72e-124 0.822 0.308 0.177 7.10e-120 8 AY761184
## 89 5.04e-146 0.809 0.445 0.282 9.60e-142 8 Defa21
## 90 3.31e-154 0.798 0.46 0.29 6.30e-150 8 Lyz1
## 91 0 9.41 0.538 0.002 0 9 Ccl20
## 92 0 6.18 0.497 0.009 0 9 Ubd
## 93 0 5.75 0.652 0.054 0 9 Apoa4
## 94 0 4.04 0.315 0.018 0 9 Spib
## 95 0 3.95 0.515 0.041 0 9 Anxa5
## 96 0 3.91 0.446 0.035 0 9 Marcksl1
## 97 0 3.55 0.337 0.037 0 9 Ighg2b
## 98 0 3.32 0.464 0.038 0 9 Clu
## 99 1.42e-205 2.91 0.324 0.069 2.72e-201 9 Tm4sf4
## 100 0 2.77 0.701 0.197 0 9 Serpinb6a
cluster annotation
#creating cluster annotation table
cluster_annotations <- c(
"0" = "B Cells",
"1" = "Paneth Cells",
"2" = "Goblet Cells",
"3" = "Enterocytes",
"4" = "Tuft Cells",
"5" = "Plasma Cells",
"6" = "Smooth Muscle Cells",
"7" = "Macrophages",
"8" = "Neuroendocrine Cells",
"9" = "Lymphocytes"
)
object@meta.data$cell_type <- cluster_annotations[as.character(object$seurat_clusters)]
####cluster with annotation
DimPlot(object, group.by = "cell_type", label = TRUE, repel = TRUE) +
ggtitle("Annotated Clusters")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
######
SpatialDimPlot(object, group.by = "cell_type", label = TRUE, repel = TRUE) +
ggtitle("Spatial Distribution of Annotated Clusters")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Visualize epithelial integrity markers
SpatialFeaturePlot(object, features = c("Claudin", "Occludin", "Cdh1"), combine = FALSE)
## Warning: The following requested variables were not found: Claudin, Occludin
## [[1]]
# Visualize stress and inflammation markers
SpatialFeaturePlot(object, features = c("Hsp70", "Atf4", "Tnf", "Il1b"), combine = FALSE)
## Warning: The following requested variables were not found: Hsp70
## [[1]]
##
## [[2]]
##
## [[3]]
# Violin plot for expression distribution across clusters
VlnPlot(object, features = c("Claudin", "Occludin", "Tnf"), group.by = "seurat_clusters")
## Warning: The following requested variables were not found: Claudin, Occludin
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
# Inflammatory Bowel Disease (IBD) markers
SpatialFeaturePlot(object, features = c("Il10", "Il23r", "S100a8", "S100a9"), combine = FALSE)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# Colorectal cancer markers
SpatialFeaturePlot(object, features = c("Tp53", "Kras", "Apc", "Muc2"), combine = FALSE)
## Warning: The following requested variables were not found: Tp53
## [[1]]
##
## [[2]]
##
## [[3]]
# Infection-related markers
SpatialFeaturePlot(object, features = c("Defa21", "Lyz1"), combine = FALSE)
## [[1]]
##
## [[2]]
# Markers for different immune cell types
SpatialFeaturePlot(object, features = c("Cd19", "Cd79a", "Cd3e", "Cd8a", "Foxp3", "Cd68", "Hck"), combine = FALSE)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
# Cytokines and immune signaling genes
SpatialFeaturePlot(object, features = c("Il6", "Tnf", "Cxcl8"), combine = FALSE)
## Warning: The following requested variables were not found: Cxcl8
## [[1]]
##
## [[2]]
# Visualize cluster-specific immune cell types
DimPlot(object, group.by = "seurat_clusters", label = TRUE)
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
# Carbohydrate metabolism markers
SpatialFeaturePlot(object, features = c("Slc5a1", "Slc2a2"), combine = FALSE)
## [[1]]
##
## [[2]]
# Lipid metabolism markers
SpatialFeaturePlot(object, features = c("Fabp2", "Apoa4"), combine = FALSE)
## [[1]]
##
## [[2]]
# Protein absorption markers
SpatialFeaturePlot(object, features = c("Slc15a1"), combine = FALSE)
## [[1]]
# Cluster-specific expression for nutrient absorption markers
VlnPlot(object, features = c("Slc5a1", "Fabp2", "Slc15a1"), group.by = "seurat_clusters")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
# Antimicrobial peptides and immune-microbe interaction genes
SpatialFeaturePlot(object, features = c("Defa21", "Lyz1", "Reg3g", "Reg3b", "Lypd8"), combine = FALSE)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
# Barrier genes affected by microbiota
SpatialFeaturePlot(object, features = c("Muc2", "Claudin"), combine = FALSE)
## Warning: The following requested variables were not found: Claudin
## [[1]]