Research Question: What are the spatial distributions of distinct cell populations and their gene expression patterns in the mouse small intestine?

Objectives:

Health Status of the Intestine

Disease-Related Biomarkers

Immunological State

Nutrient Absorption

Microbiome Biomarkers

library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(arrow)
## 
## Attaching package: 'arrow'
## 
## The following object is masked from 'package:lubridate':
## 
##     duration
## 
## The following object is masked from 'package:utils':
## 
##     timestamp
set.seed(12345)
object <-Load10X_Spatial(data.dir = "C:/Users/iyand/Downloads/spatial 2/", bin.size = 8)
Assays(object)
## [1] "Spatial.008um"
view(object@meta.data)
VlnPlot(object, features = c("nFeature_Spatial.008um", "nCount_Spatial.008um"), pt.size = 0, raster = FALSE)
## Warning: Default search for "data" layer in "Spatial.008um" assay yielded no
## results; utilizing "counts" layer instead.

SpatialFeaturePlot(object, features = "nCount_Spatial.008um")

object <- subset(object, subset = nFeature_Spatial.008um > 200 & nFeature_Spatial.008um < 2500)
## Warning: Not validating Centroids objects
## Not validating Centroids objects
## Warning: Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Not validating FOV objects
## Warning: Not validating Seurat objects
VlnPlot(object = object, features = c("nFeature_Spatial.008um", "nCount_Spatial.008um"), pt.size = 0, raster = FALSE)
## Warning: Default search for "data" layer in "Spatial.008um" assay yielded no
## results; utilizing "counts" layer instead.

Normalization, finding Variable feature and Running PCA

object <- NormalizeData(object)
## Normalizing layer: counts
object <- FindVariableFeatures(object)
## Finding variable features for layer counts
object <- ScaleData(object)
## Centering and scaling data matrix
object <- RunPCA(object)
## PC_ 1 
## Positive:  Ighm, Lyz1, Defa21, Apoe, Itln1, Igkc, Ighd, Ang4, Cd52, AY761184 
##     Cd79a, Olfm4, Mptx2, Coro1a, Clu, Cd79b, Spink4, C3, Cd24a, Hist1h2ap 
##     Ptp4a3, Mmp7, Ptprc, H2-Ob, Laptm5, Igfbp7, Tagln, Mfge8, Ly6e, Cd37 
## Negative:  Apoa1, Fabp2, Selenop, Ace2, Slc15a1, Clca4b, Sis, Slc34a2, Cubn, Cdhr2 
##     Slc6a8, Slc6a19, Apob, Dpp4, Tmigd1, Mep1a, Ace, Slc13a2, Fabp6, H2-Q2 
##     Slc27a4, Ggt1, Mxd1, Sectm1b, Xpnpep2, Enpp7, Pdzk1, Alpi, Mpp1, Rbp2 
## PC_ 2 
## Positive:  Ighm, Ighd, Cd52, Coro1a, Cd79a, Ptprc, Clu, Cd79b, H2-Ob, Laptm5 
##     Cd37, Rac2, Arhgdib, Cd53, Cd19, Srgn, Apoe, Tbc1d10c, Selenop, Ets1 
##     Ms4a1, Sell, Mfge8, Igkc, H2-Eb2, Shisa5, Fcer2a, Pax5, Tnfrsf13c, Ptpn6 
## Negative:  Spink4, Lgals2, Defa21, Itln1, Lyz1, Muc2, Olfm4, Ang4, AY761184, Agr2 
##     Mptx2, Dmbt1, Mmp7, Cps1, Clca3b, Tff3, Clps, Pycard, Ppp1r1b, Reg4 
##     Slc12a2, Oat, Tspan1, Gvin1, Kcnq1, Krt18, Stmn1, Car8, Tmprss2, Pnliprp2 
## PC_ 3 
## Positive:  Acta2, Actg2, Tpm2, Myh11, Tagln, Myl9, Des, Cald1, Mylk, Cnn1 
##     Flna, Rgs5, Pgm5, Sparcl1, Dmpk, Csrp1, Synm, Tpm1, Synpo2, Col3a1 
##     Sparc, Col4a1, Igfbp7, Atp2b4, Pcp4, Fn1, Smtn, Uchl1, Cpe, Rbpms 
## Negative:  Lgals2, Dmbt1, Ighd, Hist1h2ap, Pycard, Cd79a, Ighm, Tff3, Cd79b, H2-Ob 
##     Agr2, Cps1, Hist1h1e, Cd37, Ms4a1, Mki67, Ppp1r1b, Cd19, Clu, Spib 
##     Muc2, Fcgbp, Coro1a, Stmn1, Cd52, Tnfrsf13c, Clca3b, Pax5, Cotl1, H2-Eb2 
## PC_ 4 
## Positive:  Actg2, Uchl1, Des, Tpm2, Myl9, Mylk, Hand2, Cpe, Synm, Pgm5 
##     Myh11, Pcp4, Dmpk, Cnn1, Pcsk1n, Sncg, Snhg11, Scg2, Acta2, Ptprn 
##     Stmn2, Tpm1, Prph, Snap25, Psd, Ighd, Tagln, Rgs5, Flna, Fxyd6 
## Negative:  Igha, Jchain, Igkc, Adamdec1, Iglc1, Igfbp3, Col3a1, Apoe, Tmem176b, Bgn 
##     Tmem176a, Cxcl14, Col6a1, Plpp3, Igfbp7, Col6a2, Col6a4, Gpx3, Tnc, Bmp5 
##     Col4a1, Col6a3, Ecm1, C1qa, Npnt, Col1a1, Csf1r, C1qb, Itih5, Igkv4-72 
## PC_ 5 
## Positive:  AY761184, Ang4, Mptx2, Defa21, Lyz1, Itln1, Clps, Mmp7, Olfm4, Pnliprp2 
##     Defa26, Reg4, Cd24a, Habp2, Nupr1, Apoc2, Spink4, Apoa4, Clca4b, Ada 
##     Clca4a, Samd5, Apoc3, Rbp2, Car8, Defa35, Ly6m, Pmp22, Slc6a19, Creb3l3 
## Negative:  Fabp6, Pycard, Dmbt1, Car4, Mt1, Oat, Ms4a18, Zg16, Cyp4f40, Lgals2 
##     Mt2, Apol10a, Fcgbp, Mal2, Ppp1r1b, Clca1, Gpx1, Reg3a, Hkdc1, Akr1c19 
##     Fzd5, Mgam2-ps, Tff3, Sis, Bmp3, Scin, Suox, Gsdmc4, Fgfbp1, Nr1h4

PC_1: Immune-metabolic axis - Positive: Immune-related genes (e.g., Ighm, Lyz1) - Negative: Absorption and metabolism-related genes (e.g., Fabp2, Apoa1)

PC_2: Immune defense vs. secretory processes - Positive: Lymphoid immune genes (e.g., Cd79a, Ptprc) - Negative: Secretory epithelial genes (e.g., Muc2, Defa21)

PC_3: Smooth muscle and extracellular matrix vs. immune proliferation - Positive: Smooth muscle and extracellular matrix genes (e.g., Acta2, Col3a1) - Negative: Immune proliferation genes (e.g., Mki67, Ighd)

PC_4: Neuronal and smooth muscle vs. extracellular matrix and immune signaling - Positive: Neuronal and smooth muscle genes (e.g., Uchl1, Actg2) - Negative: Extracellular matrix and immune-related genes

PC_5: Antimicrobial and lipid metabolism vs. absorption and epithelial defense - Positive: Antimicrobial and lipid metabolism genes (e.g., Defa21, Apoa4) - Negative: Absorption and epithelial defense genes (e.g., Fabp6, Reg3a)

object <- RunUMAP(object, dims= 1: 30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:16:14 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:16:14 Read 210280 rows and found 30 numeric columns
## 16:16:14 Using Annoy for neighbor search, n_neighbors = 30
## 16:16:14 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:16:44 Writing NN index file to temp file C:\Users\iyand\AppData\Local\Temp\RtmpCItp4z\file550c7c924a22
## 16:16:44 Searching Annoy index using 1 thread, search_k = 3000
## 16:18:28 Annoy recall = 100%
## 16:18:29 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:18:41 Initializing from normalized Laplacian + noise (using RSpectra)
## 16:18:55 Commencing optimization for 200 epochs, with 9839076 positive edges
## 16:22:25 Optimization finished
object <- FindNeighbors(object, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
object <- FindClusters(object, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 210280
## Number of edges: 5906202
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9326
## Number of communities: 13
## Elapsed time: 200 seconds
## 3 singletons identified. 10 final clusters.
DimPlot(object, group.by = "seurat_clusters", label = TRUE)
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`

cluster annotation and finding marker for each cluster

marker <- FindAllMarkers(object, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
head(marker)
##          p_val avg_log2FC pct.1 pct.2 p_val_adj cluster     gene
## Jchain       0   4.140756 0.625 0.133         0       0   Jchain
## Igha         0   4.268982 0.842 0.377         0       0     Igha
## Igkc         0   3.805956 0.851 0.392         0       0     Igkc
## Apoe         0   2.168477 0.592 0.155         0       0     Apoe
## Adamdec1     0   3.646689 0.499 0.071         0       0 Adamdec1
## Col3a1       0   3.180548 0.470 0.075         0       0   Col3a1
top_markers <- marker %>%
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 10)

# View the results
print(top_markers, n=100)
## # A tibble: 100 × 7
## # Groups:   cluster [10]
##         p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene     
##         <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>    
##   1 0              4.69  0.482 0.133 0         0       Iglc1    
##   2 0              4.27  0.842 0.377 0         0       Igha     
##   3 0              4.14  0.625 0.133 0         0       Jchain   
##   4 0              3.81  0.851 0.392 0         0       Igkc     
##   5 0              3.65  0.499 0.071 0         0       Adamdec1 
##   6 0              3.18  0.47  0.075 0         0       Col3a1   
##   7 0              2.86  0.273 0.044 0         0       Bgn      
##   8 0              2.74  0.297 0.055 0         0       Col1a1   
##   9 0              2.60  0.319 0.056 0         0       Col4a1   
##  10 0              2.51  0.252 0.051 0         0       Col1a2   
##  11 0              3.16  0.572 0.077 0         1       Olfm4    
##  12 0              3.11  0.347 0.048 0         1       Clps     
##  13 0              3.03  0.543 0.106 0         1       AY761184 
##  14 0              3.02  0.72  0.197 0         1       Defa21   
##  15 0              2.98  0.724 0.206 0         1       Lyz1     
##  16 0              2.97  0.565 0.117 0         1       Ang4     
##  17 0              2.92  0.433 0.064 0         1       Mmp7     
##  18 0              2.89  0.461 0.068 0         1       Clca3b   
##  19 0              2.88  0.498 0.099 0         1       Mptx2    
##  20 0              2.82  0.703 0.186 0         1       Itln1    
##  21 0              2.90  0.261 0.039 0         2       Ms4a18   
##  22 0              2.16  0.344 0.086 0         2       Car4     
##  23 0              2.02  0.261 0.07  0         2       Cyp4f40  
##  24 0              2.01  0.945 0.447 0         2       Lypd8    
##  25 0              1.95  0.96  0.49  0         2       Krt19    
##  26 0              1.86  0.996 0.704 0         2       Reg3b    
##  27 0              1.78  0.893 0.432 0         2       Fabp6    
##  28 0              1.74  0.253 0.081 0         2       Mal2     
##  29 0              1.61  0.981 0.656 0         2       Reg3g    
##  30 0              1.49  0.446 0.173 0         2       Oat      
##  31 0              5.11  0.435 0.016 0         3       Lct      
##  32 0              4.37  0.614 0.04  0         3       Enpp7    
##  33 0              3.89  0.255 0.019 0         3       Fam151a  
##  34 0              3.83  0.322 0.032 0         3       Apoc3    
##  35 0              3.62  0.267 0.023 0         3       Apoa4    
##  36 0              3.60  0.389 0.037 0         3       Asah2    
##  37 0              3.07  0.469 0.072 0         3       Rbp2     
##  38 0              2.96  0.339 0.048 0         3       Mme      
##  39 0              2.86  0.991 0.417 0         3       Fabp2    
##  40 0              2.81  0.769 0.165 0         3       Slc15a1  
##  41 0              3.73  0.38  0.037 0         4       Saa1     
##  42 0              3.48  0.786 0.115 0         4       Tmigd1   
##  43 0              3.37  0.622 0.086 0         4       Slc10a2  
##  44 0              2.98  0.382 0.06  0         4       Slc13a1  
##  45 0              2.72  0.289 0.048 0         4       Sprr2a3  
##  46 0              2.64  0.509 0.091 0         4       Pmp22    
##  47 0              2.46  0.296 0.062 0         4       Neu1     
##  48 0              2.42  0.466 0.1   0         4       Sectm1b  
##  49 0              2.41  0.657 0.155 0         4       Slc34a2  
##  50 0              2.35  0.296 0.066 0         4       Abcg2    
##  51 0              8.45  0.546 0.003 0         5       Ighd     
##  52 0              7.16  0.293 0.003 0         5       H2-Ob    
##  53 0              6.44  0.427 0.008 0         5       Cd79a    
##  54 0              6.39  0.45  0.008 0         5       Clu      
##  55 0              6.31  0.741 0.025 0         5       Ighm     
##  56 0              6.28  0.307 0.006 0         5       Cd37     
##  57 0              6.25  0.389 0.008 0         5       Cd79b    
##  58 0              5.36  0.258 0.009 0         5       Ccl21a   
##  59 0              4.92  0.265 0.013 0         5       Tbc1d10c 
##  60 0              4.73  0.312 0.017 0         5       Arhgdib  
##  61 0              5.93  0.339 0.01  0         6       Synm     
##  62 0              5.83  0.261 0.008 0         6       Cpe      
##  63 0              5.78  0.382 0.013 0         6       Pgm5     
##  64 0              5.74  0.804 0.05  0         6       Actg2    
##  65 0              5.59  0.746 0.046 0         6       Des      
##  66 0              5.53  0.827 0.065 0         6       Tpm2     
##  67 0              5.46  0.425 0.018 0         6       Cnn1     
##  68 0              5.40  0.391 0.018 0         6       Dmpk     
##  69 0              5.33  0.754 0.054 0         6       Myl9     
##  70 0              5.31  0.826 0.07  0         6       Myh11    
##  71 0              7.40  0.479 0.004 0         7       Sh2d6    
##  72 0              5.85  0.313 0.005 0         7       Rgs13    
##  73 0              4.78  0.444 0.018 0         7       Hck      
##  74 0              3.35  0.29  0.033 0         7       Fabp1    
##  75 0              3.16  0.561 0.075 0         7       Kctd12   
##  76 0              2.96  0.416 0.061 0         7       Tm4sf4   
##  77 0              2.23  0.646 0.154 0         7       Cd24a    
##  78 0              2.23  0.48  0.12  0         7       Krt18    
##  79 0              2.02  0.453 0.119 0         7       Atp2a3   
##  80 0              1.60  0.514 0.196 0         7       Eppk1    
##  81 0              7.00  0.332 0.003 0         8       Chga     
##  82 0              6.70  0.581 0.008 0         8       Chgb     
##  83 0              3.75  0.256 0.034 0         8       Sct      
##  84 2.66e-269      1.25  0.34  0.157 5.08e-265 8       Olfm4    
##  85 2.91e-212      1.11  0.286 0.131 5.55e-208 8       Clca3b   
##  86 4.31e-144      0.976 0.25  0.126 8.21e-140 8       Mlxipl   
##  87 4.97e-177      0.925 0.303 0.152 9.47e-173 8       Cps1     
##  88 3.72e-124      0.822 0.308 0.177 7.10e-120 8       AY761184 
##  89 5.04e-146      0.809 0.445 0.282 9.60e-142 8       Defa21   
##  90 3.31e-154      0.798 0.46  0.29  6.30e-150 8       Lyz1     
##  91 0              9.41  0.538 0.002 0         9       Ccl20    
##  92 0              6.18  0.497 0.009 0         9       Ubd      
##  93 0              5.75  0.652 0.054 0         9       Apoa4    
##  94 0              4.04  0.315 0.018 0         9       Spib     
##  95 0              3.95  0.515 0.041 0         9       Anxa5    
##  96 0              3.91  0.446 0.035 0         9       Marcksl1 
##  97 0              3.55  0.337 0.037 0         9       Ighg2b   
##  98 0              3.32  0.464 0.038 0         9       Clu      
##  99 1.42e-205      2.91  0.324 0.069 2.72e-201 9       Tm4sf4   
## 100 0              2.77  0.701 0.197 0         9       Serpinb6a

cluster annotation

#creating cluster annotation table
cluster_annotations <- c(
  "0" = "B Cells",
  "1" = "Paneth Cells",
  "2" = "Goblet Cells",
  "3" = "Enterocytes",
  "4" = "Tuft Cells",
  "5" = "Plasma Cells",
  "6" = "Smooth Muscle Cells",
  "7" = "Macrophages",
  "8" = "Neuroendocrine Cells",
  "9" = "Lymphocytes"
)

object@meta.data$cell_type <- cluster_annotations[as.character(object$seurat_clusters)]
####cluster with annotation
DimPlot(object, group.by = "cell_type", label = TRUE, repel = TRUE) +
  ggtitle("Annotated Clusters")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`

######

SpatialDimPlot(object, group.by = "cell_type", label = TRUE, repel = TRUE) +
  ggtitle("Spatial Distribution of Annotated Clusters")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Visualize epithelial integrity markers
SpatialFeaturePlot(object, features = c("Claudin", "Occludin", "Cdh1"), combine = FALSE)
## Warning: The following requested variables were not found: Claudin, Occludin
## [[1]]

# Visualize stress and inflammation markers
SpatialFeaturePlot(object, features = c("Hsp70", "Atf4", "Tnf", "Il1b"), combine = FALSE)
## Warning: The following requested variables were not found: Hsp70
## [[1]]

## 
## [[2]]

## 
## [[3]]

# Violin plot for expression distribution across clusters
VlnPlot(object, features = c("Claudin", "Occludin", "Tnf"), group.by = "seurat_clusters")
## Warning: The following requested variables were not found: Claudin, Occludin
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`

# Inflammatory Bowel Disease (IBD) markers
SpatialFeaturePlot(object, features = c("Il10", "Il23r", "S100a8", "S100a9"), combine = FALSE)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Colorectal cancer markers
SpatialFeaturePlot(object, features = c("Tp53", "Kras", "Apc", "Muc2"), combine = FALSE)
## Warning: The following requested variables were not found: Tp53
## [[1]]

## 
## [[2]]

## 
## [[3]]

# Infection-related markers
SpatialFeaturePlot(object, features = c("Defa21", "Lyz1"), combine = FALSE)
## [[1]]

## 
## [[2]]

# Markers for different immune cell types
SpatialFeaturePlot(object, features = c("Cd19", "Cd79a", "Cd3e", "Cd8a", "Foxp3", "Cd68", "Hck"), combine = FALSE)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

# Cytokines and immune signaling genes
SpatialFeaturePlot(object, features = c("Il6", "Tnf", "Cxcl8"), combine = FALSE)
## Warning: The following requested variables were not found: Cxcl8
## [[1]]

## 
## [[2]]

# Visualize cluster-specific immune cell types
DimPlot(object, group.by = "seurat_clusters", label = TRUE)
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`

# Carbohydrate metabolism markers
SpatialFeaturePlot(object, features = c("Slc5a1", "Slc2a2"), combine = FALSE)
## [[1]]

## 
## [[2]]

# Lipid metabolism markers
SpatialFeaturePlot(object, features = c("Fabp2", "Apoa4"), combine = FALSE)
## [[1]]

## 
## [[2]]

# Protein absorption markers
SpatialFeaturePlot(object, features = c("Slc15a1"), combine = FALSE)
## [[1]]

# Cluster-specific expression for nutrient absorption markers
VlnPlot(object, features = c("Slc5a1", "Fabp2", "Slc15a1"), group.by = "seurat_clusters")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`

# Antimicrobial peptides and immune-microbe interaction genes
SpatialFeaturePlot(object, features = c("Defa21", "Lyz1", "Reg3g", "Reg3b", "Lypd8"), combine = FALSE)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

# Barrier genes affected by microbiota
SpatialFeaturePlot(object, features = c("Muc2", "Claudin"), combine = FALSE)
## Warning: The following requested variables were not found: Claudin
## [[1]]