Seurat_ BDS3

Активізували поле - CNTR + ALT + I ###### Встановлення пакетів #####

#install.packages("Seurat")

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(Seurat)

## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)

## Attaching SeuratObject

library(patchwork)

# Load the PBMC dataset
pbmc.data <- Read10X(data.dir = "/Users/Lenovo/Downloads/BDS3/")
# Initialize the Seurat object with the raw (non-normalized data).
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)

## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')

pbmc

## An object of class Seurat 
## 13714 features across 2700 samples within 1 assay 
## Active assay: RNA (13714 features, 0 variable features)

pbmc.data[c("CD3D", "TCL1A", "MS4A1"), 1:30]

## 3 x 30 sparse Matrix of class "dgCMatrix"

##   [[ suppressing 30 column names 'AAACATACAACCAC-1', 'AAACATTGAGCTAC-1', 'AAACATTGATCAGC-1' ... ]]

##                                                                    
## CD3D  4 . 10 . . 1 2 3 1 . . 2 7 1 . . 1 3 . 2  3 . . . . . 3 4 1 5
## TCL1A . .  . . . . . . 1 . . . . . . . . . . .  . 1 . . . . . . . .
## MS4A1 . 6  . . . . . . 1 1 1 . . . . . . . . . 36 1 2 . . 2 . . . .

Якість контролю даних

# The [[ operator can add columns to object metadata. This is a great place to stash QC stats
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")

head(pbmc@meta.data, 5)

##                  orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACATACAACCAC-1     pbmc3k       2419          779  3.0177759
## AAACATTGAGCTAC-1     pbmc3k       4903         1352  3.7935958
## AAACATTGATCAGC-1     pbmc3k       3147         1129  0.8897363
## AAACCGTGCTTCCG-1     pbmc3k       2639          960  1.7430845
## AAACCGTGTATGCG-1     pbmc3k        980          521  1.2244898

meta_data <- pbmc@meta.data

pbmc@assays$RNA

## Assay data with 13714 features for 2700 cells
## First 10 features:
##  AL627309.1, AP006222.2, RP11-206L10.2, RP11-206L10.9, LINC00115, NOC2L,
## KLHL17, PLEKHN1, RP11-54O7.17, HES4

VlnPlot(pbmc, features = c("percent.mt", "nFeature_RNA", "nCount_RNA"), ncol = 3)

# FeatureScatter is typically used to visualize feature-feature relationships, but can be used
# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.

plot1 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2

pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)

pbmc

## An object of class Seurat 
## 13714 features across 2638 samples within 1 assay 
## Active assay: RNA (13714 features, 0 variable features)

pbmc2 <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5 & nCount_RNA < 5000 & nCount_RNA > 1000)

pbmc2

## An object of class Seurat 
## 13714 features across 2447 samples within 1 assay 
## Active assay: RNA (13714 features, 0 variable features)

Нормалізація даних

pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- SCTransform(pbmc)

## Calculating cell attributes from input UMI matrix: log_umi

## Variance stabilizing transformation of count matrix of size 12519 by 2638

## Model formula is y ~ log_umi

## Get Negative Binomial regression parameters per gene

## Using 2000 genes, 2638 cells

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%

## Found 137 outliers - those will be ignored in fitting/regularization step

## Second step: Get residuals using fitted parameters for 12519 genes

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%

## Computing corrected count matrix for 12519 genes

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%

## Calculating gene attributes

## Wall clock passed: Time difference of 35.985 secs

## Determine variable features

## Place corrected count matrix in counts slot

## Centering data matrix

## Set default assay to SCT

# pbmc@assays$SCT@data

sct_mtrix <- as.matrix(pbmc@assays$SCT@data)
raw_mtrix <- as.matrix(pbmc@assays$SCT@data)

 sct_mtrix [10:15, 1:5]

##          AAACATACAACCAC-1 AAACATTGAGCTAC-1 AAACATTGATCAGC-1 AAACCGTGCTTCCG-1
## C1orf159                0        0.0000000        0.0000000                0
## TNFRSF18                0        0.6931472        0.0000000                0
## TNFRSF4                 0        0.0000000        0.0000000                0
## SDF4                    0        0.0000000        0.6931472                0
## B3GALT6                 0        0.0000000        0.0000000                0
## UBE2J2                  0        0.0000000        0.0000000                0
##          AAACCGTGTATGCG-1
## C1orf159                0
## TNFRSF18                0
## TNFRSF4                 0
## SDF4                    0
## B3GALT6                 0
## UBE2J2                  0

 raw_mtrix [10:15, 1:5]

##          AAACATACAACCAC-1 AAACATTGAGCTAC-1 AAACATTGATCAGC-1 AAACCGTGCTTCCG-1
## C1orf159                0        0.0000000        0.0000000                0
## TNFRSF18                0        0.6931472        0.0000000                0
## TNFRSF4                 0        0.0000000        0.0000000                0
## SDF4                    0        0.0000000        0.6931472                0
## B3GALT6                 0        0.0000000        0.0000000                0
## UBE2J2                  0        0.0000000        0.0000000                0
##          AAACCGTGTATGCG-1
## C1orf159                0
## TNFRSF18                0
## TNFRSF4                 0
## SDF4                    0
## B3GALT6                 0
## UBE2J2                  0

Ідентифікація високо варіабельних ознак (відбір ознак)

pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 1000)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(pbmc), 10)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)

## When using repel, set xnudge and ynudge to 0 for optimal results

plot1 + plot2

Виконайте лінійне зменшення розмірів

pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))

## PC_ 1 
## Positive:  FTL, LYZ, FTH1, S100A9, CST3, S100A8, TYROBP, FCN1, AIF1, LST1 
##     LGALS1, FCER1G, LGALS2, S100A4, SAT1, S100A6, TYMP, CTSS, COTL1, IFITM3 
##     CFD, HLA-DRA, PSAP, S100A11, GPX1, OAZ1, GSTP1, SERPINA1, CD14, CD68 
## Negative:  MALAT1, CCL5, NKG7, RPS27A, LTB, IL32, RPS6, CD3D, GZMA, PTPRCAP 
##     CST7, CTSW, RPL13, GNLY, CD7, GZMB, PRF1, IL7R, RPL34, CXCR4 
##     FGFBP2, GZMH, CD247, RPS5, GZMK, AES, CD2, JUN, CD69, CD8B 
## PC_ 2 
## Positive:  CD74, HLA-DRA, CD79A, HLA-DQA1, HLA-DPB1, HLA-DQB1, CD79B, TCL1A, MS4A1, HLA-DRB1 
##     HLA-DPA1, LINC00926, HLA-DRB5, VPREB3, HLA-DQA2, LTB, CD37, FCER2, HLA-DMA, RPL13 
##     HLA-DMB, RPS5, FCRLA, HVCN1, IGLL5, KIAA0125, EAF2, P2RX5, BLNK, RPS6 
## Negative:  NKG7, CCL5, GNLY, GZMB, GZMA, CST7, PRF1, FGFBP2, CTSW, GZMH 
##     CCL4, SPON2, FCGR3A, CD247, CLIC3, HOPX, KLRD1, S100A4, XCL2, TYROBP 
##     ACTB, AKR1C3, IGFBP7, FCER1G, SRGN, TTC38, APMAP, CCL3, CD7, IL32 
## PC_ 3 
## Positive:  JUNB, RPS6, IL7R, S100A8, RPL13, S100A9, CD3D, LTB, NOSIP, RPS27A 
##     RPL34, AQP3, MAL, IL32, FYB, CD27, CD2, RGS10, LDLRAP1, NGFRAP1 
##     GIMAP7, JUN, GIMAP5, NELL2, FTL, PASK, S100A6, GIMAP4, SLC2A3, NDFIP1 
## Negative:  CD74, HLA-DRA, NKG7, GZMB, HLA-DPB1, CD79A, GNLY, HLA-DQA1, HLA-DRB1, FGFBP2 
##     HLA-DPA1, PRF1, HLA-DQB1, CD79B, CST7, GZMA, TCL1A, CCL5, MS4A1, GZMH 
##     HLA-DRB5, CTSW, CCL4, SPON2, LINC00926, HLA-DQA2, FCGR3A, VPREB3, HLA-DMA, CLIC3 
## PC_ 4 
## Positive:  S100A8, S100A9, LYZ, LGALS2, CD14, GPX1, MS4A6A, GSTP1, S100A12, FOLR3 
##     FCN1, CCL3, CEBPD, RBP7, GRN, IL8, ID1, GAPDH, ALDH2, ASGR1 
##     NKG7, PLBD1, RPL13, FCGR1A, NFKBIA, CD79A, LY86, IL1B, GNLY, TALDO1 
## Negative:  FCGR3A, LST1, FCER1G, FTH1, AIF1, IFITM3, MS4A7, COTL1, TIMP1, SAT1 
##     SERPINA1, ACTB, CEBPB, SPI1, ABI3, FTL, WARS, LYN, CFD, PSAP 
##     HLA-DPA1, S100A11, S100A4, CD68, ASAH1, CTSC, CTSS, APOBEC3A, IFI30, OAZ1 
## PC_ 5 
## Positive:  CCL5, GPX1, PPBP, PF4, SDPR, NRGN, SPARC, GNG11, RGS18, HIST1H2AC 
##     CD9, CLU, GP9, TUBB1, ITGA2B, AP001189.4, RUFY1, CA2, MPP1, PTCRA 
##     TREML1, TMEM40, PGRMC1, ACRBP, F13A1, MMD, GRAP2, NCOA4, NGFRAP1, MYL9 
## Negative:  FCGR3A, LST1, AIF1, GNLY, FCER1G, IFITM3, MALAT1, FTL, RPS6, GZMB 
##     TYROBP, S100A4, FGFBP2, PRF1, JUNB, RPL34, SPON2, RPS27A, RPL13, S100A11 
##     FTH1, MS4A7, CTSS, SERPINA1, CD247, APOBEC3A, IGFBP7, CD7, PLAC8, ISG15

print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)

## PC_ 1 
## Positive:  FTL, LYZ, FTH1, S100A9, CST3 
## Negative:  MALAT1, CCL5, NKG7, RPS27A, LTB 
## PC_ 2 
## Positive:  CD74, HLA-DRA, CD79A, HLA-DQA1, HLA-DPB1 
## Negative:  NKG7, CCL5, GNLY, GZMB, GZMA 
## PC_ 3 
## Positive:  JUNB, RPS6, IL7R, S100A8, RPL13 
## Negative:  CD74, HLA-DRA, NKG7, GZMB, HLA-DPB1 
## PC_ 4 
## Positive:  S100A8, S100A9, LYZ, LGALS2, CD14 
## Negative:  FCGR3A, LST1, FCER1G, FTH1, AIF1 
## PC_ 5 
## Positive:  CCL5, GPX1, PPBP, PF4, SDPR 
## Negative:  FCGR3A, LST1, AIF1, GNLY, FCER1G

VizDimLoadings(pbmc, dims = 1:2, reduction = "pca")

DimPlot(pbmc, reduction = "pca")

DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)

Визначте “розмірність” набору даних

# NOTE: This process can take a long time for big datasets, comment out for expediency. More
# approximate techniques such as those implemented in ElbowPlot() can be used to reduce
# computation time
# pbmc <- JackStraw(pbmc, num.replicate = 100)
# pbmc <- ScoreJackStraw(pbmc, dims = 1:20)
# 
# JackStrawPlot(pbmc, dims = 1:15)

ElbowPlot(pbmc)

Кластеризація даних

pbmc <- FindNeighbors(pbmc, dims = 1:10)

## Computing nearest neighbor graph

## Computing SNN

pbmc <- FindClusters(pbmc, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2638
## Number of edges: 91735
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8939
## Number of communities: 10
## Elapsed time: 0 seconds

# Look at cluster IDs of the first 5 cells
head(Idents(pbmc), 5)

## AAACATACAACCAC-1 AAACATTGAGCTAC-1 AAACATTGATCAGC-1 AAACCGTGCTTCCG-1 
##                4                3                0                2 
## AAACCGTGTATGCG-1 
##                5 
## Levels: 0 1 2 3 4 5 6 7 8 9

Запустити нелінійне зменшення розмірності (UMAP/tSNE)

# If you haven't installed UMAP, you can do so via reticulate::py_install(packages =
# 'umap-learn')
pbmc <- RunUMAP(pbmc, dims = 1:10)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 16:15:34 UMAP embedding parameters a = 0.9922 b = 1.112

## 16:15:34 Read 2638 rows and found 10 numeric columns

## 16:15:34 Using Annoy for neighbor search, n_neighbors = 30

## 16:15:34 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 16:15:35 Writing NN index file to temp file C:\Users\Lenovo\AppData\Local\Temp\Rtmp8q5D1L\file3e8812cf29ec
## 16:15:35 Searching Annoy index using 1 thread, search_k = 3000
## 16:15:35 Annoy recall = 100%
## 16:15:35 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:15:36 Initializing from normalized Laplacian + noise (using irlba)
## 16:15:36 Commencing optimization for 500 epochs, with 106716 positive edges
## 16:15:42 Optimization finished

# note that you can set `label = TRUE` or use the LabelClusters function to help label
# individual clusters
DimPlot(pbmc, reduction = "umap")

Пошук диференційовано виражених ознак (кластерних біомаркерів)

# find all markers of cluster 2
cluster2.markers <- FindMarkers(pbmc, ident.1 = 2, min.pct = 0.25)

## For a more efficient implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the limma package
## --------------------------------------------
## install.packages('BiocManager')
## BiocManager::install('limma')
## --------------------------------------------
## After installation of limma, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session

head(cluster2.markers, n = 5)

##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## S100A9  0.000000e+00   4.415480 0.992 0.207  0.000000e+00
## S100A8  0.000000e+00   3.906618 0.965 0.106  0.000000e+00
## FCN1    0.000000e+00   2.342009 0.955 0.137  0.000000e+00
## LGALS2  0.000000e+00   2.177497 0.904 0.053  0.000000e+00
## CD14   5.160135e-296   1.287849 0.660 0.024 6.459973e-292

# find all markers distinguishing cluster 5 from clusters 0 and 3
cluster5.markers <- FindMarkers(pbmc, ident.1 = 5, ident.2 = c(0, 3), min.pct = 0.25)
head(cluster5.markers, n = 5)

##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## GZMB   1.930652e-189   3.366091 0.955 0.029 2.416983e-185
## PRF1   1.355180e-167   2.669407 0.942 0.054 1.696549e-163
## FGFBP2 8.925915e-164   2.564747 0.852 0.029 1.117435e-159
## GNLY   3.555642e-160   4.501668 0.981 0.090 4.451308e-156
## NKG7   2.074474e-157   4.562264 1.000 0.109 2.597034e-153

# find markers for every cluster compared to all remaining cells, report only the positive
# ones
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)

## Calculating cluster 0

## Calculating cluster 1

## Calculating cluster 2

## Calculating cluster 3

## Calculating cluster 4

## Calculating cluster 5

## Calculating cluster 6

## Calculating cluster 7

## Calculating cluster 8

## Calculating cluster 9

pbmc.markers %>%
    group_by(cluster) %>%
    slice_max(n = 2, order_by = avg_log2FC)

## # A tibble: 20 × 7
## # Groups:   cluster [10]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene    
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>   
##  1 1.81e-123      1.33  0.976 0.627 2.26e-119 0       LTB     
##  2 4.23e-100      0.967 0.92  0.45  5.30e- 96 0       IL32    
##  3 2.01e-122      0.824 1     0.984 2.52e-118 1       RPS3A   
##  4 2.85e- 57      0.773 0.953 0.91  3.56e- 53 1       RPL34   
##  5 0              4.42  0.992 0.207 0         2       S100A9  
##  6 5.78e-273      4.00  1     0.53  7.23e-269 2       LYZ     
##  7 1.13e-186      2.60  1     0.839 1.42e-182 3       CD74    
##  8 0              2.42  0.928 0.037 0         3       CD79A   
##  9 3.42e-135      2.02  0.955 0.239 4.28e-131 4       CCL5    
## 10 6.32e-201      1.75  0.704 0.056 7.91e-197 4       GZMK    
## 11 1.35e-211      4.24  0.981 0.119 1.69e-207 5       GNLY    
## 12 4.36e-144      3.47  1     0.238 5.46e-140 5       NKG7    
## 13 1.88e-122      2.63  1     0.312 2.35e-118 6       LST1    
## 14 1.67e-178      2.39  0.974 0.136 2.10e-174 6       FCGR3A  
## 15 1.48e-114      2.62  1     0.242 1.86e-110 7       NKG7    
## 16 6.67e-113      2.58  1     0.26  8.35e-109 7       CCL5    
## 17 3.47e- 23      2.63  1     0.513 4.35e- 19 8       HLA-DPB1
## 18 7.98e-223      2.50  0.788 0.01  9.99e-219 8       FCER1A  
## 19 1.80e-102      5.98  1     0.024 2.25e- 98 9       PPBP    
## 20 8.76e-186      4.69  1     0.011 1.10e-181 9       PF4

cluster0.markers <- FindMarkers(pbmc, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE)

## Warning: The following arguments are not used: norm.method

VlnPlot(pbmc, features = c("MS4A1", "CD79A"))

# you can plot raw counts as well
VlnPlot(pbmc, features = c("NKG7", "PF4"), slot = "counts", log = TRUE)

FeaturePlot(pbmc, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP",
    "CD8A"))

pbmc.markers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(pbmc, features = top10$gene) + NoLegend()

## Warning in DoHeatmap(pbmc, features = top10$gene): The following features were
## omitted as they were not found in the scale.data slot for the SCT assay: RPL31

new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono",
    "NK", "DC", "Platelet", "T")
names(new.cluster.ids) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, new.cluster.ids)
DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

Нова спроба

pbmc <- FindNeighbors(pbmc, dims = 1:8)

## Computing nearest neighbor graph

## Computing SNN

pbmc <- FindClusters(pbmc, resolution = 0.1)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2638
## Number of edges: 85069
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9692
## Number of communities: 5
## Elapsed time: 0 seconds

head(Idents(pbmc), 5)

## AAACATACAACCAC-1 AAACATTGAGCTAC-1 AAACATTGATCAGC-1 AAACCGTGCTTCCG-1 
##                2                3                0                1 
## AAACCGTGTATGCG-1 
##                2 
## Levels: 0 1 2 3 4

pbmc <- RunUMAP(pbmc, dims = 1:8)

## 16:16:11 UMAP embedding parameters a = 0.9922 b = 1.112

## 16:16:11 Read 2638 rows and found 8 numeric columns

## 16:16:11 Using Annoy for neighbor search, n_neighbors = 30

## 16:16:11 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 16:16:12 Writing NN index file to temp file C:\Users\Lenovo\AppData\Local\Temp\Rtmp8q5D1L\file3e886d082708
## 16:16:12 Searching Annoy index using 1 thread, search_k = 3000
## 16:16:12 Annoy recall = 100%
## 16:16:13 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:16:13 Initializing from normalized Laplacian + noise (using irlba)
## 16:16:13 Commencing optimization for 500 epochs, with 104174 positive edges
## 16:16:19 Optimization finished

DimPlot(pbmc, reduction = "umap")

cluster2.markers <- FindMarkers(pbmc, ident.1 = 2, min.pct = 0.25)
head(cluster2.markers, n = 4)

##              p_val avg_log2FC pct.1 pct.2     p_val_adj
## GZMA  0.000000e+00   1.877496 0.780 0.052  0.000000e+00
## CST7  0.000000e+00   1.923400 0.772 0.049  0.000000e+00
## NKG7  0.000000e+00   3.685017 0.912 0.130  0.000000e+00
## CCL5 1.091124e-302   3.054144 0.887 0.158 1.365978e-298

cluster5.markers <- FindMarkers(pbmc, ident.1 = 4, ident.2 = c(0, 3), min.pct = 0.25)
head(cluster5.markers, n = 4)

##                       p_val avg_log2FC pct.1 pct.2     p_val_adj
## FCGR3A        2.707925e-225   2.607119 0.901 0.042 3.390051e-221
## RP11-290F20.3 1.536609e-218   1.337713 0.765 0.016 1.923681e-214
## CD68          1.834157e-217   1.512291 0.877 0.038 2.296181e-213
## CFD           3.537559e-214   1.818856 0.840 0.033 4.428671e-210

pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)

## Calculating cluster 0

## Calculating cluster 1

## Calculating cluster 2

## Calculating cluster 3

## Calculating cluster 4

pbmc.markers %>%
    group_by(cluster) %>%
    slice_max(n = 2, order_by = avg_log2FC)

## # A tibble: 10 × 7
## # Groups:   cluster [5]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr> 
##  1 4.46e-231       1.12 0.943 0.468 5.59e-227 0       LDHB  
##  2 1.22e-130       1.06 0.943 0.532 1.52e-126 0       LTB   
##  3 0               4.39 0.962 0.201 0         1       S100A9
##  4 2.91e-295       4.30 1     0.522 3.64e-291 1       LYZ   
##  5 0               3.69 0.912 0.13  0         2       NKG7  
##  6 6.77e-113       3.06 0.475 0.096 8.48e-109 2       GNLY  
##  7 1.13e-186       2.60 1     0.839 1.42e-182 3       CD74  
##  8 0               2.42 0.928 0.037 0         3       CD79A 
##  9 1.65e-106       2.52 0.926 0.314 2.06e-102 4       LST1  
## 10 3.24e-160       2.31 0.901 0.137 4.06e-156 4       FCGR3A

cluster0.markers <- FindMarkers(pbmc, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE)

## Warning: The following arguments are not used: norm.method

VlnPlot(pbmc, features = c("MS4A1", "CD79A"))

0 - кодує субодиницю В ферменту лактатдегідрогенази 1 - є мембранним білком II типу сімейства TNF; лімфоїдна тканина. 2 - ген, що кодує білки; більшість типів тканин. 3 - 4 -

VlnPlot(pbmc, features = c("NKG7", "PF4"), slot = "counts", log = TRUE)

FeaturePlot(pbmc, features = c("LDHB", "LTB", "S100A9", "LYZ", "NKG7"))

pbmc.markers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(pbmc, features = top10$gene) + NoLegend()

## Warning in DoHeatmap(pbmc, features = top10$gene): The following features were
## omitted as they were not found in the scale.data slot for the SCT assay: RPS25

new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono",
    "NK", "DC", "Platelet")
names(new.cluster.ids) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, new.cluster.ids)

## Warning: Cannot find identity NA

## Warning: Cannot find identity NA

## Warning: Cannot find identity NA

## Warning: Cannot find identity NA

DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()