Setup: Libraries and Data Import
# load packages
library(dplyr)
library(Seurat)
library(patchwork)
library(SingleCellExperiment)
library(SingleR)
library(cowplot)
library(ggplot2)
library(gridExtra)
Import raw data
# Stimulated PBMC samples
stim <- read.delim("scRNAseq/data/clean_RSEC_MolsPerCell_stim.txt", skip = 7, row.names = 1)
stim_count <- stim[, 3:ncol(stim)]
stim_meta <- stim[, 1:2]
stim_meta$cartrige <- 1
stim_meta$stim <- "stim"
stim <- CreateSeuratObject(counts = t(stim_count),
project = "NFAT_stim",
meta.data = stim_meta)
# Unstimulated PBMC samples
unstim <- read.delim("scRNAseq/data/clean_RSEC_MolsPerCell_unstim.txt", skip = 7, row.names = 1)
unstim_count <- unstim[, 3:ncol(unstim)]
unstim_meta <- unstim[, 1:2]
unstim_meta$cartrige <- 2
unstim_meta$stim <- "unstim"
unstim <- CreateSeuratObject(counts = t(unstim_count),
project = "NFAT_unstim",
meta.data = unstim_meta)
Merge dataset, no batch correction
The samples were normalized but kept separate by the stim/unstim identity.
comb <- merge(stim, unstim, add.cell.ids = c("stim", "unstim"))
data <- SplitObject(comb, split.by = "stim")
data <- lapply(X = data, FUN = function(x) {
x <- FindVariableFeatures(x) %>%
NormalizeData(.) %>%
ScaleData(.) %>%
RunPCA(.) %>%
RunUMAP(., dims = 1:30) %>%
RunTSNE(.) %>%
FindNeighbors(.) %>%
FindClusters(.)
})
## Finding variable features for layer counts.NFAT_stim
## Normalizing layer: counts.NFAT_stim
## Centering and scaling data matrix
## PC_ 1
## Positive: CYBB, CCR1, ITGAX, TLR2, TGFBI, NRP1, CTSD, IL3RA, S100A9, LILRB4
## LGALS3, FCER1G, CXCL16, ANXA5, CCR5, MMP9, CD9, ICAM1, LGALS9, CD163
## CD36, LAT2, AQP9, FCGR3A, MGST1, CD63, LGALS1, FCN1, CCL3, HLA.DMA
## Negative: CD69, IRF4, CD2, STAT5A, TRAC, TRBC2, ITK, CD3D, RPL6, MYC
## CD3E, ICOS, NFKB1, FYN, LDHA, CD6, TNFAIP8, IL2, VSIR, IL23A
## SELL, STAT4, CD7, EIF5A, LRRC8B, TNFRSF25, LEF1, DUSP2, IL32, F5
## PC_ 2
## Positive: RPL6, BCL2A1, EIF5A, ATF4, CD40, CCR7, CXCR5, HLA.DRA, NFKB1, PSME2
## ILF2, HLA.DQB1, EIF1, IRF1, TUBA1C, MYC, CD74, MS4A1, HLA.DPA1, LYN
## CD44, CD83, NFKBIA, IL6, EBF1, CD22, CYCS, ABCE1, FOSL1, IRF8
## Negative: NKG7, CCL5, PRF1, FASLG, APOBEC3G, GZMB, GNLY, KLRC3, IFNG, GZMH
## KLRK1, KLRC1, VSIR, LAG3, IL2RB, CX3CR1, B3GAT1, S100A10, CCL4, EVI2A
## CTSW, CSF2, HAVCR2, CXCR3, CD226, IL18RAP, TRDC, CD300A, PRDM1, GZMA
## PC_ 3
## Positive: CD2, TRAC, ICOS, IER3, ITK, PTPRC, SELL, CD3D, STAT1, TGFBI
## DUSP6, CCR1, TLR2, CD3E, PFN1, NRP1, LEF1, S100A9, CTSD, CD7
## F5, MMP9, CD109, LILRB4, CCR5, CXCL16, FTH1, TNFRSF25, IL3RA, GBP2
## Negative: CCL4, CCL3, CCR7, CD40, HLA.DQB1, MS4A1, CXCR5, IL6, CD83, EBF1
## LYN, LTA, CD74, CD22, HLA.DPA1, HLA.DRA, PIK3AP1, NKG7, PRF1, CCL5
## TNF, REL, BIRC3, APOBEC3G, POU2AF1, CD79A, GZMB, DUSP4, KLRC3, CR2
## PC_ 4
## Positive: IL2, FOSB, EGR1, NR4A2, FOS, EGR3, TNF, CD40LG, JUN, IL23A
## NFKBIZ, DUSP1, IL7R, MAST4, CCL20, TNFSF8, NFKBID, FOSL1, RORC, TGIF1
## IL21, IL6ST, IL1R1, IFRD1, SPAG9, MYC, CCR4, CD200, RAPH1, CXCL3
## Negative: PFN1, GZMB, NKG7, IL2RB, PRF1, GAPDH, GNLY, APOBEC3G, LDHA, KLRK1
## HLA.A, GZMH, KLRC3, JAK1, IL2RG, B3GAT1, CTSW, CX3CR1, CCL5, PDIA6
## HAVCR2, PTPN7, KLRC1, CD44, CD300A, CD52, GZMA, ARPC1B, GNAI2, TRDC
## PC_ 5
## Positive: IL2, PDIA6, TNFRSF4, TNFSF8, TNF, IER3, PFN1, F5, TRIB1, CD200
## DUSP4, TBX21, CCR4, IL21, GAPDH, EIF5A, LTA, VSIR, TNFAIP8, RPL6
## TUBA1C, CD3D, TNFRSF25, CD274, CSF2, BCAT1, FURIN, BATF, CD52, CCND2
## Negative: NR4A2, FOS, FOSB, BTG1, CREM, EGR3, NFKBIZ, SDCBP, CSRNP1, EGR1
## NFKB1, REL, FBXO11, JUN, SPAG9, IFRD1, DUSP2, EIF5, JUNB, EVI2B
## PIK3IP1, CXCR4, HLA.A, IL6ST, CD69, DUSP1, MAST4, IL7R, IRF7, NANS
## 21:14:04 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:14:04 Read 22753 rows and found 30 numeric columns
## 21:14:04 Using Annoy for neighbor search, n_neighbors = 30
## 21:14:04 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:14:06 Writing NN index file to temp file C:\Users\suanni\AppData\Local\Temp\Rtmp40d4qa\file6e9c46de6424
## 21:14:06 Searching Annoy index using 1 thread, search_k = 3000
## 21:14:12 Annoy recall = 100%
## 21:14:12 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 21:14:14 Initializing from normalized Laplacian + noise (using RSpectra)
## 21:14:14 Commencing optimization for 200 epochs, with 1032308 positive edges
## 21:14:14 Using rng type: pcg
## 21:14:36 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 22753
## Number of edges: 706528
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8608
## Number of communities: 13
## Elapsed time: 4 seconds
## Finding variable features for layer counts.NFAT_unstim
## Normalizing layer: counts.NFAT_unstim
## Centering and scaling data matrix
## PC_ 1
## Positive: HLA.A, CD3E, LCK, TRAC, TRBC2, RPL6, CD2, PTPRC, ITK, FYN
## LAT, BTG1, PIK3IP1, CD6, IL32, LEF1, IL2RG, BCL11B, GIMAP5, CD52
## CD5, SELPLG, JAK1, SELL, IL7R, PASK, GIMAP2, CD247, JAK3, IFITM2
## Negative: LILRB4, CCR1, SLC7A7, CYBB, ITGAX, LGALS3, KCNE3, CD86, NRP1, LYN
## MITF, CD163, SNX8, CXCL16, MMP9, CD33, LY86, HLA.DMA, CD36, IRF8
## CD9, LGALS9, DUSP6, S100A9, TGFBI, TRIB1, ICAM1, CTSD, TLR2, HLA.DRA
## PC_ 2
## Positive: CD79A, MS4A1, CD79B, IGHD, IGHM, POU2AF1, TNFRSF13C, TCL1A, VPREB3, CXCR5
## CD22, IGHG2, HLA.DQB1, IGHG3, BLNK, IGKC, CD40, CD74, CD24, CD37
## CD72, IRF8, IGLC3, IGHG1, HLA.DRA, SP140, MZB1, CCR7, HLA.DMA, IGHG4
## Negative: SELPLG, ITGB2, S100A10, FCGR3A, PFN1, FYB1, FTH1, CCR1, LILRB4, CTSD
## ARL4C, CD163, FCER1G, TGFBI, KCNE3, LGALS3, MMP9, CD300A, NRP1, ITGAX
## CD2, CD3E, MITF, CD33, CD14, HAVCR2, S100A9, CMKLR1, CD36, GIMAP5
## PC_ 3
## Positive: NKG7, PRF1, CCL5, GZMB, CST7, KLRC3, CX3CR1, GNLY, CTSW, GZMH
## TBX21, APOBEC3G, GZMA, KLRC1, TRDC, IL2RB, KLRK1, CD300A, KLRF1, ZNF683
## CD244, FCGR3A, FASLG, IL18RAP, B3GAT1, LAG3, RUNX3, CCL4, CXCR3, KLRG1
## Negative: LEF1, CCR7, CD4, SELL, IL6ST, MYC, PASK, CD27, TRAC, CD5
## FYB1, IL7R, PIK3IP1, GIMAP5, CD6, LAT, TNFAIP8, FTH1, LTB, RPL6
## IL6R, BCL11B, TCF7, ITK, ICOS, EIF1, CD3E, CD28, SOCS3, TRBC2
## PC_ 4
## Positive: CREM, PSME2, NFKB1, RGS1, EIF5A, DUSP5, STAT1, GBP1, DUSP4, IRF1
## CD69, IRF4, IRF7, ATF4, DUSP2, ILF2, LAMP3, PSMB8, LDHA, NR4A2
## REL, PSMB9, CCL22, BCL2A1, CCND2, STAT3, IL15RA, IL12RB2, LAP3, EIF1
## Negative: CD52, EVI2B, CD48, CD37, VPREB3, ZNF683, CCL2, ITGB2, IGHD, CD14
## RNASE6, S100A9, NT5E, TCL1A, CD9, SELL, CD79B, LY86, CXCL3, GZMH
## POU2AF1, MMP9, GNAI2, TGFBI, CYBB, SELPLG, FN1, FYB1, CD163, CXCL2
## PC_ 5
## Positive: CD52, GNAI2, PFN1, RPL6, ITGB2, CD48, GAPDH, SELPLG, CD37, ITGA4
## EVI2B, IFITM2, CD27, ARPC1B, WAS, PDIA6, S100A10, TRIB2, DOCK8, CD4
## MYC, FYB1, IL32, PTPRC, SLC1A5, PDIA4, IGHG3, IGHG2, VSIR, LDHA
## Negative: NR4A2, CREM, IL1A, CXCL1, IL1B, CXCL3, IL6, RGS1, CXCL8, NFKBIA
## CXCL5, CCL3, CD69, IRF7, CXCL2, CXCR4, HIF1A, BTG1, LAMP3, CCL2
## CSRNP1, CCL4, NAMPT, DUSP5, IRF1, NFKB2, FTH1, GBP1, TREM1, CCL20
## 21:16:12 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:16:12 Read 14591 rows and found 30 numeric columns
## 21:16:12 Using Annoy for neighbor search, n_neighbors = 30
## 21:16:12 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:16:14 Writing NN index file to temp file C:\Users\suanni\AppData\Local\Temp\Rtmp40d4qa\file6e9c13ba2b73
## 21:16:14 Searching Annoy index using 1 thread, search_k = 3000
## 21:16:17 Annoy recall = 100%
## 21:16:18 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 21:16:19 Initializing from normalized Laplacian + noise (using RSpectra)
## 21:16:19 Commencing optimization for 200 epochs, with 658592 positive edges
## 21:16:19 Using rng type: pcg
## 21:16:33 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 14591
## Number of edges: 457136
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8567
## Number of communities: 15
## Elapsed time: 1 seconds
Cell Annotation
Reference-based annotation with SingleR
Here we are labeling cells based on published reference sets, comparing multiple single-cell references: Immune cell expression database, Novernshtern, and Monaco.
# Extracting the normalized counts as input
stim.sc <- LayerData(data$stim,
assay = "RNA",
layer = 'data') # normalized counts - stim
unstim.sc <- LayerData(data$unstim,
assay = "RNA",
layer = 'data') # normalized counts
# Get reference dataset
#hpca.ref <- HumanPrimaryCellAtlasData() # Really bad reference for PBMC
#im.ref <- celldex::DatabaseImmuneCellExpressionData()
nh.ref <- celldex::NovershternHematopoieticData() # decided to go with Novershtern
#mon.ref <- celldex::MonacoImmuneData()
ct.pred <- lapply(list(stim = stim.sc, unstim = unstim.sc), function(sc){
pred.nh <- SingleR(test = sc, ref = nh.ref, labels = nh.ref$label.main)
pred.nh.f <- SingleR(test = sc, ref = nh.ref, labels = nh.ref$label.fine)
list(NH = pred.nh, NH.fine = pred.nh.f)
})
# Inspect prediction
plotScoreHeatmap(ct.pred$stim$NH)
plotScoreHeatmap(ct.pred$unstim$NH)
plotScoreHeatmap(ct.pred$stim$NH.fine, show.labels = F)
plotScoreHeatmap(ct.pred$unstim$NH.fine, show.labels = F)
# Add cell type annotation to Seurat object
data$stim <- AddMetaData(data$stim,
ct.pred$stim$NH$labels,
col.name = 'NH')
data$stim <- AddMetaData(data$stim,
ct.pred$stim$NH.fine$labels,
col.name = 'NH.fine')
data$unstim <- AddMetaData(data$unstim,
ct.pred$unstim$NH$labels,
col.name = 'NH')
data$unstim <- AddMetaData(data$unstim,
ct.pred$unstim$NH.fine$labels,
col.name = 'NH.fine')
Clustering analysis with SingleR
Manual relabeling of stim data
### Visual examination of seurat clustering
DimPlot(data$stim, label = T)
DimPlot(data$stim, group.by = "NH.fine", label = T, repel = T) + NoLegend()
FeaturePlot(data$stim,
features = c("IGHG1", "IGHG2", "IGHG3", "IGHG4"),
label = T, label.size = 4) + NoLegend()
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
id <- "dad"
p1 <- DimPlot(data$stim[, data$stim$sample_tag == id], label = T) + NoLegend()
p2 <- DimPlot(data$stim[, data$stim$sample_tag == id], group.by = "NH.fine", label = T, repel = T) + NoLegend()
plot_grid(p1, p2)
table(data$stim$NH.fine[data$stim$sample_tag == id])
##
## CD4+ Central Memory CD4+ Effector Memory
## 468 237
## CD8+ Central Memory CD8+ Effector Memory
## 61 166
## CD8+ Effector Memory RA Colony Forming Unit-Monocytes
## 267 1
## Hematopoietic stem cells_CD133+ CD34dim Mature B cells
## 1 209
## Mature B cells class able to switch Mature B cells class switched
## 46 356
## Mature NK cells_CD56- CD16+ CD3- Mature NK cells_CD56+ CD16+ CD3-
## 189 93
## Monocytes Myeloid Dendritic Cells
## 3 8
## Naive B cells Naive CD4+ T cells
## 318 94
## Naive CD8+ T cells NK T cells
## 74 5
FeaturePlot(data$stim[, data$stim$sample_tag == id],
features = c("IGHG1", "IGHG2", "IGHG3", "IGHG4"),
label = T, label.size = 4) + NoLegend()
### Removing some clusters
table(data$stim$NH.fine)
##
## CD4+ Central Memory CD4+ Effector Memory
## 4722 2134
## CD8+ Central Memory CD8+ Effector Memory
## 656 1625
## CD8+ Effector Memory RA Colony Forming Unit-Monocytes
## 3521 21
## Hematopoietic stem cells_CD133+ CD34dim Hematopoietic stem cells_CD38- CD34+
## 3 3
## Mature B cells Mature B cells class able to switch
## 716 182
## Mature B cells class switched Mature NK cells_CD56- CD16- CD3-
## 910 3
## Mature NK cells_CD56- CD16+ CD3- Mature NK cells_CD56+ CD16+ CD3-
## 1354 1049
## Monocytes Myeloid Dendritic Cells
## 198 51
## Naive B cells Naive CD4+ T cells
## 2246 2160
## Naive CD8+ T cells NK T cells
## 1132 54
## Plasmacytoid Dendritic Cells
## 13
data$stim$NH.fine[data$stim$NH.fine == "Mature NK cells_CD56- CD16+ CD3-"] <- "Monocytes"
data$stim$NH.fine[data$stim$NH.fine %in% c("CD4+ Effector Memory", "CD4+ Central Memory")] <- "CD4+ Memory"
data$stim$NH.fine[data$stim$NH.fine %in% c("CD8+ Effector Memory", "CD8+ Central Memory", "CD8+ Effector Memory RA")] <- "CD8+ Memory"
data$stim$NH.fine[data$stim$NH.fine %in% c("Mature NK cells_CD56+ CD16+ CD3-", "Mature NK cells_CD56- CD16- CD3-")] <- "NK cells"
data$stim$NH.fine[data$stim$NH.fine %in% c("Mature B cells class switched", "Mature B cells class able to switch")] <- "Mature B cells"
data$stim$keep <- data$stim$NH.fine
data$stim$keep[data$stim$keep %in% c("CD4+ Memory", "CD8+ Memory", "Mature B cells",
"NK cells", "Monocytes", "Naive B cells", "Naive CD4+ T cells", "Naive CD8+ T cells")] <- "Y"
data$stim$keep[data$stim$keep %in% c("Basophils", "Colony Forming Unit-Monocytes", "Eosinophils", "Erythroid_CD34+ CD71+ GlyA-", "Granulocytes (Neutrophils)",
"Hematopoietic stem cells_CD133+ CD34dim", "Hematopoietic stem cells_CD38- CD34+", "Megakaryocyte/erythroid progenitors", "NK T cells",
"Myeloid Dendritic Cells", "Plasmacytoid Dendritic Cells", "Colony Forming Unit-Granulocytes", "Granulocytes (Neutrophilic Metamyelocytes)")] <- "N"
test <- data$stim[, data$stim$keep == "Y"]
p1 <- DimPlot(test, label = T, label.size = 4)
p2 <- DimPlot(test, group.by = "NH.fine", label = T, repel = F, label.size = 4)
plot_grid(p1, p2)
### Relabel some clusters
test$label <- test$NH.fine
test$label[test$seurat_clusters %in% c("4", "10", "11")] <- "CD8+ Memory"
test$label[test$seurat_clusters %in% c("3", "6")] <- "CD4+ Memory"
test$label[test$seurat_clusters == "5" & !test$label %in% c("Naive CD4+ T cells", "Naive CD8+ T cells")] <- "CD4+ Memory"
test$label[test$seurat_clusters == "9"] <- "NK cells"
test$label[test$seurat_clusters %in% "7" & !test$label %in% "Naive B cells"] <- "Memory B cells"
test$label[test$seurat_clusters %in% "2" & !test$label %in% c("Memory B cells", "Mature B cells")] <- "Naive B cells"
test$label[test$seurat_clusters == "8"] <- "Monocytes"
test$label[test$seurat_clusters == "12"] <- "Undefined"
test$label[test$label == "Mature B cells"] <- "Memory B cells"
test$label[test$seurat_clusters %in% c("0", "1") & !test$label %in% c("Naive CD4+ T cells", "Naive CD8+ T cells")] <- "Naive CD4+ T cells"
test$label[test$seurat_clusters == "13" & test$label == "Monocytes"] <- "CD4+ Memory"
p1 <- DimPlot(test, label = T) + NoLegend()
p2 <- DimPlot(test, group.by = "label", label = T, repel = T) + NoLegend()
plot_grid(p1, p2)
table(test$label)
##
## CD4+ Memory CD8+ Memory Memory B cells Monocytes
## 4952 2926 1697 1339
## Naive B cells Naive CD4+ T cells Naive CD8+ T cells NK cells
## 2131 7445 835 916
## Undefined
## 367
data$stim <- test
Manual relabeling of unstim data
### Visual examination of seurat clustering
DimPlot(data$unstim, label = T)
DimPlot(data$unstim, group.by = "NH.fine", label = T, repel = T) + NoLegend()
FeaturePlot(data$stim,
features = c("IGHG1", "IGHG2", "IGHG3", "IGHG4"),
label = T) + NoLegend()
id <- "patient"
p1 <- DimPlot(data$unstim[, data$unstim$sample_tag == id], label = T) + NoLegend()
p2 <- DimPlot(data$unstim[, data$unstim$sample_tag == id], group.by = "NH.fine", label = F, repel = T) + NoLegend()
plot_grid(p1, p2)
### Removing some clusters
table(data$unstim$NH.fine)
##
## Basophils
## 17
## CD4+ Central Memory
## 400
## CD4+ Effector Memory
## 652
## CD8+ Central Memory
## 220
## CD8+ Effector Memory
## 931
## CD8+ Effector Memory RA
## 1915
## Colony Forming Unit-Granulocytes
## 298
## Colony Forming Unit-Monocytes
## 16
## Eosinophils
## 7
## Erythroid_CD34+ CD71+ GlyA-
## 3
## Granulocytes (Neutrophilic Metamyelocytes)
## 110
## Granulocytes (Neutrophils)
## 42
## Hematopoietic stem cells_CD133+ CD34dim
## 3
## Hematopoietic stem cells_CD38- CD34+
## 7
## Mature B cells
## 78
## Mature B cells class able to switch
## 207
## Mature B cells class switched
## 238
## Mature NK cells_CD56- CD16- CD3-
## 92
## Mature NK cells_CD56- CD16+ CD3-
## 192
## Mature NK cells_CD56+ CD16+ CD3-
## 665
## Megakaryocyte/erythroid progenitors
## 1
## Monocytes
## 1403
## Myeloid Dendritic Cells
## 11
## Naive B cells
## 1297
## Naive CD4+ T cells
## 3109
## Naive CD8+ T cells
## 2661
## NK T cells
## 1
## Plasmacytoid Dendritic Cells
## 15
data$unstim$NH.fine[data$unstim$NH.fine == "Mature NK cells_CD56- CD16+ CD3-"] <- "Monocytes"
data$unstim$NH.fine[data$unstim$NH.fine %in% c("CD4+ Effector Memory", "CD4+ Central Memory")] <- "CD4+ Memory"
data$unstim$NH.fine[data$unstim$NH.fine %in% c("CD8+ Effector Memory", "CD8+ Central Memory", "CD8+ Effector Memory RA")] <- "CD8+ Memory"
data$unstim$NH.fine[data$unstim$NH.fine %in% c("Mature NK cells_CD56+ CD16+ CD3-", "Mature NK cells_CD56- CD16- CD3-")] <- "NK cells"
data$unstim$NH.fine[data$unstim$NH.fine %in% c("Mature B cells class switched", "Mature B cells class able to switch")] <- "Mature B cells"
data$unstim$keep <- data$unstim$NH.fine
data$unstim$keep[data$unstim$keep %in% c("CD4+ Memory", "CD8+ Memory", "Mature B cells", "NK cells", "Monocytes",
"Naive B cells", "Naive CD4+ T cells", "Naive CD8+ T cells")] <- "Y"
data$unstim$keep[data$unstim$keep %in% c("Basophils", "Colony Forming Unit-Monocytes", "Eosinophils",
"Erythroid_CD34+ CD71+ GlyA-", "Granulocytes (Neutrophils)",
"Hematopoietic stem cells_CD133+ CD34dim", "Hematopoietic stem cells_CD38- CD34+",
"Megakaryocyte/erythroid progenitors", "NK T cells", "Myeloid Dendritic Cells",
"Plasmacytoid Dendritic Cells", "Colony Forming Unit-Granulocytes",
"Granulocytes (Neutrophilic Metamyelocytes)")] <- "N"
test <- data$unstim[, data$unstim$keep == "Y"]
test$label <- test$NH.fine
#test$label[test$seurat_clusters %in% c("9", "13")] <- "Non-naive B cells"
#test$label[test$label == "Mature B cells"] <- "Non-naive B cells"
#test$label[test$seurat_clusters == "3"] <- "Naive B cells"
#test$label[colSums(test[["RNA"]]@data[grep("IGHG", rownames(test)), ]) > 0 & test$label == "Naive B cells"] <- "Non-naive B cells"
p1 <- DimPlot(test, label = T) + NoLegend()
p2 <- DimPlot(test, group.by = "label", label = T, repel = F) + NoLegend()
plot_grid(p1, p2)
table(test$label)
##
## CD4+ Memory CD8+ Memory Mature B cells Monocytes
## 1052 3066 523 1595
## Naive B cells Naive CD4+ T cells Naive CD8+ T cells NK cells
## 1297 3109 2661 757
data$unstim <- test
Clustering Visualization - Fig 3A
# Define groups for comparison - patient vs HCs - compare clustering
data <- lapply(data, function(x){
# find markers of patient vs hc in each cluster
x$twogroups <- x$sample_tag # define patient vs others
x$twogroups[x$twogroups != "patient"] <- "hc"
x$twogroups <- factor(x$twogroups, levels = c("hc", "patient"))
x$threegroups <- x$sample_tag # define patient vs family vs hc
x$threegroups[!x$threegroups %in% c("patient", "hc")] <- "family"
x$celltype.patient <- paste(x$label, x$twogroups, sep = "_")
Idents(x) <- "celltype.patient"
x
})
p1 <- DimPlot(data$stim,
group.by = "label",
label = F,
repel = F,
label.size = 4,
split.by = "twogroups")
p1[[1]]$layers[[1]]$aes_params$alpha <- 0.3
p1 # Fig 3A
Identify cluster-specific biomarkers
ct.markers <- lapply(data, function(x){
# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(x) <- "label"
ct.markers <- FindAllMarkers(x, only.pos = T, min.pct = 0.1, logfc.threshold = 0.1)
ct.markers.top <- ct.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
ct.heatmap <- DoHeatmap(x, features = ct.markers.top$gene, size = 4) + NoLegend()
})
## Calculating cluster NK cells
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster Memory B cells
## Calculating cluster Monocytes
## Calculating cluster Naive B cells
## Calculating cluster CD8+ Memory
## Calculating cluster CD4+ Memory
## Calculating cluster Naive CD4+ T cells
## Calculating cluster Undefined
## Calculating cluster Naive CD8+ T cells
## Calculating cluster Monocytes
## Calculating cluster Naive B cells
## Calculating cluster Mature B cells
## Calculating cluster CD8+ Memory
## Calculating cluster Naive CD4+ T cells
## Calculating cluster Naive CD8+ T cells
## Calculating cluster NK cells
## Calculating cluster CD4+ Memory
ct.markers$stim
ct.markers$unstim
Differential Expression Analysis
Differential expression - Fig 3B, 4A, and 5A
# Differential expression analysis
NFAT.DE <- lapply(names(data), function(x){
out <- lapply(unique(data[[x]]$label), function(ct){
NFAT.markers <- FindMarkers(data[[x]],
ident.1 = paste0(ct, "_patient"),
ident.2 = paste0(ct, "_hc"),
min.pct = 0,
logfc.threshold = 0,
test.use = "negbinom")
write.csv(NFAT.markers, file = paste0("scRNAseq/output/", x, "_", ct, ".csv"))
return(NFAT.markers)
})
names(out) <- unique(data[[x]]$label)
out
})
# Exported to graphpad for volcano plot - Fig 3B, 4A, and 5A
Candidate gene visualization - Fig 3C, 4B, and 5B
plt <- data$stim
### Bcell
cells <- plt[, plt$label == "Naive B cells"]
genes_of_interest <- c("TNFAIP8", "MYC", "HIF1A", "JAK1", "STAT3", "PIKAP1", "LTA", "TNF", "DUSP4")
p <- VlnPlot(cells, features = "TNFAIP8", group.by = "twogroups", pt.size = 0) #plot each gene of interest separately
p <- p + geom_boxplot(width = 0.1)
p # Fig 3C
### CD8+ Memory
cells <- plt[, plt$label == "CD8+ Memory"]
genes_of_interest <- c("GZMH", "GNLY", "PFN1", "S100A10", "GZMB", "TIGIT", "FASLG", "IFNG", "TNF")
p <- VlnPlot(cells, features = "GZMH", group.by = "twogroups", pt.size = 0) #plot each gene of interest separately
p <- p + geom_boxplot(width = 0.1)
p # Fig 4B
### CD4+ Memory
cells <- plt[, plt$label == "CD4+ Memory"]
genes_of_interest <- c("TNFRSF4", "TIGIT", "S100A10", "REL", "NFKBIA", "NFKBIZ", "CD40LG", "MYC", "TNF")
DotPlot(cells, features = genes_of_interest, group.by = "sample_tag")
p <- VlnPlot(cells, features = "TNFRSF4", group.by = "twogroups", pt.size = 0) #plot each gene of interest separately
p <- p + geom_boxplot(width = 0.1)
p # Fig 5B
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Vancouver
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] future_1.67.0 gridExtra_2.3
## [3] ggplot2_3.5.2 cowplot_1.2.0
## [5] SingleR_2.10.0 SingleCellExperiment_1.30.1
## [7] SummarizedExperiment_1.38.1 Biobase_2.68.0
## [9] GenomicRanges_1.60.0 GenomeInfoDb_1.44.1
## [11] IRanges_2.42.0 S4Vectors_0.46.0
## [13] BiocGenerics_0.54.0 generics_0.1.4
## [15] MatrixGenerics_1.20.0 matrixStats_1.5.0
## [17] patchwork_1.3.1 Seurat_5.3.0
## [19] SeuratObject_5.1.0 sp_2.2-0
## [21] dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.5.1
## [3] later_1.4.2 filelock_1.0.3
## [5] tibble_3.3.0 polyclip_1.10-7
## [7] fastDummies_1.7.5 httr2_1.2.1
## [9] lifecycle_1.0.4 globals_0.18.0
## [11] lattice_0.22-7 MASS_7.3-65
## [13] alabaster.base_1.8.1 magrittr_2.0.3
## [15] plotly_4.11.0 sass_0.4.10
## [17] rmarkdown_2.29 jquerylib_0.1.4
## [19] yaml_2.3.10 httpuv_1.6.16
## [21] sctransform_0.4.2 spam_2.11-1
## [23] spatstat.sparse_3.1-0 reticulate_1.43.0
## [25] pbapply_1.7-4 DBI_1.2.3
## [27] RColorBrewer_1.1-3 abind_1.4-8
## [29] Rtsne_0.17 purrr_1.1.0
## [31] rappdirs_0.3.3 GenomeInfoDbData_1.2.14
## [33] ggrepel_0.9.6 irlba_2.3.5.1
## [35] listenv_0.9.1 spatstat.utils_3.1-5
## [37] pheatmap_1.0.13 goftest_1.2-3
## [39] RSpectra_0.16-2 spatstat.random_3.4-1
## [41] fitdistrplus_1.2-4 parallelly_1.45.1
## [43] DelayedMatrixStats_1.30.0 codetools_0.2-20
## [45] DelayedArray_0.34.1 tidyselect_1.2.1
## [47] UCSC.utils_1.4.0 farver_2.1.2
## [49] viridis_0.6.5 BiocFileCache_2.16.1
## [51] rmdformats_1.0.4 spatstat.explore_3.5-2
## [53] jsonlite_2.0.0 BiocNeighbors_2.2.0
## [55] progressr_0.15.1 ggridges_0.5.6
## [57] survival_3.8-3 tools_4.5.1
## [59] ica_1.0-3 Rcpp_1.1.0
## [61] glue_1.8.0 SparseArray_1.8.1
## [63] xfun_0.52 HDF5Array_1.36.0
## [65] gypsum_1.4.0 withr_3.0.2
## [67] BiocManager_1.30.26 fastmap_1.2.0
## [69] rhdf5filters_1.20.0 digest_0.6.37
## [71] R6_2.6.1 mime_0.13
## [73] scattermore_1.2 tensor_1.5.1
## [75] spatstat.data_3.1-8 RSQLite_2.4.2
## [77] h5mread_1.0.1 celldex_1.18.0
## [79] tidyr_1.3.1 data.table_1.17.8
## [81] httr_1.4.7 htmlwidgets_1.6.4
## [83] S4Arrays_1.8.1 uwot_0.2.3
## [85] pkgconfig_2.0.3 gtable_0.3.6
## [87] blob_1.2.4 lmtest_0.9-40
## [89] XVector_0.48.0 htmltools_0.5.8.1
## [91] dotCall64_1.2 bookdown_0.44
## [93] alabaster.matrix_1.8.0 scales_1.4.0
## [95] png_0.1-8 spatstat.univar_3.1-4
## [97] knitr_1.50 reshape2_1.4.4
## [99] nlme_3.1-168 curl_7.0.0
## [101] rhdf5_2.52.1 cachem_1.1.0
## [103] zoo_1.8-14 stringr_1.5.1
## [105] BiocVersion_3.21.1 KernSmooth_2.23-26
## [107] vipor_0.4.7 parallel_4.5.1
## [109] miniUI_0.1.2 AnnotationDbi_1.70.0
## [111] ggrastr_1.0.2 alabaster.schemas_1.8.0
## [113] pillar_1.11.0 grid_4.5.1
## [115] vctrs_0.6.5 RANN_2.6.2
## [117] promises_1.3.3 dbplyr_2.5.0
## [119] beachmat_2.24.0 xtable_1.8-4
## [121] cluster_2.1.8.1 beeswarm_0.4.0
## [123] evaluate_1.0.4 cli_3.6.5
## [125] compiler_4.5.1 rlang_1.1.6
## [127] crayon_1.5.3 future.apply_1.20.0
## [129] labeling_0.4.3 ggbeeswarm_0.7.2
## [131] plyr_1.8.9 stringi_1.8.7
## [133] alabaster.se_1.8.0 viridisLite_0.4.2
## [135] deldir_2.0-4 BiocParallel_1.42.1
## [137] Biostrings_2.76.0 lazyeval_0.2.2
## [139] spatstat.geom_3.5-0 Matrix_1.7-3
## [141] ExperimentHub_2.16.1 RcppHNSW_0.6.0
## [143] sparseMatrixStats_1.20.0 bit64_4.6.0-1
## [145] Rhdf5lib_1.30.0 KEGGREST_1.48.1
## [147] shiny_1.11.1 alabaster.ranges_1.8.0
## [149] AnnotationHub_3.16.1 ROCR_1.0-11
## [151] igraph_2.1.4 memoise_2.0.1
## [153] bslib_0.9.0 bit_4.6.0