NFAT_snRNAseq

Setup: Libraries and Data Import

# load packages
library(dplyr)
library(Seurat)
library(patchwork)
library(SingleCellExperiment)
library(SingleR)
library(cowplot)
library(ggplot2)
library(gridExtra)

Import raw data

# Stimulated PBMC samples
stim <- read.delim("scRNAseq/data/clean_RSEC_MolsPerCell_stim.txt", skip = 7, row.names = 1)
stim_count <- stim[, 3:ncol(stim)]
stim_meta <- stim[, 1:2]
stim_meta$cartrige <- 1
stim_meta$stim <- "stim"
stim <- CreateSeuratObject(counts = t(stim_count), 
                           project = "NFAT_stim", 
                           meta.data = stim_meta)

# Unstimulated PBMC samples
unstim <- read.delim("scRNAseq/data/clean_RSEC_MolsPerCell_unstim.txt", skip = 7, row.names = 1)
unstim_count <- unstim[, 3:ncol(unstim)]
unstim_meta <- unstim[, 1:2]
unstim_meta$cartrige <- 2
unstim_meta$stim <- "unstim"
unstim <- CreateSeuratObject(counts = t(unstim_count), 
                             project = "NFAT_unstim", 
                             meta.data = unstim_meta)

Merge dataset, no batch correction

The samples were normalized but kept separate by the stim/unstim identity.

comb <- merge(stim, unstim, add.cell.ids = c("stim", "unstim"))
data <- SplitObject(comb, split.by = "stim")
data <- lapply(X = data, FUN = function(x) {
    x <- FindVariableFeatures(x) %>%
        NormalizeData(.) %>% 
        ScaleData(.) %>%
        RunPCA(.) %>%
        RunUMAP(., dims = 1:30) %>%
        RunTSNE(.) %>% 
        FindNeighbors(.) %>%
        FindClusters(.)
})
## Finding variable features for layer counts.NFAT_stim
## Normalizing layer: counts.NFAT_stim
## Centering and scaling data matrix
## PC_ 1 
## Positive:  CYBB, CCR1, ITGAX, TLR2, TGFBI, NRP1, CTSD, IL3RA, S100A9, LILRB4 
##     LGALS3, FCER1G, CXCL16, ANXA5, CCR5, MMP9, CD9, ICAM1, LGALS9, CD163 
##     CD36, LAT2, AQP9, FCGR3A, MGST1, CD63, LGALS1, FCN1, CCL3, HLA.DMA 
## Negative:  CD69, IRF4, CD2, STAT5A, TRAC, TRBC2, ITK, CD3D, RPL6, MYC 
##     CD3E, ICOS, NFKB1, FYN, LDHA, CD6, TNFAIP8, IL2, VSIR, IL23A 
##     SELL, STAT4, CD7, EIF5A, LRRC8B, TNFRSF25, LEF1, DUSP2, IL32, F5 
## PC_ 2 
## Positive:  RPL6, BCL2A1, EIF5A, ATF4, CD40, CCR7, CXCR5, HLA.DRA, NFKB1, PSME2 
##     ILF2, HLA.DQB1, EIF1, IRF1, TUBA1C, MYC, CD74, MS4A1, HLA.DPA1, LYN 
##     CD44, CD83, NFKBIA, IL6, EBF1, CD22, CYCS, ABCE1, FOSL1, IRF8 
## Negative:  NKG7, CCL5, PRF1, FASLG, APOBEC3G, GZMB, GNLY, KLRC3, IFNG, GZMH 
##     KLRK1, KLRC1, VSIR, LAG3, IL2RB, CX3CR1, B3GAT1, S100A10, CCL4, EVI2A 
##     CTSW, CSF2, HAVCR2, CXCR3, CD226, IL18RAP, TRDC, CD300A, PRDM1, GZMA 
## PC_ 3 
## Positive:  CD2, TRAC, ICOS, IER3, ITK, PTPRC, SELL, CD3D, STAT1, TGFBI 
##     DUSP6, CCR1, TLR2, CD3E, PFN1, NRP1, LEF1, S100A9, CTSD, CD7 
##     F5, MMP9, CD109, LILRB4, CCR5, CXCL16, FTH1, TNFRSF25, IL3RA, GBP2 
## Negative:  CCL4, CCL3, CCR7, CD40, HLA.DQB1, MS4A1, CXCR5, IL6, CD83, EBF1 
##     LYN, LTA, CD74, CD22, HLA.DPA1, HLA.DRA, PIK3AP1, NKG7, PRF1, CCL5 
##     TNF, REL, BIRC3, APOBEC3G, POU2AF1, CD79A, GZMB, DUSP4, KLRC3, CR2 
## PC_ 4 
## Positive:  IL2, FOSB, EGR1, NR4A2, FOS, EGR3, TNF, CD40LG, JUN, IL23A 
##     NFKBIZ, DUSP1, IL7R, MAST4, CCL20, TNFSF8, NFKBID, FOSL1, RORC, TGIF1 
##     IL21, IL6ST, IL1R1, IFRD1, SPAG9, MYC, CCR4, CD200, RAPH1, CXCL3 
## Negative:  PFN1, GZMB, NKG7, IL2RB, PRF1, GAPDH, GNLY, APOBEC3G, LDHA, KLRK1 
##     HLA.A, GZMH, KLRC3, JAK1, IL2RG, B3GAT1, CTSW, CX3CR1, CCL5, PDIA6 
##     HAVCR2, PTPN7, KLRC1, CD44, CD300A, CD52, GZMA, ARPC1B, GNAI2, TRDC 
## PC_ 5 
## Positive:  IL2, PDIA6, TNFRSF4, TNFSF8, TNF, IER3, PFN1, F5, TRIB1, CD200 
##     DUSP4, TBX21, CCR4, IL21, GAPDH, EIF5A, LTA, VSIR, TNFAIP8, RPL6 
##     TUBA1C, CD3D, TNFRSF25, CD274, CSF2, BCAT1, FURIN, BATF, CD52, CCND2 
## Negative:  NR4A2, FOS, FOSB, BTG1, CREM, EGR3, NFKBIZ, SDCBP, CSRNP1, EGR1 
##     NFKB1, REL, FBXO11, JUN, SPAG9, IFRD1, DUSP2, EIF5, JUNB, EVI2B 
##     PIK3IP1, CXCR4, HLA.A, IL6ST, CD69, DUSP1, MAST4, IL7R, IRF7, NANS
## 21:14:04 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:14:04 Read 22753 rows and found 30 numeric columns
## 21:14:04 Using Annoy for neighbor search, n_neighbors = 30
## 21:14:04 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:14:06 Writing NN index file to temp file C:\Users\suanni\AppData\Local\Temp\Rtmp40d4qa\file6e9c46de6424
## 21:14:06 Searching Annoy index using 1 thread, search_k = 3000
## 21:14:12 Annoy recall = 100%
## 21:14:12 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 21:14:14 Initializing from normalized Laplacian + noise (using RSpectra)
## 21:14:14 Commencing optimization for 200 epochs, with 1032308 positive edges
## 21:14:14 Using rng type: pcg
## 21:14:36 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 22753
## Number of edges: 706528
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8608
## Number of communities: 13
## Elapsed time: 4 seconds
## Finding variable features for layer counts.NFAT_unstim
## Normalizing layer: counts.NFAT_unstim
## Centering and scaling data matrix
## PC_ 1 
## Positive:  HLA.A, CD3E, LCK, TRAC, TRBC2, RPL6, CD2, PTPRC, ITK, FYN 
##     LAT, BTG1, PIK3IP1, CD6, IL32, LEF1, IL2RG, BCL11B, GIMAP5, CD52 
##     CD5, SELPLG, JAK1, SELL, IL7R, PASK, GIMAP2, CD247, JAK3, IFITM2 
## Negative:  LILRB4, CCR1, SLC7A7, CYBB, ITGAX, LGALS3, KCNE3, CD86, NRP1, LYN 
##     MITF, CD163, SNX8, CXCL16, MMP9, CD33, LY86, HLA.DMA, CD36, IRF8 
##     CD9, LGALS9, DUSP6, S100A9, TGFBI, TRIB1, ICAM1, CTSD, TLR2, HLA.DRA 
## PC_ 2 
## Positive:  CD79A, MS4A1, CD79B, IGHD, IGHM, POU2AF1, TNFRSF13C, TCL1A, VPREB3, CXCR5 
##     CD22, IGHG2, HLA.DQB1, IGHG3, BLNK, IGKC, CD40, CD74, CD24, CD37 
##     CD72, IRF8, IGLC3, IGHG1, HLA.DRA, SP140, MZB1, CCR7, HLA.DMA, IGHG4 
## Negative:  SELPLG, ITGB2, S100A10, FCGR3A, PFN1, FYB1, FTH1, CCR1, LILRB4, CTSD 
##     ARL4C, CD163, FCER1G, TGFBI, KCNE3, LGALS3, MMP9, CD300A, NRP1, ITGAX 
##     CD2, CD3E, MITF, CD33, CD14, HAVCR2, S100A9, CMKLR1, CD36, GIMAP5 
## PC_ 3 
## Positive:  NKG7, PRF1, CCL5, GZMB, CST7, KLRC3, CX3CR1, GNLY, CTSW, GZMH 
##     TBX21, APOBEC3G, GZMA, KLRC1, TRDC, IL2RB, KLRK1, CD300A, KLRF1, ZNF683 
##     CD244, FCGR3A, FASLG, IL18RAP, B3GAT1, LAG3, RUNX3, CCL4, CXCR3, KLRG1 
## Negative:  LEF1, CCR7, CD4, SELL, IL6ST, MYC, PASK, CD27, TRAC, CD5 
##     FYB1, IL7R, PIK3IP1, GIMAP5, CD6, LAT, TNFAIP8, FTH1, LTB, RPL6 
##     IL6R, BCL11B, TCF7, ITK, ICOS, EIF1, CD3E, CD28, SOCS3, TRBC2 
## PC_ 4 
## Positive:  CREM, PSME2, NFKB1, RGS1, EIF5A, DUSP5, STAT1, GBP1, DUSP4, IRF1 
##     CD69, IRF4, IRF7, ATF4, DUSP2, ILF2, LAMP3, PSMB8, LDHA, NR4A2 
##     REL, PSMB9, CCL22, BCL2A1, CCND2, STAT3, IL15RA, IL12RB2, LAP3, EIF1 
## Negative:  CD52, EVI2B, CD48, CD37, VPREB3, ZNF683, CCL2, ITGB2, IGHD, CD14 
##     RNASE6, S100A9, NT5E, TCL1A, CD9, SELL, CD79B, LY86, CXCL3, GZMH 
##     POU2AF1, MMP9, GNAI2, TGFBI, CYBB, SELPLG, FN1, FYB1, CD163, CXCL2 
## PC_ 5 
## Positive:  CD52, GNAI2, PFN1, RPL6, ITGB2, CD48, GAPDH, SELPLG, CD37, ITGA4 
##     EVI2B, IFITM2, CD27, ARPC1B, WAS, PDIA6, S100A10, TRIB2, DOCK8, CD4 
##     MYC, FYB1, IL32, PTPRC, SLC1A5, PDIA4, IGHG3, IGHG2, VSIR, LDHA 
## Negative:  NR4A2, CREM, IL1A, CXCL1, IL1B, CXCL3, IL6, RGS1, CXCL8, NFKBIA 
##     CXCL5, CCL3, CD69, IRF7, CXCL2, CXCR4, HIF1A, BTG1, LAMP3, CCL2 
##     CSRNP1, CCL4, NAMPT, DUSP5, IRF1, NFKB2, FTH1, GBP1, TREM1, CCL20 
## 21:16:12 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:16:12 Read 14591 rows and found 30 numeric columns
## 21:16:12 Using Annoy for neighbor search, n_neighbors = 30
## 21:16:12 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:16:14 Writing NN index file to temp file C:\Users\suanni\AppData\Local\Temp\Rtmp40d4qa\file6e9c13ba2b73
## 21:16:14 Searching Annoy index using 1 thread, search_k = 3000
## 21:16:17 Annoy recall = 100%
## 21:16:18 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 21:16:19 Initializing from normalized Laplacian + noise (using RSpectra)
## 21:16:19 Commencing optimization for 200 epochs, with 658592 positive edges
## 21:16:19 Using rng type: pcg
## 21:16:33 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 14591
## Number of edges: 457136
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8567
## Number of communities: 15
## Elapsed time: 1 seconds

Cell Annotation

Reference-based annotation with SingleR

Here we are labeling cells based on published reference sets, comparing multiple single-cell references: Immune cell expression database, Novernshtern, and Monaco.

# Extracting the normalized counts as input
stim.sc <- LayerData(data$stim, 
                     assay = "RNA", 
                     layer = 'data') # normalized counts - stim
unstim.sc <- LayerData(data$unstim, 
                       assay = "RNA", 
                       layer = 'data') # normalized counts

# Get reference dataset
#hpca.ref <- HumanPrimaryCellAtlasData() # Really bad reference for PBMC
#im.ref <- celldex::DatabaseImmuneCellExpressionData()
nh.ref <- celldex::NovershternHematopoieticData() # decided to go with Novershtern
#mon.ref <- celldex::MonacoImmuneData()

ct.pred <- lapply(list(stim = stim.sc, unstim = unstim.sc), function(sc){
    pred.nh <- SingleR(test = sc, ref = nh.ref, labels = nh.ref$label.main)
    pred.nh.f <- SingleR(test = sc, ref = nh.ref, labels = nh.ref$label.fine)
    list(NH = pred.nh, NH.fine = pred.nh.f)
})

# Inspect prediction
plotScoreHeatmap(ct.pred$stim$NH)

plotScoreHeatmap(ct.pred$unstim$NH)

plotScoreHeatmap(ct.pred$stim$NH.fine, show.labels = F)

plotScoreHeatmap(ct.pred$unstim$NH.fine, show.labels = F)

# Add cell type annotation to Seurat object
data$stim <- AddMetaData(data$stim, 
                         ct.pred$stim$NH$labels, 
                         col.name = 'NH')
data$stim <- AddMetaData(data$stim, 
                         ct.pred$stim$NH.fine$labels, 
                         col.name = 'NH.fine')
data$unstim <- AddMetaData(data$unstim, 
                           ct.pred$unstim$NH$labels, 
                           col.name = 'NH')
data$unstim <- AddMetaData(data$unstim, 
                           ct.pred$unstim$NH.fine$labels, 
                           col.name = 'NH.fine')

Clustering analysis with SingleR

Manual relabeling of stim data

### Visual examination of seurat clustering
DimPlot(data$stim, label = T)

DimPlot(data$stim, group.by = "NH.fine", label = T, repel = T) + NoLegend()

FeaturePlot(data$stim,
            features = c("IGHG1", "IGHG2", "IGHG3", "IGHG4"),
            label = T, label.size = 4) + NoLegend()
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

id <- "dad"
p1 <- DimPlot(data$stim[, data$stim$sample_tag == id], label = T) + NoLegend()
p2 <- DimPlot(data$stim[, data$stim$sample_tag == id], group.by = "NH.fine", label = T, repel = T) + NoLegend()
plot_grid(p1, p2)

table(data$stim$NH.fine[data$stim$sample_tag == id])
## 
##                     CD4+ Central Memory                    CD4+ Effector Memory 
##                                     468                                     237 
##                     CD8+ Central Memory                    CD8+ Effector Memory 
##                                      61                                     166 
##                 CD8+ Effector Memory RA           Colony Forming Unit-Monocytes 
##                                     267                                       1 
## Hematopoietic stem cells_CD133+ CD34dim                          Mature B cells 
##                                       1                                     209 
##     Mature B cells class able to switch           Mature B cells class switched 
##                                      46                                     356 
##        Mature NK cells_CD56- CD16+ CD3-        Mature NK cells_CD56+ CD16+ CD3- 
##                                     189                                      93 
##                               Monocytes                 Myeloid Dendritic Cells 
##                                       3                                       8 
##                           Naive B cells                      Naive CD4+ T cells 
##                                     318                                      94 
##                      Naive CD8+ T cells                              NK T cells 
##                                      74                                       5
FeaturePlot(data$stim[, data$stim$sample_tag == id],
            features = c("IGHG1", "IGHG2", "IGHG3", "IGHG4"),
            label = T, label.size = 4) + NoLegend()

### Removing some clusters
table(data$stim$NH.fine)
## 
##                     CD4+ Central Memory                    CD4+ Effector Memory 
##                                    4722                                    2134 
##                     CD8+ Central Memory                    CD8+ Effector Memory 
##                                     656                                    1625 
##                 CD8+ Effector Memory RA           Colony Forming Unit-Monocytes 
##                                    3521                                      21 
## Hematopoietic stem cells_CD133+ CD34dim    Hematopoietic stem cells_CD38- CD34+ 
##                                       3                                       3 
##                          Mature B cells     Mature B cells class able to switch 
##                                     716                                     182 
##           Mature B cells class switched        Mature NK cells_CD56- CD16- CD3- 
##                                     910                                       3 
##        Mature NK cells_CD56- CD16+ CD3-        Mature NK cells_CD56+ CD16+ CD3- 
##                                    1354                                    1049 
##                               Monocytes                 Myeloid Dendritic Cells 
##                                     198                                      51 
##                           Naive B cells                      Naive CD4+ T cells 
##                                    2246                                    2160 
##                      Naive CD8+ T cells                              NK T cells 
##                                    1132                                      54 
##            Plasmacytoid Dendritic Cells 
##                                      13
data$stim$NH.fine[data$stim$NH.fine == "Mature NK cells_CD56- CD16+ CD3-"] <- "Monocytes"
data$stim$NH.fine[data$stim$NH.fine %in% c("CD4+ Effector Memory", "CD4+ Central Memory")] <- "CD4+ Memory"
data$stim$NH.fine[data$stim$NH.fine %in% c("CD8+ Effector Memory", "CD8+ Central Memory", "CD8+ Effector Memory RA")] <- "CD8+ Memory"
data$stim$NH.fine[data$stim$NH.fine %in% c("Mature NK cells_CD56+ CD16+ CD3-", "Mature NK cells_CD56- CD16- CD3-")] <- "NK cells"
data$stim$NH.fine[data$stim$NH.fine %in% c("Mature B cells class switched", "Mature B cells class able to switch")] <- "Mature B cells"
data$stim$keep <- data$stim$NH.fine
data$stim$keep[data$stim$keep %in% c("CD4+ Memory", "CD8+ Memory", "Mature B cells",
                                     "NK cells", "Monocytes", "Naive B cells", "Naive CD4+ T cells", "Naive CD8+ T cells")] <- "Y"
data$stim$keep[data$stim$keep %in% c("Basophils", "Colony Forming Unit-Monocytes", "Eosinophils", "Erythroid_CD34+ CD71+ GlyA-", "Granulocytes (Neutrophils)", 
                                     "Hematopoietic stem cells_CD133+ CD34dim", "Hematopoietic stem cells_CD38- CD34+", "Megakaryocyte/erythroid progenitors", "NK T cells", 
                                     "Myeloid Dendritic Cells", "Plasmacytoid Dendritic Cells", "Colony Forming Unit-Granulocytes", "Granulocytes (Neutrophilic Metamyelocytes)")] <- "N"
test <- data$stim[, data$stim$keep == "Y"]

p1 <- DimPlot(test, label = T, label.size = 4)
p2 <- DimPlot(test, group.by = "NH.fine", label = T, repel = F, label.size = 4)
plot_grid(p1, p2)

### Relabel some clusters
test$label <- test$NH.fine
test$label[test$seurat_clusters %in% c("4", "10", "11")] <- "CD8+ Memory"
test$label[test$seurat_clusters %in% c("3", "6")] <- "CD4+ Memory"
test$label[test$seurat_clusters == "5" & !test$label %in% c("Naive CD4+ T cells", "Naive CD8+ T cells")] <- "CD4+ Memory"
test$label[test$seurat_clusters == "9"] <- "NK cells"
test$label[test$seurat_clusters %in% "7" & !test$label %in% "Naive B cells"] <- "Memory B cells"
test$label[test$seurat_clusters %in% "2" & !test$label %in% c("Memory B cells", "Mature B cells")] <- "Naive B cells"
test$label[test$seurat_clusters == "8"] <- "Monocytes"
test$label[test$seurat_clusters == "12"] <- "Undefined"
test$label[test$label == "Mature B cells"] <- "Memory B cells"
test$label[test$seurat_clusters %in% c("0", "1") & !test$label %in% c("Naive CD4+ T cells", "Naive CD8+ T cells")] <- "Naive CD4+ T cells"
test$label[test$seurat_clusters == "13" & test$label == "Monocytes"] <- "CD4+ Memory"

p1 <- DimPlot(test, label = T) + NoLegend()
p2 <- DimPlot(test, group.by = "label", label = T, repel = T) + NoLegend()
plot_grid(p1, p2)

table(test$label)
## 
##        CD4+ Memory        CD8+ Memory     Memory B cells          Monocytes 
##               4952               2926               1697               1339 
##      Naive B cells Naive CD4+ T cells Naive CD8+ T cells           NK cells 
##               2131               7445                835                916 
##          Undefined 
##                367
data$stim <- test

Manual relabeling of unstim data

### Visual examination of seurat clustering
DimPlot(data$unstim, label = T)

DimPlot(data$unstim, group.by = "NH.fine", label = T, repel = T) + NoLegend()

FeaturePlot(data$stim,
            features = c("IGHG1", "IGHG2", "IGHG3", "IGHG4"),
            label = T) + NoLegend()

id <- "patient"
p1 <- DimPlot(data$unstim[, data$unstim$sample_tag == id], label = T) + NoLegend()
p2 <- DimPlot(data$unstim[, data$unstim$sample_tag == id], group.by = "NH.fine", label = F, repel = T) + NoLegend()
plot_grid(p1, p2)

### Removing some clusters
table(data$unstim$NH.fine)
## 
##                                  Basophils 
##                                         17 
##                        CD4+ Central Memory 
##                                        400 
##                       CD4+ Effector Memory 
##                                        652 
##                        CD8+ Central Memory 
##                                        220 
##                       CD8+ Effector Memory 
##                                        931 
##                    CD8+ Effector Memory RA 
##                                       1915 
##           Colony Forming Unit-Granulocytes 
##                                        298 
##              Colony Forming Unit-Monocytes 
##                                         16 
##                                Eosinophils 
##                                          7 
##                Erythroid_CD34+ CD71+ GlyA- 
##                                          3 
## Granulocytes (Neutrophilic Metamyelocytes) 
##                                        110 
##                 Granulocytes (Neutrophils) 
##                                         42 
##    Hematopoietic stem cells_CD133+ CD34dim 
##                                          3 
##       Hematopoietic stem cells_CD38- CD34+ 
##                                          7 
##                             Mature B cells 
##                                         78 
##        Mature B cells class able to switch 
##                                        207 
##              Mature B cells class switched 
##                                        238 
##           Mature NK cells_CD56- CD16- CD3- 
##                                         92 
##           Mature NK cells_CD56- CD16+ CD3- 
##                                        192 
##           Mature NK cells_CD56+ CD16+ CD3- 
##                                        665 
##        Megakaryocyte/erythroid progenitors 
##                                          1 
##                                  Monocytes 
##                                       1403 
##                    Myeloid Dendritic Cells 
##                                         11 
##                              Naive B cells 
##                                       1297 
##                         Naive CD4+ T cells 
##                                       3109 
##                         Naive CD8+ T cells 
##                                       2661 
##                                 NK T cells 
##                                          1 
##               Plasmacytoid Dendritic Cells 
##                                         15
data$unstim$NH.fine[data$unstim$NH.fine == "Mature NK cells_CD56- CD16+ CD3-"] <- "Monocytes"
data$unstim$NH.fine[data$unstim$NH.fine %in% c("CD4+ Effector Memory", "CD4+ Central Memory")] <- "CD4+ Memory"
data$unstim$NH.fine[data$unstim$NH.fine %in% c("CD8+ Effector Memory", "CD8+ Central Memory", "CD8+ Effector Memory RA")] <- "CD8+ Memory"
data$unstim$NH.fine[data$unstim$NH.fine %in% c("Mature NK cells_CD56+ CD16+ CD3-", "Mature NK cells_CD56- CD16- CD3-")] <- "NK cells"
data$unstim$NH.fine[data$unstim$NH.fine %in% c("Mature B cells class switched", "Mature B cells class able to switch")] <- "Mature B cells"
data$unstim$keep <- data$unstim$NH.fine
data$unstim$keep[data$unstim$keep %in% c("CD4+ Memory", "CD8+ Memory", "Mature B cells", "NK cells", "Monocytes", 
                                         "Naive B cells", "Naive CD4+ T cells", "Naive CD8+ T cells")] <- "Y"
data$unstim$keep[data$unstim$keep %in% c("Basophils", "Colony Forming Unit-Monocytes", "Eosinophils", 
                                         "Erythroid_CD34+ CD71+ GlyA-", "Granulocytes (Neutrophils)",
                                         "Hematopoietic stem cells_CD133+ CD34dim", "Hematopoietic stem cells_CD38- CD34+", 
                                         "Megakaryocyte/erythroid progenitors", "NK T cells", "Myeloid Dendritic Cells",
                                         "Plasmacytoid Dendritic Cells", "Colony Forming Unit-Granulocytes", 
                                         "Granulocytes (Neutrophilic Metamyelocytes)")] <- "N"
test <- data$unstim[, data$unstim$keep == "Y"]

test$label <- test$NH.fine
#test$label[test$seurat_clusters %in% c("9", "13")] <- "Non-naive B cells"
#test$label[test$label == "Mature B cells"] <- "Non-naive B cells"
#test$label[test$seurat_clusters == "3"] <- "Naive B cells"
#test$label[colSums(test[["RNA"]]@data[grep("IGHG", rownames(test)), ]) > 0 & test$label == "Naive B cells"]  <- "Non-naive B cells"

p1 <- DimPlot(test, label = T) + NoLegend()
p2 <- DimPlot(test, group.by = "label", label = T, repel = F) + NoLegend()
plot_grid(p1, p2)

table(test$label)
## 
##        CD4+ Memory        CD8+ Memory     Mature B cells          Monocytes 
##               1052               3066                523               1595 
##      Naive B cells Naive CD4+ T cells Naive CD8+ T cells           NK cells 
##               1297               3109               2661                757
data$unstim <- test

Clustering Visualization - Fig 3A

# Define groups for comparison - patient vs HCs - compare clustering
data <- lapply(data, function(x){
    # find markers of patient vs hc in each cluster
    x$twogroups <- x$sample_tag # define patient vs others
    x$twogroups[x$twogroups != "patient"] <- "hc"
    x$twogroups <- factor(x$twogroups, levels = c("hc", "patient"))
    x$threegroups <- x$sample_tag # define patient vs family vs hc
    x$threegroups[!x$threegroups %in% c("patient", "hc")] <- "family"
    x$celltype.patient <- paste(x$label, x$twogroups, sep = "_")
    Idents(x) <- "celltype.patient"
    x
})

p1 <- DimPlot(data$stim, 
              group.by = "label", 
              label = F, 
              repel = F, 
              label.size = 4, 
              split.by = "twogroups")
p1[[1]]$layers[[1]]$aes_params$alpha <- 0.3
p1 # Fig 3A

Identify cluster-specific biomarkers

ct.markers <- lapply(data, function(x){
    # find markers for every cluster compared to all remaining cells, report only the positive ones
    Idents(x) <- "label"
    ct.markers <- FindAllMarkers(x, only.pos = T, min.pct = 0.1, logfc.threshold = 0.1)
    ct.markers.top <- ct.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
    ct.heatmap <- DoHeatmap(x, features = ct.markers.top$gene, size = 4) + NoLegend()
})
## Calculating cluster NK cells
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster Memory B cells
## Calculating cluster Monocytes
## Calculating cluster Naive B cells
## Calculating cluster CD8+ Memory
## Calculating cluster CD4+ Memory
## Calculating cluster Naive CD4+ T cells
## Calculating cluster Undefined
## Calculating cluster Naive CD8+ T cells
## Calculating cluster Monocytes
## Calculating cluster Naive B cells
## Calculating cluster Mature B cells
## Calculating cluster CD8+ Memory
## Calculating cluster Naive CD4+ T cells
## Calculating cluster Naive CD8+ T cells
## Calculating cluster NK cells
## Calculating cluster CD4+ Memory
ct.markers$stim

ct.markers$unstim

Differential Expression Analysis

Differential expression - Fig 3B, 4A, and 5A

# Differential expression analysis
NFAT.DE <- lapply(names(data), function(x){
    out <- lapply(unique(data[[x]]$label), function(ct){
        NFAT.markers <- FindMarkers(data[[x]], 
                                    ident.1 = paste0(ct, "_patient"), 
                                    ident.2 = paste0(ct, "_hc"), 
                                    min.pct = 0, 
                                    logfc.threshold = 0, 
                                    test.use = "negbinom")
        write.csv(NFAT.markers, file = paste0("scRNAseq/output/", x, "_", ct, ".csv"))
        return(NFAT.markers)
    })
    names(out) <- unique(data[[x]]$label)
    out
})

# Exported to graphpad for volcano plot - Fig 3B, 4A, and 5A

Candidate gene visualization - Fig 3C, 4B, and 5B

plt <- data$stim

### Bcell
cells <- plt[, plt$label == "Naive B cells"]
genes_of_interest <- c("TNFAIP8", "MYC", "HIF1A", "JAK1", "STAT3", "PIKAP1", "LTA", "TNF", "DUSP4")
p <- VlnPlot(cells, features = "TNFAIP8", group.by = "twogroups", pt.size = 0) #plot each gene of interest separately
p <- p + geom_boxplot(width = 0.1)
p # Fig 3C

### CD8+ Memory
cells <- plt[, plt$label == "CD8+ Memory"]
genes_of_interest <- c("GZMH", "GNLY", "PFN1", "S100A10", "GZMB", "TIGIT", "FASLG", "IFNG", "TNF")
p <- VlnPlot(cells, features = "GZMH", group.by = "twogroups", pt.size = 0) #plot each gene of interest separately
p <- p + geom_boxplot(width = 0.1)
p # Fig 4B

### CD4+ Memory
cells <- plt[, plt$label == "CD4+ Memory"]
genes_of_interest <- c("TNFRSF4", "TIGIT", "S100A10", "REL", "NFKBIA", "NFKBIZ", "CD40LG", "MYC", "TNF")
DotPlot(cells, features = genes_of_interest, group.by = "sample_tag")

p <- VlnPlot(cells, features = "TNFRSF4", group.by = "twogroups", pt.size = 0) #plot each gene of interest separately
p <- p + geom_boxplot(width = 0.1)
p # Fig 5B

sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Vancouver
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] future_1.67.0               gridExtra_2.3              
##  [3] ggplot2_3.5.2               cowplot_1.2.0              
##  [5] SingleR_2.10.0              SingleCellExperiment_1.30.1
##  [7] SummarizedExperiment_1.38.1 Biobase_2.68.0             
##  [9] GenomicRanges_1.60.0        GenomeInfoDb_1.44.1        
## [11] IRanges_2.42.0              S4Vectors_0.46.0           
## [13] BiocGenerics_0.54.0         generics_0.1.4             
## [15] MatrixGenerics_1.20.0       matrixStats_1.5.0          
## [17] patchwork_1.3.1             Seurat_5.3.0               
## [19] SeuratObject_5.1.0          sp_2.2-0                   
## [21] dplyr_1.1.4                
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22          splines_4.5.1            
##   [3] later_1.4.2               filelock_1.0.3           
##   [5] tibble_3.3.0              polyclip_1.10-7          
##   [7] fastDummies_1.7.5         httr2_1.2.1              
##   [9] lifecycle_1.0.4           globals_0.18.0           
##  [11] lattice_0.22-7            MASS_7.3-65              
##  [13] alabaster.base_1.8.1      magrittr_2.0.3           
##  [15] plotly_4.11.0             sass_0.4.10              
##  [17] rmarkdown_2.29            jquerylib_0.1.4          
##  [19] yaml_2.3.10               httpuv_1.6.16            
##  [21] sctransform_0.4.2         spam_2.11-1              
##  [23] spatstat.sparse_3.1-0     reticulate_1.43.0        
##  [25] pbapply_1.7-4             DBI_1.2.3                
##  [27] RColorBrewer_1.1-3        abind_1.4-8              
##  [29] Rtsne_0.17                purrr_1.1.0              
##  [31] rappdirs_0.3.3            GenomeInfoDbData_1.2.14  
##  [33] ggrepel_0.9.6             irlba_2.3.5.1            
##  [35] listenv_0.9.1             spatstat.utils_3.1-5     
##  [37] pheatmap_1.0.13           goftest_1.2-3            
##  [39] RSpectra_0.16-2           spatstat.random_3.4-1    
##  [41] fitdistrplus_1.2-4        parallelly_1.45.1        
##  [43] DelayedMatrixStats_1.30.0 codetools_0.2-20         
##  [45] DelayedArray_0.34.1       tidyselect_1.2.1         
##  [47] UCSC.utils_1.4.0          farver_2.1.2             
##  [49] viridis_0.6.5             BiocFileCache_2.16.1     
##  [51] rmdformats_1.0.4          spatstat.explore_3.5-2   
##  [53] jsonlite_2.0.0            BiocNeighbors_2.2.0      
##  [55] progressr_0.15.1          ggridges_0.5.6           
##  [57] survival_3.8-3            tools_4.5.1              
##  [59] ica_1.0-3                 Rcpp_1.1.0               
##  [61] glue_1.8.0                SparseArray_1.8.1        
##  [63] xfun_0.52                 HDF5Array_1.36.0         
##  [65] gypsum_1.4.0              withr_3.0.2              
##  [67] BiocManager_1.30.26       fastmap_1.2.0            
##  [69] rhdf5filters_1.20.0       digest_0.6.37            
##  [71] R6_2.6.1                  mime_0.13                
##  [73] scattermore_1.2           tensor_1.5.1             
##  [75] spatstat.data_3.1-8       RSQLite_2.4.2            
##  [77] h5mread_1.0.1             celldex_1.18.0           
##  [79] tidyr_1.3.1               data.table_1.17.8        
##  [81] httr_1.4.7                htmlwidgets_1.6.4        
##  [83] S4Arrays_1.8.1            uwot_0.2.3               
##  [85] pkgconfig_2.0.3           gtable_0.3.6             
##  [87] blob_1.2.4                lmtest_0.9-40            
##  [89] XVector_0.48.0            htmltools_0.5.8.1        
##  [91] dotCall64_1.2             bookdown_0.44            
##  [93] alabaster.matrix_1.8.0    scales_1.4.0             
##  [95] png_0.1-8                 spatstat.univar_3.1-4    
##  [97] knitr_1.50                reshape2_1.4.4           
##  [99] nlme_3.1-168              curl_7.0.0               
## [101] rhdf5_2.52.1              cachem_1.1.0             
## [103] zoo_1.8-14                stringr_1.5.1            
## [105] BiocVersion_3.21.1        KernSmooth_2.23-26       
## [107] vipor_0.4.7               parallel_4.5.1           
## [109] miniUI_0.1.2              AnnotationDbi_1.70.0     
## [111] ggrastr_1.0.2             alabaster.schemas_1.8.0  
## [113] pillar_1.11.0             grid_4.5.1               
## [115] vctrs_0.6.5               RANN_2.6.2               
## [117] promises_1.3.3            dbplyr_2.5.0             
## [119] beachmat_2.24.0           xtable_1.8-4             
## [121] cluster_2.1.8.1           beeswarm_0.4.0           
## [123] evaluate_1.0.4            cli_3.6.5                
## [125] compiler_4.5.1            rlang_1.1.6              
## [127] crayon_1.5.3              future.apply_1.20.0      
## [129] labeling_0.4.3            ggbeeswarm_0.7.2         
## [131] plyr_1.8.9                stringi_1.8.7            
## [133] alabaster.se_1.8.0        viridisLite_0.4.2        
## [135] deldir_2.0-4              BiocParallel_1.42.1      
## [137] Biostrings_2.76.0         lazyeval_0.2.2           
## [139] spatstat.geom_3.5-0       Matrix_1.7-3             
## [141] ExperimentHub_2.16.1      RcppHNSW_0.6.0           
## [143] sparseMatrixStats_1.20.0  bit64_4.6.0-1            
## [145] Rhdf5lib_1.30.0           KEGGREST_1.48.1          
## [147] shiny_1.11.1              alabaster.ranges_1.8.0   
## [149] AnnotationHub_3.16.1      ROCR_1.0-11              
## [151] igraph_2.1.4              memoise_2.0.1            
## [153] bslib_0.9.0               bit_4.6.0