Cell Line L1 Analysis
1. load libraries
2. Load Seurat Object
#Load Seurat Object L1
load("../Documents/1-SS-STeps/4-Analysis_and_Robj_Marie/analyse juillet 2023/ObjetsR/L1.Robj")
L1
## An object of class Seurat
## 36629 features across 5825 samples within 2 assays
## Active assay: RNA (36601 features, 0 variable features)
## 2 layers present: counts, data
## 1 other assay present: ADT
3. QC
# Set identity classes to an existing column in meta data
Idents(object = L1) <- "cell_line"
L1[["percent.rb"]] <- PercentageFeatureSet(L1, pattern = "^RP[SL]")
VlnPlot(L1, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mito",
"percent.rb"),
ncol = 4, pt.size = 0.1) &
theme(plot.title = element_text(size=10))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
##FeatureScatter is typically used to visualize feature-feature
relationships ##for anything calculated by the object, ##i.e. columns in
object metadata, PC scores etc.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Assign
Cell-Cycle Scores
## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 17646 by 5825
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## Found 99 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 17646 genes
## Computing corrected count matrix for 17646 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 22.25219 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Set default assay to SCT
## Warning: The following features are not present in the object: MLF1IP, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: FAM64A, HN1, not
## searching for symbol synonyms
4. Normalize data
# Apply SCTransform
L1 <- SCTransform(L1, vars.to.regress = c("percent.rb","percent.mito", "CC.Difference"),
do.scale=TRUE,
do.center=TRUE,
verbose = TRUE)
## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 17646 by 5825
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## Found 99 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 17646 genes
## Computing corrected count matrix for 17646 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 18.47038 secs
## Determine variable features
## Regressing out percent.rb, percent.mito, CC.Difference
## Centering and scaling data matrix
## Place corrected count matrix in counts slot
## Set default assay to SCT
5. Perform PCA
Variables_genes <- L1@assays$SCT@var.features
# Exclude genes starting with "HLA-" AND "Xist" AND "TRBV, TRAV"
Variables_genes_after_exclusion <- Variables_genes[!grepl("^HLA-|^XIST|^TRBV|^TRAV", Variables_genes)]
# These are now standard steps in the Seurat workflow for visualization and clustering
L1 <- RunPCA(L1,
features = Variables_genes_after_exclusion,
do.print = TRUE,
pcs.print = 1:5,
genes.print = 15,
npcs = 50)
## PC_ 1
## Positive: TUBA1B, H2AFZ, HMGB1, STMN1, DUT, TUBB, HMGB2, MCM7, HMGN2, HIST1H4C
## UBE2C, TYMS, PCLAF, PTMA, TUBB4B, TK1, DEK, RRM2, MCM3, CDT1
## CKS2, PKMYT1, SIVA1, TOP2A, DNMT1, CENPM, UBE2S, ALYREF, RANBP1, HIST1H1A
## Negative: CCND2, LTB, RNF213, BTG2, CD44, SLFN5, CD48, NFATC3, CTSC, DDIT4
## GAS5, CSF1, INPP4B, SEMA4A, CD74, CALD1, IKZF3, TGFBR3, BTG1, ZYX
## KDM5B, EGLN3, DLEU1, PNRC1, PTGIS, PTPRC, VIM, KDM7A, IL4, RALGAPA2
## PC_ 2
## Positive: ACTB, TMSB10, PFN1, NME1, MIF, SRRT, CLIC1, ATP5MC3, EIF4A1, PRDX1
## MRPL4, CFL1, PRELID1, HSP90AB1, PPIB, ATP5MF, POMP, TIMM13, LDHA, SSBP1
## NDUFS5, MRPL16, PARK7, BCAP31, PSMB3, CORO1A, PSMA7, PSME2, ACTG1, GADD45GIP1
## Negative: MALAT1, ASPM, CENPF, MKI67, TOP2A, HIST1H1E, HIST1H1B, TUBB, PCLAF, NUSAP1
## CAMK4, RRM2, TYMS, ATAD2, MBNL1, DHFR, SYNE2, RCSD1, HIST1H1C, STMN1
## RABGAP1L, PDE3B, KIF11, IKZF2, PTPRC, MAL, NEIL3, NEK7, NCAPG2, BRCA1
## PC_ 3
## Positive: RPS28, RPL32, RPL29, RPL35A, RPS15, RPL36, RPL14, GZMA, CEBPD, HSP90AB1
## LSR, SLCO3A1, ADGRE5, OAZ1, FXYD5, EEF2, AC002069.2, BSG, RPSA, MLLT3
## FAM107B, AC069410.1, PDGFD, LEF1, P2RY14, FOXP1, SPINT2, H1FX, PRDX2, TRBC2
## Negative: GAPDH, HPGDS, PKM, NKG7, KRT1, FABP5, LY6E, S100A4, NPDC1, EEF1A1
## VIM, CD48, NQO1, S100P, C12orf75, CFH, FSCN1, SLC25A5, LMNA, SH3BGRL3
## SH2D2A, CYP1B1, PRDX1, SIX3, ALOX5AP, ACTG1, P4HA2, TMSB4X, BACE2, TPST2
## PC_ 4
## Positive: RPL10, MRPL16, ID3, SEPTIN11, HTATSF1, LAGE3, TCN1, GIMAP7, BCAP31, RIPOR2
## PON2, ARHGEF6, FYB1, NRXN3, KLF2, LINC00892, TMSB10, AIFM1, STK26, DKC1
## TCF7, DSC1, ZP3, CD81, MKRN1, TAFA2, IDH3G, COA1, COBL, IKZF2
## Negative: RPL14, RPL32, RPL29, IL32, RPS27, CYBA, CTSC, CISH, RPSA, CDKN1A
## RHOC, RPL35A, CACYBP, ACTB, GAS5, IFITM1, ATP1B1, SOCS1, FTH1, FXYD5
## CXCR3, CEBPD, LAT, S100A11, H3F3A, LTB, GZMM, ITGB7, PIM1, B2M
## PC_ 5
## Positive: KLF2, HNRNPA1, S100A10, IMPDH2, GIMAP7, NUCKS1, SH3BP5, MAL, MT-CYB, TLE5
## VIM, ETS1, S1PR4, SELPLG, KLF3, CD55, IL7R, PTMA, RPSA, HDGF
## ITGB1, CCR7, ATP5MC2, NOSIP, HSD17B11, CDKN2B, CACYBP, FGFBP2, HMGB2, RPL28
## Negative: HIST1H1A, SKAP1, HIST1H1B, TUBA4A, HIST1H1E, HIST1H2AH, HIST1H4C, HIST1H1D, HIST1H1C, SRRT
## CTSC, HIST1H3B, MRPL16, TUBB, SSR4, EIF4G3, ARHGAP4, RPL10, HIST1H3D, CD96
## SRM, MRPL41, HIST2H2AC, PTPN7, HIST1H3C, HIST1H4F, HIST1H2AL, SEPTIN11, TMC8, RALGAPA2
# Perform
PCA TEST
library(ggplot2)
library(RColorBrewer)
# Assuming you have 10 different cell lines, generating a color palette with 10 colors
cell_line_colors <- brewer.pal(10, "Set3")
# Assuming L1$cell_line is a factor or character vector containing cell line names
data <- as.data.frame(table(L1$cell_line))
colnames(data) <- c("cell_line", "nUMI") # Change column name to nUMI
ncells <- ggplot(data, aes(x = cell_line, y = nUMI, fill = cell_line)) +
geom_col() +
theme_classic() +
geom_text(aes(label = nUMI),
position = position_dodge(width = 0.9),
vjust = -0.25) +
scale_fill_manual(values = cell_line_colors) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) + # Adjust the title position
ggtitle("Filtered cells per sample") +
xlab("Cell lines") + # Adjust x-axis label
ylab("Frequency") # Adjust y-axis label
print(ncells)
# TEST-1
# given that the output of RunPCA is "pca"
# replace "so" by the name of your seurat object
pct <- L1[["pca"]]@stdev / sum(L1[["pca"]]@stdev) * 100
cumu <- cumsum(pct) # Calculate cumulative percents for each PC
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which((pct[-length(pct)] - pct[-1]) > 0.1), decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%. -> co2
co2
## [1] 15
# TEST-2
# get significant PCs
stdv <- L1[["pca"]]@stdev
sum.stdv <- sum(L1[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
## [1] 15
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumulative, percent.stdv, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
6. Clustering
L1 <- FindNeighbors(L1,
dims = 1:min.pc,
verbose = FALSE)
# understanding resolution
L1 <- FindClusters(L1,
resolution = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
0.7,0.8, 0.9, 1, 1.1, 1.2))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9215
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8916
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8701
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8494
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8287
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8146
## Number of communities: 9
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8022
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7907
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7793
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7681
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7579
## Number of communities: 13
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5825
## Number of edges: 200767
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7486
## Number of communities: 14
## Elapsed time: 0 seconds
# non-linear dimensionality reduction --------------
L1 <- RunUMAP(L1,
dims = 1:min.pc,
verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
# note that you can set `label = TRUE` or use the Label Clusters function to help label
# individual clusters
DimPlot(L1,group.by = "cell_line",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.1",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.2",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.3",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.4",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.5",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.6",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.7",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.8",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.0.9",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.1",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.1.1",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1,
group.by = "SCT_snn_res.1.2",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
7. Azimuth Annotation
## Warning: The following packages are already installed and will not be
## reinstalled: pbmcref
# The RunAzimuth function can take a Seurat object as input
L1 <- RunAzimuth(L1, reference = "pbmcref")
## Warning: Overwriting miscellanous data for model
## Warning: Adding a dimensional reduction (refUMAP) without the associated assay
## being present
## Warning: Adding a dimensional reduction (refUMAP) without the associated assay
## being present
## detected inputs from HUMAN with id type Gene.name
## reference rownames detected HUMAN with id type Gene.name
## Normalizing query using reference SCT model
## Warning: 113 features of the features specified were not present in both the reference query assays.
## Continuing with remaining 4887 features.
## Projecting cell embeddings
## Finding query neighbors
## Finding neighborhoods
## Finding anchors
## Found 489 anchors
## Finding integration vectors
## Finding integration vector weights
## Predicting cell labels
## Predicting cell labels
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Predicting cell labels
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
##
## Integrating dataset 2 with reference dataset
## Finding integration vectors
## Integrating data
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from integrated_dr_ to integrateddr_
## Computing nearest neighbors
## Running UMAP projection
## Warning in RunUMAP.default(object = neighborlist, reduction.model =
## reduction.model, : Number of neighbors between query and reference is not equal
## to the number of neighbors within reference
## 15:25:49 Read 5825 rows
## 15:25:49 Processing block 1 of 1
## 15:25:49 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 15:25:49 Initializing by weighted average of neighbor coordinates using 1 thread
## 15:25:49 Commencing optimization for 67 epochs, with 116500 positive edges
## 15:25:49 Finished
## Warning: No assay specified, setting assay as RNA by default.
## Projecting reference PCA onto query
## Finding integration vector weights
## Projecting back the query cells into original PCA space
## Finding integration vector weights
## Computing scores:
## Finding neighbors of original query cells
## Finding neighbors of transformed query cells
## Computing query SNN
## Determining bandwidth and computing transition probabilities
## Total elapsed time: 2.19832563400269
UMAPPlot(L1, group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = F)
UMAPPlot(L1, group.by = "predicted.celltype.l2",
reduction = "umap",
label.size = 3,
repel = T,
label = T)
8. Cell type annotation using ProjectTils
#Load reference atlas and query data
ref <- readRDS(file = "CD4T_human_ref_v1.rds")
#Run Projection algorithm
query.projected <- Run.ProjecTILs(L1, ref = ref)
## | | | 0%[1] "Using assay SCT for query"
## Pre-filtering cells with scGate...
##
## ### Detected a total of 5396 pure 'Target' cells (92.64% of total)
## [1] "429 out of 5825 ( 7% ) non-pure cells removed. Use filter.cells=FALSE to avoid pre-filtering"
## [1] "Aligning query to reference map for batch-correction..."
## Warning: Layer counts isn't present in the assay object[[assay]]; returning
## NULL
## Preparing PCA embeddings for objects...
## Warning: Number of dimensions changing from 50 to 20
##
## Projecting corrected query onto Reference PCA space
##
## Projecting corrected query onto Reference UMAP space
## Warning: Not all features provided are in this Assay object, removing the
## following feature(s): CXCL13, GNLY, TNFRSF4, CD177, CCL22, LAG3, CGA, G0S2,
## IL17A, CCL4L2, TRDC, IL21, TNFRSF18, FOXP3, HLA-DQA1, KRT86, CAV1, IL10, CCL3,
## IL1R2, ACTG2, CCR8, EBI3, DIRAS3, KLRD1, THBS1, ZBED2, CAVIN3, GNG8, MS4A6A,
## TYROBP, PROK2, BEX3, BASP1, GLUL, PVALB, HES4, MRC1, CTLA4, TASL, IL1RN,
## LHFPL6, GBP5, MYADM, H1-4, KLF4, CTSH, H2AZ1, TENT5A, CD9, MSC, RCAN2, GPR25,
## CCR6, TNS3, ELAPOR1, NR4A3, IGFBP3, CCL3L3, PRRG4, POLR1F, SDC4, FCRL3, GPR55,
## PLK2, CRLF2, AHSP, FAIM2, CSF2RB, CDCA7L, HTRA1, H1-2, H1-0, ETV7, FLT1, LMCD1,
## WARS1, H1-3, ASCL2, CLNK, H2BC11, ADRB2, GPX1, STAC, H2AC6, IRAG2, GNA15, CD80,
## PLAAT3, H1-10, MYO7A, DAPK2, H3C10, NAP1L2, IL22, CHGB, KRT81, SLC28A3, PDLIM4,
## ZNF683, ECEL1, HES1, HSD11B1, CPE, CD40, CA2, NT5E, CD86, H4C3, RORC, CXXC5,
## NEBL, GSTM2
## | |======================================================================| 100%
## Creating slots functional.cluster and functional.cluster.conf in query object
#Plot the predicted composition of the query in terms of reference T cell subtypes
plot.statepred.composition(ref, query.projected, metric = "Percent")
## | | | 0%[1] "Using assay SCT for query"
## Pre-filtering cells with scGate...
##
## ### Detected a total of 5396 pure 'Target' cells (92.64% of total)
## [1] "429 out of 5825 ( 7% ) non-pure cells removed. Use filter.cells=FALSE to avoid pre-filtering"
## [1] "Aligning query to reference map for batch-correction..."
## Warning: Layer counts isn't present in the assay object[[assay]]; returning
## NULL
## Preparing PCA embeddings for objects...
## Warning: Number of dimensions changing from 50 to 20
##
## Projecting corrected query onto Reference PCA space
##
## Projecting corrected query onto Reference UMAP space
## Warning: Not all features provided are in this Assay object, removing the
## following feature(s): CXCL13, GNLY, TNFRSF4, CD177, CCL22, LAG3, CGA, G0S2,
## IL17A, CCL4L2, TRDC, IL21, TNFRSF18, FOXP3, HLA-DQA1, KRT86, CAV1, IL10, CCL3,
## IL1R2, ACTG2, CCR8, EBI3, DIRAS3, KLRD1, THBS1, ZBED2, CAVIN3, GNG8, MS4A6A,
## TYROBP, PROK2, BEX3, BASP1, GLUL, PVALB, HES4, MRC1, CTLA4, TASL, IL1RN,
## LHFPL6, GBP5, MYADM, H1-4, KLF4, CTSH, H2AZ1, TENT5A, CD9, MSC, RCAN2, GPR25,
## CCR6, TNS3, ELAPOR1, NR4A3, IGFBP3, CCL3L3, PRRG4, POLR1F, SDC4, FCRL3, GPR55,
## PLK2, CRLF2, AHSP, FAIM2, CSF2RB, CDCA7L, HTRA1, H1-2, H1-0, ETV7, FLT1, LMCD1,
## WARS1, H1-3, ASCL2, CLNK, H2BC11, ADRB2, GPX1, STAC, H2AC6, IRAG2, GNA15, CD80,
## PLAAT3, H1-10, MYO7A, DAPK2, H3C10, NAP1L2, IL22, CHGB, KRT81, SLC28A3, PDLIM4,
## ZNF683, ECEL1, HES1, HSD11B1, CPE, CD40, CA2, NT5E, CD86, H4C3, RORC, CXXC5,
## NEBL, GSTM2
## | |======================================================================| 100%
## Creating slots functional.cluster and functional.cluster.conf in query object
UMAPPlot(L1, group.by = "functional.cluster",
reduction = "umap",
label.size = 3,
repel = T,
label = T)
9.Cell type annotation using SingleR
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
#convert our Seurat object to single cell experiment (SCE)
sce <- as.SingleCellExperiment(DietSeurat(L1))
#using SingleR
monaco.main <- SingleR(test = sce,assay.type.test = 1,ref = monaco.ref,labels = monaco.ref$label.main)
monaco.fine <- SingleR(test = sce,assay.type.test = 1,ref = monaco.ref,labels = monaco.ref$label.fine)
hpca.main <- SingleR(test = sce,assay.type.test = 1,ref = hpca.ref,labels = hpca.ref$label.main)
hpca.fine <- SingleR(test = sce,assay.type.test = 1,ref = hpca.ref,labels = hpca.ref$label.fine)
dice.main <- SingleR(test = sce,assay.type.test = 1,ref = dice.ref,labels = dice.ref$label.main)
dice.fine <- SingleR(test = sce,assay.type.test = 1,ref = dice.ref,labels = dice.ref$label.fine)
bpe.main <- SingleR(test = sce,assay.type.test = 1,ref = bpe.ref,labels = bpe.ref$label.main)
bpe.fine <- SingleR(test = sce,assay.type.test = 1,ref = bpe.ref,labels = bpe.ref$label.fine)
#summary of general cell type annotations
#table(monaco.main$pruned.labels)
#table(hpca.main$pruned.labels)
#table(dice.main$pruned.labels)
#table(bpe.main$pruned.labels)
#The finer cell types annotations are you after, the harder they are to get reliably.
#This is where comparing many databases, as well as using individual markers from literature,
#would all be very valuable.
#table(monaco.fine$pruned.labels)
#table(hpca.fine$pruned.labels)
#table(dice.fine$pruned.labels)
#table(bpe.fine$pruned.labels)
#add the annotations to the Seurat object metadata
L1@meta.data$monaco.main <- monaco.main$pruned.labels
L1@meta.data$monaco.fine <- monaco.fine$pruned.labels
#
L1@meta.data$hpca.main <- hpca.main$pruned.labels
L1@meta.data$hpca.fine <- hpca.fine$pruned.labels
#
L1@meta.data$dice.main <- dice.main$pruned.labels
L1@meta.data$dice.fine <- dice.fine$pruned.labels
#
L1@meta.data$bpe.main <- bpe.main$pruned.labels
L1@meta.data$bpe.fine <- bpe.fine$pruned.labels
# Monaco Annotations
DimPlot(L1, group.by = "monaco.main",
reduction = "umap",
label.size = 3,
repel = T,
label = F)
DimPlot(L1, group.by = "monaco.main",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1, group.by = "monaco.fine",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# HPCA Annotations
DimPlot(L1, group.by = "hpca.main",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1, group.by = "hpca.fine",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# DICE Annotations
DimPlot(L1, group.by = "dice.main",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1, group.by = "dice.fine",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
# BPE Annotations
DimPlot(L1, group.by = "bpe.main",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
DimPlot(L1, group.by = "bpe.fine",
reduction = "umap",
label.size = 3,
repel = T,
label = T, label.box = T)
10. clusTree
## Loading required package: ggraph
##
## Attaching package: 'ggraph'
## The following object is masked from 'package:sp':
##
## geometry