# ELeFHAnt
library(ELeFHAnt)
#> Loading required package: tidymodels
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.2.0 ──
#> ✔ broom 0.8.0 ✔ recipes 0.2.0
#> ✔ dials 1.0.0 ✔ rsample 0.1.1
#> ✔ dplyr 1.0.9 ✔ tibble 3.1.7
#> ✔ ggplot2 3.3.6 ✔ tidyr 1.2.0
#> ✔ infer 1.0.2 ✔ tune 0.2.0
#> ✔ modeldata 0.1.1 ✔ workflows 0.2.6
#> ✔ parsnip 1.0.0 ✔ workflowsets 0.2.1
#> ✔ purrr 0.3.4 ✔ yardstick 1.0.0
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
#> ✖ purrr::discard() masks scales::discard()
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ✖ recipes::step() masks stats::step()
#> • Use suppressPackageStartupMessages() to eliminate package startup messages
#> Loading required package: Seurat
#> Attaching SeuratObject
#> Attaching sp
#> Loading required package: class
#> Loading required package: splitstackshape
#> Loading required package: fgsea
#> Loading required package: msigdbr
#> Loading required package: scPred
#> Loading required package: magrittr
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:tidyr':
#>
#> extract
#> The following object is masked from 'package:purrr':
#>
#> set_names
#> Loading required package: harmony
#> Loading required package: Rcpp
#>
#> Attaching package: 'Rcpp'
#> The following object is masked from 'package:rsample':
#>
#> populate
#> Loading required package: scater
#> Loading required package: SingleCellExperiment
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#>
#> Attaching package: 'matrixStats'
#> The following object is masked from 'package:dplyr':
#>
#> count
#>
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#>
#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#> colWeightedMeans, colWeightedMedians, colWeightedSds,
#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#> rowWeightedSds, rowWeightedVars
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#>
#> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#> clusterExport, clusterMap, parApply, parCapply, parLapply,
#> parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from 'package:dplyr':
#>
#> combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#> dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#> grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#> rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#> union, unique, unsplit, which.max, which.min
#> Loading required package: S4Vectors
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:tidyr':
#>
#> expand
#> The following objects are masked from 'package:dplyr':
#>
#> first, rename
#> The following object is masked from 'package:base':
#>
#> expand.grid
#> Loading required package: IRanges
#>
#> Attaching package: 'IRanges'
#> The following object is masked from 'package:sp':
#>
#> %over%
#> The following object is masked from 'package:purrr':
#>
#> reduce
#> The following objects are masked from 'package:dplyr':
#>
#> collapse, desc, slice
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#>
#> rowMedians
#> The following objects are masked from 'package:matrixStats':
#>
#> anyMissing, rowMedians
#>
#> Attaching package: 'SummarizedExperiment'
#> The following object is masked from 'package:SeuratObject':
#>
#> Assays
#> The following object is masked from 'package:Seurat':
#>
#> Assays
#>
#> Attaching package: 'scater'
#> The following object is masked from 'package:rsample':
#>
#> bootstraps
#> Loading required package: hrbrthemes
#> NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
#> Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
#> if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
#> Loading required package: ranger
#> Loading required package: LiblineaR
#> Loading required package: caTools
#>
#> Attaching package: 'caTools'
#> The following object is masked from 'package:IRanges':
#>
#> runmean
#> The following object is masked from 'package:S4Vectors':
#>
#> runmean
#> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
#> loading 'ELeFHAnt'
# Load the bundled example datasets and keep working copies under short names.
data("reference_PBMC", "query_PBMC")
reference <- reference_PBMC
query <- query_PBMC

# Standard Seurat preprocessing of the query, all with default arguments:
# normalisation, variable-feature selection, then scaling.
query <- NormalizeData(query)
query <- FindVariableFeatures(query)
query <- ScaleData(query)
#> Centering and scaling data matrix
# Run PCA on the query using RunPCA()'s default arguments.
query <- RunPCA(query)
#> PC_ 1
#> Positive: CST3, AIF1, LST1, FTL, FTH1, TYMP, TYROBP, CFD, FCER1G, SERPINA1
#> FCN1, LYZ, CTSS, IFITM3, S100A9, LGALS1, COTL1, PSAP, IFI30, S100A11
#> NPC2, CFP, SAT1, RP11-290F20.3, S100A8, PYCARD, S100A6, PILRA, LGALS2, CEBPB
#> Negative: IL32, LTB, CD3E, LDHB, CTSW, GZMM, CD2, IL7R, CCL5, CD247
#> ACAP1, CST7, GZMA, STK17A, NKG7, CD27, PRF1, HOPX, GIMAP5, NOSIP
#> AQP3, GZMK, NCR3, FGFBP2, LYAR, KLRG1, SAMD3, CD8B, ETS1, GZMB
#> PC_ 2
#> Positive: PF4, SDPR, GNG11, PPBP, SPARC, GP9, TUBB1, HIST1H2AC, CLU, AP001189.4
#> PTCRA, ITGA2B, NRGN, RGS18, CD9, TMEM40, MMD, CA2, ACRBP, TREML1
#> F13A1, SEPT5, TSC22D1, PTGS1, CMTM5, LY6G6F, GP1BA, RP11-367G6.3, MYL9, RUFY1
#> Negative: RPS2, TMSB10, CYBA, NKG7, S100A4, GZMA, CST7, PRF1, CTSW, GNLY
#> FGFBP2, CD247, EIF4A1, GZMB, GZMM, ID2, IFITM2, GZMH, SPON2, ANXA1
#> CCL4, FCGR3A, PFN1, APOBEC3G, RBM3, S100A10, GIMAP7, IGFBP7, HOPX, CLIC3
#> PC_ 3
#> Positive: NKG7, PRF1, GZMB, CST7, GZMA, FGFBP2, GNLY, CTSW, SPON2, CD247
#> GZMH, GZMM, CCL5, CCL4, FCGR3A, SRGN, CLIC3, AKR1C3, XCL2, PFN1
#> ACTB, IGFBP7, TTC38, HOPX, APMAP, SH3BGRL3, RHOC, ID2, ARPC5L, ANXA1
#> Negative: CD79A, MS4A1, HLA-DRA, HLA-DQB1, TCL1A, HLA-DQA1, RPS2, HLA-DRB1, CD74, CD79B
#> LTB, HLA-DPB1, HLA-DMA, HLA-DRB5, HLA-DPA1, HLA-DQA2, FCER2, LY86, HVCN1, SNHG7
#> KIAA0125, P2RX5, IRF8, CD19, QRSL1, SWAP70, IGLL5, FCGR2B, C6orf48, POU2AF1
#> PC_ 4
#> Positive: S100A4, S100A8, TMSB4X, S100A6, S100A9, CD14, GIMAP7, FCN1, IL32, RBP7
#> LGALS2, S100A11, CD3E, TYROBP, ANXA1, LYZ, S100A12, IL7R, MS4A6A, GZMM
#> GIMAP4, FTL, CFD, LGALS1, S100A10, NOSIP, CD2, AIF1, FYB, TIMP1
#> Negative: HLA-DQA1, KIAA0101, TYMS, CD79A, HLA-DQB1, RRM2, TK1, CD74, CD79B, GINS2
#> MS4A1, HLA-DQA2, MKI67, HLA-DPB1, ZWINT, HLA-DRA, MYBL2, HLA-DRB1, BIRC5, HLA-DPA1
#> HLA-DRB5, KIFC1, TCL1A, CLSPN, HLA-DMA, CENPM, MZB1, AURKB, STMN1, NUSAP1
#> PC_ 5
#> Positive: LDHB, VIM, IL7R, CD3E, IL32, AQP3, NOSIP, CD27, RPS2, CD2
#> FYB, GIMAP7, CD40LG, RRM2, KIAA0101, S100A10, LTB, TYMS, GIMAP4, TK1
#> ZWINT, MKI67, PPA1, LDLRAP1, GIMAP5, BIRC5, GINS2, GAPDH, TRADD, COTL1
#> Negative: GZMB, FGFBP2, CD79B, CD79A, GNLY, TCL1A, SPON2, PRF1, MS4A1, CD74
#> HLA-DQA1, NKG7, CCL4, HLA-DQB1, HLA-DPB1, CLIC3, HLA-DPA1, HLA-DRA, CST7, HLA-DRB1
#> IGFBP7, PLAC8, TTC38, AKR1C3, GZMA, FCGR3A, XCL2, HLA-DRB5, FCER2, APMAP
# Compute the UMAP embedding of the query from dimensions 1 to 20.
query <- RunUMAP(query, dims = 1:20)
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session
#> 19:30:59 UMAP embedding parameters a = 0.9922 b = 1.112
#> 19:30:59 Read 1358 rows and found 20 numeric columns
#> 19:30:59 Using Annoy for neighbor search, n_neighbors = 30
#> 19:30:59 Building Annoy index with metric = cosine, n_trees = 50
#> 0% 10 20 30 40 50 60 70 80 90 100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 19:30:59 Writing NN index file to temp file /var/folders/bw/whg3swn15jb08_f7v2y09xw9glk1wg/T//Rtmpsj5cKz/file1414515154c55
#> 19:30:59 Searching Annoy index using 1 thread, search_k = 3000
#> 19:30:59 Annoy recall = 100%
#> 19:31:00 Commencing smooth kNN distance calibration using 1 thread
#> 19:31:01 Initializing from normalized Laplacian + noise
#> 19:31:01 Commencing optimization for 500 epochs, with 54848 positive edges
#> 19:31:03 Optimization finished
# downsample = FALSE: the reference and query have already been downsampled.
# Annotate the query against the reference with the "Ensemble" method,
# without validating the predictions (validatePredictions = FALSE).
out.CelltypeAnnotation <- CelltypeAnnotation(
  reference = reference,
  query = query,
  downsample = FALSE,
  classification.method = "Ensemble",
  validatePredictions = FALSE
)
#> Setting Assay of reference and query to RNA
#> Running Diagonistis on reference and query
#> Number of cells in reference:2019
#> Number of cells in query:1358
#> Calculating ratio of number of cells in reference vs query
#> Ratio of number of cells in query vs reference:0.672610203070827
#> Merging reference and query
#> Normalization, Variable Feature Selection and scaling
#> Centering and scaling data matrix
#> Number of Features selected:2000
#> Generating train and test sets
#> Ensemble learning using classification accuracy of both Random Forest and SVM classifiers
#> Setting up randomForest classifier learning
#> Training & Classifying using randomForest classifier
#> Added Predicted celltypes using randomForest to query
#> Setting up SVM classifier learning
#> Training & Classifying using SVM classifier
#> Added Predicted celltypes using SVM to query
#> randomForest and SVM based learning and predictions completed. Using predictions from RF and SVM to make Ensemble Predictions
#> Calculating weights for randomForest classifier
#> Accuracy estimate of randomForest classifier:67.2070211702965
#> Calculating weights for SVM classifier
#> Accuracy estimate of SVM classifier:62.6052501238237
#> Added Predicted celltypes using Ensemble learning to query
#> Ensembl celltype annotation completed.
# Copy the ensemble predictions onto the query object as its "Celltypes" column.
query$Celltypes <- out.CelltypeAnnotation$PredictedCelltype_UsingEnsemble

# Two UMAP panels of the annotated object: one grouped by "seurat_clusters",
# one grouped by the ensemble-predicted cell type.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
p1 <- DimPlot(
  out.CelltypeAnnotation,
  group.by = "seurat_clusters",
  label = TRUE,
  reduction = "umap",
  label.size = 6,
  repel = TRUE
) + NoLegend()
p2 <- DimPlot(
  out.CelltypeAnnotation,
  group.by = "PredictedCelltype_UsingEnsemble",
  label = TRUE,
  reduction = "umap",
  label.size = 6,
  repel = TRUE
) + NoLegend()

# Show both panels side by side.
p1 + p2

# downsample = FALSE: the reference and query have already been downsampled.
# Harmonize labels across the reference and the query, integrating them first
# (perform_integration = TRUE) and using the "Ensemble" method.
out.LabelHarmonization <- LabelHarmonization(
  seurat.objects = c(reference, query),
  perform_integration = TRUE,
  downsample = FALSE,
  classification.method = "Ensemble",
  validatePredictions = FALSE
)
#> Starting integration using Seurat
#> Computing 2000 integration features
#> Scaling features for provided objects
#> Finding all pairwise anchors
#> Running CCA
#> Merging objects
#> Finding neighborhoods
#> Finding anchors
#> Found 4756 anchors
#> Filtering anchors
#> Retained 3607 anchors
#> Merging dataset 2 into 1
#> Extracting anchors for merged samples
#> Finding integration vectors
#> Finding integration vector weights
#> Integrating data
#> Integration Completed. Performing Scaling, Dimension reduction and clustering
#> 19:32:21 UMAP embedding parameters a = 0.9922 b = 1.112
#> 19:32:21 Read 3377 rows and found 30 numeric columns
#> 19:32:21 Using Annoy for neighbor search, n_neighbors = 30
#> 19:32:21 Building Annoy index with metric = cosine, n_trees = 50
#> 0% 10 20 30 40 50 60 70 80 90 100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 19:32:21 Writing NN index file to temp file /var/folders/bw/whg3swn15jb08_f7v2y09xw9glk1wg/T//Rtmpsj5cKz/file141452f080a38
#> 19:32:21 Searching Annoy index using 1 thread, search_k = 3000
#> 19:32:22 Annoy recall = 100%
#> 19:32:22 Commencing smooth kNN distance calibration using 1 thread
#> 19:32:23 Initializing from normalized Laplacian + noise
#> 19:32:24 Commencing optimization for 500 epochs, with 143682 positive edges
#> 19:32:29 Optimization finished
#> Computing nearest neighbor graph
#> Computing SNN
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#>
#> Number of nodes: 3377
#> Number of edges: 170045
#>
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8618
#> Number of communities: 13
#> Elapsed time: 0 seconds
#> Number of cells in integrated atlas:3377
#> Generating train and test datasets using stratification -- 60% for training & 40% for testing
#> Number of Features selected:2000
#> Ensemble learning using classification accuracy of both Random Forest and SVM classifiers
#> Setting up randomForest classifier learning
#> Training & Classifying using randomForest classifier
#> Predicting using trained randomForest classifier
#> Calculating weights for randomForest classifier
#> Accuracy estimate of randomForest classifier:73.140903586674
#> Assigning weights to randomForest predictions
#> Added Harmonized Labels using randomForest to integrated object
#> Setting up SVM classifier learning
#> Training & Classification using SVM classifier
#> Predicting using trained SVM classifier
#> Calculating weights for each SVM classifier
#> Accuracy estimate of SVM classifier:69.3484698914116
#> Assigning weights to SVM predictions
#> Added harmonized labels using SVM to integrated object
#> randomForest and SVM based learning and harmonization completed. Using predictions from all models for Ensemble harmonization
#> Added Harmonized labels using Ensemble learning to query
#> Ensembl harmonization completed.
# Two panels of the harmonized object: one grouped by the "Celltypes" column,
# one grouped by the ensemble-harmonized labels.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
p1 <- DimPlot(
  out.LabelHarmonization,
  group.by = "Celltypes",
  label = TRUE,
  label.size = 6,
  repel = TRUE
) + NoLegend()
p2 <- DimPlot(
  out.LabelHarmonization,
  group.by = "HarmonizedLabels_UsingEnsemble",
  label = TRUE,
  label.size = 6,
  repel = TRUE
) + NoLegend()

# Show both panels side by side.
p1 + p2

# downsample = FALSE: the reference and query have already been downsampled.
# Work on a copy of the query whose "Celltypes" column holds the cluster
# assignments from "seurat_clusters"; `query` itself is left unchanged.
# NOTE(review): presumably DeduceRelationship() reads labels from "Celltypes"
# on both inputs -- confirm against the package documentation.
query_DR <- query
query_DR$Celltypes <- query$seurat_clusters
out.DR <- DeduceRelationship(
  reference1 = reference,
  reference2 = query_DR,
  downsample = FALSE,
  classification.method = "Ensemble"
)
#> Setting Assay of reference1 and reference2 to RNA
#> Number of cells in reference1:2019
#> Number of cells in reference2:1358
#> Merging reference1 and reference2
#> Normalization, Variable Feature Selection and scaling
#> Centering and scaling data matrix
#> Number of Features selected:2000
#> Generating train and test sets
#> Ensemble learning using classification accuracy of both Random Forest and SVM classifiers
#> Setting up randomForest classifier learning.
#> Training & Classifying using randomForest classifier
#> Predicting using trained randomForest classifier
#> Calculating weight for randomForest classifier
#> Accuracy estimate of randomForest classifier:67.4060752158266
#> Assigning weights to randomForest predictions
#> Generating confusion matrix and heatmap
#> Setting up SVM classifier learning.
#> Training & Classification using SVM classifier
#> Predicting using trained SVM classifier
#> Calculating weight for SVM classifier
#> Accuracy estimate of SVM classifier:63.2986627043091
#> Assigning weights to SVM predictions
#> Generating confusion matrix and heatmap
#> randomForest and SVM based learning and relationship inference completed. Using predictions from all models to make Ensemble Predictions
#> Generating confusion matrix and heatmap
#> Ensemble based learning and relationship inference completed
# Auto-print the object returned by DeduceRelationship() above.
out.DR

# downsample = FALSE: the reference and query have already been downsampled.
# Run the benchmark of ELeFHAnt on the same reference/query pair.
out.Benchmark <- BenchmarkELeFHAnt(
  reference = reference,
  query = query,
  downsample = FALSE
)
#>
#> Deploying ELeFHAnt: classification.method == Ensemble | classification.approach == ClassifyCells
#> Setting Assay of reference and query to RNA
#> Running Diagonistis on reference and query
#> Number of cells in reference:2019
#> Number of cells in query:1358
#> Calculating ratio of number of cells in reference vs query
#> Ratio of number of cells in query vs reference:0.672610203070827
#> Merging reference and query
#> Normalization, Variable Feature Selection and scaling
#> Centering and scaling data matrix
#> Number of Features selected:2000
#> Generating train and test sets
#> Ensemble learning using classification accuracy of both Random Forest and SVM classifiers
#> Setting up randomForest classifier learning
#> Training & Classifying using randomForest classifier
#> Added Predicted celltypes using randomForest to query
#> Setting up SVM classifier learning
#> Training & Classifying using SVM classifier
#> Added Predicted celltypes using SVM to query
#> randomForest and SVM based learning and predictions completed. Using predictions from RF and SVM to make Ensemble Predictions
#> Calculating weights for randomForest classifier
#> Accuracy estimate of randomForest classifier:67.3256147893947
#> Calculating weights for SVM classifier
#> Accuracy estimate of SVM classifier:63.8434868746904
#> Added Predicted celltypes using Ensemble learning to query
#> Ensembl celltype annotation completed.
#>
#> Deploying Seurat Label Transfer
#> Performing PCA on the provided reference using 1263 features as input.
#> Projecting cell embeddings
#> Finding neighborhoods
#> Finding anchors
#> Found 2199 anchors
#> Filtering anchors
#> Retained 1966 anchors
#> Finding integration vectors
#> Finding integration vector weights
#> Predicting cell labels
#>
#> Deploying scPred
#> Warning in PrepDR(object = object, features = features, verbose = verbose): The
#> following 3 features requested have zero variance (running reduction without
#> them): AANAT, RP11-316P17.2, RP11-109E24.1
#> PC_ 1
#> Positive: PPBP, GNG11, PF4, SDPR, CLU, NRGN, GP9, TUBB1, SPARC, CMTM5
#> TREML1, HIST1H2AC, ACRBP, ITGA2B, NCOA4, RUFY1, AP001189.4, MYL9, PTCRA, SNCA
#> MPP1, CD9, MAP3K7CL, CLDN5, AC147651.3, RGS18, PARVB, TSC22D1, TMEM40, CLEC1B
#> Negative: RPS2, RPL13, RPS6, TMSB10, RPLP1, JUNB, LTB, CYBA, IER2, S100A6
#> S100A4, IL32, JUN, VIM, CD7, S100A10, IFITM2, DUSP1, PFN1, MT-CO1
#> IL7R, ANXA1, PPIB, NKG7, FOS, GZMA, CD74, ID2, XBP1, CCR7
#> PC_ 2
#> Positive: IL32, RPS6, RPL13, LTB, CD7, CCL5, IL7R, CTSW, GZMA, CST7
#> CLU, PF4, JUN, SDPR, PPBP, TUBA4A, SPARC, TUBB1, TREML1, GP9
#> CMTM5, GRAP2, NKG7, CCR7, GNLY, AQP3, CD8B, HOPX, HIST1H2AC, GZMK
#> Negative: SPI1, CST3, SERPINA1, LYZ, LST1, HCK, RP11-290F20.3, CD68, AIF1, IFI30
#> CFD, PILRA, FCN1, MS4A7, CFP, LRRC25, TMEM176B, TYMP, FCER1G, HMOX1
#> C1QA, CPVL, HLA-DRA, HLA-DRB1, TYROBP, HLA-DPA1, IGSF6, NCF2, VMO1, HLA-DRB5
#> PC_ 3
#> Positive: FCGR3A, RP11-290F20.3, TMSB4X, SERPINA1, VMO1, MS4A7, IL32, CDKN1C, IFITM2, S100A4
#> PILRA, CFD, CD7, NKG7, GNLY, HMOX1, HES4, IFITM3, C1QA, GZMA
#> TMEM176B, CCL5, CTSW, GPBAR1, CST7, CD68, HCK, LILRA3, CTD-2006K23.1, RHOC
#> Negative: CD79A, HLA-DRA, HLA-DQA1, MS4A1, HLA-DQA2, TCL1A, HLA-DMB, LINC00926, HLA-DRB1, CD74
#> HLA-DMA, HLA-DPB1, FCER2, CD79B, HLA-DPA1, HLA-DRB5, HLA-DQB1, BANK1, IRF8, SPIB
#> PNOC, KIAA0125, HVCN1, VPREB3, FCGR2B, CD1C, HLA-DOB, MEF2C, FCER1A, PKIG
#> PC_ 4
#> Positive: CD79A, CD79B, MS4A1, MS4A7, LINC00926, FCER2, RP11-290F20.3, VMO1, TCL1A, BANK1
#> HMOX1, CDKN1C, EAF2, LTB, HVCN1, VPREB3, HES4, C1QA, SIGLEC10, PNOC
#> CD40, LYPD2, CXCL16, CTD-2006K23.1, CD19, CD68, SCIMP, BLK, MAFB, ARHGAP24
#> Negative: FCER1A, LGALS2, CLEC10A, ALDH2, MS4A6A, LYZ, CST3, ENHO, IL1B, CPVL
#> S100A8, S100A9, GRN, CD1C, TYROBP, RNASE6, GPX1, CD1D, RAB32, CSF3R
#> IGFBP7, GSN, GSTP1, LGALS1, MNDA, BLVRB, S100A4, FCGRT, SERPINF1, CAPG
#> PC_ 5
#> Positive: NFE2, LYL1, TSC22D1, GATA1, GNA15, FAM212A, EGFL7, DAB2, RP11-367G6.3, RPS2
#> RAB32, GATA2, TMEM40, LMNA, RPS6, NEXN, RPL13, PRSS57, RGS18, CYTL1
#> GFI1B, MPP1, FAM110A, SMOX, CDKN1A, ESAM, STX11, LCN2, MMD, PBX1
#> Negative: TMSB4X, MYL9, PARVB, NCOA4, CD151, PPBP, CLU, PF4, NRGN, SNCA
#> TREML1, ITGA2B, CD9, GNG11, SPARC, GSN, AP001189.4, SDPR, ACTB, FCER1G
#> C12orf39, C12orf75, HIST1H2BK, CMTM5, GZMB, SH3BGRL3, CCL5, GP9, CST3, GNLY
#> 19:34:40 UMAP embedding parameters a = 0.9922 b = 1.112
#> 19:34:40 Read 2019 rows and found 30 numeric columns
#> 19:34:40 Using Annoy for neighbor search, n_neighbors = 30
#> 19:34:40 Building Annoy index with metric = cosine, n_trees = 50
#> 0% 10 20 30 40 50 60 70 80 90 100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 19:34:40 Writing NN index file to temp file /var/folders/bw/whg3swn15jb08_f7v2y09xw9glk1wg/T//Rtmpsj5cKz/file14145363fce23
#> 19:34:40 Searching Annoy index using 1 thread, search_k = 3000
#> 19:34:40 Annoy recall = 100%
#> 19:34:41 Commencing smooth kNN distance calibration using 1 thread
#> 19:34:42 Initializing from normalized Laplacian + noise
#> 19:34:42 Commencing optimization for 500 epochs, with 89970 positive edges
#> 19:34:45 Optimization finished
#> Warning in getFeatureSpace(reference.scPred, "Celltypes"): 504 genes have zero variance but are present in the gene loadings.
#> Did you subset or integrated this data before?
#> Removing zero-variance genes from loadings
#> ● Extracting feature space for each cell type...
#> DONE!
#> ● Training models for each cell type...
#> Loading required package: lattice
#>
#> Attaching package: 'caret'
#>
#> The following objects are masked from 'package:yardstick':
#>
#> precision, recall, sensitivity, specificity
#>
#> The following object is masked from 'package:purrr':
#>
#> lift
#> DONE!
#> ● Matching reference with new dataset...
#> ─ 1493 features present in reference loadings
#> ─ 1183 features shared between reference and new dataset
#> ─ 79.24% of features in the reference are present in new dataset
#> ● Aligning new data to reference...
#> Harmony 1/20
#> Harmony 2/20
#> Harmony 3/20
#> Harmony 4/20
#> Harmony 5/20
#> Harmony 6/20
#> Harmony 7/20
#> Harmony 8/20
#> Harmony 9/20
#> Harmony 10/20
#> Harmony converged after 10 iterations
#> ● Classifying cells...
#> DONE!
# One panel per prediction column stored on the benchmark object, each titled
# after the method that produced it.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
p1 <- DimPlot(
  out.Benchmark,
  group.by = "PredictedCelltype_UsingEnsemble",
  label = TRUE,
  repel = TRUE,
  label.size = 6
) + NoLegend() + ggtitle("ELeFHAnt Predictions")
p2 <- DimPlot(
  out.Benchmark,
  group.by = "predicted.id",
  label = TRUE,
  repel = TRUE,
  label.size = 6
) + NoLegend() + ggtitle("LabelTransfer Predictions")
p3 <- DimPlot(
  out.Benchmark,
  group.by = "scpred_prediction",
  label = TRUE,
  repel = TRUE,
  label.size = 6
) + NoLegend() + ggtitle("scPred Predictions")

# Show the three panels side by side.
p1 + p2 + p3
