ELeFHAnt

library(ELeFHAnt)
#> Loading required package: tidymodels
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.2.0 ──
#> ✔ broom        0.8.0     ✔ recipes      0.2.0
#> ✔ dials        1.0.0     ✔ rsample      0.1.1
#> ✔ dplyr        1.0.9     ✔ tibble       3.1.7
#> ✔ ggplot2      3.3.6     ✔ tidyr        1.2.0
#> ✔ infer        1.0.2     ✔ tune         0.2.0
#> ✔ modeldata    0.1.1     ✔ workflows    0.2.6
#> ✔ parsnip      1.0.0     ✔ workflowsets 0.2.1
#> ✔ purrr        0.3.4     ✔ yardstick    1.0.0
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
#> ✖ purrr::discard() masks scales::discard()
#> ✖ dplyr::filter()  masks stats::filter()
#> ✖ dplyr::lag()     masks stats::lag()
#> ✖ recipes::step()  masks stats::step()
#> • Use suppressPackageStartupMessages() to eliminate package startup messages
#> Loading required package: Seurat
#> Attaching SeuratObject
#> Attaching sp
#> Loading required package: class
#> Loading required package: splitstackshape
#> Loading required package: fgsea
#> Loading required package: msigdbr
#> Loading required package: scPred
#> Loading required package: magrittr
#> 
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:tidyr':
#> 
#>     extract
#> The following object is masked from 'package:purrr':
#> 
#>     set_names
#> Loading required package: harmony
#> Loading required package: Rcpp
#> 
#> Attaching package: 'Rcpp'
#> The following object is masked from 'package:rsample':
#> 
#>     populate
#> Loading required package: scater
#> Loading required package: SingleCellExperiment
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#> 
#> Attaching package: 'matrixStats'
#> The following object is masked from 'package:dplyr':
#> 
#>     count
#> 
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#> 
#>     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#>     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#>     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#>     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#>     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#>     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#>     colWeightedMeans, colWeightedMedians, colWeightedSds,
#>     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#>     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#>     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#>     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#>     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#>     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#>     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#>     rowWeightedSds, rowWeightedVars
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#> 
#>     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#>     clusterExport, clusterMap, parApply, parCapply, parLapply,
#>     parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from 'package:dplyr':
#> 
#>     combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#>     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#>     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#>     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#>     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#>     union, unique, unsplit, which.max, which.min
#> Loading required package: S4Vectors
#> 
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:tidyr':
#> 
#>     expand
#> The following objects are masked from 'package:dplyr':
#> 
#>     first, rename
#> The following object is masked from 'package:base':
#> 
#>     expand.grid
#> Loading required package: IRanges
#> 
#> Attaching package: 'IRanges'
#> The following object is masked from 'package:sp':
#> 
#>     %over%
#> The following object is masked from 'package:purrr':
#> 
#>     reduce
#> The following objects are masked from 'package:dplyr':
#> 
#>     collapse, desc, slice
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> 
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#> 
#>     rowMedians
#> The following objects are masked from 'package:matrixStats':
#> 
#>     anyMissing, rowMedians
#> 
#> Attaching package: 'SummarizedExperiment'
#> The following object is masked from 'package:SeuratObject':
#> 
#>     Assays
#> The following object is masked from 'package:Seurat':
#> 
#>     Assays
#> 
#> Attaching package: 'scater'
#> The following object is masked from 'package:rsample':
#> 
#>     bootstraps
#> Loading required package: hrbrthemes
#> NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
#>       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
#>       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
#> Loading required package: ranger
#> Loading required package: LiblineaR
#> Loading required package: caTools
#> 
#> Attaching package: 'caTools'
#> The following object is masked from 'package:IRanges':
#> 
#>     runmean
#> The following object is masked from 'package:S4Vectors':
#> 
#>     runmean
#> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
#> loading 'ELeFHAnt'
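
The lengthy startup banner printed above can be silenced, as the banner itself suggests, by wrapping the library call; a minimal sketch:

# Optional: load ELeFHAnt without printing the startup banner shown above
suppressPackageStartupMessages(library(ELeFHAnt))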
data("reference_PBMC")
data("query_PBMC")
reference = reference_PBMC
query = query_PBMC
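
ELeFHAnt trains on the cell-type labels carried by the reference; assuming those labels live in a Celltypes metadata column (as the harmonization plots later in this walkthrough suggest), their distribution can be inspected before running anything:

# Assumption: the reference stores its labels in a 'Celltypes' metadata column
table(reference$Celltypes)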
query = NormalizeData(query)
query = FindVariableFeatures(query)
query = ScaleData(query)
#> Centering and scaling data matrix
query = RunPCA(query)
#> PC_ 1 
#> Positive:  CST3, AIF1, LST1, FTL, FTH1, TYMP, TYROBP, CFD, FCER1G, SERPINA1 
#>     FCN1, LYZ, CTSS, IFITM3, S100A9, LGALS1, COTL1, PSAP, IFI30, S100A11 
#>     NPC2, CFP, SAT1, RP11-290F20.3, S100A8, PYCARD, S100A6, PILRA, LGALS2, CEBPB 
#> Negative:  IL32, LTB, CD3E, LDHB, CTSW, GZMM, CD2, IL7R, CCL5, CD247 
#>     ACAP1, CST7, GZMA, STK17A, NKG7, CD27, PRF1, HOPX, GIMAP5, NOSIP 
#>     AQP3, GZMK, NCR3, FGFBP2, LYAR, KLRG1, SAMD3, CD8B, ETS1, GZMB 
#> PC_ 2 
#> Positive:  PF4, SDPR, GNG11, PPBP, SPARC, GP9, TUBB1, HIST1H2AC, CLU, AP001189.4 
#>     PTCRA, ITGA2B, NRGN, RGS18, CD9, TMEM40, MMD, CA2, ACRBP, TREML1 
#>     F13A1, SEPT5, TSC22D1, PTGS1, CMTM5, LY6G6F, GP1BA, RP11-367G6.3, MYL9, RUFY1 
#> Negative:  RPS2, TMSB10, CYBA, NKG7, S100A4, GZMA, CST7, PRF1, CTSW, GNLY 
#>     FGFBP2, CD247, EIF4A1, GZMB, GZMM, ID2, IFITM2, GZMH, SPON2, ANXA1 
#>     CCL4, FCGR3A, PFN1, APOBEC3G, RBM3, S100A10, GIMAP7, IGFBP7, HOPX, CLIC3 
#> PC_ 3 
#> Positive:  NKG7, PRF1, GZMB, CST7, GZMA, FGFBP2, GNLY, CTSW, SPON2, CD247 
#>     GZMH, GZMM, CCL5, CCL4, FCGR3A, SRGN, CLIC3, AKR1C3, XCL2, PFN1 
#>     ACTB, IGFBP7, TTC38, HOPX, APMAP, SH3BGRL3, RHOC, ID2, ARPC5L, ANXA1 
#> Negative:  CD79A, MS4A1, HLA-DRA, HLA-DQB1, TCL1A, HLA-DQA1, RPS2, HLA-DRB1, CD74, CD79B 
#>     LTB, HLA-DPB1, HLA-DMA, HLA-DRB5, HLA-DPA1, HLA-DQA2, FCER2, LY86, HVCN1, SNHG7 
#>     KIAA0125, P2RX5, IRF8, CD19, QRSL1, SWAP70, IGLL5, FCGR2B, C6orf48, POU2AF1 
#> PC_ 4 
#> Positive:  S100A4, S100A8, TMSB4X, S100A6, S100A9, CD14, GIMAP7, FCN1, IL32, RBP7 
#>     LGALS2, S100A11, CD3E, TYROBP, ANXA1, LYZ, S100A12, IL7R, MS4A6A, GZMM 
#>     GIMAP4, FTL, CFD, LGALS1, S100A10, NOSIP, CD2, AIF1, FYB, TIMP1 
#> Negative:  HLA-DQA1, KIAA0101, TYMS, CD79A, HLA-DQB1, RRM2, TK1, CD74, CD79B, GINS2 
#>     MS4A1, HLA-DQA2, MKI67, HLA-DPB1, ZWINT, HLA-DRA, MYBL2, HLA-DRB1, BIRC5, HLA-DPA1 
#>     HLA-DRB5, KIFC1, TCL1A, CLSPN, HLA-DMA, CENPM, MZB1, AURKB, STMN1, NUSAP1 
#> PC_ 5 
#> Positive:  LDHB, VIM, IL7R, CD3E, IL32, AQP3, NOSIP, CD27, RPS2, CD2 
#>     FYB, GIMAP7, CD40LG, RRM2, KIAA0101, S100A10, LTB, TYMS, GIMAP4, TK1 
#>     ZWINT, MKI67, PPA1, LDLRAP1, GIMAP5, BIRC5, GINS2, GAPDH, TRADD, COTL1 
#> Negative:  GZMB, FGFBP2, CD79B, CD79A, GNLY, TCL1A, SPON2, PRF1, MS4A1, CD74 
#>     HLA-DQA1, NKG7, CCL4, HLA-DQB1, HLA-DPB1, CLIC3, HLA-DPA1, HLA-DRA, CST7, HLA-DRB1 
#>     IGFBP7, PLAC8, TTC38, AKR1C3, GZMA, FCGR3A, XCL2, HLA-DRB5, FCER2, APMAP
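
The UMAP step below uses the first 20 principal components; that cutoff is a common default rather than a tuned value, and an elbow plot is one quick way to sanity-check it:

# Inspect the variance explained per PC to judge how many components to keep
ElbowPlot(query, ndims = 50)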
query = RunUMAP(query, dims = 1:20)
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session
#> 19:30:59 UMAP embedding parameters a = 0.9922 b = 1.112
#> 19:30:59 Read 1358 rows and found 20 numeric columns
#> 19:30:59 Using Annoy for neighbor search, n_neighbors = 30
#> 19:30:59 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 19:30:59 Writing NN index file to temp file /var/folders/bw/whg3swn15jb08_f7v2y09xw9glk1wg/T//Rtmpsj5cKz/file1414515154c55
#> 19:30:59 Searching Annoy index using 1 thread, search_k = 3000
#> 19:30:59 Annoy recall = 100%
#> 19:31:00 Commencing smooth kNN distance calibration using 1 thread
#> 19:31:01 Initializing from normalized Laplacian + noise
#> 19:31:01 Commencing optimization for 500 epochs, with 54848 positive edges
#> 19:31:03 Optimization finished
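
The annotation plots below group cells by seurat_clusters; if the query has not been clustered yet, that column will be missing from its metadata, and a minimal sketch to add it (default resolution assumed) is:

# Hypothetical step: only needed if 'seurat_clusters' is absent from the query metadata
if (!"seurat_clusters" %in% colnames(query@meta.data)) {
  query = FindNeighbors(query, dims = 1:20)
  query = FindClusters(query)
}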
###### Celltype annotation: downsample is set to FALSE because the reference and query have already been downsampled ######
out.CelltypeAnnotation = CelltypeAnnotation(reference = reference, query = query, downsample = FALSE, classification.method = "Ensemble", validatePredictions = FALSE)
#> Setting Assay of reference and query to RNA
#> Running Diagonistis on reference and query
#> Number of cells in reference:2019
#> Number of cells in query:1358
#> Calculating ratio of number of cells in reference vs query
#> Ratio of number of cells in query vs reference:0.672610203070827
#> Merging reference and query
#> Normalization, Variable Feature Selection and scaling
#> Centering and scaling data matrix
#> Number of Features selected:2000
#> Generating train and test sets
#> Ensemble learning using classification accuracy of both Random Forest and SVM classifiers
#> Setting up randomForest classifier learning
#> Training & Classifying using randomForest classifier
#> Added Predicted celltypes using randomForest to query
#> Setting up SVM classifier learning
#> Training & Classifying using SVM classifier
#> Added Predicted celltypes using SVM to query
#> randomForest and SVM based learning and predictions completed. Using predictions from RF and SVM to make Ensemble Predictions
#> Calculating weights for randomForest classifier
#> Accuracy estimate of randomForest classifier:67.2070211702965
#> Calculating weights for SVM classifier
#> Accuracy estimate of SVM classifier:62.6052501238237
#> Added Predicted celltypes using Ensemble learning to query
#> Ensembl celltype annotation completed.
query$Celltypes = out.CelltypeAnnotation$PredictedCelltype_UsingEnsemble
p1 = DimPlot(out.CelltypeAnnotation, group.by = "seurat_clusters", label = T, reduction = "umap", label.size = 6, repel = T) + NoLegend()
p2 = DimPlot(out.CelltypeAnnotation, group.by = "PredictedCelltype_UsingEnsemble", label = T, reduction = "umap", label.size = 6, repel = T) + NoLegend()
p1+p2
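
Beyond the side-by-side plots, a cross-tabulation of the two metadata columns gives a quick per-cluster view of the Ensemble predictions (a convenience check, not part of the ELeFHAnt API):

# Clusters (rows) vs. Ensemble-predicted cell types (columns)
table(out.CelltypeAnnotation$seurat_clusters,
      out.CelltypeAnnotation$PredictedCelltype_UsingEnsemble)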

###### Label harmonization: downsample is set to FALSE because the reference and query have already been downsampled ######
out.LabelHarmonization = LabelHarmonization(seurat.objects = c(reference, query), perform_integration = TRUE, downsample = FALSE, classification.method = "Ensemble", validatePredictions = FALSE)
#> Starting integration using Seurat
#> Computing 2000 integration features
#> Scaling features for provided objects
#> Finding all pairwise anchors
#> Running CCA
#> Merging objects
#> Finding neighborhoods
#> Finding anchors
#>  Found 4756 anchors
#> Filtering anchors
#>  Retained 3607 anchors
#> Merging dataset 2 into 1
#> Extracting anchors for merged samples
#> Finding integration vectors
#> Finding integration vector weights
#> Integrating data
#> Integration Completed. Performing Scaling, Dimension reduction and clustering
#> 19:32:21 UMAP embedding parameters a = 0.9922 b = 1.112
#> 19:32:21 Read 3377 rows and found 30 numeric columns
#> 19:32:21 Using Annoy for neighbor search, n_neighbors = 30
#> 19:32:21 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 19:32:21 Writing NN index file to temp file /var/folders/bw/whg3swn15jb08_f7v2y09xw9glk1wg/T//Rtmpsj5cKz/file141452f080a38
#> 19:32:21 Searching Annoy index using 1 thread, search_k = 3000
#> 19:32:22 Annoy recall = 100%
#> 19:32:22 Commencing smooth kNN distance calibration using 1 thread
#> 19:32:23 Initializing from normalized Laplacian + noise
#> 19:32:24 Commencing optimization for 500 epochs, with 143682 positive edges
#> 19:32:29 Optimization finished
#> Computing nearest neighbor graph
#> Computing SNN
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 3377
#> Number of edges: 170045
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8618
#> Number of communities: 13
#> Elapsed time: 0 seconds
#> Number of cells in integrated atlas:3377
#> Generating train and test datasets using stratification -- 60% for training & 40% for testing
#> Number of Features selected:2000
#> Ensemble learning using classification accuracy of both Random Forest and SVM classifiers
#> Setting up randomForest classifier learning
#> Training & Classifying using randomForest classifier
#> Predicting using trained randomForest classifier
#> Calculating weights for randomForest classifier
#> Accuracy estimate of randomForest classifier:73.140903586674
#> Assigning weights to randomForest predictions
#> Added Harmonized Labels using randomForest to integrated object
#> Setting up SVM classifier learning
#> Training & Classification using SVM classifier
#> Predicting using trained SVM classifier
#> Calculating weights for each SVM classifier
#> Accuracy estimate of SVM classifier:69.3484698914116
#> Assigning weights to SVM predictions
#> Added harmonized labels using SVM to integrated object
#> randomForest and SVM based learning and harmonization completed. Using predictions from all models for Ensemble harmonization
#> Added Harmonized labels using Ensemble learning to query
#> Ensembl harmonization completed.
p1 = DimPlot(out.LabelHarmonization, group.by = "Celltypes", label = T, label.size = 6, repel = T) + NoLegend()
p2 = DimPlot(out.LabelHarmonization, group.by = "HarmonizedLabels_UsingEnsemble", label = T, label.size = 6, repel = T) + NoLegend()
p1+p2
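
The same kind of cross-tabulation summarizes how the original labels map onto the harmonized ones across the integrated object:

# Original labels (rows) vs. Ensemble-harmonized labels (columns)
table(out.LabelHarmonization$Celltypes,
      out.LabelHarmonization$HarmonizedLabels_UsingEnsemble)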

###### Deduce relationship: downsample is set to FALSE because reference1 and reference2 have already been downsampled ######
query_DR = query
query_DR$Celltypes = query$seurat_clusters
out.DR = DeduceRelationship(reference1 = reference, reference2 = query_DR, downsample = FALSE, classification.method = "Ensemble")
#> Setting Assay of reference1 and reference2 to RNA
#> Number of cells in reference1:2019
#> Number of cells in reference2:1358
#> Merging reference1 and reference2
#> Normalization, Variable Feature Selection and scaling
#> Centering and scaling data matrix
#> Number of Features selected:2000
#> Generating train and test sets
#> Ensemble learning using classification accuracy of both Random Forest and SVM classifiers
#> Setting up randomForest classifier learning.
#> Training & Classifying using randomForest classifier
#> Predicting using trained randomForest classifier
#> Calculating weight for randomForest classifier
#> Accuracy estimate of randomForest classifier:67.4060752158266
#> Assigning weights to randomForest predictions
#> Generating confusion matrix and heatmap
#> Setting up SVM classifier learning.
#> Training & Classification using SVM classifier
#> Predicting using trained SVM classifier
#> Calculating weight for SVM classifier
#> Accuracy estimate of SVM classifier:63.2986627043091
#> Assigning weights to SVM predictions
#> Generating confusion matrix and heatmap
#> randomForest and SVM based learning and relationship inference completed. Using predictions from all models to make Ensemble Predictions
#> Generating confusion matrix and heatmap
#> Ensemble based learning and relationship inference completed
out.DR
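
DeduceRelationship reports that it builds a confusion-matrix heatmap; assuming out.DR is returned as a ggplot object, it can be written to disk with ggsave() (the file name below is only illustrative):

# Save the relationship heatmap; the file name is just an example
ggsave("DeduceRelationship_heatmap.png", plot = out.DR, width = 8, height = 6)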

###### Benchmarking: downsample is set to FALSE because the reference and query have already been downsampled ######
out.Benchmark = BenchmarkELeFHAnt(reference = reference, query = query, downsample = FALSE)
#> 
#> Deploying ELeFHAnt: classification.method == Ensemble | classification.approach == ClassifyCells
#> Setting Assay of reference and query to RNA
#> Running Diagonistis on reference and query
#> Number of cells in reference:2019
#> Number of cells in query:1358
#> Calculating ratio of number of cells in reference vs query
#> Ratio of number of cells in query vs reference:0.672610203070827
#> Merging reference and query
#> Normalization, Variable Feature Selection and scaling
#> Centering and scaling data matrix
#> Number of Features selected:2000
#> Generating train and test sets
#> Ensemble learning using classification accuracy of both Random Forest and SVM classifiers
#> Setting up randomForest classifier learning
#> Training & Classifying using randomForest classifier
#> Added Predicted celltypes using randomForest to query
#> Setting up SVM classifier learning
#> Training & Classifying using SVM classifier
#> Added Predicted celltypes using SVM to query
#> randomForest and SVM based learning and predictions completed. Using predictions from RF and SVM to make Ensemble Predictions
#> Calculating weights for randomForest classifier
#> Accuracy estimate of randomForest classifier:67.3256147893947
#> Calculating weights for SVM classifier
#> Accuracy estimate of SVM classifier:63.8434868746904
#> Added Predicted celltypes using Ensemble learning to query
#> Ensembl celltype annotation completed.
#> 
#> Deploying Seurat Label Transfer
#> Performing PCA on the provided reference using 1263 features as input.
#> Projecting cell embeddings
#> Finding neighborhoods
#> Finding anchors
#>  Found 2199 anchors
#> Filtering anchors
#>  Retained 1966 anchors
#> Finding integration vectors
#> Finding integration vector weights
#> Predicting cell labels
#> 
#> Deploying scPred
#> Warning in PrepDR(object = object, features = features, verbose = verbose): The
#> following 3 features requested have zero variance (running reduction without
#> them): AANAT, RP11-316P17.2, RP11-109E24.1
#> PC_ 1 
#> Positive:  PPBP, GNG11, PF4, SDPR, CLU, NRGN, GP9, TUBB1, SPARC, CMTM5 
#>     TREML1, HIST1H2AC, ACRBP, ITGA2B, NCOA4, RUFY1, AP001189.4, MYL9, PTCRA, SNCA 
#>     MPP1, CD9, MAP3K7CL, CLDN5, AC147651.3, RGS18, PARVB, TSC22D1, TMEM40, CLEC1B 
#> Negative:  RPS2, RPL13, RPS6, TMSB10, RPLP1, JUNB, LTB, CYBA, IER2, S100A6 
#>     S100A4, IL32, JUN, VIM, CD7, S100A10, IFITM2, DUSP1, PFN1, MT-CO1 
#>     IL7R, ANXA1, PPIB, NKG7, FOS, GZMA, CD74, ID2, XBP1, CCR7 
#> PC_ 2 
#> Positive:  IL32, RPS6, RPL13, LTB, CD7, CCL5, IL7R, CTSW, GZMA, CST7 
#>     CLU, PF4, JUN, SDPR, PPBP, TUBA4A, SPARC, TUBB1, TREML1, GP9 
#>     CMTM5, GRAP2, NKG7, CCR7, GNLY, AQP3, CD8B, HOPX, HIST1H2AC, GZMK 
#> Negative:  SPI1, CST3, SERPINA1, LYZ, LST1, HCK, RP11-290F20.3, CD68, AIF1, IFI30 
#>     CFD, PILRA, FCN1, MS4A7, CFP, LRRC25, TMEM176B, TYMP, FCER1G, HMOX1 
#>     C1QA, CPVL, HLA-DRA, HLA-DRB1, TYROBP, HLA-DPA1, IGSF6, NCF2, VMO1, HLA-DRB5 
#> PC_ 3 
#> Positive:  FCGR3A, RP11-290F20.3, TMSB4X, SERPINA1, VMO1, MS4A7, IL32, CDKN1C, IFITM2, S100A4 
#>     PILRA, CFD, CD7, NKG7, GNLY, HMOX1, HES4, IFITM3, C1QA, GZMA 
#>     TMEM176B, CCL5, CTSW, GPBAR1, CST7, CD68, HCK, LILRA3, CTD-2006K23.1, RHOC 
#> Negative:  CD79A, HLA-DRA, HLA-DQA1, MS4A1, HLA-DQA2, TCL1A, HLA-DMB, LINC00926, HLA-DRB1, CD74 
#>     HLA-DMA, HLA-DPB1, FCER2, CD79B, HLA-DPA1, HLA-DRB5, HLA-DQB1, BANK1, IRF8, SPIB 
#>     PNOC, KIAA0125, HVCN1, VPREB3, FCGR2B, CD1C, HLA-DOB, MEF2C, FCER1A, PKIG 
#> PC_ 4 
#> Positive:  CD79A, CD79B, MS4A1, MS4A7, LINC00926, FCER2, RP11-290F20.3, VMO1, TCL1A, BANK1 
#>     HMOX1, CDKN1C, EAF2, LTB, HVCN1, VPREB3, HES4, C1QA, SIGLEC10, PNOC 
#>     CD40, LYPD2, CXCL16, CTD-2006K23.1, CD19, CD68, SCIMP, BLK, MAFB, ARHGAP24 
#> Negative:  FCER1A, LGALS2, CLEC10A, ALDH2, MS4A6A, LYZ, CST3, ENHO, IL1B, CPVL 
#>     S100A8, S100A9, GRN, CD1C, TYROBP, RNASE6, GPX1, CD1D, RAB32, CSF3R 
#>     IGFBP7, GSN, GSTP1, LGALS1, MNDA, BLVRB, S100A4, FCGRT, SERPINF1, CAPG 
#> PC_ 5 
#> Positive:  NFE2, LYL1, TSC22D1, GATA1, GNA15, FAM212A, EGFL7, DAB2, RP11-367G6.3, RPS2 
#>     RAB32, GATA2, TMEM40, LMNA, RPS6, NEXN, RPL13, PRSS57, RGS18, CYTL1 
#>     GFI1B, MPP1, FAM110A, SMOX, CDKN1A, ESAM, STX11, LCN2, MMD, PBX1 
#> Negative:  TMSB4X, MYL9, PARVB, NCOA4, CD151, PPBP, CLU, PF4, NRGN, SNCA 
#>     TREML1, ITGA2B, CD9, GNG11, SPARC, GSN, AP001189.4, SDPR, ACTB, FCER1G 
#>     C12orf39, C12orf75, HIST1H2BK, CMTM5, GZMB, SH3BGRL3, CCL5, GP9, CST3, GNLY
#> 19:34:40 UMAP embedding parameters a = 0.9922 b = 1.112
#> 19:34:40 Read 2019 rows and found 30 numeric columns
#> 19:34:40 Using Annoy for neighbor search, n_neighbors = 30
#> 19:34:40 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 19:34:40 Writing NN index file to temp file /var/folders/bw/whg3swn15jb08_f7v2y09xw9glk1wg/T//Rtmpsj5cKz/file14145363fce23
#> 19:34:40 Searching Annoy index using 1 thread, search_k = 3000
#> 19:34:40 Annoy recall = 100%
#> 19:34:41 Commencing smooth kNN distance calibration using 1 thread
#> 19:34:42 Initializing from normalized Laplacian + noise
#> 19:34:42 Commencing optimization for 500 epochs, with 89970 positive edges
#> 19:34:45 Optimization finished
#> Warning in getFeatureSpace(reference.scPred, "Celltypes"): 504 genes have zero variance but are present in the gene loadings. 
#> Did you subset or integrated this data before?
#> Removing zero-variance genes from loadings
#> ●  Extracting feature space for each cell type...
#> DONE!
#> ●  Training models for each cell type...
#> Loading required package: lattice
#> 
#> Attaching package: 'caret'
#> 
#> The following objects are masked from 'package:yardstick':
#> 
#>     precision, recall, sensitivity, specificity
#> 
#> The following object is masked from 'package:purrr':
#> 
#>     lift
#> DONE!
#> ●  Matching reference with new dataset...
#>   ─ 1493 features present in reference loadings
#>   ─ 1183 features shared between reference and new dataset
#>   ─ 79.24% of features in the reference are present in new dataset
#> ●  Aligning new data to reference...
#> Harmony 1/20
#> Harmony 2/20
#> Harmony 3/20
#> Harmony 4/20
#> Harmony 5/20
#> Harmony 6/20
#> Harmony 7/20
#> Harmony 8/20
#> Harmony 9/20
#> Harmony 10/20
#> Harmony converged after 10 iterations
#> ●  Classifying cells...
#> DONE!
p1 = DimPlot(out.Benchmark, group.by = "PredictedCelltype_UsingEnsemble", label=T, repel = T, label.size = 6) + NoLegend() + ggtitle("ELeFHAnt Predictions")
p2 = DimPlot(out.Benchmark, group.by = "predicted.id", label=T, repel = T, label.size = 6) + NoLegend() + ggtitle("LabelTransfer Predictions")
p3 = DimPlot(out.Benchmark, group.by = "scpred_prediction", label=T, repel = T, label.size = 6) + NoLegend() + ggtitle("scPred Predictions")
p1+p2+p3
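
To put a number on how often the three tools agree, the per-cell calls stored in the benchmarked object's metadata can be compared directly; note that the tools may phrase the same cell type slightly differently, so exact string matching is only a rough agreement measure:

# Pairwise agreement between ELeFHAnt, Seurat Label Transfer and scPred predictions
preds = out.Benchmark@meta.data[, c("PredictedCelltype_UsingEnsemble",
                                    "predicted.id", "scpred_prediction")]
mean(preds$PredictedCelltype_UsingEnsemble == preds$predicted.id)
mean(preds$PredictedCelltype_UsingEnsemble == preds$scpred_prediction)
mean(preds$predicted.id == preds$scpred_prediction)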