#load seurat package and dependencies
library(dplyr)
library(Seurat)
library(patchwork)
library(cowplot)
library(ggplot2)
library(rstatix)
library(RColorBrewer)
setwd("D:/Bioinformatics Projects/shalini_bm_noe15")
load NoE15 dataset
# read the NoE15 sample
NoE15.data <- Read10X(data.dir = "NoE15/outs/filtered_feature_bc_matrix/")
dim(NoE15.data)
## [1] 36601 8511
NoE15.data[1:10,1:6]
## 10 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCACAGGCTACC-1 AAACCCAGTCTCTCTG-1 AAACCCAGTCTTCAAG-1
## MIR1302-2HG . . .
## FAM138A . . .
## OR4F5 . . .
## AL627309.1 . . .
## AL627309.3 . . .
## AL627309.2 . . .
## AL627309.5 1 . .
## AL627309.4 . . .
## AP006222.2 . . .
## AL732372.1 . . .
## AAACGAAAGACTCTTG-1 AAACGAAAGGTTTGAA-1 AAACGAAAGTCACGAG-1
## MIR1302-2HG . . .
## FAM138A . . .
## OR4F5 . . .
## AL627309.1 . . .
## AL627309.3 . . .
## AL627309.2 . . .
## AL627309.5 . . .
## AL627309.4 . . .
## AP006222.2 . . .
## AL732372.1 . . .
NoE15 <- CreateSeuratObject(counts = NoE15.data, project = "ssbm.NoE15.8k", min.cells = 3, min.features = 200)
NoE15
## An object of class Seurat
## 24570 features across 8478 samples within 1 assay
## Active assay: RNA (24570 features, 0 variable features)
NoE15[["percent.mt"]] <- PercentageFeatureSet(NoE15, pattern = "^MT-")
HB.genes_total <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ") # common RBC genes for human blood sample
HB_m <- match(HB.genes_total,rownames(NoE15@assays$RNA))
HB.genes <- rownames(NoE15@assays$RNA)[HB_m]
HB.genes <- HB.genes[!is.na(HB.genes)]
NoE15[["percent.HB"]]<-PercentageFeatureSet(NoE15,features=HB.genes)
VlnPlot(NoE15, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(NoE15, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(NoE15, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
NoE15 <- subset(NoE15, subset = nFeature_RNA > 500 & nFeature_RNA < 7500 & percent.mt < 10)
dim(NoE15)
## [1] 24570 7863
# 24570 7863
NoE15 <- NormalizeData(NoE15, normalization.method = "LogNormalize", scale.factor = 10000)
NoE15 <- FindVariableFeatures(NoE15, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(NoE15), 10)
top10
## [1] "HBB" "PPBP" "HBG2" "HBG1" "PF4" "CLC"
## [7] "S100A9" "LTB" "HIST3H2A" "PRTN3"
plot1 <- VariableFeaturePlot(NoE15)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1
plot2
# Preparation stage for PCA analyses, need to use ScaleData() to scale
and center all data
all.genes <- rownames(NoE15)
NoE15 <- ScaleData(NoE15, features = all.genes)
## Centering and scaling data matrix
NoE15 <- ScaleData(NoE15, vars.to.regress = "percent.mt") # regress out the effects of mitochondria variation
## Regressing out percent.mt
## Centering and scaling data matrix
NoE15 <- RunPCA(NoE15, features = VariableFeatures(object = NoE15))
## PC_ 1
## Positive: ATP8B4, P2RY8, KCNQ5, FNDC3B, AUTS2, PLCB1, CD44, LSP1, ITGA4, BCL2
## LRMDA, ANXA1, BCL11A, PRAM1, ZEB1, PLAC8, RHEX, FLT3, CSF3R, S100Z
## IGLL1, MGST1, SBF2, PRKCE, MED13L, CALN1, SPINK2, MPO, AFF3, RUNX2
## Negative: GP1BB, RAB27B, ITGB3, GP9, CAVIN2, CMTM5, DNM3, ARHGAP6, DAAM1, PROS1
## PF4, SELP, TUBB1, AC004053.1, RGS6, MPIG6B, MMRN1, MYLK, KALRN, LAT
## F2R, PRKAR2B, THBS1, ITGA2B, PPBP, GP6, C2orf88, NRGN, GP1BA, CD36
## PC_ 2
## Positive: MRC1, HLA-DQB1, CST3, SLC8A1, HLA-DQA1, SAMHD1, JAML, PKIB, CD1C, HLA-DQA2
## IFI30, MS4A6A, HLA-DMB, TYROBP, CLEC10A, MARCH1, FCER1G, HLA-DPB1, CD86, HLA-DPA1
## LY86, FGL2, CTSH, CYBB, FCGRT, LYZ, TMSB4X, CD1E, HLA-DRB1, CD74
## Negative: CDK6, TFRC, KCNQ5, GYPC, MIR924HG, PVT1, ZNF521, XACT, TUBB, RIF1
## CENPF, HMGB2, KIAA1211, ZNF385D, ST8SIA6, TPX2, PIP5K1B, ZNF804A, PDZD8, MSI2
## TOP2A, NUSAP1, HIST1H4C, RNF220, MYH10, TAFA2, SLC40A1, TUBA1B, CYTL1, DIAPH3
## PC_ 3
## Positive: RNF220, CALN1, SPINK2, MEF2C, ERG, PRAM1, AUTS2, BAALC, FLT3, CD34
## GUCY1A1, DANT2, PLAC8, PROM1, S100Z, BCL11A, CSF3R, CASC15, SBF2, PTPRM
## ANGPT1, FHIT, CHRM3, HMGA2, EGFL7, COL24A1, TNFSF13B, C1QTNF4, SPNS3, PRDX1
## Negative: AKAP12, CSF2RB, HDC, CLC, ENPP3, PRG2, SRGN, CR1, GATA2, HPGD
## CPA3, SYTL3, SLC24A3, MS4A2, EXT1, ALOX5, IKZF2, PRNP, JAZF1, TMEM154
## PAG1, MS4A3, CKAP4, ALOX5AP, SLC45A3, PALM2-AKAP2, GRAP2, GCSAML, LINC00535, PTGER3
## PC_ 4
## Positive: ANK1, KLF1, BLVRB, SPTA1, HBD, THRB, MYH10, DNAJC6, APOC1, HBB
## TFR2, CNRIP1, PLEKHG1, S100A4, XACT, SLC40A1, MICAL2, KEL, FREM1, HES6
## S100A6, MINPP1, SLC25A21, DLC1, HLA-DRA, PKIG, HLA-DRB1, ADD2, PLXNC1, LDLRAD3
## Negative: RFLNB, ATP8B4, PLCB1, FNDC3B, P2RY8, RHEX, MCTP1, SPINK2, LRMDA, DACH1
## CALN1, AKAP12, CLC, ZEB1, RFX8, SLC24A3, ARHGEF3, CSF3R, PAG1, RNF220
## RAP1GAP2, FLI1, BCL2, IKZF2, GAB2, HDC, PECAM1, BAALC, SRGN, ANXA1
## PC_ 5
## Positive: MKI67, TOP2A, ASPM, CENPE, NUSAP1, GTSE1, TPX2, KNL1, HMMR, KIF14
## DLGAP5, PRC1, CDCA2, CENPF, HIST1H4C, NDC80, UBE2C, KPNA2, CDK1, KIF4A
## KIF11, CKAP2L, CDCA8, NCAPG, BUB1B, HIST1H1B, HMGB2, PRR11, ARHGAP11B, CDKN3
## Negative: PCOLCE, SLC25A4, COL1A1, TJP1, BAIAP2L1, PRRX1, AC116337.3, COL1A2, FN1, AC099560.1
## EFEMP2, ZNF90, PMEPA1, HIST3H2A, AL136454.1, CRYAB, PCDH19, AL157388.1, RASGEF1B, HOXC10
## TPM2, DNAH12, ENO3, EFCAB10, AP001605.1, AC010528.1, NFIX, MTRNR2L6, CDH11, ZFHX4
print(NoE15[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: ATP8B4, P2RY8, KCNQ5, FNDC3B, AUTS2
## Negative: GP1BB, RAB27B, ITGB3, GP9, CAVIN2
## PC_ 2
## Positive: MRC1, HLA-DQB1, CST3, SLC8A1, HLA-DQA1
## Negative: CDK6, TFRC, KCNQ5, GYPC, MIR924HG
## PC_ 3
## Positive: RNF220, CALN1, SPINK2, MEF2C, ERG
## Negative: AKAP12, CSF2RB, HDC, CLC, ENPP3
## PC_ 4
## Positive: ANK1, KLF1, BLVRB, SPTA1, HBD
## Negative: RFLNB, ATP8B4, PLCB1, FNDC3B, P2RY8
## PC_ 5
## Positive: MKI67, TOP2A, ASPM, CENPE, NUSAP1
## Negative: PCOLCE, SLC25A4, COL1A1, TJP1, BAIAP2L1
VizDimLoadings(NoE15, dims = 1:2, reduction = "pca")
DimPlot(NoE15, reduction = "pca")
DimHeatmap(NoE15, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(NoE15, dims = 1:5, cells = 500, balanced = TRUE)
# determine number of PCs in the dataset
NoE15 <- JackStraw(NoE15, num.replicate = 100)
NoE15 <- ScoreJackStraw(NoE15, dims = 1:20)
JackStrawPlot(NoE15, dims = 1:15)
## Warning: Removed 21000 rows containing missing values (`geom_point()`).
ElbowPlot(NoE15)
# cell clustering
NoE15 <- FindNeighbors(NoE15, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
NoE15 <- FindClusters(NoE15, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7863
## Number of edges: 240537
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8962
## Number of communities: 13
## Elapsed time: 0 seconds
head(Idents(NoE15), 5)
## AAACCCACAGGCTACC-1 AAACCCAGTCTCTCTG-1 AAACCCAGTCTTCAAG-1 AAACGAAAGACTCTTG-1
## 1 8 3 1
## AAACGAAAGGTTTGAA-1
## 1
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12
NoE15 <- RunTSNE(NoE15, dims = 1:10)
DimPlot(NoE15,reduction = "tsne",label = TRUE,pt.size = 1.5)
NoE15 <- RunUMAP(NoE15, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 12:30:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:30:39 Read 7863 rows and found 10 numeric columns
## 12:30:39 Using Annoy for neighbor search, n_neighbors = 30
## 12:30:39 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:30:40 Writing NN index file to temp file C:\Users\user\AppData\Local\Temp\RtmpWS8G6S\filece84daa7cf3
## 12:30:40 Searching Annoy index using 1 thread, search_k = 3000
## 12:30:43 Annoy recall = 100%
## 12:30:43 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 12:30:44 Initializing from normalized Laplacian + noise (using irlba)
## 12:30:44 Commencing optimization for 500 epochs, with 306596 positive edges
## 12:31:04 Optimization finished
DimPlot(NoE15,reduction = "umap",label = TRUE,pt.size = 1.5)
# score cell cycle
NoE15<- CellCycleScoring(object = NoE15, g2m.features = cc.genes$g2m.genes, s.features = cc.genes$s.genes)
## Warning: The following features are not present in the object: MLF1IP, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: FAM64A, HN1, not
## searching for symbol synonyms
head(x = NoE15@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt percent.HB
## AAACCCACAGGCTACC-1 ssbm.NoE15.8k 14433 5033 4.468925 0.00000000
## AAACCCAGTCTCTCTG-1 ssbm.NoE15.8k 6410 2799 5.819033 0.18720749
## AAACCCAGTCTTCAAG-1 ssbm.NoE15.8k 2561 1754 1.366654 0.07809449
## AAACGAAAGACTCTTG-1 ssbm.NoE15.8k 9698 3734 5.939369 0.02062281
## AAACGAAAGGTTTGAA-1 ssbm.NoE15.8k 16671 5222 5.848479 0.01799532
## AAACGAAAGTCACGAG-1 ssbm.NoE15.8k 12660 3994 7.266983 0.00000000
## RNA_snn_res.0.5 seurat_clusters S.Score G2M.Score Phase
## AAACCCACAGGCTACC-1 1 1 0.05068216 0.3939077 G2M
## AAACCCAGTCTCTCTG-1 8 8 -0.08082797 -0.2600288 G1
## AAACCCAGTCTTCAAG-1 3 3 0.34361789 -0.0816220 S
## AAACGAAAGACTCTTG-1 1 1 -0.20519010 0.4907255 G2M
## AAACGAAAGGTTTGAA-1 1 1 0.18941026 0.2847622 G2M
## AAACGAAAGTCACGAG-1 2 2 0.06969817 -0.2373076 S
DimPlot(NoE15,reduction = "tsne",label = TRUE,group.by="Phase",pt.size = 1.5)
# inspect and save UMI, Standardize and normalize data
#write.table(NoE15[["RNA"]]@counts,"./umi.xls",sep="\t") # 原始UMI Original UMI ~250MB
#write.table(NoE15[["RNA"]]@data,"./data.xls",sep="\t") #标准化的data,进行了LogNormalize, ~650MB size
#write.table(NoE15[["RNA"]]@scale.data,"./scale.data.xls",sep="\t")#归一化的scale.data,有正负 ~180MB size
saveRDS(NoE15, file = "./NoE15_sample.rds")
save(NoE15,file="./NoE15_res0.5.Robj")
library(DoubletFinder)
sweep.res.list_NoE15 <- paramSweep_v3(NoE15, PCs = 1:10, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_NoE15 <- summarizeSweep(sweep.res.list_NoE15, GT = FALSE)
bcmvn_NoE15 <- find.pK(sweep.stats_NoE15)
## NULL
opt_pK <- as.numeric(as.vector(bcmvn_NoE15$pK[which.max(bcmvn_NoE15$BCmetric)]))
print(opt_pK)
## [1] 0.005
annotations <- NoE15@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.06*nrow(NoE15@meta.data))
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
# find doublets
NoE15 <- doubletFinder_v3(NoE15,PCs = 1:10,pN = 0.25,pK = opt_pK,nExp = nExp_poi,reuse.pANN = FALSE,sct = FALSE)
## [1] "Creating 2621 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(NoE15, reduction = "tsne", group.by = "DF.classifications_0.25_0.005_472",order = T)
table(NoE15$DF.classifications_0.25_0.005_472)
##
## Doublet Singlet
## 472 7391
cluster1.markers <- FindMarkers(NoE15, ident.1 = 1, min.pct = 0.25) #SQ: marker has to be expressed in at least 25% of cluster 1 cells.
## For a more efficient implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the limma package
## --------------------------------------------
## install.packages('BiocManager')
## BiocManager::install('limma')
## --------------------------------------------
## After installation of limma, Seurat will automatically use the more
## efficient implementation (no further action necessary).
## This message will be shown once per session
head(cluster1.markers, n = 15) # top 5 markers for cluster 1, that distinct from the other clusters
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## TUBA1B 0.000000e+00 1.0290026 0.998 0.872 0.000000e+00
## SPINK2 4.108513e-305 1.0974860 0.906 0.313 1.009462e-300
## TOP2A 6.673660e-272 1.2057987 0.962 0.455 1.639718e-267
## TPX2 8.858176e-262 0.9925417 0.946 0.424 2.176454e-257
## BAALC 2.018909e-259 0.7193938 0.775 0.227 4.960459e-255
## ASPM 9.154638e-258 1.0162701 0.892 0.359 2.249295e-253
## SMIM24 9.922466e-256 0.7474917 0.929 0.352 2.437950e-251
## TUBB 1.020341e-249 0.9411356 0.998 0.776 2.506978e-245
## MKI67 4.311422e-243 0.9631272 0.946 0.441 1.059316e-238
## CALN1 3.838443e-242 1.0874761 0.919 0.364 9.431054e-238
## RNF220 8.532574e-226 1.0337896 0.994 0.641 2.096453e-221
## DLGAP5 7.705318e-222 0.7234896 0.700 0.223 1.893197e-217
## GTSE1 1.281418e-220 0.6145660 0.764 0.258 3.148445e-216
## NUSAP1 3.883978e-216 0.8293817 0.930 0.461 9.542935e-212
## CENPE 6.281745e-207 0.7767565 0.758 0.277 1.543425e-202
NoE15.markers <- FindAllMarkers(NoE15, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
NoE15.markers.top100 <- NoE15.markers %>% group_by(cluster) %>% top_n(n = 100, wt = avg_log2FC)
##存储marker -save markers
#write.table(NoE15.markers,file="./NoE15_allmarker.txt",sep="\t")
write.csv(NoE15.markers,file="./NoE15_allmarker.csv")
write.csv(NoE15.markers.top100,file="./NoE15_allmarker_top100.csv")
library(SingleR)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
##
## Attaching package: 'stats4'
## The following object is masked from 'package:spam':
##
## mle
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:spam':
##
## cbind, rbind
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:rstatix':
##
## desc
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
library(celldex)
##
## Attaching package: 'celldex'
## The following objects are masked from 'package:SingleR':
##
## BlueprintEncodeData, DatabaseImmuneCellExpressionData,
## HumanPrimaryCellAtlasData, ImmGenData, MonacoImmuneData,
## MouseRNAseqData, NovershternHematopoieticData
refdata <- celldex::HumanPrimaryCellAtlasData()
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
# refdata_ms <- celldex::MouseRNAseqData()
cellpred <- SingleR(test = NoE15@assays$RNA@data, ref = refdata, labels = refdata$label.fine, clusters = NoE15@active.ident, assay.type.test = "logcounts", assay.type.ref = "logcounts")
cellpred$labels
## [1] "CMP" "CMP"
## [3] "CMP" "CMP"
## [5] "CMP" "MEP"
## [7] "CMP" "Monocyte:leukotriene_D4"
## [9] "CMP" "CMP"
## [11] "CMP" "GMP"
## [13] "CMP"
# output of calculated cell types
celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = F)
write.table(celltype,"./NoE15_celltype_singleR.xls",row.names = F,sep="\t")
#plot the calculated cell types
NoE15@meta.data$singleR=celltype[match(NoE15@active.ident,celltype$ClusterID),'celltype']
DimPlot(NoE15, reduction = "tsne", group.by = "singleR",label=T, label.size=5)
# collection of plotting codes
FeaturePlot(NoE15, features = c("CD44", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", "CD8A"),cols = c("gray", "red"))
## Warning in FetchData.Seurat(object = object, vars = c(dims, "ident", features),
## : The following requested variables were not found: FCGR3A
##绘制Marker基因的小提琴图
VlnPlot(NoE15, features = c("CD44", "CD4"))
VlnPlot(NoE15, features = c("NKG7", "PF4"), slot = "counts", log = TRUE)
##绘制分cluster的热图
top10 <- NoE15.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)#n = 10可以自行调整数字
DoHeatmap(NoE15, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(NoE15, features = top10$gene): The following features were
## omitted as they were not found in the scale.data slot for the RNA assay:
## EIF4G2, B4GALT5, CAT
##绘制气泡图
DotPlot(NoE15, features = c("CD44", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", "CD8A"),cols = c("blue", "red"))
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: FCGR3A
##绘制RidgePlot
RidgePlot(NoE15, features = c("CD44", "GNLY", "CD3E", "CD14"), ncol = 2)
saveRDS(NoE15, file = "./NoE15_sample.rds")
Check the other seurat_tutorial_v2.Rmd file for sub-group and assign cell IDs # visualization using shinnyCell # Do not knit this block, will not finish.
library(ShinyCell)
NoE15obj = readRDS("NoE15_sample.rds")
scConf = createConfig(NoE15obj)
makeShinyApp(NoE15obj, scConf, gene.mapping = TRUE,
shiny.title = "ssbm.NoE15.8k_visualization")
setwd("D:/Bioinformatics Projects/shalini_bm_noe15/shinyApp") # verify change of dir in console using getwd()
shiny::runApp()