#load seurat package and dependencies
library(dplyr)
library(Seurat)
## Warning: package 'Seurat' was built under R version 4.2.2
library(patchwork)
library(cowplot)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.1
library(rstatix)
library(RColorBrewer)
setwd("D:/Bioinformatics Projects/shalini_bm")
load E15 dataset
# read the E15 sample
E15.data <- Read10X(data.dir = "E15/outs/filtered_feature_bc_matrix/")
dim(E15.data)
## [1] 36601 6757
E15.data[1:10,1:6]
## 10 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCACATCGCTCT-1 AAACCCAGTAGGGTAC-1 AAACCCAGTGCATCTA-1
## MIR1302-2HG . . .
## FAM138A . . .
## OR4F5 . . .
## AL627309.1 . . .
## AL627309.3 . . .
## AL627309.2 . . .
## AL627309.5 . . .
## AL627309.4 . . .
## AP006222.2 . . .
## AL732372.1 . . .
## AAACCCATCATAGAGA-1 AAACCCATCGGAAACG-1 AAACCCATCTAACACG-1
## MIR1302-2HG . . .
## FAM138A . . .
## OR4F5 . . .
## AL627309.1 . . .
## AL627309.3 . . .
## AL627309.2 . . .
## AL627309.5 . . .
## AL627309.4 . . .
## AP006222.2 . . .
## AL732372.1 . . .
E15 <- CreateSeuratObject(counts = E15.data, project = "ssbm.e15.8k", min.cells = 3, min.features = 200)
E15
## An object of class Seurat
## 24124 features across 6710 samples within 1 assay
## Active assay: RNA (24124 features, 0 variable features)
E15[["percent.mt"]] <- PercentageFeatureSet(E15, pattern = "^MT-")
HB.genes_total <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ") # common RBC genes for human blood sample
HB_m <- match(HB.genes_total,rownames(E15@assays$RNA))
HB.genes <- rownames(E15@assays$RNA)[HB_m]
HB.genes <- HB.genes[!is.na(HB.genes)]
E15[["percent.HB"]]<-PercentageFeatureSet(E15,features=HB.genes)
VlnPlot(E15, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(E15, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(E15, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
E15 <- subset(E15, subset = nFeature_RNA > 500 & nFeature_RNA < 7500 & percent.mt < 10)
dim(E15)
## [1] 24124 6135
# 24124 6135
E15 <- NormalizeData(E15, normalization.method = "LogNormalize", scale.factor = 10000)
E15 <- FindVariableFeatures(E15, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(E15), 10)
top10
## [1] "PPBP" "CLC" "PF4" "LYZ" "CST3" "F13A1" "HPGD" "HBG2" "THBS1"
## [10] "NRGN"
plot1 <- VariableFeaturePlot(E15)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2 rows containing missing values (geom_point).
plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Removed 2 rows containing missing values (geom_point).
# Preparation stage for PCA analyses, need to use ScaleData() to scale
and center all data
all.genes <- rownames(E15)
E15 <- ScaleData(E15, features = all.genes)
## Centering and scaling data matrix
E15 <- ScaleData(E15, vars.to.regress = "percent.mt") # regress out the effects of mitochondria variation
## Regressing out percent.mt
## Centering and scaling data matrix
E15 <- RunPCA(E15, features = VariableFeatures(object = E15))
## PC_ 1
## Positive: SLC25A21, NFIA, HBM, DNAJA4, KCNH2, APOC1, LINC01133, CD36, NETO2, DDIT4
## PKLR, SYNGR1, A4GALT, HIST1H1B, HIST1H4C, C16orf95, HIST1H1D, REXO2, S100A6, HIST2H2AC
## ZDHHC14, FAM178B, TOP2A, GLRX, DOCK9, APOE, CENPE, EPB41L4A, HMMR, AC079804.3
## Negative: ACTG1, CELF2, PKM, ARHGDIB, MSN, MACF1, ELMO1, ETV6, TPM4, ARPC1B
## GMFG, FLI1, MAML3, PRKCB, UTRN, TBXAS1, FXYD5, SH3BGRL3, TMSB4X, FNBP1
## WIPF1, ENO1, VAV3, LGALS1, SERPINB1, SEPTIN6, AIF1, CYBA, MEF2C, TMSB10
## PC_ 2
## Positive: ITGB3, GP9, SELP, CMTM5, GP1BB, PF4, PPBP, RAB27B, ABCC3, CAVIN2
## KALRN, THBS1, TUBB1, LGALSL, ARHGAP6, MYLK, GP6, MPIG6B, AC004053.1, GP1BA
## HPSE, NRGN, ESAM, F2R, MMRN1, ITGB5, CLEC1B, TMEM40, DNM3, CD226
## Negative: ATP8B4, P2RY8, DIAPH2, LCP1, RCSD1, AUTS2, LSP1, CHST11, KCNQ5, BST2
## HSH2D, ZFP36L2, APBB1IP, FNDC3B, S100Z, PLCB1, ANKRD44, PRAM1, FLT3, CERS6
## CD44, DOCK2, GLIPR1, FCHSD2, SBF2, XYLT1, KCNAB2, AFF3, BCL2, PLAC8
## PC_ 3
## Positive: CDK6, RNF220, CALN1, KCNQ5, ERG, NKAIN2, MSI2, CMSS1, EBPL, MYB
## MCTP2, ZNF521, SMIM24, CHRM3, PRSS57, CYTL1, CD34, MSRB3, NME1, FIRRE
## SPINK2, EGFL7, ZNF804A, NPW, FAM30A, BAALC, CASC15, NPR3, CCDC171, PVT1
## Negative: MRC1, HLA-DQB1, SLC8A1, CD1C, HLA-DMB, MARCH1, LY86, HLA-DQA1, JAML, PKIB
## FGL2, MS4A6A, CLEC10A, SAMHD1, CD86, HLA-DQA2, IFI30, CST3, TBC1D9, CD1E
## CTSH, IGSF6, ARL4C, IL13RA1, MS4A4E, RTN1, CYBB, TYROBP, GPRIN3, KCTD12
## PC_ 4
## Positive: AKAP12, HDC, CSF2RB, CLC, ENPP3, SYTL3, MS4A2, CPA3, MS4A3, LINC00535
## IKZF2, CKAP4, IL18R1, SLC45A3, TMEM154, PRNP, HPGD, ALOX5, GATA2, PAG1
## THSD7A, SLC7A8, KLF2, ATP10D, ALOX5AP, IL3RA, LINC02458, NCF1, AC093865.1, GCSAML
## Negative: AFF3, FLT3, MEF2C, HLA-DRA, GLIPR1, HLA-DRB1, HDAC9, CD74, GSTP1, HLA-DPA1
## HLA-DPB1, IFI16, AIF1, S100Z, DISC1, CALN1, PEBP1, CIITA, JAML, CCSER1
## ST18, HLA-DQA2, TFEC, SPINK2, PRAM1, HLA-DMB, TNFSF13B, AHR, WDFY4, DANT2
## PC_ 5
## Positive: MPO, CSF3R, PRAM1, PLCB1, RAB27A, P2RY8, TNFSF13B, DGKG, PLAC8, RFX8
## SPNS3, RFLNB, PDE4D, TMEM40, SELL, BAHCC1, C1QTNF4, SPARC, FAM107B, C16orf74
## CTSG, PXK, SORL1, RAB7B, ATP8B4, TSPOAP1, SPINK2, S100Z, SMIM24, CLEC1B
## Negative: THRB, XACT, FREM1, RYR3, PVT1, RNF130, PKIG, LDLRAD3, IL7, ADAMTS14
## PLEKHG1, FCER1A, S100A4, RGS20, CPPED1, SLC40A1, ST8SIA6, LDLRAD4, DLC1, CPQ
## GDF11, MTSS1, TPSAB1, SLC12A6, SOS1, PLXNC1, ABCC4, TMEM164, LDHA, ELOVL6
print(E15[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: SLC25A21, NFIA, HBM, DNAJA4, KCNH2
## Negative: ACTG1, CELF2, PKM, ARHGDIB, MSN
## PC_ 2
## Positive: ITGB3, GP9, SELP, CMTM5, GP1BB
## Negative: ATP8B4, P2RY8, DIAPH2, LCP1, RCSD1
## PC_ 3
## Positive: CDK6, RNF220, CALN1, KCNQ5, ERG
## Negative: MRC1, HLA-DQB1, SLC8A1, CD1C, HLA-DMB
## PC_ 4
## Positive: AKAP12, HDC, CSF2RB, CLC, ENPP3
## Negative: AFF3, FLT3, MEF2C, HLA-DRA, GLIPR1
## PC_ 5
## Positive: MPO, CSF3R, PRAM1, PLCB1, RAB27A
## Negative: THRB, XACT, FREM1, RYR3, PVT1
VizDimLoadings(E15, dims = 1:2, reduction = "pca")
DimPlot(E15, reduction = "pca")
DimHeatmap(E15, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(E15, dims = 1:5, cells = 500, balanced = TRUE)
# determine number of PCs in the dataset
E15 <- JackStraw(E15, num.replicate = 100)
E15 <- ScoreJackStraw(E15, dims = 1:20)
JackStrawPlot(E15, dims = 1:15)
## Warning: Removed 21000 rows containing missing values (geom_point).
ElbowPlot(E15)
# cell clustering
E15 <- FindNeighbors(E15, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
E15 <- FindClusters(E15, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6135
## Number of edges: 191453
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8980
## Number of communities: 10
## Elapsed time: 0 seconds
head(Idents(E15), 5)
## AAACCCACATCGCTCT-1 AAACCCAGTAGGGTAC-1 AAACCCAGTGCATCTA-1 AAACCCATCATAGAGA-1
## 3 4 0 1
## AAACCCATCGGAAACG-1
## 7
## Levels: 0 1 2 3 4 5 6 7 8 9
E15 <- RunTSNE(E15, dims = 1:10)
DimPlot(E15,reduction = "tsne",label = TRUE,pt.size = 1.5)
E15 <- RunUMAP(E15, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:16:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:16:39 Read 6135 rows and found 10 numeric columns
## 17:16:39 Using Annoy for neighbor search, n_neighbors = 30
## 17:16:39 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:16:40 Writing NN index file to temp file C:\Users\user\AppData\Local\Temp\Rtmp4iTOPt\file780c3f815bc0
## 17:16:40 Searching Annoy index using 1 thread, search_k = 3000
## 17:16:42 Annoy recall = 100%
## 17:16:42 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:16:43 Initializing from normalized Laplacian + noise (using irlba)
## 17:16:44 Commencing optimization for 500 epochs, with 240570 positive edges
## 17:17:01 Optimization finished
DimPlot(E15,reduction = "umap",label = TRUE,pt.size = 1.5)
# score cell cycle
E15<- CellCycleScoring(object = E15, g2m.features = cc.genes$g2m.genes, s.features = cc.genes$s.genes)
## Warning: The following features are not present in the object: MLF1IP, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: FAM64A, HN1, not
## searching for symbol synonyms
head(x = E15@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt percent.HB
## AAACCCACATCGCTCT-1 ssbm.e15.8k 32469 6770 4.992454 0.1478333
## AAACCCAGTAGGGTAC-1 ssbm.e15.8k 6036 1507 1.590457 42.3956262
## AAACCCAGTGCATCTA-1 ssbm.e15.8k 6346 1735 2.600063 44.6738103
## AAACCCATCATAGAGA-1 ssbm.e15.8k 20373 4500 6.577333 17.3317626
## AAACCCATCGGAAACG-1 ssbm.e15.8k 4683 2395 6.939996 0.3203075
## AAACCCATCTAACACG-1 ssbm.e15.8k 9431 2869 3.456685 26.4553070
## RNA_snn_res.0.5 seurat_clusters S.Score G2M.Score Phase
## AAACCCACATCGCTCT-1 3 3 0.04567932 0.2927050 G2M
## AAACCCAGTAGGGTAC-1 4 4 -0.31802209 -0.3079963 G1
## AAACCCAGTGCATCTA-1 0 0 -0.04268012 -0.1170832 G1
## AAACCCATCATAGAGA-1 1 1 -0.03453748 0.4065812 G2M
## AAACCCATCGGAAACG-1 7 7 -0.30080349 -0.4493450 G1
## AAACCCATCTAACACG-1 0 0 0.17961522 0.3602452 G2M
DimPlot(E15,reduction = "tsne",label = TRUE,group.by="Phase",pt.size = 1.5)
# inspect and save UMI, Standardize and normalize data
write.table(E15[["RNA"]]@counts,"./umi.xls",sep="\t") # 原始UMI Original UMI ~250MB
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.1 GiB
write.table(E15[["RNA"]]@data,"./data.xls",sep="\t") #标准化的data,进行了LogNormalize, ~650MB size
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.1 GiB
write.table(E15[["RNA"]]@scale.data,"./scale.data.xls",sep="\t")#归一化的scale.data,有正负 ~180MB size
saveRDS(E15, file = "./E15_sample.rds")
save(E15,file="./E15_res0.5.Robj")
library(DoubletFinder)
sweep.res.list_E15 <- paramSweep_v3(E15, PCs = 1:10, sct = FALSE)
## Warning: package 'fields' was built under R version 4.2.2
## Warning: package 'spam' was built under R version 4.2.2
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_E15 <- summarizeSweep(sweep.res.list_E15, GT = FALSE)
bcmvn_E15 <- find.pK(sweep.stats_E15)
## NULL
opt_pK <- as.numeric(as.vector(bcmvn_E15$pK[which.max(bcmvn_E15$BCmetric)]))
print(opt_pK)
## [1] 0.005
annotations <- E15@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.06*nrow(E15@meta.data))
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
# find doublets
E15 <- doubletFinder_v3(E15,PCs = 1:10,pN = 0.25,pK = opt_pK,nExp = nExp_poi,reuse.pANN = FALSE,sct = FALSE)
## [1] "Creating 2045 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(E15, reduction = "tsne", group.by = "DF.classifications_0.25_0.005_368",order = T)
table(E15$DF.classifications_0.25_0.005_368)
##
## Doublet Singlet
## 368 5767
cluster1.markers <- FindMarkers(E15, ident.1 = 1, min.pct = 0.25) #SQ: marker has to be expressed in at least 25% of cluster 1 cells.
head(cluster1.markers, n = 15) # top 5 markers for cluster 1, that distinct from the other clusters
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## LINC01133 1.588785e-302 1.0959463 0.870 0.334 3.832786e-298
## TMEM14B 4.165758e-264 1.0454912 0.981 0.832 1.004947e-259
## MT-CO1 5.486434e-250 0.5489504 1.000 0.993 1.323547e-245
## TMEM14C 1.655414e-248 0.9668561 0.960 0.730 3.993521e-244
## HEBP1 1.220413e-242 0.9628672 0.940 0.641 2.944125e-238
## UBAC1 3.728994e-242 0.9030137 0.978 0.817 8.995825e-238
## PRDX2 3.188152e-235 1.0067714 0.987 0.919 7.691099e-231
## NETO2 1.217452e-193 0.9538187 0.892 0.485 2.936982e-189
## LGALS3 8.871083e-191 0.8668698 0.972 0.661 2.140060e-186
## TK1 4.958848e-185 0.7560403 0.918 0.595 1.196272e-180
## METAP2 1.965007e-181 0.7800846 0.965 0.818 4.740383e-177
## MGST3 6.515865e-181 0.8311863 0.969 0.849 1.571887e-176
## PCLAF 5.228378e-179 0.7481664 0.965 0.763 1.261294e-174
## TUBG1 1.442680e-176 0.7415574 0.894 0.562 3.480321e-172
## ATP5IF1 9.865081e-172 0.7355588 0.977 0.842 2.379852e-167
E15.markers <- FindAllMarkers(E15, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
E15.markers.top100 <- E15.markers %>% group_by(cluster) %>% top_n(n = 100, wt = avg_log2FC)
##存储marker -save markers
write.table(E15.markers,file="./E15_allmarker.txt",sep="\t")
write.csv(E15.markers,file="./E15_allmarker.csv")
write.csv(E15.markers.top100,file="./E15_allmarker_top100.csv")
library(SingleR)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
##
## Attaching package: 'stats4'
## The following object is masked from 'package:spam':
##
## mle
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:spam':
##
## cbind, rbind
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:rstatix':
##
## desc
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
library(celldex)
##
## Attaching package: 'celldex'
## The following objects are masked from 'package:SingleR':
##
## BlueprintEncodeData, DatabaseImmuneCellExpressionData,
## HumanPrimaryCellAtlasData, ImmGenData, MonacoImmuneData,
## MouseRNAseqData, NovershternHematopoieticData
refdata <- celldex::HumanPrimaryCellAtlasData()
## snapshotDate(): 2022-04-26
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
# refdata_ms <- celldex::MouseRNAseqData()
cellpred <- SingleR(test = E15@assays$RNA@data, ref = refdata, labels = refdata$label.fine, clusters = E15@active.ident, assay.type.test = "logcounts", assay.type.ref = "logcounts")
cellpred$labels
## [1] "Erythroblast" "Erythroblast" "Erythroblast" "CMP" "Erythroblast"
## [6] "MEP" "MEP" "CMP" "GMP" "Erythroblast"
# output of calculated cell types
celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = F)
write.table(celltype,"./E15_celltype_singleR.xls",row.names = F,sep="\t")
#plot the calculated cell types
E15@meta.data$singleR=celltype[match(E15@active.ident,celltype$ClusterID),'celltype']
DimPlot(E15, reduction = "tsne", group.by = "singleR",label=T, label.size=5)
# collection of plotting codes
FeaturePlot(E15, features = c("CD44", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", "CD8A"),cols = c("gray", "red"))
## Warning in FetchData.Seurat(object = object, vars = c(dims, "ident",
## features), : The following requested variables were not found: CD8A
##绘制Marker基因的小提琴图
VlnPlot(E15, features = c("CD44", "CD4"))
VlnPlot(E15, features = c("NKG7", "PF4"), slot = "counts", log = TRUE)
##绘制分cluster的热图
top10 <- E15.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)#n = 10可以自行调整数字
DoHeatmap(E15, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(E15, features = top10$gene): The following features were
## omitted as they were not found in the scale.data slot for the RNA assay: EIF4G2,
## RPL22L1, FAU, XPO7, CPEB4, RIOK3, SEC62, PRDX2, UBAC1, HEBP1, TMEM14C, TMEM14B,
## CR1L, HMGB2, HBA2, SOX6, NUSAP1, HBA1, H1F0, HEMGN
##绘制气泡图
DotPlot(E15, features = c("CD44", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", "CD8A"),cols = c("blue", "red"))
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: CD8A
##绘制RidgePlot
RidgePlot(E15, features = c("CD44", "GNLY", "CD3E", "CD14"), ncol = 2)
saveRDS(E15, file = "./E15_sample.rds")
Check the other seurat_tutorial_v2.Rmd file for sub-group and assign cell IDs # visualization using shinnyCell # Do not knit this block, will not finish.
#library(ShinyCell)
#E15obj = readRDS("E15_sample.rds")
#scConf = createConfig(E15obj)
#makeShinyApp(E15obj, scConf, gene.mapping = TRUE,
#shiny.title = "ssbm.e15.8k_visualization")
#setwd("D:/Bioinformatics Projects/shalini_bm/shinyApp") # verify change of dir in console using getwd()
#shiny::runApp()