This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document.
#Setup the Seurat object (Read data)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Seurat)
## Attaching SeuratObject
## Attaching sp
Axin2_scRNAseq.data<- Read10X(data.dir = "outs/filtered_feature_bc_matrix/")
class(Axin2_scRNAseq.data)
## [1] "dgCMatrix"
## attr(,"package")
## [1] "Matrix"
Axin2_scRNAseq.data[c("Orm1","Bglap","Fn1"),1:30]
## 3 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCCAAGCCTGAGA-1', 'AAACCCACAGTTGTCA-1', 'AAACCCAGTCACCTTC-1' ... ]]
##
## Orm1 . 11 . . . . 4 . . . 6 . . 1 . . . 16 . . . . . 22 . 13 . . . .
## Bglap . . . 1 1 1 . . 1 . 1 . 1 . 386 . . 1 . . . . . 4 . . . . . .
## Fn1 . . . 1 . . . . . . . . . . . 11 2 . . . . . . . . . 1 . . .
Axin2<-CreateSeuratObject(counts = Axin2_scRNAseq.data, project = "Axin2", min.cells = 3, min.features = 200)
Axin2
## An object of class Seurat
## 15859 features across 3873 samples within 1 assay
## Active assay: RNA (15859 features, 0 variable features)
head(x=Axin2@meta.data,5)
## orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGCCTGAGA-1 Axin2 4629 1634
## AAACCCACAGTTGTCA-1 Axin2 31098 3797
## AAACCCAGTCACCTTC-1 Axin2 4512 1610
## AAACCCATCCATACAG-1 Axin2 10249 1924
## AAACCCATCCTAGCGG-1 Axin2 10096 2068
##show QC metrics for the first 5 cells
head(x=Axin2@meta.data,5)
## orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGCCTGAGA-1 Axin2 4629 1634
## AAACCCACAGTTGTCA-1 Axin2 31098 3797
## AAACCCAGTCACCTTC-1 Axin2 4512 1610
## AAACCCATCCATACAG-1 Axin2 10249 1924
## AAACCCATCCTAGCGG-1 Axin2 10096 2068
#Visualize QC Metrics as a violin plot
VlnPlot(object = Axin2, features = c("nFeature_RNA", "nCount_RNA"), ncol=3, pt.size = 0)
plot1<- FeatureScatter(object = Axin2, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1
#Normalization of the data
Axin2<- NormalizeData(object = Axin2, normalization.method = "LogNormalize", scale.factor = 1e4)
#Identify variable features
Axin2<-FindVariableFeatures(object = Axin2, selection.method = 'vst',nfeatuers = 2000)
## Warning: The following arguments are not used: nfeatuers
#Identify top10 variable genes and view them
top10<-head(x=VariableFeatures(object = Axin2),10)
top10
## [1] "Ctsg" "Dcn" "Spp1" "Mpo" "Serpinf1" "Bgn"
## [7] "Ccl4" "Lum" "Elane" "Sparc"
plot1<-VariableFeaturePlot(object = Axin2)
Plot2<-LabelPoints(plot = plot1,points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
Plot2
#Scaling the data
Shifts the expression of each gene, so that the mean expression across cells is 0 Scales the expression of each gene, so that the variance across cells is 1 The results of this are stored in pbmc[[“RNA”]]@scale.data
all.genes<-rownames(x=Axin2)
class(all.genes)
## [1] "character"
head(all.genes)
## [1] "Mrpl15" "Lypla1" "Gm37988" "Tcea1" "Atp6v1h" "Rb1cc1"
Axin2 <- ScaleData(object = Axin2, features = all.genes)
## Centering and scaling data matrix
#Linear dimensional reduction
Axin2 <- RunPCA(object = Axin2,features = VariableFeatures(object=Axin2))
## PC_ 1
## Positive: Crispld2, Timp2, Fpr1, Il1f9, Cxcl2, Pi16, Hcar2, Acod1, Il1b, Osm
## Dusp1, Acta2, Asprv1, Nfkbiz, Nlrp3, Ptgs2, Ccrl2, Stfa2l1, Egr1, Gm5483
## Krt83, Pim1, Rsad2, Isg15, Ier3, BC100530, Bcl2a1b, Ifi27l2a, Irf7, Slc2a1
## Negative: Rpl3, Rpl10a, Rps20, Rps18, Rpl12, Rpl36a, Ptma, Rpl35, Rpsa, Snrpf
## Rpl36al, Rps2, Hint1, Rpl14, Npm1, Rpl32, Rps17, Krtcap2, Rpl15, Snrpg
## Rps5, Rpl22, Rps19, Rps28, Rpl13, Rack1, Rps8, Ppia, Rpl22l1, Rpl39
## PC_ 2
## Positive: Stmn1, Hmgn2, Mki67, Pclaf, Smc2, Top2a, H2afz, Hist1h2ap, Cdk1, Spc24
## Birc5, Gmnn, Cdca8, Tuba4a, Cdca3, Fcnb, Hist1h2ae, H2afx, Prc1, Nusap1
## Rrm2, Knl1, Smc4, Hist1h1b, Asf1b, Kif11, Rrm1, Tyms, Ccna2, Hist1h3c
## Negative: Serpinf1, Dcn, Pcolce, Rcn3, Serpinh1, Mxra8, Prrx1, Adamts2, Lum, Bgn
## Col5a2, Cald1, Fxyd1, Gxylt2, Fgfr1, Maged1, Ptgis, Nbl1, Fkbp9, Mmp2
## Twist1, Pdgfra, Mrc2, Sparc, Id3, S100a16, Col1a2, Ppic, Cfh, Efemp2
## PC_ 3
## Positive: Ccr2, S100a10, Ms4a6b, Irf8, Ctss, Ms4a6c, Ly6e, Mndal, S100a4, Cd48
## Ctsc, Ahnak, Ifi203, Ccl9, Ly86, Pld4, Rassf4, Crip1, F13a1, Itgb7
## Ms4a4b, Ms4a4c, Capn2, H2-DMa, Padi2, Lat2, Clec4a1, Mcub, Pdlim1, Mrpl52
## Negative: Camp, Mki67, Nusap1, Top2a, Spc25, Hist1h3c, Hmgn2, Cdk1, Kif11, Cdca8
## Prc1, Pclaf, Hist1h2ab, Hist1h4d, Birc5, Hist1h2af, Smc2, Hist1h2ap, Cebpe, Ccna2
## Cdca3, Tpx2, Spc24, Knl1, Fbxo5, Chil3, H2afx, Cit, Pbk, Kif15
## PC_ 4
## Positive: Cpe, Slc36a2, Cadm1, Bglap, Bglap3, Car3, Cd200, Fign, Mlip, Omd
## Satb2, Fxyd6, Pla2g5, Insc, Cfh, Bglap2, Sp7, Tbx2, Fap, Lifr
## Dapk2, Robo2, Enpp6, Dmp1, Tmem176a, Rapgef4, Lmo7, Vdr, Bmp3, Ptgis
## Negative: Cpxm1, Aebp1, Aspn, Col3a1, Fndc1, Col6a3, Htra1, Plpp3, Sfrp2, Ahnak2
## Dpt, Meg3, Ramp2, Eln, Lama2, Il11ra1, Col4a1, Rhoj, Col14a1, Cdon
## Egfr, Nid1, Col6a1, Fbn1, Ehd2, Gstt1, Loxl2, Lamb1, Efemp1, Cd34
## PC_ 5
## Positive: S100a4, Pld4, F13a1, Ms4a6c, Ctss, Ms4a4c, Ly86, Rassf4, Mcub, Klf4
## Fn1, Clec4a1, Irf5, Pid1, Ctsc, Stxbp6, Tifab, Msr1, Itga1, H2-DMa
## Plekho1, Dusp3, Clec4a3, Naaa, Wfdc17, Ccdc88a, Cd68, Cd302, Ifi207, Ccr2
## Negative: Nkg7, Ptprcap, Il2rb, Cst7, Skap1, Ccl5, Klre1, AW112010, Sh2d2a, Ctsw
## Gimap5, Sytl3, Trbc1, Klrd1, Lck, Atp1b1, Gimap1, Txk, Ncr1, Spry2
## Gimap4, Gimap6, Prf1, Klrk1, Klrb1c, Gzma, Gimap3, Sh2d1a, Ctla2a, H2-Q7
#Examine and Visualize PCA results
print(x=Axin2[['pca']],dims = 1:5,nfeatures = 10)
## PC_ 1
## Positive: Crispld2, Timp2, Fpr1, Il1f9, Cxcl2, Pi16, Hcar2, Acod1, Il1b, Osm
## Negative: Rpl3, Rpl10a, Rps20, Rps18, Rpl12, Rpl36a, Ptma, Rpl35, Rpsa, Snrpf
## PC_ 2
## Positive: Stmn1, Hmgn2, Mki67, Pclaf, Smc2, Top2a, H2afz, Hist1h2ap, Cdk1, Spc24
## Negative: Serpinf1, Dcn, Pcolce, Rcn3, Serpinh1, Mxra8, Prrx1, Adamts2, Lum, Bgn
## PC_ 3
## Positive: Ccr2, S100a10, Ms4a6b, Irf8, Ctss, Ms4a6c, Ly6e, Mndal, S100a4, Cd48
## Negative: Camp, Mki67, Nusap1, Top2a, Spc25, Hist1h3c, Hmgn2, Cdk1, Kif11, Cdca8
## PC_ 4
## Positive: Cpe, Slc36a2, Cadm1, Bglap, Bglap3, Car3, Cd200, Fign, Mlip, Omd
## Negative: Cpxm1, Aebp1, Aspn, Col3a1, Fndc1, Col6a3, Htra1, Plpp3, Sfrp2, Ahnak2
## PC_ 5
## Positive: S100a4, Pld4, F13a1, Ms4a6c, Ctss, Ms4a4c, Ly86, Rassf4, Mcub, Klf4
## Negative: Nkg7, Ptprcap, Il2rb, Cst7, Skap1, Ccl5, Klre1, AW112010, Sh2d2a, Ctsw
VizDimLoadings(object = Axin2,dims = 1:2, reduction = 'pca')
DimPlot(object = Axin2, reduction = 'pca')
DimHeatmap(object = Axin2,dims = 1,cells = 3000, balanced = TRUE)
##Determine the dimentionality of the dataset
ElbowPlot(object = Axin2)
#Cluster the cells
Axin2<- FindNeighbors(object = Axin2,dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
Axin2<- FindClusters(object = Axin2, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3873
## Number of edges: 129812
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9570
## Number of communities: 5
## Elapsed time: 0 seconds
#Non-linear dimentional reduction (UMAP and tSNE)
Axin2<- RunUMAP(object = Axin2,dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:03:05 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:03:05 Read 3873 rows and found 10 numeric columns
## 11:03:05 Using Annoy for neighbor search, n_neighbors = 30
## 11:03:05 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:03:05 Writing NN index file to temp file C:\Users\zjiang\AppData\Local\Temp\RtmpCung8D\file3718606e793d
## 11:03:05 Searching Annoy index using 1 thread, search_k = 3000
## 11:03:06 Annoy recall = 100%
## 11:03:07 Commencing smooth kNN distance calibration using 1 thread
## 11:03:07 Initializing from normalized Laplacian + noise
## 11:03:07 Commencing optimization for 500 epochs, with 154966 positive edges
## 11:03:16 Optimization finished
Axin2<- RunTSNE(object = Axin2,dims = 1:10)
DimPlot(object = Axin2, reduction = 'umap')
#Check the effect of sequence status, such as total read and the detected gene numbers
FeaturePlot(Axin2, features = c("nFeature_RNA", "nCount_RNA"))
#t-SNE plot
DimPlot(object = Axin2, reduction = 'tsne')
#Finding differentially expressed features (Cluster biomarkers)
Axin2.markers<- FindAllMarkers(object = Axin2, only.pos = TRUE, min.pct = 0.25,logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
Axin2.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
## # A tibble: 10 × 7
## # Groups: cluster [5]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 7.59e-265 3.49 0.771 0.389 1.20e-260 0 Il1b
## 2 1.02e-230 3.01 0.808 0.521 1.62e-226 0 Cxcl2
## 3 0 2.02 0.996 0.938 0 1 Ltf
## 4 0 1.53 0.987 0.882 0 1 Ifitm6
## 5 1.07e-294 4.07 0.929 0.342 1.70e-290 2 Hist1h2ap
## 6 5.64e-187 4.03 0.686 0.178 8.95e-183 2 Prtn3
## 7 9.70e- 13 5.99 0.489 0.336 1.54e- 8 3 Col1a1
## 8 1.84e- 10 6.44 0.479 0.313 2.92e- 6 3 Bglap
## 9 3.48e-148 6.99 0.972 0.044 5.52e-144 4 Mcpt8
## 10 2.94e-113 7.06 0.861 0.046 4.66e-109 4 Prss34
#violin plot of gene expression in each cluster
VlnPlot(object = Axin2, features = c("Bglap"), slot='counts', log = TRUE, pt.size = 0)
#Draw the expression pattern of genes on UMAP
FeaturePlot(object = Axin2, features = c("Bglap"))
#Heatmap of specific genes for each cluster
Axin2.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(object = Axin2, features = top10$gene) + NoLegend()
#Name on each cluster
new.cluster.ids<- c("A","B","C","SuSC","E")
names(x=new.cluster.ids) <-levels(x=Axin2)
Axin2<-RenameIdents(object = Axin2,new.cluster.ids)
DimPlot(object = Axin2,reduction = 'umap',label = TRUE,pt.size = 0.5)