R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document.

#Setup the Seurat object (Read data)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Seurat)
## Attaching SeuratObject
## Attaching sp
Axin2_scRNAseq.data<- Read10X(data.dir = "outs/filtered_feature_bc_matrix/")
class(Axin2_scRNAseq.data)
## [1] "dgCMatrix"
## attr(,"package")
## [1] "Matrix"
Axin2_scRNAseq.data[c("Orm1","Bglap","Fn1"),1:30]
## 3 x 30 sparse Matrix of class "dgCMatrix"
##    [[ suppressing 30 column names 'AAACCCAAGCCTGAGA-1', 'AAACCCACAGTTGTCA-1', 'AAACCCAGTCACCTTC-1' ... ]]
##                                                                         
## Orm1  . 11 . . . . 4 . . . 6 . . 1   .  . . 16 . . . . . 22 . 13 . . . .
## Bglap .  . . 1 1 1 . . 1 . 1 . 1 . 386  . .  1 . . . . .  4 .  . . . . .
## Fn1   .  . . 1 . . . . . . . . . .   . 11 2  . . . . . .  . .  . 1 . . .
Axin2<-CreateSeuratObject(counts = Axin2_scRNAseq.data, project = "Axin2", min.cells = 3, min.features = 200)
Axin2
## An object of class Seurat 
## 15859 features across 3873 samples within 1 assay 
## Active assay: RNA (15859 features, 0 variable features)
head(x=Axin2@meta.data,5)
##                    orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGCCTGAGA-1      Axin2       4629         1634
## AAACCCACAGTTGTCA-1      Axin2      31098         3797
## AAACCCAGTCACCTTC-1      Axin2       4512         1610
## AAACCCATCCATACAG-1      Axin2      10249         1924
## AAACCCATCCTAGCGG-1      Axin2      10096         2068

##show QC metrics for the first 5 cells

head(x=Axin2@meta.data,5)
##                    orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGCCTGAGA-1      Axin2       4629         1634
## AAACCCACAGTTGTCA-1      Axin2      31098         3797
## AAACCCAGTCACCTTC-1      Axin2       4512         1610
## AAACCCATCCATACAG-1      Axin2      10249         1924
## AAACCCATCCTAGCGG-1      Axin2      10096         2068

#Visualize QC Metrics as a violin plot

VlnPlot(object = Axin2, features = c("nFeature_RNA", "nCount_RNA"), ncol=3, pt.size = 0)

plot1<- FeatureScatter(object = Axin2, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1

#Normalization of the data

Axin2<- NormalizeData(object = Axin2, normalization.method = "LogNormalize", scale.factor = 1e4)

#Identify variable features

Axin2<-FindVariableFeatures(object = Axin2, selection.method = 'vst',nfeatuers = 2000)
## Warning: The following arguments are not used: nfeatuers

#Identify top10 variable genes and view them

top10<-head(x=VariableFeatures(object = Axin2),10)
top10
##  [1] "Ctsg"     "Dcn"      "Spp1"     "Mpo"      "Serpinf1" "Bgn"     
##  [7] "Ccl4"     "Lum"      "Elane"    "Sparc"
plot1<-VariableFeaturePlot(object = Axin2)
Plot2<-LabelPoints(plot = plot1,points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
Plot2

#Scaling the data

Shifts the expression of each gene, so that the mean expression across cells is 0 Scales the expression of each gene, so that the variance across cells is 1 The results of this are stored in pbmc[[“RNA”]]@scale.data

all.genes<-rownames(x=Axin2)
class(all.genes)
## [1] "character"
head(all.genes)
## [1] "Mrpl15"  "Lypla1"  "Gm37988" "Tcea1"   "Atp6v1h" "Rb1cc1"
Axin2 <- ScaleData(object = Axin2, features = all.genes)
## Centering and scaling data matrix

#Linear dimensional reduction

Axin2 <- RunPCA(object = Axin2,features = VariableFeatures(object=Axin2))
## PC_ 1 
## Positive:  Crispld2, Timp2, Fpr1, Il1f9, Cxcl2, Pi16, Hcar2, Acod1, Il1b, Osm 
##     Dusp1, Acta2, Asprv1, Nfkbiz, Nlrp3, Ptgs2, Ccrl2, Stfa2l1, Egr1, Gm5483 
##     Krt83, Pim1, Rsad2, Isg15, Ier3, BC100530, Bcl2a1b, Ifi27l2a, Irf7, Slc2a1 
## Negative:  Rpl3, Rpl10a, Rps20, Rps18, Rpl12, Rpl36a, Ptma, Rpl35, Rpsa, Snrpf 
##     Rpl36al, Rps2, Hint1, Rpl14, Npm1, Rpl32, Rps17, Krtcap2, Rpl15, Snrpg 
##     Rps5, Rpl22, Rps19, Rps28, Rpl13, Rack1, Rps8, Ppia, Rpl22l1, Rpl39 
## PC_ 2 
## Positive:  Stmn1, Hmgn2, Mki67, Pclaf, Smc2, Top2a, H2afz, Hist1h2ap, Cdk1, Spc24 
##     Birc5, Gmnn, Cdca8, Tuba4a, Cdca3, Fcnb, Hist1h2ae, H2afx, Prc1, Nusap1 
##     Rrm2, Knl1, Smc4, Hist1h1b, Asf1b, Kif11, Rrm1, Tyms, Ccna2, Hist1h3c 
## Negative:  Serpinf1, Dcn, Pcolce, Rcn3, Serpinh1, Mxra8, Prrx1, Adamts2, Lum, Bgn 
##     Col5a2, Cald1, Fxyd1, Gxylt2, Fgfr1, Maged1, Ptgis, Nbl1, Fkbp9, Mmp2 
##     Twist1, Pdgfra, Mrc2, Sparc, Id3, S100a16, Col1a2, Ppic, Cfh, Efemp2 
## PC_ 3 
## Positive:  Ccr2, S100a10, Ms4a6b, Irf8, Ctss, Ms4a6c, Ly6e, Mndal, S100a4, Cd48 
##     Ctsc, Ahnak, Ifi203, Ccl9, Ly86, Pld4, Rassf4, Crip1, F13a1, Itgb7 
##     Ms4a4b, Ms4a4c, Capn2, H2-DMa, Padi2, Lat2, Clec4a1, Mcub, Pdlim1, Mrpl52 
## Negative:  Camp, Mki67, Nusap1, Top2a, Spc25, Hist1h3c, Hmgn2, Cdk1, Kif11, Cdca8 
##     Prc1, Pclaf, Hist1h2ab, Hist1h4d, Birc5, Hist1h2af, Smc2, Hist1h2ap, Cebpe, Ccna2 
##     Cdca3, Tpx2, Spc24, Knl1, Fbxo5, Chil3, H2afx, Cit, Pbk, Kif15 
## PC_ 4 
## Positive:  Cpe, Slc36a2, Cadm1, Bglap, Bglap3, Car3, Cd200, Fign, Mlip, Omd 
##     Satb2, Fxyd6, Pla2g5, Insc, Cfh, Bglap2, Sp7, Tbx2, Fap, Lifr 
##     Dapk2, Robo2, Enpp6, Dmp1, Tmem176a, Rapgef4, Lmo7, Vdr, Bmp3, Ptgis 
## Negative:  Cpxm1, Aebp1, Aspn, Col3a1, Fndc1, Col6a3, Htra1, Plpp3, Sfrp2, Ahnak2 
##     Dpt, Meg3, Ramp2, Eln, Lama2, Il11ra1, Col4a1, Rhoj, Col14a1, Cdon 
##     Egfr, Nid1, Col6a1, Fbn1, Ehd2, Gstt1, Loxl2, Lamb1, Efemp1, Cd34 
## PC_ 5 
## Positive:  S100a4, Pld4, F13a1, Ms4a6c, Ctss, Ms4a4c, Ly86, Rassf4, Mcub, Klf4 
##     Fn1, Clec4a1, Irf5, Pid1, Ctsc, Stxbp6, Tifab, Msr1, Itga1, H2-DMa 
##     Plekho1, Dusp3, Clec4a3, Naaa, Wfdc17, Ccdc88a, Cd68, Cd302, Ifi207, Ccr2 
## Negative:  Nkg7, Ptprcap, Il2rb, Cst7, Skap1, Ccl5, Klre1, AW112010, Sh2d2a, Ctsw 
##     Gimap5, Sytl3, Trbc1, Klrd1, Lck, Atp1b1, Gimap1, Txk, Ncr1, Spry2 
##     Gimap4, Gimap6, Prf1, Klrk1, Klrb1c, Gzma, Gimap3, Sh2d1a, Ctla2a, H2-Q7

#Examine and Visualize PCA results

print(x=Axin2[['pca']],dims = 1:5,nfeatures = 10)
## PC_ 1 
## Positive:  Crispld2, Timp2, Fpr1, Il1f9, Cxcl2, Pi16, Hcar2, Acod1, Il1b, Osm 
## Negative:  Rpl3, Rpl10a, Rps20, Rps18, Rpl12, Rpl36a, Ptma, Rpl35, Rpsa, Snrpf 
## PC_ 2 
## Positive:  Stmn1, Hmgn2, Mki67, Pclaf, Smc2, Top2a, H2afz, Hist1h2ap, Cdk1, Spc24 
## Negative:  Serpinf1, Dcn, Pcolce, Rcn3, Serpinh1, Mxra8, Prrx1, Adamts2, Lum, Bgn 
## PC_ 3 
## Positive:  Ccr2, S100a10, Ms4a6b, Irf8, Ctss, Ms4a6c, Ly6e, Mndal, S100a4, Cd48 
## Negative:  Camp, Mki67, Nusap1, Top2a, Spc25, Hist1h3c, Hmgn2, Cdk1, Kif11, Cdca8 
## PC_ 4 
## Positive:  Cpe, Slc36a2, Cadm1, Bglap, Bglap3, Car3, Cd200, Fign, Mlip, Omd 
## Negative:  Cpxm1, Aebp1, Aspn, Col3a1, Fndc1, Col6a3, Htra1, Plpp3, Sfrp2, Ahnak2 
## PC_ 5 
## Positive:  S100a4, Pld4, F13a1, Ms4a6c, Ctss, Ms4a4c, Ly86, Rassf4, Mcub, Klf4 
## Negative:  Nkg7, Ptprcap, Il2rb, Cst7, Skap1, Ccl5, Klre1, AW112010, Sh2d2a, Ctsw
VizDimLoadings(object = Axin2,dims = 1:2, reduction = 'pca')

DimPlot(object = Axin2, reduction = 'pca')

DimHeatmap(object = Axin2,dims = 1,cells = 3000, balanced = TRUE)

##Determine the dimentionality of the dataset

ElbowPlot(object = Axin2)

#Cluster the cells

Axin2<- FindNeighbors(object = Axin2,dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
Axin2<- FindClusters(object = Axin2, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3873
## Number of edges: 129812
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9570
## Number of communities: 5
## Elapsed time: 0 seconds

#Non-linear dimentional reduction (UMAP and tSNE)

Axin2<- RunUMAP(object = Axin2,dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:03:05 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:03:05 Read 3873 rows and found 10 numeric columns
## 11:03:05 Using Annoy for neighbor search, n_neighbors = 30
## 11:03:05 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:03:05 Writing NN index file to temp file C:\Users\zjiang\AppData\Local\Temp\RtmpCung8D\file3718606e793d
## 11:03:05 Searching Annoy index using 1 thread, search_k = 3000
## 11:03:06 Annoy recall = 100%
## 11:03:07 Commencing smooth kNN distance calibration using 1 thread
## 11:03:07 Initializing from normalized Laplacian + noise
## 11:03:07 Commencing optimization for 500 epochs, with 154966 positive edges
## 11:03:16 Optimization finished
Axin2<- RunTSNE(object = Axin2,dims = 1:10)
DimPlot(object = Axin2, reduction = 'umap')

#Check the effect of sequence status, such as total read and the detected gene numbers

FeaturePlot(Axin2, features = c("nFeature_RNA", "nCount_RNA"))

#t-SNE plot

DimPlot(object = Axin2, reduction = 'tsne')

#Finding differentially expressed features (Cluster biomarkers)

Axin2.markers<- FindAllMarkers(object = Axin2, only.pos = TRUE, min.pct = 0.25,logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
Axin2.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
## # A tibble: 10 × 7
## # Groups:   cluster [5]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene     
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>    
##  1 7.59e-265       3.49 0.771 0.389 1.20e-260 0       Il1b     
##  2 1.02e-230       3.01 0.808 0.521 1.62e-226 0       Cxcl2    
##  3 0               2.02 0.996 0.938 0         1       Ltf      
##  4 0               1.53 0.987 0.882 0         1       Ifitm6   
##  5 1.07e-294       4.07 0.929 0.342 1.70e-290 2       Hist1h2ap
##  6 5.64e-187       4.03 0.686 0.178 8.95e-183 2       Prtn3    
##  7 9.70e- 13       5.99 0.489 0.336 1.54e-  8 3       Col1a1   
##  8 1.84e- 10       6.44 0.479 0.313 2.92e-  6 3       Bglap    
##  9 3.48e-148       6.99 0.972 0.044 5.52e-144 4       Mcpt8    
## 10 2.94e-113       7.06 0.861 0.046 4.66e-109 4       Prss34

#violin plot of gene expression in each cluster

VlnPlot(object = Axin2, features = c("Bglap"), slot='counts', log = TRUE, pt.size = 0)

#Draw the expression pattern of genes on UMAP

FeaturePlot(object = Axin2, features = c("Bglap"))

#Heatmap of specific genes for each cluster

Axin2.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(object = Axin2, features = top10$gene) + NoLegend()

#Name on each cluster

new.cluster.ids<- c("A","B","C","SuSC","E")
names(x=new.cluster.ids) <-levels(x=Axin2)
Axin2<-RenameIdents(object = Axin2,new.cluster.ids)
DimPlot(object = Axin2,reduction = 'umap',label = TRUE,pt.size = 0.5)