#if (!requireNamespace("BiocManager", quietly = TRUE))
#install.packages("BiocManager")
#install.packages("devtools")
#devtools::install_github('cole-trapnell-lab/monocle3')
#install.packages('Seurat')
#remotes::install_github('satijalab/seurat-wrappers')
library(Seurat)
## Warning: package 'Seurat' was built under R version 4.2.1
## Attaching SeuratObject
## Attaching sp
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.2.1
## Warning: package 'dplyr' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
library(monocle3)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## The following object is masked from 'package:sp':
##
## %over%
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomeInfoDb
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
##
## Attaching package: 'monocle3'
## The following objects are masked from 'package:Biobase':
##
## exprs, fData, fData<-, pData, pData<-
library(SeuratWrappers)
library(ggplot2)
library(ggridges)
data <- Read10X(data.dir = "D:/ChIP seq and RNA seq analysis/Arlotta_lab scRNA/E17.5.filtered_feature_bc_matrix")
data <- CreateSeuratObject(counts = data, project='E17.5', min.cells = 3, min.features = 200)
##Check and Visualize QC metrics like mitochonrial RNA reads.
data[["percent.mt"]] <- PercentageFeatureSet(data, pattern = "^MT-")
VlnPlot(data, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.
FeatureScatter(data, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
data <- subset(data, subset = nFeature_RNA >200 & nFeature_RNA <2500 & percent.mt <5)
data <- NormalizeData(data)
data <- FindVariableFeatures(data, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(data), 10)
top10
## [1] "Fabp7" "Hbb-bs" "Hba-a1" "Hba-a2" "Npy" "Mt3" "Sst" "Hbb-bt"
## [9] "Dbi" "Apoe"
plot1 <- VariableFeaturePlot(data)
LabelPoints(plot = plot1, points = top10, repel = T)
## When using repel, set xnudge and ynudge to 0 for optimal results
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 63 rows containing missing values (geom_point).
all.genes <- rownames(data)
data <- ScaleData(data, features = all.genes)
data <- RunPCA(data, features = VariableFeatures(object = data))
## PC_ 1
## Positive: Tubb3, Tmsb10, Tuba1a, Stmn2, Mllt11, Tubb2b, Cd24a, Nsg1, Mapt, Stmn1
## Map1b, Stmn3, Crmp1, Mef2c, Stmn4, Tubb5, Dpysl3, Actg1, Neurod6, Thra
## Ly6h, Mpped1, Nrep, mt-Cytb, Sox11, Serf1, Zeb2, Gpm6a, Ncam1, Actb
## Negative: Fabp7, Phgdh, Hes5, Dbi, Hmgb2, Mfge8, Mt3, Slc1a3, Aldoc, Ccnd2
## Ddah1, Pclaf, Pbk, Qk, Vim, Rgcc, Top2a, Mt1, Zfp36l1, Spc25
## Pax6, Cdca3, Birc5, Lockd, Cdk1, Ccna2, Ndrg2, Cdkn2c, Cks2, Sox9
## PC_ 2
## Positive: Igfbpl1, Sox11, Sstr2, Zbtb20, Mir99ahg, Nnat, Unc5d, Meis2, Neurod1, Sema3c
## Mdk, Elavl4, Pou3f2, Hmgn1, Malat1, Mllt3, Nrn1, Pantr1, Gm17750, Cited2
## Eomes, Mfng, Epha5, Mfap4, Pcp4, Pam, Plekhf2, 2600014E21Rik, H1f0, Nrxn3
## Negative: Gap43, Sybu, Mef2c, Tubb2a, Dync1i1, Ntm, Nme2, Opcml, Foxp1, Rac3
## Osbpl1a, S100a10, Camkv, Map1b, Stmn2, Mapt, Bend6, Crip2, Caly, Cadm2
## Ldha, Thra, Grin2b, Rprm, Hmgcs1, Ppp1r1a, Fdps, Ppia, Ssbp2, Fabp3
## PC_ 3
## Positive: Spc25, Top2a, Ube2c, Nusap1, Birc5, Ccnb1, Pbk, Ccna2, Cks2, Cdca3
## Cdk1, Cenpf, Stmn1, Prc1, Aurkb, Sgo1, Kif22, Cdca8, Tpx2, Pclaf
## Ckap2l, Cenpe, Cenpa, Cdc20, Spc24, Mis18bp1, Kif2c, Hmmr, Aurka, Ccnb2
## Negative: Aldoc, Apoe, Sparc, Ddah1, Mt1, Ndrg2, Mt3, Atp1a2, Tnc, Slc1a3
## Rgcc, Slc9a3r1, S100a1, Ttyh1, Pdpn, Pea15a, Mlc1, Trim47, Mfge8, Gm11627
## Fabp7, Mt2, Baalc, Ccn1, AW047730, Tst, Phgdh, S100a11, Vim, Ccdc80
## PC_ 4
## Positive: Ptn, Pantr1, 9130024F11Rik, Neurod6, Neurod2, Ttc28, Dok5, Zbtb18, Pou3f2, Satb2
## Nfix, Tmsb4x, Hs6st2, Gria2, Tsc22d1, Abracl, Gpm6a, Eif1b, Igfbpl1, Id2
## Pou3f3, Limch1, Frmd4a, Tafa2, Mn1, Bhlhe22, Dab1, Mdk, Plxna4, Mir99ahg
## Negative: Meg3, Nrxn3, Rpp25, Maf, Lhx6, Gm14204, Dlx2, Npas1, Dlx5, Mafb
## Arx, Sst, Nxph1, Dlx1, Gad2, Cadm1, Neto1, Slc32a1, Elmo1, Bcl11b
## Akap7, Tshz3, Gria1, Atp1b1, Celf4, Junb, Sox2ot, Erbb4, Cbfa2t3, Rian
## PC_ 5
## Positive: Nfib, Ppp1r1b, Meis2, Islr2, Pcp4, Sox5, Hs3st4, Tle4, Rprm, Wnt7b
## Nfia, Foxp2, Ppp2r2b, Nxph3, Nfix, Xpr1, Nfe2l3, Fezf2, Tmem108, Ramp3
## Ephb1, Kctd12, Sstr2, Id2, Kif26a, Lmo7, Nxph4, Syt6, Gpr22, Baiap2
## Negative: Rpp25, Maf, Nrxn3, Lhx6, Ptn, Gucy1a1, Gm14204, Nxph1, Npas1, Bzw2
## Tafa2, Arx, Mafb, Mif, Sst, Dlx2, Dok5, Dlx5, Dlx1, Mef2c
## Gpm6a, Inhba, Gad2, Runx1t1, Gria1, Limch1, Map1b, Sox2ot, Eif1b, Cux1
DimHeatmap(data, dims = 1:6, cells = 500, balanced = T)
ElbowPlot(data)
data <- FindNeighbors(data, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
data <- FindClusters(data, resolution = c(0.3, 0.5, 0.7, 1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8401
## Number of edges: 326259
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9365
## Number of communities: 11
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8401
## Number of edges: 326259
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9142
## Number of communities: 15
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8401
## Number of edges: 326259
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8934
## Number of communities: 16
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8401
## Number of edges: 326259
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8696
## Number of communities: 19
## Elapsed time: 1 seconds
head(data@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCTGAGCAGACTG-1 E17.5 5022 2087 0
## AAACCTGAGCCAGAAC-1 E17.5 3563 1706 0
## AAACCTGAGCTACCGC-1 E17.5 3414 1543 0
## AAACCTGAGGCAATTA-1 E17.5 5643 2149 0
## AAACCTGAGTGGGATC-1 E17.5 2551 1298 0
## AAACCTGAGTGTCTCA-1 E17.5 3733 1678 0
## RNA_snn_res.0.3 RNA_snn_res.0.5 RNA_snn_res.0.7
## AAACCTGAGCAGACTG-1 0 0 0
## AAACCTGAGCCAGAAC-1 4 8 5
## AAACCTGAGCTACCGC-1 0 0 0
## AAACCTGAGGCAATTA-1 2 2 2
## AAACCTGAGTGGGATC-1 0 0 0
## AAACCTGAGTGTCTCA-1 5 3 3
## RNA_snn_res.1 seurat_clusters
## AAACCTGAGCAGACTG-1 1 1
## AAACCTGAGCCAGAAC-1 8 8
## AAACCTGAGCTACCGC-1 2 2
## AAACCTGAGGCAATTA-1 4 4
## AAACCTGAGTGGGATC-1 2 2
## AAACCTGAGTGTCTCA-1 3 3
p1 <- DimPlot(data, group.by = "RNA_snn_res.0.3", label = T)
p2 <- DimPlot(data, group.by = "RNA_snn_res.0.5", label = T)
p3 <- DimPlot(data, group.by = "RNA_snn_res.0.7", label = T)
p4 <- DimPlot(data, group.by = "RNA_snn_res.1", label = T)
p1 + p2 + p3 + p4
Idents(data) <- "RNA_snn_res.0.7"
data <- RunUMAP(data, dims = 1:20)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 14:18:14 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:18:14 Read 8401 rows and found 20 numeric columns
## 14:18:14 Using Annoy for neighbor search, n_neighbors = 30
## 14:18:14 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 14:18:15 Writing NN index file to temp file C:\Users\Lenovo\AppData\Local\Temp\Rtmp2jaCs0\file126417c81007
## 14:18:15 Searching Annoy index using 1 thread, search_k = 3000
## 14:18:18 Annoy recall = 100%
## 14:18:19 Commencing smooth kNN distance calibration using 1 thread
## 14:18:20 Initializing from normalized Laplacian + noise
## 14:18:21 Commencing optimization for 500 epochs, with 356346 positive edges
## 14:18:44 Optimization finished
DimPlot(data, reduction = "umap")
VlnPlot(data, features = c("Pax6", "Rbfox1"), slot = "counts", log = TRUE)
FeaturePlot(data, features = c("Pax6", "Eomes", "Aldh1l1",
"Tbr1", "Olig2", "Sox2", "Cux2", "Neurog2"))
#new.cluster.ids <- c("A progenitors","B progenitors", "Cnn2+ Msx1+ population" , "B progenitors", "Putative early neuronal population", "Lmx1a+ population",
# "Intermediate Projenitors")
#names(new.cluster.ids) <- levels(data)
#data <- RenameIdents(data, new.cluster.ids)
#DimPlot(data, reduction = "umap", label = TRUE, pt.size = 0.5)
cds <- as.cell_data_set(data)
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does
## not calculate. Please run 'cluster_cells' on your cell_data_set object
head(colData(cds))
## DataFrame with 6 rows and 11 columns
## orig.ident nCount_RNA nFeature_RNA percent.mt
## <factor> <numeric> <integer> <numeric>
## AAACCTGAGCAGACTG-1 E17.5 5022 2087 0
## AAACCTGAGCCAGAAC-1 E17.5 3563 1706 0
## AAACCTGAGCTACCGC-1 E17.5 3414 1543 0
## AAACCTGAGGCAATTA-1 E17.5 5643 2149 0
## AAACCTGAGTGGGATC-1 E17.5 2551 1298 0
## AAACCTGAGTGTCTCA-1 E17.5 3733 1678 0
## RNA_snn_res.0.3 RNA_snn_res.0.5 RNA_snn_res.0.7
## <factor> <factor> <factor>
## AAACCTGAGCAGACTG-1 0 0 0
## AAACCTGAGCCAGAAC-1 4 8 5
## AAACCTGAGCTACCGC-1 0 0 0
## AAACCTGAGGCAATTA-1 2 2 2
## AAACCTGAGTGGGATC-1 0 0 0
## AAACCTGAGTGTCTCA-1 5 3 3
## RNA_snn_res.1 seurat_clusters ident Size_Factor
## <factor> <factor> <factor> <numeric>
## AAACCTGAGCAGACTG-1 1 1 0 5022
## AAACCTGAGCCAGAAC-1 8 8 5 3563
## AAACCTGAGCTACCGC-1 2 2 0 3414
## AAACCTGAGGCAATTA-1 4 4 2 5643
## AAACCTGAGTGGGATC-1 2 2 0 2551
## AAACCTGAGTGTCTCA-1 3 3 3 3733
fData(cds)
## DataFrame with 17008 rows and 0 columns
rownames(fData(cds))[1:10]
## [1] "Xkr4" "Gm19938" "Rp1" "Sox17" "Mrpl15" "Lypla1" "Tcea1"
## [8] "Rgs20" "Atp6v1h" "Rb1cc1"
fData(cds)$gene_short_name <- rownames(fData(cds))
head(fData(cds))
## DataFrame with 6 rows and 1 column
## gene_short_name
## <character>
## Xkr4 Xkr4
## Gm19938 Gm19938
## Rp1 Rp1
## Sox17 Sox17
## Mrpl15 Mrpl15
## Lypla1 Lypla1
head(counts(cds))
## [[ suppressing 8401 column names 'AAACCTGAGCAGACTG-1', 'AAACCTGAGCCAGAAC-1', 'AAACCTGAGCTACCGC-1' ... ]]
recreate.partitions <- c(rep(1, length(cds@colData@rownames)))
names(recreate.partitions) <- cds@colData@rownames
recreate.partitions <- as.factor(recreate.partitions)
recreate.partitions
cds@clusters@listData[["UMAP"]][["partitions"]] <- recreate.partitions
list.cluster <- data@active.ident
cds@clusters@listData[["UMAP"]][["clusters"]] <- list.cluster
cds@int_colData@listData[["reducedDims"]]@listData[["UMAP"]] <- data@reductions$umap@cell.embeddings
cluster.before.traj <-plot_cells(cds, color_cells_by = "cluster", label_groups_by_cluster = F,
group_label_size = 5) + theme(legend.position = "right")
## No trajectory to plot. Has learn_graph() been called yet?
cluster.before.traj
cds <- learn_graph(cds, use_partition = F)
##
|
| | 0%
|
|======================================================================| 100%
## Warning in igraph::graph.dfs(stree_ori, root = root_cell, neimode = "all", :
## Argument `neimode' is deprecated; use `mode' instead
plot_cells(cds, color_cells_by = "cluster", label_groups_by_cluster = F,
label_branch_points = T, label_roots = T, label_leaves = F,
group_label_size = 5)
cds <- order_cells(cds, reduction_method = "UMAP", root_cells = colnames(cds[, clusters(cds) == 5]))
plot_cells(cds, color_cells_by = "pseudotime", label_groups_by_cluster = T,
label_branch_points = T, label_roots = F, label_leaves = F)
## Cells aren't colored in a way that allows them to be grouped.
head(pseudotime(cds), 10)
## AAACCTGAGCAGACTG-1 AAACCTGAGCCAGAAC-1 AAACCTGAGCTACCGC-1 AAACCTGAGGCAATTA-1
## 24.50532906 0.04643948 22.93558701 33.49121137
## AAACCTGAGTGGGATC-1 AAACCTGAGTGTCTCA-1 AAACCTGCAAAGAATC-1 AAACCTGCACATCCAA-1
## 20.18428700 47.71342517 12.55716513 54.82340234
## AAACCTGCACATGGGA-1 AAACCTGCACGAAATA-1
## 23.75238886 26.69687864
cds$monocle3_pseudotime <- pseudotime(cds)
data.pseudo <- as.data.frame(colData(cds))
ggplot(data.pseudo, aes(monocle3_pseudotime, seurat_clusters, fill = seurat_clusters)) + geom_boxplot()
ggplot(data.pseudo, aes(monocle3_pseudotime, reorder(seurat_clusters, monocle3_pseudotime), fill = seurat_clusters)) + geom_boxplot()
deg <- graph_test(cds, neighbor_graph = "principal_graph")
deg %>% arrange(q_value) %>% filter(status == "OK") %>% head()
FeaturePlot(data, features = c("Rgs20", "Bend6", "Mcm3", "B3gat2"))
data$pseudotime <- pseudotime(cds)
FeaturePlot(data, features = "pseudotime")
RidgePlot(data, features = c("Sox2", "Eomes", "Neurod2"), sort = T, idents = c("5", "6", "0", "1", "7"))
## Picking joint bandwidth of 0.0446
## Picking joint bandwidth of 0.0551
## Picking joint bandwidth of 0.148
my_genes <- row.names(subset(fData(cds), gene_short_name %in% c("Sox2", "Eomes", "Neurod2")))
cds_subset <- cds[my_genes,]
plot_genes_in_pseudotime(cds_subset, color_cells_by = "monocle3_pseudotime" )
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: Removed 25203 rows containing missing values (geom_point).