## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
## Attaching SeuratObject
##
## Attaching package: 'Seurat'
## The following object is masked from 'package:SummarizedExperiment':
##
## Assays
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
markers <- read.delim('ABC_Marker.txt', header = T) # gene metadata
metadata <- read.delim('ABC_Meta.txt', header = T) # cell metadata
expr <- read.delim('ABC_umi_matrix_7551_cells.csv', header = T, sep = ',') # expression matrix
# create seurat object ---------------
expr.t <- t(expr)
seu.obj <- CreateSeuratObject(counts = expr.t)
View(seu.obj@meta.data)
seu.obj@meta.data <- merge(seu.obj@meta.data, metadata, by.x = 'row.names', by.y = 'cell_id')
#View(seu.obj@meta.data)
seu.obj@meta.data <- seu.obj@meta.data %>%
column_to_rownames(var = 'Row.names')
seu.obj$mitopercent <- PercentageFeatureSet(seu.obj, pattern = '^MT-')
seu.obj.filtered <- subset(seu.obj, subset = nCount_RNA > 800 &
nFeature_RNA > 500 &
mitopercent < 10)
unique(seu.obj.filtered@meta.data$population)
## [1] "sp" "t" "mo" "nk" "e" "b" "n"
Idents(seu.obj.filtered) <- seu.obj.filtered$population
b.seu <- subset(seu.obj.filtered, idents = "b")
b.seu
## An object of class Seurat
## 19813 features across 1448 samples within 1 assay
## Active assay: RNA (19813 features, 0 variable features)
unique(b.seu@meta.data$redefined_cluster)
## [1] "Pre-B/Regulatory B" "Immature B" "Naive B"
## [4] "Cycling Pre-B" "Pro-B" "Memory B"
## [7] "Plasma"
## Centering and scaling data matrix
## PC_ 1
## Positive: PTMA, VPREB3, SOX4, STMN1, HMGB2, VPREB1, CD24, NUSAP1, KIAA0101, TOP2A
## CENPF, MKI67, HBB, DEK, TPX2, TYMS, AURKB, RRM2, BIRC5, H2AFY
## ZWINT, HJURP, HIST1H2AM, C21orf58, IGLL1, MME, HBA2, HBA1, TCL1A, HIST1H1B
## Negative: SLAMF7, FKBP11, SDC1, DERL3, TXNDC5, PRDX4, SEC11C, SSR4, SELM, FNDC3B
## SPAG4, FKBP2, TNFRSF17, ELL2, TRIB1, CD27, PSAP, MEI1, ZBTB38, HDLBP
## HSP90B1, VIMP, SCNN1B, HERPUD1, PARM1, MYDGF, FAM46C, SSR3, CD63, SEL1L
## PC_ 2
## Positive: TMEM154, SCIMP, GPR183, HES1, DUSP2, BCL2A1, LY9, CNTNAP2, NFKBIA, TCL1A
## MACROD2, FCRL3, BTLA, COTL1, ARL4C, SPOCK2, MPEG1, FTL, IER2, SNX10
## BACE2, RIN3, TSC22D3, IL10RA, PDE4B, CNFN, IL6, DBNDD1, IL2RA, PYHIN1
## Negative: TYMS, AURKB, NUSAP1, RRM2, TOP2A, CENPF, TPX2, MKI67, UBE2C, KIAA0101
## GTSE1, BIRC5, HJURP, ZWINT, CKAP2L, CCNB2, KIF15, CDCA8, NUCB2, UBE2T
## CDKN3, CDCA3, TK1, ESCO2, HMGB2, NCAPG, ASPM, SPC25, DLGAP5, HIST1H1B
## PC_ 3
## Positive: TMEM154, GPR183, SCIMP, CRIP1, IER2, LY9, TSC22D3, BCL2A1, NFKBIA, HERPUD1
## HES1, AURKB, DUSP2, SPC25, COTL1, CENPM, KIF15, NCAPG, CDCA8, E2F7
## UBE2T, CLECL1, RRM2, HJURP, SKA3, CNTNAP2, MCM5, RIC3, PBK, JUN
## Negative: ARPP21, C10orf10, CD9, DNTT, MME, VPREB1, SMIM3, SOX4, AKAP12, RAG1
## TOP2B, MYB, IGLL1, REXO2, ERG, SOCS2, SMAD1, HPS4, VPREB3, MT1X
## RPS4Y2, MZB1, FHIT, HMHB1, GLRX, GNG11, CD99, CMTM8, TNFRSF21, HHIP
## PC_ 4
## Positive: S100A9, S100A8, S100A12, FCN1, LYZ, CXCL8, MMP9, PTGS2, S100P, LTF
## G0S2, ANXA1, TYROBP, PLAUR, LCN2, CYP4F3, PLBD1, FYB, MNDA, HBD
## AHSP, CLEC12A, FCER1G, CAMP, SERPINA1, HBA1, CA1, HBM, GYPB, HBA2
## Negative: PPIA, PTMA, PRDX1, CD99, MIF, VPREB3, GAPDH, VPREB1, SMIM3, C10orf10
## FAM26F, COX6C, IGLL1, OST4, KRTCAP2, GLRX, CD9, DNTT, ARPP21, CMTM8
## HHIP, ADA, ERG, PEBP1, LDHB, HMGN2, HMHB1, EGFL7, HMGB1, SOCS2
## PC_ 5
## Positive: CD99, GAPDH, ERG, DNTT, S100A4, EGFL7, IFITM3, SOCS2, C5orf56, FAM26F
## GPR183, TXN, RPS4Y2, S100A6, AIF1, C10orf10, COTL1, PPIA, ADA, HPS4
## GNG11, NPY, BCL2A1, PLSCR1, CLECL1, COL5A1, TMEM154, ITGB1, CRIP1, HMHB1
## Negative: TCL1A, PCDH9, ACSM3, IGLL5, CCDC191, BMP3, NSMCE1, HRK, CCDC112, LAMP5
## TFDP2, GAS7, EMP2, HIST1H1C, DDX54, BEST3, RGS16, STMN1, RAPGEF5, HIST1H2BG
## DCAF12, CD24, RP1.34B20.21, RIMS3, ROR1, SOX4, RGS2, ZBTB18, TMEM38A, FBXO25
## Computing nearest neighbor graph
## Computing SNN
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 13:06:02 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:06:02 Read 1448 rows and found 30 numeric columns
## 13:06:02 Using Annoy for neighbor search, n_neighbors = 50
## 13:06:02 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:06:03 Writing NN index file to temp file /tmp/RtmpXRoSFm/file254222d6f973
## 13:06:03 Searching Annoy index using 1 thread, search_k = 5000
## 13:06:03 Annoy recall = 100%
## 13:06:03 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 50
## 13:06:04 Initializing from normalized Laplacian + noise (using irlba)
## 13:06:04 Commencing optimization for 500 epochs, with 101148 positive edges
## 13:06:06 Optimization finished

cds <- as.cell_data_set(b.seu)
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does
## not calculate. Please run 'cluster_cells' on your cell_data_set object
#Cell Metadata
colData(cds)
## DataFrame with 1448 rows and 16 columns
## orig.ident nCount_RNA nFeature_RNA celltype UMI_number
## <factor> <numeric> <integer> <character> <integer>
## immB_bBM1_L10_bar67 immB 27000 2237 immB 26975
## immB_bBM1_L10_bar68 immB 43490 2748 immB 43448
## immB_bBM1_L10_bar69 immB 10705 2282 immB 10696
## immB_bBM1_L10_bar70 immB 37543 2525 immB 37528
## immB_bBM1_L10_bar71 immB 30581 2124 immB 30577
## ... ... ... ... ... ...
## regB_bBM1_L27_bar78 regB 75205 3592 regB 75161
## regB_bBM1_L27_bar79 regB 73826 2362 regB 73772
## regB_bBM1_L27_bar80 regB 435971 5855 regB 435843
## regB_bBM1_L27_bar81 regB 71972 4209 regB 71936
## regB_bBM1_L27_bar82 regB 14328 1993 regB 14304
## gene_number individual tissue population cluster
## <integer> <character> <character> <character> <character>
## immB_bBM1_L10_bar67 2233 bBM1 bBM b B_C3
## immB_bBM1_L10_bar68 2744 bBM1 bBM b B_C4
## immB_bBM1_L10_bar69 2278 bBM1 bBM b B_C4
## immB_bBM1_L10_bar70 2523 bBM1 bBM b B_C4
## immB_bBM1_L10_bar71 2120 bBM1 bBM b B_C4
## ... ... ... ... ... ...
## regB_bBM1_L27_bar78 3585 bBM1 bBM b B_C2
## regB_bBM1_L27_bar79 2356 bBM1 bBM b B_C3
## regB_bBM1_L27_bar80 5824 bBM1 bBM b B_C2
## regB_bBM1_L27_bar81 4201 bBM1 bBM b B_C2
## regB_bBM1_L27_bar82 1985 bBM1 bBM b B_C4
## redefined_cluster mitopercent RNA_snn_res.0.9
## <character> <numeric> <factor>
## immB_bBM1_L10_bar67 Pre-B/Regulatory B 0 1
## immB_bBM1_L10_bar68 Immature B 0 1
## immB_bBM1_L10_bar69 Immature B 0 2
## immB_bBM1_L10_bar70 Immature B 0 2
## immB_bBM1_L10_bar71 Immature B 0 1
## ... ... ... ...
## regB_bBM1_L27_bar78 Cycling Pre-B 0 6
## regB_bBM1_L27_bar79 Pre-B/Regulatory B 0 1
## regB_bBM1_L27_bar80 Cycling Pre-B 0 6
## regB_bBM1_L27_bar81 Cycling Pre-B 0 6
## regB_bBM1_L27_bar82 Immature B 0 1
## seurat_clusters ident Size_Factor
## <factor> <factor> <numeric>
## immB_bBM1_L10_bar67 1 1 27000
## immB_bBM1_L10_bar68 1 1 43490
## immB_bBM1_L10_bar69 2 2 10705
## immB_bBM1_L10_bar70 2 2 37543
## immB_bBM1_L10_bar71 1 1 30581
## ... ... ... ...
## regB_bBM1_L27_bar78 6 6 75205
## regB_bBM1_L27_bar79 1 1 73826
## regB_bBM1_L27_bar80 6 6 435971
## regB_bBM1_L27_bar81 6 6 71972
## regB_bBM1_L27_bar82 1 1 14328
#Gene Metdata
fData(cds)
## DataFrame with 19813 rows and 0 columns
rownames(fData(cds))[1:10]
## [1] "OR4F5" "FO538757.3" "FO538757.2" "OR4F29" "OR4F16"
## [6] "SAMD11" "NOC2L" "KLHL17" "PLEKHN1" "PERM1"
#adding a column for "shorter gene name"
fData(cds)$gene_short_name <- rownames(fData(cds))
## [[ suppressing 32 column names 'immB_bBM1_L10_bar67', 'immB_bBM1_L10_bar68', 'immB_bBM1_L10_bar69' ... ]]
## [[ suppressing 32 column names 'immB_bBM1_L10_bar67', 'immB_bBM1_L10_bar68', 'immB_bBM1_L10_bar69' ... ]]
## [[ suppressing 32 column names 'immB_bBM1_L10_bar67', 'immB_bBM1_L10_bar68', 'immB_bBM1_L10_bar69' ... ]]
reacreate.partition <- c(rep(1,length(cds@colData@rownames)))
names(reacreate.partition) <- cds@colData@rownames
reacreate.partition <- as.factor(reacreate.partition)
cds@clusters$UMAP$partitions <- reacreate.partition
list_cluster <- b.seu@active.ident
cds@clusters$UMAP$clusters <- list_cluster
cds@int_colData@listData$reducedDims$UMAP <- b.seu@reductions$umap@cell.embeddings
cluster.before.trajectory <- plot_cells(cds,
color_cells_by = 'cluster',
label_groups_by_cluster = FALSE,
group_label_size = 5) +
theme(legend.position = "right")
## No trajectory to plot. Has learn_graph() been called yet?
## No trajectory to plot. Has learn_graph() been called yet?


cds <- order_cells(cds, reduction_method = 'UMAP', root_cells = colnames(cds[,clusters(cds) == 5]))
plot_cells(cds,
color_cells_by = 'pseudotime',
label_groups_by_cluster = FALSE,
label_branch_points = FALSE,
label_roots = FALSE,
label_leaves = FALSE)
## Cells aren't colored in a way that allows them to be grouped.


FeaturePlot(b.seu, features = c('E2F2', 'STMN1', 'CD52'))

b.seu$pseudotime <- pseudotime(cds)
Idents(b.seu) <- b.seu$redefined_cluster
FeaturePlot(b.seu, features = "pseudotime", label = T)

cluster.before.trajectory <- plot_cells(cds,
color_cells_by = 'cluster',
label_groups_by_cluster = FALSE,
group_label_size = 5) +
theme(legend.position = "right")
cluster.names <- plot_cells(cds,
color_cells_by = "redefined_cluster",
label_groups_by_cluster = FALSE,
group_label_size = 5) +
scale_color_manual(values = c('red', 'blue', 'green', 'maroon', 'yellow', 'grey', 'cyan')) +
theme(legend.position = "right")
cluster.before.trajectory | cluster.names

cluster.before.trajectory

cluster.names
