R Notebook

## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)

## Attaching SeuratObject

## 
## Attaching package: 'Seurat'

## The following object is masked from 'package:SummarizedExperiment':
## 
##     Assays

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

markers <- read.delim('ABC_Marker.txt', header = T) # gene metadata
metadata <- read.delim('ABC_Meta.txt', header = T) # cell metadata
expr <- read.delim('ABC_umi_matrix_7551_cells.csv', header = T, sep = ',') # expression matrix

# create seurat object ---------------
expr.t <- t(expr)
seu.obj <- CreateSeuratObject(counts = expr.t)
View(seu.obj@meta.data)
seu.obj@meta.data <- merge(seu.obj@meta.data, metadata, by.x = 'row.names', by.y = 'cell_id')
#View(seu.obj@meta.data)
seu.obj@meta.data <- seu.obj@meta.data %>% 
  column_to_rownames(var = 'Row.names')

seu.obj$mitopercent <- PercentageFeatureSet(seu.obj, pattern = '^MT-')
seu.obj.filtered <- subset(seu.obj, subset = nCount_RNA > 800 &
                    nFeature_RNA > 500 &
                    mitopercent < 10)

unique(seu.obj.filtered@meta.data$population)

## [1] "sp" "t"  "mo" "nk" "e"  "b"  "n"

Idents(seu.obj.filtered) <- seu.obj.filtered$population
b.seu <- subset(seu.obj.filtered, idents = "b")
b.seu

## An object of class Seurat 
## 19813 features across 1448 samples within 1 assay 
## Active assay: RNA (19813 features, 0 variable features)

unique(b.seu@meta.data$redefined_cluster)

## [1] "Pre-B/Regulatory B" "Immature B"         "Naive B"           
## [4] "Cycling Pre-B"      "Pro-B"              "Memory B"          
## [7] "Plasma"

## Centering and scaling data matrix

## PC_ 1 
## Positive:  PTMA, VPREB3, SOX4, STMN1, HMGB2, VPREB1, CD24, NUSAP1, KIAA0101, TOP2A 
##     CENPF, MKI67, HBB, DEK, TPX2, TYMS, AURKB, RRM2, BIRC5, H2AFY 
##     ZWINT, HJURP, HIST1H2AM, C21orf58, IGLL1, MME, HBA2, HBA1, TCL1A, HIST1H1B 
## Negative:  SLAMF7, FKBP11, SDC1, DERL3, TXNDC5, PRDX4, SEC11C, SSR4, SELM, FNDC3B 
##     SPAG4, FKBP2, TNFRSF17, ELL2, TRIB1, CD27, PSAP, MEI1, ZBTB38, HDLBP 
##     HSP90B1, VIMP, SCNN1B, HERPUD1, PARM1, MYDGF, FAM46C, SSR3, CD63, SEL1L 
## PC_ 2 
## Positive:  TMEM154, SCIMP, GPR183, HES1, DUSP2, BCL2A1, LY9, CNTNAP2, NFKBIA, TCL1A 
##     MACROD2, FCRL3, BTLA, COTL1, ARL4C, SPOCK2, MPEG1, FTL, IER2, SNX10 
##     BACE2, RIN3, TSC22D3, IL10RA, PDE4B, CNFN, IL6, DBNDD1, IL2RA, PYHIN1 
## Negative:  TYMS, AURKB, NUSAP1, RRM2, TOP2A, CENPF, TPX2, MKI67, UBE2C, KIAA0101 
##     GTSE1, BIRC5, HJURP, ZWINT, CKAP2L, CCNB2, KIF15, CDCA8, NUCB2, UBE2T 
##     CDKN3, CDCA3, TK1, ESCO2, HMGB2, NCAPG, ASPM, SPC25, DLGAP5, HIST1H1B 
## PC_ 3 
## Positive:  TMEM154, GPR183, SCIMP, CRIP1, IER2, LY9, TSC22D3, BCL2A1, NFKBIA, HERPUD1 
##     HES1, AURKB, DUSP2, SPC25, COTL1, CENPM, KIF15, NCAPG, CDCA8, E2F7 
##     UBE2T, CLECL1, RRM2, HJURP, SKA3, CNTNAP2, MCM5, RIC3, PBK, JUN 
## Negative:  ARPP21, C10orf10, CD9, DNTT, MME, VPREB1, SMIM3, SOX4, AKAP12, RAG1 
##     TOP2B, MYB, IGLL1, REXO2, ERG, SOCS2, SMAD1, HPS4, VPREB3, MT1X 
##     RPS4Y2, MZB1, FHIT, HMHB1, GLRX, GNG11, CD99, CMTM8, TNFRSF21, HHIP 
## PC_ 4 
## Positive:  S100A9, S100A8, S100A12, FCN1, LYZ, CXCL8, MMP9, PTGS2, S100P, LTF 
##     G0S2, ANXA1, TYROBP, PLAUR, LCN2, CYP4F3, PLBD1, FYB, MNDA, HBD 
##     AHSP, CLEC12A, FCER1G, CAMP, SERPINA1, HBA1, CA1, HBM, GYPB, HBA2 
## Negative:  PPIA, PTMA, PRDX1, CD99, MIF, VPREB3, GAPDH, VPREB1, SMIM3, C10orf10 
##     FAM26F, COX6C, IGLL1, OST4, KRTCAP2, GLRX, CD9, DNTT, ARPP21, CMTM8 
##     HHIP, ADA, ERG, PEBP1, LDHB, HMGN2, HMHB1, EGFL7, HMGB1, SOCS2 
## PC_ 5 
## Positive:  CD99, GAPDH, ERG, DNTT, S100A4, EGFL7, IFITM3, SOCS2, C5orf56, FAM26F 
##     GPR183, TXN, RPS4Y2, S100A6, AIF1, C10orf10, COTL1, PPIA, ADA, HPS4 
##     GNG11, NPY, BCL2A1, PLSCR1, CLECL1, COL5A1, TMEM154, ITGB1, CRIP1, HMHB1 
## Negative:  TCL1A, PCDH9, ACSM3, IGLL5, CCDC191, BMP3, NSMCE1, HRK, CCDC112, LAMP5 
##     TFDP2, GAS7, EMP2, HIST1H1C, DDX54, BEST3, RGS16, STMN1, RAPGEF5, HIST1H2BG 
##     DCAF12, CD24, RP1.34B20.21, RIMS3, ROR1, SOX4, RGS2, ZBTB18, TMEM38A, FBXO25

## Computing nearest neighbor graph

## Computing SNN

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 13:06:02 UMAP embedding parameters a = 0.9922 b = 1.112

## 13:06:02 Read 1448 rows and found 30 numeric columns

## 13:06:02 Using Annoy for neighbor search, n_neighbors = 50

## 13:06:02 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 13:06:03 Writing NN index file to temp file /tmp/RtmpXRoSFm/file254222d6f973
## 13:06:03 Searching Annoy index using 1 thread, search_k = 5000
## 13:06:03 Annoy recall = 100%
## 13:06:03 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 50
## 13:06:04 Initializing from normalized Laplacian + noise (using irlba)
## 13:06:04 Commencing optimization for 500 epochs, with 101148 positive edges
## 13:06:06 Optimization finished

cds <- as.cell_data_set(b.seu)

## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does
## not calculate. Please run 'cluster_cells' on your cell_data_set object

#Cell Metadata
colData(cds)

## DataFrame with 1448 rows and 16 columns
##                     orig.ident nCount_RNA nFeature_RNA    celltype UMI_number
##                       <factor>  <numeric>    <integer> <character>  <integer>
## immB_bBM1_L10_bar67       immB      27000         2237        immB      26975
## immB_bBM1_L10_bar68       immB      43490         2748        immB      43448
## immB_bBM1_L10_bar69       immB      10705         2282        immB      10696
## immB_bBM1_L10_bar70       immB      37543         2525        immB      37528
## immB_bBM1_L10_bar71       immB      30581         2124        immB      30577
## ...                        ...        ...          ...         ...        ...
## regB_bBM1_L27_bar78       regB      75205         3592        regB      75161
## regB_bBM1_L27_bar79       regB      73826         2362        regB      73772
## regB_bBM1_L27_bar80       regB     435971         5855        regB     435843
## regB_bBM1_L27_bar81       regB      71972         4209        regB      71936
## regB_bBM1_L27_bar82       regB      14328         1993        regB      14304
##                     gene_number  individual      tissue  population     cluster
##                       <integer> <character> <character> <character> <character>
## immB_bBM1_L10_bar67        2233        bBM1         bBM           b        B_C3
## immB_bBM1_L10_bar68        2744        bBM1         bBM           b        B_C4
## immB_bBM1_L10_bar69        2278        bBM1         bBM           b        B_C4
## immB_bBM1_L10_bar70        2523        bBM1         bBM           b        B_C4
## immB_bBM1_L10_bar71        2120        bBM1         bBM           b        B_C4
## ...                         ...         ...         ...         ...         ...
## regB_bBM1_L27_bar78        3585        bBM1         bBM           b        B_C2
## regB_bBM1_L27_bar79        2356        bBM1         bBM           b        B_C3
## regB_bBM1_L27_bar80        5824        bBM1         bBM           b        B_C2
## regB_bBM1_L27_bar81        4201        bBM1         bBM           b        B_C2
## regB_bBM1_L27_bar82        1985        bBM1         bBM           b        B_C4
##                      redefined_cluster mitopercent RNA_snn_res.0.9
##                            <character>   <numeric>        <factor>
## immB_bBM1_L10_bar67 Pre-B/Regulatory B           0               1
## immB_bBM1_L10_bar68         Immature B           0               1
## immB_bBM1_L10_bar69         Immature B           0               2
## immB_bBM1_L10_bar70         Immature B           0               2
## immB_bBM1_L10_bar71         Immature B           0               1
## ...                                ...         ...             ...
## regB_bBM1_L27_bar78      Cycling Pre-B           0               6
## regB_bBM1_L27_bar79 Pre-B/Regulatory B           0               1
## regB_bBM1_L27_bar80      Cycling Pre-B           0               6
## regB_bBM1_L27_bar81      Cycling Pre-B           0               6
## regB_bBM1_L27_bar82         Immature B           0               1
##                     seurat_clusters    ident Size_Factor
##                            <factor> <factor>   <numeric>
## immB_bBM1_L10_bar67               1        1       27000
## immB_bBM1_L10_bar68               1        1       43490
## immB_bBM1_L10_bar69               2        2       10705
## immB_bBM1_L10_bar70               2        2       37543
## immB_bBM1_L10_bar71               1        1       30581
## ...                             ...      ...         ...
## regB_bBM1_L27_bar78               6        6       75205
## regB_bBM1_L27_bar79               1        1       73826
## regB_bBM1_L27_bar80               6        6      435971
## regB_bBM1_L27_bar81               6        6       71972
## regB_bBM1_L27_bar82               1        1       14328

#Gene Metdata
fData(cds)

## DataFrame with 19813 rows and 0 columns

rownames(fData(cds))[1:10]

##  [1] "OR4F5"      "FO538757.3" "FO538757.2" "OR4F29"     "OR4F16"    
##  [6] "SAMD11"     "NOC2L"      "KLHL17"     "PLEKHN1"    "PERM1"

#adding a column for "shorter gene name"
fData(cds)$gene_short_name <- rownames(fData(cds))

##   [[ suppressing 32 column names 'immB_bBM1_L10_bar67', 'immB_bBM1_L10_bar68', 'immB_bBM1_L10_bar69' ... ]]
##   [[ suppressing 32 column names 'immB_bBM1_L10_bar67', 'immB_bBM1_L10_bar68', 'immB_bBM1_L10_bar69' ... ]]
##   [[ suppressing 32 column names 'immB_bBM1_L10_bar67', 'immB_bBM1_L10_bar68', 'immB_bBM1_L10_bar69' ... ]]

reacreate.partition <- c(rep(1,length(cds@colData@rownames)))
names(reacreate.partition) <- cds@colData@rownames
reacreate.partition <- as.factor(reacreate.partition)

cds@clusters$UMAP$partitions <- reacreate.partition

list_cluster <- b.seu@active.ident
cds@clusters$UMAP$clusters <- list_cluster

cds@int_colData@listData$reducedDims$UMAP <- b.seu@reductions$umap@cell.embeddings

cluster.before.trajectory <- plot_cells(cds,
           color_cells_by = 'cluster',
           label_groups_by_cluster = FALSE,
           group_label_size = 5) +
  theme(legend.position = "right")

## No trajectory to plot. Has learn_graph() been called yet?

## No trajectory to plot. Has learn_graph() been called yet?

cds <- order_cells(cds, reduction_method = 'UMAP', root_cells = colnames(cds[,clusters(cds) == 5]))

plot_cells(cds,
           color_cells_by = 'pseudotime',
           label_groups_by_cluster = FALSE,
           label_branch_points = FALSE,
           label_roots = FALSE,
           label_leaves = FALSE)

## Cells aren't colored in a way that allows them to be grouped.

FeaturePlot(b.seu, features = c('E2F2', 'STMN1', 'CD52'))

b.seu$pseudotime <- pseudotime(cds)
Idents(b.seu) <- b.seu$redefined_cluster
FeaturePlot(b.seu, features = "pseudotime", label = T)

cluster.before.trajectory <- plot_cells(cds,
           color_cells_by = 'cluster',
           label_groups_by_cluster = FALSE,
           group_label_size = 5) +
  theme(legend.position = "right")

cluster.names <- plot_cells(cds,
           color_cells_by = "redefined_cluster",
           label_groups_by_cluster = FALSE,
           group_label_size = 5) +
  scale_color_manual(values = c('red', 'blue', 'green', 'maroon', 'yellow', 'grey', 'cyan')) +
  theme(legend.position = "right")

cluster.before.trajectory | cluster.names

cluster.before.trajectory

cluster.names