https://github.com/satijalab/seurat/issues/1621 https://github.com/satijalab/seurat/issues/2340 https://github.com/satijalab/seurat/issues/1883
knitr::opts_knit$set(root.dir = ".")
library(dplyr)
library(dittoSeq)
library(ggrepel)
library(ggtree)
library(parallel)
library(plotly) # 3D plot
library(Seurat) # Idents()
library(SeuratDisk) # SaveH5Seurat()
library(tibble) # rownames_to_column()
library(harmony) # RunHarmony()
#options(mc.cores = detectCores() - 1)
pigs.merged <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.merged) <- pigs.merged$merged_clusters
DefaultAssay(pigs.merged) <- "RNA"
# natural-log
pigs.merged <- NormalizeData(pigs.merged, verbose = FALSE)
pigs.merged
## An object of class Seurat
## 42773 features across 8934 samples within 2 assays
## Active assay: RNA (21805 features, 0 variable features)
## 1 other assay present: SCT
## 3 dimensional reductions calculated: pca, harmony, umap
# UMAP
u1 <- DimPlot(object = pigs.merged,
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 4,
repel = TRUE)
u1
# subset
neuron <- subset(pigs.merged, merged_clusters == "Neuron")
# UMAP of neuron only
neuron_colors <- c("gold")
u1 <- DimPlot(object = neuron,
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 4,
repel = TRUE,
cols = neuron_colors)
u1
# show neuron cells by treatment
u2 <- DimPlot(object = neuron,
reduction = "umap",
label = TRUE,
label.box = TRUE,
label.size = 4,
repel = TRUE,
group.by = "treatment",
cols = treatment_colors)
u2
# Visualize the number of cell counts per sample
data <- as.data.frame(table(neuron$sample))
colnames(data) <- c("sample","frequency")
ncells <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) +
geom_col() +
theme_classic() +
geom_text(aes(label = frequency),
position=position_dodge(width=0.9),
vjust=-0.25) +
scale_fill_manual(values = sample_colors) +
# scale_y_continuous(breaks = seq(0,30000, by = 5000), limits = c(0,30000)) +
ggtitle("Cells per sample") +
theme(legend.position = "none") +
theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells
## Top transcripts
df <- data.frame(row.names = rownames(neuron))
df$rsum <- rowSums(x = neuron, slot = "counts")
df$gene_name <- rownames(df)
df <- df[order(df$rsum,decreasing = TRUE),]
head(df, 10)
## rsum gene_name
## KK-MALAT1 220728 KK-MALAT1
## NRXN3 151323 NRXN3
## RBFOX1 126301 RBFOX1
## KCNIP4 119233 KCNIP4
## OPCML 112231 OPCML
## DPP10 101984 DPP10
## DLG2 99168 DLG2
## CADM2 98708 CADM2
## PCDH9 91867 PCDH9
## NRG3 90702 NRG3
# Identify the most variable genes
neuron <- FindVariableFeatures(neuron,
selection.method = "vst", # default vst
nfeatures = 2000, # default 2000
verbose = FALSE)
# view top variable genes
top40 <- head(VariableFeatures(neuron), 40)
top40
## [1] "EYA1" "GRIK1" "NXPH1"
## [4] "TTR" "RELN" "RSAD2"
## [7] "ZNF536" "ADARB2" "TMEM132C"
## [10] "SOX6" "SST" "FOXP2"
## [13] "ERBB4" "ENSSSCG00000050139" "IL1RAPL2"
## [16] "ENSSSCG00000033089" "ENSSSCG00000004511" "ANK1"
## [19] "CLIC6" "NELL1" "PRLR"
## [22] "HTR2C" "PDE1C" "PTCHD4"
## [25] "COL8A1" "HS3ST4" "RNF220"
## [28] "CDH13" "SLC35F4" "PTPRM"
## [31] "GRIN3A" "SDK1" "IDO2"
## [34] "CEMIP" "OPALIN" "BTBD11"
## [37] "ZNF385D" "MX2" "SORCS1"
## [40] "LMX1A"
# plot variable features with labels
VarFeatPlot <- VariableFeaturePlot(neuron, cols = c("gray47","red"))
VarFeatPlotLabel <- LabelPoints(plot = VarFeatPlot,
points = top40, repel = TRUE, fontface="italic",
xnudge = 0, ynudge = 0, max.overlaps = 12)
VarFeatPlotLabel
## Median absolute deviation
Remove outlier cells whose log-library size is more than 3 median absolute deviations (MADs) above or below the median log-library size.
# MAD example
log.lib <- as.numeric(log10(neuron$nCount_RNA))
med <- median(log.lib)
abs.dev <- abs(log.lib - med)
mad <- median(abs.dev)
mad <- mad * 1.4826 # scale by the consistency constant so the MAD is comparable to a standard deviation
mad
## [1] 0.4667321
# stats function
mad <- mad(log.lib)
mad
## [1] 0.4667321
# flag cells more than 3 MADs above or below the median log-library size
remove <- abs(log.lib - median(log.lib)) / mad(log.lib) > 3
table(remove)
## remove
## FALSE
## 4517
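The flags above are only tabulated; no cell here exceeds 3 MADs, so nothing would be dropped. If cells did need removing, a minimal sketch of applying the filter to the Seurat object, assuming the `remove` vector computed above:
# keep only cells within 3 MADs of the median log-library size
keep.cells <- colnames(neuron)[!remove]
neuron <- subset(neuron, cells = keep.cells)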
# split object by sample
Idents(neuron) <- neuron$sample
neuron.split <- SplitObject(neuron, split.by = "sample")
# SCTransform and regress percent.mt
neuron.split <- lapply(neuron.split, SCTransform, vars.to.regress = "percent.mt")
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 9945 by 129
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 129 cells
## Found 37 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 9945 genes
## Computing corrected count matrix for 9945 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 7.262964 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt
## Centering data matrix
## Set default assay to SCT
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 13900 by 639
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 639 cells
## Found 33 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 13900 genes
## Computing corrected count matrix for 13900 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 20.52171 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt
## Centering data matrix
## Set default assay to SCT
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 8563 by 179
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 179 cells
## Found 41 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 8563 genes
## Computing corrected count matrix for 8563 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 7.700988 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt
## Centering data matrix
## Set default assay to SCT
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 19567 by 3570
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 3570 cells
## Found 101 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 19567 genes
## Computing corrected count matrix for 19567 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 1.638789 mins
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt
## Centering data matrix
## Set default assay to SCT
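Because the `parallel` package is loaded (and `mc.cores` is set, though commented out, at the top of this script), the per-sample SCTransform calls could optionally be forked instead of run serially. A sketch of that alternative, not used for the results shown here; `mclapply()` only forks on Unix-like systems:
# optional: one forked worker per sample (falls back to serial on Windows)
neuron.split <- parallel::mclapply(neuron.split,
SCTransform,
vars.to.regress = "percent.mt",
mc.cores = min(length(neuron.split), parallel::detectCores() - 1))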
# Choose the features to use when integrating the split datasets.
# Use nfeatures = 3000 to match the number of variable features returned by SCTransform above.
var.features <- SelectIntegrationFeatures(object.list = neuron.split,
nfeatures = 3000)
# Merge the split object
neuron.sct.merged <- merge(x = neuron.split[[1]],
y = c(neuron.split[[2]], neuron.split[[3]], neuron.split[[4]]),
project = "LPS Pigs Neurons")
# Define the variable features
VariableFeatures(neuron.sct.merged) <- var.features
# Run PCA on the merged object
neuron.sct.merged <- RunPCA(object = neuron.sct.merged, assay = "SCT")
## PC_ 1
## Positive: KCNIP4, TAFA1, ARPP21, NRG1, LDB2, PDE1A, ST6GALNAC5, SLIT3, PTPRK, ENSSSCG00000011729
## GRM7, PCSK2, CUX2, DSCAM, SV2B, GPC6, ENSSSCG00000049709, RYR2, PHACTR1, HS3ST4
## RALYL, ENSSSCG00000014097, ENSSSCG00000042224, DLG1, KCNB2, KCNQ5, GALNT17, HS6ST3, ENSSSCG00000035525, NELL2
## Negative: ERBB4, NXPH1, PTPRM, SOX6, ZNF536, KIAA1217, GALNTL6, KCNC2, ZNF385D, PTCHD4
## TMEM132C, INPP4B, BTBD11, ZNF804A, GAD2, SLC6A1, ENSSSCG00000044043, ZNF804B, SDK1, GRIP1
## NHS, ANK1, UBASH3B, DNER, NRXN3, GAD1, ENSSSCG00000008533, KCNIP1, ENSSSCG00000004511, LSAMP
## PC_ 2
## Positive: NRXN3, NXPH1, HS3ST4, DPP10, RBFOX1, SGCZ, GRIK3, KIAA1217, GALNTL6, KCNC2
## LRP1B, GABRG3, GRIP1, OPCML, ROBO1, SOX6, KCNMA1, KCNQ3, ASTN2, PTCHD4
## TRHDE, ENSSSCG00000011489, IL1RAPL1, ENSSSCG00000050971, SLC24A2, DLGAP1, ENSSSCG00000051274, SPOCK3, FGF12, TENM2
## Negative: ZBTB20, ENSSSCG00000017146, STAT1, SLC1A2, ENSSSCG00000044913, LRMDA, TCF7L2, NOL11, CGNL1, GLIS3
## SLC1A3, PARP14, MX2, NHSL1, MAML2, EPSTI1, FBXL7, PLEKHA7, ARHGAP42, FGFR2
## EGFR, ACSS3, CDC14A, STK3, SAT1, RBM47, EYA2, CLIC4, ETV6, ENSSSCG00000024973
## PC_ 3
## Positive: HS3ST4, ZFPM2, DPP10, GRIK3, IL1RAPL2, SEMA3E, KIAA1217, LRP1B, CLSTN2, SORCS1
## SLC35F4, SGCZ, GRM3, GALNT9, MCTP1, TSPAN18, RASGRP1, LSAMP, FMNL2, RORA
## KHDRBS3, PRKCB, XYLT1, TLE4, BCAR3, DCC, SLIT1, SULF1, CCSER1, SOX5
## Negative: CA10, RFX3, NPAS3, TAFA1, GRIA4, RASGRF2, SLIT2, ENC1, HS6ST3, KCND2
## ENSSSCG00000014097, CUX2, EPHA6, UNC5D, CACNB2, KCTD16, CNTN3, CSMD3, FSTL5, CCK
## ERBB4, GALNT13, GABRA2, SLIT3, STPG4, ENSSSCG00000040650, CDH13, STXBP6, NXPH1, PRKG1
## PC_ 4
## Positive: ERBB4, ZNF536, ZNF385D, ZNF804A, SDK1, BTBD11, ZMAT4, SGCZ, IL1RAPL1, ENSSSCG00000004511
## ADARB2, CNTN5, KAZN, PTCHD4, ASIC2, VAV3, GABRG3, CNTNAP5, ENSSSCG00000009543, HMCN1
## KLHL13, ENSSSCG00000050221, SLC6A1, PTPRO, GRIA4, KIT, SORCS3, PTPRM, IQGAP2, GPR158
## Negative: PDE1C, GRIK1, CACNA2D3, TRHDE, GRIN3A, THSD7B, RELN, UTRN, ROBO2, TMCC3
## CDH13, NELL1, EYA1, BRINP3, PARD3, GRIK3, SYNPR, SOX6, NXPH1, PLCB4
## GRIK2, SYTL5, SATB1, CDH9, ENSSSCG00000048562, NRXN3, GRID2, ENSSSCG00000044043, RBMS1, NPAS3
## PC_ 5
## Positive: GALNTL6, ADARB2, RSPO2, ENSSSCG00000050221, SORCS1, SORCS3, ERBB4, PTPRT, ENSSSCG00000051441, KIT
## CCDC85A, SGCZ, GPR158, SLC35F4, CDH20, ZNF536, MACROD2, LUZP2, AMOTL1, SLC24A3
## KLHL13, THSD7B, ENSSSCG00000023627, NWD2, GRIK1, ZNF804B, DOCK10, RELN, RIN2, FSTL5
## Negative: KIAA1217, NXPH1, SOX6, TMEM132C, ENSSSCG00000004511, KCNC2, VAV3, IQGAP2, PTCHD4, ANK1
## IL1RAPL1, ZNF385D, PTPRM, ROBO1, DOCK4, ENSSSCG00000043464, SLIT2, GRIK3, SPOCK3, ZNF804A
## GRIA4, ETV6, PCDH7, SEMA3E, SULF1, RBFOX1, KCNMA1, ENSSSCG00000051588, SLIT1, PLCXD3
# get significant PCs
stdv <- neuron.sct.merged[["pca"]]@stdev
sum.stdv <- sum(neuron.sct.merged[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:(length(percent.stdv) - 1)] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = TRUE)[1] + 1
min.pc <- min(co1, co2)
min.pc
## [1] 17
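As an optional visual cross-check of this cutoff (not part of the original output), Seurat's built-in elbow plot can be drawn for the same PCA; the curve should flatten near min.pc:
# standard deviation explained by each of the 50 computed PCs
ElbowPlot(neuron.sct.merged, ndims = 50, reduction = "pca")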
# run umap
neuron.nointergration <- RunUMAP(neuron.sct.merged, dims = 1:min.pc, reduction = "pca")
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 20:34:34 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:34:34 Read 4517 rows and found 17 numeric columns
## 20:34:34 Using Annoy for neighbor search, n_neighbors = 30
## 20:34:34 Building Annoy index with metric = cosine, n_trees = 50
## 20:34:34 Writing NN index file to temp file /tmp/RtmpII5OCO/file13ad539a9ead6
## 20:34:34 Searching Annoy index using 1 thread, search_k = 3000
## 20:34:36 Annoy recall = 100%
## 20:34:37 Commencing smooth kNN distance calibration using 1 thread
## 20:34:39 Initializing from normalized Laplacian + noise
## 20:34:40 Commencing optimization for 500 epochs, with 194374 positive edges
## 20:34:54 Optimization finished
# cluster
neuron.nointergration <- FindNeighbors(object = neuron.nointergration, dims = 1:min.pc)
## Computing nearest neighbor graph
## Computing SNN
# Determine the clusters for various resolutions
neuron.nointergration <- FindClusters(object = neuron.nointergration,resolution = seq(0.1,0.8,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9493
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9280
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9162
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9041
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8936
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8839
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8744
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8649
## Number of communities: 17
## Elapsed time: 0 seconds
Idents(neuron.nointergration) <- "SCT_snn_res.0.1"
neuron.nointergration$seurat_clusters <- neuron.nointergration$SCT_snn_res.0.1
u1 <- dittoDimPlot(object = neuron.nointergration,
var = "seurat_clusters",
reduction.use = "umap",
do.label = TRUE,
labels.highlight = FALSE)
u1
u2 <- dittoDimPlot(object = neuron.nointergration,
var = "seurat_clusters",
reduction.use = "umap",
do.label = TRUE,
split.by = "treatment",
labels.highlight = FALSE)
u2
# show neuron cells by treatment
u3 <- DimPlot(object = neuron.nointergration,
reduction = "umap",
label = FALSE,
repel = TRUE,
group.by = "treatment",
cols = treatment_colors)
u3
# clean up plot objects
remove(u1,u2,u3)
# Cells per treatment per cluster
treatment_ncells <- FetchData(neuron.nointergration,
vars = c("ident", "treatment")) %>%
dplyr::count(ident,treatment) %>%
tidyr::spread(ident, n)
treatment_ncells
## treatment 0 1 2 3 4 5 6 7 8 9
## 1 LPS 1873 794 246 200 150 178 161 74 47 26
## 2 Saline 567 46 10 22 56 11 3 30 23 NA
# Cells per sample per cluster
sample_ncells <- FetchData(neuron.nointergration,
vars = c("ident", "sample")) %>%
dplyr::count(ident,sample) %>%
tidyr::spread(ident, n)
sample_ncells
## sample 0 1 2 3 4 5 6 7 8 9
## 1 10.Saline 497 33 4 21 56 6 1 10 11 NA
## 2 12.LPS 1735 793 245 200 150 178 141 68 34 26
## 3 4.R.Saline 70 13 6 1 NA 5 2 20 12 NA
## 4 8.R.LPS 138 1 1 NA NA NA 20 6 13 NA
# treatment
b1 <- neuron.nointergration@meta.data %>%
group_by(seurat_clusters, treatment) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=treatment)) +
geom_col() +
scale_fill_manual(values = treatment_colors) +
ggtitle("Percentage of treatment per cluster")
b1
# sample
b2 <- neuron.nointergration@meta.data %>%
group_by(seurat_clusters, sample) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=sample)) +
geom_col() +
#scale_fill_manual(values = sample_colors) +
ggtitle("Percentage of sample per cluster")
b2
remove(b1, b2)
# Run harmony to harmonize over samples
neuron.integrated <- RunHarmony(object = neuron.sct.merged,
group.by.vars = "sample",
assay.use = "SCT",
reduction = "pca",
plot_convergence = TRUE)
## Harmony 1/10
## Harmony 2/10
## Harmony converged after 2 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.SCT.harmony; see ?make.names for more details
## on syntax validity
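As an optional sanity check of the correction (not part of the original output), the first two Harmony dimensions can be plotted by sample; well-integrated samples should overlap rather than separate:
# inspect the Harmony-corrected embedding directly
DimPlot(neuron.integrated, reduction = "harmony", group.by = "sample") +
ggtitle("Harmony embedding by sample")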
First metric
# Determine percent of variation associated with each PC
stdv <- neuron.integrated[["pca"]]@stdev
sum.stdv <- sum(neuron.integrated[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
# Calculate cumulative percents for each PC
cumulative <- cumsum(percent.stdv)
# Determine the first PC where the cumulative percent exceeds 90%
# and the percent variation associated with that PC is less than 5%
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co1
## [1] 42
Second metric
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which(
(percent.stdv[1:(length(percent.stdv) - 1)] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = TRUE)[1] + 1
# last PC where the drop in % variation to the next PC is greater than 0.1%
co2
## [1] 17
Usually, we would choose the minimum of these two metrics as the PCs covering the majority of the variation in the data.
# Minimum of the two calculation
min.pc <- min(co1, co2)
min.pc
## [1] 17
Use the min.pc we just calculated to generate the clusters. We can plot the elbow plot again and overlay the cutoffs determined by our two metrics:
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumu, pct, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
# Run UMAP
neuron.integrated <- RunUMAP(neuron.integrated,
dims = 1:min.pc,
reduction = "pca", # the Harmony-corrected embedding is also available as reduction = "harmony"
n.components = 3) # set to 3 for use with VR
## 20:35:10 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:35:10 Read 4517 rows and found 17 numeric columns
## 20:35:10 Using Annoy for neighbor search, n_neighbors = 30
## 20:35:10 Building Annoy index with metric = cosine, n_trees = 50
## 20:35:11 Writing NN index file to temp file /tmp/RtmpII5OCO/file13ad558400030
## 20:35:11 Searching Annoy index using 1 thread, search_k = 3000
## 20:35:12 Annoy recall = 100%
## 20:35:14 Commencing smooth kNN distance calibration using 1 thread
## 20:35:16 Initializing from normalized Laplacian + noise
## 20:35:16 Commencing optimization for 500 epochs, with 194374 positive edges
## 20:35:31 Optimization finished
# Determine the K-nearest neighbor graph
neuron.integrated <- FindNeighbors(object = neuron.integrated,dims = 1:min.pc)
## Computing nearest neighbor graph
## Computing SNN
# Determine the clusters for various resolutions
neuron.unannotated <- FindClusters(object = neuron.integrated,resolution = seq(0.1,0.8,by=0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9493
## Number of communities: 10
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9280
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9162
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9041
## Number of communities: 15
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8936
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8839
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8744
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4517
## Number of edges: 152001
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8649
## Number of communities: 17
## Elapsed time: 0 seconds
Idents(neuron.unannotated) <- "SCT_snn_res.0.1"
# save
saveRDS(neuron.unannotated,"../../rObjects/neuron_unannotated.rds")
DefaultAssay(neuron.unannotated) <- "RNA"
#neuron.unannotated <- NormalizeData(neuron.unannotated, verbose = FALSE)
neuron.unannotated$seurat_clusters <- neuron.unannotated$SCT_snn_res.0.1
u1 <- dittoDimPlot(object = neuron.unannotated,
var = "seurat_clusters",
reduction.use = "umap",
do.label = TRUE,
labels.highlight = FALSE)
u1
u2 <- dittoDimPlot(object = neuron.unannotated,
var = "seurat_clusters",
reduction.use = "umap",
do.label = TRUE,
split.by = "treatment",
labels.highlight = FALSE)
u2
# show neuron cells by treatment
u3 <- DimPlot(object = neuron.unannotated,
reduction = "umap",
label = FALSE,
repel = TRUE,
group.by = "treatment",
cols = treatment_colors)
u3
remove(u1,u2,u3)
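plotly was loaded at the top for 3D plots and the UMAP above was computed with n.components = 3; a minimal sketch of an interactive 3D view of the clusters, assuming the default Seurat embedding column names UMAP_1, UMAP_2 and UMAP_3:
# pull the three UMAP coordinates plus cluster labels
umap.3d <- FetchData(neuron.unannotated,
vars = c("UMAP_1", "UMAP_2", "UMAP_3", "seurat_clusters"))
plot_ly(umap.3d,
x = ~UMAP_1, y = ~UMAP_2, z = ~UMAP_3,
color = ~seurat_clusters,
type = "scatter3d", mode = "markers",
marker = list(size = 2))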
# one color per cluster (0-9)
cluster_colors <- c("gold","firebrick1","dodgerblue","green",
"cyan","chocolate4","gray40","purple", "blue", "pink")
neuron.unannotated <- BuildClusterTree(object = neuron.unannotated,
dims = 1:min.pc,
reorder = FALSE,
reorder.numeric = FALSE)
tree <- neuron.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)
p <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
p
# Cells per treatment per cluster
treatment_ncells <- FetchData(neuron.unannotated,
vars = c("ident", "treatment")) %>%
dplyr::count(ident,treatment) %>%
tidyr::spread(ident, n)
treatment_ncells
## treatment 0 1 2 3 4 5 6 7 8 9
## 1 LPS 1873 794 246 200 150 178 161 74 47 26
## 2 Saline 567 46 10 22 56 11 3 30 23 NA
# Cells per sample per cluster
sample_ncells <- FetchData(neuron.unannotated,
vars = c("ident", "sample")) %>%
dplyr::count(ident,sample) %>%
tidyr::spread(ident, n)
sample_ncells
## sample 0 1 2 3 4 5 6 7 8 9
## 1 10.Saline 497 33 4 21 56 6 1 10 11 NA
## 2 12.LPS 1735 793 245 200 150 178 141 68 34 26
## 3 4.R.Saline 70 13 6 1 NA 5 2 20 12 NA
## 4 8.R.LPS 138 1 1 NA NA NA 20 6 13 NA
# treatment
b1 <- neuron.unannotated@meta.data %>%
group_by(seurat_clusters, treatment) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=treatment)) +
geom_col() +
scale_fill_manual(values = treatment_colors) +
ggtitle("Percentage of treatment per cluster")
b1
# sample
b2 <- neuron.unannotated@meta.data %>%
group_by(seurat_clusters, sample) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=sample)) +
geom_col() +
#scale_fill_manual(values = sample_colors) +
ggtitle("Percentage of sample per cluster")
b2
remove(b1, b2)
# set levels
neuron.unannotated$treatment <- factor(neuron.unannotated$treatment,
levels = c("LPS","Saline"))
neuron.unannotated$SCT_snn_res.0.1 <- factor(neuron.unannotated$SCT_snn_res.0.1)
# initialize df
conserved.markers <- data.frame()
all.clusters <- unique(neuron.unannotated$SCT_snn_res.0.1)
# loop through each cluster
for (i in all.clusters) {
# print the cluster you're on
print(i)
# find conserved marker in cluster vs all other clusters
markers <- FindConservedMarkers(neuron.unannotated,
ident.1 = i, # subset to single cluster
grouping.var = "treatment", # compare by treatment
only.pos = TRUE, # only positive markers
min.pct = 0.1, # default
logfc.threshold = 0.25, # default
test.use = "MAST"
)
# skip if none
if(nrow(markers) == 0) {
next
}
# make rownames a column
markers <- rownames_to_column(markers, var = "gene")
# make cluster number a column
markers$cluster <- i
# add to final table
conserved.markers <- rbind(conserved.markers, markers)
}
# create delta_pct
conserved.markers$Saline_delta_pct <- abs(conserved.markers$Saline_pct.1 -
conserved.markers$Saline_pct.2)
conserved.markers$LPS_delta_pct <- abs(conserved.markers$LPS_pct.1 -
conserved.markers$LPS_pct.2)
# more stringent filtering
markers.strict <- conserved.markers[
conserved.markers$Saline_delta_pct > summary(conserved.markers$Saline_delta_pct)[5],]
markers.strict <- markers.strict[
markers.strict$LPS_delta_pct > summary(conserved.markers$LPS_delta_pct)[5],]
markers.strict$gene_name <- markers.strict$gene
markers.strict$row.num <- 1:nrow(markers.strict)
# compare
table(conserved.markers$cluster)
table(markers.strict$cluster)
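To skim the strongest candidates per cluster before plotting, the conserved-marker table can be summarized with dplyr. This assumes the default metap combination in FindConservedMarkers, which adds the max_pval and minimump_p_val columns; the top-5 cutoff is arbitrary:
# top 5 conserved markers per cluster by combined minimum p-value
top.conserved <- conserved.markers %>%
group_by(cluster) %>%
slice_min(order_by = minimump_p_val, n = 5, with_ties = FALSE) %>%
ungroup()
top.conserved[, c("cluster", "gene", "max_pval", "minimump_p_val")]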
saveRDS(conserved.markers, paste0("../../rObjects/brain_conserved_markers_", cell, ".rds"))
conserved.markers <- readRDS(paste0("../../rObjects/brain_conserved_markers_", cell, ".rds"))
# create delta_pct
conserved.markers$Saline_delta_pct <- abs(conserved.markers$Saline_pct.1 -
conserved.markers$Saline_pct.2)
conserved.markers$LPS_delta_pct <- abs(conserved.markers$LPS_pct.1 -
conserved.markers$LPS_pct.2)
# more stringent filtering
markers.strict <- conserved.markers[
conserved.markers$Saline_delta_pct > summary(conserved.markers$Saline_delta_pct)[5],]
markers.strict <- markers.strict[
markers.strict$LPS_delta_pct > summary(conserved.markers$LPS_delta_pct)[5],]
markers.strict$gene_name <- markers.strict$gene
markers.strict$row.num <- 1:nrow(markers.strict)
# subset
cluster0 <- markers.strict[markers.strict$cluster == 0,]
cluster1 <- markers.strict[markers.strict$cluster == 1,]
cluster2 <- markers.strict[markers.strict$cluster == 2,]
cluster3 <- markers.strict[markers.strict$cluster == 3,]
cluster4 <- markers.strict[markers.strict$cluster == 4,]
cluster5 <- markers.strict[markers.strict$cluster == 5,]
cluster6 <- markers.strict[markers.strict$cluster == 6,]
cluster7 <- markers.strict[markers.strict$cluster == 7,]
cluster8 <- markers.strict[markers.strict$cluster == 8,]
cluster9 <- markers.strict[markers.strict$cluster == 9,]
### Cluster 0
VlnPlot(neuron.unannotated,
features = cluster0$gene[1:2],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
### Cluster 1
VlnPlot(neuron.unannotated,
features = cluster1$gene[1:20],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
VlnPlot(neuron.unannotated,
features = cluster1$gene[21:40],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
### Cluster 2
VlnPlot(neuron.unannotated,
features = cluster2$gene[1:20],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
VlnPlot(neuron.unannotated,
features = cluster2$gene[21:41],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
### Cluster 3
VlnPlot(neuron.unannotated,
features = cluster3$gene[1:20],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
VlnPlot(neuron.unannotated,
features = cluster3$gene[21:40],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
### Cluster 4
VlnPlot(neuron.unannotated,
features = cluster4$gene[1:20],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
VlnPlot(neuron.unannotated,
features = cluster4$gene[21:40],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
### Cluster 5
VlnPlot(neuron.unannotated,
features = cluster5$gene[1:20],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
VlnPlot(neuron.unannotated,
features = cluster5$gene[21:40],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
### Cluster 6
VlnPlot(neuron.unannotated,
features = cluster6$gene[1:20],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
VlnPlot(neuron.unannotated,
features = cluster6$gene[21:40],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
### Cluster 7
VlnPlot(neuron.unannotated,
features = cluster7$gene[1:20],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
VlnPlot(neuron.unannotated,
features = cluster7$gene[21:29],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
### Cluster 8
VlnPlot(neuron.unannotated,
features = cluster8$gene[1:20],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
VlnPlot(neuron.unannotated,
features = cluster8$gene[21:40],
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE)
# Neuron markers
# Interneuron
# copy the cluster identities into a cell_type column for per-cluster DE testing
cell_types <- unique(neuron.unannotated$SCT_snn_res.0.1)
neuron.unannotated$cell_type <- neuron.unannotated$SCT_snn_res.0.1
DE.df <- data.frame()
for (i in cell_types) {
print(i)
cluster <- subset(neuron.unannotated, cell_type == i)
Idents(cluster) <- cluster$treatment
markers <- FindMarkers(object = cluster,
ident.1 = "LPS",
ident.2 = "Saline",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers) # capture gene names before rbind() appends suffixes to duplicated rownames
DE.df <- rbind(DE.df, markers)
}
# change column names
colnames(DE.df)[c(3,4)] <- c("percent_LPS","percent_Saline")
# reset row names (gene names were captured inside the loop)
rownames(DE.df) <- 1:nrow(DE.df)
# calculate percent difference
DE.df$percent_difference <- abs(DE.df$percent_LPS - DE.df$percent_Saline)
# reorder columns
DE.df <- DE.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(DE.df, "../../results/recluster/neuron/LPS_vs_Saline_DEGs.tsv", sep = "\t",
quote = FALSE, row.names = FALSE)
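ggrepel is loaded above, so a quick volcano plot can be drawn for any one cluster from DE.df; the adjusted-p and log2 fold-change cutoffs below are arbitrary illustrative thresholds, not values used elsewhere in this analysis:
# volcano plot of LPS vs Saline DEGs for cluster 0
volcano.df <- DE.df[DE.df$cluster == 0,]
volcano.df$signif <- volcano.df$p_val_adj < 0.05 & abs(volcano.df$avg_log2FC) > 0.25
ggplot(volcano.df, aes(x = avg_log2FC, y = -log10(p_val_adj), color = signif)) +
geom_point(size = 0.8) +
geom_text_repel(data = subset(volcano.df, signif),
aes(label = gene), size = 3, max.overlaps = 15) +
scale_color_manual(values = c("gray47", "red")) +
theme_classic() +
theme(legend.position = "none") +
ggtitle("Cluster 0: LPS vs Saline")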
# ShinyCell reads the per-cell metadata from @meta.data
metadata <- neuron.unannotated@meta.data
neuron.unannotated@meta.data <- metadata
# load libraries
library(ShinyCell)
# make shiny folder
DefaultAssay(neuron.unannotated) <- "RNA"
Idents(neuron.unannotated) <- "merged_clusters"
sc.config <- createConfig(neuron.unannotated)
makeShinyApp(neuron.unannotated, sc.config, gene.mapping = TRUE,
shiny.title = "recluster neuron")
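SeuratDisk was loaded at the top for SaveH5Seurat(); as an optional final step (not run here, and the output path is just an example following the rObjects convention), the reclustered object can be written to h5Seurat and converted to AnnData for use with Python tools:
# export the final object and convert it to .h5ad
SaveH5Seurat(neuron.unannotated,
filename = "../../rObjects/neuron_unannotated.h5Seurat",
overwrite = TRUE)
Convert("../../rObjects/neuron_unannotated.h5Seurat", dest = "h5ad", overwrite = TRUE)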