Setup

knitr::opts_knit$set(
  root.dir = ".")
library(dplyr)
library(dittoSeq)
library(ggrepel)
library(ggtree)
library(parallel)
library(plotly)  # 3D plot
library(Seurat)  # Idents()
library(SeuratDisk)  # SaveH5Seurat()
library(tibble)  # rownnames_to_column
#options(mc.cores = detectCores() - 1)

Read in object and DEG tables

## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12 
## 3248 3118 3443 3400 4647 1997 2494 3479 2453 1583 1976  384 2529
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12 
##  452  748  692  738  945  321  638  797  888  190  684  172 1383
## 
##    0    1    2    3    4    5    6    7    8    9   10   11 
##  798  724  907  980 1443  626  670 1257  711  662  487  623
## 
##   0   1   2   3   4   5   6   7   8  10  11 
##  36 611 149 297  99 351  59 153 478  42 196

Define resolution and groups

# set levels and idents
pigs.unannotated$individual_clusters <- 
  factor(pigs.unannotated$SCT_snn_res.0.1,
         levels = c("0","1","2","3","4","5","6","7","8","9","10","11", "12"))
pigs.unannotated$treatment <- factor(pigs.unannotated$treatment,
                                     levels = c("LPS","Saline"))
Idents(pigs.unannotated) <- "individual_clusters"

# normalize if SCTtransform was used
DefaultAssay(pigs.unannotated) <- "RNA"
# natural-log
pigs.unannotated <- NormalizeData(pigs.unannotated, verbose = FALSE)

Clustering QC before annotation

Unannotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","purple",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        #label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors
        )

# Split by sample
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "sample",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

# Split by treatment 
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "treatment",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

Color blind friendly UMAP

dittoDimPlot(object = pigs.unannotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

Percent cells per cluter

# treatment
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, treatment) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=treatment)) +
  geom_col() +
  scale_fill_manual(values = c("purple","gray")) +
  ggtitle("Percentage of treatment per cluster")

# sample
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, sample) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=sample)) +
  geom_col() +
  #scale_fill_manual(values = sample_colors) +
  ggtitle("Percentage of sample per cluster")

Cells per cluster

n_cells <- FetchData(pigs.unannotated, 
                     vars = c("ident", "treatment")) %>%
  dplyr::count(ident,treatment) %>%
  tidyr::spread(ident, n)
n_cells
##   treatment    0    1   2   3   4   5   6   7   8   9  10 11  12
## 1       LPS 2774 1070 383 375 341 373 443 138 194 118 147 72   1
## 2    Saline  487  403 368 336 252 204  38 174  24  78  10 31 100

Median absolute deviation

Remove outliers with log-library size greater than 3 median absolute deviations (MADs) or below the median log-library size.

# MAD example
log.lib <- as.numeric(log10(pigs.unannotated$nCount_RNA))
med <- median(log.lib)
abs.dev <- abs(log.lib - med)
mad <- median(abs.dev)
mad <- mad * 1.4826 # multiply by consistency cutoff
mad
## [1] 0.4023281
# stats function
mad <- mad(log.lib)
mad
## [1] 0.4023281
# remove outliers greater than 3 MADs
remove <- abs(log.lib - median(log.lib)) / mad(log.lib) > 3
table(remove)
## remove
## FALSE 
##  8934

Cluster tree

pigs.unannotated <- BuildClusterTree(object = pigs.unannotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- pigs.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], 
                        shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Cluster Annotation

Cluster 0 - Neuron - excitatory neurons

# Number of cells per condition
n_cells[,c(1,2)]
##   treatment    0
## 1       LPS 2774
## 2    Saline  487
# UMAP with only cluster 0
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        split.by = "treatment",
        group.by = "SCT_snn_res.0.1",
        cols = "dodgerblue")

# Top 40 markers by p-value
VlnPlot(pigs.unannotated,
        features = cluster0$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(pigs.unannotated,
        features = cluster0$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes of interest
# NRGN (excitatory neurons)
VlnPlot(pigs.unannotated,
        features = "NRGN",
        cols = colors,
        split.by = "individual_clusters")

# conserved cluster markers
conserved.cluster0 <- conserved.markers[conserved.markers$cluster == 0,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster0$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 1 - Astrocytes

  • GFAP - Glial fibrillary acidic protein is an intermediate filament and major component of the astrocyte cytoskeleton (Abcam)
  • SLC1A3/EAAT1/GLAST - Excitatory amino acid transporter 1 (EAAT1) is an astrocyte-specific glutamate transporter. It may also be known as glutamate aspartate Transporter (GLAST) (Abcam).
  • SLC1A2/EAAT2/GLT-1 - Excitatory amino acid transporter 2 (EAAT2) is an astrocyte-specific glutamate transporter. EAAT2 has many alternative names including solute carrier family 1 member 2 (SLC1A2) and glutamate transporter 1 (GLT-1) (Abcam)
# Number of cells per condition
n_cells[,c(1,3)]
##   treatment    1
## 1       LPS 1070
## 2    Saline  403
# UMAP with only cluster 1
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "gold")

# Top 40 markers by p-value
VlnPlot(pigs.unannotated,
        features = cluster1$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster1$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes of interest
VlnPlot(pigs.unannotated,
        features = c("AQP4","GFAP","GJA1","SLC1A2"),
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE,
        stack = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("ACSBG1","AQP4","CPVL","ETNPPL","GFAP","GJA1","GNA14","HRH1","HUNK",
                     "IRAG1","LAMA1","LRP4","NR2E1","PLEKHA7","RFX2","RPE65","TNC"),
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE,
        stack = TRUE)

# Genes of interest
VlnPlot(pigs.unannotated,
        features = c("SLC1A3", "SLC1A2"),
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE,
        stack = TRUE)

# conserved cluster markers
conserved.cluster1 <- conserved.markers[conserved.markers$cluster == 1,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster1$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 2 - Fibroblast

# Number of cells per condition
n_cells[,c(1,4)]
##   treatment   2
## 1       LPS 383
## 2    Saline 368
# UMAP with only cluster 2
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "firebrick1")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster2$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster2$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Additional genes of interest
VlnPlot(pigs.unannotated,
        features = "SLC6A13",
        cols = colors,
        split.by = "individual_clusters")

# FLT1 (endothelial cells)
VlnPlot(pigs.unannotated,
        features = "FLT1",
        cols = colors,
        split.by = "individual_clusters")

# conserved cluster markers
conserved.cluster2 <- conserved.markers[conserved.markers$cluster == 2,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster2$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 3 - Oligodendrocyte

# Number of cells per condition
n_cells[,c(1,5)]
##   treatment   3
## 1       LPS 375
## 2    Saline 336
# UMAP with only cluster 3
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "green")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster3$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster3$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Additional genes of interest
# MBP (oligodendrocytes)
VlnPlot(pigs.unannotated,
        features = "MBP",
        cols = colors,
        split.by = "individual_clusters")

VlnPlot(pigs.unannotated,
        features = c("COL1A1", "CLDN5", "OCLN"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("AGMO","CNP","ENPP2","FAM102A","LRRC71","MAG","MBP","MOG",
                     "OPALIN","PLEKHH1","PRR5L","SHROOM4","TMEM144"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster3 <- conserved.markers[conserved.markers$cluster == 3,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster3$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 4 - Mircoglia/Macrophage

# Number of cells per condition
n_cells[,c(1,6)]
##   treatment   4
## 1       LPS 341
## 2    Saline 252
# UMAP with only cluster 4
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "lightgray")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster4$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster4$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Additional genes of interest
VlnPlot(pigs.unannotated,
        features = "P2RY12",
        cols = colors,
        split.by = "individual_clusters")

VlnPlot(pigs.unannotated,
        features = "C1QB",
        cols = colors,
        split.by = "individual_clusters")

# microglia marker
VlnPlot(pigs.unannotated,
        features = "CSF1R",
        cols = colors,
        split.by = "individual_clusters")

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("ARHGAP15","DOCK8","IKZF1","INPP5D","IRAK2","ITGAM",
                     "KYNU","LCP1","LYN","P2RY12","P2RY6","PACSIN2","PIK3R5",
                     "PLEKHA2","RUNX1","SKAP2","VAV1"),
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE,
        stack = TRUE)

# conserved cluster markers
conserved.cluster4 <- conserved.markers[conserved.markers$cluster == 4,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster4$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 5 - Polydendrocyte

# Number of cells per condition
n_cells[,c(1,7)]
##   treatment   5
## 1       LPS 373
## 2    Saline 204
# UMAP with only cluster 5
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "lightgray")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster5$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster5$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# VCAN (oligodendrocyte progenitor cells)
VlnPlot(pigs.unannotated,
        features = "VCAN",
        cols = colors,
        split.by = "individual_clusters")

# Polydendrocyte markers 
VlnPlot(pigs.unannotated,
        features = "OLIG1",
        cols = colors,
        split.by = "individual_clusters")

VlnPlot(pigs.unannotated,
        features = "TNR",
        cols = colors,
        split.by = "individual_clusters")

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("BCAN","CA8","CACNG4","CHST9","CSPG4","MAP3K1","MEGF11",
                     "P2RX7","PDGFRA","PLPP4","SNX22","STK32A"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster5 <- conserved.markers[conserved.markers$cluster == 5,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster5$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 6 - Interneuron

# Number of cells per condition
n_cells[,c(1,8)]
##   treatment   6
## 1       LPS 443
## 2    Saline  38
# UMAP with only cluster 6
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "6"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "cyan")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster6$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster6$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# GAD1 (inhibitory neurons), 
VlnPlot(pigs.unannotated,
        features = "GAD1",
        cols = colors,
        split.by = "individual_clusters")

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("BTBD11","CACNA2D2","ENSSSCG00000050221","GAD1","GAD2",
                     "GRIN3A","HTR2C","LHX6","PLD5","PTCHD4","RSPO2","SIAH3"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster6 <- conserved.markers[conserved.markers$cluster == 6,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster6$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 7 - Endothelial

# Number of cells per condition
n_cells[,c(1,9)]
##   treatment   7
## 1       LPS 138
## 2    Saline 174
# UMAP with only cluster 7
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "7"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "chocolate4")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster7$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster7$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("C4A","CYP2B22","IL1R1","ITGBL1","KIAA1755",
                     "NET1","RSPO3","SAMHD1","SLC25A37","SLC26A2","SLC47A1"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes co-expressed in other clusters
VlnPlot(pigs.unannotated,
        features = c("ADAMTSL3","BNC2","EYA1","NTN1","SLC16A9"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster7 <- conserved.markers[conserved.markers$cluster == 7,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster7$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 8 - Neuron

# Number of cells per condition
n_cells[,c(1,10)]
##   treatment   8
## 1       LPS 194
## 2    Saline  24
# UMAP with only cluster 8
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "8"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "pink")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster8$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster8$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = c("ADAMTS3","EPHB1","FOXP2","FRAS1","OLFML2B","P3H2","SEMA3E",
                     "SLC35F4","SYT6"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster8 <- conserved.markers[conserved.markers$cluster == 8,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster8$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 9 - Neuron

# Number of cells per condition
n_cells[,c(1,11)]
##   treatment   9
## 1       LPS 118
## 2    Saline  78
# UMAP with only cluster 9
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "9"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "orange")

VlnPlot(pigs.unannotated,
        features = cluster9$gene[1],
        cols = colors,
        split.by = "individual_clusters")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster9$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster9$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# FLT1 (endothelial cells),
VlnPlot(pigs.unannotated,
        features = "FLT1",
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("CBX6","CFL1","EEF1A2","EEF2","FBXL16","HMGCS1","HPCAL4",
                     "HSPA8","LIN7C","NEFL","OAZ1","RPL15","UBC"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster9 <- conserved.markers[conserved.markers$cluster == 9,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster9$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 10 - Interneuron

# Number of cells per condition
n_cells[,c(1,12)]
##   treatment  10
## 1       LPS 147
## 2    Saline  10
# UMAP with only cluster 10
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "10"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "darkgreen")

VlnPlot(pigs.unannotated,
        features = cluster10$gene[1],
        cols = colors,
        split.by = "individual_clusters")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster10$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster10$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = "RELN",
        cols = colors,
        split.by = "individual_clusters",
      #  stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster10 <- conserved.markers[conserved.markers$cluster == 10,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster10$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 11 - Neuron

# Number of cells per condition
n_cells[,c(1,13)]
##   treatment 11
## 1       LPS 72
## 2    Saline 31
# UMAP with only cluster 11
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "11"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "purple")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster11$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster11$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = "NRG1",
        cols = colors,
        split.by = "individual_clusters")

# conserved cluster markers
conserved.cluster11 <- conserved.markers[conserved.markers$cluster == 11,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster11$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

## Cluster 12 - Neuron

# Number of cells per condition
n_cells[,c(1,14)]
##   treatment  12
## 1       LPS   1
## 2    Saline 100
# UMAP with only cluster 11
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "12"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "deeppink")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster12$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster12$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("ENSSSCG00000041299","HS3ST2","GABRA5","VSTM2B"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# no conserved markers for cluster 12
#conserved.cluster12 <- conserved.markers[conserved.markers$cluster == 12,]
#VlnPlot(pigs.unannotated,
 #       features = conserved.cluster12$gene[1:20],
#        cols = colors,
 #       split.by = "individual_clusters",
 #       stack = TRUE,
 #       flip = TRUE)

Indiviudal cluster names

# Rename all identities
pigs.annotated <- RenameIdents(object = pigs.unannotated, 
                               "0" = "Excitatory neuron",
                               "1" = "Astrocytes",
                               "2" = "Fibroblast",
                               "3" = "Oligodendrocyte",
                               "4" = "microglia",
                               "5" = "Polydendrocyte",
                               "6" = "Interneuron",
                               "7" = "Endothelial",
                               "8" = "Neuron 1",
                               "9" = "Neuron 2",
                               "10" = "Interneuron",
                               "11" = "Neuron 3", 
                               "12" = "Neuron 4")
pigs.annotated$individual_clusters <- Idents(pigs.annotated)

Merged cluster names

# Rename all identities
pigs.merged <- RenameIdents(object = pigs.unannotated, 
                               "0" = "Neuron",
                               "1" = "Astrocytes",
                               "2" = "Fibroblast",
                               "3" = "Oligodendrocyte",
                               "4" = "microglia",
                               "5" = "Oligodendrocyte",
                               "6" = "Neuron",
                               "7" = "Fibroblast",
                               "8" = "Neuron",
                               "9" = "Neuron",
                               "10" = "Neuron",
                               "11" = "Neuron", 
                               "12" = "Neuron")
pigs.annotated$merged_clusters <- Idents(pigs.merged)
#remove(pigs.merged)

#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")

# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated
## An object of class Seurat 
## 42773 features across 8934 samples within 2 assays 
## Active assay: RNA (21805 features, 0 variable features)
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap

Pseudo bulk merge all clusters cluster names

# Rename all identities
pigs.pseudo.bulk <- RenameIdents(object = pigs.unannotated, 
                               "0" = "Pseudo bulk",
                               "1" = "Pseudo bulk",
                               "2" = "Pseudo bulk",
                               "3" = "Pseudo bulk",
                               "4" = "Pseudo bulk",
                               "5" = "Pseudo bulk",
                               "6" = "Pseudo bulk",
                               "7" = "Pseudo bulk",
                               "8" = "Pseudo bulk",
                               "9" = "Pseudo bulk",
                               "10" = "Pseudo bulk",
                               "11" = "Pseudo bulk", 
                               "12" = "Pseudo bulk")
pigs.annotated$pseudo_bulk <- Idents(pigs.pseudo.bulk)
#remove(pigs.merged)

#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")

Clustering QC after annotation

Annotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","purple",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
u1 <- DimPlot(object = pigs.annotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        label.box = TRUE,
        repel = TRUE,
        cols = colors,
        group.by = "individual_clusters",)
u1

# Plot the UMAP
u2 <- DimPlot(object = pigs.annotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        label.box = TRUE,
        repel = TRUE,
        cols = colors,
        group.by = "merged_clusters",)
u2

## png 
##   2
## png 
##   2
## null device 
##           1
## pdf 
##   2
## pdf 
##   2
## null device 
##           1

Color blind friendly UMAP

dittoDimPlot(object = pigs.annotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

dittoDimPlot(object = pigs.annotated,
             var = "merged_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

Cluster tree

# individual clusters
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
                                   dims = 1:15,
                                   reorder = FALSE,
                                   reorder.numeric = FALSE)

tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t1 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
t1

# merged clusters
Idents(pigs.annotated) <- "merged_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
                                   dims = 1:15,
                                   reorder = FALSE,
                                   reorder.numeric = FALSE)

tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t2 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
t2

## png 
##   2
## png 
##   2
## null device 
##           1
## pdf 
##   2
## pdf 
##   2
## null device 
##           1

Differential expression

Individual clusters

LPS vs saline within cluster

cell.types <- unique(pigs.annotated$individual_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("LPS","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)
  
  # subset cluster
  cluster <- subset(pigs.annotated, individual_clusters == i)
  
  # skip cluster 11 - Neuron 4 (no LPS cells) Fewer than 3 cells 
  if (i == "Neuron 4") { next }
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "LPS",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  treatment.df <- rbind(treatment.df, markers)
}
## [1] "Neuron 4"
## [1] "Interneuron"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Polydendrocyte"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Fibroblast"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Neuron 3"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Astrocytes"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Excitatory neuron"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Endothelial"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Oligodendrocyte"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "microglia"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Neuron 2"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Neuron 1"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_LPS", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_LPS - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(treatment.df,
            "../../results/DEGs/individual/brain_individual_DEGs_LPS_vs_saline_within_cluster_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
            "../../results/DEGs/individual/brain_individual_DEGs_LPS_vs_saline_within_cluster_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(treatment.df.strict$cluster)
## 
##        Astrocytes       Endothelial Excitatory neuron        Fibroblast 
##              2430              1462              1928              2311 
##       Interneuron         microglia          Neuron 1          Neuron 2 
##               358              1273                68                69 
##          Neuron 3   Oligodendrocyte    Polydendrocyte 
##                16              1190              1625

Single cluster vs. all other clusters

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "individual_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
                              test.use = "MAST")
## Calculating cluster Excitatory neuron
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Astrocytes
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Fibroblast
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Oligodendrocyte
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster microglia
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Polydendrocyte
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Interneuron
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Endothelial
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Neuron 1
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Neuron 2
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Neuron 3
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Neuron 4
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)

# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]

# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
            "../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)
write.table(all.markers.strict,
            "../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

# view number of DEGs
table(all.markers.strict$cluster)
## 
## Excitatory neuron        Astrocytes        Fibroblast   Oligodendrocyte 
##              3249              3130              3468              3445 
##         microglia    Polydendrocyte       Interneuron       Endothelial 
##              4649              2090              2540              3634 
##          Neuron 1          Neuron 2          Neuron 3          Neuron 4 
##              2536              1803               449              2574

Pseudo bulk LPS vs. saline

# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "pseudo_bulk"
pigs.annotated
## An object of class Seurat 
## 42773 features across 8934 samples within 2 assays 
## Active assay: RNA (21805 features, 0 variable features)
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap
cell.types <- unique(pigs.annotated$pseudo_bulk)
pb.treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("LPS","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)

  # subset cluster
  cluster <- subset(pigs.annotated, pseudo_bulk == i)

  # skip cluster 11 - Neuron 4 (no LPS cells) Fewer than 3 cells 
  if (i == "Neuron 4") { next }
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "LPS",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  pb.treatment.df <- rbind(pb.treatment.df, markers)
}
## [1] "Pseudo bulk"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
# reformat table
colnames(pb.treatment.df)[c(3,4)] <- c("percent_LPS", "percent_saline")
pb.treatment.df$percent_difference <- abs(pb.treatment.df$percent_LPS - pb.treatment.df$percent_saline)
pb.treatment.df <- pb.treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(pb.treatment.df,
            "../../results/DEGs/individual/brain_pseudo_bulk_DEGs_LPS_vs_saline_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
pb.treatment.df <- pb.treatment.df[pb.treatment.df$p_val_adj < 0.05,]
write.table(pb.treatment.df,
            "../../results/DEGs/individual/brain_pseudo_bulk_DEGs_LPS_vs_saline_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(pb.treatment.df$cluster)
## 
## Pseudo bulk 
##        1950

Merged clusters

LPS vs saline within cluster

cell.types <- unique(pigs.annotated$merged_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("LPS","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)
  
  # subset cluster
  cluster <- subset(pigs.annotated, merged_clusters == i)
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "LPS",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  treatment.df <- rbind(treatment.df, markers)
}
## [1] "Neuron"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Oligodendrocyte"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Fibroblast"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "Astrocytes"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## [1] "microglia"
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_LPS", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_LPS - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(treatment.df,
            "../../results/DEGs/merged/brain_merged_DEGs_LPS_vs_saline_within_cluster_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
            "../../results/DEGs/merged/brain_merged_DEGs_LPS_vs_saline_within_cluster_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(treatment.df.strict$cluster)
## 
##      Astrocytes      Fibroblast       microglia          Neuron Oligodendrocyte 
##            2430            2444            1273            1293            1404

Single cluster vs. all other clusters

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "merged_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
                              test.use = "MAST")
## Calculating cluster Neuron
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Astrocytes
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Fibroblast
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster Oligodendrocyte
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster microglia
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)

# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]

# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
            "../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)
write.table(all.markers.strict,
            "../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

# view number of DEGs
table(all.markers.strict$cluster)
## 
##          Neuron      Astrocytes      Fibroblast Oligodendrocyte       microglia 
##            3751            3130            3765            2888            4649

Volcanoes

df <- treatment.df
cell_types <- as.vector(unique(pigs.annotated$merged_clusters))

for (cell_cluster in cell_types) {
  # data
  treatment_vs_control <- df[df$cluster == cell_cluster,]
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  num_DEGs <- vector()
  
  # cutoff based on cluster
  if(cell_cluster == "microglia macrophage") {
    FDRq <- 0.001
    LFC <- 2
  } else {
    FDRq <- 0.05
    LFC <- 1
  }

  # loop through every gene
  for(i in 1:max){
    # if FDRq is met
    if (treatment_vs_control$p_val_adj[i] < FDRq){
      
      if (treatment_vs_control$avg_log2FC[i] > LFC){
        color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
        num_DEGs <- c(num_DEGs,"up")
      }
      else if (treatment_vs_control$avg_log2FC[i] < -LFC){
        color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
        num_DEGs <- c(num_DEGs, "down")
      }
      else{
        color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
        num_DEGs <- c(num_DEGs, "neither")
      }
    }
    # if FDRq is not met
    else{
      color_values <- c(color_values, 3) # 3 when FDRq not met
      num_DEGs <- c(num_DEGs, "neither")
    }
  }
  
  table(num_DEGs)
  treatment_vs_control$color <- factor(color_values)

  # subset genes to label
  up <- treatment_vs_control[treatment_vs_control$color == 1,]
  up10 <- up[1:10,]
  down <- treatment_vs_control[treatment_vs_control$color == 2,]
  down10 <- down[1:10,]
  
  # find hadjpval
  hadjpval <- (-log10(max(
    treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq], 
    na.rm=TRUE)))
  
  # plot
  p <-
    ggplot(data = treatment_vs_control, 
           aes(x = avg_log2FC,  # x-axis is logFC
               y = -log10(p_val_adj),  # y-axis will be -log10 of P.Value
               color = color)) +  # color is based on factored color column
    geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
    theme_bw() +  # set color theme
    theme(legend.position = "none") +  # no legend
    scale_color_manual(values = c("red", "blue","grey")) +  # set factor colors
    labs(
      title = "", # no main title
      x = expression(log[2](FC)), # x-axis title
      y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
    ) +
    theme(axis.title.x = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    theme(axis.title.y = element_text(size = 10),
          axis.text.y = element_text(size = 10)) +
    geom_hline(yintercept = hadjpval,  #  horizontal line
                       colour = "black",
                       linetype = "dashed") +
    geom_vline(xintercept = c(LFC,-LFC),  #  vertical line
                       colour = "black",
                       linetype = "dashed") +
    ggtitle(paste0(tissue," ",cell_cluster," LPS vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
    geom_text_repel(data = up10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "maroon", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) +
    geom_text_repel(data = down10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "navyblue", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    )
  pdf(paste0("../../results/volcano/",tolower(tissue),"_",
             tolower(gsub(" ","",cell_cluster)),
             "_volcano.pdf"),width = 6, height = 4)
  print(p)
  dev.off()
}
## Warning: Removed 2 rows containing missing values (geom_text_repel).
df <- pb.treatment.df
cell_types <- as.vector(unique(pigs.annotated$pseudo_bulk))

for (cell_cluster in cell_types) {
  # data
  treatment_vs_control <- df[df$cluster == cell_cluster,]
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  num_DEGs <- vector()
  
  # cutoff based on cluster
  if(cell_cluster == "microglia macrophage") {
    FDRq <- 0.001
    LFC <- 2
  } else {
    FDRq <- 0.05
    LFC <- 1
  }

  # loop through every gene
  for(i in 1:max){
    # if FDRq is met
    if (treatment_vs_control$p_val_adj[i] < FDRq){
      
      if (treatment_vs_control$avg_log2FC[i] > LFC){
        color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
        num_DEGs <- c(num_DEGs,"up")
      }
      else if (treatment_vs_control$avg_log2FC[i] < -LFC){
        color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
        num_DEGs <- c(num_DEGs, "down")
      }
      else{
        color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
        num_DEGs <- c(num_DEGs, "neither")
      }
    }
    # if FDRq is not met
    else{
      color_values <- c(color_values, 3) # 3 when FDRq not met
      num_DEGs <- c(num_DEGs, "neither")
    }
  }
  
  table(num_DEGs)
  treatment_vs_control$color <- factor(color_values)

  # subset genes to label
  up <- treatment_vs_control[treatment_vs_control$color == 1,]
  up10 <- up[1:10,]
  down <- treatment_vs_control[treatment_vs_control$color == 2,]
  down10 <- down[1:10,]
  
  # find hadjpval
  hadjpval <- (-log10(max(
    treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq], 
    na.rm=TRUE)))
  
  # plot
  p <-
    ggplot(data = treatment_vs_control, 
           aes(x = avg_log2FC,  # x-axis is logFC
               y = -log10(p_val_adj),  # y-axis will be -log10 of P.Value
               color = color)) +  # color is based on factored color column
    geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
    theme_bw() +  # set color theme
    theme(legend.position = "none") +  # no legend
    scale_color_manual(values = c("red", "blue","grey")) +  # set factor colors
    labs(
      title = "", # no main title
      x = expression(log[2](FC)), # x-axis title
      y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
    ) +
    theme(axis.title.x = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    theme(axis.title.y = element_text(size = 10),
          axis.text.y = element_text(size = 10)) +
    geom_hline(yintercept = hadjpval,  #  horizontal line
                       colour = "black",
                       linetype = "dashed") +
    geom_vline(xintercept = c(LFC,-LFC),  #  vertical line
                       colour = "black",
                       linetype = "dashed") +
    ggtitle(paste0(tissue," ",cell_cluster," LPS vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
    geom_text_repel(data = up10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "maroon", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) +
    geom_text_repel(data = down10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "navyblue", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) 
  pdf(paste0("../../results/volcano/",tolower(tissue),"_",
             tolower(gsub(" ","",cell_cluster)),
             "_volcano.pdf"),width = 6, height = 4)
  print(p)
  dev.off()
}

Check DEGs

FeaturePlot(pigs.annotated, 
            features = c("TSEN2"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

FeaturePlot(pigs.annotated, 
            features = c("ROR1","DPP10"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

Shiny App

Cleanup object

metadata <- pigs.annotated@meta.data
#metadata <- metadata[,c(34,33,2:9)]
pigs.annotated@meta.data <- metadata
pigs.annotated@assays$SCT@meta.features <- metadata
pigs.annotated@assays$RNA@meta.features <- metadata

Create output folder

# load libraries
library(ShinyCell)

# Run
DefaultAssay(pigs.annotated) <- "RNA"
Idents(pigs.annotated) <- "individual_clusters"
sc.config <- createConfig(pigs.annotated)
makeShinyApp(pigs.annotated, sc.config, gene.mapping = TRUE, shiny.title = "Brain snRNAseq")