Setup

knitr::opts_knit$set(
  root.dir = ".")

library(dplyr)
library(dittoSeq)
library(ggrepel)
library(ggtree)
library(parallel)
library(plotly)  # 3D plot
library(Seurat)  # Idents()
library(SeuratDisk)  # SaveH5Seurat()
library(tibble)  # rownnames_to_column
#options(mc.cores = detectCores() - 1)

Read in object and DEG tables

## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12 
## 3248 3118 3443 3400 4647 1997 2494 3479 2453 1583 1976  384 2529

## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12 
##  452  748  692  738  945  321  638  797  888  190  684  172 1383

## 
##    0    1    2    3    4    5    6    7    8    9   10   11 
##  798  724  907  980 1443  626  670 1257  711  662  487  623

## 
##   0   1   2   3   4   5   6   7   8  10  11 
##  36 611 149 297  99 351  59 153 478  42 196

Define resolution and groups

# set levels and idents
pigs.unannotated$individual_clusters <- 
  factor(pigs.unannotated$SCT_snn_res.0.1,
         levels = c("0","1","2","3","4","5","6","7","8","9","10","11", "12"))
pigs.unannotated$treatment <- factor(pigs.unannotated$treatment,
                                     levels = c("LPS","Saline"))
Idents(pigs.unannotated) <- "individual_clusters"

# normalize if SCTtransform was used
DefaultAssay(pigs.unannotated) <- "RNA"
# natural-log
pigs.unannotated <- NormalizeData(pigs.unannotated, verbose = FALSE)

Clustering QC before annotation

Unannotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","purple",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        #label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors
        )

# Split by sample
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "sample",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

# Split by treatment 
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "treatment",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

Color blind friendly UMAP

dittoDimPlot(object = pigs.unannotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

Percent cells per cluter

# treatment
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, treatment) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=treatment)) +
  geom_col() +
  scale_fill_manual(values = c("purple","gray")) +
  ggtitle("Percentage of treatment per cluster")

# sample
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, sample) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=sample)) +
  geom_col() +
  #scale_fill_manual(values = sample_colors) +
  ggtitle("Percentage of sample per cluster")

Cells per cluster

n_cells <- FetchData(pigs.unannotated, 
                     vars = c("ident", "treatment")) %>%
  dplyr::count(ident,treatment) %>%
  tidyr::spread(ident, n)
n_cells

##   treatment    0    1   2   3   4   5   6   7   8   9  10 11  12
## 1       LPS 2774 1070 383 375 341 373 443 138 194 118 147 72   1
## 2    Saline  487  403 368 336 252 204  38 174  24  78  10 31 100

Median absolute deviation

Remove outliers with log-library size greater than 3 median absolute deviations (MADs) or below the median log-library size.

# MAD example
log.lib <- as.numeric(log10(pigs.unannotated$nCount_RNA))
med <- median(log.lib)
abs.dev <- abs(log.lib - med)
mad <- median(abs.dev)
mad <- mad * 1.4826 # multiply by consistency cutoff
mad

## [1] 0.4023281

# stats function
mad <- mad(log.lib)
mad

## [1] 0.4023281

# remove outliers greater than 3 MADs
remove <- abs(log.lib - median(log.lib)) / mad(log.lib) > 3
table(remove)

## remove
## FALSE 
##  8934

Cluster tree

pigs.unannotated <- BuildClusterTree(object = pigs.unannotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- pigs.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], 
                        shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Cluster Annotation

Cluster 0 - Neuron - excitatory neurons

# Number of cells per condition
n_cells[,c(1,2)]

##   treatment    0
## 1       LPS 2774
## 2    Saline  487

# UMAP with only cluster 0
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        split.by = "treatment",
        group.by = "SCT_snn_res.0.1",
        cols = "dodgerblue")

# Top 40 markers by p-value
VlnPlot(pigs.unannotated,
        features = cluster0$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(pigs.unannotated,
        features = cluster0$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes of interest
# NRGN (excitatory neurons)
VlnPlot(pigs.unannotated,
        features = "NRGN",
        cols = colors,
        split.by = "individual_clusters")

# conserved cluster markers
conserved.cluster0 <- conserved.markers[conserved.markers$cluster == 0,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster0$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 1 - Astrocytes

GFAP - Glial fibrillary acidic protein is an intermediate filament and major component of the astrocyte cytoskeleton (Abcam)
SLC1A3/EAAT1/GLAST - Excitatory amino acid transporter 1 (EAAT1) is an astrocyte-specific glutamate transporter. It may also be known as glutamate aspartate Transporter (GLAST) (Abcam).
SLC1A2/EAAT2/GLT-1 - Excitatory amino acid transporter 2 (EAAT2) is an astrocyte-specific glutamate transporter. EAAT2 has many alternative names including solute carrier family 1 member 2 (SLC1A2) and glutamate transporter 1 (GLT-1) (Abcam)

# Number of cells per condition
n_cells[,c(1,3)]

##   treatment    1
## 1       LPS 1070
## 2    Saline  403

# UMAP with only cluster 1
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "gold")

# Top 40 markers by p-value
VlnPlot(pigs.unannotated,
        features = cluster1$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster1$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes of interest
VlnPlot(pigs.unannotated,
        features = c("AQP4","GFAP","GJA1","SLC1A2"),
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE,
        stack = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("ACSBG1","AQP4","CPVL","ETNPPL","GFAP","GJA1","GNA14","HRH1","HUNK",
                     "IRAG1","LAMA1","LRP4","NR2E1","PLEKHA7","RFX2","RPE65","TNC"),
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE,
        stack = TRUE)

# Genes of interest
VlnPlot(pigs.unannotated,
        features = c("SLC1A3", "SLC1A2"),
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE,
        stack = TRUE)

# conserved cluster markers
conserved.cluster1 <- conserved.markers[conserved.markers$cluster == 1,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster1$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 2 - Fibroblast

# Number of cells per condition
n_cells[,c(1,4)]

##   treatment   2
## 1       LPS 383
## 2    Saline 368

# UMAP with only cluster 2
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "firebrick1")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster2$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster2$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Additional genes of interest
VlnPlot(pigs.unannotated,
        features = "SLC6A13",
        cols = colors,
        split.by = "individual_clusters")

# FLT1 (endothelial cells)
VlnPlot(pigs.unannotated,
        features = "FLT1",
        cols = colors,
        split.by = "individual_clusters")

# conserved cluster markers
conserved.cluster2 <- conserved.markers[conserved.markers$cluster == 2,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster2$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 3 - Oligodendrocyte

# Number of cells per condition
n_cells[,c(1,5)]

##   treatment   3
## 1       LPS 375
## 2    Saline 336

# UMAP with only cluster 3
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "green")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster3$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster3$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Additional genes of interest
# MBP (oligodendrocytes)
VlnPlot(pigs.unannotated,
        features = "MBP",
        cols = colors,
        split.by = "individual_clusters")

VlnPlot(pigs.unannotated,
        features = c("COL1A1", "CLDN5", "OCLN"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("AGMO","CNP","ENPP2","FAM102A","LRRC71","MAG","MBP","MOG",
                     "OPALIN","PLEKHH1","PRR5L","SHROOM4","TMEM144"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster3 <- conserved.markers[conserved.markers$cluster == 3,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster3$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 4 - Mircoglia/Macrophage

# Number of cells per condition
n_cells[,c(1,6)]

##   treatment   4
## 1       LPS 341
## 2    Saline 252

# UMAP with only cluster 4
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "lightgray")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster4$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster4$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Additional genes of interest
VlnPlot(pigs.unannotated,
        features = "P2RY12",
        cols = colors,
        split.by = "individual_clusters")

VlnPlot(pigs.unannotated,
        features = "C1QB",
        cols = colors,
        split.by = "individual_clusters")

# microglia marker
VlnPlot(pigs.unannotated,
        features = "CSF1R",
        cols = colors,
        split.by = "individual_clusters")

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("ARHGAP15","DOCK8","IKZF1","INPP5D","IRAK2","ITGAM",
                     "KYNU","LCP1","LYN","P2RY12","P2RY6","PACSIN2","PIK3R5",
                     "PLEKHA2","RUNX1","SKAP2","VAV1"),
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE,
        stack = TRUE)

# conserved cluster markers
conserved.cluster4 <- conserved.markers[conserved.markers$cluster == 4,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster4$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 5 - Polydendrocyte

# Number of cells per condition
n_cells[,c(1,7)]

##   treatment   5
## 1       LPS 373
## 2    Saline 204

# UMAP with only cluster 5
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "lightgray")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster5$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster5$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# VCAN (oligodendrocyte progenitor cells)
VlnPlot(pigs.unannotated,
        features = "VCAN",
        cols = colors,
        split.by = "individual_clusters")

# Polydendrocyte markers 
VlnPlot(pigs.unannotated,
        features = "OLIG1",
        cols = colors,
        split.by = "individual_clusters")

VlnPlot(pigs.unannotated,
        features = "TNR",
        cols = colors,
        split.by = "individual_clusters")

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("BCAN","CA8","CACNG4","CHST9","CSPG4","MAP3K1","MEGF11",
                     "P2RX7","PDGFRA","PLPP4","SNX22","STK32A"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster5 <- conserved.markers[conserved.markers$cluster == 5,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster5$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 6 - Interneuron

# Number of cells per condition
n_cells[,c(1,8)]

##   treatment   6
## 1       LPS 443
## 2    Saline  38

# UMAP with only cluster 6
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "6"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "cyan")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster6$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster6$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# GAD1 (inhibitory neurons), 
VlnPlot(pigs.unannotated,
        features = "GAD1",
        cols = colors,
        split.by = "individual_clusters")

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("BTBD11","CACNA2D2","ENSSSCG00000050221","GAD1","GAD2",
                     "GRIN3A","HTR2C","LHX6","PLD5","PTCHD4","RSPO2","SIAH3"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster6 <- conserved.markers[conserved.markers$cluster == 6,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster6$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 7 - Endothelial

# Number of cells per condition
n_cells[,c(1,9)]

##   treatment   7
## 1       LPS 138
## 2    Saline 174

# UMAP with only cluster 7
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "7"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "chocolate4")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster7$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster7$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("C4A","CYP2B22","IL1R1","ITGBL1","KIAA1755",
                     "NET1","RSPO3","SAMHD1","SLC25A37","SLC26A2","SLC47A1"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes co-expressed in other clusters
VlnPlot(pigs.unannotated,
        features = c("ADAMTSL3","BNC2","EYA1","NTN1","SLC16A9"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster7 <- conserved.markers[conserved.markers$cluster == 7,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster7$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 8 - Neuron

# Number of cells per condition
n_cells[,c(1,10)]

##   treatment   8
## 1       LPS 194
## 2    Saline  24

# UMAP with only cluster 8
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "8"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "pink")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster8$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster8$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = c("ADAMTS3","EPHB1","FOXP2","FRAS1","OLFML2B","P3H2","SEMA3E",
                     "SLC35F4","SYT6"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster8 <- conserved.markers[conserved.markers$cluster == 8,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster8$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 9 - Neuron

# Number of cells per condition
n_cells[,c(1,11)]

##   treatment   9
## 1       LPS 118
## 2    Saline  78

# UMAP with only cluster 9
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "9"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "orange")

VlnPlot(pigs.unannotated,
        features = cluster9$gene[1],
        cols = colors,
        split.by = "individual_clusters")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster9$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster9$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# FLT1 (endothelial cells),
VlnPlot(pigs.unannotated,
        features = "FLT1",
        cols = colors,
        split.by = "individual_clusters",
        flip = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("CBX6","CFL1","EEF1A2","EEF2","FBXL16","HMGCS1","HPCAL4",
                     "HSPA8","LIN7C","NEFL","OAZ1","RPL15","UBC"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster9 <- conserved.markers[conserved.markers$cluster == 9,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster9$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 10 - Interneuron

# Number of cells per condition
n_cells[,c(1,12)]

##   treatment  10
## 1       LPS 147
## 2    Saline  10

# UMAP with only cluster 10
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "10"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "darkgreen")

VlnPlot(pigs.unannotated,
        features = cluster10$gene[1],
        cols = colors,
        split.by = "individual_clusters")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster10$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster10$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = "RELN",
        cols = colors,
        split.by = "individual_clusters",
      #  stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster10 <- conserved.markers[conserved.markers$cluster == 10,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster10$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 11 - Neuron

# Number of cells per condition
n_cells[,c(1,13)]

##   treatment 11
## 1       LPS 72
## 2    Saline 31

# UMAP with only cluster 11
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "11"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "purple")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster11$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster11$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = "NRG1",
        cols = colors,
        split.by = "individual_clusters")

# conserved cluster markers
conserved.cluster11 <- conserved.markers[conserved.markers$cluster == 11,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster11$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

## Cluster 12 - Neuron

# Number of cells per condition
n_cells[,c(1,14)]

##   treatment  12
## 1       LPS   1
## 2    Saline 100

# UMAP with only cluster 11
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "12"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "deeppink")

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster12$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster12$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# Genes specific to cluster
VlnPlot(pigs.unannotated,
        features = c("ENSSSCG00000041299","HS3ST2","GABRA5","VSTM2B"),
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# no conserved markers for cluster 12
#conserved.cluster12 <- conserved.markers[conserved.markers$cluster == 12,]
#VlnPlot(pigs.unannotated,
 #       features = conserved.cluster12$gene[1:20],
#        cols = colors,
 #       split.by = "individual_clusters",
 #       stack = TRUE,
 #       flip = TRUE)

Indiviudal cluster names

# Rename all identities
pigs.annotated <- RenameIdents(object = pigs.unannotated, 
                               "0" = "Excitatory neuron",
                               "1" = "Astrocytes",
                               "2" = "Fibroblast",
                               "3" = "Oligodendrocyte",
                               "4" = "microglia",
                               "5" = "Polydendrocyte",
                               "6" = "Interneuron",
                               "7" = "Endothelial",
                               "8" = "Neuron 1",
                               "9" = "Neuron 2",
                               "10" = "Interneuron",
                               "11" = "Neuron 3", 
                               "12" = "Neuron 4")
pigs.annotated$individual_clusters <- Idents(pigs.annotated)

Merged cluster names

# Rename all identities
pigs.merged <- RenameIdents(object = pigs.unannotated, 
                               "0" = "Neuron",
                               "1" = "Astrocytes",
                               "2" = "Fibroblast",
                               "3" = "Oligodendrocyte",
                               "4" = "microglia",
                               "5" = "Oligodendrocyte",
                               "6" = "Neuron",
                               "7" = "Fibroblast",
                               "8" = "Neuron",
                               "9" = "Neuron",
                               "10" = "Neuron",
                               "11" = "Neuron", 
                               "12" = "Neuron")
pigs.annotated$merged_clusters <- Idents(pigs.merged)
#remove(pigs.merged)

#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")

# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated

## An object of class Seurat 
## 42773 features across 8934 samples within 2 assays 
## Active assay: RNA (21805 features, 0 variable features)
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap

Pseudo bulk merge all clusters cluster names

# Rename all identities
pigs.pseudo.bulk <- RenameIdents(object = pigs.unannotated, 
                               "0" = "Pseudo bulk",
                               "1" = "Pseudo bulk",
                               "2" = "Pseudo bulk",
                               "3" = "Pseudo bulk",
                               "4" = "Pseudo bulk",
                               "5" = "Pseudo bulk",
                               "6" = "Pseudo bulk",
                               "7" = "Pseudo bulk",
                               "8" = "Pseudo bulk",
                               "9" = "Pseudo bulk",
                               "10" = "Pseudo bulk",
                               "11" = "Pseudo bulk", 
                               "12" = "Pseudo bulk")
pigs.annotated$pseudo_bulk <- Idents(pigs.pseudo.bulk)
#remove(pigs.merged)

#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")

Clustering QC after annotation

Annotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","purple",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
u1 <- DimPlot(object = pigs.annotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        label.box = TRUE,
        repel = TRUE,
        cols = colors,
        group.by = "individual_clusters",)
u1

# Plot the UMAP
u2 <- DimPlot(object = pigs.annotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        label.box = TRUE,
        repel = TRUE,
        cols = colors,
        group.by = "merged_clusters",)
u2

## png 
##   2

## png 
##   2

## null device 
##           1

## pdf 
##   2

## pdf 
##   2

## null device 
##           1

Color blind friendly UMAP

dittoDimPlot(object = pigs.annotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

dittoDimPlot(object = pigs.annotated,
             var = "merged_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

Cluster tree

# individual clusters
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
                                   dims = 1:15,
                                   reorder = FALSE,
                                   reorder.numeric = FALSE)

tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t1 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

t1

# merged clusters
Idents(pigs.annotated) <- "merged_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
                                   dims = 1:15,
                                   reorder = FALSE,
                                   reorder.numeric = FALSE)

tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t2 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

t2

## png 
##   2

## png 
##   2

## null device 
##           1

## pdf 
##   2

## pdf 
##   2

## null device 
##           1

Differential expression

Individual clusters

LPS vs saline within cluster

cell.types <- unique(pigs.annotated$individual_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("LPS","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)
  
  # subset cluster
  cluster <- subset(pigs.annotated, individual_clusters == i)
  
  # skip cluster 11 - Neuron 4 (no LPS cells) Fewer than 3 cells 
  if (i == "Neuron 4") { next }
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "LPS",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  treatment.df <- rbind(treatment.df, markers)
}

## [1] "Neuron 4"
## [1] "Interneuron"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Polydendrocyte"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Fibroblast"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Neuron 3"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Astrocytes"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Excitatory neuron"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Endothelial"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Oligodendrocyte"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "microglia"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Neuron 2"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Neuron 1"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_LPS", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_LPS - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(treatment.df,
            "../../results/DEGs/individual/brain_individual_DEGs_LPS_vs_saline_within_cluster_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
            "../../results/DEGs/individual/brain_individual_DEGs_LPS_vs_saline_within_cluster_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(treatment.df.strict$cluster)

## 
##        Astrocytes       Endothelial Excitatory neuron        Fibroblast 
##              2430              1462              1928              2311 
##       Interneuron         microglia          Neuron 1          Neuron 2 
##               358              1273                68                69 
##          Neuron 3   Oligodendrocyte    Polydendrocyte 
##                16              1190              1625

Single cluster vs. all other clusters

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "individual_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
                              test.use = "MAST")

## Calculating cluster Excitatory neuron

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Astrocytes

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Fibroblast

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Oligodendrocyte

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster microglia

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Polydendrocyte

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Interneuron

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Endothelial

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Neuron 1

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Neuron 2

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Neuron 3

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Neuron 4

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)

# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]

# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
            "../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)
write.table(all.markers.strict,
            "../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

# view number of DEGs
table(all.markers.strict$cluster)

## 
## Excitatory neuron        Astrocytes        Fibroblast   Oligodendrocyte 
##              3249              3130              3468              3445 
##         microglia    Polydendrocyte       Interneuron       Endothelial 
##              4649              2090              2540              3634 
##          Neuron 1          Neuron 2          Neuron 3          Neuron 4 
##              2536              1803               449              2574

Pseudo bulk LPS vs. saline

# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "pseudo_bulk"
pigs.annotated

## An object of class Seurat 
## 42773 features across 8934 samples within 2 assays 
## Active assay: RNA (21805 features, 0 variable features)
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap

cell.types <- unique(pigs.annotated$pseudo_bulk)
pb.treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("LPS","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)

  # subset cluster
  cluster <- subset(pigs.annotated, pseudo_bulk == i)

  # skip cluster 11 - Neuron 4 (no LPS cells) Fewer than 3 cells 
  if (i == "Neuron 4") { next }
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "LPS",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  pb.treatment.df <- rbind(pb.treatment.df, markers)
}

## [1] "Pseudo bulk"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

# reformat table
colnames(pb.treatment.df)[c(3,4)] <- c("percent_LPS", "percent_saline")
pb.treatment.df$percent_difference <- abs(pb.treatment.df$percent_LPS - pb.treatment.df$percent_saline)
pb.treatment.df <- pb.treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(pb.treatment.df,
            "../../results/DEGs/individual/brain_pseudo_bulk_DEGs_LPS_vs_saline_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
pb.treatment.df <- pb.treatment.df[pb.treatment.df$p_val_adj < 0.05,]
write.table(pb.treatment.df,
            "../../results/DEGs/individual/brain_pseudo_bulk_DEGs_LPS_vs_saline_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(pb.treatment.df$cluster)

## 
## Pseudo bulk 
##        1950

Merged clusters

LPS vs saline within cluster

cell.types <- unique(pigs.annotated$merged_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("LPS","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)
  
  # subset cluster
  cluster <- subset(pigs.annotated, merged_clusters == i)
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "LPS",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  treatment.df <- rbind(treatment.df, markers)
}

## [1] "Neuron"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Oligodendrocyte"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Fibroblast"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "Astrocytes"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## [1] "microglia"

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_LPS", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_LPS - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(treatment.df,
            "../../results/DEGs/merged/brain_merged_DEGs_LPS_vs_saline_within_cluster_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
            "../../results/DEGs/merged/brain_merged_DEGs_LPS_vs_saline_within_cluster_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(treatment.df.strict$cluster)

## 
##      Astrocytes      Fibroblast       microglia          Neuron Oligodendrocyte 
##            2430            2444            1273            1293            1404

Single cluster vs. all other clusters

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "merged_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
                              test.use = "MAST")

## Calculating cluster Neuron

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Astrocytes

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Fibroblast

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster Oligodendrocyte

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

## Calculating cluster microglia

## 
## Done!

## Combining coefficients and standard errors

## Calculating log-fold changes

## Calculating likelihood ratio tests

## Refitting on reduced model...

## 
## Done!

# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)

# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]

# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
            "../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)
write.table(all.markers.strict,
            "../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

# view number of DEGs
table(all.markers.strict$cluster)

## 
##          Neuron      Astrocytes      Fibroblast Oligodendrocyte       microglia 
##            3751            3130            3765            2888            4649

Volcanoes

df <- treatment.df
cell_types <- as.vector(unique(pigs.annotated$merged_clusters))

for (cell_cluster in cell_types) {
  # data
  treatment_vs_control <- df[df$cluster == cell_cluster,]
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  num_DEGs <- vector()
  
  # cutoff based on cluster
  if(cell_cluster == "microglia macrophage") {
    FDRq <- 0.001
    LFC <- 2
  } else {
    FDRq <- 0.05
    LFC <- 1
  }

  # loop through every gene
  for(i in 1:max){
    # if FDRq is met
    if (treatment_vs_control$p_val_adj[i] < FDRq){
      
      if (treatment_vs_control$avg_log2FC[i] > LFC){
        color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
        num_DEGs <- c(num_DEGs,"up")
      }
      else if (treatment_vs_control$avg_log2FC[i] < -LFC){
        color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
        num_DEGs <- c(num_DEGs, "down")
      }
      else{
        color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
        num_DEGs <- c(num_DEGs, "neither")
      }
    }
    # if FDRq is not met
    else{
      color_values <- c(color_values, 3) # 3 when FDRq not met
      num_DEGs <- c(num_DEGs, "neither")
    }
  }
  
  table(num_DEGs)
  treatment_vs_control$color <- factor(color_values)

  # subset genes to label
  up <- treatment_vs_control[treatment_vs_control$color == 1,]
  up10 <- up[1:10,]
  down <- treatment_vs_control[treatment_vs_control$color == 2,]
  down10 <- down[1:10,]
  
  # find hadjpval
  hadjpval <- (-log10(max(
    treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq], 
    na.rm=TRUE)))
  
  # plot
  p <-
    ggplot(data = treatment_vs_control, 
           aes(x = avg_log2FC,  # x-axis is logFC
               y = -log10(p_val_adj),  # y-axis will be -log10 of P.Value
               color = color)) +  # color is based on factored color column
    geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
    theme_bw() +  # set color theme
    theme(legend.position = "none") +  # no legend
    scale_color_manual(values = c("red", "blue","grey")) +  # set factor colors
    labs(
      title = "", # no main title
      x = expression(log[2](FC)), # x-axis title
      y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
    ) +
    theme(axis.title.x = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    theme(axis.title.y = element_text(size = 10),
          axis.text.y = element_text(size = 10)) +
    geom_hline(yintercept = hadjpval,  #  horizontal line
                       colour = "black",
                       linetype = "dashed") +
    geom_vline(xintercept = c(LFC,-LFC),  #  vertical line
                       colour = "black",
                       linetype = "dashed") +
    ggtitle(paste0(tissue," ",cell_cluster," LPS vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
    geom_text_repel(data = up10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "maroon", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) +
    geom_text_repel(data = down10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "navyblue", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    )
  pdf(paste0("../../results/volcano/",tolower(tissue),"_",
             tolower(gsub(" ","",cell_cluster)),
             "_volcano.pdf"),width = 6, height = 4)
  print(p)
  dev.off()
}

## Warning: Removed 2 rows containing missing values (geom_text_repel).

df <- pb.treatment.df
cell_types <- as.vector(unique(pigs.annotated$pseudo_bulk))

for (cell_cluster in cell_types) {
  # data
  treatment_vs_control <- df[df$cluster == cell_cluster,]
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  num_DEGs <- vector()
  
  # cutoff based on cluster
  if(cell_cluster == "microglia macrophage") {
    FDRq <- 0.001
    LFC <- 2
  } else {
    FDRq <- 0.05
    LFC <- 1
  }

  # loop through every gene
  for(i in 1:max){
    # if FDRq is met
    if (treatment_vs_control$p_val_adj[i] < FDRq){
      
      if (treatment_vs_control$avg_log2FC[i] > LFC){
        color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
        num_DEGs <- c(num_DEGs,"up")
      }
      else if (treatment_vs_control$avg_log2FC[i] < -LFC){
        color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
        num_DEGs <- c(num_DEGs, "down")
      }
      else{
        color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
        num_DEGs <- c(num_DEGs, "neither")
      }
    }
    # if FDRq is not met
    else{
      color_values <- c(color_values, 3) # 3 when FDRq not met
      num_DEGs <- c(num_DEGs, "neither")
    }
  }
  
  table(num_DEGs)
  treatment_vs_control$color <- factor(color_values)

  # subset genes to label
  up <- treatment_vs_control[treatment_vs_control$color == 1,]
  up10 <- up[1:10,]
  down <- treatment_vs_control[treatment_vs_control$color == 2,]
  down10 <- down[1:10,]
  
  # find hadjpval
  hadjpval <- (-log10(max(
    treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq], 
    na.rm=TRUE)))
  
  # plot
  p <-
    ggplot(data = treatment_vs_control, 
           aes(x = avg_log2FC,  # x-axis is logFC
               y = -log10(p_val_adj),  # y-axis will be -log10 of P.Value
               color = color)) +  # color is based on factored color column
    geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
    theme_bw() +  # set color theme
    theme(legend.position = "none") +  # no legend
    scale_color_manual(values = c("red", "blue","grey")) +  # set factor colors
    labs(
      title = "", # no main title
      x = expression(log[2](FC)), # x-axis title
      y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
    ) +
    theme(axis.title.x = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    theme(axis.title.y = element_text(size = 10),
          axis.text.y = element_text(size = 10)) +
    geom_hline(yintercept = hadjpval,  #  horizontal line
                       colour = "black",
                       linetype = "dashed") +
    geom_vline(xintercept = c(LFC,-LFC),  #  vertical line
                       colour = "black",
                       linetype = "dashed") +
    ggtitle(paste0(tissue," ",cell_cluster," LPS vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
    geom_text_repel(data = up10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "maroon", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) +
    geom_text_repel(data = down10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "navyblue", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) 
  pdf(paste0("../../results/volcano/",tolower(tissue),"_",
             tolower(gsub(" ","",cell_cluster)),
             "_volcano.pdf"),width = 6, height = 4)
  print(p)
  dev.off()
}

Check DEGs

FeaturePlot(pigs.annotated, 
            features = c("TSEN2"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

FeaturePlot(pigs.annotated, 
            features = c("ROR1","DPP10"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

Shiny App

Cleanup object

metadata <- pigs.annotated@meta.data
#metadata <- metadata[,c(34,33,2:9)]
pigs.annotated@meta.data <- metadata
pigs.annotated@assays$SCT@meta.features <- metadata
pigs.annotated@assays$RNA@meta.features <- metadata

Create output folder

# load libraries
library(ShinyCell)

# Run
DefaultAssay(pigs.annotated) <- "RNA"
Idents(pigs.annotated) <- "individual_clusters"
sc.config <- createConfig(pigs.annotated)
makeShinyApp(pigs.annotated, sc.config, gene.mapping = TRUE, shiny.title = "Brain snRNAseq")

LPS Pigs Single Nucleus Brain Annotation

Kennedi Todd & Kimberly Olney

03/19/2022

Setup

Read in object and DEG tables

Define resolution and groups

Clustering QC before annotation

Unannotated UMAP

Color blind friendly UMAP

Percent cells per cluter

Cells per cluster

Median absolute deviation

Cluster tree

Cluster Annotation

Cluster 0 - Neuron - excitatory neurons

Cluster 1 - Astrocytes

Cluster 2 - Fibroblast

Cluster 3 - Oligodendrocyte

Cluster 4 - Mircoglia/Macrophage

Cluster 5 - Polydendrocyte

Cluster 6 - Interneuron

Cluster 7 - Endothelial

Cluster 8 - Neuron

Cluster 9 - Neuron

Cluster 10 - Interneuron

Cluster 11 - Neuron

Indiviudal cluster names

Merged cluster names

Pseudo bulk merge all clusters cluster names

Clustering QC after annotation

Annotated UMAP

Color blind friendly UMAP

Cluster tree

Differential expression

Individual clusters

LPS vs saline within cluster

Single cluster vs. all other clusters

Pseudo bulk LPS vs. saline

Merged clusters

LPS vs saline within cluster

Single cluster vs. all other clusters

Volcanoes

Check DEGs

Shiny App

Cleanup object

Create output folder