Setup

knitr::opts_knit$set(
  root.dir = ".")

library(dplyr)
library(dittoSeq)
library(ggrepel)
library(ggtree)
library(parallel)
library(plotly)  # 3D plot
library(Seurat)  # Idents()
library(SeuratDisk)  # SaveH5Seurat()
library(tibble)  # rownnames_to_column
#options(mc.cores = detectCores() - 1)

Read in object and DEG tables

## 
##    0    1    2    3    4    5 
## 2587 2955  750  564 2555  311

## 
##    0    1    2    3    4    5 
##  301  523  111   48 1174  271

## 
##   0   1   2   3   4   5 
## 175 657 132 176 789 346

## 
##   0   1   2   3   4   5 
##  21  49  85  55 289  68

Define resolution and groups

# set levels and idents
pigs.unannotated$individual_clusters <- 
  factor(pigs.unannotated$SCT_snn_res.0.1,
         levels = c("0","1","2","3","4","5","6","7","8"))
pigs.unannotated$treatment <- factor(pigs.unannotated$treatment,
                                     levels = c("Ecoli","Saline"))
Idents(pigs.unannotated) <- "individual_clusters"

# normalize if SCTtransform was used
DefaultAssay(pigs.unannotated) <- "RNA"
# natural-log
pigs.unannotated <- NormalizeData(pigs.unannotated, verbose = FALSE)

Clustering QC before annotation

Unannotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","green",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        #label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors
        )

# Split by sample
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "sample",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

# Split by treatment 
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "treatment",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

Color blind friendly UMAP

dittoDimPlot(object = pigs.unannotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

Percent cells per cluter

# treatment
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, treatment) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=treatment)) +
  geom_col() +
  scale_fill_manual(values = c("green","gray")) +
  ggtitle("Percentage of treatment per cluster")

# sample
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, sample) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=sample)) +
  geom_col() +
  #scale_fill_manual(values = sample_colors) +
  ggtitle("Percentage of sample per cluster")

Cells per cluster

n_cells <- FetchData(pigs.unannotated, 
                     vars = c("ident", "treatment")) %>%
  dplyr::count(ident,treatment) %>%
  tidyr::spread(ident, n)
n_cells

##   treatment    0   1  2  3  4 5
## 1     Ecoli 1364 113 58 30 32 9
## 2    Saline 2926 267 97 85 38 8

Cluster tree

pigs.unannotated <- BuildClusterTree(object = pigs.unannotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- pigs.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], 
                        shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Cluster Annotation

Cluster 0 -

# Number of cells per condition
n_cells[,c(1,2)]

##   treatment    0
## 1     Ecoli 1364
## 2    Saline 2926

# UMAP with only cluster 0
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "dodgerblue")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "0"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 20 markers by p-value
VlnPlot(pigs.unannotated,
        features = cluster0$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster0 <- conserved.markers[conserved.markers$cluster == 0,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster0$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 1

# Number of cells per condition
n_cells[,c(1,3)]

##   treatment   1
## 1     Ecoli 113
## 2    Saline 267

# UMAP with only cluster 1
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "gold")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "1"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 40 markers by p-value
VlnPlot(pigs.unannotated,
        features = cluster1$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster1$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster1 <- conserved.markers[conserved.markers$cluster == 1,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster1$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 2

# Number of cells per condition
n_cells[,c(1,4)]

##   treatment  2
## 1     Ecoli 58
## 2    Saline 97

# UMAP with only cluster 2
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "firebrick1")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "2"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 20 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster2$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster2 <- conserved.markers[conserved.markers$cluster == 2,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster2$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 3

# Number of cells per condition
n_cells[,c(1,5)]

##   treatment  3
## 1     Ecoli 30
## 2    Saline 85

# UMAP with only cluster 3
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "green")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "3"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster3$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster3 <- conserved.markers[conserved.markers$cluster == 3,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster3$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 4

# Number of cells per condition
n_cells[,c(1,6)]

##   treatment  4
## 1     Ecoli 32
## 2    Saline 38

# UMAP with only cluster 4
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "lightgray")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "4"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster4$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster4$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster4 <- conserved.markers[conserved.markers$cluster == 4,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster4$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 5

# Number of cells per condition
n_cells[,c(1,7)]

##   treatment 5
## 1     Ecoli 9
## 2    Saline 8

# UMAP with only cluster 5
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "lightgray")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "5"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster5$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster5$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster5 <- conserved.markers[conserved.markers$cluster == 5,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster5$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Indiviudal cluster names

# Rename all identities
pigs.annotated <- RenameIdents(object = pigs.unannotated, 
                               "0" = "cluster 0",
                               "1" = "cluster 1",
                               "2" = "cluster 2",
                               "3" = "cluster 3",
                               "4" = "cluster 4",
                               "5" = "cluster 5")
pigs.annotated$individual_clusters <- Idents(pigs.annotated)

Merged cluster names

# Rename all identities
pigs.merged <- RenameIdents(object = pigs.unannotated, 
                               "0" = "0",
                               "1" = "1",
                               "2" = "2",
                               "3" = "3",
                               "4" = "4",
                               "5" = "5")
pigs.annotated$merged_clusters <- Idents(pigs.merged)
#remove(pigs.merged)

#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")

# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated

## An object of class Seurat 
## 28225 features across 5027 samples within 2 assays 
## Active assay: RNA (14276 features, 0 variable features)
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap

Pseudo bulk merge all clusters cluster names

# Rename all identities
pigs.pseudo.bulk <- RenameIdents(object = pigs.unannotated, 
                               "0" = "Pseudo bulk",
                               "1" = "Pseudo bulk",
                               "2" = "Pseudo bulk",
                               "3" = "Pseudo bulk",
                               "4" = "Pseudo bulk",
                               "5" = "Pseudo bulk")
pigs.annotated$pseudo_bulk <- Idents(pigs.pseudo.bulk)
#remove(pigs.merged)

#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")

Clustering QC after annotation

Annotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","green",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
u1 <- DimPlot(object = pigs.annotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        label.box = TRUE,
        repel = TRUE,
        cols = colors,
        group.by = "individual_clusters",)
u1

# Plot the UMAP
u2 <- DimPlot(object = pigs.annotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        label.box = TRUE,
        repel = TRUE,
        cols = colors,
        group.by = "merged_clusters",)
u2

## png 
##   2

## png 
##   2

## null device 
##           1

## pdf 
##   2

## pdf 
##   2

## null device 
##           1

Color blind friendly UMAP

dittoDimPlot(object = pigs.annotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

dittoDimPlot(object = pigs.annotated,
             var = "merged_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

Cluster tree

# individual clusters
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
                                   dims = 1:15,
                                   reorder = FALSE,
                                   reorder.numeric = FALSE)

tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t1 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

t1

# merged clusters
Idents(pigs.annotated) <- "merged_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
                                   dims = 1:15,
                                   reorder = FALSE,
                                   reorder.numeric = FALSE)

tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t2 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

t2

## png 
##   2

## png 
##   2

## null device 
##           1

## pdf 
##   2

## pdf 
##   2

## null device 
##           1

Differential expression

Individual clusters

Ecoli vs saline within cluster

cell.types <- unique(pigs.annotated$individual_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("Ecoli","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)
  
  # subset cluster
  cluster <- subset(pigs.annotated, individual_clusters == i)
  
  # skip cluster 11 - Neuron 4 (no Ecoli cells) Fewer than 3 cells 
  if (i == "Neuron 3" | i == "Neuron 4") { next }
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "Ecoli",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  treatment.df <- rbind(treatment.df, markers)
}

# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_Ecoli - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(treatment.df,
            "../../results/DEGs/individual/brain_individual_DEGs_Ecoli_vs_saline_within_cluster_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
            "../../results/DEGs/individual/brain_individual_DEGs_Ecoli_vs_saline_within_cluster_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(treatment.df.strict$cluster)

Single cluster vs. all other clusters

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "individual_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
                              test.use = "MAST")

# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)

# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]

# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
            "../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)
write.table(all.markers.strict,
            "../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

# view number of DEGs
table(all.markers.strict$cluster)

Pseudo bulk Ecoli vs. saline

# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "pseudo_bulk"
pigs.annotated

cell.types <- unique(pigs.annotated$pseudo_bulk)
pb.treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("Ecoli","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)

  # subset cluster
  cluster <- subset(pigs.annotated, pseudo_bulk == i)

  # skip cluster 11 - Neuron 4 (no Ecoli cells) Fewer than 3 cells 
  if (i == "Neuron 4") { next }
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "Ecoli",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  pb.treatment.df <- rbind(pb.treatment.df, markers)
}

# reformat table
colnames(pb.treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
pb.treatment.df$percent_difference <- abs(pb.treatment.df$percent_Ecoli - pb.treatment.df$percent_saline)
pb.treatment.df <- pb.treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(pb.treatment.df,
            "../../results/DEGs/pseudobulk/brain_pseudo_bulk_DEGs_Ecoli_vs_saline_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
pb.treatment.df <- pb.treatment.df[pb.treatment.df$p_val_adj < 0.05,]
write.table(pb.treatment.df,
            "../../results/DEGs/pseudobulk/brain_pseudo_bulk_DEGs_Ecoli_vs_saline_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(pb.treatment.df$cluster)

Merged clusters

Ecoli vs saline within cluster

cell.types <- unique(pigs.annotated$merged_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("Ecoli","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)
  
  # subset cluster
  cluster <- subset(pigs.annotated, merged_clusters == i)
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "Ecoli",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  treatment.df <- rbind(treatment.df, markers)
}

# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_Ecoli - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(treatment.df,
            "../../results/DEGs/merged/brain_merged_DEGs_Ecoli_vs_saline_within_cluster_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
            "../../results/DEGs/merged/brain_merged_DEGs_Ecoli_vs_saline_within_cluster_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(treatment.df.strict$cluster)

Single cluster vs. all other clusters

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "merged_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
                              test.use = "MAST")

# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)

# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]

# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
            "../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)
write.table(all.markers.strict,
            "../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

# view number of DEGs
table(all.markers.strict$cluster)

Volcanoes

df <- treatment.df
cell_types <- as.vector(unique(pigs.annotated$merged_clusters))

for (cell_cluster in cell_types) {
  # data
  treatment_vs_control <- df[df$cluster == cell_cluster,]
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  num_DEGs <- vector()
  
  # cutoff based on cluster
  if(cell_cluster == "microglia macrophage") {
    FDRq <- 0.001
    LFC <- 2
  } else {
    FDRq <- 0.05
    LFC <- 1
  }

  # loop through every gene
  for(i in 1:max){
    # if FDRq is met
    if (treatment_vs_control$p_val_adj[i] < FDRq){
      
      if (treatment_vs_control$avg_log2FC[i] > LFC){
        color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
        num_DEGs <- c(num_DEGs,"up")
      }
      else if (treatment_vs_control$avg_log2FC[i] < -LFC){
        color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
        num_DEGs <- c(num_DEGs, "down")
      }
      else{
        color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
        num_DEGs <- c(num_DEGs, "neither")
      }
    }
    # if FDRq is not met
    else{
      color_values <- c(color_values, 3) # 3 when FDRq not met
      num_DEGs <- c(num_DEGs, "neither")
    }
  }
  
  table(num_DEGs)
  treatment_vs_control$color <- factor(color_values)

  # subset genes to label
  up <- treatment_vs_control[treatment_vs_control$color == 1,]
  up10 <- up[1:10,]
  down <- treatment_vs_control[treatment_vs_control$color == 2,]
  down10 <- down[1:10,]
  
  # find hadjpval
  hadjpval <- (-log10(max(
    treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq], 
    na.rm=TRUE)))
  
  # plot
  p <-
    ggplot(data = treatment_vs_control, 
           aes(x = avg_log2FC,  # x-axis is logFC
               y = -log10(p_val_adj),  # y-axis will be -log10 of P.Value
               color = color)) +  # color is based on factored color column
    geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
    theme_bw() +  # set color theme
    theme(legend.position = "none") +  # no legend
    scale_color_manual(values = c("red", "blue","grey")) +  # set factor colors
    labs(
      title = "", # no main title
      x = expression(log[2](FC)), # x-axis title
      y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
    ) +
    theme(axis.title.x = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    theme(axis.title.y = element_text(size = 10),
          axis.text.y = element_text(size = 10)) +
    geom_hline(yintercept = hadjpval,  #  horizontal line
                       colour = "black",
                       linetype = "dashed") +
    geom_vline(xintercept = c(LFC,-LFC),  #  vertical line
                       colour = "black",
                       linetype = "dashed") +
    ggtitle(paste0(tissue," ",cell_cluster," Ecoli vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
    geom_text_repel(data = up10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "maroon", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) +
    geom_text_repel(data = down10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "navyblue", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    )
  pdf(paste0("../../results/volcano/",tolower(tissue),"_",
             tolower(gsub(" ","",cell_cluster)),
             "_volcano.pdf"),width = 6, height = 4)
  print(p)
  dev.off()
}

df <- pb.treatment.df
cell_types <- as.vector(unique(pigs.annotated$pseudo_bulk))

for (cell_cluster in cell_types) {
  # data
  treatment_vs_control <- df[df$cluster == cell_cluster,]
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  num_DEGs <- vector()
  
  # cutoff based on cluster
  if(cell_cluster == "microglia macrophage") {
    FDRq <- 0.001
    LFC <- 2
  } else {
    FDRq <- 0.05
    LFC <- 1
  }

  # loop through every gene
  for(i in 1:max){
    # if FDRq is met
    if (treatment_vs_control$p_val_adj[i] < FDRq){
      
      if (treatment_vs_control$avg_log2FC[i] > LFC){
        color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
        num_DEGs <- c(num_DEGs,"up")
      }
      else if (treatment_vs_control$avg_log2FC[i] < -LFC){
        color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
        num_DEGs <- c(num_DEGs, "down")
      }
      else{
        color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
        num_DEGs <- c(num_DEGs, "neither")
      }
    }
    # if FDRq is not met
    else{
      color_values <- c(color_values, 3) # 3 when FDRq not met
      num_DEGs <- c(num_DEGs, "neither")
    }
  }
  
  table(num_DEGs)
  treatment_vs_control$color <- factor(color_values)

  # subset genes to label
  up <- treatment_vs_control[treatment_vs_control$color == 1,]
  up10 <- up[1:10,]
  down <- treatment_vs_control[treatment_vs_control$color == 2,]
  down10 <- down[1:10,]
  
  # find hadjpval
  hadjpval <- (-log10(max(
    treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq], 
    na.rm=TRUE)))
  
  # plot
  p <-
    ggplot(data = treatment_vs_control, 
           aes(x = avg_log2FC,  # x-axis is logFC
               y = -log10(p_val_adj),  # y-axis will be -log10 of P.Value
               color = color)) +  # color is based on factored color column
    geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
    theme_bw() +  # set color theme
    theme(legend.position = "none") +  # no legend
    scale_color_manual(values = c("red", "blue","grey")) +  # set factor colors
    labs(
      title = "", # no main title
      x = expression(log[2](FC)), # x-axis title
      y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
    ) +
    theme(axis.title.x = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    theme(axis.title.y = element_text(size = 10),
          axis.text.y = element_text(size = 10)) +
    geom_hline(yintercept = hadjpval,  #  horizontal line
                       colour = "black",
                       linetype = "dashed") +
    geom_vline(xintercept = c(LFC,-LFC),  #  vertical line
                       colour = "black",
                       linetype = "dashed") +
    ggtitle(paste0(tissue," ",cell_cluster," Ecoli vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
    geom_text_repel(data = up10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "maroon", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) +
    geom_text_repel(data = down10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "navyblue", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) 
  pdf(paste0("../../results/volcano/",tolower(tissue),"_",
             tolower(gsub(" ","",cell_cluster)),
             "_volcano.pdf"),width = 6, height = 4)
  print(p)
  dev.off()
}

Check DEGs

FeaturePlot(pigs.annotated, 
            features = c("ATF3", "CXCL10"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

FeaturePlot(pigs.annotated, 
            features = c("TSEN2", "CXCL2"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

FeaturePlot(pigs.annotated, 
            features = c("ROR1","DPP10"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

Shiny App

Cleanup object

metadata <- pigs.annotated@meta.data
#metadata <- metadata[,c(34,33,2:9)]
pigs.annotated@meta.data <- metadata
pigs.annotated@assays$SCT@meta.features <- metadata
pigs.annotated@assays$RNA@meta.features <- metadata

Create output folder

# load libraries
remove.packages("reticulate")
library(ShinyCell)

# Run
DefaultAssay(pigs.annotated) <- "RNA"
Idents(pigs.annotated) <- "individual_clusters"
sc.config <- createConfig(pigs.annotated)
makeShinyApp(pigs.annotated, sc.config, gene.mapping = TRUE, shiny.title = "Brain snRNAseq two good pigs")

Ecoli Pigs Single Cell Brain Annotation

Kennedi Todd & Kimberly Olney

04/01/2022

Setup

Read in object and DEG tables

Define resolution and groups

Clustering QC before annotation

Unannotated UMAP

Color blind friendly UMAP

Percent cells per cluter

Cells per cluster

Cluster tree

Cluster Annotation

Cluster 0 -

Cluster 1

Cluster 2

Cluster 3

Cluster 4

Cluster 5

Indiviudal cluster names

Merged cluster names

Pseudo bulk merge all clusters cluster names

Clustering QC after annotation

Annotated UMAP

Color blind friendly UMAP

Cluster tree

Differential expression

Individual clusters

Ecoli vs saline within cluster

Single cluster vs. all other clusters

Pseudo bulk Ecoli vs. saline

Merged clusters

Ecoli vs saline within cluster

Single cluster vs. all other clusters

Volcanoes

Check DEGs

Shiny App

Cleanup object

Create output folder