Setup

knitr::opts_knit$set(
  root.dir = ".")
library(dplyr)
library(dittoSeq)
library(ggrepel)
library(ggtree)
library(parallel)
library(plotly)  # 3D plot
library(Seurat)  # Idents()
library(SeuratDisk)  # SaveH5Seurat()
library(tibble)  # rownnames_to_column
#options(mc.cores = detectCores() - 1)

Read in object and DEG tables

## 
##    0    1    2    3    4    5 
## 2587 2955  750  564 2555  311
## 
##    0    1    2    3    4    5 
##  301  523  111   48 1174  271
## 
##   0   1   2   3   4   5 
## 175 657 132 176 789 346
## 
##   0   1   2   3   4   5 
##  21  49  85  55 289  68

Define resolution and groups

# set levels and idents
pigs.unannotated$individual_clusters <- 
  factor(pigs.unannotated$SCT_snn_res.0.1,
         levels = c("0","1","2","3","4","5","6","7","8"))
pigs.unannotated$treatment <- factor(pigs.unannotated$treatment,
                                     levels = c("Ecoli","Saline"))
Idents(pigs.unannotated) <- "individual_clusters"

# normalize if SCTtransform was used
DefaultAssay(pigs.unannotated) <- "RNA"
# natural-log
pigs.unannotated <- NormalizeData(pigs.unannotated, verbose = FALSE)

Clustering QC before annotation

Unannotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","green",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        #label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors
        )

# Split by sample
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "sample",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

# Split by treatment 
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "treatment",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

Color blind friendly UMAP

dittoDimPlot(object = pigs.unannotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

Percent cells per cluter

# treatment
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, treatment) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=treatment)) +
  geom_col() +
  scale_fill_manual(values = c("green","gray")) +
  ggtitle("Percentage of treatment per cluster")

# sample
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, sample) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=sample)) +
  geom_col() +
  #scale_fill_manual(values = sample_colors) +
  ggtitle("Percentage of sample per cluster")

Cells per cluster

n_cells <- FetchData(pigs.unannotated, 
                     vars = c("ident", "treatment")) %>%
  dplyr::count(ident,treatment) %>%
  tidyr::spread(ident, n)
n_cells
##   treatment    0   1  2  3  4 5
## 1     Ecoli 1364 113 58 30 32 9
## 2    Saline 2926 267 97 85 38 8

Cluster tree

pigs.unannotated <- BuildClusterTree(object = pigs.unannotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- pigs.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], 
                        shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Cluster Annotation

Cluster 0 -

# Number of cells per condition
n_cells[,c(1,2)]
##   treatment    0
## 1     Ecoli 1364
## 2    Saline 2926
# UMAP with only cluster 0
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "0"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        cols = "dodgerblue")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "0"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 20 markers by p-value
VlnPlot(pigs.unannotated,
        features = cluster0$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster0 <- conserved.markers[conserved.markers$cluster == 0,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster0$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 1

# Number of cells per condition
n_cells[,c(1,3)]
##   treatment   1
## 1     Ecoli 113
## 2    Saline 267
# UMAP with only cluster 1
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "1"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "gold")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "1"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 40 markers by p-value
VlnPlot(pigs.unannotated,
        features = cluster1$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster1$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster1 <- conserved.markers[conserved.markers$cluster == 1,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster1$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 2

# Number of cells per condition
n_cells[,c(1,4)]
##   treatment  2
## 1     Ecoli 58
## 2    Saline 97
# UMAP with only cluster 2
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "2"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "firebrick1")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "2"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 20 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster2$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster2 <- conserved.markers[conserved.markers$cluster == 2,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster2$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 3

# Number of cells per condition
n_cells[,c(1,5)]
##   treatment  3
## 1     Ecoli 30
## 2    Saline 85
# UMAP with only cluster 3
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "3"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "green")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "3"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster3$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster3 <- conserved.markers[conserved.markers$cluster == 3,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster3$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 4

# Number of cells per condition
n_cells[,c(1,6)]
##   treatment  4
## 1     Ecoli 32
## 2    Saline 38
# UMAP with only cluster 4
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "4"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "lightgray")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "4"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster4$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster4$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster4 <- conserved.markers[conserved.markers$cluster == 4,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster4$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Cluster 5

# Number of cells per condition
n_cells[,c(1,7)]
##   treatment 5
## 1     Ecoli 9
## 2    Saline 8
# UMAP with only cluster 5
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "5"),
        reduction = "umap", 
        label = TRUE,
        label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "SCT_snn_res.0.1",
        cols = "lightgray")

# show treatment colors 
DimPlot(object = subset(pigs.unannotated, SCT_snn_res.0.1 == "5"),
        group.by = "treatment",
        shuffle = TRUE,
        cols = treatment_colors)

# Top 40 markers by p-value and then log2FC
VlnPlot(pigs.unannotated,
        features = cluster5$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

VlnPlot(pigs.unannotated,
        features = cluster5$gene[21:40],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

# conserved cluster markers
conserved.cluster5 <- conserved.markers[conserved.markers$cluster == 5,]
VlnPlot(pigs.unannotated,
        features = conserved.cluster5$gene[1:20],
        cols = colors,
        split.by = "individual_clusters",
        stack = TRUE,
        flip = TRUE)

Indiviudal cluster names

# Rename all identities
pigs.annotated <- RenameIdents(object = pigs.unannotated, 
                               "0" = "cluster 0",
                               "1" = "cluster 1",
                               "2" = "cluster 2",
                               "3" = "cluster 3",
                               "4" = "cluster 4",
                               "5" = "cluster 5")
pigs.annotated$individual_clusters <- Idents(pigs.annotated)

Merged cluster names

# Rename all identities
pigs.merged <- RenameIdents(object = pigs.unannotated, 
                               "0" = "0",
                               "1" = "1",
                               "2" = "2",
                               "3" = "3",
                               "4" = "4",
                               "5" = "5")
pigs.annotated$merged_clusters <- Idents(pigs.merged)
#remove(pigs.merged)

#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")

# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated
## An object of class Seurat 
## 28225 features across 5027 samples within 2 assays 
## Active assay: RNA (14276 features, 0 variable features)
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap

Pseudo bulk merge all clusters cluster names

# Rename all identities
pigs.pseudo.bulk <- RenameIdents(object = pigs.unannotated, 
                               "0" = "Pseudo bulk",
                               "1" = "Pseudo bulk",
                               "2" = "Pseudo bulk",
                               "3" = "Pseudo bulk",
                               "4" = "Pseudo bulk",
                               "5" = "Pseudo bulk")
pigs.annotated$pseudo_bulk <- Idents(pigs.pseudo.bulk)
#remove(pigs.merged)

#save object
saveRDS(pigs.annotated, "../../rObjects/brain_annotated.rds")

Clustering QC after annotation

Annotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","green",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
u1 <- DimPlot(object = pigs.annotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        label.box = TRUE,
        repel = TRUE,
        cols = colors,
        group.by = "individual_clusters",)
u1

# Plot the UMAP
u2 <- DimPlot(object = pigs.annotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        label.box = TRUE,
        repel = TRUE,
        cols = colors,
        group.by = "merged_clusters",)
u2

## png 
##   2
## png 
##   2
## null device 
##           1
## pdf 
##   2
## pdf 
##   2
## null device 
##           1

Color blind friendly UMAP

dittoDimPlot(object = pigs.annotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

dittoDimPlot(object = pigs.annotated,
             var = "merged_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = FALSE)

Cluster tree

# individual clusters
Idents(pigs.annotated) <- "individual_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
                                   dims = 1:15,
                                   reorder = FALSE,
                                   reorder.numeric = FALSE)

tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t1 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
t1

# merged clusters
Idents(pigs.annotated) <- "merged_clusters"
pigs.annotated <- BuildClusterTree(object = pigs.annotated,
                                   dims = 1:15,
                                   reorder = FALSE,
                                   reorder.numeric = FALSE)

tree <- pigs.annotated@tools$BuildClusterTree
tree$tip.label <- tree$tip.label
t2 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
t2

## png 
##   2
## png 
##   2
## null device 
##           1
## pdf 
##   2
## pdf 
##   2
## null device 
##           1

Differential expression

Individual clusters

Ecoli vs saline within cluster

cell.types <- unique(pigs.annotated$individual_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("Ecoli","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)
  
  # subset cluster
  cluster <- subset(pigs.annotated, individual_clusters == i)
  
  # skip cluster 11 - Neuron 4 (no Ecoli cells) Fewer than 3 cells 
  if (i == "Neuron 3" | i == "Neuron 4") { next }
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "Ecoli",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  treatment.df <- rbind(treatment.df, markers)
}

# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_Ecoli - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(treatment.df,
            "../../results/DEGs/individual/brain_individual_DEGs_Ecoli_vs_saline_within_cluster_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
            "../../results/DEGs/individual/brain_individual_DEGs_Ecoli_vs_saline_within_cluster_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(treatment.df.strict$cluster)

Single cluster vs. all other clusters

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "individual_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
                              test.use = "MAST")

# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)

# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]

# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
            "../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)
write.table(all.markers.strict,
            "../../results/DEGs/individual/brain_individual_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

# view number of DEGs
table(all.markers.strict$cluster)

Pseudo bulk Ecoli vs. saline

# read object
pigs.annotated <- readRDS("../../rObjects/brain_annotated.rds")
Idents(pigs.annotated) <- "pseudo_bulk"
pigs.annotated

cell.types <- unique(pigs.annotated$pseudo_bulk)
pb.treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("Ecoli","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)

  # subset cluster
  cluster <- subset(pigs.annotated, pseudo_bulk == i)

  # skip cluster 11 - Neuron 4 (no Ecoli cells) Fewer than 3 cells 
  if (i == "Neuron 4") { next }
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "Ecoli",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  pb.treatment.df <- rbind(pb.treatment.df, markers)
}

# reformat table
colnames(pb.treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
pb.treatment.df$percent_difference <- abs(pb.treatment.df$percent_Ecoli - pb.treatment.df$percent_saline)
pb.treatment.df <- pb.treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(pb.treatment.df,
            "../../results/DEGs/pseudobulk/brain_pseudo_bulk_DEGs_Ecoli_vs_saline_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
pb.treatment.df <- pb.treatment.df[pb.treatment.df$p_val_adj < 0.05,]
write.table(pb.treatment.df,
            "../../results/DEGs/pseudobulk/brain_pseudo_bulk_DEGs_Ecoli_vs_saline_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(pb.treatment.df$cluster)

Merged clusters

Ecoli vs saline within cluster

cell.types <- unique(pigs.annotated$merged_clusters)
treatment.df <- data.frame()
pigs.annotated$treatment <- factor(pigs.annotated$treatment,
                                   levels = c("Ecoli","Saline"))

for (i in cell.types) {
  
  # print what cluster we're on
  print(i)
  
  # subset cluster
  cluster <- subset(pigs.annotated, merged_clusters == i)
  
  # DE by treatment
  Idents(cluster) <- cluster$treatment
  markers <- FindMarkers(object = cluster,
                         ident.1 = "Ecoli",
                         ident.2 = "Saline",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  
  # skip if no DEGs
  if(nrow(markers) == 0) {
    next
  }
  
  # reformat
  markers$gene <- rownames(markers)
  rownames(markers) <- 1:nrow(markers)
  markers$cluster <- i
  
  # add to master table
  treatment.df <- rbind(treatment.df, markers)
}

# reformat table
colnames(treatment.df)[c(3,4)] <- c("percent_Ecoli", "percent_saline")
treatment.df$percent_difference <- abs(treatment.df$percent_Ecoli - treatment.df$percent_saline)
treatment.df <- treatment.df[,c(7,6,1,5,2,3,4,8)]

# write table
write.table(treatment.df,
            "../../results/DEGs/merged/brain_merged_DEGs_Ecoli_vs_saline_within_cluster_pval_1.00.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

# strict filtering
treatment.df.strict <- treatment.df[treatment.df$p_val_adj < 0.05,]
write.table(treatment.df.strict,
            "../../results/DEGs/merged/brain_merged_DEGs_Ecoli_vs_saline_within_cluster_pval_0.05.tsv",
            sep = "\t",
            quote = FALSE, row.names = FALSE)

table(treatment.df.strict$cluster)

Single cluster vs. all other clusters

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
Idents(pigs.annotated) <- "merged_clusters"
all.markers <- FindAllMarkers(object = pigs.annotated,
                              test.use = "MAST")

# create new column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# reorder columns
all.markers <- all.markers[,c(6,7,3,4,8,5,2)]
rownames(all.markers) <- 1:nrow(all.markers)

# strict filtering
all.markers.strict <- all.markers[all.markers$p_val_adj < 0.05,]

# save object and write table
saveRDS(all.markers, "../../rObjects/annotated_all_markers.rds")
write.table(all.markers,
            "../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_1.00.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)
write.table(all.markers.strict,
            "../../results/DEGs/merged/brain_merged_DEGs_single_cluster_vs_other_clusters_0.05.tsv",
            sep = "\t",
            quote = FALSE,
            row.names = FALSE)

# view number of DEGs
table(all.markers.strict$cluster)

Volcanoes

df <- treatment.df
cell_types <- as.vector(unique(pigs.annotated$merged_clusters))

for (cell_cluster in cell_types) {
  # data
  treatment_vs_control <- df[df$cluster == cell_cluster,]
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  num_DEGs <- vector()
  
  # cutoff based on cluster
  if(cell_cluster == "microglia macrophage") {
    FDRq <- 0.001
    LFC <- 2
  } else {
    FDRq <- 0.05
    LFC <- 1
  }

  # loop through every gene
  for(i in 1:max){
    # if FDRq is met
    if (treatment_vs_control$p_val_adj[i] < FDRq){
      
      if (treatment_vs_control$avg_log2FC[i] > LFC){
        color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
        num_DEGs <- c(num_DEGs,"up")
      }
      else if (treatment_vs_control$avg_log2FC[i] < -LFC){
        color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
        num_DEGs <- c(num_DEGs, "down")
      }
      else{
        color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
        num_DEGs <- c(num_DEGs, "neither")
      }
    }
    # if FDRq is not met
    else{
      color_values <- c(color_values, 3) # 3 when FDRq not met
      num_DEGs <- c(num_DEGs, "neither")
    }
  }
  
  table(num_DEGs)
  treatment_vs_control$color <- factor(color_values)

  # subset genes to label
  up <- treatment_vs_control[treatment_vs_control$color == 1,]
  up10 <- up[1:10,]
  down <- treatment_vs_control[treatment_vs_control$color == 2,]
  down10 <- down[1:10,]
  
  # find hadjpval
  hadjpval <- (-log10(max(
    treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq], 
    na.rm=TRUE)))
  
  # plot
  p <-
    ggplot(data = treatment_vs_control, 
           aes(x = avg_log2FC,  # x-axis is logFC
               y = -log10(p_val_adj),  # y-axis will be -log10 of P.Value
               color = color)) +  # color is based on factored color column
    geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
    theme_bw() +  # set color theme
    theme(legend.position = "none") +  # no legend
    scale_color_manual(values = c("red", "blue","grey")) +  # set factor colors
    labs(
      title = "", # no main title
      x = expression(log[2](FC)), # x-axis title
      y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
    ) +
    theme(axis.title.x = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    theme(axis.title.y = element_text(size = 10),
          axis.text.y = element_text(size = 10)) +
    geom_hline(yintercept = hadjpval,  #  horizontal line
                       colour = "black",
                       linetype = "dashed") +
    geom_vline(xintercept = c(LFC,-LFC),  #  vertical line
                       colour = "black",
                       linetype = "dashed") +
    ggtitle(paste0(tissue," ",cell_cluster," Ecoli vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
    geom_text_repel(data = up10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "maroon", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) +
    geom_text_repel(data = down10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "navyblue", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    )
  pdf(paste0("../../results/volcano/",tolower(tissue),"_",
             tolower(gsub(" ","",cell_cluster)),
             "_volcano.pdf"),width = 6, height = 4)
  print(p)
  dev.off()
}
df <- pb.treatment.df
cell_types <- as.vector(unique(pigs.annotated$pseudo_bulk))

for (cell_cluster in cell_types) {
  # data
  treatment_vs_control <- df[df$cluster == cell_cluster,]
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  num_DEGs <- vector()
  
  # cutoff based on cluster
  if(cell_cluster == "microglia macrophage") {
    FDRq <- 0.001
    LFC <- 2
  } else {
    FDRq <- 0.05
    LFC <- 1
  }

  # loop through every gene
  for(i in 1:max){
    # if FDRq is met
    if (treatment_vs_control$p_val_adj[i] < FDRq){
      
      if (treatment_vs_control$avg_log2FC[i] > LFC){
        color_values <- c(color_values, 1) # 1 when logFC met and positive and FDRq met
        num_DEGs <- c(num_DEGs,"up")
      }
      else if (treatment_vs_control$avg_log2FC[i] < -LFC){
        color_values <- c(color_values, 2) # 2 when logFC met and negative and FDRq met
        num_DEGs <- c(num_DEGs, "down")
      }
      else{
        color_values <- c(color_values, 3) # 3 when logFC not met and FDRq met
        num_DEGs <- c(num_DEGs, "neither")
      }
    }
    # if FDRq is not met
    else{
      color_values <- c(color_values, 3) # 3 when FDRq not met
      num_DEGs <- c(num_DEGs, "neither")
    }
  }
  
  table(num_DEGs)
  treatment_vs_control$color <- factor(color_values)

  # subset genes to label
  up <- treatment_vs_control[treatment_vs_control$color == 1,]
  up10 <- up[1:10,]
  down <- treatment_vs_control[treatment_vs_control$color == 2,]
  down10 <- down[1:10,]
  
  # find hadjpval
  hadjpval <- (-log10(max(
    treatment_vs_control$p_val_adj[treatment_vs_control$p_val_adj < FDRq], 
    na.rm=TRUE)))
  
  # plot
  p <-
    ggplot(data = treatment_vs_control, 
           aes(x = avg_log2FC,  # x-axis is logFC
               y = -log10(p_val_adj),  # y-axis will be -log10 of P.Value
               color = color)) +  # color is based on factored color column
    geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
    theme_bw() +  # set color theme
    theme(legend.position = "none") +  # no legend
    scale_color_manual(values = c("red", "blue","grey")) +  # set factor colors
    labs(
      title = "", # no main title
      x = expression(log[2](FC)), # x-axis title
      y = expression(-log[10] ~ "(" ~ italic("adjusted p") ~ "-value)") # y-axis title
    ) +
    theme(axis.title.x = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    theme(axis.title.y = element_text(size = 10),
          axis.text.y = element_text(size = 10)) +
    geom_hline(yintercept = hadjpval,  #  horizontal line
                       colour = "black",
                       linetype = "dashed") +
    geom_vline(xintercept = c(LFC,-LFC),  #  vertical line
                       colour = "black",
                       linetype = "dashed") +
    ggtitle(paste0(tissue," ",cell_cluster," Ecoli vs Saline\nFDRq < ", FDRq, ", LFC = ", LFC)) +
    geom_text_repel(data = up10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "maroon", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) +
    geom_text_repel(data = down10,
                    aes(x = avg_log2FC, y= -log10(p_val_adj), label = gene), 
                    color = "navyblue", 
                    fontface="italic",
                    max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                    ) 
  pdf(paste0("../../results/volcano/",tolower(tissue),"_",
             tolower(gsub(" ","",cell_cluster)),
             "_volcano.pdf"),width = 6, height = 4)
  print(p)
  dev.off()
}

Check DEGs

FeaturePlot(pigs.annotated, 
            features = c("ATF3", "CXCL10"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

FeaturePlot(pigs.annotated, 
            features = c("TSEN2", "CXCL2"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

FeaturePlot(pigs.annotated, 
            features = c("ROR1","DPP10"), 
            split.by = "treatment", 
            max.cutoff = 3,
            cols = c("grey", "red"))

Shiny App

Cleanup object

metadata <- pigs.annotated@meta.data
#metadata <- metadata[,c(34,33,2:9)]
pigs.annotated@meta.data <- metadata
pigs.annotated@assays$SCT@meta.features <- metadata
pigs.annotated@assays$RNA@meta.features <- metadata

Create output folder

# load libraries
remove.packages("reticulate")
library(ShinyCell)

# Run
DefaultAssay(pigs.annotated) <- "RNA"
Idents(pigs.annotated) <- "individual_clusters"
sc.config <- createConfig(pigs.annotated)
makeShinyApp(pigs.annotated, sc.config, gene.mapping = TRUE, shiny.title = "Brain snRNAseq two good pigs")