Setup

knitr::opts_knit$set(
  root.dir = ".")
library(dplyr)
library(dittoSeq)
library(ggrepel)
library(ggtree)
library(parallel)
library(plotly)  # 3D plot
library(Seurat)  # Idents()
library(SeuratDisk)  # SaveH5Seurat()
library(tibble)  # rownnames_to_column
library(variancePartition)
options(mc.cores = detectCores() - 1)

Read in object

Define resolution and groups

# set levels and idents
pigs.unannotated$individual_clusters <- 
  factor(pigs.unannotated$SCT_snn_res.0.1,
         levels = c("0","1","2","3","4","5","6","7","8","9","10","11", "12"))
pigs.unannotated$treatment <- factor(pigs.unannotated$treatment,
                                     levels = c("LPS","Saline"))
Idents(pigs.unannotated) <- "individual_clusters"

# normalize if SCTtransform was used
DefaultAssay(pigs.unannotated) <- "RNA"
# natural-log
pigs.unannotated <- NormalizeData(pigs.unannotated, verbose = FALSE)

Add batch column to metadata

# create treatment column
batch <- gsub("4.R.Saline", "batch1", pigs.unannotated$sample)
batch <- gsub("10.Saline", "batch2", batch)
batch <- gsub("8.R.LPS", "batch1", batch)
batch <- gsub("12.LPS", "batch2", batch)

pigs.unannotated$batch <- factor(batch, levels = c("batch1","batch2"))
table(pigs.unannotated$batch)
## 
## batch1 batch2 
##   1762   7172

Clustering QC before annotation

Unannotated UMAP

# Set colors
colors <- c("dodgerblue","yellow","firebrick1","green","gray40","lightgray",
            "cyan","chocolate4","pink","orange","darkgreen","purple",
            "deeppink","blue","gold","navyblue")

# Plot the UMAP
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        #label.box = TRUE,
        label.size = 3,
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors
        )

# Split by sample
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "sample",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

# Split by treatment 
DimPlot(object = pigs.unannotated, 
        reduction = "umap", 
        label = TRUE,
        label.size = 3,
        split.by = "treatment",
        repel = TRUE,
        group.by = "individual_clusters",
        cols = colors)

Color blind friendly UMAP

dittoDimPlot(object = pigs.unannotated,
             var = "individual_clusters",
             reduction.use = "umap",
             do.label = TRUE,
             labels.highlight = TRUE)

Mitochondrial contamination

# umap percent.mt
FeaturePlot(pigs.unannotated, 
            reduction = "umap", 
            features = "CEMIP",
            min.cutoff = 'q10',
            label = TRUE)

# umap percent.mt split by sample
FeaturePlot(pigs.unannotated, 
            reduction = "umap", 
            features = "percent.mt",
            split.by = "sample",
            min.cutoff = 'q10',
            label = TRUE)

Ridge plot

dittoRidgePlot(object = pigs.unannotated,
               var = "CXCL2",
               group.by = "individual_clusters",
               split.by = "treatment")

Percent cells per cluter

# treatment
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, treatment) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=treatment)) +
  geom_col() +
  scale_fill_manual(values = c("purple","gray")) +
  ggtitle("Percentage of treatment per cluster")

# sample
pigs.unannotated@meta.data %>%
  group_by(individual_clusters, sample) %>%
  dplyr::count() %>%
  group_by(individual_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=individual_clusters,y=percent, fill=sample)) +
  geom_col() +
  #scale_fill_manual(values = sample_colors) +
  ggtitle("Percentage of sample per cluster")

Cells per cluster

n_cells <- FetchData(pigs.unannotated, 
                     vars = c("ident", "treatment")) %>%
  dplyr::count(ident,treatment) %>%
  tidyr::spread(ident, n)
n_cells
##   treatment    0    1   2   3   4   5   6   7   8   9  10 11  12
## 1       LPS 2774 1070 383 375 341 373 443 138 194 118 147 72   1
## 2    Saline  487  403 368 336 252 204  38 174  24  78  10 31 100

Median absolute deviation

Remove outliers with log-library size greater than 3 median absolute deviations (MADs) or below the median log-library size.

# MAD example
log.lib <- as.numeric(log10(pigs.unannotated$nCount_RNA))
med <- median(log.lib)
abs.dev <- abs(log.lib - med)
mad <- median(abs.dev)
mad <- mad * 1.4826 # multiply by consistency cutoff
mad
## [1] 0.4023281
# stats function
mad <- mad(log.lib)
mad
## [1] 0.4023281
# remove outliers greater than 3 MADs
remove <- abs(log.lib - median(log.lib)) / mad(log.lib) > 3
table(remove)
## remove
## FALSE 
##  8934

Cluster tree

pigs.unannotated <- BuildClusterTree(object = pigs.unannotated,
                                     dims = 1:15,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- pigs.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

tree_1 <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors[1:length(tree$tip.label)], 
                        shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Variance Partition

info <- pigs.unannotated@meta.data
expression <- GetAssayData(object = pigs.unannotated, slot = "counts")
form <- ~ (1|treatment) + (1|sample) + (1|batch) + percent.mt

varPart <- fitExtractVarPartModel(expression, form, info)
## Dividing work into 100 chunks...
## 
## Total:6654 s
vp <- sortCols(varPart)

vp_violin <- plotVarPart(vp)
vp_violin

vp_bar <- plotPercentBars(vp[1:10,])
vp_bar

## Top variable genes per variable

varPart.df <- as.data.frame(vp_df)
# sort genes based on variance explained by treatment
vp <- vp[order(vp$treatment, decreasing = TRUE),]
head(vp["treatment"], 10)

# sort genes based on variance explained by sample
vp <- vp[order(vp$sample, decreasing = TRUE),]
head(vp["sample"], 10)

# sort genes based on variance explained by percent mito
vp <- vp[order(vp$percent.mt, decreasing = TRUE),]
head(vp["percent.mt"], 10)

# sort genes based on variance explained by batch
vp <- vp[order(vp$batch, decreasing = TRUE),]
head(vp["batch"], 10)

Marker exploration

Find all markers

# Find markers for each cluster
# Default is logfc.threshold = 0.25 and min.pct = 0.1
all.markers <- FindAllMarkers(object = pigs.unannotated,
                              test.use = "MAST")

# add column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# rename columns and rows
rownames(all.markers) <- 1:nrow(all.markers)
all.markers <- all.markers[,c(6,7,1,5,2:4,8)]
colnames(all.markers)[c(6,7)] <- c("pct_1","pct_2")

# save
saveRDS(all.markers, "../../rObjects/unannotated_all_markers.rds")
# read object
all.markers <- readRDS("../../rObjects/unannotated_all_markers.rds")

# more stringent filtering
all.markers <- all.markers[all.markers$p_val_adj < 0.01,]
markers.strict <- all.markers[
  all.markers$delta_pct > summary(all.markers$delta_pct)[5],]

# compare 
table(all.markers$cluster)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12 
## 3248 3118 3443 3400 4647 1997 2494 3479 2453 1583 1976  384 2529
table(markers.strict$cluster)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12 
##  452  748  692  738  945  321  638  797  888  190  684  172 1383
# subset
cluster0 <- markers.strict[markers.strict$cluster == 0,]
cluster1 <- markers.strict[markers.strict$cluster == 1,]
cluster2 <- markers.strict[markers.strict$cluster == 2,]
cluster3 <- markers.strict[markers.strict$cluster == 3,]
cluster4 <- markers.strict[markers.strict$cluster == 4,]
cluster5 <- markers.strict[markers.strict$cluster == 5,]
cluster6 <- markers.strict[markers.strict$cluster == 6,]
cluster7 <- markers.strict[markers.strict$cluster == 7,]
cluster8 <- markers.strict[markers.strict$cluster == 8,]
cluster9 <- markers.strict[markers.strict$cluster == 9,]
cluster10 <- markers.strict[markers.strict$cluster == 10,]
cluster11 <- markers.strict[markers.strict$cluster == 11,]
cluster12 <- markers.strict[markers.strict$cluster == 12,]

Conserved markers

FindConservedMarkers() will first separate cells by treatment and then perform differential gene expression testing for a single specified cluster against all other clusters.

There are three important arguments which provide thresholds for determining if a gene is a marker.
** logfc.threshold ** min.diff.pct ** min.pct

Output:
** gene: gene symbol ** condition_p_val: p-value not adjusted for multiple test correction for condition ** condition_avg_logFC: average log fold change for condition. Positive values indicate that the gene is more highly expressed in the cluster. ** condition_pct.1: percentage of cells where the gene is detected in the cluster for condition ** condition_pct.2: percentage of cells where the gene is detected on average in the other clusters for condition ** condition_p_val_adj: adjusted p-value for condition, based on bonferroni correction using all genes in the dataset, used to determine significance ** max_pval: largest p value of p value calculated by each group/condition ** minimump_p_val: combined p value

When looking at the output, we suggest looking for markers with large differences in expression between pct.1 and pct.2 and larger fold changes. For instance if pct.1 = 0.90 and pct.2 = 0.80, it may not be as exciting of a marker. However, if pct.2 = 0.1 instead, the bigger difference would be more convincing.

Also, of interest is if the majority of cells expressing the marker is in your cluster of interest. If pct.1 is low, such as 0.3, it may not be as interesting. Both of these are also possible parameters to include when running the function.

# initialize df
conserved.markers <- data.frame()
all.clusters <- levels(pigs.unannotated$SCT_snn_res.0.1)

# loop through each cluster
for (i in all.clusters) {
  
  # print the cluster you're on
  print(i)
  
  # find conserved marker in cluster vs all other clusters
  markers <- FindConservedMarkers(pigs.unannotated,
                                  ident.1 = i, # subset to single cluster
                                  grouping.var = "treatment", # compare by treatment
                                  only.pos = TRUE, # default
                                  min.pct = 0.1, # default
                                  logfc.threshold = 0.25, # default
                                  test.use = "MAST"
                                  )
  
  # skip if none
  if(nrow(markers) == 0) {
    next
  }
  
  # make rownames a column
  markers <- rownames_to_column(markers, var = "gene")
  
  # make cluster number a column
  markers$cluster <- i
  
  # add to final table
  conserved.markers <- rbind(conserved.markers, markers)
}

# create delta_pct
conserved.markers$LPS_delta_pct <- abs(conserved.markers$LPS_pct.1 - 
                                         conserved.markers$LPS_pct.2)
conserved.markers$Saline_delta_pct <- abs(conserved.markers$Saline_pct.1 - 
                                            conserved.markers$Saline_pct.2)

# more stringent filtering
markers.strict <- conserved.markers[
  conserved.markers$LPS_delta_pct > summary(conserved.markers$LPS_delta_pct)[5],]
markers.strict <- conserved.markers[
  conserved.markers$Saline_delta_pct > summary(conserved.markers$Saline_delta_pct)[5],]
markers.strict$gene_name <- markers.strict$gene
markers.strict$row.num <- 1:nrow(markers.strict)

# compare 
table(as.numeric(conserved.markers$cluster))
table(as.numeric(markers.strict$cluster))

# save (takes a while to run)
saveRDS(conserved.markers, "../../rObjects/brain_conserved_markers.rds")
saveRDS(markers.strict, "../../rObjects/brain_conserved_markers_strict.rds")
conserved.markers <- readRDS("../../rObjects/brain_conserved_markers.rds")
markers.strict <- readRDS("../../rObjects/brain_conserved_markers.rds")

PCs

# Printing out the most variable genes driving PCs
print(x = pigs.unannotated[["pca"]], 
      dims = 1:10, 
      nfeatures = 10)
## PC_ 1 
## Positive:  SLC1A2, PREX2, NOL11, NHSL1, SLC1A3, EYA2, IRAG1, POLR3B, LRIG1, ZBTB20 
## Negative:  KCNIP4, RBFOX1, DPP10, ROBO2, OPCML, NRXN3, TENM2, LRRTM4, GALNTL6, HS6ST3 
## PC_ 2 
## Positive:  SLC1A2, PREX2, NOL11, NHSL1, LSAMP, CADM2, IRAG1, POLR3B, CTNND2, NTM 
## Negative:  ENSSSCG00000017146, MX2, ENSSSCG00000033089, GAB2, RBMS3, HERC6, EPSTI1, BNC2, ARHGAP24, LRMDA 
## PC_ 3 
## Positive:  CEMIP, BNC2, RBMS3, ADAM12, SVIL, EYA1, COLEC12, GPC6, USP53, BMP6 
## Negative:  RNF220, C10orf90, ST18, MBP, OPALIN, PLP1, PLEKHH1, ENSSSCG00000049542, DOCK10, ZNF536 
## PC_ 4 
## Positive:  CALCR, INPP5D, ARHGAP24, ARHGAP15, DOCK8, GAB2, PDE3B, LCP2, RUNX1, VAV1 
## Negative:  RBMS3, CEMIP, BNC2, ADAM12, EYA1, COLEC12, FBXL7, BMP6, SVIL, BICC1 
## PC_ 5 
## Positive:  KCNIP4, NKAIN2, TAFA1, ENSSSCG00000011729, RNF220, C10orf90, MBP, SLIT3, PLEKHH1, OPALIN 
## Negative:  LHFPL3, NXPH1, SOX6, TNR, MMP16, NELL1, ITGA9, KCNIP1, AGAP1, THSD7B 
## PC_ 6 
## Positive:  LHFPL3, DSCAM, TNR, XYLT1, AGAP1, MMP16, KCNB2, ITGA9, STK32A, KCNIP4 
## Negative:  ERBB4, NXPH1, GALNTL6, PTCHD4, KIAA1217, ZNF536, BTBD11, ZNF804B, KCNC2, GAD2 
## PC_ 7 
## Positive:  SLC16A9, GPC6, FOXP1, NTN1, SLC26A2, SLC47A1, KIAA1755, BNC2, ZFHX3, ADAMTS9 
## Negative:  CEMIP, MX2, ENSSSCG00000033089, EPSTI1, RSAD2, ENSSSCG00000017146, COLEC12, HERC6, ARHGAP28, PARP14 
## PC_ 8 
## Positive:  ENSSSCG00000033089, MX2, RSAD2, ENSSSCG00000017146, RBMS3, ROR2, PARP14, C3, PLEKHG1, ADAMTS9 
## Negative:  TRPM3, GPC5, CEMIP, ATP1A4, ENSSSCG00000044580, SLC6A20, KCNK2, ADAM12, PRKAG2, MAPK4 
## PC_ 9 
## Positive:  CA10, TAFA1, ENC1, RFX3, GRIA4, RASGRF2, CCK, SLIT2, FSTL5, ENSSSCG00000014097 
## Negative:  HS3ST4, DPP10, GRIK3, IL1RAPL2, KIAA1217, SEMA3E, ZFPM2, CLSTN2, SLC35F4, RASGRP1 
## PC_ 10 
## Positive:  NHSL1, EYA2, SLC1A2, EYA1, EFEMP1, PREX2, GPC5, RBFOX1, ENSSSCG00000051274, ROBO2 
## Negative:  HS3ST4, DPP10, ESRRG, GRM8, WWC1, GALNT9, GPM6A, SEMA3E, KIRREL3, SEZ6L

Top variable genes

# print top variable genes
top100 <- pigs.unannotated@assays$SCT@var.features[1:100]
top100
##   [1] "SLC1A2"             "TNR"                "SLC16A9"           
##   [4] "LHFPL3"             "NOL11"              "ENSSSCG00000049542"
##   [7] "CALCR"              "MBP"                "RNF220"            
##  [10] "ENSSSCG00000044602" "XYLT1"              "ST18"              
##  [13] "ARHGAP24"           "CEMIP"              "GALNTL6"           
##  [16] "C10orf90"           "RBFOX1"             "KCNIP4"            
##  [19] "ARHGAP15"           "ANK1"               "DPP10"             
##  [22] "OPALIN"             "NRG1"               "SLC26A2"           
##  [25] "PREX2"              "NELL1"              "LUZP2"             
##  [28] "C3"                 "ROBO2"              "IDO2"              
##  [31] "NXPH1"              "NRXN3"              "NTM"               
##  [34] "IL1RAPL2"           "OPCML"              "TRPM3"             
##  [37] "BNC2"               "CLSTN2"             "COLEC12"           
##  [40] "INPP5D"             "TCF7L2"             "BMP6"              
##  [43] "LRMDA"              "RUNX1"              "MX2"               
##  [46] "MMP16"              "EBF1"               "KIRREL3"           
##  [49] "FLI1"               "PDE3B"              "LSAMP"             
##  [52] "GLI3"               "NHSL1"              "ZNF536"            
##  [55] "DLGAP2"             "ABLIM1"             "PLAT"              
##  [58] "BCAS1"              "DOCK8"              "SLC6A20"           
##  [61] "GPC5"               "AGMO"               "PLEKHH1"           
##  [64] "DOCK10"             "ENSSSCG00000045193" "ADAM12"            
##  [67] "ENSSSCG00000051274" "ENSSSCG00000004830" "RBPMS"             
##  [70] "ENSSSCG00000017146" "ITGA9"              "GPR39"             
##  [73] "ENSSSCG00000051497" "TENM2"              "HS6ST3"            
##  [76] "RN18S"              "CREB5"              "ASIC2"             
##  [79] "ARHGAP28"           "DOCK2"              "SVIL"              
##  [82] "TMCC3"              "SLC1A3"             "PRR5L"             
##  [85] "FOXP1"              "THSD7B"             "USP53"             
##  [88] "ENSSSCG00000024973" "TAFA1"              "TNC"               
##  [91] "FHOD3"              "EYA2"               "KIAA1217"          
##  [94] "KCTD16"             "ABR"                "ARHGAP10"          
##  [97] "NFASC"              "FRMD5"              "HMCN2"             
## [100] "TNIK"