DESeq2 with composition

# Prepare for DESeq2
count_data <- jensen_bulk[,-1]

count_data <- mutate_all(count_data, function(x) round(as.numeric(as.character(x)), digits = 0)) # round to integers

# Prepare sample information
sample_info <- data.frame(
  row.names = colnames(count_data),
  genotype =  lapply(strsplit(colnames(count_data), "[.]"),  "[[", 1) |> unlist() |> as.factor() ,
  treatment = lapply(strsplit(colnames(count_data), "[.]"),  "[[", 2) |> unlist() |> as.factor()
)

sample_info$genotype <- relevel(sample_info$genotype, ref = "WT")
sample_info$treatment <- relevel(sample_info$treatment, ref = "sham")

# Make sure the sample information matches the count data
all(rownames(sample_info) == colnames(count_data))

## [1] TRUE

# Create a DESeqDataSet
dds <- DESeqDataSetFromMatrix(
  countData = count_data,
  colData = sample_info,
  design = ~ genotype + treatment + genotype:treatment  # include interaction term
)

## converting counts to integer mode

# Run the DESeq pipeline
dds <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

resultsNames(dds)

## [1] "Intercept"              "genotype_KO_vs_WT"      "treatment_MI_vs_sham"  
## [4] "genotypeKO.treatmentMI"

# Run the differential expression analysis
res <- results(dds, name="genotypeKO.treatmentMI")

sample_info_comp <- cbind(sample_info, comps)
colnames(sample_info_comp)[5] <- "endothelial_cells"
# Make sure the sample information matches the count data
all(rownames(sample_info_comp) == colnames(count_data))

## [1] TRUE

# Create DESeqDataSet w/ variable compositions in design
dds_composition  <- DESeqDataSetFromMatrix(
  countData = count_data,
  colData = sample_info_comp,
  design = ~ genotype + treatment + genotype:treatment + cardiomyocytes + fibroblasts + immune 
)

## converting counts to integer mode

# Run 
dds_composition <- DESeq(dds_composition)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## 17 rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument with nbinomWaldTest

# Get interaction results
res_composition <- results(dds_composition, name = "genotypeKO.treatmentMI")

# Compare results
comparison <- merge(as.data.frame(res), as.data.frame(res_composition), by = "row.names", suffixes = c(".no_comp", ".comp"))

# Add a column indicating whether a gene is significant in either analysis
comparison$significant <- (comparison$pvalue.no_comp < 0.05) | (comparison$pvalue.comp < 0.05)

# Scatterplot of p-values
ggplot(comparison, aes(x = -log10(pvalue.no_comp), y = -log10(pvalue.comp), color = significant)) +
  geom_point(alpha = 0.5) +
  theme_minimal() +
  labs(x = "-log10(p-value) without composition", y = "-log10(p-value) with composition")

# Scatterplot of log fold changes
ggplot(comparison, aes(x = log2FoldChange.no_comp, y = log2FoldChange.comp, color = significant)) +
  geom_point(alpha = 0.5) +
  theme_minimal() +
  labs(x = "log2(Fold Change) without composition", y = "log2(Fold Change) with composition")

# Calculate difference in -log10 p-values
comparison$p_diff <- -log10(comparison$pvalue.comp) - -log10(comparison$pvalue.no_comp)

# Make categorical column
comparison <- comparison|>
              mutate(up_down = ifelse(p_diff >= 0,"up","down")) 

# Get the gene names for the top 5 genes by padj.comp
top_changed_genes <- comparison |> 
  group_by(up_down) |>  # take the top 5 rows
  arrange(desc(abs(p_diff)), .by_group = TRUE) |>
  na.omit() |>
  slice_head(n = 5) 

top_genes <- comparison |>  # take the top 5 rows
  arrange(padj.comp) |>
  na.omit() |>
  slice_head(n = 5) 

ggplot(comparison, aes(x = log2FoldChange.comp, y = -log10(padj.comp), color = p_diff)) +
  geom_point(alpha = 0.8, size = 4) +
  scale_color_viridis(option = "plasma") +
  geom_text_repel(data = top_genes, aes(label = Row.names), size = 10, box.padding = unit(0.35, "lines"), 
                  force = 40, segment.linetype = 2, segment.size = 0.6, color = "black", force_pull = 0.01, min.segment.length = 0) +
  geom_text_repel(data = top_changed_genes, aes(label = Row.names), size = 8, box.padding = unit(0.35, "lines"), 
                  force = 20, segment.linetype = 2, segment.size = 0.6, color = "grey", force_pull = 0.01, min.segment.length = 0) +
  theme_minimal() +
  labs(x = "log2(Fold Change)", y = "-log10(adjusted p-value)", color = "Change in -log10(p-value)") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") + 
  geom_vline(xintercept = 0, linetype = "solid") + # add a line for p=0.05
  theme(legend.position = "bottom",
        axis.text.x = element_text(color = "black", size = 25),
        axis.text.y = element_text(color = "black", size = 25),
        axis.ticks = element_blank(),
        title = element_text(size = 25),
        legend.text = element_text(size = 25),
        legend.key.size = unit(0.7, 'in'),
        legend.title = element_text(vjust = 0.7),
        axis.title.y = element_text(color = "black", size = 25),
        axis.title.x = element_text(color = "black", size = 25),
         panel.background = element_rect(fill='transparent'),
         plot.background = element_rect(fill='transparent', color=NA),
         panel.grid.major = element_blank(),
         panel.grid.minor = element_blank())  # move legend to the bottom

## Expression of genes with greatest p-value change

plotGene <- function(seurat, slot = "scale.data", gene, plot.title = T, plot.legend = T){
  seurat |>
    do_FeaturePlot(gene, reduction = "umap", slot = slot, 
                   legend.position = if(plot.legend == T){"bottom"}else{"none"}, 
                   legend.width = 2, legend.length = 25,  order = T) +
    theme(title = element_text(size = 18),
          legend.title = element_blank()) +
    labs(title = if(plot.title == T){paste0(gene, "\n-log10(p-value) change: " , names(gene))}else{" "})
}

genes_up <- top_changed_genes |>
            subset(up_down == "up") |>
            pull(Row.names)

names(genes_up) <- top_changed_genes |>
            subset(up_down == "up") |>
            pull(p_diff) |>
            round(digits = 2)

genes_down <- top_changed_genes |>
            subset(up_down == "down") |>
            pull(Row.names)

names(genes_down) <- top_changed_genes |>
            subset(up_down == "down") |>
            pull(p_diff) |>
            round(digits = 2)
# plot new
plots <- c()

plots[["cell"]] <- do_DimPlot(sn, reduction = "umap", label = T, repel = T)  +  
  NoLegend() + theme(title = element_text(size = 22))

plots.up <- lapply(names(genes_up), function(x){plotGene(seurat = sn, slot = "data", gene = genes_up[x], plot.legend = F)})

plots.down <- lapply(names(genes_down), function(x){plotGene(seurat = sn, slot = "data", gene = genes_down[x], plot.legend = F)})


plots.up <- c(plots, plots.up)

plots.down <- c(plots, plots.down)

wrap_plots(plots.up, 
           ncol = 6)

wrap_plots(plots.down, 
           ncol = 6)

DESeq2 with composition

Brian Gural

2023-08-03