# Prepare for DESeq2
count_data <- jensen_bulk[,-1]
count_data <- mutate_all(count_data, function(x) round(as.numeric(as.character(x)), digits = 0)) # round to integers
# Prepare sample information
sample_info <- data.frame(
row.names = colnames(count_data),
genotype = lapply(strsplit(colnames(count_data), "[.]"), "[[", 1) |> unlist() |> as.factor() ,
treatment = lapply(strsplit(colnames(count_data), "[.]"), "[[", 2) |> unlist() |> as.factor()
)
sample_info$genotype <- relevel(sample_info$genotype, ref = "WT")
sample_info$treatment <- relevel(sample_info$treatment, ref = "sham")
# Make sure the sample information matches the count data
all(rownames(sample_info) == colnames(count_data))
## [1] TRUE
# Create a DESeqDataSet
dds <- DESeqDataSetFromMatrix(
countData = count_data,
colData = sample_info,
design = ~ genotype + treatment + genotype:treatment # include interaction term
)
## converting counts to integer mode
# Run the DESeq pipeline
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resultsNames(dds)
## [1] "Intercept" "genotype_KO_vs_WT" "treatment_MI_vs_sham"
## [4] "genotypeKO.treatmentMI"
# Run the differential expression analysis
res <- results(dds, name="genotypeKO.treatmentMI")
sample_info_comp <- cbind(sample_info, comps)
colnames(sample_info_comp)[5] <- "endothelial_cells"
# Make sure the sample information matches the count data
all(rownames(sample_info_comp) == colnames(count_data))
## [1] TRUE
# Create DESeqDataSet w/ variable compositions in design
dds_composition <- DESeqDataSetFromMatrix(
countData = count_data,
colData = sample_info_comp,
design = ~ genotype + treatment + genotype:treatment + cardiomyocytes + fibroblasts + immune
)
## converting counts to integer mode
# Run
dds_composition <- DESeq(dds_composition)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## 17 rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument with nbinomWaldTest
# Get interaction results
res_composition <- results(dds_composition, name = "genotypeKO.treatmentMI")
# Compare results
comparison <- merge(as.data.frame(res), as.data.frame(res_composition), by = "row.names", suffixes = c(".no_comp", ".comp"))
# Add a column indicating whether a gene is significant in either analysis
comparison$significant <- (comparison$pvalue.no_comp < 0.05) | (comparison$pvalue.comp < 0.05)
# Scatterplot of p-values
ggplot(comparison, aes(x = -log10(pvalue.no_comp), y = -log10(pvalue.comp), color = significant)) +
geom_point(alpha = 0.5) +
theme_minimal() +
labs(x = "-log10(p-value) without composition", y = "-log10(p-value) with composition")
# Scatterplot of log fold changes
ggplot(comparison, aes(x = log2FoldChange.no_comp, y = log2FoldChange.comp, color = significant)) +
geom_point(alpha = 0.5) +
theme_minimal() +
labs(x = "log2(Fold Change) without composition", y = "log2(Fold Change) with composition")
# Calculate difference in -log10 p-values
comparison$p_diff <- -log10(comparison$pvalue.comp) - -log10(comparison$pvalue.no_comp)
# Make categorical column
comparison <- comparison|>
mutate(up_down = ifelse(p_diff >= 0,"up","down"))
# Get the gene names for the top 5 genes by padj.comp
top_changed_genes <- comparison |>
group_by(up_down) |> # take the top 5 rows
arrange(desc(abs(p_diff)), .by_group = TRUE) |>
na.omit() |>
slice_head(n = 5)
top_genes <- comparison |> # take the top 5 rows
arrange(padj.comp) |>
na.omit() |>
slice_head(n = 5)
ggplot(comparison, aes(x = log2FoldChange.comp, y = -log10(padj.comp), color = p_diff)) +
geom_point(alpha = 0.8, size = 4) +
scale_color_viridis(option = "plasma") +
geom_text_repel(data = top_genes, aes(label = Row.names), size = 10, box.padding = unit(0.35, "lines"),
force = 40, segment.linetype = 2, segment.size = 0.6, color = "black", force_pull = 0.01, min.segment.length = 0) +
geom_text_repel(data = top_changed_genes, aes(label = Row.names), size = 8, box.padding = unit(0.35, "lines"),
force = 20, segment.linetype = 2, segment.size = 0.6, color = "grey", force_pull = 0.01, min.segment.length = 0) +
theme_minimal() +
labs(x = "log2(Fold Change)", y = "-log10(adjusted p-value)", color = "Change in -log10(p-value)") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "solid") + # add a line for p=0.05
theme(legend.position = "bottom",
axis.text.x = element_text(color = "black", size = 25),
axis.text.y = element_text(color = "black", size = 25),
axis.ticks = element_blank(),
title = element_text(size = 25),
legend.text = element_text(size = 25),
legend.key.size = unit(0.7, 'in'),
legend.title = element_text(vjust = 0.7),
axis.title.y = element_text(color = "black", size = 25),
axis.title.x = element_text(color = "black", size = 25),
panel.background = element_rect(fill='transparent'),
plot.background = element_rect(fill='transparent', color=NA),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) # move legend to the bottom
## Expression of genes with greatest p-value change
plotGene <- function(seurat, slot = "scale.data", gene, plot.title = T, plot.legend = T){
seurat |>
do_FeaturePlot(gene, reduction = "umap", slot = slot,
legend.position = if(plot.legend == T){"bottom"}else{"none"},
legend.width = 2, legend.length = 25, order = T) +
theme(title = element_text(size = 18),
legend.title = element_blank()) +
labs(title = if(plot.title == T){paste0(gene, "\n-log10(p-value) change: " , names(gene))}else{" "})
}
genes_up <- top_changed_genes |>
subset(up_down == "up") |>
pull(Row.names)
names(genes_up) <- top_changed_genes |>
subset(up_down == "up") |>
pull(p_diff) |>
round(digits = 2)
genes_down <- top_changed_genes |>
subset(up_down == "down") |>
pull(Row.names)
names(genes_down) <- top_changed_genes |>
subset(up_down == "down") |>
pull(p_diff) |>
round(digits = 2)
# plot new
plots <- c()
plots[["cell"]] <- do_DimPlot(sn, reduction = "umap", label = T, repel = T) +
NoLegend() + theme(title = element_text(size = 22))
plots.up <- lapply(names(genes_up), function(x){plotGene(seurat = sn, slot = "data", gene = genes_up[x], plot.legend = F)})
plots.down <- lapply(names(genes_down), function(x){plotGene(seurat = sn, slot = "data", gene = genes_down[x], plot.legend = F)})
plots.up <- c(plots, plots.up)
plots.down <- c(plots, plots.down)
wrap_plots(plots.up,
ncol = 6)
wrap_plots(plots.down,
ncol = 6)