# TP53 Transcriptomics Visualizations

# Data Creation

# Simulate a genes x samples expression matrix for one wild-type group and
# three TP53 hotspot-mutant groups. Values are random draws, not real data.
set.seed(123)

# Experimental design. Levels are given explicitly so that "WT" is the first
# (reference) level; factor() would otherwise sort them alphabetically and
# put "WT" last.
conditions <- c("WT", "R175H", "R248W", "R273H")
n_reps <- 5
groups <- factor(rep(conditions, each = n_reps), levels = conditions)
n_samples <- length(groups)

# Gene and sample labels. Row 1 is TP53; the remaining rows are placeholders.
n_genes <- 100
gene_names <- c("TP53", paste0("Gene_", 2:n_genes))
sample_names <- paste0(
  groups, "_rep", rep(seq_len(n_reps), times = length(conditions))
)

# Baseline expression: independent normal draws (mean 8, sd 1.5), filled
# column by column. The matrix size is derived from the design above rather
# than hard-coded.
expression_matrix <- matrix(
  rnorm(n_genes * n_samples, mean = 8, sd = 1.5),
  nrow = n_genes,
  ncol = n_samples,
  dimnames = list(gene_names, sample_names)
)

# Make TP53 downregulated in mutants: shift every non-WT sample down by 3.
is_mutant <- groups != "WT"
expression_matrix["TP53", is_mutant] <- expression_matrix["TP53", is_mutant] - 3
# ─────────────────────────────────────────
# VISUALIZATION 1: Volcano Plot
# ─────────────────────────────────────────

# Simulate per-gene log2 fold changes and p-values. These are drawn
# independently of expression_matrix; only the gene names are taken from it.
log2FC <- rnorm(n_genes, mean = 0, sd = 2)
pvalues <- runif(n_genes, min = 0, max = 1)

# Highlighted genes, defined once. `index` is the gene's row position in
# expression_matrix; the other columns give the display symbol, the
# hand-picked effect size and p-value, and the plotting colour. The
# overrides, point colours, labels and legend below all read from this table.
key_genes <- data.frame(
  index  = 1:5,
  symbol = c("TP53", "MDM2", "BAX", "CDKN1A", "BCL2"),
  log2FC = c(-3.2, 2.8, -2.5, -2.2, 2.1),
  pvalue = c(0.0001, 0.0002, 0.0003, 0.0004, 0.0005),
  color  = c("red", "orange", "blue", "purple", "green"),
  stringsAsFactors = FALSE
)
stopifnot(n_genes >= max(key_genes$index))

# Overwrite the random draws so the highlighted genes are strongly significant.
log2FC[key_genes$index] <- key_genes$log2FC
pvalues[key_genes$index] <- key_genes$pvalue

# Report the highlighted genes under their real symbols so the data frame
# agrees with the labels drawn on the plot.
gene_names <- rownames(expression_matrix)
gene_names[key_genes$index] <- key_genes$symbol

volcano_data <- data.frame(
  gene = gene_names,
  log2FC = log2FC,
  pvalue = pvalues,
  neglog10p = -log10(pvalues)
)

# Point colours: gray for background genes, table colour for highlighted ones.
colors <- rep("gray", n_genes)
colors[key_genes$index] <- key_genes$color

# Plot volcano; one unit of headroom is left above the most significant point.
plot(volcano_data$log2FC, volcano_data$neglog10p,
     pch = 20,
     col = colors,
     xlab = "Log2 Fold Change",
     ylab = "-log10(p-value)",
     main = "Volcano Plot: TP53 Mutant vs Wild-Type\nDifferential Gene Expression",
     ylim = c(0, max(volcano_data$neglog10p) + 1))

# Threshold guides: p = 0.05 (horizontal) and |log2FC| = 1 (vertical).
abline(h = -log10(0.05), lty = 2, col = "black")
abline(v = c(-1, 1), lty = 2, col = "black")

# Label every highlighted gene to the right of its point (pos = 4).
text(key_genes$log2FC, -log10(key_genes$pvalue),
     labels = key_genes$symbol,
     pos = 4,
     col = key_genes$color,
     cex = 0.8)

# Legend built from the same table, plus an entry for the background genes.
legend("topright",
       legend = c(key_genes$symbol, "Other genes"),
       col = c(key_genes$color, "gray"),
       pch = 20,
       cex = 0.8)

# ─────────────────────────────────────────
# VISUALIZATION 2: Heatmap
# ─────────────────────────────────────────

# Take the first 20 rows of the simulated matrix (fewer if the matrix is
# smaller). No ranking is applied: this is a positional subset, not a
# "top genes" selection.
n_heatmap_genes <- min(20, nrow(expression_matrix))
heatmap_data <- expression_matrix[seq_len(n_heatmap_genes), , drop = FALSE]

# Show the first five rows under their gene symbols; these are the only rows
# that get a label on the heatmap.
key_symbols <- c("TP53", "MDM2", "BAX", "CDKN1A", "BCL2")
stopifnot(nrow(heatmap_data) >= length(key_symbols))
rownames(heatmap_data)[seq_along(key_symbols)] <- key_symbols

# Color palette: 100 steps from yellow (low) through orange to red (high).
col_palette <- colorRampPalette(c("yellow", "orange", "red"))(100)

# Plot heatmap.
# - No par(mar = ...) call beforehand: heatmap() builds its own layout and
#   takes its margins from `margins`, so an outer par(mar) would only change
#   the margins of later, unrelated plots.
# - No cex.main argument: extra arguments are forwarded to image(), which
#   does not draw the title. To resize the title, set par(cex.main = ...)
#   before the call and restore it afterwards.
# - scale = "row" centres and scales each gene, so colours show relative
#   expression within a gene rather than absolute values.
row_labels <- ifelse(rownames(heatmap_data) %in% key_symbols,
                     rownames(heatmap_data), "")

heatmap(heatmap_data,
        col = col_palette,
        scale = "row",
        margins = c(6, 7),
        main = "Gene Expression: WT vs TP53 Mutants",
        xlab = "Samples",
        ylab = "Genes",
        labRow = row_labels,
        cexRow = 0.8,
        cexCol = 0.6)

# Appendix: This code was written with the assistance of Claude (claude.ai, Sonnet 4.6).

# Prompts used (verbatim):
#   Prompt 1: "in the context of studying TP53 mutants with transcriptomics, how would I simulate data with 3 mutant TP53 and 1 wild type."
#   Prompt 2: "What would be the best plots to represent the simulated data?"
#   Prompt 3: "draw a volcano plot and heatmap for the data in R."

# Justification: I was unsure how to simulate the data correctly, so I used Claude to point me in the right direction.