#calling the libraries
library(DESeq2)
library(ggplot2)
#calling in the count matrix
counts_D5 <- read.delim("counts_D5", header = TRUE, row.names = 1)
#understanding the distribution of counts in the data set
total_counts_per_gene_D5 <- rowSums(counts_D5)
hist(total_counts_per_gene_D5,
breaks=100,
main="Distribution of Total Counts per Gene",
xlab="Total counts across all samples",
ylab="Number of genes")
summary(total_counts_per_gene_D5)
#ignoring low expressed gene to remove noise & get better results
counts_D5 <- counts_D5[which(rowSums(counts_D5) > 50), ]
#calling in the metadata
metadata_D5 <- read.csv("metadata_D5.csv", header = TRUE)
counts_D5 <- round(counts_D5)
dds_D5 <- DESeqDataSetFromMatrix(countData = counts_D5,
colData = metadata_D5,
design = ~ cell_line + treatment)
dds_D5
# Set reference levels
dds_D5$treatment <- relevel(dds_D5$treatment, ref = "DMSO")
dds_D5$cell_line <- relevel(dds_D5$cell_line, ref = "HCC827")
# Run DESeq2 analysis
dds_D5 <- DESeq(dds_D5)
# Extract results
res_treatment_D5 <- results(dds_D5, contrast = c("treatment", "Gefitinib", "DMSO"))
res_cellline_D5 <- results(dds_D5, contrast = c("cell_line", "HCC4006", "HCC827"))
summary(res_treatment_D5)
summary(res_cellline_D5)
# Data transformation for visualization
vst_D5 <- vst(dds_D5, blind = FALSE)
vst_matrix_D5 <- assay(vst_D5)
head(vst_matrix_D5[1:5, 1:3])
library(reshape2)
vst_df_D5 <- as.data.frame(vst_matrix_D5)
vst_df_D5$gene_id <- rownames(vst_df_D5)
vst_final_D5 <- melt(vst_df_D5,
id.vars = "gene_id",
variable.name = "sample_id",
value.name = "expression_value")
head(vst_final_D5)
pca_plot_D5 <- plotPCA(vst_D5, intgroup = c("treatment", "cell_line"))
ggsave(("D5_PCA.pdf"), pca_plot_D5, width = 6, height = 5)
ggsave(("D5_PCA.png"), pca_plot_D5, width = 6, height = 5, dpi = 300)
# Dispersion Plot
plotDispEsts(dds_D5, main = "Dispersion Estimates")
pdf("DispersionPlot_D5.pdf", width = 6, height = 5)
plotDispEsts(dds_D5)
dev.off()
png("DispersionPlot_D5.png", width = 2000, height = 1600, res = 300)
plotDispEsts(dds_D5)
dev.off()
resultsNames(dds_D5)
#loading annotation packages for gene symbol conversion
library(AnnotationDbi)
library(org.Hs.eg.db)
# I'm converting ENSEMBL IDs to gene symbols
gene_symbols_D5 <- mapIds(org.Hs.eg.db,
keys = rownames(dds_D5),
column = "SYMBOL",
keytype = "ENSEMBL",
multiVals = "first")
res_annotated_D5 <- res_treatment_D5
res_annotated_D5$gene_symbol <- gene_symbols_D5
as.data.frame(res_annotated_D5)
#LFC Shrinkage
res_treatment_D5 <- lfcShrink(dds_D5, coef = "treatment_Gefitinib_vs_DMSO", type = "apeglm")
res_cellline_D5 <- lfcShrink(dds_D5, coef = "cell_line_HCC4006_vs_HCC827", type = "apeglm")
res_treatment_annotated_D5 <- res_treatment_D5
res_treatment_annotated_D5$gene_symbol <- gene_symbols_D5
res_cellline_annotated_D5 <- res_cellline_D5
res_cellline_annotated_D5$gene_symbol <- gene_symbols_D5
# Get significant genes
sig_treatment_D5 <- res_treatment_annotated_D5[which(res_treatment_annotated_D5$padj < 0.05 &
abs(res_treatment_annotated_D5$log2FoldChange) > 1), ]
sig_cellline_D5 <- res_cellline_annotated_D5[which(res_cellline_annotated_D5$padj < 0.05 &
abs(res_cellline_annotated_D5$log2FoldChange) > 1), ]
nrow(sig_treatment_D5)
nrow(sig_cellline_D5)
# Top genes
top_treatment_D5 <- head(sig_treatment_D5[order(sig_treatment_D5$padj), ], 20)
top_cellline_D5 <- head(sig_cellline_D5[order(sig_cellline_D5$padj), ], 20)
# Overlaps
overlap_D5 <- intersect(rownames(sig_treatment_D5), rownames(sig_cellline_D5))
length(overlap_D5)
Visualization Plots
# MA Plot
plotMA(res_treatment_D5, main = "MA Plot - Gefitinib vs DMSO", ylim = c(-3, 3))
pdf(("MA_treatment_D5.pdf"), width = 6, height = 5)
plotMA(res_treatment_D5, ylim=c(-5,5), main="MA Plot - Gefitinib vs DMSO")
dev.off()
png(("MA_treatment_D5.png"), width = 2000, height = 1600, res = 300)
plotMA(res_treatment_D5, ylim=c(-5,5), main="MA Plot - Gefitinib vs DMSO")
dev.off()
plotMA(res_cellline_D5, main = "MA Plot - HCC4006 vs HCC827", ylim = c(-3, 3))
pdf(("MA_cellline_D5.pdf"), width = 6, height = 5)
plotMA(res_cellline_D5, ylim=c(-5,5), main="MA Plot - HCC4006 vs HCC827")
dev.off()
png(("MA_cellline_D5.png"), width = 2000, height = 1600, res = 300)
plotMA(res_cellline_D5, ylim=c(-5,5), main="MA Plot - HCC4006 vs HCC827")
dev.off()
# Volcano plots
library(EnhancedVolcano)
volcano_treatment_D5 <- EnhancedVolcano(res_treatment_annotated_D5, lab = res_treatment_annotated_D5$gene_symbol,
x = 'log2FoldChange', y = 'padj',
title = 'Gefitinib vs DMSO')
ggsave(("volcano_treatment_D5.pdf"), volcano_treatment_D5, width = 6, height = 5)
ggsave(("volcano_treatment_D5.png"), volcano_treatment_D5, width = 6, height = 5, dpi = 300)
volcano_cellline_D5 <- EnhancedVolcano(res_cellline_annotated_D5, lab = res_cellline_annotated_D5$gene_symbol,
x = 'log2FoldChange', y = 'padj',
title = 'HCC4006 vs HCC827')
ggsave(("volcano_cellline_D5.pdf"), volcano_cellline_D5, width = 6, height = 5)
ggsave(("volcano_cellline_D5.png"), volcano_cellline_D5, width = 6, height = 5, dpi = 300)
# Heatmap of top DE genes
library(pheatmap)
if(length(overlap_D5) > 0) {
top_overlap_D5 <- head(overlap_D5[order(sig_treatment_D5[overlap_D5,]$padj)], 50)
pheatmap::pheatmap(assay(vst_D5)[top_overlap_D5,],
annotation_col = as.data.frame(colData(dds_D5)[,c("cell_line", "treatment")]),
scale = "row", show_rownames = FALSE,
main = "Top 50 Overlapping DE Genes",
filename = "DEGs_D5.png")
}
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(RColorBrewer)
# Prepare top genes for treatment effect
top_treatment_heatmap_D5 <- res_treatment_annotated_D5[!is.na(res_treatment_annotated_D5$padj) &
res_treatment_annotated_D5$padj < 0.05 &
res_treatment_annotated_D5$baseMean > 50 &
abs(res_treatment_annotated_D5$log2FoldChange) > 2,]
top_treatment_heatmap_D5 <- head(as.data.frame(top_treatment_heatmap_D5), 50)
top_treatment_heatmap_D5 <- top_treatment_heatmap_D5[order(top_treatment_heatmap_D5$log2FoldChange, decreasing = TRUE),]
# Create matrices
rlog_D5 <- rlog(dds_D5, blind = FALSE)
mat_D5 <- assay(rlog_D5)[rownames(top_treatment_heatmap_D5), ]
mat_scaled_D5 <- t(apply(mat_D5, 1, scale))
colnames(mat_scaled_D5) <- colnames(mat_D5)
# Select top and bottom genes
num_keep <- min(25, floor(nrow(mat_scaled_D5)/2))
rows_keep_D5 <- c(seq(1:num_keep), seq((nrow(mat_scaled_D5) - num_keep + 1), nrow(mat_scaled_D5)))
# Prepare annotations
l2_val_D5 <- as.matrix(top_treatment_heatmap_D5[rows_keep_D5,]$log2FoldChange)
colnames(l2_val_D5) <- "logFC"
mean_val_D5 <- as.matrix(top_treatment_heatmap_D5[rows_keep_D5,]$baseMean)
colnames(mean_val_D5) <- "AveExpr"
# Colors
col_logFC_D5 <- colorRamp2(c(min(l2_val_D5), 0, max(l2_val_D5)), c("blue", "white", "red"))
col_AveExpr_D5 <- colorRamp2(c(quantile(mean_val_D5)[1], quantile(mean_val_D5)[4]), c("white", "red"))
col_zscore_D5 <- colorRamp2(c(-2, 0, 2), c("#4575b4", "white", "#d73027"))
# Create heatmaps
h1_D5 <- Heatmap(mat_scaled_D5[rows_keep_D5,], cluster_rows = TRUE,
name = "Z-score", col = col_zscore_D5,
show_row_names = FALSE)
h2_D5 <- Heatmap(l2_val_D5, row_labels = top_treatment_heatmap_D5$gene_symbol[rows_keep_D5],
cluster_rows = FALSE, name = "logFC", col = col_logFC_D5,
width = unit(15, "mm"),
row_names_gp = gpar(fontsize = 6), # Decreased from default (usually 10-12)
cell_fun = function(j, i, x, y, w, h, col) {
grid.text(round(l2_val_D5[i, j], 1), x, y, gp = gpar(fontsize = 7))
})
h3_D5 <- Heatmap(mean_val_D5, row_labels = top_treatment_heatmap_D5$gene_symbol[rows_keep_D5],
cluster_rows = FALSE, name = "AveExpr", col = col_AveExpr_D5,
width = unit(15, "mm"),
row_names_gp = gpar(fontsize = 6), # Decreased from default
cell_fun = function(j, i, x, y, w, h, col) {
grid.text(round(mean_val_D5[i, j], 0), x, y, gp = gpar(fontsize = 7))
})
h_D5 <- h1_D5 + h2_D5 + h3_D5
print(h_D5)
pdf("AnnotatedHeatmap_D5.pdf", width = 10, height = 6)
draw(h_D5)
dev.off()
png("AnnotatedHeatmap_D5.png", width = 3000, height = 2000, res = 300)
draw(h_D5)
dev.off()
library(clusterProfiler)
library(DOSE)
library(enrichplot)
# TREATMENT EFFECT PATHWAYS
# Separate up/down genes
sig_genes_up_treatment_D5 <- rownames(sig_treatment_D5)[sig_treatment_D5$log2FoldChange > 1]
sig_genes_down_treatment_D5 <- rownames(sig_treatment_D5)[sig_treatment_D5$log2FoldChange < -1]
print(paste("Upregulated:", length(sig_genes_up_treatment_D5)))
print(paste("Downregulated:", length(sig_genes_down_treatment_D5)))
# Convert to ENTREZ for KEGG
entrez_up_treatment_D5 <- mapIds(org.Hs.eg.db, sig_genes_up_treatment_D5, "ENTREZID", "ENSEMBL")
entrez_down_treatment_D5 <- mapIds(org.Hs.eg.db, sig_genes_down_treatment_D5, "ENTREZID", "ENSEMBL")
entrez_up_treatment_D5 <- entrez_up_treatment_D5[!is.na(entrez_up_treatment_D5)]
entrez_down_treatment_D5 <- entrez_down_treatment_D5[!is.na(entrez_down_treatment_D5)]
# GO Analysis - Treatment
go_up_treatment_D5 <- enrichGO(gene = sig_genes_up_treatment_D5, OrgDb = org.Hs.eg.db,
keyType = "ENSEMBL", ont = "ALL",
pAdjustMethod = "BH", pvalueCutoff = 0.05)
as.data.frame(go_up_treatment_D5)
go_treatment_up_D5 <- barplot(go_up_treatment_D5, showCategory = 15) + labs(title = "GO Up: Gefitinib vs DMSO")
ggsave("GO_treatment_up_D5.pdf", go_treatment_up_D5, width = 7, height = 6)
ggsave("GO_treatment_up_D5.png", go_treatment_up_D5, width = 7, height = 6, dpi = 300)
go_down_treatment_D5 <- enrichGO(gene = sig_genes_down_treatment_D5, OrgDb = org.Hs.eg.db,
keyType = "ENSEMBL", ont = "ALL",
pAdjustMethod = "BH", pvalueCutoff = 0.05)
as.data.frame(go_down_treatment_D5)
top_go_down_treatment_D5 <- go_down_treatment_D5@result[1:10, ]
as.data.frame(top_go_down_treatment_D5)
go_treatment_down_D5 <- barplot(go_down_treatment_D5, showCategory = 15) + labs(title = "GO Down: Gefitinib vs DMSO")
ggsave("GO_treatment_down_D5.pdf", go_treatment_down_D5, width = 7, height = 6)
ggsave("GO_treatment_down_D5.png", go_treatment_down_D5, width = 7, height = 6, dpi = 300)
# KEGG Analysis - Treatment
kegg_up_treatment_D5 <- enrichKEGG(gene = entrez_up_treatment_D5, organism = 'hsa',
pvalueCutoff = 0.1, pAdjustMethod = "BH")
as.data.frame(kegg_up_treatment_D5)
kegg_treatment_up_D5 <- barplot(kegg_up_treatment_D5, showCategory = 15) + labs(title = "KEGG Up: Gefitinib vs DMSO")
ggsave("KEGG_treatment_up_D5.pdf", kegg_treatment_up_D5, width = 7, height = 6)
ggsave("KEGG_treatment_up_D5.png", kegg_treatment_up_D5, width = 7, height = 6, dpi = 300)
kegg_down_treatment_D5 <- enrichKEGG(gene = entrez_down_treatment_D5, organism = 'hsa',
pvalueCutoff = 0.1, pAdjustMethod = "BH")
as.data.frame(kegg_down_treatment_D5)
kegg_treatment_down_D5 <- barplot(kegg_down_treatment_D5, showCategory = 15) + labs(title = "KEGG Down: Gefitinib vs DMSO")
ggsave("KEGG_treatment_down_D5.pdf", kegg_treatment_down_D5, width = 7, height = 6)
ggsave("KEGG_treatment_down_D5.png", kegg_treatment_down_D5, width = 7, height = 6, dpi = 300)
# CELL LINE EFFECT PATHWAYS
# Separate up/down genes
sig_genes_up_cellline_D5 <- rownames(sig_cellline_D5)[sig_cellline_D5$log2FoldChange > 1]
sig_genes_down_cellline_D5 <- rownames(sig_cellline_D5)[sig_cellline_D5$log2FoldChange < -1]
# Convert to ENTREZ
entrez_up_cellline_D5 <- mapIds(org.Hs.eg.db, sig_genes_up_cellline_D5, "ENTREZID", "ENSEMBL")
entrez_down_cellline_D5 <- mapIds(org.Hs.eg.db, sig_genes_down_cellline_D5, "ENTREZID", "ENSEMBL")
entrez_up_cellline_D5 <- entrez_up_cellline_D5[!is.na(entrez_up_cellline_D5)]
entrez_down_cellline_D5 <- entrez_down_cellline_D5[!is.na(entrez_down_cellline_D5)]
# GO Analysis - Cell Line
go_up_cellline_D5 <- enrichGO(gene = sig_genes_up_cellline_D5, OrgDb = org.Hs.eg.db,
keyType = "ENSEMBL", ont = "ALL",
pAdjustMethod = "BH", pvalueCutoff = 0.05)
as.data.frame(go_up_cellline_D5)
go_cellline_up_D5 <- barplot(go_up_cellline_D5, showCategory = 15) + labs(title = "GO Up: HCC4006 vs HCC827")
ggsave("GO_cellline_up_D5.pdf", go_cellline_up_D5, width = 7, height = 6)
ggsave("GO_cellline_up_D5.png", go_cellline_up_D5, width = 7, height = 6, dpi = 300)
go_down_cellline_D5 <- enrichGO(gene = sig_genes_down_cellline_D5, OrgDb = org.Hs.eg.db,
keyType = "ENSEMBL", ont = "ALL",
pAdjustMethod = "BH", pvalueCutoff = 0.05)
as.data.frame(go_down_cellline_D5)
go_cellline_down_D5 <- barplot(go_down_cellline_D5, showCategory = 15) + labs(title = "GO Down: HCC4006 vs HCC827")
ggsave("GO_cellline_down_D5.pdf", go_cellline_down_D5, width = 7, height = 6)
ggsave("GO_cellline_down_D5.png", go_cellline_down_D5, width = 7, height = 6, dpi = 300)
# KEGG Analysis - Cell Line
kegg_up_cellline_D5 <- enrichKEGG(gene = entrez_up_cellline_D5, organism = 'hsa',
pvalueCutoff = 0.05, pAdjustMethod = "BH")
as.data.frame(kegg_up_cellline_D5)
kegg_cellline_up_D5 <- barplot(kegg_up_cellline_D5, showCategory = 15) + labs(title = "KEGG Up: HCC4006 vs HCC827")
ggsave("KEGG_cellline_up_D5.pdf", kegg_cellline_up_D5, width = 7, height = 6)
ggsave("KEGG_cellline_up_D5.png", kegg_cellline_up_D5, width = 7, height = 6, dpi = 300)
kegg_down_cellline_D5 <- enrichKEGG(gene = entrez_down_cellline_D5, organism = 'hsa',
pvalueCutoff = 0.05, pAdjustMethod = "BH")
as.data.frame(kegg_down_cellline_D5)
kegg_cellline_down_D5 <- barplot(kegg_down_cellline_D5, showCategory = 15) + labs(title = "KEGG Down: HCC4006 vs HCC827")
ggsave("KEGG_cellline_down_D5.pdf", kegg_cellline_down_D5, width = 7, height = 6)
ggsave("KEGG_cellline_down_D5.png", kegg_cellline_down_D5, width = 7, height = 6, dpi = 300)
library(fgsea)
library(msigdbr)
# TREATMENT GSEA
# Prepare ranked gene list
gene_ranks_treatment_D5 <- res_treatment_D5$log2FoldChange
names(gene_ranks_treatment_D5) <- rownames(res_treatment_D5)
gene_ranks_treatment_D5 <- gene_ranks_treatment_D5[!is.na(gene_ranks_treatment_D5)]
gene_ranks_treatment_D5 <- sort(gene_ranks_treatment_D5, decreasing = TRUE)
# Get Hallmark pathways
hallmark_sets_D5 <- msigdbr(species = "Homo sapiens", category = "H")
hallmark_list_D5 <- split(hallmark_sets_D5$ensembl_gene, hallmark_sets_D5$gs_name)
BiocParallel::register(BiocParallel::SerialParam())
# Run GSEA
fgsea_treatment_D5 <- fgseaMultilevel(pathways = hallmark_list_D5, stats = gene_ranks_treatment_D5,
minSize = 15, maxSize = 500)
print("Top GSEA Results - Treatment (Gefitinib vs DMSO):")
print(head(fgsea_treatment_D5[order(pval), ], 10))
top_pathways_treatment_D5 <- head(fgsea_treatment_D5[order(pval), ], 3)
# Plot top 3 pathways
for(i in 1:3) {
p1_D5 <- top_pathways_treatment_D5$pathway[i]
pathways_treatment_D5 <- plotEnrichment(hallmark_list_D5[[p1_D5]], gene_ranks_treatment_D5) +
labs(title = paste("Pathway", i, ":", p1_D5))
print(pathways_treatment_D5)
ggsave("Top3_GSEAtreatment_D5.pdf", pathways_treatment_D5, width = 7, height = 6)
ggsave("Top3_GSEAtreatment_D5.png", pathways_treatment_D5, width = 7, height = 6, dpi = 300)
}
# CELL LINE GSEA
# Prepare ranked gene list
gene_ranks_cellline_D5 <- res_cellline_D5$log2FoldChange
names(gene_ranks_cellline_D5) <- rownames(res_cellline_D5)
gene_ranks_cellline_D5 <- gene_ranks_cellline_D5[!is.na(gene_ranks_cellline_D5)]
gene_ranks_cellline_D5 <- sort(gene_ranks_cellline_D5, decreasing = TRUE)
# Run GSEA
fgsea_cellline_D5 <- fgseaMultilevel(pathways = hallmark_list_D5, stats = gene_ranks_cellline_D5,
minSize = 15, maxSize = 500)
print("Top GSEA Results - Cell Line (HCC4006 vs HCC827):")
print(head(fgsea_cellline_D5[order(pval), ], 10))
top_pathways_cellline_D5 <- head(fgsea_cellline_D5[order(pval), ], 3)
# Plot top 3 pathways
for(i in 1:3) {
p2_D5 <- top_pathways_cellline_D5$pathway[i]
pathways_cellline_D5 <- plotEnrichment(hallmark_list_D5[[p2_D5]], gene_ranks_cellline_D5) +
labs(title = paste("Pathway", i, ":", p2_D5))
print(pathways_cellline_D5)
ggsave("Top3_GSEAcellline_D5.pdf", pathways_cellline_D5, width = 7, height = 6)
ggsave("Top3_GSEAcellline_D5.png", pathways_cellline_D5, width = 7, height = 6, dpi = 300)
}
# Summary statistics table for D5
summary_stats_D5 <- data.frame(
Metric = c("Total Samples", "Genes Analyzed - Tretament", "Gene Analyzed - Cell Line", "Significant DEGs - Treatment", "Significant DEGs - Cell Line","Upregulated genes - Treatment", "Downregulated genes - Treatment", "Upregulated genes - Cell Line", "Upregulated genes - Cell Line"),
Value = c(ncol(counts_D5),
nrow(res_treatment_annotated_D5),
nrow(res_cellline_annotated_D5),
nrow(sig_treatment_D5),
nrow(sig_cellline_D5),
length(sig_genes_up_treatment_D5),
length(sig_genes_down_treatment_D5),
length(sig_genes_up_cellline_D5),
length(sig_genes_down_cellline_D5))
)
summary_stats_D5
# Top genes table
top_genes_treatment_D5 <- data.frame(
Gene_Symbol = gene_symbols_D5[rownames(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20))],
ENSEMBL_ID = rownames(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20)),
Log2FC = round(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20)$log2FoldChange, 3),
Padj = format(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20)$padj, scientific = TRUE, digits = 3),
Direction = ifelse(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20)$log2FoldChange > 0, "Up", "Down")
)
print("Top 20 Significant Genes: Treatment")
print(top_genes_treatment_D5)
top_genes_cellline_D5 <- data.frame(
Gene_Symbol = gene_symbols_D5[rownames(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20))],
ENSEMBL_ID = rownames(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20)),
Log2FC = round(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20)$log2FoldChange, 3),
Padj = format(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20)$padj, scientific = TRUE, digits = 3),
Direction = ifelse(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20)$log2FoldChange > 0, "Up", "Down")
)
print("Top 20 Significant Genes: Cell Line")
print(top_genes_cellline_D5)
# Export main results
write.csv(as.data.frame(res_treatment_annotated_D5), "D5_treatment_results.csv", row.names = TRUE)
write.csv(sig_treatment_D5, "D5_sig_treatment_genes.csv", row.names = TRUE)
write.csv(summary_stats_D5, "D5_summary_statistics.csv", row.names = FALSE)
write.csv(top_genes_treatment_D5, "D5_top20_treatment_genes.csv", row.names = FALSE)
# Export normalized data
write.csv(counts(dds_D5, normalized=TRUE), "D5_normalized_counts.csv", row.names = TRUE)
write.csv(assay(vst_D5), "D5_vst_data.csv", row.names = TRUE)
# Export pathway results
write.csv(go_up_treatment_D5@result, "D5_GO_up.csv", row.names = FALSE)
write.csv(go_down_treatment_D5@result, "D5_GO_down.csv", row.names = FALSE)
write.csv(kegg_up_treatment_D5@result, "D5_KEGG_up.csv", row.names = FALSE)
write.csv(kegg_down_treatment_D5@result, "D5_KEGG_down.csv", row.names = FALSE)
# Remove the leadingEdge column (which contains lists)
fgsea_results_clean_D5 <- fgsea_treatment_D5 %>%
select(-leadingEdge)
write.csv(fgsea_results_clean_D5, "D5_GSEA_hallmarks.csv", row.names = FALSE)
---
title: "Dataset 5"
output: html_notebook
---

```{r}
#calling the libraries

library(DESeq2)
library(ggplot2)
```

```{r}
#calling in the count matrix
counts_D5 <- read.delim("counts_D5", header = TRUE, row.names = 1)
```

```{r}
#understanding the distribution of counts in the data set

total_counts_per_gene_D5 <- rowSums(counts_D5)

hist(total_counts_per_gene_D5, 
     breaks=100, 
     main="Distribution of Total Counts per Gene", 
     xlab="Total counts across all samples",
     ylab="Number of genes")
summary(total_counts_per_gene_D5)
```

```{r}
#ignoring low expressed gene to remove noise & get better results

counts_D5 <- counts_D5[which(rowSums(counts_D5) > 50), ]
```

```{r}
#calling in the metadata
metadata_D5 <- read.csv("metadata_D5.csv", header = TRUE)
```

```{r}
counts_D5 <- round(counts_D5)
dds_D5 <- DESeqDataSetFromMatrix(countData = counts_D5,
                                colData = metadata_D5,
                                design = ~ cell_line + treatment)
dds_D5
```

```{r}
# Set reference levels
dds_D5$treatment <- relevel(dds_D5$treatment, ref = "DMSO")
dds_D5$cell_line <- relevel(dds_D5$cell_line, ref = "HCC827")

# Run DESeq2 analysis
dds_D5 <- DESeq(dds_D5)

# Extract results
res_treatment_D5 <- results(dds_D5, contrast = c("treatment", "Gefitinib", "DMSO"))
res_cellline_D5 <- results(dds_D5, contrast = c("cell_line", "HCC4006", "HCC827"))

summary(res_treatment_D5)
summary(res_cellline_D5)

```

```{r}
# Data transformation for visualization
vst_D5 <- vst(dds_D5, blind = FALSE)

vst_matrix_D5 <- assay(vst_D5)

head(vst_matrix_D5[1:5, 1:3])

library(reshape2)

vst_df_D5 <- as.data.frame(vst_matrix_D5)
vst_df_D5$gene_id <- rownames(vst_df_D5)

vst_final_D5 <- melt(vst_df_D5, 
                 id.vars = "gene_id", 
                 variable.name = "sample_id", 
                 value.name = "expression_value")

head(vst_final_D5)

pca_plot_D5 <- plotPCA(vst_D5, intgroup = c("treatment", "cell_line"))

ggsave(("D5_PCA.pdf"), pca_plot_D5, width = 6, height = 5)
ggsave(("D5_PCA.png"), pca_plot_D5, width = 6, height = 5, dpi = 300)
```

```{r}
# Dispersion Plot
plotDispEsts(dds_D5, main = "Dispersion Estimates")

pdf("DispersionPlot_D5.pdf", width = 6, height = 5)
plotDispEsts(dds_D5)
dev.off()

png("DispersionPlot_D5.png", width = 2000, height = 1600, res = 300)
plotDispEsts(dds_D5)
dev.off()

resultsNames(dds_D5)
```

```{r}
#loading annotation packages for gene symbol conversion
library(AnnotationDbi)
library(org.Hs.eg.db)

# I'm converting ENSEMBL IDs to gene symbols
gene_symbols_D5 <- mapIds(org.Hs.eg.db,
                         keys = rownames(dds_D5),
                         column = "SYMBOL",
                         keytype = "ENSEMBL",
                         multiVals = "first")

res_annotated_D5 <- res_treatment_D5
res_annotated_D5$gene_symbol <- gene_symbols_D5
as.data.frame(res_annotated_D5)
```


```{r}
 #LFC Shrinkage
res_treatment_D5 <- lfcShrink(dds_D5, coef = "treatment_Gefitinib_vs_DMSO", type = "apeglm")
res_cellline_D5 <- lfcShrink(dds_D5, coef = "cell_line_HCC4006_vs_HCC827", type = "apeglm")

res_treatment_annotated_D5 <- res_treatment_D5
res_treatment_annotated_D5$gene_symbol <- gene_symbols_D5

res_cellline_annotated_D5 <- res_cellline_D5
res_cellline_annotated_D5$gene_symbol <- gene_symbols_D5

# Get significant genes
sig_treatment_D5 <- res_treatment_annotated_D5[which(res_treatment_annotated_D5$padj < 0.05 & 
                                                     abs(res_treatment_annotated_D5$log2FoldChange) > 1), ]
sig_cellline_D5 <- res_cellline_annotated_D5[which(res_cellline_annotated_D5$padj < 0.05 & 
                                                   abs(res_cellline_annotated_D5$log2FoldChange) > 1), ]

nrow(sig_treatment_D5)
nrow(sig_cellline_D5)

# Top genes
top_treatment_D5 <- head(sig_treatment_D5[order(sig_treatment_D5$padj), ], 20)
top_cellline_D5 <- head(sig_cellline_D5[order(sig_cellline_D5$padj), ], 20)

# Overlaps
overlap_D5 <- intersect(rownames(sig_treatment_D5), rownames(sig_cellline_D5))
length(overlap_D5)
```

Visualization Plots
```{r}
# MA Plot
plotMA(res_treatment_D5, main = "MA Plot - Gefitinib vs DMSO", ylim = c(-3, 3))

pdf(("MA_treatment_D5.pdf"), width = 6, height = 5)
plotMA(res_treatment_D5, ylim=c(-5,5), main="MA Plot - Gefitinib vs DMSO")
dev.off()

png(("MA_treatment_D5.png"), width = 2000, height = 1600, res = 300)
plotMA(res_treatment_D5, ylim=c(-5,5), main="MA Plot - Gefitinib vs DMSO")
dev.off()

plotMA(res_cellline_D5, main = "MA Plot - HCC4006 vs HCC827", ylim = c(-3, 3))

pdf(("MA_cellline_D5.pdf"), width = 6, height = 5)
plotMA(res_cellline_D5, ylim=c(-5,5), main="MA Plot - HCC4006 vs HCC827")
dev.off()

png(("MA_cellline_D5.png"), width = 2000, height = 1600, res = 300)
plotMA(res_cellline_D5, ylim=c(-5,5), main="MA Plot - HCC4006 vs HCC827")
dev.off()

# Volcano plots
library(EnhancedVolcano)

volcano_treatment_D5 <- EnhancedVolcano(res_treatment_annotated_D5, lab = res_treatment_annotated_D5$gene_symbol,
                x = 'log2FoldChange', y = 'padj',
                title = 'Gefitinib vs DMSO')

ggsave(("volcano_treatment_D5.pdf"), volcano_treatment_D5, width = 6, height = 5)
ggsave(("volcano_treatment_D5.png"), volcano_treatment_D5, width = 6, height = 5, dpi = 300)


volcano_cellline_D5 <- EnhancedVolcano(res_cellline_annotated_D5, lab = res_cellline_annotated_D5$gene_symbol,
                x = 'log2FoldChange', y = 'padj',
                title = 'HCC4006 vs HCC827')

ggsave(("volcano_cellline_D5.pdf"), volcano_cellline_D5, width = 6, height = 5)
ggsave(("volcano_cellline_D5.png"), volcano_cellline_D5, width = 6, height = 5, dpi = 300)

# Heatmap of top DE genes
library(pheatmap)

if(length(overlap_D5) > 0) {
  top_overlap_D5 <- head(overlap_D5[order(sig_treatment_D5[overlap_D5,]$padj)], 50)
  pheatmap::pheatmap(assay(vst_D5)[top_overlap_D5,], 
           annotation_col = as.data.frame(colData(dds_D5)[,c("cell_line", "treatment")]),
           scale = "row", show_rownames = FALSE,
           main = "Top 50 Overlapping DE Genes",
           filename = "DEGs_D5.png")
}
```
```{r}
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(RColorBrewer)

# Prepare top genes for treatment effect
top_treatment_heatmap_D5 <- res_treatment_annotated_D5[!is.na(res_treatment_annotated_D5$padj) & 
                                                       res_treatment_annotated_D5$padj < 0.05 & 
                                                       res_treatment_annotated_D5$baseMean > 50 & 
                                                       abs(res_treatment_annotated_D5$log2FoldChange) > 2,]
top_treatment_heatmap_D5 <- head(as.data.frame(top_treatment_heatmap_D5), 50)
top_treatment_heatmap_D5 <- top_treatment_heatmap_D5[order(top_treatment_heatmap_D5$log2FoldChange, decreasing = TRUE),]

# Create matrices
rlog_D5 <- rlog(dds_D5, blind = FALSE)
mat_D5 <- assay(rlog_D5)[rownames(top_treatment_heatmap_D5), ]
mat_scaled_D5 <- t(apply(mat_D5, 1, scale))
colnames(mat_scaled_D5) <- colnames(mat_D5)

# Select top and bottom genes
num_keep <- min(25, floor(nrow(mat_scaled_D5)/2))
rows_keep_D5 <- c(seq(1:num_keep), seq((nrow(mat_scaled_D5) - num_keep + 1), nrow(mat_scaled_D5)))

# Prepare annotations
l2_val_D5 <- as.matrix(top_treatment_heatmap_D5[rows_keep_D5,]$log2FoldChange)
colnames(l2_val_D5) <- "logFC"
mean_val_D5 <- as.matrix(top_treatment_heatmap_D5[rows_keep_D5,]$baseMean)
colnames(mean_val_D5) <- "AveExpr"

# Colors
col_logFC_D5 <- colorRamp2(c(min(l2_val_D5), 0, max(l2_val_D5)), c("blue", "white", "red"))
col_AveExpr_D5 <- colorRamp2(c(quantile(mean_val_D5)[1], quantile(mean_val_D5)[4]), c("white", "red"))
col_zscore_D5 <- colorRamp2(c(-2, 0, 2), c("#4575b4", "white", "#d73027"))

# Create heatmaps
h1_D5 <- Heatmap(mat_scaled_D5[rows_keep_D5,], cluster_rows = TRUE,
                 name = "Z-score", col = col_zscore_D5,
                 show_row_names = FALSE)

h2_D5 <- Heatmap(l2_val_D5, row_labels = top_treatment_heatmap_D5$gene_symbol[rows_keep_D5],
                 cluster_rows = FALSE, name = "logFC", col = col_logFC_D5,
                 width = unit(15, "mm"),
                 row_names_gp = gpar(fontsize = 6),  # Decreased from default (usually 10-12)
                 cell_fun = function(j, i, x, y, w, h, col) {
                   grid.text(round(l2_val_D5[i, j], 1), x, y, gp = gpar(fontsize = 7))
                 })

h3_D5 <- Heatmap(mean_val_D5, row_labels = top_treatment_heatmap_D5$gene_symbol[rows_keep_D5],
                 cluster_rows = FALSE, name = "AveExpr", col = col_AveExpr_D5,
                 width = unit(15, "mm"),
                 row_names_gp = gpar(fontsize = 6),  # Decreased from default
                 cell_fun = function(j, i, x, y, w, h, col) {
                   grid.text(round(mean_val_D5[i, j], 0), x, y, gp = gpar(fontsize = 7))
                 })

h_D5 <- h1_D5 + h2_D5 + h3_D5
print(h_D5)

pdf("AnnotatedHeatmap_D5.pdf", width = 10, height = 6)
draw(h_D5)
dev.off()

png("AnnotatedHeatmap_D5.png", width = 3000, height = 2000, res = 300)
draw(h_D5)
dev.off()

```


```{r}
library(clusterProfiler)
library(DOSE)
library(enrichplot)

# TREATMENT EFFECT PATHWAYS
# Separate up/down genes
sig_genes_up_treatment_D5 <- rownames(sig_treatment_D5)[sig_treatment_D5$log2FoldChange > 1]
sig_genes_down_treatment_D5 <- rownames(sig_treatment_D5)[sig_treatment_D5$log2FoldChange < -1]

print(paste("Upregulated:", length(sig_genes_up_treatment_D5)))
print(paste("Downregulated:", length(sig_genes_down_treatment_D5)))

# Convert to ENTREZ for KEGG
entrez_up_treatment_D5 <- mapIds(org.Hs.eg.db, sig_genes_up_treatment_D5, "ENTREZID", "ENSEMBL")
entrez_down_treatment_D5 <- mapIds(org.Hs.eg.db, sig_genes_down_treatment_D5, "ENTREZID", "ENSEMBL")
entrez_up_treatment_D5 <- entrez_up_treatment_D5[!is.na(entrez_up_treatment_D5)]
entrez_down_treatment_D5 <- entrez_down_treatment_D5[!is.na(entrez_down_treatment_D5)]

# GO Analysis - Treatment
go_up_treatment_D5 <- enrichGO(gene = sig_genes_up_treatment_D5, OrgDb = org.Hs.eg.db,
                                keyType = "ENSEMBL", ont = "ALL", 
                                pAdjustMethod = "BH", pvalueCutoff = 0.05)
as.data.frame(go_up_treatment_D5)

go_treatment_up_D5 <- barplot(go_up_treatment_D5, showCategory = 15) + labs(title = "GO Up: Gefitinib vs DMSO")

ggsave("GO_treatment_up_D5.pdf", go_treatment_up_D5, width = 7, height = 6)
ggsave("GO_treatment_up_D5.png", go_treatment_up_D5, width = 7, height = 6, dpi = 300)

go_down_treatment_D5 <- enrichGO(gene = sig_genes_down_treatment_D5, OrgDb = org.Hs.eg.db,
                                  keyType = "ENSEMBL", ont = "ALL",
                                  pAdjustMethod = "BH", pvalueCutoff = 0.05)
as.data.frame(go_down_treatment_D5)
top_go_down_treatment_D5 <- go_down_treatment_D5@result[1:10, ]
as.data.frame(top_go_down_treatment_D5)

go_treatment_down_D5 <- barplot(go_down_treatment_D5, showCategory = 15) + labs(title = "GO Down: Gefitinib vs DMSO")

ggsave("GO_treatment_down_D5.pdf", go_treatment_down_D5, width = 7, height = 6)
ggsave("GO_treatment_down_D5.png", go_treatment_down_D5, width = 7, height = 6, dpi = 300)


# KEGG Analysis - Treatment
kegg_up_treatment_D5 <- enrichKEGG(gene = entrez_up_treatment_D5, organism = 'hsa',
                                    pvalueCutoff = 0.1, pAdjustMethod = "BH")
as.data.frame(kegg_up_treatment_D5)
kegg_treatment_up_D5 <- barplot(kegg_up_treatment_D5, showCategory = 15) + labs(title = "KEGG Up: Gefitinib vs DMSO")

ggsave("KEGG_treatment_up_D5.pdf", kegg_treatment_up_D5, width = 7, height = 6)
ggsave("KEGG_treatment_up_D5.png", kegg_treatment_up_D5, width = 7, height = 6, dpi = 300)

kegg_down_treatment_D5 <- enrichKEGG(gene = entrez_down_treatment_D5, organism = 'hsa',
                                      pvalueCutoff = 0.1, pAdjustMethod = "BH")
as.data.frame(kegg_down_treatment_D5)
kegg_treatment_down_D5 <- barplot(kegg_down_treatment_D5, showCategory = 15) + labs(title = "KEGG Down: Gefitinib vs DMSO")

ggsave("KEGG_treatment_down_D5.pdf", kegg_treatment_down_D5, width = 7, height = 6)
ggsave("KEGG_treatment_down_D5.png", kegg_treatment_down_D5, width = 7, height = 6, dpi = 300)

# CELL LINE EFFECT PATHWAYS
# Separate up/down genes
sig_genes_up_cellline_D5 <- rownames(sig_cellline_D5)[sig_cellline_D5$log2FoldChange > 1]
sig_genes_down_cellline_D5 <- rownames(sig_cellline_D5)[sig_cellline_D5$log2FoldChange < -1]

# Convert to ENTREZ
entrez_up_cellline_D5 <- mapIds(org.Hs.eg.db, sig_genes_up_cellline_D5, "ENTREZID", "ENSEMBL")
entrez_down_cellline_D5 <- mapIds(org.Hs.eg.db, sig_genes_down_cellline_D5, "ENTREZID", "ENSEMBL")
entrez_up_cellline_D5 <- entrez_up_cellline_D5[!is.na(entrez_up_cellline_D5)]
entrez_down_cellline_D5 <- entrez_down_cellline_D5[!is.na(entrez_down_cellline_D5)]

# GO Analysis - Cell Line
go_up_cellline_D5 <- enrichGO(gene = sig_genes_up_cellline_D5, OrgDb = org.Hs.eg.db,
                               keyType = "ENSEMBL", ont = "ALL",
                               pAdjustMethod = "BH", pvalueCutoff = 0.05)
as.data.frame(go_up_cellline_D5)
go_cellline_up_D5 <- barplot(go_up_cellline_D5, showCategory = 15) + labs(title = "GO Up: HCC4006 vs HCC827")

ggsave("GO_cellline_up_D5.pdf", go_cellline_up_D5, width = 7, height = 6)
ggsave("GO_cellline_up_D5.png", go_cellline_up_D5, width = 7, height = 6, dpi = 300)

go_down_cellline_D5 <- enrichGO(gene = sig_genes_down_cellline_D5, OrgDb = org.Hs.eg.db,
                                 keyType = "ENSEMBL", ont = "ALL",
                                 pAdjustMethod = "BH", pvalueCutoff = 0.05)
as.data.frame(go_down_cellline_D5)
go_cellline_down_D5 <- barplot(go_down_cellline_D5, showCategory = 15) + labs(title = "GO Down: HCC4006 vs HCC827")

ggsave("GO_cellline_down_D5.pdf", go_cellline_down_D5, width = 7, height = 6)
ggsave("GO_cellline_down_D5.png", go_cellline_down_D5, width = 7, height = 6, dpi = 300)

# KEGG Analysis - Cell Line
kegg_up_cellline_D5 <- enrichKEGG(gene = entrez_up_cellline_D5, organism = 'hsa',
                                   pvalueCutoff = 0.05, pAdjustMethod = "BH")
as.data.frame(kegg_up_cellline_D5)
kegg_cellline_up_D5 <- barplot(kegg_up_cellline_D5, showCategory = 15) + labs(title = "KEGG Up: HCC4006 vs HCC827")

ggsave("KEGG_cellline_up_D5.pdf", kegg_cellline_up_D5, width = 7, height = 6)
ggsave("KEGG_cellline_up_D5.png", kegg_cellline_up_D5, width = 7, height = 6, dpi = 300)

kegg_down_cellline_D5 <- enrichKEGG(gene = entrez_down_cellline_D5, organism = 'hsa',
                                     pvalueCutoff = 0.05, pAdjustMethod = "BH")
as.data.frame(kegg_down_cellline_D5)
kegg_cellline_down_D5 <- barplot(kegg_down_cellline_D5, showCategory = 15) + labs(title = "KEGG Down: HCC4006 vs HCC827")

ggsave("KEGG_cellline_down_D5.pdf", kegg_cellline_down_D5, width = 7, height = 6)
ggsave("KEGG_cellline_down_D5.png", kegg_cellline_down_D5, width = 7, height = 6, dpi = 300)

```


```{r}
library(fgsea)
library(msigdbr)

# TREATMENT GSEA
# Prepare ranked gene list
gene_ranks_treatment_D5 <- res_treatment_D5$log2FoldChange
names(gene_ranks_treatment_D5) <- rownames(res_treatment_D5)
gene_ranks_treatment_D5 <- gene_ranks_treatment_D5[!is.na(gene_ranks_treatment_D5)]
gene_ranks_treatment_D5 <- sort(gene_ranks_treatment_D5, decreasing = TRUE)

# Get Hallmark pathways
hallmark_sets_D5 <- msigdbr(species = "Homo sapiens", category = "H")
hallmark_list_D5 <- split(hallmark_sets_D5$ensembl_gene, hallmark_sets_D5$gs_name)

BiocParallel::register(BiocParallel::SerialParam())

# Run GSEA
fgsea_treatment_D5 <- fgseaMultilevel(pathways = hallmark_list_D5, stats = gene_ranks_treatment_D5,
                            minSize = 15, maxSize = 500)

print("Top GSEA Results - Treatment (Gefitinib vs DMSO):")
print(head(fgsea_treatment_D5[order(pval), ], 10))
top_pathways_treatment_D5 <- head(fgsea_treatment_D5[order(pval), ], 3)

# Plot top 3 pathways
for(i in 1:3) {
  p1_D5 <- top_pathways_treatment_D5$pathway[i]
  pathways_treatment_D5 <- plotEnrichment(hallmark_list_D5[[p1_D5]], gene_ranks_treatment_D5) + 
    labs(title = paste("Pathway", i, ":", p1_D5))
  print(pathways_treatment_D5)
  ggsave("Top3_GSEAtreatment_D5.pdf", pathways_treatment_D5, width = 7, height = 6)
  ggsave("Top3_GSEAtreatment_D5.png", pathways_treatment_D5, width = 7, height = 6, dpi = 300)
}
# CELL LINE GSEA
# Prepare ranked gene list
gene_ranks_cellline_D5 <- res_cellline_D5$log2FoldChange
names(gene_ranks_cellline_D5) <- rownames(res_cellline_D5)
gene_ranks_cellline_D5 <- gene_ranks_cellline_D5[!is.na(gene_ranks_cellline_D5)]
gene_ranks_cellline_D5 <- sort(gene_ranks_cellline_D5, decreasing = TRUE)

# Run GSEA
fgsea_cellline_D5 <- fgseaMultilevel(pathways = hallmark_list_D5, stats = gene_ranks_cellline_D5,
                           minSize = 15, maxSize = 500)

print("Top GSEA Results - Cell Line (HCC4006 vs HCC827):")
print(head(fgsea_cellline_D5[order(pval), ], 10))
top_pathways_cellline_D5 <- head(fgsea_cellline_D5[order(pval), ], 3)

# Plot top 3 pathways
for(i in 1:3) {
  p2_D5 <- top_pathways_cellline_D5$pathway[i]
  pathways_cellline_D5 <- plotEnrichment(hallmark_list_D5[[p2_D5]], gene_ranks_cellline_D5) + 
    labs(title = paste("Pathway", i, ":", p2_D5))
  print(pathways_cellline_D5)
  ggsave("Top3_GSEAcellline_D5.pdf", pathways_cellline_D5, width = 7, height = 6)
  ggsave("Top3_GSEAcellline_D5.png", pathways_cellline_D5, width = 7, height = 6, dpi = 300)
}
```


```{r}
# Summary statistics table for D5
summary_stats_D5 <- data.frame(
  Metric = c("Total Samples", "Genes Analyzed - Tretament", "Gene Analyzed - Cell Line", "Significant DEGs - Treatment", "Significant DEGs - Cell Line","Upregulated genes - Treatment", "Downregulated genes - Treatment", "Upregulated genes - Cell Line", "Upregulated genes - Cell Line"),
  Value = c(ncol(counts_D5), 
            nrow(res_treatment_annotated_D5),
            nrow(res_cellline_annotated_D5),
            nrow(sig_treatment_D5),
            nrow(sig_cellline_D5),
            length(sig_genes_up_treatment_D5),
            length(sig_genes_down_treatment_D5),
            length(sig_genes_up_cellline_D5),
            length(sig_genes_down_cellline_D5))
)

summary_stats_D5

# Top genes table
top_genes_treatment_D5 <- data.frame(
  Gene_Symbol = gene_symbols_D5[rownames(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20))],
  ENSEMBL_ID = rownames(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20)),
  Log2FC = round(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20)$log2FoldChange, 3),
  Padj = format(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20)$padj, scientific = TRUE, digits = 3),
  Direction = ifelse(head(sig_treatment_D5[order(sig_treatment_D5$padj),], 20)$log2FoldChange > 0, "Up", "Down")
)

print("Top 20 Significant Genes: Treatment")
print(top_genes_treatment_D5)

top_genes_cellline_D5 <- data.frame(
  Gene_Symbol = gene_symbols_D5[rownames(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20))],
  ENSEMBL_ID = rownames(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20)),
  Log2FC = round(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20)$log2FoldChange, 3),
  Padj = format(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20)$padj, scientific = TRUE, digits = 3),
  Direction = ifelse(head(sig_cellline_D5[order(sig_cellline_D5$padj),], 20)$log2FoldChange > 0, "Up", "Down")
)

print("Top 20 Significant Genes: Cell Line")
print(top_genes_cellline_D5)

```

```{r}
# Export main results
write.csv(as.data.frame(res_treatment_annotated_D5), "D5_treatment_results.csv", row.names = TRUE)
write.csv(sig_treatment_D5, "D5_sig_treatment_genes.csv", row.names = TRUE)
write.csv(summary_stats_D5, "D5_summary_statistics.csv", row.names = FALSE)
write.csv(top_genes_treatment_D5, "D5_top20_treatment_genes.csv", row.names = FALSE)

# Export normalized data
write.csv(res_annotated_D5, "D5_results.csv")
write.csv(counts(dds_D5, normalized=TRUE), "D5_normalized_counts.csv", row.names = TRUE)
write.csv(vst_final_D5, "D5_vst_data.csv", row.names = TRUE)

# Export pathway results
  write.csv(go_up_treatment_D5@result, "D5_GO_up.csv", row.names = FALSE)

  write.csv(go_down_treatment_D5@result, "D5_GO_down.csv", row.names = FALSE)


  write.csv(kegg_up_treatment_D5@result, "D5_KEGG_up.csv", row.names = FALSE)
  
  write.csv(kegg_down_treatment_D5@result, "D5_KEGG_down.csv", row.names = FALSE)

# Remove the leadingEdge column (which contains lists)
fgsea_results_clean_D5 <- fgsea_treatment_D5 %>%
  select(-leadingEdge)

write.csv(fgsea_results_clean_D5, "D5_GSEA_hallmarks.csv", row.names = FALSE)

```

