library(DESeq2)
library(ggplot2)
#loading my dataset
counts_D3 <- read.delim("counts_D3", header = TRUE, sep = "\t", row.names = 1)
#calling in the metadata
metadata_D3 <- read.csv("metadata_D3.csv", header = TRUE)

#checking the distribution of counts in my dataset
total_counts_per_gene_D3 <- rowSums(counts_D3)
hist(total_counts_per_gene_D3, 
     breaks=100, 
     main="Distribution of Total Counts per Gene", 
     xlab="Total counts across all samples",
     ylab="Number of genes")
summary(total_counts_per_gene_D3)
#ignoring low expressed gene to remove noise & get better results
counts_D3 <- counts_D3[which(rowSums(counts_D3) > 50), ]
#round counts for DESeq2 compatibility
counts_D3 <- round(counts_D3)
#loading DESeq2 for differential expression analysis
library(DESeq2)

#creating DESeq2 object with additive design
dds_D3 <- DESeqDataSetFromMatrix(countData = counts_D3,
                             colData = metadata_D3,        
                             design = ~ CellLine + Treatment)
dds_D3

#running the main DESeq2 analysis
dds_D3 <- DESeq(dds_D3)
dds_D3

res_D3 <- results(dds_D3, contrast = c("Treatment", "Apatinib", "Control"))
summary(res_D3)
plotDispEsts(dds_D3)

pdf("DispersionPlot_D3.pdf", width = 6, height = 5)
plotDispEsts(dds_D3)
dev.off()

png("DispersionPlot_D3.png", width = 2000, height = 1600, res = 300)
plotDispEsts(dds_D3)
dev.off()
#creating variance stabilized data for visualization
vsdata_D3 <- vst(dds_D3, blind = TRUE)

vst_matrix_D3 <- assay(vsdata_D3)

head(vst_matrix_D3[1:5, 1:3])

library(reshape2)

vst_df_D3 <- as.data.frame(vst_matrix_D3)
vst_df_D3$gene_id <- rownames(vst_df_D3)

vst_final_D3 <- melt(vst_df_D3, 
                 id.vars = "gene_id", 
                 variable.name = "sample_id", 
                 value.name = "expression_value")

# Check the result - should have exactly 3 columns
head(vst_final_D3)

#loading ggplot2 for enhanced plotting
library(ggplot2)
pca_plot_D3 <- plotPCA(vsdata_D3, intgroup = c("CellLine", "Treatment"))

ggsave(("D3_PCA.pdf"), pca_plot_D3, width = 6, height = 5)
ggsave(("D3_PCA.png"), pca_plot_D3, width = 6, height = 5, dpi = 300)

Strong Baseline Resistance:

The 90% variance dominated by cell line differences suggests PC9GR cells have undergone major transcriptional reprogramming. This indicates constitutive resistance mechanisms - the resistant cells are fundamentally different

Modest Treatment Response:

Only 6% variance from treatment suggests apatinib causes relatively subtle transcriptional changes compared to the baseline resistance.Both cell lines respond to treatment, but the response is small compared to their inherent differences.

resultsNames(dds_D3)
#clean LFC shrinkage
res_treatment_D3 <- lfcShrink(dds_D3, 
                                 coef = "Treatment_Control_vs_Apatinib", 
                                 type = "apeglm")

res_cellline_D3 <- lfcShrink(dds_D3, 
                                coef = "CellLine_PC9GR_vs_PC9", 
                                type = "apeglm")

summary(res_treatment_D3, alpha = 0.05)
summary(res_cellline_D3, alpha = 0.05)
library(AnnotationDbi)
library(org.Hs.eg.db)

# I'm converting ENSEMBL IDs to gene symbols
gene_symbols_D3 <- mapIds(org.Hs.eg.db,
                         keys = rownames(dds_D3),
                         column = "SYMBOL",
                         keytype = "ENSEMBL",
                         multiVals = "first")
res_annotated_D3 <- res_D3
res_annotated_D3$gene_symbol <- gene_symbols_D3
as.data.frame(res_annotated_D3)

#adding annotations to both main results
res_cellline_annotated_D3 <- res_cellline_D3
res_cellline_annotated_D3$gene_symbol <- gene_symbols_D3

res_treatment_annotated_D3 <- res_treatment_D3
res_treatment_annotated_D3$gene_symbol <- gene_symbols_D3
sig_treatment_D3 <- res_treatment_annotated_D3[which(res_treatment_annotated_D3$padj < 0.05 & abs(res_treatment_annotated_D3$log2FoldChange) > 1), ]
sig_cellline_D3 <- res_cellline_annotated_D3[which(res_cellline_annotated_D3$padj < 0.05 & abs(res_cellline_annotated_D3$log2FoldChange) > 1), ]

nrow(sig_treatment_D3)
[1] 489
nrow(sig_cellline_D3)
[1] 4818
# Get the top genes from each comparison
top20_treatment_D3 <- head(sig_treatment_D3[order(sig_treatment_D3$padj), ], 20)
top20_cellline_D3 <- head(sig_cellline_D3[order(sig_cellline_D3$padj), ], 20)

# Find overlapping genes
overlap_D3 <- intersect(rownames(sig_treatment_D3), rownames(sig_cellline_D3))
length(overlap_D3)
[1] 276

Visualization Plots

#MA Plots
plotMA(res_treatment_D3, ylim=c(-5,5), main="Treatment: Control vs Apatinib")
plotMA(res_cellline_D3, ylim=c(-5,5), main="Cell Line: PC9 vs PC9-GR")

pdf(("MA_res_treatment_D3.pdf"), width = 6, height = 5)
plotMA(res_treatment_D3, ylim=c(-5,5), main="Treatment: Control vs Apatinib")
dev.off()

png(("MA_res_treatment_D3.png"), width = 2000, height = 1600, res = 300)
plotMA(res_treatment_D3, ylim=c(-5,5), main="Treatment: Control vs Apatinib")
dev.off()

pdf(("MA_res_cellline_D3.pdf"), width = 6, height = 5)
plotMA(res_cellline_D3, ylim=c(-5,5), main="Cell Line: PC9 vs PC9-GR")
dev.off()

png(("MA_res_cellline_D3.png"), width = 2000, height = 1600, res = 300)
plotMA(res_cellline_D3, ylim=c(-5,5), main="Cell Line: PC9 vs PC9-GR")
dev.off()

# Volcano plots
library(EnhancedVolcano)
volcano_cellline_D3 <- EnhancedVolcano(res_cellline_annotated_D3, lab = res_cellline_annotated_D3$gene_symbol, 
                x = 'log2FoldChange', y = 'padj',
                title = 'PC9 cells vs PC9-GR cells')

ggsave(("D3_CelllineVolcano.pdf"), volcano_cellline_D3, width = 6, height = 5)
ggsave(("D3_CelllineVolcano.png"), volcano_cellline_D3, width = 6, height = 5, dpi = 300)

volcano_treatment_D3 <- EnhancedVolcano(res_treatment_annotated_D3, lab = res_treatment_annotated_D3$gene_symbol, 
                x = 'log2FoldChange', y = 'padj',
                title = 'Control vs Apatinib')

ggsave(("D3_TreatmentVolcano.pdf"), volcano_treatment_D3, width = 6, height = 5)
ggsave(("D3_TreatmentVolcano.png"), volcano_treatment_D3, width = 6, height = 5, dpi = 300)

# Heatmap of top differentially expressed genes
library(pheatmap)
top_overlap_D3 <- head(order(sig_treatment_D3[overlap_D3,]$padj), 50)
pheatmap::pheatmap(assay(vsdata_D3)[top_overlap_D3,], 
         annotation_col = as.data.frame(colData(dds_D3)[,c("CellLine", "Treatment")]), 
         scale = "row", 
         show_rownames = FALSE,
         main = "Top 50 Differentially Expressed Genes which overlapped",
         filename = "DEGs_D3.png")

In Volcano plots: The resistance genes (red dots in plot 1) are potential targets to overcome resistance The treatment genes (red dots in plot 2) reveal apatinib’s mechanism of action.

#creating enhanced heatmap with gene symbols and values
library(dplyr)
library(ComplexHeatmap)
library(RColorBrewer)
library(circlize)

#preparing top genes for enhanced heatmap 
top_cellline_heatmap_D3 <- res_cellline_annotated_D3[!is.na(res_cellline_annotated_D3$padj) & 
                                                    !is.na(res_cellline_annotated_D3$baseMean) & 
                                                    !is.na(res_cellline_annotated_D3$log2FoldChange) &
                                                    (res_cellline_annotated_D3$padj < 0.05) & 
                                                    (res_cellline_annotated_D3$baseMean > 50) & 
                                                    (abs(res_cellline_annotated_D3$log2FoldChange) > 2),]

top_cellline_heatmap_D3 <- top_cellline_heatmap_D3 %>%
  as.data.frame() %>%
  head(50)  

top_cellline_heatmap_D3 <- top_cellline_heatmap_D3[order(top_cellline_heatmap_D3$log2FoldChange, decreasing = TRUE),]

#getting normalized count data 
mat_D3 <- assay(vsdata_D3)[rownames(top_cellline_heatmap_D3), metadata_D3$Run]
colnames(mat_D3) <- metadata_D3$Run
base_mean_D3 <- rowMeans(mat_D3)
mat_scaled_D3 <- t(apply(mat_D3, 1, scale)) # Z-score normalization
colnames(mat_scaled_D3) <- colnames(mat_D3)

#selecting top and bottom genes
num_keep <- 25
rows_keep_D3 <- c(seq(1:num_keep), seq((nrow(mat_scaled_D3)-num_keep), nrow(mat_scaled_D3)))

# I'm preparing annotation matrices
l2_val_D3 <- as.matrix(top_cellline_heatmap_D3[rows_keep_D3,]$log2FoldChange)
colnames(l2_val_D3) <- "logFC"

mean_D3 <- as.matrix(top_cellline_heatmap_D3[rows_keep_D3,]$baseMean)
colnames(mean_D3) <- "AveExpr"

# I'm setting up color schemes (same as Dataset 2)
col_logFC_D3 <- colorRamp2(c(min(l2_val_D3), 0, max(l2_val_D3)), c("blue", "white", "red")) 
col_AveExpr_D3 <- colorRamp2(c(quantile(mean_D3)[1], quantile(mean_D3)[4]), c("white", "red"))
col_zscore_D3 <- colorRamp2(c(-2, 0, 2), c("#4575b4", "white", "#d73027"))

# I'm creating the main expression heatmap
h1_D3 <- Heatmap(mat_scaled_D3[rows_keep_D3,], 
              cluster_rows = TRUE,
              column_labels = colnames(mat_scaled_D3), 
              name="Z-score",
              col = col_zscore_D3,
              cluster_columns = TRUE,
              show_row_names = FALSE,
              column_names_gp = gpar(fontsize = 9),
              heatmap_legend_param = list(title_gp = gpar(fontsize = 10)))

# I'm creating the log2FC annotation heatmap
h2_D3 <- Heatmap(l2_val_D3, 
              row_labels = top_cellline_heatmap_D3$gene_symbol[rows_keep_D3],
              cluster_rows = FALSE, 
              name="logFC", 
              col = col_logFC_D3,
              width = unit(15, "mm"),
              show_column_names = TRUE,
              row_names_gp = gpar(fontsize = 8),
              column_names_gp = gpar(fontsize = 9),
              cell_fun = function(j, i, x, y, w, h, col) { 
                grid.text(round(l2_val_D3[i, j], 1), x, y, gp = gpar(fontsize = 7))
              })

# I'm creating the average expression annotation heatmap
h3_D3 <- Heatmap(mean_D3, 
              row_labels = top_cellline_heatmap_D3$gene_symbol[rows_keep_D3],
              cluster_rows = FALSE, 
              name = "AveExpr", 
              col = col_AveExpr_D3,
              width = unit(15, "mm"),
              show_row_names = TRUE,
              show_column_names = TRUE,
              column_names_gp = gpar(fontsize = 9),
              row_names_gp = gpar(fontsize = 7),
              cell_fun = function(j, i, x, y, w, h, col) { 
                grid.text(round(mean_D3[i, j], 0), x, y, gp = gpar(fontsize = 7))
              })

# I'm combining all heatmaps (same as Dataset 2)
h_D3 <- h1_D3 + h2_D3 + h3_D3
h_D3

pdf("AnnotatedHeatmap_D3.pdf", width = 10, height = 6)
draw(h_D3)
dev.off()

png("AnnotatedHeatmap_D3.png", width = 3000, height = 2000, res = 300)
draw(h_D3)
dev.off()
# Loading pathway analysis packages
  library(DOSE)
  library(enrichplot)
  library(clusterProfiler)
  
  # separating upregulated and downregulated genes
  sig_genes_up_cellline_D3 <- rownames(sig_cellline_D3)[sig_cellline_D3$log2FoldChange > 1]
  sig_genes_down_cellline_D3 <- rownames(sig_cellline_D3)[sig_cellline_D3$log2FoldChange < -1]
  
  # converting ENSEMBL to ENTREZ for KEGG analysis
  entrez_genes_up_cellline_D3 <- mapIds(org.Hs.eg.db, sig_genes_up_cellline_D3, "ENTREZID", "ENSEMBL")
  entrez_genes_down_cellline_D3 <- mapIds(org.Hs.eg.db, sig_genes_down_cellline_D3, "ENTREZID", "ENSEMBL")
  entrez_genes_up_cellline_D3 <- entrez_genes_up_cellline_D3[!is.na(entrez_genes_up_cellline_D3)]
  entrez_genes_down_cellline_D3 <- entrez_genes_down_cellline_D3[!is.na(entrez_genes_down_cellline_D3)]
  
  # running GO Analysis
  go_up_cellline_D3 <- enrichGO(gene = sig_genes_up_cellline_D3,
                                OrgDb = org.Hs.eg.db,
                                keyType = "ENSEMBL",
                                ont = "ALL",  # BP, CC, MF
                                pAdjustMethod = "BH",
                                pvalueCutoff = 0.05)
  as.data.frame(go_up_cellline_D3)

  go_up_D3 <-plot(barplot(go_up_cellline_D3, showCategory = 15)+labs(title = "Upregulated GO Terms (PC9GR vs PC9)"))
  ggsave("GO_cellline_Upreg_D3.pdf", go_up_D3, width = 7, height = 6)
  ggsave("GO_cellline_Upreg_D3.png", go_up_D3, width = 7, height = 6, dpi = 300)
  
  go_down_cellline_D3 <- enrichGO(gene = sig_genes_down_cellline_D3,
                                  OrgDb = org.Hs.eg.db,
                                  keyType = "ENSEMBL",
                                  ont = "ALL",
                                  pAdjustMethod = "BH",
                                  pvalueCutoff = 0.05)
  as.data.frame(go_down_cellline_D3)
  go_down_D3 <- plot(barplot(go_down_cellline_D3, showCategory = 15)+labs(title = "Downregulated GO Terms (PC9GR vs PC9)"))
  ggsave("GO_cellline_down_D3.pdf", go_down_D3, width = 7, height = 6)
  ggsave("GO_cellline_down_D3.png", go_down_D3, width = 7, height = 6, dpi = 300)
  
  # Running KEGG Pathway Analysis
  kegg_up_cellline_D3 <- enrichKEGG(gene = entrez_genes_up_cellline_D3,
                                    organism = 'hsa',
                                    pvalueCutoff = 0.05,
                                    pAdjustMethod = "BH")
  as.data.frame(kegg_up_cellline_D3)
  kegg_up_D3 <- plot(barplot(kegg_up_cellline_D3, showCategory = 15)+labs(title = "Upregulated KEGG Pathways (PC9GR vs PC9)"))
  ggsave("KEGG_cellline_up_D3.pdf", kegg_up_D3, width = 7, height = 6)
  ggsave("KEGG_cellline_up_D3.png", kegg_up_D3, width = 7, height = 6, dpi = 300)
  
  kegg_down_cellline_D3 <- enrichKEGG(gene = entrez_genes_down_cellline_D3,
                                      organism = 'hsa',
                                      pvalueCutoff = 0.1,
                                      pAdjustMethod = "BH")
  as.data.frame(kegg_down_cellline_D3)
  kegg_down_D3 <- plot(barplot(kegg_down_cellline_D3, showCategory = 15)+labs(title = "Downregulated KEGG Pathways (PC9GR vs PC9)"))
  ggsave("KEGG_cellline_down_D3.pdf", kegg_down_D3, width = 7, height = 6)
  ggsave("KEGG_cellline_down_D3.png", kegg_down_D3, width = 7, height = 6, dpi = 300)
  
# separating upregulated and downregulated genes
  sig_genes_up_treatment_D3 <- rownames(sig_treatment_D3)[sig_treatment_D3$log2FoldChange > 1]
length(sig_genes_up_treatment_D3)
  sig_genes_down_treatment_D3 <- rownames(sig_treatment_D3)[sig_treatment_D3$log2FoldChange < -1]
  
  print(paste("Upregulated:", length(sig_genes_up_treatment_D3)))
print(paste("Downregulated:", length(sig_genes_down_treatment_D3)))
  
  # converting ENSEMBL to ENTREZ for KEGG analysis
  entrez_genes_up_treatment_D3 <- mapIds(org.Hs.eg.db, sig_genes_up_treatment_D3, "ENTREZID", "ENSEMBL")
  entrez_genes_down_treatment_D3 <- mapIds(org.Hs.eg.db, sig_genes_down_treatment_D3, "ENTREZID", "ENSEMBL")
  entrez_genes_up_treatment_D3 <- entrez_genes_up_treatment_D3[!is.na(entrez_genes_up_treatment_D3)]
  entrez_genes_down_treatment_D3 <- entrez_genes_down_treatment_D3[!is.na(entrez_genes_down_treatment_D3)]
  
  # running GO Analysis
  go_up_treatment_D3 <- enrichGO(gene = sig_genes_up_treatment_D3,
                                OrgDb = org.Hs.eg.db,
                                keyType = "ENSEMBL",
                                ont = "ALL",  # BP, CC, MF
                                pAdjustMethod = "BH",
                                pvalueCutoff = 0.1)
  as.data.frame(go_up_treatment_D3)

  go_up_D3 <-plot(barplot(go_up_treatment_D3, showCategory = 15)+labs(title = "Upregulated GO Terms (Control vs Apatinib)"))
  ggsave("GO_treatment_Upreg_D3.pdf", go_uptreatment_D3, width = 7, height = 6)
  ggsave("GO_treatment_Upreg_D3.png", go_uptreatment_D3, width = 7, height = 6, dpi = 300)
  
  go_down_treatment_D3 <- enrichGO(gene = sig_genes_down_treatment_D3,
                                  OrgDb = org.Hs.eg.db,
                                  keyType = "ENSEMBL",
                                  ont = "ALL",
                                  pAdjustMethod = "BH",
                                  pvalueCutoff = 0.05)
  as.data.frame(go_down_treatment_D3)
  top_go_down_treatment_D3 <- go_down_treatment_D3@result[1:10, ]
as.data.frame(top_go_down_treatment_D3)
  
  go_down_D3 <- plot(barplot(go_down_treatment_D3, showCategory = 15)+labs(title = "Downregulated GO Terms (Control vs Apatinib)"))
  ggsave("GO_treatment_down_D3.pdf", go_downtreatment_D3, width = 7, height = 6)
  ggsave("GO_treatment_down_D3.png", go_downtreatment_D3, width = 7, height = 6, dpi = 300)
  
  # Running KEGG Pathway Analysis
  kegg_up_treatment_D3 <- enrichKEGG(gene = entrez_genes_up_treatment_D3,
                                    organism = 'hsa',
                                    pvalueCutoff = 0.1,
                                    pAdjustMethod = "BH")
  as.data.frame(kegg_up_treatment_D3)
  kegg_up_D3 <- plot(barplot(kegg_up_treatment_D3, showCategory = 15)+labs(title = "Upregulated KEGG Pathways (Control vs Apatinib)"))
  ggsave("KEGG_cellline_up_D3.pdf", kegg_up_D3, width = 7, height = 6)
  ggsave("KEGG_cellline_up_D3.png", kegg_up_D3, width = 7, height = 6, dpi = 300)
  
  kegg_down_treatment_D3 <- enrichKEGG(gene = entrez_genes_down_treatment_D3,
                                      organism = 'hsa',
                                      pvalueCutoff = 0.05,
                                      pAdjustMethod = "BH")
  as.data.frame(kegg_down_treatment_D3)
  kegg_down_D3 <- plot(barplot(kegg_down_cellline_D3, showCategory = 15)+labs(title = "Downregulated KEGG Pathways (PC9GR vs PC9)"))
  ggsave("KEGG_cellline_down_D3.pdf", kegg_down_D3, width = 7, height = 6)
  ggsave("KEGG_cellline_down_D3.png", kegg_down_D3, width = 7, height = 6, dpi = 300)
  
library(fgsea)
library(msigdbr)

#preparing ranked gene list for GSEA (cell line comparison)
gene_ranks_cellline_D3 <- res_cellline_D3$log2FoldChange
names(gene_ranks_cellline_D3) <- rownames(res_cellline_D3)
gene_ranks_cellline_D3 <- gene_ranks_cellline_D3[!is.na(gene_ranks_cellline_D3)]
gene_ranks_cellline_D3 <- sort(gene_ranks_cellline_D3, decreasing = TRUE)

#getting Hallmark pathways for GSEA 
hallmark_sets_D3 <- msigdbr(species = "Homo sapiens", category = "H")
hallmark_list_D3 <- split(hallmark_sets_D3$ensembl_gene, hallmark_sets_D3$gs_name)

#running GSEA analysis 
fgsea_results_cellline_D3 <- fgsea(pathways = hallmark_list_D3,
                                  stats = gene_ranks_cellline_D3,
                                  minSize = 15,
                                  maxSize = 500)

#viewing top results (same format as previous datasets)
print("Top GSEA Results (PC9GR vs PC9):")
print(head(fgsea_results_cellline_D3[order(pval), ], 10))

# I'm plotting top 3 most significant pathways from my GSEA results
print("Top 3 GSEA pathways:")
top_pathways_D3 <- head(fgsea_results_cellline_D3[order(pval), ], 3)
print(top_pathways_D3[, c("pathway", "pval", "NES")])

for(i in 1:3) {
  pathway_name <- top_pathways_D3$pathway[i]
  pathways_D3 <- plotEnrichment(hallmark_list_D3[[pathway_name]], gene_ranks_cellline_D3) + 
    labs(title = paste("Pathway", i, ":", pathway_name))
  print(pathways_D3)
  ggsave("Top3_GSEA_D3.pdf", pathways_D3, width = 7, height = 6)
  ggsave("Top3_GSEA_D3.png", pathways_D3, width = 7, height = 6, dpi = 300)
}
# Summary statistics table for D3
summary_stats_D3 <- data.frame(
  Metric = c("Total Samples", "Genes Analyzed", "Significant DEGs - PC9 vs PC9-GR", "Upregulated - PC9 vs PC9-GR", "Downregulated - PC9 vs PC9-GR"),
  Value = c(ncol(counts_D3), 
            nrow(res_D3),
            nrow(sig_cellline_D3),
            length(sig_genes_up_cellline_D3),
            length(sig_genes_down_cellline_D3))
)

summary_stats_D3

# Top genes table
top_genes_table_D3 <- data.frame(
  Gene_Symbol = gene_symbols_D3[rownames(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20))],
  ENSEMBL_ID = rownames(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20)),
  Log2FC = round(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20)$log2FoldChange, 3),
  Padj = format(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20)$padj, scientific = TRUE, digits = 3),
  Direction = ifelse(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20)$log2FoldChange > 0, "Up", "Down")
)

print("Top 20 Significant Genes: PC9 vs PC9-GR")
print(top_genes_table_D3)
# Export main results
write.csv(as.data.frame(res_cellline_annotated_D3), "D3_cellline_annotated_results.csv", row.names = TRUE)
write.csv(sig_cellline_D3, "D3_sig_cellline_genes.csv", row.names = TRUE)
write.csv(summary_stats_D3, "D3_summary_statistics.csv", row.names = FALSE)
write.csv(top_genes_table_D3, "D3_top20_genes.csv", row.names = FALSE)

# Export normalized data
write.csv(counts(dds_D3, normalized=TRUE), "D3_normalized_counts.csv", row.names = TRUE)
write.csv(assay(vsdata_D3), "D3_vst_data.csv", row.names = TRUE)

# Export pathway results
if(nrow(go_up_cellline_D3@result) > 0) {
  write.csv(go_up_cellline_D3@result, "D3_GO_upregulated.csv", row.names = FALSE)
}
if(nrow(go_down_cellline_D3@result) > 0) {
  write.csv(go_down_cellline_D3@result, "D3_GO_downregulated.csv", row.names = FALSE)
}
if(nrow(kegg_up_cellline_D3@result) > 0) {
  write.csv(kegg_up_cellline_D3@result, "D3_KEGG_upregulated.csv", row.names = FALSE)
}
if(nrow(kegg_down_cellline_D3@result) > 0) {
  write.csv(kegg_down_cellline_D3@result, "D3_KEGG_downregulated.csv", row.names = FALSE)
}

# Remove the leadingEdge column (which contains lists)
fgsea_results_clean_D3 <- fgsea_results_cellline_D3 %>%
  select(-leadingEdge)

write.csv(fgsea_results_clean_D3, "D3_GSEA_hallmarks.csv", row.names = FALSE)
---
title: "Dataset 3"
output: html_notebook
---

```{r}
library(DESeq2)
library(ggplot2)
```

```{r}
#loading my dataset
counts_D3 <- read.delim("counts_D3", header = TRUE, sep = "\t", row.names = 1)
#calling in the metadata
metadata_D3 <- read.csv("metadata_D3.csv", header = TRUE)

#checking the distribution of counts in my dataset
total_counts_per_gene_D3 <- rowSums(counts_D3)
hist(total_counts_per_gene_D3, 
     breaks=100, 
     main="Distribution of Total Counts per Gene", 
     xlab="Total counts across all samples",
     ylab="Number of genes")
summary(total_counts_per_gene_D3)
```

```{r}
#ignoring low expressed gene to remove noise & get better results
counts_D3 <- counts_D3[which(rowSums(counts_D3) > 50), ]
```

```{r}
#round counts for DESeq2 compatibility
counts_D3 <- round(counts_D3)
#loading DESeq2 for differential expression analysis
library(DESeq2)

#creating DESeq2 object with additive design
dds_D3 <- DESeqDataSetFromMatrix(countData = counts_D3,
                             colData = metadata_D3,        
                             design = ~ CellLine + Treatment)
dds_D3

#running the main DESeq2 analysis
dds_D3 <- DESeq(dds_D3)
dds_D3

res_D3 <- results(dds_D3, contrast = c("Treatment", "Apatinib", "Control"))
summary(res_D3)
```

```{r}
plotDispEsts(dds_D3)

pdf("DispersionPlot_D3.pdf", width = 6, height = 5)
plotDispEsts(dds_D3)
dev.off()

png("DispersionPlot_D3.png", width = 2000, height = 1600, res = 300)
plotDispEsts(dds_D3)
dev.off()
```

```{r}
#creating variance stabilized data for visualization
vsdata_D3 <- vst(dds_D3, blind = TRUE)

vst_matrix_D3 <- assay(vsdata_D3)

head(vst_matrix_D3[1:5, 1:3])

library(reshape2)

vst_df_D3 <- as.data.frame(vst_matrix_D3)
vst_df_D3$gene_id <- rownames(vst_df_D3)

vst_final_D3 <- melt(vst_df_D3, 
                 id.vars = "gene_id", 
                 variable.name = "sample_id", 
                 value.name = "expression_value")

# Check the result - should have exactly 3 columns
head(vst_final_D3)

#loading ggplot2 for enhanced plotting
library(ggplot2)
pca_plot_D3 <- plotPCA(vsdata_D3, intgroup = c("CellLine", "Treatment"))

ggsave(("D3_PCA.pdf"), pca_plot_D3, width = 6, height = 5)
ggsave(("D3_PCA.png"), pca_plot_D3, width = 6, height = 5, dpi = 300)
```
Strong Baseline Resistance:

The 90% variance dominated by cell line differences suggests PC9GR cells have undergone major transcriptional reprogramming. This indicates constitutive resistance mechanisms - the resistant cells are fundamentally different

Modest Treatment Response:

Only 6% variance from treatment suggests apatinib causes relatively subtle transcriptional changes compared to the baseline resistance.Both cell lines respond to treatment, but the response is small compared to their inherent differences.

```{r}
resultsNames(dds_D3)
```
```{r}
#clean LFC shrinkage
res_treatment_D3 <- lfcShrink(dds_D3, 
                                 coef = "Treatment_Control_vs_Apatinib", 
                                 type = "apeglm")

res_cellline_D3 <- lfcShrink(dds_D3, 
                                coef = "CellLine_PC9GR_vs_PC9", 
                                type = "apeglm")

summary(res_treatment_D3, alpha = 0.05)
summary(res_cellline_D3, alpha = 0.05)
```

```{r}
library(AnnotationDbi)
library(org.Hs.eg.db)

# I'm converting ENSEMBL IDs to gene symbols
gene_symbols_D3 <- mapIds(org.Hs.eg.db,
                         keys = rownames(dds_D3),
                         column = "SYMBOL",
                         keytype = "ENSEMBL",
                         multiVals = "first")
res_annotated_D3 <- res_D3
res_annotated_D3$gene_symbol <- gene_symbols_D3
as.data.frame(res_annotated_D3)

#adding annotations to both main results
res_cellline_annotated_D3 <- res_cellline_D3
res_cellline_annotated_D3$gene_symbol <- gene_symbols_D3

res_treatment_annotated_D3 <- res_treatment_D3
res_treatment_annotated_D3$gene_symbol <- gene_symbols_D3

```

```{r}
sig_treatment_D3 <- res_treatment_annotated_D3[which(res_treatment_annotated_D3$padj < 0.05 & abs(res_treatment_annotated_D3$log2FoldChange) > 1), ]
sig_cellline_D3 <- res_cellline_annotated_D3[which(res_cellline_annotated_D3$padj < 0.05 & abs(res_cellline_annotated_D3$log2FoldChange) > 1), ]

nrow(sig_treatment_D3)
nrow(sig_cellline_D3)
```

```{r}
# Get the top genes from each comparison
top20_treatment_D3 <- head(sig_treatment_D3[order(sig_treatment_D3$padj), ], 20)
top20_cellline_D3 <- head(sig_cellline_D3[order(sig_cellline_D3$padj), ], 20)

# Find overlapping genes
overlap_D3 <- intersect(rownames(sig_treatment_D3), rownames(sig_cellline_D3))
length(overlap_D3)
```
- These genes are both strongly dysregulated in resistance (PC9GR vs PC9)
- AND strongly respond to apatinib treatment
- This represents 56% of treatment-responsive genes (276/489)




Visualization Plots
```{r}
#MA Plots
plotMA(res_treatment_D3, ylim=c(-5,5), main="Treatment: Control vs Apatinib")
plotMA(res_cellline_D3, ylim=c(-5,5), main="Cell Line: PC9 vs PC9-GR")

pdf(("MA_res_treatment_D3.pdf"), width = 6, height = 5)
plotMA(res_treatment_D3, ylim=c(-5,5), main="Treatment: Control vs Apatinib")
dev.off()

png(("MA_res_treatment_D3.png"), width = 2000, height = 1600, res = 300)
plotMA(res_treatment_D3, ylim=c(-5,5), main="Treatment: Control vs Apatinib")
dev.off()

pdf(("MA_res_cellline_D3.pdf"), width = 6, height = 5)
plotMA(res_cellline_D3, ylim=c(-5,5), main="Cell Line: PC9 vs PC9-GR")
dev.off()

png(("MA_res_cellline_D3.png"), width = 2000, height = 1600, res = 300)
plotMA(res_cellline_D3, ylim=c(-5,5), main="Cell Line: PC9 vs PC9-GR")
dev.off()

# Volcano plots
library(EnhancedVolcano)
volcano_cellline_D3 <- EnhancedVolcano(res_cellline_annotated_D3, lab = res_cellline_annotated_D3$gene_symbol, 
                x = 'log2FoldChange', y = 'padj',
                title = 'PC9 cells vs PC9-GR cells')

ggsave(("D3_CelllineVolcano.pdf"), volcano_cellline_D3, width = 6, height = 5)
ggsave(("D3_CelllineVolcano.png"), volcano_cellline_D3, width = 6, height = 5, dpi = 300)

volcano_treatment_D3 <- EnhancedVolcano(res_treatment_annotated_D3, lab = res_treatment_annotated_D3$gene_symbol, 
                x = 'log2FoldChange', y = 'padj',
                title = 'Control vs Apatinib')

ggsave(("D3_TreatmentVolcano.pdf"), volcano_treatment_D3, width = 6, height = 5)
ggsave(("D3_TreatmentVolcano.png"), volcano_treatment_D3, width = 6, height = 5, dpi = 300)

# Heatmap of top differentially expressed genes
library(pheatmap)
top_overlap_D3 <- head(order(sig_treatment_D3[overlap_D3,]$padj), 50)
pheatmap::pheatmap(assay(vsdata_D3)[top_overlap_D3,], 
         annotation_col = as.data.frame(colData(dds_D3)[,c("CellLine", "Treatment")]), 
         scale = "row", 
         show_rownames = FALSE,
         main = "Top 50 Differentially Expressed Genes which overlapped",
         filename = "DEGs_D3.png")

```
In Volcano plots: The resistance genes (red dots in plot 1) are potential targets to overcome resistance
The treatment genes (red dots in plot 2) reveal apatinib's mechanism of action.

```{r}
#creating enhanced heatmap with gene symbols and values
library(dplyr)
library(ComplexHeatmap)
library(RColorBrewer)
library(circlize)

#preparing top genes for enhanced heatmap 
top_cellline_heatmap_D3 <- res_cellline_annotated_D3[!is.na(res_cellline_annotated_D3$padj) & 
                                                    !is.na(res_cellline_annotated_D3$baseMean) & 
                                                    !is.na(res_cellline_annotated_D3$log2FoldChange) &
                                                    (res_cellline_annotated_D3$padj < 0.05) & 
                                                    (res_cellline_annotated_D3$baseMean > 50) & 
                                                    (abs(res_cellline_annotated_D3$log2FoldChange) > 2),]

top_cellline_heatmap_D3 <- top_cellline_heatmap_D3 %>%
  as.data.frame() %>%
  head(50)  

top_cellline_heatmap_D3 <- top_cellline_heatmap_D3[order(top_cellline_heatmap_D3$log2FoldChange, decreasing = TRUE),]

#getting normalized count data 
mat_D3 <- assay(vsdata_D3)[rownames(top_cellline_heatmap_D3), metadata_D3$Run]
colnames(mat_D3) <- metadata_D3$Run
base_mean_D3 <- rowMeans(mat_D3)
mat_scaled_D3 <- t(apply(mat_D3, 1, scale)) # Z-score normalization
colnames(mat_scaled_D3) <- colnames(mat_D3)

#selecting top and bottom genes
num_keep <- 25
rows_keep_D3 <- c(seq(1:num_keep), seq((nrow(mat_scaled_D3)-num_keep), nrow(mat_scaled_D3)))

# I'm preparing annotation matrices
l2_val_D3 <- as.matrix(top_cellline_heatmap_D3[rows_keep_D3,]$log2FoldChange)
colnames(l2_val_D3) <- "logFC"

mean_D3 <- as.matrix(top_cellline_heatmap_D3[rows_keep_D3,]$baseMean)
colnames(mean_D3) <- "AveExpr"

# I'm setting up color schemes (same as Dataset 2)
col_logFC_D3 <- colorRamp2(c(min(l2_val_D3), 0, max(l2_val_D3)), c("blue", "white", "red")) 
col_AveExpr_D3 <- colorRamp2(c(quantile(mean_D3)[1], quantile(mean_D3)[4]), c("white", "red"))
col_zscore_D3 <- colorRamp2(c(-2, 0, 2), c("#4575b4", "white", "#d73027"))

# I'm creating the main expression heatmap
h1_D3 <- Heatmap(mat_scaled_D3[rows_keep_D3,], 
              cluster_rows = TRUE,
              column_labels = colnames(mat_scaled_D3), 
              name="Z-score",
              col = col_zscore_D3,
              cluster_columns = TRUE,
              show_row_names = FALSE,
              column_names_gp = gpar(fontsize = 9),
              heatmap_legend_param = list(title_gp = gpar(fontsize = 10)))

# I'm creating the log2FC annotation heatmap
h2_D3 <- Heatmap(l2_val_D3, 
              row_labels = top_cellline_heatmap_D3$gene_symbol[rows_keep_D3],
              cluster_rows = FALSE, 
              name="logFC", 
              col = col_logFC_D3,
              width = unit(15, "mm"),
              show_column_names = TRUE,
              row_names_gp = gpar(fontsize = 8),
              column_names_gp = gpar(fontsize = 9),
              cell_fun = function(j, i, x, y, w, h, col) { 
                grid.text(round(l2_val_D3[i, j], 1), x, y, gp = gpar(fontsize = 7))
              })

# I'm creating the average expression annotation heatmap
h3_D3 <- Heatmap(mean_D3, 
              row_labels = top_cellline_heatmap_D3$gene_symbol[rows_keep_D3],
              cluster_rows = FALSE, 
              name = "AveExpr", 
              col = col_AveExpr_D3,
              width = unit(15, "mm"),
              show_row_names = TRUE,
              show_column_names = TRUE,
              column_names_gp = gpar(fontsize = 9),
              row_names_gp = gpar(fontsize = 7),
              cell_fun = function(j, i, x, y, w, h, col) { 
                grid.text(round(mean_D3[i, j], 0), x, y, gp = gpar(fontsize = 7))
              })

# I'm combining all heatmaps (same as Dataset 2)
h_D3 <- h1_D3 + h2_D3 + h3_D3
h_D3

pdf("AnnotatedHeatmap_D3.pdf", width = 10, height = 6)
draw(h_D3)
dev.off()

png("AnnotatedHeatmap_D3.png", width = 3000, height = 2000, res = 300)
draw(h_D3)
dev.off()

```

```{r}
# Loading pathway analysis packages
  library(DOSE)
  library(enrichplot)
  library(clusterProfiler)
  
  # separating upregulated and downregulated genes
  sig_genes_up_cellline_D3 <- rownames(sig_cellline_D3)[sig_cellline_D3$log2FoldChange > 1]
  sig_genes_down_cellline_D3 <- rownames(sig_cellline_D3)[sig_cellline_D3$log2FoldChange < -1]
  
  # converting ENSEMBL to ENTREZ for KEGG analysis
  entrez_genes_up_cellline_D3 <- mapIds(org.Hs.eg.db, sig_genes_up_cellline_D3, "ENTREZID", "ENSEMBL")
  entrez_genes_down_cellline_D3 <- mapIds(org.Hs.eg.db, sig_genes_down_cellline_D3, "ENTREZID", "ENSEMBL")
  entrez_genes_up_cellline_D3 <- entrez_genes_up_cellline_D3[!is.na(entrez_genes_up_cellline_D3)]
  entrez_genes_down_cellline_D3 <- entrez_genes_down_cellline_D3[!is.na(entrez_genes_down_cellline_D3)]
  
  # running GO Analysis
  go_up_cellline_D3 <- enrichGO(gene = sig_genes_up_cellline_D3,
                                OrgDb = org.Hs.eg.db,
                                keyType = "ENSEMBL",
                                ont = "ALL",  # BP, CC, MF
                                pAdjustMethod = "BH",
                                pvalueCutoff = 0.05)
  as.data.frame(go_up_cellline_D3)

  go_up_D3 <-plot(barplot(go_up_cellline_D3, showCategory = 15)+labs(title = "Upregulated GO Terms (PC9GR vs PC9)"))
  ggsave("GO_cellline_Upreg_D3.pdf", go_up_D3, width = 7, height = 6)
  ggsave("GO_cellline_Upreg_D3.png", go_up_D3, width = 7, height = 6, dpi = 300)
  
  go_down_cellline_D3 <- enrichGO(gene = sig_genes_down_cellline_D3,
                                  OrgDb = org.Hs.eg.db,
                                  keyType = "ENSEMBL",
                                  ont = "ALL",
                                  pAdjustMethod = "BH",
                                  pvalueCutoff = 0.05)
  as.data.frame(go_down_cellline_D3)
  go_down_D3 <- plot(barplot(go_down_cellline_D3, showCategory = 15)+labs(title = "Downregulated GO Terms (PC9GR vs PC9)"))
  ggsave("GO_cellline_down_D3.pdf", go_down_D3, width = 7, height = 6)
  ggsave("GO_cellline_down_D3.png", go_down_D3, width = 7, height = 6, dpi = 300)
  
  # Running KEGG Pathway Analysis
  kegg_up_cellline_D3 <- enrichKEGG(gene = entrez_genes_up_cellline_D3,
                                    organism = 'hsa',
                                    pvalueCutoff = 0.05,
                                    pAdjustMethod = "BH")
  as.data.frame(kegg_up_cellline_D3)
  kegg_up_D3 <- plot(barplot(kegg_up_cellline_D3, showCategory = 15)+labs(title = "Upregulated KEGG Pathways (PC9GR vs PC9)"))
  ggsave("KEGG_cellline_up_D3.pdf", kegg_up_D3, width = 7, height = 6)
  ggsave("KEGG_cellline_up_D3.png", kegg_up_D3, width = 7, height = 6, dpi = 300)
  
  kegg_down_cellline_D3 <- enrichKEGG(gene = entrez_genes_down_cellline_D3,
                                      organism = 'hsa',
                                      pvalueCutoff = 0.1,
                                      pAdjustMethod = "BH")
  as.data.frame(kegg_down_cellline_D3)
  kegg_down_D3 <- plot(barplot(kegg_down_cellline_D3, showCategory = 15)+labs(title = "Downregulated KEGG Pathways (PC9GR vs PC9)"))
  ggsave("KEGG_cellline_down_D3.pdf", kegg_down_D3, width = 7, height = 6)
  ggsave("KEGG_cellline_down_D3.png", kegg_down_D3, width = 7, height = 6, dpi = 300)
  
```

```{r}
# separating upregulated and downregulated genes
  sig_genes_up_treatment_D3 <- rownames(sig_treatment_D3)[sig_treatment_D3$log2FoldChange > 1]
length(sig_genes_up_treatment_D3)
  sig_genes_down_treatment_D3 <- rownames(sig_treatment_D3)[sig_treatment_D3$log2FoldChange < -1]
  
  print(paste("Upregulated:", length(sig_genes_up_treatment_D3)))
print(paste("Downregulated:", length(sig_genes_down_treatment_D3)))
  
  # converting ENSEMBL to ENTREZ for KEGG analysis
  entrez_genes_up_treatment_D3 <- mapIds(org.Hs.eg.db, sig_genes_up_treatment_D3, "ENTREZID", "ENSEMBL")
  entrez_genes_down_treatment_D3 <- mapIds(org.Hs.eg.db, sig_genes_down_treatment_D3, "ENTREZID", "ENSEMBL")
  entrez_genes_up_treatment_D3 <- entrez_genes_up_treatment_D3[!is.na(entrez_genes_up_treatment_D3)]
  entrez_genes_down_treatment_D3 <- entrez_genes_down_treatment_D3[!is.na(entrez_genes_down_treatment_D3)]
  
  # running GO Analysis
  go_up_treatment_D3 <- enrichGO(gene = sig_genes_up_treatment_D3,
                                OrgDb = org.Hs.eg.db,
                                keyType = "ENSEMBL",
                                ont = "ALL",  # BP, CC, MF
                                pAdjustMethod = "BH",
                                pvalueCutoff = 0.1)
  as.data.frame(go_up_treatment_D3)

  go_up_D3 <-plot(barplot(go_up_treatment_D3, showCategory = 15)+labs(title = "Upregulated GO Terms (Control vs Apatinib)"))
  ggsave("GO_treatment_Upreg_D3.pdf", go_uptreatment_D3, width = 7, height = 6)
  ggsave("GO_treatment_Upreg_D3.png", go_uptreatment_D3, width = 7, height = 6, dpi = 300)
  
  go_down_treatment_D3 <- enrichGO(gene = sig_genes_down_treatment_D3,
                                  OrgDb = org.Hs.eg.db,
                                  keyType = "ENSEMBL",
                                  ont = "ALL",
                                  pAdjustMethod = "BH",
                                  pvalueCutoff = 0.05)
  as.data.frame(go_down_treatment_D3)
  top_go_down_treatment_D3 <- go_down_treatment_D3@result[1:10, ]
as.data.frame(top_go_down_treatment_D3)
  
  go_down_D3 <- plot(barplot(go_down_treatment_D3, showCategory = 15)+labs(title = "Downregulated GO Terms (Control vs Apatinib)"))
  ggsave("GO_treatment_down_D3.pdf", go_downtreatment_D3, width = 7, height = 6)
  ggsave("GO_treatment_down_D3.png", go_downtreatment_D3, width = 7, height = 6, dpi = 300)
  
  # Running KEGG Pathway Analysis
  kegg_up_treatment_D3 <- enrichKEGG(gene = entrez_genes_up_treatment_D3,
                                    organism = 'hsa',
                                    pvalueCutoff = 0.1,
                                    pAdjustMethod = "BH")
  as.data.frame(kegg_up_treatment_D3)
  kegg_up_D3 <- plot(barplot(kegg_up_treatment_D3, showCategory = 15)+labs(title = "Upregulated KEGG Pathways (Control vs Apatinib)"))
  ggsave("KEGG_cellline_up_D3.pdf", kegg_up_D3, width = 7, height = 6)
  ggsave("KEGG_cellline_up_D3.png", kegg_up_D3, width = 7, height = 6, dpi = 300)
  
  kegg_down_treatment_D3 <- enrichKEGG(gene = entrez_genes_down_treatment_D3,
                                      organism = 'hsa',
                                      pvalueCutoff = 0.05,
                                      pAdjustMethod = "BH")
  as.data.frame(kegg_down_treatment_D3)
  kegg_down_D3 <- plot(barplot(kegg_down_cellline_D3, showCategory = 15)+labs(title = "Downregulated KEGG Pathways (PC9GR vs PC9)"))
  ggsave("KEGG_cellline_down_D3.pdf", kegg_down_D3, width = 7, height = 6)
  ggsave("KEGG_cellline_down_D3.png", kegg_down_D3, width = 7, height = 6, dpi = 300)
  
```


```{r}
library(fgsea)
library(msigdbr)

#preparing ranked gene list for GSEA (cell line comparison)
gene_ranks_cellline_D3 <- res_cellline_D3$log2FoldChange
names(gene_ranks_cellline_D3) <- rownames(res_cellline_D3)
gene_ranks_cellline_D3 <- gene_ranks_cellline_D3[!is.na(gene_ranks_cellline_D3)]
gene_ranks_cellline_D3 <- sort(gene_ranks_cellline_D3, decreasing = TRUE)

#getting Hallmark pathways for GSEA 
hallmark_sets_D3 <- msigdbr(species = "Homo sapiens", category = "H")
hallmark_list_D3 <- split(hallmark_sets_D3$ensembl_gene, hallmark_sets_D3$gs_name)

#running GSEA analysis 
fgsea_results_cellline_D3 <- fgsea(pathways = hallmark_list_D3,
                                  stats = gene_ranks_cellline_D3,
                                  minSize = 15,
                                  maxSize = 500)

#viewing top results (same format as previous datasets)
print("Top GSEA Results (PC9GR vs PC9):")
print(head(fgsea_results_cellline_D3[order(pval), ], 10))

# I'm plotting top 3 most significant pathways from my GSEA results
print("Top 3 GSEA pathways:")
top_pathways_D3 <- head(fgsea_results_cellline_D3[order(pval), ], 3)
print(top_pathways_D3[, c("pathway", "pval", "NES")])

for(i in 1:3) {
  pathway_name <- top_pathways_D3$pathway[i]
  pathways_D3 <- plotEnrichment(hallmark_list_D3[[pathway_name]], gene_ranks_cellline_D3) + 
    labs(title = paste("Pathway", i, ":", pathway_name))
  print(pathways_D3)
  ggsave("Top3_GSEA_D3.pdf", pathways_D3, width = 7, height = 6)
  ggsave("Top3_GSEA_D3.png", pathways_D3, width = 7, height = 6, dpi = 300)
}
```


```{r}
# Summary statistics table for D3
summary_stats_D3 <- data.frame(
  Metric = c("Total Samples", "Genes Analyzed", "Significant DEGs - PC9 vs PC9-GR", "Upregulated - PC9 vs PC9-GR", "Downregulated - PC9 vs PC9-GR"),
  Value = c(ncol(counts_D3), 
            nrow(res_D3),
            nrow(sig_cellline_D3),
            length(sig_genes_up_cellline_D3),
            length(sig_genes_down_cellline_D3))
)

summary_stats_D3

# Top genes table
top_genes_table_D3 <- data.frame(
  Gene_Symbol = gene_symbols_D3[rownames(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20))],
  ENSEMBL_ID = rownames(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20)),
  Log2FC = round(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20)$log2FoldChange, 3),
  Padj = format(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20)$padj, scientific = TRUE, digits = 3),
  Direction = ifelse(head(sig_cellline_D3[order(sig_cellline_D3$padj),], 20)$log2FoldChange > 0, "Up", "Down")
)

print("Top 20 Significant Genes: PC9 vs PC9-GR")
print(top_genes_table_D3)

```

```{r}
# Export main results
write.csv(as.data.frame(res_cellline_annotated_D3), "D3_cellline_annotated_results.csv", row.names = TRUE)
write.csv(sig_cellline_D3, "D3_sig_cellline_genes.csv", row.names = TRUE)
write.csv(summary_stats_D3, "D3_summary_statistics.csv", row.names = FALSE)
write.csv(top_genes_table_D3, "D3_top20_genes.csv", row.names = FALSE)

# Export normalized data
write.csv(res_annotated_D3, "D3_results.csv")
write.csv(counts(dds_D3, normalized=TRUE), "D3_normalized_counts.csv", row.names = TRUE)
write.csv(vst_final_D3, "D3_vst_data.csv", row.names = TRUE)

# Export pathway results
if(nrow(go_up_cellline_D3@result) > 0) {
  write.csv(go_up_cellline_D3@result, "D3_GO_upregulated.csv", row.names = FALSE)
}
if(nrow(go_down_cellline_D3@result) > 0) {
  write.csv(go_down_cellline_D3@result, "D3_GO_downregulated.csv", row.names = FALSE)
}
if(nrow(kegg_up_cellline_D3@result) > 0) {
  write.csv(kegg_up_cellline_D3@result, "D3_KEGG_upregulated.csv", row.names = FALSE)
}
if(nrow(kegg_down_cellline_D3@result) > 0) {
  write.csv(kegg_down_cellline_D3@result, "D3_KEGG_downregulated.csv", row.names = FALSE)
}

# Remove the leadingEdge column (which contains lists)
fgsea_results_clean_D3 <- fgsea_results_cellline_D3 %>%
  select(-leadingEdge)

write.csv(fgsea_results_clean_D3, "D3_GSEA_hallmarks.csv", row.names = FALSE)

```

