library(IHW)
library(apeglm)
library(DT)
library(dplyr)
library(biomaRt)
library(tximport)
library(rhdf5)
library(gplots)
library(DESeq2)
library(circlize)
library(RColorBrewer)
library(ComplexHeatmap)
library(pheatmap)
library(PCAtools)
library(clusterProfiler)
library(ggplot2)
library(tidyverse)
library(ggpubr)
library(BiocParallel)
library(ggvenn)
library(RColorBrewer)

NEPCa Analysis

Document analyzing the concordance of differentially expressed genes in LNCaP Clone1 vs. Control samples and neuroendocrine prostate cancer (NEPCa) gene signature sets derived from Cheng et al. and Beltran et al..

Cheng et al. derived a 12 signature gene set from a literature review of genes involved in NEPCa progression and a 304 gene signature set derived of overlapping DEGs from 3 microarray studies analyzing NEPCa vs. Adenocarcinoma prostate cancer (AdPCa). The two gene sets were merged, resulting in a 310 gene signature set.
Beltran et al. provided an integrated 70 gene signature NEPCa classifier developed by using expression data of genes that were prioritized by genomic, transcriptomic or epigenomic status.

Venn Diagram

Firstly, we assess the number of overlapping genes in LNCaP Clone1 vs. Control DEGs compared to the two gene signature lists provided by Cheng et al. and Beltran et al.

mtx <- read.table("/data/projects/marvin_lim/mRNA_counts.txt", header=T, row.names = "gene_symbols")
samples <- c("Clone1_N1", "Clone1_N2", "Clone1_N3",
             "Clone9_N1", "Clone9_N2", "Clone9_N3",
             "Control_N1", "Control_N2", "Control_N3")
colnames(mtx) <- samples
## Create sample DF
sample_vec <- c()
replicates_vec <- c()
sample_df <- as.data.frame(matrix(ncol=2, nrow=0))
for(i in samples){
  split <- strsplit(i, "\\_")
  sample <- split[[1]][1]
  rep <- split[[1]][2]
  sample_df[i,1] <- sample
  sample_df[i,2] <- rep
}
colnames(sample_df) <- c("Condition", "Replicate")
sample_df$Condition <- as.factor(sample_df$Condition)
sample_df$Replicate <- as.factor(sample_df$Replicate)
## Create DDS object
dds <- DESeqDataSetFromMatrix(mtx, colData = sample_df, design = ~ Replicate + Condition)
dds$Condition <- relevel(dds$Condition, ref = "Control")
keep <- rowSums(counts(dds)) >= 20
dds <- dds[keep,]
dds <- DESeq(dds) 

mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")

res1 <- results(dds, filterFun=ihw, alpha=0.05, c("Condition", "Clone1", "Control"))
LFC1 <- lfcShrink(dds = dds, res= res1, coef = 4, type = "apeglm")
LFC1_df <- as.data.frame(LFC1)

output_col <- c("Gene", "Chromosome", "Start", "Stop", "Strand", "Description", "Log2FC", "P-value", "Adj P-value")

up_key <-  intersect(rownames(LFC1)[which(LFC1$log2FoldChange>=1)],
                       rownames(LFC1)[which(LFC1$padj<=0.05)])
up_df <- as.data.frame((LFC1)[which(rownames(LFC1) %in% up_key),])
up_df <- tibble::rownames_to_column(up_df, "external_gene_name")
info <- getBM(attributes=c("external_gene_name",
                             "chromosome_name",
                             "start_position",
                             "end_position",
                             "strand",
                             "entrezgene_description"),
                        filters = c("external_gene_name"),
                        values = up_df$external_gene_name,
                        mart = mart,
                        useCache = F)
tmp <- merge(info, up_df, by="external_gene_name")
tmp <- tmp[order(-tmp$log2FoldChange),]
## col out index
index <- c(1, 2, 3, 4, 5, 6, 8, 10, 11)
tmp <- tmp[,index]
colnames(tmp) <- output_col
tmp$Strand <- gsub("-1", "-", tmp$Strand)  
tmp$Strand <- gsub("1", "+", tmp$Strand)
# remove dups
tmp <- tmp[!duplicated(tmp$Gene),]
up_regulated_table1 <- tmp

output_col <- c("Gene", "Chromosome", "Start", "Stop", "Strand", "Description", "Log2FC", "P-value", "Adj P-value")

down_key <- intersect(rownames(LFC1)[which(LFC1$log2FoldChange<=-1)],
                        rownames(LFC1)[which(LFC1$padj<=0.05)])
  
down_df <- as.data.frame((LFC1)[which(rownames(LFC1) %in% down_key),])
down_df <- tibble::rownames_to_column(down_df, "external_gene_name")
  
info <- getBM(attributes=c("external_gene_name",
                             "chromosome_name",
                             "start_position",
                             "end_position",
                             "strand",
                             "entrezgene_description"),
                         filters = c("external_gene_name"),
                         values = down_df$external_gene_name,
                         mart = mart,
                         useCache = F)
  
tmp <- merge(info, down_df, by="external_gene_name")
tmp <- tmp[order(tmp$log2FoldChange),]
## col out index
index <- c(1, 2, 3, 4, 5, 6, 8, 10, 11)
tmp <- tmp[,index]
colnames(tmp) <- output_col
tmp$Strand <- gsub("-1", "-", tmp$Strand)
tmp$Strand <- gsub("1", "+", tmp$Strand)
# remove dups
tmp <- tmp[!duplicated(tmp$Gene),]
down_regulated_table1 <- tmp

cheng_up <- read.table("/data/projects/marvin_lim/cheng_et_al/up_nepca.txt")
cheng_down <- read.table("/data/projects/marvin_lim/cheng_et_al/down_nepca.txt")
cheng <- rbind(cheng_up, cheng_down)

beltran_up <- read.table("/data/projects/marvin_lim/beltrans_et_al/up_nepca.txt")
beltran_down <- read.table("/data/projects/marvin_lim/beltrans_et_al/down_nepca.txt")
beltran <- rbind(beltran_up, beltran_down)

deg <- rbind(up_regulated_table1, down_regulated_table1)

up_venn <- list("Lim et al." = deg$Gene,
                "Cheng et al." = cheng$V1,
                "Beltran et al." = beltran$V1)

ggvenn(data=up_venn, show_percentage = F) + ggtitle("Intersection of DE genes (LNCaP Clone 1 vs. Control) \n vs. NEPCa gene signatures") + theme(plot.title = element_text(hjust = 0.50, size = 16, face = "bold"))

10 gene signature set

The venn diagram reveals 10 genes present in \(\text{Lim et al.} \cap \text{Beltran et al.} \cap \text{Cheng et al.}\)

Up Regulated

intersection <- subset(deg, deg$Gene %in% beltran$V1 & deg$Gene %in% cheng$V1)
up <- subset(intersection, intersection$Log2FC > 0 )

DT::datatable(up, rownames = FALSE, options=list(scrollX=T))

Down Regulated

down <- subset(intersection, intersection$Log2FC < 0 )

DT::datatable(down, rownames = FALSE, options=list(scrollX=T))

Heatmap

vsd <- assay(vst(dds, blind=FALSE))
# remove clone 9 
subs_vsd <- subset(vsd, select=-c(Clone9_N1, Clone9_N2, Clone9_N3))

# sub 10 gene signature
sub_sig <- subset(subs_vsd, rownames(subs_vsd) %in% intersection$Gene)

mat <- t(sub_sig)
mat <- scale(mat, center = T, scale = T)
mat <- t(mat)

df2 = data.frame(Cell_Line = c(rep("Clone1", 3), rep("Control", 3)))
ha = HeatmapAnnotation(df = df2, col = list(Cell_Line = c("Control" =  "black", "Clone1" = "gold")), annotation_legend_param = list(title_gp = gpar(fontsize = 12, fontface = "bold"), labels_gp = gpar(fontsize = 12)))             

hm <- Heatmap(mat, 
              col= colorRamp2(c(-2,-1,0,1,2),c("blue","skyblue","white","lightcoral","red")),
              heatmap_legend_param=list(at=c(-1.2,-0.6,0,0.6,1.2),color_bar="continuous", 
                                        legend_direction="vertical", legend_width=unit(15,"cm"),
                                        title_position="topcenter", title_gp=gpar(fontsize=10, fontface="bold")), 
              name = "Z-score",
              
              #Row annotation configurations
              cluster_rows=TRUE,
              show_row_dend=TRUE,
              row_title_side="left",
              row_title_gp=gpar(fontsize=10),
              show_row_names=TRUE,
              row_names_side="right",
              
              #Column annotation configuratiions
              cluster_columns=TRUE,
              show_column_dend=TRUE,
              column_title="NEPCa 10 gene signature\nClone1 vs Control",
              column_title_side="top",
              column_title_gp=gpar(fontsize=15, fontface="bold"),
              show_column_names = FALSE,
              column_names_gp = gpar(fontsize = 12, fontface="bold"),
              
              #Dendrogram configurations: columns
              clustering_distance_columns="euclidean",
              clustering_method_columns="complete",
              column_dend_height=unit(10,"mm"),

              #Dendrogram configurations: rows
              clustering_distance_rows="euclidean",
              clustering_method_rows="complete",
              row_dend_width=unit(1,"cm"),
              row_dend_side = "left",
              row_dend_reorder = TRUE,

              #Splits
              border=T,
              #row_km = 1,
              #column_km = 2,
              
              #plot params
              #width = unit(5, "inch"), 
              #height = unit(4, "inch"),
              #height = unit(0.8, "cm")*nrow(mat),
              
              #Annotations
              top_annotation = ha)


draw(hm, annotation_legend_side = "right", heatmap_legend_side="right")

174 gene signature set

174 genes were found in \(\text{Lim et al.} \cap \text{Beltran et al.} \text{ AND } \text{Lim et al.} \cap \text{Cheng et al.}\)

Up Regulated

beltran_int <- subset(deg, deg$Gene %in% beltran$V1)
cheng_int <- subset(deg, deg$Gene %in% cheng$V1)

int <- rbind(beltran_int, cheng_int)

up <- subset(int, int$Log2FC > 0 )
up <- up[!duplicated(up), ]

DT::datatable(up, rownames = FALSE, options=list(scrollX=T))

Down Regulated

down <- subset(int, int$Log2FC < 0 )
down <- down[!duplicated(down), ]

DT::datatable(down, rownames = FALSE, options=list(scrollX=T))

Heatmap

beltran_int <- subset(deg, deg$Gene %in% beltran$V1)
cheng_int <- subset(deg, deg$Gene %in% cheng$V1)

int <- rbind(beltran_int, cheng_int)

sub_sig <- subset(subs_vsd, rownames(subs_vsd) %in% int$Gene)

# remove low sd
sub_sig <- sub_sig[apply(sub_sig, MARGIN = 1, FUN = function(x) sd(x) != 0),]

mat <- t(sub_sig)
mat <- scale(mat, center = T, scale = T)
mat <- t(mat)

df2 = data.frame(Cell_Line = c(rep("Clone1", 3), rep("Control", 3)))
ha = HeatmapAnnotation(df = df2, col = list(Cell_Line = c("Control" =  "black", "Clone1" = "gold")), annotation_legend_param = list(title_gp = gpar(fontsize = 12, fontface = "bold"), labels_gp = gpar(fontsize = 12)))             

hm <- Heatmap(mat, 
              col= colorRamp2(c(-2,-1,0,1,2),c("blue","skyblue","white","lightcoral","red")),
              heatmap_legend_param=list(at=c(-1.8,-0.9,0,0.9,1.8),color_bar="continuous", 
                                        legend_direction="vertical", legend_width=unit(15,"cm"),
                                        title_position="topcenter", title_gp=gpar(fontsize=10, fontface="bold")), 
              name = "Z-score",
              
              #Row annotation configurations
              cluster_rows=TRUE,
              show_row_dend=TRUE,
              row_title_side="left",
              row_title_gp=gpar(fontsize=10),
              show_row_names=FALSE,
              row_names_side="right",
              
              #Column annotation configuratiions
              cluster_columns=TRUE,
              show_column_dend=TRUE,
              column_title="NEPCa 174 gene signature\nClone1 vs Control",
              column_title_side="top",
              column_title_gp=gpar(fontsize=15, fontface="bold"),
              show_column_names = FALSE,
              column_names_gp = gpar(fontsize = 12, fontface="bold"),
              
              #Dendrogram configurations: columns
              clustering_distance_columns="euclidean",
              clustering_method_columns="complete",
              column_dend_height=unit(10,"mm"),

              #Dendrogram configurations: rows
              clustering_distance_rows="euclidean",
              clustering_method_rows="complete",
              row_dend_width=unit(1,"cm"),
              row_dend_side = "left",
              row_dend_reorder = TRUE,

              #Splits
              border=T,
              #row_km = 1,
              #column_km = 2,
              
              #plot params
              #width = unit(5, "inch"), 
              #height = unit(4, "inch"),
              #height = unit(0.8, "cm")*nrow(mat),
              
              #Annotations
              top_annotation = ha)


draw(hm, annotation_legend_side = "right", heatmap_legend_side="right")