The purpose of this analysis is to perform GSEA and GSVA to evaluate the enrichment of differential gene expression data (DESeq2 output) for publicly available human and murine gene sets in the Molecular Signatures Database (MSigDB) and Gene Ontology terms using the package clusterProfiler.
Input: DESeq2 results in .CSV format for various comparisons of interest.
setwd("C:/Users/edlarsen/Documents/Jan2025 PTCLRNASeq for Dissertation")
# Read in data from deseq2
df1 = read.csv("Cohort_1/Output/CD4_PTCLvsCD4_CTRL_DESeq2res.csv", header=TRUE)
df2 = read.csv("Cohort_2/Output/CD4_PTCLvsCD4_LN_CTRL_DESeq2res.csv", header=TRUE)
There is no consensus for the what ranking metric should be used for GSEA. Popular options from the DESeq2 output include ranking by the ‘log2FoldChange’ column or the ‘stat’ column, which is the Wald test statistic. By default, this script ranks genes by the Wald test statistic. To rank by log2 fold change instead, replace ‘stat’ with ‘log2FoldChange’.
# Rank genes by stat column of deseq2 result
deg_genes_list1 <- df1$stat # Creates a vector of the stat column from DESeq2 output
names(deg_genes_list1) <- df1$gene_name # Names vector with gene symbols
deg_genes_list1 <- na.omit(deg_genes_list1) # Omits any NA values
deg_genes_list1 = sort(deg_genes_list1, decreasing = TRUE) # Sorts in decreasing order
deg_names1 <- names(deg_genes_list1) # Makes a vector of just the ranked gene names
deg_genes_list2 <- df2$stat # Creates a vector of the stat column from DESeq2 output
names(deg_genes_list2) <- df2$gene_name # Names vector with gene symbols
deg_genes_list2 <- na.omit(deg_genes_list2) # Omits any NA values
deg_genes_list2 = sort(deg_genes_list2, decreasing = TRUE) # Sorts in decreasing order
deg_names2 <- names(deg_genes_list2) # Makes a vector of just the ranked gene names
Retrieve the collections of interest from MSigDB:
# H = hallmark gene sets, C1 = positional gene sets, C2 = curated gene sets, C3 = motif gene sets, C4 = computational gene sets, C5 = GO gene sets, C6 = oncogenic signatures, C7 = immunologic signatures.
m_t2g_C5 <- msigdbr(species = "Homo sapiens", category = "C5") %>%
dplyr::select(gs_name, gene_symbol)
m_t2g_H <- msigdbr(species = "Homo sapiens", category = "H") %>%
dplyr::select(gs_name, gene_symbol)
m_t2g_all <- msigdbr(species = "Homo sapiens") %>%
dplyr::select(gs_name, gene_symbol)
gse1_msigdb_C5 <- GSEA(deg_genes_list1,
exponent = 1,
nPerm = 10000,
minGSSize = 1,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = m_t2g_C5,
verbose = TRUE,
by = "fgsea")
gse2_msigdb_C5 <- GSEA(deg_genes_list2,
exponent = 1,
nPerm = 10000,
minGSSize = 1,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = m_t2g_C5,
verbose = TRUE,
by = "fgsea")
gse1_msigdb_C5 %>%
dotplot(showCategory = 10, x = "NES", split=".sign") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) + # replaces underscores with spaces to allow wrapping of long gene set names
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 1, MSigDB Gene Sets\nC5 Collection (Gene Ontology)", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gse2_msigdb_C5 %>%
dotplot(showCategory = 10, x = "NES", split=".sign") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) + # replaces underscores with spaces to allow wrapping of long gene set names
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 2, MSigDB Gene Sets\nC5 Collection (Gene Ontology)", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gse1_msigdb_H <- GSEA(deg_genes_list1,
exponent = 1,
nPerm = 10000,
minGSSize = 1,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = m_t2g_H,
verbose = TRUE,
by = "fgsea")
gse2_msigdb_H <- GSEA(deg_genes_list2,
exponent = 1,
nPerm = 10000,
minGSSize = 1,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = m_t2g_H,
verbose = TRUE,
by = "fgsea")
gse1_msigdb_H %>%
dotplot(showCategory = 15, x = "NES", split=".sign") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) + # replaces underscores with spaces to allow wrapping of long gene set names
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 1, MSigDB Gene Sets\n Hallmark Collection", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gse2_msigdb_H %>%
dotplot(showCategory = 15, x = "NES", split=".sign") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) + # replaces underscores with spaces to allow wrapping of long gene set names
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 2, MSigDB Gene Sets\n Hallmark Collection", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gse1_msigdb_all <- GSEA(deg_genes_list1, # Use the sorted (ranked) list of gene symbols *and* stat ranking column
exponent = 1,
nPerm = 10000,
minGSSize = 1,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = m_t2g_all,
verbose = TRUE,
by = "fgsea")
gse2_msigdb_all <- GSEA(deg_genes_list2, # Use the sorted (ranked) list of gene symbols *and* stat ranking column
exponent = 1,
nPerm = 10000,
minGSSize = 1,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = m_t2g_all,
verbose = TRUE,
by = "fgsea")
gse1_msigdb_all %>%
dotplot(showCategory = 15, x = "NES", split=".sign") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) +
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 1, MSigDB Gene Sets\n All Collections", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gse2_msigdb_all %>%
dotplot(showCategory = 15, x = "NES", split=".sign") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) +
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 2, MSigDB Gene Sets\n All Collections", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
Export GSEA results for all significant results in all MSigDB categories as CSV files:
gse1_msigdb_df <- as.data.frame(gse1_msigdb_all)
write.csv(gse1_msigdb_df, file="Cohort_1/Output/Cohort1_gsea_MSigDBGeneSets.csv")
gse2_msigdb_df <- as.data.frame(gse2_msigdb_all)
write.csv(gse2_msigdb_df, file="Cohort_2/Output/Cohort2_gsea_MSigDBGeneSets.csv")
# import gmt
signaling_gmt <- read.gmt("CombinedSignalingPathways.gmt")
## Perform GSEA with order ranked gene list and TERM2GENE = the merged data frames of our annotated gene sets of interest.
### Cohort 1
gse_signaling1 <- GSEA(deg_genes_list1, # Use the sorted list of gene symbols *and* stat ranking column
exponent = 1,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = signaling_gmt,
verbose = TRUE,
by = "fgsea")
### Cohort 2
gse_signaling2 <- GSEA(deg_genes_list2, # Use the sorted list of gene symbols *and* stat ranking column
exponent = 1,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = signaling_gmt,
verbose = TRUE,
by = "fgsea")
gse_signaling1 %>%
dotplot(showCategory = 30, x = "NES") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) + # replaces underscores with spaces to allow wrapping of long gene set names
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 1, Common PTCL Signaling Pathways", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gseaplot2(gse_signaling1,
geneSetID = c("MTOR_UP.V1_UP",
"PTEN_DN.V1_UP",
"AKT_UP.V1_UP",
"HALLMARK_MTORC1_SIGNALING",
"WP_PI3KAKT_SIGNALING_PATHWAY"
),
title = "Effector Sets - Cohort 1",
color = c("#1F77B4", "#D62728"))
gse_signaling2 %>%
dotplot(showCategory = 30, x = "NES") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) + # replaces underscores with spaces to allow wrapping of long gene set names
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 2, Common PTCL Signaling Pathways", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gseaplot2(gse_signaling2,
geneSetID = c("MTOR_UP.V1_UP",
"PTEN_DN.V1_UP",
"AKT_UP.V1_UP",
"HALLMARK_MTORC1_SIGNALING",
"WP_PI3KAKT_SIGNALING_PATHWAY"
),
title = "Effector Sets - Cohort 2",
color = c("#1F77B4", "#D62728")
)
# import gmt
cellorigin_gmt <- read.gmt("thymusTCellAndTCRGeneSets.gmt")
### Cohort 1
gse_origin1 <- GSEA(deg_genes_list1, # Use the sorted list of gene symbols *and* stat ranking column
exponent = 1,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = cellorigin_gmt,
verbose = TRUE,
by = "fgsea")
write.csv(gse_origin1, file = "cellOrigin_gsea_cohort1.csv")
### Cohort 2
gse_origin2 <- GSEA(deg_genes_list2, # Use the sorted list of gene symbols *and* stat ranking column
exponent = 1,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = cellorigin_gmt,
verbose = TRUE,
by = "fgsea")
write.csv(gse_origin2, file = "cellOrigin_gsea_cohort2.csv")
gse_origin1 %>%
dotplot(showCategory = 15, x = "NES") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) + # replaces underscores with spaces to allow wrapping of long gene set names
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 1, Signatures of T-cell Development and Differentiation", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gseaplot2(gse_origin1,
geneSetID = c("LEE_EARLY_T_LYMPHOCYTE_UP",
"GSE139242_CD4_THYMOCYTES_VS_ADULT_BLOOD_TOP250UP",
"GSE139242_CD4_THYMOCYTES_VS_INFANT_BLOOD_TOP250UP",
"GSE1460_INTRATHYMIC_T_PROGENITOR_VS_NAIVE_CD4_TCELL_ADULT_BLOOD_UP",
"GSE1460_CD4_THYMOCYTE_VS_NAIVE_CD4_TCELL_ADULT_BLOOD_UP",
"BIOCARTA_TCR_PATHWAY",
"WP_T_CELL_RECEPTOR_AND_COSTIMULATORY_SIGNALING"),
title = "Effector Sets - Cohort 1",
color = c("red", "purple", "darkgreen", "orange2", "brown", "hotpink", "skyblue1")
)
gse_origin2 %>%
dotplot(showCategory = 30, x = "NES") +
scale_colour_viridis_c(name = "Adjusted\nP-value",
option="H") +
scale_y_discrete(labels = ~ str_wrap(gsub('_', ' ', .x), 40)) + # replaces underscores with spaces to allow wrapping of long gene set names
geom_vline(xintercept = 0, linetype=2) +
labs(title="GSEA, Cohort 2, Signatures of T-cell Development and Differentiation", y = "Gene Set") +
theme(plot.title = element_text(hjust = 0.5))
gseaplot2(gse_origin2,
geneSetID = c("LEE_EARLY_T_LYMPHOCYTE_UP",
"GSE139242_CD4_THYMOCYTES_VS_ADULT_BLOOD_TOP250UP",
"GSE139242_CD4_THYMOCYTES_VS_INFANT_BLOOD_TOP250UP",
"GSE1460_INTRATHYMIC_T_PROGENITOR_VS_NAIVE_CD4_TCELL_ADULT_BLOOD_UP",
"GSE1460_CD4_THYMOCYTE_VS_NAIVE_CD4_TCELL_ADULT_BLOOD_UP",
"BIOCARTA_TCR_PATHWAY",
"WP_T_CELL_RECEPTOR_AND_COSTIMULATORY_SIGNALING"),
title = "Effector Sets - Cohort 2",
color = c("red", "purple", "darkgreen", "orange2", "brown", "hotpink", "skyblue1")
)
# variance stabilized transformed count matrices
vsd_cohort1 <- read.csv("Cohort_1/Output/VST_NormalizedCounts.csv")
vsd_cohort2 <- read.csv("Cohort_2/Output/VST_NormalizedCounts.csv")
# metadata
metadata1 <- read.table(file = "Cohort_1/Input/metadata.txt", header = FALSE)
colnames(metadata1) <- c("avery_num", "sample_name", "phenotype")
rownames(metadata1) <- metadata1$sample_name
metadata1 <- dplyr::select(metadata1, c("phenotype"))
metadata1 <- metadata1[row.names(metadata1) != "CF21", , drop = FALSE]
metadata2 <- read.table(file = "Cohort_2/Input/metadata.txt", header = FALSE)
colnames(metadata2) <- c("avery_num", "sample_name", "phenotype")
rownames(metadata2) <- metadata2$sample_name
metadata2 <- dplyr::select(metadata2, c("phenotype"))
# gmt file
gmt <- getGmt("thymusTCellAndTCRGeneSets.gmt", geneIdType = SymbolIdentifier())
# prepare vst data for GSVA
vsd_cohort1 <- vsd_cohort1[!duplicated(vsd_cohort1$gene_name), ]
rownames(vsd_cohort1) <- vsd_cohort1$gene_name
vsd_cohort1 <- dplyr::select(vsd_cohort1, -c("X", "probe_id", "gene_name", "description"))
vsd_cohort1_matrix <- data.matrix(vsd_cohort1) # Convert to a matrix prior to analysis.
## keep only CD4 PTCL and CD4 CTRL samples from Cohort 1
keepGroups <- c("CD4_PTCL", "CD4_CTRL")
metadata1 <- metadata1 %>%
filter(phenotype %in% keepGroups)
keepList <- rownames(metadata1)
vsd_cohort1_matrix <- vsd_cohort1_matrix[, colnames(vsd_cohort1_matrix) %in% keepList]
vsd_cohort2 <- vsd_cohort2[!duplicated(vsd_cohort2$gene_name), ]
rownames(vsd_cohort2) <- vsd_cohort2$gene_name
vsd_cohort2 <- dplyr::select(vsd_cohort2, -c("X", "probe_id", "gene_name", "description"))
vsd_cohort2_matrix <- data.matrix(vsd_cohort2) # Convert to a matrix prior to analysis.
##### Cohort 1 #####
# set parameters for GSVA
gsva_param1 <- gsvaParam(
vsd_cohort1_matrix,
gmt,
kcdf = "Gaussian", # Compute Gaussian-distributed scores
minSize = 10, # Minimum gene set size
maxSize = 500, # Maximum gene set size
maxDiff = TRUE,
absRanking = FALSE
)
# perform GSVA
gsva_results1 <- gsva(
gsva_param1,
verbose=FALSE, # Don't print out the progress bar
)
## No annotation package name available in the input data object.
## Attempting to directly match identifiers in data to gene sets.
gsva_heatmap1 <- pheatmap::pheatmap(gsva_results1,
annotation_col = metadata1,
show_colnames = TRUE,
show_rownames = TRUE,
fontsize_row = 10,
cluster_cols = TRUE,
cluster_rows = TRUE,
cutree_rows = 3,
cutree_cols = 3,
main = "GSVA. Input: Vst transformed normalized DESeq2 counts, Cohort 1\nClustering: Ward, Distance: Euclidean",
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
clustering_method = "ward.D2") +
scale_fill_gradient(c('dodgerblue', 'black', "yellow"))# Shrink the pathway labels a tad
# set parameters for GSVA
gsva_param2 <- gsvaParam(
vsd_cohort2_matrix,
gmt,
kcdf = "Gaussian", # Compute Gaussian-distributed scores
minSize = 10, # Minimum gene set size
maxSize = 500, # Maximum gene set size
maxDiff = TRUE,
absRanking = FALSE
)
# perform GSVA
gsva_results2 <- gsva(
gsva_param2,
verbose=FALSE, # Don't print out the progress bar
)
## No annotation package name available in the input data object.
## Attempting to directly match identifiers in data to gene sets.
gsva_heatmap2 <- pheatmap::pheatmap(gsva_results2,
annotation_col = metadata2,
show_colnames = TRUE,
show_rownames = TRUE,
fontsize_row = 10,
cluster_cols = TRUE,
cluster_rows = TRUE,
cutree_rows = 3,
cutree_cols = 4,
main = "GSVA. Input: Vst transformed normalized DESeq2 counts, Cohort 2\nClustering: Ward, Distance: Euclidean",
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
clustering_method = "ward.D2") +
scale_fill_gradient(c('dodgerblue', 'black', "yellow"))# Shrink the pathway labels a tad
# Convert metadata file to annotated data frame
annot_metadata1 <- AnnotatedDataFrame(data=metadata1)
annot_metadata2 <- AnnotatedDataFrame(data=metadata2)
# Create ExpressionSet object from gsva results matrix and the new metadata. Colnames of gsva_results must match rownames of annot_metadata
gsva_exp1 <- ExpressionSet(assayData = gsva_results1, phenoData = annot_metadata1)
gsva_exp2 <- ExpressionSet(assayData = gsva_results2, phenoData = annot_metadata2)
# Perform differential expression anlaysis using limma package
mod1 <- model.matrix(~ factor(gsva_exp1$phenotype))
fit1 <- lmFit(gsva_results1, mod1)
fit1 <- eBayes(fit1)
res1 <- decideTests(fit1, p.value=0.05)
summary(res1)
## (Intercept) factor(gsva_exp1$phenotype)CD4_PTCL
## Down 19 15
## NotSig 10 10
## Up 13 17
# set factor levels for Cohort 2 since there are more than 2 groups
cohort2_groups <- factor(gsva_exp2$phenotype, levels = c("CD4_PTCL", "CD4_LN_CTRL", "CD4THYM_CTRL"))
mod2 <- model.matrix(~ cohort2_groups)
fit2 <- lmFit(gsva_results2, mod2)
fit2 <- eBayes(fit2)
res2 <- decideTests(fit2, p.value=0.05)
summary(res2)
## (Intercept) cohort2_groupsCD4_LN_CTRL cohort2_groupsCD4THYM_CTRL
## Down 26 14 7
## NotSig 16 12 22
## Up 0 16 13
# Export
fit1_df <- as.data.frame(fit1)
rownames(fit1_df) <- rownames(fit1)
write.csv(fit1_df, file="Cohort_1/Output/GSVAlimma_CD4PTCLvsCD4CTRL.csv")
fit2_df <- as.data.frame(fit2)
rownames(fit2_df) <- rownames(fit2)
write.csv(fit2_df, file="Cohort_2/Output/GSVAlimma_CD4PTCLvsCD4CTRL.csv")
# Cohort 1
tt1 <- topTable(fit1, coef=2, n=Inf) # Coefficient is the column of the comparison of interest.
DEpwys1 <- rownames(tt1)[tt1$adj.P.Val <= 0.05] # Extract only those gene sets differentially expressed with p > 0.05
DEpwys_es1 <- exprs(gsva_exp1[DEpwys1, ]) # Index the Expression Set for only those significantly differentially expressed gene sets.
# Cohort 2
## CD4_PTCL vs CD4_LN_CTRL
tt2 <- topTable(fit2, coef="cohort2_groupsCD4_LN_CTRL", n=Inf)
DEpwys2 <- rownames(tt2)[tt2$adj.P.Val <= 0.05]
DEpwys_es2 <- exprs(gsva_exp2[DEpwys2, ])
## CD4_PTCL vs CD4_THYM_CTRL
tt3 <- topTable(fit2, coef="cohort2_groupsCD4THYM_CTRL", n=Inf)
DEpwys3 <- rownames(tt3)[tt3$adj.P.Val <= 0.05]
DEpwys_es3 <- exprs(gsva_exp2[DEpwys3, ])
# Export
write.csv(tt1, file="Cohort_1/Output/GSVAlimma_CD4PTCLvCD4CTRL_toptable.csv")
write.csv(tt2, file="Cohort_2/Output/GSVAlimma_CD4PTCLvCD4_LN_CTRL_toptable.csv")
write.csv(tt3, file="Cohort_2/Output/GSVAlimma_CD4PTCLvCD4_THYM_CTRL_toptable.csv")
# Draw heatmaps
## Cohort 1
### Set phenotype labels for display on heatmap
colorLegend <- c("mediumaquamarine", "orange1")
names(colorLegend) <- c("CD4_PTCL", "CD4_CTRL")
sample.color.map <- colorLegend[pData(gsva_exp1)[, "phenotype"]]
names(sample.color.map) <- colnames(DEpwys_es1)
### Reset graphics settings (this heatmap is large and could throw margin size errors)
#graphics.off()
### Perform hierarchical clustering of GSVA scores
sampleClustering1 <- hclust(as.dist(1-cor(DEpwys_es1, method="spearman")),
method="ward.D2")
geneSetClustering1 <- hclust(as.dist(1-cor(t(DEpwys_es1), method="pearson")),
method="ward.D2")
### Plot
colors <- colorRampPalette(c("dodgerblue4", "aliceblue", "firebrick1"))(256)
par(mar=c(1,10,1,10))
heatmap.2(DEpwys_es1,
scale="row",
col=colors,
ColSideColors=sample.color.map,
xlab="",
ylab="",
main="Gene Set Variation Analysis, Cohort 1",
trace="none",
keysize=1,
density.info="none",
margins=c(8,35),
labRow=rownames(DEpwys_es1),
labCol=sampleClustering1$labels,
Colv=as.dendrogram(sampleClustering1),
Rowv=as.dendrogram(geneSetClustering1))
legend("topright", names(colorLegend), fill=colorLegend, inset=0.1, bty="n")
## Cohort 2
### Set phenotype labels for display on heatmap
colorLegend <- c("mediumaquamarine", "orange1", "steelblue")
names(colorLegend) <- c("CD4_PTCL", "CD4_LN_CTRL", "CD4THYM_CTRL")
sample.color.map <- colorLegend[pData(gsva_exp2)[, "phenotype"]]
names(sample.color.map) <- colnames(DEpwys_es2)
### Reset graphics settings (this heatmap is large and could throw margin size errors)
#graphics.off()
### Perform hierarchical clustering of GSVA scores
sampleClustering2 <- hclust(as.dist(1-cor(DEpwys_es2, method="spearman")),
method="ward.D2")
geneSetClustering2 <- hclust(as.dist(1-cor(t(DEpwys_es2), method="pearson")),
method="ward.D2")
### Plot
colors <- colorRampPalette(c("dodgerblue4", "aliceblue", "firebrick1"))(256)
par(oma = c(0, 0, 0, 0)) # Reduce outer margins
par(mar = c(4, 4, 2, 1)) # Fine-tune inner margins
heatmap.2(DEpwys_es2,
scale="row",
col=colors,
ColSideColors=sample.color.map,
xlab="",
ylab="",
main="Gene Set Variation Analysis, Cohort 2",
trace="none",
keysize=0.6,
density.info="none",
margins=c(3,40),
labRow=rownames(DEpwys_es2),
labCol=sampleClustering2$labels,
Colv=as.dendrogram(sampleClustering2),
Rowv=as.dendrogram(geneSetClustering2))
legend("topright", names(colorLegend), fill=colorLegend, inset=0.05, bty="n")
sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Denver
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gplots_3.2.0 limma_3.60.6
## [3] GSVA_1.52.3 knitr_1.49
## [5] RColorBrewer_1.1-3 GSEABase_1.66.0
## [7] graph_1.82.0 annotate_1.82.0
## [9] XML_3.99-0.18 AnnotationDbi_1.66.0
## [11] lubridate_1.9.4 forcats_1.0.0
## [13] stringr_1.5.1 purrr_1.0.2
## [15] readr_2.1.5 tidyr_1.3.1
## [17] tibble_3.2.1 tidyverse_2.0.0
## [19] msigdbr_7.5.1 ggplot2_3.5.1
## [21] enrichplot_1.24.4 pathview_1.44.0
## [23] dplyr_1.1.4 clusterProfiler_4.12.6
## [25] DESeq2_1.44.0 SummarizedExperiment_1.34.0
## [27] Biobase_2.64.0 MatrixGenerics_1.16.0
## [29] matrixStats_1.5.0 GenomicRanges_1.56.2
## [31] GenomeInfoDb_1.40.1 IRanges_2.38.1
## [33] S4Vectors_0.42.1 BiocGenerics_0.50.0
## [35] BiocManager_1.30.25
##
## loaded via a namespace (and not attached):
## [1] splines_4.4.0 bitops_1.0-9
## [3] ggplotify_0.1.2 R.oo_1.27.0
## [5] polyclip_1.10-7 lifecycle_1.0.4
## [7] httr2_1.1.0 lattice_0.22-6
## [9] MASS_7.3-60.2 magrittr_2.0.3
## [11] sass_0.4.9 rmarkdown_2.29
## [13] jquerylib_0.1.4 yaml_2.3.10
## [15] cowplot_1.1.3 DBI_1.2.3
## [17] abind_1.4-8 zlibbioc_1.50.0
## [19] R.utils_2.12.3 ggraph_2.2.1
## [21] RCurl_1.98-1.16 yulab.utils_0.2.0
## [23] tweenr_2.0.3 rappdirs_0.3.3
## [25] GenomeInfoDbData_1.2.12 ggrepel_0.9.6
## [27] irlba_2.3.5.1 tidytree_0.4.6
## [29] pheatmap_1.0.12 codetools_0.2-20
## [31] DelayedArray_0.30.1 DOSE_3.30.5
## [33] ggforce_0.4.2 tidyselect_1.2.1
## [35] aplot_0.2.4 UCSC.utils_1.0.0
## [37] farver_2.1.2 ScaledMatrix_1.12.0
## [39] viridis_0.6.5 jsonlite_1.8.9
## [41] tidygraph_1.3.1 tools_4.4.0
## [43] treeio_1.28.0 snow_0.4-4
## [45] Rcpp_1.0.14 glue_1.8.0
## [47] gridExtra_2.3 SparseArray_1.4.8
## [49] xfun_0.49 qvalue_2.36.0
## [51] HDF5Array_1.32.1 withr_3.0.2
## [53] fastmap_1.2.0 rhdf5filters_1.16.0
## [55] caTools_1.18.3 digest_0.6.35
## [57] rsvd_1.0.5 timechange_0.3.0
## [59] R6_2.5.1 gridGraphics_0.5-1
## [61] colorspace_2.1-1 GO.db_3.19.1
## [63] gtools_3.9.5 RSQLite_2.3.9
## [65] R.methodsS3_1.8.2 generics_0.1.3
## [67] data.table_1.16.4 graphlayouts_1.2.2
## [69] httr_1.4.7 S4Arrays_1.4.1
## [71] scatterpie_0.2.4 pkgconfig_2.0.3
## [73] gtable_0.3.6 blob_1.2.4
## [75] SingleCellExperiment_1.26.0 XVector_0.44.0
## [77] shadowtext_0.1.4 htmltools_0.5.8.1
## [79] fgsea_1.30.0 scales_1.3.0
## [81] png_0.1-8 SpatialExperiment_1.14.0
## [83] ggfun_0.1.8 rstudioapi_0.17.1
## [85] tzdb_0.4.0 reshape2_1.4.4
## [87] rjson_0.2.23 nlme_3.1-164
## [89] org.Hs.eg.db_3.19.1 cachem_1.1.0
## [91] rhdf5_2.48.0 KernSmooth_2.23-22
## [93] parallel_4.4.0 pillar_1.10.1
## [95] grid_4.4.0 vctrs_0.6.5
## [97] BiocSingular_1.20.0 beachmat_2.20.0
## [99] xtable_1.8-4 Rgraphviz_2.48.0
## [101] evaluate_1.0.3 KEGGgraph_1.64.0
## [103] magick_2.8.5 cli_3.6.2
## [105] locfit_1.5-9.10 compiler_4.4.0
## [107] rlang_1.1.3 crayon_1.5.3
## [109] labeling_0.4.3 plyr_1.8.9
## [111] fs_1.6.5 stringi_1.8.4
## [113] viridisLite_0.4.2 BiocParallel_1.38.0
## [115] babelgene_22.9 munsell_0.5.1
## [117] Biostrings_2.72.1 lazyeval_0.2.2
## [119] GOSemSim_2.30.2 Matrix_1.7-0
## [121] hms_1.1.3 patchwork_1.3.0
## [123] sparseMatrixStats_1.16.0 bit64_4.6.0-1
## [125] Rhdf5lib_1.26.0 KEGGREST_1.44.1
## [127] statmod_1.5.0 igraph_2.1.4
## [129] memoise_2.0.1 bslib_0.8.0
## [131] ggtree_3.12.0 fastmatch_1.1-6
## [133] bit_4.5.0.1 ape_5.8-1
## [135] gson_0.1.0
citation()
## To cite R in publications use:
##
## R Core Team (2024). _R: A Language and Environment for Statistical
## Computing_. R Foundation for Statistical Computing, Vienna, Austria.
## <https://www.R-project.org/>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2024},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.