# Load the required library
options(warn=-1) #suppress warnings
sw = suppressPackageStartupMessages #suppress startup message shhhhhhhhhh....
sw(library(dplyr))
sw(library(GSVA))
We create a matrix named genetable with gene expression
values and set column and row names accordingly.
# Create a matrix named 'genetable' with gene expression values and set column and row names
genetable <- matrix(c(5.5, 6.3, 5.4, 7.3, 5.9, 6.8, 11.2, 6.4, 8.2, 7.5), ncol=1)
colnames(genetable) <- paste0("Sam", c(1:ncol(genetable)))
rownames(genetable) <- paste0("Gene", c(1:nrow(genetable)))
genetable # Display the genetable matrix
## Sam1
## Gene1 5.5
## Gene2 6.3
## Gene3 5.4
## Gene4 7.3
## Gene5 5.9
## Gene6 6.8
## Gene7 11.2
## Gene8 6.4
## Gene9 8.2
## Gene10 7.5
We define a list named genelist containing two gene
sets.
# Define a list named 'genelist' containing two gene sets
genelist <- list(c("Gene1", "Gene3", "Gene5", "Gene7"),
c("Gene2", "Gene4", "Gene8"))
names(genelist) <- c("GenesetA", "GenesetB")
genelist # Display the genelist
## $GenesetA
## [1] "Gene1" "Gene3" "Gene5" "Gene7"
##
## $GenesetB
## [1] "Gene2" "Gene4" "Gene8"
We use the gsva function to perform single-sample Gene
Set Enrichment Analysis (ssGSEA).
gsva(expr = genetable,
gset.idx.list = genelist,
mx.diff = FALSE, method = "ssgsea",
verbose = TRUE,
ssgsea.norm = FALSE,
min.sz = 1,
tau = 0.25)
## Estimating ssGSEA scores for 2 gene sets.
## [1] "Calculating ranks..."
## [1] "Calculating absolute values from ranks..."
## | | | 0% | |======================================================================| 100%
## Sam1
## GenesetA -1.747631
## GenesetB -0.166031
Here the enrichment score for GenesetA is :
-1.747631. Let’s see how is it computed step by step:
We rank the gene expression values and sort them in decreasing order.
# Rank the gene expression values and sort them in decreasing order
S <- genetable[, 1] %>% rank() %>% sort(decreasing = TRUE)
S # Display the sorted ranks
## Gene7 Gene9 Gene10 Gene4 Gene6 Gene8 Gene2 Gene5 Gene1 Gene3
## 10 9 8 7 6 5 4 3 2 1
We determine which genes in the sorted ranks are in
GenesetA.
# Determine which genes in the sorted ranks are in 'GenesetA'
G <- names(S) %in% genelist[[1]]
G # Display the logical vector indicating presence in 'GenesetA'
## [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE
We find the indices of the genes that are in
GenesetA.
# Display GenesetA
genelist$GenesetA
## [1] "Gene1" "Gene3" "Gene5" "Gene7"
# Find the indices of the genes that are in 'GenesetA'
ind_G <- which(G)
ind_G # Display the indices of genes in 'GenesetA'
## [1] 1 8 9 10
We initialize a vector PG of zeros and update it with
weighted ranks for genes in GenesetA.
# Initialize a vector 'PG' of zeros with the same length as 'S'
PG <- rep(0, length.out = length(S))
PG # Display the initialized PG vector
## [1] 0 0 0 0 0 0 0 0 0 0
# Assign the weighted ranks to the positions of genes in 'GenesetA' in 'PG'
PG[ind_G] <- abs(S[ind_G]) ^ 0.25
PG # Display the updated PG vector
## [1] 1.778279 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.316074
## [9] 1.189207 1.000000
# Normalize the 'PG' vector by dividing by the sum of its values
PG <- PG / sum(PG)
PG # Display the normalized PG vector
## [1] 0.3365684 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## [8] 0.2490885 0.2250768 0.1892663
# Compute the cumulative sum of the 'PG' vector
PG <- cumsum(PG)
PG # Display the cumulative sum of PG
## [1] 0.3365684 0.3365684 0.3365684 0.3365684 0.3365684 0.3365684 0.3365684
## [8] 0.5856569 0.8107337 1.0000000
We initialize a vector PNG of zeros and update it for
genes not in GenesetA.
# Find the indices of the genes that are not in 'GenesetA'
ind_NG <- which(!G)
ind_NG # Display the indices of genes not in 'GenesetA'
## [1] 2 3 4 5 6 7
# Initialize a vector 'PNG' of zeros with the same length as 'S'
PNG <- rep(0, length.out = length(S))
PNG # Display the initialized PNG vector
## [1] 0 0 0 0 0 0 0 0 0 0
# Assign the value 1 to the positions of genes not in 'GenesetA' in 'PNG'
PNG[ind_NG] <- 1
PNG # Display the updated PNG vector
## [1] 0 1 1 1 1 1 1 0 0 0
# Normalize the 'PNG' vector by dividing by the sum of its values
PNG <- PNG / sum(PNG)
PNG # Display the normalized PNG vector
## [1] 0.0000000 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## [8] 0.0000000 0.0000000 0.0000000
# Compute the cumulative sum of the 'PNG' vector
PNG <- cumsum(PNG)
PNG # Display the cumulative sum of PNG
## [1] 0.0000000 0.1666667 0.3333333 0.5000000 0.6666667 0.8333333 1.0000000
## [8] 1.0000000 1.0000000 1.0000000
Finally, we calculate the difference between the sums of
PG and PNG.
# Calculate the difference between the sums of 'PG' and 'PNG'
sum(PG) - sum(PNG)
## [1] -1.747631
This concludes the step-by-step analysis for GenesetA
using ssGSEA.