Calculation of P-val

# read in RDS file
# expdf is a dataframe that contains genes and their average absolute expression per patient along with l2fc value (mEGFR vs. mKRAS)
expdf <- readRDS("expdfForAnalysis.rds")

# initialize empty "p_values" as vector to store p val
p_values <- vector("list", nrow(expdf))

# convert as matrix
tetdf <- as.matrix(expdf)

# perform a Wilcoxon rank sum test on each gene, columns 1 through 8 represent mEGFR group while columns 9 through 20 represent mKRAS group
# utilize for loop to iterate through every row in expdf matrix and perform function on each row
# two sided test to determine difference from 0 (H0: mEGFR - mKRAS = 0, Ha: mEGFR - mKRAS /= 0)
for(i in seq_along(1: nrow(tetdf))){
  p_values[i] = wilcox.test(tetdf[i,1:8],tetdf[i,9:20], alternative = "two.sided", exact = FALSE)$p.value
}

# create as DF
p_values = data.frame(p_values = sapply(p_values, c))

# add column onto overall DF
expdf <- cbind(expdf, p_values)

# perform BH correction on each p-val
expdf$adj_p_values <- p.adjust(expdf$p_values, method = "BH", n = length(expdf$p_values))

# create markerwrapper DF that serves as input into volcano plot
# create new matrix with only genes, p-values, FDR adjusted p-values, and l2fc
# EnhancedVolcano must take this DF in this specific format, normally wraps around FindMarkers() function
markerwrapper <- cbind(rownames(expdf), expdf$p_values, expdf$adj_p_values, expdf$EKl2fc)
# as DF
markerwrapper <- as.data.frame(markerwrapper)
# edit col names
colnames(markerwrapper) <- c("Feature", "p_val", "adj_p_val", "avg_l2fc")
# make rownames as gene
rownames(markerwrapper) <- markerwrapper$Feature
# convert as numeric variables
markerwrapper$p_val <- as.numeric(markerwrapper$p_val)
markerwrapper$adj_p_val <- as.numeric(markerwrapper$adj_p_val)
markerwrapper$avg_l2fc <- as.numeric(markerwrapper$avg_l2fc)

# generate volcanoplot
# edit max.overlaps to decrease or increase # of feature labels
# note how there's an "artificial ceiling" of p-values
EnhancedVolcano(markerwrapper,
                rownames(markerwrapper),
                x ="avg_l2fc", 
                y ="adj_p_val",
    labCol = 'black', pCutoff = 0.05,
    labFace = 'bold', max.overlaps = 40, drawConnectors = TRUE)
## Warning: ggrepel: 1340 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps