# Also do Session > Set Working Directory > Choose Directory
knitr::opts_knit$set(root.dir = ".")
Sys.setenv(RSTUDIO_PANDOC="/usr/lib/rstudio-server/bin/pandoc")
library(rmarkdown)
library(BiocParallel) # SnowParam()
library(dplyr) # left_join()
library(edgeR) # DGEList()
library(limma) # plotMDS()
library(ggrepel) # geom_text_repel()
library(ggplot2) # ggplot()
library(gplots) # heatmap.2()
library(grDevices) # colorRampPalette()
require(philentropy) # JSD()
library(rtracklayer) # import()
library(stringr) # str_match()
require(variancePartition) # fitExtractVarPartModel()
library(reshape) # melt()
library(Glimma)
library(plyr)
library(corrplot)
library("ggpubr")
library(tidyverse)
library(caret)
library(glmnet)
control <- "CONTROL"
condition <- "LBD"
control_color <- "gray29"
condition_color <- "red"
myContrasts <- c("LBD - CONTROL")
tool = c("star")
pathToRef = c("/research/labs/neurology/fryer/projects/references/human/")
pathToRawData = c("/research/labs/neurology/fryer/projects/LBD_CWOW/")
# read in metadata
metadata <- read.delim(paste0(pathToRawData, "metadata.tsv"),
header = TRUE,
sep = "\t")
RIN <- read.delim(paste0(pathToRawData, "RIN.tsv"),
header = TRUE,
sep = "\t")
RIN$NPID <- RIN$Sample
metadata <- merge(metadata, RIN, by = "NPID")
# read in counts data
counts <- read.delim(
paste0("../../featureCounts/LBD.counts"),
header = TRUE,
sep = "\t", check.names=FALSE)
# get the sampleID from the counts file to match with metadata
sampleIDs <- colnames(counts)
samples <- as.data.frame(sampleIDs)
# split the sampleID by _
sample_count_id <- as.data.frame(str_split_fixed(samples$sampleID, "_", 2))
sample_count_id$counts_id <- samples$sampleIDs
# rename columns
names(sample_count_id)[names(sample_count_id) == 'V1'] <- 'NPID'
names(sample_count_id)[names(sample_count_id) == 'V2'] <- 'flow_lane'
# merge sample_count_id and metadata files
# this way metadata contains the sampleID to match the counts table
counts_metadata <- merge(metadata, sample_count_id, by = "NPID")
# gene information
counts.geneid <- read.delim(
"../../featureCounts/gene_info.txt",
header = TRUE,
sep = "\t"
)
counts.geneid <- as.data.frame(counts.geneid$Geneid)
colnames(counts.geneid) <- "gene_id"
# add gene_name as row names to counts file
rownames(counts) <- counts.geneid$gene_id
# read in annotation file
gtf.file <- paste0(pathToRef, "gencode.v38.annotation.gtf")
genes.gtf <- rtracklayer::import(gtf.file)
genes.gtf <- as.data.frame(genes.gtf)
genes.gtf <- genes.gtf[genes.gtf$type == "gene",]
table(genes.gtf$gene_type)
##
## IG_C_gene IG_C_pseudogene
## 14 9
## IG_D_gene IG_J_gene
## 37 18
## IG_J_pseudogene IG_pseudogene
## 3 1
## IG_V_gene IG_V_pseudogene
## 145 187
## lncRNA miRNA
## 16888 1879
## misc_RNA Mt_rRNA
## 2212 2
## Mt_tRNA polymorphic_pseudogene
## 22 49
## processed_pseudogene protein_coding
## 10163 19955
## pseudogene ribozyme
## 15 8
## rRNA rRNA_pseudogene
## 47 497
## scaRNA scRNA
## 49 1
## snoRNA snRNA
## 943 1901
## sRNA TEC
## 5 1056
## TR_C_gene TR_D_gene
## 6 4
## TR_J_gene TR_J_pseudogene
## 79 4
## TR_V_gene TR_V_pseudogene
## 106 33
## transcribed_processed_pseudogene transcribed_unitary_pseudogene
## 502 143
## transcribed_unprocessed_pseudogene translated_processed_pseudogene
## 950 2
## translated_unprocessed_pseudogene unitary_pseudogene
## 1 98
## unprocessed_pseudogene vault_RNA
## 2614 1
#counts.geneid
#names(counts.geneid)[1] <- 'gene_name'
joined.df <- join(counts.geneid, genes.gtf, type = "inner")
## Joining by: gene_id
# check columns and rows match up between files
all.equal(rownames(counts), counts.geneid$gene_id)
## [1] TRUE
all.equal(rownames(counts), genes.gtf$gene_id)
## [1] TRUE
# reorder counts to be in the same order as metadata table
counts <- counts[, counts_metadata$counts_id]
all.equal(colnames(counts), (counts_metadata$counts_id))
## [1] TRUE
# create object
dge <- DGEList(counts = counts,
samples = counts_metadata,
genes = genes.gtf)
table(dge$samples$TYPE)
##
## CONTROL CONTROL - AD CONTROL - PA LBD
## 88 56 48 450
# remove AD and PA controls
remove <- vector()
for (i in 1:nrow(dge$samples)) {
if (dge$samples$TYPE[i] == "CONTROL - AD" | dge$samples$TYPE[i] == "CONTROL - PA") {
remove <- c(remove, i)
}
}
dge <- dge[,-remove, keep.lib.sizes = FALSE]
table(dge$samples$TYPE)
##
## CONTROL LBD
## 88 450
dim(dge)
## [1] 60649 538
removeMT <- dge$genes$seqnames != "chrM" # true when NOT MT
dge <- dge[removeMT,,keep.lib.sizes = FALSE]
dim(dge)
## [1] 60612 538
These functions with help simultaneously save plots as a png, pdf, and tiff file.
saveToPDF <- function(...) {
d = dev.copy(pdf,...)
dev.off(d)
}
saveToPNG <- function(...) {
d = dev.copy(png,...)
dev.off(d)
}
# set colors and get data
table(dge$samples$TYPE)
##
## CONTROL LBD
## 88 450
dge$samples$TYPE <- as.factor(dge$samples$TYPE)
group_colors <- c("grey","red")[dge$samples$TYPE]
lcpm <- edgeR::cpm(dge$counts, log = TRUE)
cpm <- edgeR::cpm(dge$counts, log = FALSE)
par(bg = 'white')
# plot MDS
plotMDS(
lcpm,
top = 100,
labels = dge$samples$Sex,
cex = 2,
dim.plot = c(1,2),
plot = TRUE,
col = group_colors, gene.selection = "common"
)
title(expression('Top 100 Genes (Log'[2]~'CPM)'))
path <- paste0("../../results/", tool, "/MDS/", condition,"_gene_MDS_techreps_label_sex")
saveToPDF(paste0(path, ".pdf"), width = 5.2, height = 5.2)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 4, height = 4, unit = "in", res = 300)
## png
## 2
genes_and_counts <- cbind(dge$genes$gene_name, dge$counts)
genes_and_counts <- as.data.frame(genes_and_counts)
names(genes_and_counts)[names(genes_and_counts) == "V1"] <- "Geneid"
rownames(genes_and_counts) <- NULL
sex_genes <- c("XIST", "EIF1AY", "KDM5D", "UTY", "DDX3Y", "RPS4Y1")
sex_genes_counts <- subset(genes_and_counts, Geneid %in% sex_genes)
rownames(sex_genes_counts) <- sex_genes_counts$Geneid
sex_genes_counts$Geneid <- NULL
sex_gene_df <- as.data.frame(t(sex_genes_counts))
sex_gene_df$counts_id <- rownames(sex_gene_df)
rownames(sex_gene_df) <- NULL
meta_short <- counts_metadata[,c("counts_id","Sex")]
sex_gene_meta <- merge(sex_gene_df, meta_short, by = "counts_id")
cols.num <- c("XIST","EIF1AY", "KDM5D", "UTY", "DDX3Y", "RPS4Y1")
sex_gene_meta[cols.num] <- sapply(sex_gene_meta[cols.num], as.integer)
sex_gene_meta <- sex_gene_meta[,c(2,3,4,5,6,7,8,1)]
# replace M with male and F with female
sex_gene_meta <- sex_gene_meta %>%
mutate(
Sex = ifelse(Sex %in% c("M"), "male", "female")
)
training_model_data <- "for_rna_sex_check.tsv"
# Build the model using the GTEx data
# Load the data and remove NAs
train_data <- read.csv(training_model_data, sep="\t")
# Split the data into training and test set
set.seed(123)
training.samples <- train_data$sex %>%
createDataPartition(p = 0.8, list = FALSE)
train.data <- train_data[training.samples, ]
test.data <- train_data[-training.samples, ]
# Dummy code categorical predictor variables
x <- model.matrix(sex~., train.data)[,-1]
# Convert the outcome (class) to a numerical variable
y <- ifelse(train.data$sex == "female", 1, 0)
cv.lasso <- cv.glmnet(x, y, alpha = 1, family = "binomial")
plot(cv.lasso)
cv.lasso$lambda.min
## [1] 3.452375e-05
coef(cv.lasso, cv.lasso$lambda.min)
## 7 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 4.0259095684
## XIST 0.0008731318
## EIF1AY -0.0064198554
## KDM5D .
## UTY -0.0063930288
## DDX3Y .
## RPS4Y1 -0.0028940802
coef(cv.lasso, cv.lasso$lambda.1se)
## 7 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 3.944517679
## XIST 0.000758968
## EIF1AY -0.006134509
## KDM5D .
## UTY -0.005928303
## DDX3Y .
## RPS4Y1 -0.002692325
# Final model with lambda.min
lasso.model <- glmnet(x, y, alpha = 1, family = "binomial",
lambda = cv.lasso$lambda.min)
# Make prediction on test data
x.test <- model.matrix(sex ~., test.data)[,-1]
probabilities <- lasso.model %>% predict(newx = x.test)
predicted.classes <- ifelse(probabilities > 0.5, "female", "male")
# Model accuracy
observed.classes <- test.data$sex
mean(predicted.classes == observed.classes)
## [1] 1
# ----------------------
# Run on experiment data
# ----------------------
counts_ids <- sex_gene_meta$counts_id
sex_gene_meta$counts_id <- NULL
test.experiment <- model.matrix(Sex ~., sex_gene_meta)[,-1]
probabilities <- lasso.model %>% predict(newx = test.experiment)
predicted.classes <- ifelse(probabilities > 4, "female", "male")
# Model accuracy
observed.classes <- sex_gene_meta$Sex
mean(predicted.classes == observed.classes)
## [1] 0.9962825
df <- cbind(as.data.frame(predicted.classes)$s0, sex_gene_meta$Sex)
df <- as.data.frame(df)
# Rename column where names is "Sepal.Length"
names(df)[names(df) == "V1"] <- "reported"
names(df)[names(df) == "V2"] <- "observed"
# add back in sample IDs
df$counts_id <- counts_ids
# what doesn't match between reported and observed?
which(df[,1]!=df[,2])
## [1] 37 38
# output results
write.table(df, "sex_check_RNA.txt", sep = "\t", row.names = FALSE, quote = FALSE)
genes_and_counts <- cbind(dge$genes$gene_name, lcpm)
genes_and_counts <- as.data.frame(genes_and_counts)
names(genes_and_counts)[names(genes_and_counts) == "V1"] <- "Geneid"
rownames(genes_and_counts)<-NULL
genes_counts <- melt(genes_and_counts, id=c("Geneid"))
names(genes_counts)[names(genes_counts) == "variable"] <- "counts_id"
df <- cbind(counts_metadata$counts_id, counts_metadata$Sex)
df <- as.data.frame(df)
names(df)[names(df) == "V1"] <- "counts_id"
names(df)[names(df) == "V2"] <- "Sex"
data <- merge(genes_counts, df, by = "counts_id")
sexGenes <- c("DDX3X, DDX3Y")
SelectGenes_counts <-
subset(
data,
Geneid %in% c(
"DDX3X",
"DDX3Y",
"ZFX",
"ZFY",
"USP9X",
"USP9Y",
"KDM6A",
"UTY",
"PCDH11X",
"PCDH11Y",
"XIST",
"SRY"
)
)
SelectGenes_counts[, "geneComb"] <- NA
SelectGenes_counts[, "group"] <- NA
SelectGenes_counts$geneComb <-
ifelse(
SelectGenes_counts$Geneid == "DDX3X",
"DDX3X:DDX3Y",
ifelse(
SelectGenes_counts$Geneid == "DDX3Y",
"DDX3X:DDX3Y",
ifelse(
SelectGenes_counts$Geneid == "ZFX",
"ZFX:ZFY",
ifelse(
SelectGenes_counts$Geneid == "ZFY",
"ZFX:ZFY",
ifelse(
SelectGenes_counts$Geneid == "USP9X",
"USP9X:USP9Y",
ifelse(
SelectGenes_counts$Geneid == "USP9Y",
"USP9X:USP9Y",
ifelse(
SelectGenes_counts$Geneid == "KDM6A",
"UTX:UTY",
ifelse(
SelectGenes_counts$Geneid == "UTY",
"UTX:UTY",
ifelse(
SelectGenes_counts$Geneid == "PCDH11X",
"PCDH11X:PCDH11Y",
ifelse(
SelectGenes_counts$Geneid == "PCDH11Y",
"PCDH11X:PCDH11Y",
ifelse(
SelectGenes_counts$Geneid == "XIST",
"XIST",
ifelse(SelectGenes_counts$Geneid == "SRY", "SRY", "NA")
)
)
)
)
)
)
)
)
)
)
)
SelectGenes_counts$group <-
ifelse(
SelectGenes_counts$geneComb == "DDX3X:DDX3Y",
1,
ifelse(
SelectGenes_counts$geneComb == "ZFX:ZFY",
4,
ifelse(
SelectGenes_counts$geneComb == "USP9X:USP9Y",
3,
ifelse(
SelectGenes_counts$geneComb == "UTX:UTY",
5,
ifelse(
SelectGenes_counts$geneComb == "PCDH11X:PCDH11Y",
2,
ifelse(
SelectGenes_counts$geneComb == "XIST",
6,
ifelse(SelectGenes_counts$geneComb == "SRY", 7, "NA")
)
)
)
)
)
)
# Plot
data <- SelectGenes_counts
# identify outliers
data_XIST <- subset(data, Geneid == "XIST")
lower_bound <- quantile(as.numeric(data_XIST$value), 0.025)
not_male <- subset(data, counts_id == "5510_FCH5MYFDMXY_L1" | counts_id == "5510_FCH5MYFDMXY_L2" )
not_male$Sex <- c()
leg_lab <- "reported sex"
cbPaletteJITTER = c("darkorange", "blue")
geneticSEXgenes_plot <- ggplot(data, aes(x = Geneid, y = value)) +
geom_jitter(aes(color = Sex, shape = Sex),
width = 0.25,
size = 2.0) +
scale_color_manual(leg_lab, values = cbPaletteJITTER) + # Jitter color palette
scale_shape_manual(leg_lab, values = c(19, 15)) +
labs(x = "", y = "", title = "") +
facet_grid(
. ~ group + geneComb,
switch = "x",
# Moves the labels from the top to the bottom
#labeller = label_both, # Adds the labels to the year and X variables
scales = "free_x",
space = "free_x"
) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.border = element_rect(
colour = "black",
fill = NA,
size = 1
)
) +
xlab("") + ylab("lcpm")
geneticSEXgenes_plot + theme(
panel.background = element_rect(
fill = "white",
colour = "white",
size = 0.5,
linetype = "solid"
),
panel.grid.major = element_line(
size = 0.5,
linetype = 'solid',
colour = "white"
),
panel.grid.minor = element_line(
size = 0.25,
linetype = 'solid',
colour = "grey"
),
panel.border = element_rect(
colour = "black",
fill = NA,
size = 1
),
axis.text.x = element_text(face = "italic")
)
path <- paste0("../../results/", tool, "/sex_check/LBD_gene_raw_sex_check")
saveToPDF(paste0(path, ".pdf"), width = 10.5, height = 6)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 10.5, height = 4, unit = "in", res = 300)
## png
## 2
# first filter by expression and normalize the data
keep.expr <- filterByExpr(dge, group = dge$samples$TYPE)
dim(dge)
## [1] 60612 538
dge.filtered.reps <- dge[keep.expr, , keep.lib.sizes = FALSE]
dim(dge.filtered.reps)
## [1] 17982 538
table(dge.filtered.reps$genes$gene_type)
##
## IG_C_gene lncRNA
## 1 2682
## miRNA misc_RNA
## 5 12
## polymorphic_pseudogene processed_pseudogene
## 5 196
## protein_coding scaRNA
## 14487 1
## scRNA snoRNA
## 1 8
## snRNA TEC
## 8 142
## TR_C_gene transcribed_processed_pseudogene
## 4 79
## transcribed_unitary_pseudogene transcribed_unprocessed_pseudogene
## 41 255
## unitary_pseudogene unprocessed_pseudogene
## 2 53
# Now, normalization by the method of trimmed mean of M-values (TMM)
dge.filtered.reps.norm <- calcNormFactors(dge.filtered.reps, method = "TMM")
# norm factor summary
summary(dge.filtered.reps.norm$samples$norm.factors)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6138 0.9424 1.0317 1.0060 1.0799 1.2114
log2cpm.norm.reps <- edgeR::cpm(dge.filtered.reps.norm, log = T)
nsamples <- ncol(dge.filtered.reps.norm)
boxplot(log2cpm.norm.reps,
main="Filtered normalized lcpm data with replicates",
xlab="RIN",
ylab=expression('Counts per gene (Log'[2]~'CPM)'),
axes=FALSE)
axis(2)
axis(1,at=1:nsamples,labels=(dge.filtered.reps.norm$samples$RIN),las=2,cex.axis=0.8)
path <- paste0("../../results/", tool, "/boxplot/LBD_gene_boxplot_with_reps_with_RIN")
saveToPDF(paste0(path, ".pdf"), width = 35, height = 6)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 35, height = 6, unit = "in", res = 300)
## png
## 2
# sum technical replicates
dim(dge)
## [1] 60612 538
dge.tech <- sumTechReps(dge, dge$samples$NPID)
dim(dge.tech$counts)
## [1] 60612 269
colnames(dge.tech$counts) <- dge.tech$samples$NPID
# set colors and get data
group_colors <- c("black", "red")[dge.tech$samples$TYPE]
lcpm <- edgeR::cpm(dge.tech$counts, log = TRUE)
par(bg = 'white')
# plot MDS
plotMDS(
lcpm,
top = 100,
labels = dge.tech$samples$NPID,
cex = .8,
dim.plot = c(4,5),
plot = TRUE,
col = group_colors,
gene.selection = "common"
)
title(expression('Top 100 Genes - Raw (Log'[2]~'CPM)'))
legend(
"top",
legend = c(control, condition),
pch = 16,
col = c(control_color, condition_color),
cex = 1
)
# save
path <- paste0("../../results/", tool, "/MDS/", condition, "_gene_MDS_raw")
saveToPDF(paste0(path, ".pdf"), width = 4, height = 4)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 4, height = 4, unit = "in", res = 300)
## png
## 2
The filterByExpr() function in the edgeR package determines which genes have a great enough count value to keep. We will filter by group. This means at least 6 samples (6 is the smallest group sample size) must express a minimum count of 10 (in cpm, default value).
keep.expr <- filterByExpr(dge.tech, group = dge.tech$samples$TYPE)
dim(dge.tech)
## [1] 60612 269
dge.filtered <- dge.tech[keep.expr, , keep.lib.sizes = FALSE]
dim(dge.filtered)
## [1] 20067 269
table(dge.filtered$genes$gene_type)
##
## IG_C_gene lncRNA
## 4 3752
## miRNA misc_RNA
## 15 26
## polymorphic_pseudogene processed_pseudogene
## 8 333
## protein_coding rRNA_pseudogene
## 15119 3
## scaRNA scRNA
## 1 1
## snoRNA snRNA
## 16 22
## TEC TR_C_gene
## 195 4
## TR_J_gene transcribed_processed_pseudogene
## 1 99
## transcribed_unitary_pseudogene transcribed_unprocessed_pseudogene
## 54 314
## unitary_pseudogene unprocessed_pseudogene
## 4 96
# Now, normalization by the method of trimmed mean of M-values (TMM)
dge.filtered.norm <- calcNormFactors(dge.filtered, method = "TMM")
# norm factor summary
summary(dge.filtered.norm$samples$norm.factors)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5944 0.9455 1.0313 1.0063 1.0793 1.2139
Density plots of log-intensity distribution of each library can be superposed on a single graph for a better comparison between libraries and for identification of libraries with weird distribution.
# set graphical parameter
par(mfrow = c(1,3))
# Normalize data for library size and expression intesntiy
log2cpm.tech <- edgeR::cpm(dge.tech, log = TRUE)
log2cpm.filtered <- edgeR::cpm(dge.filtered, log = TRUE)
log2cpm.norm <- edgeR::cpm(dge.filtered.norm, log = TRUE)
# set colors
colors <- c("red","orange","green","yellow","blue","purple",
"lightgray","brown","pink","cyan")
nsamples <- ncol(dge.tech)
# First, plot the first column of the log2cpm.tech density
plot(density(log2cpm.tech[,1]), col = colors[1], lwd = 2, ylim = c(0,0.25),
las = 2, main = "A. Raw", xlab = expression('Log'[2]~CPM))
# For each sample plot the lcpm density
for (i in 2:nsamples){
den <- density(log2cpm.tech[,i]) #subset each column
lines(den$x, den$y, col = colors[i], lwd = 2)
}
# Second, plot log2cpm.filtered
plot(density(log2cpm.filtered[,1]), col = colors[1], lwd = 2, ylim = c(0,0.25),
las = 2, main = "B. Filtered", xlab = expression('Log'[2]~CPM))
abline(v = edgeR::cpm(3, log = TRUE), lty = 3)
for (i in 2:nsamples) {
den <- density(log2cpm.filtered[,i])
lines(den$x, den$y, col = colors[i], lwd = 2)
}
# Third, plot log2cpm.norm
plot(density(log2cpm.norm[,1]), col = colors[1], lwd = 2, ylim = c(0,0.25),
las = 2, main = "C. Normalized", xlab = expression('Log'[2]~CPM))
abline(v = edgeR::cpm(3, log = TRUE), lty = 3)
for (i in 2:nsamples) {
den <- density(log2cpm.norm[,i])
lines(den$x, den$y, col = colors[i], lwd = 2)
}
# save
path <- paste0("../../results/", tool, "/density/LBD_gene_density")
saveToPDF(paste0(path, ".pdf"), width = 6, height = 4)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 6, height = 4, unit = "in", res = 300)
## png
## 2
# set parameters
par(mfrow = c(1,3))
# First look at dge.tech
boxplot(log2cpm.tech,
main="A. Raw",
xlab="",
ylab=expression('Counts per gene (Log'[2]~'CPM)'),
axes=FALSE,
col = colors
)
axis(2) # 2 = left
axis(1, # 1 = below
at = 1:nsamples, # points at which tick-marks should be drawn
labels = colnames(log2cpm.tech),
las = 2,
cex.axis = 0.8 # size of axis
)
# Second, look at dge.filtered
boxplot(log2cpm.filtered,
main="B. Filtered",
xlab="",
ylab=expression('Counts per gene (Log'[2]~'CPM)'),
axes=FALSE,
col = colors
)
axis(2)
axis(1, at=1:nsamples,labels=colnames(log2cpm.filtered),las=2,cex.axis=0.8)
# Third, look at dge.norm
boxplot(log2cpm.norm,
main="C. Normalized",
xlab="",
ylab=expression('Counts per gene (Log'[2]~'CPM)'),
axes=FALSE,
col = colors)
axis(2)
axis(1,at=1:nsamples,labels=colnames(log2cpm.norm),las=2,cex.axis=0.8)
# save
path <- paste0("../../results/", tool, "/boxplot/LBD_gene_boxplot")
saveToPDF(paste0(path, ".pdf"), width = 10, height = 6)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 10, height = 6, unit = "in", res = 300)
## png
## 2
boxplot(log2cpm.norm,
main="Filtered normalized lcpm data",
xlab="RIN",
ylab=expression('Counts per gene (Log'[2]~'CPM)'),
axes=FALSE,
col = colors)
axis(2)
axis(1,at=1:nsamples,labels=(dge.filtered.norm$samples$RIN),las=2,cex.axis=0.8)
path <- paste0("../../results/", tool, "/boxplot/LBD_gene_boxplot_sum_reps_with_RIN")
saveToPDF(paste0(path, ".pdf"), width = 12, height = 6)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 12, height = 6, unit = "in", res = 300)
## png
## 2
# check if there is correlation between RIN and library size
box <- dge.filtered.norm$samples
plot(box$RIN, box$lib.size)
cor(box$RIN, box$lib.size, method = c("pearson", "kendall", "spearman"))
## [1] 0.3483433
cor.test(box$RIN, box$lib.size, method=c("pearson", "kendall", "spearman"))
##
## Pearson's product-moment correlation
##
## data: box$RIN and box$lib.size
## t = 6.0723, df = 267, p-value = 4.317e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2386895 0.4492260
## sample estimates:
## cor
## 0.3483433
# is the data normally distrubited
ggqqplot(box$lib.size, ylab = "library size")
# wt
ggqqplot(box$RIN, ylab = "RIN")
res <- cor.test(box$lib.size, box$RIN,
method = "pearson")
res
##
## Pearson's product-moment correlation
##
## data: box$lib.size and box$RIN
## t = 6.0723, df = 267, p-value = 4.317e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2386895 0.4492260
## sample estimates:
## cor
## 0.3483433
ggscatter(box, x = "RIN", y = "lib.size",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
ylab = "library size", xlab = "RIN value")
## `geom_smooth()` using formula 'y ~ x'
path <- paste0("../../results/", tool, "/library/LBD_corr_RIN_lib_size")
saveToPDF(paste0(path, ".pdf"), width = 6, height = 6)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 6, height = 6, unit = "in", res = 300)
## png
## 2
lcpm <- edgeR::cpm(dge.filtered.norm, log = TRUE)
genes_and_counts <- cbind(dge.filtered.norm$genes$gene_name, lcpm)
genes_and_counts <- as.data.frame(genes_and_counts)
names(genes_and_counts)[names(genes_and_counts) == "V1"] <- "Geneid"
rownames(genes_and_counts)<-NULL
genes_counts <- melt(genes_and_counts, id=c("Geneid"))
names(genes_counts)[names(genes_counts) == "variable"] <- "NPID"
df <- cbind(counts_metadata$NPID, counts_metadata$Sex)
df <- as.data.frame(df)
names(df)[names(df) == "V1"] <- "NPID"
names(df)[names(df) == "V2"] <- "Sex"
data <- merge(genes_counts, df, by = "NPID")
sexGenes <- c("DDX3X, DDX3Y")
SelectGenes_counts <-
subset(
data,
Geneid %in% c(
"DDX3X",
"DDX3Y",
"ZFX",
"ZFY",
"USP9X",
"USP9Y",
"KDM6A",
"UTY",
"PCDH11X",
"PCDH11Y",
"XIST",
"SRY"
)
)
SelectGenes_counts[, "geneComb"] <- NA
SelectGenes_counts[, "group"] <- NA
SelectGenes_counts$geneComb <-
ifelse(
SelectGenes_counts$Geneid == "DDX3X",
"DDX3X:DDX3Y",
ifelse(
SelectGenes_counts$Geneid == "DDX3Y",
"DDX3X:DDX3Y",
ifelse(
SelectGenes_counts$Geneid == "ZFX",
"ZFX:ZFY",
ifelse(
SelectGenes_counts$Geneid == "ZFY",
"ZFX:ZFY",
ifelse(
SelectGenes_counts$Geneid == "USP9X",
"USP9X:USP9Y",
ifelse(
SelectGenes_counts$Geneid == "USP9Y",
"USP9X:USP9Y",
ifelse(
SelectGenes_counts$Geneid == "KDM6A",
"UTX:UTY",
ifelse(
SelectGenes_counts$Geneid == "UTY",
"UTX:UTY",
ifelse(
SelectGenes_counts$Geneid == "PCDH11X",
"PCDH11X:PCDH11Y",
ifelse(
SelectGenes_counts$Geneid == "PCDH11Y",
"PCDH11X:PCDH11Y",
ifelse(
SelectGenes_counts$Geneid == "XIST",
"XIST",
ifelse(SelectGenes_counts$Geneid == "SRY", "SRY", "NA")
)
)
)
)
)
)
)
)
)
)
)
SelectGenes_counts$group <-
ifelse(
SelectGenes_counts$geneComb == "DDX3X:DDX3Y",
1,
ifelse(
SelectGenes_counts$geneComb == "ZFX:ZFY",
4,
ifelse(
SelectGenes_counts$geneComb == "USP9X:USP9Y",
3,
ifelse(
SelectGenes_counts$geneComb == "UTX:UTY",
5,
ifelse(
SelectGenes_counts$geneComb == "PCDH11X:PCDH11Y",
2,
ifelse(
SelectGenes_counts$geneComb == "XIST",
6,
ifelse(SelectGenes_counts$geneComb == "SRY", 7, "NA")
)
)
)
)
)
)
# Plot
data <- SelectGenes_counts
leg_lab <- "reported sex"
cbPaletteJITTER = c("darkorange", "blue")
geneticSEXgenes_plot <- ggplot(data, aes(x = Geneid, y = value)) +
geom_jitter(aes(color = Sex, shape = Sex),
width = 0.25,
size = 3.0) +
scale_color_manual(leg_lab, values = cbPaletteJITTER) + # Jitter color palette
scale_shape_manual(leg_lab, values = c(19, 15)) +
labs(x = "", y = "", title = "") +
facet_grid(
. ~ group + geneComb,
switch = "x",
# Moves the labels from the top to the bottom
#labeller = label_both, # Adds the labels to the year and X variables
scales = "free_x",
space = "free_x"
) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.border = element_rect(
colour = "black",
fill = NA,
size = 1
)
) +
xlab("") + ylab("lcpm") # Removes the month legend
geneticSEXgenes_plot + theme(
panel.background = element_rect(
fill = "white",
colour = "white",
size = 0.5,
linetype = "solid"
),
panel.grid.major = element_line(
size = 0.5,
linetype = 'solid',
colour = "white"
),
panel.grid.minor = element_line(
size = 0.25,
linetype = 'solid',
colour = "grey"
),
panel.border = element_rect(
colour = "black",
fill = NA,
size = 1
),
axis.text.x = element_text(face = "italic")
)
path <- paste0("../../results/", tool, "/sex_check/LBD_gene_raw_sex_check_sumreps")
saveToPDF(paste0(path, ".pdf"), width = 10.5, height = 6)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 10.5, height = 4, unit = "in", res = 300)
## png
## 2
CCA Heatmap
#form <- ~ TYPE + Age + Sex + ClinicalDx +lib.size + LBD.type +CDLB
form <- ~ TYPE + PathDx + AD.subtype + LBD.type + CDLB + Braak.NFT + Thal.amyloid + MF.SP + MF.NFT + MF.LB + Cing.LB + MF.Amyloid + MF.Tau + Cing.Synuclein + CWOW.Category + VaD + TDP.type + Brain.wt + ClinicalDx + FHx + Duration + Sex + Age + Race + PMI + APOE + MAPT + GRN + TMEM106b + RIN + Total.RNA.ng
# Compute Canonical Correlation Analysis (CCA)
# between all pairs of variables
# returns absolute correlation value
info <- as.data.frame(dge.filtered.norm$samples)
C = canCorPairs( form, info)
## Warning in canCorPairs(form, info): Regression model may be problematic.
## High colinearity between variables:
## TYPE and PathDx
## TYPE and LBD.type
## TYPE and CDLB
## MF.Amyloid and CWOW.Category
## MF.Tau and CWOW.Category
## Cing.Synuclein and CWOW.Category
# replace NA with zero
C[is.na(C)] = 0
cor.mtest <- function(C, ...) {
C <- as.matrix(C)
n <- ncol(C)
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(C[, i], C[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(C)
p.mat
}
# matrix of the p-value of the correlation
p.mat <- cor.mtest(C)
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(C, method="color", col=col(200),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
diag=FALSE
)
path <- paste0("../../results/", tool ,"/varpart/LBD_gene_CCA")
saveToPDF(paste0(path, ".pdf"), width = 25, height = 25)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 23, height = 25, unit = "in", res = 300)
## png
## 2
saveRDS(dge.filtered.norm, file = paste0("../../rObjects/LBD_dge.filtered.norm.rds"))
dge.filtered.norm <- readRDS(paste0("../../rObjects/LBD_dge.filtered.norm.rds"))
age <- as.numeric(dge.filtered.norm$samples$Age)
RIN <- as.numeric(dge.filtered.norm$samples$RIN)
sex <- as.factor(dge.filtered.norm$samples$Sex)
race <- as.factor(dge.filtered.norm$samples$Race)
group <- interaction(dge.filtered.norm$samples$TYPE)
design <- model.matrix(~ 0 + group + sex + (1|RIN))
colnames(design) <- c(control,condition, "sex", "RIN")
design
## CONTROL LBD sex RIN
## 1 1 0 1 1
## 2 1 0 1 1
## 3 1 0 1 1
## 4 0 1 0 1
## 5 1 0 0 1
## 6 0 1 1 1
## 7 0 1 0 1
## 8 1 0 0 1
## 9 0 1 1 1
## 10 0 1 1 1
## 11 0 1 0 1
## 12 0 1 0 1
## 13 0 1 0 1
## 14 0 1 1 1
## 15 1 0 0 1
## 16 0 1 1 1
## 17 0 1 1 1
## 18 0 1 1 1
## 19 0 1 1 1
## 20 1 0 1 1
## 21 0 1 1 1
## 22 0 1 1 1
## 23 1 0 1 1
## 24 1 0 1 1
## 25 1 0 0 1
## 26 0 1 0 1
## 27 0 1 1 1
## 28 0 1 1 1
## 29 0 1 1 1
## 30 0 1 0 1
## 31 0 1 1 1
## 32 0 1 0 1
## 33 0 1 1 1
## 34 0 1 0 1
## 35 0 1 0 1
## 36 0 1 0 1
## 37 0 1 1 1
## 38 0 1 0 1
## 39 0 1 1 1
## 40 0 1 1 1
## 41 0 1 0 1
## 42 0 1 0 1
## 43 1 0 1 1
## 44 0 1 1 1
## 45 1 0 1 1
## 46 0 1 1 1
## 47 0 1 1 1
## 48 1 0 0 1
## 49 0 1 1 1
## 50 0 1 1 1
## 51 0 1 1 1
## 52 0 1 1 1
## 53 1 0 1 1
## 54 1 0 1 1
## 55 0 1 1 1
## 56 0 1 1 1
## 57 0 1 1 1
## 58 0 1 1 1
## 59 1 0 0 1
## 60 0 1 1 1
## 61 0 1 0 1
## 62 0 1 1 1
## 63 0 1 0 1
## 64 0 1 1 1
## 65 0 1 1 1
## 66 0 1 1 1
## 67 1 0 1 1
## 68 0 1 1 1
## 69 0 1 0 1
## 70 0 1 1 1
## 71 1 0 1 1
## 72 0 1 0 1
## 73 0 1 1 1
## 74 0 1 1 1
## 75 1 0 1 1
## 76 0 1 1 1
## 77 0 1 1 1
## 78 0 1 1 1
## 79 0 1 1 1
## 80 0 1 1 1
## 81 1 0 1 1
## 82 0 1 1 1
## 83 0 1 0 1
## 84 0 1 1 1
## 85 0 1 0 1
## 86 0 1 0 1
## 87 0 1 0 1
## 88 1 0 1 1
## 89 0 1 1 1
## 90 0 1 1 1
## 91 0 1 1 1
## 92 0 1 1 1
## 93 0 1 0 1
## 94 0 1 1 1
## 95 1 0 1 1
## 96 0 1 0 1
## 97 0 1 1 1
## 98 0 1 1 1
## 99 0 1 0 1
## 100 0 1 1 1
## 101 0 1 0 1
## 102 1 0 1 1
## 103 0 1 0 1
## 104 0 1 1 1
## 105 0 1 1 1
## 106 0 1 1 1
## 107 0 1 1 1
## 108 0 1 0 1
## 109 0 1 0 1
## 110 0 1 1 1
## 111 0 1 1 1
## 112 1 0 1 1
## 113 0 1 1 1
## 114 0 1 0 1
## 115 0 1 1 1
## 116 1 0 0 1
## 117 1 0 1 1
## 118 0 1 0 1
## 119 1 0 0 1
## 120 0 1 0 1
## 121 0 1 0 1
## 122 0 1 1 1
## 123 0 1 0 1
## 124 0 1 1 1
## 125 1 0 0 1
## 126 0 1 0 1
## 127 0 1 1 1
## 128 0 1 1 1
## 129 0 1 0 1
## 130 0 1 1 1
## 131 0 1 1 1
## 132 0 1 0 1
## 133 0 1 0 1
## 134 1 0 1 1
## 135 1 0 0 1
## 136 0 1 0 1
## 137 0 1 1 1
## 138 0 1 1 1
## 139 0 1 1 1
## 140 0 1 0 1
## 141 0 1 0 1
## 142 1 0 0 1
## 143 0 1 1 1
## 144 0 1 0 1
## 145 0 1 1 1
## 146 0 1 1 1
## 147 1 0 0 1
## 148 0 1 1 1
## 149 1 0 0 1
## 150 0 1 1 1
## 151 0 1 1 1
## 152 0 1 1 1
## 153 0 1 1 1
## 154 0 1 1 1
## 155 0 1 1 1
## 156 0 1 0 1
## 157 0 1 1 1
## 158 1 0 1 1
## 159 0 1 1 1
## 160 0 1 1 1
## 161 0 1 1 1
## 162 0 1 1 1
## 163 0 1 1 1
## 164 0 1 0 1
## 165 0 1 0 1
## 166 0 1 1 1
## 167 0 1 1 1
## 168 0 1 0 1
## 169 0 1 1 1
## 170 0 1 0 1
## 171 0 1 1 1
## 172 0 1 1 1
## 173 0 1 0 1
## 174 0 1 1 1
## 175 1 0 1 1
## 176 0 1 0 1
## 177 0 1 1 1
## 178 0 1 1 1
## 179 0 1 1 1
## 180 0 1 1 1
## 181 0 1 1 1
## 182 0 1 1 1
## 183 0 1 1 1
## 184 0 1 1 1
## 185 1 0 1 1
## 186 0 1 1 1
## 187 0 1 0 1
## 188 0 1 1 1
## 189 0 1 1 1
## 190 0 1 1 1
## 191 0 1 1 1
## 192 0 1 1 1
## 193 0 1 1 1
## 194 0 1 1 1
## 195 0 1 1 1
## 196 0 1 0 1
## 197 0 1 0 1
## 198 1 0 1 1
## 199 1 0 0 1
## 200 0 1 1 1
## 201 0 1 1 1
## 202 1 0 0 1
## 203 0 1 0 1
## 204 0 1 1 1
## 205 0 1 1 1
## 206 0 1 0 1
## 207 0 1 1 1
## 208 0 1 0 1
## 209 0 1 0 1
## 210 0 1 1 1
## 211 0 1 0 1
## 212 0 1 1 1
## 213 0 1 1 1
## 214 0 1 0 1
## 215 0 1 0 1
## 216 0 1 0 1
## 217 0 1 0 1
## 218 0 1 0 1
## 219 0 1 1 1
## 220 0 1 1 1
## 221 0 1 0 1
## 222 0 1 1 1
## 223 0 1 1 1
## 224 0 1 1 1
## 225 0 1 0 1
## 226 0 1 1 1
## 227 0 1 0 1
## 228 0 1 0 1
## 229 0 1 1 1
## 230 0 1 0 1
## 231 0 1 1 1
## 232 0 1 1 1
## 233 0 1 1 1
## 234 0 1 0 1
## 235 0 1 1 1
## 236 0 1 1 1
## 237 1 0 1 1
## 238 1 0 0 1
## 239 0 1 0 1
## 240 0 1 0 1
## 241 1 0 1 1
## 242 0 1 0 1
## 243 0 1 1 1
## 244 0 1 1 1
## 245 0 1 1 1
## 246 0 1 1 1
## 247 0 1 1 1
## 248 0 1 1 1
## 249 0 1 0 1
## 250 0 1 1 1
## 251 0 1 1 1
## 252 0 1 1 1
## 253 0 1 0 1
## 254 0 1 0 1
## 255 0 1 0 1
## 256 0 1 1 1
## 257 0 1 0 1
## 258 0 1 0 1
## 259 0 1 1 1
## 260 0 1 1 1
## 261 1 0 1 1
## 262 0 1 0 1
## 263 1 0 0 1
## 264 0 1 1 1
## 265 0 1 1 1
## 266 0 1 1 1
## 267 0 1 0 1
## 268 0 1 1 1
## 269 0 1 1 1
## attr(,"assign")
## [1] 1 1 2 3
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
##
## attr(,"contrasts")$sex
## [1] "contr.treatment"
##
## attr(,"contrasts")$`1 | RIN`
## [1] "contr.treatment"
# voom transform counts
v <- voomWithQualityWeights(dge.filtered.norm,
design,
plot = TRUE)
## Coefficients not estimable: RIN
## Warning: Partial NA coefficients for 20067 probe(s)
## Coefficients not estimable: RIN
## Warning: Partial NA coefficients for 20067 probe(s)
# save
path <- paste0("../../results/", tool, "/voom/LBD_gene_mean_var_weights")
saveToPDF(paste0(path, ".pdf"), width = 6, height = 4)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 6, height = 4, unit = "in", res = 300)
## png
## 2
# fits linear model for each gene given a series of arrays
fit <- lmFit(v, design)
## Coefficients not estimable: RIN
## Warning: Partial NA coefficients for 20067 probe(s)
# contrast design for differential expression
contrasts <- makeContrasts(
title = myContrasts, # myContrasts was user input from beginning
levels = colnames(design))
head(contrasts)
## Contrasts
## Levels LBD - CONTROL
## CONTROL -1
## LBD 1
## sex 0
## RIN 0
# save contrast names
allComparisons <- colnames(contrasts)
allComparisons # check
## [1] "LBD - CONTROL"
# run contrast analysis
vfit <- contrasts.fit(fit, contrasts = contrasts)
# Compute differential expression based on the empirical Bayes moderation of the
# standard errors towards a common value.
veBayesFit <- eBayes(vfit)
plotSA(veBayesFit, main = "Final Model: Mean-variance Trend")
# save
path <- paste0("../../results/", tool, "/voom/LBD_gene_final_mean_var")
saveToPDF(paste0(path, ".pdf"), width = 6, height = 4)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 6, height = 4, unit = "in", res = 300)
## png
## 2
group_colors <- c("black", "red")[v$targets$TYPE]
names <- v$targets$NPID
plotMDS(
v,
top = 100,
labels = names,
cex = 1,
dim.plot = c(1,2),
plot = TRUE,
col = group_colors
)
title(expression('Top 100 Genes - Voom (Log'[2]~'CPM)'))
# save
# save
path <- paste0("../../results/", tool, "/MDS/LBD_gene_MDS_normalized")
saveToPDF(paste0(path, ".pdf"), width = 5, height = 5)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 4, height = 4, unit = "in", res = 300)
## png
## 2
saveRDS(v, file = paste0("../../rObjects/LBD_gene_voom.rds"))
v <- readRDS(paste0("../../rObjects/LBD_gene_voom.rds"))
Identify number of differential expressed genes.
pval <- 0.05
sumTable <-
summary(decideTests(
vfit, # object
adjust.method = "BH", # by default the method = "separate"
p.value = pval,
lfc = 0 # numeric, minimum absolute log2-fold change required
))
print(paste0(" FDRq < ", pval))
## [1] " FDRq < 0.05"
sumTable
## LBD - CONTROL
## Down 2590
## NotSig 13984
## Up 3493
coef <- 1
for (i in allComparisons) {
# p < 1, log2fc > 0
vTopTableAll <-
topTable(
veBayesFit,
coef = coef,
n = Inf,
p.value = 1,
lfc = 0
)
path <- paste("../../results/", tool, "/DEGs/LBD_gene_DEGs_FDRq1.00.txt", sep = "")
write.table(
vTopTableAll,
path,
sep = "\t",
row.names = FALSE,
quote = FALSE
)
# p < 0.05, log2fc > 0
vTopTable1 <-
topTable(
veBayesFit,
coef = coef,
n = Inf,
p.value = 0.05,
lfc = 0
)
path <- paste("../../results/", tool, "/DEGs/LBD_gene_DEGs_FDRq0.05.txt", sep = "")
write.table(
vTopTable1,
path,
sep = "\t",
row.names = FALSE,
quote = FALSE
)
# increment -----------------------------------------------------------------
coef <- coef + 1
}
Read and save table with all genes (FDRq = 1).
condition_vs_control <- read.table(
paste0("../../results/", tool, "/DEGs/LBD_gene_DEGs_FDRq1.00.txt", sep = ""),
header = TRUE,
sep = "\t",
stringsAsFactors = FALSE)
saveRDS(condition_vs_control, file = paste0("../../rObjects/LBD_gene_table.rds"))
Assign colors values based on FDRq cutoff of 0.05.
color_values <- vector()
max <- nrow(condition_vs_control)
for(i in 1:max){
if (condition_vs_control$adj.P.Val[i] < 0.05){
if (condition_vs_control$logFC[i] > 0){
color_values <- c(color_values, 1) # 1 when logFC > 0 and FDRq < 0.05
}
else if (condition_vs_control$logFC[i] < 0){
color_values <- c(color_values, 2) # 2 when logFC < 0 and FDRq < 0.05
}
}
else{
color_values <- c(color_values, 3) # 3 when FDRq >= 0.05
}
}
condition_vs_control$color_p0.05 <- factor(color_values)
Subset the top 10 up and down-regulated genes
up <- condition_vs_control[condition_vs_control$color_p0.05 == 1,]
up10 <- up[1:10,]
down <- condition_vs_control[condition_vs_control$color_p0.05 == 2,]
down <- subset(down, down$logFC < -1.5)
down10 <- down[1:7,]
hadjpval <- (-log10(max(
condition_vs_control$P.Value[condition_vs_control$adj.P.Val < 0.05],
na.rm=TRUE)))
p_vol <-
ggplot(data = condition_vs_control,
aes(x = logFC, # x-axis is logFC
y = -log10(P.Value), # y-axis will be -log10 of P.Value
color = color_p0.05)) + # color is based on factored color column
geom_point(alpha = 1.5, size = 1.7) + # create scatterplot, alpha makes points transparent
theme_bw() + # set color theme
theme(legend.position = "none") + # no legend
scale_color_manual(values = c("red", "blue","grey")) + # set factor colors
labs(
title = "", # no main title
x = expression(log[2](FC)), # x-axis title
y = expression(-log[10] ~ "(" ~ italic("p") ~ "-value)") # y-axis title
) +
theme(axis.title.x = element_text(size = 10),
axis.text.x = element_text(size = 10)) +
theme(axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 10)) +
geom_hline(yintercept = hadjpval, # horizontal line
colour = "#000000",
linetype = "dashed") +
ggtitle("LBD vs Control\nFDRq < 0.05") +
theme(plot.title = element_text(size = 10)) +
geom_text_repel(data = up10,
aes(x = logFC, y= -log10(P.Value), label = gene_name),
color = "maroon",
fontface="italic",
size = 4,
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
) +
geom_text_repel(data = down10,
aes(x = logFC, y= -log10(P.Value), label = gene_name),
color = "navyblue",
fontface="italic",
size = 4,
max.overlaps = getOption("ggrepel.max.overlaps", default = 15)
) #+
# scale_y_continuous(breaks = seq(0,8,by=1), limits = c(0,8)) +
# scale_x_continuous(breaks = seq(-3,3,by=1), limits = c(-3,3))
p_vol
# save
path <- paste0("../../results/", tool, "/volcano/LBD_gene_volcano_FDRq0.05")
saveToPDF(paste0(path, ".pdf"), width = 5.2, height = 5.2)
## png
## 2
saveToPNG(paste0(path, ".png"), width = 8, height = 6, unit = "in", res = 300)
## png
## 2
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server 7.9 (Maipo)
##
## Matrix products: default
## BLAS: /usr/lib64/libblas.so.3.4.2
## LAPACK: /usr/lib64/liblapack.so.3.4.2
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] glmnet_4.1-4 Matrix_1.3-4 caret_6.0-92
## [4] lattice_0.20-45 forcats_0.5.1 purrr_0.3.4
## [7] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [10] tidyverse_1.3.1 ggpubr_0.4.0 corrplot_0.92
## [13] plyr_1.8.7 Glimma_2.4.0 reshape_0.8.9
## [16] variancePartition_1.24.1 stringr_1.4.0 rtracklayer_1.54.0
## [19] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0
## [22] S4Vectors_0.32.4 BiocGenerics_0.40.0 philentropy_0.6.0
## [25] gplots_3.1.3 ggrepel_0.9.1 ggplot2_3.3.6
## [28] edgeR_3.36.0 limma_3.50.3 dplyr_1.0.9
## [31] BiocParallel_1.28.3 rmarkdown_2.14
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 tidyselect_1.1.2
## [3] lme4_1.1-30 RSQLite_2.2.14
## [5] AnnotationDbi_1.56.2 htmlwidgets_1.5.4
## [7] grid_4.1.2 pROC_1.18.0
## [9] munsell_0.5.0 codetools_0.2-18
## [11] future_1.26.1 withr_2.5.0
## [13] colorspace_2.0-3 Biobase_2.54.0
## [15] highr_0.9 knitr_1.39
## [17] rstudioapi_0.13 ggsignif_0.6.3
## [19] listenv_0.8.0 labeling_0.4.2
## [21] MatrixGenerics_1.6.0 Rdpack_2.3.1
## [23] GenomeInfoDbData_1.2.7 farver_2.1.1
## [25] bit64_4.0.5 parallelly_1.32.0
## [27] vctrs_0.4.1 generics_0.1.3
## [29] ipred_0.9-13 xfun_0.31
## [31] R6_2.5.1 doParallel_1.0.17
## [33] locfit_1.5-9.5 bitops_1.0-7
## [35] cachem_1.0.6 DelayedArray_0.20.0
## [37] assertthat_0.2.1 BiocIO_1.4.0
## [39] scales_1.2.0 nnet_7.3-16
## [41] gtable_0.3.0 globals_0.15.1
## [43] timeDate_3043.102 rlang_1.0.4
## [45] genefilter_1.76.0 splines_4.1.2
## [47] rstatix_0.7.0 ModelMetrics_1.2.2.2
## [49] broom_1.0.0 yaml_2.3.5
## [51] reshape2_1.4.4 abind_1.4-5
## [53] modelr_0.1.8 backports_1.4.1
## [55] tools_4.1.2 lava_1.6.10
## [57] ellipsis_0.3.2 jquerylib_0.1.4
## [59] RColorBrewer_1.1-3 Rcpp_1.0.9
## [61] progress_1.2.2 zlibbioc_1.40.0
## [63] RCurl_1.98-1.7 prettyunits_1.1.1
## [65] rpart_4.1-15 SummarizedExperiment_1.24.0
## [67] haven_2.5.0 fs_1.5.2
## [69] magrittr_2.0.3 data.table_1.14.2
## [71] reprex_2.0.1 matrixStats_0.62.0
## [73] hms_1.1.1 evaluate_0.15
## [75] xtable_1.8-4 pbkrtest_0.5.1
## [77] RhpcBLASctl_0.21-247.1 XML_3.99-0.10
## [79] readxl_1.4.0 shape_1.4.6
## [81] compiler_4.1.2 KernSmooth_2.23-20
## [83] crayon_1.5.1 minqa_1.2.4
## [85] htmltools_0.5.2 mgcv_1.8-38
## [87] tzdb_0.3.0 geneplotter_1.72.0
## [89] lubridate_1.8.0 DBI_1.1.3
## [91] dbplyr_2.2.0 MASS_7.3-54
## [93] boot_1.3-28 car_3.1-0
## [95] cli_3.3.0 rbibutils_2.2.8
## [97] parallel_4.1.2 gower_1.0.0
## [99] pkgconfig_2.0.3 GenomicAlignments_1.30.0
## [101] recipes_1.0.1 xml2_1.3.3
## [103] foreach_1.5.2 annotate_1.72.0
## [105] bslib_0.3.1 hardhat_1.2.0
## [107] XVector_0.34.0 prodlim_2019.11.13
## [109] rvest_1.0.2 digest_0.6.29
## [111] Biostrings_2.62.0 cellranger_1.1.0
## [113] restfulr_0.0.15 Rsamtools_2.10.0
## [115] gtools_3.9.3 rjson_0.2.21
## [117] nloptr_2.0.3 lifecycle_1.0.1
## [119] nlme_3.1-153 jsonlite_1.8.0
## [121] aod_1.3.2 carData_3.0-5
## [123] fansi_1.0.3 pillar_1.7.0
## [125] KEGGREST_1.34.0 fastmap_1.1.0
## [127] httr_1.4.3 survival_3.2-13
## [129] glue_1.6.2 png_0.1-7
## [131] iterators_1.0.14 bit_4.0.4
## [133] class_7.3-19 stringi_1.7.8
## [135] sass_0.4.1 blob_1.2.3
## [137] DESeq2_1.34.0 caTools_1.18.2
## [139] memoise_2.0.1 future.apply_1.9.0