knitr::opts_chunk$set(echo = TRUE)
library(limma)
library(edgeR)
library(gplots)
library(org.Hs.eg.db)
library(RColorBrewer)
library(tidyverse)
library(reshape2)
library(biomaRt)
library(ggplot2)
library(ggrepel)
library(tximport)
library(rtracklayer)
library(cowplot)
library(statmod)
library(DESeq2)
library(clusterProfiler)
library(pheatmap)
library(dplyr)
library(scales)
library(data.table)
library(IsoformSwitchAnalyzeR)
library(ggthemes)
library(ggpubr)
library(rstatix)
library(stringr)
library(tidyr)
library(writexl)
library(png)
library(grid)
library(cluster)
library(patchwork)
base_dir <- "~/projects/MERS_v_MOCK_24hrs/2023_results"
#Read the CSV
growth_data <- read_csv(file.path(base_dir, "Growth_curves/Growth_comparison_transcriptomics.csv"))
## Rows: 36 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Group
## dbl (2): HPI, Result
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Don't need to plot Mocks so remove them
growth_data <- growth_data %>%
filter(Group != "Mock") %>%
mutate(log_result = log10(Result))
#Log transform the data for plotting and group into timepoints
summary_growth_data <- growth_data %>%
group_by(HPI) %>%
summarise(
mean_log = mean(log_result),
sd_log = sd(log_result),
.groups = "drop"
)
# Plot growth curve
tcid50 <- ggplot(summary_growth_data, aes(x = HPI, y = mean_log)) +
geom_line(size = 1.2, color = "red") +
geom_point(size = 3, color = "red") +
geom_errorbar(aes(ymin = mean_log - sd_log, ymax = mean_log + sd_log), width = 0.5) +
scale_x_continuous(breaks = seq(0, max(summary_growth_data$HPI), by = 4)) +
scale_y_continuous(
breaks = c(0, 1,2,3, 4,5, 6),
labels = scales::math_format(10^.x)
) +
labs(
x = "Time",
y = "TCID50/ml",
) +
theme_classic()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
tcid50
# Read in metadata and make factors
coldata <- read.csv(file.path(base_dir, "metadata.csv"))
coldata$Time <- factor(coldata$Time)
coldata$Treatment <- factor(trimws(coldata$Treatment))
coldata$Contrasts <- factor(coldata$Contrasts)
# Import Gtf file
gtf_file <- "/home/tprince/references/Gencode_v49_GRCh38.14/gencode.v49.primary_assembly.annotation.gtf"
# Read in GTF file
gtf_content <- import(gtf_file, feature.type = 'gene') # pulls out gene info
annotation_gene <- data.frame(elementMetadata(gtf_content), stringsAsFactors = FALSE)
rownames(annotation_gene) <- annotation_gene$gene_id # changes rownames to gene ids
### Import counts from salmon ###
annotation_transcript <- elementMetadata(import(gtf_file, feature.type = 'transcript')) # get transcript info
tx2gene <- annotation_transcript[,c("transcript_id", "gene_id")] # must be in this order
# Libraries 1–6
libs <- paste0("Output_library_", 1:6, "/quants/")
# Build full paths
paths <- file.path(base_dir, libs)
# Get all quant.sf files
quant <- unlist(lapply(paths, function(p) {
list.files(
list.dirs(path = p, full.names = TRUE, recursive = FALSE),
pattern = "quant.sf",
full.names = TRUE
)
}))
id <- as.data.frame(quant)
colnames(id) = ('id')
coldata<-cbind(id, coldata) # combine with metadata
### Import Quant files using Tximport ###
txi_gene <- tximport(quant, type = 'salmon', tx2gene = tx2gene, ignoreTxVersion = FALSE, txOut = FALSE, ignoreAfterBar = TRUE)
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## transcripts missing from tx2gene: 20994
## summarizing abundance
## summarizing counts
## summarizing length
colnames(txi_gene$counts) <- coldata$id_names
# Make DGEList object of counts using EdgeR
# Create matrix of transcript counts per sample
counts <- txi_gene$counts
# Add gene names to rows
rownames(counts)<-rownames(txi_gene$counts) # adds gene names to rows
# Add sample names to columns
names<-(coldata[["id_names"]]) # creates vector of names
colnames(counts)<-names # adds sample names to columns
# create table of annotated transcripts in samples (35604 genes)
annotation_salmon <- annotation_gene[rownames(counts), ]
#### EdgeR DGE #### 35604 x 36
dgList<- DGEList(counts = counts, # numeric matrix of read counts
samples = coldata,
genes = annotation_salmon,
group = coldata$Contrasts)
# look at library sizes
barplot(dgList$samples$lib.size,names=colnames(dgList),las=2, cex.names = 0.8, main = 'Library size of samples (Unormalised)')
# look for outliers - r2_MERS_24 and r6_MERS_4 small library, r1_MERS_4 outlier too
plotMDS(
dgList,
gene.selection = 'pairwise',
col = as.integer(dgList$samples$Contrasts), # colour by Contrast
labels = dgList$samples$id_names)
#### Normalisation ####
# Filter based on counts and group structure
keep <- filterByExpr(dgList, group = dgList$samples$Contrasts) # keep those with min.count reads.
# Remove low expression genes and update DGList - - down from 35604 to 11943
dgList <- dgList[keep, , keep.lib.sizes = FALSE]
print(paste("The number of transcripts identified across unnormalised samples is:", sum(keep)))
## [1] "The number of transcripts identified across unnormalised samples is: 11943"
# remove samples with small sample sizes
dgList <- dgList[,which(!dgList$samples$id_names == "r6_MERS_4")] # small library
dgList <-dgList[,which(!dgList$samples$id_names == "r1_MERS_4")]#outlier
dgList <-dgList[,which(!dgList$samples$id_names == "r2_MERS_24")] # outlier
# Filter again based on counts and new group structure - down from 12228 to 12115
keep <- filterByExpr(dgList, group = dgList$samples$Contrasts)
dgList <- dgList[keep, , keep.lib.sizes = FALSE] # now 11929
print(paste("The number of transcripts identified across normalised samples is:", sum(keep)))
## [1] "The number of transcripts identified across normalised samples is: 11929"
# add normalisation factors
dgList <- calcNormFactors(dgList)
# check MDS plot now - much cleaner!
plotMDS(
dgList,
gene.selection = 'pairwise',
col = as.integer(dgList$samples$Contrasts), # colour by Contrast
labels = dgList$samples$id_names)
#### PCA plot ####
logcounts <- cpm(dgList, log = TRUE)
# Prepare PCA input
data <- logcounts
rownames(data) <- dgList$genes$gene_name
data <- t(data)
# metadata
meta_df <- data.frame(
sample = rownames(data),
group = dgList$samples$Contrasts
)
# run PCA
data.pca <- prcomp(data, center = TRUE, scale. = TRUE)
# build pca dataframe
pca_df <- as.data.frame(data.pca$x)
pca_df$sample <- rownames(pca_df)
pca_df <- pca_df %>%
left_join(meta_df, by = "sample") %>%
separate(group, into = c("virus", "timepoint"), sep = "_") %>%
mutate(
timepoint = factor(timepoint, levels = c("0", "4", "24")),
group_combined = factor(
paste(virus, timepoint, sep = "_"),
levels = c(
"MOCK_0", "MOCK_4", "MOCK_24",
"MERS_0", "MERS_4", "MERS_24"
)
)
)
# variance explained
var_explained <- (data.pca$sdev^2) / sum(data.pca$sdev^2) * 100
# plot PCA
pPCA <- ggplot(pca_df,
aes(x = PC1, y = PC2,
color = group_combined,
shape = timepoint)) +
geom_point(size = 5, alpha = 0.85) +
scale_color_manual(values = c(
"MOCK_0" = "grey80",
"MOCK_4" = "grey60",
"MOCK_24" = "grey40",
"MERS_0" = "#F4A6A6",
"MERS_4" = "#E07070",
"MERS_24" = "#D62728"
)) +
stat_ellipse(aes(group = group_combined),
linetype = "dashed",
linewidth = 0.6,
alpha = 0.7,
show.legend = FALSE) +
theme_classic(base_size = 10) +
theme(
axis.title = element_text(size = 10),
axis.text = element_text(size = 8),
legend.title = element_text(size = 9),
legend.text = element_text(size = 8),
legend.key.size = unit(0.4, "cm")
) +
labs(
x = paste0("PC1 (", round(var_explained[1], 1), "%)"),
y = paste0("PC2 (", round(var_explained[2], 1), "%)"),
color = "Condition",
shape = "Timepoint"
)
### Design Matrix Using The GLM Approach ####
design <- model.matrix(~0+ group, data = dgList$samples)
dgGlm <- estimateDisp(dgList, design, robust = TRUE)
fit <- glmQLFit(dgGlm, design, robust = TRUE)
plotBCV(dgGlm) # common BCV (red line) should be between 0.2 and 0.4 to give highest number of DE genes.
# make contrast comparing viruses at each time point
my.contrasts <- makeContrasts(
MERS_v_MOCK_t0 = groupMERS_0-groupMOCK_0,
MERS_v_MOCK_t4 = groupMERS_4-groupMOCK_4,
MERS_v_MOCK_t24 = groupMERS_24-groupMOCK_24,
levels = design)
#### DE genes for each contrast ####
de.MERS_v_MOCK_t0 <- glmQLFTest(fit, contrast=my.contrasts[,"MERS_v_MOCK_t0"])
de.MERS_v_MOCK_t4 <- glmQLFTest(fit, contrast=my.contrasts[, "MERS_v_MOCK_t4"])
de.MERS_v_MOCK_t24 <- glmQLFTest(fit, contrast=my.contrasts[, "MERS_v_MOCK_t24"])
# MA Plot for T24 comparison to check relationship between expression and LFC
# Create MA plot as a grob
ma_grob <- as_grob(function() {
status <- decideTests(de.MERS_v_MOCK_t24, lfc = 1)
plotMD(de.MERS_v_MOCK_t24,
status = status,
xlab = "Average logCPM",
ylab = expression(Log[2]*" Fold Change"),
main = "")
abline(h = c(-1, 1), col = "red", lty = 2)
})
##### Figure S1 - QC of RNA seq data ####
Fig_S1 <- plot_grid(
pPCA,
ma_grob,
ncol = 1,
labels = c("A", "B"),
rel_heights = c(1, 1.2)
)
Fig_S1
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
#### Top DGE for each contrast ####
res_t0 <- top_genes.MERS_v_MOCK_t0 <- topTags(de.MERS_v_MOCK_t0, n=Inf)$table
res_t4 <- top_genes.MERS_v_MOCK_t4 <- topTags(de.MERS_v_MOCK_t4, n = Inf)$table
res_t24 <- top_genes.MERS_v_MOCK_t24 <- topTags(de.MERS_v_MOCK_t24, n = Inf)$table
head(res_t24)
## source type score phase gene_id
## ENSG00000204389.11 HAVANA gene NA NA ENSG00000204389.11
## ENSG00000213946.3 HAVANA gene NA NA ENSG00000213946.3
## ENSG00000204388.7 HAVANA gene NA NA ENSG00000204388.7
## ENSG00000132002.10 HAVANA gene NA NA ENSG00000132002.10
## ENSG00000087074.10 HAVANA gene NA NA ENSG00000087074.10
## ENSG00000106211.11 HAVANA gene NA NA ENSG00000106211.11
## gene_type gene_name level tag
## ENSG00000204389.11 protein_coding HSPA1A 2 retrogene
## ENSG00000213946.3 processed_pseudogene DNAJB1P1 1 pseudo_consens
## ENSG00000204388.7 protein_coding HSPA1B 2 overlapping_locus
## ENSG00000132002.10 protein_coding DNAJB1 1 <NA>
## ENSG00000087074.10 protein_coding PPP1R15A 2 <NA>
## ENSG00000106211.11 protein_coding HSPB1 2 <NA>
## hgnc_id havana_gene artif_dupl logFC
## ENSG00000204389.11 HGNC:5232 OTTHUMG00000031201.4 <NA> 7.211466
## ENSG00000213946.3 HGNC:24988 OTTHUMG00000154320.1 <NA> 6.585354
## ENSG00000204388.7 HGNC:5233 OTTHUMG00000031202.3 <NA> 4.803360
## ENSG00000132002.10 HGNC:5270 OTTHUMG00000183289.4 <NA> 6.726978
## ENSG00000087074.10 HGNC:14375 OTTHUMG00000183330.3 <NA> 5.105122
## ENSG00000106211.11 HGNC:5246 OTTHUMG00000023228.4 <NA> 3.657821
## logCPM F PValue FDR
## ENSG00000204389.11 3.444803 265.9188 2.312585e-24 2.758683e-20
## ENSG00000213946.3 3.162449 169.6849 1.083374e-19 5.700082e-16
## ENSG00000204388.7 7.011745 339.4633 1.433502e-19 5.700082e-16
## ENSG00000132002.10 10.519399 300.0207 1.132502e-18 3.377404e-15
## ENSG00000087074.10 6.747094 289.9556 1.760168e-18 4.199410e-15
## ENSG00000106211.11 10.442316 269.4616 5.225708e-18 1.038958e-14
# Get significant results only #
sig_t0 <- res_t0[res_t0$FDR < 0.05 & abs(res_t0$logFC) > 1,] # 0
sig_t4 <- res_t4[res_t4$FDR < 0.05 & abs(res_t4$logFC) > 1,] # 0
sig_t24 <- res_t24[res_t24$FDR < 0.05 & abs(res_t24$logFC) > 1,] # 3416
head(sig_t24)
## source type score phase gene_id
## ENSG00000204389.11 HAVANA gene NA NA ENSG00000204389.11
## ENSG00000213946.3 HAVANA gene NA NA ENSG00000213946.3
## ENSG00000204388.7 HAVANA gene NA NA ENSG00000204388.7
## ENSG00000132002.10 HAVANA gene NA NA ENSG00000132002.10
## ENSG00000087074.10 HAVANA gene NA NA ENSG00000087074.10
## ENSG00000106211.11 HAVANA gene NA NA ENSG00000106211.11
## gene_type gene_name level tag
## ENSG00000204389.11 protein_coding HSPA1A 2 retrogene
## ENSG00000213946.3 processed_pseudogene DNAJB1P1 1 pseudo_consens
## ENSG00000204388.7 protein_coding HSPA1B 2 overlapping_locus
## ENSG00000132002.10 protein_coding DNAJB1 1 <NA>
## ENSG00000087074.10 protein_coding PPP1R15A 2 <NA>
## ENSG00000106211.11 protein_coding HSPB1 2 <NA>
## hgnc_id havana_gene artif_dupl logFC
## ENSG00000204389.11 HGNC:5232 OTTHUMG00000031201.4 <NA> 7.211466
## ENSG00000213946.3 HGNC:24988 OTTHUMG00000154320.1 <NA> 6.585354
## ENSG00000204388.7 HGNC:5233 OTTHUMG00000031202.3 <NA> 4.803360
## ENSG00000132002.10 HGNC:5270 OTTHUMG00000183289.4 <NA> 6.726978
## ENSG00000087074.10 HGNC:14375 OTTHUMG00000183330.3 <NA> 5.105122
## ENSG00000106211.11 HGNC:5246 OTTHUMG00000023228.4 <NA> 3.657821
## logCPM F PValue FDR
## ENSG00000204389.11 3.444803 265.9188 2.312585e-24 2.758683e-20
## ENSG00000213946.3 3.162449 169.6849 1.083374e-19 5.700082e-16
## ENSG00000204388.7 7.011745 339.4633 1.433502e-19 5.700082e-16
## ENSG00000132002.10 10.519399 300.0207 1.132502e-18 3.377404e-15
## ENSG00000087074.10 6.747094 289.9556 1.760168e-18 4.199410e-15
## ENSG00000106211.11 10.442316 269.4616 5.225708e-18 1.038958e-14
#### Plot heatmaps ####
# Split up/down
up_genes <- rownames(res_t24)[res_t24$FDR < 0.05 & res_t24$logFC > 1]
down_genes <- rownames(res_t24)[res_t24$FDR < 0.05 & res_t24$logFC < -1]
# Take top 25 each
up_top <- head(
up_genes[order(-res_t24[up_genes, "logFC"], res_t24[up_genes, "FDR"])],
25
)
down_top <- head(
down_genes[order(res_t24[down_genes, "logFC"], res_t24[down_genes, "FDR"])],
25
)
# Combine
sig_genes <- c(up_top, down_top)
# get expression matrix
heatmap_data <- logcounts[sig_genes, ]
# Scale genes
heatmap_scaled <- t(scale(t(heatmap_data)))
# Add gene names
gene_symbols <- dgList$genes$gene_name[
match(rownames(heatmap_scaled), rownames(dgList))
]
rownames(heatmap_scaled) <- gene_symbols
# # Remove NA gene names
Keep_names <- !is.na(rownames(heatmap_scaled)) & rownames(heatmap_scaled) != ""
heatmap_scaled <- heatmap_scaled[Keep_names, ]
# Remove duplicates (keep first occurrence)
heatmap_scaled <- heatmap_scaled[!duplicated(rownames(heatmap_scaled)), ]
# order contrasts
dgList$samples$Contrasts <- factor(
dgList$samples$Contrasts,
levels = c(
"MOCK_0", "MOCK_4", "MOCK_24",
"MERS_0", "MERS_4", "MERS_24"
)
)
##### All heatmap ####
p_hmap <-pheatmap(
heatmap_scaled,
annotation_col = dgList$samples["Contrasts"],
show_rownames = TRUE,
cluster_rows = TRUE,
cluster_cols = TRUE,
fontsize_row = 6,
fontsize_col = 8,
cutree_rows = 2,
cutree_cols = 2,
scale = "none",
show_colnames = FALSE
)
p_hmap
# heatmap for 24h samples:
# Identify 24h samples
samples_t24 <- dgList$samples$Time == "24"
# Subset expression matrix
heatmap_data_t24 <- logcounts[, samples_t24]
heatmap_data_t24 <- heatmap_data_t24[sig_genes, ]
# scale
heatmap_scaled_t24 <- t(scale(t(heatmap_data_t24)))
#Map gene names
gene_symbols <- dgList$genes$gene_name[
match(rownames(heatmap_scaled_t24), rownames(dgList))
]
# Replace rownames
rownames(heatmap_scaled_t24) <- gene_symbols
##### T24 Heatmap ####
pheatmap(
heatmap_scaled_t24,
annotation_col = dgList$samples['Contrasts'],
annotation_colors = list(
Treatment = c(MERS = "#D62728", MOCK = "grey50")
),
show_rownames = TRUE,
fontsize_row = 6,
cluster_rows = TRUE,
cluster_cols = TRUE,
cutree_rows = 2,
cutree_cols = 2,
)
#### Volcano plots ####
# add a column for plotting
res_t24$negLogFDR <- -log10(res_t24$FDR)
# define significance categories
res_t24$Significance <- "NS" # default
res_t24$Significance[res_t24$FDR < 0.05 & res_t24$logFC > 1] <- "Up"
res_t24$Significance[res_t24$FDR < 0.05 & res_t24$logFC < -1] <- "Down"
# get top ten up and down to label
# Top 10 upregulated (highest logFC)
top_up <- rownames(
res_t24[res_t24$FDR < 0.05 & res_t24$logFC > 1, ][
order(res_t24[res_t24$FDR < 0.05 & res_t24$logFC > 1, "FDR"]),
][1:10, ]
)
# Top 10 downregulated (lowest logFC)
top_down <- rownames(
res_t24[res_t24$FDR < 0.05 & res_t24$logFC < -1, ][
order(res_t24[res_t24$FDR < 0.05 & res_t24$logFC < -1, "FDR"]),
][1:10, ]
)
# Combine
top_genes <- c(top_up, top_down)
##### T24 Volcano ####
# Add helper columns
res_t24$negLogFDR <- -log10(res_t24$FDR)
res_t24$Significance <- "NS"
res_t24$Significance[res_t24$FDR < 0.05 & res_t24$logFC > 1] <- "Up"
res_t24$Significance[res_t24$FDR < 0.05 & res_t24$logFC < -1] <- "Down"
p_Vol<-ggplot(res_t24, aes(x = logFC, y = negLogFDR)) +
# background points
geom_point(color = "grey80", size = 1) +
# highlight significant
geom_point(data = subset(res_t24, Significance != "NS"),
aes(color = Significance),
size = 1.5) +
geom_text_repel(
data = res_t24[rownames(res_t24) %in% top_genes, ],
aes(label = gene_name),
size = 3,
max.overlaps = Inf
) +
scale_color_manual(values = c(
"Up" = "#D62728",
"Down" = "Blue",
"NS" = 'gray'
)) +
geom_vline(xintercept = c(-1, 1), linetype = "dashed") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
labs(
x = expression(Log[2] * " Fold Change"),
y = expression(-log[10] *"(FDR)")
) +
theme_classic()
##### Figure 1 ####
# Convert heatmap to grob
# Extract grob
hmap_grob <- p_hmap$gtable
# combine
Figure1 <- plot_grid(
hmap_grob,
p_Vol,
ncol = 1,
rel_heights = c(3, 1.5),
labels = c('A', 'B')
)
Figure1
#### Read in data and get in correct format ####
cts <- fread(file.path(base_dir, "IsoQuant/OUT/OUT.discovered_transcript_grouped_counts.tsv"))
# first column is mislabelled, so convert to 'isoform_id'
setnames(cts, 1, "isoform_id")
# Inconsistent column headers in counts so fix 'library6' to library_6'
colnames(cts) <- sub("^library6_", "library_6_", colnames(cts))
# Convert to a plain matrix with isoform IDs as rownames
isoform_ids <- cts$isoform_id
cts_mat <- as.matrix(cts[, -1])
rownames(cts_mat) <- isoform_ids
# IsoformSwitchAnalyzeR expects integer counts for count-based tests
cts_mat <- round(cts_mat)
storage.mode(cts_mat) <- "integer"
# Trim whitespace
coldata <- as.data.table(coldata)
coldata[, Treatment := trimws(Treatment)]
# Create custom Group column to double check contrasts column is correct
coldata$Group = paste0(coldata$Treatment,"_", coldata$Time)
# Construct the IsoQuant column name so that the files can be found
coldata[, isoquant_col := paste0("library_", Library, "_", Barcode)]
# Ensure library is a string
coldata$Library <- as.character(coldata$Library)
# Create conditions
condition_1 = c('MOCK_24','MOCK_4','MOCK_0','MERS_0','MERS_0')
condition_2 = c('MERS_24','MERS_4','MERS_0','MERS_24','MERS_4')
# Create covariates
covariates = c('Library')
# Create design table
all_conds <- unique(c(condition_1, condition_2))
design <- data.frame(
sampleID = coldata[coldata[['Group']] %in% all_conds,][['isoquant_col']],
condition = coldata[coldata[['Group']] %in% all_conds,][['Group']],
if (!is.null(covariates)) coldata[coldata[['Group']] %in% all_conds, ..covariates, drop = FALSE],
stringsAsFactors = FALSE
)
# Make set of comparisons
comparisonsToMake <- data.frame(
condition_1 = condition_1,
condition_2 = condition_2,
stringsAsFactors = FALSE
)
# import R data from isoquant
aSwitchList <- importRdata(
isoformCountMatrix = cts_mat,
designMatrix = design,
isoformExonAnnoation = file.path(base_dir, 'IsoQuant/OUT/OUT.transcript_models.gtf'), #By supplying the GTF/GFF file to the “isoformExonAnnoation” argument IsoformSwitchAnalyzeR will automatically also import and integrate CDS regions annotated in the GTF/GFF file as the ORF regions
isoformNtFasta = file.path(base_dir, "IsoQuant/OUT/isoquant_transcript.fa"),
comparisonsToMake = comparisonsToMake,
detectUnwantedEffects = TRUE # gives you corrected data rather than uncorrected in other analysis
)
## Step 1 of 10: Checking data...
## Warning in importRdata(isoformCountMatrix = cts_mat, designMatrix = design, :
## Using row.names as 'isoform_id' for 'isoformCountMatrix'. If not suitable you
## must add them manually.
## Please note that some condition names were changed due to names not suited for modeling in R.
## Step 2 of 10: Obtaining annotation...
## importing GTF (this may take a while)...
## Warning in importRdata(isoformCountMatrix = cts_mat, designMatrix = design, : The annotation and quantification (count/abundance matrix and isoform annotation) Seem to be slightly different.
## Specifically:
## 224 isoforms were only found in the annotation
##
## Please make sure this is on purpouse since differences will cause inaccurate quantification and thereby skew all analysis.
## If you have quantified with Salmon this could be normal since it as default only keep one copy of identical sequnces (can be prevented using the --keepDuplicates option)
## We strongly encurage you to go back and figure out why this is the case.
## Step 3 of 10: Fixing StringTie gene annoation problems...
## 7801 isoforms were assigned the ref_gene_id and gene_name of their associated gene_id.
## This was only done when the parent gene_id were associated with a single ref_gene_id/gene_name.
## 10291 genes_id were assigned their original gene_id instead of the StringTie gene_id.
## This was only done when it could be done unambiguous.
## Step 4 of 10: Calculating expression estimates from count data...
## Step 5 of 10: Testing for unwanted effects...
## Added 5 batch/covariates to the design matrix
## Step 6 of 10: Batch correcting expression estimates...
## Step 7 of 10: Extracting data from each condition...
## | | | 0% | |============ | 17% | |======================= | 33% | |=================================== | 50% | |=============================================== | 67% | |========================================================== | 83% | |======================================================================| 100%
## Step 8 of 10: Making comparisons...
## | | | 0% | |============== | 20% | |============================ | 40% | |========================================== | 60% | |======================================================== | 80% | |======================================================================| 100%
## Step 9 of 10: Making switchAnalyzeRlist object...
## Warning in createSwitchAnalyzeRlist(isoformFeatures = isoAnnot, exons =
## isoformExonStructure, : The gene_ids or isoform_ids were not unique - we
## identified multiple instances of the same gene_id/isoform_id on different
## chromosomes. To solve this we removed 7 gene_id. Please note there might still
## be duplicated gene_id located on the same chromosome. Some of these could be
## due to fusion transcripts which IsoformSwitchAnalyzeR cannot handle.
## Step 10 of 10: Guestimating differential usage...
## The GUESSTIMATED number of genes with differential isoform usage are:
## comparison estimated_genes_with_dtu
## 1 MERS_0 vs MERS_24 0 - 0
## 2 MERS_0 vs MERS_4 0 - 0
## 3 MOCK_0 vs MERS_0 0 - 0
## Done
head(aSwitchList)
## iso_ref gene_ref isoform_id gene_id
## 10772 isoComp_00000003 geneComp_00000002 ENST00000647439.1 A4GALT
## 10773 isoComp_00000004 geneComp_00000002 ENST00000868261.1 A4GALT
## 10774 isoComp_00000005 geneComp_00000002 transcript10629.chr22.nnic A4GALT
## 10775 isoComp_00000006 geneComp_00000002 transcript10646.chr22.nnic A4GALT
## 10776 isoComp_00000007 geneComp_00000002 transcript10656.chr22.nnic A4GALT
## 10777 isoComp_00000008 geneComp_00000002 transcript10657.chr22.nnic A4GALT
## condition_1 condition_2 gene_name gene_biotype
## 10772 MERS_0 MERS_24 A4GALT protein_coding
## 10773 MERS_0 MERS_24 A4GALT protein_coding
## 10774 MERS_0 MERS_24 A4GALT <NA>
## 10775 MERS_0 MERS_24 A4GALT <NA>
## 10776 MERS_0 MERS_24 A4GALT <NA>
## 10777 MERS_0 MERS_24 A4GALT <NA>
## iso_biotype gene_overall_mean gene_value_1
## 10772 protein_coding_CDS_not_defined 44.94325 83.28927
## 10773 protein_coding 44.94325 83.28927
## 10774 <NA> 44.94325 83.28927
## 10775 <NA> 44.94325 83.28927
## 10776 <NA> 44.94325 83.28927
## 10777 <NA> 44.94325 83.28927
## gene_value_2 gene_stderr_1 gene_stderr_2 gene_log2_fold_change
## 10772 20.91268 40.1677 6.695243 -1.993236
## 10773 20.91268 40.1677 6.695243 -1.993236
## 10774 20.91268 40.1677 6.695243 -1.993236
## 10775 20.91268 40.1677 6.695243 -1.993236
## 10776 20.91268 40.1677 6.695243 -1.993236
## 10777 20.91268 40.1677 6.695243 -1.993236
## gene_q_value iso_overall_mean iso_value_1 iso_value_2 iso_stderr_1
## 10772 NA 0.1535124 9.543908e-07 5.853965e-07 9.543908e-07
## 10773 NA 20.0735346 3.863452e+01 8.781112e+00 2.176952e+01
## 10774 NA 1.3236456 6.905757e+00 1.522394e-04 5.597167e+00
## 10775 NA 0.6832861 1.195674e+00 3.875641e-05 1.195645e+00
## 10776 NA 2.8775425 6.649797e+00 9.668967e-05 5.714149e+00
## 10777 NA 0.2999035 6.382510e-01 1.306486e-05 4.709620e-01
## iso_stderr_2 iso_log2_fold_change iso_q_value IF_overall IF1
## 10772 5.853965e-07 -5.323053e-05 NA 0.004938889 0.00000000
## 10773 2.114550e+00 -2.136146e+00 NA 0.535502778 0.46296667
## 10774 5.389927e-05 -9.411945e+00 NA 0.015563889 0.08528333
## 10775 6.894785e-06 -6.908116e+00 NA 0.011688889 0.03595000
## 10776 4.267425e-05 -9.365452e+00 NA 0.035002778 0.07181667
## 10777 8.265638e-06 -6.016597e+00 NA 0.007755556 0.01765000
## IF2 dIF isoform_switch_q_value gene_switch_q_value PTC
## 10772 0.000000e+00 0.0000000 NA NA NA
## 10773 6.284500e-01 0.1654833 NA NA FALSE
## 10774 3.333333e-05 -0.0852500 NA NA NA
## 10775 0.000000e+00 -0.0359500 NA NA NA
## 10776 1.666667e-05 -0.0718000 NA NA NA
## 10777 0.000000e+00 -0.0176500 NA NA NA
# Prefilter - enables removal of non-interesting data such as non-expressed isoforms
# or genes with only one annotated isoform.
aSwitchList<- preFilter(
aSwitchList
# Uses default params:
# geneExpressionCutoff = 1, - keep genes with av. expression > 1
# isoformExpressionCutoff = 0,
# IFcutoff = 0.01 - removes spurious or noise driven isoforms
)
## The filtering removed 13938 ( 37.17% of ) transcripts. There is now 23563 isoforms left
#### Test isoform switching ####
aSwitchListAnalyzed <- isoformSwitchTestSatuRn(
switchAnalyzeRlist = aSwitchList,
reduceToSwitchingGenes=TRUE
)
## Step 1 of 4: Creating SummarizedExperiment data object...
## Step 2 of 4: Fitting linear models...
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
## Step 3 of 4: Testing pairwise comparison(s)...
## Warning in sqrt(vapply(X = models, FUN = varContrast, contrast = contrasts[, :
## NaNs produced
## Warning in sqrt(vapply(X = models, FUN = varContrast, contrast = contrasts[, :
## NaNs produced
## Warning in sqrt(vapply(X = models, FUN = varContrast, contrast = contrasts[, :
## NaNs produced
## Warning in sqrt(vapply(X = models, FUN = varContrast, contrast = contrasts[, :
## NaNs produced
## Warning in sqrt(vapply(X = models, FUN = varContrast, contrast = contrasts[, :
## NaNs produced
## Warning in locfdr(zz = zvalues_mid, main = paste0("diagplot 1: ", main)): f(z)
## misfit = 1.5. Rerun with increased df
##
## Step 4 of 4: Preparing output...
## Result added switchAnalyzeRlist
## Done
#### Isoform Switch analysis: extract the sequences for external analysis and perform Isoform Switch analysis ####
aSwitchListAnalyzed <- extractSequence(
aSwitchListAnalyzed,
pathToOutput = file.path(base_dir,'IsoQuant/ISAR/Output'),
removeLongAAseq=TRUE,
alsoSplitFastaFile=TRUE,
writeToFile=TRUE # to avoid output when running this example data
)
## Step 1 of 3 : Extracting transcript nucleotide sequences...
## Step 2 of 3 : Extracting ORF AA sequences...
## Warning in extractSequence(aSwitchListAnalyzed, pathToOutput =
## file.path(base_dir, : There were 3 isoforms where the amino acid sequence had a
## stop codon before the annotated stop codon. These was removed.
## Step 3 of 3 : Preparing output...
## The 'removeLongAAseq' and 'removeShortAAseq' arguments:
## Removed : 0 isoforms.
## Trimmed : 2 isoforms (to only contain the first 1000 AA)
## The 'alsoSplitFastaFile' caused 1 fasta files, each with a subset of the data, to be created (each named X of Y).
## Done
#### External analysis ####
### Add CPC2 analysis- adds coding potential to 627 (100% of transcripts)
aSwitchListAnalyzed <- analyzeCPC2(
switchAnalyzeRlist = aSwitchListAnalyzed,
pathToCPC2resultFile = file.path(base_dir, "IsoQuant/ISAR/extdata/result_cpc2.txt"),
removeNoncodinORFs = FALSE # because ORF was not predicted de novo (used isoquant and gtf file)
)
## Added coding potential to 627 (100%) transcripts
### Add PFAM analysis # added domain information to 127 (20.26% transcripts)
aSwitchListAnalyzed <- analyzePFAM(
switchAnalyzeRlist = aSwitchListAnalyzed,
pathToPFAMresultFile = file.path(base_dir, "IsoQuant/ISAR/extdata/pfam_scan.out"),
showProgress=TRUE
)
## Converting AA coordinats to transcript and genomic coordinats...
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## Added domain information to 127 (20.26%) transcripts
### Add SignalP analysis - added signal peptide information to 9 (1.44% transcripts)
aSwitchListAnalyzed <- analyzeSignalP(
switchAnalyzeRlist = aSwitchListAnalyzed,
pathToSignalPresultFile = file.path(base_dir, "IsoQuant/ISAR/extdata/output_protein_type.txt")
)
## Added signal peptide information to 9 (1.44%) transcripts
### Add IUPred2A analysis added IDR information to 63 (10.05% transcripts)
aSwitchListAnalyzed <- analyzeIUPred2A(
switchAnalyzeRlist = aSwitchListAnalyzed,
pathToIUPred2AresultFile = file.path(base_dir, "IsoQuant/ISAR/extdata/isoformSwitchAnalyzeR_isoform_AA_complete.result"),
showProgress = TRUE
)
## Step 1 of 4 : Reading results into R...
## Step 2 of 4 : Extracting regions of interest...
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## Step 3 of 4 : Integrating IDR with binding site predictions...
## Step 4 of 4 : Converting AA coordinats to transcript and genomic coordinats...
## | | | 0% | |= | 2% | |== | 3% | |=== | 5% | |==== | 6% | |====== | 8% | |======= | 10% | |======== | 11% | |========= | 13% | |========== | 14% | |=========== | 16% | |============ | 17% | |============= | 19% | |============== | 21% | |================ | 22% | |================= | 24% | |================== | 25% | |=================== | 27% | |==================== | 29% | |===================== | 30% | |====================== | 32% | |======================= | 33% | |======================== | 35% | |========================== | 37% | |=========================== | 38% | |============================ | 40% | |============================= | 41% | |============================== | 43% | |=============================== | 44% | |================================ | 46% | |================================= | 48% | |================================== | 49% | |==================================== | 51% | |===================================== | 52% | |====================================== | 54% | |======================================= | 56% | |======================================== | 57% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 62% | |============================================ | 63% | |============================================== | 65% | |=============================================== | 67% | |================================================ | 68% | |================================================= | 70% | |================================================== | 71% | |=================================================== | 73% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 78% | |======================================================== | 79% | |========================================================= | 81% | |========================================================== | 83% | |=========================================================== | 84% | |============================================================ | 86% | |============================================================= | 87% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 92% | |================================================================== | 94% | |=================================================================== | 95% | |==================================================================== | 97% | |===================================================================== | 98% | |======================================================================| 100%
## Added IDR information to 63 (10.05%) transcripts
### Add DeepLoc2 analysis - added subcellular information to 145 (23.13% transcripts)
aSwitchListAnalyzed <- analyzeDeepLoc2(
switchAnalyzeRlist = aSwitchListAnalyzed,
pathToDeepLoc2resultFile = c(
file.path(base_dir, "IsoQuant/ISAR/extdata/results_69BBE44700254542643381C9.csv"),
file.path(base_dir, "IsoQuant/ISAR/extdata/results_69BBE6C100254AB25890A253.csv")
),
quiet = FALSE
)
## Added subcellular information to 145 (23.13%) transcripts
### Add DeepTMHMM analysis - added topology information to 147 transcripts (23.44%)
aSwitchListAnalyzed <- analyzeDeepTMHMM(
switchAnalyzeRlist = aSwitchListAnalyzed,
pathToDeepTMHMMresultFile = file.path(base_dir, "IsoQuant/ISAR/extdata/TMRs.gff3"),
showProgress=FALSE
)
## Step 1 of 2: Reading results into R...
## Step 2 of 2: Converting AA coordinats to transcript and genomic coordinats...
## Added topology information to 147 transcripts (23.44%).
# Get summary table
summary_df <- extractSwitchSummary(aSwitchListAnalyzed)
summary_df
## Comparison nrIsoforms nrSwitches nrGenes
## 1 MERS_0 vs MERS_24 114 110 84
## 2 MERS_0 vs MERS_4 47 43 39
## 3 MOCK_0 vs MERS_0 55 53 43
## 4 MOCK_24 vs MERS_24 84 73 59
## 5 MOCK_4 vs MERS_4 47 38 37
## 6 Combined 255 258 176
# Get significant switching genes regardless of if they have consequences or not
# Don't worry about 0 q values - R just rounds down very small numbers
top_switches <- extractTopSwitches(
aSwitchListAnalyzed,
filterForConsequences = FALSE,
n = Inf
)
#### Add alternative splicing analysis
aSwitchListAnalyzed <- analyzeAlternativeSplicing(
switchAnalyzeRlist = aSwitchListAnalyzed,
quiet=TRUE
)
#### Predicting Switch Consequences ############
consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status', 'ORF_seq_similarity', 'sub_cell_location', 'signal_peptide_identified', 'IDR_identified', 'IDR_type')
switch_consequences <- analyzeSwitchConsequences(
aSwitchListAnalyzed,
consequencesToAnalyze = consequencesOfInterest,
alpha = 0.05,
dIFcutoff = 0.1,
onlySigIsoforms=FALSE,
ntCutoff=50,
ntFracCutoff=NULL,
ntJCsimCutoff=0.8,
AaCutoff=10,
AaFracCutoff=0.8,
AaJCsimCutoff=0.9,
removeNonConseqSwitches=TRUE,
showProgress=TRUE,
quiet=FALSE
)
## Step 1 of 4: Extracting genes with isoform switches...
## Step 2 of 4: Analyzing 258 pairwise isoforms comparisons...
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
## Step 3 of 4: Massaging isoforms comparisons results...
## Step 4 of 4: Preparing output...
## Identified genes with containing isoforms switching with functional consequences...
#### 24h Subset ########
switch_cons_24 <- subsetSwitchAnalyzeRlist(
switch_consequences,
(switch_consequences$isoformFeatures$condition_1 == 'MOCK_24' &
switch_consequences$isoformFeatures$condition_2 == 'MERS_24')
)
# 4h subset
switch_cons_4 <- subsetSwitchAnalyzeRlist(
switch_consequences,
(switch_consequences$isoformFeatures$condition_1 == 'MOCK_4' &
switch_consequences$isoformFeatures$condition_2 == 'MERS_4')
)
### Extract data.frame with all switching isoforms in MERS 24 hr # 84 including non-sig
switchingIso24 <- extractTopSwitches(
switch_cons_24,
filterForConsequences = FALSE,
n = NA, # n=NA: all features are returned
extractGenes = FALSE, # when FALSE isoforms are returned
sortByQvals = TRUE # can change to false if want to sort by dIF values
)
## Warning in .fun(piece, ...): Less than Inf genes genes with significant
## switches were found. Returning those.
# Extract IF matrix
IF_matrix <- switch_consequences$isoformRepIF # 627 of 37 variables
# make a vector of isoform ids
isoform_ids <- IF_matrix$isoform_id
# remove isoform id column as next step cant use it
IF_matrix_num <- IF_matrix[ , !(colnames(IF_matrix) %in% "isoform_id")]
# make all numeric
IF_matrix_num <- data.matrix(IF_matrix_num)
# change rownames to isoform ids
rownames(IF_matrix_num) <- isoform_ids
# Match gene IDs to these isoforms
gene_map <- switch_consequences$isoformFeatures[ # 627
match(isoform_ids,
switch_consequences$isoformFeatures$isoform_id),
]
gene_ids <- gene_map$gene_id # 627
# Now find multi-isoform genes
multi_iso_genes <- names(table(gene_ids)[table(gene_ids) > 1]) # 176
keep_ids <- gene_ids %in% multi_iso_genes # 627
# Now lengths match
IF_matrix_num <- IF_matrix_num[keep_ids,] # 627
# Create timepoint and treatment variables
design$timepoint <- sub(".*_", "", design$condition)
design$treatment <- sub("_.*", "", design$condition)
# Calculate switch burden
switch_burden <- sapply(design$sampleID, function(sample){
sample_time <- design$timepoint[design$sampleID == sample]
matched_mock <- design$sampleID[
design$treatment == "MOCK" &
design$timepoint == sample_time
]
mock_mean <- rowMeans(
IF_matrix_num[, matched_mock, drop=FALSE],
na.rm=TRUE
)
sample_IF <- IF_matrix_num[,sample]
mean(abs(sample_IF - mock_mean), na.rm=TRUE)
})
### Make results dataframe (plot ready)
switch_burden_df <- data.frame(
sampleID = names(switch_burden),
switch_burden = switch_burden,
condition = design$condition[
match(names(switch_burden),
design$sampleID)
],
treatment = design$treatment[
match(names(switch_burden),
design$sampleID)
],
timepoint = design$timepoint[
match(names(switch_burden),
design$sampleID)
],
batch = design$Library[
match(names(switch_burden),
design$sampleID)
]
)
# make timepoint a factor so group correctly
switch_burden_df$timepoint <- factor(
switch_burden_df$timepoint,
levels=c("0","4","24")
)
# make condition a factor so group correctly
switch_burden_df$condition <- factor(
switch_burden_df$condition,
levels=c('MOCK_0', 'MERS_0', 'MOCK_4', 'MERS_4', 'MOCK_24', 'MERS_24')
)
switch_burden_df
## sampleID switch_burden condition treatment
## library_1_barcode01 library_1_barcode01 0.2131618 MERS_24 MERS
## library_1_barcode02 library_1_barcode02 0.1412752 MOCK_4 MOCK
## library_1_barcode03 library_1_barcode03 0.1332722 MOCK_0 MOCK
## library_1_barcode04 library_1_barcode04 0.1547297 MERS_0 MERS
## library_1_barcode05 library_1_barcode05 0.1409706 MOCK_24 MOCK
## library_1_barcode06 library_1_barcode06 0.1463495 MERS_0 MERS
## library_2_barcode07 library_2_barcode07 0.1186577 MOCK_0 MOCK
## library_2_barcode08 library_2_barcode08 0.1294693 MOCK_4 MOCK
## library_2_barcode09 library_2_barcode09 0.1424797 MOCK_4 MOCK
## library_2_barcode10 library_2_barcode10 0.1181119 MOCK_24 MOCK
## library_2_barcode11 library_2_barcode11 0.2314508 MERS_24 MERS
## library_2_barcode12 library_2_barcode12 0.1239082 MOCK_0 MOCK
## library_3_barcode09 library_3_barcode09 0.1267246 MOCK_24 MOCK
## library_3_barcode13 library_3_barcode13 0.1186271 MOCK_24 MOCK
## library_3_barcode14 library_3_barcode14 0.1167244 MOCK_4 MOCK
## library_3_barcode15 library_3_barcode15 0.1489548 MERS_0 MERS
## library_3_barcode16 library_3_barcode16 0.1315673 MOCK_4 MOCK
## library_3_barcode18 library_3_barcode18 0.1546273 MERS_4 MERS
## library_4_barcode19 library_4_barcode19 0.1754251 MERS_4 MERS
## library_4_barcode20 library_4_barcode20 0.1745196 MERS_4 MERS
## library_4_barcode21 library_4_barcode21 0.1182630 MOCK_24 MOCK
## library_4_barcode22 library_4_barcode22 0.1295101 MOCK_24 MOCK
## library_4_barcode23 library_4_barcode23 0.1419405 MOCK_0 MOCK
## library_4_barcode24 library_4_barcode24 0.1854326 MERS_24 MERS
## library_5_barcode01 library_5_barcode01 0.1344602 MOCK_4 MOCK
## library_5_barcode02 library_5_barcode02 0.1502007 MERS_0 MERS
## library_5_barcode03 library_5_barcode03 0.1678731 MERS_0 MERS
## library_5_barcode04 library_5_barcode04 0.1776802 MERS_24 MERS
## library_5_barcode05 library_5_barcode05 0.1782428 MERS_24 MERS
## library_5_barcode06 library_5_barcode06 0.1423066 MOCK_0 MOCK
## library_6_barcode07 library_6_barcode07 0.1542011 MOCK_0 MOCK
## library_6_barcode08 library_6_barcode08 0.1982791 MERS_24 MERS
## library_6_barcode10 library_6_barcode10 0.1462322 MERS_4 MERS
## library_6_barcode11 library_6_barcode11 0.1823359 MERS_0 MERS
## library_6_barcode12 library_6_barcode12 0.1456558 MERS_4 MERS
## library_6_barcode17 library_6_barcode17 0.1766053 MERS_4 MERS
## timepoint batch
## library_1_barcode01 24 1
## library_1_barcode02 4 1
## library_1_barcode03 0 1
## library_1_barcode04 0 1
## library_1_barcode05 24 1
## library_1_barcode06 0 1
## library_2_barcode07 0 2
## library_2_barcode08 4 2
## library_2_barcode09 4 2
## library_2_barcode10 24 2
## library_2_barcode11 24 2
## library_2_barcode12 0 2
## library_3_barcode09 24 3
## library_3_barcode13 24 3
## library_3_barcode14 4 3
## library_3_barcode15 0 3
## library_3_barcode16 4 3
## library_3_barcode18 4 3
## library_4_barcode19 4 4
## library_4_barcode20 4 4
## library_4_barcode21 24 4
## library_4_barcode22 24 4
## library_4_barcode23 0 4
## library_4_barcode24 24 4
## library_5_barcode01 4 5
## library_5_barcode02 0 5
## library_5_barcode03 0 5
## library_5_barcode04 24 5
## library_5_barcode05 24 5
## library_5_barcode06 0 5
## library_6_barcode07 0 6
## library_6_barcode08 24 6
## library_6_barcode10 4 6
## library_6_barcode11 0 6
## library_6_barcode12 4 6
## library_6_barcode17 4 6
# 1. Run ANOVA separately to see results
res_aov <- aov(switch_burden ~ condition, data = switch_burden_df)
shapiro.test(residuals(res_aov)) # P-value > 0.05: Data is likely normal (Parametric is okay).
##
## Shapiro-Wilk normality test
##
## data: residuals(res_aov)
## W = 0.96357, p-value = 0.2763
tukey_hsd(res_aov) # sig at 4hrs and 24hrs
## # A tibble: 15 × 9
## term group1 group2 null.value estimate conf.low conf.high p.adj
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 condition MOCK_0 MERS_0 0 0.0227 -0.00238 0.0478 0.094
## 2 condition MOCK_0 MOCK_4 0 -0.00305 -0.0281 0.0220 0.999
## 3 condition MOCK_0 MERS_4 0 0.0265 0.00139 0.0515 0.0339
## 4 condition MOCK_0 MOCK_24 0 -0.0103 -0.0354 0.0147 0.806
## 5 condition MOCK_0 MERS_24 0 0.0617 0.0366 0.0867 0.00000035
## 6 condition MERS_0 MOCK_4 0 -0.0257 -0.0508 -0.000671 0.0415
## 7 condition MERS_0 MERS_4 0 0.00377 -0.0213 0.0288 0.997
## 8 condition MERS_0 MOCK_24 0 -0.0330 -0.0581 -0.00797 0.00458
## 9 condition MERS_0 MERS_24 0 0.0390 0.0139 0.0640 0.000659
## 10 condition MOCK_4 MERS_4 0 0.0295 0.00444 0.0546 0.0138
## 11 condition MOCK_4 MOCK_24 0 -0.00729 -0.0324 0.0178 0.947
## 12 condition MOCK_4 MERS_24 0 0.0647 0.0396 0.0898 0.000000133
## 13 condition MERS_4 MOCK_24 0 -0.0368 -0.0619 -0.0117 0.00135
## 14 condition MERS_4 MERS_24 0 0.0352 0.0101 0.0603 0.00228
## 15 condition MOCK_24 MERS_24 0 0.0720 0.0469 0.0971 0.000000014
## # ℹ 1 more variable: p.adj.signif <chr>
# Run Tukey and Position on the data frame directly
tukey_res <- switch_burden_df %>%
tukey_hsd(switch_burden ~ condition) %>%
add_xy_position(x = "condition", step.increase = 0.01)
tukey_res <- tukey_res %>%
filter(group1=="MOCK_0" & group2=="MERS_0" |
group1=="MOCK_4" & group2=="MERS_4" |
group1=="MOCK_24" & group2=="MERS_24")
tukey_res
## # A tibble: 3 × 13
## term group1 group2 null.value estimate conf.low conf.high p.adj
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 condition MOCK_0 MERS_0 0 0.0227 -0.00238 0.0478 0.094
## 2 condition MOCK_4 MERS_4 0 0.0295 0.00444 0.0546 0.0138
## 3 condition MOCK_24 MERS_24 0 0.0720 0.0469 0.0971 0.000000014
## # ℹ 5 more variables: p.adj.signif <chr>, y.position <dbl>,
## # groups <named list>, xmin <dbl>, xmax <dbl>
#### Plot switch burden vs condition ####
p2A <- ggplot(switch_burden_df,
aes(condition, switch_burden, fill = treatment)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.12, size = 2) +
stat_pvalue_manual(tukey_res,
label = "p.adj.signif",
hide.ns = TRUE) +
theme_classic(base_size = 12) +
labs(
x = "Condition",
y = "Isoform switching burden"
)
p2A
#### Volcano plot of isoform switching across time ####
switch_consequences$isoformFeatures$condition_2 <- factor(
switch_consequences$isoformFeatures$condition_2,
levels = c("MERS_0", "MERS_4", "MERS_24")
)
p2B <- ggplot(
data = switch_consequences$isoformFeatures,
aes(x = dIF, y = -log10(isoform_switch_q_value))
) +
geom_point(
aes(color = ifelse(
is.na(dIF) | is.na(isoform_switch_q_value) | is.nan(isoform_switch_q_value),
"Not significant",
ifelse(abs(dIF) > 0.1 & isoform_switch_q_value < 0.05,
"Significant", "Not significant")
)),
size = 1
) +
geom_hline(yintercept = -log10(0.05), linetype = 'dashed') +
geom_vline(xintercept = c(-0.1, 0.1), linetype = 'dashed') +
facet_wrap(~ condition_2) +
scale_color_manual(
'Isoform Switch',
values = c('black', 'red')
) +
labs(x = 'dIF', y = expression(-Log[10] ~ '(Switch Q Value) - capped')) +
theme_bw() +
theme(panel.spacing = unit(1, "cm"))
p2B
## Warning: Removed 198 rows containing missing values or values outside the scale range
## (`geom_point()`).
#### Example Switchplot - top correlated between RNA and protein
png("pC_switchplot.png", width = 2000, height = 1600, res = 300)
switchPlot(
switch_consequences,
gene = "DNAJB1",
condition1 = "MOCK_24",
condition2 = "MERS_24"
)
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the IsoformSwitchAnalyzeR package.
## Please report the issue at
## <https://github.com/kvittingseerup/IsoformSwitchAnalyzeR/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## ℹ The deprecated feature was likely used in the IsoformSwitchAnalyzeR package.
## Please report the issue at
## <https://github.com/kvittingseerup/IsoformSwitchAnalyzeR/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
dev.off()
## png
## 2
p2C <- rasterGrob(readPNG("pC_switchplot.png"), interpolate = TRUE)
p2C <- ggdraw() + draw_grob(p2C)
# Just the main figure and isoform usage together
# Generate individual plots
p_main <- switchPlotTranscript(switch_cons_24, gene = "DNAJB1")
p_iso <- switchPlotIsoUsage(
switch_cons_24,
gene = "DNAJB1",
condition1 = "MOCK_24",
condition2 = "MERS_24"
)
# Combine into one figure (stacked vertically)
p2C <- plot_grid(
p_main,
ggdraw() +
draw_plot(p_iso, x = 0.25, width = 0.5),
ncol = 1,
rel_heights = c(0.7, 1.3)
)
p2C
#### Switch consequences at 24h ####
p2D <- extractConsequenceSummary(
switch_cons_24,
removeEmptyConsequences = TRUE
)
p2D
# #### Switch consequences at 0, 4 and 24h only ####
p2D <- extractConsequenceSummary(switch_consequences, removeEmptyConsequences = TRUE)
keep_plot <- c(
"MOCK_0 vs MERS_0",
"MOCK_4 vs MERS_4",
"MOCK_24 vs MERS_24"
)
p2D$data <- p2D$data %>%
dplyr::filter(Comparison %in% keep_plot) %>%
dplyr::mutate(
Comparison = factor(
Comparison,
levels = c(
"MOCK_0 vs MERS_0",
"MOCK_4 vs MERS_4",
"MOCK_24 vs MERS_24"
)
)
)
p2D$data <- p2D$data %>%
dplyr::mutate(
plotComparison = factor(
plotComparison,
levels = c(
"MOCK_0\nvs\nMERS_0",
"MOCK_4\nvs\nMERS_4",
"MOCK_24\nvs\nMERS_24"
)
)
)
p2D
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
##### Figure 2 ####
Figure2 <- ggdraw() +
# 🔹 Top row (smaller panels)
draw_plot(p2A, 0, 0.65, 0.5, 0.35) +
draw_plot(p2B, 0.5, 0.65, 0.5, 0.35) +
# 🔹 Bottom row (larger panels)
draw_plot(p2C, 0, 0, 0.5, 0.65) +
draw_plot(p2D, 0.5, 0, 0.5, 0.65) +
# 🔹 Labels
draw_plot_label(
label = c("A", "B", "C", "D"),
x = c(0.015, 0.46, 0.015, 0.46),
y = c(0.98, 0.98, 0.70, 0.70),
size = 16,
fontface = "bold"
)
## Warning: Removed 198 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
Figure2
A - Sample level isoform counts B - Distribution of isoform expression C - dIF distribution
# make isoform expression matrix
expr_mat <- switch_consequences$isoformRepExpression
# Convert to numeric matrix (remove isoform_id column)
expr_mat_num <- expr_mat[, !(colnames(expr_mat) %in% "isoform_id")]
expr_mat_num <- data.matrix(expr_mat_num)
# Set isoform IDs as rownames
rownames(expr_mat_num) <- expr_mat$isoform_id
# count detected isoforms per samples
iso_counts_df <- data.frame(
sample = colnames(expr_mat_num),
n_isoforms = colSums(expr_mat_num > 0)
)
# add metadata from design table
meta <- design %>%
dplyr::select(sampleID, condition) %>%
rename(sample = sampleID)
iso_counts_df <- iso_counts_df %>%
left_join(meta, by = "sample")
# split into treatment + timepoint
iso_counts_df <- iso_counts_df %>%
mutate(
treatment = sub("_.*", "", condition),
timepoint = sub(".*_", "", condition)
)
# fix ordering
iso_counts_df$condition <- factor(
iso_counts_df$condition,
levels = c("MOCK_0", "MOCK_4", "MOCK_24",
"MERS_0", "MERS_4", "MERS_24")
)
# plot
pS2A <- ggplot(iso_counts_df,
aes(x = condition,
y = n_isoforms,
fill = treatment)) +
geom_boxplot(width = 0.6, outlier.shape = NA, alpha = 0.8) +
geom_jitter(width = 0.15, size = 1.8, alpha = 0.6) +
scale_fill_manual(values = c(
"MOCK" = "grey60",
"MERS" = "#D62728"
)) +
theme_classic(base_size = 10) +
theme(
axis.text.x = element_text(angle = 30, hjust = 1),
legend.position = "top"
) +
labs(
x = "Condition",
y = "Number of detected isoforms",
fill = "Treatment"
)
pS2A
#### Distribution of isoform expression - PANEL B ####
# Convert isoform expression matrix to long format
expr_long <- as.data.frame(expr_mat_num) %>%
tibble::rownames_to_column("isoform_id") %>%
pivot_longer(
cols = -isoform_id,
names_to = "sample",
values_to = "expression"
)
# add metadata
expr_long <- expr_long %>%
left_join(design, by = c("sample" = "sampleID"))
# log transform
expr_long <- expr_long %>%
mutate(log_expr = log2(expression + 1))
expr_long$Treatment <- expr_long$treatment
expr_long$condition <- factor(
expr_long$condition,
levels = c("MOCK_0", "MOCK_4", "MOCK_24",
"MERS_0", "MERS_4", "MERS_24")
)
# Boxplots per sample
pS2B <- ggplot(expr_long,
aes(x = condition, y = log_expr, fill = Treatment)) +
geom_boxplot(outlier.size = 0.3, alpha = 0.8) +
scale_fill_manual(values = c(
"MOCK" = "grey60",
"MERS" = "#D62728"
)) +
#geom_jitter(width = 0.1, size = 0.4, alpha = 0.2) +
theme_classic(base_size = 10) +
theme(
axis.text.x = element_text(angle = 30, hjust = 1),
legend.position = "top"
) +
labs(
x = "Condition",
y = "log2(Expression + 1)"
)
pS2B
#### Density plot of Isoform fractions across time- PANEL C ####
# extract dIF
dIF_df <- switch_consequences$isoformFeatures
# remove NA values
dIF_df <- dIF_df %>%
filter(!is.na(dIF))
# Plot
pS2C <- ggplot(dIF_df,
aes(x = dIF)) +
geom_density(fill = "grey60", alpha = 0.7) +
geom_vline(xintercept = c(-0.1, 0.1),
linetype = "dashed",
linewidth = 0.6,
color = "#D62728") +
#annotate("text",
# x = 0.1,
# y = Inf,
# label = "|ΔIF| = 0.1",
# vjust = 1.5,
# hjust = -0.1,
# size = 3) +
facet_wrap(~ condition_2) +
theme_classic(base_size = 10) +
theme(
panel.spacing = unit(1, "lines") ) +
labs(
x = "Δ Isoform Fraction (dIF)",
y = "Density"
)
pS2C
##### Figure S2 ####
Fig_S2 <- plot_grid(
pS2A,
pS2B,
pS2C,
ncol = 1,
labels = c("A", "B", "C"),
label_size = 16,
rel_heights = c(1, 1.2, 1),
label_x = 0.01,
label_y = 1.02
)
Fig_S2
# store gene_ids in new object (take just one, not duplicates)
sig_isoforms_MERS24 <- unique(switchingIso24$gene_id) # 59
# Run clusterprofiler
go_enrichment_ALL <- enrichGO(
gene = sig_isoforms_MERS24,
OrgDb = org.Hs.eg.db,
keyType = 'SYMBOL',
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.05
)
# Make dataframe
GO_iso_results_df <- as.data.frame(go_enrichment_ALL)
head(GO_iso_results_df)
## ONTOLOGY ID
## GO:0002181 BP GO:0002181
## GO:0072655 BP GO:0072655
## GO:0070585 BP GO:0070585
## GO:0070303 BP GO:0070303
## GO:0072594 BP GO:0072594
## GO:0045039 BP GO:0045039
## Description
## GO:0002181 cytoplasmic translation
## GO:0072655 establishment of protein localization to mitochondrion
## GO:0070585 protein localization to mitochondrion
## GO:0070303 negative regulation of stress-activated protein kinase signaling cascade
## GO:0072594 establishment of protein localization to organelle
## GO:0045039 protein insertion into mitochondrial inner membrane
## GeneRatio BgRatio RichFactor FoldEnrichment zScore pvalue
## GO:0002181 5/38 165/18986 0.03030303 15.140351 8.169514 1.856958e-05
## GO:0072655 4/38 125/18986 0.03200000 15.988211 7.529009 1.111392e-04
## GO:0070585 4/38 133/18986 0.03007519 15.026514 7.269436 1.412342e-04
## GO:0070303 2/38 10/18986 0.20000000 99.926316 14.012800 1.737643e-04
## GO:0072594 6/38 462/18986 0.01298701 6.488722 5.348601 2.865064e-04
## GO:0045039 2/38 13/18986 0.15384615 76.866397 12.253737 3.000517e-04
## p.adjust qvalue geneID Count
## GO:0002181 0.01283158 0.01034032 RPL21/RPL31/RPL34/RPL10A/RPS9 5
## GO:0072655 0.03001778 0.02418981 TIMM8B/DDIT3/PITRM1/TIMM10 4
## GO:0070585 0.03001778 0.02418981 TIMM8B/DDIT3/PITRM1/TIMM10 4
## GO:0070303 0.03001778 0.02418981 PPIA/GSTP1 2
## GO:0072594 0.03455596 0.02784691 SNF8/TIMM8B/DDIT3/PEX13/PITRM1/TIMM10 6
## GO:0045039 0.03455596 0.02784691 TIMM8B/TIMM10 2
# GO enrichment BP only for figure
go_enrichment_BP <- enrichGO(
gene = sig_isoforms_MERS24,
OrgDb = org.Hs.eg.db,
keyType = 'SYMBOL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05
)
# Make dataframe
GO_iso_resultsBP_df <- as.data.frame(go_enrichment_BP)
# Plot the results
dotplot(go_enrichment_BP)
dotplot(go_enrichment_ALL)
# Panel A - GO enrichment
p3A <- dotplot(go_enrichment_BP, showCategory = 8) +
theme_classic(base_size = 8) +
theme(
legend.position = "bottom"
) +
theme(
plot.title = element_text(size = 10),
axis.title = element_text(size = 8),
axis.text.y = element_text(size = 5),
axis.text.x = element_text(size = 8)
)
p3A
#Add metadata to IF_iso_long (list of IFs for every isoform in every sample)
IF_iso_long <- IF_iso_long %>%
left_join(design, by = "sampleID")
#Average Replicates per condition (treatment and time)
IF_avg <- IF_iso_long %>% # 3762 x 4
group_by(isoform_id, treatment, timepoint) %>%
summarise(mean_IF = mean(IF, na.rm = TRUE), .groups = "drop")
# dIF vs mock baseline
# Compute delta vs matched MOCK # 1881 x 5
IF_delta <- IF_avg %>%
dplyr::select(isoform_id, treatment, timepoint, mean_IF) %>%
tidyr::pivot_wider(
names_from = treatment,
values_from = mean_IF
) %>%
dplyr::mutate(delta_IF = MERS - MOCK)
# create matrix
delta_mat <- IF_delta %>% # [1:627, 1:3]
dplyr::select(isoform_id, timepoint, delta_IF) %>%
dplyr::group_by(isoform_id) %>%
tidyr::pivot_wider(
names_from = timepoint,
values_from = delta_IF
) %>%
dplyr::ungroup() %>%
tibble::column_to_rownames("isoform_id") %>%
as.matrix()
# remove incomplete rows
delta_mat <- delta_mat[complete.cases(delta_mat), ] # didnt remove anything
delta_mat <- delta_mat[, c("0", "4", "24")]
# scale
delta_mat_scaled <- t(scale(t(delta_mat)))
# Then cluster delta_IF trajectories instead of raw IF
set.seed(123)
# decide on number of clusters using elbow analysis
wss <- sapply(2:8, function(k){
kmeans(delta_mat_scaled, centers=k, nstart=10)$tot.withinss
})
# make elbow df
elbow_df <- data.frame(
k = 2:8,
wss = wss
)
# elbow plot - pick 5?
pS3A <- ggplot(elbow_df, aes(x = k, y = wss)) +
geom_line(color = "black") +
geom_point(size = 2) +
theme_classic(base_size = 10) +
labs(
x = "Number of clusters (k)",
y = "Total within-cluster sum of squares"
)
pS3A <- pS3A +
geom_vline(xintercept = 5,
linetype = "dashed",
color = "#D62728") +
annotate("text",
x = 5,
y = max(wss),
label = "k = 5",
vjust = -0.5,
size = 3)
pS3A
# Back it up with another analysis, silouette analysis
# set number of clusters
km <- kmeans(delta_mat_scaled, centers = 5) #can change number of clusters
#calculate silouette scores using euclidean distances
sil <- silhouette(km$cluster, dist(delta_mat_scaled))
# make df of sil widths
sil_df <- data.frame(
silhouette_width = sil[, 3]
)
# calculate scores for each k value between 2 and 8
sil_widths <- sapply(2:8, function(k) {
km_tmp <- kmeans(delta_mat_scaled, centers = k, nstart = 10)
sil_tmp <- silhouette(km_tmp$cluster, dist(delta_mat_scaled))
mean(sil_tmp[, 3])
})
# make df of results of sil_widths
sil_df2 <- data.frame(
k = 2:8,
silhouette = sil_widths
)
#plot
pS3B <- ggplot(sil_df2, aes(x = k, y = silhouette)) +
geom_line(color = "black") +
geom_point(size = 2) +
geom_vline(xintercept = 5,
linetype = "dashed",
color = "#D62728") +
annotate("text",
x = 5,
y = max(sil_df2$silhouette),
label = "k = 5",
vjust = -0.5,
size = 3) +
theme_classic(base_size = 10) +
labs(
x = "Number of clusters (k)",
y = "Average silhouette width"
)
pS3B
##### Figure S3 ####
Fig_S3 <- plot_grid(
pS3A,
pS3B,
ncol = 1,
labels = c("A", "B"),
label_size = 16,
rel_heights = c(1, 1)
)
Fig_S3
# Add cluster assignment
clusters <- km$cluster
# add clusters to df
delta_df <- as.data.frame(delta_mat_scaled) %>% # 627 x 5
tibble::rownames_to_column("isoform_id") %>%
dplyr::mutate(cluster = clusters)
# Convert to long format # 1881 x 4
delta_long <- delta_df %>%
tidyr::pivot_longer(
cols = -c(isoform_id, cluster),
names_to = "timepoint",
values_to = "delta_IF"
)
# Ensure correct order
delta_long$timepoint <- factor(delta_long$timepoint, levels = c("0", "4", "24"))
#### Trajectory Plot - each plot (1-4) is a cluster. Grey lines are each isoform (noise) amd red line is the mean IF ####
p4A <- ggplot(delta_long,
aes(x = timepoint, y = delta_IF)) +
geom_line(aes(group = isoform_id), alpha = 0.1, color = "grey50") +
stat_summary(
aes(group = 1),
fun = mean,
geom = "line",
color = "red",
linewidth = 1.2
) +
facet_wrap(~ cluster) +
theme_classic() +
labs(
x = "Timepoint",
y = "Δ Isoform usage",
title = "Isoform switching trajectories"
)
p4A
#### Gene/pathway enrichment per cluster - biological insight ####
cluster_gene_map <- delta_df %>%
left_join(
switch_consequences$isoformFeatures[, c("isoform_id","gene_id")],
by = "isoform_id"
)
# extract genes:
cluster1_genes <- cluster_gene_map %>% # 109
dplyr::filter(cluster == 1) %>%
dplyr::pull(gene_id) %>%
unique()
cluster2_genes <- cluster_gene_map %>% #80
dplyr::filter(cluster == 2) %>%
dplyr::pull(gene_id) %>%
unique()
cluster3_genes <- cluster_gene_map %>% # 116
dplyr::filter(cluster == 3) %>%
dplyr::pull(gene_id) %>%
unique()
cluster4_genes <- cluster_gene_map %>% # 85
dplyr::filter(cluster == 4) %>%
dplyr::pull(gene_id) %>%
unique()
cluster5_genes <- cluster_gene_map %>% # 106
dplyr::filter(cluster == 5) %>%
dplyr::pull(gene_id) %>%
unique()
# Convert ensemble to entrez gene ids for enrichGO
gene_conversion <- bitr(
cluster1_genes,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(cluster1_genes, fromType = "SYMBOL", toType = "ENTREZID", :
## 35.78% of input gene IDs are fail to map...
cluster1_entrez <- gene_conversion$ENTREZID # 70
gene_conversion <- bitr(
cluster2_genes,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(cluster2_genes, fromType = "SYMBOL", toType = "ENTREZID", :
## 33.75% of input gene IDs are fail to map...
cluster2_entrez <- gene_conversion$ENTREZID # 53
gene_conversion <- bitr(
cluster3_genes,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(cluster3_genes, fromType = "SYMBOL", toType = "ENTREZID", :
## 34.48% of input gene IDs are fail to map...
cluster3_entrez <- gene_conversion$ENTREZID # 76
gene_conversion <- bitr(
cluster4_genes,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(cluster4_genes, fromType = "SYMBOL", toType = "ENTREZID", :
## 38.82% of input gene IDs are fail to map...
cluster4_entrez <- gene_conversion$ENTREZID # 85
gene_conversion <- bitr(
cluster5_genes,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(cluster5_genes, fromType = "SYMBOL", toType = "ENTREZID", :
## 33.96% of input gene IDs are fail to map...
cluster5_entrez <- gene_conversion$ENTREZID # 70
# Mapping rates for reporting - all similar. Due to incomplete annotation of transcripts.
mapping_rate_1 <- length(cluster1_entrez) / length(cluster1_genes) # 64.22%
mapping_rate_2 <- length(cluster2_entrez) / length(cluster2_genes) # 66.25%
mapping_rate_3 <- length(cluster3_entrez) / length(cluster3_genes) # 65.51%
mapping_rate_4 <- length(cluster4_entrez) / length(cluster4_genes) # 61.17%
mapping_rate_5 <- length(cluster5_entrez) / length(cluster5_genes) # 66.03%
# enrichGO analysis
ego_cluster1_ALL <- enrichGO(
gene = cluster1_entrez,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "ALL",
pAdjustMethod = "BH"
)
ego_cluster2_ALL <- enrichGO(
gene = cluster2_entrez,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "ALL",
pAdjustMethod = "BH"
)
ego_cluster3_ALL <- enrichGO(
gene = cluster3_entrez,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "ALL",
pAdjustMethod = "BH"
)
ego_cluster4_ALL <- enrichGO(
gene = cluster4_entrez,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "ALL",
pAdjustMethod = "BH"
)
ego_cluster5_ALL <- enrichGO(
gene = cluster5_entrez,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "ALL",
pAdjustMethod = "BH"
)
# Create dataframes
GO_term_dataframe1 <- as.data.frame(ego_cluster1_ALL) # 9
GO_term_dataframe2 <- as.data.frame(ego_cluster2_ALL) # 5
GO_term_dataframe3 <- as.data.frame(ego_cluster3_ALL) # 32
GO_term_dataframe4 <- as.data.frame(ego_cluster4_ALL) # 8
GO_term_dataframe5 <- as.data.frame(ego_cluster5_ALL) # 10
# Make combined dataframe
GO_list <- list(
cluster1 = GO_term_dataframe1,
cluster2 = GO_term_dataframe2,
cluster3 = GO_term_dataframe3,
cluster4 = GO_term_dataframe4,
cluster5 = GO_term_dataframe5
)
combined_GO <- bind_rows(GO_list, .id = "cluster")
head(combined_GO)
## cluster ONTOLOGY ID Description
## GO:0002181...1 cluster1 BP GO:0002181 cytoplasmic translation
## GO:0044391...2 cluster1 CC GO:0044391 ribosomal subunit
## GO:0022626...3 cluster1 CC GO:0022626 cytosolic ribosome
## GO:0005840...4 cluster1 CC GO:0005840 ribosome
## GO:0015934...5 cluster1 CC GO:0015934 large ribosomal subunit
## GO:0022625...6 cluster1 CC GO:0022625 cytosolic large ribosomal subunit
## GeneRatio BgRatio RichFactor FoldEnrichment zScore
## GO:0002181...1 6/55 165/18986 0.03636364 12.552727 8.033504
## GO:0044391...2 7/61 186/19960 0.03763441 12.314472 8.583456
## GO:0022626...3 6/61 118/19960 0.05084746 16.637955 9.432937
## GO:0005840...4 7/61 245/19960 0.02857143 9.348946 7.280061
## GO:0015934...5 5/61 114/19960 0.04385965 14.351452 7.915222
## GO:0022625...6 4/61 57/19960 0.07017544 22.962324 9.193378
## pvalue p.adjust qvalue
## GO:0002181...1 8.016910e-06 0.0088827363 8.101299e-03
## GO:0044391...2 1.554764e-06 0.0001223802 9.261661e-05
## GO:0022626...3 1.599741e-06 0.0001223802 9.261661e-05
## GO:0005840...4 9.560574e-06 0.0004875893 3.690046e-04
## GO:0015934...5 2.565108e-05 0.0008452751 6.396991e-04
## GO:0022625...6 2.762337e-05 0.0008452751 6.396991e-04
## geneID Count
## GO:0002181...1 6203/6160/6201/4736/6144/6136 6
## GO:0044391...2 6203/29093/6160/6201/4736/6144/6136 7
## GO:0022626...3 6203/6160/6201/4736/6144/6136 6
## GO:0005840...4 6203/29093/6160/6201/4736/6144/6136 7
## GO:0015934...5 29093/6160/4736/6144/6136 5
## GO:0022625...6 6160/4736/6144/6136 4
# Panel B - Cluster-specific enrichment
p1 <- dotplot(ego_cluster1_ALL, showCategory = 8) +
ggtitle("Cluster 1 - Early transient induction") +
theme_classic(base_size = 7) +
theme(
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 6),
strip.text = element_text(size = 6),
legend.text = element_text(size = 5),
legend.title = element_text(size = 6),
legend.key.size = unit(0.3, "cm"),
plot.title = element_text(size = 7),
plot.margin = ggplot2::margin(5, 5, 5, 5)
) +
scale_size(range = c(1.5, 4))
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
p2 <- dotplot(ego_cluster2_ALL, showCategory = 8) +
ggtitle("Cluster 2 - Early sustained activation") +
theme_classic(base_size = 7) +
theme(
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 6),
strip.text = element_text(size = 6),
legend.text = element_text(size = 5),
legend.title = element_text(size = 6),
legend.key.size = unit(0.3, "cm"),
plot.title = element_text(size = 7),
plot.margin = ggplot2::margin(5, 5, 5, 5)
) +
scale_size(range = c(1.5, 4))
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
p3 <- dotplot(ego_cluster3_ALL, showCategory = 8) +
ggtitle("Cluster 3 - Late induction") +
theme_classic(base_size = 7) +
theme(
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 6),
strip.text = element_text(size = 6),
legend.text = element_text(size = 5),
legend.title = element_text(size = 6),
legend.key.size = unit(0.3, "cm"),
plot.title = element_text(size = 7),
plot.margin = ggplot2::margin(5, 5, 5, 5)
) +
scale_size(range = c(1.5, 4))
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
p4 <- dotplot(ego_cluster4_ALL, showCategory = 8) +
ggtitle("Cluster 4 - Early transient repression") +
theme_classic(base_size = 7) +
theme(
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 6),
strip.text = element_text(size = 6),
legend.text = element_text(size = 5),
legend.title = element_text(size = 6),
legend.key.size = unit(0.3, "cm"),
plot.title = element_text(size = 7),
plot.margin = ggplot2::margin(5, 5, 5, 5)
) +
scale_size(range = c(1.5, 4))
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
p5 <- dotplot(ego_cluster5_ALL, showCategory = 8) +
ggtitle("Cluster 5 - Progressive repression") +
theme_classic(base_size = 7) +
theme(
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 6),
strip.text = element_text(size = 6),
legend.text = element_text(size = 5),
legend.title = element_text(size = 6),
legend.key.size = unit(0.3, "cm"),
plot.title = element_text(size = 7),
plot.margin = ggplot2::margin(5, 5, 5, 5)
) +
scale_size(range = c(1.5, 4))
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
# Create combined figure ###
p4B <- (p1 | p2 | p3) /
(p4 | p5 | plot_spacer())
##### Figure 4 ####
Figure4 <- plot_grid(
p4A,
p4B,
ncol = 1,
labels = c("A", "B"),
label_size = 16,
rel_heights = c(1, 1.5),
label_x = 0.01,
label_y = 1.02
)
Figure4