Samples: 12 Features: 18361
Let’s perform end-to-end differential expression analysis of a factorial experiment to discover the impact of a cancer drug called “doxorubicin”, specifically on the hearts of mice from two genetic backgrounds. One of the known side effects of this treatment is cardiotoxicity.
Top2b mice are mice that get Top2b deleted specifically from their heart cells. The control solution is “pbs”.
If doxorubicin requires Top2b to exert a cardiotoxic effect the Top2b mice should not be effected.
# Load packages
library(Biobase)
library(limma)
dox <- readRDS("~/Applications/differential_expression/dox.rds")
dim(dox)
Features Samples
29532 12
# Log transform
exprs(dox) <- log(exprs(dox))
# Create genotype vector
plotDensities(dox, group = pData(dox)[, "genotype"], legend = "topright")
# Quantile normalize
exprs(dox) <- normalizeBetweenArrays(exprs(dox))
plotDensities(dox, group = pData(dox)[, "genotype"], legend = "topright")
# Determine the genes with mean expression level greater than 0
keep <- rowMeans(exprs(dox)) > 0
sum(keep)
[1] 18361
# Filter the genes
dox <- dox[keep,]
plotDensities(dox, group = pData(dox)[, "genotype"], legend = "topright")
# Find the row which contains Top2b expression data
top2b <- which(fData(dox)[,"symbol"] == "Top2b")
# Plot Top2b expression versus genotype
boxplot(exprs(dox)[top2b, ] ~ pData(dox)[, "genotype"],
main = fData(dox)[top2b, ])
Do the samples cluster by their experimental group? Are there batch effects?
# Plot principal components labeled by genotype
plotMDS(dox, labels = pData(dox)[, "genotype"], gene.selection = "common")
# Plot principal components labeled by treatment
plotMDS(dox, labels = pData(dox)[, "treatment"], gene.selection = "common")
Prepare negative binomial model with no intercept and contrasts.
# Create single variable
group <- with(pData(dox), paste(genotype, treatment, sep = "."))
group <- factor(group)
# Create design matrix with no intercept
design <- model.matrix(~ 0 + group)
colnames(design) <- levels(group)
# Count the number of samples modeled by each coefficient
colSums(design)
top2b.dox top2b.pbs wt.dox wt.pbs
3 3 3 3
# Create a contrasts matrix
cm <- makeContrasts(dox_wt = wt.dox - wt.pbs,
dox_top2b = top2b.dox - top2b.pbs,
interaction = (top2b.dox - top2b.pbs) - (wt.dox - wt.pbs),
levels = design)
# View the contrasts matrix
cm
Contrasts
Levels dox_wt dox_top2b interaction
top2b.dox 0 1 1
top2b.pbs 0 -1 -1
wt.dox 1 0 -1
wt.pbs -1 0 1
# Fit the model
fit <- lmFit(dox, design)
# Fit the contrasts
fit2 <- contrasts.fit(fit, contrasts = cm)
# Calculate the t-statistics for the contrasts
fit2 <- eBayes(fit2)
# Summarize results
results <- decideTests(fit2)
summary(results)
dox_wt dox_top2b interaction
Down 3180 0 1254
NotSig 12265 18361 15621
Up 2916 0 1486
Which histograms are not uniformly disributed?
# Obtain the summary statistics for the contrast dox_wt
stats_dox_wt <- topTable(fit2, coef = "dox_wt", number = nrow(fit2),
sort.by = "none")
# Obtain the summary statistics for the contrast dox_top2b
stats_dox_top2b <- topTable(fit2, coef = "dox_top2b", number = nrow(fit2),
sort.by = "none")
# Obtain the summary statistics for the contrast interaction
stats_interaction <- topTable(fit2, coef = "interaction", number = nrow(fit2),
sort.by = "none")
# Create histograms of the p-values for each contrast
hist(stats_dox_wt[, "P.Value"])
hist(stats_dox_top2b[, "P.Value"])
hist(stats_interaction[, "P.Value"])
# Extract the entrez gene IDs
entrez <- fit2$genes[,"entrez"]
# Test for enriched KEGG Pathways for contrast dox_wt
enrich_dox_wt <- kegga(fit2, coef = "dox_wt", geneid = entrez, species = "Mm")
# View the top 5 enriched KEGG pathways
topKEGG(enrich_dox_wt, number = 5)
# Test for enriched KEGG Pathways for contrast interaction
enrich_interaction <- kegga(fit2, coef = "interaction", geneid = entrez, species = "Mm")
# View the top 5 enriched KEGG pathways
topKEGG(enrich_interaction, number = 5)