The Data (Zhang et al. 2012) and the Hypothesis

Samples: 12 Features: 18361

Let’s perform end-to-end differential expression analysis of a factorial experiment to discover the impact of a cancer drug called “doxorubicin”, specifically on the hearts of mice from two genetic backgrounds. One of the known side effects of this treatment is cardiotoxicity.

Top2b mice are mice that get Top2b deleted specifically from their heart cells. The control solution is “pbs”.

If doxorubicin requires Top2b to exert a cardiotoxic effect the Top2b mice should not be effected.


Prep Data

# Load packages
library(Biobase)
library(limma)

dox <- readRDS("~/Applications/differential_expression/dox.rds")

dim(dox)
Features  Samples 
   29532       12 
# Log transform
exprs(dox) <- log(exprs(dox))

Plot Densities

# Create genotype vector
plotDensities(dox,  group = pData(dox)[, "genotype"], legend = "topright")

# Quantile normalize
exprs(dox) <- normalizeBetweenArrays(exprs(dox))
plotDensities(dox,  group = pData(dox)[, "genotype"], legend = "topright")

# Determine the genes with mean expression level greater than 0
keep <- rowMeans(exprs(dox)) > 0
sum(keep)
[1] 18361
# Filter the genes
dox <- dox[keep,]
plotDensities(dox, group = pData(dox)[, "genotype"], legend = "topright")


Top2b Expression Compared Between Groups (Sanity Check)

# Find the row which contains Top2b expression data
top2b <- which(fData(dox)[,"symbol"] == "Top2b")

# Plot Top2b expression versus genotype
boxplot(exprs(dox)[top2b, ] ~ pData(dox)[, "genotype"],
        main = fData(dox)[top2b, ])


Dimensional Reduction

Do the samples cluster by their experimental group? Are there batch effects?

# Plot principal components labeled by genotype
plotMDS(dox, labels = pData(dox)[, "genotype"], gene.selection = "common")

# Plot principal components labeled by treatment
plotMDS(dox, labels = pData(dox)[, "treatment"], gene.selection = "common")


Prep Model

Prepare negative binomial model with no intercept and contrasts.

# Create single variable
group <- with(pData(dox), paste(genotype, treatment, sep = "."))
group <- factor(group)

# Create design matrix with no intercept
design <- model.matrix(~ 0 + group)
colnames(design) <- levels(group)

# Count the number of samples modeled by each coefficient
colSums(design)
top2b.dox top2b.pbs    wt.dox    wt.pbs 
        3         3         3         3 
# Create a contrasts matrix
cm <- makeContrasts(dox_wt = wt.dox - wt.pbs,
                    dox_top2b = top2b.dox - top2b.pbs,
                    interaction = (top2b.dox - top2b.pbs) - (wt.dox - wt.pbs),
                    levels = design)

# View the contrasts matrix
cm
           Contrasts
Levels      dox_wt dox_top2b interaction
  top2b.dox      0         1           1
  top2b.pbs      0        -1          -1
  wt.dox         1         0          -1
  wt.pbs        -1         0           1

Fit Models

# Fit the model
fit <- lmFit(dox, design)

# Fit the contrasts
fit2 <- contrasts.fit(fit, contrasts = cm)

# Calculate the t-statistics for the contrasts
fit2 <- eBayes(fit2)

# Summarize results
results <- decideTests(fit2)
summary(results)
       dox_wt dox_top2b interaction
Down     3180         0        1254
NotSig  12265     18361       15621
Up       2916         0        1486

Inspect P-Value Distributions

Which histograms are not uniformly disributed?

Wild Type

# Obtain the summary statistics for the contrast dox_wt
stats_dox_wt <- topTable(fit2, coef = "dox_wt", number = nrow(fit2),
                         sort.by = "none")
# Obtain the summary statistics for the contrast dox_top2b
stats_dox_top2b <- topTable(fit2, coef = "dox_top2b", number = nrow(fit2),
                            sort.by = "none")
# Obtain the summary statistics for the contrast interaction
stats_interaction <- topTable(fit2, coef = "interaction", number = nrow(fit2),
                              sort.by = "none")

# Create histograms of the p-values for each contrast
hist(stats_dox_wt[, "P.Value"])

Top2b

hist(stats_dox_top2b[, "P.Value"])

Interaction

hist(stats_interaction[, "P.Value"])

Enriched Pathways

# Extract the entrez gene IDs
entrez <- fit2$genes[,"entrez"]

# Test for enriched KEGG Pathways for contrast dox_wt
enrich_dox_wt <- kegga(fit2, coef = "dox_wt", geneid = entrez, species = "Mm")

# View the top 5 enriched KEGG pathways
topKEGG(enrich_dox_wt, number = 5)

Enriched Pathways (continued)

# Test for enriched KEGG Pathways for contrast interaction
enrich_interaction <- kegga(fit2, coef = "interaction", geneid = entrez, species = "Mm")

# View the top 5 enriched KEGG pathways
topKEGG(enrich_interaction, number = 5)