STAT 540 Seminar 6

Shaun Jackman — Feb 13, 2013, 4:06 PM

# STAT 540 Seminar 6
# Shaun Jackman

# Preliminaries
library(limma)
library(lattice)
prDat <- read.table("../data/photoRec/GSE4051_data.txt")  # the whole enchilada
str(prDat, max.level = 0)

'data.frame':   29949 obs. of  39 variables:

load("../data/photoRec/GSE4051_design.robj")  # load exp design as 'prDes'
str(prDes)

'data.frame':   39 obs. of  3 variables:
 $ sample  : num  20 21 22 23 16 17 6 24 25 26 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...


# The difficulty in estimating gene-wise variance
m <- 1000
n <- 3
x <- matrix(rnorm(m * n), nrow = m)
obsVars <- apply(x, 1, var)
summary(obsVars)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.004   0.288   0.686   0.988   1.340   7.160

mean(obsVars < 1/3)

[1] 0.275

densityplot(~obsVars, n = 200)

plot of chunk unnamed-chunk-1


# Fit a linear model: explain gene expression in the wild type mice as a function of developmental stage (one-way ANOVA)
wtDes <- subset(prDes, gType == "wt")
str(wtDes)

'data.frame':   20 obs. of  3 variables:
 $ sample  : num  20 21 22 23 24 25 26 27 28 29 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 2 2 2 2 3 3 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 1 1 1 1 1 1 ...

wtDat <- subset(prDat, select = prDes$gType == "wt")
str(wtDat, max.level = 0)

'data.frame':   29949 obs. of  20 variables:

wtDesMat <- model.matrix(~devStage, wtDes)
str(wtDesMat)

 num [1:20, 1:5] 1 1 1 1 1 1 1 1 1 1 ...
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:20] "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
  ..$ : chr [1:5] "(Intercept)" "devStageP2" "devStageP6" "devStageP10" ...
 - attr(*, "assign")= int [1:5] 0 1 1 1 1
 - attr(*, "contrasts")=List of 1
  ..$ devStage: chr "contr.treatment"

wtFit <- lmFit(wtDat, wtDesMat)
wtEbFit <- eBayes(wtFit)
topTable(wtEbFit)

                       ID X.Intercept. devStageP2 devStageP6 devStageP10
1423641_s_at 1423641_s_at        12.18    -0.0175     0.0750      0.0675
1438940_x_at 1438940_x_at        12.86     0.0850     0.1325      0.3425
1438657_x_at 1438657_x_at        12.78     0.1400     0.1250     -0.1850
1456736_x_at 1456736_x_at        12.32     0.1625     0.3050      0.2075
1436884_x_at 1436884_x_at        12.93     0.1775     0.3225      0.0300
1419700_a_at 1419700_a_at        12.32     0.1650     0.6475      0.8175
1435800_a_at 1435800_a_at        12.28     0.0450     0.6825      0.9000
1454613_at     1454613_at        12.47    -0.1075    -0.0500     -0.1025
1451240_a_at 1451240_a_at        13.00     0.3100     0.2800      0.2800
1450084_s_at 1450084_s_at        12.63     0.0825     0.0525      0.1725
             devStage4_weeks AveExpr     F   P.Value adj.P.Val
1423641_s_at          0.1800   12.24 45350 3.574e-36 5.201e-32
1438940_x_at          0.3500   13.04 44957 3.865e-36 5.201e-32
1438657_x_at         -0.4500   12.71 43486 5.210e-36 5.201e-32
1456736_x_at          0.0725   12.47 39509 1.233e-35 6.725e-32
1436884_x_at          0.0250   13.04 39269 1.302e-35 6.725e-32
1419700_a_at          0.6825   12.78 39121 1.347e-35 6.725e-32
1435800_a_at          1.0200   12.81 36668 2.410e-35 1.031e-31
1454613_at           -0.3825   12.34 35835 2.962e-35 1.078e-31
1451240_a_at         -0.3700   13.10 35481 3.239e-35 1.078e-31
1450084_s_at          0.2600   12.75 34411 4.265e-35 1.234e-31

topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"))

                       ID devStageP2 devStageP6 devStageP10
1440645_at     1440645_at     0.3990     0.1952      0.9200
1416041_at     1416041_at     0.1580     0.4797      0.3327
1425222_x_at 1425222_x_at     0.8820     0.7995      1.5488
1451635_at     1451635_at     1.3025     1.1900      2.0160
1429028_at     1429028_at    -2.4433    -3.4073     -4.3105
1422929_s_at 1422929_s_at    -2.9117    -3.6182     -3.5472
1424852_at     1424852_at     0.4575     0.2298      0.5740
1425171_at     1425171_at     0.9980     3.0530      5.2787
1451617_at     1451617_at     0.7255     2.5128      4.9837
1451618_at     1451618_at     0.6028     2.8903      5.0507
             devStage4_weeks AveExpr     F   P.Value adj.P.Val
1440645_at             3.961   6.528 425.4 1.588e-17 4.755e-13
1416041_at             5.115   9.383 195.5 1.522e-14 2.280e-10
1425222_x_at           5.532   7.028 173.4 4.348e-14 4.341e-10
1451635_at             6.188   8.319 157.3 1.013e-13 7.585e-10
1429028_at            -4.602   8.045 148.8 1.646e-13 9.203e-10
1422929_s_at          -3.661   7.278 146.9 1.844e-13 9.203e-10
1424852_at             3.979   7.454 143.2 2.290e-13 9.799e-10
1425171_at             6.079   9.620 138.8 3.002e-13 1.124e-09
1451617_at             6.685   8.817 136.5 3.485e-13 1.160e-09
1451618_at             6.288   9.431 134.2 4.032e-13 1.207e-09

colnames(coef(wtEbFit))

[1] "(Intercept)"     "devStageP2"      "devStageP6"      "devStageP10"    
[5] "devStage4_weeks"

dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit))))
# Use the hit list you stored above and your functions for extracting and plotting data to produce this plot for hits 3, 6, and 9 on the list.
library(reshape2)
prepareData <- function(genes)
  melt(cbind(prDes, t(prDat[genes,])),
       id.vars=colnames(prDes),
       variable.name='gene', value.name='gExp')
makeStripplot <- function(x, ...)
  stripplot(gExp ~ devStage | gene, x, subset = gType == 'wt',
            jitter.data = TRUE, auto.key = TRUE,
            type = c("p", "a"), grid = TRUE,
            ...)
makeStripplot(prepareData(dsHits$ID[c(3,6,9)]))

plot of chunk unnamed-chunk-1


# Be the boss of topTable()
# How many probes have Benjamini-Hochberg ("BH") adjusted p-values for the F test conducted above that are less than 1e-05?
hits = topTable(wtEbFit, coef = c("devStageP2", "devStageP6", "devStageP10", "devStage4_weeks"),
             p.value=1e-5, number=9999)
nrow(hits)

[1] 350

# What is the 63rd hit on this list? Provide it's Affy ID, F statistic, BH adjusted p-value, and the estimated effect for developmental stage "P6" in that order.
hits[63,c('F', 'adj.P.Val', 'devStageP6')]

                 F adj.P.Val devStageP6
1451633_a_at 64.01 1.049e-07      2.069

# Scatterplot the t statistics for the test that the P2 effect is zero against that for P10.
hits <- topTable(wtEbFit, coef = c('devStageP2', 'devStageP10'), number=99999)
smoothScatter(hits$devStageP2, hits$devStageP10, asp=1)

KernSmooth 2.23 loaded Copyright M. P. Wand 1997-2009

plot of chunk unnamed-chunk-1

# Create a densityplot of the associated p-values, so you can get a sense of which developmental stage, P2 or P10, is more clearly distinguished from baseline E16.
P2Hits <- topTable(wtEbFit, coef = 'devStageP2', number=99999)
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', number=99999)
densityplot(~P10Hits$adj.P.Val + P2Hits$adj.P.Val)

plot of chunk unnamed-chunk-1

# Is this what you'd expect?
# Yes, it makes sense that P10 is more distinguished than P2 from the baseline E16.
# If you require a BH adjusted p-value less than 1e-03, how many hits do you get for P2? How many for P10? How much overlap is there?
P2Hits <- topTable(wtEbFit, coef = 'devStageP2', p.value=1e-3, number=99999)
P10Hits <- topTable(wtEbFit, coef = 'devStageP10', p.value=1e-3, number=99999)
length(intersect(P2Hits$ID, P10Hits$ID))

[1] 52

# Now just focus on the P10 effect. Create a scatterplot matrix of raw p-values, BH adjusted p-values, and BY p-values.
xyplot(adj.P.Val ~ P.Value, P10Hits)

plot of chunk unnamed-chunk-1

P10Hits <- topTable(wtEbFit, coef = 'devStageP10', adjust.method='BY', p.value=1e-3, number=99999)
xyplot(adj.P.Val ~ P.Value, P10Hits)

plot of chunk unnamed-chunk-1


# Perform inference for some contrasts
colnames(wtDesMat)

[1] "(Intercept)"     "devStageP2"      "devStageP6"      "devStageP10"    
[5] "devStage4_weeks"

cont.matrix <- makeContrasts(P10VsP6 = devStageP10 - devStageP6,
                             fourweeksVsP10 = devStage4_weeks - devStageP10, levels = wtDesMat)

Warning: Renaming (Intercept) to Intercept

wtFitCont <- contrasts.fit(wtFit, cont.matrix)

Warning: row names of contrasts don't match col names of coefficients

wtEbFitCont <- eBayes(wtFitCont)
topTable(wtEbFitCont)

                       ID P10VsP6 fourweeksVsP10 AveExpr     F   P.Value
1440645_at     1440645_at  0.7247          3.041   6.528 632.7 2.224e-17
1416041_at     1416041_at -0.1470          4.782   9.383 302.4 1.473e-14
1425222_x_at 1425222_x_at  0.7492          3.983   7.028 235.4 1.300e-13
1424852_at     1424852_at  0.3442          3.405   7.454 225.1 1.910e-13
1420726_x_at 1420726_x_at  0.1732          3.551   7.190 203.5 4.555e-13
1451635_at     1451635_at  0.8260          4.172   8.319 200.0 5.289e-13
1429394_at     1429394_at -0.0980          2.410   7.848 167.5 2.416e-12
1455447_at     1455447_at -0.9765         -1.800   9.973 153.5 5.063e-12
1429791_at     1429791_at  0.2480          1.658   8.026 145.7 7.877e-12
1422612_at     1422612_at  0.4837          3.426   8.833 142.2 9.676e-12
             adj.P.Val
1440645_at   6.662e-13
1416041_at   2.206e-10
1425222_x_at 1.297e-09
1424852_at   1.430e-09
1420726_x_at 2.640e-09
1451635_at   2.640e-09
1429394_at   1.034e-08
1455447_at   1.896e-08
1429791_at   2.621e-08
1422612_at   2.840e-08

# The top hits are probes where there is big change from P6 to P10, from P10 to 4_weeks, or both. Let's check that by plotting the data from the top 4 hits.
makeStripplot(prepareData(rownames(topTable(wtEbFitCont))[1:4]))

cutoff <- 1e-04
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)

   P10VsP6 fourweeksVsP10
-1       4              8
0    29945          29895
1        0             46

hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
makeStripplot(prepareData(hits1))

plot of chunk unnamed-chunk-1

hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits2[1:4]))

plot of chunk unnamed-chunk-1

intersect(hits1, hits2)

character(0)

hits3 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
makeStripplot(prepareData(hits3[1:4]))

plot of chunk unnamed-chunk-1

intersect(hits1, hits3)

character(0)

intersect(hits2, hits3)

character(0)

cutoff <- 0.01
nHits <- 8
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)

   P10VsP6 fourweeksVsP10
-1      40             49
0    29897          29636
1       12            264

hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
makeStripplot(prepareData(hits1[1:nHits]))

plot of chunk unnamed-chunk-1

hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits2[1:nHits]))

plot of chunk unnamed-chunk-1

hits3 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0)]
makeStripplot(prepareData(hits3[1:nHits]))

plot of chunk unnamed-chunk-1

hits4 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
makeStripplot(prepareData(hits4[1:nHits]))

plot of chunk unnamed-chunk-1

vennDiagram(wtResCont)

plot of chunk unnamed-chunk-1

hits5 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] != 0 & wtResCont[, "fourweeksVsP10"] != 0)]
makeStripplot(prepareData(hits5))

plot of chunk unnamed-chunk-1

hits6 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0 & wtResCont[, "fourweeksVsP10"] < 0)]
makeStripplot(prepareData(hits6))

plot of chunk unnamed-chunk-1


# Take-home exercise: See if you can find one or more probes that have some expression changes up to P6 and then hold steady all the way to 4_weeks. Here's some I found.
cont.matrix <- makeContrasts(
  P2VsE16 = devStageP2,
  P6VsP2 = devStageP6 - devStageP2,
  P10VsP6 = devStageP10 - devStageP6,
  fourweeksVsP10 = devStage4_weeks - devStageP10,
  levels = wtDesMat)

Warning: Renaming (Intercept) to Intercept

wtFitCont <- contrasts.fit(wtFit, cont.matrix)

Warning: row names of contrasts don't match col names of coefficients

wtEbFitCont <- eBayes(wtFitCont)
topTable(wtEbFitCont)

                       ID P2VsE16  P6VsP2 P10VsP6 fourweeksVsP10 AveExpr
1440645_at     1440645_at  0.3990 -0.2037  0.7247         3.0413   6.528
1416041_at     1416041_at  0.1580  0.3218 -0.1470         4.7818   9.383
1425222_x_at 1425222_x_at  0.8820 -0.0825  0.7492         3.9830   7.028
1451635_at     1451635_at  1.3025 -0.1125  0.8260         4.1723   8.319
1429028_at     1429028_at -2.4433 -0.9640 -0.9033        -0.2913   8.045
1422929_s_at 1422929_s_at -2.9117 -0.7065  0.0710        -0.1140   7.278
1424852_at     1424852_at  0.4575 -0.2277  0.3442         3.4050   7.454
1425171_at     1425171_at  0.9980  2.0550  2.2257         0.8000   9.620
1451617_at     1451617_at  0.7255  1.7872  2.4710         1.7010   8.817
1451618_at     1451618_at  0.6028  2.2875  2.1605         1.2375   9.431
                 F   P.Value adj.P.Val
1440645_at   425.4 1.588e-17 4.755e-13
1416041_at   195.5 1.522e-14 2.280e-10
1425222_x_at 173.4 4.348e-14 4.341e-10
1451635_at   157.3 1.013e-13 7.585e-10
1429028_at   148.8 1.646e-13 9.203e-10
1422929_s_at 146.9 1.844e-13 9.203e-10
1424852_at   143.2 2.290e-13 9.799e-10
1425171_at   138.8 3.002e-13 1.124e-09
1451617_at   136.5 3.485e-13 1.160e-09
1451618_at   134.2 4.032e-13 1.207e-09


wtResCont <- decideTests(wtEbFitCont, p.value = 0.05, method = "global")
summary(wtResCont)

   P2VsE16 P6VsP2 P10VsP6 fourweeksVsP10
-1     471     28     116            129
0    29421  29871   29804          29154
1       57     50      29            666

hits <- rownames(prDat)[which(
  wtResCont[, "P2VsE16"] != 0
  & wtResCont[, "P6VsP2"] != 0
  & wtResCont[, "P10VsP6"] == 0
  & wtResCont[, "fourweeksVsP10"] == 0)]
length(hits)

[1] 6

wtResCont[hits,]

            Contrasts
             P2VsE16 P6VsP2 P10VsP6 fourweeksVsP10
  1419740_at       1      1       0              0
  1421821_at      -1     -1       0              0
  1424963_at       1      1       0              0
  1435661_at      -1      1       0              0
  1436557_at      -1     -1       0              0
  1448201_at      -1     -1       0              0

makeStripplot(prepareData(hits))

plot of chunk unnamed-chunk-1