seminar6.R

Huiting — Mar 30, 2014, 2:38 PM

library(limma)
library(lattice)
Warning: package 'lattice' was built under R version 3.0.3
library(plyr)

prDat <- read.table("GSE4051_data.tsv",header=T)
prDes <- readRDS("GSE4051_design.rds")
str(prDat, max.level = 0)
'data.frame':   29949 obs. of  39 variables:
str(prDes)
'data.frame':   39 obs. of  4 variables:
 $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
 $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...

prepareData <- function(myGenes) {
  miniDat <- t(wtDat[myGenes, ])
  miniDat <- data.frame(gExp = as.vector(miniDat),
                        gene = factor(rep(colnames(miniDat), each =
                                            nrow(miniDat)), levels = colnames(miniDat)))
  miniDat <- suppressWarnings(data.frame(wtDes, miniDat))
  miniDat
}
stripplotIt <- function(myData, ...) {
  stripplot(gExp ~ devStage | gene, myData,
            jitter.data = TRUE,
            auto.key = TRUE, type = c('p', 'a'), grid = TRUE, ...)
}

## Reproduce all seminar 6 by following instructions
m <- 1000
n <- 3
x <- matrix(rnorm(m * n,0,3), nrow = m)
obsVars <- apply(x, 1, var)
summary(obsVars)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.01    2.63    5.94    8.72   12.00   48.70 
mean(obsVars < 1/3)
[1] 0.032
densityplot(~obsVars, n = 200)

plot of chunk unnamed-chunk-1


wtDes <- subset(prDes, gType == "wt")
str(wtDes)
'data.frame':   20 obs. of  4 variables:
 $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
 $ sidNum  : num  20 21 22 23 24 25 26 27 28 29 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 2 2 2 2 3 3 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 1 1 1 1 1 1 ...

wtDat <- subset(prDat, select = prDes$gType == "wt")
str(wtDat, max.level = 0)
'data.frame':   29949 obs. of  20 variables:

wtDesMat <- model.matrix(~devStage, wtDes)
str(wtDesMat)
 num [1:20, 1:5] 1 1 1 1 1 1 1 1 1 1 ...
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:20] "12" "13" "14" "15" ...
  ..$ : chr [1:5] "(Intercept)" "devStageP2" "devStageP6" "devStageP10" ...
 - attr(*, "assign")= int [1:5] 0 1 1 1 1
 - attr(*, "contrasts")=List of 1
  ..$ devStage: chr "contr.treatment"

wtFit <- lmFit(wtDat, wtDesMat)
wtEbFit <- eBayes(wtFit)
topTable(wtEbFit)
             X.Intercept. devStageP2 devStageP6 devStageP10
1423641_s_at        12.18    -0.0175     0.0750      0.0675
1438940_x_at        12.86     0.0850     0.1325      0.3425
1438657_x_at        12.78     0.1400     0.1250     -0.1850
1456736_x_at        12.32     0.1625     0.3050      0.2075
1436884_x_at        12.93     0.1775     0.3225      0.0300
1419700_a_at        12.32     0.1650     0.6475      0.8175
1435800_a_at        12.28     0.0450     0.6825      0.9000
1454613_at          12.47    -0.1075    -0.0500     -0.1025
1451240_a_at        13.00     0.3100     0.2800      0.2800
1450084_s_at        12.63     0.0825     0.0525      0.1725
             devStage4_weeks AveExpr     F   P.Value adj.P.Val
1423641_s_at          0.1800   12.24 45350 3.574e-36 5.201e-32
1438940_x_at          0.3500   13.04 44957 3.865e-36 5.201e-32
1438657_x_at         -0.4500   12.71 43486 5.210e-36 5.201e-32
1456736_x_at          0.0725   12.47 39509 1.233e-35 6.725e-32
1436884_x_at          0.0250   13.04 39269 1.302e-35 6.725e-32
1419700_a_at          0.6825   12.78 39121 1.347e-35 6.725e-32
1435800_a_at          1.0200   12.81 36668 2.410e-35 1.031e-31
1454613_at           -0.3825   12.34 35835 2.962e-35 1.078e-31
1451240_a_at         -0.3700   13.10 35481 3.239e-35 1.078e-31
1450084_s_at          0.2600   12.75 34411 4.265e-35 1.234e-31
topTable(wtEbFit, coef = 2:5)  # cryptic! error-prone!
             devStageP2 devStageP6 devStageP10 devStage4_weeks AveExpr
1440645_at       0.3990     0.1952      0.9200           3.961   6.528
1416041_at       0.1580     0.4797      0.3327           5.115   9.383
1425222_x_at     0.8820     0.7995      1.5488           5.532   7.028
1451635_at       1.3025     1.1900      2.0160           6.188   8.319
1429028_at      -2.4433    -3.4073     -4.3105          -4.602   8.045
1422929_s_at    -2.9117    -3.6182     -3.5472          -3.661   7.278
1424852_at       0.4575     0.2298      0.5740           3.979   7.454
1425171_at       0.9980     3.0530      5.2787           6.079   9.620
1451617_at       0.7255     2.5128      4.9837           6.685   8.817
1451618_at       0.6028     2.8903      5.0507           6.288   9.431
                 F   P.Value adj.P.Val
1440645_at   425.4 1.588e-17 4.755e-13
1416041_at   195.5 1.522e-14 2.280e-10
1425222_x_at 173.4 4.348e-14 4.341e-10
1451635_at   157.3 1.013e-13 7.585e-10
1429028_at   148.8 1.646e-13 9.203e-10
1422929_s_at 146.9 1.844e-13 9.203e-10
1424852_at   143.2 2.290e-13 9.799e-10
1425171_at   138.8 3.002e-13 1.124e-09
1451617_at   136.5 3.485e-13 1.160e-09
1451618_at   134.2 4.032e-13 1.207e-09
colnames(coef(wtEbFit)) 
[1] "(Intercept)"     "devStageP2"      "devStageP6"      "devStageP10"    
[5] "devStage4_weeks"
(dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit)))))
             devStageP2 devStageP6 devStageP10 devStage4_weeks AveExpr
1440645_at       0.3990     0.1952      0.9200           3.961   6.528
1416041_at       0.1580     0.4797      0.3327           5.115   9.383
1425222_x_at     0.8820     0.7995      1.5488           5.532   7.028
1451635_at       1.3025     1.1900      2.0160           6.188   8.319
1429028_at      -2.4433    -3.4073     -4.3105          -4.602   8.045
1422929_s_at    -2.9117    -3.6182     -3.5472          -3.661   7.278
1424852_at       0.4575     0.2298      0.5740           3.979   7.454
1425171_at       0.9980     3.0530      5.2787           6.079   9.620
1451617_at       0.7255     2.5128      4.9837           6.685   8.817
1451618_at       0.6028     2.8903      5.0507           6.288   9.431
                 F   P.Value adj.P.Val
1440645_at   425.4 1.588e-17 4.755e-13
1416041_at   195.5 1.522e-14 2.280e-10
1425222_x_at 173.4 4.348e-14 4.341e-10
1451635_at   157.3 1.013e-13 7.585e-10
1429028_at   148.8 1.646e-13 9.203e-10
1422929_s_at 146.9 1.844e-13 9.203e-10
1424852_at   143.2 2.290e-13 9.799e-10
1425171_at   138.8 3.002e-13 1.124e-09
1451617_at   136.5 3.485e-13 1.160e-09
1451618_at   134.2 4.032e-13 1.207e-09
stripplotIt(prepareData(rownames(dsHits)[c(3, 6, 9)]))

plot of chunk unnamed-chunk-1


#dat <- prepareData(rownames(dsHits)[3])
#Fit <- lm(gExp~devStage,dat)
#summary(Fit)

cutoff <- 1e-05
dsHits <- topTable(wtEbFit,
                   coef = grep("devStage", colnames(coef(wtEbFit))),
                   p.value = cutoff, n = Inf)
(numBHhits <- nrow(dsHits))
[1] 350

dsHits[63, c("F", "adj.P.Val", "devStageP6")]
                 F adj.P.Val devStageP6
1451633_a_at 64.01 1.049e-07      2.069

P2Hits <- topTable(wtEbFit, coef = "devStageP2", n = Inf, sort = "none")
P10Hits <- topTable(wtEbFit, coef = "devStageP10", n = Inf, sort = "none")
xyplot(P10Hits$t ~ P2Hits$t, aspect = 1,
       xlab = "t-statistic for P2 effect",
       ylab = "t-statistic for P10 effect",
       xlim = c(-20, 16), ylim = c(-20, 16),
       panel = function(x, y, ...) {
         panel.smoothScatter(x, y, nbin = 100, ...)
         panel.abline(a = 0, b = 1, col = "orange")
       })
KernSmooth 2.23 loaded
Copyright M. P. Wand 1997-2009
(loaded the KernSmooth namespace)

plot of chunk unnamed-chunk-1



densityplot(~ P10Hits$adj.P.Val + P2Hits$adj.P.Val, auto.key = TRUE,
            plot.points = FALSE, n = 300)

plot of chunk unnamed-chunk-1


cutoff <- 1e-03
foo <- data.frame(P2 = P2Hits$adj.P.Val < cutoff,
                  P10 = P10Hits$adj.P.Val < cutoff)
addmargins(with(foo, table(P2, P10)))
       P10
P2      FALSE  TRUE   Sum
  FALSE 29201   695 29896
  TRUE      1    52    53
  Sum   29202   747 29949

P10pVals <- data.frame(raw = P10Hits$P.Value,
                       BH = P10Hits$adj.P.Val,
                       BY = p.adjust(P10Hits$P.Value, method = "BY"))
splom(P10pVals,
      panel = function(x, y, ... ) {
        panel.xyplot(x, y, pch = ".", ...)
        panel.abline(a = 0, b = 1, col = "orange")
      })

plot of chunk unnamed-chunk-1



colnames(wtDesMat)
[1] "(Intercept)"     "devStageP2"      "devStageP6"      "devStageP10"    
[5] "devStage4_weeks"

(cont.matrix <- makeContrasts(P10VsP6 = devStageP10 - devStageP6, fourweeksVsP10 = devStage4_weeks - 
                                devStageP10, levels = wtDesMat))
Warning: Renaming (Intercept) to Intercept
                 Contrasts
Levels            P10VsP6 fourweeksVsP10
  Intercept             0              0
  devStageP2            0              0
  devStageP6           -1              0
  devStageP10           1             -1
  devStage4_weeks       0              1
wtFitCont <- contrasts.fit(wtFit, cont.matrix)
Warning: row names of contrasts don't match col names of coefficients
wtEbFitCont <- eBayes(wtFitCont)
topTable(wtEbFitCont)
             P10VsP6 fourweeksVsP10 AveExpr     F   P.Value adj.P.Val
1440645_at    0.7247          3.041   6.528 632.7 2.224e-17 6.662e-13
1416041_at   -0.1470          4.782   9.383 302.4 1.473e-14 2.206e-10
1425222_x_at  0.7492          3.983   7.028 235.4 1.300e-13 1.297e-09
1424852_at    0.3442          3.405   7.454 225.1 1.910e-13 1.430e-09
1420726_x_at  0.1732          3.551   7.190 203.5 4.555e-13 2.640e-09
1451635_at    0.8260          4.172   8.319 200.0 5.289e-13 2.640e-09
1429394_at   -0.0980          2.410   7.848 167.5 2.416e-12 1.034e-08
1455447_at   -0.9765         -1.800   9.973 153.5 5.063e-12 1.896e-08
1429791_at    0.2480          1.658   8.026 145.7 7.877e-12 2.621e-08
1422612_at    0.4837          3.426   8.833 142.2 9.676e-12 2.840e-08

foo <- topTable(wtEbFitCont)
stripplotIt(prepareData(rownames(foo)[1:4]))

plot of chunk unnamed-chunk-1


cutoff <- 1e-04
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
   P10VsP6 fourweeksVsP10
-1       4              8
0    29945          29895
1        0             46

(hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)])
[1] "1416635_at" "1437781_at" "1454752_at" "1455260_at"
stripplotIt(prepareData(hits1))

plot of chunk unnamed-chunk-1

(hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)])
[1] "1416021_a_at" "1423851_a_at" "1434500_at"   "1435415_x_at"
[5] "1437502_x_at" "1448182_a_at" "1452679_at"   "1455447_at"  
stripplotIt(prepareData(hits2[1:4]))

plot of chunk unnamed-chunk-1

intersect(hits1, hits2)
character(0)
(hits3 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)])
 [1] "1416041_at"   "1417280_at"   "1418406_at"   "1418710_at"  
 [5] "1418789_at"   "1419069_at"   "1420725_at"   "1420726_x_at"
 [9] "1421061_at"   "1421818_at"   "1422612_at"   "1424852_at"  
[13] "1424895_at"   "1425222_x_at" "1426059_at"   "1426223_at"  
[17] "1427388_at"   "1428763_at"   "1429394_at"   "1429791_at"  
[21] "1430580_at"   "1431174_at"   "1433699_at"   "1434297_at"  
[25] "1434573_at"   "1435436_at"   "1435679_at"   "1435727_s_at"
[29] "1436265_at"   "1436287_at"   "1440402_at"   "1440645_at"  
[33] "1441518_at"   "1442243_at"   "1443252_at"   "1446484_at"  
[37] "1449170_at"   "1449393_at"   "1451042_a_at" "1451635_at"  
[41] "1452243_at"   "1453385_at"   "1455493_at"   "1457878_at"  
[45] "1458418_at"   "1459904_at"  
stripplotIt(prepareData(hits3[1:4]))

plot of chunk unnamed-chunk-1

intersect(hits1, hits3)
character(0)
intersect(hits2, hits3)
character(0)

cutoff <- 0.01
nHits <- 8
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
   P10VsP6 fourweeksVsP10
-1      40             49
0    29897          29636
1       12            264

hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
stripplotIt(prepareData(hits1[1:nHits]))

plot of chunk unnamed-chunk-1

hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
stripplotIt(prepareData(hits2[1:nHits]))

plot of chunk unnamed-chunk-1

hits3 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0)]
stripplotIt(prepareData(hits3[1:nHits]))

plot of chunk unnamed-chunk-1

hits4 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
stripplotIt(prepareData(hits4[1:nHits]))

plot of chunk unnamed-chunk-1


vennDiagram(wtResCont)

plot of chunk unnamed-chunk-1

hits5 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] != 0 & wtResCont[, "fourweeksVsP10"] != 
                                 0)]
stripplotIt(prepareData(hits5))

plot of chunk unnamed-chunk-1


hits6 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0 & wtResCont[, "fourweeksVsP10"] < 
                                 0)]
stripplotIt(prepareData(hits6))

plot of chunk unnamed-chunk-1


## Exercise 1
## Make the above simulation more realistic with two (or more) groups, different data-generating 
## means and group differences, different data-generating gene-wise variances, etc.
m <- 1000
n <- 3
x <- matrix(rnorm(m/2 * n,0,3), nrow = m/2)
y <- matrix(rnorm(m/2 * n,3,1), nrow = m/2)
z <- rbind(x,y)

obsVars <- apply(z, 1, var)
summary(obsVars)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    0.59    1.89    5.05    6.11   47.60 
mean(obsVars < 1/3)
[1] 0.168
densityplot(~obsVars, n = 200)

plot of chunk unnamed-chunk-1


## Take Home
#colnames(wtDesMat)
(cont.matrixM <- makeContrasts(P2VsE16 = devStageP2 - Intercept,
                               P6VsP2 = devStageP6 - devStageP2, 
                               P10VsP6 = devStageP10 - devStageP6,
                               fourweeksVsP10 = devStage4_weeks - devStageP10,
                               levels = wtDesMat))
Warning: Renaming (Intercept) to Intercept
                 Contrasts
Levels            P2VsE16 P6VsP2 P10VsP6 fourweeksVsP10
  Intercept            -1      0       0              0
  devStageP2            1     -1       0              0
  devStageP6            0      1      -1              0
  devStageP10           0      0       1             -1
  devStage4_weeks       0      0       0              1
wtFitCont1 <- contrasts.fit(wtFit, cont.matrixM)
Warning: row names of contrasts don't match col names of coefficients
wtEbFitCont1 <- eBayes(wtFitCont1)
topTable(wtEbFitCont1)
             P2VsE16  P6VsP2    P10VsP6 fourweeksVsP10 AveExpr    F
1438657_x_at  -12.64 -0.0150 -3.100e-01        -0.2650   12.71 2641
1423641_s_at  -12.20  0.0925 -7.500e-03         0.1125   12.24 2610
1438940_x_at  -12.78  0.0475  2.100e-01         0.0075   13.04 2485
1436884_x_at  -12.75  0.1450 -2.925e-01        -0.0050   13.04 2226
1454613_at    -12.57  0.0575 -5.250e-02        -0.2800   12.34 2212
1456736_x_at  -12.16  0.1425 -9.750e-02        -0.1350   12.47 2202
1456349_x_at  -11.58 -0.1200 -1.250e-02        -0.3375   11.53 2077
1451240_a_at  -12.69 -0.0300 -4.441e-16        -0.6500   13.10 2032
1438859_x_at  -13.00 -0.2425 -9.750e-02        -0.3125   12.63 1978
1416187_s_at  -12.08 -0.1300 -5.500e-02         0.0825   12.18 1964
               P.Value adj.P.Val
1438657_x_at 1.309e-24 2.179e-20
1423641_s_at 1.455e-24 2.179e-20
1438940_x_at 2.259e-24 2.256e-20
1436884_x_at 6.061e-24 3.329e-20
1454613_at   6.413e-24 3.329e-20
1456736_x_at 6.669e-24 3.329e-20
1456349_x_at 1.125e-23 4.811e-20
1451240_a_at 1.369e-23 5.057e-20
1438859_x_at 1.744e-23 5.057e-20
1416187_s_at 1.858e-23 5.057e-20
cutoff <- 1e-04
wtResCont1 <- decideTests(wtEbFitCont1, p.value = cutoff, method = "global")
summary(wtResCont1)
   P2VsE16 P6VsP2 P10VsP6 fourweeksVsP10
-1   29903      0      32             37
0       46  29937   29907          29712
1        0     12      10            200

hits7 <- rownames(prDat)[which(wtResCont1[,"P2VsE16"] < 0 
                               & wtResCont1[,"P6VsP2"] > 0 &
                                 wtResCont1[, "P10VsP6"] == 0 & 
                                 wtResCont1[, "fourweeksVsP10"] == 0)]

stripplotIt(prepareData(hits7))

plot of chunk unnamed-chunk-1