Huiting — Mar 30, 2014, 2:38 PM
library(limma)
library(lattice)
Warning: package 'lattice' was built under R version 3.0.3
library(plyr)
prDat <- read.table("GSE4051_data.tsv",header=T)
prDes <- readRDS("GSE4051_design.rds")
str(prDat, max.level = 0)
'data.frame': 29949 obs. of 39 variables:
str(prDes)
'data.frame': 39 obs. of 4 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
prepareData <- function(myGenes) {
miniDat <- t(wtDat[myGenes, ])
miniDat <- data.frame(gExp = as.vector(miniDat),
gene = factor(rep(colnames(miniDat), each =
nrow(miniDat)), levels = colnames(miniDat)))
miniDat <- suppressWarnings(data.frame(wtDes, miniDat))
miniDat
}
stripplotIt <- function(myData, ...) {
stripplot(gExp ~ devStage | gene, myData,
jitter.data = TRUE,
auto.key = TRUE, type = c('p', 'a'), grid = TRUE, ...)
}
## Reproduce all seminar 6 by following instructions
m <- 1000
n <- 3
x <- matrix(rnorm(m * n,0,3), nrow = m)
obsVars <- apply(x, 1, var)
summary(obsVars)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.01 2.63 5.94 8.72 12.00 48.70
mean(obsVars < 1/3)
[1] 0.032
densityplot(~obsVars, n = 200)
wtDes <- subset(prDes, gType == "wt")
str(wtDes)
'data.frame': 20 obs. of 4 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 24 25 26 27 28 29 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 2 2 2 2 3 3 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 1 1 1 1 1 1 ...
wtDat <- subset(prDat, select = prDes$gType == "wt")
str(wtDat, max.level = 0)
'data.frame': 29949 obs. of 20 variables:
wtDesMat <- model.matrix(~devStage, wtDes)
str(wtDesMat)
num [1:20, 1:5] 1 1 1 1 1 1 1 1 1 1 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:20] "12" "13" "14" "15" ...
..$ : chr [1:5] "(Intercept)" "devStageP2" "devStageP6" "devStageP10" ...
- attr(*, "assign")= int [1:5] 0 1 1 1 1
- attr(*, "contrasts")=List of 1
..$ devStage: chr "contr.treatment"
wtFit <- lmFit(wtDat, wtDesMat)
wtEbFit <- eBayes(wtFit)
topTable(wtEbFit)
X.Intercept. devStageP2 devStageP6 devStageP10
1423641_s_at 12.18 -0.0175 0.0750 0.0675
1438940_x_at 12.86 0.0850 0.1325 0.3425
1438657_x_at 12.78 0.1400 0.1250 -0.1850
1456736_x_at 12.32 0.1625 0.3050 0.2075
1436884_x_at 12.93 0.1775 0.3225 0.0300
1419700_a_at 12.32 0.1650 0.6475 0.8175
1435800_a_at 12.28 0.0450 0.6825 0.9000
1454613_at 12.47 -0.1075 -0.0500 -0.1025
1451240_a_at 13.00 0.3100 0.2800 0.2800
1450084_s_at 12.63 0.0825 0.0525 0.1725
devStage4_weeks AveExpr F P.Value adj.P.Val
1423641_s_at 0.1800 12.24 45350 3.574e-36 5.201e-32
1438940_x_at 0.3500 13.04 44957 3.865e-36 5.201e-32
1438657_x_at -0.4500 12.71 43486 5.210e-36 5.201e-32
1456736_x_at 0.0725 12.47 39509 1.233e-35 6.725e-32
1436884_x_at 0.0250 13.04 39269 1.302e-35 6.725e-32
1419700_a_at 0.6825 12.78 39121 1.347e-35 6.725e-32
1435800_a_at 1.0200 12.81 36668 2.410e-35 1.031e-31
1454613_at -0.3825 12.34 35835 2.962e-35 1.078e-31
1451240_a_at -0.3700 13.10 35481 3.239e-35 1.078e-31
1450084_s_at 0.2600 12.75 34411 4.265e-35 1.234e-31
topTable(wtEbFit, coef = 2:5) # cryptic! error-prone!
devStageP2 devStageP6 devStageP10 devStage4_weeks AveExpr
1440645_at 0.3990 0.1952 0.9200 3.961 6.528
1416041_at 0.1580 0.4797 0.3327 5.115 9.383
1425222_x_at 0.8820 0.7995 1.5488 5.532 7.028
1451635_at 1.3025 1.1900 2.0160 6.188 8.319
1429028_at -2.4433 -3.4073 -4.3105 -4.602 8.045
1422929_s_at -2.9117 -3.6182 -3.5472 -3.661 7.278
1424852_at 0.4575 0.2298 0.5740 3.979 7.454
1425171_at 0.9980 3.0530 5.2787 6.079 9.620
1451617_at 0.7255 2.5128 4.9837 6.685 8.817
1451618_at 0.6028 2.8903 5.0507 6.288 9.431
F P.Value adj.P.Val
1440645_at 425.4 1.588e-17 4.755e-13
1416041_at 195.5 1.522e-14 2.280e-10
1425222_x_at 173.4 4.348e-14 4.341e-10
1451635_at 157.3 1.013e-13 7.585e-10
1429028_at 148.8 1.646e-13 9.203e-10
1422929_s_at 146.9 1.844e-13 9.203e-10
1424852_at 143.2 2.290e-13 9.799e-10
1425171_at 138.8 3.002e-13 1.124e-09
1451617_at 136.5 3.485e-13 1.160e-09
1451618_at 134.2 4.032e-13 1.207e-09
colnames(coef(wtEbFit))
[1] "(Intercept)" "devStageP2" "devStageP6" "devStageP10"
[5] "devStage4_weeks"
(dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit)))))
devStageP2 devStageP6 devStageP10 devStage4_weeks AveExpr
1440645_at 0.3990 0.1952 0.9200 3.961 6.528
1416041_at 0.1580 0.4797 0.3327 5.115 9.383
1425222_x_at 0.8820 0.7995 1.5488 5.532 7.028
1451635_at 1.3025 1.1900 2.0160 6.188 8.319
1429028_at -2.4433 -3.4073 -4.3105 -4.602 8.045
1422929_s_at -2.9117 -3.6182 -3.5472 -3.661 7.278
1424852_at 0.4575 0.2298 0.5740 3.979 7.454
1425171_at 0.9980 3.0530 5.2787 6.079 9.620
1451617_at 0.7255 2.5128 4.9837 6.685 8.817
1451618_at 0.6028 2.8903 5.0507 6.288 9.431
F P.Value adj.P.Val
1440645_at 425.4 1.588e-17 4.755e-13
1416041_at 195.5 1.522e-14 2.280e-10
1425222_x_at 173.4 4.348e-14 4.341e-10
1451635_at 157.3 1.013e-13 7.585e-10
1429028_at 148.8 1.646e-13 9.203e-10
1422929_s_at 146.9 1.844e-13 9.203e-10
1424852_at 143.2 2.290e-13 9.799e-10
1425171_at 138.8 3.002e-13 1.124e-09
1451617_at 136.5 3.485e-13 1.160e-09
1451618_at 134.2 4.032e-13 1.207e-09
stripplotIt(prepareData(rownames(dsHits)[c(3, 6, 9)]))
#dat <- prepareData(rownames(dsHits)[3])
#Fit <- lm(gExp~devStage,dat)
#summary(Fit)
cutoff <- 1e-05
dsHits <- topTable(wtEbFit,
coef = grep("devStage", colnames(coef(wtEbFit))),
p.value = cutoff, n = Inf)
(numBHhits <- nrow(dsHits))
[1] 350
dsHits[63, c("F", "adj.P.Val", "devStageP6")]
F adj.P.Val devStageP6
1451633_a_at 64.01 1.049e-07 2.069
P2Hits <- topTable(wtEbFit, coef = "devStageP2", n = Inf, sort = "none")
P10Hits <- topTable(wtEbFit, coef = "devStageP10", n = Inf, sort = "none")
xyplot(P10Hits$t ~ P2Hits$t, aspect = 1,
xlab = "t-statistic for P2 effect",
ylab = "t-statistic for P10 effect",
xlim = c(-20, 16), ylim = c(-20, 16),
panel = function(x, y, ...) {
panel.smoothScatter(x, y, nbin = 100, ...)
panel.abline(a = 0, b = 1, col = "orange")
})
KernSmooth 2.23 loaded
Copyright M. P. Wand 1997-2009
(loaded the KernSmooth namespace)
densityplot(~ P10Hits$adj.P.Val + P2Hits$adj.P.Val, auto.key = TRUE,
plot.points = FALSE, n = 300)
cutoff <- 1e-03
foo <- data.frame(P2 = P2Hits$adj.P.Val < cutoff,
P10 = P10Hits$adj.P.Val < cutoff)
addmargins(with(foo, table(P2, P10)))
P10
P2 FALSE TRUE Sum
FALSE 29201 695 29896
TRUE 1 52 53
Sum 29202 747 29949
P10pVals <- data.frame(raw = P10Hits$P.Value,
BH = P10Hits$adj.P.Val,
BY = p.adjust(P10Hits$P.Value, method = "BY"))
splom(P10pVals,
panel = function(x, y, ... ) {
panel.xyplot(x, y, pch = ".", ...)
panel.abline(a = 0, b = 1, col = "orange")
})
colnames(wtDesMat)
[1] "(Intercept)" "devStageP2" "devStageP6" "devStageP10"
[5] "devStage4_weeks"
(cont.matrix <- makeContrasts(P10VsP6 = devStageP10 - devStageP6, fourweeksVsP10 = devStage4_weeks -
devStageP10, levels = wtDesMat))
Warning: Renaming (Intercept) to Intercept
Contrasts
Levels P10VsP6 fourweeksVsP10
Intercept 0 0
devStageP2 0 0
devStageP6 -1 0
devStageP10 1 -1
devStage4_weeks 0 1
wtFitCont <- contrasts.fit(wtFit, cont.matrix)
Warning: row names of contrasts don't match col names of coefficients
wtEbFitCont <- eBayes(wtFitCont)
topTable(wtEbFitCont)
P10VsP6 fourweeksVsP10 AveExpr F P.Value adj.P.Val
1440645_at 0.7247 3.041 6.528 632.7 2.224e-17 6.662e-13
1416041_at -0.1470 4.782 9.383 302.4 1.473e-14 2.206e-10
1425222_x_at 0.7492 3.983 7.028 235.4 1.300e-13 1.297e-09
1424852_at 0.3442 3.405 7.454 225.1 1.910e-13 1.430e-09
1420726_x_at 0.1732 3.551 7.190 203.5 4.555e-13 2.640e-09
1451635_at 0.8260 4.172 8.319 200.0 5.289e-13 2.640e-09
1429394_at -0.0980 2.410 7.848 167.5 2.416e-12 1.034e-08
1455447_at -0.9765 -1.800 9.973 153.5 5.063e-12 1.896e-08
1429791_at 0.2480 1.658 8.026 145.7 7.877e-12 2.621e-08
1422612_at 0.4837 3.426 8.833 142.2 9.676e-12 2.840e-08
foo <- topTable(wtEbFitCont)
stripplotIt(prepareData(rownames(foo)[1:4]))
cutoff <- 1e-04
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
P10VsP6 fourweeksVsP10
-1 4 8
0 29945 29895
1 0 46
(hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)])
[1] "1416635_at" "1437781_at" "1454752_at" "1455260_at"
stripplotIt(prepareData(hits1))
(hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)])
[1] "1416021_a_at" "1423851_a_at" "1434500_at" "1435415_x_at"
[5] "1437502_x_at" "1448182_a_at" "1452679_at" "1455447_at"
stripplotIt(prepareData(hits2[1:4]))
intersect(hits1, hits2)
character(0)
(hits3 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)])
[1] "1416041_at" "1417280_at" "1418406_at" "1418710_at"
[5] "1418789_at" "1419069_at" "1420725_at" "1420726_x_at"
[9] "1421061_at" "1421818_at" "1422612_at" "1424852_at"
[13] "1424895_at" "1425222_x_at" "1426059_at" "1426223_at"
[17] "1427388_at" "1428763_at" "1429394_at" "1429791_at"
[21] "1430580_at" "1431174_at" "1433699_at" "1434297_at"
[25] "1434573_at" "1435436_at" "1435679_at" "1435727_s_at"
[29] "1436265_at" "1436287_at" "1440402_at" "1440645_at"
[33] "1441518_at" "1442243_at" "1443252_at" "1446484_at"
[37] "1449170_at" "1449393_at" "1451042_a_at" "1451635_at"
[41] "1452243_at" "1453385_at" "1455493_at" "1457878_at"
[45] "1458418_at" "1459904_at"
stripplotIt(prepareData(hits3[1:4]))
intersect(hits1, hits3)
character(0)
intersect(hits2, hits3)
character(0)
cutoff <- 0.01
nHits <- 8
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
P10VsP6 fourweeksVsP10
-1 40 49
0 29897 29636
1 12 264
hits1 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] < 0)]
stripplotIt(prepareData(hits1[1:nHits]))
hits2 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] < 0)]
stripplotIt(prepareData(hits2[1:nHits]))
hits3 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0)]
stripplotIt(prepareData(hits3[1:nHits]))
hits4 <- rownames(prDat)[which(wtResCont[, "fourweeksVsP10"] > 0)]
stripplotIt(prepareData(hits4[1:nHits]))
vennDiagram(wtResCont)
hits5 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] != 0 & wtResCont[, "fourweeksVsP10"] !=
0)]
stripplotIt(prepareData(hits5))
hits6 <- rownames(prDat)[which(wtResCont[, "P10VsP6"] > 0 & wtResCont[, "fourweeksVsP10"] <
0)]
stripplotIt(prepareData(hits6))
## Exercise 1
## Make the above simulation more realistic with two (or more) groups, different data-generating
## means and group differences, different data-generating gene-wise variances, etc.
m <- 1000
n <- 3
x <- matrix(rnorm(m/2 * n,0,3), nrow = m/2)
y <- matrix(rnorm(m/2 * n,3,1), nrow = m/2)
z <- rbind(x,y)
obsVars <- apply(z, 1, var)
summary(obsVars)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00 0.59 1.89 5.05 6.11 47.60
mean(obsVars < 1/3)
[1] 0.168
densityplot(~obsVars, n = 200)
## Take Home
#colnames(wtDesMat)
(cont.matrixM <- makeContrasts(P2VsE16 = devStageP2 - Intercept,
P6VsP2 = devStageP6 - devStageP2,
P10VsP6 = devStageP10 - devStageP6,
fourweeksVsP10 = devStage4_weeks - devStageP10,
levels = wtDesMat))
Warning: Renaming (Intercept) to Intercept
Contrasts
Levels P2VsE16 P6VsP2 P10VsP6 fourweeksVsP10
Intercept -1 0 0 0
devStageP2 1 -1 0 0
devStageP6 0 1 -1 0
devStageP10 0 0 1 -1
devStage4_weeks 0 0 0 1
wtFitCont1 <- contrasts.fit(wtFit, cont.matrixM)
Warning: row names of contrasts don't match col names of coefficients
wtEbFitCont1 <- eBayes(wtFitCont1)
topTable(wtEbFitCont1)
P2VsE16 P6VsP2 P10VsP6 fourweeksVsP10 AveExpr F
1438657_x_at -12.64 -0.0150 -3.100e-01 -0.2650 12.71 2641
1423641_s_at -12.20 0.0925 -7.500e-03 0.1125 12.24 2610
1438940_x_at -12.78 0.0475 2.100e-01 0.0075 13.04 2485
1436884_x_at -12.75 0.1450 -2.925e-01 -0.0050 13.04 2226
1454613_at -12.57 0.0575 -5.250e-02 -0.2800 12.34 2212
1456736_x_at -12.16 0.1425 -9.750e-02 -0.1350 12.47 2202
1456349_x_at -11.58 -0.1200 -1.250e-02 -0.3375 11.53 2077
1451240_a_at -12.69 -0.0300 -4.441e-16 -0.6500 13.10 2032
1438859_x_at -13.00 -0.2425 -9.750e-02 -0.3125 12.63 1978
1416187_s_at -12.08 -0.1300 -5.500e-02 0.0825 12.18 1964
P.Value adj.P.Val
1438657_x_at 1.309e-24 2.179e-20
1423641_s_at 1.455e-24 2.179e-20
1438940_x_at 2.259e-24 2.256e-20
1436884_x_at 6.061e-24 3.329e-20
1454613_at 6.413e-24 3.329e-20
1456736_x_at 6.669e-24 3.329e-20
1456349_x_at 1.125e-23 4.811e-20
1451240_a_at 1.369e-23 5.057e-20
1438859_x_at 1.744e-23 5.057e-20
1416187_s_at 1.858e-23 5.057e-20
cutoff <- 1e-04
wtResCont1 <- decideTests(wtEbFitCont1, p.value = cutoff, method = "global")
summary(wtResCont1)
P2VsE16 P6VsP2 P10VsP6 fourweeksVsP10
-1 29903 0 32 37
0 46 29937 29907 29712
1 0 12 10 200
hits7 <- rownames(prDat)[which(wtResCont1[,"P2VsE16"] < 0
& wtResCont1[,"P6VsP2"] > 0 &
wtResCont1[, "P10VsP6"] == 0 &
wtResCont1[, "fourweeksVsP10"] == 0)]
stripplotIt(prepareData(hits7))