Abrar — Feb 13, 2014, 10:28 PM
library(limma)
library(lattice)
source("../Seminar05/Seminar05.R")
'data.frame': 29949 obs. of 39 variables:
'data.frame': 39 obs. of 4 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
'data.frame': 78 obs. of 6 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 10.93 10.74 10.67 10.68 9.61 ...
$ gene : Factor w/ 2 levels "1419655_at","1438815_at": 1 1 1 1 1 1 1 1 1 1 ...
'data.frame': 39 obs. of 6 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 7.04 7.48 7.37 6.94 6.16 ...
$ gene : Factor w/ 1 level "1456341_a_at": 1 1 1 1 1 1 1 1 1 1 ...
'data.frame': 39 obs. of 6 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 8.02 9.05 8.71 8.92 6.8 ...
$ gene : Factor w/ 1 level "1448690_at": 1 1 1 1 1 1 1 1 1 1 ...
prDat <- read.table("../data/GSE4051_data.tsv")
str(prDat, max.level = 0)
'data.frame': 29949 obs. of 39 variables:
prDes <- readRDS("../data/GSE4051_design.rds")
str(prDes)
'data.frame': 39 obs. of 4 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
# Optional take-home exercise
set.seed(400)
m <- 1000 # genes
n <- 3 #sample size
g <- 2 #gType
x <- matrix(rnorm(m * n, mean=2, sd=5), nrow = m)
colnames(x)=paste0("sample",seq_len(ncol(x)))
xDat <- data.frame(x, group = rep(paste("g", 1:g, sep=""), each=m/g))
obsVars <- apply(x, 1, var)
summary(obsVars)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.01 6.75 17.90 25.40 35.00 186.00
densityplot(~obsVars, n=200)
#Fit a linear model
wtDes <- subset(prDes, gType == "wt")
str(wtDes)
'data.frame': 20 obs. of 4 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 24 25 26 27 28 29 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 2 2 2 2 3 3 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 1 1 1 1 1 1 ...
wtDat <- subset(prDat, select = prDes$gType == "wt")
str(wtDat, max.level = 0)
'data.frame': 29949 obs. of 20 variables:
wtDesMat <- model.matrix(~devStage, wtDes)
wtFit <- lmFit(wtDat, wtDesMat)
wtEbFit <- eBayes(wtFit)
topTable(wtEbFit)
X.Intercept. devStageP2 devStageP6 devStageP10
1423641_s_at 12.18 -0.0175 0.0750 0.0675
1438940_x_at 12.86 0.0850 0.1325 0.3425
1438657_x_at 12.78 0.1400 0.1250 -0.1850
1456736_x_at 12.32 0.1625 0.3050 0.2075
1436884_x_at 12.93 0.1775 0.3225 0.0300
1419700_a_at 12.32 0.1650 0.6475 0.8175
1435800_a_at 12.28 0.0450 0.6825 0.9000
1454613_at 12.47 -0.1075 -0.0500 -0.1025
1451240_a_at 13.00 0.3100 0.2800 0.2800
1450084_s_at 12.63 0.0825 0.0525 0.1725
devStage4_weeks AveExpr F P.Value adj.P.Val
1423641_s_at 0.1800 12.24 45350 3.574e-36 5.201e-32
1438940_x_at 0.3500 13.04 44957 3.865e-36 5.201e-32
1438657_x_at -0.4500 12.71 43486 5.210e-36 5.201e-32
1456736_x_at 0.0725 12.47 39509 1.233e-35 6.725e-32
1436884_x_at 0.0250 13.04 39269 1.302e-35 6.725e-32
1419700_a_at 0.6825 12.78 39121 1.347e-35 6.725e-32
1435800_a_at 1.0200 12.81 36668 2.410e-35 1.031e-31
1454613_at -0.3825 12.34 35835 2.962e-35 1.078e-31
1451240_a_at -0.3700 13.10 35481 3.239e-35 1.078e-31
1450084_s_at 0.2600 12.75 34411 4.265e-35 1.234e-31
colnames(coef(wtEbFit))
[1] "(Intercept)" "devStageP2" "devStageP6" "devStageP10"
[5] "devStage4_weeks"
dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit))), number=nrow(wtDat))
str(dsHits)
'data.frame': 29949 obs. of 8 variables:
$ devStageP2 : num 0.399 0.158 0.882 1.303 -2.443 ...
$ devStageP6 : num 0.195 0.48 0.8 1.19 -3.407 ...
$ devStageP10 : num 0.92 0.333 1.549 2.016 -4.311 ...
$ devStage4_weeks: num 3.96 5.11 5.53 6.19 -4.6 ...
$ AveExpr : num 6.53 9.38 7.03 8.32 8.04 ...
$ F : num 425 195 173 157 149 ...
$ P.Value : num 1.59e-17 1.52e-14 4.35e-14 1.01e-13 1.65e-13 ...
$ adj.P.Val : num 4.76e-13 2.28e-10 4.34e-10 7.58e-10 9.20e-10 ...
stripplot(gExp ~ devStage | gene, prepareData((rownames(dsHits)[c(3,6,9)])), group = gType, jitter.data = TRUE, auto.key = TRUE, type = c('p', 'a'), grid = TRUE, subset=prDes$gType == "wt")
#Exercise
wetFit <- lm(formula = gExp ~ devStage, data = prepareData((rownames(dsHits)[3])), subset=gType =="wt")
summary(wetFit)
Call:
lm(formula = gExp ~ devStage, data = prepareData((rownames(dsHits)[3])),
subset = gType == "wt")
Residuals:
Min 1Q Median 3Q Max
-0.4847 -0.2157 -0.0077 0.1781 0.8315
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.276 0.173 30.50 6.5e-15 ***
devStageP2 0.882 0.245 3.61 0.0026 **
devStageP6 0.800 0.245 3.27 0.0052 **
devStageP10 1.549 0.245 6.33 1.3e-05 ***
devStage4_weeks 5.532 0.245 22.61 5.3e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.346 on 15 degrees of freedom
Multiple R-squared: 0.977, Adjusted R-squared: 0.971
F-statistic: 159 on 4 and 15 DF, p-value: 4.31e-12
# Toptabel Boss
AdjHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit))), number=nrow(wtDat), p.value=1e-05)
str(AdjHits)
'data.frame': 350 obs. of 8 variables:
$ devStageP2 : num 0.399 0.158 0.882 1.303 -2.443 ...
$ devStageP6 : num 0.195 0.48 0.8 1.19 -3.407 ...
$ devStageP10 : num 0.92 0.333 1.549 2.016 -4.311 ...
$ devStage4_weeks: num 3.96 5.11 5.53 6.19 -4.6 ...
$ AveExpr : num 6.42 8.83 7.45 6.27 7.76 ...
$ F : num 425 195 173 157 149 ...
$ P.Value : num 1.59e-17 1.52e-14 4.35e-14 1.01e-13 1.65e-13 ...
$ adj.P.Val : num 4.76e-13 2.28e-10 4.34e-10 7.58e-10 9.20e-10 ...
AdjHits[63, c("F","adj.P.Val","devStageP6")]
F adj.P.Val devStageP6
1451633_a_at 64.01 1.049e-07 2.069
P2hit <- topTable(wtEbFit, coef = "devStageP2", number=nrow(wtDat), sort.by="none")
P10hit <- topTable(wtEbFit, coef = "devStageP10", number=nrow(wtDat), sort.by="none")
xyplot(P10hit$t~P2hit$t, aspect=1, scales=list(limit=(c(range(P2hit,P10hit)))), panel=function(...){panel.smoothScatter(...); panel.abline(0,1, col=2)})
KernSmooth 2.23 loaded
Copyright M. P. Wand 1997-2009
(loaded the KernSmooth namespace)
densityplot(~P2hit$adj.P.Val + P10hit$adj.P.Val, auto.key=TRUE)
addmargins(table(P2hit$adj.P.Val<1e-03,P10hit$adj.P.Val<1e-03, dnn=c("P2","P10")))
P10
P2 FALSE TRUE Sum
FALSE 29201 695 29896
TRUE 1 52 53
Sum 29202 747 29949
P10BY <- topTable(wtEbFit, coef = "devStageP10", number=nrow(wtDat), sort.by="none", adjust.method="BY")
Pvalues <- data.frame(raw=P10hit$P.Value, BH=P10hit$adj.P.Val, BY=P10BY$adj.P.Val)
pairs(Pvalues)
# Perform inference for some contrasts/Take home exercise
colnames(wtDesMat)
[1] "(Intercept)" "devStageP2" "devStageP6" "devStageP10"
[5] "devStage4_weeks"
(cont.matrix <- makeContrasts(P2VsI = devStageP2 - Intercept, P6VsP2 = devStageP6 - devStageP2, P10VsP6 = devStageP10 - devStageP6, fourweeksVsP10 = devStage4_weeks - devStageP10, levels = wtDesMat))
Warning: Renaming (Intercept) to Intercept
Contrasts
Levels P2VsI P6VsP2 P10VsP6 fourweeksVsP10
Intercept -1 0 0 0
devStageP2 1 -1 0 0
devStageP6 0 1 -1 0
devStageP10 0 0 1 -1
devStage4_weeks 0 0 0 1
wtFitCont <- contrasts.fit(wtFit, cont.matrix)
Warning: row names of contrasts don't match col names of coefficients
wtEbFitCont <- eBayes(wtFitCont)
ContHit <- topTable(wtEbFitCont)
cutoff <- 1e-04
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)
P2VsI P6VsP2 P10VsP6 fourweeksVsP10
-1 29903 0 32 37
0 46 29937 29907 29712
1 0 12 10 200
(hits <- rownames(prDat)[which(wtResCont[, "P2VsI"] != 0 & wtResCont[, "P6VsP2"] != 0 & wtResCont[, "P10VsP6"] == 0 & wtResCont[, "fourweeksVsP10"] == 0 )])
[1] "1419740_at" "1421084_at" "1424963_at" "1425530_a_at"
[5] "1428288_at" "1433590_at" "1435661_at" "1435800_a_at"
[9] "1445574_at" "1450723_at"
stripplot(gExp ~ devStage | gene, prepareData(hits[1:5]), group = gType, jitter.data = TRUE, auto.key = TRUE, type = c('p', 'a'), grid = TRUE, subset=prDes$gType == "wt")