Seminar06.R

Abrar — Feb 13, 2014, 10:28 PM

library(limma)
library(lattice)
source("../Seminar05/Seminar05.R")

'data.frame':   29949 obs. of  39 variables:
'data.frame':   39 obs. of  4 variables:
 $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
 $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
'data.frame':   78 obs. of  6 variables:
 $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
 $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
 $ gExp    : num  10.93 10.74 10.67 10.68 9.61 ...
 $ gene    : Factor w/ 2 levels "1419655_at","1438815_at": 1 1 1 1 1 1 1 1 1 1 ...
'data.frame':   39 obs. of  6 variables:
 $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
 $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
 $ gExp    : num  7.04 7.48 7.37 6.94 6.16 ...
 $ gene    : Factor w/ 1 level "1456341_a_at": 1 1 1 1 1 1 1 1 1 1 ...
'data.frame':   39 obs. of  6 variables:
 $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
 $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
 $ gExp    : num  8.02 9.05 8.71 8.92 6.8 ...
 $ gene    : Factor w/ 1 level "1448690_at": 1 1 1 1 1 1 1 1 1 1 ...


prDat <- read.table("../data/GSE4051_data.tsv")
str(prDat, max.level = 0)

'data.frame':   29949 obs. of  39 variables:

prDes <- readRDS("../data/GSE4051_design.rds")
str(prDes)

'data.frame':   39 obs. of  4 variables:
 $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
 $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...



# Optional take-home exercise
set.seed(400)
m <- 1000 # genes
n <- 3 #sample size
g <- 2 #gType
x <- matrix(rnorm(m * n, mean=2, sd=5), nrow = m)
colnames(x)=paste0("sample",seq_len(ncol(x)))
xDat <- data.frame(x, group = rep(paste("g", 1:g, sep=""), each=m/g))

obsVars <- apply(x, 1, var)
summary(obsVars)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.01    6.75   17.90   25.40   35.00  186.00

densityplot(~obsVars, n=200)

plot of chunk unnamed-chunk-1


#Fit a linear model
wtDes <- subset(prDes, gType == "wt")
str(wtDes)

'data.frame':   20 obs. of  4 variables:
 $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
 $ sidNum  : num  20 21 22 23 24 25 26 27 28 29 ...
 $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 2 2 2 2 3 3 ...
 $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 1 1 1 1 1 1 ...

wtDat <- subset(prDat, select = prDes$gType == "wt")
str(wtDat, max.level = 0)

'data.frame':   29949 obs. of  20 variables:


wtDesMat <- model.matrix(~devStage, wtDes)
wtFit <- lmFit(wtDat, wtDesMat)
wtEbFit <- eBayes(wtFit)
topTable(wtEbFit)

             X.Intercept. devStageP2 devStageP6 devStageP10
1423641_s_at        12.18    -0.0175     0.0750      0.0675
1438940_x_at        12.86     0.0850     0.1325      0.3425
1438657_x_at        12.78     0.1400     0.1250     -0.1850
1456736_x_at        12.32     0.1625     0.3050      0.2075
1436884_x_at        12.93     0.1775     0.3225      0.0300
1419700_a_at        12.32     0.1650     0.6475      0.8175
1435800_a_at        12.28     0.0450     0.6825      0.9000
1454613_at          12.47    -0.1075    -0.0500     -0.1025
1451240_a_at        13.00     0.3100     0.2800      0.2800
1450084_s_at        12.63     0.0825     0.0525      0.1725
             devStage4_weeks AveExpr     F   P.Value adj.P.Val
1423641_s_at          0.1800   12.24 45350 3.574e-36 5.201e-32
1438940_x_at          0.3500   13.04 44957 3.865e-36 5.201e-32
1438657_x_at         -0.4500   12.71 43486 5.210e-36 5.201e-32
1456736_x_at          0.0725   12.47 39509 1.233e-35 6.725e-32
1436884_x_at          0.0250   13.04 39269 1.302e-35 6.725e-32
1419700_a_at          0.6825   12.78 39121 1.347e-35 6.725e-32
1435800_a_at          1.0200   12.81 36668 2.410e-35 1.031e-31
1454613_at           -0.3825   12.34 35835 2.962e-35 1.078e-31
1451240_a_at         -0.3700   13.10 35481 3.239e-35 1.078e-31
1450084_s_at          0.2600   12.75 34411 4.265e-35 1.234e-31

colnames(coef(wtEbFit))

[1] "(Intercept)"     "devStageP2"      "devStageP6"      "devStageP10"    
[5] "devStage4_weeks"

dsHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit))), number=nrow(wtDat))
str(dsHits)

'data.frame':   29949 obs. of  8 variables:
 $ devStageP2     : num  0.399 0.158 0.882 1.303 -2.443 ...
 $ devStageP6     : num  0.195 0.48 0.8 1.19 -3.407 ...
 $ devStageP10    : num  0.92 0.333 1.549 2.016 -4.311 ...
 $ devStage4_weeks: num  3.96 5.11 5.53 6.19 -4.6 ...
 $ AveExpr        : num  6.53 9.38 7.03 8.32 8.04 ...
 $ F              : num  425 195 173 157 149 ...
 $ P.Value        : num  1.59e-17 1.52e-14 4.35e-14 1.01e-13 1.65e-13 ...
 $ adj.P.Val      : num  4.76e-13 2.28e-10 4.34e-10 7.58e-10 9.20e-10 ...

stripplot(gExp ~ devStage | gene, prepareData((rownames(dsHits)[c(3,6,9)])), group = gType, jitter.data = TRUE, auto.key = TRUE, type = c('p', 'a'), grid = TRUE, subset=prDes$gType == "wt")

plot of chunk unnamed-chunk-1


#Exercise
wetFit <- lm(formula = gExp ~ devStage, data = prepareData((rownames(dsHits)[3])), subset=gType =="wt")
summary(wetFit)


Call:
lm(formula = gExp ~ devStage, data = prepareData((rownames(dsHits)[3])), 
    subset = gType == "wt")

Residuals:
    Min      1Q  Median      3Q     Max 
-0.4847 -0.2157 -0.0077  0.1781  0.8315 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)        5.276      0.173   30.50  6.5e-15 ***
devStageP2         0.882      0.245    3.61   0.0026 ** 
devStageP6         0.800      0.245    3.27   0.0052 ** 
devStageP10        1.549      0.245    6.33  1.3e-05 ***
devStage4_weeks    5.532      0.245   22.61  5.3e-13 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.346 on 15 degrees of freedom
Multiple R-squared:  0.977, Adjusted R-squared:  0.971 
F-statistic:  159 on 4 and 15 DF,  p-value: 4.31e-12



# Toptabel Boss
AdjHits <- topTable(wtEbFit, coef = grep("devStage", colnames(coef(wtEbFit))), number=nrow(wtDat), p.value=1e-05)
str(AdjHits)

'data.frame':   350 obs. of  8 variables:
 $ devStageP2     : num  0.399 0.158 0.882 1.303 -2.443 ...
 $ devStageP6     : num  0.195 0.48 0.8 1.19 -3.407 ...
 $ devStageP10    : num  0.92 0.333 1.549 2.016 -4.311 ...
 $ devStage4_weeks: num  3.96 5.11 5.53 6.19 -4.6 ...
 $ AveExpr        : num  6.42 8.83 7.45 6.27 7.76 ...
 $ F              : num  425 195 173 157 149 ...
 $ P.Value        : num  1.59e-17 1.52e-14 4.35e-14 1.01e-13 1.65e-13 ...
 $ adj.P.Val      : num  4.76e-13 2.28e-10 4.34e-10 7.58e-10 9.20e-10 ...

AdjHits[63, c("F","adj.P.Val","devStageP6")]

                 F adj.P.Val devStageP6
1451633_a_at 64.01 1.049e-07      2.069


P2hit <- topTable(wtEbFit, coef = "devStageP2", number=nrow(wtDat), sort.by="none")
P10hit <- topTable(wtEbFit, coef = "devStageP10", number=nrow(wtDat), sort.by="none")

xyplot(P10hit$t~P2hit$t, aspect=1, scales=list(limit=(c(range(P2hit,P10hit)))), panel=function(...){panel.smoothScatter(...); panel.abline(0,1, col=2)})

KernSmooth 2.23 loaded
Copyright M. P. Wand 1997-2009
(loaded the KernSmooth namespace)

plot of chunk unnamed-chunk-1


densityplot(~P2hit$adj.P.Val + P10hit$adj.P.Val, auto.key=TRUE)

plot of chunk unnamed-chunk-1


addmargins(table(P2hit$adj.P.Val<1e-03,P10hit$adj.P.Val<1e-03, dnn=c("P2","P10")))

       P10
P2      FALSE  TRUE   Sum
  FALSE 29201   695 29896
  TRUE      1    52    53
  Sum   29202   747 29949


P10BY <- topTable(wtEbFit, coef = "devStageP10", number=nrow(wtDat), sort.by="none", adjust.method="BY")
Pvalues <- data.frame(raw=P10hit$P.Value, BH=P10hit$adj.P.Val, BY=P10BY$adj.P.Val)
pairs(Pvalues)

plot of chunk unnamed-chunk-1


# Perform inference for some contrasts/Take home exercise
colnames(wtDesMat)

[1] "(Intercept)"     "devStageP2"      "devStageP6"      "devStageP10"    
[5] "devStage4_weeks"

(cont.matrix <- makeContrasts(P2VsI = devStageP2 - Intercept, P6VsP2 = devStageP6 - devStageP2, P10VsP6 = devStageP10 - devStageP6, fourweeksVsP10 = devStage4_weeks - devStageP10, levels = wtDesMat))

Warning: Renaming (Intercept) to Intercept

                 Contrasts
Levels            P2VsI P6VsP2 P10VsP6 fourweeksVsP10
  Intercept          -1      0       0              0
  devStageP2          1     -1       0              0
  devStageP6          0      1      -1              0
  devStageP10         0      0       1             -1
  devStage4_weeks     0      0       0              1

wtFitCont <- contrasts.fit(wtFit, cont.matrix)

Warning: row names of contrasts don't match col names of coefficients

wtEbFitCont <- eBayes(wtFitCont)
ContHit <- topTable(wtEbFitCont)

cutoff <- 1e-04
wtResCont <- decideTests(wtEbFitCont, p.value = cutoff, method = "global")
summary(wtResCont)

   P2VsI P6VsP2 P10VsP6 fourweeksVsP10
-1 29903      0      32             37
0     46  29937   29907          29712
1      0     12      10            200

(hits <- rownames(prDat)[which(wtResCont[, "P2VsI"] != 0 & wtResCont[, "P6VsP2"] !=  0 & wtResCont[, "P10VsP6"] ==  0 & wtResCont[, "fourweeksVsP10"] ==  0 )])

 [1] "1419740_at"   "1421084_at"   "1424963_at"   "1425530_a_at"
 [5] "1428288_at"   "1433590_at"   "1435661_at"   "1435800_a_at"
 [9] "1445574_at"   "1450723_at"

stripplot(gExp ~ devStage | gene, prepareData(hits[1:5]), group = gType, jitter.data = TRUE, auto.key = TRUE, type = c('p', 'a'), grid = TRUE, subset=prDes$gType == "wt")

plot of chunk unnamed-chunk-1