Seminar 6: Take Home Problem

shannonerdelyi — Feb 12, 2014, 3:42 PM

#####################################################################
# STAT 540: Seminar 6
# High Volume Linear Models
# Author: Shannon Erdelyi
# Date: 12-02-2014
#####################################################################

# libraries
library(limma)
library(lattice)
library(hexbin)

Loading required package: grid


# source code with
#   read 'dat' and 'des'
#   'miniDF' function to subset data based on gene names
#   'makeSP' function to make a stripplot
dir <- "/Users/shannonerdelyi/Dropbox/UBC/W2014/STAT 540"
source(paste(dir, "/Seminars/photoRec_Functions.R", sep=""))


#####################################################################
# Using limma on the wild types
#####################################################################

# select wild types only
wtDes <- subset(des, gType == "wt")
wtDat <- subset(dat, select = des$gType == "wt")

# linear models with limma
wtDesMat <- model.matrix(~devStage, wtDes)
wtFit <- lmFit(wtDat, wtDesMat)
wtEbFit <- eBayes(wtFit)


#####################################################################
# Developmental Stage Hits
#####################################################################

# test the effect of developmental stage
dsHits <- topTable(wtEbFit, 
                   coef = grep("devStage", colnames(coef(wtEbFit))), 
                   number=nrow(wtDat))

# how many have p-val < 0.00001
table(dsHits$adj.P < 0.00001)


FALSE  TRUE 
29599   350


# 63rd hit
dsHits[63, c("F", "adj.P.Val", "devStageP6")]

                 F adj.P.Val devStageP6
1451633_a_at 64.01 1.049e-07      2.069



#####################################################################
# P2 and P10 hits
#####################################################################

# test the effect of developmental stage
p2hit <- topTable(wtEbFit, coef = "devStageP2", 
                  number=nrow(wtDat), sort.by="none")
p10hit <- topTable(wtEbFit, coef = "devStageP10", 
                   number=nrow(wtDat), sort.by="none")

# plot test statistics against one another
xyplot(p10hit$t ~ p2hit$t,
       xlab="P2 t-statistic", ylab="P10 t-statistic",
       scales=list(tck=c(1,0), 
                   limits=c(range(c(p2hit$t, p10hit$t)))),
       panel=function(...){
         panel.hexbinplot(...)
         panel.abline(a=0, b=1, col=2)
       })

plot of chunk unnamed-chunk-1


# density plot of p-vals
# we expect to observe a greater difference 10 weeks after
# baseline compared to 2 weeks after baseline (and we do!)
densityplot(~p10hit$adj.P + p2hit$adj.P, 
            xlab="adjusted p-values",
            plot.points=F, 
            auto.key=list(x=0.95, y=0.95, corner=c(1,1)),
            scales=list(tck=c(1,0)))

plot of chunk unnamed-chunk-1


# p-values < 0.001
addmargins(table(p2hit$adj.P<0.001, p10hit$adj.P<0.001,
                 dnn=c("P2", "P10")))

       P10
P2      FALSE  TRUE   Sum
  FALSE 29201   695 29896
  TRUE      1    52    53
  Sum   29202   747 29949


# how many have p-val < 0.00001
table(dsHits$adj.P < 0.00001)


FALSE  TRUE 
29599   350


# scatterplot of P10 pvals
p10by <- topTable(wtEbFit, coef = "devStageP10", 
                   number=nrow(wtDat), sort.by="none",
                   adjust.method=c("BY"))
pvals <- data.frame(raw=p10hit$P.Val, 
                    BH=p10hit$adj.P, 
                    BY=p10by$adj.P) 
# BH pvals are larger than raw pvals (as expected)
plot(pvals)

plot of chunk unnamed-chunk-1



#####################################################################
# Contrasts
#####################################################################

cm1 <- makeContrasts(p10vp6=devStageP10-devStageP6, 
                    fourvp10=devStage4_weeks-devStageP10, 
                    levels=wtDesMat)

Warning: Renaming (Intercept) to Intercept

wtC1 <- contrasts.fit(wtFit, cm1)

Warning: row names of contrasts don't match col names of coefficients

wtEbC1 <- eBayes(wtC1)
c1Hits <- topTable(wtEbC1)

# plot top 4 hits
makeSP(miniDF(rownames(c1Hits)[1:4]), 
       subset=miniDF(rownames(c1Hits)[1:4])$gType=="wt")

plot of chunk unnamed-chunk-1


# adjust p-vals
cutoff <- 0.001
wtResC1 <- decideTests(wtEbC1, p.value=cutoff, method="global")
summary(wtResC1)

   p10vp6 fourvp10
-1      9       22
0   29935    29820
1       5      107



#####################################################################
# TAKE HOME PROBLEM
# See if you can find one or more probes that 
# have some expression changes up to P6 
# and then hold steady all the way to 4_weeks
#####################################################################

# get contrast matrix
#   e16 = p2
#   p2 = p6
(cm2 <- makeContrasts(p2ve16=devStageP6,
                      p6vp2=devStageP6-devStageP2,
                      levels=wtDesMat))

Warning: Renaming (Intercept) to Intercept

                 Contrasts
Levels            p2ve16 p6vp2
  Intercept            0     0
  devStageP2           0    -1
  devStageP6           1     1
  devStageP10          0     0
  devStage4_weeks      0     0


# estimate contrasts
wtC2 <- contrasts.fit(wtFit, cm2)

Warning: row names of contrasts don't match col names of coefficients

wtEbC2 <- eBayes(wtC2)
c2Hits <- topTable(wtEbC2)

# plot top 4 hits
# there are clear changes in the beginning stages (as expected)
makeSP(miniDF(rownames(c2Hits)),
       subset=miniDF(rownames(c2Hits))$gType=="wt")

plot of chunk unnamed-chunk-1


# direction of differences
wtResC2 <- decideTests(wtEbC2, p.value=cutoff, method="global")
summary(wtResC2)

   p2ve16 p6vp2
-1    111     0
0   29810 29942
1      28     7


# genes with expression changes up to P6
# (i.e. both contrasts in cm2 non-zero)
change <- names(which(wtResC2[, "p6vp2"]!=0 & wtResC2[, "p2ve16"]!=0))

# genes with steady expression after P6
# (i.e. contrasts in cm1 close to zero)
wtResC1 <- decideTests(wtEbC1, p.value=0.05, method="global")
steady <- names(which(wtResC1[, "p10vp6"]==0 & wtResC1[, "fourvp10"]==0))

# genes with expression changes up to P6 and steady afterward
(both <- intersect(change, steady))

[1] "1425530_a_at" "1433590_at"   "1445574_at"


# plot the results
makeSP(miniDF(both), subset=miniDF(both)$gType=="wt")

plot of chunk unnamed-chunk-1