seminar 5.R

yifanzhang — Apr 13, 2014, 10:15 PM

library(lattice) 
library(ggplot2) 
library(reshape2) 
prDat <- read.table("GSE4051_data.tsv")
prDes <- readRDS("GSE4051_design.rds")
prepareData <- function(myGenes, data, design) {
  miniDat <- t(data[myGenes, ])
  miniDat <- data.frame(gExp = as.vector(miniDat),
                        gene = factor(rep(colnames(miniDat), each =
                                            nrow(miniDat)), levels = colnames(miniDat)))
  miniDat <- suppressWarnings(data.frame(design, miniDat))
  miniDat
}
stripplotIt <- function(myData, ...) {
  stripplot(gExp ~ devStage | gene, myData,
            jitter.data = TRUE, group = gType,
            auto.key = TRUE, type = c('p', 'a'), grid = TRUE, ...)
}


newDat <- prepareData("1456341_a_at", prDat, prDes)
stripplotIt(newDat)

plot of chunk unnamed-chunk-1

t.test(gExp ~ devStage, newDat, 
       subset = devStage %in% c("P2", "4_weeks"),
       var.equal = TRUE)

    Two Sample t-test

data:  gExp by devStage
t = -18.84, df = 14, p-value = 2.411e-11
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -4.078 -3.244
sample estimates:
     mean in group P2 mean in group 4_weeks 
                6.326                 9.987 
mDat <- prepareData("1438786_a_at", prDat, prDes)
stripplotIt(newDat)

plot of chunk unnamed-chunk-1

mFit <- lm(gExp ~ devStage, mDat, subset = gType == "wt")
summary(mFit)

Call:
lm(formula = gExp ~ devStage, data = mDat, subset = gType == 
    "wt")

Residuals:
    Min      1Q  Median      3Q     Max 
-1.1565 -0.4400  0.0288  0.4915  1.2065 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)        8.523      0.379   22.50  5.7e-13 ***
devStageP2        -1.450      0.536   -2.71    0.016 *  
devStageP6        -0.107      0.536   -0.20    0.845    
devStageP10       -1.201      0.536   -2.24    0.040 *  
devStage4_weeks    0.081      0.536    0.15    0.882    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.758 on 15 degrees of freedom
Multiple R-squared:  0.497, Adjusted R-squared:  0.363 
F-statistic: 3.71 on 4 and 15 DF,  p-value: 0.0272
contMat <- matrix(c(0, 1, 0, -1, 0), nrow = 1)
sampMeans <- aggregate(gExp ~ devStage, mDat, FUN = mean,
                       subset = gType == "wt")
with(sampMeans, gExp[devStage == "P2"] - gExp[devStage == "P10"])
[1] -0.249
pDat <- prepareData("1429225_at", prDat, prDes)
stripplotIt(pDat)

plot of chunk unnamed-chunk-1

pFitBig <- lm(gExp ~ gType * devStage, pDat)
summary(pFitBig)$coef
                           Estimate Std. Error t value  Pr(>|t|)
(Intercept)                  7.3125     0.2617 27.9391 1.619e-22
gTypeNrlKO                  -0.2602     0.3998 -0.6507 5.203e-01
devStageP2                  -1.1583     0.3701 -3.1292 3.973e-03
devStageP6                  -1.2495     0.3701 -3.3757 2.110e-03
devStageP10                 -1.0718     0.3701 -2.8955 7.125e-03
devStage4_weeks             -0.9088     0.3701 -2.4551 2.032e-02
gTypeNrlKO:devStageP2        0.2804     0.5448  0.5147 6.107e-01
gTypeNrlKO:devStageP6        0.7589     0.5448  1.3929 1.742e-01
gTypeNrlKO:devStageP10       1.7914     0.5448  3.2880 2.648e-03
gTypeNrlKO:devStage4_weeks   2.2389     0.5448  4.1094 2.970e-04
pFitSmall <- lm(gExp ~ gType + devStage, pDat)
summary(pFitSmall)$coef
                Estimate Std. Error t value  Pr(>|t|)
(Intercept)       6.8652     0.2722 25.2199 3.848e-23
gTypeNrlKO        0.7836     0.2172  3.6085 1.007e-03
devStageP2       -1.0926     0.3506 -3.1161 3.780e-03
devStageP6       -0.9446     0.3506 -2.6940 1.101e-02
devStageP10      -0.2506     0.3506 -0.7147 4.798e-01
devStage4_weeks   0.1362     0.3506  0.3883 7.003e-01
anova(pFitSmall, pFitBig)
Analysis of Variance Table

Model 1: gExp ~ gType + devStage
Model 2: gExp ~ gType * devStage
  Res.Df   RSS Df Sum of Sq    F Pr(>F)    
1     33 15.12                             
2     29  7.95  4      7.17 6.54  7e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1