a

The chosen GSE number was GSE59193: The importance of vesicular trafficking pathways in genome stability of diploid Saccharomyces cerevisiae cells

b

Summary

Oral squamous cell carcinoma cell lines, OSC19 and OSC20, presented aggressive proliferation in vitro but displayed discrete cellular phenotypes and lymphangiogenic activity when transplanted into tongues of nude mice. We thus aimed to explore the molecular factors responsible for their malignant properties.

Overall design

To overview the genetic diversity in relation to the cellular behavior and susceptibility to a stromal impact upon transplantation, genes differentially expressed between pre-transplantation (culture cells in vitro) and post-transplantation (developed tumor in vivo) were analyzed by microarray.

c

A test that I’m interested in doing is which of the genes are differentially expressed before and after transplantation. The data is pre-normalized but a log transformation is applied.

#   Differential expression analysis with limma
library(Biobase)
library(GEOquery)
library(limma)

# load series and platform data from GEO

gset <- getGEO("GSE62320", GSEMatrix =TRUE)
if (length(gset) > 1) idx <- grep("GPL13497", attr(gset, "names")) else idx <- 1
gset <- gset[[idx]]

# make proper column names to match toptable 
fvarLabels(gset) <- make.names(fvarLabels(gset))

# group names for all samples
sml <- c("G0","G0","G1","G1");

# log2 transform
ex <- exprs(gset)
qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
LogC <- (qx[5] > 100) ||
          (qx[6]-qx[1] > 50 && qx[2] > 0) ||
          (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
if (LogC) { ex[which(ex <= 0)] <- NaN
  exprs(gset) <- log2(ex) }

boxplot(exprs(gset))

d

fl <- as.factor(sml)
gset$description <- fl
design <- model.matrix(~ description + 0, gset)
colnames(design) <- levels(fl)
fit <- lmFit(gset, design)
cont.matrix <- makeContrasts(G1-G0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2, 0.01)
## Warning: Zero sample variances detected, have been offset
fit2$df.prior
## [1] 0.7527365
fit2$s2.prior
## [1] 0.02322474

e

hist(fit2$sigma^2, breaks = 50, col=4, probability = T, ylim = c(0, 1))
lines(density(fit2$s2.post), col = 2)
box()

f

# original variance estimates
fit2$sigma[1]^2
## A_23_P100001 
##      1.97197
# eBayes variance estimates
fit2$s2.post[1]
## A_23_P100001 
##     1.439085

g

source("http://www.public.iastate.edu/~dnett/microarray/multtest.txt")
p <- fit2$p.value
plot.ub.mix(p, ub.mix(p))

h

qvals=jabes.q(p)
fdrcuts=c(0.01, 0.05, 0.10)
rbind(fdrcuts, lapply(fdrcuts, function(x) sum(qvals <= x)))
##         [,1] [,2] [,3]
## fdrcuts 0.01 0.05 0.1 
##         0    178  5876

i

topTable(fit2)[['ID']]
##  [1] A_33_P3280845 A_32_P148824  A_33_P3340404 A_23_P32500   A_23_P202071 
##  [6] A_23_P17190   A_23_P2705    A_23_P300033  A_33_P3234989 A_33_P3414127
## 34184 Levels: (-)3xSLv1 (+)E1A_r60_1 (+)E1A_r60_3 ... GE_BrightCorner

j

eppde=ppde(p, ub.mix(p))
df <- as.data.frame(fit2)

top <- head(df[order(df$p.value), ], 5)
merge(top, eppde, by = 0)[ , -(1:26)]
##        p.value     lods        F    F.p.value   G1 - G0
## 1 1.831728e-05 2.996339 4189.908 1.831728e-05 0.9843681
## 2 1.644661e-05 3.021134 4531.194 1.644661e-05 0.9849142
## 3 1.107733e-05 3.099265 6039.077 1.107733e-05 0.9867612
## 4 7.733554e-06 3.155207 7841.317 7.733554e-06 0.9882457
## 5 1.414960e-05 3.053149 5054.771 1.414960e-05 0.9856454
bot <- tail(df[order(df$p.value), ], 5)
merge(bot, eppde, by = 0)[ , -(1:26)]
##   p.value      lods F F.p.value G1 - G0
## 1       1 -7.483538 0         1       0
## 2       1 -7.483538 0         1       0
## 3       1 -7.483538 0         1       0
## 4       1 -7.483538 0         1       0
## 5       1 -7.483538 0         1       0