Lucas Schiffer
August 19, 2016
Data Analysis for the Life Sciences
“If Edison had a needle to find in a haystack, he would proceed at once with the diligence of the bee to examine straw after straw until he found the object of his search… I was a sorry witness of such doings, knowing that a little theory and calculation would have saved him ninety per cent of his labor.” - Nikola Tesla
# Gene expression p-values: one row-wise t-test per gene, comparing the two
# levels of the sample grouping factor.
# NOTE(review): rowttests() is not defined in this file; presumably it comes
# from the genefilter package, which must be attached beforehand -- confirm.
# NOTE(review): geneExpression and sampleInfo are presumably provided by
# data(GSE5859Subset) -- confirm.
data(GSE5859Subset)
g <- factor(sampleInfo$group)
results <- rowttests(geneExpression, g)
pvals <- results$p.value

# Null p-values: run the same tests on pure standard-normal noise with the
# same dimensions, so no row carries a real group difference.
# Fix the RNG state so the simulated and permuted results below can be
# reproduced from run to run.
set.seed(1)
m <- nrow(geneExpression)
n <- ncol(geneExpression)
randomData <- matrix(rnorm(n * m), m, n)
nullpvals <- rowttests(randomData, g)$p.value

# Effect size against significance; the axis label names the base-10
# logarithm that is actually plotted.
plot(
  results$dm, -log10(pvals),
  xlab = "Effect Size", ylab = "-log10(p-values)"
)

# Both histograms share one y-axis range so they can be compared directly.
hist(nullpvals, ylim = c(0, 1400))
hist(pvals, ylim = c(0, 1400))

# Permutation check: shuffle the group labels and repeat the tests on the
# real expression data.
permg <- sample(g)
permresults <- rowttests(geneExpression, permg)
hist(permresults$p.value)
# Per-sample distribution checks on the GSE5859 expression matrix.
# NOTE(review): exprs() and shist() are not defined in this file; presumably
# they come from Biobase and rafalib respectively -- confirm both are attached.
# NOTE(review): e is presumably the object provided by data(GSE5859) -- confirm.
data(GSE5859)
ge <- exprs(e) # ge for gene expression

# Deliberately corrupt sample 49 (flagged "error" in the original comment):
# dividing by log2(e) rescales a log2 value to a natural log.
# NOTE(review): this assumes ge is on the log2 scale -- confirm.
ge[, 49] <- ge[, 49] / log2(exp(1))

# One box per sample; range = 0 extends the whiskers to the data extremes.
# Sample 49 is drawn in colour 1 and every other sample in colour 2.
boxplot(
  ge,
  range = 0,
  names = seq_len(ncol(ge)),
  col = ifelse(seq_len(ncol(ge)) == 49, 1, 2)
)

# Five quantiles per sample (one row per sample after the transpose), drawn
# as one solid line per quantile. The argument is spelled out as `probs`
# instead of relying on partial matching of `prob`.
qs <- t(apply(ge, 2, quantile, probs = c(0.05, 0.25, 0.5, 0.75, 0.95)))
matplot(qs, type = "l", lty = 1)

shist(ge, unit = 0.5)
Microarray platforms
https://tinyurl.com/htv3de8
# Standard deviation of the element-wise differences between y and x.
# NOTE(review): x and y are not defined in the visible part of this file.
sd(y - x)
?sd(y-x)
[1] 0.2025465