library(knitr)
library(plyr)
library(ggplot2)
library(latticeExtra)
library(xtable)
library(directlabels) # allows for clearer labelling
library(lattice)
load("All_3_metasets.Rdata")
load("All_3_sets.Rdata")
dataList <- list(ALL = ALL.dat, APL = APL.dat, CTRL = CTRL.dat)
na_per_samp <- lapply(dataList, function(x) apply(x, 2, function(y) sum(is.na(y))))
na_per_samp <- lapply(na_per_samp, sort)
par(mai = c(2,1,1,0.5))
plot(unlist(na_per_samp), xaxt = 'n', ylab = '# NA readings (/485577)',
xlab = 'Sample', col = rep(c('red', 'green', 'blue'), laply(na_per_samp, length)),
main = 'Number of NA readings per sample')
axis(1, at = 1:length(unlist(na_per_samp)),
labels = do.call(c, lapply(na_per_samp, names)), las = 2, cex = 0.8)
legend('topleft', legend = names(na_per_samp), fill = c('red', 'green', 'blue'))
Tables of NA counts per probe:
na_per_probe <- lapply(dataList, function(x) apply(x, 1, function(x) sum(is.na(x))))
llply(na_per_probe, table)
## $ALL
##
## 0 1 2 3 4 5 6 7 8 9
## 462250 20220 2358 478 142 55 30 14 7 4
## 10 11 12 13 14 16 18 20 30
## 5 5 3 1 1 1 1 1 1
##
## $APL
##
## 0 1 2 3 4 5 7
## 482070 3440 58 2 3 3 1
##
## $CTRL
##
## 0 1 2 3 4
## 483502 2008 52 12 3
Not a huge amount of probes to worry about, so we left this alone.
### Calculate means across samples for each probe, and reshape for
### plotting
dat.probeMeans <- unlist(lapply(dataList, function(x) apply(x, 1, function(y) mean(y,
na.rm = T))))
plotDat <- data.frame(Beta = dat.probeMeans, Data = rep(c("ALL", "APL", "CTRL"),
each = nrow(ALL.dat)))
direct.label(densityplot(~Beta, data = plotDat, groups = Data, plot.points = F,
lwd = 2, panel = function(...) {
panel.densityplot(...)
panel.abline(v = 0.2, col = "red")
panel.abline(v = 0.7, col = "red")
}, xlab = "Beta", main = "Density of Beta-Values for 3 arrays"))
### Percentages: (excluding na's)
meth_probes <- llply(dataList, function(x) apply(x, 2, function(y) sum(y > 0.7,
na.rm = T)/length(y)))
meth_probes <- llply(meth_probes, sort)
meth <- unlist(meth_probes)
names(meth) <- my.strsplit(names(meth), "\\.", i = 2)
## Plot these:
colors <- rep(c("red", "green", "blue"), laply(meth_probes, length))
par(mai = c(2, 1, 1, 0.5))
barplot(meth, col = colors, las = 2, ylab = "% Meth", main = "Proportion methylated (beta>0.8) probes per sample")
legend("topleft", legend = names(meth_probes), fill = c("red", "green", "blue"),
cex = 0.7)
### Percentages: (excluding na's)
unmeth_probes <- llply(dataList, function(x) apply(x, 2, function(y) sum(y <
0.2, na.rm = T)/length(y)))
unmeth_probes <- llply(unmeth_probes, sort)
unmeth <- unlist(unmeth_probes)
names(unmeth) <- my.strsplit(names(unmeth), "\\.", i = 2)
## Plot these:
colors <- rep(c("red", "green", "blue"), laply(unmeth_probes, length))
par(mai = c(2, 1, 1, 0.5))
barplot(unmeth, col = colors, las = 2, ylab = "% Unmeth", main = "Proportion unmethylated (beta<0.2) probes per sample")
legend("topleft", legend = names(unmeth_probes), fill = c("red", "green", "blue"),
cex = 0.7)
Function for custom pairs plotting with upper triangular matrix of correlations
my.pairs <- function(..., textCex = 2, textCol = "red", method = c("pearson",
"spearman")) {
method <- match.arg(method)
cor.panel <- function(x, y) text(mean(range(x)), mean(range(y)), labels = format(cor(x,
y, method = method), digits = 3), cex = textCex, col = textCol)
pairs(..., upper.panel = cor.panel)
}
All 5 types of control samples from all data sets:
pbmc <- names(CTRL.dat)[1:3]
lcl <- names(CTRL.dat)[4:6]
nbc <- names(CTRL.dat)[7:9]
abc <- names(ALL.dat)[grep("HBC", names(ALL.dat))]
hbm <- names(APL.dat)[grep("HBM", names(APL.dat))]
ctrls.samps.list <- list(PBMC = pbmc, LCL = lcl, NBC = nbc, ABC = abc, HBM = hbm)
ctrls.samps <- unlist(ctrls.samps.list)
Some data cleaning for plotting
## Bind all data sets into one data frame:
whole.dat <- cbind(ALL.dat, APL.dat, CTRL.dat)
### Calculate average beta-value over all sets of controls:
AVG.CTRL.dat <- lapply(ctrls.samps.list, function(x) apply(whole.dat[, x], 1,
function(x) mean(x, na.rm = T)))
AVG.CTRL.dat <- do.call(cbind, AVG.CTRL.dat)
#### There are still na's (when I try to calculate correlation)
#### Temporarily remove probes with any NAs for calculation of
#### correlations:
AVG.noNA.dat <- AVG.CTRL.dat[apply(AVG.CTRL.dat, 1, function(x) !any(is.na(x))),
]
my.pairs(AVG.noNA.dat, lower.panel = function(...) {
smoothScatter(..., add = T)
abline(-0.2, 1, lty = 3, col = "red")
abline(0.2, 1, lty = 3, col = "red")
}, method = "spearman")