Read in my day 135 data.
library(readr)
Day135 <- read_csv("~/Documents/RobertsLab/Methylome/Day135-methcounts-all.csv")
Calculate methylation means by sample. This is all adapted from the Day 10 stuff, so it should look familiar.
Sample.with.NA <- as.data.frame("EPI-151", stringsAsFactors=FALSE)
colnames(Sample.with.NA)[1] <- "Sample"
Sample.with.NA[2,1] <- "EPI-152"
Sample.with.NA[3,1] <- "EPI-153"
Sample.with.NA[4,1] <- "EPI-154"
Sample.with.NA[5,1] <- "EPI-159"
Sample.with.NA[6,1] <- "EPI-160"
Sample.with.NA[7,1] <- "EPI-161"
Sample.with.NA[8,1] <- "EPI-162"
Sample.with.NA[9,1] <- "EPI-167"
Sample.with.NA[10,1] <- "EPI-168"
Sample.with.NA[11,1] <- "EPI-169"
Sample.with.NA[12,1] <- "EPI-170"
Sample.wo.NA <- as.data.frame("EPI-151", stringsAsFactors=FALSE)
colnames(Sample.wo.NA)[1] <- "Sample"
Sample.wo.NA[2,1] <- "EPI-152"
Sample.wo.NA[3,1] <- "EPI-153"
Sample.wo.NA[4,1] <- "EPI-154"
Sample.wo.NA[5,1] <- "EPI-159"
Sample.wo.NA[6,1] <- "EPI-160"
Sample.wo.NA[7,1] <- "EPI-161"
Sample.wo.NA[8,1] <- "EPI-162"
Sample.wo.NA[9,1] <- "EPI-167"
Sample.wo.NA[10,1] <- "EPI-168"
Sample.wo.NA[11,1] <- "EPI-169"
Sample.wo.NA[12,1] <- "EPI-170"
Sample.with.NA$mean[1] <- sum(Day135$EPI_151, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[1] <- mean(Day135$EPI_151, na.rm = TRUE)
Sample.with.NA$mean[2] <- sum(Day135$`EPI-152`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[2] <- mean(Day135$`EPI-152`, na.rm = TRUE)
Sample.with.NA$mean[3] <- sum(Day135$`EPI-153`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[3] <- mean(Day135$`EPI-153`, na.rm = TRUE)
Sample.with.NA$mean[4] <- sum(Day135$`EPI-154`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[4] <- mean(Day135$`EPI-154`, na.rm = TRUE)
Sample.with.NA$mean[5] <- sum(Day135$`EPI-159`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[5] <- mean(Day135$`EPI-159`, na.rm = TRUE)
Sample.with.NA$mean[6] <- sum(Day135$`EPI-160`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[6] <- mean(Day135$`EPI-160`, na.rm = TRUE)
Sample.with.NA$mean[7] <- sum(Day135$`EPI-161`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[7] <- mean(Day135$`EPI-161`, na.rm = TRUE)
Sample.with.NA$mean[8] <- sum(Day135$`EPI-162`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[8] <- mean(Day135$`EPI-162`, na.rm = TRUE)
Sample.with.NA$mean[9] <- sum(Day135$`EPI-167`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[9] <- mean(Day135$`EPI-167`, na.rm = TRUE)
Sample.with.NA$mean[10] <- sum(Day135$`EPI-168`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[10] <- mean(Day135$`EPI-168`, na.rm = TRUE)
Sample.with.NA$mean[11] <- sum(Day135$`EPI-169`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[1] <- mean(Day135$`EPI-169`, na.rm = TRUE)
Sample.with.NA$mean[12] <- sum(Day135$`EPI-170`, na.rm = TRUE) / nrow(Day135)
Sample.wo.NA$mean[12] <- mean(Day10$`EPI-170`, na.rm = TRUE)
Sample.with.NA$trt <- as.factor(c(0,0,0,0,1,1,1,1,2,2,2,2))
Sample.wo.NA$trt <- as.factor(c(0,0,0,0,1,1,1,1,2,2,2,2))
print(Sample.with.NA)
print(Sample.wo.NA)
After laboriously constructing the data frame, lets do some statistics!
aov.with.NA <- aov(mean ~ trt, data = Sample.with.NA)
aov.wo.NA <- aov(mean ~ trt, data = Sample.wo.NA)
And lets see what the summary look like for the version with all NAs. As well as some simple residual plots
summary.lm(aov.with.NA)
Call:
aov(formula = mean ~ trt, data = Sample.with.NA)
Residuals:
Min 1Q Median 3Q Max
-0.011672 -0.004542 -0.001782 0.003561 0.015415
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.118574 0.004551 26.053 8.72e-10 ***
trt1 -0.008545 0.006436 -1.328 0.217
trt2 -0.002306 0.006436 -0.358 0.728
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.009102 on 9 degrees of freedom
Multiple R-squared: 0.1733, Adjusted R-squared: -0.01037
F-statistic: 0.9436 on 2 and 9 DF, p-value: 0.4246
plot(aov.with.NA)
Well. Boo. Treatment is not a significant predictor of methylation level at Day 10, plus the Normal QQ plot looks unhappy. Ah well.
Maybe the one without NAs is better?
summary.lm(aov.wo.NA)
Call:
aov(formula = mean ~ trt, data = Sample.wo.NA)
Residuals:
Min 1Q Median 3Q Max
-0.008306 -0.005563 -0.001462 0.002318 0.017249
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.261562 0.003899 67.076 1.84e-13 ***
trt1 -0.002413 0.005515 -0.438 0.672
trt2 -0.006097 0.005515 -1.106 0.298
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.007799 on 9 degrees of freedom
Multiple R-squared: 0.1211, Adjusted R-squared: -0.07423
F-statistic: 0.62 on 2 and 9 DF, p-value: 0.5594
plot(aov.wo.NA)
Not really!
What happens if we try to combine the two, with an additional factor of time? We’ll find out.