library(readr)
Day10<- read_csv("~/Documents/RobertsLab/Methylome/Day10-methcounts-all.csv", col_types = cols(`EPI-111` = col_double(),
EPI_103 = col_double()))
Day135 <- read_csv("~/Documents/RobertsLab/Methylome/Day135-methcounts-all.csv")
Sample.with.NA <- as.data.frame("EPI-103", stringsAsFactors=FALSE)
colnames(Sample.with.NA)[1] <- "Sample"
Sample.with.NA[2,1] <- "EPI-104"
Sample.with.NA[3,1] <- "EPI-111"
Sample.with.NA[4,1] <- "EPI-113"
Sample.with.NA[5,1] <- "EPI-119"
Sample.with.NA[6,1] <- "EPI-120"
Sample.with.NA[7,1] <- "EPI-127"
Sample.with.NA[8,1] <- "EPI-128"
Sample.with.NA[9,1] <- "EPI-135"
Sample.with.NA[10,1] <- "EPI-136"
Sample.with.NA[11,1] <- "EPI-143"
Sample.with.NA[12,1] <- "EPI-145"
Sample.with.NA[13,1] <- "EPI-151"
Sample.with.NA[14,1] <- "EPI-152"
Sample.with.NA[15,1] <- "EPI-153"
Sample.with.NA[16,1] <- "EPI-154"
Sample.with.NA[17,1] <- "EPI-159"
Sample.with.NA[18,1] <- "EPI-160"
Sample.with.NA[19,1] <- "EPI-161"
Sample.with.NA[20,1] <- "EPI-162"
Sample.with.NA[21,1] <- "EPI-167"
Sample.with.NA[22,1] <- "EPI-168"
Sample.with.NA[23,1] <- "EPI-169"
Sample.with.NA[24,1] <- "EPI-170"
Sample.wo.NA <- as.data.frame("EPI-103", stringsAsFactors=FALSE)
colnames(Sample.wo.NA)[1] <- "Sample"
Sample.wo.NA[2,1] <- "EPI-104"
Sample.wo.NA[3,1] <- "EPI-111"
Sample.wo.NA[4,1] <- "EPI-113"
Sample.wo.NA[5,1] <- "EPI-119"
Sample.wo.NA[6,1] <- "EPI-120"
Sample.wo.NA[7,1] <- "EPI-127"
Sample.wo.NA[8,1] <- "EPI-128"
Sample.wo.NA[9,1] <- "EPI-135"
Sample.wo.NA[10,1] <- "EPI-136"
Sample.wo.NA[11,1] <- "EPI-143"
Sample.wo.NA[12,1] <- "EPI-145"
Sample.wo.NA[13,1] <- "EPI-151"
Sample.wo.NA[14,1] <- "EPI-152"
Sample.wo.NA[15,1] <- "EPI-153"
Sample.wo.NA[16,1] <- "EPI-154"
Sample.wo.NA[17,1] <- "EPI-159"
Sample.wo.NA[18,1] <- "EPI-160"
Sample.wo.NA[19,1] <- "EPI-161"
Sample.wo.NA[20,1] <- "EPI-162"
Sample.wo.NA[21,1] <- "EPI-167"
Sample.wo.NA[22,1] <- "EPI-168"
Sample.wo.NA[23,1] <- "EPI-169"
Sample.wo.NA[24,1] <- "EPI-170"
Sample.with.NA$mean[1] <- sum(Day10$EPI_103, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[1] <- mean(Day10$EPI_103, na.rm = TRUE)
Sample.with.NA$mean[2] <- sum(Day10$`EPI-104`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[2] <- mean(Day10$`EPI-104`, na.rm = TRUE)
Sample.with.NA$mean[3] <- sum(Day10$`EPI-111`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[3] <- mean(Day10$`EPI-111`, na.rm = TRUE)
Sample.with.NA$mean[4] <- sum(Day10$`EPI-113`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[4] <- mean(Day10$`EPI-113`, na.rm = TRUE)
Sample.with.NA$mean[5] <- sum(Day10$`EPI-119`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[5] <- mean(Day10$`EPI-119`, na.rm = TRUE)
Sample.with.NA$mean[6] <- sum(Day10$`EPI-120`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[6] <- mean(Day10$`EPI-120`, na.rm = TRUE)
Sample.with.NA$mean[7] <- sum(Day10$`EPI-127`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[7] <- mean(Day10$`EPI-127`, na.rm = TRUE)
Sample.with.NA$mean[8] <- sum(Day10$`EPI-128`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[8] <- mean(Day10$`EPI-128`, na.rm = TRUE)
Sample.with.NA$mean[9] <- sum(Day10$`EPI-135`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[9] <- mean(Day10$`EPI-135`, na.rm = TRUE)
Sample.with.NA$mean[10] <- sum(Day10$`EPI-136`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[10] <- mean(Day10$`EPI-136`, na.rm = TRUE)
Sample.with.NA$mean[11] <- sum(Day10$`EPI-143`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[1] <- mean(Day10$`EPI-143`, na.rm = TRUE)
Sample.with.NA$mean[12] <- sum(Day10$`EPI-145`, na.rm = TRUE) / nrow(Day10_methcounts_all)
Sample.wo.NA$mean[12] <- mean(Day10$`EPI-145`, na.rm = TRUE)
Sample.with.NA$trt <- as.factor(c(1,1,2,2,0,0,1,1,0,0,2,2,0,0,0,0,1,1,1,1,2,2,2,2))
Sample.wo.NA$trt <- as.factor(c(1,1,2,2,0,0,1,1,0,0,2,2,0,0,0,0,1,1,1,1,2,2,2,2))
Sample.with.NA$time <- as.factor(c(0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1))
Sample.wo.NA$time <- as.factor(c(0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1))
aov.with.NA <- aov(mean ~ trt + time, data = Sample.with.NA)
aov.wo.NA <- aov(mean ~ trt + time, data = Sample.wo.NA)
And lets see what the summary look like for the version with all NAs. As well as some simple residual plots
summary.lm(aov.with.NA)
Call:
aov(formula = mean ~ trt + time, data = Sample.with.NA)
Residuals:
Min 1Q Median 3Q Max
-0.0099056 -0.0006313 0.0002180 0.0004133 0.0125698
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1275478 0.0017802 71.647 < 2e-16 ***
trt1 -0.0010446 0.0021803 -0.479 0.637
trt2 -0.0008492 0.0021803 -0.389 0.701
time1 -0.0103189 0.0017802 -5.796 1.14e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.004361 on 20 degrees of freedom
Multiple R-squared: 0.6286, Adjusted R-squared: 0.5729
F-statistic: 11.29 on 3 and 20 DF, p-value: 0.0001501
plot(aov.with.NA)
summary.lm(aov.wo.NA)
Call:
aov(formula = mean ~ trt + time, data = Sample.wo.NA)
Residuals:
Min 1Q Median 3Q Max
-0.013714 -0.000951 0.000316 0.001512 0.008046
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.235847 0.001673 140.960 < 2e-16 ***
trt1 0.003341 0.002049 1.630 0.11869
trt2 0.001196 0.002049 0.584 0.56585
time1 -0.005002 0.001673 -2.989 0.00725 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.004098 on 20 degrees of freedom
Multiple R-squared: 0.3684, Adjusted R-squared: 0.2737
F-statistic: 3.888 on 3 and 20 DF, p-value: 0.02435
So both show time as a signfiicant predictor variable. That’s something!
How about interactions?
int.aov.with.NA <- aov(mean ~ trt*time, data = Sample.with.NA)
summary.lm(int.aov.with.NA)
Call:
aov(formula = mean ~ trt * time, data = Sample.with.NA)
Residuals:
Min 1Q Median 3Q Max
-0.0094923 0.0000000 0.0000000 0.0002328 0.0129831
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.128179 0.002283 56.138 < 2e-16 ***
trt1 -0.002089 0.003229 -0.647 0.52582
trt2 -0.001698 0.003229 -0.526 0.60532
time1 -0.011581 0.003229 -3.587 0.00211 **
trt1:time1 0.002089 0.004567 0.457 0.65280
trt2:time1 0.001698 0.004567 0.372 0.71429
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.004567 on 18 degrees of freedom
Multiple R-squared: 0.6335, Adjusted R-squared: 0.5317
F-statistic: 6.222 on 5 and 18 DF, p-value: 0.001617
Again, not super exciting. Just showing that there’s a difference in methylation by time.
What is interesting however is the normal QQ plot. That is a beautiful example of a “heavy-tailed” distribution. Which means we’re definetly departing from our assumption of normality. Was worth a shot though!