library(ggplot2)
library(plyr)
# Fix the random seed so the simulated samples drawn with rnorm() are
# reproducible from run to run.
set.seed(481)
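First, we generate two samples of 1000 values each, drawn from normal distributions with means 1 and 3 and a common standard deviation of 1.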
# Number of observations drawn for each of the two samples.
number <- 1000

# Long-format data: one row per observation, labelled by the sample (1 or 2)
# it belongs to. Sample 1 is drawn from N(1, 1) and sample 2 from N(3, 1);
# sample 1 is drawn first, then sample 2.
samples <- data.frame(
  sample = factor(rep(c(1, 2), each = number)),
  values = c(
    rnorm(number, mean = 1, sd = 1),
    rnorm(number, mean = 3, sd = 1)
  )
)

# Mean of `values` within each sample. The enclosing parentheses print the
# result in addition to assigning it.
(sample.means <- ddply(
  .data = samples,
  .variables = .(sample),
  .fun = summarise,
  sample.mean = mean(values)
))
## sample sample.mean
## 1 1 0.9661
## 2 2 2.9858
If we plot histograms of the two samples, with each sample mean marked by a vertical line, we see a clear difference.
# Overlaid, semi-transparent histograms of the two samples (bins 0.2 wide),
# plus one vertical line per sample at its mean, coloured by sample.
ggplot(data = samples, mapping = aes(x = values, fill = sample)) +
  geom_histogram(position = "identity", alpha = 0.7, binwidth = 0.2) +
  geom_vline(
    data = sample.means,
    mapping = aes(xintercept = sample.mean, colour = sample)
  )
# Two-sample t-test of the values in sample 1 against those in sample 2.
# var.equal = FALSE (the default) selects the Welch test, which does not
# assume equal variances. The argument expressions are kept as-is because
# t.test() echoes them in the "data:" line of its printed output.
t.test(
  x = samples$values[samples$sample == 1],
  y = samples$values[samples$sample == 2],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: samples$values[samples$sample == 1] and samples$values[samples$sample == 2]
## t = -44.28, df = 1997, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.109 -1.930
## sample estimates:
## mean of x mean of y
## 0.9661 2.9858
# One-way analysis of variance of `values` with `sample` as the grouping
# factor, followed by the ANOVA table for the fit.
aov.res <- aov(formula = values ~ sample, data = samples)
summary(aov.res)
## Df Sum Sq Mean Sq F value Pr(>F)
## sample 1 2040 2040 1961 <2e-16 ***
## Residuals 1998 2078 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
changed again