library(ggplot2)

# Load the built-in iris data and add two helper columns used by the plots
# below: a row index for the x-axis and a per-row plotting colour.
data(iris)
# seq_len() is safe for zero-row data, where 1:nrow() would yield c(1, 0).
iris$id <- seq_len(nrow(iris))
# Indexing by a factor uses its integer codes, so the first, second and third
# species level map to "red", "green" and "blue" respectively.
iris$colour <- c("red", "green", "blue")[iris$Species]
# NOTE: attach() is generally discouraged, but the rest of this script refers
# to the columns (id, Petal.Length, Species, colour) as bare names.
attach(iris)
# qplot() is deprecated in recent ggplot2 releases; this is the equivalent
# explicit ggplot() call (points, coloured by species).
ggplot(iris, aes(x = id, y = Petal.Length, colour = Species)) +
  geom_point()

# Same scatter plot in base graphics: default points first, then the same
# points overdrawn in their species colour, plus a legend anchored at (0, 7).
plot(id, Petal.Length)
points(id, Petal.Length, col = colour)
legend(0, 7, legend = c("setosa", "versicolor", "virginica"),
       fill = c("red", "green", "blue"))

Prediction of the petal length of irises without knowing the species.

# Baseline prediction: ignore species and use the overall mean petal length,
# drawn as a horizontal line across the coloured scatter plot.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
abline(h = mean(Petal.Length))

The predicted petal length of an iris of unknown species is the overall mean, 3.758.

### Errors in the prediction of the petal length of an iris of unknown species

# Scatter plot with the overall-mean line, plus one vertical segment per
# flower joining its observed petal length to that mean (the prediction
# error when species is ignored).
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
abline(h = mean(Petal.Length))
segments(x0 = id, y0 = Petal.Length, x1 = id, y1 = mean(Petal.Length))

Prediction of the petal length knowing the species

# Prediction using species: draw each species' mean petal length as a
# horizontal segment over that species' block of 50 row indices.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
# One mean per species, in factor-level order, as a plain numeric vector.
means <- as.vector(tapply(Petal.Length, Species, mean))
# segments() is vectorised, so all three mean lines are drawn in one call.
segments(
  x0 = c(1, 51, 101), y0 = means,
  x1 = c(50, 100, 150), y1 = means,
  col = c("red", "green", "blue")
)

Error in the prediction of petal length knowing the species

# Prediction error when species is known: each flower is joined to the mean
# of its own species by a vertical segment in the species colour.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
# One mean per species, in factor-level order, as a plain numeric vector.
means <- as.vector(tapply(Petal.Length, Species, mean))
# Horizontal species-mean lines over row blocks 1-50, 51-100 and 101-150.
segments(
  x0 = c(1, 51, 101), y0 = means,
  x1 = c(50, 100, 150), y1 = means,
  col = c("red", "green", "blue")
)
# Vertical residuals: the integer species code picks each row's own mean.
segments(
  x0 = id, y0 = Petal.Length,
  x1 = id, y1 = means[as.integer(Species)],
  col = colour
)

# Fit a one-way ANOVA of petal length on species and print its ANOVA table.
# The `##` lines below are the recorded output of that call.
model <- aov(Petal.Length ~ as.factor(Species))
anova(model)
## Analysis of Variance Table
## 
## Response: Petal.Length
##                     Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Species)   2 437.10 218.551  1180.2 < 2.2e-16 ***
## Residuals          147  27.22   0.185                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Draw the model's diagnostic plots on a 2 x 2 grid. par() returns the
# previous settings, so they are saved and restored afterwards instead of
# being reset to a hard-coded c(1, 1) layout.
old_par <- par(mfrow = c(2, 2))
plot(model)

par(old_par)

SSB: the between-group sum of squares

# Between-group variation: show how far each species mean lies from the
# overall mean, then add up those squared distances over all flowers.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
# One mean per species, in factor-level order, as a plain numeric vector.
means <- as.vector(tapply(Petal.Length, Species, mean))
# Horizontal species-mean lines over row blocks 1-50, 51-100 and 101-150.
segments(
  x0 = c(1, 51, 101), y0 = means,
  x1 = c(50, 100, 150), y1 = means,
  col = c("red", "green", "blue")
)
abline(h = mean(Petal.Length))
# For every row, a vertical segment from its species mean to the overall mean.
segments(
  x0 = id, y0 = means[as.integer(Species)],
  x1 = id, y1 = mean(Petal.Length),
  col = colour
)

# 50 flowers per species, each contributing (species mean - overall mean)^2.
sum(50 * (means - mean(Petal.Length))^2)
## [1] 437.1028

one-way ANOVA model

# Randomly permute the petal lengths across rows; the Species column is left
# untouched, so the values no longer line up with their original species.
# NOTE(review): no set.seed() call is visible in this script, so the
# permutation and the ANOVA numbers recorded further down differ between runs.
iris$Petal.Length <- sample(iris$Petal.Length)
# Remove any copy of iris already on the search path before attaching the
# shuffled data. Attaching a second copy on top of the first leaves a stale
# copy behind and triggers the "objects are masked from iris" message.
while ("iris" %in% search()) {
  detach(iris)
}
attach(iris)
# qplot() is deprecated in recent ggplot2 releases; this is the equivalent
# explicit ggplot() call (points, coloured by species).
ggplot(iris, aes(x = id, y = Petal.Length, colour = Species)) +
  geom_point()

# Same scatter plot in base graphics, with points overdrawn in species colour.
plot(id, Petal.Length)
points(id, Petal.Length, col = colour)
legend(0, 7, legend = c("setosa", "versicolor", "virginica"),
       fill = c("red", "green", "blue"))

Prediction of the petal length of irises without knowing the species.

# Baseline prediction on the shuffled data: the overall mean petal length,
# drawn as a horizontal line across the coloured scatter plot.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
abline(h = mean(Petal.Length))

The predicted petal length of an iris of unknown species is still the overall mean, 3.758, because shuffling the values does not change their mean.

### Errors in the prediction of the petal length of an iris of unknown species

# Shuffled data: scatter plot with the overall-mean line and one vertical
# segment per flower joining its petal length to that mean.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
abline(h = mean(Petal.Length))
segments(x0 = id, y0 = Petal.Length, x1 = id, y1 = mean(Petal.Length))

Prediction of the petal length knowing the species

# Shuffled data: draw each species' mean petal length as a horizontal
# segment over that species' block of 50 row indices.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
# One mean per species, in factor-level order, as a plain numeric vector.
means <- as.vector(tapply(Petal.Length, Species, mean))
# segments() is vectorised, so all three mean lines are drawn in one call.
segments(
  x0 = c(1, 51, 101), y0 = means,
  x1 = c(50, 100, 150), y1 = means,
  col = c("red", "green", "blue")
)

Error in the prediction of petal length knowing the species

# Shuffled data: each flower is joined to the mean of its own species by a
# vertical segment in the species colour.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
# One mean per species, in factor-level order, as a plain numeric vector.
means <- as.vector(tapply(Petal.Length, Species, mean))
# Horizontal species-mean lines over row blocks 1-50, 51-100 and 101-150.
segments(
  x0 = c(1, 51, 101), y0 = means,
  x1 = c(50, 100, 150), y1 = means,
  col = c("red", "green", "blue")
)
# Vertical residuals: the integer species code picks each row's own mean.
segments(
  x0 = id, y0 = Petal.Length,
  x1 = id, y1 = means[as.integer(Species)],
  col = colour
)

# Fit the same one-way ANOVA on the shuffled petal lengths and print its
# table. The `##` lines below are the recorded output of one run.
model <- aov(Petal.Length ~ as.factor(Species))
anova(model)
## Analysis of Variance Table
## 
## Response: Petal.Length
##                     Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Species)   2  14.00  6.9992  2.2847 0.1054
## Residuals          147 450.33  3.0634
# Draw the model's diagnostic plots on a 2 x 2 grid. par() returns the
# previous settings, so they are saved and restored afterwards instead of
# being reset to a hard-coded c(1, 1) layout.
old_par <- par(mfrow = c(2, 2))
plot(model)

par(old_par)

SSB: the between-group sum of squares

# Shuffled data: show how far each species mean lies from the overall mean.
plot(x = id, y = Petal.Length)
points(x = id, y = Petal.Length, col = colour)
legend(
  x = 0, y = 7,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("red", "green", "blue")
)
# One mean per species, in factor-level order, as a plain numeric vector.
means <- as.vector(tapply(Petal.Length, Species, mean))
# Horizontal species-mean lines over row blocks 1-50, 51-100 and 101-150.
segments(
  x0 = c(1, 51, 101), y0 = means,
  x1 = c(50, 100, 150), y1 = means,
  col = c("red", "green", "blue")
)
abline(h = mean(Petal.Length))
# For every row, a vertical segment from its species mean to the overall mean.
segments(
  x0 = id, y0 = means[as.integer(Species)],
  x1 = id, y1 = mean(Petal.Length),
  col = colour
)

one-way ANOVA model

# Fit the one-way ANOVA of petal length on species again; this repeats the
# aov() call made earlier on the shuffled data and overwrites `model`.
model <- aov(Petal.Length ~ as.factor(Species))
# Print the ANOVA table. The `##` lines below are the recorded output.
anova(model)
## Analysis of Variance Table
## 
## Response: Petal.Length
##                     Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Species)   2  14.00  6.9992  2.2847 0.1054
## Residuals          147 450.33  3.0634