Preview Iris Dataset
library(MASS)
data(iris)
# NOTE(review): attach() puts the iris columns on the search path, where they
# can silently mask (or be masked by) other objects. Nothing shown in this
# section needs it, since every call passes `iris` explicitly; it is kept only
# in case code elsewhere relies on the bare column names -- confirm and remove.
attach(iris)
# Interactive preview of the full dataset, 10 rows per page.
# DataTables option names are case-sensitive (`autoWidth`), and a cell class
# such as 'dt-center' has to be assigned per column through `columnDefs`.
# The former top-level `autowidth`, `sClass`, `className` and `digit` entries
# are not table-level DataTables options, so they have been replaced/dropped.
DT::datatable(
  iris,
  rownames = TRUE,
  options = list(
    autoWidth = TRUE,
    columnDefs = list(list(className = "dt-center", targets = "_all")),
    pageLength = 10
  )
)
Compare Boxplots for Sepal and Petal Length & Width
library(ggplot2)
library(gridExtra)
# Build one boxplot of an iris measurement by species, with the individual
# observations overlaid as jittered points.
#   mapping: aesthetic mapping created with aes(); supplies x (Species) and
#            y (the measurement column)
#   title:   plot title
# Returns the ggplot object.
# The box fill is mapped inside geom_boxplot() only, so the jittered points
# keep their default colour. (The earlier code also passed a second aes() as
# an extra positional argument to ggplot(); that argument is not the `mapping`
# parameter and has been removed.)
species_boxplot <- function(mapping, title) {
  ggplot(iris, mapping) +
    ggtitle(title) +
    geom_boxplot(aes(fill = factor(Species))) +
    guides(fill = guide_legend(title = "Species")) +
    geom_jitter()
}

# Sepal Length & Width
p1 <- species_boxplot(aes(x = Species, y = Sepal.Length), "Sepal Length")
p2 <- species_boxplot(aes(x = Species, y = Sepal.Width), "Sepal Width")
grid.arrange(p1, p2, ncol = 2, nrow = 1)

# Petal Length & Width
p3 <- species_boxplot(aes(x = Species, y = Petal.Length), "Petal Length")
p4 <- species_boxplot(aes(x = Species, y = Petal.Width), "Petal Width")
grid.arrange(p3, p4, ncol = 2, nrow = 1)

Summary Statistics
# Split Iris dataset by Species
iris.Setosa <- iris[iris$Species == "setosa", ]
iris.Versicolor <- iris[iris$Species == "versicolor", ]
iris.Virginica <- iris[iris$Species == "virginica", ]

# Lookup tables: display label -> per-species subset / measurement column.
species.subsets <- list(
  Setosa = iris.Setosa,
  Versicolor = iris.Versicolor,
  Virginica = iris.Virginica
)
measure.columns <- c(
  "Sepal Length" = "Sepal.Length",
  "Sepal Width" = "Sepal.Width",
  "Petal Length" = "Petal.Length",
  "Petal Width" = "Petal.Width"
)

# Row labels for the final table: measures vary slowest, species fastest,
# giving one row per (measure, species) pair -- 12 rows in total.
measures <- rep(names(measure.columns), each = length(species.subsets))
species.type <- rep(names(species.subsets), times = length(measure.columns))

# Create Dataframe of Summary Statistics for each Sepal/Petal metric by Species
# Each column holds the six summary() values (min, quartiles, median, mean,
# max) for one (measure, species) pair, in the same order as the labels above.
summary.dataframe <- as.data.frame(vapply(
  seq_along(measures),
  function(i) {
    values <- species.subsets[[species.type[i]]][[measure.columns[[measures[i]]]]]
    c(summary(values))
  },
  numeric(6)
))

# Transpose dataframe so that each (measure, species) pair becomes a row
summary.dataframe1 <- t(summary.dataframe)
summary.dataframe2 <- data.frame(measures, species.type, summary.dataframe1)
colnames(summary.dataframe2) <- c(
  "Petal/Sepal", "Species", "Minimum", "1st Quartile",
  "Median", "Mean", "3rd Quartile", "Maximum"
)
# Output Table
# Render all 12 summary rows on a single page, centred, with the statistics
# rounded to 3 decimals. Centring is requested per column through
# `columnDefs` and rounding through DT::formatRound(); the former top-level
# `autowidth`, `sClass`, `className` and `digit` entries are not table-level
# DataTables options, so they have been replaced/dropped.
DT::formatRound(
  DT::datatable(
    summary.dataframe2,
    rownames = FALSE,
    options = list(
      autoWidth = TRUE,
      columnDefs = list(list(className = "dt-center", targets = "_all")),
      dom = 'tips',
      pageLength = 12
    )
  ),
  columns = c(
    "Minimum", "1st Quartile", "Median", "Mean", "3rd Quartile", "Maximum"
  ),
  digits = 3
)
MANOVA (Sepal Length & Width and Petal Length & Width by Species)
# MANOVA - Multivariate Analysis of Variance
# Model the four flower measurements jointly as a function of Species.
m1 <- manova(
  cbind(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) ~ Species,
  data = iris
)
# One univariate ANOVA table per response variable.
summary.aov(m1)
## Response Sepal.Length :
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 63.212 31.606 119.26 < 2.2e-16 ***
## Residuals 147 38.956 0.265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Sepal.Width :
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 11.345 5.6725 49.16 < 2.2e-16 ***
## Residuals 147 16.962 0.1154
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Petal.Length :
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 437.10 218.551 1180.2 < 2.2e-16 ***
## Residuals 147 27.22 0.185
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Petal.Width :
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 80.413 40.207 960.01 < 2.2e-16 ***
## Residuals 147 6.157 0.042
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Multivariate Test Statistics (the univariate ANOVA tables are shown above)
# Multivariate tests of the Species effect on the fitted model m1. Each call
# below requests a different test statistic via `test`; the `##` lines after
# each call are its recorded console output.

# Wilks' lambda
summary(m1, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Species 2 0.023439 199.15 8 288 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Roy's statistic
summary(m1, test = "Roy")
## Df Roy approx F num Df den Df Pr(>F)
## Species 2 32.192 1167 4 145 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pillai's trace
summary(m1, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Species 2 1.1919 53.466 8 290 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hotelling-Lawley trace
summary(m1, test = "Hotelling-Lawley")
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## Species 2 32.477 580.53 8 286 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1