Preview Iris Dataset

library(MASS)
# Load the built-in iris data set into the workspace.
data(iris)
# NOTE(review): attach() is generally discouraged because it copies the columns
# onto the search path. Every call visible in this script passes `iris`
# explicitly, so this line can probably be dropped once it is confirmed that
# no other code relies on bare column names.
attach(iris)

# Interactive preview of the raw data, 10 rows per page, with row names shown.
# DataTables option names are case-sensitive (`autoWidth`), and a cell class
# only takes effect when it is supplied per column through `columnDefs`.
DT::datatable(
  iris,
  rownames = TRUE,
  options = list(
    autoWidth = TRUE,
    pageLength = 10,
    columnDefs = list(list(className = "dt-center", targets = "_all"))
  )
)


Compare Boxplots for Sepal and Petal Length & Width

library(ggplot2)
library(gridExtra)

# Build one boxplot of a single measurement split by species, with the raw
# observations drawn on top.
#
# mapping: an aes() mapping that supplies x (Species) and y (the measurement).
# title:   plot title.
# data:    data frame to plot; defaults to iris.
#
# Returns the ggplot object.
species_boxplot <- function(mapping, title, data = iris) {
  ggplot(data, mapping) +
    ggtitle(title) +
    # The raw points are drawn by the jitter layer below, so the boxplot's own
    # outlier markers are hidden to avoid plotting those observations twice.
    geom_boxplot(aes(fill = Species), outlier.shape = NA) +
    guides(fill = guide_legend(title = "Species")) +
    # Jitter horizontally only: vertical jitter would move the points away
    # from their measured values.
    geom_jitter(height = 0)
}

# Sepal Length & Width
p1 <- species_boxplot(aes(x = Species, y = Sepal.Length), "Sepal Length")
p2 <- species_boxplot(aes(x = Species, y = Sepal.Width), "Sepal Width")

grid.arrange(p1, p2, ncol = 2, nrow = 1)

# Petal Length & Width
p3 <- species_boxplot(aes(x = Species, y = Petal.Length), "Petal Length")
p4 <- species_boxplot(aes(x = Species, y = Petal.Width), "Petal Width")

grid.arrange(p3, p4, ncol = 2, nrow = 1)


Summary Statistics

# Split the iris dataset by species.
iris.Setosa <- iris[iris$Species == "setosa", ]
iris.Versicolor <- iris[iris$Species == "versicolor", ]
iris.Virginica <- iris[iris$Species == "virginica", ]

# Lookup tables: display name -> per-species data, and column -> display label.
species.frames <- list(
  Setosa = iris.Setosa,
  Versicolor = iris.Versicolor,
  Virginica = iris.Virginica
)
measure.labels <- c(
  Sepal.Length = "Sepal Length",
  Sepal.Width = "Sepal Width",
  Petal.Length = "Petal Length",
  Petal.Width = "Petal Width"
)

# Every (species, measure) pair, with species varying fastest, so the rows come
# out grouped by measurement: Sepal Length x 3 species, Sepal Width x 3, ...
summary.grid <- expand.grid(
  species = names(species.frames),
  measure = names(measure.labels),
  stringsAsFactors = FALSE
)

# Create a data frame with one column of summary statistics (min, quartiles,
# median, mean, max) per Sepal/Petal metric and species.
summary.columns <- Map(
  function(species, measure) c(summary(species.frames[[species]][[measure]])),
  summary.grid$species,
  summary.grid$measure
)
names(summary.columns) <- paste(summary.grid$measure, summary.grid$species, sep = ".")
summary.dataframe <- data.frame(summary.columns)

# Transpose so that each row is one metric/species pair and each column is one
# statistic, then prepend the labels. The labels are taken from the same grid
# that produced the columns, so they cannot drift out of alignment.
summary.dataframe1 <- t(summary.dataframe)
measures <- unname(measure.labels[summary.grid$measure])
species.type <- summary.grid$species
summary.dataframe2 <- data.frame(measures, species.type, summary.dataframe1)
colnames(summary.dataframe2) <- c(
  "Petal/Sepal", "Species", "Minimum", "1st Quartile",
  "Median", "Mean", "3rd Quartile", "Maximum"
)

# Output Table
# Show all 12 summary rows on a single page without row names.
# DataTables option names are case-sensitive (`autoWidth`), and a cell class
# only takes effect when it is supplied per column through `columnDefs`.
DT::datatable(
  summary.dataframe2,
  rownames = FALSE,
  options = list(
    autoWidth = TRUE,
    dom = "tips",
    pageLength = 12,
    columnDefs = list(list(className = "dt-center", targets = "_all"))
  )
)



MANOVA (Sepal Length & Width and Petal Length & Width by Species)

# MANOVA - Multivariate Analysis of Variance
# The four flower measurements form the joint response; Species is the single
# explanatory factor.
m1 <- manova(
  cbind(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) ~ Species,
  data = iris
)

# One univariate ANOVA table per response variable.
summary.aov(m1)
##  Response Sepal.Length :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## Species       2 63.212  31.606  119.26 < 2.2e-16 ***
## Residuals   147 38.956   0.265                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Sepal.Width :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## Species       2 11.345  5.6725   49.16 < 2.2e-16 ***
## Residuals   147 16.962  0.1154                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Petal.Length :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## Species       2 437.10 218.551  1180.2 < 2.2e-16 ***
## Residuals   147  27.22   0.185                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Petal.Width :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## Species       2 80.413  40.207  960.01 < 2.2e-16 ***
## Residuals   147  6.157   0.042                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


Consider Multivariate Test Statistics (Wilks, Roy, Pillai, Hotelling-Lawley)

# Multivariate test of the Species effect in the MANOVA fit `m1`, using
# Wilks' lambda as the test statistic.
summary(m1, test = "Wilks")
##            Df    Wilks approx F num Df den Df    Pr(>F)    
## Species     2 0.023439   199.15      8    288 < 2.2e-16 ***
## Residuals 147                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same MANOVA fit `m1`, tested with Roy's largest root.
summary(m1, test = "Roy")
##            Df    Roy approx F num Df den Df    Pr(>F)    
## Species     2 32.192     1167      4    145 < 2.2e-16 ***
## Residuals 147                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same MANOVA fit `m1`, tested with Pillai's trace.
summary(m1, test = "Pillai")
##            Df Pillai approx F num Df den Df    Pr(>F)    
## Species     2 1.1919   53.466      8    290 < 2.2e-16 ***
## Residuals 147                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same MANOVA fit `m1`, tested with the Hotelling-Lawley trace.
summary(m1, test = "Hotelling-Lawley")
##            Df Hotelling-Lawley approx F num Df den Df    Pr(>F)    
## Species     2           32.477   580.53      8    286 < 2.2e-16 ***
## Residuals 147                                                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1