#Load packages. Examine data
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the built-in iris data set and take a first look at it.
data(iris)
head(iris)
summary(iris)
str(iris)

# Number of observations per species. The number of species is read from the
# levels of the Species factor instead of being hard-coded as 3.
species_count <- nrow(iris) / nlevels(iris$Species)

# species_count is only meaningful for slicing rows by position when every
# species has the same number of observations and the rows are grouped by
# species, so stop early if either assumption does not hold.
stopifnot(
  all(table(iris$Species) == species_count),
  !is.unsorted(as.integer(iris$Species))
)
# Print one labelled statistic for a species on a single line.
#
# species: name of the species the statistic belongs to.
# label:   text describing the statistic, e.g. "Mean Sepal Length:".
# value:   the statistic itself; converted to text with toString().
#
# The pieces are written to the console with cat(), separated by single
# spaces (so a space also precedes the final newline). Returns NULL invisibly,
# as cat() does.
print_stats <- function(species, label, value) {
  pieces <- c(species, label, toString(value), "\n")
  cat(as.character(pieces), sep = " ")
}
# Statistics for irises, histograms, and dataframe
species_list <- c("Setosa", "Versicolor", "Virginica")

# Summary table with one row per species. The statistic columns are created
# up front (filled with NA) so the loop only has to fill in row i, rather
# than relying on `$<-` to create and recycle a new column on first use.
iris_stats <- data.frame(
  species = species_list,
  mean_sepal_lengths = NA_real_,
  median_sepal_widths = NA_real_,
  max_petal_lengths = NA_real_,
  min_petal_widths = NA_real_
)

# For each species: compute the statistics, print them, store them in
# iris_stats, and draw a histogram of petal length.
for (i in seq_along(species_list)) {
  species <- species_list[i]

  # Rows for species i, selected by position: the i-th run of species_count
  # consecutive rows. This relies on iris being grouped by species in
  # equal-sized runs, listed in the same order as species_list
  # (species_count is defined earlier in the script).
  indices <- ((i - 1) * species_count + 1):(i * species_count)
  species_rows <- iris[indices, ]

  # Calculate statistics
  mean_sepal_length <- mean(species_rows$Sepal.Length)
  median_sepal_width <- median(species_rows$Sepal.Width, na.rm = TRUE)
  max_petal_length <- max(species_rows$Petal.Length)
  min_petal_length <- min(species_rows$Petal.Length) # only used for the bins
  min_petal_width <- min(species_rows$Petal.Width)

  # Print statistics
  print_stats(species, "Mean Sepal Length:", mean_sepal_length)
  print_stats(species, "Median Sepal Width:", median_sepal_width)
  print_stats(species, "Max Petal Length:", max_petal_length)
  print_stats(species, "Min Petal Width:", min_petal_width)
  cat("\n")

  # Store stats in row i of the dataframe
  iris_stats$mean_sepal_lengths[i] <- mean_sepal_length
  iris_stats$median_sepal_widths[i] <- median_sepal_width
  iris_stats$max_petal_lengths[i] <- max_petal_length
  iris_stats$min_petal_widths[i] <- min_petal_width

  # Histogram bins: start 10% below the smallest and run to at most 10% above
  # the largest petal length, in steps of 1/15 of the observed range.
  bin_width <- (max_petal_length - min_petal_length) / 15
  bin_breaks <- seq(
    min_petal_length * .9,
    max_petal_length * 1.1,
    by = bin_width
  )

  # Plot histogram for petal length. The column is mapped by name so that
  # aes() refers to the data passed to ggplot() instead of an external
  # vector. print() is required because plots are not auto-printed inside
  # a for loop.
  petal_hist <- ggplot(data = species_rows, aes(x = Petal.Length)) +
    geom_histogram(breaks = bin_breaks, fill = "Grey", colour = "White") +
    labs(
      title = paste0(species, " Petal Lengths:"),
      x = "Length",
      y = "Count"
    )
  print(petal_hist)
}
## Setosa Mean Sepal Length: 5.006
## Setosa Median Sepal Width: 3.4
## Setosa Max Petal Length: 1.9
## Setosa Min Petal Width: 0.1
## Versicolor Mean Sepal Length: 5.936
## Versicolor Median Sepal Width: 2.8
## Versicolor Max Petal Length: 5.1
## Versicolor Min Petal Width: 1
## Virginica Mean Sepal Length: 6.588
## Virginica Median Sepal Width: 3
## Virginica Max Petal Length: 6.9
## Virginica Min Petal Width: 1.4
# Show the per-species statistics data frame (a bare expression like this is
# auto-printed when run at top level interactively or in a knitted document).
iris_stats