# Flatten the built-in HairEyeColor contingency table into long format:
# one row per Hair x Eye x Sex cell, with that cell's count stored in Freq.
# The two named arguments spell out the defaults of the table method.
hair_df <- as.data.frame(
  HairEyeColor,
  responseName = "Freq",
  stringsAsFactors = TRUE
)
# Preview the first six rows (columns: Hair, Eye, Sex, Freq)
head(hair_df, n = 6L)
## Hair Eye Sex Freq
## 1 Black Brown Male 32
## 2 Brown Brown Male 53
## 3 Red Brown Male 10
## 4 Blond Brown Male 3
## 5 Black Blue Male 11
## 6 Brown Blue Male 50
# Marginal totals: sum the cell counts (Freq) over one factor at a time.
# Each result has one row per factor level and the summed count in Total.
# NOTE(review): %>%, group_by() and summarise() need dplyr attached; no
# library() call is visible in this section -- confirm it is loaded earlier.

# Totals by Hair color
tot_by_hair <- hair_df %>%
  group_by(Hair) %>%
  summarise(Total = sum(Freq))
tot_by_hair
## # A tibble: 4 × 2
## Hair Total
## <fct> <dbl>
## 1 Black 108
## 2 Brown 286
## 3 Red 71
## 4 Blond 127

# Totals by Eye color
tot_by_eye <- hair_df %>%
  group_by(Eye) %>%
  summarise(Total = sum(Freq))
tot_by_eye
## # A tibble: 4 × 2
## Eye Total
## <fct> <dbl>
## 1 Brown 220
## 2 Blue 215
## 3 Hazel 93
## 4 Green 64

# Totals by Sex
tot_by_sex <- hair_df %>%
  group_by(Sex) %>%
  summarise(Total = sum(Freq))
tot_by_sex
## # A tibble: 2 × 2
## Sex Total
## <fct> <dbl>
## 1 Male 279
## 2 Female 313
# Grand total of observations: every cell count in Freq added together
overall_total <- sum(hair_df[["Freq"]])
overall_total
## [1] 592
# Descriptive statistics of the per-cell counts (Freq): centre, spread,
# extremes and quartiles
mean_freq <- mean(hair_df[["Freq"]])
median_freq <- median(hair_df[["Freq"]])
sd_freq <- sd(hair_df[["Freq"]])
var_freq <- var(hair_df[["Freq"]])
range_freq <- range(hair_df[["Freq"]])
quantiles_freq <- quantile(
  hair_df[["Freq"]],
  probs = seq(0.25, 0.75, by = 0.25)
)
# Report each statistic as a sentence; values are rounded only for display
print(paste(
  "Mean frequency (counts per Hair-Eye-Sex cell):",
  round(mean_freq, 2)
))
## [1] "Mean frequency (counts per Hair-Eye-Sex cell): 18.5"
print(paste("Median frequency:", median_freq))
## [1] "Median frequency: 10"
print(paste(
  "Standard deviation of frequency:",
  round(sd_freq, 2)
))
## [1] "Standard deviation of frequency: 18.24"
print(paste(
  "Variance of frequency:",
  round(var_freq, 2)
))
## [1] "Variance of frequency: 332.77"
print(paste(
  "Range of frequency: from",
  range_freq[[1]],
  "to",
  range_freq[[2]]
))
## [1] "Range of frequency: from 2 to 66"
print(paste(
  "25th, 50th, 75th percentiles:",
  toString(round(quantiles_freq, 2))
))
## [1] "25th, 50th, 75th percentiles: 7, 10, 29.25"
# Five largest Hair-Eye-Sex cells: sort by count, biggest first, and keep
# the leading five rows
top5 <- hair_df %>%
  arrange(desc(Freq)) %>%
  head(n = 5)
top5
## Hair Eye Sex Freq
## 1 Brown Brown Female 66
## 2 Blond Blue Female 64
## 3 Brown Brown Male 53
## 4 Brown Blue Male 50
## 5 Black Brown Female 36
# Eye color with the highest overall total: first row of the eye totals
# once they are sorted in descending order
eye_most <- tot_by_eye %>%
  arrange(desc(Total)) %>%
  head(n = 1)
eye_most
## # A tibble: 1 × 2
## Eye Total
## <fct> <dbl>
## 1 Brown 220
# NOTE(review): the plots below need ggplot2 attached; no library() call is
# visible in this section -- confirm it is loaded earlier.

# Bar plot: total by Hair
# (fill duplicates the x axis, so the legend is switched off)
ggplot(data = tot_by_hair, mapping = aes(Hair, Total, fill = Hair)) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Hair color",
    y = "Total count",
    title = "Total count by Hair color"
  ) +
  theme_minimal()

# Bar plot: total by Eye
# (fill duplicates the x axis, so the legend is switched off)
ggplot(data = tot_by_eye, mapping = aes(Eye, Total, fill = Eye)) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Eye color",
    y = "Total count",
    title = "Total count by Eye color"
  ) +
  theme_minimal()

# Stacked bar plot: counts per Hair color, split by Eye color.
# Sex is not mapped to any aesthetic, so the Male and Female rows of a
# Hair/Eye pair are stacked in the same fill color.
ggplot(data = hair_df, mapping = aes(Hair, Freq, fill = Eye)) +
  geom_col(position = "stack") +
  labs(
    x = "Hair Color",
    y = "Count",
    title = "Hair and Eye Color Distribution"
  ) +
  theme_minimal()

# Closing interpretation of the results.
# The leading eye color is read from eye_most (the top row of the eye totals)
# instead of being typed in by hand, so the sentence cannot drift away from
# what the analysis actually computed. as.character() is required because
# Eye is a factor and cat() would otherwise print its integer code.
# The trailing "\n" ends the line so later console output starts on a new one.
cat(
  "Interpretation: The most common eye color in this dataset is ",
  as.character(eye_most[["Eye"]]),
  ", and the distribution of counts is skewed toward darker hair/eye ",
  "combinations in this sample. The mean frequency per combination cell ",
  "is low because the table breaks down population across many cells.\n",
  sep = ""
)
## Interpretation: The most common eye color in this dataset is Brown, and the distribution of counts is skewed toward darker hair/eye combinations in this sample. The mean frequency per combination cell is low because the table breaks down population across many cells.