# Flatten the built-in HairEyeColor table into long format: one row per
# Hair x Eye x Sex cell, with the cell count stored in Freq.
hair_df <- as.data.frame(HairEyeColor)
# Preview the first six rows (columns: Hair, Eye, Sex, Freq)
head(hair_df, n = 6L)
##    Hair   Eye  Sex Freq
## 1 Black Brown Male   32
## 2 Brown Brown Male   53
## 3   Red Brown Male   10
## 4 Blond Brown Male    3
## 5 Black  Blue Male   11
## 6 Brown  Blue Male   50
# Total count for each Hair color, summed over every Eye and Sex level.
# NOTE(review): `%>%`, group_by() and summarise() come from dplyr, but no
# library(dplyr) call is visible in this view -- confirm it is attached earlier.
tot_by_hair <- hair_df %>%
  group_by(Hair) %>%
  summarise(Total = sum(Freq))
tot_by_hair
## # A tibble: 4 × 2
##   Hair  Total
##   <fct> <dbl>
## 1 Black   108
## 2 Brown   286
## 3 Red      71
## 4 Blond   127
# Total count for each Eye color, summed over every Hair and Sex level
tot_by_eye <- hair_df %>%
  group_by(Eye) %>%
  summarise(Total = sum(Freq))
tot_by_eye
## # A tibble: 4 × 2
##   Eye   Total
##   <fct> <dbl>
## 1 Brown   220
## 2 Blue    215
## 3 Hazel    93
## 4 Green    64
# Total count for each Sex, summed over every Hair and Eye level
tot_by_sex <- hair_df %>%
  group_by(Sex) %>%
  summarise(Total = sum(Freq))
tot_by_sex
## # A tibble: 2 × 2
##   Sex    Total
##   <fct>  <dbl>
## 1 Male     279
## 2 Female   313
# Grand total of observations: the sum of every cell count in Freq
overall_total <- sum(hair_df[["Freq"]])
overall_total
## [1] 592
# Summary statistics of the per-cell counts (the Freq column):
# centre, spread, extremes and quartiles.
mean_freq <- mean(hair_df[["Freq"]])
median_freq <- median(hair_df[["Freq"]])
sd_freq <- sd(hair_df[["Freq"]])
var_freq <- var(hair_df[["Freq"]])
range_freq <- range(hair_df[["Freq"]])
# Quartile probabilities 0.25, 0.50 and 0.75
quantiles_freq <- quantile(
  hair_df[["Freq"]],
  probs = seq(0.25, 0.75, by = 0.25)
)

# Report each statistic as a labelled sentence (values rounded to 2 decimals
# where rounding is applied)
print(paste(
  "Mean frequency (counts per Hair-Eye-Sex cell):",
  round(mean_freq, digits = 2)
))
## [1] "Mean frequency (counts per Hair-Eye-Sex cell): 18.5"
print(paste("Median frequency:", median_freq))
## [1] "Median frequency: 10"
print(paste(
  "Standard deviation of frequency:",
  round(sd_freq, digits = 2)
))
## [1] "Standard deviation of frequency: 18.24"
print(paste(
  "Variance of frequency:",
  round(var_freq, digits = 2)
))
## [1] "Variance of frequency: 332.77"
print(paste(
  "Range of frequency: from", range_freq[[1]],
  "to", range_freq[[2]]
))
## [1] "Range of frequency: from 2 to 66"
# toString() joins the rounded quartiles with ", "
print(paste(
  "25th, 50th, 75th percentiles:",
  toString(round(quantiles_freq, digits = 2))
))
## [1] "25th, 50th, 75th percentiles: 7, 10, 29.25"
# The five Hair-Eye-Sex cells with the largest counts:
# sort by Freq descending, then keep the first five rows.
top5 <- hair_df %>%
  arrange(desc(Freq)) %>%
  slice(seq_len(5))
top5
##    Hair   Eye    Sex Freq
## 1 Brown Brown Female   66
## 2 Blond  Blue Female   64
## 3 Brown Brown   Male   53
## 4 Brown  Blue   Male   50
## 5 Black Brown Female   36
# Eye color with the largest overall total:
# first row of tot_by_eye after sorting by Total descending.
eye_most <- tot_by_eye %>%
  arrange(desc(Total)) %>%
  slice(1L)
eye_most
## # A tibble: 1 × 2
##   Eye   Total
##   <fct> <dbl>
## 1 Brown   220
# Bar chart of the total count per Hair color; bars are filled by Hair and the
# legend is hidden because the x axis already labels each bar.
ggplot(tot_by_hair, aes(Hair, Total, fill = Hair)) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Total count by Hair color",
    x = "Hair color",
    y = "Total count"
  ) +
  theme_minimal()

# Bar chart of the total count per Eye color; bars are filled by Eye and the
# legend is hidden because the x axis already labels each bar.
ggplot(tot_by_eye, aes(Eye, Total, fill = Eye)) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Total count by Eye color",
    x = "Eye color",
    y = "Total count"
  ) +
  theme_minimal()

# Stacked bar chart: one bar per Hair color, segments colored by Eye.
# hair_df has one row per Hair-Eye-Sex cell, so the rows for both sexes are
# stacked within each Eye color.
ggplot(hair_df, aes(Hair, Freq, fill = Eye)) +
  geom_col(position = "stack") +
  labs(
    title = "Hair and Eye Color Distribution",
    x = "Hair Color",
    y = "Count"
  ) +
  theme_minimal()

# Closing interpretation of the results. The message is split across several
# literals only to keep source lines short; cat() joins them with sep = "".
# cat() does not append a newline by itself, so one is added explicitly to
# terminate the output line instead of leaving the console mid-line.
cat(
  "Interpretation: The most common eye color in this dataset is Brown, ",
  "and the distribution of counts is skewed toward darker hair/eye ",
  "combinations in this sample. The mean frequency per combination cell ",
  "is low because the table breaks down population across many cells.",
  "\n",
  sep = ""
)
## Interpretation: The most common eye color in this dataset is Brown, and the distribution of counts is skewed toward darker hair/eye combinations in this sample. The mean frequency per combination cell is low because the table breaks down population across many cells.