This analysis explores how physical characteristics of penguins — specifically flipper length and body mass — vary across species and sex. The goal is to understand how morphology differs between groups and whether clear clustering patterns exist among species.
# Print a compact overview of the data: row/column counts, then each column's
# type and its first few values.
glimpse(penguins)
## Rows: 344
## Columns: 8
## $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel…
## $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse…
## $ bill_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, …
## $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, …
## $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186…
## $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, …
## $ sex <fct> male, female, female, NA, female, male, female, male…
## $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007…
# Per-column summaries: level counts for factors, quartiles/mean for numeric
# columns, and the number of NA values where any are present.
summary(penguins)
## species island bill_length_mm bill_depth_mm
## Adelie :152 Biscoe :168 Min. :32.10 Min. :13.10
## Chinstrap: 68 Dream :124 1st Qu.:39.23 1st Qu.:15.60
## Gentoo :124 Torgersen: 52 Median :44.45 Median :17.30
## Mean :43.92 Mean :17.15
## 3rd Qu.:48.50 3rd Qu.:18.70
## Max. :59.60 Max. :21.50
## NA's :2 NA's :2
## flipper_length_mm body_mass_g sex year
## Min. :172.0 Min. :2700 female:165 Min. :2007
## 1st Qu.:190.0 1st Qu.:3550 male :168 1st Qu.:2007
## Median :197.0 Median :4050 NA's : 11 Median :2008
## Mean :200.9 Mean :4202 Mean :2008
## 3rd Qu.:213.0 3rd Qu.:4750 3rd Qu.:2009
## Max. :231.0 Max. :6300 Max. :2009
## NA's :2 NA's :2
We remove the 11 rows with missing sex values (see the summary above) so that grouping and faceting by sex do not produce an extra NA group.
# Keep only the rows where `sex` is recorded; every later plot uses this
# filtered data set.
penguins_clean <- drop_na(penguins, sex)
# Scatter plot of body mass against flipper length. Species is mapped to both
# point color and point shape; both legends share the title "Species".
ggplot(
  data = penguins_clean,
  mapping = aes(x = flipper_length_mm, y = body_mass_g)
) +
  geom_point(
    mapping = aes(color = species, shape = species),
    alpha = 0.7
  ) +
  theme_minimal() +
  labs(
    title = "Flipper Length vs Body Mass by Penguin Species",
    x = "Flipper Length (mm)",
    y = "Body Mass (g)",
    color = "Species",
    shape = "Species"
  )
# Same flipper-length / body-mass scatter, drawn as one panel per sex with
# points colored by species.
ggplot(
  data = penguins_clean,
  mapping = aes(x = flipper_length_mm, y = body_mass_g)
) +
  geom_point(mapping = aes(color = species), alpha = 0.7) +
  facet_wrap(vars(sex)) +
  theme_minimal() +
  labs(
    title = "Flipper Length vs Body Mass by Sex",
    x = "Flipper Length (mm)",
    y = "Body Mass (g)",
    color = "Species"
  )
# Bar chart of the number of rows per species in the filtered data.
# Fill mirrors the x axis, so the legend is hidden; the legend override is
# added after theme_minimal() so the complete theme does not reset it.
ggplot(
  data = penguins_clean,
  mapping = aes(x = species, fill = species)
) +
  geom_bar() +
  labs(
    y = "Count",
    x = "Species",
    title = "Count of Penguins by Species"
  ) +
  theme_minimal() +
  theme(legend.position = "none")