#' ---
#' title: "Penguins Activity"
#' author: "Arnav Shah"
#' output: html_document
#' ---
# Install palmerpenguins only when it is missing, so re-running or knitting
# this script does not reinstall the package every time.
# The package name is a single word and must be quoted with straight quotes.
if (!requireNamespace("palmerpenguins", quietly = TRUE)) {
  install.packages("palmerpenguins")
}
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins) #to load the penguins dataset
## Warning: package 'palmerpenguins' was built under R version 4.5.2
##
## Attaching package: 'palmerpenguins'
##
## The following objects are masked from 'package:datasets':
##
## penguins, penguins_raw
# Load the dataset.
# The base datasets package also provides an object called `penguins` (see the
# masking message printed when palmerpenguins is attached), so name the
# package explicitly instead of relying on search-path order.
penguins <- palmerpenguins::penguins
data("penguins", package = "palmerpenguins") # to see it in the environment
# Number of penguins recorded for each species
count(penguins, species)
## # A tibble: 3 × 2
## species n
## <fct> <int>
## 1 Adelie 152
## 2 Chinstrap 68
## 3 Gentoo 124
# Number of penguins for every species/island combination present in the data
count(penguins, species, island)
## # A tibble: 5 × 3
## species island n
## <fct> <fct> <int>
## 1 Adelie Biscoe 44
## 2 Adelie Dream 56
## 3 Adelie Torgersen 52
## 4 Chinstrap Dream 68
## 5 Gentoo Biscoe 124
# Keep only the rows whose species is Adelie
filter(penguins, species == "Adelie")
## # A tibble: 152 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 142 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Count Adelie penguins by island and sex.
# Restrict to Adelie first; without the filter the counts cover all species.
penguins |>
  filter(species == "Adelie") |>
  count(island, sex)
## # A tibble: 8 × 3
## island sex n
## <fct> <fct> <int>
## 1 Biscoe female 22
## 2 Biscoe male 22
## 3 Dream female 27
## 4 Dream male 28
## 5 Dream <NA> 1
## 6 Torgersen female 24
## 7 Torgersen male 23
## 8 Torgersen <NA> 5
# Keep only three columns: species, island, and flipper_length_mm
select(penguins, species, island, flipper_length_mm)
## # A tibble: 344 × 3
## species island flipper_length_mm
## <fct> <fct> <int>
## 1 Adelie Torgersen 181
## 2 Adelie Torgersen 186
## 3 Adelie Torgersen 195
## 4 Adelie Torgersen NA
## 5 Adelie Torgersen 193
## 6 Adelie Torgersen 190
## 7 Adelie Torgersen 181
## 8 Adelie Torgersen 195
## 9 Adelie Torgersen 193
## 10 Adelie Torgersen 190
## # ℹ 334 more rows
# Add flipper length expressed in centimetres and in inches.
# Conversions: cm = mm / 10, in = mm / 25.4
mutate(
  penguins,
  flipper_length_cm = flipper_length_mm / 10,
  flipper_length_in = flipper_length_mm / 25.4
)
## # A tibble: 344 × 10
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 4 more variables: sex <fct>, year <int>, flipper_length_cm <dbl>,
## # flipper_length_in <dbl>
# Mark the data as grouped by species (rows are unchanged; only the grouping
# metadata is added)
group_by(penguins, species)
## # A tibble: 344 × 8
## # Groups: species [3]
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Per-species averages of bill length, flipper length, and body mass.
# na.rm = TRUE drops missing measurements before averaging.
penguins |>
  group_by(species) |>
  summarise(
    avg_bill_length = mean(bill_length_mm, na.rm = TRUE),
    avg_flipper_length = mean(flipper_length_mm, na.rm = TRUE),
    avg_body_mass = mean(body_mass_g, na.rm = TRUE)
  )
## # A tibble: 3 × 4
## species avg_bill_length avg_flipper_length avg_body_mass
## <fct> <dbl> <dbl> <dbl>
## 1 Adelie 38.8 190. 3701.
## 2 Chinstrap 48.8 196. 3733.
## 3 Gentoo 47.5 217. 5076.
# Per-species body-mass statistics: mean, standard deviation, minimum,
# maximum, plus the number of rows in each species group.
summarise(
  group_by(penguins, species),
  mean_body_mass = mean(body_mass_g, na.rm = TRUE),
  sd_body_mass = sd(body_mass_g, na.rm = TRUE),
  min_body_mass = min(body_mass_g, na.rm = TRUE),
  max_body_mass = max(body_mass_g, na.rm = TRUE),
  n = n()
)
## # A tibble: 3 × 6
## species mean_body_mass sd_body_mass min_body_mass max_body_mass n
## <fct> <dbl> <dbl> <int> <int> <int>
## 1 Adelie 3701. 459. 2850 4775 152
## 2 Chinstrap 3733. 384. 2700 4800 68
## 3 Gentoo 5076. 504. 3950 6300 124
# Per-species means with missing values handled explicitly.
# The means skip NA measurements (na.rm = TRUE); n counts every row in the
# group, including rows whose measurements are missing.
penguins |>
  group_by(species) |>
  summarise(
    mean_body_mass = mean(body_mass_g, na.rm = TRUE),
    mean_flipper_len = mean(flipper_length_mm, na.rm = TRUE),
    n = n()
  )
## # A tibble: 3 × 4
## species mean_body_mass mean_flipper_len n
## <fct> <dbl> <dbl> <int>
## 1 Adelie 3701. 190. 152
## 2 Chinstrap 3733. 196. 68
## 3 Gentoo 5076. 217. 124