penguins activity

Author

Wesley Samimi

# Penguin Activity! Review your wrangling skills!

Install the palmerpenguins package if you do not have it

# install.packages("palmerpenguins")

library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.5.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins) #to load the penguins dataset
Warning: package 'palmerpenguins' was built under R version 4.5.2

Attaching package: 'palmerpenguins'

The following objects are masked from 'package:datasets':

    penguins, penguins_raw

# Load the penguins dataset

# Load the palmerpenguins copy into the environment so it is visible there.
# The package is named explicitly because R >= 4.5 also ships a `penguins`
# dataset in `datasets`, with different column names.
data("penguins", package = "palmerpenguins")

Count penguins by species

# Tally the number of rows recorded for each species.
count(penguins, species)
# A tibble: 3 × 2
  species       n
  <fct>     <int>
1 Adelie      152
2 Chinstrap    68
3 Gentoo      124

Count penguins by species and island

# Tally rows for every species/island combination that occurs in the data.
count(penguins, species, island)
# A tibble: 5 × 3
  species   island        n
  <fct>     <fct>     <int>
1 Adelie    Biscoe       44
2 Adelie    Dream        56
3 Adelie    Torgersen    52
4 Chinstrap Dream        68
5 Gentoo    Biscoe      124

Filter penguins by species (Adelie)

# Keep only the Adelie rows; the result is reused in the next step.
penguinsA <- filter(penguins, species == "Adelie")

Count Adelie penguins by island and sex

# Adelie-only counts broken down by island and sex. `species` is left out of
# the grouping because penguinsA holds a single species. Rows whose sex was
# not recorded are kept and reported as their own NA group.
penguinsA |>
  count(island, sex)
# A tibble: 8 × 3
  island    sex        n
  <fct>     <fct>  <int>
1 Biscoe    female    22
2 Biscoe    male      22
3 Dream     female    27
4 Dream     male      28
5 Dream     NA         1
6 Torgersen female    24
7 Torgersen male      23
8 Torgersen NA         5

Select specific columns (species, island, flipper_length_mm)

# Narrow the table to the three columns of interest, in this order.
select(penguins, species, island, flipper_length_mm)
# A tibble: 344 × 3
   species island    flipper_length_mm
   <fct>   <fct>                 <int>
 1 Adelie  Torgersen               181
 2 Adelie  Torgersen               186
 3 Adelie  Torgersen               195
 4 Adelie  Torgersen                NA
 5 Adelie  Torgersen               193
 6 Adelie  Torgersen               190
 7 Adelie  Torgersen               181
 8 Adelie  Torgersen               195
 9 Adelie  Torgersen               193
10 Adelie  Torgersen               190
# ℹ 334 more rows

Mutate: add new columns for flipper length in cm and inches (cm = mm / 10; in = mm / 25.4)

# Add flipper length in centimetres and inches next to the millimetre column.
# A single mutate() with the native pipe matches the other steps in this
# file; the column names are unchanged.
penguins <- penguins |>
  mutate(
    flipperLengthCM = flipper_length_mm / 10,
    flipperLengthIN = flipper_length_mm / 25.4
  )

Group penguins by species

# Attach species grouping metadata; the rows themselves are unchanged.
group_by(penguins, species)
# A tibble: 344 × 10
# Groups:   species [3]
   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
   <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
 1 Adelie  Torgersen           39.1          18.7               181        3750
 2 Adelie  Torgersen           39.5          17.4               186        3800
 3 Adelie  Torgersen           40.3          18                 195        3250
 4 Adelie  Torgersen           NA            NA                  NA          NA
 5 Adelie  Torgersen           36.7          19.3               193        3450
 6 Adelie  Torgersen           39.3          20.6               190        3650
 7 Adelie  Torgersen           38.9          17.8               181        3625
 8 Adelie  Torgersen           39.2          19.6               195        4675
 9 Adelie  Torgersen           34.1          18.1               193        3475
10 Adelie  Torgersen           42            20.2               190        4250
# ℹ 334 more rows
# ℹ 4 more variables: sex <fct>, year <int>, flipperLengthCM <dbl>,
#   flipperLengthIN <dbl>

Summarize penguin data by species (basic)

# One row per species holding the number of penguins in that group.
summarise(group_by(penguins, species), n = n())
# A tibble: 3 × 2
  species       n
  <fct>     <int>
1 Adelie      152
2 Chinstrap    68
3 Gentoo      124

Summarize penguin data by species (additional statistics)

# Per-species summary with no missing-value handling: any group containing an
# NA in a measured column yields NA for the statistic built from it.
summarise(
  group_by(penguins, species),
  n = n(),
  mean_mass = mean(body_mass_g),
  max_flipper_length = max(flipper_length_mm),
  percent_female = sum(sex == "female") / n()
)
# A tibble: 3 × 5
  species       n mean_mass max_flipper_length percent_female
  <fct>     <int>     <dbl>              <int>          <dbl>
1 Adelie      152       NA                  NA           NA  
2 Chinstrap    68     3733.                212            0.5
3 Gentoo      124       NA                  NA           NA  

Summarize penguin data by species (handling missing values)

# Discard every row missing body mass, flipper length, or sex, then summarise
# the complete rows per species.
penguins |>
  drop_na(body_mass_g, flipper_length_mm, sex) |>
  group_by(species) |>
  summarise(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    percent_female = sum(sex == "female") / n()
  )
# A tibble: 3 × 5
  species       n mean_mass max_flipper_length percent_female
  <fct>     <int>     <dbl>              <int>          <dbl>
1 Adelie      146     3706.                210          0.5  
2 Chinstrap    68     3733.                212          0.5  
3 Gentoo      119     5092.                231          0.487

OR

# Alternative: keep every row and skip missing values inside each summary.
# n still counts all penguins of the species, including those with missing
# measurements, and mean_mass uses every recorded mass.
penguins |>
  group_by(species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g, na.rm = TRUE),
    max_flipper_length = max(flipper_length_mm, na.rm = TRUE),
    # Share of females among penguins whose sex was recorded. Dividing by n()
    # would keep unknown-sex birds in the denominator and understate the share.
    percent_female = mean(sex == "female", na.rm = TRUE)
  )
# A tibble: 3 × 5
  species       n mean_mass max_flipper_length percent_female
  <fct>     <int>     <dbl>              <int>          <dbl>
1 Adelie      152     3701.                210          0.5  
2 Chinstrap    68     3733.                212          0.5  
3 Gentoo      124     5076.                231          0.487