#Penguin Activity! Review your wrangling skills!

Install the palmerpenguins package if you do not have it

#install.packages("palmerpenguins")

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins) #to load the penguins dataset
## Warning: package 'palmerpenguins' was built under R version 4.5.2
## 
## Attaching package: 'palmerpenguins'
## 
## The following objects are masked from 'package:datasets':
## 
##     penguins, penguins_raw

# Print the penguins tibble to preview its first rows and column types

penguins
## # A tibble: 344 × 8
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>

# Straight quotes are required by the R parser; the package is named
# explicitly because the datasets package also provides a `penguins` object.
data("penguins", package = "palmerpenguins") # to see it in the environment

Count penguins by species

# Tally the rows for each species only (island is not part of this count)
penguins |>
  count(species)
## # A tibble: 3 × 2
##   species       n
##   <fct>     <int>
## 1 Adelie      152
## 2 Chinstrap    68
## 3 Gentoo      124

Count penguins by species and island

# One row per species/island pairing that occurs in the data
penguins |> count(species, island)
## # A tibble: 5 × 3
##   species   island        n
##   <fct>     <fct>     <int>
## 1 Adelie    Biscoe       44
## 2 Adelie    Dream        56
## 3 Adelie    Torgersen    52
## 4 Chinstrap Dream        68
## 5 Gentoo    Biscoe      124

Filter penguins by species (Adelie)

# Keep only the rows whose species is Adelie
penguins |> filter(species == "Adelie")
## # A tibble: 152 × 8
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 142 more rows
## # ℹ 2 more variables: sex <fct>, year <int>

Count Adelie penguins by island and sex

# Restrict to Adelie penguins, then tally rows for each island/sex pairing.
# count() keeps missing sex values as their own <NA> group instead of
# dropping them.
penguins |>
  filter(species == "Adelie") |>
  count(island, sex)
## # A tibble: 8 × 3
##   island    sex        n
##   <fct>     <fct>  <int>
## 1 Biscoe    female    22
## 2 Biscoe    male      22
## 3 Dream     female    27
## 4 Dream     male      28
## 5 Dream     <NA>       1
## 6 Torgersen female    24
## 7 Torgersen male      23
## 8 Torgersen <NA>       5

Select specific columns (species, island, flipper_length_mm)

# Narrow the table to the three named columns, leaving all rows in place
penguins |> select(species, island, flipper_length_mm)
## # A tibble: 344 × 3
##    species island    flipper_length_mm
##    <fct>   <fct>                 <int>
##  1 Adelie  Torgersen               181
##  2 Adelie  Torgersen               186
##  3 Adelie  Torgersen               195
##  4 Adelie  Torgersen                NA
##  5 Adelie  Torgersen               193
##  6 Adelie  Torgersen               190
##  7 Adelie  Torgersen               181
##  8 Adelie  Torgersen               195
##  9 Adelie  Torgersen               193
## 10 Adelie  Torgersen               190
## # ℹ 334 more rows

Create new columns for flipper length in centimeters and inches (cm = mm / 10; in = mm / 25.4)

# Derive flipper length in centimetres and inches from the millimetre
# measurements, store the widened table, and print it.
penguins_mutate <- penguins |>
  mutate(
    flipper_length_cm = flipper_length_mm / 10,
    flipper_length_in = flipper_length_mm / 25.4
  )
penguins_mutate
## # A tibble: 344 × 10
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 334 more rows
## # ℹ 4 more variables: sex <fct>, year <int>, flipper_length_cm <dbl>,
## #   flipper_length_in <dbl>

Group penguins by species

# Attach species grouping to the tibble; the rows and columns are unchanged
penguins |> group_by(species)
## # A tibble: 344 × 8
## # Groups:   species [3]
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>

Summarize penguin data by species (basic)

# One summary row per species holding its number of observations
penguins |>
  group_by(species) |>
  summarise(n = n())
## # A tibble: 3 × 2
##   species       n
##   <fct>     <int>
## 1 Adelie      152
## 2 Chinstrap    68
## 3 Gentoo      124

Summarize penguin data by species (additional statistics)

# Per-species statistics computed without any NA handling: a missing value
# inside a group makes that group's mean, max, and sum come back as NA.
penguins |>
  group_by(species) |>
  summarise(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    percent_female = sum(sex == "female") / n()
  )
## # A tibble: 3 × 5
##   species       n mean_mass max_flipper_length percent_female
##   <fct>     <int>     <dbl>              <int>          <dbl>
## 1 Adelie      152       NA                  NA           NA  
## 2 Chinstrap    68     3733.                212            0.5
## 3 Gentoo      124       NA                  NA           NA

Summarize penguin data by species (handling missing values)

# Discard every penguin missing a body mass, flipper length, or sex before
# summarising, so each statistic is computed on complete rows only.
penguins |>
  drop_na(body_mass_g, flipper_length_mm, sex) |>
  group_by(species) |>
  summarise(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    percent_female = sum(sex == "female") / n()
  )
## # A tibble: 3 × 5
##   species       n mean_mass max_flipper_length percent_female
##   <fct>     <int>     <dbl>              <int>          <dbl>
## 1 Adelie      146     3706.                210          0.5  
## 2 Chinstrap    68     3733.                212          0.5  
## 3 Gentoo      119     5092.                231          0.487

# Same per-species summary, but keeping every row and skipping missing values
# inside each statistic via na.rm = TRUE. `n` still counts all penguins.
penguins |>
  group_by(species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g, na.rm = TRUE),
    max_flipper_length = max(flipper_length_mm, na.rm = TRUE),
    # mean() of the logical drops NA sexes from BOTH numerator and
    # denominator; dividing sum(..., na.rm = TRUE) by n() would count
    # penguins of unknown sex as "not female" and understate the share.
    percent_female = mean(sex == "female", na.rm = TRUE)
  )
## # A tibble: 3 × 5
##   species       n mean_mass max_flipper_length percent_female
##   <fct>     <int>     <dbl>              <int>          <dbl>
## 1 Adelie      152     3701.                210          0.5  
## 2 Chinstrap    68     3733.                212          0.5  
## 3 Gentoo      124     5076.                231          0.487