# Penguin Activity! Review your wrangling skills!

# Install the palmerpenguins package if you do not have it:

# install.packages("palmerpenguins")

library(tidyverse)
library(palmerpenguins) #to load the penguins dataset
# Print the tibble to preview its columns and first rows.
penguins 
## # A tibble: 344 × 8
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Explicitly load the dataset by name. The print above already worked without
# this call, so it is optional once palmerpenguins is attached.
data("penguins")
# Number of penguins recorded for each species.
penguins |> count(species)
## # A tibble: 3 × 2
##   species       n
##   <fct>     <int>
## 1 Adelie      152
## 2 Chinstrap    68
## 3 Gentoo      124
# Number of penguins for each species/island combination that occurs.
penguins |> count(species, island)
## # A tibble: 5 × 3
##   species   island        n
##   <fct>     <fct>     <int>
## 1 Adelie    Biscoe       44
## 2 Adelie    Dream        56
## 3 Adelie    Torgersen    52
## 4 Chinstrap Dream        68
## 5 Gentoo    Biscoe      124
# Keep only the rows for Adelie penguins.
penguins |> filter(species == "Adelie")
## # A tibble: 152 × 8
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 142 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Count Adelie penguins for every island/sex combination.
# count() does the grouping itself and returns an ungrouped tibble, so no
# leftover grouping carries into later steps (same form as
# count(species, island) above).
penguins |>
  filter(species == "Adelie") |>
  count(island, sex)
## # A tibble: 8 × 3
##   island    sex        n
##   <fct>     <fct>  <int>
## 1 Biscoe    female    22
## 2 Biscoe    male      22
## 3 Dream     female    27
## 4 Dream     male      28
## 5 Dream     <NA>       1
## 6 Torgersen female    24
## 7 Torgersen male      23
## 8 Torgersen <NA>       5
# Select specific columns (species, island, flipper_length_mm)

penguins |> select(species, island, flipper_length_mm)
## # A tibble: 344 × 3
##    species island    flipper_length_mm
##    <fct>   <fct>                 <int>
##  1 Adelie  Torgersen               181
##  2 Adelie  Torgersen               186
##  3 Adelie  Torgersen               195
##  4 Adelie  Torgersen                NA
##  5 Adelie  Torgersen               193
##  6 Adelie  Torgersen               190
##  7 Adelie  Torgersen               181
##  8 Adelie  Torgersen               195
##  9 Adelie  Torgersen               193
## 10 Adelie  Torgersen               190
## # ℹ 334 more rows
# Add new columns for flipper length in centimetres and inches
# (cm = mm / 10, in = mm / 25.4)

penguins |>
  mutate(
    flipper_length_cm = flipper_length_mm / 10,
    flipper_length_in = flipper_length_mm / 25.4
  )
## # A tibble: 344 × 10
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 334 more rows
## # ℹ 4 more variables: sex <fct>, year <int>, flipper_length_cm <dbl>,
## #   flipper_length_in <dbl>
# Group penguins by species (rows are unchanged; only grouping info is added)

penguins |> group_by(species)
## # A tibble: 344 × 8
## # Groups:   species [3]
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Summarize penguin data by species (basic): one row per species with its count
penguins |>
  group_by(species) |>
  summarise(n = n())
## # A tibble: 3 × 2
##   species       n
##   <fct>     <int>
## 1 Adelie      152
## 2 Chinstrap    68
## 3 Gentoo      124
# Summarize penguin data by species (additional statistics)
# No NA handling yet: a species with a missing value in a column gets NA for
# the statistics computed from that column.
penguins |>
  group_by(species) |>
  summarise(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    percent_female = sum(sex == "female") / n()
  )
## # A tibble: 3 × 5
##   species       n mean_mass max_flipper_length percent_female
##   <fct>     <int>     <dbl>              <int>          <dbl>
## 1 Adelie      152       NA                  NA           NA  
## 2 Chinstrap    68     3733.                212            0.5
## 3 Gentoo      124       NA                  NA           NA
# Summarize penguin data by species (handling missing values)

penguins |>
  group_by(species) |>
  summarize(
    n = n(),
    # na.rm = TRUE drops missing measurements before aggregating.
    mean_mass = mean(body_mass_g, na.rm = TRUE),
    max_flipper_length = max(flipper_length_mm, na.rm = TRUE),
    # Proportion (0-1) of females among penguins whose sex was recorded.
    # Dividing by n() instead would count unknown-sex penguins as "not female"
    # and understate the share.
    percent_female = mean(sex == "female", na.rm = TRUE)
  )
## # A tibble: 3 × 5
##   species       n mean_mass max_flipper_length percent_female
##   <fct>     <int>     <dbl>              <int>          <dbl>
## 1 Adelie      152     3701.                210          0.5  
## 2 Chinstrap    68     3733.                212          0.5  
## 3 Gentoo      124     5076.                231          0.487
# OR: first drop rows missing flipper length, body mass, or sex, then
# summarize without needing na.rm.
penguins1 <- penguins |>
  filter(
    !is.na(flipper_length_mm),
    !is.na(body_mass_g),
    !is.na(sex)
  ) |>
  group_by(species) |>
  summarise(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    percent_female = sum(sex == "female") / n()
  )
head(penguins1)
## # A tibble: 3 × 5
##   species       n mean_mass max_flipper_length percent_female
##   <fct>     <int>     <dbl>              <int>          <dbl>
## 1 Adelie      146     3706.                210          0.5  
## 2 Chinstrap    68     3733.                212          0.5  
## 3 Gentoo      119     5092.                231          0.487