#' ---
#' title: "Penguins Activity"
#' author: "Arnav Shah"
#' output: html_document
#' ---


# Install the palmerpenguins package only when it is not already available,
# so re-running (or knitting) this script does not reinstall it every time.
# Note: the package name is a single word with no space, and R string
# literals must use straight quotes.
if (!requireNamespace("palmerpenguins", quietly = TRUE)) {
  install.packages("palmerpenguins")
}

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins) #to load the penguins dataset
## Warning: package 'palmerpenguins' was built under R version 4.5.2
## 
## Attaching package: 'palmerpenguins'
## 
## The following objects are masked from 'package:datasets':
## 
##     penguins, penguins_raw
# Load the penguins dataset into the global environment so it shows up in
# the environment pane. The package is named explicitly because the
# `datasets` package also provides an object called `penguins` (see the
# masking message printed when palmerpenguins is attached), which makes an
# unqualified data("penguins") ambiguous.
data("penguins", package = "palmerpenguins")
# Tally the number of penguins recorded for each species.
count(penguins, species)
## # A tibble: 3 × 2
##   species       n
##   <fct>     <int>
## 1 Adelie      152
## 2 Chinstrap    68
## 3 Gentoo      124
# Tally penguins for every species/island combination present in the data.
count(penguins, species, island)
## # A tibble: 5 × 3
##   species   island        n
##   <fct>     <fct>     <int>
## 1 Adelie    Biscoe       44
## 2 Adelie    Dream        56
## 3 Adelie    Torgersen    52
## 4 Chinstrap Dream        68
## 5 Gentoo    Biscoe      124
# Keep only the rows whose species is Adelie.
filter(penguins, species == "Adelie")
## # A tibble: 152 × 8
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 142 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Count Adelie penguins by island and sex.
# The species filter is required here: counting the unfiltered data would
# tally every species on each island, not just Adelie.
penguins |>
  filter(species == "Adelie") |>
  count(island, sex)
## # A tibble: 8 × 3
##   island    sex        n
##   <fct>     <fct>  <int>
## 1 Biscoe    female    22
## 2 Biscoe    male      22
## 3 Dream     female    27
## 4 Dream     male      28
## 5 Dream     <NA>       1
## 6 Torgersen female    24
## 7 Torgersen male      23
## 8 Torgersen <NA>       5
# Keep just three columns: species, island, and flipper_length_mm.
select(penguins, species, island, flipper_length_mm)
## # A tibble: 344 × 3
##    species island    flipper_length_mm
##    <fct>   <fct>                 <int>
##  1 Adelie  Torgersen               181
##  2 Adelie  Torgersen               186
##  3 Adelie  Torgersen               195
##  4 Adelie  Torgersen                NA
##  5 Adelie  Torgersen               193
##  6 Adelie  Torgersen               190
##  7 Adelie  Torgersen               181
##  8 Adelie  Torgersen               195
##  9 Adelie  Torgersen               193
## 10 Adelie  Torgersen               190
## # ℹ 334 more rows
# Add flipper length expressed in centimetres (mm / 10) and inches
# (mm / 25.4) as two new columns next to the original millimetre value.
mutate(
  penguins,
  flipper_length_cm = flipper_length_mm / 10,
  flipper_length_in = flipper_length_mm / 25.4
)
## # A tibble: 344 × 10
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 334 more rows
## # ℹ 4 more variables: sex <fct>, year <int>, flipper_length_cm <dbl>,
## #   flipper_length_in <dbl>
# Mark the data as grouped by species. The rows themselves are unchanged;
# only the grouping metadata is added.
group_by(penguins, species)
## # A tibble: 344 × 8
## # Groups:   species [3]
##    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
##  1 Adelie  Torgersen           39.1          18.7               181        3750
##  2 Adelie  Torgersen           39.5          17.4               186        3800
##  3 Adelie  Torgersen           40.3          18                 195        3250
##  4 Adelie  Torgersen           NA            NA                  NA          NA
##  5 Adelie  Torgersen           36.7          19.3               193        3450
##  6 Adelie  Torgersen           39.3          20.6               190        3650
##  7 Adelie  Torgersen           38.9          17.8               181        3625
##  8 Adelie  Torgersen           39.2          19.6               195        4675
##  9 Adelie  Torgersen           34.1          18.1               193        3475
## 10 Adelie  Torgersen           42            20.2               190        4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Per-species averages of bill length, flipper length, and body mass.
# Missing measurements are dropped (na.rm = TRUE) before averaging.
penguins |>
  group_by(species) |>
  summarise(
    avg_bill_length = mean(bill_length_mm, na.rm = TRUE),
    avg_flipper_length = mean(flipper_length_mm, na.rm = TRUE),
    avg_body_mass = mean(body_mass_g, na.rm = TRUE)
  )
## # A tibble: 3 × 4
##   species   avg_bill_length avg_flipper_length avg_body_mass
##   <fct>               <dbl>              <dbl>         <dbl>
## 1 Adelie               38.8               190.         3701.
## 2 Chinstrap            48.8               196.         3733.
## 3 Gentoo               47.5               217.         5076.
# Per-species body-mass statistics: mean, standard deviation, minimum,
# maximum, and the number of rows in each species group.
summarise(
  group_by(penguins, species),
  mean_body_mass = mean(body_mass_g, na.rm = TRUE),
  sd_body_mass = sd(body_mass_g, na.rm = TRUE),
  min_body_mass = min(body_mass_g, na.rm = TRUE),
  max_body_mass = max(body_mass_g, na.rm = TRUE),
  n = n()
)
## # A tibble: 3 × 6
##   species   mean_body_mass sd_body_mass min_body_mass max_body_mass     n
##   <fct>              <dbl>        <dbl>         <int>         <int> <int>
## 1 Adelie             3701.         459.          2850          4775   152
## 2 Chinstrap          3733.         384.          2700          4800    68
## 3 Gentoo             5076.         504.          3950          6300   124
# Per-species means computed with missing values removed (na.rm = TRUE).
# Note that n counts every row in the group, including rows whose
# measurements are NA.
penguins |>
  group_by(species) |>
  summarise(
    mean_body_mass = mean(body_mass_g, na.rm = TRUE),
    mean_flipper_len = mean(flipper_length_mm, na.rm = TRUE),
    n = n()
  )
## # A tibble: 3 × 4
##   species   mean_body_mass mean_flipper_len     n
##   <fct>              <dbl>            <dbl> <int>
## 1 Adelie             3701.             190.   152
## 2 Chinstrap          3733.             196.    68
## 3 Gentoo             5076.             217.   124