#Penguin Activity! Review your wrangling skills!
# install.packages("palmerpenguins")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(palmerpenguins) #to load the penguins dataset
## Warning: package 'palmerpenguins' was built under R version 4.5.2
##
## Attaching package: 'palmerpenguins'
##
## The following objects are masked from 'package:datasets':
##
## penguins, penguins_raw
# Print the dataset; because it is a tibble, only the first 10 rows are shown.
penguins
## # A tibble: 344 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Copy the dataset into the workspace so it shows up in the Environment pane.
# The string must use straight quotes (curly quotes are a parse error), and
# the package is named explicitly because `datasets` also ships a `penguins`.
data("penguins", package = "palmerpenguins")
# Tally the penguins for every species/island combination that occurs.
count(penguins, species, island)
## # A tibble: 5 × 3
## species island n
## <fct> <fct> <int>
## 1 Adelie Biscoe 44
## 2 Adelie Dream 56
## 3 Adelie Torgersen 52
## 4 Chinstrap Dream 68
## 5 Gentoo Biscoe 124
# Species-by-island tally (one row per combination present in the data).
penguins |> count(species, island)
## # A tibble: 5 × 3
## species island n
## <fct> <fct> <int>
## 1 Adelie Biscoe 44
## 2 Adelie Dream 56
## 3 Adelie Torgersen 52
## 4 Chinstrap Dream 68
## 5 Gentoo Biscoe 124
# Keep only the rows whose species is Adelie.
filter(penguins, species == "Adelie")
## # A tibble: 152 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 142 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Among Adelie penguins, count individuals per island and sex.
# Rows with a missing sex are tallied under their own <NA> level.
filter(penguins, species == "Adelie") |>
  count(island, sex)
## # A tibble: 8 × 3
## island sex n
## <fct> <fct> <int>
## 1 Biscoe female 22
## 2 Biscoe male 22
## 3 Dream female 27
## 4 Dream male 28
## 5 Dream <NA> 1
## 6 Torgersen female 24
## 7 Torgersen male 23
## 8 Torgersen <NA> 5
# Narrow the table to the three columns of interest.
select(penguins, species, island, flipper_length_mm)
## # A tibble: 344 × 3
## species island flipper_length_mm
## <fct> <fct> <int>
## 1 Adelie Torgersen 181
## 2 Adelie Torgersen 186
## 3 Adelie Torgersen 195
## 4 Adelie Torgersen NA
## 5 Adelie Torgersen 193
## 6 Adelie Torgersen 190
## 7 Adelie Torgersen 181
## 8 Adelie Torgersen 195
## 9 Adelie Torgersen 193
## 10 Adelie Torgersen 190
## # ℹ 334 more rows
# Append flipper length converted from millimetres to centimetres (/ 10)
# and to inches (/ 25.4), then print the widened table.
penguins_mutate <- mutate(
  penguins,
  flipper_length_cm = flipper_length_mm / 10,
  flipper_length_in = flipper_length_mm / 25.4
)
penguins_mutate
## # A tibble: 344 × 10
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 4 more variables: sex <fct>, year <int>, flipper_length_cm <dbl>,
## # flipper_length_in <dbl>
# Grouping on its own leaves the rows and columns untouched; it only tags
# the tibble with species groups for later grouped operations.
group_by(penguins, species)
## # A tibble: 344 × 8
## # Groups: species [3]
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Number of penguins recorded for each species.
group_by(penguins, species) |>
  summarize(n = n())
## # A tibble: 3 × 2
## species n
## <fct> <int>
## 1 Adelie 152
## 2 Chinstrap 68
## 3 Gentoo 124
# First pass at per-species summaries. Nothing here handles missing values,
# so a statistic is NA for any species whose column contains an NA.
group_by(penguins, species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    # Share (0-1) of the group's rows recorded as female.
    percent_female = sum(sex == "female") / n()
  )
## # A tibble: 3 × 5
## species n mean_mass max_flipper_length percent_female
## <fct> <int> <dbl> <int> <dbl>
## 1 Adelie 152 NA NA NA
## 2 Chinstrap 68 3733. 212 0.5
## 3 Gentoo 124 NA NA NA
# Per-species summaries on complete cases only: rows missing body mass,
# flipper length, or sex are dropped first, so n shrinks accordingly.
penguins |>
  drop_na(body_mass_g, flipper_length_mm, sex) |>
  group_by(species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    # Share (0-1) of the remaining rows recorded as female.
    percent_female = sum(sex == "female") / n()
  )
## # A tibble: 3 × 5
## species n mean_mass max_flipper_length percent_female
## <fct> <int> <dbl> <int> <dbl>
## 1 Adelie 146 3706. 210 0.5
## 2 Chinstrap 68 3733. 212 0.5
## 3 Gentoo 119 5092. 231 0.487
# Per-species summaries that keep every row and skip NAs inside each
# statistic with na.rm = TRUE.
# NOTE: percent_female still divides by n(), which also counts rows whose
# sex is missing, so it is the share of ALL rows recorded as female and can
# come out lower than the complete-case figure.
group_by(penguins, species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g, na.rm = TRUE),
    max_flipper_length = max(flipper_length_mm, na.rm = TRUE),
    percent_female = sum(sex == "female", na.rm = TRUE) / n()
  )
## # A tibble: 3 × 5
## species n mean_mass max_flipper_length percent_female
## <fct> <int> <dbl> <int> <dbl>
## 1 Adelie 152 3701. 210 0.480
## 2 Chinstrap 68 3733. 212 0.5
## 3 Gentoo 124 5076. 231 0.468