#Penguin Activity! Review your wrangling skills!
# install.packages("palmerpenguins")
library(tidyverse)
library(palmerpenguins) #to load the penguins dataset
# Print the penguins tibble to preview it; the pasted console output below
# shows its size (344 rows, 8 columns) and the type of each column.
penguins
## # A tibble: 344 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Load the dataset explicitly from palmerpenguins. R >= 4.5 also ships a
# `penguins` dataset in the base `datasets` package with different column
# names (e.g. `bill_len` rather than `bill_length_mm`), so naming the package
# keeps the columns used throughout this script unambiguous.
data("penguins", package = "palmerpenguins")
# Tally the number of penguins recorded for each species.
count(penguins, species)
## # A tibble: 3 × 2
## species n
## <fct> <int>
## 1 Adelie 152
## 2 Chinstrap 68
## 3 Gentoo 124
# Tally penguins for every species/island combination present in the data.
count(penguins, species, island)
## # A tibble: 5 × 3
## species island n
## <fct> <fct> <int>
## 1 Adelie Biscoe 44
## 2 Adelie Dream 56
## 3 Adelie Torgersen 52
## 4 Chinstrap Dream 68
## 5 Gentoo Biscoe 124
# Keep only the rows that belong to Adelie penguins.
filter(penguins, species == "Adelie")
## # A tibble: 152 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 142 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Count Adelie penguins by island and sex. Passing the variables directly to
# count() returns an ungrouped tibble, so no grouping is left attached to the
# result (group_by() |> count() would keep the island/sex groups).
penguins |>
  filter(species == "Adelie") |>
  count(island, sex)
## # A tibble: 8 × 3
## island sex n
## <fct> <fct> <int>
## 1 Biscoe female 22
## 2 Biscoe male 22
## 3 Dream female 27
## 4 Dream male 28
## 5 Dream <NA> 1
## 6 Torgersen female 24
## 7 Torgersen male 23
## 8 Torgersen <NA> 5
# Keep just three columns: species, island, and flipper_length_mm.
select(penguins, species, island, flipper_length_mm)
## # A tibble: 344 × 3
## species island flipper_length_mm
## <fct> <fct> <int>
## 1 Adelie Torgersen 181
## 2 Adelie Torgersen 186
## 3 Adelie Torgersen 195
## 4 Adelie Torgersen NA
## 5 Adelie Torgersen 193
## 6 Adelie Torgersen 190
## 7 Adelie Torgersen 181
## 8 Adelie Torgersen 195
## 9 Adelie Torgersen 193
## 10 Adelie Torgersen 190
## # ℹ 334 more rows
# Add flipper length in centimetres (mm / 10) and in inches (mm / 25.4).
# Both columns are created in a single mutate() call, and the inches column is
# spelled `flipper_length_in` to match the naming of the other flipper columns.
penguins |>
  mutate(
    flipper_length_cm = flipper_length_mm / 10,
    flipper_length_in = flipper_length_mm / 25.4
  )
## # A tibble: 344 × 10
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 4 more variables: sex <fct>, year <int>, flipper_length_cm <dbl>,
## # flipper_length_in <dbl>
# Group the rows by species. The data itself is unchanged; only grouping
# metadata is added, which the printed header reports as `Groups: species [3]`.
group_by(penguins, species)
## # A tibble: 344 × 8
## # Groups: species [3]
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## 3 Adelie Torgersen 40.3 18 195 3250
## 4 Adelie Torgersen NA NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193 3450
## 6 Adelie Torgersen 39.3 20.6 190 3650
## 7 Adelie Torgersen 38.9 17.8 181 3625
## 8 Adelie Torgersen 39.2 19.6 195 4675
## 9 Adelie Torgersen 34.1 18.1 193 3475
## 10 Adelie Torgersen 42 20.2 190 4250
## # ℹ 334 more rows
## # ℹ 2 more variables: sex <fct>, year <int>
# Basic per-species summary: the number of penguins in each species.
penguins |>
  group_by(species) |>
  summarize(n = n())
## # A tibble: 3 × 2
## species n
## <fct> <int>
## 1 Adelie 152
## 2 Chinstrap 68
## 3 Gentoo 124
# Per-species summary with additional statistics. Missing values are not
# handled here, so any species with an NA in a column gets NA for the
# statistic computed from that column (see the Adelie and Gentoo rows below).
penguins |>
  group_by(species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    percent_female = sum(sex == "female") / n()
  )
## # A tibble: 3 × 5
## species n mean_mass max_flipper_length percent_female
## <fct> <int> <dbl> <int> <dbl>
## 1 Adelie 152 NA NA NA
## 2 Chinstrap 68 3733. 212 0.5
## 3 Gentoo 124 NA NA NA
# Summarize penguin data by species, ignoring missing values.
# percent_female is a proportion between 0 and 1. It uses
# mean(..., na.rm = TRUE) so penguins of unknown sex are dropped from both the
# numerator and the denominator; dividing an NA-stripped sum by n() would
# still count them in the denominator and understate the share.
penguins |>
  group_by(species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g, na.rm = TRUE),
    max_flipper_length = max(flipper_length_mm, na.rm = TRUE),
    percent_female = mean(sex == "female", na.rm = TRUE)
  )
## # A tibble: 3 × 5
## species n mean_mass max_flipper_length percent_female
## <fct> <int> <dbl> <int> <dbl>
## 1 Adelie 152 3701. 210 0.5
## 2 Chinstrap 68 3733. 212 0.5
## 3 Gentoo 124 5076. 231 0.487
# Alternative approach: drop every row with a missing flipper length, body
# mass, or sex up front, then summarize without needing na.rm.
penguins1 <- penguins |>
  filter(
    !is.na(flipper_length_mm),
    !is.na(body_mass_g),
    !is.na(sex)
  ) |>
  group_by(species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    percent_female = sum(sex == "female") / n()
  )
# Show the (three-row) summary table.
head(penguins1)
## # A tibble: 3 × 5
## species n mean_mass max_flipper_length percent_female
## <fct> <int> <dbl> <int> <dbl>
## 1 Adelie 146 3706. 210 0.5
## 2 Chinstrap 68 3733. 212 0.5
## 3 Gentoo 119 5092. 231 0.487