#############################################################################


#David Morejon
#Penguin Activity! Review your wrangling skills!


###########################################################################



# Install the palmerpenguins package if you do not have it
#install.packages("palmerpenguins")



library(tidyverse)
library(palmerpenguins) #to load the penguins dataset



# Print the penguins dataset (available once palmerpenguins is attached)
penguins


# Load the dataset into the current environment so that it is listed there
data("penguins") # to see it in the environment




# Number of penguins in each species
count(penguins, species)



# Number of penguins for every species/island combination
count(penguins, species, island)



# Keep only the rows whose species is Adelie
filter(penguins, species == "Adelie")



# Count Adelie penguins by island and sex.
# `.groups = "drop"` returns an ungrouped result and silences the
# "grouped output by 'island'" message that summarise() otherwise emits
# after grouping by more than one variable.
penguins |>
  filter(species == "Adelie") |>
  group_by(island, sex) |>
  summarise(total = n(), .groups = "drop")




# Keep only the species, island and flipper_length_mm columns
select(penguins, species, island, flipper_length_mm)




# Add flipper length converted to centimetres (mm / 10) and inches (mm / 25.4)
mutate(
  penguins,
  flipper_cm = flipper_length_mm / 10,
  flipper_in = flipper_length_mm / 25.4
)



# Attach a species grouping to the data (rows and columns are unchanged)
group_by(penguins, species)



# Basic per-species summary: the number of penguins in each species
penguins |>
  group_by(species) |>
  summarise(n = n())


# Per-species summary with additional statistics.
# No na.rm is used here, so a statistic comes back NA for any species
# whose column contains a missing value.
penguins |>
  group_by(species) |>
  summarise(
    n = n(),
    mean_mass = mean(body_mass_g),
    max_flipper_length = max(flipper_length_mm),
    percent_female = sum(sex == "female") / n()
  )


# Summarize penguin data by species, ignoring missing values.
# `na.rm = TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned, whereas `TRUE` is a reserved constant.
# percent_female is a proportion (0-1); its denominator n() still counts
# penguins whose sex is missing.
penguins |>
  group_by(species) |>
  summarize(
    n = n(),
    mean_mass = mean(body_mass_g, na.rm = TRUE),
    max_flipper_length = max(flipper_length_mm, na.rm = TRUE),
    percent_female = sum(sex == "female", na.rm = TRUE) / n()
  )