Import data

# Load the "nhl_player_births" sheet of the Excel workbook into `data`,
# then print it to inspect what was read.
data <- read_excel(
  path  = "../00_data/myData.xlsx",
  sheet = "nhl_player_births"
)
data

Apply the following dplyr verbs to your data

Filter rows

# Keep only the rows where birth_year is 1999 and birth_month is 1.
data %>%
  filter(birth_year == 1999 & birth_month == 1)

Arrange rows

# Sort rows by birth_city in descending order; ties are broken by
# birth_country, also descending.
data %>%
  arrange(desc(birth_city), desc(birth_country))

Select columns

# Every column positioned from birth_city through birth_country.
data %>%
  select(birth_city:birth_country)

# An explicitly listed set of columns, in the order given.
data %>%
  select(
    player_id,
    first_name,
    last_name,
    birth_date,
    birth_city,
    birth_country
  )

Add columns

# Append a column birth_month_rev holding birth_month minus one.
data %>%
  mutate(birth_month_rev = birth_month - 1)

Summarize by groups

# Median birth year for each birth city.
data %>%
  # One group per distinct birth_city
  group_by(birth_city) %>%

  # na.rm = TRUE so a missing birth_year does not turn a city's median into
  # NA; this matches the median computed for the plot further down.
  summarize(birth_year_median = median(birth_year, na.rm = TRUE))

Wow! 100 Mile House is a town! The median birth year of NHL players born in that town is 1966.

# Per birth city: number of rows and median birth year, drawn as a scatter
# plot of count against median birth year.
data %>%
  group_by(birth_city) %>%
  summarize(
    count      = n(),
    birth_year = median(birth_year, na.rm = TRUE)
  ) %>%

  # Semi-transparent points so overlapping cities remain visible
  ggplot(aes(x = birth_year, y = count)) +
  geom_point(alpha = 0.3)