# Import data ----
# Read the "nhl_player_births" worksheet of the Excel workbook into `data`.
data <- read_excel(
  path = "../00_data/myData.xlsx",
  sheet = "nhl_player_births"
)

# Print the imported table.
data

# Filter rows ----
# Keep only rows where birth_year is 1999 and birth_month is 1.
data %>%
  filter(birth_year == 1999, birth_month == 1)

# Arrange rows ----
# Sort by birth_city, breaking ties by birth_country, both in descending order.
data %>%
  arrange(desc(birth_city), desc(birth_country))

# Select columns ----
# The contiguous range of columns from birth_city through birth_country.
data %>%
  select(birth_city:birth_country)

# An explicit list of columns, in the order given.
data %>%
  select(
    player_id, first_name, last_name,
    birth_date, birth_city, birth_country
  )

# Add columns ----
# birth_month_rev is birth_month shifted down by one.
data %>%
  mutate(birth_month_rev = birth_month - 1)
# Summarize by group ----
# One row per birth_city holding the median birth_year of its players.
data %>%
  # Group by city
  group_by(birth_city) %>%
  # Median birth year per city. na.rm = TRUE drops missing birth_year values
  # first; without it a single NA would make that city's median NA.
  summarize(birth_year_median = median(birth_year, na.rm = TRUE))
Wow! 100 Mile House is a town! The median birth year of NHL players born there is 1966.
# Per-city summary plotted as a scatter ----
# Collapse to one row per birth_city: the median birth_year (missing values
# ignored) and the number of rows in that city.
data %>%
  group_by(birth_city) %>%
  summarise(
    birth_year = median(birth_year, na.rm = TRUE),
    count = n()
  ) %>%
  # Median birth year on x, row count on y; points are semi-transparent so
  # overlapping cities show up as darker areas.
  ggplot(aes(birth_year, count)) +
  geom_point(alpha = 0.3)