# Read the "nhl_rosters" sheet of the Excel workbook into a tibble and print it.
rosters <- read_excel(
  path = "../00_data/myData.xlsx",
  sheet = "nhl_rosters"
)
rosters
## # A tibble: 54,883 × 18
## team_code season position_type player_id headshot first_name last_name
## <chr> <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 ATL 19992000 forwards 8467867 https://asse… Bryan Adams
## 2 ATL 19992000 forwards 8445176 https://asse… Donald Audette
## 3 ATL 19992000 forwards 8460014 https://asse… Eric Bertrand
## 4 ATL 19992000 forwards 8460510 https://asse… Jason Botterill
## 5 ATL 19992000 forwards 8459596 https://asse… Andrew Brunette
## 6 ATL 19992000 forwards 8445733 https://asse… Kelly Buchberg…
## 7 ATL 19992000 forwards 8460573 https://asse… Hnat Domenich…
## 8 ATL 19992000 forwards 8459450 https://asse… Shean Donovan
## 9 ATL 19992000 forwards 8446675 https://asse… Nelson Emerson
## 10 ATL 19992000 forwards 8446823 https://asse… Ray Ferraro
## # ℹ 54,873 more rows
## # ℹ 11 more variables: sweater_number <chr>, position_code <chr>,
## # shoots_catches <chr>, height_in_inches <dbl>, weight_in_pounds <dbl>,
## # height_in_centimeters <dbl>, weight_in_kilograms <dbl>, birth_date <dttm>,
## # birth_city <chr>, birth_country <chr>, birth_state_province <chr>
Make two bar charts here: one before ordering and another after.
# Transform data: average player height (in inches) for each team.
# Missing heights are dropped before averaging (na.rm = TRUE).
height_by_team <- summarise(
  group_by(rosters, team_code),
  avg_player_height_team = mean(height_in_inches, na.rm = TRUE)
)
height_by_team
## # A tibble: 58 × 2
## team_code avg_player_height_team
## <chr> <dbl>
## 1 AFM 72.0
## 2 ANA 73.2
## 3 ARI 73.2
## 4 ATL 72.9
## 5 BOS 71.8
## 6 BRK 70.6
## 7 BUF 72.7
## 8 CAR 73.0
## 9 CBJ 73.2
## 10 CGS 71.4
## # ℹ 48 more rows
# Plot 1 (before ordering): team_code is mapped as-is, so the bars follow
# the default ordering of the team codes rather than the bar lengths.
height_by_team %>%
  ggplot(aes(x = avg_player_height_team, y = team_code)) +
  geom_col() +
  labs(x = "Average player height (inches)", y = "Team")

# Plot 2 (after ordering): fct_reorder() orders the teams by their average
# height, so the bars are sorted by length.
height_by_team %>%
  ggplot(aes(
    x = avg_player_height_team,
    y = fct_reorder(team_code, avg_player_height_team)
  )) +
  geom_col() +
  # Explicit labels; otherwise the y-axis title is the raw fct_reorder() call.
  labs(x = "Average player height (inches)", y = "Team")
Show examples of three functions:
# Recompute the per-team mean height (missing heights dropped) and print it.
height_by_team <- group_by(rosters, team_code) %>%
  summarise(avg_player_height_team = mean(height_in_inches, na.rm = TRUE))
height_by_team
## # A tibble: 58 × 2
## team_code avg_player_height_team
## <chr> <dbl>
## 1 AFM 72.0
## 2 ANA 73.2
## 3 ARI 73.2
## 4 ATL 72.9
## 5 BOS 71.8
## 6 BRK 70.6
## 7 BUF 72.7
## 8 CAR 73.0
## 9 CBJ 73.2
## 10 CGS 71.4
## # ℹ 48 more rows
# fct_recode(): relabel selected team codes by hand ("new label" = "old code"),
# then count roster rows per label. Codes not listed keep their original value.
rosters %>%
  select(team_code) %>%
  mutate(
    team_code = fct_recode(
      team_code,
      "Boston, Bruins" = "BOS",
      "Buffalo" = "BUF",
      "Toronto" = "TOR", # label was previously misspelled "Tortonto"
      "Montreal" = "MTL",
      "Chicago" = "CHI"
    )
  ) %>%
  count(team_code)
## # A tibble: 58 × 2
## team_code n
## <fct> <int>
## 1 AFM 222
## 2 ANA 1093
## 3 ARI 359
## 4 ATL 410
## 5 Boston, Bruins 2988
## 6 BRK 26
## 7 Buffalo 1800
## 8 CAR 852
## 9 CBJ 853
## 10 CGS 184
## # ℹ 48 more rows
# fct_collapse(): merge several team codes into the groups "good" and "bad",
# then count roster rows per resulting level. count() computes the collapsed
# team_code column itself, so no separate select()/mutate() step is needed.
rosters %>%
  count(
    team_code = fct_collapse(
      team_code,
      good = c("BOS", "BUF", "EDM", "DET"),
      bad = c("CAR", "ANA")
    )
  )
## # A tibble: 54 × 2
## team_code n
## <fct> <int>
## 1 AFM 222
## 2 bad 1945
## 3 ARI 359
## 4 ATL 410
## 5 good 9247
## 6 BRK 26
## 7 CBJ 853
## 8 CGS 184
## 9 CGY 1530
## 10 CHI 2819
## # ℹ 44 more rows
# fct_lump_prop(): teams that account for at most 3.5% of the roster rows are
# pooled into "Other"; the resulting levels are then counted, largest first.
rosters %>%
  mutate(teams_lump = fct_lump_prop(team_code, prop = 0.035)) %>%
  count(teams_lump, sort = TRUE)
## # A tibble: 10 × 2
## teams_lump n
## <fct> <int>
## 1 Other 31459
## 2 MTL 3009
## 3 BOS 2988
## 4 TOR 2944
## 5 NYR 2943
## 6 DET 2883
## 7 CHI 2819
## 8 PIT 1979
## 9 STL 1935
## 10 PHI 1924