# Load the workbook; readxl auto-names the unnamed first column `...1`.
data <- read_excel(path = "../00_data/myData.xlsx")
## New names:
## • `` -> `...1`
# Preview the imported tibble.
print(data)
## # A tibble: 4,810 × 24
## ...1 rank position hand player years total…¹ status yr_st…² season age
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 1 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 2 2 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 3 3 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 4 4 1 C Left Wayne G… 1979… 894 Retir… 1979 1979-… 19
## 5 5 1 C Left Wayne G… 1979… 894 Retir… 1979 1980-… 20
## 6 6 1 C Left Wayne G… 1979… 894 Retir… 1979 1981-… 21
## 7 7 1 C Left Wayne G… 1979… 894 Retir… 1979 1982-… 22
## 8 8 1 C Left Wayne G… 1979… 894 Retir… 1979 1983-… 23
## 9 9 1 C Left Wayne G… 1979… 894 Retir… 1979 1984-… 24
## 10 10 1 C Left Wayne G… 1979… 894 Retir… 1979 1985-… 25
## # … with 4,800 more rows, 13 more variables: team <chr>, league <chr>,
## # season_games <dbl>, goals <dbl>, assists <dbl>, points <dbl>,
## # plus_minus <chr>, penalty_min <dbl>, goals_even <chr>,
## # goals_power_play <chr>, goals_short_handed <chr>, goals_game_winner <chr>,
## # headshot <chr>, and abbreviated variable names ¹total_goals, ²yr_start
# List the unique values of `position` (missing positions are the string "NA").
distinct(data, position)
## # A tibble: 5 × 1
## position
## <chr>
## 1 C
## 2 RW
## 3 LW
## 4 NA
## 5 D
# Average of `goals` within each `position`, ignoring missing goal values.
# One row per position; group_by() keeps the groups in sorted order.
goals_by_position <- summarise(
  group_by(data, position),
  avg_goalsbypos = mean(goals, na.rm = TRUE)
)
print(goals_by_position)
## # A tibble: 5 × 2
## position avg_goalsbypos
## <chr> <dbl>
## 1 C 24.3
## 2 D 14.9
## 3 LW 24.6
## 4 NA 19.9
## 5 RW 24.6
Make two bar charts here: one before ordering and another after.
# Bar chart BEFORE ordering: positions appear in their default
# (alphabetical) order on the y axis.
# geom_col() is used because the bar lengths are already computed in
# `avg_goalsbypos`. geom_bar() counts rows itself (stat_count), which is
# why it fails with "can only have an x or y aesthetic" when both are mapped.
# NOTE: horizontal bars from a discrete y rely on ggplot2 >= 3.3.0.
goals_by_position %>%
  ggplot(aes(x = avg_goalsbypos, y = position)) +
  geom_col()

# Bar chart AFTER ordering: positions are sorted by their average goals,
# then the "NA" (unknown position) level is moved to the first level so it
# sits at the bottom of the axis instead of in the middle of the ranking.
goals_by_position %>%
  ggplot(
    aes(
      x = avg_goalsbypos,
      y = fct_reorder(.f = position, .x = avg_goalsbypos) %>%
        fct_relevel("NA")
    )
  ) +
  geom_col() +
  # labs() matches aesthetics by exact name, so the title must be given as
  # lower-case `x`; an upper-case `X` leaves the x-axis title unchanged.
  labs(y = NULL, x = "Average goals by position")
Show examples of three functions: fct_recode(), fct_collapse(), and fct_lump().
# fct_recode(): rename the "NA" level to "Unknown", then show only the
# affected rows with the new and old values side by side.
data %>%
  mutate(position_chg = fct_recode(position, Unknown = "NA")) %>%
  filter(position == "NA") %>%
  select(position_chg, position)
## # A tibble: 1,681 × 2
## position_chg position
## <fct> <chr>
## 1 Unknown NA
## 2 Unknown NA
## 3 Unknown NA
## 4 Unknown NA
## 5 Unknown NA
## 6 Unknown NA
## 7 Unknown NA
## 8 Unknown NA
## 9 Unknown NA
## 10 Unknown NA
## # … with 1,671 more rows
# fct_collapse(): merge the three forward positions into a single
# "Forward" level, then show every row that is not a defenseman ("D").
data %>%
  mutate(
    position_col = fct_collapse(position, Forward = c("LW", "C", "RW"))
  ) %>%
  filter(position != "D") %>%
  select(position, position_col)
## # A tibble: 4,643 × 2
## position position_col
## <chr> <fct>
## 1 C Forward
## 2 C Forward
## 3 C Forward
## 4 C Forward
## 5 C Forward
## 6 C Forward
## 7 C Forward
## 8 C Forward
## 9 C Forward
## 10 C Forward
## # … with 4,633 more rows
# fct_lump(): with no `n` or `prop`, low-frequency levels are folded into
# "Other"; in this data `D` is the level that gets lumped.
data %>%
  mutate(position_lump = fct_lump(position)) %>%
  distinct(position_lump)
## # A tibble: 5 × 1
## position_lump
## <fct>
## 1 C
## 2 RW
## 3 LW
## 4 NA
## 5 Other
No need to do anything here.