Import data
# Load the Excel workbook into `data`; the path is relative to the working
# directory.
# NOTE(review): the import message below shows an unnamed column being renamed
# to `...1` — confirm whether it is a leftover row index that can be dropped.
data <- read_excel(path = "../00_data/myData.xlsx")
## New names:
## • `` -> `...1`
# Print the imported tibble to inspect its dimensions, columns, and types.
data
## # A tibble: 4,810 × 24
## ...1 rank position hand player years total…¹ status yr_st…² season age
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 1 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 2 2 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 3 3 1 C Left Wayne G… 1979… 894 Retir… 1979 1978-… 18
## 4 4 1 C Left Wayne G… 1979… 894 Retir… 1979 1979-… 19
## 5 5 1 C Left Wayne G… 1979… 894 Retir… 1979 1980-… 20
## 6 6 1 C Left Wayne G… 1979… 894 Retir… 1979 1981-… 21
## 7 7 1 C Left Wayne G… 1979… 894 Retir… 1979 1982-… 22
## 8 8 1 C Left Wayne G… 1979… 894 Retir… 1979 1983-… 23
## 9 9 1 C Left Wayne G… 1979… 894 Retir… 1979 1984-… 24
## 10 10 1 C Left Wayne G… 1979… 894 Retir… 1979 1985-… 25
## # … with 4,800 more rows, 13 more variables: team <chr>, league <chr>,
## # season_games <dbl>, goals <dbl>, assists <dbl>, points <dbl>,
## # plus_minus <chr>, penalty_min <dbl>, goals_even <chr>,
## # goals_power_play <chr>, goals_short_handed <chr>, goals_game_winner <chr>,
## # headshot <chr>, and abbreviated variable names ¹total_goals, ²yr_start
Filter
# Keep only the rows where `age` is 18 and `league` is "NHL".
data %>%
  filter(age == 18, league == "NHL")
## # A tibble: 40 × 24
## ...1 rank position hand player years total…¹ status yr_st…² season age
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 26 2 RW Right Gordie … 1946… 801 Retir… 1946 1946-… 18
## 2 58 3 RW Left Jaromir… 1990… 766 Retir… 1990 1990-… 18
## 3 218 10 C Right Steve Y… 1983… 692 Retir… 1983 1983-… 18
## 4 570 26 C Left Patrick… 1997… 561 Active 1997 1997-… 18
## 5 632 29 C Left Ron Fra… 1981… 549 Retir… 1981 1981-… 18
## 6 695 32 C Right Stan Mi… 1958… 541 Retir… 1958 1958-… 18
## 7 804 37 NA NA Pat Ver… 1982… 522 Retir… 1982 1982-… 18
## 8 826 38 C Left Dale Ha… 1981… 518 Retir… 1981 1981-… 18
## 9 844 39 NA NA Pierre … 1987… 515 Retir… 1987 1987-… 18
## 10 1049 49 NA NA Brian B… 1982… 485 Retir… 1982 1982-… 18
## # … with 30 more rows, 13 more variables: team <chr>, league <chr>,
## # season_games <dbl>, goals <dbl>, assists <dbl>, points <dbl>,
## # plus_minus <chr>, penalty_min <dbl>, goals_even <chr>,
## # goals_power_play <chr>, goals_short_handed <chr>, goals_game_winner <chr>,
## # headshot <chr>, and abbreviated variable names ¹total_goals, ²yr_start
Arrange
# Sort by `goals`, highest first; ties are ordered by `age`, highest first.
data %>%
  arrange(desc(goals), desc(age))
## # A tibble: 4,810 × 24
## ...1 rank position hand player years total…¹ status yr_st…² season age
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 6 1 C Left Wayne G… 1979… 894 Retir… 1979 1981-… 21
## 2 8 1 C Left Wayne G… 1979… 894 Retir… 1979 1983-… 23
## 3 94 4 RW Right Brett H… 1986… 741 Retir… 1986 1990-… 26
## 4 244 11 C Right Mario L… 1984… 690 Retir… 1984 1988-… 23
## 5 414 18 LW Left Bobby H… 1957… 610 Retir… 1957 1974-… 36
## 6 136 6 C Left Phil Es… 1963… 717 Retir… 1963 1970-… 28
## 7 1153 54 NA NA Alexand… 1989… 473 Retir… 1989 1992-… 23
## 8 257 12 RW Right Teemu S… 1992… 684 Retir… 1992 1992-… 22
## 9 9 1 C Left Wayne G… 1979… 894 Retir… 1979 1984-… 24
## 10 93 4 RW Right Brett H… 1986… 741 Retir… 1986 1989-… 25
## # … with 4,800 more rows, 13 more variables: team <chr>, league <chr>,
## # season_games <dbl>, goals <dbl>, assists <dbl>, points <dbl>,
## # plus_minus <chr>, penalty_min <dbl>, goals_even <chr>,
## # goals_power_play <chr>, goals_short_handed <chr>, goals_game_winner <chr>,
## # headshot <chr>, and abbreviated variable names ¹total_goals, ²yr_start
Select
# Keep the contiguous run of columns from `rank` through `total_goals`.
data %>%
  select(rank:total_goals)
## # A tibble: 4,810 × 6
## rank position hand player years total_goals
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 1 C Left Wayne Gretzky 1979-99 894
## 2 1 C Left Wayne Gretzky 1979-99 894
## 3 1 C Left Wayne Gretzky 1979-99 894
## 4 1 C Left Wayne Gretzky 1979-99 894
## 5 1 C Left Wayne Gretzky 1979-99 894
## 6 1 C Left Wayne Gretzky 1979-99 894
## 7 1 C Left Wayne Gretzky 1979-99 894
## 8 1 C Left Wayne Gretzky 1979-99 894
## 9 1 C Left Wayne Gretzky 1979-99 894
## 10 1 C Left Wayne Gretzky 1979-99 894
## # … with 4,800 more rows
Add a new column
# Derive goals per game for each row, then show it beside the columns
# `rank` through `status`.
data %>%
  mutate(goals_per_game = goals / season_games) %>%
  select(rank:status, goals_per_game)
## # A tibble: 4,810 × 8
## rank position hand player years total_goals status goals_per_game
## <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 C Left Wayne Gretzky 1979-99 894 Retired 0.575
## 2 1 C Left Wayne Gretzky 1979-99 894 Retired 0.375
## 3 1 C Left Wayne Gretzky 1979-99 894 Retired 0.597
## 4 1 C Left Wayne Gretzky 1979-99 894 Retired 0.646
## 5 1 C Left Wayne Gretzky 1979-99 894 Retired 0.688
## 6 1 C Left Wayne Gretzky 1979-99 894 Retired 1.15
## 7 1 C Left Wayne Gretzky 1979-99 894 Retired 0.888
## 8 1 C Left Wayne Gretzky 1979-99 894 Retired 1.18
## 9 1 C Left Wayne Gretzky 1979-99 894 Retired 0.912
## 10 1 C Left Wayne Gretzky 1979-99 894 Retired 0.65
## # … with 4,800 more rows
Summarize
# Average of `goals` across all rows, ignoring missing values.
data %>%
  summarise(avggoals = mean(goals, na.rm = TRUE))
## # A tibble: 1 × 1
## avggoals
## <dbl>
## 1 22.5