Import your data

# Load the workbook from the project's data folder. The sheet's first column
# has no header, so it is given the placeholder name `...1` (message below).
data <- read_excel(path = "../00_data/myData.xlsx")
## New names:
## • `` -> `...1`
# Print the tibble to check column names and types.
data
## # A tibble: 4,810 × 24
##     ...1  rank position hand  player   years total…¹ status yr_st…² season   age
##    <dbl> <dbl> <chr>    <chr> <chr>    <chr>   <dbl> <chr>    <dbl> <chr>  <dbl>
##  1     1     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1978-…    18
##  2     2     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1978-…    18
##  3     3     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1978-…    18
##  4     4     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1979-…    19
##  5     5     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1980-…    20
##  6     6     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1981-…    21
##  7     7     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1982-…    22
##  8     8     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1983-…    23
##  9     9     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1984-…    24
## 10    10     1 C        Left  Wayne G… 1979…     894 Retir…    1979 1985-…    25
## # … with 4,800 more rows, 13 more variables: team <chr>, league <chr>,
## #   season_games <dbl>, goals <dbl>, assists <dbl>, points <dbl>,
## #   plus_minus <chr>, penalty_min <dbl>, goals_even <chr>,
## #   goals_power_play <chr>, goals_short_handed <chr>, goals_game_winner <chr>,
## #   headshot <chr>, and abbreviated variable names ¹​total_goals, ²​yr_start
# List the unique values of `position` to see which categories exist.
distinct(data, position)
## # A tibble: 5 × 1
##   position
##   <chr>   
## 1 C       
## 2 RW      
## 3 LW      
## 4 NA      
## 5 D

Chapter 15

Create a factor

# Average goals per season row for each position; missing goal values are
# dropped before averaging.
goals_by_position <- summarise(
    group_by(data, position),
    avg_goalsbypos = mean(goals, na.rm = TRUE)
)

# Show the summary table (one row per position).
goals_by_position
## # A tibble: 5 × 2
##   position avg_goalsbypos
##   <chr>             <dbl>
## 1 C                  24.3
## 2 D                  14.9
## 3 LW                 24.6
## 4 NA                 19.9
## 5 RW                 24.6

Modify factor order

Make two bar charts here: one before ordering and another after.

# Baseline bar chart: average goals per position, before any reordering of
# the positions. geom_col() draws bars whose length is the value already
# stored in avg_goalsbypos; geom_bar() would instead count rows and reject
# having both x and y mapped.
goals_by_position %>%
    ggplot(aes(x = avg_goalsbypos, y = position)) +
    geom_col()

# Ordered bar chart: positions are sorted by their average goals, then the
# "NA" level is moved to the first position in the level order.
# geom_bar() failed with "can only have an x or y aesthetic" because its
# default stat counts rows; geom_col() plots the precomputed averages as bars.
goals_by_position %>%
    ggplot(aes(
        x = avg_goalsbypos,
        y = fct_reorder(.f = position, .x = avg_goalsbypos) %>%
            fct_relevel("NA")
    )) +
    geom_col() +
    # Aesthetic names are case-sensitive: `X` was ignored, `x` sets the title.
    labs(y = NULL, x = "Average goals by position")

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename the literal "NA" level to "Unknown", then show only the
# affected rows next to the original column for comparison.
data %>%
    mutate(
        position_chg = fct_recode(position, Unknown = "NA")
    ) %>%
    select(
        position_chg,
        position
    ) %>%
    filter(position == "NA")
## # A tibble: 1,681 × 2
##    position_chg position
##    <fct>        <chr>   
##  1 Unknown      NA      
##  2 Unknown      NA      
##  3 Unknown      NA      
##  4 Unknown      NA      
##  5 Unknown      NA      
##  6 Unknown      NA      
##  7 Unknown      NA      
##  8 Unknown      NA      
##  9 Unknown      NA      
## 10 Unknown      NA      
## # … with 1,671 more rows
  • fct_collapse
# fct_collapse(): merge the three forward positions into a single "Forward"
# level, then hide the "D" rows so the collapsed values are easy to see.
data %>%
    mutate(
        position_col = fct_collapse(position, Forward = c("LW", "C", "RW"))
    ) %>%
    select(
        position,
        position_col
    ) %>%
    filter(position != "D")
## # A tibble: 4,643 × 2
##    position position_col
##    <chr>    <fct>       
##  1 C        Forward     
##  2 C        Forward     
##  3 C        Forward     
##  4 C        Forward     
##  5 C        Forward     
##  6 C        Forward     
##  7 C        Forward     
##  8 C        Forward     
##  9 C        Forward     
## 10 C        Forward     
## # … with 4,633 more rows
  • fct_lump
# fct_lump(): with no `n` or `prop`, the least frequent levels are lumped into
# "Other"; distinct() then lists the levels that remain.
data %>%
    mutate(position_lump = fct_lump(position)) %>%
    distinct(position_lump)
## # A tibble: 5 × 1
##   position_lump
##   <fct>        
## 1 C            
## 2 RW           
## 3 LW           
## 4 NA           
## 5 Other

Chapter 16

No need to do anything here.