Import your data

# Excel file: load the match data from the workbook, then print a preview
games <- read_excel(path = "../00_data/MyData_charts.xlsx")
games
## # A tibble: 988 × 15
##     year country city    stage home_team away_team home_score away_score outcome
##    <dbl> <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  1930 Uruguay Montev… Grou… France    Mexico             4          1 H      
##  2  1930 Uruguay Montev… Grou… Belgium   United S…          0          3 A      
##  3  1930 Uruguay Montev… Grou… Brazil    Yugoslav…          1          2 A      
##  4  1930 Uruguay Montev… Grou… Peru      Romania            1          3 A      
##  5  1930 Uruguay Montev… Grou… Argentina France             1          0 H      
##  6  1930 Uruguay Montev… Grou… Chile     Mexico             3          0 H      
##  7  1930 Uruguay Montev… Grou… Bolivia   Yugoslav…          0          4 A      
##  8  1930 Uruguay Montev… Grou… Paraguay  United S…          0          3 A      
##  9  1930 Uruguay Montev… Grou… Uruguay   Peru               1          0 H      
## 10  1930 Uruguay Montev… Grou… Argentina Mexico             6          3 H      
## # ℹ 978 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <dttm>, month <chr>, dayofweek <chr>

Chapter 15

Create a factor

# Day labels stored as a plain character vector
x1 <- c("Sun", "Sun", "Mon", "Mon", "Tue", "Wed")

# A character vector sorts alphabetically, not in calendar order
sort(x1)
## [1] "Mon" "Mon" "Sun" "Sun" "Tue" "Wed"
# Every valid level, listed in the order the days should sort in
day_levels <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
# Build the factor with those levels attached
y1 <- factor(x = x1, levels = day_levels)
y1
## [1] Sun Sun Mon Mon Tue Wed
## Levels: Mon Tue Wed Thu Fri Sat Sun
# Sorting a factor follows the level order instead of the alphabet
sort(y1)
## [1] Mon Mon Tue Wed Sun Sun
## Levels: Mon Tue Wed Thu Fri Sat Sun

Modify factor order

Make two bar charts here - one before ordering, another after.

Unordered

# Mean home-team goals for each day of the week.
# Games with no recorded day of week are dropped before grouping so they do
# not form a spurious NA "day" in the summary and in the plots built from it.
homegoal_summary <- games %>%
    filter(!is.na(dayofweek)) %>%
    group_by(dayofweek) %>%
    summarise(
        avg_goals = mean(home_score, na.rm = TRUE)
    )
homegoal_summary
## # A tibble: 7 × 2
##   dayofweek avg_goals
##   <chr>         <dbl>
## 1 Friday         1.29
## 2 Monday         1.65
## 3 Saturday       1.55
## 4 Sunday         1.80
## 5 Thursday       1.39
## 6 Tuesday        1.57
## 7 Wednesday      1.54
# Dot plot of mean home goals per day; the days are not explicitly ordered
ggplot(data = homegoal_summary, mapping = aes(x = avg_goals, y = dayofweek)) +
    geom_point() +
    # Labeling
    labs(x = "Mean goals each week day", y = NULL)

Modify factor levels

Ordered

# Reorder
# Order the day levels by their mean goals before plotting
homegoal_summary %>%
    mutate(dayofweek = fct_reorder(.f = dayofweek, .x = avg_goals)) %>%
    ggplot(aes(x = avg_goals, y = dayofweek)) +
    geom_point() +
    # Labeling
    labs(x = "Mean goals each week day", y = NULL)

# Relevel
# Order the day levels by mean goals, then move "Sunday" to the front
homegoal_summary %>%
    mutate(
        dayofweek = dayofweek %>%
            fct_reorder(.x = avg_goals) %>%
            fct_relevel("Sunday")
    ) %>%
    ggplot(aes(x = avg_goals, y = dayofweek)) +
    geom_point() +
    # Labeling
    labs(x = "Mean goals each week day", y = NULL)

# Recode
# List the distinct day-of-week values present in the data
distinct(games, dayofweek)
## # A tibble: 8 × 1
##   dayofweek
##   <chr>    
## 1 Sunday   
## 2 Monday   
## 3 Tuesday  
## 4 Wednesday
## 5 Thursday 
## 6 Friday   
## 7 Saturday 
## 8 <NA>
# Relabel the "Saturday" level as "Weekend", then show only the Saturday games
games %>%
    select(dayofweek) %>%
    mutate(weekend = fct_recode(dayofweek, Weekend = "Saturday")) %>%
    filter(dayofweek == "Saturday")
## # A tibble: 152 × 2
##    dayofweek weekend
##    <chr>     <fct>  
##  1 Saturday  Weekend
##  2 Saturday  Weekend
##  3 Saturday  Weekend
##  4 Saturday  Weekend
##  5 Saturday  Weekend
##  6 Saturday  Weekend
##  7 Saturday  Weekend
##  8 Saturday  Weekend
##  9 Saturday  Weekend
## 10 Saturday  Weekend
## # ℹ 142 more rows
# Collapse multiple levels into one
# Saturday and Sunday are merged into a single "Weekend" level; the preview
# then keeps only rows whose day compares as different from "Monday"
games %>%
    select(dayofweek) %>%
    mutate(
        weekday = fct_collapse(dayofweek, Weekend = c("Saturday", "Sunday"))
    ) %>%
    filter(dayofweek != "Monday")
## # A tibble: 818 × 2
##    dayofweek weekday  
##    <chr>     <fct>    
##  1 Sunday    Weekend  
##  2 Sunday    Weekend  
##  3 Tuesday   Tuesday  
##  4 Wednesday Wednesday
##  5 Thursday  Thursday 
##  6 Thursday  Thursday 
##  7 Friday    Friday   
##  8 Saturday  Weekend  
##  9 Saturday  Weekend  
## 10 Sunday    Weekend  
## # ℹ 808 more rows
# Lump small levels into other levels
# Count the games played on each day of the week
count(games, dayofweek)
## # A tibble: 8 × 2
##   dayofweek     n
##   <chr>     <int>
## 1 Friday       92
## 2 Monday       82
## 3 Saturday    152
## 4 Sunday      196
## 5 Thursday    111
## 6 Tuesday     119
## 7 Wednesday   148
## 8 <NA>         88
# Merge the least frequent days into "Other". fct_lump_lowfreq() is the
# explicit replacement for calling the superseded fct_lump() with no
# `n`/`prop` argument, so the result is unchanged.
games %>%
    mutate(day_lump = fct_lump_lowfreq(dayofweek)) %>%
    distinct(day_lump)
## # A tibble: 8 × 1
##   day_lump 
##   <fct>    
## 1 Sunday   
## 2 Other    
## 3 Tuesday  
## 4 Wednesday
## 5 Thursday 
## 6 Friday   
## 7 Saturday 
## 8 <NA>

Chapter 16

No need to do anything here.