Import your data

The data comes from the TidyTuesday repository (2019-10-08, IPF powerlifting results); the URL is in the code below.

# Read the IPF lifts CSV straight from the TidyTuesday GitHub repository.
ipf_lifts <- readr::read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-08/ipf_lifts.csv"
)
## Rows: 41152 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): name, sex, event, equipment, age_class, division, weight_class_kg...
## dbl   (5): age, bodyweight_kg, best3squat_kg, best3bench_kg, best3deadlift_kg
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore data

# Number of rows recorded for each lifter name.
count(ipf_lifts, name)
## # A tibble: 17,803 × 2
##    name                  n
##    <chr>             <int>
##  1 A-Yun Lin             6
##  2 A Almeida             1
##  3 A Ernandos-Ortega     1
##  4 A. Ali                1
##  5 A. Avalio             1
##  6 A. Bojanic            1
##  7 A. Candelaria         1
##  8 A. Croeneboom         1
##  9 A. Cullen             1
## 10 A. De Vega            1
## # … with 17,793 more rows
# Number of rows for each value of `event`.
count(ipf_lifts, event)
## # A tibble: 3 × 2
##   event     n
##   <chr> <int>
## 1 B     12564
## 2 SB        2
## 3 SBD   28586
# Number of rows for each value of `equipment`.
count(ipf_lifts, equipment)
## # A tibble: 3 × 2
##   equipment      n
##   <chr>      <int>
## 1 Raw         7567
## 2 Single-ply 33309
## 3 Wraps        276
# Number of rows for each name / event / equipment / age class combination.
count(ipf_lifts, name, event, equipment, age_class)
## # A tibble: 26,409 × 5
##    name              event equipment  age_class     n
##    <chr>             <chr> <chr>      <chr>     <int>
##  1 A-Yun Lin         SBD   Raw        70-74         1
##  2 A-Yun Lin         SBD   Single-ply 40-44         2
##  3 A-Yun Lin         SBD   Single-ply 45-49         3
##  4 A Almeida         SBD   Single-ply <NA>          1
##  5 A Ernandos-Ortega SBD   Single-ply 18-19         1
##  6 A. Ali            B     Raw        <NA>          1
##  7 A. Avalio         SBD   Single-ply <NA>          1
##  8 A. Bojanic        SBD   Single-ply <NA>          1
##  9 A. Candelaria     SBD   Single-ply <NA>          1
## 10 A. Croeneboom     SBD   Single-ply <NA>          1
## # … with 26,399 more rows
# Number of rows for each value of `place` (stored as character).
count(ipf_lifts, place)
## # A tibble: 34 × 2
##    place     n
##    <chr> <int>
##  1 1      6480
##  2 10     1064
##  3 11      789
##  4 12      586
##  5 13      443
##  6 14      323
##  7 15      233
##  8 16      164
##  9 17      105
## 10 18       67
## # … with 24 more rows

Chapter 15

Create a factor

Get the top 10 lifters with the most first-place finishes.

# Count first-place rows per lifter and keep the ten largest counts.
# `place` is a character column, so first place is matched as the string "1".
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
top_10_tbl <- ipf_lifts %>%
    filter(place == "1") %>%
    count(name, sort = TRUE) %>%
    head(10)

top_10_tbl
## # A tibble: 10 × 2
##    name                  n
##    <chr>             <int>
##  1 Hideaki Inaba        28
##  2 Andrzej Stanaszek    24
##  3 Sergey Fedosienko    24
##  4 Ielja Strik          22
##  5 Hiroyuki Isagawa     21
##  6 Jarosław Olech       21
##  7 Hana Takáčová        20
##  8 Daiki Kodama         18
##  9 Alexey Sivokon       17
## 10 Priscilla Ribic      17
# Turn the lifter names into a factor; factor() sorts the levels by default.
top_10_tbl <- mutate(top_10_tbl, name = factor(name))

top_10_tbl$name
##  [1] Hideaki Inaba     Andrzej Stanaszek Sergey Fedosienko Ielja Strik      
##  [5] Hiroyuki Isagawa  Jarosław Olech    Hana Takáčová     Daiki Kodama     
##  [9] Alexey Sivokon    Priscilla Ribic  
## 10 Levels: Alexey Sivokon Andrzej Stanaszek Daiki Kodama ... Sergey Fedosienko

Modify factor order

Make two plots here: one before reordering the factor levels and another after.

# Point plot of first-place counts using the factor's default level order.
ggplot(top_10_tbl, aes(x = n, y = name)) +
    geom_point()

# Same point plot, with the name levels reordered by first-place count.
ggplot(top_10_tbl, aes(x = n, y = fct_reorder(name, n))) +
    geom_point()

Modify factor levels

Show examples of three functions:

  • fct_recode
  • fct_collapse
  • fct_lump

Chapter 16

No need to do anything here.