Import your data

# Load the CSV into `data`. The file has an unnamed, entirely empty trailing
# column, which readr renames to `...17` (see the message below).
data <- read_csv(file = "../00_data/Mydata.csv")
## New names:
## Rows: 41152 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Name, Sex, Event, Equiptment, Age Class, Division, Weight Class KG...
## dbl  (5): Age, Bodyweight, Best Sqaut KG, Best Bench KG, Best Deadlifting
## lgl  (1): ...17
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...17`
# Summarise every column of `data`, grouped by variable type (missing counts,
# completeness, and per-type statistics), as printed below.
skimr::skim(data)
Data summary
Name data
Number of rows 41152
Number of columns 17
_______________________
Column type frequency:
character 11
logical 1
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Name 0 1.00 2 36 0 17805 0
Sex 0 1.00 1 1 0 2 0
Event 0 1.00 1 3 0 3 0
Equiptment 0 1.00 3 10 0 3 0
Age Class 2884 0.93 5 6 0 16 0
Division 627 0.98 4 11 0 12 0
Weight Class KG 1 1.00 2 5 0 38 0
Place 0 1.00 1 2 0 34 0
Date 0 1.00 6 8 0 224 0
Federation 0 1.00 3 3 0 1 0
Meet Name 0 1.00 11 54 0 32 0

Variable type: logical

skim_variable n_missing complete_rate mean count
...17 41152 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Age 2906 0.93 34.77 14.62 0.50 22.5 31.50 45.0 93.5 ▂▇▅▂▁
Bodyweight 187 1.00 81.15 24.93 37.29 60.0 75.55 97.3 240.0 ▇▆▂▁▁
Best Sqaut KG 13698 0.67 217.55 74.61 -210.00 160.0 215.00 270.0 490.0 ▁▁▇▇▁
Best Bench KG 2462 0.94 144.68 60.03 -160.00 97.5 140.00 185.0 415.0 ▁▁▇▃▁
Best Deadlifting 14028 0.66 221.84 63.72 -215.00 170.0 222.50 270.0 420.0 ▁▁▃▇▂

Chapter 14

Tools

Detect matches

# str_detect() yields TRUE/FALSE for each row; filter() keeps only the rows
# whose Event value contains the letter "D".
filter(data, str_detect(Event, pattern = "D"))
## # A tibble: 28,586 × 17
##    Name        Sex   Event Equip…¹   Age Age C…² Divis…³ Bodyw…⁴ Weigh…⁵ Best …⁶
##    <chr>       <chr> <chr> <chr>   <dbl> <chr>   <chr>     <dbl> <chr>     <dbl>
##  1 Anna-Liisa… F     SBD   Single…  33.5 24-34   Open         44 44         135 
##  2 Vuokko Vii… F     SBD   Single…  34.5 24-34   Open         44 44         120 
##  3 Maria DelC… F     SBD   Single…  23.5 24-34   Open         44 44         130 
##  4 Helen Wols… F     SBD   Single…  27.5 24-34   Open         44 44         112.
##  5 Lijnie van… F     SBD   Single…  37.5 35-39   Open         44 44         105 
##  6 Carine Sta… F     SBD   Single…  25.5 24-34   Open         44 44         108.
##  7 Claudine C… F     SBD   Single…  33.5 24-34   Open         48 48         132.
##  8 Glynnis Bi… F     SBD   Single…  26   24-34   Open         48 48         130 
##  9 Gisele Mat… F     SBD   Single…  33.5 24-34   Open         48 48         130 
## 10 Malou Thill F     SBD   Single…  32.5 24-34   Open         48 48         120 
## # … with 28,576 more rows, 7 more variables: `Best Bench KG` <dbl>,
## #   `Best Deadlifting` <dbl>, Place <chr>, Date <chr>, Federation <chr>,
## #   `Meet Name` <chr>, ...17 <lgl>, and abbreviated variable names ¹​Equiptment,
## #   ²​`Age Class`, ³​Division, ⁴​Bodyweight, ⁵​`Weight Class KG`, ⁶​`Best Sqaut KG`

Extract matches

# Add Sex_ext: the matched text "M" where Sex contains it, NA where it does
# not (str_extract() returns NA when there is no match).
mutate(data, Sex_ext = str_extract(Sex, pattern = "M"))
## # A tibble: 41,152 × 18
##    Name        Sex   Event Equip…¹   Age Age C…² Divis…³ Bodyw…⁴ Weigh…⁵ Best …⁶
##    <chr>       <chr> <chr> <chr>   <dbl> <chr>   <chr>     <dbl> <chr>     <dbl>
##  1 Anna James  F     B     Single…  36.5 35-39   <NA>       43.9 44           NA
##  2 Ingrid Gri… F     B     Single…  30.5 24-34   <NA>       42.5 44           NA
##  3 Yurika Asa… F     B     Single…  28.5 24-34   <NA>       43.5 44           NA
##  4 Raija Anne… F     B     Single…  29.5 24-34   <NA>       43.6 44           NA
##  5 Valeria So… F     B     Single…  17.5 18-19   <NA>       42.9 44           NA
##  6 Irina Kryl… F     B     Single…  14.5 13-15   <NA>       47.5 48           NA
##  7 Tsuko Wata… F     B     Single…  30.5 24-34   <NA>       47.3 48           NA
##  8 Elena Yams… F     B     Single…  18.5 18-19   <NA>       47.4 48           NA
##  9 Lai-Hsiu C… F     B     Single…  35.5 35-39   <NA>       47.1 48           NA
## 10 Heike Roth  F     B     Single…  32.5 24-34   <NA>       46.4 48           NA
## # … with 41,142 more rows, 8 more variables: `Best Bench KG` <dbl>,
## #   `Best Deadlifting` <dbl>, Place <chr>, Date <chr>, Federation <chr>,
## #   `Meet Name` <chr>, ...17 <lgl>, Sex_ext <chr>, and abbreviated variable
## #   names ¹​Equiptment, ²​`Age Class`, ³​Division, ⁴​Bodyweight,
## #   ⁵​`Weight Class KG`, ⁶​`Best Sqaut KG`

Replacing matches

# Add Sex_ext: a copy of Sex with the first "M" replaced by "Male"; values
# without an "M" are carried over unchanged.
mutate(
  data,
  Sex_ext = str_replace(Sex, pattern = "M", replacement = "Male")
)
## # A tibble: 41,152 × 18
##    Name        Sex   Event Equip…¹   Age Age C…² Divis…³ Bodyw…⁴ Weigh…⁵ Best …⁶
##    <chr>       <chr> <chr> <chr>   <dbl> <chr>   <chr>     <dbl> <chr>     <dbl>
##  1 Anna James  F     B     Single…  36.5 35-39   <NA>       43.9 44           NA
##  2 Ingrid Gri… F     B     Single…  30.5 24-34   <NA>       42.5 44           NA
##  3 Yurika Asa… F     B     Single…  28.5 24-34   <NA>       43.5 44           NA
##  4 Raija Anne… F     B     Single…  29.5 24-34   <NA>       43.6 44           NA
##  5 Valeria So… F     B     Single…  17.5 18-19   <NA>       42.9 44           NA
##  6 Irina Kryl… F     B     Single…  14.5 13-15   <NA>       47.5 48           NA
##  7 Tsuko Wata… F     B     Single…  30.5 24-34   <NA>       47.3 48           NA
##  8 Elena Yams… F     B     Single…  18.5 18-19   <NA>       47.4 48           NA
##  9 Lai-Hsiu C… F     B     Single…  35.5 35-39   <NA>       47.1 48           NA
## 10 Heike Roth  F     B     Single…  32.5 24-34   <NA>       46.4 48           NA
## # … with 41,142 more rows, 8 more variables: `Best Bench KG` <dbl>,
## #   `Best Deadlifting` <dbl>, Place <chr>, Date <chr>, Federation <chr>,
## #   `Meet Name` <chr>, ...17 <lgl>, Sex_ext <chr>, and abbreviated variable
## #   names ¹​Equiptment, ²​`Age Class`, ³​Division, ⁴​Bodyweight,
## #   ⁵​`Weight Class KG`, ⁶​`Best Sqaut KG`