Import your data

# Read the winners CSV from the sibling 00_data directory into a tibble.
Mydata <- read_csv(file = "../00_data/tdf_winners.csv")
## Rows: 106 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): winner_name, winner_team, full_name, nickname, birth_town, birth_c...
## dbl  (9): edition, distance, time_overall, time_margin, stage_wins, stages_l...
## date (3): start_date, born, died
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to preview its first rows and the column types.
Mydata
## # A tibble: 106 × 19
##    edition start_date winner_name  winner_team distance time_overall time_margin
##      <dbl> <date>     <chr>        <chr>          <dbl>        <dbl>       <dbl>
##  1       1 1903-07-01 Maurice Gar… La Françai…     2428         94.6        2.99
##  2       2 1904-07-02 Henri Cornet Conte           2428         96.1        2.27
##  3       3 1905-07-09 Louis Trous… Peugeot–Wo…     2994         NA         NA   
##  4       4 1906-07-04 René Pottier Peugeot–Wo…     4637         NA         NA   
##  5       5 1907-07-08 Lucien Peti… Peugeot–Wo…     4488         NA         NA   
##  6       6 1908-07-13 Lucien Peti… Peugeot–Wo…     4497         NA         NA   
##  7       7 1909-07-05 François Fa… Alcyon–Dun…     4498         NA         NA   
##  8       8 1910-07-01 Octave Lapi… Alcyon–Dun…     4734         NA         NA   
##  9       9 1911-07-02 Gustave Gar… Alcyon–Dun…     5343         NA         NA   
## 10      10 1912-06-30 Odile Defra… Alcyon–Dun…     5289         NA         NA   
## # ℹ 96 more rows
## # ℹ 12 more variables: stage_wins <dbl>, stages_led <dbl>, height <dbl>,
## #   weight <dbl>, age <dbl>, born <date>, died <date>, full_name <chr>,
## #   nickname <chr>, birth_town <chr>, birth_country <chr>, nationality <chr>

Chapter 14

Tools

Detect matches

# Pull the winner_name column out of the tibble as a plain character vector.
Mydata[["winner_name"]]
##   [1] "Maurice Garin"       "Henri Cornet"        "Louis Trousselier"  
##   [4] "René Pottier"        "Lucien Petit-Breton" "Lucien Petit-Breton"
##   [7] "François Faber"      "Octave Lapize"       "Gustave Garrigou"   
##  [10] "Odile Defraye"       "Philippe Thys"       "Philippe Thys"      
##  [13] "Firmin Lambot"       "Philippe Thys"       "Léon Scieur"        
##  [16] "Firmin Lambot"       "Henri Pélissier"     "Ottavio Bottecchia" 
##  [19] "Ottavio Bottecchia"  "Lucien Buysse"       "Nicolas Frantz"     
##  [22] "Nicolas Frantz"      "Maurice De Waele"    "André Leducq"       
##  [25] "Antonin Magne"       "André Leducq"        "Georges Speicher"   
##  [28] "Antonin Magne"       "Romain Maes"         "Sylvère Maes"       
##  [31] "Roger Lapébie"       "Gino Bartali"        "Sylvère Maes"       
##  [34] "Jean Robic"          "Gino Bartali"        "Fausto Coppi"       
##  [37] "Ferdinand Kübler"    "Hugo Koblet"         "Fausto Coppi"       
##  [40] "Louison Bobet"       "Louison Bobet"       "Louison Bobet"      
##  [43] "Roger Walkowiak"     "Jacques Anquetil"    "Charly Gaul"        
##  [46] "Federico Bahamontes" "Gastone Nencini"     "Jacques Anquetil"   
##  [49] "Jacques Anquetil"    "Jacques Anquetil"    "Jacques Anquetil"   
##  [52] "Felice Gimondi"      "Lucien Aimar"        "Roger Pingeon"      
##  [55] "Jan Janssen"         "Eddy Merckx"         "Eddy Merckx"        
##  [58] "Eddy Merckx"         "Eddy Merckx"         "Luis Ocaña"         
##  [61] "Eddy Merckx"         "Bernard Thévenet"    "Lucien Van Impe"    
##  [64] "Bernard Thévenet"    "Bernard Hinault"     "Bernard Hinault"    
##  [67] "Joop Zoetemelk"      "Bernard Hinault"     "Bernard Hinault"    
##  [70] "Laurent Fignon"      "Laurent Fignon"      "Bernard Hinault"    
##  [73] "Greg LeMond"         "Stephen Roche"       "Pedro Delgado"      
##  [76] "Greg LeMond"         "Greg LeMond"         "Miguel Induráin"    
##  [79] "Miguel Induráin"     "Miguel Induráin"     "Miguel Induráin"    
##  [82] "Miguel Induráin"     "Bjarne Riis"         "Jan Ullrich"        
##  [85] "Marco Pantani"       "Lance Armstrong"     "Lance Armstrong"    
##  [88] "Lance Armstrong"     "Lance Armstrong"     "Lance Armstrong"    
##  [91] "Lance Armstrong"     "Lance Armstrong"     "Óscar Pereiro"      
##  [94] "Alberto Contador"    "Carlos Sastre"       "Alberto Contador"   
##  [97] "Andy Schleck"        "Cadel Evans"         "Bradley Wiggins"    
## [100] "Chris Froome"        "Vincenzo Nibali"     "Chris Froome"       
## [103] "Chris Froome"        "Chris Froome"        "Geraint Thomas"     
## [106] "Egan Bernal"
# Flag, element by element, which winner names contain the pattern "Lance".
str_detect(string = Mydata[["winner_name"]], pattern = "Lance")
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Count the matches: summing a logical vector counts its TRUE values.
Mydata[["winner_name"]] %>%
    str_detect("Lance") %>%
    sum()
## [1] 7
# Same count, computed inside a dplyr pipeline.
# The column is referenced by its bare name so summarize() evaluates it against
# the data that was piped in (honoring any upstream filter()/group_by()) rather
# than reaching back to the global Mydata object with `$`.
Mydata %>%
    summarize(num_Lance = sum(str_detect(winner_name, "Lance")))
## # A tibble: 1 × 1
##   num_Lance
##       <int>
## 1         7

Extract matches

# Keep only the name column, add the extracted match next to it
# (str_extract() yields NA where the pattern is absent), then drop the
# rows that had no match.
Mydata %>%
    select(winner_name) %>%
    mutate(col_Lance = str_extract(winner_name, "Lance")) %>%
    filter(!is.na(col_Lance))
## # A tibble: 7 × 2
##   winner_name     col_Lance
##   <chr>           <chr>    
## 1 Lance Armstrong Lance    
## 2 Lance Armstrong Lance    
## 3 Lance Armstrong Lance    
## 4 Lance Armstrong Lance    
## 5 Lance Armstrong Lance    
## 6 Lance Armstrong Lance    
## 7 Lance Armstrong Lance

Replace matches

# Keep only the name column and add a copy in which the first occurrence of
# "Lance" in each name is swapped for "Firmin"; names without a match are
# carried over unchanged.
Mydata %>%
    select(winner_name) %>%
    mutate(col_Firmin = str_replace(winner_name, "Lance", "Firmin"))
## # A tibble: 106 × 2
##    winner_name         col_Firmin         
##    <chr>               <chr>              
##  1 Maurice Garin       Maurice Garin      
##  2 Henri Cornet        Henri Cornet       
##  3 Louis Trousselier   Louis Trousselier  
##  4 René Pottier        René Pottier       
##  5 Lucien Petit-Breton Lucien Petit-Breton
##  6 Lucien Petit-Breton Lucien Petit-Breton
##  7 François Faber      François Faber     
##  8 Octave Lapize       Octave Lapize      
##  9 Gustave Garrigou    Gustave Garrigou   
## 10 Odile Defraye       Odile Defraye      
## # ℹ 96 more rows