Import your data

# Read the TidyTuesday 2023-04-25 winners CSV straight from GitHub and keep
# the resulting tibble as `winners` for the string exercises below.
winners <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-25/winners.csv"
)
## Rows: 163 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Category, Athlete, Nationality
## dbl  (1): Year
## time (1): Time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 14

Tools

Detect matches

# Print the Nationality column as a plain character vector.
winners[["Nationality"]]
##   [1] "United States"  "Norway"         "United Kingdom" "United Kingdom"
##   [5] "United Kingdom" "United Kingdom" "Japan"          "Japan"         
##   [9] "Denmark"        "Kenya"          "United Kingdom" "Soviet Union"  
##  [13] "Portugal"       "United Kingdom" "Mexico"         "Mexico"        
##  [17] "Mexico"         "Portugal"       "Spain"          "Morocco"       
##  [21] "Portugal"       "Morocco"        "United States"  "Ethiopia"      
##  [25] "Kenya"          "Kenya"          "Kenya"          "Kenya"         
##  [29] "Kenya"          "Kenya"          "Ethiopia"       "Kenya"         
##  [33] "Kenya"          "Ethiopia"       "Kenya"          "Kenya"         
##  [37] "Kenya"          "Kenya"          "Kenya"          "Kenya"         
##  [41] "Ethiopia"       "Ethiopia"       "Kenya"          "United Kingdom"
##  [45] "United Kingdom" "Norway"         "Norway"         "Norway"        
##  [49] "Norway"         "Norway"         "Norway"         "United Kingdom"
##  [53] "Poland"         "Portugal"       "Germany"        "Germany"       
##  [57] "Germany"        "Poland"         "United Kingdom" "Kenya"         
##  [61] "Ireland"        "Kenya"          "Kenya"          "Ethiopia"      
##  [65] "United Kingdom" "United Kingdom" "Kenya"          "United Kingdom"
##  [69] "United States"  "China"          "Germany"        "Germany"       
##  [73] "Ethiopia"       "Kenya"          "Kenya"          "Kenya"         
##  [77] "Kenya"          "Ethiopia"       "Kenya"          "Kenya"         
##  [81] "Kenya"          "Kenya"          "Kenya"          "Kenya"         
##  [85] "Ethiopia"       "United Kingdom" "Ireland"        "United Kingdom"
##  [89] "Ireland"        "United Kingdom" "Canada"         "United Kingdom"
##  [93] "Sweden"         "France"         "Canada"         "Belgium"       
##  [97] "United Kingdom" "Switzerland"    "United Kingdom" "United Kingdom"
## [101] "Switzerland"    "Switzerland"    "United Kingdom" "France"        
## [105] "United Kingdom" "France"         "Mexico"         "Mexico"        
## [109] "United Kingdom" "United Kingdom" "United Kingdom" "Australia"     
## [113] "Canada"         "United Kingdom" "United Kingdom" "Australia"     
## [117] "Switzerland"    "United States"  "Switzerland"    "United Kingdom"
## [121] "United Kingdom" "United States"  "Canada"         "Switzerland"   
## [125] "United Kingdom" "Ireland"        "Ireland"        "Ireland"       
## [129] "United Kingdom" "United Kingdom" "United Kingdom" "Denmark"       
## [133] "Denmark"        "United Kingdom" "United Kingdom" "United Kingdom"
## [137] "United Kingdom" "United Kingdom" "Sweden"         "United Kingdom"
## [141] "Sweden"         "United Kingdom" "United Kingdom" "United Kingdom"
## [145] "Italy"          "Italy"          "Italy"          "Italy"         
## [149] "United Kingdom" "Switzerland"    "United States"  "Japan"         
## [153] "United States"  "United Kingdom" "United States"  "United States" 
## [157] "United States"  "United States"  "Switzerland"    "Australia"     
## [161] "Switzerland"    "Netherlands"    "Switzerland"
# Logical vector: TRUE for each entry of Nationality that contains
# "United States", FALSE otherwise.
str_detect(string = winners[["Nationality"]], pattern = "United States")
##   [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
## [121] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE
## [157]  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
# Summing the logical vector counts the TRUE values, i.e. the number of
# entries of Nationality that contain "United States".
sum(
  str_detect(string = winners[["Nationality"]], pattern = "United States")
)
## [1] 11
# Same count as a one-row summary: the number of rows whose Nationality
# contains "United States", reported in column num_United_States.
summarise(
  winners,
  num_United_States = sum(
    str_detect(string = Nationality, pattern = "United States")
  )
)
## # A tibble: 1 × 1
##   num_United_States
##               <int>
## 1                11
# Print the Athlete column as a plain character vector.
winners[["Athlete"]]
##   [1] "Dick Beardsley (Tie)"       "Inge Simonsen (Tie)"       
##   [3] "Hugh Jones"                 "Mike Gratton"              
##   [5] "Charlie Spedding"           "Steve Jones"               
##   [7] "Toshihiko Seko"             "Hiromi Taniguchi"          
##   [9] "Henrik Jørgensen"           "Douglas Wakiihuri"         
##  [11] "Allister Hutton"            "Yakov Tolstikov"           
##  [13] "António Pinto"              "Eamonn Martin"             
##  [15] "Dionicio Cerón"             "Dionicio Cerón"            
##  [17] "Dionicio Cerón"             "António Pinto"             
##  [19] "Abel Antón"                 "Abdelkader El Mouaziz"     
##  [21] "António Pinto"              "Abdelkader El Mouaziz"     
##  [23] "Khalid Khannouchi"          "Gezahegne Abera"           
##  [25] "Evans Rutto"                "Martin Lel"                
##  [27] "Felix Limo"                 "Martin Lel"                
##  [29] "Martin Lel"                 "Samuel Wanjiru"            
##  [31] "Tsegaye Kebede"             "Emmanuel Kipchirchir Mutai"
##  [33] "Wilson Kipsang Kiprotich"   "Tsegaye Kebede"            
##  [35] "Wilson Kipsang Kiprotich"   "Eliud Kipchoge"            
##  [37] "Eliud Kipchoge"             "Daniel Wanjiru"            
##  [39] "Eliud Kipchoge"             "Eliud Kipchoge"            
##  [41] "Shura Kitata Tola"          "Sisay Lemma"               
##  [43] "Amos Kipruto"               "Joyce Smith"               
##  [45] "Joyce Smith"                "Grete Waitz"               
##  [47] "Ingrid Kristiansen"         "Ingrid Kristiansen"        
##  [49] "Grete Waitz"                "Ingrid Kristiansen"        
##  [51] "Ingrid Kristiansen"         "Véronique Marot"           
##  [53] "Wanda Panfil"               "Rosa Mota"                 
##  [55] "Katrin Dörre-Heinig"        "Katrin Dörre-Heinig"       
##  [57] "Katrin Dörre-Heinig"        "Małgorzata Sobańska"       
##  [59] "Liz McColgan"               "Joyce Chepchumba"          
##  [61] "Catherina McKiernan"        "Joyce Chepchumba"          
##  [63] "Tegla Loroupe"              "Derartu Tulu"              
##  [65] "Paula Radcliffe"            "Paula Radcliffe"           
##  [67] "Margaret Okayo"             "Paula Radcliffe"           
##  [69] "Deena Kastor"               "Zhou Chunxiu"              
##  [71] "Irina Mikitenko"            "Irina Mikitenko"           
##  [73] "Aselefech Mergia"           "Mary Jepkosgei Keitany"    
##  [75] "Mary Jepkosgei Keitany"     "Priscah Jeptoo"            
##  [77] "Edna Kiplagat"              "Tigist Tufa"               
##  [79] "Jemima Sumgong"             "Mary Jepkosgei Keitany"    
##  [81] "Vivian Cheruiyot"           "Brigid Kosgei"             
##  [83] "Brigid Kosgei"              "Joyciline Jepkosgei"       
##  [85] "Yalemzerf Yehualaw"         "Gordon Perry"              
##  [87] "Kevin Breen"                "Chris Hallam"              
##  [89] "Gerry O'Rourke"             "Chris Hallam"              
##  [91] "Ted Vince"                  "David Holding"             
##  [93] "Håkan Ericsson"             "Farid Amarouche"           
##  [95] "Daniel Wesley"              "George Vandamme"           
##  [97] "David Holding"              "Heinz Frei"                
##  [99] "David Holding"              "David Holding"             
## [101] "Heinz Frei"                 "Heinz Frei"                
## [103] "Kevin Papworth"             "Denis Lemeunier"           
## [105] "David Weir"                 "Joël Jeannot"              
## [107] "Saúl Mendoza"               "Saúl Mendoza"              
## [109] "David Weir"                 "David Weir"                
## [111] "David Weir"                 "Kurt Fearnley"             
## [113] "Josh Cassidy"               "David Weir"                
## [115] "David Weir"                 "Kurt Fearnley"             
## [117] "Marcel Hug"                 "Josh George"               
## [119] "Marcel Hug"                 "David Weir"                
## [121] "David Weir"                 "Daniel Romanchuk"          
## [123] "Brent Lakatos"              "Marcel Hug"                
## [125] "Denise Smith"               "Kay McShane"               
## [127] "Kay McShane"                "Kay McShane"               
## [129] "Karen Davidson"             "Karen Davidson"            
## [131] "Josie Cichockyj"            "Connie Hansen"             
## [133] "Connie Hansen"              "Tanni Grey-Thompson"       
## [135] "Rose Hill"                  "Tanni Grey-Thompson"       
## [137] "Rose Hill"                  "Tanni Grey-Thompson"       
## [139] "Monica Wetterström"         "Tanni Grey-Thompson"       
## [141] "Monica Wetterström"         "Sarah Piercy"              
## [143] "Tanni Grey-Thompson"        "Tanni Grey-Thompson"       
## [145] "Francesca Porcellato"       "Francesca Porcellato"      
## [147] "Francesca Porcellato"       "Francesca Porcellato"      
## [149] "Shelly Woods"               "Sandra Graf"               
## [151] "Amanda McGrory"             "Wakako Tsuchida"           
## [153] "Amanda McGrory"             "Shelly Woods"              
## [155] "Tatyana McFadden"           "Tatyana McFadden"          
## [157] "Tatyana McFadden"           "Tatyana McFadden"          
## [159] "Manuela Schär"              "Madison de Rozario"        
## [161] "Manuela Schär"              "Nikita den Boer"           
## [163] "Manuela Schär"
# Logical vector: TRUE wherever the athlete name contains the substring
# "Mar" (case-sensitive, anywhere in the name), FALSE otherwise.
str_detect(string = winners[["Athlete"]], pattern = "Mar")
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE
## [121] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Count the athlete names that contain "Mar" by summing the TRUE values.
sum(
  str_detect(string = winners[["Athlete"]], pattern = "Mar")
)
## [1] 12
# Same count as a one-row summary: the number of rows whose Athlete contains
# "Mar", reported in column num_Mar.
summarise(
  winners,
  num_Mar = sum(str_detect(string = Athlete, pattern = "Mar"))
)
## # A tibble: 1 × 1
##   num_Mar
##     <int>
## 1      12

Extract matches

# Add col_US holding the matched text "United States" (NA where Nationality
# has no match), then keep only the rows where a match was found.
winners %>%
  mutate(
    col_US = str_extract(string = Nationality, pattern = "United States")
  ) %>%
  filter(!is.na(col_US))
## # A tibble: 11 × 6
##    Category          Year Athlete              Nationality   Time     col_US    
##    <chr>            <dbl> <chr>                <chr>         <time>   <chr>     
##  1 Men               1981 Dick Beardsley (Tie) United States 02:11:48 United St…
##  2 Men               2002 Khalid Khannouchi    United States 02:05:38 United St…
##  3 Women             2006 Deena Kastor         United States 02:19:35 United St…
##  4 Wheelchair Men    2015 Josh George          United States 01:31:31 United St…
##  5 Wheelchair Men    2019 Daniel Romanchuk     United States 01:33:37 United St…
##  6 Wheelchair Women  2009 Amanda McGrory       United States 01:50:39 United St…
##  7 Wheelchair Women  2011 Amanda McGrory       United States 01:46:31 United St…
##  8 Wheelchair Women  2013 Tatyana McFadden     United States 01:46:02 United St…
##  9 Wheelchair Women  2014 Tatyana McFadden     United States 01:45:12 United St…
## 10 Wheelchair Women  2015 Tatyana McFadden     United States 01:41:14 United St…
## 11 Wheelchair Women  2016 Tatyana McFadden     United States 01:44:14 United St…
# Add col_Mar holding the matched text "Mar" (NA where the athlete name has
# no match), then keep only the rows where a match was found.
winners %>%
  mutate(
    col_Mar = str_extract(string = Athlete, pattern = "Mar")
  ) %>%
  filter(!is.na(col_Mar))
## # A tibble: 12 × 6
##    Category        Year Athlete                Nationality    Time     col_Mar
##    <chr>          <dbl> <chr>                  <chr>          <time>   <chr>  
##  1 Men             1993 Eamonn Martin          United Kingdom 02:10:50 Mar    
##  2 Men             2005 Martin Lel             Kenya          02:07:35 Mar    
##  3 Men             2007 Martin Lel             Kenya          02:07:41 Mar    
##  4 Men             2008 Martin Lel             Kenya          02:05:15 Mar    
##  5 Women           1989 Véronique Marot        United Kingdom 02:25:56 Mar    
##  6 Women           2004 Margaret Okayo         Kenya          02:22:35 Mar    
##  7 Women           2011 Mary Jepkosgei Keitany Kenya          02:19:19 Mar    
##  8 Women           2012 Mary Jepkosgei Keitany Kenya          02:18:37 Mar    
##  9 Women           2017 Mary Jepkosgei Keitany Kenya          02:17:01 Mar    
## 10 Wheelchair Men  2014 Marcel Hug             Switzerland    01:32:41 Mar    
## 11 Wheelchair Men  2016 Marcel Hug             Switzerland    01:35:19 Mar    
## 12 Wheelchair Men  2021 Marcel Hug             Switzerland    01:26:27 Mar

Replacing matches

# Add col_UK: a copy of Nationality in which the first occurrence of
# "United States" is replaced by "United Kingdom"; other values are unchanged.
winners %>%
  mutate(
    col_UK = str_replace(
      string = Nationality,
      pattern = "United States",
      replacement = "United Kingdom"
    )
  )
## # A tibble: 163 × 6
##    Category  Year Athlete              Nationality    Time     col_UK        
##    <chr>    <dbl> <chr>                <chr>          <time>   <chr>         
##  1 Men       1981 Dick Beardsley (Tie) United States  02:11:48 United Kingdom
##  2 Men       1981 Inge Simonsen (Tie)  Norway         02:11:48 Norway        
##  3 Men       1982 Hugh Jones           United Kingdom 02:09:24 United Kingdom
##  4 Men       1983 Mike Gratton         United Kingdom 02:09:43 United Kingdom
##  5 Men       1984 Charlie Spedding     United Kingdom 02:09:57 United Kingdom
##  6 Men       1985 Steve Jones          United Kingdom 02:08:16 United Kingdom
##  7 Men       1986 Toshihiko Seko       Japan          02:10:02 Japan         
##  8 Men       1987 Hiromi Taniguchi     Japan          02:09:50 Japan         
##  9 Men       1988 Henrik Jørgensen     Denmark        02:10:20 Denmark       
## 10 Men       1989 Douglas Wakiihuri    Kenya          02:09:03 Kenya         
## # ℹ 153 more rows
# Add col_Sma: a copy of Athlete in which the first occurrence of "Mar" is
# replaced by "Sma"; names without "Mar" are unchanged.
winners %>%
  mutate(
    col_Sma = str_replace(
      string = Athlete,
      pattern = "Mar",
      replacement = "Sma"
    )
  )
## # A tibble: 163 × 6
##    Category  Year Athlete              Nationality    Time     col_Sma          
##    <chr>    <dbl> <chr>                <chr>          <time>   <chr>            
##  1 Men       1981 Dick Beardsley (Tie) United States  02:11:48 Dick Beardsley (…
##  2 Men       1981 Inge Simonsen (Tie)  Norway         02:11:48 Inge Simonsen (T…
##  3 Men       1982 Hugh Jones           United Kingdom 02:09:24 Hugh Jones       
##  4 Men       1983 Mike Gratton         United Kingdom 02:09:43 Mike Gratton     
##  5 Men       1984 Charlie Spedding     United Kingdom 02:09:57 Charlie Spedding 
##  6 Men       1985 Steve Jones          United Kingdom 02:08:16 Steve Jones      
##  7 Men       1986 Toshihiko Seko       Japan          02:10:02 Toshihiko Seko   
##  8 Men       1987 Hiromi Taniguchi     Japan          02:09:50 Hiromi Taniguchi 
##  9 Men       1988 Henrik Jørgensen     Denmark        02:10:20 Henrik Jørgensen 
## 10 Men       1989 Douglas Wakiihuri    Kenya          02:09:03 Douglas Wakiihuri
## # ℹ 153 more rows