Import your data

# Read the Excel workbook (path is relative to the working directory) into
# `data`; every example below works on this object.
data <- readxl::read_excel(path = "../01_module4/data/myData.xlsx")

Chapter 14

Tools

Detect matches

# Print the full `country` column to see which strings we are matching against.
data[["country"]]
##   [1] "Iceland"                            "Norway"                            
##   [3] "Switzerland"                        "Denmark"                           
##   [5] "Germany"                            "Sweden"                            
##   [7] "Australia"                          "Hong Kong, China (SAR)"            
##   [9] "Netherlands"                        "Belgium"                           
##  [11] "Ireland"                            "Finland"                           
##  [13] "Singapore"                          "United Kingdom"                    
##  [15] "United Arab Emirates"               "Canada"                            
##  [17] "Liechtenstein"                      "New Zealand"                       
##  [19] "United States"                      "Korea (Republic of)"               
##  [21] "Slovenia"                           "Austria"                           
##  [23] "Japan"                              "Malta"                             
##  [25] "Luxembourg"                         "France"                            
##  [27] "Israel"                             "Spain"                             
##  [29] "Czechia"                            "Italy"                             
##  [31] "San Marino"                         "Andorra"                           
##  [33] "Cyprus"                             "Greece"                            
##  [35] "Poland"                             "Estonia"                           
##  [37] "Saudi Arabia"                       "Bahrain"                           
##  [39] "Lithuania"                          "Portugal"                          
##  [41] "Croatia"                            "Latvia"                            
##  [43] "Qatar"                              "Slovakia"                          
##  [45] "Chile"                              "Hungary"                           
##  [47] "Argentina"                          "Montenegro"                        
##  [49] "Uruguay"                            "Oman"                              
##  [51] "Türkiye"                            "Kuwait"                            
##  [53] "Antigua and Barbuda"                "Seychelles"                        
##  [55] "Bulgaria"                           "Romania"                           
##  [57] "Georgia"                            "Saint Kitts and Nevis"             
##  [59] "Panama"                             "Brunei Darussalam"                 
##  [61] "Kazakhstan"                         "Costa Rica"                        
##  [63] "Serbia"                             "Russian Federation"                
##  [65] "Belarus"                            "Bahamas"                           
##  [67] "Malaysia"                           "North Macedonia"                   
##  [69] "Armenia"                            "Barbados"                          
##  [71] "Albania"                            "Trinidad and Tobago"               
##  [73] "Mauritius"                          "Bosnia and Herzegovina"            
##  [75] "Iran (Islamic Republic of)"         "Saint Vincent and the Grenadines"  
##  [77] "Thailand"                           "China"                             
##  [79] "Peru"                               "Grenada"                           
##  [81] "Azerbaijan"                         "Mexico"                            
##  [83] "Colombia"                           "Brazil"                            
##  [85] "Palau"                              "Moldova (Republic of)"             
##  [87] "Ukraine"                            "Ecuador"                           
##  [89] "Dominican Republic"                 "Guyana"                            
##  [91] "Sri Lanka"                          "Tonga"                             
##  [93] "Maldives"                           "Viet Nam"                          
##  [95] "Turkmenistan"                       "Algeria"                           
##  [97] "Cuba"                               "Dominica"                          
##  [99] "Paraguay"                           "Egypt"                             
## [101] "Jordan"                             "Lebanon"                           
## [103] "Saint Lucia"                        "Mongolia"                          
## [105] "Tunisia"                            "South Africa"                      
## [107] "Uzbekistan"                         "Bolivia (Plurinational State of)"  
## [109] "Gabon"                              "Marshall Islands"                  
## [111] "Botswana"                           "Fiji"                              
## [113] "Indonesia"                          "Suriname"                          
## [115] "Belize"                             "Libya"                             
## [117] "Jamaica"                            "Kyrgyzstan"                        
## [119] "Philippines"                        "Morocco"                           
## [121] "Venezuela (Bolivarian Republic of)" "Samoa"                             
## [123] "Nicaragua"                          "Nauru"                             
## [125] "Bhutan"                             "Eswatini (Kingdom of)"             
## [127] "Iraq"                               "Tajikistan"                        
## [129] "Tuvalu"                             "Bangladesh"                        
## [131] "India"                              "El Salvador"                       
## [133] "Equatorial Guinea"                  "Palestine, State of"               
## [135] "Cabo Verde"                         "Namibia"                           
## [137] "Guatemala"                          "Congo"                             
## [139] "Honduras"                           "Kiribati"                          
## [141] "Sao Tome and Principe"              "Timor-Leste"                       
## [143] "Ghana"                              "Kenya"                             
## [145] "Nepal"                              "Vanuatu"                           
## [147] "Lao People's Democratic Republic"   "Angola"                            
## [149] "Micronesia (Federated States of)"   "Myanmar"                           
## [151] "Cambodia"                           "Comoros"                           
## [153] "Zimbabwe"                           "Zambia"                            
## [155] "Cameroon"                           "Solomon Islands"                   
## [157] "Côte d'Ivoire"                      "Uganda"                            
## [159] "Rwanda"                             "Papua New Guinea"                  
## [161] "Togo"                               "Syrian Arab Republic"              
## [163] "Mauritania"                         "Nigeria"                           
## [165] "Tanzania (United Republic of)"      "Haiti"                             
## [167] "Lesotho"                            "Pakistan"                          
## [169] "Senegal"                            "Gambia"                            
## [171] "Congo (Democratic Republic of the)" "Malawi"                            
## [173] "Benin"                              "Guinea-Bissau"                     
## [175] "Djibouti"                           "Sudan"                             
## [177] "Liberia"                            "Eritrea"                           
## [179] "Guinea"                             "Ethiopia"                          
## [181] "Afghanistan"                        "Mozambique"                        
## [183] "Madagascar"                         "Yemen"                             
## [185] "Sierra Leone"                       "Burkina Faso"                      
## [187] "Burundi"                            "Mali"                              
## [189] "Niger"                              "Chad"                              
## [191] "Central African Republic"           "Somalia"                           
## [193] "South Sudan"
# One logical value per country: TRUE where the name contains "China".
str_detect(string = data$country, pattern = "China")
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE
# Summing the logical vector counts the TRUEs, i.e. the number of matches.
data$country %>%
  str_detect(pattern = "China") %>%
  sum()
## [1] 2
# Same count as above, computed inside summarise() so the result is returned
# as a one-row tibble with a single column, `num_China`.
data %>%
  summarise(
    num_China = country %>%
      str_detect(pattern = "China") %>%
      sum()
  )
## # A tibble: 1 × 1
##   num_China
##       <int>
## 1         2

Extract matches

# Pull the matched text out of each country name. str_extract() yields NA
# where the pattern is absent, so dropping NA rows keeps only the matches.
data %>%
  select(country) %>%
  mutate(col_China = str_extract(country, pattern = "China")) %>%
  filter(!is.na(col_China))
## # A tibble: 2 × 2
##   country                col_China
##   <chr>                  <chr>    
## 1 Hong Kong, China (SAR) China    
## 2 China                  China

Replace matches

# Build a copy of `country` in which the first occurrence of "China" in each
# name is swapped for "Cathay"; names without a match are carried over as-is.
data %>%
  select(country) %>%
  mutate(
    col_Cathay = str_replace(
      country,
      pattern = "China",
      replacement = "Cathay"
    )
  )
## # A tibble: 193 × 2
##    country                col_Cathay             
##    <chr>                  <chr>                  
##  1 Iceland                Iceland                
##  2 Norway                 Norway                 
##  3 Switzerland            Switzerland            
##  4 Denmark                Denmark                
##  5 Germany                Germany                
##  6 Sweden                 Sweden                 
##  7 Australia              Australia              
##  8 Hong Kong, China (SAR) Hong Kong, Cathay (SAR)
##  9 Netherlands            Netherlands            
## 10 Belgium                Belgium                
## # ℹ 183 more rows