Import your data

# Load the dataset from the Excel workbook at this relative path
mydata <- read_excel(path = "../00_data/mydata.xlsx")

Chapter 14

Tools

Detect matches

# Count how many scientific names end in 'i'.
# str_detect() returns NA for a missing name; na.rm = TRUE keeps a single
# missing value from turning the whole count into NA.
mydata %>%
    summarise(
        total_ending_in_i = sum(str_detect(scientificName, "i$"), na.rm = TRUE)
    )
## # A tibble: 1 × 1
##   total_ending_in_i
##               <int>
## 1             51752
# Show the underlying logic: one TRUE/FALSE per row (first six rows only)
head(str_detect(mydata$scientificName, "i$"))
## [1]  TRUE FALSE FALSE FALSE FALSE  TRUE
# Proportion (0-1 scale, not a percentage) of rows whose scientific name
# ends in 'i'; multiply by 100 to express it as a percentage.
# na.rm = TRUE leaves rows with a missing name out of the calculation
# instead of returning NA.
mean(str_detect(mydata$scientificName, "i$"), na.rm = TRUE)
## [1] 0.3787997

Extract matches

# Genera of interest, joined into a single alternation pattern ("A|B|C|D")
frog_genera <- c("Litoria", "Crinia", "Limnodynastes", "Uperoleia")
genus_match <- frog_genera %>% str_c(collapse = "|")

# Keep only the names that contain one of the genera, then pull out the
# first genus matched in each name and show the first ten results
has_genus <- mydata$scientificName %>% str_subset(genus_match)
has_genus %>%
    str_extract(genus_match) %>%
    head(10)
##  [1] "Litoria" "Litoria" "Litoria" "Litoria" "Litoria" "Litoria" "Litoria"
##  [8] "Litoria" "Litoria" "Litoria"

Replacing matches

# Abbreviate a leading word 'New' to 'N.'
# (e.g. "New South Wales" -> "N. South Wales").
# The word boundary (\b) after 'New' restricts the match to the whole word,
# so a value that merely starts with the letters "New" is left unchanged.
mydata %>%
    mutate(state_abbr = stateProvince %>% str_replace("^New\\b", "N.")) %>%
    select(stateProvince, state_abbr)
## # A tibble: 136,621 × 2
##    stateProvince   state_abbr    
##    <chr>           <chr>         
##  1 New South Wales N. South Wales
##  2 New South Wales N. South Wales
##  3 New South Wales N. South Wales
##  4 New South Wales N. South Wales
##  5 New South Wales N. South Wales
##  6 New South Wales N. South Wales
##  7 New South Wales N. South Wales
##  8 New South Wales N. South Wales
##  9 New South Wales N. South Wales
## 10 New South Wales N. South Wales
## # ℹ 136,611 more rows
# Build an underscore-separated copy of each scientific name
# (every space is replaced, not just the first one)
mydata %>%
    select(scientificName) %>%
    mutate(name_clean = str_replace_all(scientificName, " ", "_"))
## # A tibble: 136,621 × 2
##    scientificName           name_clean              
##    <chr>                    <chr>                   
##  1 Philoria loveridgei      Philoria_loveridgei     
##  2 Heleioporus australiacus Heleioporus_australiacus
##  3 Mixophyes iteratus       Mixophyes_iteratus      
##  4 Mixophyes fasciolatus    Mixophyes_fasciolatus   
##  5 Litoria latopalmata      Litoria_latopalmata     
##  6 Assa darlingtoni         Assa_darlingtoni        
##  7 Assa darlingtoni         Assa_darlingtoni        
##  8 Litoria nasuta           Litoria_nasuta          
##  9 Mixophyes iteratus       Mixophyes_iteratus      
## 10 Litoria gracilenta       Litoria_gracilenta      
## # ℹ 136,611 more rows