# Load the Excel workbook that the rest of this script analyses.
mydata <- read_excel(path = "../00_data/mydata.xlsx")
# Count the rows whose scientific name ends in the letter "i".
# str_detect() yields NA for a missing name, and a single NA would turn the
# whole sum into NA, so missing names are excluded from the count.
mydata %>%
  summarise(
    total_ending_in_i = sum(str_detect(scientificName, "i$"), na.rm = TRUE)
  )
## # A tibble: 1 × 1
## total_ending_in_i
## <int>
## 1 51752
# Preview the logical vector behind the count: one value per row, TRUE where
# the scientific name ends in "i". Only the first few values are printed.
head(str_detect(mydata$scientificName, "i$"))
## [1] TRUE FALSE FALSE FALSE FALSE TRUE
# Proportion (0-1, not a percentage) of rows whose scientific name ends in "i".
# The mean of a logical vector is the share of TRUE values. na.rm = TRUE keeps
# a missing name from turning the result into NA, so the share is taken over
# the rows that have a name.
mean(str_detect(mydata$scientificName, "i$"), na.rm = TRUE)
## [1] 0.3787997
# Genera of interest, combined into one alternation pattern
# ("Litoria|Crinia|...") so a single regex matches any of them.
frog_genera <- c("Litoria", "Crinia", "Limnodynastes", "Uperoleia")
genus_match <- frog_genera %>% str_c(collapse = "|")

# Keep only the scientific names that contain one of the genera, then pull out
# the matched genus itself and show the first ten results.
has_genus <- mydata$scientificName %>% str_subset(genus_match)
head(str_extract(has_genus, genus_match), 10)
## [1] "Litoria" "Litoria" "Litoria" "Litoria" "Litoria" "Litoria" "Litoria"
## [8] "Litoria" "Litoria" "Litoria"
# Abbreviate a leading word "New" in the state name to "N."
# (e.g. "New South Wales" becomes "N. South Wales"). The word boundary after
# "New" limits the match to the whole word, so a value that merely starts with
# those three letters is left unchanged.
mydata %>%
  mutate(state_abbr = str_replace(stateProvince, "^New\\b", "N.")) %>%
  select(stateProvince, state_abbr)
## # A tibble: 136,621 × 2
## stateProvince state_abbr
## <chr> <chr>
## 1 New South Wales N. South Wales
## 2 New South Wales N. South Wales
## 3 New South Wales N. South Wales
## 4 New South Wales N. South Wales
## 5 New South Wales N. South Wales
## 6 New South Wales N. South Wales
## 7 New South Wales N. South Wales
## 8 New South Wales N. South Wales
## 9 New South Wales N. South Wales
## 10 New South Wales N. South Wales
## # ℹ 136,611 more rows
# Build an underscore-separated version of each scientific name by replacing
# every space, and show it next to the original.
mydata %>%
  select(scientificName) %>%
  mutate(name_clean = str_replace_all(scientificName, " ", "_"))
## # A tibble: 136,621 × 2
## scientificName name_clean
## <chr> <chr>
## 1 Philoria loveridgei Philoria_loveridgei
## 2 Heleioporus australiacus Heleioporus_australiacus
## 3 Mixophyes iteratus Mixophyes_iteratus
## 4 Mixophyes fasciolatus Mixophyes_fasciolatus
## 5 Litoria latopalmata Litoria_latopalmata
## 6 Assa darlingtoni Assa_darlingtoni
## 7 Assa darlingtoni Assa_darlingtoni
## 8 Litoria nasuta Litoria_nasuta
## 9 Mixophyes iteratus Mixophyes_iteratus
## 10 Litoria gracilenta Litoria_gracilenta
## # ℹ 136,611 more rows