Import your data

Import Data

# Download the TidyTuesday 2024-04-23 "outer space objects" CSV.
# Columns as reported by readr: Entity, Code (chr); Year, num_objects (dbl).
outer_space_objects <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-23/outer_space_objects.csv"
)
## Rows: 1175 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, num_objects
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Tidy data

# Seed kept from the original script. Every step below is deterministic,
# so the seed does not influence this result.
set.seed(2)

# Build the ten largest per-entity launch records:
#   1. keep only the entity, the year and the object count;
#   2. within each entity keep the row(s) where `num_objects` equals that
#      entity's maximum (an entity whose maximum occurs in several years
#      keeps every one of those rows);
#   3. sort all remaining rows from largest to smallest count;
#   4. take the first ten rows.
data_top10_launchers <- outer_space_objects %>%
  select(Entity, Year, num_objects) %>%
  group_by(Entity) %>%
  filter(num_objects == max(num_objects)) %>%
  ungroup() %>%
  arrange(desc(num_objects)) %>%
  slice_head(n = 10)

print(data_top10_launchers)
## # A tibble: 10 × 3
##    Entity          Year num_objects
##    <chr>          <dbl>       <dbl>
##  1 World           2023        2664
##  2 United States   2023        2166
##  3 United Kingdom  2021         289
##  4 China           2022         182
##  5 Russia          1981         124
##  6 Belgium         2017          28
##  7 Japan           2014          24
##  8 Japan           2021          24
##  9 France          2011          19
## 10 Spain           2022          19

Chapter 14

Tools

Detect matches

# Count, inside a dplyr summary, how many `Entity` values end in "a".
# The summary expression is left unnamed, so the result column is named
# after the expression itself.
summarise(data_top10_launchers, sum(str_detect(Entity, "a$")))
## # A tibble: 1 × 1
##   `sum(str_detect(Entity, "a$"))`
##                             <int>
## 1                               2
# One TRUE/FALSE per row: does the `Entity` value end in "a"?
data_top10_launchers$Entity %>%
  str_detect("a$")
##  [1] FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE
# Number of matches (TRUE counts as 1)
data_top10_launchers$Entity %>%
  str_detect("a$") %>%
  sum()
## [1] 2
# Share of rows that match (mean of the TRUE/FALSE values)
data_top10_launchers$Entity %>%
  str_detect("a$") %>%
  mean()
## [1] 0.2

Extract matches

I created a new pattern whose matches are easy to check by eye.

# Entity names to search for
patterns <- c("World", "United States", "Japan", "Russia")

# Join the names with "|" so the regular expression matches any one of them
pattern_match <- patterns %>%
  str_c(collapse = "|")

# Keep only the `Entity` values that contain at least one of the names
has_pattern <- data_top10_launchers$Entity %>%
  str_subset(pattern_match)

# From each kept value, pull out the first piece of text that matched
extracted_patterns <- has_pattern %>%
  str_extract(pattern_match)

extracted_patterns
## [1] "World"         "United States" "Russia"        "Japan"        
## [5] "Japan"

Replacing matches

I honestly didn’t know how to use this to get helpful information, so I just replaced each matching entity with an arbitrary string to show that the replacement worked.

# Overwrite `Entity` with a fixed label wherever it matches `pattern_match`;
# rows that do not match keep their original `Entity` value.
data_modified <- data_top10_launchers %>%
  mutate(
    Entity = if_else(
      str_detect(Entity, pattern_match),
      "I made the code work",
      Entity
    )
  )

data_modified
## # A tibble: 10 × 3
##    Entity                Year num_objects
##    <chr>                <dbl>       <dbl>
##  1 I made the code work  2023        2664
##  2 I made the code work  2023        2166
##  3 United Kingdom        2021         289
##  4 China                 2022         182
##  5 I made the code work  1981         124
##  6 Belgium               2017          28
##  7 I made the code work  2014          24
##  8 I made the code work  2021          24
##  9 France                2011          19
## 10 Spain                 2022          19