# Attach the packages whose functions this script calls unqualified:
# dplyr supplies `%>%` and the data verbs (select, filter, mutate, ...),
# stringr supplies the `str_*()` helpers.
library(dplyr)
library(stringr)

# Download the TidyTuesday 2024-04-23 "outer space objects" CSV from GitHub
# and parse it into a tibble.
outer_space_objects <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-23/outer_space_objects.csv"
)
## Rows: 1175 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, num_objects
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Seed the random number generator. None of the code shown here draws random
# numbers, so this presumably serves a later step -- TODO confirm.
set.seed(2)

# For every entity keep the row(s) where `num_objects` equals that entity's
# maximum, then retain the ten largest such records overall.
# Ties are kept: an entity that reaches its maximum in several years
# contributes one row per such year.
data_top10_launchers <- outer_space_objects %>%
  select(Entity, Year, num_objects) %>%
  group_by(Entity) %>%
  filter(num_objects == max(num_objects)) %>%
  ungroup() %>%
  arrange(desc(num_objects)) %>%
  slice_head(n = 10)

data_top10_launchers %>% print()
## # A tibble: 10 × 3
## Entity Year num_objects
## <chr> <dbl> <dbl>
## 1 World 2023 2664
## 2 United States 2023 2166
## 3 United Kingdom 2021 289
## 4 China 2022 182
## 5 Russia 1981 124
## 6 Belgium 2017 28
## 7 Japan 2014 24
## 8 Japan 2021 24
## 9 France 2011 19
## 10 Spain 2022 19
# Example: count how many `Entity` values end with "a".
# The summary expression is deliberately left unnamed, so the result column is
# labelled with the expression text itself.
summarise(data_top10_launchers, sum(str_detect(Entity, "a$")))
## # A tibble: 1 × 1
## `sum(str_detect(Entity, "a$"))`
## <int>
## 1 2
# Flag, element by element, whether each `Entity` name ends in "a"
data_top10_launchers[["Entity"]] %>% str_detect("a$")
## [1] FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
# Count the matches: summing a logical vector counts its TRUE values
data_top10_launchers[["Entity"]] %>% str_detect("a$") %>% sum()
## [1] 2
# Share of matches: the mean of a logical vector is the proportion of TRUE
data_top10_launchers[["Entity"]] %>% str_detect("a$") %>% mean()
## [1] 0.2
# Entity names to search for
patterns <- c("World", "United States", "Japan", "Russia")
# Join the names into one regular expression of alternatives ("A|B|...")
pattern_match <- patterns %>% str_c(collapse = "|")
# Keep only the `Entity` values that match the combined pattern
has_pattern <- data_top10_launchers[["Entity"]] %>% str_subset(pattern_match)
# From each kept value, extract the portion that matched the pattern
extracted_patterns <- has_pattern %>% str_extract(pattern_match)
extracted_patterns
## [1] "World" "United States" "Russia" "Japan"
## [5] "Japan"
# Overwrite every `Entity` whose name matches `pattern_match` with a
# placeholder label; all other rows keep their original name.
# `if_else()` replaces base `ifelse()` because it checks that both branches
# have compatible types instead of silently coercing them.
data_modified <- data_top10_launchers %>%
  mutate(
    Entity = if_else(
      str_detect(Entity, pattern_match),
      "I made the code work",
      Entity
    )
  )
data_modified
## # A tibble: 10 × 3
## Entity Year num_objects
## <chr> <dbl> <dbl>
## 1 I made the code work 2023 2664
## 2 I made the code work 2023 2166
## 3 United Kingdom 2021 289
## 4 China 2022 182
## 5 I made the code work 1981 124
## 6 Belgium 2017 28
## 7 I made the code work 2014 24
## 8 I made the code work 2021 24
## 9 France 2011 19
## 10 Spain 2022 19