# Load the Simpsons character table from the TidyTuesday 2025-02-04 folder.
simpsons_characters <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-04/simpsons_characters.csv"
)
## Rows: 6722 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, normalized_name, gender
## dbl (1): id
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Load the matching location table from the same folder.
simpsons_locations <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-04/simpsons_locations.csv"
)
## Rows: 4459 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, normalized_name
## dbl (1): id
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Fix the RNG seed so the two random 10-row samples below are reproducible.
set.seed(1234)

# Keep only the id and the two name columns, then draw 10 random rows.
simpson_chr_small <- simpsons_characters %>%
  select(id, name, normalized_name) %>%
  sample_n(size = 10)

# Same reduction for the locations table.
simpson_loc_small <- simpsons_locations %>%
  select(id, name, normalized_name) %>%
  sample_n(size = 10)

simpson_chr_small
## # A tibble: 10 × 3
## id name normalized_name
## <dbl> <chr> <chr>
## 1 1027 Raheem raheem
## 2 651 Bernard bernard
## 3 2738 Red's Friend #2 reds friend 2
## 4 962 Pig pig
## 5 4562 Spanish Sailor spanish sailor
## 6 2996 Tree Jockey tree jockey
## 7 2186 Fat Convict fat convict
## 8 3224 Ring Bearer ring bearer
## 9 2818 CANADIAN WOMAN canadian woman
## 10 5802 2nd Male Animator 2nd male animator
simpson_loc_small
## # A tibble: 10 × 3
## id name normalized_name
## <dbl> <chr> <chr>
## 1 2373 FLAMING RUINS OF TROY flaming ruins of troy
## 2 1100 DETENTION AREA detention area
## 3 4046 Bohemian Art Gallery bohemian art gallery
## 4 4366 THE RELATION SHIP the relation ship
## 5 3454 CONCRETE concrete
## 6 2230 African City african city
## 7 2621 ENGLISH MEADOW english meadow
## 8 3972 OUTER CONCOURSE outer concourse
## 9 1682 PARIS STREET paris street
## 10 2599 COUNSELOR'S OFFICE counselor office
# Widen the location sample: each distinct `name` becomes its own column,
# filled with the matching `normalized_name` (NA everywhere else).
data_wide <- pivot_wider(
  data = simpson_loc_small,
  names_from = name,
  values_from = normalized_name
)
data_wide
## # A tibble: 10 × 11
## id `FLAMING RUINS OF TROY` `DETENTION AREA` `Bohemian Art Gallery`
## <dbl> <chr> <chr> <chr>
## 1 2373 flaming ruins of troy <NA> <NA>
## 2 1100 <NA> detention area <NA>
## 3 4046 <NA> <NA> bohemian art gallery
## 4 4366 <NA> <NA> <NA>
## 5 3454 <NA> <NA> <NA>
## 6 2230 <NA> <NA> <NA>
## 7 2621 <NA> <NA> <NA>
## 8 3972 <NA> <NA> <NA>
## 9 1682 <NA> <NA> <NA>
## 10 2599 <NA> <NA> <NA>
## # ℹ 7 more variables: `THE RELATION SHIP` <chr>, CONCRETE <chr>,
## # `African City` <chr>, `ENGLISH MEADOW` <chr>, `OUTER CONCOURSE` <chr>,
## # `PARIS STREET` <chr>, `COUNSELOR'S OFFICE` <chr>
# Same widening as above, but with the fourth sampled row (id 4366)
# removed first, so the result has one row and one column fewer.
data_wide2 <- simpson_loc_small %>%
  slice(-4) %>%
  pivot_wider(
    names_from = name,
    values_from = normalized_name
  )
data_wide2
## # A tibble: 9 × 10
## id `FLAMING RUINS OF TROY` `DETENTION AREA` `Bohemian Art Gallery` CONCRETE
## <dbl> <chr> <chr> <chr> <chr>
## 1 2373 flaming ruins of troy <NA> <NA> <NA>
## 2 1100 <NA> detention area <NA> <NA>
## 3 4046 <NA> <NA> bohemian art gallery <NA>
## 4 3454 <NA> <NA> <NA> concrete
## 5 2230 <NA> <NA> <NA> <NA>
## 6 2621 <NA> <NA> <NA> <NA>
## 7 3972 <NA> <NA> <NA> <NA>
## 8 1682 <NA> <NA> <NA> <NA>
## 9 2599 <NA> <NA> <NA> <NA>
## # ℹ 5 more variables: `African City` <chr>, `ENGLISH MEADOW` <chr>,
## # `OUTER CONCOURSE` <chr>, `PARIS STREET` <chr>, `COUNSELOR'S OFFICE` <chr>
# Reverse the widening: gather every location column back into a
# (name, normalized_name) pair per id.
# - `cols = -id` selects all name-columns by exclusion; a bare positional
#   index such as `4` pivots only that single column and leaves the rest wide.
# - `values_drop_na = TRUE` removes the NA cells that pivot_wider() introduced
#   as padding, so each id ends up with exactly one row again.
data_wide2 %>%
  pivot_longer(
    cols = -id,
    names_to = "name",
    values_to = "normalized_name",
    values_drop_na = TRUE
  )
## # A tibble: 9 × 3
## id name normalized_name
## <dbl> <chr> <chr>
## 1 2373 FLAMING RUINS OF TROY flaming ruins of troy
## 2 1100 DETENTION AREA detention area
## 3 4046 Bohemian Art Gallery bohemian art gallery
## 4 3454 CONCRETE concrete
## 5 2230 African City african city
## 6 2621 ENGLISH MEADOW english meadow
## 7 3972 OUTER CONCOURSE outer concourse
## 8 1682 PARIS STREET paris street
## 9 2599 COUNSELOR'S OFFICE counselor office
# Paste `name` and `normalized_name` into a single "United_name" column,
# joined by "/"; the two source columns are dropped from the result.
data_united <- unite(
  simpson_loc_small,
  col = "United_name",
  name, normalized_name,
  sep = "/"
)
data_united
## # A tibble: 10 × 2
## id United_name
## <dbl> <chr>
## 1 2373 FLAMING RUINS OF TROY/flaming ruins of troy
## 2 1100 DETENTION AREA/detention area
## 3 4046 Bohemian Art Gallery/bohemian art gallery
## 4 4366 THE RELATION SHIP/the relation ship
## 5 3454 CONCRETE/concrete
## 6 2230 African City/african city
## 7 2621 ENGLISH MEADOW/english meadow
## 8 3972 OUTER CONCOURSE/outer concourse
## 9 1682 PARIS STREET/paris street
## 10 2599 COUNSELOR'S OFFICE/counselor office
# Split "United_name" back into its two parts.
# `sep = "/"` is required here: separate()'s default separator matches ANY
# run of non-alphanumeric characters, so it would also split on the spaces
# and apostrophes inside the names, keep only the first two words, and
# discard the rest with a warning. Splitting on "/" alone restores the
# original `name` / `normalized_name` columns.
data_sep <- data_united %>%
  separate(
    col = "United_name",
    into = c("name", "normalized_name"),
    sep = "/"
  )
data_sep
## # A tibble: 10 × 3
## id name normalized_name
## <dbl> <chr> <chr>
## 1 2373 FLAMING RUINS OF TROY flaming ruins of troy
## 2 1100 DETENTION AREA detention area
## 3 4046 Bohemian Art Gallery bohemian art gallery
## 4 4366 THE RELATION SHIP the relation ship
## 5 3454 CONCRETE concrete
## 6 2230 African City african city
## 7 2621 ENGLISH MEADOW english meadow
## 8 3972 OUTER CONCOURSE outer concourse
## 9 1682 PARIS STREET paris street
## 10 2599 COUNSELOR'S OFFICE counselor office