Import your data

# Character table (id, name, normalized_name, gender) from the TidyTuesday
# 2025-02-04 Simpsons release.
simpsons_characters <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-04/simpsons_characters.csv"
)
## Rows: 6722 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, normalized_name, gender
## dbl (1): id
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Location table (id, name, normalized_name) from the TidyTuesday 2025-02-04
# Simpsons release.
simpsons_locations <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-04/simpsons_locations.csv"
)
## Rows: 4459 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, normalized_name
## dbl (1): id
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Make data small

# Fix the RNG seed so both 10-row samples below are reproducible.
set.seed(1234)

# Keep only the id and the two name columns, then draw 10 random rows.
simpson_chr_small <- simpsons_characters %>%
  select(id, name, normalized_name) %>%
  sample_n(10)

simpson_loc_small <- simpsons_locations %>%
  select(id, name, normalized_name) %>%
  sample_n(10)

# Show the sampled characters.
simpson_chr_small
## # A tibble: 10 × 3
##       id name              normalized_name  
##    <dbl> <chr>             <chr>            
##  1  1027 Raheem            raheem           
##  2   651 Bernard           bernard          
##  3  2738 Red's Friend #2   reds friend 2    
##  4   962 Pig               pig              
##  5  4562 Spanish Sailor    spanish sailor   
##  6  2996 Tree Jockey       tree jockey      
##  7  2186 Fat Convict       fat convict      
##  8  3224 Ring Bearer       ring bearer      
##  9  2818 CANADIAN WOMAN    canadian woman   
## 10  5802 2nd Male Animator 2nd male animator
# Show the sampled locations.
simpson_loc_small
## # A tibble: 10 × 3
##       id name                  normalized_name      
##    <dbl> <chr>                 <chr>                
##  1  2373 FLAMING RUINS OF TROY flaming ruins of troy
##  2  1100 DETENTION AREA        detention area       
##  3  4046 Bohemian Art Gallery  bohemian art gallery 
##  4  4366 THE RELATION SHIP     the relation ship    
##  5  3454 CONCRETE              concrete             
##  6  2230 African City          african city         
##  7  2621 ENGLISH MEADOW        english meadow       
##  8  3972 OUTER CONCOURSE       outer concourse      
##  9  1682 PARIS STREET          paris street         
## 10  2599 COUNSELOR'S OFFICE    counselor office

Long to wide form

# Spread the sampled locations to wide form: one column per `name`, holding
# the matching `normalized_name`; all other cells are filled with NA.
data_wide <- pivot_wider(
  simpson_loc_small,
  names_from = name,
  values_from = normalized_name
)

data_wide
## # A tibble: 10 × 11
##       id `FLAMING RUINS OF TROY` `DETENTION AREA` `Bohemian Art Gallery`
##    <dbl> <chr>                   <chr>            <chr>                 
##  1  2373 flaming ruins of troy   <NA>             <NA>                  
##  2  1100 <NA>                    detention area   <NA>                  
##  3  4046 <NA>                    <NA>             bohemian art gallery  
##  4  4366 <NA>                    <NA>             <NA>                  
##  5  3454 <NA>                    <NA>             <NA>                  
##  6  2230 <NA>                    <NA>             <NA>                  
##  7  2621 <NA>                    <NA>             <NA>                  
##  8  3972 <NA>                    <NA>             <NA>                  
##  9  1682 <NA>                    <NA>             <NA>                  
## 10  2599 <NA>                    <NA>             <NA>                  
## # ℹ 7 more variables: `THE RELATION SHIP` <chr>, CONCRETE <chr>,
## #   `African City` <chr>, `ENGLISH MEADOW` <chr>, `OUTER CONCOURSE` <chr>,
## #   `PARIS STREET` <chr>, `COUNSELOR'S OFFICE` <chr>
# Same reshape, but with the fourth sampled row removed first, giving a
# 9-row table with one fewer name column.
data_wide2 <- simpson_loc_small %>%
  slice(-4) %>%
  pivot_wider(names_from = name, values_from = normalized_name)

data_wide2
## # A tibble: 9 × 10
##      id `FLAMING RUINS OF TROY` `DETENTION AREA` `Bohemian Art Gallery` CONCRETE
##   <dbl> <chr>                   <chr>            <chr>                  <chr>   
## 1  2373 flaming ruins of troy   <NA>             <NA>                   <NA>    
## 2  1100 <NA>                    detention area   <NA>                   <NA>    
## 3  4046 <NA>                    <NA>             bohemian art gallery   <NA>    
## 4  3454 <NA>                    <NA>             <NA>                   concrete
## 5  2230 <NA>                    <NA>             <NA>                   <NA>    
## 6  2621 <NA>                    <NA>             <NA>                   <NA>    
## 7  3972 <NA>                    <NA>             <NA>                   <NA>    
## 8  1682 <NA>                    <NA>             <NA>                   <NA>    
## 9  2599 <NA>                    <NA>             <NA>                   <NA>    
## # ℹ 5 more variables: `African City` <chr>, `ENGLISH MEADOW` <chr>,
## #   `OUTER CONCOURSE` <chr>, `PARIS STREET` <chr>, `COUNSELOR'S OFFICE` <chr>

Wide to long form

# Reverse the pivot_wider() step: gather every location column (everything
# except `id`) back into a `name` / `normalized_name` pair.
# Selecting a single column by position (e.g. `4`) would leave the table wide
# and label every row with that one column's name.
# values_drop_na = TRUE removes the NA cells that only exist because the wide
# table is sparse, leaving one row per location again.
data_wide2 %>%
  pivot_longer(
    cols = -id,
    names_to = "name",
    values_to = "normalized_name",
    values_drop_na = TRUE
  )
## # A tibble: 9 × 3
##      id name                  normalized_name      
##   <dbl> <chr>                 <chr>                
## 1  2373 FLAMING RUINS OF TROY flaming ruins of troy
## 2  1100 DETENTION AREA        detention area       
## 3  4046 Bohemian Art Gallery  bohemian art gallery 
## 4  3454 CONCRETE              concrete             
## 5  2230 African City          african city         
## 6  2621 ENGLISH MEADOW        english meadow       
## 7  3972 OUTER CONCOURSE       outer concourse      
## 8  1682 PARIS STREET          paris street         
## 9  2599 COUNSELOR'S OFFICE    counselor office     

Separating and Uniting

Unite two columns

# Merge `name` and `normalized_name` into a single "United_name" column,
# joining the two values with "/". The source columns are dropped.
data_united <- unite(
  simpson_loc_small,
  col = "United_name",
  name, normalized_name,
  sep = "/"
)

data_united
## # A tibble: 10 × 2
##       id United_name                                
##    <dbl> <chr>                                      
##  1  2373 FLAMING RUINS OF TROY/flaming ruins of troy
##  2  1100 DETENTION AREA/detention area              
##  3  4046 Bohemian Art Gallery/bohemian art gallery  
##  4  4366 THE RELATION SHIP/the relation ship        
##  5  3454 CONCRETE/concrete                          
##  6  2230 African City/african city                  
##  7  2621 ENGLISH MEADOW/english meadow              
##  8  3972 OUTER CONCOURSE/outer concourse            
##  9  1682 PARIS STREET/paris street                  
## 10  2599 COUNSELOR'S OFFICE/counselor office

Separate a column

# Split "United_name" back into its two parts.
# `sep` must be the same "/" delimiter that unite() inserted: the default
# separator matches any run of non-alphanumeric characters, so it would split
# on the spaces and apostrophes inside the names, keep only the first two
# words and discard the rest with a warning.
data_sep <- data_united %>%
  separate(
    col = "United_name",
    into = c("name", "normalized_name"),
    sep = "/"
  )

data_sep
## # A tibble: 10 × 3
##       id name                  normalized_name      
##    <dbl> <chr>                 <chr>                
##  1  2373 FLAMING RUINS OF TROY flaming ruins of troy
##  2  1100 DETENTION AREA        detention area       
##  3  4046 Bohemian Art Gallery  bohemian art gallery 
##  4  4366 THE RELATION SHIP     the relation ship    
##  5  3454 CONCRETE              concrete             
##  6  2230 African City          african city         
##  7  2621 ENGLISH MEADOW        english meadow       
##  8  3972 OUTER CONCOURSE       outer concourse      
##  9  1682 PARIS STREET          paris street         
## 10  2599 COUNSELOR'S OFFICE    counselor office     

Missing Values