Import Data

# Download the TidyTuesday (2024-04-23) outer-space-objects CSV into a tibble.
outer_space_objects <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-23/outer_space_objects.csv"
)
## Rows: 1175 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, num_objects
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Tidy data

set.seed(2) # seed kept as in the original; the steps below are deterministic

# Keep, for every Entity, the row(s) where its yearly object count peaks,
# then rank those peaks from largest to smallest and retain the first ten.
# Ties within an Entity are all kept, so one Entity may appear more than once.
data_top10_launchers <- outer_space_objects %>%
  group_by(Entity) %>%
  filter(num_objects == max(num_objects)) %>%
  ungroup() %>%
  # Only these three columns are needed downstream
  select(Entity, Year, num_objects) %>%
  arrange(desc(num_objects)) %>%
  slice_head(n = 10)

print(data_top10_launchers)
## # A tibble: 10 × 3
##    Entity          Year num_objects
##    <chr>          <dbl>       <dbl>
##  1 World           2023        2664
##  2 United States   2023        2166
##  3 United Kingdom  2021         289
##  4 China           2022         182
##  5 Russia          1981         124
##  6 Belgium         2017          28
##  7 Japan           2014          24
##  8 Japan           2021          24
##  9 France          2011          19
## 10 Spain           2022          19

Pivoting

Wide to long form

# Stack the single measurement column into a name/value pair: the column
# name lands in `metric` and its contents in `value`.
data_long <- pivot_longer(
  data_top10_launchers,
  cols = num_objects,
  names_to = "metric",
  values_to = "value"
)

data_long
## # A tibble: 10 × 4
##    Entity          Year metric      value
##    <chr>          <dbl> <chr>       <dbl>
##  1 World           2023 num_objects  2664
##  2 United States   2023 num_objects  2166
##  3 United Kingdom  2021 num_objects   289
##  4 China           2022 num_objects   182
##  5 Russia          1981 num_objects   124
##  6 Belgium         2017 num_objects    28
##  7 Japan           2014 num_objects    24
##  8 Japan           2021 num_objects    24
##  9 France          2011 num_objects    19
## 10 Spain           2022 num_objects    19

Long to wide form

# Spread the long table so that each distinct Year becomes its own column
# filled from `value`; Entity/Year combinations without a row become NA.
data_wide <- pivot_wider(
  data_long,
  names_from = Year,
  values_from = value
)

data_wide
## # A tibble: 9 × 9
##   Entity         metric      `2023` `2021` `2022` `1981` `2017` `2014` `2011`
##   <chr>          <chr>        <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 World          num_objects   2664     NA     NA     NA     NA     NA     NA
## 2 United States  num_objects   2166     NA     NA     NA     NA     NA     NA
## 3 United Kingdom num_objects     NA    289     NA     NA     NA     NA     NA
## 4 China          num_objects     NA     NA    182     NA     NA     NA     NA
## 5 Russia         num_objects     NA     NA     NA    124     NA     NA     NA
## 6 Belgium        num_objects     NA     NA     NA     NA     28     NA     NA
## 7 Japan          num_objects     NA     24     NA     NA     NA     24     NA
## 8 France         num_objects     NA     NA     NA     NA     NA     NA     19
## 9 Spain          num_objects     NA     NA     19     NA     NA     NA     NA

Separating and Uniting

# Paste Year and Entity together, joined by "-", into a single `Year_Entity`
# column; unite() drops the two source columns by default.
data_united2 <- unite(
  data_top10_launchers,
  col = "Year_Entity",
  Year, Entity,
  sep = "-"
)
data_united2
## # A tibble: 10 × 2
##    Year_Entity         num_objects
##    <chr>                     <dbl>
##  1 2023-World                 2664
##  2 2023-United States         2166
##  3 2021-United Kingdom         289
##  4 2022-China                  182
##  5 1981-Russia                 124
##  6 2017-Belgium                 28
##  7 2014-Japan                   24
##  8 2021-Japan                   24
##  9 2011-France                  19
## 10 2022-Spain                   19

Separate a column

# Split `Year_Entity` back into its two parts. Both resulting columns are
# character, so Year is no longer numeric after the round trip.
data_separated <- data_united2 %>%
  separate(
    col = Year_Entity,
    into = c("Year", "Entity"),
    sep = "-",
    # Split at the first "-" only: without this, an entity name that itself
    # contains a hyphen would be cut off at its hyphen (with a warning).
    extra = "merge"
  )
data_separated
## # A tibble: 10 × 3
##    Year  Entity         num_objects
##    <chr> <chr>                <dbl>
##  1 2023  World                 2664
##  2 2023  United States         2166
##  3 2021  United Kingdom         289
##  4 2022  China                  182
##  5 1981  Russia                 124
##  6 2017  Belgium                 28
##  7 2014  Japan                   24
##  8 2021  Japan                   24
##  9 2011  France                  19
## 10 2022  Spain                   19

Unite two columns

# Combine Year and num_objects into one character column, "Year/NumObjects",
# with the two values separated by "/".
data_united <- unite(
  data_top10_launchers,
  col = "Year/NumObjects",
  Year, num_objects,
  sep = "/"
)

data_united
## # A tibble: 10 × 2
##    Entity         `Year/NumObjects`
##    <chr>          <chr>            
##  1 World          2023/2664        
##  2 United States  2023/2166        
##  3 United Kingdom 2021/289         
##  4 China          2022/182         
##  5 Russia         1981/124         
##  6 Belgium        2017/28          
##  7 Japan          2014/24          
##  8 Japan          2021/24          
##  9 France         2011/19          
## 10 Spain          2022/19

Missing Values

I reused the wide-form table created above because it contains plenty of visible NA values. The code below replaces every NA with “IDK” (“I Don’t Know”), since the value that belongs there is unknown.

# Cast every column to character (a text placeholder cannot live in a numeric
# column) and substitute "IDK" wherever a value is missing.
data_replaced <- data_wide %>%
  mutate(
    across(
      everything(),
      function(column) replace_na(as.character(column), "IDK")
    )
  )

data_replaced
## # A tibble: 9 × 9
##   Entity         metric      `2023` `2021` `2022` `1981` `2017` `2014` `2011`
##   <chr>          <chr>       <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr> 
## 1 World          num_objects 2664   IDK    IDK    IDK    IDK    IDK    IDK   
## 2 United States  num_objects 2166   IDK    IDK    IDK    IDK    IDK    IDK   
## 3 United Kingdom num_objects IDK    289    IDK    IDK    IDK    IDK    IDK   
## 4 China          num_objects IDK    IDK    182    IDK    IDK    IDK    IDK   
## 5 Russia         num_objects IDK    IDK    IDK    124    IDK    IDK    IDK   
## 6 Belgium        num_objects IDK    IDK    IDK    IDK    28     IDK    IDK   
## 7 Japan          num_objects IDK    24     IDK    IDK    IDK    24     IDK   
## 8 France         num_objects IDK    IDK    IDK    IDK    IDK    IDK    19    
## 9 Spain          num_objects IDK    IDK    19     IDK    IDK    IDK    IDK