Import your data

This Data set is from Tidy Tuesday and it contains a list of every country on Earth that has launched objects into space within the last couple decades, a categorical column of their designations for each country, and then a numerical column dedicated to how many objects get launched by each country.

outer_space_objects <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-23/outer_space_objects.csv')
## Rows: 1175 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, num_objects
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 13

What are primary keys in your data?

Can you divide your data into two?

Below are two code chunks that divide my data up into two different tables. one contains just the top ten launcher of objects into space with the num_objects variable being sorted from highest to least descending with a limit on the number of times each country can appear.The second table has the countries ordered by their country code with filter ordering them alphabetically and descending from Z to A, this set does not take into account the launch amounts hence why most countries end up different than the first table.

set.seed(2) # for reproducible outcome
data_top10_launchers <- outer_space_objects %>%
    
    # Select three columns
    select(Entity, Year, num_objects ) %>%
    group_by(Entity) %>%
    filter(num_objects == max(num_objects)) %>%
    ungroup() %>%
    arrange(desc(num_objects)) %>%
    slice_head(n = 10)
    
print(data_top10_launchers)
## # A tibble: 10 × 3
##    Entity          Year num_objects
##    <chr>          <dbl>       <dbl>
##  1 World           2023        2664
##  2 United States   2023        2166
##  3 United Kingdom  2021         289
##  4 China           2022         182
##  5 Russia          1981         124
##  6 Belgium         2017          28
##  7 Japan           2014          24
##  8 Japan           2021          24
##  9 France          2011          19
## 10 Spain           2022          19
set.seed(33) # for reproducible outcome
data_Z2ACode <- outer_space_objects %>%
    
    # Select three columns
    select(Entity, Year, num_objects, Code ) %>%
    group_by(Entity) %>%
    filter(num_objects == max(num_objects)) %>%
    ungroup() %>%
    arrange(desc(Code)) %>%
    slice_head(n = 10)
    
print(data_Z2ACode)
## # A tibble: 10 × 4
##    Entity         Year num_objects Code 
##    <chr>         <dbl>       <dbl> <chr>
##  1 Zimbabwe       2022           1 ZWE  
##  2 South Africa   2022           3 ZAF  
##  3 Vietnam        2012           2 VNM  
##  4 Vietnam        2013           2 VNM  
##  5 Venezuela      2008           2 VEN  
##  6 United States  2023        2166 USA  
##  7 Uruguay        2020          13 URY  
##  8 Ukraine        2004           2 UKR  
##  9 Ukraine        2011           2 UKR  
## 10 Uganda         2022           1 UGA

Can you join the two together?

Left Join (Table Size 10x4)

I designed it so most countries won’t have a code available due to not being in the data_Z2ACode data table, this data table took all countries from the original data set and arranged them alphabetically by country code from Z-A descending, the US has the only Common code for this left join example.

left_join (data_top10_launchers, data_Z2ACode)
## Joining with `by = join_by(Entity, Year, num_objects)`
## # A tibble: 10 × 4
##    Entity          Year num_objects Code 
##    <chr>          <dbl>       <dbl> <chr>
##  1 World           2023        2664 <NA> 
##  2 United States   2023        2166 USA  
##  3 United Kingdom  2021         289 <NA> 
##  4 China           2022         182 <NA> 
##  5 Russia          1981         124 <NA> 
##  6 Belgium         2017          28 <NA> 
##  7 Japan           2014          24 <NA> 
##  8 Japan           2021          24 <NA> 
##  9 France          2011          19 <NA> 
## 10 Spain           2022          19 <NA>

Right Join (Table Size 10x4)

right_join (data_top10_launchers, data_Z2ACode)
## Joining with `by = join_by(Entity, Year, num_objects)`
## # A tibble: 10 × 4
##    Entity         Year num_objects Code 
##    <chr>         <dbl>       <dbl> <chr>
##  1 United States  2023        2166 USA  
##  2 Zimbabwe       2022           1 ZWE  
##  3 South Africa   2022           3 ZAF  
##  4 Vietnam        2012           2 VNM  
##  5 Vietnam        2013           2 VNM  
##  6 Venezuela      2008           2 VEN  
##  7 Uruguay        2020          13 URY  
##  8 Ukraine        2004           2 UKR  
##  9 Ukraine        2011           2 UKR  
## 10 Uganda         2022           1 UGA

Full Join (Table Size 19x4)

full_join (data_top10_launchers, data_Z2ACode)
## Joining with `by = join_by(Entity, Year, num_objects)`
## # A tibble: 19 × 4
##    Entity          Year num_objects Code 
##    <chr>          <dbl>       <dbl> <chr>
##  1 World           2023        2664 <NA> 
##  2 United States   2023        2166 USA  
##  3 United Kingdom  2021         289 <NA> 
##  4 China           2022         182 <NA> 
##  5 Russia          1981         124 <NA> 
##  6 Belgium         2017          28 <NA> 
##  7 Japan           2014          24 <NA> 
##  8 Japan           2021          24 <NA> 
##  9 France          2011          19 <NA> 
## 10 Spain           2022          19 <NA> 
## 11 Zimbabwe        2022           1 ZWE  
## 12 South Africa    2022           3 ZAF  
## 13 Vietnam         2012           2 VNM  
## 14 Vietnam         2013           2 VNM  
## 15 Venezuela       2008           2 VEN  
## 16 Uruguay         2020          13 URY  
## 17 Ukraine         2004           2 UKR  
## 18 Ukraine         2011           2 UKR  
## 19 Uganda          2022           1 UGA

Filtering

Semi Join (Table Size (Top 1x3) (Bottom 1x4))

semi_join(data_top10_launchers, data_Z2ACode)
## Joining with `by = join_by(Entity, Year, num_objects)`
## # A tibble: 1 × 3
##   Entity         Year num_objects
##   <chr>         <dbl>       <dbl>
## 1 United States  2023        2166
semi_join(data_Z2ACode, data_top10_launchers)
## Joining with `by = join_by(Entity, Year, num_objects)`
## # A tibble: 1 × 4
##   Entity         Year num_objects Code 
##   <chr>         <dbl>       <dbl> <chr>
## 1 United States  2023        2166 USA

Anti Join (Table Size (Top 9x3) (Bottom 9x4))

anti_join(data_top10_launchers, data_Z2ACode)
## Joining with `by = join_by(Entity, Year, num_objects)`
## # A tibble: 9 × 3
##   Entity          Year num_objects
##   <chr>          <dbl>       <dbl>
## 1 World           2023        2664
## 2 United Kingdom  2021         289
## 3 China           2022         182
## 4 Russia          1981         124
## 5 Belgium         2017          28
## 6 Japan           2014          24
## 7 Japan           2021          24
## 8 France          2011          19
## 9 Spain           2022          19
anti_join(data_Z2ACode, data_top10_launchers)
## Joining with `by = join_by(Entity, Year, num_objects)`
## # A tibble: 9 × 4
##   Entity        Year num_objects Code 
##   <chr>        <dbl>       <dbl> <chr>
## 1 Zimbabwe      2022           1 ZWE  
## 2 South Africa  2022           3 ZAF  
## 3 Vietnam       2012           2 VNM  
## 4 Vietnam       2013           2 VNM  
## 5 Venezuela     2008           2 VEN  
## 6 Uruguay       2020          13 URY  
## 7 Ukraine       2004           2 UKR  
## 8 Ukraine       2011           2 UKR  
## 9 Uganda        2022           1 UGA