Import your data

# Read the TidyTuesday 2024-04-23 outer-space-objects CSV straight from GitHub.
outer_space_objects <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-23/outer_space_objects.csv"
)
## Rows: 1175 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, num_objects
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Tidy data

# Seed kept from the original analysis.
# NOTE(review): no step visible in this section draws random numbers - confirm
# whether the seed is still needed.
set.seed(2)

# For every Entity keep the row(s) where num_objects hits that Entity's maximum
# (tied years are all kept, so an Entity can appear more than once), then keep
# the ten rows with the largest num_objects.
data_top10_launchers <- outer_space_objects %>%
  select(Entity, Year, num_objects) %>%
  group_by(Entity) %>%
  filter(num_objects == max(num_objects)) %>%
  ungroup() %>%
  arrange(desc(num_objects)) %>%
  head(n = 10)

print(data_top10_launchers)
## # A tibble: 10 × 3
##    Entity          Year num_objects
##    <chr>          <dbl>       <dbl>
##  1 World           2023        2664
##  2 United States   2023        2166
##  3 United Kingdom  2021         289
##  4 China           2022         182
##  5 Russia          1981         124
##  6 Belgium         2017          28
##  7 Japan           2014          24
##  8 Japan           2021          24
##  9 France          2011          19
## 10 Spain           2022          19

Chapter 15

Create a factor

# Turn Entity into a factor with an explicit, hand-written level order.
datafactored <- data_top10_launchers %>%
  mutate(
    Entity = factor(
      Entity,
      levels = c(
        "World", "United States", "China", "Russia", "United Kingdom",
        "France", "Belgium", "Japan", "Spain"
      )
    )
  )

Modify factor order

The dataset changed after running the code, which indicates that the averaging worked correctly. Most entities other than Japan appear only once, so their values are unchanged. Japan's value is also unchanged because its two entries have the same value, but there is now only one row for Japan, which brings the sample size down by one, from 10 to 9.

Make two bar charts here - one before ordering another after

# Mean num_objects per Entity (missing values ignored); one row per Entity.
avg_objects_by_entity <- datafactored %>%
  group_by(Entity) %>%
  summarise(avg_num_objects = mean(num_objects, na.rm = TRUE))

# Show the summary table
avg_objects_by_entity
## # A tibble: 9 × 2
##   Entity         avg_num_objects
##   <fct>                    <dbl>
## 1 World                     2664
## 2 United States             2166
## 3 China                      182
## 4 Russia                     124
## 5 United Kingdom             289
## 6 France                      19
## 7 Belgium                     28
## 8 Japan                       24
## 9 Spain                       19
# Dot plot with Entity in its current factor-level order (before reordering)
ggplot(avg_objects_by_entity, aes(x = avg_num_objects, y = Entity)) +
  geom_point()

# Same dot plot, with Entity levels reordered by their average value
ggplot(
  avg_objects_by_entity,
  aes(x = avg_num_objects, y = fct_reorder(Entity, avg_num_objects))
) +
  geom_point() +
  # Drop the y-axis title; give the x-axis a readable label
  labs(x = "Average Number of Objects", y = NULL)

Modify factor levels

Show examples of three functions:

fct_recode

United States becomes USA, and United Kingdom becomes UK.

# Shorten two level names: "United States" -> "USA", "United Kingdom" -> "UK".
average_Recode <- avg_objects_by_entity %>%
  mutate(
    Entity = fct_recode(
      Entity,
      USA = "United States",
      UK = "United Kingdom"
    )
  )

print(average_Recode)
## # A tibble: 9 × 2
##   Entity  avg_num_objects
##   <fct>             <dbl>
## 1 World              2664
## 2 USA                2166
## 3 China               182
## 4 Russia              124
## 5 UK                  289
## 6 France               19
## 7 Belgium              28
## 8 Japan                24
## 9 Spain                19

fct_collapse

# Group the recoded Entity levels into regions. Levels that are not listed
# (here "World") keep their existing name.
average_Collapse <- average_Recode %>%
  mutate(
    Entity = fct_collapse(
      Entity,
      "North America" = "USA",
      "Europe" = c("UK", "Belgium", "France", "Spain"),
      "Asia" = c("China", "Japan"),
      "Russia" = "Russia"
    )
  )

# Show the result (rows are relabelled only; values are not re-aggregated)
print(average_Collapse)
## # A tibble: 9 × 2
##   Entity        avg_num_objects
##   <fct>                   <dbl>
## 1 World                    2664
## 2 North America            2166
## 3 Asia                      182
## 4 Russia                    124
## 5 Europe                    289
## 6 Europe                     19
## 7 Europe                     28
## 8 Asia                       24
## 9 Europe                     19

fct_lump

# Load necessary packages
library(dplyr)
library(forcats)

# Lump every Entity outside the top 3 into "Other".
# Each Entity occurs once in data_top10_launchers (Japan twice), so ranking by
# row frequency ties nearly every level inside the top 3 and nothing is lumped.
# Weighting each row by num_objects ranks entities by objects launched instead.
data_lumped <- data_top10_launchers %>%
  mutate(Entity = fct_lump(Entity, n = 3, w = num_objects))

# Rows per Entity level after lumping
data_lumped %>% count(Entity)
## # A tibble: 4 × 2
##   Entity             n
##   <fct>          <int>
## 1 United Kingdom     1
## 2 United States      1
## 3 World              1
## 4 Other              7
# Dot plot of num_objects for each Entity level in data_lumped
ggplot(data_lumped, aes(x = num_objects, y = Entity)) +
  geom_point() +
  labs(x = "Number of Objects", y = NULL)

Chapter 16

No need to do anything here.