# Read the TidyTuesday 2024-04-23 outer_space_objects CSV directly from GitHub.
outer_space_objects <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-23/outer_space_objects.csv"
)
## Rows: 1175 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, num_objects
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Seed the RNG for a reproducible outcome.
set.seed(2)

# For each Entity keep the row(s) where num_objects hits that Entity's maximum,
# then take the ten largest of those rows.
# NOTE(review): the `==` filter keeps ties, so an Entity whose maximum occurs in
# more than one year contributes several rows (Japan appears twice in the
# printed result).
data_top10_launchers <- outer_space_objects %>%
  select(Entity, Year, num_objects) %>%
  group_by(Entity) %>%
  filter(num_objects == max(num_objects)) %>%
  ungroup() %>%
  arrange(desc(num_objects)) %>%
  slice_head(n = 10)

print(data_top10_launchers)
## # A tibble: 10 × 3
## Entity Year num_objects
## <chr> <dbl> <dbl>
## 1 World 2023 2664
## 2 United States 2023 2166
## 3 United Kingdom 2021 289
## 4 China 2022 182
## 5 Russia 1981 124
## 6 Belgium 2017 28
## 7 Japan 2014 24
## 8 Japan 2021 24
## 9 France 2011 19
## 10 Spain 2022 19
# Convert Entity to a factor with an explicit, hand-written level order.
datafactored <- data_top10_launchers %>%
  mutate(
    Entity = factor(
      Entity,
      levels = c(
        "World", "United States", "China", "Russia", "United Kingdom",
        "France", "Belgium", "Japan", "Spain"
      )
    )
  )
Make two bar charts here: one before ordering the factor levels and another after.
# Mean num_objects per Entity (one output row per factor level present).
avg_objects_by_entity <- datafactored %>%
  group_by(Entity) %>%
  summarise(avg_num_objects = mean(num_objects, na.rm = TRUE))

# Auto-print the summary table.
avg_objects_by_entity
## # A tibble: 9 × 2
## Entity avg_num_objects
## <fct> <dbl>
## 1 World 2664
## 2 United States 2166
## 3 China 182
## 4 Russia 124
## 5 United Kingdom 289
## 6 France 19
## 7 Belgium 28
## 8 Japan 24
## 9 Spain 19
# Dot plot using the factor's current level order (BEFORE ORDERING).
ggplot(avg_objects_by_entity, aes(x = avg_num_objects, y = Entity)) +
  geom_point()
# Same dot plot, with Entity levels re-ordered by avg_num_objects so the
# y-axis is sorted by value.
ggplot(
  avg_objects_by_entity,
  aes(x = avg_num_objects, y = fct_reorder(Entity, avg_num_objects))
) +
  geom_point() +
  # Drop the y-axis title and label the x-axis.
  labs(x = "Average Number of Objects", y = NULL)
Show examples of three functions:
# Recode factor levels: rename two Entity labels to their abbreviations.
average_Recode <- avg_objects_by_entity %>%
  mutate(
    Entity = fct_recode(
      Entity,
      USA = "United States",
      UK = "United Kingdom"
    )
  )
print(average_Recode)
## # A tibble: 9 × 2
## Entity avg_num_objects
## <fct> <dbl>
## 1 World 2664
## 2 USA 2166
## 3 China 182
## 4 Russia 124
## 5 UK 289
## 6 France 19
## 7 Belgium 28
## 8 Japan 24
## 9 Spain 19
# Collapse factor levels: merge country-level labels into broader regions.
# Only the labels change; rows are not re-aggregated, so a region can appear
# on several rows of the printed table.
average_Collapse <- average_Recode %>%
  mutate(
    Entity = fct_collapse(
      Entity,
      "North America" = c("USA"),
      "Europe" = c("UK", "Belgium", "France", "Spain"),
      "Asia" = c("China", "Japan"),
      "Russia" = "Russia"
    )
  )

# Show the table with the collapsed labels.
print(average_Collapse)
## # A tibble: 9 × 2
## Entity avg_num_objects
## <fct> <dbl>
## 1 World 2664
## 2 North America 2166
## 3 Asia 182
## 4 Russia 124
## 5 Europe 289
## 6 Europe 19
## 7 Europe 28
## 8 Asia 24
## 9 Europe 19
# Load necessary packages
library(dplyr)
library(forcats)

# Lump all but the top 3 entities into "Other".
# Without weights, fct_lump() ranks levels by how many ROWS they have. Here
# every entity has a single row (Japan has two), so almost all levels tie and
# nothing gets lumped. Weighting by num_objects ranks entities by the number
# of objects instead, so only the three largest launchers are kept.
data_lumped <- data_top10_launchers %>%
  mutate(Entity = fct_lump(Entity, n = 3, w = num_objects))

# Display the lumped levels and how many rows fall into each.
data_lumped %>%
  count(Entity)
## # A tibble: 4 × 2
## Entity n
## <fct> <int>
## 1 United Kingdom 1
## 2 United States 1
## 3 World 1
## 4 Other 7
# Dot plot of num_objects against the lumped Entity factor.
ggplot(data_lumped, aes(x = num_objects, y = Entity)) +
  geom_point() +
  labs(x = "Number of Objects", y = NULL)
No need to do anything here.