knitr::opts_chunk$set(echo = TRUE)
# Load package
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the survivalists table from the tidytuesday repository (2023-01-24)
survivalists <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-24/survivalists.csv"
)
## Rows: 94 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, gender, city, state, country, reason_tapped_out, reason_cate...
## dbl (5): season, age, result, days_lasted, day_linked_up
## lgl (1): medically_evacuated
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Level order to use when turning country names into a factor
country_levels <- c(
  "United States",
  "Canada",
  "United Kingdom",
  "U.S. Virgin Islands"
)

# Example character vector of country names ...
x1 <- c("United States", "United Kingdom", "Canada", "U.S. Virgin Islands")

# ... converted to a factor whose levels follow country_levels
y1 <- factor(x = x1, levels = country_levels)
Make two bar charts here: one before ordering and another after.
# Before ordering
# Transform data: one row per country holding the mean of days_lasted,
# with missing values removed before averaging
avgdayslasted_bycountry <- survivalists %>%
  group_by(country) %>%
  summarise(avg_days_lasted = mean(days_lasted, na.rm = TRUE))

# Print the summary table
avgdayslasted_bycountry
## # A tibble: 4 × 2
## country avg_days_lasted
## <chr> <dbl>
## 1 Canada 51.9
## 2 U.S. Virgin Islands 22
## 3 United Kingdom 69
## 4 United States 36.8
# Plot (before ordering): mean days lasted per country.
# The y aesthetic is built from the `country` column of the piped data, so each
# mean stays paired with the country on its own row. A free-standing vector
# would be matched to the rows purely by position and can silently mislabel
# the points. country_levels only controls the order of countries on the axis.
avgdayslasted_bycountry %>%
  ggplot(
    aes(x = avg_days_lasted, y = factor(country, levels = country_levels))
  ) +
  geom_point() +
  # Drop the axis title, which would otherwise show the factor() expression
  labs(y = NULL)
# After ordering: same plot, with countries sorted by their mean days lasted.
# fct_reorder() is applied to the `country` column of the piped data so that
# the labels and the values come from the same rows; reordering an external
# vector would pair each mean with whatever name sits at the same position.
avgdayslasted_bycountry %>%
  ggplot(
    aes(
      x = avg_days_lasted,
      y = fct_reorder(.f = country, .x = avg_days_lasted)
    )
  ) +
  geom_point() +
  # Labeling
  labs(y = NULL, x = "Mean Days Lasted on Alone")
Show examples of three functions:
# fct_recode(): rename levels by hand. Both "United States" and
# "U.S. Virgin Islands" are given the same new name, so they merge into "U.S."
y2 <- fct_recode(
  y1,
  "U.S." = "United States",
  "U.S." = "U.S. Virgin Islands"
)
y2
## [1] U.S. United Kingdom Canada U.S.
## Levels: U.S. Canada United Kingdom
# Using Code Along 10: list the unique values of country
distinct(survivalists, country)
## # A tibble: 4 × 1
## country
## <chr>
## 1 United States
## 2 Canada
## 3 United Kingdom
## 4 U.S. Virgin Islands
# Rename levels: "United States" becomes "U.S."; all other countries keep
# their original names. Show the old and new columns side by side.
survivalists %>%
  mutate(
    country_rev = fct_recode(.f = country, "U.S." = "United States")
  ) %>%
  select(country, country_rev)
## # A tibble: 94 × 2
## country country_rev
## <chr> <fct>
## 1 United States U.S.
## 2 United States U.S.
## 3 United States U.S.
## 4 United States U.S.
## 5 United States U.S.
## 6 United States U.S.
## 7 Canada Canada
## 8 Canada Canada
## 9 United States U.S.
## 10 United States U.S.
## # ℹ 84 more rows
# Unique values of country, as a reference for the collapse step
distinct(survivalists, country)
## # A tibble: 4 × 1
## country
## <chr>
## 1 United States
## 2 Canada
## 3 United Kingdom
## 4 U.S. Virgin Islands
# Collapse multiple levels into one: "United States" and "U.S. Virgin Islands"
# are merged into the single level "U.S.". Only the rows whose original
# country is "United States" are displayed.
survivalists %>%
  mutate(
    country_col = fct_collapse(
      country,
      "U.S." = c("United States", "U.S. Virgin Islands")
    )
  ) %>%
  select(country, country_col) %>%
  filter(country == "United States")
## # A tibble: 79 × 2
## country country_col
## <chr> <fct>
## 1 United States U.S.
## 2 United States U.S.
## 3 United States U.S.
## 4 United States U.S.
## 5 United States U.S.
## 6 United States U.S.
## 7 United States U.S.
## 8 United States U.S.
## 9 United States U.S.
## 10 United States U.S.
## # ℹ 69 more rows
# Same collapse as above, this time displaying only the rows whose original
# country is "U.S. Virgin Islands"
survivalists %>%
  mutate(
    country_col = fct_collapse(
      country,
      "U.S." = c("United States", "U.S. Virgin Islands")
    )
  ) %>%
  select(country, country_col) %>%
  filter(country == "U.S. Virgin Islands")
## # A tibble: 1 × 2
## country country_col
## <chr> <fct>
## 1 U.S. Virgin Islands U.S.
# Lump small levels into "Other".
# fct_lump_lowfreq() is called directly instead of the legacy fct_lump()
# wrapper: with neither `n` nor `prop` supplied, fct_lump() dispatches to this
# function, so the result is the same while the lumping rule is explicit
# (the least frequent levels are merged, keeping "Other" the smallest level).
survivalists %>%
  mutate(country_lump = fct_lump_lowfreq(country)) %>%
  distinct(country_lump)
## # A tibble: 2 × 1
## country_lump
## <fct>
## 1 United States
## 2 Other
No need to do anything here.