knitr::opts_chunk$set(echo = TRUE)

# Load package
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import your data

# Read the Tidy Tuesday (2023-01-24) survivalists CSV directly from GitHub
survivalists <- readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-24/survivalists.csv"
)
## Rows: 94 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, gender, city, state, country, reason_tapped_out, reason_cate...
## dbl  (5): season, age, result, days_lasted, day_linked_up
## lgl  (1): medically_evacuated
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# Raw country labels, in the order the values appear in the factor
x1 <- c(
    "United States",
    "United Kingdom",
    "Canada",
    "U.S. Virgin Islands"
)

# Explicit level order for the factor (not the same as the value order above)
country_levels <- c(
    "United States",
    "Canada",
    "United Kingdom",
    "U.S. Virgin Islands"
)

# Build the factor using the explicit level order instead of the default
# alphabetical one
y1 <- factor(x = x1, levels = country_levels)

Modify factor order

Make two bar charts here: one before ordering and another after.

# Before ordering
# Transform data: one row per country holding the mean of days_lasted,
# with missing values dropped before averaging
avgdayslasted_bycountry <- survivalists %>%
    group_by(country) %>%
    summarise(avg_days_lasted = mean(days_lasted, na.rm = TRUE))

# Print the summary table
avgdayslasted_bycountry
## # A tibble: 4 × 2
##   country             avg_days_lasted
##   <chr>                         <dbl>
## 1 Canada                         51.9
## 2 U.S. Virgin Islands            22  
## 3 United Kingdom                 69  
## 4 United States                  36.8
# Plot: mean days lasted (x) for each country (y), before any reordering.
# y is mapped to the `country` column of the summary table so every point is
# labelled with the country of its own row. An external vector such as `y1`
# is matched to the rows purely by position, and its order (United States,
# United Kingdom, Canada, U.S. Virgin Islands) differs from the row order of
# the summary table, which would attach the wrong country to each mean.
avgdayslasted_bycountry %>%
    ggplot(aes(x = avg_days_lasted, y = country)) +
    geom_point()

# After ordering
# Reorder the countries by their mean days lasted so the y axis is sorted by
# the plotted value. The factor being reordered must be the table's own
# `country` column: reordering an external vector (e.g. `y1`) pairs each mean
# with whatever label sits at the same position, not with its actual country.
avgdayslasted_bycountry %>%
    ggplot(aes(
        x = avg_days_lasted,
        y = fct_reorder(.f = country, .x = avg_days_lasted)
    )) +
    geom_point() +
    # Labeling: drop the y-axis title, describe the x axis
    labs(y = NULL, x = "Mean Days Lasted on Alone")

Modify factor levels

Show examples of three functions:

  • fct_recode #Using original factor
# Recode two levels of `y1` to the same new label, "U.S."; giving both the
# same name merges them into a single level
y2 <- y1 %>%
    fct_recode(
        "U.S." = "United States",
        "U.S." = "U.S. Virgin Islands"
    )

# Print the recoded factor
y2
## [1] U.S.           United Kingdom Canada         U.S.          
## Levels: U.S. Canada United Kingdom

#Using Code Along 10

# List the distinct values of the country column
distinct(survivalists, country)
## # A tibble: 4 × 1
##   country            
##   <chr>              
## 1 United States      
## 2 Canada             
## 3 United Kingdom     
## 4 U.S. Virgin Islands
# Rename levels: add a factor column in which "United States" is relabelled
# "U.S.", then show it next to the original country column
survivalists %>%
    mutate(
        country_rev = fct_recode(country, "U.S." = "United States")
    ) %>%
    select(country, country_rev)
## # A tibble: 94 × 2
##    country       country_rev
##    <chr>         <fct>      
##  1 United States U.S.       
##  2 United States U.S.       
##  3 United States U.S.       
##  4 United States U.S.       
##  5 United States U.S.       
##  6 United States U.S.       
##  7 Canada        Canada     
##  8 Canada        Canada     
##  9 United States U.S.       
## 10 United States U.S.       
## # ℹ 84 more rows
  • fct_collapse
# List the distinct values of the country column
distinct(survivalists, country)
## # A tibble: 4 × 1
##   country            
##   <chr>              
## 1 United States      
## 2 Canada             
## 3 United Kingdom     
## 4 U.S. Virgin Islands
# Collapse multiple levels into one: "United States" and
# "U.S. Virgin Islands" both become "U.S." in the new factor column.
# Keep only the rows whose original country is "United States".
survivalists %>%
    mutate(
        country_col = fct_collapse(
            country,
            "U.S." = c("United States", "U.S. Virgin Islands")
        )
    ) %>%
    select(country, country_col) %>%
    filter(country == "United States")
## # A tibble: 79 × 2
##    country       country_col
##    <chr>         <fct>      
##  1 United States U.S.       
##  2 United States U.S.       
##  3 United States U.S.       
##  4 United States U.S.       
##  5 United States U.S.       
##  6 United States U.S.       
##  7 United States U.S.       
##  8 United States U.S.       
##  9 United States U.S.       
## 10 United States U.S.       
## # ℹ 69 more rows

#Filtering only “U.S. Virgin Islands” with the same code

# Same collapse of "United States" and "U.S. Virgin Islands" into "U.S.",
# this time keeping only the rows whose original country is
# "U.S. Virgin Islands"
survivalists %>%
    mutate(
        country_col = fct_collapse(
            country,
            "U.S." = c("United States", "U.S. Virgin Islands")
        )
    ) %>%
    select(country, country_col) %>%
    filter(country == "U.S. Virgin Islands")
## # A tibble: 1 × 2
##   country             country_col
##   <chr>               <fct>      
## 1 U.S. Virgin Islands U.S.
  • fct_lump
# Lump small levels into "Other".
# fct_lump() called without `n` or `prop` just dispatches to
# fct_lump_lowfreq(); forcats recommends calling the specific fct_lump_*()
# verb directly in new code, so the lumping rule is explicit here. The
# result is the same as the bare fct_lump(country) call.
survivalists %>%
    mutate(country_lump = fct_lump_lowfreq(country)) %>%
    distinct(country_lump)
## # A tibble: 2 × 1
##   country_lump 
##   <fct>        
## 1 United States
## 2 Other

Chapter 16

No need to do anything here.