Get data

# Download the survivalists table from the TidyTuesday 2023-01-24 release.
survivalists <- readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-24/survivalists.csv"
)
## Rows: 94 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, gender, city, state, country, reason_tapped_out, reason_cate...
## dbl  (5): season, age, result, days_lasted, day_linked_up
## lgl  (1): medically_evacuated
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Introduction

Question2

Variation

Visualizing Distributions

# Bar chart: row count for each value of medically_evacuated.
ggplot(data = survivalists, mapping = aes(x = medically_evacuated)) +
    geom_bar()

# Histogram of days_lasted using the default binning.
ggplot(survivalists, aes(x = days_lasted)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of result, restricted to rows where gender is "Male".
survivalists %>%
    filter(gender == "Male") %>%
    ggplot(mapping = aes(x = result)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of result, restricted to rows where gender is "Female".
survivalists %>%
    filter(gender == "Female") %>%
    ggplot(mapping = aes(x = result)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Frequency polygons of days_lasted, one coloured line per gender.
ggplot(data = survivalists, mapping = aes(x = days_lasted, color = gender)) +
    geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Typical Values

# Histogram of days_lasted for rows whose result equals 1 only.
# NOTE(review): result == 1 presumably marks the season winner; confirm
# against the dataset's data dictionary.
survivalists %>%
    filter(result == 1) %>%
    ggplot(mapping = aes(x = days_lasted)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of days_lasted with bins 5 units wide.
ggplot(data = survivalists, mapping = aes(x = days_lasted)) +
    geom_histogram(binwidth = 5)

Unusual Values

# Histogram of age with bins 10 units wide.
ggplot(data = survivalists, mapping = aes(x = age)) +
    geom_histogram(binwidth = 10)

# Histogram of age (default binning) with the visible y-range set to 0-15
# via coord_cartesian().
ggplot(data = survivalists, mapping = aes(x = age)) +
    geom_histogram() +
    coord_cartesian(ylim = c(0, 15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Missing Values

survivalists %>%

    # Recode placements outside the 1-5 range as NA instead of dropping the
    # rows, so the other columns of those records are kept. The test must be
    # "below 1 OR above 5": a condition such as `result <= 5 | result >= 1`
    # holds for every value and would blank out the whole column.
    mutate(y = ifelse(result < 1 | result > 5, NA_real_, result)) %>%

    # Plot the recoded column `y` (not raw `result`) so the NA recoding is
    # actually reflected in the chart; na.rm = TRUE drops the NA rows quietly.
    ggplot(aes(x = days_lasted, y = y)) +
    geom_point(na.rm = TRUE)

Covariation

A categorical and continuous variable

# Boxplot of days_lasted for each value of medically_evacuated.
ggplot(
    data = survivalists,
    mapping = aes(x = medically_evacuated, y = days_lasted)
) +
    geom_boxplot()

Two categorical variables

# Count rows per (medically_evacuated, result) pair, then draw one tile per
# pair filled by that count (column `n` produced by count()).
survivalists %>%
    count(medically_evacuated, result) %>%
    ggplot(mapping = aes(x = medically_evacuated, y = result, fill = n)) +
    geom_tile()

Two continuous variables

library(hexbin)

# Hexagonal-bin plot of days_lasted against result.
ggplot(data = survivalists, mapping = aes(x = result, y = days_lasted)) +
    geom_hex()

# Keep rows with result of 3 or less, then draw one days_lasted boxplot per
# width-1 interval of result.
survivalists %>%
    filter(result <= 3) %>%
    ggplot(mapping = aes(x = result, y = days_lasted)) +
    geom_boxplot(mapping = aes(group = cut_width(result, width = 1)))

Patterns and Models

library(modelr)

# Linear model of result as a function of days_lasted, fitted on the raw
# (untransformed) scale.
mod <- lm(result ~ days_lasted, data = survivalists)

# Attach the model residuals as column `resid`. Because the response was not
# log-transformed, the residuals are already in units of `result` and must not
# be exponentiated; exp() is only a valid back-transform for a log-scale model.
survivalists2 <- survivalists %>%
    modelr::add_residuals(mod)

# Scatter plot of the residual column against result.
ggplot(data = survivalists2, mapping = aes(x = result, y = resid)) +
    geom_point()

# Boxplot of the residual column for each gender.
ggplot(data = survivalists2, mapping = aes(x = gender, y = resid)) +
    geom_boxplot()