# Load the survivalists table from the TidyTuesday 2023-01-24 data release.
survivalists <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-24/survivalists.csv"
)
## Rows: 94 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, gender, city, state, country, reason_tapped_out, reason_cate...
## dbl (5): season, age, result, days_lasted, day_linked_up
## lgl (1): medically_evacuated
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Bar chart: number of rows for each value of medically_evacuated.
ggplot(data = survivalists, mapping = aes(x = medically_evacuated)) +
  geom_bar()
# Histogram of days_lasted across all rows (default binning).
ggplot(survivalists, aes(x = days_lasted)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of result restricted to rows where gender is "Male".
ggplot(
  data = filter(survivalists, gender == "Male"),
  mapping = aes(x = result)
) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of result restricted to rows where gender is "Female".
ggplot(
  data = filter(survivalists, gender == "Female"),
  mapping = aes(x = result)
) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Frequency polygons of days_lasted, one coloured line per gender.
ggplot(data = survivalists, mapping = aes(x = days_lasted, color = gender)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
survivalists %>%
  # Keep only rows with result == 1 (presumably each season's first-place
  # finisher -- confirm against the data dictionary). No days_lasted cut-off
  # is applied here.
  filter(result == 1) %>%
  # Histogram of days_lasted for that subset (default binning)
  ggplot(aes(x = days_lasted)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of days_lasted using 5-unit-wide bins.
ggplot(data = survivalists, mapping = aes(x = days_lasted)) +
  geom_histogram(binwidth = 5)
# Histogram of age using 10-unit-wide bins.
ggplot(data = survivalists, mapping = aes(x = age)) +
  geom_histogram(binwidth = 10)
# Histogram of age (default binning) with the visible y-range limited to 0-15.
# coord_cartesian() only zooms the view; it does not drop data before binning.
ggplot(data = survivalists, mapping = aes(x = age)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Missing values ----
# Recode placements outside the 1-5 range as NA (rather than dropping the
# rows), then plot the recoded placement against days lasted.
#
# The previous condition, `result <= 5 | result >= 1`, is TRUE for every
# non-missing value, so it turned the whole column into NA; the recoded column
# was also never used by the plot. Both are corrected here.
# Alternative that removes the rows entirely:
#   filter(result >= 1, result <= 5)
survivalists %>%
  mutate(result = ifelse(result < 1 | result > 5, NA, result)) %>%
  # ggplot2 warns about the rows it skips because result is now NA
  ggplot(aes(x = days_lasted, y = result)) +
  geom_point()
# Boxplots of days_lasted, one box per medically_evacuated value.
ggplot(
  data = survivalists,
  mapping = aes(x = medically_evacuated, y = days_lasted)
) +
  geom_boxplot()
# Tile plot: each (medically_evacuated, result) combination is filled by the
# number of rows, n, that count() reports for it.
ggplot(
  data = count(survivalists, medically_evacuated, result),
  mapping = aes(x = medically_evacuated, y = result, fill = n)
) +
  geom_tile()
library(hexbin)
# Hexagonal-bin plot of days_lasted against result.
ggplot(data = survivalists, mapping = aes(x = result, y = days_lasted)) +
  geom_hex()
# Boxplots of days_lasted for rows with result <= 3; cut_width(result, 1)
# groups result into width-1 bins so each bin gets its own box.
ggplot(
  data = filter(survivalists, result <= 3),
  mapping = aes(x = result, y = days_lasted)
) +
  geom_boxplot(mapping = aes(group = cut_width(result, 1)))
library(modelr)
# Fit a simple linear model of result on days_lasted.
mod <- lm(result ~ days_lasted, data = survivalists)

# Append the model residuals as the column `resid`.
# The model is fitted on the raw (untransformed) scale, so the residuals are
# already in the units of `result`; applying exp() to them (only appropriate
# when undoing a log-scale fit) would distort them, so no back-transform is
# applied.
survivalists2 <- survivalists %>%
  modelr::add_residuals(mod)
# Scatter plot of the resid column against result.
ggplot(data = survivalists2, mapping = aes(x = result, y = resid)) +
  geom_point()
# Boxplots of the resid column, one box per gender.
ggplot(data = survivalists2, mapping = aes(x = gender, y = resid)) +
  geom_boxplot()