# Set the knitr chunk option `echo` to TRUE for all chunks, so each chunk's
# source code is shown alongside its output in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
# Load packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Import the survivalists table from the TidyTuesday 2023-01-24 data folder
# on GitHub. Column types are guessed by readr.
alonedata <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-24/survivalists.csv"
)
## Rows: 94 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, gender, city, state, country, reason_tapped_out, reason_cate...
## dbl (5): season, age, result, days_lasted, day_linked_up
## lgl (1): medically_evacuated
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to preview its first rows and column types.
alonedata
## # A tibble: 94 × 16
## season name age gender city state country result days_lasted
## <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 1 Alan Kay 40 Male Blairs… Geor… United… 1 56
## 2 1 Sam Larson 22 Male Lincoln Nebr… United… 2 55
## 3 1 Mitch Mitchell 34 Male Bellin… Mass… United… 3 43
## 4 1 Lucas Miller 32 Male Quasqu… Iowa United… 4 39
## 5 1 Dustin Feher 37 Male Pittsb… Penn… United… 5 8
## 6 1 Brant McGee 44 Male Albema… Nort… United… 6 6
## 7 1 Wayne Russell 46 Male Saint … New … Canada 7 4
## 8 1 Joe Robinet 24 Male Windsor Onta… Canada 8 4
## 9 1 Chris Weatherman 41 Male Umatil… Flor… United… 9 1
## 10 1 Josh Chavez 31 Male Jackson Ohio United… 10 0
## # ℹ 84 more rows
## # ℹ 7 more variables: medically_evacuated <lgl>, reason_tapped_out <chr>,
## # reason_category <chr>, team <chr>, day_linked_up <dbl>, profession <chr>,
## # url <chr>
# Fix the RNG seed so the random sample below is reproducible.
set.seed(123)

# Draw 10 random rows and keep only the three columns used in the
# reshaping examples that follow.
alone_small <- alonedata %>%
  sample_n(size = 10) %>%
  select(city, age, result)

alone_small
## # A tibble: 10 × 3
## city age result
## <chr> <dbl> <dbl>
## 1 Toronto 35 1
## 2 Wellsboro 43 5
## 3 Skowhegan 32 7
## 4 Portland 45 4
## 5 Espanola 33 3
## 6 Liberty 19 6
## 7 Fox 45 6
## 8 Fox Lake 23 7
## 9 Homer 30 7
## 10 Lopez Island 27 5
## Pivoting
# Widen the sample: one row per city, one column per distinct `result` value,
# with `age` as the cell value (NA where a city has no row for that result).
alone_wide <- alone_small %>%
  pivot_wider(
    id_cols = city,
    names_from = result,
    values_from = age
  )

alone_wide
## # A tibble: 10 × 7
## city `1` `5` `7` `4` `3` `6`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Toronto 35 NA NA NA NA NA
## 2 Wellsboro NA 43 NA NA NA NA
## 3 Skowhegan NA NA 32 NA NA NA
## 4 Portland NA NA NA 45 NA NA
## 5 Espanola NA NA NA NA 33 NA
## 6 Liberty NA NA NA NA NA 19
## 7 Fox NA NA NA NA NA 45
## 8 Fox Lake NA NA 23 NA NA NA
## 9 Homer NA NA 30 NA NA NA
## 10 Lopez Island NA 27 NA NA NA NA
# Return the wide table to long form, dropping the NA padding cells.
#
# Every column except `city` is pivoted. pivot_wider() created the result
# columns in first-appearance order (`1`, `5`, `7`, `4`, `3`, `6`), and a
# range selection such as `1`:`7` is positional: it would cover only `1`,
# `5` and `7`, leave `4`, `3`, `6` behind as stray columns, and silently drop
# the cities whose age sits in those columns.
alone_wide %>%
  pivot_longer(
    cols = !city,
    names_to = "result",
    values_to = "age",
    values_drop_na = TRUE
  )
## # A tibble: 10 × 3
## city result age
## <chr> <chr> <dbl>
## 1 Toronto 1 35
## 2 Wellsboro 5 43
## 3 Skowhegan 7 32
## 4 Portland 4 45
## 5 Espanola 3 33
## 6 Liberty 6 19
## 7 Fox 6 45
## 8 Fox Lake 7 23
## 9 Homer 7 30
## 10 Lopez Island 5 27
# Split `age` after its first character into two character columns: the
# leading digit and the remaining digit(s).
alone_small %>%
  separate(
    col = age,
    into = c("Decades Old", "Years into next Decade"),
    sep = 1
  )
## # A tibble: 10 × 4
## city `Decades Old` `Years into next Decade` result
## <chr> <chr> <chr> <dbl>
## 1 Toronto 3 5 1
## 2 Wellsboro 4 3 5
## 3 Skowhegan 3 2 7
## 4 Portland 4 5 4
## 5 Espanola 3 3 3
## 6 Liberty 1 9 6
## 7 Fox 4 5 6
## 8 Fox Lake 2 3 7
## 9 Homer 3 0 7
## 10 Lopez Island 2 7 5
# Paste `age` and `gender` together with "-" into a single `age_gender`
# column, which replaces the two source columns.
alonedata %>%
  unite(col = "age_gender", age, gender, sep = "-")
## # A tibble: 94 × 15
## season name age_gender city state country result days_lasted
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 1 Alan Kay 40-Male Blairsvi… Geor… United… 1 56
## 2 1 Sam Larson 22-Male Lincoln Nebr… United… 2 55
## 3 1 Mitch Mitchell 34-Male Bellingh… Mass… United… 3 43
## 4 1 Lucas Miller 32-Male Quasquet… Iowa United… 4 39
## 5 1 Dustin Feher 37-Male Pittsbur… Penn… United… 5 8
## 6 1 Brant McGee 44-Male Albemarle Nort… United… 6 6
## 7 1 Wayne Russell 46-Male Saint Jo… New … Canada 7 4
## 8 1 Joe Robinet 24-Male Windsor Onta… Canada 8 4
## 9 1 Chris Weatherman 41-Male Umatilla Flor… United… 9 1
## 10 1 Josh Chavez 31-Male Jackson Ohio United… 10 0
## # ℹ 84 more rows
## # ℹ 7 more variables: medically_evacuated <lgl>, reason_tapped_out <chr>,
## # reason_category <chr>, team <chr>, day_linked_up <dbl>, profession <chr>,
## # url <chr>
# Expand the sample to every combination of the observed `age` and `result`
# values; combinations absent from the sample get NA for `city`.
complete(alone_small, age, result)
## # A tibble: 54 × 3
## age result city
## <dbl> <dbl> <chr>
## 1 19 1 <NA>
## 2 19 3 <NA>
## 3 19 4 <NA>
## 4 19 5 <NA>
## 5 19 6 Liberty
## 6 19 7 <NA>
## 7 23 1 <NA>
## 8 23 3 <NA>
## 9 23 4 <NA>
## 10 23 5 <NA>
## # ℹ 44 more rows