# Set the knitr chunk option `echo = TRUE` as the default for the chunks below.
knitr::opts_chunk$set(echo = TRUE)

# Load package
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

Import your data

# Read the TidyTuesday (2023-01-24) survivalists CSV straight from GitHub.
alonedata <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-01-24/survivalists.csv"
)
## Rows: 94 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, gender, city, state, country, reason_tapped_out, reason_cate...
## dbl  (5): season, age, result, days_lasted, day_linked_up
## lgl  (1): medically_evacuated
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the imported tibble to inspect its columns and first rows.
alonedata
## # A tibble: 94 × 16
##    season name               age gender city    state country result days_lasted
##     <dbl> <chr>            <dbl> <chr>  <chr>   <chr> <chr>    <dbl>       <dbl>
##  1      1 Alan Kay            40 Male   Blairs… Geor… United…      1          56
##  2      1 Sam Larson          22 Male   Lincoln Nebr… United…      2          55
##  3      1 Mitch Mitchell      34 Male   Bellin… Mass… United…      3          43
##  4      1 Lucas Miller        32 Male   Quasqu… Iowa  United…      4          39
##  5      1 Dustin Feher        37 Male   Pittsb… Penn… United…      5           8
##  6      1 Brant McGee         44 Male   Albema… Nort… United…      6           6
##  7      1 Wayne Russell       46 Male   Saint … New … Canada       7           4
##  8      1 Joe Robinet         24 Male   Windsor Onta… Canada       8           4
##  9      1 Chris Weatherman    41 Male   Umatil… Flor… United…      9           1
## 10      1 Josh Chavez         31 Male   Jackson Ohio  United…     10           0
## # ℹ 84 more rows
## # ℹ 7 more variables: medically_evacuated <lgl>, reason_tapped_out <chr>,
## #   reason_category <chr>, team <chr>, day_linked_up <dbl>, profession <chr>,
## #   url <chr>
# Fix the RNG seed so the random 10-row sample below is reproducible.
set.seed(123)

# Keep only the three columns used in the reshaping examples and draw
# 10 random rows from them.
alone_small <- alonedata %>%
  select(city, age, result) %>%
  sample_n(size = 10)

alone_small
## # A tibble: 10 × 3
##    city           age result
##    <chr>        <dbl>  <dbl>
##  1 Toronto         35      1
##  2 Wellsboro       43      5
##  3 Skowhegan       32      7
##  4 Portland        45      4
##  5 Espanola        33      3
##  6 Liberty         19      6
##  7 Fox             45      6
##  8 Fox Lake        23      7
##  9 Homer           30      7
## 10 Lopez Island    27      5

## Pivoting

Long to wide form

# Turn each distinct `result` value into its own column holding `age`;
# `city` is the remaining column and identifies each row.
alone_wide <- alone_small %>%
  pivot_wider(values_from = age, names_from = result)

alone_wide
## # A tibble: 10 × 7
##    city           `1`   `5`   `7`   `4`   `3`   `6`
##    <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Toronto         35    NA    NA    NA    NA    NA
##  2 Wellsboro       NA    43    NA    NA    NA    NA
##  3 Skowhegan       NA    NA    32    NA    NA    NA
##  4 Portland        NA    NA    NA    45    NA    NA
##  5 Espanola        NA    NA    NA    NA    33    NA
##  6 Liberty         NA    NA    NA    NA    NA    19
##  7 Fox             NA    NA    NA    NA    NA    45
##  8 Fox Lake        NA    NA    23    NA    NA    NA
##  9 Homer           NA    NA    30    NA    NA    NA
## 10 Lopez Island    NA    27    NA    NA    NA    NA

Wide to long form

# Reverse the pivot_wider() step: gather every result column back into a
# `result` / `age` pair, one row per city.
#
# Select the columns with `!city` rather than a range such as '1':'7'.
# A tidyselect range is positional, and the wide columns appear in
# first-seen order (`1`, `5`, `7`, `4`, `3`, `6`), so '1':'7' would pick up
# only `1`, `5` and `7`, leave `4`, `3` and `6` unpivoted, and lose the rows
# whose only value sits in those columns.
#
# `result` comes back as character because it is rebuilt from column names.
alone_wide %>%
  pivot_longer(
    cols = !city,
    names_to = "result",
    values_to = "age",
    values_drop_na = TRUE
  )
## # A tibble: 10 × 3
##    city         result   age
##    <chr>        <chr>  <dbl>
##  1 Toronto      1         35
##  2 Wellsboro    5         43
##  3 Skowhegan    7         32
##  4 Portland     4         45
##  5 Espanola     3         33
##  6 Liberty      6         19
##  7 Fox          6         45
##  8 Fox Lake     7         23
##  9 Homer        7         30
## 10 Lopez Island 5         27

Separating and Uniting

Separate a column

# Split `age` after its first character into two character columns.
alone_small %>%
  separate(
    col = age,
    into = c("Decades Old", "Years into next Decade"),
    sep = 1
  )
## # A tibble: 10 × 4
##    city         `Decades Old` `Years into next Decade` result
##    <chr>        <chr>         <chr>                     <dbl>
##  1 Toronto      3             5                             1
##  2 Wellsboro    4             3                             5
##  3 Skowhegan    3             2                             7
##  4 Portland     4             5                             4
##  5 Espanola     3             3                             3
##  6 Liberty      1             9                             6
##  7 Fox          4             5                             6
##  8 Fox Lake     2             3                             7
##  9 Homer        3             0                             7
## 10 Lopez Island 2             7                             5

Unite two columns

# Paste `age` and `gender` together into one "age_gender" column, joined
# by "-"; the two input columns are replaced by the combined one.
alonedata %>%
  unite("age_gender", age, gender, sep = "-")
## # A tibble: 94 × 15
##    season name             age_gender city      state country result days_lasted
##     <dbl> <chr>            <chr>      <chr>     <chr> <chr>    <dbl>       <dbl>
##  1      1 Alan Kay         40-Male    Blairsvi… Geor… United…      1          56
##  2      1 Sam Larson       22-Male    Lincoln   Nebr… United…      2          55
##  3      1 Mitch Mitchell   34-Male    Bellingh… Mass… United…      3          43
##  4      1 Lucas Miller     32-Male    Quasquet… Iowa  United…      4          39
##  5      1 Dustin Feher     37-Male    Pittsbur… Penn… United…      5           8
##  6      1 Brant McGee      44-Male    Albemarle Nort… United…      6           6
##  7      1 Wayne Russell    46-Male    Saint Jo… New … Canada       7           4
##  8      1 Joe Robinet      24-Male    Windsor   Onta… Canada       8           4
##  9      1 Chris Weatherman 41-Male    Umatilla  Flor… United…      9           1
## 10      1 Josh Chavez      31-Male    Jackson   Ohio  United…     10           0
## # ℹ 84 more rows
## # ℹ 7 more variables: medically_evacuated <lgl>, reason_tapped_out <chr>,
## #   reason_category <chr>, team <chr>, day_linked_up <dbl>, profession <chr>,
## #   url <chr>

Missing Values

# Expand to every combination of the observed `age` and `result` values;
# combinations that do not occur in the data get NA in `city`.
complete(alone_small, age, result)
## # A tibble: 54 × 3
##      age result city   
##    <dbl>  <dbl> <chr>  
##  1    19      1 <NA>   
##  2    19      3 <NA>   
##  3    19      4 <NA>   
##  4    19      5 <NA>   
##  5    19      6 Liberty
##  6    19      7 <NA>   
##  7    23      1 <NA>   
##  8    23      3 <NA>   
##  9    23      4 <NA>   
## 10    23      5 <NA>   
## # ℹ 44 more rows