Week 8: Code Along 7

Tidy data

Pivoting

Wide to long form

# Stack the `1999` and `2000` columns of table4a into two columns:
# `year` receives the former column names, `cases` receives the values.
table4a_long <- table4a %>%
  pivot_longer(
    cols = c("1999", "2000"),
    names_to = "year",
    values_to = "cases"
  )

Long to wide form

# Reverse the previous step: every distinct `year` becomes its own column,
# filled with the matching `cases` value. The result is printed, not stored.
table4a_long %>%
  pivot_wider(names_from = year, values_from = cases)

## # A tibble: 3 × 3
##   country     `1999` `2000`
##   <chr>        <dbl>  <dbl>
## 1 Afghanistan    745   2666
## 2 Brazil       37737  80488
## 3 China       212258 213766

Separating and Uniting

Separating a column

# Split the single `rate` column of table3 into `cases` and `population`.
# No `sep` is supplied, so separate() falls back to its default separator.
table3_sep <- table3 %>%
  separate(
    col = rate,
    into = c("cases", "population")
  )

Unite two columns

# Glue `cases` through `population` back together into one `rate` column,
# joining the pieces with "/". The result is printed, not stored.
table3_sep %>%
  unite(col = "rate", cases:population, sep = "/")

## # A tibble: 6 × 3
##   country      year rate             
##   <chr>       <dbl> <chr>            
## 1 Afghanistan  1999 745/19987071     
## 2 Afghanistan  2000 2666/20595360    
## 3 Brazil       1999 37737/172006362  
## 4 Brazil       2000 80488/174504898  
## 5 China        1999 212258/1272915272
## 6 China        2000 213766/1280428583

Missing Values

# Quarterly returns used to illustrate missing values:
#   - the 2015 Q4 return is recorded as NA;
#   - there is no row at all for 2016 Q1.
stocks <- tibble(
  year = rep(c(2015, 2016), times = c(4, 3)),
  qtr = c(1, 2, 3, 4, 2, 3, 4),
  return = c(1.88, 0.59, 0.35, NA, 0.92, 0.17, 2.66)
)

# One column per year, one row per quarter. A quarter with no row for a
# given year (2016 Q1) shows up as NA in the widened table.
stocks %>%
  pivot_wider(
    names_from = year,
    values_from = return
  )

## # A tibble: 4 × 3
##     qtr `2015` `2016`
##   <dbl>  <dbl>  <dbl>
## 1     1   1.88  NA   
## 2     2   0.59   0.92
## 3     3   0.35   0.17
## 4     4  NA      2.66

# Bike prices by model and frame material. Model "C" has a steel entry only,
# so the model/material combinations are incomplete.
bikes <- tribble(
  ~bike_model, ~material,   ~price,
  "A",         "steel",     100,
  "A",         "aluminium", 200,
  "B",         "steel",     300,
  "B",         "aluminium", 400,
  "C",         "steel",     500
)

# One column per bike model, one row per material. The model/material pair
# that has no row in `bikes` (C, aluminium) appears as NA.
bikes %>%
  pivot_wider(
    names_from = bike_model,
    values_from = price
  )

## # A tibble: 2 × 4
##   material      A     B     C
##   <chr>     <dbl> <dbl> <dbl>
## 1 steel       100   300   500
## 2 aluminium   200   400    NA

# Expand `bikes` so every bike_model/material combination has a row;
# combinations absent from the data get NA for `price`.
bikes %>%
  complete(bike_model, material)

## # A tibble: 6 × 3
##   bike_model material  price
##   <chr>      <chr>     <dbl>
## 1 A          aluminium   200
## 2 A          steel       100
## 3 B          aluminium   400
## 4 B          steel       300
## 5 C          aluminium    NA
## 6 C          steel       500

# Treatment log in which `person` is written only on the first row of each
# person's run; the following rows leave it as NA.
treatment <- tibble(
  person = c("Derrick Whitmore", NA, NA, "Katherine Burke"),
  treatment = c(1, 2, 3, 1),
  response = c(7, 10, 9, 4)
)

# Carry the last non-missing `person` value downward so each NA row is
# labelled with the person named above it.
treatment %>%
  fill(
    person,
    .direction = "down"
  )

## # A tibble: 4 × 3
##   person           treatment response
##   <chr>                <dbl>    <dbl>
## 1 Derrick Whitmore         1        7
## 2 Derrick Whitmore         2       10
## 3 Derrick Whitmore         3        9
## 4 Katherine Burke          1        4

Non-Tidy Data

# Download the TidyTuesday 2022-01-25 `details.csv` file and print it.
# The result is not stored; this call only previews the data.
readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-01-25/details.csv"
)

## Rows: 21631 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): primary, description, boardgamecategory, boardgamemechanic, boardg...
## dbl (13): num, id, yearpublished, minplayers, maxplayers, playingtime, minpl...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

## # A tibble: 21,631 × 23
##      num     id primary          description yearpublished minplayers maxplayers
##    <dbl>  <dbl> <chr>            <chr>               <dbl>      <dbl>      <dbl>
##  1     0  30549 Pandemic         In Pandemi…          2008          2          4
##  2     1    822 Carcassonne      Carcassonn…          2000          2          5
##  3     2     13 Catan            In CATAN (…          1995          3          4
##  4     3  68448 7 Wonders        You are th…          2010          2          7
##  5     4  36218 Dominion         &quot;You …          2008          2          4
##  6     5   9209 Ticket to Ride   With elega…          2004          2          5
##  7     6 178900 Codenames        Codenames …          2015          2          8
##  8     7 167791 Terraforming Ma… In the 240…          2016          1          5
##  9     8 173346 7 Wonders Duel   In many wa…          2015          2          2
## 10     9  31260 Agricola         Descriptio…          2007          1          5
## # ℹ 21,621 more rows
## # ℹ 16 more variables: playingtime <dbl>, minplaytime <dbl>, maxplaytime <dbl>,
## #   minage <dbl>, boardgamecategory <chr>, boardgamemechanic <chr>,
## #   boardgamefamily <chr>, boardgameexpansion <chr>,
## #   boardgameimplementation <chr>, boardgamedesigner <chr>,
## #   boardgameartist <chr>, boardgamepublisher <chr>, owned <dbl>,
## #   trading <dbl>, wanting <dbl>, wishing <dbl>

# Download the same `details.csv` file again, this time keeping the result.
# NOTE(review): `data` is also the name of a base R function; a more
# descriptive name would be clearer, but it is kept in case later code
# refers to it.
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-01-25/details.csv"
)

## Rows: 21631 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): primary, description, boardgamecategory, boardgamemechanic, boardg...
## dbl (13): num, id, yearpublished, minplayers, maxplayers, playingtime, minpl...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Week 8: Code Along 7

Olivia Pendergast

2023-10-17

Tidy data

Pivoting

Wide to long form

Long to wide form

Separating and Uniting

Separating a column

Unite two columns

Missing Values

Non-Tidy Data