Ch 10: Introduction

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Creating tibbles

as_tibble(iris)

## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ℹ 140 more rows

#> # A tibble: 150 × 5
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa 
#> 4          4.6         3.1          1.5         0.2 setosa 
#> 5          5           3.6          1.4         0.2 setosa 
#> 6          5.4         3.9          1.7         0.4 setosa 
#> # ℹ 144 more rows

tibble(
  x = 1:5, 
  y = 1, 
  z = x ^ 2 + y
)

## # A tibble: 5 × 3
##       x     y     z
##   <int> <dbl> <dbl>
## 1     1     1     2
## 2     2     1     5
## 3     3     1    10
## 4     4     1    17
## 5     5     1    26

#> # A tibble: 5 × 3
#>       x     y     z
#>   <int> <dbl> <dbl>
#> 1     1     1     2
#> 2     2     1     5
#> 3     3     1    10
#> 4     4     1    17
#> 5     5     1    26

Tibbles vs data.frame

tibble(
  a = lubridate::now() + runif(1e3) * 86400,
  b = lubridate::today() + runif(1e3) * 30,
  c = 1:1e3,
  d = runif(1e3),
  e = sample(letters, 1e3, replace = TRUE)
)

## # A tibble: 1,000 × 5
##    a                   b              c      d e    
##    <dttm>              <date>     <int>  <dbl> <chr>
##  1 2025-03-12 22:04:38 2025-04-03     1 0.189  a    
##  2 2025-03-13 09:06:17 2025-04-10     2 0.400  s    
##  3 2025-03-12 22:50:48 2025-04-01     3 0.0174 m    
##  4 2025-03-13 10:06:48 2025-03-30     4 0.382  f    
##  5 2025-03-12 13:14:43 2025-03-13     5 0.181  y    
##  6 2025-03-12 23:40:13 2025-03-16     6 0.725  c    
##  7 2025-03-12 19:03:22 2025-04-09     7 0.0930 n    
##  8 2025-03-13 10:52:38 2025-03-23     8 0.857  x    
##  9 2025-03-13 11:28:02 2025-03-19     9 0.707  m    
## 10 2025-03-12 13:39:25 2025-03-16    10 0.261  x    
## # ℹ 990 more rows

#> # A tibble: 1,000 × 5
#>   a                   b              c     d e    
#>   <dttm>              <date>     <int> <dbl> <chr>
#> 1 2025-02-18 19:27:10 2025-02-25     1 0.368 n    
#> 2 2025-02-19 13:32:20 2025-03-02     2 0.612 l    
#> 3 2025-02-19 07:55:59 2025-03-12     3 0.415 p    
#> 4 2025-02-18 21:17:16 2025-03-11     4 0.212 m    
#> 5 2025-02-18 17:41:33 2025-03-08     5 0.733 i    
#> 6 2025-02-19 04:42:30 2025-03-04     6 0.460 n    
#> # ℹ 994 more rows

nycflights13::flights %>% 
  print(n = 10, width = Inf)

## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
##    arr_delay carrier flight tailnum origin dest  air_time distance  hour minute
##        <dbl> <chr>    <int> <chr>   <chr>  <chr>    <dbl>    <dbl> <dbl>  <dbl>
##  1        11 UA        1545 N14228  EWR    IAH        227     1400     5     15
##  2        20 UA        1714 N24211  LGA    IAH        227     1416     5     29
##  3        33 AA        1141 N619AA  JFK    MIA        160     1089     5     40
##  4       -18 B6         725 N804JB  JFK    BQN        183     1576     5     45
##  5       -25 DL         461 N668DN  LGA    ATL        116      762     6      0
##  6        12 UA        1696 N39463  EWR    ORD        150      719     5     58
##  7        19 B6         507 N516JB  EWR    FLL        158     1065     6      0
##  8       -14 EV        5708 N829AS  LGA    IAD         53      229     6      0
##  9        -8 B6          79 N593JB  JFK    MCO        140      944     6      0
## 10         8 AA         301 N3ALAA  LGA    ORD        138      733     6      0
##    time_hour          
##    <dttm>             
##  1 2013-01-01 05:00:00
##  2 2013-01-01 05:00:00
##  3 2013-01-01 05:00:00
##  4 2013-01-01 05:00:00
##  5 2013-01-01 06:00:00
##  6 2013-01-01 05:00:00
##  7 2013-01-01 06:00:00
##  8 2013-01-01 06:00:00
##  9 2013-01-01 06:00:00
## 10 2013-01-01 06:00:00
## # ℹ 336,766 more rows

df <- tibble(
  x = runif(5),
  y = rnorm(5)
)

# Extract by name
df$x

## [1] 0.23371203 0.06299367 0.02766269 0.44889860 0.01043344

#> [1] 0.73296674 0.23436542 0.66035540 0.03285612 0.46049161
df[["x"]]

## [1] 0.23371203 0.06299367 0.02766269 0.44889860 0.01043344

#> [1] 0.73296674 0.23436542 0.66035540 0.03285612 0.46049161

# Extract by position
df[[1]]

## [1] 0.23371203 0.06299367 0.02766269 0.44889860 0.01043344

#> [1] 0.73296674 0.23436542 0.66035540 0.03285612 0.46049161

df %>% .$x

## [1] 0.23371203 0.06299367 0.02766269 0.44889860 0.01043344

#> [1] 0.73296674 0.23436542 0.66035540 0.03285612 0.46049161
df %>% .[["x"]]

## [1] 0.23371203 0.06299367 0.02766269 0.44889860 0.01043344

#> [1] 0.73296674 0.23436542 0.66035540 0.03285612 0.46049161

Interacting with older code

Ch. 11

Getting Started

Parsing a Vector

str(parse_logical(c("TRUE", "FALSE", "NA")))

##  logi [1:3] TRUE FALSE NA

#>  logi [1:3] TRUE FALSE NA
str(parse_integer(c("1", "2", "3")))

##  int [1:3] 1 2 3

#>  int [1:3] 1 2 3
str(parse_date(c("2010-01-01", "1979-10-14")))

##  Date[1:2], format: "2010-01-01" "1979-10-14"

#>  Date[1:2], format: "2010-01-01" "1979-10-14"

parse_integer(c("1", "231", ".", "456"), na = ".")

## [1]   1 231  NA 456

#> [1]   1 231  NA 456

Numbers

parse_double("1.23")

## [1] 1.23

#> [1] 1.23
parse_double("1,23", locale = locale(decimal_mark = ","))

## [1] 1.23

#> [1] 1.23

parse_number("$100")

## [1] 100

#> [1] 100
parse_number("20%")

## [1] 20

#> [1] 20
parse_number("It cost $123.45")

## [1] 123.45

#> [1] 123.45

Strings

charToRaw("Hadley")

## [1] 48 61 64 6c 65 79

#> [1] 48 61 64 6c 65 79

x1 <- "El Ni\xf1o was particularly bad this year"
x2 <- "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd"

x1

## [1] "El Ni\xf1o was particularly bad this year"

#> [1] "El Ni\xf1o was particularly bad this year"
x2

## [1] "\x82\xb1\x82\xf1\x82ɂ\xbf\x82\xcd"

#> [1] "\x82\xb1\x82\xf1\x82ɂ\xbf\x82\xcd"

parse_character(x1, locale = locale(encoding = "Latin1"))

## [1] "El Niño was particularly bad this year"

#> [1] "El Niño was particularly bad this year"
parse_character(x2, locale = locale(encoding = "Shift-JIS"))

## [1] "こんにちは"

#> [1] "こんにちは"

Factors

fruit <- c("apple", "banana")
parse_factor(c("apple", "banana", "bananana"), levels = fruit)

## Warning: 1 parsing failure.
## row col           expected   actual
##   3  -- value in level set bananana

## [1] apple  banana <NA>  
## attr(,"problems")

## Warning: `...` must be empty in `format.tbl()`
## Caused by error in `format_tbl()`:
## ! `...` must be empty.
## ✖ Problematic argument:
## • quote = FALSE

## # A tibble: 1 × 4
##     row   col expected           actual  
##   <int> <int> <chr>              <chr>   
## 1     3    NA value in level set bananana
## Levels: apple banana

#> Warning: 1 parsing failure.
#> row col           expected   actual
#>   3  -- value in level set bananana
#> [1] apple  banana <NA>  
#> attr(,"problems")
#> Warning: `...` must be empty in `format.tbl()`
#> Caused by error in `format_tbl()`:
#> ! `...` must be empty.
#> ✖ Problematic argument:
#> • quote = FALSE
#> # A tibble: 1 × 4
#>     row   col expected           actual  
#>   <int> <int> <chr>              <chr>   
#> 1     3    NA value in level set bananana
#> Levels: apple banana

Dates, date-times, and times

parse_datetime("2010-10-01T2010")

## [1] "2010-10-01 20:10:00 UTC"

#> [1] "2010-10-01 20:10:00 UTC"
# If time is omitted, it will be set to midnight
parse_datetime("20101010")

## [1] "2010-10-10 UTC"

#> [1] "2010-10-10 UTC"

library(hms)

## 
## Attaching package: 'hms'

## The following object is masked from 'package:lubridate':
## 
##     hms

#> 
#> Attaching package: 'hms'
#> The following object is masked from 'package:lubridate':
#> 
#>     hms
parse_time("01:10 am")

## 01:10:00

#> 01:10:00
parse_time("20:10:01")

## 20:10:01

#> 20:10:01

parse_date("01/02/15", "%m/%d/%y")

## [1] "2015-01-02"

#> [1] "2015-01-02"
parse_date("01/02/15", "%d/%m/%y")

## [1] "2015-02-01"

#> [1] "2015-02-01"
parse_date("01/02/15", "%y/%m/%d")

## [1] "2001-02-15"

#> [1] "2001-02-15"

Parsing a File

guess_parser("2010-10-01")

## [1] "date"

#> [1] "date"
guess_parser("15:01")

## [1] "time"

#> [1] "time"
guess_parser(c("TRUE", "FALSE"))

## [1] "logical"

#> [1] "logical"
guess_parser(c("1", "5", "9"))

## [1] "double"

#> [1] "double"
guess_parser(c("12,352,561"))

## [1] "number"

#> [1] "number"

str(parse_guess("2010-10-10"))

##  Date[1:1], format: "2010-10-10"

#>  Date[1:1], format: "2010-10-10"

challenge2 <- read_csv(readr_example("challenge.csv"), guess_max = 1001)

## Rows: 2000 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (1): x
## date (1): y
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#> Rows: 2000 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl  (1): x
#> date (1): y
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
challenge2

## # A tibble: 2,000 × 2
##        x y     
##    <dbl> <date>
##  1   404 NA    
##  2  4172 NA    
##  3  3004 NA    
##  4   787 NA    
##  5    37 NA    
##  6  2332 NA    
##  7  2489 NA    
##  8  1449 NA    
##  9  3665 NA    
## 10  3863 NA    
## # ℹ 1,990 more rows

#> # A tibble: 2,000 × 2
#>       x y     
#>   <dbl> <date>
#> 1   404 NA    
#> 2  4172 NA    
#> 3  3004 NA    
#> 4   787 NA    
#> 5    37 NA    
#> 6  2332 NA    
#> # ℹ 1,994 more rows

challenge2 <- read_csv(readr_example("challenge.csv"), 
  col_types = cols(.default = col_character())
)

df <- tribble(
  ~x,  ~y,
  "1", "1.21",
  "2", "2.32",
  "3", "4.56"
)
df

## # A tibble: 3 × 2
##   x     y    
##   <chr> <chr>
## 1 1     1.21 
## 2 2     2.32 
## 3 3     4.56

#> # A tibble: 3 × 2
#>   x     y    
#>   <chr> <chr>
#> 1 1     1.21 
#> 2 2     2.32 
#> 3 3     4.56

# Note the column types
type_convert(df)

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   x = col_double(),
##   y = col_double()
## )

## # A tibble: 3 × 2
##       x     y
##   <dbl> <dbl>
## 1     1  1.21
## 2     2  2.32
## 3     3  4.56

#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   x = col_double(),
#>   y = col_double()
#> )
#> # A tibble: 3 × 2
#>       x     y
#>   <dbl> <dbl>
#> 1     1  1.21
#> 2     2  2.32
#> 3     3  4.56

Ch 12

Tidy Data

Pivoting

Longer

table4a

## # A tibble: 3 × 3
##   country     `1999` `2000`
##   <chr>        <dbl>  <dbl>
## 1 Afghanistan    745   2666
## 2 Brazil       37737  80488
## 3 China       212258 213766

#> # A tibble: 3 × 3
#>   country     `1999` `2000`
#>   <chr>        <dbl>  <dbl>
#> 1 Afghanistan    745   2666
#> 2 Brazil       37737  80488
#> 3 China       212258 213766

table4a %>% 
  pivot_longer(c(`1999`, `2000`), names_to = "year", values_to = "cases")

## # A tibble: 6 × 3
##   country     year   cases
##   <chr>       <chr>  <dbl>
## 1 Afghanistan 1999     745
## 2 Afghanistan 2000    2666
## 3 Brazil      1999   37737
## 4 Brazil      2000   80488
## 5 China       1999  212258
## 6 China       2000  213766

#> # A tibble: 6 × 3
#>   country     year   cases
#>   <chr>       <chr>  <dbl>
#> 1 Afghanistan 1999     745
#> 2 Afghanistan 2000    2666
#> 3 Brazil      1999   37737
#> 4 Brazil      2000   80488
#> 5 China       1999  212258
#> 6 China       2000  213766

table4b %>% 
  pivot_longer(c(`1999`, `2000`), names_to = "year", values_to = "population")

## # A tibble: 6 × 3
##   country     year  population
##   <chr>       <chr>      <dbl>
## 1 Afghanistan 1999    19987071
## 2 Afghanistan 2000    20595360
## 3 Brazil      1999   172006362
## 4 Brazil      2000   174504898
## 5 China       1999  1272915272
## 6 China       2000  1280428583

#> # A tibble: 6 × 3
#>   country     year  population
#>   <chr>       <chr>      <dbl>
#> 1 Afghanistan 1999    19987071
#> 2 Afghanistan 2000    20595360
#> 3 Brazil      1999   172006362
#> 4 Brazil      2000   174504898
#> 5 China       1999  1272915272
#> 6 China       2000  1280428583

tidy4a <- table4a %>% 
  pivot_longer(c(`1999`, `2000`), names_to = "year", values_to = "cases")
tidy4b <- table4b %>% 
  pivot_longer(c(`1999`, `2000`), names_to = "year", values_to = "population")
left_join(tidy4a, tidy4b)

## Joining with `by = join_by(country, year)`

## # A tibble: 6 × 4
##   country     year   cases population
##   <chr>       <chr>  <dbl>      <dbl>
## 1 Afghanistan 1999     745   19987071
## 2 Afghanistan 2000    2666   20595360
## 3 Brazil      1999   37737  172006362
## 4 Brazil      2000   80488  174504898
## 5 China       1999  212258 1272915272
## 6 China       2000  213766 1280428583

#> Joining with `by = join_by(country, year)`
#> # A tibble: 6 × 4
#>   country     year   cases population
#>   <chr>       <chr>  <dbl>      <dbl>
#> 1 Afghanistan 1999     745   19987071
#> 2 Afghanistan 2000    2666   20595360
#> 3 Brazil      1999   37737  172006362
#> 4 Brazil      2000   80488  174504898
#> 5 China       1999  212258 1272915272
#> 6 China       2000  213766 1280428583

Wider

table2

## # A tibble: 12 × 4
##    country      year type            count
##    <chr>       <dbl> <chr>           <dbl>
##  1 Afghanistan  1999 cases             745
##  2 Afghanistan  1999 population   19987071
##  3 Afghanistan  2000 cases            2666
##  4 Afghanistan  2000 population   20595360
##  5 Brazil       1999 cases           37737
##  6 Brazil       1999 population  172006362
##  7 Brazil       2000 cases           80488
##  8 Brazil       2000 population  174504898
##  9 China        1999 cases          212258
## 10 China        1999 population 1272915272
## 11 China        2000 cases          213766
## 12 China        2000 population 1280428583

#> # A tibble: 12 × 4
#>   country      year type           count
#>   <chr>       <dbl> <chr>          <dbl>
#> 1 Afghanistan  1999 cases            745
#> 2 Afghanistan  1999 population  19987071
#> 3 Afghanistan  2000 cases           2666
#> 4 Afghanistan  2000 population  20595360
#> 5 Brazil       1999 cases          37737
#> 6 Brazil       1999 population 172006362
#> # ℹ 6 more rows

table2 %>%
    pivot_wider(names_from = type, values_from = count)

## # A tibble: 6 × 4
##   country      year  cases population
##   <chr>       <dbl>  <dbl>      <dbl>
## 1 Afghanistan  1999    745   19987071
## 2 Afghanistan  2000   2666   20595360
## 3 Brazil       1999  37737  172006362
## 4 Brazil       2000  80488  174504898
## 5 China        1999 212258 1272915272
## 6 China        2000 213766 1280428583

#> # A tibble: 6 × 4
#>   country      year  cases population
#>   <chr>       <dbl>  <dbl>      <dbl>
#> 1 Afghanistan  1999    745   19987071
#> 2 Afghanistan  2000   2666   20595360
#> 3 Brazil       1999  37737  172006362
#> 4 Brazil       2000  80488  174504898
#> 5 China        1999 212258 1272915272
#> 6 China        2000 213766 1280428583

Separating and uniting

table3

## # A tibble: 6 × 3
##   country      year rate             
##   <chr>       <dbl> <chr>            
## 1 Afghanistan  1999 745/19987071     
## 2 Afghanistan  2000 2666/20595360    
## 3 Brazil       1999 37737/172006362  
## 4 Brazil       2000 80488/174504898  
## 5 China        1999 212258/1272915272
## 6 China        2000 213766/1280428583

#> # A tibble: 6 × 3
#>   country      year rate             
#>   <chr>       <dbl> <chr>            
#> 1 Afghanistan  1999 745/19987071     
#> 2 Afghanistan  2000 2666/20595360    
#> 3 Brazil       1999 37737/172006362  
#> 4 Brazil       2000 80488/174504898  
#> 5 China        1999 212258/1272915272
#> 6 China        2000 213766/1280428583

table3 %>% 
  separate(rate, into = c("cases", "population"))

## # A tibble: 6 × 4
##   country      year cases  population
##   <chr>       <dbl> <chr>  <chr>     
## 1 Afghanistan  1999 745    19987071  
## 2 Afghanistan  2000 2666   20595360  
## 3 Brazil       1999 37737  172006362 
## 4 Brazil       2000 80488  174504898 
## 5 China        1999 212258 1272915272
## 6 China        2000 213766 1280428583

#> # A tibble: 6 × 4
#>   country      year cases  population
#>   <chr>       <dbl> <chr>  <chr>     
#> 1 Afghanistan  1999 745    19987071  
#> 2 Afghanistan  2000 2666   20595360  
#> 3 Brazil       1999 37737  172006362 
#> 4 Brazil       2000 80488  174504898 
#> 5 China        1999 212258 1272915272
#> 6 China        2000 213766 1280428583

table3 %>% 
  separate(rate, into = c("cases", "population"), convert = TRUE)

## # A tibble: 6 × 4
##   country      year  cases population
##   <chr>       <dbl>  <int>      <int>
## 1 Afghanistan  1999    745   19987071
## 2 Afghanistan  2000   2666   20595360
## 3 Brazil       1999  37737  172006362
## 4 Brazil       2000  80488  174504898
## 5 China        1999 212258 1272915272
## 6 China        2000 213766 1280428583

#> # A tibble: 6 × 4
#>   country      year  cases population
#>   <chr>       <dbl>  <int>      <int>
#> 1 Afghanistan  1999    745   19987071
#> 2 Afghanistan  2000   2666   20595360
#> 3 Brazil       1999  37737  172006362
#> 4 Brazil       2000  80488  174504898
#> 5 China        1999 212258 1272915272
#> 6 China        2000 213766 1280428583

table3 %>% 
  separate(year, into = c("century", "year"), sep = 2)

## # A tibble: 6 × 4
##   country     century year  rate             
##   <chr>       <chr>   <chr> <chr>            
## 1 Afghanistan 19      99    745/19987071     
## 2 Afghanistan 20      00    2666/20595360    
## 3 Brazil      19      99    37737/172006362  
## 4 Brazil      20      00    80488/174504898  
## 5 China       19      99    212258/1272915272
## 6 China       20      00    213766/1280428583

#> # A tibble: 6 × 4
#>   country     century year  rate             
#>   <chr>       <chr>   <chr> <chr>            
#> 1 Afghanistan 19      99    745/19987071     
#> 2 Afghanistan 20      00    2666/20595360    
#> 3 Brazil      19      99    37737/172006362  
#> 4 Brazil      20      00    80488/174504898  
#> 5 China       19      99    212258/1272915272
#> 6 China       20      00    213766/1280428583

Unite

table5 %>% 
  unite(new, century, year)

## # A tibble: 6 × 3
##   country     new   rate             
##   <chr>       <chr> <chr>            
## 1 Afghanistan 19_99 745/19987071     
## 2 Afghanistan 20_00 2666/20595360    
## 3 Brazil      19_99 37737/172006362  
## 4 Brazil      20_00 80488/174504898  
## 5 China       19_99 212258/1272915272
## 6 China       20_00 213766/1280428583

#> # A tibble: 6 × 3
#>   country     new   rate             
#>   <chr>       <chr> <chr>            
#> 1 Afghanistan 19_99 745/19987071     
#> 2 Afghanistan 20_00 2666/20595360    
#> 3 Brazil      19_99 37737/172006362  
#> 4 Brazil      20_00 80488/174504898  
#> 5 China       19_99 212258/1272915272
#> 6 China       20_00 213766/1280428583

table5 %>% 
  unite(new, century, year, sep = "")

## # A tibble: 6 × 3
##   country     new   rate             
##   <chr>       <chr> <chr>            
## 1 Afghanistan 1999  745/19987071     
## 2 Afghanistan 2000  2666/20595360    
## 3 Brazil      1999  37737/172006362  
## 4 Brazil      2000  80488/174504898  
## 5 China       1999  212258/1272915272
## 6 China       2000  213766/1280428583

#> # A tibble: 6 × 3
#>   country     new   rate             
#>   <chr>       <chr> <chr>            
#> 1 Afghanistan 1999  745/19987071     
#> 2 Afghanistan 2000  2666/20595360    
#> 3 Brazil      1999  37737/172006362  
#> 4 Brazil      2000  80488/174504898  
#> 5 China       1999  212258/1272915272
#> 6 China       2000  213766/1280428583

Missing Values

stocks <- tibble(
  year   = c(2015, 2015, 2015, 2015, 2016, 2016, 2016),
  qtr    = c(   1,    2,    3,    4,    2,    3,    4),
  return = c(1.88, 0.59, 0.35,   NA, 0.92, 0.17, 2.66)
)

stocks %>% 
  pivot_wider(names_from = year, values_from = return)

## # A tibble: 4 × 3
##     qtr `2015` `2016`
##   <dbl>  <dbl>  <dbl>
## 1     1   1.88  NA   
## 2     2   0.59   0.92
## 3     3   0.35   0.17
## 4     4  NA      2.66

#> # A tibble: 4 × 3
#>     qtr `2015` `2016`
#>   <dbl>  <dbl>  <dbl>
#> 1     1   1.88  NA   
#> 2     2   0.59   0.92
#> 3     3   0.35   0.17
#> 4     4  NA      2.66

stocks %>% 
  complete(year, qtr)

## # A tibble: 8 × 3
##    year   qtr return
##   <dbl> <dbl>  <dbl>
## 1  2015     1   1.88
## 2  2015     2   0.59
## 3  2015     3   0.35
## 4  2015     4  NA   
## 5  2016     1  NA   
## 6  2016     2   0.92
## 7  2016     3   0.17
## 8  2016     4   2.66

#> # A tibble: 8 × 3
#>    year   qtr return
#>   <dbl> <dbl>  <dbl>
#> 1  2015     1   1.88
#> 2  2015     2   0.59
#> 3  2015     3   0.35
#> 4  2015     4  NA   
#> 5  2016     1  NA   
#> 6  2016     2   0.92
#> # ℹ 2 more rows

treatment <- tribble(
  ~ person,           ~ treatment, ~response,
  "Derrick Whitmore", 1,           7,
  NA,                 2,           10,
  NA,                 3,           9,
  "Katherine Burke",  1,           4
)

treatment %>% 
  fill(person)

## # A tibble: 4 × 3
##   person           treatment response
##   <chr>                <dbl>    <dbl>
## 1 Derrick Whitmore         1        7
## 2 Derrick Whitmore         2       10
## 3 Derrick Whitmore         3        9
## 4 Katherine Burke          1        4

#> # A tibble: 4 × 3
#>   person           treatment response
#>   <chr>                <dbl>    <dbl>
#> 1 Derrick Whitmore         1        7
#> 2 Derrick Whitmore         2       10
#> 3 Derrick Whitmore         3        9
#> 4 Katherine Burke          1        4

CodeAlong7

Josh Crosswhite

2025-03-12

Ch 10: Introduction

Creating tibbles

Tibbles vs data.frame

Interacting with older code

Ch. 11

Getting Started

Parsing a Vector

Numbers

Strings

Factors

Dates, date-times, and times

Parsing a File

Ch 12

Tidy Data

Pivoting

Longer

Wider

Separating and uniting

Unite

Missing Values

Non-Tidy Data