Tidy Data

# Wide-format input: one row per country, with the years 1999 and 2000 stored
# as column names rather than as values. The cells hold the counts that end up
# in the `cases` column once this table is pivoted longer.
# NOTE(review): `table4a` is not defined in this file; presumably it is the
# example dataset shipped with tidyr -- confirm the package is attached.
table4a
## # A tibble: 3 × 3
##   country     `1999` `2000`
##   <chr>        <dbl>  <dbl>
## 1 Afghanistan    745   2666
## 2 Brazil       37737  80488
## 3 China       212258 213766
# Same wide layout as `table4a` (country x year columns); here the cells hold
# the figures that end up in the `population` column after pivoting.
table4b
## # A tibble: 3 × 3
##   country         `1999`     `2000`
##   <chr>            <dbl>      <dbl>
## 1 Afghanistan   19987071   20595360
## 2 Brazil       172006362  174504898
## 3 China       1272915272 1280428583

Pivoting

# Reshape table4a from wide to long. The two year columns collapse into a
# `year` key column (character, because it is built from column names) and a
# `cases` value column, leaving one row per country-year pair.
tidy4a <- pivot_longer(
  table4a,
  cols = c(`1999`, `2000`),
  names_to = "year",
  values_to = "cases"
)
tidy4a
## # A tibble: 6 × 3
##   country     year   cases
##   <chr>       <chr>  <dbl>
## 1 Afghanistan 1999     745
## 2 Afghanistan 2000    2666
## 3 Brazil      1999   37737
## 4 Brazil      2000   80488
## 5 China       1999  212258
## 6 China       2000  213766
# Reshape table4b from wide to long in the same way as the cases table: the
# year column names become values of `year` (character) and the cell contents
# land in `population`, one row per country-year pair.
tidy4b <- pivot_longer(
  table4b,
  cols = c(`1999`, `2000`),
  names_to = "year",
  values_to = "population"
)
tidy4b
## # A tibble: 6 × 3
##   country     year  population
##   <chr>       <chr>      <dbl>
## 1 Afghanistan 1999    19987071
## 2 Afghanistan 2000    20595360
## 3 Brazil      1999   172006362
## 4 Brazil      2000   174504898
## 5 China       1999  1272915272
## 6 China       2000  1280428583
# Combine the two long tables into one tidy table. Rows are matched on the
# (country, year) pair, so each observation carries both `cases` and
# `population`. Every row of tidy4a is kept (left join).
tidy_data <- tidy4a |>
  left_join(tidy4b, by = c("country", "year"))
tidy_data
## # A tibble: 6 × 4
##   country     year   cases population
##   <chr>       <chr>  <dbl>      <dbl>
## 1 Afghanistan 1999     745   19987071
## 2 Afghanistan 2000    2666   20595360
## 3 Brazil      1999   37737  172006362
## 4 Brazil      2000   80488  174504898
## 5 China       1999  212258 1272915272
## 6 China       2000  213766 1280428583

Separating and Uniting

# Toy input: a single `name` column holding "Last, First" strings.
demo <- tibble(name = c("Smith, John", "Perez, Maria"))

# Split `name` on ", " into two columns, `last` then `first`. The original
# `name` column is not kept in the result (see the printed output below).
# NOTE(review): tidyr marks separate() as superseded in favour of
# separate_wider_delim(); consider migrating once the minimum tidyr version
# used by this document is confirmed.
demo_separated <- separate(
  demo,
  col = name,
  into = c("last", "first"),
  sep = ", "
)

demo_separated
## # A tibble: 2 × 2
##   last  first
##   <chr> <chr>
## 1 Smith John 
## 2 Perez Maria
# Reverse of the split: paste `first` and `last` (in that order) into a single
# `name_joined` column separated by one space. The two source columns are
# consumed, leaving a one-column tibble.
demo_united <- unite(
  demo_separated,
  col = "name_joined",
  first, last,
  sep = " "
)

demo_united
## # A tibble: 2 × 1
##   name_joined
##   <chr>      
## 1 John Smith 
## 2 Maria Perez

Missing Values

# Small example with one missing value in each column: a numeric `x` and a
# character `y`. The NAs are written with their explicit types so each
# column's type is obvious from the literal alone.
df <- tibble(
  x = c(1, 2, NA_real_, 4),
  y = c("a", NA_character_, "c", "d")
)
df
## # A tibble: 4 × 2
##       x y    
##   <dbl> <chr>
## 1     1 a    
## 2     2 <NA> 
## 3    NA c    
## 4     4 d
# Keep only the complete rows: with no columns named, drop_na() removes every
# row that has an NA in any column (rows 2 and 3 here).
df_no_missing <- drop_na(df)
df_no_missing
## # A tibble: 2 × 2
##       x y    
##   <dbl> <chr>
## 1     1 a    
## 2     4 d
# Fill the gaps instead of dropping rows. Passing the whole data frame to
# replace_na() with a named list supplies one replacement per column:
# 0 for missing `x`, "missing" for missing `y`.
df_filled <- replace_na(df, list(x = 0, y = "missing"))
df_filled
## # A tibble: 4 × 2
##       x y      
##   <dbl> <chr>  
## 1     1 a      
## 2     2 missing
## 3     0 c      
## 4     4 d

Non-Tidy Data

# Untidy example: each column name after `country` packs two variables
# together as "<year>_<measure>" (measure is pop or gdp). The names are
# non-syntactic, hence the backticks.
world_bank <- tibble(
  country = c("Aland", "Bora"),
  `2000_pop` = c(1000, 2000),
  `2000_gdp` = c(50000, 70000),
  `2001_pop` = c(1100, 2100),
  `2001_gdp` = c(52000, 73000)
)
world_bank
## # A tibble: 2 × 5
##   country `2000_pop` `2000_gdp` `2001_pop` `2001_gdp`
##   <chr>        <dbl>      <dbl>      <dbl>      <dbl>
## 1 Aland         1000      50000       1100      52000
## 2 Bora          2000      70000       2100      73000
# Tidy the wide table in a single reshape. Every column except `country` is
# named "<year>_<measure>"; splitting the name on "_" sends the first piece to
# a `year` column (character), while the special ".value" entry tells
# pivot_longer() to use the second piece (pop / gdp) as the names of the
# output value columns. This yields one row per country-year with `pop` and
# `gdp` side by side, without first building a fully long table and then
# widening it again with pivot_wider().
tidy_wb <- world_bank |>
  pivot_longer(
    cols = -country,
    names_to = c("year", ".value"),
    names_sep = "_"
  )
tidy_wb
## # A tibble: 4 × 4
##   country year    pop   gdp
##   <chr>   <chr> <dbl> <dbl>
## 1 Aland   2000   1000 50000
## 2 Aland   2001   1100 52000
## 3 Bora    2000   2000 70000
## 4 Bora    2001   2100 73000