Tidy Data
# Show the wide-format input: one row per country and one column per year.
# The cells are the values that get named `cases` when this table is pivoted.
table4a
## # A tibble: 3 × 3
## country `1999` `2000`
## <chr> <dbl> <dbl>
## 1 Afghanistan 745 2666
## 2 Brazil 37737 80488
## 3 China 212258 213766
# Show the companion table with the same wide layout (country x year columns).
# The cells are the values that get named `population` when it is pivoted.
table4b
## # A tibble: 3 × 3
## country `1999` `2000`
## <chr> <dbl> <dbl>
## 1 Afghanistan 19987071 20595360
## 2 Brazil 172006362 174504898
## 3 China 1272915272 1280428583
Pivoting
# Reshape table4a from wide to long: the `1999` and `2000` column headers
# become the values of a new `year` column, and the cells underneath them are
# stacked into `cases`. Because the years come from column names, `year` is
# a character column.
tidy4a <- pivot_longer(
  table4a,
  cols = c(`1999`, `2000`),
  names_to = "year",
  values_to = "cases"
)
tidy4a
## # A tibble: 6 × 3
## country year cases
## <chr> <chr> <dbl>
## 1 Afghanistan 1999 745
## 2 Afghanistan 2000 2666
## 3 Brazil 1999 37737
## 4 Brazil 2000 80488
## 5 China 1999 212258
## 6 China 2000 213766
# Same wide-to-long reshape for table4b: year headers move into a `year`
# column (character, since they originate from column names) and the cell
# values are collected in `population`.
tidy4b <- pivot_longer(
  table4b,
  cols = c(`1999`, `2000`),
  names_to = "year",
  values_to = "population"
)
tidy4b
## # A tibble: 6 × 3
## country year population
## <chr> <chr> <dbl>
## 1 Afghanistan 1999 19987071
## 2 Afghanistan 2000 20595360
## 3 Brazil 1999 172006362
## 4 Brazil 2000 174504898
## 5 China 1999 1272915272
## 6 China 2000 1280428583
# Bring cases and population together in one table by matching rows on the
# (country, year) pair shared by both long tables.
tidy_data <- tidy4a |>
  left_join(tidy4b, by = c("country", "year"))
tidy_data
## # A tibble: 6 × 4
## country year cases population
## <chr> <chr> <dbl> <dbl>
## 1 Afghanistan 1999 745 19987071
## 2 Afghanistan 2000 2666 20595360
## 3 Brazil 1999 37737 172006362
## 4 Brazil 2000 80488 174504898
## 5 China 1999 212258 1272915272
## 6 China 2000 213766 1280428583
Separating and Uniting
# Toy input: full names stored in a single "Last, First" string column.
demo <- tibble(name = c("Smith, John", "Perez, Maria"))

# Split `name` on the comma-plus-space delimiter into two columns; the piece
# before the delimiter goes to `last`, the piece after it to `first`.
demo_separated <- separate(
  demo,
  name,
  into = c("last", "first"),
  sep = ", "
)
demo_separated
## # A tibble: 2 × 2
## last first
## <chr> <chr>
## 1 Smith John
## 2 Perez Maria
# Inverse of the split above: glue `first` and `last` together with a single
# space into `name_joined`. The result keeps only the combined column.
demo_united <- unite(demo_separated, "name_joined", first, last, sep = " ")
demo_united
## # A tibble: 2 × 1
## name_joined
## <chr>
## 1 John Smith
## 2 Maria Perez
Missing Values
# Small example table with one missing value in each column, written row by
# row so the position of each NA is easy to see.
df <- tribble(
  ~x, ~y,
   1, "a",
   2, NA,
  NA, "c",
   4, "d"
)
df
## # A tibble: 4 × 2
## x y
## <dbl> <chr>
## 1 1 a
## 2 2 <NA>
## 3 NA c
## 4 4 d
# Discard every row that has an NA in any column; of the four rows in `df`,
# only the two fully observed ones remain.
df_no_missing <- drop_na(df)
df_no_missing
## # A tibble: 2 × 2
## x y
## <dbl> <chr>
## 1 1 a
## 2 4 d
# Alternative to dropping rows: keep all four rows and substitute a
# placeholder for each NA, chosen per column (0 for numeric `x`, the string
# "missing" for character `y`).
df_filled <- replace_na(df, list(x = 0, y = "missing"))
df_filled
## # A tibble: 4 × 2
## x y
## <dbl> <chr>
## 1 1 a
## 2 2 missing
## 3 0 c
## 4 4 d
Non-Tidy Data
# Untidy example: each column name packs two variables, a year and a measure
# (`pop` or `gdp`), joined by an underscore. Backticks are required because
# the names start with a digit.
world_bank <- tibble(
  country = c("Aland", "Bora"),
  `2000_pop` = c(1000, 2000),
  `2000_gdp` = c(50000, 70000),
  `2001_pop` = c(1100, 2100),
  `2001_gdp` = c(52000, 73000)
)
world_bank
## # A tibble: 2 × 5
## country `2000_pop` `2000_gdp` `2001_pop` `2001_gdp`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Aland 1000 50000 1100 52000
## 2 Bora 2000 70000 2100 73000
# Tidy `world_bank` so that each row is one country-year and each measure
# (`pop`, `gdp`) is its own column.
#
# Every non-`country` column name is split at "_" into two pieces:
#   * the first piece (e.g. "2000") is stored in the new `year` column;
#   * the second piece is mapped to the special `.value` sentinel, which tells
#     pivot_longer() to use that piece ("pop" / "gdp") as the *name* of an
#     output column rather than as data.
#
# Doing this in a single pivot_longer() avoids first stacking all measures
# into one shared `value` column and then spreading them out again with
# pivot_wider(). That round trip forces every measure through a common
# column type and performs an extra reshape for the same final result.
# As with any name-derived column, `year` comes out as character.
tidy_wb <- world_bank |>
  pivot_longer(
    cols = -country,
    names_to = c("year", ".value"),
    names_sep = "_"
  )
tidy_wb
## # A tibble: 4 × 4
## country year pop gdp
## <chr> <chr> <dbl> <dbl>
## 1 Aland 2000 1000 50000
## 2 Aland 2001 1100 52000
## 3 Bora 2000 2000 70000
## 4 Bora 2001 2100 73000