Import your data
coffee <- read_excel("../00_data/myDataCoffee.xlsx")
set.seed(2)
data_small <- coffee %>%
select(favorite, cups, style) %>%
sample_n(5)
data_small
## # A tibble: 5 × 3
## favorite cups style
## <chr> <chr> <chr>
## 1 Americano 1 Juicy
## 2 Cappuccino 1 Chocolatey
## 3 Cappuccino 2 Bright
## 4 Pourover 2 Fruity
## 5 Espresso 2 Bold
data_small %>% pivot_wider(names_from = cups, values_from = favorite)
## # A tibble: 5 × 3
## style `1` `2`
## <chr> <chr> <chr>
## 1 Juicy Americano <NA>
## 2 Chocolatey Cappuccino <NA>
## 3 Bright <NA> Cappuccino
## 4 Fruity <NA> Pourover
## 5 Bold <NA> Espresso
Import data
olympics <- read_excel("../00_data/myDataOlympics.xlsx")
Separating and Uniting
Separate a column
olympics_new <- olympics %>%
separate(year, into = c("century", "yr_short"), sep = 2, remove = FALSE) %>%
select(year, century, yr_short)
olympics_new
## # A tibble: 271,116 × 3
## year century yr_short
## <dbl> <chr> <chr>
## 1 1992 19 92
## 2 2012 20 12
## 3 1920 19 20
## 4 1900 19 00
## 5 1988 19 88
## 6 1988 19 88
## 7 1992 19 92
## 8 1992 19 92
## 9 1994 19 94
## 10 1994 19 94
## # ℹ 271,106 more rows
Unite two columns
olympics_new %>%
unite("year", c(century, yr_short), sep = "")
## # A tibble: 271,116 × 1
## year
## <chr>
## 1 1992
## 2 2012
## 3 1920
## 4 1900
## 5 1988
## 6 1988
## 7 1992
## 8 1992
## 9 1994
## 10 1994
## # ℹ 271,106 more rows
Missing Values
olympics %>%
pivot_wider(names_from = age, values_from = weight, values_fn = list)
## # A tibble: 269,731 × 88
## id name sex height team noc games year season city sport event
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 1 A Dijiang M 180 China CHN 1992… 1992 Summer Barc… Bask… Bask…
## 2 2 A Lamusi M 170 China CHN 2012… 2012 Summer Lond… Judo Judo…
## 3 3 Gunnar N… M NA Denm… DEN 1920… 1920 Summer Antw… Foot… Foot…
## 4 4 Edgar Li… M NA Denm… DEN 1900… 1900 Summer Paris Tug-… Tug-…
## 5 5 Christin… F 185 Neth… NED 1988… 1988 Winter Calg… Spee… Spee…
## 6 5 Christin… F 185 Neth… NED 1988… 1988 Winter Calg… Spee… Spee…
## 7 5 Christin… F 185 Neth… NED 1992… 1992 Winter Albe… Spee… Spee…
## 8 5 Christin… F 185 Neth… NED 1992… 1992 Winter Albe… Spee… Spee…
## 9 5 Christin… F 185 Neth… NED 1994… 1994 Winter Lill… Spee… Spee…
## 10 5 Christin… F 185 Neth… NED 1994… 1994 Winter Lill… Spee… Spee…
## # ℹ 269,721 more rows
## # ℹ 76 more variables: medal <chr>, `24` <list>, `23` <list>, `34` <list>,
## # `21` <list>, `25` <list>, `27` <list>, `31` <list>, `33` <list>,
## # `18` <list>, `26` <list>, `22` <list>, `30` <list>, `32` <list>,
## # `28` <list>, `54` <list>, `20` <list>, `17` <list>, `43` <list>,
## # `47` <list>, `29` <list>, `41` <list>, `45` <list>, `49` <list>,
## # `53` <list>, `57` <list>, `NA` <list>, `19` <list>, `38` <list>, …
olympics %>%
complete(sex, height) %>%
fill(age, weight)
## # A tibble: 271,137 × 15
## sex height id name age weight team noc games year season city
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 F 127 15150 Rosario… 15 42 Mexi… MEX 1968… 1968 Summer Mexi…
## 2 F 127 15150 Rosario… 15 42 Mexi… MEX 1968… 1968 Summer Mexi…
## 3 F 127 15150 Rosario… 15 42 Mexi… MEX 1968… 1968 Summer Mexi…
## 4 F 127 15150 Rosario… 15 42 Mexi… MEX 1968… 1968 Summer Mexi…
## 5 F 127 15150 Rosario… 15 42 Mexi… MEX 1968… 1968 Summer Mexi…
## 6 F 127 15150 Rosario… 15 42 Mexi… MEX 1968… 1968 Summer Mexi…
## 7 F 128 NA <NA> 15 42 <NA> <NA> <NA> NA <NA> <NA>
## 8 F 130 NA <NA> 15 42 <NA> <NA> <NA> NA <NA> <NA>
## 9 F 131 35122 Nadia F… NA 41 Libya LBA 1980… 1980 Summer Mosk…
## 10 F 131 35122 Nadia F… NA 41 Libya LBA 1980… 1980 Summer Mosk…
## # ℹ 271,127 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>