Import your data

coffee <- read_excel("../00_data/myDataCoffee.xlsx")
set.seed(2)
data_small <- coffee %>%
    
    select(favorite, cups, style) %>%
    
    sample_n(5) 

data_small
## # A tibble: 5 × 3
##   favorite   cups  style     
##   <chr>      <chr> <chr>     
## 1 Americano  1     Juicy     
## 2 Cappuccino 1     Chocolatey
## 3 Cappuccino 2     Bright    
## 4 Pourover   2     Fruity    
## 5 Espresso   2     Bold
data_small %>% pivot_wider(names_from = cups, values_from = favorite)
## # A tibble: 5 × 3
##   style      `1`        `2`       
##   <chr>      <chr>      <chr>     
## 1 Juicy      Americano  <NA>      
## 2 Chocolatey Cappuccino <NA>      
## 3 Bright     <NA>       Cappuccino
## 4 Fruity     <NA>       Pourover  
## 5 Bold       <NA>       Espresso

Pivoting

long to wide form

data_small %>% pivot_wider(names_from = style, values_from = favorite)
## # A tibble: 2 × 6
##   cups  Juicy     Chocolatey Bright     Fruity   Bold    
##   <chr> <chr>     <chr>      <chr>      <chr>    <chr>   
## 1 1     Americano Cappuccino <NA>       <NA>     <NA>    
## 2 2     <NA>      <NA>       Cappuccino Pourover Espresso
 data_small %>% arrange(favorite, cups)
## # A tibble: 5 × 3
##   favorite   cups  style     
##   <chr>      <chr> <chr>     
## 1 Americano  1     Juicy     
## 2 Cappuccino 1     Chocolatey
## 3 Cappuccino 2     Bright    
## 4 Espresso   2     Bold      
## 5 Pourover   2     Fruity

wide to long form

data_long <- data_small %>% pivot_wider(names_from = style, values_from = cups)

data_long %>% pivot_longer('favorite':'favorite', names_to = "style", values_to = "cups")
## # A tibble: 4 × 7
##   Juicy Chocolatey Bright Fruity Bold  style    cups      
##   <chr> <chr>      <chr>  <chr>  <chr> <chr>    <chr>     
## 1 1     <NA>       <NA>   <NA>   <NA>  favorite Americano 
## 2 <NA>  1          2      <NA>   <NA>  favorite Cappuccino
## 3 <NA>  <NA>       <NA>   2      <NA>  favorite Pourover  
## 4 <NA>  <NA>       <NA>   <NA>   2     favorite Espresso

Import data

olympics <- read_excel("../00_data/myDataOlympics.xlsx")

Separating and Uniting

Separate a column

olympics_new <- olympics %>% 
  separate(year, into = c("century", "yr_short"), sep = 2, remove = FALSE) %>%
    select(year, century, yr_short)

olympics_new
## # A tibble: 271,116 × 3
##     year century yr_short
##    <dbl> <chr>   <chr>   
##  1  1992 19      92      
##  2  2012 20      12      
##  3  1920 19      20      
##  4  1900 19      00      
##  5  1988 19      88      
##  6  1988 19      88      
##  7  1992 19      92      
##  8  1992 19      92      
##  9  1994 19      94      
## 10  1994 19      94      
## # ℹ 271,106 more rows

Unite two columns

olympics_new %>%
  unite("year", c(century, yr_short), sep = "")
## # A tibble: 271,116 × 1
##    year 
##    <chr>
##  1 1992 
##  2 2012 
##  3 1920 
##  4 1900 
##  5 1988 
##  6 1988 
##  7 1992 
##  8 1992 
##  9 1994 
## 10 1994 
## # ℹ 271,106 more rows

Missing Values

olympics %>% 
  pivot_wider(names_from = age, values_from = weight, values_fn = list)
## # A tibble: 269,731 × 88
##       id name      sex   height team  noc   games  year season city  sport event
##    <dbl> <chr>     <chr> <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr> <chr> <chr>
##  1     1 A Dijiang M     180    China CHN   1992…  1992 Summer Barc… Bask… Bask…
##  2     2 A Lamusi  M     170    China CHN   2012…  2012 Summer Lond… Judo  Judo…
##  3     3 Gunnar N… M     NA     Denm… DEN   1920…  1920 Summer Antw… Foot… Foot…
##  4     4 Edgar Li… M     NA     Denm… DEN   1900…  1900 Summer Paris Tug-… Tug-…
##  5     5 Christin… F     185    Neth… NED   1988…  1988 Winter Calg… Spee… Spee…
##  6     5 Christin… F     185    Neth… NED   1988…  1988 Winter Calg… Spee… Spee…
##  7     5 Christin… F     185    Neth… NED   1992…  1992 Winter Albe… Spee… Spee…
##  8     5 Christin… F     185    Neth… NED   1992…  1992 Winter Albe… Spee… Spee…
##  9     5 Christin… F     185    Neth… NED   1994…  1994 Winter Lill… Spee… Spee…
## 10     5 Christin… F     185    Neth… NED   1994…  1994 Winter Lill… Spee… Spee…
## # ℹ 269,721 more rows
## # ℹ 76 more variables: medal <chr>, `24` <list>, `23` <list>, `34` <list>,
## #   `21` <list>, `25` <list>, `27` <list>, `31` <list>, `33` <list>,
## #   `18` <list>, `26` <list>, `22` <list>, `30` <list>, `32` <list>,
## #   `28` <list>, `54` <list>, `20` <list>, `17` <list>, `43` <list>,
## #   `47` <list>, `29` <list>, `41` <list>, `45` <list>, `49` <list>,
## #   `53` <list>, `57` <list>, `NA` <list>, `19` <list>, `38` <list>, …
olympics %>% 
    complete(sex, height) %>%
    fill(age, weight)
## # A tibble: 271,137 × 15
##    sex   height    id name     age   weight team  noc   games  year season city 
##    <chr> <chr>  <dbl> <chr>    <chr> <chr>  <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1 F     127    15150 Rosario… 15    42     Mexi… MEX   1968…  1968 Summer Mexi…
##  2 F     127    15150 Rosario… 15    42     Mexi… MEX   1968…  1968 Summer Mexi…
##  3 F     127    15150 Rosario… 15    42     Mexi… MEX   1968…  1968 Summer Mexi…
##  4 F     127    15150 Rosario… 15    42     Mexi… MEX   1968…  1968 Summer Mexi…
##  5 F     127    15150 Rosario… 15    42     Mexi… MEX   1968…  1968 Summer Mexi…
##  6 F     127    15150 Rosario… 15    42     Mexi… MEX   1968…  1968 Summer Mexi…
##  7 F     128       NA <NA>     15    42     <NA>  <NA>  <NA>     NA <NA>   <NA> 
##  8 F     130       NA <NA>     15    42     <NA>  <NA>  <NA>     NA <NA>   <NA> 
##  9 F     131    35122 Nadia F… NA    41     Libya LBA   1980…  1980 Summer Mosk…
## 10 F     131    35122 Nadia F… NA    41     Libya LBA   1980…  1980 Summer Mosk…
## # ℹ 271,127 more rows
## # ℹ 3 more variables: sport <chr>, event <chr>, medal <chr>