Tidyr

library(tidyr)

## Warning: package 'tidyr' was built under R version 3.6.3

#Fungsi utama tidyr terbagi dalam 5 kategori: #1. “Pivoting”, yang mengubah data antara bentuk panjang (long) dan lebar (wide). tidyr 1.0.0 memperkenalkan pivot_longer() dan pivot_wider(), menggantikan fungsi spread() dan gather() yang lebih lama. Lihat vignette("pivot") untuk lebih jelasnya.

#pivot_longer = kolom jadi baris (data memanjang) #pivot_wider = baris jadi kolom (data melebar); pivot_wider adalah kebalikan (invers) dari pivot_longer

# Preview the first five rows of the long-format fish_encounters data.
fish_encounters %>% head(n = 5)

## # A tibble: 5 x 3
##   fish  station  seen
##   <fct> <fct>   <int>
## 1 4842  Release     1
## 2 4842  I80_1       1
## 3 4842  Lisbon      1
## 4 4842  Rstr        1
## 5 4842  Base_TD     1

# Widen: each distinct value of `station` becomes its own column, filled
# from `seen`; fish/station pairs that were never recorded show up as NA.
fish_encounters %>%
  pivot_wider(names_from = station, values_from = seen) %>%
  head(n = 5)

## # A tibble: 5 x 12
##   fish  Release I80_1 Lisbon  Rstr Base_TD   BCE   BCW  BCE2  BCW2   MAE   MAW
##   <fct>   <int> <int>  <int> <int>   <int> <int> <int> <int> <int> <int> <int>
## 1 4842        1     1      1     1       1     1     1     1     1     1     1
## 2 4843        1     1      1     1       1     1     1     1     1     1     1
## 3 4844        1     1      1     1       1     1     1     1     1     1     1
## 4 4845        1     1      1     1       1    NA    NA    NA    NA    NA    NA
## 5 4847        1     1      1    NA      NA    NA    NA    NA    NA    NA    NA

# Same reshape, but fill the missing fish/station combinations with 0
# instead of NA.
fish_encounters %>%
  pivot_wider(names_from = station, values_from = seen, values_fill = 0) %>%
  head(n = 5)

## # A tibble: 5 x 12
##   fish  Release I80_1 Lisbon  Rstr Base_TD   BCE   BCW  BCE2  BCW2   MAE   MAW
##   <fct>   <int> <int>  <int> <int>   <int> <int> <int> <int> <int> <int> <int>
## 1 4842        1     1      1     1       1     1     1     1     1     1     1
## 2 4843        1     1      1     1       1     1     1     1     1     1     1
## 3 4844        1     1      1     1       1     1     1     1     1     1     1
## 4 4845        1     1      1     1       1     0     0     0     0     0     0
## 5 4847        1     1      1     0       0     0     0     0     0     0     0

# Generating column names from several variables: preview the input first.
us_rent_income %>% head(n = 10)

## # A tibble: 10 x 5
##    GEOID NAME       variable estimate   moe
##    <chr> <chr>      <chr>       <dbl> <dbl>
##  1 01    Alabama    income      24476   136
##  2 01    Alabama    rent          747     3
##  3 02    Alaska     income      32940   508
##  4 02    Alaska     rent         1200    13
##  5 04    Arizona    income      27517   148
##  6 04    Arizona    rent          972     4
##  7 05    Arkansas   income      23789   165
##  8 05    Arkansas   rent          709     5
##  9 06    California income      29454   109
## 10 06    California rent         1358     3

# Each value of `variable` (income, rent) becomes a column for every value
# column, named <value column>_<variable>, e.g. estimate_income, moe_rent.
us_rent_income %>%
  pivot_wider(names_from = variable, values_from = c(estimate, moe)) %>%
  head(n = 10)

## # A tibble: 10 x 6
##    GEOID NAME                 estimate_income estimate_rent moe_income moe_rent
##    <chr> <chr>                          <dbl>         <dbl>      <dbl>    <dbl>
##  1 01    Alabama                        24476           747        136        3
##  2 02    Alaska                         32940          1200        508       13
##  3 04    Arizona                        27517           972        148        4
##  4 05    Arkansas                       23789           709        165        5
##  5 06    California                     29454          1358        109        3
##  6 08    Colorado                       32401          1125        109        5
##  7 09    Connecticut                    35326          1123        195        5
##  8 10    Delaware                       31560          1076        247       10
##  9 11    District of Columbia           43198          1424        681       17
## 10 12    Florida                        25952          1077         70        3

# Same reshape, joining the value-column name and the `variable` value with
# "." instead of the default "_".
us_rent_income %>%
  pivot_wider(
    names_from = variable,
    values_from = c(estimate, moe),
    names_sep = "."
  ) %>%
  head(n = 10)

## # A tibble: 10 x 6
##    GEOID NAME                 estimate.income estimate.rent moe.income moe.rent
##    <chr> <chr>                          <dbl>         <dbl>      <dbl>    <dbl>
##  1 01    Alabama                        24476           747        136        3
##  2 02    Alaska                         32940          1200        508       13
##  3 04    Arizona                        27517           972        148        4
##  4 05    Arkansas                       23789           709        165        5
##  5 06    California                     29454          1358        109        3
##  6 08    Colorado                       32401          1125        109        5
##  7 09    Connecticut                    35326          1123        195        5
##  8 10    Delaware                       31560          1076        247       10
##  9 11    District of Columbia           43198          1424        681       17
## 10 12    Florida                        25952          1077         70        3

# Preview the first ten rows of the built-in warpbreaks data set.
head(warpbreaks, n = 10L)

##    breaks wool tension
## 1      26    A       L
## 2      30    A       L
## 3      54    A       L
## 4      25    A       L
## 5      70    A       L
## 6      52    A       L
## 7      51    A       L
## 8      26    A       L
## 9      67    A       L
## 10     18    A       M

# Reorder the columns (wool, tension, breaks) and convert to a tibble.
# Note: this assignment shadows the built-in `warpbreaks` data set for the
# rest of the script.
warpbreaks <- warpbreaks[c("wool", "tension", "breaks")] %>% as_tibble()
warpbreaks %>% head(n = 10)

## # A tibble: 10 x 3
##    wool  tension breaks
##    <fct> <fct>    <dbl>
##  1 A     L           26
##  2 A     L           30
##  3 A     L           54
##  4 A     L           25
##  5 A     L           70
##  6 A     L           52
##  7 A     L           51
##  8 A     L           26
##  9 A     L           67
## 10 A     M           18

# Spread `wool` (A, B) into columns; each cell holds the mean of `breaks`
# for that tension/wool combination (values_fn aggregates the duplicates).
pivot_wider(
  warpbreaks,
  names_from = wool,
  values_from = breaks,
  values_fn = mean
)

## # A tibble: 3 x 3
##   tension     A     B
##   <fct>   <dbl> <dbl>
## 1 L        44.6  28.2
## 2 M        24    28.8
## 3 H        24.6  18.8

#2. “Rectangling”, yang mengubah list bersarang dalam (misalnya dari JSON) menjadi tibble yang rapi. #unnest #Tiga fungsi unnest berikut berbeda dalam cara mengubah bentuk data frame keluaran:

#unnest_wider() mempertahankan jumlah baris, tetapi mengubah kolom.

#unnest_longer() mempertahankan kolom, tetapi mengubah jumlah baris.

#unnest() dapat mengubah baris dan kolom.

# Example list-column data: one row per film character; each `metadata`
# entry is a named list with three components (species, color, films).
df <- tibble(
  character = c("Toothless", "Dory"),
  metadata = list(
    list(
      species = "dragon",
      color = "black",
      films = c(
        "How to Train Your Dragon",
        "How to Train Your Dragon 2",
        "How to Train Your Dragon: The Hidden World"
       )
    ),
    list(
      species = "blue tang",
      color = "blue",
      films = c("Finding Nemo", "Finding Dory")
    )
  )
)
# A 2 x 2 tibble
df  # "named list [3]" in the printout: each metadata entry has 3 components

## # A tibble: 2 x 2
##   character metadata        
##   <chr>     <list>          
## 1 Toothless <named list [3]>
## 2 Dory      <named list [3]>

# Turn every component of `metadata` into its own column.
unnest_wider(df, metadata)

## # A tibble: 2 x 4
##   character species   color films    
##   <chr>     <chr>     <chr> <list>   
## 1 Toothless dragon    black <chr [3]>
## 2 Dory      blue tang blue  <chr [2]>

# Pull out only selected components: `species`, plus the first and third
# film (NA where a character has fewer than three films).
hoist(
  df,
  metadata,
  "species",
  first_film = list("films", 1L),
  third_film = list("films", 3L)
)

## # A tibble: 2 x 5
##   character species   first_film        third_film                  metadata    
##   <chr>     <chr>     <chr>             <chr>                       <list>      
## 1 Toothless dragon    How to Train You~ How to Train Your Dragon: ~ <named list~
## 2 Dory      blue tang Finding Nemo      <NA>                        <named list~

# Widen `metadata` into columns, then give every film its own row.
unnest_longer(unnest_wider(df, metadata), films)

## # A tibble: 5 x 4
##   character species   color films                                     
##   <chr>     <chr>     <chr> <chr>                                     
## 1 Toothless dragon    black How to Train Your Dragon                  
## 2 Toothless dragon    black How to Train Your Dragon 2                
## 3 Toothless dragon    black How to Train Your Dragon: The Hidden World
## 4 Dory      blue tang blue  Finding Nemo                              
## 5 Dory      blue tang blue  Finding Dory

# unnest_longer() is useful when every element of a list-column entry should
# become its own row. Row 1 holds NULL; rows 2 and 3 hold integer vectors.
df <- tibble(x = seq_len(3), y = list(NULL, seq_len(3), 4:5))
print(df)

## # A tibble: 3 x 2
##       x y        
##   <int> <list>   
## 1     1 <NULL>   
## 2     2 <int [3]>
## 3     3 <int [2]>

# One row per element of `y`; the NULL entry is kept as a single NA row.
unnest_longer(df, y)

## # A tibble: 6 x 2
##       x     y
##   <int> <int>
## 1     1    NA
## 2     2     1
## 3     2     2
## 4     2     3
## 5     3     4
## 6     3     5

# Widening unnamed vectors: column names are generated automatically
# (...1, ...2, ...3).
unnest_wider(df, y)

## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3

## New names:
## * `` -> ...1
## * `` -> ...2

## # A tibble: 3 x 4
##       x  ...1  ...2  ...3
##   <int> <int> <int> <int>
## 1     1    NA    NA    NA
## 2     2     1     2     3
## 3     3     4     5    NA

# Usually you will want to supply names_sep so the new columns are named
# after the source column (y_1, y_2, y_3).
unnest_wider(df, y, names_sep = "_")

## # A tibble: 3 x 4
##       x   y_1   y_2   y_3
##   <int> <int> <int> <int>
## 1     1    NA    NA    NA
## 2     2     1     2     3
## 3     3     4     5    NA

# The same applies when the vectors are named: `y` holds named numeric
# vectors of different lengths.
lop <- tibble(
  x = seq_len(2),
  y = list(c(a = 1, b = 2), c(a = 10, b = 11, c = 12))
)
print(lop)

## # A tibble: 2 x 2
##       x y        
##   <int> <list>   
## 1     1 <dbl [2]>
## 2     2 <dbl [3]>

# Element names become column suffixes (y_a, y_b, y_c); missing names give NA.
unnest_wider(lop, y, names_sep = "_")

## # A tibble: 2 x 4
##       x   y_a   y_b   y_c
##   <int> <dbl> <dbl> <dbl>
## 1     1     1     2    NA
## 2     2    10    11    12

# Named vectors work with unnest_longer() as well: besides the values, the
# element names are kept in an extra index column (`y_id`).
lop %>% unnest_longer(y)

## # A tibble: 5 x 3
##       x     y y_id 
##   <int> <dbl> <chr>
## 1     1     1 a    
## 2     1     2 b    
## 3     2    10 a    
## 4     2    11 b    
## 5     2    12 c

#3. Nesting: mengonversi data yang dikelompokkan menjadi bentuk di mana setiap grup menjadi satu baris yang berisi data frame bersarang (nested), dan sebaliknya (unnesting).

library(tidyr)
# Small example table: `x` is the grouping key (three 1s, two 2s, one 3),
# `y` counts up from 1 to 6 and `z` counts down from 6 to 1.
fgh <- tibble(
  x = rep(c(1, 2, 3), times = c(3, 2, 1)),
  y = seq_len(6),
  z = rev(seq_len(6))
)
print(fgh)

## # A tibble: 6 x 3
##       x     y     z
##   <dbl> <int> <int>
## 1     1     1     6
## 2     1     2     5
## 3     1     3     4
## 4     2     4     3
## 5     2     5     2
## 6     3     6     1

# Note that we get one output row for each unique combination of the
# non-nested variables (here: `x`).
nest(fgh, data = c(y, z))

## # A tibble: 3 x 2
##       x data            
##   <dbl> <list>          
## 1     1 <tibble [3 x 2]>
## 2     2 <tibble [2 x 2]>
## 3     3 <tibble [1 x 2]>

# chop() does something similar, but keeps `y` and `z` as separate
# list-columns instead of packing them into one nested data frame.
chop(fgh, c(y, z))

## # A tibble: 3 x 3
##       x           y           z
##   <dbl> <list<int>> <list<int>>
## 1     1         [3]         [3]
## 2     2         [2]         [2]
## 3     3         [1]         [1]

# List-column of data frames: x = 1 has no data (NULL), x = 2 has a one-row
# tibble and x = 3 a three-row tibble.
empty_entry <- NULL
juko <- tibble(
  x = seq_len(3),
  y = list(
    empty_entry,
    tibble(a = 1, b = 2),
    tibble(a = seq_len(3), b = rev(seq_len(3)))
  )
)
rm(empty_entry)
print(juko)

## # A tibble: 3 x 2
##       x y               
##   <int> <list>          
## 1     1 <NULL>          
## 2     2 <tibble [1 x 2]>
## 3     3 <tibble [3 x 2]>

# By default, rows whose list entry is NULL/empty are dropped.
unnest(juko, y)

## # A tibble: 4 x 3
##       x     a     b
##   <int> <dbl> <dbl>
## 1     2     1     2
## 2     3     1     3
## 3     3     2     2
## 4     3     3     1

# keep_empty = TRUE keeps those rows, with NA in the unnested columns.
unnest(juko, y, keep_empty = TRUE)

## # A tibble: 5 x 3
##       x     a     b
##   <int> <dbl> <dbl>
## 1     1    NA    NA
## 2     2     1     2
## 3     3     1     3
## 4     3     2     2
## 5     3     3     1

# You can unnest several list-columns at once; `a` and `b` are parallel
# list-columns, `c` is a plain numeric column.
oli <- tibble(
  a = list(c("a", "b"), "c"),
  b = list(seq_len(2), 3),
  c = c(11, 22)
)
print(oli)

## # A tibble: 2 x 3
##   a         b             c
##   <list>    <list>    <dbl>
## 1 <chr [2]> <int [2]>    11
## 2 <chr [1]> <dbl [1]>    22

# Unnesting `a` and `b` together keeps their elements aligned row by row.
unnest(oli, c(a, b))

## # A tibble: 3 x 3
##   a         b     c
##   <chr> <dbl> <dbl>
## 1 a         1    11
## 2 b         2    11
## 3 c         3    22

# Compare with unnesting one column at a time, which produces the
# Cartesian product of the elements of `a` and `b` within each row.
unnest(unnest(oli, a), b)

## # A tibble: 5 x 3
##   a         b     c
##   <chr> <dbl> <dbl>
## 1 a         1    11
## 2 a         2    11
## 3 b         1    11
## 4 b         2    11
## 5 c         3    22

#4. Memisahkan (splitting) dan menggabungkan (combining) kolom karakter.

# Input for separate(): by default it splits on any non-alphanumeric value.
art <- data.frame(
  x = c(NA, "a.b", "a.d", "b.c")
)
print(art)

##      x
## 1 <NA>
## 2  a.b
## 3  a.d
## 4  b.c

# Split `x` into two variables, A and B.
separate(art, x, into = c("A", "B"))

##      A    B
## 1 <NA> <NA>
## 2    a    b
## 3    a    d
## 4    b    c

# If you only want the second variable, use NA in place of the first name.
separate(art, x, into = c(NA, "B"))

##      B
## 1 <NA>
## 2    b
## 3    d
## 4    c

# If the rows do not all split into the same number of pieces, use the
# `extra` and `fill` arguments of separate() to control what happens.
dfu <- data.frame(
  x = c("a", "a b", "a b c", NA)
)
print(dfu)

##       x
## 1     a
## 2   a b
## 3 a b c
## 4  <NA>

# With the defaults, surplus pieces are discarded and missing pieces are
# filled with NA, each with a warning.
separate(dfu, x, into = c("Tabuk", "b"))

## Warning: Expected 2 pieces. Additional pieces discarded in 1 rows [3].

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].

##   Tabuk    b
## 1     a <NA>
## 2     a    b
## 3     a    b
## 4  <NA> <NA>

# Same result as before, but without warnings: extra = "drop" discards the
# third piece ("c") and fill = "right" pads short rows with NA on the right.
separate(dfu, x, into = c("Tabuk", "b"), extra = "drop", fill = "right")

##   Tabuk    b
## 1     a <NA>
## 2     a    b
## 3     a    b
## 4  <NA> <NA>

# Opposite of the previous call for surplus pieces: extra = "merge" keeps "c"
# by merging it into the last column ("b c"). Short rows are still padded
# with NA on the right (fill = "right").
dfu %>% separate(x, c("Tabuk", "b"), extra = "merge", fill = "right") #with fill = "left" the NA padding goes on the left instead, so the "a" of row 1 would land in column b

##   Tabuk    b
## 1     a <NA>
## 2     a    b
## 3     a  b c
## 4  <NA> <NA>

# Or keep all three pieces by supplying three column names.
separate(dfu, x, into = c("a", "b", "c"))

## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 2 rows [1, 2].

##      a    b    c
## 1    a <NA> <NA>
## 2    a    b <NA>
## 3    a    b    c
## 4 <NA> <NA> <NA>

# To split only a limited number of times, use extra = "merge"; the second
# value here contains the separator twice.
dfs <- data.frame(
  x = c("x: 123", "y: error: 7")
)
print(dfs)

##             x
## 1      x: 123
## 2 y: error: 7

# Create two variables, `key` and `value`: only the first ": " splits, the
# remainder stays together in `value`.
separate(dfs, x, into = c("key", "value"), sep = ": ", extra = "merge")

##   key    value
## 1   x      123
## 2   y error: 7

# Use a regular expression to split on any of several characters.
dfe <- data.frame(
  x = c(NA, "a?b", "a.d", "b:c")
)
print(dfe)

##      x
## 1 <NA>
## 2  a?b
## 3  a.d
## 4  b:c

# Split on ".", "?" or ":"; the separator character itself is dropped.
separate(dfe, x, into = c("A", "B"), sep = "([.?:])")

##      A    B
## 1 <NA> <NA>
## 2    a    b
## 3    a    d
## 4    b    c

# convert = TRUE detects the class of the new columns; this is the input.
dfq <- data.frame(
  x = c("a:1", "a:2", "c:4", "d", NA)
)
print(dfq)

##      x
## 1  a:1
## 2  a:2
## 3  c:4
## 4    d
## 5 <NA>

# Without convert, both new columns stay character.
str(separate(dfq, x, into = c("key", "value"), sep = ":"))

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [4].

## 'data.frame':    5 obs. of  2 variables:
##  $ key  : chr  "a" "a" "c" "d" ...
##  $ value: chr  "1" "2" "4" NA ...

# With convert = TRUE, `value` is converted to integer.
str(separate(dfq, x, into = c("key", "value"), sep = ":", convert = TRUE))

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [4].

## 'data.frame':    5 obs. of  2 variables:
##  $ key  : chr  "a" "a" "c" "d" ...
##  $ value: int  1 2 4 NA NA

#Extract

# Input for extract(): hyphen-separated pairs plus one missing value.
dfw <- data.frame(
  x = c(NA, "a-b", "a-d", "b-c", "d-e")
)
print(dfw)

##      x
## 1 <NA>
## 2  a-b
## 3  a-d
## 4  b-c
## 5  d-e

# Capture the leading piece of each value (the part before the hyphen) into
# a variable named `Awal`, using extract()'s default pattern.
extract(dfw, x, into = "Awal")

##   Awal
## 1 <NA>
## 2    a
## 3    a
## 4    b
## 5    d

# Two capture groups give two variables: the pieces before and after "-".
extract(dfw, x, into = c("Atas", "Bawah"), regex = "([[:alnum:]]+)-([[:alnum:]]+)")

##   Atas Bawah
## 1 <NA>  <NA>
## 2    a     b
## 3    a     d
## 4    b     c
## 5    d     e

# If the pattern does not match, the result is NA: this pattern does not
# allow "e", so "d-e" yields NA in both columns.
extract(dfw, x, into = c("Atas", "Bawah"), regex = "([a-d]+)-([a-d]+)")

##   Atas Bawah
## 1 <NA>  <NA>
## 2    a     b
## 3    a     d
## 4    b     c
## 5 <NA>  <NA>

#unite

# expand_grid() builds every combination of its inputs (a Cartesian
# product): 2 values of x times 2 values of y gives 4 rows.
dfr <- expand_grid(
  x = c("a", NA),
  y = c("b", NA)
)
print(dfr)

## # A tibble: 4 x 2
##   x     y    
##   <chr> <chr>
## 1 a     b    
## 2 a     <NA> 
## 3 <NA>  b    
## 4 <NA>  <NA>

# Create a variable `z` by pasting x and y together; remove = FALSE keeps
# the input columns. NA is pasted as the text "NA".
unite(dfr, "z", x:y, remove = FALSE)

## # A tibble: 4 x 3
##   z     x     y    
##   <chr> <chr> <chr>
## 1 a_b   a     b    
## 2 a_NA  a     <NA> 
## 3 NA_b  <NA>  b    
## 4 NA_NA <NA>  <NA>

# na.rm = TRUE drops missing values before pasting, so a row with only NAs
# becomes the empty string.
unite(dfr, "z", x:y, na.rm = TRUE, remove = FALSE)

## # A tibble: 4 x 3
##   z     x     y    
##   <chr> <chr> <chr>
## 1 "a_b" a     b    
## 2 "a"   a     <NA> 
## 3 "b"   <NA>  b    
## 4 ""    <NA>  <NA>

# separate() is almost the complement of unite(): x and y are first united
# into `xy`, then split back into two columns. It is only "almost" because
# the original NAs come back as the text "NA".
separate(unite(dfr, "xy", x:y), xy, into = c("x-ray", "yo"))

## # A tibble: 4 x 2
##   `x-ray` yo   
##   <chr>   <chr>
## 1 a       b    
## 2 a       NA   
## 3 NA      b    
## 4 NA      NA

#5. Menangani nilai yang hilang: ubah nilai hilang yang implisit menjadi eksplisit dengan complete(); ubah nilai hilang yang eksplisit menjadi implisit dengan drop_na(); ganti nilai yang hilang dengan nilai berikutnya / sebelumnya dengan fill(), atau dengan nilai yang diketahui dengan replace_na().

# complete(): example data in which the combination group = 2 /
# kode_item = 1 is implicitly missing.
dfl <- tibble(
  group = c(1, 2, 1),
  kode_item = c(1, 2, 2),
  nama_item = c("a", "b", "b"),
  nilai1 = seq_len(3),
  nilai2 = 4:6
)
print(dfl)

## # A tibble: 3 x 5
##   group kode_item nama_item nilai1 nilai2
##   <dbl>     <dbl> <chr>      <int>  <int>
## 1     1         1 a              1      4
## 2     2         2 b              2      5
## 3     1         2 b              3      6

# Add a row for every group x (kode_item, nama_item) combination; nesting()
# restricts the item pairs to those that occur in the data. New rows get NA.
complete(dfl, group, nesting(kode_item, nama_item))

## # A tibble: 4 x 5
##   group kode_item nama_item nilai1 nilai2
##   <dbl>     <dbl> <chr>      <int>  <int>
## 1     1         1 a              1      4
## 2     1         2 b              3      6
## 3     2         1 a             NA     NA
## 4     2         2 b              2      5

# You can also choose a fill value for the rows that complete() adds.
# The names in `fill` must match existing columns: here `nilai1` is filled
# with 0L (an integer, matching the column type) while `nilai2` stays NA.
dfl %>%
  complete(group, nesting(kode_item, nama_item), fill = list(nilai1 = 0L))

## # A tibble: 4 x 5
##   group kode_item nama_item nilai1 nilai2
##   <dbl>     <dbl> <chr>      <int>  <int>
## 1     1         1 a              1      4
## 2     1         2 b              3      6
## 3     2         1 a              0     NA
## 4     2         2 b              2      5

# drop_na(): example data with one NA in each column.
dfx <- tibble(
  x = c(1, 2, NA),
  y = c("a", NA, "b")
)
print(dfx)

## # A tibble: 3 x 2
##       x y    
##   <dbl> <chr>
## 1     1 a    
## 2     2 <NA> 
## 3    NA b

# Without column arguments, rows with an NA in any column are dropped.
drop_na(dfx)

## # A tibble: 1 x 2
##       x y    
##   <dbl> <chr>
## 1     1 a

# Drop only the rows where `x` is NA.
drop_na(dfx, x)

## # A tibble: 2 x 2
##       x y    
##   <dbl> <chr>
## 1     1 a    
## 2     2 <NA>

# Column names held in a character vector are selected with any_of().
vars <- "y"
drop_na(dfx, x, any_of(vars))

## # A tibble: 1 x 2
##       x y    
##   <dbl> <chr>
## 1     1 a

# fill()
# The value (year) is recorded only when it changes, i.e. on the Q1 row of
# each year; the following quarters carry NA.
sales <- tibble::tribble(
  ~quarter, ~year, ~sales,
  "Q1",    2000,    66013,
  "Q2",      NA,    69182,
  "Q3",      NA,    53175,
  "Q4",      NA,    21001,
  "Q1",    2001,    46036,
  "Q2",      NA,    58842,
  "Q3",      NA,    44568,
  "Q4",      NA,    50197,
  "Q1",    2002,    39113,
  "Q2",      NA,    41668,
  "Q3",      NA,    30144,
  "Q4",      NA,    52897,
  "Q1",    2004,    32129,
  "Q2",      NA,    67686,
  "Q3",      NA,    31768,
  "Q4",      NA,    49094
)
sales

## # A tibble: 16 x 3
##    quarter  year sales
##    <chr>   <dbl> <dbl>
##  1 Q1       2000 66013
##  2 Q2         NA 69182
##  3 Q3         NA 53175
##  4 Q4         NA 21001
##  5 Q1       2001 46036
##  6 Q2         NA 58842
##  7 Q3         NA 44568
##  8 Q4         NA 50197
##  9 Q1       2002 39113
## 10 Q2         NA 41668
## 11 Q3         NA 30144
## 12 Q4         NA 52897
## 13 Q1       2004 32129
## 14 Q2         NA 67686
## 15 Q3         NA 31768
## 16 Q4         NA 49094

# By default fill() replaces missing values from top to bottom, carrying
# the last non-missing value downward.
fill(sales, year)

## # A tibble: 16 x 3
##    quarter  year sales
##    <chr>   <dbl> <dbl>
##  1 Q1       2000 66013
##  2 Q2       2000 69182
##  3 Q3       2000 53175
##  4 Q4       2000 21001
##  5 Q1       2001 46036
##  6 Q2       2001 58842
##  7 Q3       2001 44568
##  8 Q4       2001 50197
##  9 Q1       2002 39113
## 10 Q2       2002 41668
## 11 Q3       2002 30144
## 12 Q4       2002 52897
## 13 Q1       2004 32129
## 14 Q2       2004 67686
## 15 Q3       2004 31768
## 16 Q4       2004 49094

# The value (pet_type) is missing above: it is recorded only on the last
# row of each block of ranks 1-6.
tidy_pets <- tibble::tribble(
  ~rank, ~pet_type, ~breed,
  1L,        NA,    "Boston Terrier",
  2L,        NA,    "Retrievers (Labrador)",
  3L,        NA,    "Retrievers (Golden)",
  4L,        NA,    "French Bulldogs",
  5L,        NA,    "Bulldogs",
  6L,     "Dog",    "Beagles",
  1L,        NA,    "Persian",
  2L,        NA,    "Maine Coon",
  3L,        NA,    "Ragdoll",
  4L,        NA,    "Exotic",
  5L,        NA,    "Siamese",
  6L,     "Cat",    "American Short"
)
tidy_pets

## # A tibble: 12 x 3
##     rank pet_type breed                
##    <int> <chr>    <chr>                
##  1     1 <NA>     Boston Terrier       
##  2     2 <NA>     Retrievers (Labrador)
##  3     3 <NA>     Retrievers (Golden)  
##  4     4 <NA>     French Bulldogs      
##  5     5 <NA>     Bulldogs             
##  6     6 Dog      Beagles              
##  7     1 <NA>     Persian              
##  8     2 <NA>     Maine Coon           
##  9     3 <NA>     Ragdoll              
## 10     4 <NA>     Exotic               
## 11     5 <NA>     Siamese              
## 12     6 Cat      American Short

# For values that are missing above the recorded one, fill upward with
# `.direction = "up"`.
fill(tidy_pets, pet_type, .direction = "up")

## # A tibble: 12 x 3
##     rank pet_type breed                
##    <int> <chr>    <chr>                
##  1     1 Dog      Boston Terrier       
##  2     2 Dog      Retrievers (Labrador)
##  3     3 Dog      Retrievers (Golden)  
##  4     4 Dog      French Bulldogs      
##  5     5 Dog      Bulldogs             
##  6     6 Dog      Beagles              
##  7     1 Cat      Persian              
##  8     2 Cat      Maine Coon           
##  9     3 Cat      Ragdoll              
## 10     4 Cat      Exotic               
## 11     5 Cat      Siamese              
## 12     6 Cat      American Short

# The value (n_squirrels) is missing both above and below within a group:
# only the Scorekeeper row of each group has a count.
squirrels <- tibble::tribble(
  ~group,    ~name,     ~role,     ~n_squirrels,
  1,      "Sam",    "Observer",   NA,
  1,     "Mara", "Scorekeeper",    8,
  1,    "Jesse",    "Observer",   NA,
  1,      "Tom",    "Observer",   NA,
  2,     "Mike",    "Observer",   NA,
  2,  "Rachael",    "Observer",   NA,
  2,  "Sydekea", "Scorekeeper",   14,
  2, "Gabriela",    "Observer",   NA,
  3,  "Derrick",    "Observer",   NA,
  3,     "Kara", "Scorekeeper",    9,
  3,    "Emily",    "Observer",   NA,
  3, "Danielle",    "Observer",   NA
)
squirrels

## # A tibble: 12 x 4
##    group name     role        n_squirrels
##    <dbl> <chr>    <chr>             <dbl>
##  1     1 Sam      Observer             NA
##  2     1 Mara     Scorekeeper           8
##  3     1 Jesse    Observer             NA
##  4     1 Tom      Observer             NA
##  5     2 Mike     Observer             NA
##  6     2 Rachael  Observer             NA
##  7     2 Sydekea  Scorekeeper          14
##  8     2 Gabriela Observer             NA
##  9     3 Derrick  Observer             NA
## 10     3 Kara     Scorekeeper           9
## 11     3 Emily    Observer             NA
## 12     3 Danielle Observer             NA

# Values are missing at inconsistent positions within each group, so fill in
# both directions (.direction = "downup") group by group (dplyr is used via
# `dplyr::` without attaching it), then drop the grouping again.
dplyr::group_by(squirrels, group) %>%
  fill(n_squirrels, .direction = "downup") %>%
  dplyr::ungroup()

## # A tibble: 12 x 4
##    group name     role        n_squirrels
##    <dbl> <chr>    <chr>             <dbl>
##  1     1 Sam      Observer              8
##  2     1 Mara     Scorekeeper           8
##  3     1 Jesse    Observer              8
##  4     1 Tom      Observer              8
##  5     2 Mike     Observer             14
##  6     2 Rachael  Observer             14
##  7     2 Sydekea  Scorekeeper          14
##  8     2 Gabriela Observer             14
##  9     3 Derrick  Observer              9
## 10     3 Kara     Scorekeeper           9
## 11     3 Emily    Observer              9
## 12     3 Danielle Observer              9

# Menggunakan `.direction = "updown"` memberikan hasil yang sama dalam contoh ini

# replace_na(): example data with one NA in each column.
dfc <- tibble(
  x = c(1, 2, NA),
  y = c("a", NA, "b")
)
print(dfc)

## # A tibble: 3 x 2
##       x y    
##   <dbl> <chr>
## 1     1 a    
## 2     2 <NA> 
## 3    NA b

# Replace NA with 0 in `x` and with "unknown" in `y`.
replace_na(dfc, list(x = 0, y = "unknown"))

## # A tibble: 3 x 2
##       x y      
##   <dbl> <chr>  
## 1     1 a      
## 2     2 unknown
## 3     0 b

# Replace NAs in a vector: here the NA in column `x` becomes 0 inside
# mutate(), leaving `y` untouched.
dplyr::mutate(dfc, x = replace_na(x, 0))

## # A tibble: 3 x 2
##       x y    
##   <dbl> <chr>
## 1     1 a    
## 2     2 <NA> 
## 3     0 b

# The same replacement applied directly to the extracted vector `x`.
replace_na(dfc$x, 0)

## [1] 1 2 0

# Replacing NULL in a list-column: NULL is the list-column equivalent of NA.
df_list <- tibble(
  z = list(seq_len(5), NULL, seq(10L, 20L))
)
print(df_list)

## # A tibble: 3 x 1
##   z         
##   <list>    
## 1 <int [5]> 
## 2 <NULL>    
## 3 <int [11]>

# The NULL entry of `z` is replaced by the single value 5.
replace_na(df_list, list(z = list(5)))

## # A tibble: 3 x 1
##   z         
##   <list>    
## 1 <int [5]> 
## 2 <dbl [1]> 
## 3 <int [11]>

Tidyr

Qorinul Huda

January 22, 2021