library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
| Fungsi dalam dplyr | Fungsi dalam SQL | Keterangan |
|---|---|---|
| select() | SELECT | Menyeleksi kolom variabel |
| filter() | WHERE | Menyaring (filter) baris |
| group_by() | GROUP_BY | Mengelompokkan data |
| summarise() | tidak ada | Merangkum data |
| arrange() | ORDER_BY | Mengurutkan data |
| mutate() | COLUMN ALIAS | Membuat kolom baru |
| join() | JOIN | Menggabungkan data frame |
Gambar 2 data
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
3, "x3"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
4, "y3"
)
x
## # A tibble: 3 x 2
## key val_x
## <dbl> <chr>
## 1 1 x1
## 2 2 x2
## 3 3 x3
y
## # A tibble: 3 x 2
## key val_y
## <dbl> <chr>
## 1 1 y1
## 2 2 y2
## 3 4 y3
Gambar Relational 2 data terhadap variabel “key” dan hasil relational nya
x %>%
inner_join(y, by = "key")
## # A tibble: 2 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
Gambar Relational 2 data dengan Left, Right, Full dan hasil relational
Gambar Relational 2 data dengan left_joint dan hasil relational
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
3, "x3"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
4, "y3"
)
left_join(x,y)
## Joining, by = "key"
## # A tibble: 3 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 3 x3 <NA>
Gambar Relational 2 data dengan right_joint dan hasil relational
right_join(x,y)
## Joining, by = "key"
## # A tibble: 3 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 4 <NA> y3
Gambar Relational 2 data dengan full_joint dan hasil relational
full_join(x,y)
## Joining, by = "key"
## # A tibble: 4 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 3 x3 <NA>
## 4 4 <NA> y3
Gambar Relational 2 data dengan diagram Venn
Gambar Duplicate Key 2 data
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
1, "x4"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2"
)
left_join(x, y, by = "key")
## # A tibble: 4 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 2 x3 y2
## 4 1 x4 y1
Gambar Duplicate Key antar data set
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
3, "x4"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
2, "y3",
3, "y4"
)
left_join(x, y, by = "key")
## # A tibble: 6 x 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 2 x2 y3
## 4 2 x3 y2
## 5 2 x3 y3
## 6 3 x4 y4
Gambar Relational semi-joint 2 data
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
3, "x3"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
4, "y3"
)
semi_join(x, y, by = "key")
## # A tibble: 2 x 2
## key val_x
## <dbl> <chr>
## 1 1 x1
## 2 2 x2
Gambar semi-joint antar 2 data set
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
3, "x4"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
2, "y3",
3, "y4"
)
semi_join(x, y, by = "key")
## # A tibble: 4 x 2
## key val_x
## <dbl> <chr>
## 1 1 x1
## 2 2 x2
## 3 2 x3
## 4 3 x4
Gambar anti-joint antar 2 data
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
3, "x3"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
4, "y3"
)
anti_join(x, y, by = "key")
## # A tibble: 1 x 2
## key val_x
## <dbl> <chr>
## 1 3 x3
df1 <- tribble(
~x, ~y,
1, 1,
2, 1
)
df2 <- tribble(
~x, ~y,
1, 1,
1, 2
)
df1
## # A tibble: 2 x 2
## x y
## <dbl> <dbl>
## 1 1 1
## 2 2 1
df2
## # A tibble: 2 x 2
## x y
## <dbl> <dbl>
## 1 1 1
## 2 1 2
intersect(df1, df2)
## # A tibble: 1 x 2
## x y
## <dbl> <dbl>
## 1 1 1
union(df1, df2)
## # A tibble: 3 x 2
## x y
## <dbl> <dbl>
## 1 1 1
## 2 2 1
## 3 1 2
setdiff(df1, df2)
## # A tibble: 1 x 2
## x y
## <dbl> <dbl>
## 1 2 1
setdiff(df2, df1)
## # A tibble: 1 x 2
## x y
## <dbl> <dbl>
## 1 1 2
| dplyr | merge |
|---|---|
| inner_join(x, y) | merge(x, y) |
| left_join(x, y) | merge(x, y, all.x = TRUE) |
| right_join(x, y) | merge(x, y, all.y = TRUE) |
| full_join(x, y) | merge(x, y, all.x = TRUE, all.y = TRUE) |
| dplyr | SQL |
|---|---|
| inner_join(x, y) | SELECT * FROM x INNER JOIN y USING (z) |
| left_join(x, y) | SELECT * FROM x LEFT OUTER JOIN y USING (z) |
| right_join(x, y) | SELECT * FROM x RIGHT OUTER JOIN y USING (z) |
| full_join(x, y) | SELECT * FROM x FULL OUTER JOIN y USING (z) |