library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
library(tidyverse)
Salah satu cara mengimpor data adalah menggunakan fungsi
read_csv() dari package readr.
# Import dem_score.csv from the ModernDive site into a tibble. Column types
# are not specified here, so read_csv() guesses them and reports the guess
# in a message.
dem_score <- read_csv(
  file = "https://moderndive.com/data/dem_score.csv"
)
## Rows: 96 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (9): 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the imported tibble to inspect its columns and first rows.
dem_score
## # A tibble: 96 × 10
## country `1952` `1957` `1962` `1967` `1972` `1977` `1982` `1987` `1992`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Albania -9 -9 -9 -9 -9 -9 -9 -9 5
## 2 Argentina -9 -1 -1 -9 -9 -9 -8 8 7
## 3 Armenia -9 -7 -7 -7 -7 -7 -7 -7 7
## 4 Australia 10 10 10 10 10 10 10 10 10
## 5 Austria 10 10 10 10 10 10 10 10 10
## 6 Azerbaijan -9 -7 -7 -7 -7 -7 -7 -7 1
## 7 Belarus -9 -7 -7 -7 -7 -7 -7 -7 7
## 8 Belgium 10 10 10 10 10 10 10 10 10
## 9 Bhutan -10 -10 -10 -10 -10 -10 -10 -10 -10
## 10 Bolivia -4 -3 -3 -4 -7 -7 8 9 9
## # ℹ 86 more rows
Nilai skor demokrasi berkisar dari -10 (sangat otokratis) hingga 10 (sangat demokratis).
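Karakteristik utama tidy data: setiap variabel membentuk satu kolom, setiap observasi membentuk satu baris, dan setiap jenis unit observasi membentuk satu tabel.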
Dengan format ini, analisis data dan visualisasi dapat dilakukan lebih mudah.
Contoh data berikut tidak tidy:
# Build the example table row by row: one row per country, with one numeric
# column per drink type (beer, spirit, wine). This is the wide layout that
# gets reshaped later.
drinks <- tribble(
  ~country,       ~beer, ~spirit, ~wine,
  "China",           79,     192,     8,
  "Italy",           85,      42,   237,
  "Saudi Arabia",     0,       5,     0,
  "USA",            249,     158,    84
)
# Auto-print to show the wide layout.
drinks
## # A tibble: 4 × 4
## country beer spirit wine
## <chr> <dbl> <dbl> <dbl>
## 1 China 79 192 8
## 2 Italy 85 42 237
## 3 Saudi Arabia 0 5 0
## 4 USA 249 158 84
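Masalahnya: jenis minuman (beer, spirit, wine) tersebar sebagai nama kolom, padahal seharusnya menjadi nilai dari satu variabel, dan jumlah sajian tersebar di tiga kolom alih-alih berada dalam satu kolom.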
Kita dapat menggunakan pivot_longer() dari package
tidyr.
# Reshape `drinks` from wide to long: the three drink columns collapse into
# a `type` column (holding the former column names) and a `servings` column
# (holding the former cell values), giving one row per country-and-type pair.
drinks_tidy <- pivot_longer(
  drinks,
  cols = c(beer, spirit, wine),
  names_to = "type",
  values_to = "servings"
)
# Auto-print to show the long layout.
drinks_tidy
## # A tibble: 12 × 3
## country type servings
## <chr> <chr> <dbl>
## 1 China beer 79
## 2 China spirit 192
## 3 China wine 8
## 4 Italy beer 85
## 5 Italy spirit 42
## 6 Italy wine 237
## 7 Saudi Arabia beer 0
## 8 Saudi Arabia spirit 5
## 9 Saudi Arabia wine 0
## 10 USA beer 249
## 11 USA spirit 158
## 12 USA wine 84
Sebagian besar workflow data science di R menggunakan format tidy.