Import library
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(datasets)
Import dan menampilkan dataset
# Load the built-in PlantGrowth dataset and convert it to a tibble.
data("PlantGrowth")
# as_tibble() replaces as.tibble(), which has been deprecated since
# tibble 2.0.0 and emits a lifecycle warning when called.
PlantGrowth <- tibble::as_tibble(PlantGrowth)
PlantGrowth
## # A tibble: 30 × 2
## weight group
## <dbl> <fct>
## 1 4.17 ctrl
## 2 5.58 ctrl
## 3 5.18 ctrl
## 4 6.11 ctrl
## 5 4.5 ctrl
## 6 4.61 ctrl
## 7 5.17 ctrl
## 8 4.53 ctrl
## 9 5.33 ctrl
## 10 5.14 ctrl
## # ℹ 20 more rows
Agregasi data: menghitung rata-rata berat dengan summarize()
# Collapse the table to a single row holding the mean of `weight`.
summarize(PlantGrowth, mean = mean(weight))
## # A tibble: 1 × 1
## mean
## <dbl>
## 1 5.07
Mengurutkan data secara menaik (ascending) berdasarkan berat
# Order the rows by `weight`, smallest first (arrange() sorts ascending).
arrange(PlantGrowth, weight)
## # A tibble: 30 × 2
## weight group
## <dbl> <fct>
## 1 3.59 trt1
## 2 3.83 trt1
## 3 4.17 ctrl
## 4 4.17 trt1
## 5 4.32 trt1
## 6 4.41 trt1
## 7 4.5 ctrl
## 8 4.53 ctrl
## 9 4.61 ctrl
## 10 4.69 trt1
## # ℹ 20 more rows
Memfilter data berdasarkan grup “trt1”
# Keep only the rows whose `group` is "trt1". The verb is namespace-qualified
# because dplyr::filter() masks stats::filter().
dplyr::filter(PlantGrowth, group == "trt1")
## # A tibble: 10 × 2
## weight group
## <dbl> <fct>
## 1 4.81 trt1
## 2 4.17 trt1
## 3 4.41 trt1
## 4 3.59 trt1
## 5 5.87 trt1
## 6 3.83 trt1
## 7 6.03 trt1
## 8 4.89 trt1
## 9 4.32 trt1
## 10 4.69 trt1
Feature engineering: menambahkan kolom baru weight_grams dengan mutate()
# Add a derived column `weight_grams` equal to `weight` times 1000.
# NOTE(review): this presumes `weight` is recorded in kilograms; the unit is
# not shown here -- confirm before relying on the column name.
mutated_plantgrowth <- mutate(PlantGrowth, weight_grams = weight * 1000)
mutated_plantgrowth
## # A tibble: 30 × 3
## weight group weight_grams
## <dbl> <fct> <dbl>
## 1 4.17 ctrl 4170
## 2 5.58 ctrl 5580
## 3 5.18 ctrl 5180
## 4 6.11 ctrl 6110
## 5 4.5 ctrl 4500
## 6 4.61 ctrl 4610
## 7 5.17 ctrl 5170
## 8 4.53 ctrl 4530
## 9 5.33 ctrl 5330
## 10 5.14 ctrl 5140
## # ℹ 20 more rows
Select kolom weight dan group
# Return only the `weight` and `group` columns, in that order.
select(PlantGrowth, weight, group)
## # A tibble: 30 × 2
## weight group
## <dbl> <fct>
## 1 4.17 ctrl
## 2 5.58 ctrl
## 3 5.18 ctrl
## 4 6.11 ctrl
## 5 4.5 ctrl
## 6 4.61 ctrl
## 7 5.17 ctrl
## 8 4.53 ctrl
## 9 5.33 ctrl
## 10 5.14 ctrl
## # ℹ 20 more rows
Menggabungkan filter() dan arrange(): data grup “trt1” diurutkan menurun (descending) berdasarkan berat
# Restrict to the "trt1" group, then order those rows from heaviest to
# lightest. Verbs are namespace-qualified so masked base/stats functions
# with the same names can never be picked up.
result1 <- PlantGrowth %>%
  dplyr::filter(group == "trt1") %>%
  dplyr::arrange(dplyr::desc(weight))
result1
## # A tibble: 10 × 2
## weight group
## <dbl> <fct>
## 1 6.03 trt1
## 2 5.87 trt1
## 3 4.89 trt1
## 4 4.81 trt1
## 5 4.69 trt1
## 6 4.41 trt1
## 7 4.32 trt1
## 8 4.17 trt1
## 9 3.83 trt1
## 10 3.59 trt1