# Attach the packages used by this section up front so every verb below is
# available before the data is touched.
library(datasets)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the built-in CO2 dataset and convert it to a tibble; from here on the
# name CO2 refers to the tibble version.
data("CO2")
CO2 <- tibble::as_tibble(CO2)
class(CO2)
## [1] "tbl_df"     "tbl"        "data.frame"

# View() opens a data viewer window, which only makes sense when a person is
# driving the session; skip it when the script is run or knitted
# non-interactively.
if (interactive()) {
  View(CO2)
}

# Preview the first rows and the column types.
head(CO2)
## # A tibble: 6 × 5
##   Plant Type   Treatment   conc uptake
##   <ord> <fct>  <fct>      <dbl>  <dbl>
## 1 Qn1   Quebec nonchilled    95   16  
## 2 Qn1   Quebec nonchilled   175   30.4
## 3 Qn1   Quebec nonchilled   250   34.8
## 4 Qn1   Quebec nonchilled   350   37.2
## 5 Qn1   Quebec nonchilled   500   35.3
## 6 Qn1   Quebec nonchilled   675   39.2
glimpse(CO2)
## Rows: 84
## Columns: 5
## $ Plant     <ord> Qn1, Qn1, Qn1, Qn1, Qn1, Qn1, Qn1, Qn2, Qn2, Qn2, Qn2, Qn2, …
## $ Type      <fct> Quebec, Quebec, Quebec, Quebec, Quebec, Quebec, Quebec, Queb…
## $ Treatment <fct> nonchilled, nonchilled, nonchilled, nonchilled, nonchilled, …
## $ conc      <dbl> 95, 175, 250, 350, 500, 675, 1000, 95, 175, 250, 350, 500, 6…
## $ uptake    <dbl> 16.0, 30.4, 34.8, 37.2, 35.3, 39.2, 39.7, 13.6, 27.3, 37.1, …

Filtering

Mengambil data CO2 dengan uptake lebih dari 34.0

# Keep only the observations whose uptake exceeds 34.0, then preview the
# first rows of the result.
filtered_CO2 <- CO2 %>%
  filter(uptake > 34.0)
head(filtered_CO2)
## # A tibble: 6 × 5
##   Plant Type   Treatment   conc uptake
##   <ord> <fct>  <fct>      <dbl>  <dbl>
## 1 Qn1   Quebec nonchilled   250   34.8
## 2 Qn1   Quebec nonchilled   350   37.2
## 3 Qn1   Quebec nonchilled   500   35.3
## 4 Qn1   Quebec nonchilled   675   39.2
## 5 Qn1   Quebec nonchilled  1000   39.7
## 6 Qn2   Quebec nonchilled   250   37.1

Mengambil data CO2 di mana Plant sama dengan “Qn2”

# Show the rows that belong to plant "Qn2".
filter(CO2, Plant == "Qn2")
## # A tibble: 7 × 5
##   Plant Type   Treatment   conc uptake
##   <ord> <fct>  <fct>      <dbl>  <dbl>
## 1 Qn2   Quebec nonchilled    95   13.6
## 2 Qn2   Quebec nonchilled   175   27.3
## 3 Qn2   Quebec nonchilled   250   37.1
## 4 Qn2   Quebec nonchilled   350   41.8
## 5 Qn2   Quebec nonchilled   500   40.6
## 6 Qn2   Quebec nonchilled   675   41.4
## 7 Qn2   Quebec nonchilled  1000   44.3

Mengambil kolom Plant, Type, dan conc dari data CO2

# Keep just the Plant, Type and conc columns.
select(CO2, Plant, Type, conc)
## # A tibble: 84 × 3
##    Plant Type    conc
##    <ord> <fct>  <dbl>
##  1 Qn1   Quebec    95
##  2 Qn1   Quebec   175
##  3 Qn1   Quebec   250
##  4 Qn1   Quebec   350
##  5 Qn1   Quebec   500
##  6 Qn1   Quebec   675
##  7 Qn1   Quebec  1000
##  8 Qn2   Quebec    95
##  9 Qn2   Quebec   175
## 10 Qn2   Quebec   250
## # ℹ 74 more rows

Mengambil data CO2 kecuali kolom Treatment dan conc, sehingga data yang dihasilkan akan memuat semua kolom kecuali kedua kolom tersebut.

# Drop the Treatment and conc columns, keeping every other column.
select(CO2, -c(Treatment, conc))
## # A tibble: 84 × 3
##    Plant Type   uptake
##    <ord> <fct>   <dbl>
##  1 Qn1   Quebec   16  
##  2 Qn1   Quebec   30.4
##  3 Qn1   Quebec   34.8
##  4 Qn1   Quebec   37.2
##  5 Qn1   Quebec   35.3
##  6 Qn1   Quebec   39.2
##  7 Qn1   Quebec   39.7
##  8 Qn2   Quebec   13.6
##  9 Qn2   Quebec   27.3
## 10 Qn2   Quebec   37.1
## # ℹ 74 more rows

Arranging

Mengurutkan data uptake dari nilai terkecil

# Sort the rows by uptake, smallest value first.
arrange(CO2, uptake)
## # A tibble: 84 × 5
##    Plant Type        Treatment   conc uptake
##    <ord> <fct>       <fct>      <dbl>  <dbl>
##  1 Mc2   Mississippi chilled       95    7.7
##  2 Qc2   Quebec      chilled       95    9.3
##  3 Mc1   Mississippi chilled       95   10.5
##  4 Mn1   Mississippi nonchilled    95   10.6
##  5 Mc3   Mississippi chilled       95   10.6
##  6 Mn3   Mississippi nonchilled    95   11.3
##  7 Mc2   Mississippi chilled      175   11.4
##  8 Mn2   Mississippi nonchilled    95   12  
##  9 Mc2   Mississippi chilled      250   12.3
## 10 Mc2   Mississippi chilled      500   12.5
## # ℹ 74 more rows

Mengurutkan data uptake dari nilai terbesar

# Sort the rows by uptake, largest value first.
arrange(CO2, desc(uptake))
## # A tibble: 84 × 5
##    Plant Type   Treatment   conc uptake
##    <ord> <fct>  <fct>      <dbl>  <dbl>
##  1 Qn3   Quebec nonchilled  1000   45.5
##  2 Qn2   Quebec nonchilled  1000   44.3
##  3 Qn3   Quebec nonchilled   675   43.9
##  4 Qn3   Quebec nonchilled   500   42.9
##  5 Qc2   Quebec chilled     1000   42.4
##  6 Qn3   Quebec nonchilled   350   42.1
##  7 Qn2   Quebec nonchilled   350   41.8
##  8 Qn2   Quebec nonchilled   675   41.4
##  9 Qc3   Quebec chilled     1000   41.4
## 10 Qn2   Quebec nonchilled   500   40.6
## # ℹ 74 more rows

Reshaping

library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.2

Mengubah dataset dari format “wide” ke format “long” dengan cara menggabungkan nilai kolom conc dan uptake ke dalam satu kolom baru bernama Value, sambil menyimpan nama kolom asli dalam kolom Measurement

# Stack the conc and uptake columns into name/value pairs: the original
# column name goes to Measurement and its value goes to Value.
long_CO2 <- CO2 %>%
  pivot_longer(
    cols = c(conc, uptake),
    names_to = "Measurement",
    values_to = "Value"
  )
head(long_CO2)
## # A tibble: 6 × 5
##   Plant Type   Treatment  Measurement Value
##   <ord> <fct>  <fct>      <chr>       <dbl>
## 1 Qn1   Quebec nonchilled conc         95  
## 2 Qn1   Quebec nonchilled uptake       16  
## 3 Qn1   Quebec nonchilled conc        175  
## 4 Qn1   Quebec nonchilled uptake       30.4
## 5 Qn1   Quebec nonchilled conc        250  
## 6 Qn1   Quebec nonchilled uptake       34.8

Merging

Menambahkan informasi baru dari dataset lain (additional_data) ke dataset utama (CO2) berdasarkan kecocokan nilai di kolom kunci (Treatment).

# Lookup table with one row per treatment. The key is built as a factor with
# the same levels as CO2$Treatment: joining a factor key to a character key
# makes dplyr coerce the result to character, which silently discards the
# factor levels of Treatment.
additional_data <- data.frame(
  Treatment = factor(
    c("nonchilled", "chilled"),
    levels = levels(CO2$Treatment)
  ),
  Info = c("Type A", "Type B")
)

# Attach Info to every CO2 row by matching on Treatment. Declaring the
# relationship makes the join fail loudly if the lookup table ever contains
# a duplicated Treatment, instead of quietly multiplying CO2 rows.
merged_CO2 <- left_join(
  CO2,
  additional_data,
  by = "Treatment",
  relationship = "many-to-one"
)
head(merged_CO2)
## # A tibble: 6 × 6
##   Plant Type   Treatment   conc uptake Info  
##   <ord> <fct>  <fct>      <dbl>  <dbl> <chr> 
## 1 Qn1   Quebec nonchilled    95   16   Type A
## 2 Qn1   Quebec nonchilled   175   30.4 Type A
## 3 Qn1   Quebec nonchilled   250   34.8 Type A
## 4 Qn1   Quebec nonchilled   350   37.2 Type A
## 5 Qn1   Quebec nonchilled   500   35.3 Type A
## 6 Qn1   Quebec nonchilled   675   39.2 Type A

Aggregating

Menghitung rata-rata (mean) uptake untuk setiap Plant

# Average uptake for each plant; the result has one row per Plant with the
# average stored in a column named `mean`.
CO2 %>%
  group_by(Plant) %>%
  summarise(mean = mean(uptake))
## # A tibble: 12 × 2
##    Plant  mean
##    <ord> <dbl>
##  1 Qn1    33.2
##  2 Qn2    35.2
##  3 Qn3    37.6
##  4 Qc1    30.0
##  5 Qc3    32.6
##  6 Qc2    32.7
##  7 Mn3    24.1
##  8 Mn2    27.3
##  9 Mn1    26.4
## 10 Mc2    12.1
## 11 Mc3    17.3
## 12 Mc1    18

Feature Engineering

Memfilter data untuk menampilkan data CO2 di mana Type adalah “Quebec”

# Show only the rows whose Type is "Quebec".
filter(CO2, Type == "Quebec")
## # A tibble: 42 × 5
##    Plant Type   Treatment   conc uptake
##    <ord> <fct>  <fct>      <dbl>  <dbl>
##  1 Qn1   Quebec nonchilled    95   16  
##  2 Qn1   Quebec nonchilled   175   30.4
##  3 Qn1   Quebec nonchilled   250   34.8
##  4 Qn1   Quebec nonchilled   350   37.2
##  5 Qn1   Quebec nonchilled   500   35.3
##  6 Qn1   Quebec nonchilled   675   39.2
##  7 Qn1   Quebec nonchilled  1000   39.7
##  8 Qn2   Quebec nonchilled    95   13.6
##  9 Qn2   Quebec nonchilled   175   27.3
## 10 Qn2   Quebec nonchilled   250   37.1
## # ℹ 32 more rows

Mengambil data CO2 kecuali kolom Type dan Treatment, kemudian membuat variabel baru bernama “efficiency” yang merupakan hasil bagi variabel conc terhadap uptake (conc / uptake)

# Drop Type and Treatment, then add `efficiency`, computed as conc divided
# by uptake for each row.
CO2baru <- CO2 %>%
  select(-c(Type, Treatment)) %>%
  mutate(efficiency = conc / uptake)
CO2baru
## # A tibble: 84 × 4
##    Plant  conc uptake efficiency
##    <ord> <dbl>  <dbl>      <dbl>
##  1 Qn1      95   16         5.94
##  2 Qn1     175   30.4       5.76
##  3 Qn1     250   34.8       7.18
##  4 Qn1     350   37.2       9.41
##  5 Qn1     500   35.3      14.2 
##  6 Qn1     675   39.2      17.2 
##  7 Qn1    1000   39.7      25.2 
##  8 Qn2      95   13.6       6.99
##  9 Qn2     175   27.3       6.41
## 10 Qn2     250   37.1       6.74
## # ℹ 74 more rows

Penggunaan 2 fungsi bersamaan

Memfilter data CO2 untuk mendapatkan data uptake lebih dari 34.0, kemudian mengurutkan hasilnya berdasarkan data conc secara descending

# Two verbs combined: first keep the rows with uptake above 34.0, then order
# what is left by conc from highest to lowest.
result1 <- arrange(
  filter(CO2, uptake > 34.0),
  desc(conc)
)
result1
## # A tibble: 28 × 5
##    Plant Type        Treatment   conc uptake
##    <ord> <fct>       <fct>      <dbl>  <dbl>
##  1 Qn1   Quebec      nonchilled  1000   39.7
##  2 Qn2   Quebec      nonchilled  1000   44.3
##  3 Qn3   Quebec      nonchilled  1000   45.5
##  4 Qc1   Quebec      chilled     1000   38.7
##  5 Qc2   Quebec      chilled     1000   42.4
##  6 Qc3   Quebec      chilled     1000   41.4
##  7 Mn1   Mississippi nonchilled  1000   35.5
##  8 Qn1   Quebec      nonchilled   675   39.2
##  9 Qn2   Quebec      nonchilled   675   41.4
## 10 Qn3   Quebec      nonchilled   675   43.9
## # ℹ 18 more rows