library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(datasets)
data(airquality)
# Convert the base data frame to a tibble. as_tibble() replaces as.tibble(),
# which has been deprecated since tibble 2.0.0 and raised a lifecycle warning.
airquality <- tibble::as_tibble(airquality)
library(dplyr)
# Confirm the conversion: the object should now carry the tibble classes.
airquality %>% class()
## [1] "tbl_df"     "tbl"        "data.frame"
# Open the data viewer, then print the first six rows.
view(airquality)
airquality %>% head(n = 6)
## # A tibble: 6 × 6
##   Ozone Solar.R  Wind  Temp Month   Day
##   <int>   <int> <dbl> <int> <int> <int>
## 1    41     190   7.4    67     5     1
## 2    36     118   8      72     5     2
## 3    12     149  12.6    74     5     3
## 4    18     313  11.5    62     5     4
## 5    NA      NA  14.3    56     5     5
## 6    28      NA  14.9    66     5     6
# Compact column-wise overview: one line per column with its type and values.
airquality %>% glimpse()
## Rows: 153
## Columns: 6
## $ Ozone   <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind    <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp    <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month   <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…

Data Filtering

Mengambil data kualitas udara dengan temperatur lebih dari 60.0 derajat Fahrenheit

# Keep only the observations whose Temp is above 60, then preview the result.
filtered_airquality <- airquality %>%
  filter(Temp > 60)
filtered_airquality %>% head()
## # A tibble: 6 × 6
##   Ozone Solar.R  Wind  Temp Month   Day
##   <int>   <int> <dbl> <int> <int> <int>
## 1    41     190   7.4    67     5     1
## 2    36     118   8      72     5     2
## 3    12     149  12.6    74     5     3
## 4    18     313  11.5    62     5     4
## 5    28      NA  14.9    66     5     6
## 6    23     299   8.6    65     5     7

Mengambil data bulan ke-5 (Month == 5), lalu memilih kolom Ozone, Solar.R, Wind, Temp, Month, Day

# Rows recorded in month 5 only.
filter(airquality, Month == 5)
## # A tibble: 31 × 6
##    Ozone Solar.R  Wind  Temp Month   Day
##    <int>   <int> <dbl> <int> <int> <int>
##  1    41     190   7.4    67     5     1
##  2    36     118   8      72     5     2
##  3    12     149  12.6    74     5     3
##  4    18     313  11.5    62     5     4
##  5    NA      NA  14.3    56     5     5
##  6    28      NA  14.9    66     5     6
##  7    23     299   8.6    65     5     7
##  8    19      99  13.8    59     5     8
##  9     8      19  20.1    61     5     9
## 10    NA     194   8.6    69     5    10
## # ℹ 21 more rows
# Pick the six columns explicitly by name.
select(airquality, Ozone, Solar.R, Wind, Temp, Month, Day)
## # A tibble: 153 × 6
##    Ozone Solar.R  Wind  Temp Month   Day
##    <int>   <int> <dbl> <int> <int> <int>
##  1    41     190   7.4    67     5     1
##  2    36     118   8      72     5     2
##  3    12     149  12.6    74     5     3
##  4    18     313  11.5    62     5     4
##  5    NA      NA  14.3    56     5     5
##  6    28      NA  14.9    66     5     6
##  7    23     299   8.6    65     5     7
##  8    19      99  13.8    59     5     8
##  9     8      19  20.1    61     5     9
## 10    NA     194   8.6    69     5    10
## # ℹ 143 more rows
# Drop Solar.R and Day, keeping every other column.
select(airquality, -c(Solar.R, Day))
## # A tibble: 153 × 4
##    Ozone  Wind  Temp Month
##    <int> <dbl> <int> <int>
##  1    41   7.4    67     5
##  2    36   8      72     5
##  3    12  12.6    74     5
##  4    18  11.5    62     5
##  5    NA  14.3    56     5
##  6    28  14.9    66     5
##  7    23   8.6    65     5
##  8    19  13.8    59     5
##  9     8  20.1    61     5
## 10    NA   8.6    69     5
## # ℹ 143 more rows

Data Arranging

Mengurutkan berdasarkan peubah Wind dari nilai terkecil

# Sort the observations by Wind, smallest value first.
arrange(airquality, Wind)
## # A tibble: 153 × 6
##    Ozone Solar.R  Wind  Temp Month   Day
##    <int>   <int> <dbl> <int> <int> <int>
##  1    NA      59   1.7    76     6    22
##  2   118     225   2.3    94     8    29
##  3    73     183   2.8    93     9     3
##  4   168     238   3.4    81     8    25
##  5   122     255   4      89     8     7
##  6   135     269   4.1    84     7     1
##  7    NA      91   4.6    76     6    23
##  8    64     175   4.6    83     7     5
##  9    66      NA   4.6    87     8     6
## 10    91     189   4.6    93     9     4
## # ℹ 143 more rows

Mengurutkan berdasarkan peubah Wind dari nilai terbesar

# Sort the observations by Wind, largest value first.
arrange(airquality, desc(Wind))
## # A tibble: 153 × 6
##    Ozone Solar.R  Wind  Temp Month   Day
##    <int>   <int> <dbl> <int> <int> <int>
##  1    37     284  20.7    72     6    17
##  2     8      19  20.1    61     5     9
##  3     6      78  18.4    57     5    18
##  4    11     320  16.6    73     5    22
##  5    NA      66  16.6    57     5    25
##  6    14      20  16.6    63     9    25
##  7    NA     242  16.1    67     6     3
##  8    21     259  15.5    77     8    21
##  9    32      92  15.5    84     9     6
## 10    21     259  15.5    76     9    12
## # ℹ 143 more rows

Data Reshaping

Mengubah data dari format wide ke long

# Reshape wide -> long: each listed column becomes a (Measurement, Value) pair.
# NOTE(review): Month and Day are pivoted as well, so the long rows carry no
# date key of their own; confirm this is intended.
long_airquality <- airquality %>%
  pivot_longer(
    cols = c(Ozone, Solar.R, Wind, Temp, Month, Day),
    names_to = "Measurement",
    values_to = "Value"
  )
long_airquality %>% head()
## # A tibble: 6 × 2
##   Measurement Value
##   <chr>       <dbl>
## 1 Ozone        41  
## 2 Solar.R     190  
## 3 Wind          7.4
## 4 Temp         67  
## 5 Month         5  
## 6 Day           1

Data Merging

Menambahkan contoh data eksternal

# Lookup table with one label per month. The keys are integers (5:9) so they
# match the integer type of airquality$Month; with double keys the join
# silently promoted Month to <dbl> in the merged result.
additional_data <- data.frame(
  Month = 5:9,
  Info = c("Type A", "Type B", "Type C", "Type D", "Type E")
)
# Attach the matching label to every observation; all airquality rows are kept.
merged_airquality <- left_join(airquality, additional_data, by = "Month")
head(merged_airquality)
## # A tibble: 6 × 7
##   Ozone Solar.R  Wind  Temp Month   Day Info  
##   <int>   <int> <dbl> <int> <int> <int> <chr> 
## 1    41     190   7.4    67     5     1 Type A
## 2    36     118   8      72     5     2 Type A
## 3    12     149  12.6    74     5     3 Type A
## 4    18     313  11.5    62     5     4 Type A
## 5    NA      NA  14.3    56     5     5 Type A
## 6    28      NA  14.9    66     5     6 Type A

Data Aggregating

Menghitung rata-rata Temp tiap bulan

# Average Temp for each month; the result has one row per Month value.
airquality %>%
  group_by(Month) %>%
  summarise(mean = mean(Temp))
## # A tibble: 5 × 2
##   Month  mean
##   <int> <dbl>
## 1     5  65.5
## 2     6  79.1
## 3     7  83.9
## 4     8  84.0
## 5     9  76.9

Feature Engineering (Variable Transformation)

Menambah peubah baru

# Rows recorded in month 6 only.
filter(airquality, Month == 6)
## # A tibble: 30 × 6
##    Ozone Solar.R  Wind  Temp Month   Day
##    <int>   <int> <dbl> <int> <int> <int>
##  1    NA     286   8.6    78     6     1
##  2    NA     287   9.7    74     6     2
##  3    NA     242  16.1    67     6     3
##  4    NA     186   9.2    84     6     4
##  5    NA     220   8.6    85     6     5
##  6    NA     264  14.3    79     6     6
##  7    29     127   9.7    82     6     7
##  8    NA     273   6.9    87     6     8
##  9    71     291  13.8    90     6     9
## 10    39     323  11.5    87     6    10
## # ℹ 20 more rows
# Drop the Month and Day columns, then derive `quality` as Ozone minus Wind.
airqualitybaru <- airquality %>%
  select(-c(Month, Day)) %>%
  mutate(quality = Ozone - Wind)
print(airqualitybaru)
## # A tibble: 153 × 5
##    Ozone Solar.R  Wind  Temp quality
##    <int>   <int> <dbl> <int>   <dbl>
##  1    41     190   7.4    67  33.6  
##  2    36     118   8      72  28    
##  3    12     149  12.6    74  -0.600
##  4    18     313  11.5    62   6.5  
##  5    NA      NA  14.3    56  NA    
##  6    28      NA  14.9    66  13.1  
##  7    23     299   8.6    65  14.4  
##  8    19      99  13.8    59   5.2  
##  9     8      19  20.1    61 -12.1  
## 10    NA     194   8.6    69  NA    
## # ℹ 143 more rows
# Add Impact_Solar_Ozone, the product of Ozone and Solar.R; rows where either
# input is NA yield NA.
engineered_airquality <- airquality %>%
  mutate(Impact_Solar_Ozone = Ozone * Solar.R)
engineered_airquality %>% head()
## # A tibble: 6 × 7
##   Ozone Solar.R  Wind  Temp Month   Day Impact_Solar_Ozone
##   <int>   <int> <dbl> <int> <int> <int>              <int>
## 1    41     190   7.4    67     5     1               7790
## 2    36     118   8      72     5     2               4248
## 3    12     149  12.6    74     5     3               1788
## 4    18     313  11.5    62     5     4               5634
## 5    NA      NA  14.3    56     5     5                 NA
## 6    28      NA  14.9    66     5     6                 NA

Membuat variabel baru yang merupakan hasil perkalian Ozone dengan Solar.R

Penggunaan 2 Fungsi Bersamaan

Menyaring data airquality untuk mendapatkan nilai Wind lebih besar dari 10.0 mph, kemudian mengurutkan hasilnya berdasarkan Ozone secara descending.

# Keep the observations with Wind above 10, ordered by Ozone from highest to
# lowest, then print the result.
result1 <- arrange(
  filter(airquality, Wind > 10),
  desc(Ozone)
)
print(result1)
## # A tibble: 72 × 6
##    Ozone Solar.R  Wind  Temp Month   Day
##    <int>   <int> <dbl> <int> <int> <int>
##  1    89     229  10.3    90     8     8
##  2    71     291  13.8    90     6     9
##  3    63     220  11.5    85     7    20
##  4    52      82  12      86     7    27
##  5    45     252  14.9    81     5    29
##  6    44     192  11.5    86     8    12
##  7    44     190  10.3    78     8    20
##  8    44     236  14.9    81     9    11
##  9    40     314  10.9    83     7     6
## 10    39     323  11.5    87     6    10
## # ℹ 62 more rows

Membuat kolom baru yang merupakan hasil pengurangan Ozone dengan Wind yang disebut sebagai Quality, lalu memilih hanya beberapa kolom tertentu untuk ditampilkan

# Derive Quality (Ozone minus Wind) and keep it among the displayed columns.
# The earlier select() listed only Ozone, Temp and Month, so the column that
# mutate() had just created was discarded immediately.
result2 <- airquality %>%
  mutate(Quality = Ozone - Wind) %>%
  select(Ozone, Temp, Month, Quality)

head(result2)
## # A tibble: 6 × 4
##   Ozone  Temp Month Quality
##   <int> <int> <int>   <dbl>
## 1    41    67     5  33.6  
## 2    36    72     5  28    
## 3    12    74     5  -0.600
## 4    18    62     5   6.5  
## 5    NA    56     5  NA    
## 6    28    66     5  13.1  