library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(datasets)
# Load the built-in airquality data set and convert it to a tibble.
# as_tibble() is the supported replacement for as.tibble(), which has been
# deprecated since tibble 2.0.0 and emits a lifecycle warning when called.
data(airquality)
airquality <- tibble::as_tibble(airquality)
library(dplyr)
# Confirm that the object now carries the tibble classes.
airquality %>% class()
## [1] "tbl_df" "tbl" "data.frame"
# Open the tibble in the data viewer, then preview its first rows.
view(airquality)
airquality %>% head()
## # A tibble: 6 × 6
## Ozone Solar.R Wind Temp Month Day
## <int> <int> <dbl> <int> <int> <int>
## 1 41 190 7.4 67 5 1
## 2 36 118 8 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Column-wise overview: one line per variable with its type and first values.
airquality %>% glimpse()
## Rows: 153
## Columns: 6
## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
Data Filtering
Mengambil data kualitas udara dengan temperatur lebih dari 60.0
derajat Fahrenheit
# Keep only the observations whose temperature is above 60, then preview
# the first rows of the filtered result.
filtered_airquality <- airquality %>%
  filter(Temp > 60)
filtered_airquality %>% head()
## # A tibble: 6 × 6
## Ozone Solar.R Wind Temp Month Day
## <int> <int> <dbl> <int> <int> <int>
## 1 41 190 7.4 67 5 1
## 2 36 118 8 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 28 NA 14.9 66 5 6
## 6 23 299 8.6 65 5 7
Mengambil data untuk bulan Mei (Month == 5), kemudian memilih kolom Ozone, Solar.R, Wind, Temp, Month, dan Day
# Rows recorded in month 5 only.
airquality %>%
  filter(Month == 5)
## # A tibble: 31 × 6
## Ozone Solar.R Wind Temp Month Day
## <int> <int> <dbl> <int> <int> <int>
## 1 41 190 7.4 67 5 1
## 2 36 118 8 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
## # ℹ 21 more rows
# Select the six columns explicitly by name, in the listed order.
airquality %>%
  select(all_of(c("Ozone", "Solar.R", "Wind", "Temp", "Month", "Day")))
## # A tibble: 153 × 6
## Ozone Solar.R Wind Temp Month Day
## <int> <int> <dbl> <int> <int> <int>
## 1 41 190 7.4 67 5 1
## 2 36 118 8 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## 9 8 19 20.1 61 5 9
## 10 NA 194 8.6 69 5 10
## # ℹ 143 more rows
# Keep every column except Solar.R and Day.
airquality %>%
  select(!c(Solar.R, Day))
## # A tibble: 153 × 4
## Ozone Wind Temp Month
## <int> <dbl> <int> <int>
## 1 41 7.4 67 5
## 2 36 8 72 5
## 3 12 12.6 74 5
## 4 18 11.5 62 5
## 5 NA 14.3 56 5
## 6 28 14.9 66 5
## 7 23 8.6 65 5
## 8 19 13.8 59 5
## 9 8 20.1 61 5
## 10 NA 8.6 69 5
## # ℹ 143 more rows
Data Arranging
Mengurutkan berdasarkan peubah Wind dari nilai terkecil
# Sort the rows by Wind, smallest value first.
arrange(airquality, Wind)
## # A tibble: 153 × 6
## Ozone Solar.R Wind Temp Month Day
## <int> <int> <dbl> <int> <int> <int>
## 1 NA 59 1.7 76 6 22
## 2 118 225 2.3 94 8 29
## 3 73 183 2.8 93 9 3
## 4 168 238 3.4 81 8 25
## 5 122 255 4 89 8 7
## 6 135 269 4.1 84 7 1
## 7 NA 91 4.6 76 6 23
## 8 64 175 4.6 83 7 5
## 9 66 NA 4.6 87 8 6
## 10 91 189 4.6 93 9 4
## # ℹ 143 more rows
Mengurutkan berdasarkan peubah Wind dari nilai terbesar
# Sort the rows by Wind, largest value first.
arrange(airquality, desc(Wind))
## # A tibble: 153 × 6
## Ozone Solar.R Wind Temp Month Day
## <int> <int> <dbl> <int> <int> <int>
## 1 37 284 20.7 72 6 17
## 2 8 19 20.1 61 5 9
## 3 6 78 18.4 57 5 18
## 4 11 320 16.6 73 5 22
## 5 NA 66 16.6 57 5 25
## 6 14 20 16.6 63 9 25
## 7 NA 242 16.1 67 6 3
## 8 21 259 15.5 77 8 21
## 9 32 92 15.5 84 9 6
## 10 21 259 15.5 76 9 12
## # ℹ 143 more rows
Data Merging
Menambahkan contoh data eksternal
# Build a small lookup table that maps each month to a label.
# Month is created as an integer (5:9) so that it has the same type as
# airquality$Month; with a double key the join coerces the joined Month
# column from <int> to <dbl>.
additional_data <- tibble(
  Month = 5:9,
  Info = c("Type A", "Type B", "Type C", "Type D", "Type E")
)
# Each airquality row should match at most one lookup row. Declaring the
# relationship makes the join fail loudly if the lookup table ever contains
# a duplicated month, instead of silently duplicating observations.
merged_airquality <- left_join(
  airquality,
  additional_data,
  by = "Month",
  relationship = "many-to-one"
)
head(merged_airquality)
## # A tibble: 6 × 7
## Ozone Solar.R Wind Temp Month Day Info
## <int> <int> <dbl> <int> <int> <int> <chr>
## 1 41 190 7.4 67 5 1 Type A
## 2 36 118 8 72 5 2 Type A
## 3 12 149 12.6 74 5 3 Type A
## 4 18 313 11.5 62 5 4 Type A
## 5 NA NA 14.3 56 5 5 Type A
## 6 28 NA 14.9 66 5 6 Type A
Data Aggregating
Menghitung rata-rata Temp tiap bulan
# Average temperature per month; the result has one row per Month value
# and a single summary column named `mean`.
airquality %>%
  group_by(Month) %>%
  summarise(mean = mean(Temp))
## # A tibble: 5 × 2
## Month mean
## <int> <dbl>
## 1 5 65.5
## 2 6 79.1
## 3 7 83.9
## 4 8 84.0
## 5 9 76.9
Penggunaan 2 Fungsi Bersamaan
Menyaring data airquality untuk mendapatkan nilai wind lebih besar
dari 10.0 mph, kemudian mengurutkan hasilnya berdasarkan Ozone secara
descending.
# Keep the observations with wind speed above 10, then order them from the
# highest to the lowest Ozone value.
result1 <- arrange(
  filter(airquality, Wind > 10),
  desc(Ozone)
)
result1
## # A tibble: 72 × 6
## Ozone Solar.R Wind Temp Month Day
## <int> <int> <dbl> <int> <int> <int>
## 1 89 229 10.3 90 8 8
## 2 71 291 13.8 90 6 9
## 3 63 220 11.5 85 7 20
## 4 52 82 12 86 7 27
## 5 45 252 14.9 81 5 29
## 6 44 192 11.5 86 8 12
## 7 44 190 10.3 78 8 20
## 8 44 236 14.9 81 9 11
## 9 40 314 10.9 83 7 6
## 10 39 323 11.5 87 6 10
## # ℹ 62 more rows
Membuat kolom baru bernama Quality yang merupakan hasil pengurangan
Ozone dengan Wind, lalu memilih hanya beberapa kolom tertentu
(termasuk Quality) untuk ditampilkan
# Derive Quality as Ozone minus Wind, then keep a subset of columns.
# Quality must be listed in select(); otherwise the column created by
# mutate() is dropped immediately and the computation has no effect.
# Rows with a missing Ozone value yield NA for Quality.
result2 <- airquality %>%
  mutate(Quality = Ozone - Wind) %>%
  select(Ozone, Temp, Month, Quality)
head(result2)
## # A tibble: 6 × 4
## Ozone Temp Month Quality
## <int> <int> <int> <dbl>
## 1 41 67 5 33.6
## 2 36 72 5 28
## 3 12 74 5 -0.6
## 4 18 62 5 6.5
## 5 NA 56 5 NA
## 6 28 66 5 13.1