library(tidyverse)
library(ggplot2)
library(dplyr)
# Load the `diamonds` dataset (provided by ggplot2) and make sure it is a
# tibble. `tibble::as.tibble()` has been deprecated since tibble 2.0.0 and
# emits a deprecation warning; `tibble::as_tibble()` is the supported
# replacement and performs the same conversion.
data(diamonds)
diamonds <- tibble::as_tibble(diamonds)
# Show the class vector of the converted object.
class(diamonds)
## [1] "tbl_df" "tbl" "data.frame"
# Open the data in the interactive viewer, then print the first rows.
view(diamonds)
diamonds %>% head()
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Compact overview: one line per column with its type and leading values.
diamonds %>% glimpse()
## Rows: 53,940
## Columns: 10
## $ carat <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.…
## $ cut <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver…
## $ color <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,…
## $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, …
## $ depth <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64…
## $ table <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58…
## $ price <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34…
## $ x <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.…
## $ y <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.…
## $ z <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.…
# Per-column summary statistics for the whole dataset.
diamonds %>% summary()
## carat cut color clarity depth
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065 Min. :43.00
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258 1st Qu.:61.00
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194 Median :61.80
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171 Mean :61.75
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066 3rd Qu.:62.50
## Max. :5.0100 I: 5422 VVS1 : 3655 Max. :79.00
## J: 2808 (Other): 2531
## table price x y
## Min. :43.00 Min. : 326 Min. : 0.000 Min. : 0.000
## 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710 1st Qu.: 4.720
## Median :57.00 Median : 2401 Median : 5.700 Median : 5.710
## Mean :57.46 Mean : 3933 Mean : 5.731 Mean : 5.735
## 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540 3rd Qu.: 6.540
## Max. :95.00 Max. :18823 Max. :10.740 Max. :58.900
##
## z
## Min. : 0.000
## 1st Qu.: 2.910
## Median : 3.530
## Mean : 3.539
## 3rd Qu.: 4.040
## Max. :31.800
##
# summarise(): collapse each group to a single row
# Compute the mean diamond price for every cut grade.
average_price <- diamonds %>%
  group_by(cut) %>%
  summarise(average_price = mean(price))
average_price
## # A tibble: 5 × 2
## cut average_price
## <ord> <dbl>
## 1 Fair 4359.
## 2 Good 3929.
## 3 Very Good 3982.
## 4 Premium 4584.
## 5 Ideal 3458.
# arrange(): reorder rows
# List the diamonds starting from the most expensive one.
diamonds %>%
  arrange(desc(price))
## # A tibble: 53,940 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
## 2 2 Very Good G SI1 63.5 56 18818 7.9 7.97 5.04
## 3 1.51 Ideal G IF 61.7 55 18806 7.37 7.41 4.56
## 4 2.07 Ideal G SI2 62.5 55 18804 8.2 8.13 5.11
## 5 2 Very Good H SI1 62.8 57 18803 7.95 8 5.01
## 6 2.29 Premium I SI1 61.8 59 18797 8.52 8.45 5.24
## 7 2.04 Premium H SI1 58.1 60 18795 8.37 8.28 4.84
## 8 2 Premium I VS1 60.8 59 18795 8.13 8.02 4.91
## 9 1.71 Premium F VS2 62.3 59 18791 7.57 7.53 4.7
## 10 2.15 Ideal G SI2 62.6 54 18791 8.29 8.35 5.21
## # ℹ 53,930 more rows
# filter(): keep only the rows that satisfy a condition
# Show the diamonds priced above 15,000 dollars.
diamonds %>%
  filter(price > 15000)
## # A tibble: 1,655 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 1.54 Premium E VS2 62.3 58 15002 7.31 7.39 4.58
## 2 1.19 Ideal F VVS1 61.5 55 15005 6.82 6.84 4.2
## 3 2.1 Premium I SI1 61.5 57 15007 8.25 8.21 5.06
## 4 1.69 Ideal D SI1 60.8 57 15011 7.69 7.71 4.68
## 5 1.5 Very Good G VVS2 62.9 56 15013 7.22 7.32 4.57
## 6 1.73 Very Good G VS1 62.8 57 15014 7.57 7.72 4.8
## 7 2.02 Premium G SI2 63 59 15014 8.05 7.95 5.03
## 8 2.05 Very Good F SI2 61.9 56 15017 8.13 8.18 5.05
## 9 1.5 Very Good F VS1 61.6 58 15022 7.35 7.43 4.55
## 10 1.82 Very Good G SI1 62.7 58 15025 7.68 7.75 4.84
## # ℹ 1,645 more rows
# select(): keep only the named columns
# Show a table containing just carat, cut and price.
diamonds %>%
  select(carat, cut, price)
## # A tibble: 53,940 × 3
## carat cut price
## <dbl> <ord> <int>
## 1 0.23 Ideal 326
## 2 0.21 Premium 326
## 3 0.23 Good 327
## 4 0.29 Premium 334
## 5 0.31 Good 335
## 6 0.24 Very Good 336
## 7 0.24 Very Good 336
## 8 0.26 Very Good 337
## 9 0.22 Fair 337
## 10 0.23 Very Good 338
## # ℹ 53,930 more rows
# mutate(): add a derived column
# Append `price_per_carat`, the ratio of price to carat weight.
diamonds %>%
  mutate(price_per_carat = price / carat)
## # A tibble: 53,940 × 11
## carat cut color clarity depth table price x y z price_per_carat
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417.
## 2 0.21 Prem… E SI1 59.8 61 326 3.89 3.84 2.31 1552.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422.
## 4 0.29 Prem… I VS2 62.4 58 334 4.2 4.23 2.63 1152.
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 1081.
## 6 0.24 Very… J VVS2 62.8 57 336 3.94 3.96 2.48 1400
## 7 0.24 Very… I VVS1 62.3 57 336 3.95 3.98 2.47 1400
## 8 0.26 Very… H SI1 61.9 55 337 4.07 4.11 2.53 1296.
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 1532.
## 10 0.23 Very… H VS1 59.4 61 338 4 4.05 2.39 1470.
## # ℹ 53,930 more rows
# Using the verbs together in one pipeline
# 1. Group the data by cut.
# 2. Summarise each cut: mean price (`average_price`), largest carat
#    (`max_carat`) and lowest clarity grade (`min_clarity`).
# 3. Sort the result by mean price, highest first.
# 4. Keep only the cuts whose largest carat is greater than 2.
# 5. Select cut, average_price, max_carat and min_clarity.
# 6. Add `price_per_carat`, the ratio of mean price to largest carat.
diamonds %>%
  group_by(cut) %>%
  summarise(
    average_price = mean(price),
    max_carat = max(carat),
    min_clarity = min(clarity)
  ) %>%
  arrange(desc(average_price)) %>%
  filter(max_carat > 2) %>%
  select(cut, average_price, max_carat, min_clarity) %>%
  mutate(price_per_carat = average_price / max_carat)
## # A tibble: 5 × 5
## cut average_price max_carat min_clarity price_per_carat
## <ord> <dbl> <dbl> <ord> <dbl>
## 1 Premium 4584. 4.01 I1 1143.
## 2 Fair 4359. 5.01 I1 870.
## 3 Very Good 3982. 4 I1 995.
## 4 Good 3929. 3.01 I1 1305.
## 5 Ideal 3458. 3.5 I1 988.