library(tidyverse)
library(ggplot2)
library(dplyr)
# Load the `diamonds` dataset (shipped with ggplot2) into the workspace.
data(diamonds)
# Coerce to a tibble. `as.tibble()` has been deprecated since tibble 2.0.0 and
# raises a deprecation warning; `as_tibble()` is the supported replacement.
diamonds <- tibble::as_tibble(diamonds)
# Show the class vector of the converted object.
class(diamonds)
## [1] "tbl_df"     "tbl"        "data.frame"
# Open the data in the viewer (called directly so the viewer title stays
# "diamonds"), then print the first six rows.
view(diamonds)
head(diamonds, n = 6L)
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
# Column-wise overview: one line per variable with its type and first values.
diamonds %>%
  glimpse()
## Rows: 53,940
## Columns: 10
## $ carat   <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.…
## $ cut     <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver…
## $ color   <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,…
## $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, …
## $ depth   <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64…
## $ table   <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58…
## $ price   <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34…
## $ x       <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.…
## $ y       <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.…
## $ z       <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.…
# Per-column summary: quantiles and mean for numeric columns, level counts for
# the ordered factors.
diamonds %>%
  summary()
##      carat               cut        color        clarity          depth      
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065   Min.   :43.00  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258   1st Qu.:61.00  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194   Median :61.80  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171   Mean   :61.75  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066   3rd Qu.:62.50  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655   Max.   :79.00  
##                                     J: 2808   (Other): 2531                  
##      table           price             x                y         
##  Min.   :43.00   Min.   :  326   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710   1st Qu.: 4.720  
##  Median :57.00   Median : 2401   Median : 5.700   Median : 5.710  
##  Mean   :57.46   Mean   : 3933   Mean   : 5.731   Mean   : 5.735  
##  3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540   3rd Qu.: 6.540  
##  Max.   :95.00   Max.   :18823   Max.   :10.740   Max.   :58.900  
##                                                                   
##        z         
##  Min.   : 0.000  
##  1st Qu.: 2.910  
##  Median : 3.530  
##  Mean   : 3.539  
##  3rd Qu.: 4.040  
##  Max.   :31.800  
## 
# summarise() ----
# Compute the mean diamond price for each type of cut.
average_price <- diamonds %>%
  group_by(cut) %>%
  summarise(average_price = mean(price))
average_price
## # A tibble: 5 × 2
##   cut       average_price
##   <ord>             <dbl>
## 1 Fair              4359.
## 2 Good              3929.
## 3 Very Good         3982.
## 4 Premium           4584.
## 5 Ideal             3458.
# arrange() ----
# Show the diamonds ordered from the most expensive downwards.
arrange(diamonds, desc(price))
## # A tibble: 53,940 × 10
##    carat cut       color clarity depth table price     x     y     z
##    <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  2.29 Premium   I     VS2      60.8    60 18823  8.5   8.47  5.16
##  2  2    Very Good G     SI1      63.5    56 18818  7.9   7.97  5.04
##  3  1.51 Ideal     G     IF       61.7    55 18806  7.37  7.41  4.56
##  4  2.07 Ideal     G     SI2      62.5    55 18804  8.2   8.13  5.11
##  5  2    Very Good H     SI1      62.8    57 18803  7.95  8     5.01
##  6  2.29 Premium   I     SI1      61.8    59 18797  8.52  8.45  5.24
##  7  2.04 Premium   H     SI1      58.1    60 18795  8.37  8.28  4.84
##  8  2    Premium   I     VS1      60.8    59 18795  8.13  8.02  4.91
##  9  1.71 Premium   F     VS2      62.3    59 18791  7.57  7.53  4.7 
## 10  2.15 Ideal     G     SI2      62.6    54 18791  8.29  8.35  5.21
## # ℹ 53,930 more rows
# filter() ----
# Keep only the diamonds priced above 15,000 dollars.
filter(diamonds, price > 15000)
## # A tibble: 1,655 × 10
##    carat cut       color clarity depth table price     x     y     z
##    <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
##  1  1.54 Premium   E     VS2      62.3    58 15002  7.31  7.39  4.58
##  2  1.19 Ideal     F     VVS1     61.5    55 15005  6.82  6.84  4.2 
##  3  2.1  Premium   I     SI1      61.5    57 15007  8.25  8.21  5.06
##  4  1.69 Ideal     D     SI1      60.8    57 15011  7.69  7.71  4.68
##  5  1.5  Very Good G     VVS2     62.9    56 15013  7.22  7.32  4.57
##  6  1.73 Very Good G     VS1      62.8    57 15014  7.57  7.72  4.8 
##  7  2.02 Premium   G     SI2      63      59 15014  8.05  7.95  5.03
##  8  2.05 Very Good F     SI2      61.9    56 15017  8.13  8.18  5.05
##  9  1.5  Very Good F     VS1      61.6    58 15022  7.35  7.43  4.55
## 10  1.82 Very Good G     SI1      62.7    58 15025  7.68  7.75  4.84
## # ℹ 1,645 more rows
# select() ----
# Show a table containing only the carat, cut and price columns.
select(diamonds, carat, cut, price)
## # A tibble: 53,940 × 3
##    carat cut       price
##    <dbl> <ord>     <int>
##  1  0.23 Ideal       326
##  2  0.21 Premium     326
##  3  0.23 Good        327
##  4  0.29 Premium     334
##  5  0.31 Good        335
##  6  0.24 Very Good   336
##  7  0.24 Very Good   336
##  8  0.26 Very Good   337
##  9  0.22 Fair        337
## 10  0.23 Very Good   338
## # ℹ 53,930 more rows
# mutate() ----
# Add a new column `price_per_carat`: the ratio of price to carat weight.
mutate(diamonds, price_per_carat = price / carat)
## # A tibble: 53,940 × 11
##    carat cut   color clarity depth table price     x     y     z price_per_carat
##    <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>           <dbl>
##  1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43           1417.
##  2  0.21 Prem… E     SI1      59.8    61   326  3.89  3.84  2.31           1552.
##  3  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31           1422.
##  4  0.29 Prem… I     VS2      62.4    58   334  4.2   4.23  2.63           1152.
##  5  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75           1081.
##  6  0.24 Very… J     VVS2     62.8    57   336  3.94  3.96  2.48           1400 
##  7  0.24 Very… I     VVS1     62.3    57   336  3.95  3.98  2.47           1400 
##  8  0.26 Very… H     SI1      61.9    55   337  4.07  4.11  2.53           1296.
##  9  0.22 Fair  E     VS2      65.1    61   337  3.87  3.78  2.49           1532.
## 10  0.23 Very… H     VS1      59.4    61   338  4     4.05  2.39           1470.
## # ℹ 53,930 more rows
# Using the verbs together ----
# Steps, in order:
#   1. Group the data by type of cut (`cut`).
#   2. Summarise each cut: mean price (`average_price`), largest carat
#      (`max_carat`) and lowest clarity grade (`min_clarity`).
#   3. Sort the result by mean price, highest first.
#   4. Keep only the cuts whose largest carat exceeds 2.
#   5. Select the columns cut, average_price, max_carat and min_clarity.
#   6. Add `price_per_carat`: the ratio of mean price to largest carat.
diamonds %>%
  group_by(cut) %>%
  summarise(
    average_price = mean(price),
    max_carat = max(carat),
    min_clarity = min(clarity)
  ) %>%
  arrange(desc(average_price)) %>%
  filter(max_carat > 2) %>%
  select(cut, average_price, max_carat, min_clarity) %>%
  mutate(price_per_carat = average_price / max_carat)
## # A tibble: 5 × 5
##   cut       average_price max_carat min_clarity price_per_carat
##   <ord>             <dbl>     <dbl> <ord>                 <dbl>
## 1 Premium           4584.      4.01 I1                    1143.
## 2 Fair              4359.      5.01 I1                     870.
## 3 Very Good         3982.      4    I1                     995.
## 4 Good              3929.      3.01 I1                    1305.
## 5 Ideal             3458.      3.5  I1                     988.