Belajar Package dplyr

Edisi #TohNganggur

Karena masih nganggur dan rebahan juga sudah mulai bosan, jadi saya mutusin buat nyoba-nyoba belajar R. Walaupun hasilnya biasa aja, maklum masih coba-coba. Pada bagian ini saya mencoba belajar paket dplyr. Mohon maaf kalau kurang komunikatif, karena niatnya hanya coba-coba. Pada bagian selanjutnya saya akan upload tentang paket ggplot2.

Walaupun biasa siapa tau mau dicoba sama rekan-rekan.

Sumber : Sebagian dari Programming Foundation for Data Science di SKILL ACADEMY dan sisanya hasil coba-coba

Dataset : Super Store Simple

Ini bukan endorse, karena waktu itu ada gratisan SKILL ACADEMY jadi lah nyoba-nyoba ngambil, TohGratis.

ket : karena datanya banyak, jadi waktu mau di knit lama (laptop na kentang). alhasil pada part 1 disini menggunakan perintah head jadi yang ditampilin hanya sedikit.

Load Package

library(dplyr)

Baca Data

stores<-read.csv("superstore.csv")

Part 1

pendahuluan

names(stores)
## [1] "order_id"     "order_date"   "customer_id"  "segment"      "category"    
## [6] "sub_category" "sales"        "quantity"     "profit"
t(t(names(stores)))
##       [,1]          
##  [1,] "order_id"    
##  [2,] "order_date"  
##  [3,] "customer_id" 
##  [4,] "segment"     
##  [5,] "category"    
##  [6,] "sub_category"
##  [7,] "sales"       
##  [8,] "quantity"    
##  [9,] "profit"
class(stores$profit)
## [1] "numeric"
levels(stores[,5])
## NULL
dim(stores)
## [1] 9988    9
sapply(stores, class)
##     order_id   order_date  customer_id      segment     category sub_category 
##  "character"  "character"  "character"  "character"  "character"  "character" 
##        sales     quantity       profit 
##    "numeric"    "integer"    "numeric"
summary(stores)
##    order_id          order_date        customer_id          segment         
##  Length:9988        Length:9988        Length:9988        Length:9988       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    category         sub_category           sales             quantity     
##  Length:9988        Length:9988        Min.   :   0.444   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:  17.248   1st Qu.: 2.000  
##  Mode  :character   Mode  :character   Median :  54.376   Median : 3.000  
##                                        Mean   : 221.953   Mean   : 3.789  
##                                        3rd Qu.: 209.814   3rd Qu.: 5.000  
##                                        Max.   :9892.740   Max.   :14.000  
##      profit         
##  Min.   :-3839.990  
##  1st Qu.:    1.730  
##  Median :    8.653  
##  Mean   :   27.105  
##  3rd Qu.:   29.345  
##  Max.   : 4946.370
aggregate(profit~segment, stores, mean)
##       segment   profit
## 1    Consumer 24.85721
## 2   Corporate 27.68438
## 3 Home Office 32.67253
aggregate(cbind(profit,sales)~segment, stores, mean)
##       segment   profit    sales
## 1    Consumer 24.85721 218.2732
## 2   Corporate 27.68438 228.1041
## 3 Home Office 32.67253 222.2430
xtabs(~segment+category, stores )
##              category
## segment       Furniture Office Supplies Technology
##   Consumer         1113            3127        948
##   Corporate         646            1820        553
##   Home Office       362            1079        340

Melihat Banyak Data

  1. Banyaknya data
tally(stores)
##      n
## 1 9988
  1. Banyaknya data per category
count(stores,category)
##          category    n
## 1       Furniture 2121
## 2 Office Supplies 6026
## 3      Technology 1841
  1. Banyaknya data per category dengan diganti nama variabel dan diurutkan
count(stores,category, name = "Banyaknya data", sort = T)
##          category Banyaknya data
## 1 Office Supplies           6026
## 2       Furniture           2121
## 3      Technology           1841

Sample

  1. Diambil data 10 teratas
head(stores,10)
##          order_id order_date customer_id   segment        category sub_category
## 1  CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2  CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3  CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4  US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5  US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6  CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
## 7  CA-2014-115812 2014-06-09    BH-11710  Consumer Office Supplies          Art
## 8  CA-2014-115812 2014-06-09    BH-11710  Consumer      Technology       Phones
## 9  CA-2014-115812 2014-06-09    BH-11710  Consumer Office Supplies      Binders
## 10 CA-2014-115812 2014-06-09    BH-11710  Consumer Office Supplies   Appliances
##       sales quantity    profit
## 1  261.9600        2   41.9136
## 2  731.9400        3  219.5820
## 3   14.6200        2    6.8714
## 4  957.5775        5 -383.0310
## 5   22.3680        2    2.5164
## 6   48.8600        7   14.1694
## 7    7.2800        4    1.9656
## 8  907.1520        6   90.7152
## 9   18.5040        3    5.7825
## 10 114.9000        5   34.4700
  1. Diambil data 10 terbawah
tail(stores,10)
##            order_id order_date customer_id   segment        category
## 9979 CA-2015-100251 2015-05-17    DV-13465  Consumer Office Supplies
## 9980 CA-2015-100251 2015-05-17    DV-13465  Consumer Office Supplies
## 9981 CA-2016-125794 2016-09-29    ML-17410  Consumer      Technology
## 9982 CA-2017-163629 2017-11-17    RA-19885 Corporate      Technology
## 9983 CA-2017-163629 2017-11-17    RA-19885 Corporate      Technology
## 9984 CA-2014-110422 2014-01-21    TB-21400  Consumer       Furniture
## 9985 CA-2017-121258 2017-02-26    DB-13060  Consumer       Furniture
## 9986 CA-2017-121258 2017-02-26    DB-13060  Consumer      Technology
## 9987 CA-2017-121258 2017-02-26    DB-13060  Consumer Office Supplies
## 9988 CA-2017-119914 2017-05-04    CC-12220  Consumer Office Supplies
##      sub_category   sales quantity  profit
## 9979       Labels  31.500       10 15.1200
## 9980     Supplies  55.600        4 16.1240
## 9981  Accessories  36.240        1 15.2208
## 9982  Accessories  79.990        1 28.7964
## 9983       Phones 206.100        5 55.6470
## 9984  Furnishings  25.248        3  4.1028
## 9985  Furnishings  91.960        2 15.6332
## 9986       Phones 258.576        2 19.3932
## 9987        Paper  29.600        4 13.3200
## 9988   Appliances 243.160        2 72.9480
  1. Diambil 10 data secara acak
sample_n(stores, 10)
##          order_id order_date customer_id   segment        category sub_category
## 1  CA-2014-124429 2014-05-27    MH-17785 Corporate       Furniture       Tables
## 2  CA-2015-129042 2015-12-12    EM-13960  Consumer Office Supplies          Art
## 3  CA-2014-114510 2014-03-14    JF-15295  Consumer      Technology  Accessories
## 4  CA-2016-133368 2016-01-14    AG-10675  Consumer       Furniture  Furnishings
## 5  CA-2016-140501 2016-06-23    IM-15070  Consumer Office Supplies      Storage
## 6  CA-2017-152079 2017-01-20    ML-17410  Consumer Office Supplies       Labels
## 7  CA-2017-147354 2017-03-09    KB-16315  Consumer Office Supplies      Binders
## 8  US-2017-106705 2017-12-26    PO-18850  Consumer Office Supplies        Paper
## 9  CA-2016-134376 2016-10-02    TT-21265 Corporate Office Supplies   Appliances
## 10 CA-2016-145898 2016-09-26    CM-12445  Consumer Office Supplies        Paper
##      sales quantity   profit
## 1  567.120       10 -28.3560
## 2    8.220        3   2.2194
## 3   82.800       12   6.6240
## 4  315.776        8  31.5776
## 5  324.900        5  38.9880
## 6   11.520        5   4.1760
## 7   30.840        5   9.6375
## 8   44.750        5  20.5850
## 9   61.440        3  16.5888
## 10  29.900        5  13.4550
  1. Diambil sebanyak 0.1 % data secara acak
sample_frac(stores, 0.001)
##          order_id order_date customer_id     segment        category
## 1  CA-2015-130022 2015-08-10    JK-16120 Home Office Office Supplies
## 2  CA-2015-148180 2015-07-26    BP-11095   Corporate Office Supplies
## 3  CA-2017-107909 2017-09-01    SS-20875    Consumer Office Supplies
## 4  CA-2017-148474 2017-06-12    ME-17320 Home Office Office Supplies
## 5  CA-2017-108574 2017-10-07    MG-18145    Consumer      Technology
## 6  CA-2014-126361 2014-08-04    VD-21670    Consumer Office Supplies
## 7  US-2016-111563 2016-11-04    SM-20005    Consumer       Furniture
## 8  CA-2014-130274 2014-05-03    JS-15940 Home Office Office Supplies
## 9  CA-2016-112669 2016-04-14    KT-16465    Consumer       Furniture
## 10 CA-2014-146591 2014-01-19    TS-21340    Consumer Office Supplies
##    sub_category    sales quantity   profit
## 1        Labels    3.750        1   1.8000
## 2       Binders   23.136        6   8.3868
## 3       Binders   22.920        5   8.0220
## 4       Binders   91.200        3  41.9520
## 5   Accessories 1115.910        9 200.8638
## 6    Appliances 1089.750        3 305.1300
## 7   Furnishings   11.376        3  -5.6880
## 8        Labels   21.560        7  10.3488
## 9        Chairs  933.536        4 105.0228
## 10        Paper   56.064        4  19.6224

Select and rename (memilih kolom dan mengganti nama variabel)

  1. Hanya memilih variabel order_id, order_date, dan sales
head(select(stores,c(order_id, order_date, sales)))
##         order_id order_date    sales
## 1 CA-2016-152156 2016-11-08 261.9600
## 2 CA-2016-152156 2016-11-08 731.9400
## 3 CA-2016-138688 2016-06-12  14.6200
## 4 US-2015-108966 2015-10-11 957.5775
## 5 US-2015-108966 2015-10-11  22.3680
## 6 CA-2014-115812 2014-06-09  48.8600
  1. Hanya memilih variabel dengan nama variabel diawali huruf “order”
head(select(stores,starts_with("order")))
##         order_id order_date
## 1 CA-2016-152156 2016-11-08
## 2 CA-2016-152156 2016-11-08
## 3 CA-2016-138688 2016-06-12
## 4 US-2015-108966 2015-10-11
## 5 US-2015-108966 2015-10-11
## 6 CA-2014-115812 2014-06-09
  1. Hanya memilih variabel dengan nama variabel diakhiri huruf “id”
head(select(stores,ends_with("id")))
##         order_id customer_id
## 1 CA-2016-152156    CG-12520
## 2 CA-2016-152156    CG-12520
## 3 CA-2016-138688    DV-13045
## 4 US-2015-108966    SO-20335
## 5 US-2015-108966    SO-20335
## 6 CA-2014-115812    BH-11710
  1. Hanya memilih variabel dengan nama variabel huruf “er”
head(select(stores,contains("er")))
##         order_id order_date customer_id
## 1 CA-2016-152156 2016-11-08    CG-12520
## 2 CA-2016-152156 2016-11-08    CG-12520
## 3 CA-2016-138688 2016-06-12    DV-13045
## 4 US-2015-108966 2015-10-11    SO-20335
## 5 US-2015-108966 2015-10-11    SO-20335
## 6 CA-2014-115812 2014-06-09    BH-11710
  1. Memindahkan variabel order_date menjadi didepan
head(select(stores,order_date,everything()))
##   order_date       order_id customer_id   segment        category sub_category
## 1 2016-11-08 CA-2016-152156    CG-12520  Consumer       Furniture    Bookcases
## 2 2016-11-08 CA-2016-152156    CG-12520  Consumer       Furniture       Chairs
## 3 2016-06-12 CA-2016-138688    DV-13045 Corporate Office Supplies       Labels
## 4 2015-10-11 US-2015-108966    SO-20335  Consumer       Furniture       Tables
## 5 2015-10-11 US-2015-108966    SO-20335  Consumer Office Supplies      Storage
## 6 2014-06-09 CA-2014-115812    BH-11710  Consumer       Furniture  Furnishings
##      sales quantity    profit
## 1 261.9600        2   41.9136
## 2 731.9400        3  219.5820
## 3  14.6200        2    6.8714
## 4 957.5775        5 -383.0310
## 5  22.3680        2    2.5164
## 6  48.8600        7   14.1694
  1. Memindahkan variabel order_date menjadi dibelakang
head(select(stores, -order_date, order_date))
##         order_id customer_id   segment        category sub_category    sales
## 1 CA-2016-152156    CG-12520  Consumer       Furniture    Bookcases 261.9600
## 2 CA-2016-152156    CG-12520  Consumer       Furniture       Chairs 731.9400
## 3 CA-2016-138688    DV-13045 Corporate Office Supplies       Labels  14.6200
## 4 US-2015-108966    SO-20335  Consumer       Furniture       Tables 957.5775
## 5 US-2015-108966    SO-20335  Consumer Office Supplies      Storage  22.3680
## 6 CA-2014-115812    BH-11710  Consumer       Furniture  Furnishings  48.8600
##   quantity    profit order_date
## 1        2   41.9136 2016-11-08
## 2        3  219.5820 2016-11-08
## 3        2    6.8714 2016-06-12
## 4        5 -383.0310 2015-10-11
## 5        2    2.5164 2015-10-11
## 6        7   14.1694 2014-06-09
  1. Hanya memilih variabel dengan tipe numeric
head(select_if(stores, is.numeric ))
##      sales quantity    profit
## 1 261.9600        2   41.9136
## 2 731.9400        3  219.5820
## 3  14.6200        2    6.8714
## 4 957.5775        5 -383.0310
## 5  22.3680        2    2.5164
## 6  48.8600        7   14.1694
  1. Hanya memilih variabel dengan nama variabel tanpa huruf “e”, kecuali diawali huruf “s”
head(select_at(stores, vars(-contains("e"),starts_with("s"))))
##   quantity    profit   segment sub_category    sales
## 1        2   41.9136  Consumer    Bookcases 261.9600
## 2        3  219.5820  Consumer       Chairs 731.9400
## 3        2    6.8714 Corporate       Labels  14.6200
## 4        5 -383.0310  Consumer       Tables 957.5775
## 5        2    2.5164  Consumer      Storage  22.3680
## 6        7   14.1694  Consumer  Furnishings  48.8600
  1. rename
head(rename(stores, penjualan = sales))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##   penjualan quantity    profit
## 1  261.9600        2   41.9136
## 2  731.9400        3  219.5820
## 3   14.6200        2    6.8714
## 4  957.5775        5 -383.0310
## 5   22.3680        2    2.5164
## 6   48.8600        7   14.1694

arrange (sort)

  1. Diurutkan berdasarkan profit
head(arrange(stores,profit))
##         order_id order_date customer_id     segment        category
## 1 US-2017-168116 2017-11-04    GT-14635   Corporate      Technology
## 2 CA-2014-169019 2014-07-26    LF-17185    Consumer Office Supplies
## 3 CA-2017-134845 2017-04-17    SR-20425 Home Office      Technology
## 4 US-2017-122714 2017-12-07    HG-14965   Corporate Office Supplies
## 5 CA-2015-147830 2015-12-15    NF-18385    Consumer      Technology
## 6 CA-2017-131254 2017-11-19    NC-18415    Consumer Office Supplies
##   sub_category    sales quantity    profit
## 1     Machines 7999.980        4 -3839.990
## 2      Binders 2177.584        8 -3701.893
## 3     Machines 2549.985        5 -3399.980
## 4      Binders 1889.990        5 -2929.485
## 5     Machines 1799.994        2 -2639.991
## 6      Binders 1525.188        6 -2287.782
  1. Diurutkan berdasarkan profi dari yang terbesar
head(arrange(stores, desc(profit)))
##         order_id order_date customer_id     segment        category
## 1 CA-2016-117121 2016-12-17    AB-10105    Consumer Office Supplies
## 2 CA-2014-116904 2014-09-23    SC-20095    Consumer Office Supplies
## 3 CA-2015-145352 2015-03-16    CM-12385    Consumer Office Supplies
## 4 CA-2016-158841 2016-02-02    SE-20110    Consumer      Technology
## 5 US-2016-140158 2016-10-04    DR-12940 Home Office      Technology
## 6 CA-2017-138289 2017-01-16    AR-10540    Consumer Office Supplies
##   sub_category   sales quantity   profit
## 1      Binders 9892.74       13 4946.370
## 2      Binders 9449.95        5 4630.475
## 3      Binders 6354.95        5 3177.475
## 4     Machines 8749.95        5 2799.984
## 5      Copiers 5399.91        9 2591.957
## 6      Binders 5443.96        4 2504.222

Filter (memilih data berdasarkan)

  1. Data dengan (Segment = Consumer) dan hanya diambil 10 data teratas
head(filter(stores, segment=="Consumer"))
##         order_id order_date customer_id  segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520 Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520 Consumer       Furniture       Chairs
## 3 US-2015-108966 2015-10-11    SO-20335 Consumer       Furniture       Tables
## 4 US-2015-108966 2015-10-11    SO-20335 Consumer Office Supplies      Storage
## 5 CA-2014-115812 2014-06-09    BH-11710 Consumer       Furniture  Furnishings
## 6 CA-2014-115812 2014-06-09    BH-11710 Consumer Office Supplies          Art
##      sales quantity    profit
## 1 261.9600        2   41.9136
## 2 731.9400        3  219.5820
## 3 957.5775        5 -383.0310
## 4  22.3680        2    2.5164
## 5  48.8600        7   14.1694
## 6   7.2800        4    1.9656
  1. Data dengan (Segment = Consumer dan Category = Furniture) dan hanya diambil 10 data terbawah
head(filter(stores, segment=="Consumer" & category=="Furniture"))
##         order_id order_date customer_id  segment  category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520 Consumer Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520 Consumer Furniture       Chairs
## 3 US-2015-108966 2015-10-11    SO-20335 Consumer Furniture       Tables
## 4 CA-2014-115812 2014-06-09    BH-11710 Consumer Furniture  Furnishings
## 5 CA-2014-115812 2014-06-09    BH-11710 Consumer Furniture       Tables
## 6 US-2017-156909 2017-07-16    SF-20065 Consumer Furniture       Chairs
##       sales quantity    profit
## 1  261.9600        2   41.9136
## 2  731.9400        3  219.5820
## 3  957.5775        5 -383.0310
## 4   48.8600        7   14.1694
## 5 1706.1840        9   85.3092
## 6   71.3720        2   -1.0196
  1. Data dengan (segment bukan consumer, category = Furniture, ) dan hanya diambil 10 data teratas
head(filter(stores, segment!="Consumer" , category=="Furniture"))
##         order_id order_date customer_id     segment  category sub_category
## 1 CA-2016-117590 2016-12-08    GH-14485   Corporate Furniture  Furnishings
## 2 CA-2015-117415 2015-12-27    SN-20710 Home Office Furniture    Bookcases
## 3 CA-2015-117415 2015-12-27    SN-20710 Home Office Furniture       Chairs
## 4 US-2015-164175 2015-04-30    PS-18970 Home Office Furniture       Chairs
## 5 US-2017-118038 2017-12-09    KB-16600   Corporate Furniture  Furnishings
## 6 CA-2017-161018 2017-11-09    PN-18775 Home Office Furniture  Furnishings
##      sales quantity    profit
## 1 190.9200        5 -147.9630
## 2 532.3992        3  -46.9764
## 3 212.0580        3  -15.1470
## 4 213.1150        5  -15.2225
## 5   9.7080        3   -5.8248
## 6  96.5300        7   40.5426
  1. Data dengan profit lebih besar dari rata-rata profit, yaitu 27.1053411
head(filter(stores, profit > mean(profit)))
##         order_id order_date customer_id  segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520 Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520 Consumer       Furniture       Chairs
## 3 CA-2014-115812 2014-06-09    BH-11710 Consumer      Technology       Phones
## 4 CA-2014-115812 2014-06-09    BH-11710 Consumer Office Supplies   Appliances
## 5 CA-2014-115812 2014-06-09    BH-11710 Consumer       Furniture       Tables
## 6 CA-2014-115812 2014-06-09    BH-11710 Consumer      Technology       Phones
##      sales quantity   profit
## 1  261.960        2  41.9136
## 2  731.940        3 219.5820
## 3  907.152        6  90.7152
## 4  114.900        5  34.4700
## 5 1706.184        9  85.3092
## 6  911.424        4  68.3568
  1. Data dengan variabel tipe numeric lebih besar dari rata-rata variabel tersebut. rata-rata sales 221.9525978, rata-rata quantity 3.789147 ,dan rata-rata profit 27.1053411
head(filter_if(stores, is.numeric, all_vars(. > mean(.))))
##         order_id order_date customer_id     segment   category sub_category
## 1 CA-2014-115812 2014-06-09    BH-11710    Consumer Technology       Phones
## 2 CA-2014-115812 2014-06-09    BH-11710    Consumer  Furniture       Tables
## 3 CA-2014-115812 2014-06-09    BH-11710    Consumer Technology       Phones
## 4 CA-2016-117590 2016-12-08    GH-14485   Corporate Technology       Phones
## 5 CA-2015-117415 2015-12-27    SN-20710 Home Office Technology       Phones
## 6 CA-2016-105816 2016-12-11    JM-15265   Corporate Technology       Phones
##      sales quantity   profit
## 1  907.152        6  90.7152
## 2 1706.184        9  85.3092
## 3  911.424        4  68.3568
## 4 1097.544        7 123.4737
## 5  371.168        4  41.7564
## 6 1029.950        5 298.6855
  1. Data variabel sales dan profit yang nilainya lebih kecil dari rata-rata variabel tersebut. rata-rata sales 221.9525978 dan rata-rata profit 27.1053411
head(filter_at(stores,vars(profit, sales), ~ .<mean(.) ))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 2 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 3 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
## 4 CA-2014-115812 2014-06-09    BH-11710  Consumer Office Supplies          Art
## 5 CA-2014-115812 2014-06-09    BH-11710  Consumer Office Supplies      Binders
## 6 CA-2017-114412 2017-04-15    AA-10480  Consumer Office Supplies        Paper
##    sales quantity  profit
## 1 14.620        2  6.8714
## 2 22.368        2  2.5164
## 3 48.860        7 14.1694
## 4  7.280        4  1.9656
## 5 18.504        3  5.7825
## 6 15.552        3  5.4432

Distinc (unik)

  1. Melihat apasaja pada sub_category
distinct(stores, sub_category)
##    sub_category
## 1     Bookcases
## 2        Chairs
## 3        Labels
## 4        Tables
## 5       Storage
## 6   Furnishings
## 7           Art
## 8        Phones
## 9       Binders
## 10   Appliances
## 11        Paper
## 12  Accessories
## 13    Envelopes
## 14    Fasteners
## 15     Supplies
## 16     Machines
## 17      Copiers
  1. melihat ada berapa category barang
n_distinct(stores$category)
## [1] 3

mutate

  1. menambah variabel persentase keuntunan dari total keuntungan
head(mutate(stores, persentase_keuntungan=(profit*100)/sum(profit)))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##      sales quantity    profit persentase_keuntungan
## 1 261.9600        2   41.9136          0.0154818036
## 2 731.9400        3  219.5820          0.0811079315
## 3  14.6200        2    6.8714          0.0025381181
## 4 957.5775        5 -383.0310         -0.1414817796
## 5  22.3680        2    2.5164          0.0009294933
## 6  48.8600        7   14.1694          0.0052338112
  1. sama seperti diatas
vars <- "profit"
head(mutate(stores, persentase = (.data[[vars[[1]]]] * 100 )/sum(.data[[vars[[1]]]])))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##      sales quantity    profit    persentase
## 1 261.9600        2   41.9136  0.0154818036
## 2 731.9400        3  219.5820  0.0811079315
## 3  14.6200        2    6.8714  0.0025381181
## 4 957.5775        5 -383.0310 -0.1414817796
## 5  22.3680        2    2.5164  0.0009294933
## 6  48.8600        7   14.1694  0.0052338112
  1. sama seperti diatas, namun mengubah isi variabel
head(mutate(stores, profit=(profit*100)/sum(profit)))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##      sales quantity        profit
## 1 261.9600        2  0.0154818036
## 2 731.9400        3  0.0811079315
## 3  14.6200        2  0.0025381181
## 4 957.5775        5 -0.1414817796
## 5  22.3680        2  0.0009294933
## 6  48.8600        7  0.0052338112
  1. sama seperti diatas, namun sales juga ikut dirubah
head(mutate_at(stores,c("profit","sales"), ~ (.*100)/sum(.) ))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##          sales quantity        profit
## 1 0.0118167002        2  0.0154818036
## 2 0.0330169320        3  0.0811079315
## 3 0.0006594906        2  0.0025381181
## 4 0.0431951679        5 -0.1414817796
## 5 0.0010089935        2  0.0009294933
## 6 0.0022040158        7  0.0052338112
  1. sama seperti diatas
per<-function(x, na.rm=T) (x*100/sum(x))
head(mutate_at(stores,c("profit","sales"), per))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##          sales quantity        profit
## 1 0.0118167002        2  0.0154818036
## 2 0.0330169320        3  0.0811079315
## 3 0.0006594906        2  0.0025381181
## 4 0.0431951679        5 -0.1414817796
## 5 0.0010089935        2  0.0009294933
## 6 0.0022040158        7  0.0052338112
  1. sama seperti diatas namun untuk semua variabel numerik
per<-function(x, na.rm=T) (x*100/sum(x))
head(mutate_if(stores,is.numeric, per))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##          sales    quantity        profit
## 1 0.0118167002 0.005284574  0.0154818036
## 2 0.0330169320 0.007926861  0.0811079315
## 3 0.0006594906 0.005284574  0.0025381181
## 4 0.0431951679 0.013211436 -0.1414817796
## 5 0.0010089935 0.005284574  0.0009294933
## 6 0.0022040158 0.018496010  0.0052338112
  1. mutate if juga bisa dilakukan untuk mengubah tipe data
head(mutate_if(stores, is.factor, as.character))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##      sales quantity    profit
## 1 261.9600        2   41.9136
## 2 731.9400        3  219.5820
## 3  14.6200        2    6.8714
## 4 957.5775        5 -383.0310
## 5  22.3680        2    2.5164
## 6  48.8600        7   14.1694
  1. Bila ingin menjadi variabel baru
per<-function(x, na.rm=T) (x*100/sum(x))
head(mutate_if(stores,is.numeric, list(persentase=per)))
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
##      sales quantity    profit sales_persentase quantity_persentase
## 1 261.9600        2   41.9136     0.0118167002         0.005284574
## 2 731.9400        3  219.5820     0.0330169320         0.007926861
## 3  14.6200        2    6.8714     0.0006594906         0.005284574
## 4 957.5775        5 -383.0310     0.0431951679         0.013211436
## 5  22.3680        2    2.5164     0.0010089935         0.005284574
## 6  48.8600        7   14.1694     0.0022040158         0.018496010
##   profit_persentase
## 1      0.0154818036
## 2      0.0811079315
## 3      0.0025381181
## 4     -0.1414817796
## 5      0.0009294933
## 6      0.0052338112

transmutate

head(transmute(stores, persentase_keuntungan=(profit*100)/sum(profit)))
##   persentase_keuntungan
## 1          0.0154818036
## 2          0.0811079315
## 3          0.0025381181
## 4         -0.1414817796
## 5          0.0009294933
## 6          0.0052338112
per<-function(x, na.rm=T) (x*100/sum(x))
head(transmute_at(stores,c("profit","sales"), per))
##          profit        sales
## 1  0.0154818036 0.0118167002
## 2  0.0811079315 0.0330169320
## 3  0.0025381181 0.0006594906
## 4 -0.1414817796 0.0431951679
## 5  0.0009294933 0.0010089935
## 6  0.0052338112 0.0022040158
per<-function(x, na.rm=T) (x*100/sum(x))
head(transmute_if(stores,is.numeric, per))
##          sales    quantity        profit
## 1 0.0118167002 0.005284574  0.0154818036
## 2 0.0330169320 0.007926861  0.0811079315
## 3 0.0006594906 0.005284574  0.0025381181
## 4 0.0431951679 0.013211436 -0.1414817796
## 5 0.0010089935 0.005284574  0.0009294933
## 6 0.0022040158 0.018496010  0.0052338112

summarize

summarise(stores,mean = mean(profit), n = n())
##       mean    n
## 1 27.10534 9988
var <- "profit"
summarise(stores, avg = mean(.data[[var]], na.rm = TRUE))
##        avg
## 1 27.10534
summarise_at(stores,c("profit", "sales"), mean, na.rm = TRUE)
##     profit    sales
## 1 27.10534 221.9526
summarise_at(stores,vars(sales,profit), mean, na.rm = TRUE)
##      sales   profit
## 1 221.9526 27.10534
summarise_if(stores,is.double, mean, na.rm = TRUE)
##      sales   profit
## 1 221.9526 27.10534

bind row

one <- head(stores,5)
two <- tail(stores,5)
bind_rows(one, two)
##          order_id order_date customer_id   segment        category sub_category
## 1  CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2  CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3  CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4  US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5  US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6  CA-2014-110422 2014-01-21    TB-21400  Consumer       Furniture  Furnishings
## 7  CA-2017-121258 2017-02-26    DB-13060  Consumer       Furniture  Furnishings
## 8  CA-2017-121258 2017-02-26    DB-13060  Consumer      Technology       Phones
## 9  CA-2017-121258 2017-02-26    DB-13060  Consumer Office Supplies        Paper
## 10 CA-2017-119914 2017-05-04    CC-12220  Consumer Office Supplies   Appliances
##       sales quantity    profit
## 1  261.9600        2   41.9136
## 2  731.9400        3  219.5820
## 3   14.6200        2    6.8714
## 4  957.5775        5 -383.0310
## 5   22.3680        2    2.5164
## 6   25.2480        3    4.1028
## 7   91.9600        2   15.6332
## 8  258.5760        2   19.3932
## 9   29.6000        4   13.3200
## 10 243.1600        2   72.9480
bind_rows(list(one, two))
##          order_id order_date customer_id   segment        category sub_category
## 1  CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2  CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3  CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4  US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5  US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6  CA-2014-110422 2014-01-21    TB-21400  Consumer       Furniture  Furnishings
## 7  CA-2017-121258 2017-02-26    DB-13060  Consumer       Furniture  Furnishings
## 8  CA-2017-121258 2017-02-26    DB-13060  Consumer      Technology       Phones
## 9  CA-2017-121258 2017-02-26    DB-13060  Consumer Office Supplies        Paper
## 10 CA-2017-119914 2017-05-04    CC-12220  Consumer Office Supplies   Appliances
##       sales quantity    profit
## 1  261.9600        2   41.9136
## 2  731.9400        3  219.5820
## 3   14.6200        2    6.8714
## 4  957.5775        5 -383.0310
## 5   22.3680        2    2.5164
## 6   25.2480        3    4.1028
## 7   91.9600        2   15.6332
## 8  258.5760        2   19.3932
## 9   29.6000        4   13.3200
## 10 243.1600        2   72.9480
bind_rows("group 1" = one, "group 2" = two, .id = "groups")
##     groups       order_id order_date customer_id   segment        category
## 1  group 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture
## 2  group 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture
## 3  group 1 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies
## 4  group 1 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture
## 5  group 1 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies
## 6  group 2 CA-2014-110422 2014-01-21    TB-21400  Consumer       Furniture
## 7  group 2 CA-2017-121258 2017-02-26    DB-13060  Consumer       Furniture
## 8  group 2 CA-2017-121258 2017-02-26    DB-13060  Consumer      Technology
## 9  group 2 CA-2017-121258 2017-02-26    DB-13060  Consumer Office Supplies
## 10 group 2 CA-2017-119914 2017-05-04    CC-12220  Consumer Office Supplies
##    sub_category    sales quantity    profit
## 1     Bookcases 261.9600        2   41.9136
## 2        Chairs 731.9400        3  219.5820
## 3        Labels  14.6200        2    6.8714
## 4        Tables 957.5775        5 -383.0310
## 5       Storage  22.3680        2    2.5164
## 6   Furnishings  25.2480        3    4.1028
## 7   Furnishings  91.9600        2   15.6332
## 8        Phones 258.5760        2   19.3932
## 9         Paper  29.6000        4   13.3200
## 10   Appliances 243.1600        2   72.9480

intersect, union, dan setdif

first <- stores[1:5, ]
second <- stores[3:7, ]

intersect(first, second)
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 2 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 3 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
##      sales quantity    profit
## 1  14.6200        2    6.8714
## 2 957.5775        5 -383.0310
## 3  22.3680        2    2.5164
union(first, second)
##         order_id order_date customer_id   segment        category sub_category
## 1 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2 CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3 CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4 US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5 US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6 CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
## 7 CA-2014-115812 2014-06-09    BH-11710  Consumer Office Supplies          Art
##      sales quantity    profit
## 1 261.9600        2   41.9136
## 2 731.9400        3  219.5820
## 3  14.6200        2    6.8714
## 4 957.5775        5 -383.0310
## 5  22.3680        2    2.5164
## 6  48.8600        7   14.1694
## 7   7.2800        4    1.9656
setdiff(first, second)
##         order_id order_date customer_id  segment  category sub_category  sales
## 1 CA-2016-152156 2016-11-08    CG-12520 Consumer Furniture    Bookcases 261.96
## 2 CA-2016-152156 2016-11-08    CG-12520 Consumer Furniture       Chairs 731.94
##   quantity   profit
## 1        2  41.9136
## 2        3 219.5820
setdiff(second, first)
##         order_id order_date customer_id  segment        category sub_category
## 1 CA-2014-115812 2014-06-09    BH-11710 Consumer       Furniture  Furnishings
## 2 CA-2014-115812 2014-06-09    BH-11710 Consumer Office Supplies          Art
##   sales quantity  profit
## 1 48.86        7 14.1694
## 2  7.28        4  1.9656
union_all(first, second)
##          order_id order_date customer_id   segment        category sub_category
## 1  CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture    Bookcases
## 2  CA-2016-152156 2016-11-08    CG-12520  Consumer       Furniture       Chairs
## 3  CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 4  US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 5  US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 6  CA-2016-138688 2016-06-12    DV-13045 Corporate Office Supplies       Labels
## 7  US-2015-108966 2015-10-11    SO-20335  Consumer       Furniture       Tables
## 8  US-2015-108966 2015-10-11    SO-20335  Consumer Office Supplies      Storage
## 9  CA-2014-115812 2014-06-09    BH-11710  Consumer       Furniture  Furnishings
## 10 CA-2014-115812 2014-06-09    BH-11710  Consumer Office Supplies          Art
##       sales quantity    profit
## 1  261.9600        2   41.9136
## 2  731.9400        3  219.5820
## 3   14.6200        2    6.8714
## 4  957.5775        5 -383.0310
## 5   22.3680        2    2.5164
## 6   14.6200        2    6.8714
## 7  957.5775        5 -383.0310
## 8   22.3680        2    2.5164
## 9   48.8600        7   14.1694
## 10   7.2800        4    1.9656

Join

band_members
## # A tibble: 3 x 2
##   name  band   
##   <chr> <chr>  
## 1 Mick  Stones 
## 2 John  Beatles
## 3 Paul  Beatles
band_instruments
## # A tibble: 3 x 2
##   name  plays 
##   <chr> <chr> 
## 1 John  guitar
## 2 Paul  bass  
## 3 Keith guitar
inner_join( band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 2 x 3
##   name  band    plays 
##   <chr> <chr>   <chr> 
## 1 John  Beatles guitar
## 2 Paul  Beatles bass
left_join( band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
##   name  band    plays 
##   <chr> <chr>   <chr> 
## 1 Mick  Stones  <NA>  
## 2 John  Beatles guitar
## 3 Paul  Beatles bass
right_join( band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
##   name  band    plays 
##   <chr> <chr>   <chr> 
## 1 John  Beatles guitar
## 2 Paul  Beatles bass  
## 3 Keith <NA>    guitar
full_join(band_members ,band_instruments)
## Joining, by = "name"
## # A tibble: 4 x 3
##   name  band    plays 
##   <chr> <chr>   <chr> 
## 1 Mick  Stones  <NA>  
## 2 John  Beatles guitar
## 3 Paul  Beatles bass  
## 4 Keith <NA>    guitar
semi_join(band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 2 x 2
##   name  band   
##   <chr> <chr>  
## 1 John  Beatles
## 2 Paul  Beatles
anti_join(band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 1 x 2
##   name  band  
##   <chr> <chr> 
## 1 Mick  Stones
nest_join(band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
##   name  band    band_instruments
##   <chr> <chr>   <list>          
## 1 Mick  Stones  <tibble [0 x 1]>
## 2 John  Beatles <tibble [1 x 1]>
## 3 Paul  Beatles <tibble [1 x 1]>
inner_join(band_members,band_instruments, by = "name")
## # A tibble: 2 x 3
##   name  band    plays 
##   <chr> <chr>   <chr> 
## 1 John  Beatles guitar
## 2 Paul  Beatles bass
full_join(band_members,band_instruments2, by = c("name" = "artist"))
## # A tibble: 4 x 3
##   name  band    plays 
##   <chr> <chr>   <chr> 
## 1 Mick  Stones  <NA>  
## 2 John  Beatles guitar
## 3 Paul  Beatles bass  
## 4 Keith <NA>    guitar

Part 2

perintah pada paket dplyr bisa dilakukan secara bersamaan dengan menulis :

(dataset) %>% perintah1 %>% perintah2 …

ket : selalu menulis %>% sebelum menambah perintah

  1. mencari costumer ID yang memiliki sales paling besar
stores %>%
  select(customer_id, sales)%>%
  filter(sales==max(sales))
##   customer_id   sales
## 1    AB-10105 9892.74
## kita juga bisa menggunakan perintah lain untuk mendapatkan hasil tersebut
## misal

stores%>%
  select(customer_id, sales)%>%
  arrange(desc(sales))%>%
  head(1)
##   customer_id   sales
## 1    AB-10105 9892.74
  1. melihat sub category apa saja yang ada dalam category “office supply” dan masing- masing berapa banyak total profitnya
stores%>%
  filter(category=="Office Supplies")%>%
  group_by(sub_category)%>%
  summarise(total_profit=sum(profit))%>%
  arrange(desc(total_profit))
## # A tibble: 9 x 2
##   sub_category total_profit
##   <chr>               <dbl>
## 1 Paper              34054.
## 2 Binders            30222.
## 3 Storage            21279.
## 4 Appliances         18138.
## 5 Envelopes           6964.
## 6 Art                 6528.
## 7 Labels              5546.
## 8 Fasteners            950.
## 9 Supplies           -1189.
  1. Berapa banyak order yang menghasilkan profit negatif
stores%>%
  filter(profit<0)%>%
  tally(name="banyaknya order rugi")
##   banyaknya order rugi
## 1                 1869
  1. antara customer id JE-16165, KH-16510, AD-10180. manakah yang total salesnya paling banyak.
stores%>%
  filter(customer_id =="JE-16165"|customer_id=="KH-16510"|customer_id=="AD-10180")%>%
  group_by(customer_id)%>%
  summarise(total_sales=sum(sales))%>%
  arrange(desc(total_sales))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
##   customer_id total_sales
##   <chr>             <dbl>
## 1 AD-10180          6107.
## 2 KH-16510          5953.
## 3 JE-16165          2697.
  1. buatlah data frame bernama “yearly_sales” yang berisi total sales, jumlah customers, dan total profit tiap tahun dan tahun berapakah profit tertinggi diperoleh
stores$order_date<-as.Date(stores$order_date)
yearly_sales<- stores
yearly_sales$order_year<-as.Date(cut(yearly_sales$order_date, breaks = "year"))
yearly_sales%>%
  group_by(order_year)%>%
  summarise(total_sales= sum(sales),jumlah_customer= n(),total_profit= sum(profit))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 4
##   order_year total_sales jumlah_customer total_profit
##   <date>           <dbl>           <int>        <dbl>
## 1 2014-01-01     461609.            1992       51355.
## 2 2015-01-01     470533.            2102       61619.
## 3 2016-01-01     587206.            2585       79995.
## 4 2017-01-01     697515.            3309       77759.
  1. melihat banyaknya pesanan berdasarkan category,segment dan sub category
stores%>%
  group_by_at(vars( segment,category))%>%
  count(sub_category, name = "banyaknya_order")
## # A tibble: 51 x 4
## # Groups:   segment, category [9]
##    segment  category        sub_category banyaknya_order
##    <chr>    <chr>           <chr>                  <int>
##  1 Consumer Furniture       Bookcases                131
##  2 Consumer Furniture       Chairs                   329
##  3 Consumer Furniture       Furnishings              494
##  4 Consumer Furniture       Tables                   159
##  5 Consumer Office Supplies Appliances               244
##  6 Consumer Office Supplies Art                      428
##  7 Consumer Office Supplies Binders                  780
##  8 Consumer Office Supplies Envelopes                129
##  9 Consumer Office Supplies Fasteners                114
## 10 Consumer Office Supplies Labels                   189
## # ... with 41 more rows
# sarua wae
stores%>%
  group_by(segment)%>%
  count(category,sub_category, name = "banyaknya_order")
## # A tibble: 51 x 4
## # Groups:   segment [3]
##    segment  category        sub_category banyaknya_order
##    <chr>    <chr>           <chr>                  <int>
##  1 Consumer Furniture       Bookcases                131
##  2 Consumer Furniture       Chairs                   329
##  3 Consumer Furniture       Furnishings              494
##  4 Consumer Furniture       Tables                   159
##  5 Consumer Office Supplies Appliances               244
##  6 Consumer Office Supplies Art                      428
##  7 Consumer Office Supplies Binders                  780
##  8 Consumer Office Supplies Envelopes                129
##  9 Consumer Office Supplies Fasteners                114
## 10 Consumer Office Supplies Labels                   189
## # ... with 41 more rows
  1. melihat banyaknya sub category yang diorder berdasarkan segment dan category
stores%>%
  group_by(segment)%>%
  count(category,sub_category)%>%
  count(category, name = "banyaknya_sub_category")
## Using `n` as weighting variable
## i Quiet this message with `wt = n` or count rows with `wt = 1`
## # A tibble: 9 x 3
## # Groups:   segment [3]
##   segment     category        banyaknya_sub_category
##   <chr>       <chr>                            <int>
## 1 Consumer    Furniture                         1113
## 2 Consumer    Office Supplies                   3127
## 3 Consumer    Technology                         948
## 4 Corporate   Furniture                          646
## 5 Corporate   Office Supplies                   1820
## 6 Corporate   Technology                         553
## 7 Home Office Furniture                          362
## 8 Home Office Office Supplies                   1079
## 9 Home Office Technology                         340
  1. melihat penjualan sub category dengan profit terbesar berdasarkan category dan segment
stores%>%
  group_by_at(vars(segment,category,sub_category))%>%
  summarise(profits=sum(profit))%>%
  group_by_at(vars(segment, category))%>%
  filter(profits==max(profits))
## `summarise()` regrouping output by 'segment', 'category' (override with `.groups` argument)
## # A tibble: 9 x 4
## # Groups:   segment, category [9]
##   segment     category        sub_category profits
##   <chr>       <chr>           <chr>          <dbl>
## 1 Consumer    Furniture       Chairs        13235.
## 2 Consumer    Office Supplies Binders       17996.
## 3 Consumer    Technology      Phones        23837.
## 4 Corporate   Furniture       Chairs         8345.
## 5 Corporate   Office Supplies Paper         10362.
## 6 Corporate   Technology      Accessories   12707.
## 7 Home Office Furniture       Chairs         5010.
## 8 Home Office Office Supplies Paper          8157.
## 9 Home Office Technology      Phones         8912.
  1. melihat profit sub category berdasarkan category dan segment. apakah rugi atau untung
stores%>%
  group_by_at(vars(segment,category,sub_category))%>%
  summarise(profits=sum(profit))%>%
  mutate(keterangan=case_when(
      profits==max(profits) ~"profit terbesar dikelompoknya",
      profits > 0 ~ "untung",
      profits < 0 ~ "rugi"
    ))%>%
  select(-profits)
## `summarise()` regrouping output by 'segment', 'category' (override with `.groups` argument)
## # A tibble: 51 x 4
## # Groups:   segment, category [9]
##    segment  category        sub_category keterangan                   
##    <chr>    <chr>           <chr>        <chr>                        
##  1 Consumer Furniture       Bookcases    rugi                         
##  2 Consumer Furniture       Chairs       profit terbesar dikelompoknya
##  3 Consumer Furniture       Furnishings  untung                       
##  4 Consumer Furniture       Tables       rugi                         
##  5 Consumer Office Supplies Appliances   untung                       
##  6 Consumer Office Supplies Art          untung                       
##  7 Consumer Office Supplies Binders      profit terbesar dikelompoknya
##  8 Consumer Office Supplies Envelopes    untung                       
##  9 Consumer Office Supplies Fasteners    untung                       
## 10 Consumer Office Supplies Labels       untung                       
## # ... with 41 more rows
  1. merangking profit sub category berdasarkan category dan segment
stores%>%
  group_by(segment,category,sub_category)%>%
  summarise(profits=sum(profit))%>%
  mutate(rank = min_rank(desc(profits)))%>%
  arrange(rank, .by_group=T)%>%
  select(-profits)
## `summarise()` regrouping output by 'segment', 'category' (override with `.groups` argument)
## # A tibble: 51 x 4
## # Groups:   segment, category [9]
##    segment  category        sub_category  rank
##    <chr>    <chr>           <chr>        <int>
##  1 Consumer Furniture       Chairs           1
##  2 Consumer Furniture       Furnishings      2
##  3 Consumer Furniture       Bookcases        3
##  4 Consumer Furniture       Tables           4
##  5 Consumer Office Supplies Binders          1
##  6 Consumer Office Supplies Paper            2
##  7 Consumer Office Supplies Storage          3
##  8 Consumer Office Supplies Appliances       4
##  9 Consumer Office Supplies Art              5
## 10 Consumer Office Supplies Envelopes        6
## # ... with 41 more rows

11.data partitioning

set.seed(1)
#60% train dan 40% validation 
data<-stores
train.rows<-sample(rownames(data), dim(data)[1]*0.6)
train.data<-data[train.rows,]

valid.rows<-setdiff(rownames(data),train.rows)
valid.data<-data[valid.rows,]


#50% train, 30% validation, dan 20% test
train.rows2<-sample(rownames(data),dim(data)[1]*0.5)
valid.rows2<-sample(setdiff(rownames(data), train.rows2),dim(data)[1]*0.3)
test.rows<-setdiff(rownames(data),union(train.rows2, valid.rows2))
train.data2<-data[train.rows2,]
valid.data2<-data[valid.rows2,]
test.data<-data[test.rows,]

SEMOGA BERMANFAAT