Karena masih nganggur dan rebahan juga sudah mulai bosan, jadi saya mutusin buat nyoba-nyoba belajar R. Walaupun hasilnya biasa aja, maklum masih coba-coba. Pada bagian ini saya mencoba belajar paket dplyr. Mohon maaf kalau kurang komunikatif, karena niatnya hanya coba-coba. Pada bagian selanjutnya saya akan upload tentang paket ggplot2.
Walaupun biasa siapa tau mau dicoba sama rekan-rekan.
Sumber : Sebagian dari Programming Foundation for Data Science di SKILL ACADEMY dan sisanya hasil coba-coba
Dataset : Super Store Simple
Ini bukan endorse, karena waktu itu ada gratisan SKILL ACADEMY jadi lah nyoba-nyoba ngambil, TohGratis.
ket : karena datanya banyak, jadi waktu mau di knit lama (laptop na kentang). alhasil pada part 1 disini menggunakan perintah head jadi yang ditampilin hanya sedikit.
Load Package
library(dplyr)
Baca Data
stores<-read.csv("superstore.csv")
pendahuluan
names(stores)
## [1] "order_id" "order_date" "customer_id" "segment" "category"
## [6] "sub_category" "sales" "quantity" "profit"
t(t(names(stores)))
## [,1]
## [1,] "order_id"
## [2,] "order_date"
## [3,] "customer_id"
## [4,] "segment"
## [5,] "category"
## [6,] "sub_category"
## [7,] "sales"
## [8,] "quantity"
## [9,] "profit"
class(stores$profit)
## [1] "numeric"
levels(stores[,5])
## NULL
dim(stores)
## [1] 9988 9
sapply(stores, class)
## order_id order_date customer_id segment category sub_category
## "character" "character" "character" "character" "character" "character"
## sales quantity profit
## "numeric" "integer" "numeric"
summary(stores)
## order_id order_date customer_id segment
## Length:9988 Length:9988 Length:9988 Length:9988
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## category sub_category sales quantity
## Length:9988 Length:9988 Min. : 0.444 Min. : 1.000
## Class :character Class :character 1st Qu.: 17.248 1st Qu.: 2.000
## Mode :character Mode :character Median : 54.376 Median : 3.000
## Mean : 221.953 Mean : 3.789
## 3rd Qu.: 209.814 3rd Qu.: 5.000
## Max. :9892.740 Max. :14.000
## profit
## Min. :-3839.990
## 1st Qu.: 1.730
## Median : 8.653
## Mean : 27.105
## 3rd Qu.: 29.345
## Max. : 4946.370
aggregate(profit~segment, stores, mean)
## segment profit
## 1 Consumer 24.85721
## 2 Corporate 27.68438
## 3 Home Office 32.67253
aggregate(cbind(profit,sales)~segment, stores, mean)
## segment profit sales
## 1 Consumer 24.85721 218.2732
## 2 Corporate 27.68438 228.1041
## 3 Home Office 32.67253 222.2430
xtabs(~segment+category, stores )
## category
## segment Furniture Office Supplies Technology
## Consumer 1113 3127 948
## Corporate 646 1820 553
## Home Office 362 1079 340
Melihat Banyak Data
tally(stores)
## n
## 1 9988
count(stores,category)
## category n
## 1 Furniture 2121
## 2 Office Supplies 6026
## 3 Technology 1841
count(stores,category, name = "Banyaknya data", sort = T)
## category Banyaknya data
## 1 Office Supplies 6026
## 2 Furniture 2121
## 3 Technology 1841
Sample
head(stores,10)
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## 7 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Art
## 8 CA-2014-115812 2014-06-09 BH-11710 Consumer Technology Phones
## 9 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Binders
## 10 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Appliances
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 48.8600 7 14.1694
## 7 7.2800 4 1.9656
## 8 907.1520 6 90.7152
## 9 18.5040 3 5.7825
## 10 114.9000 5 34.4700
tail(stores,10)
## order_id order_date customer_id segment category
## 9979 CA-2015-100251 2015-05-17 DV-13465 Consumer Office Supplies
## 9980 CA-2015-100251 2015-05-17 DV-13465 Consumer Office Supplies
## 9981 CA-2016-125794 2016-09-29 ML-17410 Consumer Technology
## 9982 CA-2017-163629 2017-11-17 RA-19885 Corporate Technology
## 9983 CA-2017-163629 2017-11-17 RA-19885 Corporate Technology
## 9984 CA-2014-110422 2014-01-21 TB-21400 Consumer Furniture
## 9985 CA-2017-121258 2017-02-26 DB-13060 Consumer Furniture
## 9986 CA-2017-121258 2017-02-26 DB-13060 Consumer Technology
## 9987 CA-2017-121258 2017-02-26 DB-13060 Consumer Office Supplies
## 9988 CA-2017-119914 2017-05-04 CC-12220 Consumer Office Supplies
## sub_category sales quantity profit
## 9979 Labels 31.500 10 15.1200
## 9980 Supplies 55.600 4 16.1240
## 9981 Accessories 36.240 1 15.2208
## 9982 Accessories 79.990 1 28.7964
## 9983 Phones 206.100 5 55.6470
## 9984 Furnishings 25.248 3 4.1028
## 9985 Furnishings 91.960 2 15.6332
## 9986 Phones 258.576 2 19.3932
## 9987 Paper 29.600 4 13.3200
## 9988 Appliances 243.160 2 72.9480
sample_n(stores, 10)
## order_id order_date customer_id segment category sub_category
## 1 CA-2014-124429 2014-05-27 MH-17785 Corporate Furniture Tables
## 2 CA-2015-129042 2015-12-12 EM-13960 Consumer Office Supplies Art
## 3 CA-2014-114510 2014-03-14 JF-15295 Consumer Technology Accessories
## 4 CA-2016-133368 2016-01-14 AG-10675 Consumer Furniture Furnishings
## 5 CA-2016-140501 2016-06-23 IM-15070 Consumer Office Supplies Storage
## 6 CA-2017-152079 2017-01-20 ML-17410 Consumer Office Supplies Labels
## 7 CA-2017-147354 2017-03-09 KB-16315 Consumer Office Supplies Binders
## 8 US-2017-106705 2017-12-26 PO-18850 Consumer Office Supplies Paper
## 9 CA-2016-134376 2016-10-02 TT-21265 Corporate Office Supplies Appliances
## 10 CA-2016-145898 2016-09-26 CM-12445 Consumer Office Supplies Paper
## sales quantity profit
## 1 567.120 10 -28.3560
## 2 8.220 3 2.2194
## 3 82.800 12 6.6240
## 4 315.776 8 31.5776
## 5 324.900 5 38.9880
## 6 11.520 5 4.1760
## 7 30.840 5 9.6375
## 8 44.750 5 20.5850
## 9 61.440 3 16.5888
## 10 29.900 5 13.4550
sample_frac(stores, 0.001)
## order_id order_date customer_id segment category
## 1 CA-2015-130022 2015-08-10 JK-16120 Home Office Office Supplies
## 2 CA-2015-148180 2015-07-26 BP-11095 Corporate Office Supplies
## 3 CA-2017-107909 2017-09-01 SS-20875 Consumer Office Supplies
## 4 CA-2017-148474 2017-06-12 ME-17320 Home Office Office Supplies
## 5 CA-2017-108574 2017-10-07 MG-18145 Consumer Technology
## 6 CA-2014-126361 2014-08-04 VD-21670 Consumer Office Supplies
## 7 US-2016-111563 2016-11-04 SM-20005 Consumer Furniture
## 8 CA-2014-130274 2014-05-03 JS-15940 Home Office Office Supplies
## 9 CA-2016-112669 2016-04-14 KT-16465 Consumer Furniture
## 10 CA-2014-146591 2014-01-19 TS-21340 Consumer Office Supplies
## sub_category sales quantity profit
## 1 Labels 3.750 1 1.8000
## 2 Binders 23.136 6 8.3868
## 3 Binders 22.920 5 8.0220
## 4 Binders 91.200 3 41.9520
## 5 Accessories 1115.910 9 200.8638
## 6 Appliances 1089.750 3 305.1300
## 7 Furnishings 11.376 3 -5.6880
## 8 Labels 21.560 7 10.3488
## 9 Chairs 933.536 4 105.0228
## 10 Paper 56.064 4 19.6224
Select and rename (memilih kolom dan mengganti nama variabel)
head(select(stores,c(order_id, order_date, sales)))
## order_id order_date sales
## 1 CA-2016-152156 2016-11-08 261.9600
## 2 CA-2016-152156 2016-11-08 731.9400
## 3 CA-2016-138688 2016-06-12 14.6200
## 4 US-2015-108966 2015-10-11 957.5775
## 5 US-2015-108966 2015-10-11 22.3680
## 6 CA-2014-115812 2014-06-09 48.8600
head(select(stores,starts_with("order")))
## order_id order_date
## 1 CA-2016-152156 2016-11-08
## 2 CA-2016-152156 2016-11-08
## 3 CA-2016-138688 2016-06-12
## 4 US-2015-108966 2015-10-11
## 5 US-2015-108966 2015-10-11
## 6 CA-2014-115812 2014-06-09
head(select(stores,ends_with("id")))
## order_id customer_id
## 1 CA-2016-152156 CG-12520
## 2 CA-2016-152156 CG-12520
## 3 CA-2016-138688 DV-13045
## 4 US-2015-108966 SO-20335
## 5 US-2015-108966 SO-20335
## 6 CA-2014-115812 BH-11710
head(select(stores,contains("er")))
## order_id order_date customer_id
## 1 CA-2016-152156 2016-11-08 CG-12520
## 2 CA-2016-152156 2016-11-08 CG-12520
## 3 CA-2016-138688 2016-06-12 DV-13045
## 4 US-2015-108966 2015-10-11 SO-20335
## 5 US-2015-108966 2015-10-11 SO-20335
## 6 CA-2014-115812 2014-06-09 BH-11710
head(select(stores,order_date,everything()))
## order_date order_id customer_id segment category sub_category
## 1 2016-11-08 CA-2016-152156 CG-12520 Consumer Furniture Bookcases
## 2 2016-11-08 CA-2016-152156 CG-12520 Consumer Furniture Chairs
## 3 2016-06-12 CA-2016-138688 DV-13045 Corporate Office Supplies Labels
## 4 2015-10-11 US-2015-108966 SO-20335 Consumer Furniture Tables
## 5 2015-10-11 US-2015-108966 SO-20335 Consumer Office Supplies Storage
## 6 2014-06-09 CA-2014-115812 BH-11710 Consumer Furniture Furnishings
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 48.8600 7 14.1694
head(select(stores, -order_date, order_date))
## order_id customer_id segment category sub_category sales
## 1 CA-2016-152156 CG-12520 Consumer Furniture Bookcases 261.9600
## 2 CA-2016-152156 CG-12520 Consumer Furniture Chairs 731.9400
## 3 CA-2016-138688 DV-13045 Corporate Office Supplies Labels 14.6200
## 4 US-2015-108966 SO-20335 Consumer Furniture Tables 957.5775
## 5 US-2015-108966 SO-20335 Consumer Office Supplies Storage 22.3680
## 6 CA-2014-115812 BH-11710 Consumer Furniture Furnishings 48.8600
## quantity profit order_date
## 1 2 41.9136 2016-11-08
## 2 3 219.5820 2016-11-08
## 3 2 6.8714 2016-06-12
## 4 5 -383.0310 2015-10-11
## 5 2 2.5164 2015-10-11
## 6 7 14.1694 2014-06-09
head(select_if(stores, is.numeric ))
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 48.8600 7 14.1694
head(select_at(stores, vars(-contains("e"),starts_with("s"))))
## quantity profit segment sub_category sales
## 1 2 41.9136 Consumer Bookcases 261.9600
## 2 3 219.5820 Consumer Chairs 731.9400
## 3 2 6.8714 Corporate Labels 14.6200
## 4 5 -383.0310 Consumer Tables 957.5775
## 5 2 2.5164 Consumer Storage 22.3680
## 6 7 14.1694 Consumer Furnishings 48.8600
head(rename(stores, penjualan = sales))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## penjualan quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 48.8600 7 14.1694
arrange (sort)
head(arrange(stores,profit))
## order_id order_date customer_id segment category
## 1 US-2017-168116 2017-11-04 GT-14635 Corporate Technology
## 2 CA-2014-169019 2014-07-26 LF-17185 Consumer Office Supplies
## 3 CA-2017-134845 2017-04-17 SR-20425 Home Office Technology
## 4 US-2017-122714 2017-12-07 HG-14965 Corporate Office Supplies
## 5 CA-2015-147830 2015-12-15 NF-18385 Consumer Technology
## 6 CA-2017-131254 2017-11-19 NC-18415 Consumer Office Supplies
## sub_category sales quantity profit
## 1 Machines 7999.980 4 -3839.990
## 2 Binders 2177.584 8 -3701.893
## 3 Machines 2549.985 5 -3399.980
## 4 Binders 1889.990 5 -2929.485
## 5 Machines 1799.994 2 -2639.991
## 6 Binders 1525.188 6 -2287.782
head(arrange(stores, desc(profit)))
## order_id order_date customer_id segment category
## 1 CA-2016-117121 2016-12-17 AB-10105 Consumer Office Supplies
## 2 CA-2014-116904 2014-09-23 SC-20095 Consumer Office Supplies
## 3 CA-2015-145352 2015-03-16 CM-12385 Consumer Office Supplies
## 4 CA-2016-158841 2016-02-02 SE-20110 Consumer Technology
## 5 US-2016-140158 2016-10-04 DR-12940 Home Office Technology
## 6 CA-2017-138289 2017-01-16 AR-10540 Consumer Office Supplies
## sub_category sales quantity profit
## 1 Binders 9892.74 13 4946.370
## 2 Binders 9449.95 5 4630.475
## 3 Binders 6354.95 5 3177.475
## 4 Machines 8749.95 5 2799.984
## 5 Copiers 5399.91 9 2591.957
## 6 Binders 5443.96 4 2504.222
Filter (memilih data berdasarkan)
head(filter(stores, segment=="Consumer"))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 5 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Art
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 957.5775 5 -383.0310
## 4 22.3680 2 2.5164
## 5 48.8600 7 14.1694
## 6 7.2800 4 1.9656
head(filter(stores, segment=="Consumer" & category=="Furniture"))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 4 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## 5 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Tables
## 6 US-2017-156909 2017-07-16 SF-20065 Consumer Furniture Chairs
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 957.5775 5 -383.0310
## 4 48.8600 7 14.1694
## 5 1706.1840 9 85.3092
## 6 71.3720 2 -1.0196
head(filter(stores, segment!="Consumer" , category=="Furniture"))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-117590 2016-12-08 GH-14485 Corporate Furniture Furnishings
## 2 CA-2015-117415 2015-12-27 SN-20710 Home Office Furniture Bookcases
## 3 CA-2015-117415 2015-12-27 SN-20710 Home Office Furniture Chairs
## 4 US-2015-164175 2015-04-30 PS-18970 Home Office Furniture Chairs
## 5 US-2017-118038 2017-12-09 KB-16600 Corporate Furniture Furnishings
## 6 CA-2017-161018 2017-11-09 PN-18775 Home Office Furniture Furnishings
## sales quantity profit
## 1 190.9200 5 -147.9630
## 2 532.3992 3 -46.9764
## 3 212.0580 3 -15.1470
## 4 213.1150 5 -15.2225
## 5 9.7080 3 -5.8248
## 6 96.5300 7 40.5426
head(filter(stores, profit > mean(profit)))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2014-115812 2014-06-09 BH-11710 Consumer Technology Phones
## 4 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Appliances
## 5 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Tables
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Technology Phones
## sales quantity profit
## 1 261.960 2 41.9136
## 2 731.940 3 219.5820
## 3 907.152 6 90.7152
## 4 114.900 5 34.4700
## 5 1706.184 9 85.3092
## 6 911.424 4 68.3568
head(filter_if(stores, is.numeric, all_vars(. > mean(.))))
## order_id order_date customer_id segment category sub_category
## 1 CA-2014-115812 2014-06-09 BH-11710 Consumer Technology Phones
## 2 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Tables
## 3 CA-2014-115812 2014-06-09 BH-11710 Consumer Technology Phones
## 4 CA-2016-117590 2016-12-08 GH-14485 Corporate Technology Phones
## 5 CA-2015-117415 2015-12-27 SN-20710 Home Office Technology Phones
## 6 CA-2016-105816 2016-12-11 JM-15265 Corporate Technology Phones
## sales quantity profit
## 1 907.152 6 90.7152
## 2 1706.184 9 85.3092
## 3 911.424 4 68.3568
## 4 1097.544 7 123.4737
## 5 371.168 4 41.7564
## 6 1029.950 5 298.6855
head(filter_at(stores,vars(profit, sales), ~ .<mean(.) ))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 2 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 3 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## 4 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Art
## 5 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Binders
## 6 CA-2017-114412 2017-04-15 AA-10480 Consumer Office Supplies Paper
## sales quantity profit
## 1 14.620 2 6.8714
## 2 22.368 2 2.5164
## 3 48.860 7 14.1694
## 4 7.280 4 1.9656
## 5 18.504 3 5.7825
## 6 15.552 3 5.4432
Distinc (unik)
distinct(stores, sub_category)
## sub_category
## 1 Bookcases
## 2 Chairs
## 3 Labels
## 4 Tables
## 5 Storage
## 6 Furnishings
## 7 Art
## 8 Phones
## 9 Binders
## 10 Appliances
## 11 Paper
## 12 Accessories
## 13 Envelopes
## 14 Fasteners
## 15 Supplies
## 16 Machines
## 17 Copiers
n_distinct(stores$category)
## [1] 3
mutate
head(mutate(stores, persentase_keuntungan=(profit*100)/sum(profit)))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## sales quantity profit persentase_keuntungan
## 1 261.9600 2 41.9136 0.0154818036
## 2 731.9400 3 219.5820 0.0811079315
## 3 14.6200 2 6.8714 0.0025381181
## 4 957.5775 5 -383.0310 -0.1414817796
## 5 22.3680 2 2.5164 0.0009294933
## 6 48.8600 7 14.1694 0.0052338112
vars <- "profit"
head(mutate(stores, persentase = (.data[[vars[[1]]]] * 100 )/sum(.data[[vars[[1]]]])))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## sales quantity profit persentase
## 1 261.9600 2 41.9136 0.0154818036
## 2 731.9400 3 219.5820 0.0811079315
## 3 14.6200 2 6.8714 0.0025381181
## 4 957.5775 5 -383.0310 -0.1414817796
## 5 22.3680 2 2.5164 0.0009294933
## 6 48.8600 7 14.1694 0.0052338112
head(mutate(stores, profit=(profit*100)/sum(profit)))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## sales quantity profit
## 1 261.9600 2 0.0154818036
## 2 731.9400 3 0.0811079315
## 3 14.6200 2 0.0025381181
## 4 957.5775 5 -0.1414817796
## 5 22.3680 2 0.0009294933
## 6 48.8600 7 0.0052338112
head(mutate_at(stores,c("profit","sales"), ~ (.*100)/sum(.) ))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## sales quantity profit
## 1 0.0118167002 2 0.0154818036
## 2 0.0330169320 3 0.0811079315
## 3 0.0006594906 2 0.0025381181
## 4 0.0431951679 5 -0.1414817796
## 5 0.0010089935 2 0.0009294933
## 6 0.0022040158 7 0.0052338112
per<-function(x, na.rm=T) (x*100/sum(x))
head(mutate_at(stores,c("profit","sales"), per))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## sales quantity profit
## 1 0.0118167002 2 0.0154818036
## 2 0.0330169320 3 0.0811079315
## 3 0.0006594906 2 0.0025381181
## 4 0.0431951679 5 -0.1414817796
## 5 0.0010089935 2 0.0009294933
## 6 0.0022040158 7 0.0052338112
per<-function(x, na.rm=T) (x*100/sum(x))
head(mutate_if(stores,is.numeric, per))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## sales quantity profit
## 1 0.0118167002 0.005284574 0.0154818036
## 2 0.0330169320 0.007926861 0.0811079315
## 3 0.0006594906 0.005284574 0.0025381181
## 4 0.0431951679 0.013211436 -0.1414817796
## 5 0.0010089935 0.005284574 0.0009294933
## 6 0.0022040158 0.018496010 0.0052338112
head(mutate_if(stores, is.factor, as.character))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 48.8600 7 14.1694
per<-function(x, na.rm=T) (x*100/sum(x))
head(mutate_if(stores,is.numeric, list(persentase=per)))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## sales quantity profit sales_persentase quantity_persentase
## 1 261.9600 2 41.9136 0.0118167002 0.005284574
## 2 731.9400 3 219.5820 0.0330169320 0.007926861
## 3 14.6200 2 6.8714 0.0006594906 0.005284574
## 4 957.5775 5 -383.0310 0.0431951679 0.013211436
## 5 22.3680 2 2.5164 0.0010089935 0.005284574
## 6 48.8600 7 14.1694 0.0022040158 0.018496010
## profit_persentase
## 1 0.0154818036
## 2 0.0811079315
## 3 0.0025381181
## 4 -0.1414817796
## 5 0.0009294933
## 6 0.0052338112
transmutate
head(transmute(stores, persentase_keuntungan=(profit*100)/sum(profit)))
## persentase_keuntungan
## 1 0.0154818036
## 2 0.0811079315
## 3 0.0025381181
## 4 -0.1414817796
## 5 0.0009294933
## 6 0.0052338112
per<-function(x, na.rm=T) (x*100/sum(x))
head(transmute_at(stores,c("profit","sales"), per))
## profit sales
## 1 0.0154818036 0.0118167002
## 2 0.0811079315 0.0330169320
## 3 0.0025381181 0.0006594906
## 4 -0.1414817796 0.0431951679
## 5 0.0009294933 0.0010089935
## 6 0.0052338112 0.0022040158
per<-function(x, na.rm=T) (x*100/sum(x))
head(transmute_if(stores,is.numeric, per))
## sales quantity profit
## 1 0.0118167002 0.005284574 0.0154818036
## 2 0.0330169320 0.007926861 0.0811079315
## 3 0.0006594906 0.005284574 0.0025381181
## 4 0.0431951679 0.013211436 -0.1414817796
## 5 0.0010089935 0.005284574 0.0009294933
## 6 0.0022040158 0.018496010 0.0052338112
summarize
summarise(stores,mean = mean(profit), n = n())
## mean n
## 1 27.10534 9988
var <- "profit"
summarise(stores, avg = mean(.data[[var]], na.rm = TRUE))
## avg
## 1 27.10534
summarise_at(stores,c("profit", "sales"), mean, na.rm = TRUE)
## profit sales
## 1 27.10534 221.9526
summarise_at(stores,vars(sales,profit), mean, na.rm = TRUE)
## sales profit
## 1 221.9526 27.10534
summarise_if(stores,is.double, mean, na.rm = TRUE)
## sales profit
## 1 221.9526 27.10534
bind row
one <- head(stores,5)
two <- tail(stores,5)
bind_rows(one, two)
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-110422 2014-01-21 TB-21400 Consumer Furniture Furnishings
## 7 CA-2017-121258 2017-02-26 DB-13060 Consumer Furniture Furnishings
## 8 CA-2017-121258 2017-02-26 DB-13060 Consumer Technology Phones
## 9 CA-2017-121258 2017-02-26 DB-13060 Consumer Office Supplies Paper
## 10 CA-2017-119914 2017-05-04 CC-12220 Consumer Office Supplies Appliances
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 25.2480 3 4.1028
## 7 91.9600 2 15.6332
## 8 258.5760 2 19.3932
## 9 29.6000 4 13.3200
## 10 243.1600 2 72.9480
bind_rows(list(one, two))
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-110422 2014-01-21 TB-21400 Consumer Furniture Furnishings
## 7 CA-2017-121258 2017-02-26 DB-13060 Consumer Furniture Furnishings
## 8 CA-2017-121258 2017-02-26 DB-13060 Consumer Technology Phones
## 9 CA-2017-121258 2017-02-26 DB-13060 Consumer Office Supplies Paper
## 10 CA-2017-119914 2017-05-04 CC-12220 Consumer Office Supplies Appliances
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 25.2480 3 4.1028
## 7 91.9600 2 15.6332
## 8 258.5760 2 19.3932
## 9 29.6000 4 13.3200
## 10 243.1600 2 72.9480
bind_rows("group 1" = one, "group 2" = two, .id = "groups")
## groups order_id order_date customer_id segment category
## 1 group 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture
## 2 group 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture
## 3 group 1 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies
## 4 group 1 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture
## 5 group 1 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies
## 6 group 2 CA-2014-110422 2014-01-21 TB-21400 Consumer Furniture
## 7 group 2 CA-2017-121258 2017-02-26 DB-13060 Consumer Furniture
## 8 group 2 CA-2017-121258 2017-02-26 DB-13060 Consumer Technology
## 9 group 2 CA-2017-121258 2017-02-26 DB-13060 Consumer Office Supplies
## 10 group 2 CA-2017-119914 2017-05-04 CC-12220 Consumer Office Supplies
## sub_category sales quantity profit
## 1 Bookcases 261.9600 2 41.9136
## 2 Chairs 731.9400 3 219.5820
## 3 Labels 14.6200 2 6.8714
## 4 Tables 957.5775 5 -383.0310
## 5 Storage 22.3680 2 2.5164
## 6 Furnishings 25.2480 3 4.1028
## 7 Furnishings 91.9600 2 15.6332
## 8 Phones 258.5760 2 19.3932
## 9 Paper 29.6000 4 13.3200
## 10 Appliances 243.1600 2 72.9480
intersect, union, dan setdif
first <- stores[1:5, ]
second <- stores[3:7, ]
intersect(first, second)
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 2 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 3 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## sales quantity profit
## 1 14.6200 2 6.8714
## 2 957.5775 5 -383.0310
## 3 22.3680 2 2.5164
union(first, second)
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## 7 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Art
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 48.8600 7 14.1694
## 7 7.2800 4 1.9656
setdiff(first, second)
## order_id order_date customer_id segment category sub_category sales
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases 261.96
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs 731.94
## quantity profit
## 1 2 41.9136
## 2 3 219.5820
setdiff(second, first)
## order_id order_date customer_id segment category sub_category
## 1 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## 2 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Art
## sales quantity profit
## 1 48.86 7 14.1694
## 2 7.28 4 1.9656
union_all(first, second)
## order_id order_date customer_id segment category sub_category
## 1 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Bookcases
## 2 CA-2016-152156 2016-11-08 CG-12520 Consumer Furniture Chairs
## 3 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 4 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 5 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 6 CA-2016-138688 2016-06-12 DV-13045 Corporate Office Supplies Labels
## 7 US-2015-108966 2015-10-11 SO-20335 Consumer Furniture Tables
## 8 US-2015-108966 2015-10-11 SO-20335 Consumer Office Supplies Storage
## 9 CA-2014-115812 2014-06-09 BH-11710 Consumer Furniture Furnishings
## 10 CA-2014-115812 2014-06-09 BH-11710 Consumer Office Supplies Art
## sales quantity profit
## 1 261.9600 2 41.9136
## 2 731.9400 3 219.5820
## 3 14.6200 2 6.8714
## 4 957.5775 5 -383.0310
## 5 22.3680 2 2.5164
## 6 14.6200 2 6.8714
## 7 957.5775 5 -383.0310
## 8 22.3680 2 2.5164
## 9 48.8600 7 14.1694
## 10 7.2800 4 1.9656
Join
band_members
## # A tibble: 3 x 2
## name band
## <chr> <chr>
## 1 Mick Stones
## 2 John Beatles
## 3 Paul Beatles
band_instruments
## # A tibble: 3 x 2
## name plays
## <chr> <chr>
## 1 John guitar
## 2 Paul bass
## 3 Keith guitar
inner_join( band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 2 x 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
left_join( band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
right_join( band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
## 3 Keith <NA> guitar
full_join(band_members ,band_instruments)
## Joining, by = "name"
## # A tibble: 4 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
## 4 Keith <NA> guitar
semi_join(band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 2 x 2
## name band
## <chr> <chr>
## 1 John Beatles
## 2 Paul Beatles
anti_join(band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 1 x 2
## name band
## <chr> <chr>
## 1 Mick Stones
nest_join(band_members,band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
## name band band_instruments
## <chr> <chr> <list>
## 1 Mick Stones <tibble [0 x 1]>
## 2 John Beatles <tibble [1 x 1]>
## 3 Paul Beatles <tibble [1 x 1]>
inner_join(band_members,band_instruments, by = "name")
## # A tibble: 2 x 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
full_join(band_members,band_instruments2, by = c("name" = "artist"))
## # A tibble: 4 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
## 4 Keith <NA> guitar
perintah pada paket dplyr bisa dilakukan secara bersamaan dengan menulis :
(dataset) %>% perintah1 %>% perintah2 …
ket : selalu menulis %>% sebelum menambah perintah
stores %>%
select(customer_id, sales)%>%
filter(sales==max(sales))
## customer_id sales
## 1 AB-10105 9892.74
## kita juga bisa menggunakan perintah lain untuk mendapatkan hasil tersebut
## misal
stores%>%
select(customer_id, sales)%>%
arrange(desc(sales))%>%
head(1)
## customer_id sales
## 1 AB-10105 9892.74
stores%>%
filter(category=="Office Supplies")%>%
group_by(sub_category)%>%
summarise(total_profit=sum(profit))%>%
arrange(desc(total_profit))
## # A tibble: 9 x 2
## sub_category total_profit
## <chr> <dbl>
## 1 Paper 34054.
## 2 Binders 30222.
## 3 Storage 21279.
## 4 Appliances 18138.
## 5 Envelopes 6964.
## 6 Art 6528.
## 7 Labels 5546.
## 8 Fasteners 950.
## 9 Supplies -1189.
stores%>%
filter(profit<0)%>%
tally(name="banyaknya order rugi")
## banyaknya order rugi
## 1 1869
stores%>%
filter(customer_id =="JE-16165"|customer_id=="KH-16510"|customer_id=="AD-10180")%>%
group_by(customer_id)%>%
summarise(total_sales=sum(sales))%>%
arrange(desc(total_sales))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
## customer_id total_sales
## <chr> <dbl>
## 1 AD-10180 6107.
## 2 KH-16510 5953.
## 3 JE-16165 2697.
stores$order_date<-as.Date(stores$order_date)
yearly_sales<- stores
yearly_sales$order_year<-as.Date(cut(yearly_sales$order_date, breaks = "year"))
yearly_sales%>%
group_by(order_year)%>%
summarise(total_sales= sum(sales),jumlah_customer= n(),total_profit= sum(profit))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 4
## order_year total_sales jumlah_customer total_profit
## <date> <dbl> <int> <dbl>
## 1 2014-01-01 461609. 1992 51355.
## 2 2015-01-01 470533. 2102 61619.
## 3 2016-01-01 587206. 2585 79995.
## 4 2017-01-01 697515. 3309 77759.
stores%>%
group_by_at(vars( segment,category))%>%
count(sub_category, name = "banyaknya_order")
## # A tibble: 51 x 4
## # Groups: segment, category [9]
## segment category sub_category banyaknya_order
## <chr> <chr> <chr> <int>
## 1 Consumer Furniture Bookcases 131
## 2 Consumer Furniture Chairs 329
## 3 Consumer Furniture Furnishings 494
## 4 Consumer Furniture Tables 159
## 5 Consumer Office Supplies Appliances 244
## 6 Consumer Office Supplies Art 428
## 7 Consumer Office Supplies Binders 780
## 8 Consumer Office Supplies Envelopes 129
## 9 Consumer Office Supplies Fasteners 114
## 10 Consumer Office Supplies Labels 189
## # ... with 41 more rows
# sarua wae
stores%>%
group_by(segment)%>%
count(category,sub_category, name = "banyaknya_order")
## # A tibble: 51 x 4
## # Groups: segment [3]
## segment category sub_category banyaknya_order
## <chr> <chr> <chr> <int>
## 1 Consumer Furniture Bookcases 131
## 2 Consumer Furniture Chairs 329
## 3 Consumer Furniture Furnishings 494
## 4 Consumer Furniture Tables 159
## 5 Consumer Office Supplies Appliances 244
## 6 Consumer Office Supplies Art 428
## 7 Consumer Office Supplies Binders 780
## 8 Consumer Office Supplies Envelopes 129
## 9 Consumer Office Supplies Fasteners 114
## 10 Consumer Office Supplies Labels 189
## # ... with 41 more rows
stores%>%
group_by(segment)%>%
count(category,sub_category)%>%
count(category, name = "banyaknya_sub_category")
## Using `n` as weighting variable
## i Quiet this message with `wt = n` or count rows with `wt = 1`
## # A tibble: 9 x 3
## # Groups: segment [3]
## segment category banyaknya_sub_category
## <chr> <chr> <int>
## 1 Consumer Furniture 1113
## 2 Consumer Office Supplies 3127
## 3 Consumer Technology 948
## 4 Corporate Furniture 646
## 5 Corporate Office Supplies 1820
## 6 Corporate Technology 553
## 7 Home Office Furniture 362
## 8 Home Office Office Supplies 1079
## 9 Home Office Technology 340
stores%>%
group_by_at(vars(segment,category,sub_category))%>%
summarise(profits=sum(profit))%>%
group_by_at(vars(segment, category))%>%
filter(profits==max(profits))
## `summarise()` regrouping output by 'segment', 'category' (override with `.groups` argument)
## # A tibble: 9 x 4
## # Groups: segment, category [9]
## segment category sub_category profits
## <chr> <chr> <chr> <dbl>
## 1 Consumer Furniture Chairs 13235.
## 2 Consumer Office Supplies Binders 17996.
## 3 Consumer Technology Phones 23837.
## 4 Corporate Furniture Chairs 8345.
## 5 Corporate Office Supplies Paper 10362.
## 6 Corporate Technology Accessories 12707.
## 7 Home Office Furniture Chairs 5010.
## 8 Home Office Office Supplies Paper 8157.
## 9 Home Office Technology Phones 8912.
stores%>%
group_by_at(vars(segment,category,sub_category))%>%
summarise(profits=sum(profit))%>%
mutate(keterangan=case_when(
profits==max(profits) ~"profit terbesar dikelompoknya",
profits > 0 ~ "untung",
profits < 0 ~ "rugi"
))%>%
select(-profits)
## `summarise()` regrouping output by 'segment', 'category' (override with `.groups` argument)
## # A tibble: 51 x 4
## # Groups: segment, category [9]
## segment category sub_category keterangan
## <chr> <chr> <chr> <chr>
## 1 Consumer Furniture Bookcases rugi
## 2 Consumer Furniture Chairs profit terbesar dikelompoknya
## 3 Consumer Furniture Furnishings untung
## 4 Consumer Furniture Tables rugi
## 5 Consumer Office Supplies Appliances untung
## 6 Consumer Office Supplies Art untung
## 7 Consumer Office Supplies Binders profit terbesar dikelompoknya
## 8 Consumer Office Supplies Envelopes untung
## 9 Consumer Office Supplies Fasteners untung
## 10 Consumer Office Supplies Labels untung
## # ... with 41 more rows
stores%>%
group_by(segment,category,sub_category)%>%
summarise(profits=sum(profit))%>%
mutate(rank = min_rank(desc(profits)))%>%
arrange(rank, .by_group=T)%>%
select(-profits)
## `summarise()` regrouping output by 'segment', 'category' (override with `.groups` argument)
## # A tibble: 51 x 4
## # Groups: segment, category [9]
## segment category sub_category rank
## <chr> <chr> <chr> <int>
## 1 Consumer Furniture Chairs 1
## 2 Consumer Furniture Furnishings 2
## 3 Consumer Furniture Bookcases 3
## 4 Consumer Furniture Tables 4
## 5 Consumer Office Supplies Binders 1
## 6 Consumer Office Supplies Paper 2
## 7 Consumer Office Supplies Storage 3
## 8 Consumer Office Supplies Appliances 4
## 9 Consumer Office Supplies Art 5
## 10 Consumer Office Supplies Envelopes 6
## # ... with 41 more rows
11.data partitioning
set.seed(1)
#60% train dan 40% validation
data<-stores
train.rows<-sample(rownames(data), dim(data)[1]*0.6)
train.data<-data[train.rows,]
valid.rows<-setdiff(rownames(data),train.rows)
valid.data<-data[valid.rows,]
#50% train, 30% validation, dan 20% test
train.rows2<-sample(rownames(data),dim(data)[1]*0.5)
valid.rows2<-sample(setdiff(rownames(data), train.rows2),dim(data)[1]*0.3)
test.rows<-setdiff(rownames(data),union(train.rows2, valid.rows2))
train.data2<-data[train.rows2,]
valid.data2<-data[valid.rows2,]
test.data<-data[test.rows,]