# Read the data ----
store <- read.csv("data_input/superstore_data.csv")
store

# Table description:
#   Id                  -> unique ID of each customer
#   Year_Birth          -> customer's year of birth (the original note said
#                          "age"; the values are years such as 1970)
#   Complain            -> 1 if the customer complained in the last 2 years
#   Dt_Customer         -> date the customer enrolled with the company
#   Education           -> customer's level of education
#   Marital_Status      -> customer's marital status
#   Kidhome             -> number of small children in the household
#   Teenhome            -> number of teenagers in the household
#   Income              -> customer's yearly household income
#   MntFishProducts     -> amount spent on fish products in the last 2 years
#   MntMeatProducts     -> amount spent on meat products in the last 2 years
#   MntFruits           -> amount spent on fruit products in the last 2 years
#   MntSweetProducts    -> amount spent on sweet products in the last 2 years
#   MntWines            -> amount spent on wine products in the last 2 years
#   MntGoldProds        -> amount spent on gold products in the last 2 years
#   NumDealsPurchases   -> number of purchases made with a discount
#   NumCatalogPurchases -> number of purchases made using the catalogue
#                          (goods ordered to be shipped by mail)
#   NumStorePurchases   -> number of purchases made directly in stores
#   NumWebPurchases     -> number of purchases made through the company's
#                          website
#   NumWebVisitsMonth   -> number of visits to the company's website in the
#                          last month
#   Recency             -> number of days since the last purchase
library(dplyr)
library(lubridate)
# Drop the Id column and parse the enrollment date with lubridate's
# month-day-year parser so that Dt_Customer becomes a Date.
store_clean <- store %>%
  select(-Id) %>%
  # Use the piped column instead of `store$Dt_Customer`, so this step keeps
  # working if rows are filtered or reordered earlier in the pipeline.
  mutate(Dt_Customer = mdy(Dt_Customer))

glimpse(store_clean)
#> Rows: 2,240
#> Columns: 21
#> $ Year_Birth <int> 1970, 1961, 1958, 1967, 1989, 1958, 1954, 1967, 19~
#> $ Education <chr> "Graduation", "Graduation", "Graduation", "Graduat~
#> $ Marital_Status <chr> "Divorced", "Single", "Married", "Together", "Sing~
#> $ Income <int> 84835, 57091, 67267, 32474, 21474, 71691, 63564, 4~
#> $ Kidhome <int> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,~
#> $ Teenhome <int> 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,~
#> $ Dt_Customer <date> 2014-06-16, 2014-06-15, 2014-05-13, 2014-11-05, 2~
#> $ Recency <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
#> $ MntWines <int> 189, 464, 134, 10, 6, 336, 769, 78, 384, 384, 450,~
#> $ MntFruits <int> 104, 5, 11, 0, 16, 130, 80, 0, 0, 0, 26, 4, 82, 10~
#> $ MntMeatProducts <int> 379, 64, 59, 1, 24, 411, 252, 11, 102, 102, 535, 6~
#> $ MntFishProducts <int> 111, 7, 15, 0, 11, 240, 15, 0, 21, 21, 73, 0, 80, ~
#> $ MntSweetProducts <int> 189, 0, 2, 0, 0, 32, 34, 0, 32, 32, 98, 13, 20, 16~
#> $ MntGoldProds <int> 218, 37, 30, 0, 34, 43, 65, 7, 5, 5, 26, 4, 102, 3~
#> $ NumDealsPurchases <int> 1, 1, 1, 1, 2, 1, 1, 1, 3, 3, 1, 2, 1, 1, 0, 4, 4,~
#> $ NumWebPurchases <int> 4, 7, 3, 1, 3, 4, 10, 2, 6, 6, 5, 3, 3, 1, 25, 2, ~
#> $ NumCatalogPurchases <int> 4, 3, 2, 0, 1, 7, 10, 1, 2, 2, 6, 1, 6, 1, 0, 1, 1~
#> $ NumStorePurchases <int> 6, 7, 5, 2, 2, 5, 7, 3, 9, 9, 10, 6, 6, 2, 0, 5, 5~
#> $ NumWebVisitsMonth <int> 1, 5, 2, 7, 7, 2, 6, 5, 4, 4, 1, 4, 1, 6, 1, 4, 4,~
#> $ Response <int> 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,~
#> $ Complain <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
# Count the missing values in each column.
store_clean %>%
  is.na() %>%
  colSums()
#> Year_Birth Education Marital_Status Income
#> 0 0 0 24
#> Kidhome Teenhome Dt_Customer Recency
#> 0 0 0 0
#> MntWines MntFruits MntMeatProducts MntFishProducts
#> 0 0 0 0
#> MntSweetProducts MntGoldProds NumDealsPurchases NumWebPurchases
#> 0 0 0 0
#> NumCatalogPurchases NumStorePurchases NumWebVisitsMonth Response
#> 0 0 0 0
#> Complain
#> 0
# Keep only the complete rows, i.e. the rows without any NA.
store_clean <- filter(store_clean, complete.cases(store_clean))

# Confirm that no missing values remain.
anyNA(store_clean)
#> [1] FALSE
# Keep only the numeric columns; these are the inputs to the covariance and
# PCA steps that follow. `select(where(...))` replaces the superseded
# `select_if()`.
store_num <- store_clean %>%
  select(where(is.numeric))

glimpse(store_num)
#> Rows: 2,216
#> Columns: 18
#> $ Year_Birth <int> 1970, 1961, 1958, 1967, 1989, 1958, 1954, 1967, 19~
#> $ Income <int> 84835, 57091, 67267, 32474, 21474, 71691, 63564, 4~
#> $ Kidhome <int> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,~
#> $ Teenhome <int> 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,~
#> $ Recency <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
#> $ MntWines <int> 189, 464, 134, 10, 6, 336, 769, 78, 384, 384, 450,~
#> $ MntFruits <int> 104, 5, 11, 0, 16, 130, 80, 0, 0, 0, 26, 4, 82, 10~
#> $ MntMeatProducts <int> 379, 64, 59, 1, 24, 411, 252, 11, 102, 102, 535, 6~
#> $ MntFishProducts <int> 111, 7, 15, 0, 11, 240, 15, 0, 21, 21, 73, 0, 80, ~
#> $ MntSweetProducts <int> 189, 0, 2, 0, 0, 32, 34, 0, 32, 32, 98, 13, 20, 16~
#> $ MntGoldProds <int> 218, 37, 30, 0, 34, 43, 65, 7, 5, 5, 26, 4, 102, 3~
#> $ NumDealsPurchases <int> 1, 1, 1, 1, 2, 1, 1, 1, 3, 3, 1, 2, 1, 1, 0, 4, 4,~
#> $ NumWebPurchases <int> 4, 7, 3, 1, 3, 4, 10, 2, 6, 6, 5, 3, 3, 1, 25, 2, ~
#> $ NumCatalogPurchases <int> 4, 3, 2, 0, 1, 7, 10, 1, 2, 2, 6, 1, 6, 1, 0, 1, 1~
#> $ NumStorePurchases <int> 6, 7, 5, 2, 2, 5, 7, 3, 9, 9, 10, 6, 6, 2, 0, 5, 5~
#> $ NumWebVisitsMonth <int> 1, 5, 2, 7, 7, 2, 6, 5, 4, 4, 1, 4, 1, 6, 1, 4, 4,~
#> $ Response <int> 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,~
#> $ Complain <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
# Descriptive statistics for every numeric column; the value ranges differ
# widely between columns (compare Income with Kidhome in the output below).
store_num %>%
  summary()
#> Year_Birth Income Kidhome Teenhome
#> Min. :1893 Min. : 1730 Min. :0.0000 Min. :0.0000
#> 1st Qu.:1959 1st Qu.: 35303 1st Qu.:0.0000 1st Qu.:0.0000
#> Median :1970 Median : 51382 Median :0.0000 Median :0.0000
#> Mean :1969 Mean : 52247 Mean :0.4418 Mean :0.5054
#> 3rd Qu.:1977 3rd Qu.: 68522 3rd Qu.:1.0000 3rd Qu.:1.0000
#> Max. :1996 Max. :666666 Max. :2.0000 Max. :2.0000
#> Recency MntWines MntFruits MntMeatProducts
#> Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.0
#> 1st Qu.:24.00 1st Qu.: 24.0 1st Qu.: 2.00 1st Qu.: 16.0
#> Median :49.00 Median : 174.5 Median : 8.00 Median : 68.0
#> Mean :49.01 Mean : 305.1 Mean : 26.36 Mean : 167.0
#> 3rd Qu.:74.00 3rd Qu.: 505.0 3rd Qu.: 33.00 3rd Qu.: 232.2
#> Max. :99.00 Max. :1493.0 Max. :199.00 Max. :1725.0
#> MntFishProducts MntSweetProducts MntGoldProds NumDealsPurchases
#> Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000
#> 1st Qu.: 3.00 1st Qu.: 1.00 1st Qu.: 9.00 1st Qu.: 1.000
#> Median : 12.00 Median : 8.00 Median : 24.50 Median : 2.000
#> Mean : 37.64 Mean : 27.03 Mean : 43.97 Mean : 2.324
#> 3rd Qu.: 50.00 3rd Qu.: 33.00 3rd Qu.: 56.00 3rd Qu.: 3.000
#> Max. :259.00 Max. :262.00 Max. :321.00 Max. :15.000
#> NumWebPurchases NumCatalogPurchases NumStorePurchases NumWebVisitsMonth
#> Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
#> 1st Qu.: 2.000 1st Qu.: 0.000 1st Qu.: 3.000 1st Qu.: 3.000
#> Median : 4.000 Median : 2.000 Median : 5.000 Median : 6.000
#> Mean : 4.085 Mean : 2.671 Mean : 5.801 Mean : 5.319
#> 3rd Qu.: 6.000 3rd Qu.: 4.000 3rd Qu.: 8.000 3rd Qu.: 7.000
#> Max. :27.000 Max. :28.000 Max. :13.000 Max. :20.000
#> Response Complain
#> Min. :0.0000 Min. :0.000000
#> 1st Qu.:0.0000 1st Qu.:0.000000
#> Median :0.0000 Median :0.000000
#> Mean :0.1503 Mean :0.009477
#> 3rd Qu.:0.0000 3rd Qu.:0.000000
#> Max. :1.0000 Max. :1.000000
# Variance-covariance matrix of the unscaled numeric columns.
store_num %>%
  cov()
#> Year_Birth Income Kidhome
#> Year_Birth 143.65350702 -48814.62120 1.503309809
#> Income -48814.62120144 633683788.57562 -5793.603194294
#> Kidhome 1.50330981 -5793.60319 0.288257573
#> Teenhome -2.28796603 262.10270 -0.011648507
#> Recency -5.65371157 -2892.83659 0.178613979
#> MntWines -644.67067296 4913651.56703 -90.072543008
#> MntFruits -8.46424078 431589.32536 -7.977683948
#> MntMeatProducts -90.58221961 3300781.48003 -52.894367457
#> MntFishProducts -26.52831653 604886.90106 -11.431713335
#> MntSweetProducts -9.94605211 455689.30515 -8.336015516
#> MntGoldProds -39.87531802 425110.22454 -9.876741490
#> NumDealsPurchases -1.35269739 -4024.23080 0.224035539
#> NumWebPurchases -5.02801583 26762.85982 -0.547403045
#> NumCatalogPurchases -4.27130045 43406.47821 -0.792748205
#> NumStorePurchases -4.98293674 43318.89745 -0.875021188
#> NumWebVisitsMonth 3.60179650 -33768.08971 0.582689205
#> Response 0.10149335 1197.05973 -0.014950371
#> Complain -0.03531753 -66.41277 0.002132042
#> Teenhome Recency MntWines
#> Year_Birth -2.2879660340 -5.65371157 -644.6706730
#> Income 262.1027014693 -2892.83658596 4913651.5670317
#> Kidhome -0.0116485075 0.17861398 -90.0725430
#> Teenhome 0.2961331910 0.21799024 0.6877656
#> Recency 0.2179902372 838.00706375 153.5171264
#> MntWines 0.6877655630 153.51712642 113790.1256902
#> MntFruits -3.8233736177 -6.73181459 5195.2531478
#> MntMeatProducts -31.8701811574 146.19869694 43038.2468733
#> MntFishProducts -6.1151926070 0.87320371 7345.6715847
#> MntSweetProducts -3.6444002575 29.85471392 5407.8655247
#> MntGoldProds -0.5607598341 26.49344150 6864.4609723
#> NumDealsPurchases 0.4043419090 0.11780606 5.7662836
#> NumWebPurchases 0.2417501284 -0.44757927 512.0301067
#> NumCatalogPurchases -0.1794818720 2.04027593 626.6716845
#> NumStorePurchases 0.0879855922 -0.04082519 701.8245591
#> NumWebVisitsMonth 0.1732151152 -1.30335585 -263.4233706
#> Response -0.0299337468 -2.06691087 29.6954831
#> Complain 0.0001743935 0.03825492 -1.2902590
#> MntFruits MntMeatProducts MntFishProducts
#> Year_Birth -8.46424078 -90.5822196 -26.5283165
#> Income 431589.32536427 3300781.4800281 604886.9010551
#> Kidhome -7.97768395 -52.8943675 -11.4317133
#> Teenhome -3.82337362 -31.8701812 -6.1151926
#> Recency -6.73181459 146.1986969 0.8732037
#> MntWines 5195.25314784 43038.2468733 7345.6715847
#> MntFruits 1583.55579186 4889.3730043 1292.9674518
#> MntMeatProducts 4889.37300425 50302.9864395 7043.4752771
#> MntFishProducts 1292.96745178 7043.4752771 2997.7905288
#> MntSweetProducts 934.24298555 4929.5522618 1312.9883482
#> MntGoldProds 817.53156461 4177.2441026 1211.8027530
#> NumDealsPurchases -10.29719442 -52.3390916 -15.0872165
#> NumWebPurchases 32.94433730 188.7836423 44.9749377
#> NumCatalogPurchases 56.63320913 481.8934714 85.3714836
#> NumStorePurchases 59.31106849 354.3450153 81.4727753
#> NumWebVisitsMonth -40.41341995 -293.4619091 -59.2820805
#> Response 1.74150626 19.0583984 2.1163284
#> Complain -0.02053137 -0.5168915 -0.1125916
#> MntSweetProducts MntGoldProds NumDealsPurchases
#> Year_Birth -9.9460521 -39.8753180 -1.35269739469
#> Income 455689.3051528 425110.2245392 -4024.23079939859
#> Kidhome -8.3360155 -9.8767415 0.22403553879
#> Teenhome -3.6444003 -0.5607598 0.40434190904
#> Recency 29.8547139 26.4934415 0.11780606466
#> MntWines 5407.8655247 6864.4609723 5.76628358501
#> MntFruits 934.2429856 817.5315646 -10.29719442430
#> MntMeatProducts 4929.5522618 4177.2441026 -52.33909164623
#> MntFishProducts 1312.9883482 1211.8027530 -15.08721650871
#> MntSweetProducts 1686.9129353 760.7120649 -9.59445037527
#> MntGoldProds 760.7120649 2684.8371668 5.17377598585
#> NumDealsPurchases -9.5944504 5.1737760 3.70068188671
#> NumWebPurchases 37.5934725 57.8128972 1.27306904027
#> NumCatalogPurchases 59.5187603 67.0942073 -0.06822921335
#> NumStorePurchases 60.7800165 65.5538043 0.41340324013
#> NumWebVisitsMonth -42.0742297 -31.1275096 1.61455798584
#> Response 1.7053646 2.5989033 0.00237285166
#> Complain -0.0901158 -0.1563297 0.00009269748
#> NumWebPurchases NumCatalogPurchases NumStorePurchases
#> Year_Birth -5.028015826 -4.271300454 -4.982936738
#> Income 26762.859816765 43406.478210796 43318.897447865
#> Kidhome -0.547403045 -0.792748205 -0.875021188
#> Teenhome 0.241750128 -0.179481872 0.087985592
#> Recency -0.447579272 2.040275933 -0.040825191
#> MntWines 512.030106714 626.671684486 701.824559127
#> MntFruits 32.944337305 56.633209126 59.311068486
#> MntMeatProducts 188.783642257 481.893471449 354.345015321
#> MntFishProducts 44.974937658 85.371483608 81.472775261
#> MntSweetProducts 37.593472468 59.518760339 60.780016461
#> MntGoldProds 57.812897173 67.094207325 65.553804264
#> NumDealsPurchases 1.273069040 -0.068229213 0.413403240
#> NumWebPurchases 7.512812829 3.103465256 4.599824995
#> NumCatalogPurchases 3.103465256 8.565769776 4.926827872
#> NumStorePurchases 4.599824995 4.926827872 10.567601723
#> NumWebVisitsMonth -0.340541394 -3.705380325 -3.409165641
#> Response 0.148351615 0.230043965 0.042108083
#> Complain -0.004420345 -0.005910432 -0.005336726
#> NumWebVisitsMonth Response Complain
#> Year_Birth 3.601796497 0.1014933462 -0.03531753470
#> Income -33768.089709765 1197.0597287529 -66.41276678537
#> Kidhome 0.582689205 -0.0149503712 0.00213204195
#> Teenhome 0.173215115 -0.0299337468 0.00017439349
#> Recency -1.303355852 -2.0669108719 0.03825492417
#> MntWines -263.423370562 29.6954830863 -1.29025902323
#> MntFruits -40.413419946 1.7415062627 -0.02053137046
#> MntMeatProducts -293.461909079 19.0583983913 -0.51689151747
#> MntFishProducts -59.282080457 2.1163284058 -0.11259157696
#> MntSweetProducts -42.074229694 1.7053646372 -0.09011580054
#> MntGoldProds -31.127509555 2.5989033176 -0.15632970964
#> NumDealsPurchases 1.614557986 0.0023728517 0.00009269748
#> NumWebPurchases -0.340541394 0.1483516148 -0.00442034536
#> NumCatalogPurchases -3.705380325 0.2300439651 -0.00591043183
#> NumStorePurchases -3.409165641 0.0421080832 -0.00533672613
#> NumWebVisitsMonth 5.882364050 -0.0019148650 0.00465015361
#> Response -0.001914865 0.1277471050 -0.00007028710
#> Complain 0.004650154 -0.0000702871 0.00939096739
# Plot the component variances of a PCA on the raw, unscaled data.
plot(prcomp(store_num))

# Standardise every column with scale() (defaults: centre to mean 0, divide by
# the standard deviation) so that all variables are on a comparable scale.
store_scaled <- scale(store_num)
summary(store_scaled)
#> Year_Birth Income Kidhome Teenhome
#> Min. :-6.32598 Min. :-2.00680 Min. :-0.8229 Min. :-0.9288
#> 1st Qu.:-0.81935 1st Qu.:-0.67311 1st Qu.:-0.8229 1st Qu.:-0.9288
#> Median : 0.09842 Median :-0.03439 Median :-0.8229 Median :-0.9288
#> Mean : 0.00000 Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
#> 3rd Qu.: 0.68246 3rd Qu.: 0.64651 3rd Qu.: 1.0397 3rd Qu.: 0.9089
#> Max. : 2.26770 Max. :24.40777 Max. : 2.9023 Max. : 2.7465
#> Recency MntWines MntFruits MntMeatProducts
#> Min. :-1.6931063 Min. :-0.9044 Min. :-0.6623 Min. :-0.7446
#> 1st Qu.:-0.8640435 1st Qu.:-0.8333 1st Qu.:-0.6121 1st Qu.:-0.6732
#> Median :-0.0004365 Median :-0.3871 Median :-0.4613 Median :-0.4414
#> Mean : 0.0000000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
#> 3rd Qu.: 0.8631705 3rd Qu.: 0.5926 3rd Qu.: 0.1670 3rd Qu.: 0.2909
#> Max. : 1.7267776 Max. : 3.5215 Max. : 4.3385 Max. : 6.9466
#> MntFishProducts MntSweetProducts MntGoldProds NumDealsPurchases
#> Min. :-0.6874 Min. :-0.6581 Min. :-0.8485 Min. :-1.2078
#> 1st Qu.:-0.6326 1st Qu.:-0.6337 1st Qu.:-0.6748 1st Qu.:-0.6880
#> Median :-0.4682 Median :-0.4633 Median :-0.3757 Median :-0.1682
#> Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
#> 3rd Qu.: 0.2258 3rd Qu.: 0.1454 3rd Qu.: 0.2323 3rd Qu.: 0.3516
#> Max. : 4.0430 Max. : 5.7210 Max. : 5.3466 Max. : 6.5896
#> NumWebPurchases NumCatalogPurchases NumStorePurchases NumWebVisitsMonth
#> Min. :-1.49046 Min. :-0.9126 Min. :-1.7845 Min. :-2.1931
#> 1st Qu.:-0.76079 1st Qu.:-0.9126 1st Qu.:-0.8616 1st Qu.:-0.9562
#> Median :-0.03112 Median :-0.2293 Median :-0.2464 Median : 0.2808
#> Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
#> 3rd Qu.: 0.69856 3rd Qu.: 0.4541 3rd Qu.: 0.6765 3rd Qu.: 0.6931
#> Max. : 8.36013 Max. : 8.6543 Max. : 2.2145 Max. : 6.0531
#> Response Complain
#> Min. :-0.4204 Min. :-0.09779
#> 1st Qu.:-0.4204 1st Qu.:-0.09779
#> Median :-0.4204 Median :-0.09779
#> Mean : 0.0000 Mean : 0.00000
#> 3rd Qu.:-0.4204 3rd Qu.:-0.09779
#> Max. : 2.3774 Max. :10.22138
# Plot the component variances of a PCA on the standardised data.
plot(prcomp(store_scaled))

# Fit the PCA used in the rest of the analysis. `scale. = TRUE` makes prcomp()
# scale the columns to unit variance itself, so the raw numeric data is passed.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
pca <- prcomp(store_num, scale. = TRUE)
pca
#> Standard deviations (1, .., p=18):
#> [1] 2.4763905 1.3974106 1.1621545 1.0612573 1.0027072 0.9413812 0.8833364
#> [8] 0.8708472 0.7922104 0.7346263 0.6981765 0.6594193 0.6430538 0.6229631
#> [15] 0.5933158 0.5524368 0.4948089 0.4630121
#>
#> Rotation (n x k) = (18 x 18):
#> PC1 PC2 PC3 PC4
#> Year_Birth -0.064457664 0.335727340 0.39130327 -0.379686696
#> Income 0.301181504 -0.052621546 -0.10889439 0.123130540
#> Kidhome -0.269524241 0.063275638 0.21971165 -0.211052432
#> Teenhome -0.050631544 -0.549098334 -0.24008694 0.092630537
#> Recency 0.002094297 -0.015163415 -0.33711638 -0.614163837
#> MntWines 0.305047822 -0.180233324 0.10012802 0.071763727
#> MntFruits 0.284321321 0.135320520 0.04247837 -0.159389951
#> MntMeatProducts 0.325336178 0.140925582 0.06463400 0.004011992
#> MntFishProducts 0.293585737 0.146309787 0.02230301 -0.156829322
#> MntSweetProducts 0.283929301 0.118036609 0.02493680 -0.180381396
#> MntGoldProds 0.234726750 -0.107149903 0.15270537 -0.189908031
#> NumDealsPurchases -0.050354204 -0.491556879 0.29627406 -0.273815251
#> NumWebPurchases 0.224019509 -0.373326842 0.21667582 -0.123724133
#> NumCatalogPurchases 0.330712088 -0.006605965 0.04048847 0.027994114
#> NumStorePurchases 0.299784110 -0.177131773 -0.04265224 -0.078849483
#> NumWebVisitsMonth -0.260191985 -0.217413564 0.35727422 -0.165836141
#> Response 0.091540403 0.040014987 0.55451165 0.402114478
#> Complain -0.015535785 -0.003023835 -0.04125873 0.010288157
#> PC5 PC6 PC7 PC8
#> Year_Birth 0.076674166 -0.016866270 0.44215463 -0.31890708
#> Income 0.013025084 0.139281304 0.32553524 0.12007120
#> Kidhome -0.069387460 0.002889616 0.18344140 0.43224899
#> Teenhome 0.024353675 -0.153586909 0.05332261 0.16167128
#> Recency -0.041843054 0.612004206 -0.26732198 0.06291479
#> MntWines 0.023524459 0.298047882 0.12938145 -0.23853026
#> MntFruits -0.053592441 -0.306964411 -0.13371863 0.16942145
#> MntMeatProducts -0.027923041 0.196014462 0.14574536 0.22594429
#> MntFishProducts -0.028960804 -0.276196775 -0.16503996 0.19055013
#> MntSweetProducts -0.025044925 -0.250629256 -0.09292733 0.17591814
#> MntGoldProds 0.012968951 -0.181591941 -0.45703032 -0.13840791
#> NumDealsPurchases -0.027796309 -0.050493156 0.24183066 0.40185225
#> NumWebPurchases -0.001985414 -0.013636539 -0.06375694 -0.34105305
#> NumCatalogPurchases -0.021212739 0.210796978 0.10550721 0.17388666
#> NumStorePurchases 0.017988629 -0.082444216 0.27336627 -0.24972432
#> NumWebVisitsMonth -0.022290318 0.008819296 -0.21097904 -0.15502826
#> Response -0.077586347 0.365379980 -0.30342569 0.18982101
#> Complain -0.986350239 -0.032632763 0.07050860 -0.11582183
#> PC9 PC10 PC11 PC12
#> Year_Birth 0.18927106 -0.43021014 0.172867102 -0.05290104
#> Income 0.06207080 -0.21091764 -0.516506219 -0.15625208
#> Kidhome -0.01338915 0.09505855 -0.606858061 0.08056670
#> Teenhome 0.07761456 -0.56022385 0.131781369 -0.01295178
#> Recency -0.08040853 -0.18012525 0.014072139 0.08446339
#> MntWines -0.09996281 0.13858605 -0.096504039 0.12223445
#> MntFruits -0.21244679 -0.14215190 -0.002480198 0.58438859
#> MntMeatProducts 0.04740319 0.20873114 0.171281024 -0.07894611
#> MntFishProducts -0.08197321 0.11532020 0.038742331 -0.01039039
#> MntSweetProducts -0.37937066 -0.23242062 0.051629755 -0.53077395
#> MntGoldProds 0.70704233 -0.01276052 -0.234629863 -0.02894733
#> NumDealsPurchases 0.12233789 0.15997981 0.280675950 0.04348007
#> NumWebPurchases -0.27609955 0.04151285 -0.248278051 -0.24453706
#> NumCatalogPurchases 0.22420040 0.21230270 0.274352421 -0.17674727
#> NumStorePurchases -0.11541451 0.03952605 -0.006192277 0.43830905
#> NumWebVisitsMonth -0.27567337 0.18774515 0.037207493 -0.06473089
#> Response -0.07802978 -0.39356394 0.045207727 0.15361425
#> Complain 0.05350450 -0.02336023 0.021595765 -0.02850529
#> PC13 PC14 PC15 PC16
#> Year_Birth 0.119010347 -0.108036754 -0.022901012 -0.037975729
#> Income 0.219418971 -0.025906549 0.189975248 0.542482479
#> Kidhome -0.102422499 -0.038575442 -0.258975810 -0.366449200
#> Teenhome 0.195267103 -0.191334094 -0.297617129 -0.226461889
#> Recency -0.050900304 -0.042276699 0.074756074 0.023095357
#> MntWines 0.030335564 -0.068259066 -0.553173356 0.039763793
#> MntFruits 0.434445888 0.340428182 0.025035552 -0.003293442
#> MntMeatProducts 0.208178402 0.057583672 -0.046850480 -0.157303289
#> MntFishProducts 0.011594730 -0.830279290 0.054793304 0.058756587
#> MntSweetProducts -0.347131451 0.302118644 -0.257174449 0.084780260
#> MntGoldProds -0.098599052 0.126634459 -0.121682721 0.060328193
#> NumDealsPurchases -0.152613141 0.100880769 0.272572829 0.248746223
#> NumWebPurchases 0.173792385 0.007967293 0.441582408 -0.449973051
#> NumCatalogPurchases 0.177393468 0.065799001 -0.144863596 -0.233128693
#> NumStorePurchases -0.547653116 -0.042070768 -0.043940343 0.012046311
#> NumWebVisitsMonth 0.308565203 -0.061688264 -0.322295880 0.397866289
#> Response -0.207378770 -0.083646809 0.114934489 -0.011081328
#> Complain 0.004224006 -0.009407843 -0.009492095 0.016924656
#> PC17 PC18
#> Year_Birth 0.025449454 -0.02123086
#> Income -0.008719244 0.14401465
#> Kidhome 0.001670653 0.08891473
#> Teenhome -0.150088569 0.04481087
#> Recency 0.001519119 0.02136944
#> MntWines 0.277517294 -0.50262677
#> MntFruits 0.132188421 -0.01880471
#> MntMeatProducts -0.757486982 -0.17275251
#> MntFishProducts 0.081311927 -0.04945076
#> MntSweetProducts 0.028176835 -0.03229485
#> MntGoldProds -0.109642826 -0.02596877
#> NumDealsPurchases 0.129811939 -0.24914778
#> NumWebPurchases 0.009643083 -0.01971172
#> NumCatalogPurchases 0.406230402 0.56912789
#> NumStorePurchases -0.239761480 0.39776132
#> NumWebVisitsMonth -0.222506556 0.36502956
#> Response -0.008310830 0.05649559
#> Complain 0.003206678 -0.02056646
# Standard deviation of each principal component.
pca[["sdev"]]
#> [1] 2.4763905 1.3974106 1.1621545 1.0612573 1.0027072 0.9413812 0.8833364
#> [8] 0.8708472 0.7922104 0.7346263 0.6981765 0.6594193 0.6430538 0.6229631
#> [15] 0.5933158 0.5524368 0.4948089 0.4630121

# Variance of each principal component (squared standard deviation).
pca[["sdev"]]^2
#> [1] 6.1325101 1.9527564 1.3506032 1.1262670 1.0054218 0.8861986 0.7802832
#> [8] 0.7583749 0.6275974 0.5396758 0.4874504 0.4348339 0.4135182 0.3880830
#> [15] 0.3520237 0.3051865 0.2448358 0.2143802

# Check the eigenvectors (loading of every variable on every component).
pca[["rotation"]]
#> PC1 PC2 PC3 PC4
#> Year_Birth -0.064457664 0.335727340 0.39130327 -0.379686696
#> Income 0.301181504 -0.052621546 -0.10889439 0.123130540
#> Kidhome -0.269524241 0.063275638 0.21971165 -0.211052432
#> Teenhome -0.050631544 -0.549098334 -0.24008694 0.092630537
#> Recency 0.002094297 -0.015163415 -0.33711638 -0.614163837
#> MntWines 0.305047822 -0.180233324 0.10012802 0.071763727
#> MntFruits 0.284321321 0.135320520 0.04247837 -0.159389951
#> MntMeatProducts 0.325336178 0.140925582 0.06463400 0.004011992
#> MntFishProducts 0.293585737 0.146309787 0.02230301 -0.156829322
#> MntSweetProducts 0.283929301 0.118036609 0.02493680 -0.180381396
#> MntGoldProds 0.234726750 -0.107149903 0.15270537 -0.189908031
#> NumDealsPurchases -0.050354204 -0.491556879 0.29627406 -0.273815251
#> NumWebPurchases 0.224019509 -0.373326842 0.21667582 -0.123724133
#> NumCatalogPurchases 0.330712088 -0.006605965 0.04048847 0.027994114
#> NumStorePurchases 0.299784110 -0.177131773 -0.04265224 -0.078849483
#> NumWebVisitsMonth -0.260191985 -0.217413564 0.35727422 -0.165836141
#> Response 0.091540403 0.040014987 0.55451165 0.402114478
#> Complain -0.015535785 -0.003023835 -0.04125873 0.010288157
#> PC5 PC6 PC7 PC8
#> Year_Birth 0.076674166 -0.016866270 0.44215463 -0.31890708
#> Income 0.013025084 0.139281304 0.32553524 0.12007120
#> Kidhome -0.069387460 0.002889616 0.18344140 0.43224899
#> Teenhome 0.024353675 -0.153586909 0.05332261 0.16167128
#> Recency -0.041843054 0.612004206 -0.26732198 0.06291479
#> MntWines 0.023524459 0.298047882 0.12938145 -0.23853026
#> MntFruits -0.053592441 -0.306964411 -0.13371863 0.16942145
#> MntMeatProducts -0.027923041 0.196014462 0.14574536 0.22594429
#> MntFishProducts -0.028960804 -0.276196775 -0.16503996 0.19055013
#> MntSweetProducts -0.025044925 -0.250629256 -0.09292733 0.17591814
#> MntGoldProds 0.012968951 -0.181591941 -0.45703032 -0.13840791
#> NumDealsPurchases -0.027796309 -0.050493156 0.24183066 0.40185225
#> NumWebPurchases -0.001985414 -0.013636539 -0.06375694 -0.34105305
#> NumCatalogPurchases -0.021212739 0.210796978 0.10550721 0.17388666
#> NumStorePurchases 0.017988629 -0.082444216 0.27336627 -0.24972432
#> NumWebVisitsMonth -0.022290318 0.008819296 -0.21097904 -0.15502826
#> Response -0.077586347 0.365379980 -0.30342569 0.18982101
#> Complain -0.986350239 -0.032632763 0.07050860 -0.11582183
#> PC9 PC10 PC11 PC12
#> Year_Birth 0.18927106 -0.43021014 0.172867102 -0.05290104
#> Income 0.06207080 -0.21091764 -0.516506219 -0.15625208
#> Kidhome -0.01338915 0.09505855 -0.606858061 0.08056670
#> Teenhome 0.07761456 -0.56022385 0.131781369 -0.01295178
#> Recency -0.08040853 -0.18012525 0.014072139 0.08446339
#> MntWines -0.09996281 0.13858605 -0.096504039 0.12223445
#> MntFruits -0.21244679 -0.14215190 -0.002480198 0.58438859
#> MntMeatProducts 0.04740319 0.20873114 0.171281024 -0.07894611
#> MntFishProducts -0.08197321 0.11532020 0.038742331 -0.01039039
#> MntSweetProducts -0.37937066 -0.23242062 0.051629755 -0.53077395
#> MntGoldProds 0.70704233 -0.01276052 -0.234629863 -0.02894733
#> NumDealsPurchases 0.12233789 0.15997981 0.280675950 0.04348007
#> NumWebPurchases -0.27609955 0.04151285 -0.248278051 -0.24453706
#> NumCatalogPurchases 0.22420040 0.21230270 0.274352421 -0.17674727
#> NumStorePurchases -0.11541451 0.03952605 -0.006192277 0.43830905
#> NumWebVisitsMonth -0.27567337 0.18774515 0.037207493 -0.06473089
#> Response -0.07802978 -0.39356394 0.045207727 0.15361425
#> Complain 0.05350450 -0.02336023 0.021595765 -0.02850529
#> PC13 PC14 PC15 PC16
#> Year_Birth 0.119010347 -0.108036754 -0.022901012 -0.037975729
#> Income 0.219418971 -0.025906549 0.189975248 0.542482479
#> Kidhome -0.102422499 -0.038575442 -0.258975810 -0.366449200
#> Teenhome 0.195267103 -0.191334094 -0.297617129 -0.226461889
#> Recency -0.050900304 -0.042276699 0.074756074 0.023095357
#> MntWines 0.030335564 -0.068259066 -0.553173356 0.039763793
#> MntFruits 0.434445888 0.340428182 0.025035552 -0.003293442
#> MntMeatProducts 0.208178402 0.057583672 -0.046850480 -0.157303289
#> MntFishProducts 0.011594730 -0.830279290 0.054793304 0.058756587
#> MntSweetProducts -0.347131451 0.302118644 -0.257174449 0.084780260
#> MntGoldProds -0.098599052 0.126634459 -0.121682721 0.060328193
#> NumDealsPurchases -0.152613141 0.100880769 0.272572829 0.248746223
#> NumWebPurchases 0.173792385 0.007967293 0.441582408 -0.449973051
#> NumCatalogPurchases 0.177393468 0.065799001 -0.144863596 -0.233128693
#> NumStorePurchases -0.547653116 -0.042070768 -0.043940343 0.012046311
#> NumWebVisitsMonth 0.308565203 -0.061688264 -0.322295880 0.397866289
#> Response -0.207378770 -0.083646809 0.114934489 -0.011081328
#> Complain 0.004224006 -0.009407843 -0.009492095 0.016924656
#> PC17 PC18
#> Year_Birth 0.025449454 -0.02123086
#> Income -0.008719244 0.14401465
#> Kidhome 0.001670653 0.08891473
#> Teenhome -0.150088569 0.04481087
#> Recency 0.001519119 0.02136944
#> MntWines 0.277517294 -0.50262677
#> MntFruits 0.132188421 -0.01880471
#> MntMeatProducts -0.757486982 -0.17275251
#> MntFishProducts 0.081311927 -0.04945076
#> MntSweetProducts 0.028176835 -0.03229485
#> MntGoldProds -0.109642826 -0.02596877
#> NumDealsPurchases 0.129811939 -0.24914778
#> NumWebPurchases 0.009643083 -0.01971172
#> NumCatalogPurchases 0.406230402 0.56912789
#> NumStorePurchases -0.239761480 0.39776132
#> NumWebVisitsMonth -0.222506556 0.36502956
#> Response -0.008310830 0.05649559
#> Complain 0.003206678 -0.02056646
# Variabel yang memiliki kontribusi besar terhadap PC1 adalah Income, MntWines, MntMeatProducts, dan NumCatalogPurchases.
# Original data after scaling.
as.data.frame(store_scaled)

# New value of every observation on each principal component.
as.data.frame(pca$x)

# Goal: reduce the 18 dimensions while retaining at least 75% of the
# information. How many new dimensions (PCs) should be used? This can be read
# from the cumulative variance reported by summary() on the pca object.
# NOTE(review): the original text said "19 dimensions", but the printed PCA
# reports p = 18 variables.
summary(pca)
#> Importance of components:
#> PC1 PC2 PC3 PC4 PC5 PC6 PC7
#> Standard deviation 2.4764 1.3974 1.16215 1.06126 1.00271 0.94138 0.88334
#> Proportion of Variance 0.3407 0.1085 0.07503 0.06257 0.05586 0.04923 0.04335
#> Cumulative Proportion 0.3407 0.4492 0.52421 0.58679 0.64264 0.69188 0.73522
#> PC8 PC9 PC10 PC11 PC12 PC13 PC14
#> Standard deviation 0.87085 0.79221 0.73463 0.69818 0.65942 0.64305 0.62296
#> Proportion of Variance 0.04213 0.03487 0.02998 0.02708 0.02416 0.02297 0.02156
#> Cumulative Proportion 0.77736 0.81222 0.84220 0.86929 0.89344 0.91642 0.93798
#> PC15 PC16 PC17 PC18
#> Standard deviation 0.59332 0.55244 0.4948 0.46301
#> Proportion of Variance 0.01956 0.01695 0.0136 0.01191
#> Cumulative Proportion 0.95753 0.97449 0.9881 1.00000
# Jawaban: PC1 sampai PC8, dengan nilai Cumulative Proportion = 0.77736.
# Keep the principal components selected by the dimensionality reduction.
pc_keep <- as.data.frame(pca$x[, 1:8])
pc_keep

# Once the PCs that summarise the required information have been chosen, they
# can be combined with the original data and used for further analysis.
# `select(!where(is.numeric))` replaces the superseded `select_if()`.
store_clean %>%
  select(!where(is.numeric)) %>%
  cbind(pc_keep)

# Next: build a biplot from the first observations of the store data.
# NOTE(review): the original text said the first 200 observations, but the
# code that follows takes the first 100 -- confirm the intended sample size.
# First 100 rows of the numeric data, used for the biplot below.
prop_small <- store_num %>%
  head(100)

# Run the PCA on the subset without scaling. The argument is written with its
# full name `scale.` (the original `scale` only worked through partial
# matching) and `FALSE` replaces the reassignable shorthand `F`.
pca_small <- prcomp(prop_small, scale. = FALSE)

# Draw the biplot.
biplot(x = pca_small, cex = 0.7, scale = FALSE)

# install.packages("factoextra")
library(factoextra)
# Visualise how much each variable contributes to the chosen component.
fviz_contrib(
  X = pca_small,   # PCA object
  choice = "var",  # look at the contribution of each variable
  axes = 2         # which PC to look at
)

# Conclusion:
# * Variable with a high contribution to PC1: Income
# * Variable with a high contribution to PC2: MntWines