1 Introduction

#Baca Data

# Load the superstore customer data from CSV, then display it for a first look.
store <- read.csv(file = "data_input/superstore_data.csv")
store

Deskripsi Tabel: Id -> ID unik dari setiap pelanggan; Year_Birth -> tahun lahir pelanggan; Complain -> 1 jika pelanggan melakukan komplain dalam 2 tahun terakhir; Dt_Customer -> tanggal pendaftaran pelanggan di perusahaan; Education -> tingkat pendidikan pelanggan; Marital_Status -> status perkawinan pelanggan; Kidhome -> jumlah anak kecil di rumah pelanggan; Teenhome -> jumlah remaja dalam rumah tangga pelanggan; Income -> pendapatan rumah tangga tahunan pelanggan; MntFishProducts -> jumlah yang dibelanjakan untuk produk ikan dalam 2 tahun terakhir; MntMeatProducts -> jumlah yang dibelanjakan untuk produk daging dalam 2 tahun terakhir; MntFruits -> jumlah yang dibelanjakan untuk produk buah-buahan dalam 2 tahun terakhir; MntSweetProducts -> jumlah yang dibelanjakan untuk produk manis dalam 2 tahun terakhir; MntWines -> jumlah yang dibelanjakan untuk produk anggur dalam 2 tahun terakhir; MntGoldProds -> jumlah yang dibelanjakan untuk produk emas dalam 2 tahun terakhir; NumDealsPurchases -> jumlah pembelian yang dilakukan dengan diskon; NumCatalogPurchases -> jumlah pembelian yang dilakukan menggunakan katalog (membeli barang untuk dikirim melalui pos); NumStorePurchases -> jumlah pembelian yang dilakukan langsung di toko; NumWebPurchases -> jumlah pembelian yang dilakukan melalui situs web perusahaan; NumWebVisitsMonth -> jumlah kunjungan ke situs web perusahaan dalam sebulan terakhir; Recency -> jumlah hari sejak pembelian terakhir.

2 Data Cleansing

library(dplyr)
library(lubridate)
# Build the cleaned data set: drop the customer Id column and parse the
# registration date (month/day/year text) into a proper Date column.
store_clean <- store %>%
  select(-Id) %>%
  # Use the column carried by the pipeline rather than `store$Dt_Customer`,
  # so the values stay row-aligned even if earlier steps filter or reorder.
  mutate(Dt_Customer = mdy(Dt_Customer))
# Inspect the resulting column types.
glimpse(store_clean)
#> Rows: 2,240
#> Columns: 21
#> $ Year_Birth          <int> 1970, 1961, 1958, 1967, 1989, 1958, 1954, 1967, 19~
#> $ Education           <chr> "Graduation", "Graduation", "Graduation", "Graduat~
#> $ Marital_Status      <chr> "Divorced", "Single", "Married", "Together", "Sing~
#> $ Income              <int> 84835, 57091, 67267, 32474, 21474, 71691, 63564, 4~
#> $ Kidhome             <int> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,~
#> $ Teenhome            <int> 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,~
#> $ Dt_Customer         <date> 2014-06-16, 2014-06-15, 2014-05-13, 2014-11-05, 2~
#> $ Recency             <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
#> $ MntWines            <int> 189, 464, 134, 10, 6, 336, 769, 78, 384, 384, 450,~
#> $ MntFruits           <int> 104, 5, 11, 0, 16, 130, 80, 0, 0, 0, 26, 4, 82, 10~
#> $ MntMeatProducts     <int> 379, 64, 59, 1, 24, 411, 252, 11, 102, 102, 535, 6~
#> $ MntFishProducts     <int> 111, 7, 15, 0, 11, 240, 15, 0, 21, 21, 73, 0, 80, ~
#> $ MntSweetProducts    <int> 189, 0, 2, 0, 0, 32, 34, 0, 32, 32, 98, 13, 20, 16~
#> $ MntGoldProds        <int> 218, 37, 30, 0, 34, 43, 65, 7, 5, 5, 26, 4, 102, 3~
#> $ NumDealsPurchases   <int> 1, 1, 1, 1, 2, 1, 1, 1, 3, 3, 1, 2, 1, 1, 0, 4, 4,~
#> $ NumWebPurchases     <int> 4, 7, 3, 1, 3, 4, 10, 2, 6, 6, 5, 3, 3, 1, 25, 2, ~
#> $ NumCatalogPurchases <int> 4, 3, 2, 0, 1, 7, 10, 1, 2, 2, 6, 1, 6, 1, 0, 1, 1~
#> $ NumStorePurchases   <int> 6, 7, 5, 2, 2, 5, 7, 3, 9, 9, 10, 6, 6, 2, 0, 5, 5~
#> $ NumWebVisitsMonth   <int> 1, 5, 2, 7, 7, 2, 6, 5, 4, 4, 1, 4, 1, 6, 1, 4, 4,~
#> $ Response            <int> 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,~
#> $ Complain            <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~

2.1 Cek Missing Value

# Count the missing values in every column of the cleaned data.
store_clean %>%
  is.na() %>%
  colSums()
#>          Year_Birth           Education      Marital_Status              Income 
#>                   0                   0                   0                  24 
#>             Kidhome            Teenhome         Dt_Customer             Recency 
#>                   0                   0                   0                   0 
#>            MntWines           MntFruits     MntMeatProducts     MntFishProducts 
#>                   0                   0                   0                   0 
#>    MntSweetProducts        MntGoldProds   NumDealsPurchases     NumWebPurchases 
#>                   0                   0                   0                   0 
#> NumCatalogPurchases   NumStorePurchases   NumWebVisitsMonth            Response 
#>                   0                   0                   0                   0 
#>            Complain 
#>                   0

2.2 Drop Baris yang NA

# Keep only the complete rows, i.e. rows without an NA in any column.
store_clean <- filter(store_clean, complete.cases(store_clean))
# Confirm that no missing values remain.
anyNA(store_clean)
#> [1] FALSE

2.3 Memfilter kolom Numerik

# Keep only the numeric columns, since PCA operates on numeric input.
# `select(where(...))` replaces the superseded `select_if()`.
# NOTE(review): Response and Complain are 0/1 flags that pass is.numeric();
# confirm they are meant to take part in the PCA.
store_num <- store_clean %>%
  select(where(is.numeric))
glimpse(store_num)
#> Rows: 2,216
#> Columns: 18
#> $ Year_Birth          <int> 1970, 1961, 1958, 1967, 1989, 1958, 1954, 1967, 19~
#> $ Income              <int> 84835, 57091, 67267, 32474, 21474, 71691, 63564, 4~
#> $ Kidhome             <int> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,~
#> $ Teenhome            <int> 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,~
#> $ Recency             <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
#> $ MntWines            <int> 189, 464, 134, 10, 6, 336, 769, 78, 384, 384, 450,~
#> $ MntFruits           <int> 104, 5, 11, 0, 16, 130, 80, 0, 0, 0, 26, 4, 82, 10~
#> $ MntMeatProducts     <int> 379, 64, 59, 1, 24, 411, 252, 11, 102, 102, 535, 6~
#> $ MntFishProducts     <int> 111, 7, 15, 0, 11, 240, 15, 0, 21, 21, 73, 0, 80, ~
#> $ MntSweetProducts    <int> 189, 0, 2, 0, 0, 32, 34, 0, 32, 32, 98, 13, 20, 16~
#> $ MntGoldProds        <int> 218, 37, 30, 0, 34, 43, 65, 7, 5, 5, 26, 4, 102, 3~
#> $ NumDealsPurchases   <int> 1, 1, 1, 1, 2, 1, 1, 1, 3, 3, 1, 2, 1, 1, 0, 4, 4,~
#> $ NumWebPurchases     <int> 4, 7, 3, 1, 3, 4, 10, 2, 6, 6, 5, 3, 3, 1, 25, 2, ~
#> $ NumCatalogPurchases <int> 4, 3, 2, 0, 1, 7, 10, 1, 2, 2, 6, 1, 6, 1, 0, 1, 1~
#> $ NumStorePurchases   <int> 6, 7, 5, 2, 2, 5, 7, 3, 9, 9, 10, 6, 6, 2, 0, 5, 5~
#> $ NumWebVisitsMonth   <int> 1, 5, 2, 7, 7, 2, 6, 5, 4, 4, 1, 4, 1, 6, 1, 4, 4,~
#> $ Response            <int> 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,~
#> $ Complain            <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~

3 Exploratory Data Analysis (EDA)

# Five-number summary and mean of every numeric column.
store_num %>%
  summary()
#>    Year_Birth       Income          Kidhome          Teenhome     
#>  Min.   :1893   Min.   :  1730   Min.   :0.0000   Min.   :0.0000  
#>  1st Qu.:1959   1st Qu.: 35303   1st Qu.:0.0000   1st Qu.:0.0000  
#>  Median :1970   Median : 51382   Median :0.0000   Median :0.0000  
#>  Mean   :1969   Mean   : 52247   Mean   :0.4418   Mean   :0.5054  
#>  3rd Qu.:1977   3rd Qu.: 68522   3rd Qu.:1.0000   3rd Qu.:1.0000  
#>  Max.   :1996   Max.   :666666   Max.   :2.0000   Max.   :2.0000  
#>     Recency         MntWines        MntFruits      MntMeatProducts 
#>  Min.   : 0.00   Min.   :   0.0   Min.   :  0.00   Min.   :   0.0  
#>  1st Qu.:24.00   1st Qu.:  24.0   1st Qu.:  2.00   1st Qu.:  16.0  
#>  Median :49.00   Median : 174.5   Median :  8.00   Median :  68.0  
#>  Mean   :49.01   Mean   : 305.1   Mean   : 26.36   Mean   : 167.0  
#>  3rd Qu.:74.00   3rd Qu.: 505.0   3rd Qu.: 33.00   3rd Qu.: 232.2  
#>  Max.   :99.00   Max.   :1493.0   Max.   :199.00   Max.   :1725.0  
#>  MntFishProducts  MntSweetProducts  MntGoldProds    NumDealsPurchases
#>  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   : 0.000   
#>  1st Qu.:  3.00   1st Qu.:  1.00   1st Qu.:  9.00   1st Qu.: 1.000   
#>  Median : 12.00   Median :  8.00   Median : 24.50   Median : 2.000   
#>  Mean   : 37.64   Mean   : 27.03   Mean   : 43.97   Mean   : 2.324   
#>  3rd Qu.: 50.00   3rd Qu.: 33.00   3rd Qu.: 56.00   3rd Qu.: 3.000   
#>  Max.   :259.00   Max.   :262.00   Max.   :321.00   Max.   :15.000   
#>  NumWebPurchases  NumCatalogPurchases NumStorePurchases NumWebVisitsMonth
#>  Min.   : 0.000   Min.   : 0.000      Min.   : 0.000    Min.   : 0.000   
#>  1st Qu.: 2.000   1st Qu.: 0.000      1st Qu.: 3.000    1st Qu.: 3.000   
#>  Median : 4.000   Median : 2.000      Median : 5.000    Median : 6.000   
#>  Mean   : 4.085   Mean   : 2.671      Mean   : 5.801    Mean   : 5.319   
#>  3rd Qu.: 6.000   3rd Qu.: 4.000      3rd Qu.: 8.000    3rd Qu.: 7.000   
#>  Max.   :27.000   Max.   :28.000      Max.   :13.000    Max.   :20.000   
#>     Response         Complain       
#>  Min.   :0.0000   Min.   :0.000000  
#>  1st Qu.:0.0000   1st Qu.:0.000000  
#>  Median :0.0000   Median :0.000000  
#>  Mean   :0.1503   Mean   :0.009477  
#>  3rd Qu.:0.0000   3rd Qu.:0.000000  
#>  Max.   :1.0000   Max.   :1.000000

3.1 cek matriks covariance

# Covariance matrix of the unscaled numeric columns; compare the diagonal
# (variances) to see how much the column scales differ.
cov(x = store_num)
#>                          Year_Birth          Income         Kidhome
#> Year_Birth             143.65350702    -48814.62120     1.503309809
#> Income              -48814.62120144 633683788.57562 -5793.603194294
#> Kidhome                  1.50330981     -5793.60319     0.288257573
#> Teenhome                -2.28796603       262.10270    -0.011648507
#> Recency                 -5.65371157     -2892.83659     0.178613979
#> MntWines              -644.67067296   4913651.56703   -90.072543008
#> MntFruits               -8.46424078    431589.32536    -7.977683948
#> MntMeatProducts        -90.58221961   3300781.48003   -52.894367457
#> MntFishProducts        -26.52831653    604886.90106   -11.431713335
#> MntSweetProducts        -9.94605211    455689.30515    -8.336015516
#> MntGoldProds           -39.87531802    425110.22454    -9.876741490
#> NumDealsPurchases       -1.35269739     -4024.23080     0.224035539
#> NumWebPurchases         -5.02801583     26762.85982    -0.547403045
#> NumCatalogPurchases     -4.27130045     43406.47821    -0.792748205
#> NumStorePurchases       -4.98293674     43318.89745    -0.875021188
#> NumWebVisitsMonth        3.60179650    -33768.08971     0.582689205
#> Response                 0.10149335      1197.05973    -0.014950371
#> Complain                -0.03531753       -66.41277     0.002132042
#>                           Teenhome        Recency        MntWines
#> Year_Birth           -2.2879660340    -5.65371157    -644.6706730
#> Income              262.1027014693 -2892.83658596 4913651.5670317
#> Kidhome              -0.0116485075     0.17861398     -90.0725430
#> Teenhome              0.2961331910     0.21799024       0.6877656
#> Recency               0.2179902372   838.00706375     153.5171264
#> MntWines              0.6877655630   153.51712642  113790.1256902
#> MntFruits            -3.8233736177    -6.73181459    5195.2531478
#> MntMeatProducts     -31.8701811574   146.19869694   43038.2468733
#> MntFishProducts      -6.1151926070     0.87320371    7345.6715847
#> MntSweetProducts     -3.6444002575    29.85471392    5407.8655247
#> MntGoldProds         -0.5607598341    26.49344150    6864.4609723
#> NumDealsPurchases     0.4043419090     0.11780606       5.7662836
#> NumWebPurchases       0.2417501284    -0.44757927     512.0301067
#> NumCatalogPurchases  -0.1794818720     2.04027593     626.6716845
#> NumStorePurchases     0.0879855922    -0.04082519     701.8245591
#> NumWebVisitsMonth     0.1732151152    -1.30335585    -263.4233706
#> Response             -0.0299337468    -2.06691087      29.6954831
#> Complain              0.0001743935     0.03825492      -1.2902590
#>                           MntFruits MntMeatProducts MntFishProducts
#> Year_Birth              -8.46424078     -90.5822196     -26.5283165
#> Income              431589.32536427 3300781.4800281  604886.9010551
#> Kidhome                 -7.97768395     -52.8943675     -11.4317133
#> Teenhome                -3.82337362     -31.8701812      -6.1151926
#> Recency                 -6.73181459     146.1986969       0.8732037
#> MntWines              5195.25314784   43038.2468733    7345.6715847
#> MntFruits             1583.55579186    4889.3730043    1292.9674518
#> MntMeatProducts       4889.37300425   50302.9864395    7043.4752771
#> MntFishProducts       1292.96745178    7043.4752771    2997.7905288
#> MntSweetProducts       934.24298555    4929.5522618    1312.9883482
#> MntGoldProds           817.53156461    4177.2441026    1211.8027530
#> NumDealsPurchases      -10.29719442     -52.3390916     -15.0872165
#> NumWebPurchases         32.94433730     188.7836423      44.9749377
#> NumCatalogPurchases     56.63320913     481.8934714      85.3714836
#> NumStorePurchases       59.31106849     354.3450153      81.4727753
#> NumWebVisitsMonth      -40.41341995    -293.4619091     -59.2820805
#> Response                 1.74150626      19.0583984       2.1163284
#> Complain                -0.02053137      -0.5168915      -0.1125916
#>                     MntSweetProducts   MntGoldProds NumDealsPurchases
#> Year_Birth                -9.9460521    -39.8753180    -1.35269739469
#> Income                455689.3051528 425110.2245392 -4024.23079939859
#> Kidhome                   -8.3360155     -9.8767415     0.22403553879
#> Teenhome                  -3.6444003     -0.5607598     0.40434190904
#> Recency                   29.8547139     26.4934415     0.11780606466
#> MntWines                5407.8655247   6864.4609723     5.76628358501
#> MntFruits                934.2429856    817.5315646   -10.29719442430
#> MntMeatProducts         4929.5522618   4177.2441026   -52.33909164623
#> MntFishProducts         1312.9883482   1211.8027530   -15.08721650871
#> MntSweetProducts        1686.9129353    760.7120649    -9.59445037527
#> MntGoldProds             760.7120649   2684.8371668     5.17377598585
#> NumDealsPurchases         -9.5944504      5.1737760     3.70068188671
#> NumWebPurchases           37.5934725     57.8128972     1.27306904027
#> NumCatalogPurchases       59.5187603     67.0942073    -0.06822921335
#> NumStorePurchases         60.7800165     65.5538043     0.41340324013
#> NumWebVisitsMonth        -42.0742297    -31.1275096     1.61455798584
#> Response                   1.7053646      2.5989033     0.00237285166
#> Complain                  -0.0901158     -0.1563297     0.00009269748
#>                     NumWebPurchases NumCatalogPurchases NumStorePurchases
#> Year_Birth             -5.028015826        -4.271300454      -4.982936738
#> Income              26762.859816765     43406.478210796   43318.897447865
#> Kidhome                -0.547403045        -0.792748205      -0.875021188
#> Teenhome                0.241750128        -0.179481872       0.087985592
#> Recency                -0.447579272         2.040275933      -0.040825191
#> MntWines              512.030106714       626.671684486     701.824559127
#> MntFruits              32.944337305        56.633209126      59.311068486
#> MntMeatProducts       188.783642257       481.893471449     354.345015321
#> MntFishProducts        44.974937658        85.371483608      81.472775261
#> MntSweetProducts       37.593472468        59.518760339      60.780016461
#> MntGoldProds           57.812897173        67.094207325      65.553804264
#> NumDealsPurchases       1.273069040        -0.068229213       0.413403240
#> NumWebPurchases         7.512812829         3.103465256       4.599824995
#> NumCatalogPurchases     3.103465256         8.565769776       4.926827872
#> NumStorePurchases       4.599824995         4.926827872      10.567601723
#> NumWebVisitsMonth      -0.340541394        -3.705380325      -3.409165641
#> Response                0.148351615         0.230043965       0.042108083
#> Complain               -0.004420345        -0.005910432      -0.005336726
#>                     NumWebVisitsMonth        Response        Complain
#> Year_Birth                3.601796497    0.1014933462  -0.03531753470
#> Income               -33768.089709765 1197.0597287529 -66.41276678537
#> Kidhome                   0.582689205   -0.0149503712   0.00213204195
#> Teenhome                  0.173215115   -0.0299337468   0.00017439349
#> Recency                  -1.303355852   -2.0669108719   0.03825492417
#> MntWines               -263.423370562   29.6954830863  -1.29025902323
#> MntFruits               -40.413419946    1.7415062627  -0.02053137046
#> MntMeatProducts        -293.461909079   19.0583983913  -0.51689151747
#> MntFishProducts         -59.282080457    2.1163284058  -0.11259157696
#> MntSweetProducts        -42.074229694    1.7053646372  -0.09011580054
#> MntGoldProds            -31.127509555    2.5989033176  -0.15632970964
#> NumDealsPurchases         1.614557986    0.0023728517   0.00009269748
#> NumWebPurchases          -0.340541394    0.1483516148  -0.00442034536
#> NumCatalogPurchases      -3.705380325    0.2300439651  -0.00591043183
#> NumStorePurchases        -3.409165641    0.0421080832  -0.00533672613
#> NumWebVisitsMonth         5.882364050   -0.0019148650   0.00465015361
#> Response                 -0.001914865    0.1277471050  -0.00007028710
#> Complain                  0.004650154   -0.0000702871   0.00939096739

3.2 variansi tiap PC

# Plot the variance of each principal component computed on the raw
# (unscaled) numeric data.
plot(prcomp(store_num))

4 Data Pre-processing

# Standardise every numeric column (subtract the mean, divide by the standard
# deviation), then check the result with a summary.
store_scaled <- scale(store_num, center = TRUE, scale = TRUE)
summary(store_scaled)
#>    Year_Birth           Income            Kidhome           Teenhome      
#>  Min.   :-6.32598   Min.   :-2.00680   Min.   :-0.8229   Min.   :-0.9288  
#>  1st Qu.:-0.81935   1st Qu.:-0.67311   1st Qu.:-0.8229   1st Qu.:-0.9288  
#>  Median : 0.09842   Median :-0.03439   Median :-0.8229   Median :-0.9288  
#>  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000  
#>  3rd Qu.: 0.68246   3rd Qu.: 0.64651   3rd Qu.: 1.0397   3rd Qu.: 0.9089  
#>  Max.   : 2.26770   Max.   :24.40777   Max.   : 2.9023   Max.   : 2.7465  
#>     Recency              MntWines         MntFruits       MntMeatProducts  
#>  Min.   :-1.6931063   Min.   :-0.9044   Min.   :-0.6623   Min.   :-0.7446  
#>  1st Qu.:-0.8640435   1st Qu.:-0.8333   1st Qu.:-0.6121   1st Qu.:-0.6732  
#>  Median :-0.0004365   Median :-0.3871   Median :-0.4613   Median :-0.4414  
#>  Mean   : 0.0000000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
#>  3rd Qu.: 0.8631705   3rd Qu.: 0.5926   3rd Qu.: 0.1670   3rd Qu.: 0.2909  
#>  Max.   : 1.7267776   Max.   : 3.5215   Max.   : 4.3385   Max.   : 6.9466  
#>  MntFishProducts   MntSweetProducts   MntGoldProds     NumDealsPurchases
#>  Min.   :-0.6874   Min.   :-0.6581   Min.   :-0.8485   Min.   :-1.2078  
#>  1st Qu.:-0.6326   1st Qu.:-0.6337   1st Qu.:-0.6748   1st Qu.:-0.6880  
#>  Median :-0.4682   Median :-0.4633   Median :-0.3757   Median :-0.1682  
#>  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
#>  3rd Qu.: 0.2258   3rd Qu.: 0.1454   3rd Qu.: 0.2323   3rd Qu.: 0.3516  
#>  Max.   : 4.0430   Max.   : 5.7210   Max.   : 5.3466   Max.   : 6.5896  
#>  NumWebPurchases    NumCatalogPurchases NumStorePurchases NumWebVisitsMonth
#>  Min.   :-1.49046   Min.   :-0.9126     Min.   :-1.7845   Min.   :-2.1931  
#>  1st Qu.:-0.76079   1st Qu.:-0.9126     1st Qu.:-0.8616   1st Qu.:-0.9562  
#>  Median :-0.03112   Median :-0.2293     Median :-0.2464   Median : 0.2808  
#>  Mean   : 0.00000   Mean   : 0.0000     Mean   : 0.0000   Mean   : 0.0000  
#>  3rd Qu.: 0.69856   3rd Qu.: 0.4541     3rd Qu.: 0.6765   3rd Qu.: 0.6931  
#>  Max.   : 8.36013   Max.   : 8.6543     Max.   : 2.2145   Max.   : 6.0531  
#>     Response          Complain       
#>  Min.   :-0.4204   Min.   :-0.09779  
#>  1st Qu.:-0.4204   1st Qu.:-0.09779  
#>  Median :-0.4204   Median :-0.09779  
#>  Mean   : 0.0000   Mean   : 0.00000  
#>  3rd Qu.:-0.4204   3rd Qu.:-0.09779  
#>  Max.   : 2.3774   Max.   :10.22138
# Plot the variance of each principal component computed on the
# standardised data.
plot(prcomp(store_scaled))

5 Principal Component Analysis

# Fit the PCA on the numeric columns. `scale. = TRUE` standardises each
# column to unit variance first; `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
pca <- prcomp(store_num, scale. = TRUE)
# Print the component standard deviations and the rotation (loadings) matrix.
pca
#> Standard deviations (1, .., p=18):
#>  [1] 2.4763905 1.3974106 1.1621545 1.0612573 1.0027072 0.9413812 0.8833364
#>  [8] 0.8708472 0.7922104 0.7346263 0.6981765 0.6594193 0.6430538 0.6229631
#> [15] 0.5933158 0.5524368 0.4948089 0.4630121
#> 
#> Rotation (n x k) = (18 x 18):
#>                              PC1          PC2         PC3          PC4
#> Year_Birth          -0.064457664  0.335727340  0.39130327 -0.379686696
#> Income               0.301181504 -0.052621546 -0.10889439  0.123130540
#> Kidhome             -0.269524241  0.063275638  0.21971165 -0.211052432
#> Teenhome            -0.050631544 -0.549098334 -0.24008694  0.092630537
#> Recency              0.002094297 -0.015163415 -0.33711638 -0.614163837
#> MntWines             0.305047822 -0.180233324  0.10012802  0.071763727
#> MntFruits            0.284321321  0.135320520  0.04247837 -0.159389951
#> MntMeatProducts      0.325336178  0.140925582  0.06463400  0.004011992
#> MntFishProducts      0.293585737  0.146309787  0.02230301 -0.156829322
#> MntSweetProducts     0.283929301  0.118036609  0.02493680 -0.180381396
#> MntGoldProds         0.234726750 -0.107149903  0.15270537 -0.189908031
#> NumDealsPurchases   -0.050354204 -0.491556879  0.29627406 -0.273815251
#> NumWebPurchases      0.224019509 -0.373326842  0.21667582 -0.123724133
#> NumCatalogPurchases  0.330712088 -0.006605965  0.04048847  0.027994114
#> NumStorePurchases    0.299784110 -0.177131773 -0.04265224 -0.078849483
#> NumWebVisitsMonth   -0.260191985 -0.217413564  0.35727422 -0.165836141
#> Response             0.091540403  0.040014987  0.55451165  0.402114478
#> Complain            -0.015535785 -0.003023835 -0.04125873  0.010288157
#>                              PC5          PC6         PC7         PC8
#> Year_Birth           0.076674166 -0.016866270  0.44215463 -0.31890708
#> Income               0.013025084  0.139281304  0.32553524  0.12007120
#> Kidhome             -0.069387460  0.002889616  0.18344140  0.43224899
#> Teenhome             0.024353675 -0.153586909  0.05332261  0.16167128
#> Recency             -0.041843054  0.612004206 -0.26732198  0.06291479
#> MntWines             0.023524459  0.298047882  0.12938145 -0.23853026
#> MntFruits           -0.053592441 -0.306964411 -0.13371863  0.16942145
#> MntMeatProducts     -0.027923041  0.196014462  0.14574536  0.22594429
#> MntFishProducts     -0.028960804 -0.276196775 -0.16503996  0.19055013
#> MntSweetProducts    -0.025044925 -0.250629256 -0.09292733  0.17591814
#> MntGoldProds         0.012968951 -0.181591941 -0.45703032 -0.13840791
#> NumDealsPurchases   -0.027796309 -0.050493156  0.24183066  0.40185225
#> NumWebPurchases     -0.001985414 -0.013636539 -0.06375694 -0.34105305
#> NumCatalogPurchases -0.021212739  0.210796978  0.10550721  0.17388666
#> NumStorePurchases    0.017988629 -0.082444216  0.27336627 -0.24972432
#> NumWebVisitsMonth   -0.022290318  0.008819296 -0.21097904 -0.15502826
#> Response            -0.077586347  0.365379980 -0.30342569  0.18982101
#> Complain            -0.986350239 -0.032632763  0.07050860 -0.11582183
#>                             PC9        PC10         PC11        PC12
#> Year_Birth           0.18927106 -0.43021014  0.172867102 -0.05290104
#> Income               0.06207080 -0.21091764 -0.516506219 -0.15625208
#> Kidhome             -0.01338915  0.09505855 -0.606858061  0.08056670
#> Teenhome             0.07761456 -0.56022385  0.131781369 -0.01295178
#> Recency             -0.08040853 -0.18012525  0.014072139  0.08446339
#> MntWines            -0.09996281  0.13858605 -0.096504039  0.12223445
#> MntFruits           -0.21244679 -0.14215190 -0.002480198  0.58438859
#> MntMeatProducts      0.04740319  0.20873114  0.171281024 -0.07894611
#> MntFishProducts     -0.08197321  0.11532020  0.038742331 -0.01039039
#> MntSweetProducts    -0.37937066 -0.23242062  0.051629755 -0.53077395
#> MntGoldProds         0.70704233 -0.01276052 -0.234629863 -0.02894733
#> NumDealsPurchases    0.12233789  0.15997981  0.280675950  0.04348007
#> NumWebPurchases     -0.27609955  0.04151285 -0.248278051 -0.24453706
#> NumCatalogPurchases  0.22420040  0.21230270  0.274352421 -0.17674727
#> NumStorePurchases   -0.11541451  0.03952605 -0.006192277  0.43830905
#> NumWebVisitsMonth   -0.27567337  0.18774515  0.037207493 -0.06473089
#> Response            -0.07802978 -0.39356394  0.045207727  0.15361425
#> Complain             0.05350450 -0.02336023  0.021595765 -0.02850529
#>                             PC13         PC14         PC15         PC16
#> Year_Birth           0.119010347 -0.108036754 -0.022901012 -0.037975729
#> Income               0.219418971 -0.025906549  0.189975248  0.542482479
#> Kidhome             -0.102422499 -0.038575442 -0.258975810 -0.366449200
#> Teenhome             0.195267103 -0.191334094 -0.297617129 -0.226461889
#> Recency             -0.050900304 -0.042276699  0.074756074  0.023095357
#> MntWines             0.030335564 -0.068259066 -0.553173356  0.039763793
#> MntFruits            0.434445888  0.340428182  0.025035552 -0.003293442
#> MntMeatProducts      0.208178402  0.057583672 -0.046850480 -0.157303289
#> MntFishProducts      0.011594730 -0.830279290  0.054793304  0.058756587
#> MntSweetProducts    -0.347131451  0.302118644 -0.257174449  0.084780260
#> MntGoldProds        -0.098599052  0.126634459 -0.121682721  0.060328193
#> NumDealsPurchases   -0.152613141  0.100880769  0.272572829  0.248746223
#> NumWebPurchases      0.173792385  0.007967293  0.441582408 -0.449973051
#> NumCatalogPurchases  0.177393468  0.065799001 -0.144863596 -0.233128693
#> NumStorePurchases   -0.547653116 -0.042070768 -0.043940343  0.012046311
#> NumWebVisitsMonth    0.308565203 -0.061688264 -0.322295880  0.397866289
#> Response            -0.207378770 -0.083646809  0.114934489 -0.011081328
#> Complain             0.004224006 -0.009407843 -0.009492095  0.016924656
#>                             PC17        PC18
#> Year_Birth           0.025449454 -0.02123086
#> Income              -0.008719244  0.14401465
#> Kidhome              0.001670653  0.08891473
#> Teenhome            -0.150088569  0.04481087
#> Recency              0.001519119  0.02136944
#> MntWines             0.277517294 -0.50262677
#> MntFruits            0.132188421 -0.01880471
#> MntMeatProducts     -0.757486982 -0.17275251
#> MntFishProducts      0.081311927 -0.04945076
#> MntSweetProducts     0.028176835 -0.03229485
#> MntGoldProds        -0.109642826 -0.02596877
#> NumDealsPurchases    0.129811939 -0.24914778
#> NumWebPurchases      0.009643083 -0.01971172
#> NumCatalogPurchases  0.406230402  0.56912789
#> NumStorePurchases   -0.239761480  0.39776132
#> NumWebVisitsMonth   -0.222506556  0.36502956
#> Response            -0.008310830  0.05649559
#> Complain             0.003206678 -0.02056646
# Standard deviation of each principal component.
pca[["sdev"]]
#>  [1] 2.4763905 1.3974106 1.1621545 1.0612573 1.0027072 0.9413812 0.8833364
#>  [8] 0.8708472 0.7922104 0.7346263 0.6981765 0.6594193 0.6430538 0.6229631
#> [15] 0.5933158 0.5524368 0.4948089 0.4630121
# Variance of each principal component (squared standard deviation).
pca[["sdev"]]^2
#>  [1] 6.1325101 1.9527564 1.3506032 1.1262670 1.0054218 0.8861986 0.7802832
#>  [8] 0.7583749 0.6275974 0.5396758 0.4874504 0.4348339 0.4135182 0.3880830
#> [15] 0.3520237 0.3051865 0.2448358 0.2143802

6

# Inspect the eigenvectors: the loading of every variable on each PC.
pca[["rotation"]]
#>                              PC1          PC2         PC3          PC4
#> Year_Birth          -0.064457664  0.335727340  0.39130327 -0.379686696
#> Income               0.301181504 -0.052621546 -0.10889439  0.123130540
#> Kidhome             -0.269524241  0.063275638  0.21971165 -0.211052432
#> Teenhome            -0.050631544 -0.549098334 -0.24008694  0.092630537
#> Recency              0.002094297 -0.015163415 -0.33711638 -0.614163837
#> MntWines             0.305047822 -0.180233324  0.10012802  0.071763727
#> MntFruits            0.284321321  0.135320520  0.04247837 -0.159389951
#> MntMeatProducts      0.325336178  0.140925582  0.06463400  0.004011992
#> MntFishProducts      0.293585737  0.146309787  0.02230301 -0.156829322
#> MntSweetProducts     0.283929301  0.118036609  0.02493680 -0.180381396
#> MntGoldProds         0.234726750 -0.107149903  0.15270537 -0.189908031
#> NumDealsPurchases   -0.050354204 -0.491556879  0.29627406 -0.273815251
#> NumWebPurchases      0.224019509 -0.373326842  0.21667582 -0.123724133
#> NumCatalogPurchases  0.330712088 -0.006605965  0.04048847  0.027994114
#> NumStorePurchases    0.299784110 -0.177131773 -0.04265224 -0.078849483
#> NumWebVisitsMonth   -0.260191985 -0.217413564  0.35727422 -0.165836141
#> Response             0.091540403  0.040014987  0.55451165  0.402114478
#> Complain            -0.015535785 -0.003023835 -0.04125873  0.010288157
#>                              PC5          PC6         PC7         PC8
#> Year_Birth           0.076674166 -0.016866270  0.44215463 -0.31890708
#> Income               0.013025084  0.139281304  0.32553524  0.12007120
#> Kidhome             -0.069387460  0.002889616  0.18344140  0.43224899
#> Teenhome             0.024353675 -0.153586909  0.05332261  0.16167128
#> Recency             -0.041843054  0.612004206 -0.26732198  0.06291479
#> MntWines             0.023524459  0.298047882  0.12938145 -0.23853026
#> MntFruits           -0.053592441 -0.306964411 -0.13371863  0.16942145
#> MntMeatProducts     -0.027923041  0.196014462  0.14574536  0.22594429
#> MntFishProducts     -0.028960804 -0.276196775 -0.16503996  0.19055013
#> MntSweetProducts    -0.025044925 -0.250629256 -0.09292733  0.17591814
#> MntGoldProds         0.012968951 -0.181591941 -0.45703032 -0.13840791
#> NumDealsPurchases   -0.027796309 -0.050493156  0.24183066  0.40185225
#> NumWebPurchases     -0.001985414 -0.013636539 -0.06375694 -0.34105305
#> NumCatalogPurchases -0.021212739  0.210796978  0.10550721  0.17388666
#> NumStorePurchases    0.017988629 -0.082444216  0.27336627 -0.24972432
#> NumWebVisitsMonth   -0.022290318  0.008819296 -0.21097904 -0.15502826
#> Response            -0.077586347  0.365379980 -0.30342569  0.18982101
#> Complain            -0.986350239 -0.032632763  0.07050860 -0.11582183
#>                             PC9        PC10         PC11        PC12
#> Year_Birth           0.18927106 -0.43021014  0.172867102 -0.05290104
#> Income               0.06207080 -0.21091764 -0.516506219 -0.15625208
#> Kidhome             -0.01338915  0.09505855 -0.606858061  0.08056670
#> Teenhome             0.07761456 -0.56022385  0.131781369 -0.01295178
#> Recency             -0.08040853 -0.18012525  0.014072139  0.08446339
#> MntWines            -0.09996281  0.13858605 -0.096504039  0.12223445
#> MntFruits           -0.21244679 -0.14215190 -0.002480198  0.58438859
#> MntMeatProducts      0.04740319  0.20873114  0.171281024 -0.07894611
#> MntFishProducts     -0.08197321  0.11532020  0.038742331 -0.01039039
#> MntSweetProducts    -0.37937066 -0.23242062  0.051629755 -0.53077395
#> MntGoldProds         0.70704233 -0.01276052 -0.234629863 -0.02894733
#> NumDealsPurchases    0.12233789  0.15997981  0.280675950  0.04348007
#> NumWebPurchases     -0.27609955  0.04151285 -0.248278051 -0.24453706
#> NumCatalogPurchases  0.22420040  0.21230270  0.274352421 -0.17674727
#> NumStorePurchases   -0.11541451  0.03952605 -0.006192277  0.43830905
#> NumWebVisitsMonth   -0.27567337  0.18774515  0.037207493 -0.06473089
#> Response            -0.07802978 -0.39356394  0.045207727  0.15361425
#> Complain             0.05350450 -0.02336023  0.021595765 -0.02850529
#>                             PC13         PC14         PC15         PC16
#> Year_Birth           0.119010347 -0.108036754 -0.022901012 -0.037975729
#> Income               0.219418971 -0.025906549  0.189975248  0.542482479
#> Kidhome             -0.102422499 -0.038575442 -0.258975810 -0.366449200
#> Teenhome             0.195267103 -0.191334094 -0.297617129 -0.226461889
#> Recency             -0.050900304 -0.042276699  0.074756074  0.023095357
#> MntWines             0.030335564 -0.068259066 -0.553173356  0.039763793
#> MntFruits            0.434445888  0.340428182  0.025035552 -0.003293442
#> MntMeatProducts      0.208178402  0.057583672 -0.046850480 -0.157303289
#> MntFishProducts      0.011594730 -0.830279290  0.054793304  0.058756587
#> MntSweetProducts    -0.347131451  0.302118644 -0.257174449  0.084780260
#> MntGoldProds        -0.098599052  0.126634459 -0.121682721  0.060328193
#> NumDealsPurchases   -0.152613141  0.100880769  0.272572829  0.248746223
#> NumWebPurchases      0.173792385  0.007967293  0.441582408 -0.449973051
#> NumCatalogPurchases  0.177393468  0.065799001 -0.144863596 -0.233128693
#> NumStorePurchases   -0.547653116 -0.042070768 -0.043940343  0.012046311
#> NumWebVisitsMonth    0.308565203 -0.061688264 -0.322295880  0.397866289
#> Response            -0.207378770 -0.083646809  0.114934489 -0.011081328
#> Complain             0.004224006 -0.009407843 -0.009492095  0.016924656
#>                             PC17        PC18
#> Year_Birth           0.025449454 -0.02123086
#> Income              -0.008719244  0.14401465
#> Kidhome              0.001670653  0.08891473
#> Teenhome            -0.150088569  0.04481087
#> Recency              0.001519119  0.02136944
#> MntWines             0.277517294 -0.50262677
#> MntFruits            0.132188421 -0.01880471
#> MntMeatProducts     -0.757486982 -0.17275251
#> MntFishProducts      0.081311927 -0.04945076
#> MntSweetProducts     0.028176835 -0.03229485
#> MntGoldProds        -0.109642826 -0.02596877
#> NumDealsPurchases    0.129811939 -0.24914778
#> NumWebPurchases      0.009643083 -0.01971172
#> NumCatalogPurchases  0.406230402  0.56912789
#> NumStorePurchases   -0.239761480  0.39776132
#> NumWebVisitsMonth   -0.222506556  0.36502956
#> Response            -0.008310830  0.05649559
#> Complain             0.003206678 -0.02056646

Variabel yang memiliki kontribusi besar terhadap PC1 adalah Income, MntWines, MntMeatProducts, dan NumCatalogPurchases.

# Original data after standardisation (store_scaled).
as.data.frame(x = store_scaled)
# New coordinates of every observation on each PC.
as.data.frame(x = pca[["x"]])

6.1 PCA for Dimensionality Reduction

Kita akan mereduksi 18 dimensi tersebut dengan mempertahankan minimal 75% informasi. Berapa banyak dimensi baru (PC) yang perlu digunakan? Hal ini dapat diketahui dengan melihat cumulative variance melalui fungsi summary() pada objek pca.

# Standard deviation, proportion of variance and cumulative proportion of
# variance for every principal component.
pca %>%
  summary()
#> Importance of components:
#>                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
#> Standard deviation     2.4764 1.3974 1.16215 1.06126 1.00271 0.94138 0.88334
#> Proportion of Variance 0.3407 0.1085 0.07503 0.06257 0.05586 0.04923 0.04335
#> Cumulative Proportion  0.3407 0.4492 0.52421 0.58679 0.64264 0.69188 0.73522
#>                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
#> Standard deviation     0.87085 0.79221 0.73463 0.69818 0.65942 0.64305 0.62296
#> Proportion of Variance 0.04213 0.03487 0.02998 0.02708 0.02416 0.02297 0.02156
#> Cumulative Proportion  0.77736 0.81222 0.84220 0.86929 0.89344 0.91642 0.93798
#>                           PC15    PC16   PC17    PC18
#> Standard deviation     0.59332 0.55244 0.4948 0.46301
#> Proportion of Variance 0.01956 0.01695 0.0136 0.01191
#> Cumulative Proportion  0.95753 0.97449 0.9881 1.00000

Jawaban: PC1 sampai PC8 dengan nilai Cumulative Proportion = 0.77736

# Keep the leading principal components that together retain at least 75% of
# the total variance. The count is derived from the cumulative proportion of
# variance instead of being hard-coded, so it follows the data if they change.
pc_var <- pca$sdev^2
n_keep <- which(cumsum(pc_var) / sum(pc_var) >= 0.75)[1]
# drop = FALSE keeps a matrix even when a single component is enough.
pc_keep <- as.data.frame(pca$x[, seq_len(n_keep), drop = FALSE])
pc_keep

Setelah dipilih PC yang merangkum informasi yang dibutuhkan, PC dapat digabung dengan data awal dan digunakan untuk analisis lebih lanjut

# Attach the retained PCs to the non-numeric columns of the cleaned data.
# `select(!where(...))` replaces the superseded `select_if()`. The column
# bind relies on both tables having the same rows in the same order.
store_clean %>%
  select(!where(is.numeric)) %>%
  cbind(pc_keep)

7 Biplot

Buat biplot dari 100 observasi pertama data store

# Take the first 100 observations so the biplot stays readable.
prop_small <- store_num %>% head(100)

# Run the PCA without standardising the columns. The argument is spelled
# `scale.` in full (no partial matching) and uses FALSE instead of `F`.
pca_small <- prcomp(prop_small, scale. = FALSE)

# Draw the biplot. `scale = 0` is the numeric value the original
# `scale = FALSE` was coerced to.
biplot(x = pca_small, cex = 0.7, scale = 0)

#install.packages("factoextra")
library(factoextra)

# Show how much each variable ("var") contributes to the second principal
# component (axes = 2) of the small PCA object.
fviz_contrib(pca_small, choice = "var", axes = 2)

Kesimpulan: variabel yang berkontribusi tinggi ke PC1 adalah Income, sedangkan variabel yang berkontribusi tinggi ke PC2 adalah MntWines.