Load Library

library(psych)
library(factoextra)
library(sjPlot)
library(ppcor)
library(corrplot)

Import data

# Read the raw dataset from a CSV file into `data`. The path is relative, so
# the file must be in the current working directory.
data <- read.csv("media prediction and its cost.csv")

Data Cleaning

Cek Duplikat Baris

# Count rows that exactly repeat an earlier row.
dup_bar <- sum(duplicated(data))
# End the message with a newline so later console output starts on its own
# line.
cat("Jumlah baris duplikat:", dup_bar, "\n")
## Jumlah baris duplikat: 0

Cek Duplikat Kolom

# Names of columns whose contents exactly repeat an earlier column:
# duplicated() is applied to the list of columns, so whole columns are compared.
kol_dup <- names(data)[duplicated(as.list(data))]
# Report the duplicated column names, or "Tidak ada" when there are none.
# The trailing newline keeps later console output on its own line.
cat(
  "Kolom duplikat:",
  if (length(kol_dup) == 0) "Tidak ada" else kol_dup,
  "\n"
)
## Kolom duplikat: avg_cars_at.home.approx..1 prepared_food

Mengambil Kolom Numerik

# Keep only the numeric columns. vapply() always yields a logical vector,
# whereas sapply() returns an empty list for a zero-column data frame and
# would break the subsetting.
data_clean <- data[vapply(data, is.numeric, logical(1))]

Hapus Kolom Duplikat, Biner, dan Target

# Columns excluded from the analysis; per the section heading these are the
# duplicated, binary, and target columns.
cols_to_remove <- c(
  "avg_cars_at.home.approx..1", "recyclable_package", "low_fat",
  "coffee_bar", "video_store", "salad_bar", "prepared_food", "florist",
  "cost"
)
# Keep every other column, in its original order, as a data frame.
data_clean <- data_clean[setdiff(names(data_clean), cols_to_remove)]

Hapus missing value

# Drop every row that contains at least one missing value.
data_clean <- na.omit(data_clean)

Visualisasi

# Draw one histogram per variable in a grid three panels wide.
# par() returns the previous settings; they are restored afterwards so that
# later base-graphics plots are not squeezed into this multi-panel layout.
old_par <- par(
  mfrow = c(ceiling(ncol(data_clean) / 3), 3),
  mar = c(2, 2, 2, 1)
)
for (col_name in names(data_clean)) {
  hist(
    data_clean[[col_name]],
    main = col_name,
    xlab = col_name
  )
}
par(old_par)

Karakteristik Data

Statistik Sederhana

# Descriptive statistics for every remaining variable; the printed table
# shows n, mean, sd, median, trimmed mean, mad, min, max, range, skew,
# kurtosis and standard error.
summary_table <- describe(data_clean)
print(summary_table)
##                          vars     n     mean      sd   median  trimmed     mad
## store_sales.in.millions.    1 60428     6.54    3.46     5.94     6.28    3.53
## store_cost.in.millions.     2 60428     2.62    1.45     2.39     2.49    1.45
## unit_sales.in.millions.     3 60428     3.09    0.83     3.00     3.09    1.48
## total_children              4 60428     2.53    1.49     3.00     2.53    1.48
## avg_cars_at.home.approx.    5 60428     2.20    1.11     2.00     2.20    1.48
## num_children_at_home        6 60428     0.83    1.30     0.00     0.56    0.00
## SRP                         7 60428     2.12    0.93     2.13     2.10    1.02
## gross_weight                8 60428    13.81    4.62    13.60    13.74    5.93
## net_weight                  9 60428    11.80    4.68    11.60    11.73    6.08
## units_per_case             10 60428    18.86   10.26    19.00    18.96   13.34
## store_sqft                 11 60428 27988.48 5701.02 27694.00 27615.88 6080.14
## grocery_sqft               12 60428 19133.80 3987.40 18670.00 18936.07 4941.51
## frozen_sqft                13 60428  5312.85 1575.91  5062.00  5240.34  846.56
## meat_sqft                  14 60428  3541.85 1050.47  3375.00  3493.49  563.39
##                               min      max    range  skew kurtosis    se
## store_sales.in.millions.     0.51    22.92    22.41  0.68     0.09  0.01
## store_cost.in.millions.      0.16     9.73     9.56  0.83     0.54  0.01
## unit_sales.in.millions.      1.00     6.00     5.00  0.05    -0.32  0.00
## total_children               0.00     5.00     5.00 -0.01    -1.04  0.01
## avg_cars_at.home.approx.     0.00     4.00     4.00 -0.08    -0.79  0.00
## num_children_at_home         0.00     5.00     5.00  1.55     1.47  0.01
## SRP                          0.50     3.98     3.48  0.14    -0.89  0.00
## gross_weight                 6.00    21.90    15.90  0.09    -1.23  0.02
## net_weight                   3.05    20.80    17.75  0.11    -1.19  0.02
## units_per_case               1.00    36.00    35.00 -0.08    -1.25  0.04
## store_sqft               20319.00 39696.00 19377.00  0.39    -0.94 23.19
## grocery_sqft             13305.00 30351.00 17046.00  0.39    -0.54 16.22
## frozen_sqft               2452.00  9184.00  6732.00  0.56     0.61  6.41
## meat_sqft                 1635.00  6122.00  4487.00  0.56     0.60  4.27

Cek Tipe Data

# Show the structure of the cleaned data: dimensions and the type of each
# column.
str(data_clean)
## 'data.frame':    60428 obs. of  14 variables:
##  $ store_sales.in.millions.: num  7.36 5.52 3.68 3.68 4.08 4.08 5.44 3.74 4.08 9.72 ...
##  $ store_cost.in.millions. : num  2.72 2.59 1.36 1.18 1.43 ...
##  $ unit_sales.in.millions. : num  4 3 2 2 3 3 4 2 3 3 ...
##  $ total_children          : num  1 0 4 2 0 2 4 1 2 3 ...
##  $ avg_cars_at.home.approx.: num  1 4 1 2 2 1 2 4 2 1 ...
##  $ num_children_at_home    : num  1 0 0 2 0 2 0 0 0 0 ...
##  $ SRP                     : num  1.84 1.84 1.84 1.84 1.36 1.36 1.36 1.87 1.36 3.24 ...
##  $ gross_weight            : num  19.7 19.7 19.7 19.7 7.12 7.12 7.12 16.7 7.12 16.3 ...
##  $ net_weight              : num  17.7 17.7 17.7 17.7 5.11 5.11 5.11 14.7 5.11 14.2 ...
##  $ units_per_case          : num  17 17 17 17 29 29 29 10 29 25 ...
##  $ store_sqft              : num  27694 27694 27694 27694 27694 ...
##  $ grocery_sqft            : num  18670 18670 18670 18670 18670 ...
##  $ frozen_sqft             : num  5415 5415 5415 5415 5415 ...
##  $ meat_sqft               : num  3610 3610 3610 3610 3610 3610 3610 3610 3610 3610 ...

Uji Asumsi Kelayakan

Asumsi multikolinier

# Correlation matrix of all variables, rounded to three decimals
# (cor() uses the Pearson method by default).
mat_corr <- round(cor(data_clean),3)
# Render the correlation table with significance stars.
tab_corr(data_clean)
  store_sales.in.millions. store_cost.in.millions. unit_sales.in.millions. total_children avg_cars_at.home.approx. num_children_at_home SRP gross_weight net_weight units_per_case store_sqft grocery_sqft frozen_sqft meat_sqft
store_sales.in.millions.   0.955*** 0.503*** 0.083*** 0.004 0.032*** 0.833*** 0.036*** 0.032*** -0.011** 0.016*** 0.010* 0.018*** 0.018***
store_cost.in.millions. 0.955***   0.480*** 0.079*** 0.003 0.028*** 0.796*** 0.034*** 0.030*** -0.010* 0.018*** 0.013** 0.019*** 0.019***
unit_sales.in.millions. 0.503*** 0.480***   0.163*** 0.024*** 0.067*** -0.002 0.001 0.001 0.000 0.031*** 0.025*** 0.031*** 0.031***
total_children 0.083*** 0.079*** 0.163***   0.098*** 0.395*** 0.001 -0.000 0.000 0.002 0.001 0.019*** -0.027*** -0.027***
avg_cars_at.home.approx. 0.004 0.003 0.024*** 0.098***   0.131*** -0.008 0.005 0.004 -0.007 -0.016*** -0.018*** -0.007 -0.007
num_children_at_home 0.032*** 0.028*** 0.067*** 0.395*** 0.131***   -0.000 -0.002 -0.003 0.000 0.006 0.025*** -0.025*** -0.025***
SRP 0.833*** 0.796*** -0.002 0.001 -0.008 -0.000   0.043*** 0.039*** -0.012** 0.002 -0.001 0.004 0.004
gross_weight 0.036*** 0.034*** 0.001 -0.000 0.005 -0.002 0.043***   0.989*** -0.012** -0.005 -0.005 -0.004 -0.004
net_weight 0.032*** 0.030*** 0.001 0.000 0.004 -0.003 0.039*** 0.989***   -0.014*** -0.004 -0.004 -0.004 -0.004
units_per_case -0.011** -0.010* 0.000 0.002 -0.007 0.000 -0.012** -0.012** -0.014***   0.002 0.004 -0.001 -0.001
store_sqft 0.016*** 0.018*** 0.031*** 0.001 -0.016*** 0.006 0.002 -0.005 -0.004 0.002   0.913*** 0.785*** 0.785***
grocery_sqft 0.010* 0.013** 0.025*** 0.019*** -0.018*** 0.025*** -0.001 -0.005 -0.004 0.004 0.913***   0.463*** 0.463***
frozen_sqft 0.018*** 0.019*** 0.031*** -0.027*** -0.007 -0.025*** 0.004 -0.004 -0.004 -0.001 0.785*** 0.463***   1.000***
meat_sqft 0.018*** 0.019*** 0.031*** -0.027*** -0.007 -0.025*** 0.004 -0.004 -0.004 -0.001 0.785*** 0.463*** 1.000***  
Computed correlation used pearson-method with listwise-deletion.
# frozen_sqft and meat_sqft correlate at 1.000, so one of the pair is
# redundant: drop meat_sqft and keep frozen_sqft. drop = FALSE guarantees the
# result stays a data frame, matching the earlier column removal.
data_clean <- data_clean[
  ,
  !(names(data_clean) %in% "meat_sqft"),
  drop = FALSE
]

# Re-check multicollinearity on the reduced variable set.
mat_corr <- round(cor(data_clean), 3)
tab_corr(data_clean)
  store_sales.in.millions. store_cost.in.millions. unit_sales.in.millions. total_children avg_cars_at.home.approx. num_children_at_home SRP gross_weight net_weight units_per_case store_sqft grocery_sqft frozen_sqft
store_sales.in.millions.   0.955*** 0.503*** 0.083*** 0.004 0.032*** 0.833*** 0.036*** 0.032*** -0.011** 0.016*** 0.010* 0.018***
store_cost.in.millions. 0.955***   0.480*** 0.079*** 0.003 0.028*** 0.796*** 0.034*** 0.030*** -0.010* 0.018*** 0.013** 0.019***
unit_sales.in.millions. 0.503*** 0.480***   0.163*** 0.024*** 0.067*** -0.002 0.001 0.001 0.000 0.031*** 0.025*** 0.031***
total_children 0.083*** 0.079*** 0.163***   0.098*** 0.395*** 0.001 -0.000 0.000 0.002 0.001 0.019*** -0.027***
avg_cars_at.home.approx. 0.004 0.003 0.024*** 0.098***   0.131*** -0.008 0.005 0.004 -0.007 -0.016*** -0.018*** -0.007
num_children_at_home 0.032*** 0.028*** 0.067*** 0.395*** 0.131***   -0.000 -0.002 -0.003 0.000 0.006 0.025*** -0.025***
SRP 0.833*** 0.796*** -0.002 0.001 -0.008 -0.000   0.043*** 0.039*** -0.012** 0.002 -0.001 0.004
gross_weight 0.036*** 0.034*** 0.001 -0.000 0.005 -0.002 0.043***   0.989*** -0.012** -0.005 -0.005 -0.004
net_weight 0.032*** 0.030*** 0.001 0.000 0.004 -0.003 0.039*** 0.989***   -0.014*** -0.004 -0.004 -0.004
units_per_case -0.011** -0.010* 0.000 0.002 -0.007 0.000 -0.012** -0.012** -0.014***   0.002 0.004 -0.001
store_sqft 0.016*** 0.018*** 0.031*** 0.001 -0.016*** 0.006 0.002 -0.005 -0.004 0.002   0.913*** 0.785***
grocery_sqft 0.010* 0.013** 0.025*** 0.019*** -0.018*** 0.025*** -0.001 -0.005 -0.004 0.004 0.913***   0.463***
frozen_sqft 0.018*** 0.019*** 0.031*** -0.027*** -0.007 -0.025*** 0.004 -0.004 -0.004 -0.001 0.785*** 0.463***  
Computed correlation used pearson-method with listwise-deletion.

Visualisasi Heatmap Korelasi

# Heatmap of the rounded correlation matrix computed above.
corrplot(mat_corr, 
         method = "color",      # draw each correlation as a colored cell
         type = "lower",        # show only the lower triangle
         tl.col = "black",      # text-label color
         tl.srt = 45,           # text-label rotation in degrees
         tl.cex = 0.7,          # text-label size
         addCoef.col = "black", # print the coefficients in black
         number.cex = 0.6,      # size of the printed coefficients
         diag = FALSE,          # omit the diagonal
         cl.pos = "b")          # color legend at the bottom

Uji Bartlett

# Bartlett's test on the correlation matrix of the cleaned data, with the
# number of observations as the sample size. psych is already attached at the
# top of the script, so it is not reloaded here.
cortest.bartlett(cor(data_clean), n = nrow(data_clean))
## $chisq
## [1] 1703182
## 
## $p.value
## [1] 0
## 
## $df
## [1] 78

KMO (Kaiser-Meyer-Olkin)

# Initial Kaiser-Meyer-Olkin check: prints the overall MSA and the MSA of
# each variable.
kmo_awal <- KMO(data_clean)
print(kmo_awal)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_clean)
## Overall MSA =  0.47
## MSA for each item = 
## store_sales.in.millions.  store_cost.in.millions.  unit_sales.in.millions. 
##                     0.55                     0.84                     0.28 
##           total_children avg_cars_at.home.approx.     num_children_at_home 
##                     0.58                     0.67                     0.54 
##                      SRP             gross_weight               net_weight 
##                     0.49                     0.50                     0.50 
##           units_per_case               store_sqft             grocery_sqft 
##                     0.51                     0.42                     0.34 
##              frozen_sqft 
##                     0.29
# Drop the variable with the lowest MSA in the initial KMO output
# (unit_sales.in.millions., 0.28). drop = FALSE guarantees the result stays a
# data frame, matching the earlier column removal.
data_pca_satu <- data_clean[
  ,
  !(names(data_clean) %in% "unit_sales.in.millions."),
  drop = FALSE
]

# Re-check KMO on the reduced variable set.
kmo_satu <- KMO(data_pca_satu)
print(kmo_satu)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_pca_satu)
## Overall MSA =  0.5
## MSA for each item = 
## store_sales.in.millions.  store_cost.in.millions.           total_children 
##                     0.63                     0.67                     0.52 
## avg_cars_at.home.approx.     num_children_at_home                      SRP 
##                     0.66                     0.53                     0.88 
##             gross_weight               net_weight           units_per_case 
##                     0.50                     0.50                     0.51 
##               store_sqft             grocery_sqft              frozen_sqft 
##                     0.42                     0.34                     0.29
# Drop the variable with the lowest remaining MSA (frozen_sqft, 0.29).
# drop = FALSE guarantees the result stays a data frame, matching the earlier
# column removal.
data_pca_final <- data_pca_satu[
  ,
  !(names(data_pca_satu) %in% "frozen_sqft"),
  drop = FALSE
]

# Re-check KMO on the reduced variable set.
kmo_final <- KMO(data_pca_final)
print(kmo_final)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_pca_final)
## Overall MSA =  0.59
## MSA for each item = 
## store_sales.in.millions.  store_cost.in.millions.           total_children 
##                     0.63                     0.67                     0.52 
## avg_cars_at.home.approx.     num_children_at_home                      SRP 
##                     0.67                     0.53                     0.88 
##             gross_weight               net_weight           units_per_case 
##                     0.50                     0.50                     0.50 
##               store_sqft             grocery_sqft 
##                     0.50                     0.50
# Repeat Bartlett's test on the variable set kept for PCA. psych is already
# attached at the top of the script, so it is not reloaded here.
cortest.bartlett(cor(data_pca_final), n = nrow(data_pca_final))
## $chisq
## [1] 570140.9
## 
## $p.value
## [1] 0
## 
## $df
## [1] 55
# Correlation matrix of the final PCA variables, rounded to three decimals.
mat_corr_final <- round(cor(data_pca_final), 3)
# Render the titled correlation table and write it to tabel_korelasi.html
# (relative path, so it lands in the working directory).
tab_corr(data_pca_final,
         file = "tabel_korelasi.html",
         title = "Tabel 2. Matriks Korelasi Variabel Penelitian")
Tabel 2. Matriks Korelasi Variabel Penelitian
  store_sales.in.millions. store_cost.in.millions. total_children avg_cars_at.home.approx. num_children_at_home SRP gross_weight net_weight units_per_case store_sqft grocery_sqft
store_sales.in.millions.   0.955*** 0.083*** 0.004 0.032*** 0.833*** 0.036*** 0.032*** -0.011** 0.016*** 0.010*
store_cost.in.millions. 0.955***   0.079*** 0.003 0.028*** 0.796*** 0.034*** 0.030*** -0.010* 0.018*** 0.013**
total_children 0.083*** 0.079***   0.098*** 0.395*** 0.001 -0.000 0.000 0.002 0.001 0.019***
avg_cars_at.home.approx. 0.004 0.003 0.098***   0.131*** -0.008 0.005 0.004 -0.007 -0.016*** -0.018***
num_children_at_home 0.032*** 0.028*** 0.395*** 0.131***   -0.000 -0.002 -0.003 0.000 0.006 0.025***
SRP 0.833*** 0.796*** 0.001 -0.008 -0.000   0.043*** 0.039*** -0.012** 0.002 -0.001
gross_weight 0.036*** 0.034*** -0.000 0.005 -0.002 0.043***   0.989*** -0.012** -0.005 -0.005
net_weight 0.032*** 0.030*** 0.000 0.004 -0.003 0.039*** 0.989***   -0.014*** -0.004 -0.004
units_per_case -0.011** -0.010* 0.002 -0.007 0.000 -0.012** -0.012** -0.014***   0.002 0.004
store_sqft 0.016*** 0.018*** 0.001 -0.016*** 0.006 0.002 -0.005 -0.004 0.002   0.913***
grocery_sqft 0.010* 0.013** 0.019*** -0.018*** 0.025*** -0.001 -0.005 -0.004 0.004 0.913***  
Computed correlation used pearson-method with listwise-deletion.

PCA

Eigenvalue dan Persentase Varians

# Principal component analysis on the retained variables.
pc_results <- prcomp(data_pca_final, scale. = TRUE) # standardize the variables
# Eigenvalue, percentage of variance and cumulative percentage per component.
eig_val <- get_eigenvalue(pc_results)
print(eig_val)
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.74288917       24.9353561                    24.93536
## Dim.2  1.98164086       18.0149169                    42.95027
## Dim.3  1.91143247       17.3766588                    60.32693
## Dim.4  1.44953777       13.1776161                    73.50455
## Dim.5  1.00047883        9.0952621                    82.59981
## Dim.6  0.94117964        8.5561785                    91.15599
## Dim.7  0.60497917        5.4998107                    96.65580
## Dim.8  0.22745721        2.0677928                    98.72359
## Dim.9  0.08680822        0.7891656                    99.51276
## Dim.10 0.04251539        0.3865035                    99.89926
## Dim.11 0.01108127        0.1007388                   100.00000

Scree Plot

# Scree plot of the PCA with value labels on the bars; the y axis is fixed to
# the range 0-50.
fviz_eig(pc_results, addlabels = TRUE, ylim = c(0, 50))
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

Hasil faktor yang terbentuk

# Number of components to retain: count of eigenvalues greater than 1.
n_factors <- sum(eig_val$eigenvalue > 1)
print(n_factors)
## [1] 5

Loading PCA

(hubungan variabel dengan komponen)

# Rotation matrix columns for the retained components.
# seq_len() selects nothing when n_factors is 0 (1:0 would pick column 1), and
# drop = FALSE keeps a matrix even when only one component is retained.
pca_loading <- pc_results$rotation[, seq_len(n_factors), drop = FALSE]
print(pca_loading)
##                                   PC1          PC2          PC3           PC4
## store_sales.in.millions. -0.586789926 -0.065516705  0.027945997  0.0191203234
## store_cost.in.millions.  -0.578896217 -0.066071354  0.025794379  0.0218090927
## total_children           -0.062898781 -0.020519314 -0.019515433 -0.6540013988
## avg_cars_at.home.approx. -0.006181894  0.005979239  0.018256140 -0.3403984083
## num_children_at_home     -0.034762932 -0.019035518 -0.027131545 -0.6694173999
## SRP                      -0.548852319 -0.051489395  0.035797519  0.0785392912
## gross_weight             -0.082326447  0.690829443 -0.124892047 -0.0083056858
## net_weight               -0.079676886  0.691119301 -0.125796108 -0.0086692354
## units_per_case            0.011682432 -0.017238776 -0.001966634 -0.0003085306
## store_sqft               -0.021605364 -0.128047167 -0.694022383  0.0309986662
## grocery_sqft             -0.019637464 -0.127621999 -0.694786535  0.0132906961
##                                    PC5
## store_sales.in.millions. -0.0056727324
## store_cost.in.millions.  -0.0063669526
## total_children           -0.0395987388
## avg_cars_at.home.approx.  0.1212909187
## num_children_at_home     -0.0221525322
## SRP                      -0.0002999228
## gross_weight             -0.0145540651
## net_weight               -0.0119119523
## units_per_case           -0.9913235447
## store_sqft                0.0077980754
## grocery_sqft              0.0044737781

Component Scores

# First rows of the component scores for the retained components.
# seq_len() selects nothing when n_factors is 0 (1:0 would pick column 1), and
# drop = FALSE keeps a matrix even when only one component is retained.
head(pc_results$x[, seq_len(n_factors), drop = FALSE])
##          PC1       PC2        PC3         PC4           PC5
## 1 -0.1553910  1.783757 -0.2061187  0.91203603  0.0502657308
## 2  0.2598667  1.868964 -0.1399851  0.93211498  0.4253317799
## 3  0.9106808  1.888588 -0.2784600  0.06822886 -0.0004642832
## 4  1.0094951  1.900674 -0.2807131 -0.39070709  0.1288036713
## 5  1.7140476 -1.793240  0.4521361  1.52531979 -0.8736308932
## 6  1.5656040 -1.857231  0.3685846 -0.07222889 -1.0702539467

Factor Analysis

Matriks Loading Unrotated

# Factor analysis with n_factors factors, factoring method "pa" and no
# rotation.
fa_unrotated_awal <- fa(data_pca_final, nfactors = n_factors, rotate = "none", fm = "pa")
# Print every loading (cutoff = 0 hides nothing), sorted by factor.
print(fa_unrotated_awal$loadings, cutoff = 0, sort = TRUE)
## 
## Loadings:
##                          PA1    PA2    PA3    PA4    PA5   
## store_sales.in.millions.  0.992 -0.115 -0.033 -0.005 -0.034
## store_cost.in.millions.   0.948 -0.110 -0.029 -0.009 -0.042
## SRP                       0.833 -0.083 -0.039 -0.079  0.097
## gross_weight              0.153  0.979  0.081  0.004  0.001
## net_weight                0.148  0.980  0.082  0.004 -0.002
## store_sqft                0.036 -0.089  0.949 -0.027  0.002
## grocery_sqft              0.033 -0.088  0.952  0.000  0.000
## total_children            0.080 -0.016  0.015  0.626 -0.176
## num_children_at_home      0.042 -0.014  0.023  0.667  0.158
## avg_cars_at.home.approx.  0.005  0.004 -0.015  0.178  0.068
## units_per_case           -0.013 -0.012  0.002  0.001 -0.017
## 
##                  PA1   PA2   PA3   PA4   PA5
## SS loadings    2.630 1.968 1.825 0.876 0.073
## Proportion Var 0.239 0.179 0.166 0.080 0.007
## Cumulative Var 0.239 0.418 0.584 0.664 0.670

Communality

# Communalities of the unrotated solution, highest first, printed to three
# decimals.
h2_values <- sort(fa_unrotated_awal$communality, decreasing = TRUE)
print(round(h2_values, 3))
## store_sales.in.millions.               net_weight             gross_weight 
##                    0.999                    0.989                    0.988 
##             grocery_sqft  store_cost.in.millions.               store_sqft 
##                    0.915                    0.913                    0.911 
##                      SRP     num_children_at_home           total_children 
##                    0.717                    0.472                    0.430 
## avg_cars_at.home.approx.           units_per_case 
##                    0.037                    0.001

Rotasi Varimax

# Same factor model as the unrotated fit, now with varimax rotation.
fa_orthogonal <- fa(data_pca_final, nfactors = n_factors, rotate = "varimax", fm = "pa")

Matriks Loading Terrotasi

# Print every rotated loading (cutoff = 0 hides nothing), sorted by factor.
print(fa_orthogonal$loadings, cutoff = 0, sort = TRUE)
## 
## Loadings:
##                          PA1    PA2    PA3    PA4    PA5   
## store_sales.in.millions.  0.998  0.019  0.008  0.040  0.038
## store_cost.in.millions.   0.954  0.018  0.011  0.034  0.029
## SRP                       0.830  0.013  0.000 -0.022  0.168
## gross_weight              0.014  0.988  0.001  0.002  0.110
## net_weight                0.009  0.989  0.002  0.002  0.107
## store_sqft                0.009 -0.003  0.954 -0.023 -0.028
## grocery_sqft              0.004 -0.002  0.956  0.004 -0.035
## total_children            0.069  0.026  0.007  0.598 -0.259
## num_children_at_home      0.004 -0.010  0.025  0.684  0.059
## avg_cars_at.home.approx. -0.007  0.000 -0.014  0.186  0.042
## units_per_case           -0.010 -0.011  0.002 -0.002 -0.019
## 
##                  PA1   PA2   PA3   PA4   PA5
## SS loadings    2.599 1.956 1.825 0.864 0.128
## Proportion Var 0.236 0.178 0.166 0.079 0.012
## Cumulative Var 0.236 0.414 0.580 0.659 0.670

Evaluasi Loading & Communality

Hapus variabel dengan loading<0,4 dan communality<0,5 (“units_per_case”)

# Remove units_per_case (no loading reaches 0.4 and its communality is below
# 0.5). drop = FALSE guarantees the result stays a data frame, matching the
# earlier column removal.
data_baru <- data_pca_final[
  ,
  !(names(data_pca_final) %in% "units_per_case"),
  drop = FALSE
]

# Bartlett's test on the reduced variable set. psych is already attached at
# the top of the script, so it is not reloaded here.
cortest.bartlett(cor(data_baru), n = nrow(data_baru))
## $chisq
## [1] 570100.5
## 
## $p.value
## [1] 0
## 
## $df
## [1] 45
# KMO (Kaiser-Meyer-Olkin) on the reduced variable set.
kmo_baru<- KMO(data_baru)
print(kmo_baru)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_baru)
## Overall MSA =  0.59
## MSA for each item = 
## store_sales.in.millions.  store_cost.in.millions.           total_children 
##                     0.63                     0.67                     0.52 
## avg_cars_at.home.approx.     num_children_at_home                      SRP 
##                     0.67                     0.53                     0.88 
##             gross_weight               net_weight               store_sqft 
##                     0.50                     0.50                     0.50 
##             grocery_sqft 
##                     0.50
# Re-evaluate the number of factors for the reduced data: run a standardized
# PCA and count the eigenvalues greater than 1. This overwrites the earlier
# eig_val and n_factors.
pc_results_fa <- prcomp(data_baru, scale. = TRUE)
eig_val <- get_eigenvalue(pc_results_fa)
n_factors <- sum(eig_val$eigenvalue > 1)
print(n_factors)
## [1] 4
# Unrotated loading matrix for the reduced data (overwrites the earlier
# fa_unrotated_awal); all loadings are printed, sorted by factor.
fa_unrotated_awal <- fa(data_baru, nfactors = n_factors, rotate = "none", fm = "pa")
print(fa_unrotated_awal$loadings, cutoff = 0, sort = TRUE)
## 
## Loadings:
##                          PA1    PA2    PA3    PA4   
## store_sales.in.millions.  0.992 -0.116 -0.034 -0.004
## store_cost.in.millions.   0.949 -0.111 -0.029 -0.007
## SRP                       0.828 -0.082 -0.038 -0.078
## gross_weight              0.153  0.979  0.080  0.003
## net_weight                0.149  0.980  0.082  0.004
## store_sqft                0.037 -0.089  0.950 -0.026
## grocery_sqft              0.033 -0.088  0.951  0.001
## total_children            0.078 -0.016  0.014  0.608
## num_children_at_home      0.041 -0.013  0.022  0.642
## avg_cars_at.home.approx.  0.005  0.004 -0.015  0.183
## 
##                  PA1   PA2   PA3   PA4
## SS loadings    2.626 1.968 1.825 0.823
## Proportion Var 0.263 0.197 0.183 0.082
## Cumulative Var 0.263 0.459 0.642 0.724
# Communalities of the unrotated solution, highest first, printed to three
# decimals.
h2_values <- sort(fa_unrotated_awal$communality, decreasing = TRUE)
print(round(h2_values, 3))
## store_sales.in.millions.             gross_weight               net_weight 
##                    0.999                    0.989                    0.989 
##             grocery_sqft  store_cost.in.millions.               store_sqft 
##                    0.914                    0.913                    0.912 
##                      SRP     num_children_at_home           total_children 
##                    0.700                    0.415                    0.376 
## avg_cars_at.home.approx. 
##                    0.034
# Varimax rotation on the reduced data (overwrites the earlier fa_orthogonal).
fa_orthogonal <- fa(data_baru, nfactors = n_factors, rotate = "varimax", fm = "pa")

# Rotated loading matrix; all loadings are printed, sorted by factor.
print(fa_orthogonal$loadings, cutoff = 0, sort = TRUE)
## 
## Loadings:
##                          PA1    PA2    PA3    PA4   
## store_sales.in.millions.  0.998  0.012  0.008  0.050
## store_cost.in.millions.   0.955  0.012  0.011  0.045
## SRP                       0.836  0.023 -0.004 -0.033
## gross_weight              0.024  0.994 -0.003  0.003
## net_weight                0.019  0.994 -0.002  0.004
## store_sqft                0.008 -0.003  0.955 -0.023
## grocery_sqft              0.003 -0.002  0.956  0.005
## total_children            0.046 -0.003  0.016  0.612
## num_children_at_home      0.006 -0.005  0.022  0.644
## avg_cars_at.home.approx. -0.005  0.004 -0.016  0.183
## 
##                  PA1   PA2   PA3   PA4
## SS loadings    2.608 1.978 1.827 0.828
## Proportion Var 0.261 0.198 0.183 0.083
## Cumulative Var 0.261 0.459 0.641 0.724

Hapus variabel dengan loading<0,4 dan communality<0,5 (“avg_cars_at.home.approx.”)

# Remove avg_cars_at.home.approx. (no loading reaches 0.4 and its communality
# is below 0.5). drop = FALSE guarantees the result stays a data frame,
# matching the earlier column removal.
data_final <- data_baru[
  ,
  !(names(data_baru) %in% "avg_cars_at.home.approx."),
  drop = FALSE
]

# Bartlett's test on the final variable set. psych is already attached at the
# top of the script, so it is not reloaded here.
cortest.bartlett(cor(data_final), n = nrow(data_final))
## $chisq
## [1] 568862.4
## 
## $p.value
## [1] 0
## 
## $df
## [1] 36
# KMO (Kaiser-Meyer-Olkin) on the final variable set. This reuses the name
# kmo_final, replacing the value computed earlier for data_pca_final.
kmo_final <- KMO(data_final)
print(kmo_final)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_final)
## Overall MSA =  0.59
## MSA for each item = 
## store_sales.in.millions.  store_cost.in.millions.           total_children 
##                     0.63                     0.67                     0.50 
##     num_children_at_home                      SRP             gross_weight 
##                     0.51                     0.88                     0.50 
##               net_weight               store_sqft             grocery_sqft 
##                     0.50                     0.50                     0.50
# Number of factors for the final data: run a standardized PCA and count the
# eigenvalues greater than 1. This overwrites pc_results_fa, eig_val and
# n_factors.
pc_results_fa <- prcomp(data_final, scale. = TRUE)
eig_val <- get_eigenvalue(pc_results_fa)
n_factors <- sum(eig_val$eigenvalue > 1)
print(n_factors)
## [1] 4
# Unrotated loading matrix for the final data (overwrites fa_unrotated_awal);
# all loadings are printed, sorted by factor.
fa_unrotated_awal <- fa(data_final, nfactors = n_factors, rotate = "none", fm = "pa")
print(fa_unrotated_awal$loadings, cutoff = 0, sort = TRUE)
## 
## Loadings:
##                          PA1    PA2    PA3    PA4   
## store_sales.in.millions.  0.992 -0.115 -0.034 -0.003
## store_cost.in.millions.   0.949 -0.111 -0.029 -0.006
## SRP                       0.828 -0.082 -0.039 -0.080
## gross_weight              0.153  0.979  0.080  0.004
## net_weight                0.149  0.980  0.081  0.005
## store_sqft                0.037 -0.089  0.950 -0.030
## grocery_sqft              0.033 -0.088  0.951  0.000
## total_children            0.079 -0.016  0.016  0.641
## num_children_at_home      0.040 -0.013  0.023  0.609
## 
##                  PA1   PA2   PA3   PA4
## SS loadings    2.626 1.968 1.825 0.790
## Proportion Var 0.292 0.219 0.203 0.088
## Cumulative Var 0.292 0.510 0.713 0.801
# Communalities of the unrotated solution, highest first, printed to three
# decimals.
h2_values <- sort(fa_unrotated_awal$communality, decreasing = TRUE)
print(round(h2_values, 3))
## store_sales.in.millions.             gross_weight               net_weight 
##                    0.998                    0.989                    0.989 
##  store_cost.in.millions.               store_sqft             grocery_sqft 
##                    0.913                    0.913                    0.913 
##                      SRP           total_children     num_children_at_home 
##                    0.700                    0.418                    0.374
# Varimax rotation on the final variable set.
fa_orthogonal_final <- fa(data_final, nfactors = n_factors, rotate = "varimax", fm = "pa")

# Rotated loading matrix; all loadings are printed, sorted by factor.
print(fa_orthogonal_final$loadings, cutoff = 0, sort = TRUE)
## 
## Loadings:
##                          PA1    PA2    PA3    PA4   
## store_sales.in.millions.  0.997  0.013  0.007  0.059
## store_cost.in.millions.   0.954  0.012  0.010  0.053
## SRP                       0.836  0.023 -0.004 -0.028
## gross_weight              0.024  0.994 -0.003 -0.002
## net_weight                0.019  0.994 -0.002 -0.001
## store_sqft                0.008 -0.003  0.956 -0.005
## grocery_sqft              0.003 -0.002  0.955  0.025
## total_children            0.040  0.000  0.003  0.645
## num_children_at_home      0.002 -0.002  0.010  0.611
## 
##                  PA1   PA2   PA3   PA4
## SS loadings    2.607 1.978 1.826 0.797
## Proportion Var 0.290 0.220 0.203 0.089
## Cumulative Var 0.290 0.509 0.712 0.801

Proportion Var & Cumulative Var

# Variance accounted for by each rotated factor: SS loadings, proportion and
# cumulative variance, plus the proportions relative to the explained total.
print(fa_orthogonal_final$Vaccounted)
##                             PA1       PA2       PA3        PA4
## SS loadings           2.6067392 1.9776915 1.8260392 0.79729178
## Proportion Var        0.2896377 0.2197435 0.2028932 0.08858798
## Cumulative Var        0.2896377 0.5093812 0.7122744 0.80086241
## Proportion Explained  0.3616572 0.2743836 0.2533435 0.11061572
## Cumulative Proportion 0.3616572 0.6360408 0.8893843 1.00000000

Split Sample

# Split the data at random into two halves for validation.
# The seed is fixed so the split is reproducible.
set.seed(123)
# seq_len() is safe for a zero-row data frame (1:0 would yield c(1, 0)), and
# floor() makes the sample size a whole number when the row count is odd.
index <- sample(
  seq_len(nrow(data_final)),
  size = floor(0.5 * nrow(data_final))
)
sample_1 <- data_final[index, , drop = FALSE]
# Select the complement by positive indices: data_final[-index, ] would return
# zero rows if index were empty.
sample_2 <- data_final[
  setdiff(seq_len(nrow(data_final)), index),
  ,
  drop = FALSE
]

Re-estimate model untuk Sampel 1 dan 2

# Refit the varimax-rotated factor model separately on each half of the data.
fa_sample1 <- fa(sample_1, nfactors = n_factors, rotate = "varimax", fm = "pa")
# NOTE(review): the recorded run emitted an "ultra-Heywood case" warning for
# this fit; examine the sample-2 solution carefully.
fa_sample2 <- fa(sample_2, nfactors = n_factors, rotate = "varimax", fm = "pa")
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

Perbandingan Loading

# Loadings for sample 1; values below the 0.4 cutoff are left blank.
print(fa_sample1$loadings, cutoff = 0.4)
## 
## Loadings:
##                          PA1    PA2    PA3    PA4   
## store_sales.in.millions.  0.996                     
## store_cost.in.millions.   0.955                     
## total_children                                 0.648
## num_children_at_home                           0.616
## SRP                       0.839                     
## gross_weight                     0.994              
## net_weight                       0.994              
## store_sqft                              0.955       
## grocery_sqft                            0.955       
## 
##                  PA1   PA2   PA3   PA4
## SS loadings    2.612 1.977 1.825 0.808
## Proportion Var 0.290 0.220 0.203 0.090
## Cumulative Var 0.290 0.510 0.713 0.803
# Loadings for sample 2; values below the 0.4 cutoff are left blank.
print(fa_sample2$loadings, cutoff = 0.4)
## 
## Loadings:
##                          PA1    PA2    PA3    PA4   
## store_sales.in.millions.  0.998                     
## store_cost.in.millions.   0.953                     
## total_children                                 0.641
## num_children_at_home                           0.606
## SRP                       0.833                     
## gross_weight                     0.994              
## net_weight                       0.994              
## store_sqft                              0.956       
## grocery_sqft                            0.955       
## 
##                  PA1   PA2   PA3   PA4
## SS loadings    2.602 1.978 1.827 0.787
## Proportion Var 0.289 0.220 0.203 0.087
## Cumulative Var 0.289 0.509 0.712 0.799