library(psych)
library(factoextra)
library(sjPlot)
library(ppcor)
library(corrplot)
# Load the raw data set from the working directory. read.csv() keeps its
# defaults, so column names are made syntactically valid.
data <- utils::read.csv(file = "media prediction and its cost.csv")
Cek Duplikat Baris
# Count the rows that are exact repeats of an earlier row and report the total.
dup_bar <- length(which(duplicated(data)))
cat("Jumlah baris duplikat:", dup_bar)
## Jumlah baris duplikat: 0
Cek Duplikat Kolom
# Treat the data frame as a list of columns: a column is flagged when its
# values are identical to those of an earlier column.
kol_dup <- names(data)[which(duplicated(as.list(data)))]
if (length(kol_dup) == 0) {
  cat("Kolom duplikat:", "Tidak ada")
} else {
  cat("Kolom duplikat:", kol_dup)
}
## Kolom duplikat: avg_cars_at.home.approx..1 prepared_food
Mengambil Kolom Numerik
# Keep only the numeric columns. vapply() always returns a logical vector
# (even for a zero-column input), whereas sapply() can return a list and make
# the subsetting fail.
data_clean <- data[vapply(data, is.numeric, logical(1))]
Hapus Kolom Duplikat, Biner, dan Target
# Columns excluded from the analysis, grouped by reason.
cols_to_remove <- c(
  # first of the two duplicated columns reported by the duplicate-column check
  "avg_cars_at.home.approx..1",
  # columns the section heading classifies as binary; prepared_food was also
  # reported as a duplicated column
  "recyclable_package", "low_fat", "coffee_bar", "video_store",
  "salad_bar", "prepared_food", "florist",
  # remaining entry, i.e. the target named in the section heading
  "cost"
)
# drop = FALSE keeps a data frame even if a single column survives.
data_clean <- data_clean[, !names(data_clean) %in% cols_to_remove, drop = FALSE]
Hapus missing value
# Listwise deletion: discard every row that has at least one missing value.
data_clean <- stats::na.omit(object = data_clean)
Visualisasi Distribusi Tiap Variabel (Histogram)
# Draw one histogram per variable on a grid with three columns.
# par() returns the previous settings; they are restored after the loop so the
# multi-panel layout and the tight margins do not leak into later plots.
old_par <- par(
  mfrow = c(ceiling(ncol(data_clean) / 3), 3),
  mar = c(2, 2, 2, 1)
)
for (var_name in names(data_clean)) {
  hist(data_clean[[var_name]], main = var_name, xlab = var_name)
}
par(old_par)
Statistik Sederhana
# Descriptive statistics for every retained variable (psych::describe).
summary_table <- describe(x = data_clean)
print(summary_table)
## vars n mean sd median trimmed mad
## store_sales.in.millions. 1 60428 6.54 3.46 5.94 6.28 3.53
## store_cost.in.millions. 2 60428 2.62 1.45 2.39 2.49 1.45
## unit_sales.in.millions. 3 60428 3.09 0.83 3.00 3.09 1.48
## total_children 4 60428 2.53 1.49 3.00 2.53 1.48
## avg_cars_at.home.approx. 5 60428 2.20 1.11 2.00 2.20 1.48
## num_children_at_home 6 60428 0.83 1.30 0.00 0.56 0.00
## SRP 7 60428 2.12 0.93 2.13 2.10 1.02
## gross_weight 8 60428 13.81 4.62 13.60 13.74 5.93
## net_weight 9 60428 11.80 4.68 11.60 11.73 6.08
## units_per_case 10 60428 18.86 10.26 19.00 18.96 13.34
## store_sqft 11 60428 27988.48 5701.02 27694.00 27615.88 6080.14
## grocery_sqft 12 60428 19133.80 3987.40 18670.00 18936.07 4941.51
## frozen_sqft 13 60428 5312.85 1575.91 5062.00 5240.34 846.56
## meat_sqft 14 60428 3541.85 1050.47 3375.00 3493.49 563.39
## min max range skew kurtosis se
## store_sales.in.millions. 0.51 22.92 22.41 0.68 0.09 0.01
## store_cost.in.millions. 0.16 9.73 9.56 0.83 0.54 0.01
## unit_sales.in.millions. 1.00 6.00 5.00 0.05 -0.32 0.00
## total_children 0.00 5.00 5.00 -0.01 -1.04 0.01
## avg_cars_at.home.approx. 0.00 4.00 4.00 -0.08 -0.79 0.00
## num_children_at_home 0.00 5.00 5.00 1.55 1.47 0.01
## SRP 0.50 3.98 3.48 0.14 -0.89 0.00
## gross_weight 6.00 21.90 15.90 0.09 -1.23 0.02
## net_weight 3.05 20.80 17.75 0.11 -1.19 0.02
## units_per_case 1.00 36.00 35.00 -0.08 -1.25 0.04
## store_sqft 20319.00 39696.00 19377.00 0.39 -0.94 23.19
## grocery_sqft 13305.00 30351.00 17046.00 0.39 -0.54 16.22
## frozen_sqft 2452.00 9184.00 6732.00 0.56 0.61 6.41
## meat_sqft 1635.00 6122.00 4487.00 0.56 0.60 4.27
Cek Tipe Data
# Show the storage type and the first values of each column.
utils::str(object = data_clean)
## 'data.frame': 60428 obs. of 14 variables:
## $ store_sales.in.millions.: num 7.36 5.52 3.68 3.68 4.08 4.08 5.44 3.74 4.08 9.72 ...
## $ store_cost.in.millions. : num 2.72 2.59 1.36 1.18 1.43 ...
## $ unit_sales.in.millions. : num 4 3 2 2 3 3 4 2 3 3 ...
## $ total_children : num 1 0 4 2 0 2 4 1 2 3 ...
## $ avg_cars_at.home.approx.: num 1 4 1 2 2 1 2 4 2 1 ...
## $ num_children_at_home : num 1 0 0 2 0 2 0 0 0 0 ...
## $ SRP : num 1.84 1.84 1.84 1.84 1.36 1.36 1.36 1.87 1.36 3.24 ...
## $ gross_weight : num 19.7 19.7 19.7 19.7 7.12 7.12 7.12 16.7 7.12 16.3 ...
## $ net_weight : num 17.7 17.7 17.7 17.7 5.11 5.11 5.11 14.7 5.11 14.2 ...
## $ units_per_case : num 17 17 17 17 29 29 29 10 29 25 ...
## $ store_sqft : num 27694 27694 27694 27694 27694 ...
## $ grocery_sqft : num 18670 18670 18670 18670 18670 ...
## $ frozen_sqft : num 5415 5415 5415 5415 5415 ...
## $ meat_sqft : num 3610 3610 3610 3610 3610 3610 3610 3610 3610 3610 ...
# Correlation matrix (cor() defaults), rounded to three decimals, followed by
# a formatted correlation table from sjPlot.
mat_corr <- round(cor(data_clean), digits = 3)
tab_corr(data = data_clean)
| store_sales.in.millions. | store_cost.in.millions. | unit_sales.in.millions. | total_children | avg_cars_at.home.approx. | num_children_at_home | SRP | gross_weight | net_weight | units_per_case | store_sqft | grocery_sqft | frozen_sqft | meat_sqft | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| store_sales.in.millions. | 0.955*** | 0.503*** | 0.083*** | 0.004 | 0.032*** | 0.833*** | 0.036*** | 0.032*** | -0.011** | 0.016*** | 0.010* | 0.018*** | 0.018*** | |
| store_cost.in.millions. | 0.955*** | 0.480*** | 0.079*** | 0.003 | 0.028*** | 0.796*** | 0.034*** | 0.030*** | -0.010* | 0.018*** | 0.013** | 0.019*** | 0.019*** | |
| unit_sales.in.millions. | 0.503*** | 0.480*** | 0.163*** | 0.024*** | 0.067*** | -0.002 | 0.001 | 0.001 | 0.000 | 0.031*** | 0.025*** | 0.031*** | 0.031*** | |
| total_children | 0.083*** | 0.079*** | 0.163*** | 0.098*** | 0.395*** | 0.001 | -0.000 | 0.000 | 0.002 | 0.001 | 0.019*** | -0.027*** | -0.027*** | |
| avg_cars_at.home.approx. | 0.004 | 0.003 | 0.024*** | 0.098*** | 0.131*** | -0.008 | 0.005 | 0.004 | -0.007 | -0.016*** | -0.018*** | -0.007 | -0.007 | |
| num_children_at_home | 0.032*** | 0.028*** | 0.067*** | 0.395*** | 0.131*** | -0.000 | -0.002 | -0.003 | 0.000 | 0.006 | 0.025*** | -0.025*** | -0.025*** | |
| SRP | 0.833*** | 0.796*** | -0.002 | 0.001 | -0.008 | -0.000 | 0.043*** | 0.039*** | -0.012** | 0.002 | -0.001 | 0.004 | 0.004 | |
| gross_weight | 0.036*** | 0.034*** | 0.001 | -0.000 | 0.005 | -0.002 | 0.043*** | 0.989*** | -0.012** | -0.005 | -0.005 | -0.004 | -0.004 | |
| net_weight | 0.032*** | 0.030*** | 0.001 | 0.000 | 0.004 | -0.003 | 0.039*** | 0.989*** | -0.014*** | -0.004 | -0.004 | -0.004 | -0.004 | |
| units_per_case | -0.011** | -0.010* | 0.000 | 0.002 | -0.007 | 0.000 | -0.012** | -0.012** | -0.014*** | 0.002 | 0.004 | -0.001 | -0.001 | |
| store_sqft | 0.016*** | 0.018*** | 0.031*** | 0.001 | -0.016*** | 0.006 | 0.002 | -0.005 | -0.004 | 0.002 | 0.913*** | 0.785*** | 0.785*** | |
| grocery_sqft | 0.010* | 0.013** | 0.025*** | 0.019*** | -0.018*** | 0.025*** | -0.001 | -0.005 | -0.004 | 0.004 | 0.913*** | 0.463*** | 0.463*** | |
| frozen_sqft | 0.018*** | 0.019*** | 0.031*** | -0.027*** | -0.007 | -0.025*** | 0.004 | -0.004 | -0.004 | -0.001 | 0.785*** | 0.463*** | 1.000*** | |
| meat_sqft | 0.018*** | 0.019*** | 0.031*** | -0.027*** | -0.007 | -0.025*** | 0.004 | -0.004 | -0.004 | -0.001 | 0.785*** | 0.463*** | 1.000*** | |
| Computed correlation used pearson-method with listwise-deletion. | ||||||||||||||
# frozen_sqft and meat_sqft are perfectly correlated (1.000 in the table
# above), so one of the pair is redundant: meat_sqft is dropped and
# frozen_sqft is kept.
# drop = FALSE guarantees the result stays a data frame.
data_clean <- data_clean[
  , setdiff(names(data_clean), "meat_sqft"),
  drop = FALSE
]
# Re-check multicollinearity on the reduced variable set.
mat_corr <- round(cor(data_clean), 3)
tab_corr(data_clean)
| store_sales.in.millions. | store_cost.in.millions. | unit_sales.in.millions. | total_children | avg_cars_at.home.approx. | num_children_at_home | SRP | gross_weight | net_weight | units_per_case | store_sqft | grocery_sqft | frozen_sqft | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| store_sales.in.millions. | 0.955*** | 0.503*** | 0.083*** | 0.004 | 0.032*** | 0.833*** | 0.036*** | 0.032*** | -0.011** | 0.016*** | 0.010* | 0.018*** | |
| store_cost.in.millions. | 0.955*** | 0.480*** | 0.079*** | 0.003 | 0.028*** | 0.796*** | 0.034*** | 0.030*** | -0.010* | 0.018*** | 0.013** | 0.019*** | |
| unit_sales.in.millions. | 0.503*** | 0.480*** | 0.163*** | 0.024*** | 0.067*** | -0.002 | 0.001 | 0.001 | 0.000 | 0.031*** | 0.025*** | 0.031*** | |
| total_children | 0.083*** | 0.079*** | 0.163*** | 0.098*** | 0.395*** | 0.001 | -0.000 | 0.000 | 0.002 | 0.001 | 0.019*** | -0.027*** | |
| avg_cars_at.home.approx. | 0.004 | 0.003 | 0.024*** | 0.098*** | 0.131*** | -0.008 | 0.005 | 0.004 | -0.007 | -0.016*** | -0.018*** | -0.007 | |
| num_children_at_home | 0.032*** | 0.028*** | 0.067*** | 0.395*** | 0.131*** | -0.000 | -0.002 | -0.003 | 0.000 | 0.006 | 0.025*** | -0.025*** | |
| SRP | 0.833*** | 0.796*** | -0.002 | 0.001 | -0.008 | -0.000 | 0.043*** | 0.039*** | -0.012** | 0.002 | -0.001 | 0.004 | |
| gross_weight | 0.036*** | 0.034*** | 0.001 | -0.000 | 0.005 | -0.002 | 0.043*** | 0.989*** | -0.012** | -0.005 | -0.005 | -0.004 | |
| net_weight | 0.032*** | 0.030*** | 0.001 | 0.000 | 0.004 | -0.003 | 0.039*** | 0.989*** | -0.014*** | -0.004 | -0.004 | -0.004 | |
| units_per_case | -0.011** | -0.010* | 0.000 | 0.002 | -0.007 | 0.000 | -0.012** | -0.012** | -0.014*** | 0.002 | 0.004 | -0.001 | |
| store_sqft | 0.016*** | 0.018*** | 0.031*** | 0.001 | -0.016*** | 0.006 | 0.002 | -0.005 | -0.004 | 0.002 | 0.913*** | 0.785*** | |
| grocery_sqft | 0.010* | 0.013** | 0.025*** | 0.019*** | -0.018*** | 0.025*** | -0.001 | -0.005 | -0.004 | 0.004 | 0.913*** | 0.463*** | |
| frozen_sqft | 0.018*** | 0.019*** | 0.031*** | -0.027*** | -0.007 | -0.025*** | 0.004 | -0.004 | -0.004 | -0.001 | 0.785*** | 0.463*** | |
| Computed correlation used pearson-method with listwise-deletion. | |||||||||||||
Visualisasi Heatmap Korelasi
# Lower-triangle heat map of the rounded correlation matrix with the
# coefficients printed in each cell and the colour legend at the bottom.
corrplot(
  corr = mat_corr,
  method = "color", type = "lower", diag = FALSE,
  tl.col = "black", tl.srt = 45, tl.cex = 0.7,
  addCoef.col = "black", number.cex = 0.6,
  cl.pos = "b"
)
# Bartlett's test on the correlation matrix of the current variable set, with
# the number of rows as sample size.
# psych is already attached at the top of the file, so it is not re-loaded.
cortest.bartlett(cor(data_clean), n = nrow(data_clean))
## $chisq
## [1] 1703182
##
## $p.value
## [1] 0
##
## $df
## [1] 78
# Initial Kaiser-Meyer-Olkin sampling adequacy (overall and per-item MSA).
kmo_awal <- KMO(r = data_clean)
print(kmo_awal)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_clean)
## Overall MSA = 0.47
## MSA for each item =
## store_sales.in.millions. store_cost.in.millions. unit_sales.in.millions.
## 0.55 0.84 0.28
## total_children avg_cars_at.home.approx. num_children_at_home
## 0.58 0.67 0.54
## SRP gross_weight net_weight
## 0.49 0.50 0.50
## units_per_case store_sqft grocery_sqft
## 0.51 0.42 0.34
## frozen_sqft
## 0.29
# Remove unit_sales.in.millions., the item with the lowest MSA (0.28) in the
# initial KMO output. drop = FALSE keeps the result a data frame.
data_pca_satu <- data_clean[
  , setdiff(names(data_clean), "unit_sales.in.millions."),
  drop = FALSE
]
# Re-run KMO on the reduced set.
kmo_satu <- KMO(data_pca_satu)
print(kmo_satu)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_pca_satu)
## Overall MSA = 0.5
## MSA for each item =
## store_sales.in.millions. store_cost.in.millions. total_children
## 0.63 0.67 0.52
## avg_cars_at.home.approx. num_children_at_home SRP
## 0.66 0.53 0.88
## gross_weight net_weight units_per_case
## 0.50 0.50 0.51
## store_sqft grocery_sqft frozen_sqft
## 0.42 0.34 0.29
# Remove frozen_sqft, the item with the lowest MSA (0.29) in the previous KMO
# output. drop = FALSE keeps the result a data frame.
data_pca_final <- data_pca_satu[
  , setdiff(names(data_pca_satu), "frozen_sqft"),
  drop = FALSE
]
# Re-run KMO on the reduced set.
kmo_final <- KMO(data_pca_final)
print(kmo_final)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_pca_final)
## Overall MSA = 0.59
## MSA for each item =
## store_sales.in.millions. store_cost.in.millions. total_children
## 0.63 0.67 0.52
## avg_cars_at.home.approx. num_children_at_home SRP
## 0.67 0.53 0.88
## gross_weight net_weight units_per_case
## 0.50 0.50 0.50
## store_sqft grocery_sqft
## 0.50 0.50
# Bartlett's test on the correlation matrix of the variables kept for PCA.
# psych is already attached at the top of the file, so it is not re-loaded.
cortest.bartlett(cor(data_pca_final), n = nrow(data_pca_final))
## $chisq
## [1] 570140.9
##
## $p.value
## [1] 0
##
## $df
## [1] 55
# Rounded correlation matrix of the PCA variables, plus a titled sjPlot
# correlation table (called with an output file name).
mat_corr_final <- round(cor(data_pca_final), digits = 3)
tab_corr(
  data = data_pca_final,
  title = "Tabel 2. Matriks Korelasi Variabel Penelitian",
  file = "tabel_korelasi.html"
)
| store_sales.in.millions. | store_cost.in.millions. | total_children | avg_cars_at.home.approx. | num_children_at_home | SRP | gross_weight | net_weight | units_per_case | store_sqft | grocery_sqft | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| store_sales.in.millions. | 0.955*** | 0.083*** | 0.004 | 0.032*** | 0.833*** | 0.036*** | 0.032*** | -0.011** | 0.016*** | 0.010* | |
| store_cost.in.millions. | 0.955*** | 0.079*** | 0.003 | 0.028*** | 0.796*** | 0.034*** | 0.030*** | -0.010* | 0.018*** | 0.013** | |
| total_children | 0.083*** | 0.079*** | 0.098*** | 0.395*** | 0.001 | -0.000 | 0.000 | 0.002 | 0.001 | 0.019*** | |
| avg_cars_at.home.approx. | 0.004 | 0.003 | 0.098*** | 0.131*** | -0.008 | 0.005 | 0.004 | -0.007 | -0.016*** | -0.018*** | |
| num_children_at_home | 0.032*** | 0.028*** | 0.395*** | 0.131*** | -0.000 | -0.002 | -0.003 | 0.000 | 0.006 | 0.025*** | |
| SRP | 0.833*** | 0.796*** | 0.001 | -0.008 | -0.000 | 0.043*** | 0.039*** | -0.012** | 0.002 | -0.001 | |
| gross_weight | 0.036*** | 0.034*** | -0.000 | 0.005 | -0.002 | 0.043*** | 0.989*** | -0.012** | -0.005 | -0.005 | |
| net_weight | 0.032*** | 0.030*** | 0.000 | 0.004 | -0.003 | 0.039*** | 0.989*** | -0.014*** | -0.004 | -0.004 | |
| units_per_case | -0.011** | -0.010* | 0.002 | -0.007 | 0.000 | -0.012** | -0.012** | -0.014*** | 0.002 | 0.004 | |
| store_sqft | 0.016*** | 0.018*** | 0.001 | -0.016*** | 0.006 | 0.002 | -0.005 | -0.004 | 0.002 | 0.913*** | |
| grocery_sqft | 0.010* | 0.013** | 0.019*** | -0.018*** | 0.025*** | -0.001 | -0.005 | -0.004 | 0.004 | 0.913*** | |
| Computed correlation used pearson-method with listwise-deletion. | |||||||||||
# PCA on standardised variables: centring is prcomp's default and scale. = TRUE
# rescales each column to unit variance.
pc_results <- prcomp(x = data_pca_final, center = TRUE, scale. = TRUE)
# Eigenvalues with individual and cumulative percentage of variance.
eig_val <- get_eigenvalue(pc_results)
print(eig_val)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.74288917 24.9353561 24.93536
## Dim.2 1.98164086 18.0149169 42.95027
## Dim.3 1.91143247 17.3766588 60.32693
## Dim.4 1.44953777 13.1776161 73.50455
## Dim.5 1.00047883 9.0952621 82.59981
## Dim.6 0.94117964 8.5561785 91.15599
## Dim.7 0.60497917 5.4998107 96.65580
## Dim.8 0.22745721 2.0677928 98.72359
## Dim.9 0.08680822 0.7891656 99.51276
## Dim.10 0.04251539 0.3865035 99.89926
## Dim.11 0.01108127 0.1007388 100.00000
# Scree plot with percentage labels; the y axis is fixed to the 0-50 range.
fviz_eig(
  pc_results,
  addlabels = TRUE,
  ylim = c(0, 50)
)
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.
# Kaiser criterion: retain as many components as there are eigenvalues above 1.
n_factors <- sum(eig_val[["eigenvalue"]] > 1)
print(n_factors)
## [1] 5
(hubungan variabel dengan komponen)
# Columns of prcomp's rotation matrix (the eigenvectors) for the retained
# components. seq_len() yields no columns when n_factors is 0 (1:0 would pick
# column 1), and drop = FALSE keeps a matrix when n_factors is 1.
pca_loading <- pc_results$rotation[, seq_len(n_factors), drop = FALSE]
print(pca_loading)
## PC1 PC2 PC3 PC4
## store_sales.in.millions. -0.586789926 -0.065516705 0.027945997 0.0191203234
## store_cost.in.millions. -0.578896217 -0.066071354 0.025794379 0.0218090927
## total_children -0.062898781 -0.020519314 -0.019515433 -0.6540013988
## avg_cars_at.home.approx. -0.006181894 0.005979239 0.018256140 -0.3403984083
## num_children_at_home -0.034762932 -0.019035518 -0.027131545 -0.6694173999
## SRP -0.548852319 -0.051489395 0.035797519 0.0785392912
## gross_weight -0.082326447 0.690829443 -0.124892047 -0.0083056858
## net_weight -0.079676886 0.691119301 -0.125796108 -0.0086692354
## units_per_case 0.011682432 -0.017238776 -0.001966634 -0.0003085306
## store_sqft -0.021605364 -0.128047167 -0.694022383 0.0309986662
## grocery_sqft -0.019637464 -0.127621999 -0.694786535 0.0132906961
## PC5
## store_sales.in.millions. -0.0056727324
## store_cost.in.millions. -0.0063669526
## total_children -0.0395987388
## avg_cars_at.home.approx. 0.1212909187
## num_children_at_home -0.0221525322
## SRP -0.0002999228
## gross_weight -0.0145540651
## net_weight -0.0119119523
## units_per_case -0.9913235447
## store_sqft 0.0077980754
## grocery_sqft 0.0044737781
# First rows of the component scores for the retained components.
# seq_len() and drop = FALSE keep the selection a matrix for any n_factors.
head(pc_results$x[, seq_len(n_factors), drop = FALSE])
## PC1 PC2 PC3 PC4 PC5
## 1 -0.1553910 1.783757 -0.2061187 0.91203603 0.0502657308
## 2 0.2598667 1.868964 -0.1399851 0.93211498 0.4253317799
## 3 0.9106808 1.888588 -0.2784600 0.06822886 -0.0004642832
## 4 1.0094951 1.900674 -0.2807131 -0.39070709 0.1288036713
## 5 1.7140476 -1.793240 0.4521361 1.52531979 -0.8736308932
## 6 1.5656040 -1.857231 0.3685846 -0.07222889 -1.0702539467
# Principal-axis factoring without rotation; print every loading (no cutoff),
# sorted by factor.
fa_unrotated_awal <- fa(
  r = data_pca_final,
  nfactors = n_factors,
  fm = "pa",
  rotate = "none"
)
print(fa_unrotated_awal[["loadings"]], sort = TRUE, cutoff = 0)
##
## Loadings:
## PA1 PA2 PA3 PA4 PA5
## store_sales.in.millions. 0.992 -0.115 -0.033 -0.005 -0.034
## store_cost.in.millions. 0.948 -0.110 -0.029 -0.009 -0.042
## SRP 0.833 -0.083 -0.039 -0.079 0.097
## gross_weight 0.153 0.979 0.081 0.004 0.001
## net_weight 0.148 0.980 0.082 0.004 -0.002
## store_sqft 0.036 -0.089 0.949 -0.027 0.002
## grocery_sqft 0.033 -0.088 0.952 0.000 0.000
## total_children 0.080 -0.016 0.015 0.626 -0.176
## num_children_at_home 0.042 -0.014 0.023 0.667 0.158
## avg_cars_at.home.approx. 0.005 0.004 -0.015 0.178 0.068
## units_per_case -0.013 -0.012 0.002 0.001 -0.017
##
## PA1 PA2 PA3 PA4 PA5
## SS loadings 2.630 1.968 1.825 0.876 0.073
## Proportion Var 0.239 0.179 0.166 0.080 0.007
## Cumulative Var 0.239 0.418 0.584 0.664 0.670
# Communalities of the unrotated solution, largest first, to three decimals.
h2_values <- sort(fa_unrotated_awal[["communality"]], decreasing = TRUE)
print(round(h2_values, digits = 3))
## store_sales.in.millions. net_weight gross_weight
## 0.999 0.989 0.988
## grocery_sqft store_cost.in.millions. store_sqft
## 0.915 0.913 0.911
## SRP num_children_at_home total_children
## 0.717 0.472 0.430
## avg_cars_at.home.approx. units_per_case
## 0.037 0.001
# Same principal-axis model with an orthogonal (varimax) rotation.
fa_orthogonal <- fa(
  r = data_pca_final,
  nfactors = n_factors,
  fm = "pa",
  rotate = "varimax"
)
print(fa_orthogonal[["loadings"]], sort = TRUE, cutoff = 0)
##
## Loadings:
## PA1 PA2 PA3 PA4 PA5
## store_sales.in.millions. 0.998 0.019 0.008 0.040 0.038
## store_cost.in.millions. 0.954 0.018 0.011 0.034 0.029
## SRP 0.830 0.013 0.000 -0.022 0.168
## gross_weight 0.014 0.988 0.001 0.002 0.110
## net_weight 0.009 0.989 0.002 0.002 0.107
## store_sqft 0.009 -0.003 0.954 -0.023 -0.028
## grocery_sqft 0.004 -0.002 0.956 0.004 -0.035
## total_children 0.069 0.026 0.007 0.598 -0.259
## num_children_at_home 0.004 -0.010 0.025 0.684 0.059
## avg_cars_at.home.approx. -0.007 0.000 -0.014 0.186 0.042
## units_per_case -0.010 -0.011 0.002 -0.002 -0.019
##
## PA1 PA2 PA3 PA4 PA5
## SS loadings 2.599 1.956 1.825 0.864 0.128
## Proportion Var 0.236 0.178 0.166 0.079 0.012
## Cumulative Var 0.236 0.414 0.580 0.659 0.670
Hapus variabel dengan loading < 0,4 dan communality < 0,5 (“units_per_case”)
# Drop units_per_case (communality 0.001 and no rotated loading above 0.4 in
# the output above). drop = FALSE keeps the result a data frame.
data_baru <- data_pca_final[
  , setdiff(names(data_pca_final), "units_per_case"),
  drop = FALSE
]
# Bartlett's test on the reduced set. psych is already attached at the top of
# the file, so it is not re-loaded here.
cortest.bartlett(cor(data_baru), n = nrow(data_baru))
## $chisq
## [1] 570100.5
##
## $p.value
## [1] 0
##
## $df
## [1] 45
# Kaiser-Meyer-Olkin sampling adequacy after removing units_per_case.
kmo_baru <- KMO(r = data_baru)
print(kmo_baru)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_baru)
## Overall MSA = 0.59
## MSA for each item =
## store_sales.in.millions. store_cost.in.millions. total_children
## 0.63 0.67 0.52
## avg_cars_at.home.approx. num_children_at_home SRP
## 0.67 0.53 0.88
## gross_weight net_weight store_sqft
## 0.50 0.50 0.50
## grocery_sqft
## 0.50
# Re-evaluate the number of factors on the reduced set: PCA on standardised
# variables, then count the eigenvalues above 1 (Kaiser criterion).
# eig_val and n_factors are overwritten with the new values.
pc_results_fa <- prcomp(x = data_baru, center = TRUE, scale. = TRUE)
eig_val <- get_eigenvalue(pc_results_fa)
n_factors <- sum(eig_val[["eigenvalue"]] > 1)
print(n_factors)
## [1] 4
# Unrotated principal-axis loadings for the reduced set (all values shown,
# sorted by factor). Overwrites the earlier fa_unrotated_awal.
fa_unrotated_awal <- fa(
  r = data_baru,
  nfactors = n_factors,
  fm = "pa",
  rotate = "none"
)
print(fa_unrotated_awal[["loadings"]], sort = TRUE, cutoff = 0)
##
## Loadings:
## PA1 PA2 PA3 PA4
## store_sales.in.millions. 0.992 -0.116 -0.034 -0.004
## store_cost.in.millions. 0.949 -0.111 -0.029 -0.007
## SRP 0.828 -0.082 -0.038 -0.078
## gross_weight 0.153 0.979 0.080 0.003
## net_weight 0.149 0.980 0.082 0.004
## store_sqft 0.037 -0.089 0.950 -0.026
## grocery_sqft 0.033 -0.088 0.951 0.001
## total_children 0.078 -0.016 0.014 0.608
## num_children_at_home 0.041 -0.013 0.022 0.642
## avg_cars_at.home.approx. 0.005 0.004 -0.015 0.183
##
## PA1 PA2 PA3 PA4
## SS loadings 2.626 1.968 1.825 0.823
## Proportion Var 0.263 0.197 0.183 0.082
## Cumulative Var 0.263 0.459 0.642 0.724
# Communalities of the unrotated solution, largest first, to three decimals.
h2_values <- sort(fa_unrotated_awal[["communality"]], decreasing = TRUE)
print(round(h2_values, digits = 3))
## store_sales.in.millions. gross_weight net_weight
## 0.999 0.989 0.989
## grocery_sqft store_cost.in.millions. store_sqft
## 0.914 0.913 0.912
## SRP num_children_at_home total_children
## 0.700 0.415 0.376
## avg_cars_at.home.approx.
## 0.034
# Varimax (orthogonal) rotation of the principal-axis solution.
fa_orthogonal <- fa(
  r = data_baru,
  nfactors = n_factors,
  fm = "pa",
  rotate = "varimax"
)
# Rotated loading matrix, all values shown and sorted by factor.
print(fa_orthogonal[["loadings"]], sort = TRUE, cutoff = 0)
##
## Loadings:
## PA1 PA2 PA3 PA4
## store_sales.in.millions. 0.998 0.012 0.008 0.050
## store_cost.in.millions. 0.955 0.012 0.011 0.045
## SRP 0.836 0.023 -0.004 -0.033
## gross_weight 0.024 0.994 -0.003 0.003
## net_weight 0.019 0.994 -0.002 0.004
## store_sqft 0.008 -0.003 0.955 -0.023
## grocery_sqft 0.003 -0.002 0.956 0.005
## total_children 0.046 -0.003 0.016 0.612
## num_children_at_home 0.006 -0.005 0.022 0.644
## avg_cars_at.home.approx. -0.005 0.004 -0.016 0.183
##
## PA1 PA2 PA3 PA4
## SS loadings 2.608 1.978 1.827 0.828
## Proportion Var 0.261 0.198 0.183 0.083
## Cumulative Var 0.261 0.459 0.641 0.724
Hapus variabel dengan loading < 0,4 dan communality < 0,5 (“avg_cars_at.home.approx.”)
# Drop avg_cars_at.home.approx. (communality 0.034 and highest rotated loading
# 0.183 in the output above). drop = FALSE keeps the result a data frame.
data_final <- data_baru[
  , setdiff(names(data_baru), "avg_cars_at.home.approx."),
  drop = FALSE
]
# Bartlett's test on the final set. psych is already attached at the top of
# the file, so it is not re-loaded here.
cortest.bartlett(cor(data_final), n = nrow(data_final))
## $chisq
## [1] 568862.4
##
## $p.value
## [1] 0
##
## $df
## [1] 36
# Kaiser-Meyer-Olkin sampling adequacy for the final variable set.
# NOTE(review): this reuses the name kmo_final from the earlier KMO step and
# overwrites that result.
kmo_final <- KMO(r = data_final)
print(kmo_final)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_final)
## Overall MSA = 0.59
## MSA for each item =
## store_sales.in.millions. store_cost.in.millions. total_children
## 0.63 0.67 0.50
## num_children_at_home SRP gross_weight
## 0.51 0.88 0.50
## net_weight store_sqft grocery_sqft
## 0.50 0.50 0.50
# Number of factors for the final set: PCA on standardised variables, then
# count the eigenvalues above 1 (Kaiser criterion).
pc_results_fa <- prcomp(x = data_final, center = TRUE, scale. = TRUE)
eig_val <- get_eigenvalue(pc_results_fa)
n_factors <- sum(eig_val[["eigenvalue"]] > 1)
print(n_factors)
## [1] 4
# Unrotated principal-axis loadings for the final set (all values shown,
# sorted by factor). Overwrites the earlier fa_unrotated_awal.
fa_unrotated_awal <- fa(
  r = data_final,
  nfactors = n_factors,
  fm = "pa",
  rotate = "none"
)
print(fa_unrotated_awal[["loadings"]], sort = TRUE, cutoff = 0)
##
## Loadings:
## PA1 PA2 PA3 PA4
## store_sales.in.millions. 0.992 -0.115 -0.034 -0.003
## store_cost.in.millions. 0.949 -0.111 -0.029 -0.006
## SRP 0.828 -0.082 -0.039 -0.080
## gross_weight 0.153 0.979 0.080 0.004
## net_weight 0.149 0.980 0.081 0.005
## store_sqft 0.037 -0.089 0.950 -0.030
## grocery_sqft 0.033 -0.088 0.951 0.000
## total_children 0.079 -0.016 0.016 0.641
## num_children_at_home 0.040 -0.013 0.023 0.609
##
## PA1 PA2 PA3 PA4
## SS loadings 2.626 1.968 1.825 0.790
## Proportion Var 0.292 0.219 0.203 0.088
## Cumulative Var 0.292 0.510 0.713 0.801
# Communalities of the unrotated solution, largest first, to three decimals.
h2_values <- sort(fa_unrotated_awal[["communality"]], decreasing = TRUE)
print(round(h2_values, digits = 3))
## store_sales.in.millions. gross_weight net_weight
## 0.998 0.989 0.989
## store_cost.in.millions. store_sqft grocery_sqft
## 0.913 0.913 0.913
## SRP total_children num_children_at_home
## 0.700 0.418 0.374
# Final model: principal-axis factoring with varimax rotation.
fa_orthogonal_final <- fa(
  r = data_final,
  nfactors = n_factors,
  fm = "pa",
  rotate = "varimax"
)
# Rotated loading matrix, all values shown and sorted by factor.
print(fa_orthogonal_final[["loadings"]], sort = TRUE, cutoff = 0)
##
## Loadings:
## PA1 PA2 PA3 PA4
## store_sales.in.millions. 0.997 0.013 0.007 0.059
## store_cost.in.millions. 0.954 0.012 0.010 0.053
## SRP 0.836 0.023 -0.004 -0.028
## gross_weight 0.024 0.994 -0.003 -0.002
## net_weight 0.019 0.994 -0.002 -0.001
## store_sqft 0.008 -0.003 0.956 -0.005
## grocery_sqft 0.003 -0.002 0.955 0.025
## total_children 0.040 0.000 0.003 0.645
## num_children_at_home 0.002 -0.002 0.010 0.611
##
## PA1 PA2 PA3 PA4
## SS loadings 2.607 1.978 1.826 0.797
## Proportion Var 0.290 0.220 0.203 0.089
## Cumulative Var 0.290 0.509 0.712 0.801
# Variance accounted for by each rotated factor (SS loadings and proportions).
print(fa_orthogonal_final[["Vaccounted"]])
## PA1 PA2 PA3 PA4
## SS loadings 2.6067392 1.9776915 1.8260392 0.79729178
## Proportion Var 0.2896377 0.2197435 0.2028932 0.08858798
## Cumulative Var 0.2896377 0.5093812 0.7122744 0.80086241
## Proportion Explained 0.3616572 0.2743836 0.2533435 0.11061572
## Cumulative Proportion 0.3616572 0.6360408 0.8893843 1.00000000
# Split-half check: draw a reproducible random half of the rows and fit the
# same varimax-rotated principal-axis model on each half.
set.seed(123)
# seq_len() is safe for zero rows, and floor() makes the truncation of an
# odd-sized half explicit.
index <- sample(
  seq_len(nrow(data_final)),
  size = floor(0.5 * nrow(data_final))
)
# drop = FALSE keeps both halves as data frames.
sample_1 <- data_final[index, , drop = FALSE]
sample_2 <- data_final[-index, , drop = FALSE]
fa_sample1 <- fa(sample_1, nfactors = n_factors, rotate = "varimax", fm = "pa")
# NOTE(review): the recorded run reports an ultra-Heywood warning for this
# second fit; examine its communalities before relying on the result.
fa_sample2 <- fa(sample_2, nfactors = n_factors, rotate = "varimax", fm = "pa")
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# Loadings for the first half; values below 0.4 in absolute size are hidden.
print(fa_sample1[["loadings"]], cutoff = 0.4)
##
## Loadings:
## PA1 PA2 PA3 PA4
## store_sales.in.millions. 0.996
## store_cost.in.millions. 0.955
## total_children 0.648
## num_children_at_home 0.616
## SRP 0.839
## gross_weight 0.994
## net_weight 0.994
## store_sqft 0.955
## grocery_sqft 0.955
##
## PA1 PA2 PA3 PA4
## SS loadings 2.612 1.977 1.825 0.808
## Proportion Var 0.290 0.220 0.203 0.090
## Cumulative Var 0.290 0.510 0.713 0.803
# Loadings for the second half; values below 0.4 in absolute size are hidden.
print(fa_sample2[["loadings"]], cutoff = 0.4)
##
## Loadings:
## PA1 PA2 PA3 PA4
## store_sales.in.millions. 0.998
## store_cost.in.millions. 0.953
## total_children 0.641
## num_children_at_home 0.606
## SRP 0.833
## gross_weight 0.994
## net_weight 0.994
## store_sqft 0.956
## grocery_sqft 0.955
##
## PA1 PA2 PA3 PA4
## SS loadings 2.602 1.978 1.827 0.787
## Proportion Var 0.289 0.220 0.203 0.087
## Cumulative Var 0.289 0.509 0.712 0.799