packages <- c(
"lavaan",
"semPlot",
"psych",
"MVN",
"car",
"ggplot2",
"dplyr",
"corrplot",
"reshape2",
"FactoMineR",
"factoextra"
)
installed <- packages %in% rownames(installed.packages())
if(any(!installed)){
install.packages(
packages[!installed],
dependencies = TRUE
)
}
library(lavaan)
library(semPlot)
library(psych)
library(MVN)
library(car)
library(ggplot2)
library(dplyr)
library(corrplot)
library(reshape2)
library(FactoMineR)
library(factoextra)
1. Load Dataset
data_customer <- read.csv(
"D:/UAS tableau/customer_feedback_survey.csv"
)
cat("JUMLAH DATA:")
## JUMLAH DATA:
cat("Jumlah baris:", nrow(data_customer))
## Jumlah baris: 10000
cat("Jumlah kolom:", ncol(data_customer))
## Jumlah kolom: 12
2. Preprocessing
# NORMALISASI NAMA KOLOM
names(data_customer) <- gsub(
"\\.",
"_",
names(data_customer)
)
names(data_customer) <- gsub(
" ",
"_",
names(data_customer)
)
names(data_customer) <- gsub(
"[^A-Za-z0-9_]",
"",
names(data_customer)
)
cat("NAMA KOLOM DATASET")
## NAMA KOLOM DATASET
print(names(data_customer))
## [1] "Customer_ID" "Overall_Satisfaction"
## [3] "Product_Quality" "Service_Speed"
## [5] "Support_Helpfulness" "Website_Ease_of_Use"
## [7] "Delivery_Speed" "Price_Competitiveness"
## [9] "Recommendation_Likelihood" "Experience_with_Brand"
## [11] "Feedback_Comments" "Contact_Channel"
3. Konversi Data Kategorik
data_customer$Experience_with_Brand <-
ifelse(
data_customer$Experience_with_Brand == "Positive", 5,
ifelse(
data_customer$Experience_with_Brand == "Neutral", 3,
ifelse(
data_customer$Experience_with_Brand == "Negative", 1, NA
)
)
)
data_customer$Experience_with_Brand <-
as.numeric(data_customer$Experience_with_Brand)
4. Pemilihan Variabel
data <- data.frame(
X1 = data_customer$Product_Quality,
X2 = data_customer$Service_Speed,
X3 = data_customer$Support_Helpfulness,
X4 = data_customer$Website_Ease_of_Use
)
5. Cek Missing Value
cat("MISSING VALUE")
## MISSING VALUE
print(colSums(is.na(data)))
## X1 X2 X3 X4
## 0 0 0 0
# HAPUS MISSING VALUE
data <- na.omit(data)
cat("JUMLAH DATA SETELAH na.omit()")
## JUMLAH DATA SETELAH na.omit()
print(nrow(data))
## [1] 10000
6. Cek Struktur Data
cat("STRUKTUR DATA")
## STRUKTUR DATA
str(data)
## 'data.frame': 10000 obs. of 4 variables:
## $ X1: int 2 5 3 4 1 1 5 5 2 1 ...
## $ X2: int 4 1 5 1 4 4 5 2 5 2 ...
## $ X3: int 1 3 5 1 4 2 4 2 1 2 ...
## $ X4: int 1 5 5 4 4 3 1 5 3 4 ...
7. Standarisasi Data
data_scaled <- as.data.frame(
scale(data)
)
8. Statistik Deskriptif
cat("STATISTIK DESKRIPTIF")
## STATISTIK DESKRIPTIF
desc <- describe(data_scaled)
print(round(
desc[, c(
"mean",
"sd",
"min",
"max",
"skew",
"kurtosis"
)], 3))
## mean sd min max skew kurtosis
## X1 0 1 -1.42 1.41 0.00 -1.30
## X2 0 1 -1.40 1.41 0.00 -1.31
## X3 0 1 -1.42 1.40 -0.01 -1.31
## X4 0 1 -1.42 1.42 -0.01 -1.28
9. Visualisasi
data_long <- melt(data_scaled)
p1 <- ggplot(
data_long,
aes(
x = value,
fill = variable
)
) +
geom_histogram(
bins = 10,
color = "white"
) +
facet_wrap(
~variable,
scales = "free"
) +
theme_bw() +
labs(
title = "Distribusi Variabel",
x = "Nilai",
y = "Frekuensi"
)
print(p1)

cor_matrix <- cor(data_scaled)
cat("MATRKS KORELASI")
## MATRKS KORELASI
print(round(cor_matrix, 3))
## X1 X2 X3 X4
## X1 1.000 0.003 0.017 0.004
## X2 0.003 1.000 0.010 -0.014
## X3 0.017 0.010 1.000 -0.021
## X4 0.004 -0.014 -0.021 1.000
corrplot(
cor_matrix,
method = "color",
type = "upper",
addCoef.col = "yellow",
tl.col = "black",
tl.srt = 45
)

10. Uji Asumsi
# KMO
kmo_result <- KMO(cor_matrix)
cat("KMO TEST")
## KMO TEST
cat(
"MSA :",
round(kmo_result$MSA, 3),
"\n"
)
## MSA : 0.503
cat("MSA PER VARIABEL")
## MSA PER VARIABEL
print(round(kmo_result$MSAi, 3))
## X1 X2 X3 X4
## 0.497 0.511 0.502 0.502
# BARTLETT TEST
bartlett_result <- cortest.bartlett(
cor_matrix,
n = nrow(data_scaled)
)
cat("BARTLETT TEST")
## BARTLETT TEST
cat(
"Chi-Square :",
round(bartlett_result$chisq, 3),
"\n"
)
## Chi-Square : 10.512
cat(
"df :",
bartlett_result$df,
"\n"
)
## df : 6
cat(
"p-value :",
round(bartlett_result$p.value, 4),
"\n"
)
## p-value : 0.1047
# NORMALITAS MULTIVARIAT
cat("UJI NORMALITAS MULTIVARIAT")
## UJI NORMALITAS MULTIVARIAT
result_mardia <- psych::mardia(
data_scaled
)

print(result_mardia)
## Call: psych::mardia(x = data_scaled)
##
## Mardia tests of multivariate skew and kurtosis
## Use describe(x) the to get univariate tests
## n.obs = 10000 num.vars = 4
## b1p = 0.01 skew = 10.04 with probability <= 0.97
## small sample skew = 10.05 with probability <= 0.97
## b2p = 18.8 kurtosis = -37.51 with probability <= 0
# MULTIKOLINIERITAS
cat("UJI MULTIKOLINIERITAS")
## UJI MULTIKOLINIERITAS
model_vif <- lm(
X1 ~ X2 + X3 + X4,
data = data_scaled
)
vif_result <- vif(model_vif)
print(round(vif_result, 3))
## X2 X3 X4
## 1.000 1.001 1.001
11. PCA
pca_result <- PCA(
data_scaled,
scale.unit = TRUE,
graph = FALSE
)
cat("EIGENVALUE PCA")
## EIGENVALUE PCA
print(round(pca_result$eig, 3))
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 1.034 25.840 25.840
## comp 2 1.005 25.135 50.975
## comp 3 0.990 24.758 75.733
## comp 4 0.971 24.267 100.000
# SCREE PLOT
fviz_eig(
pca_result,
addlabels = TRUE,
ylim = c(0, 80)
)

12. EFA
eig_value <- pca_result$eig[,1]
n_factor <- sum(eig_value > 1)
cat("JUMLAH FAKTOR")
## JUMLAH FAKTOR
print(n_factor)
## [1] 2
efa_result <- fa(
data_scaled,
nfactors = 1,
rotate = "varimax",
fm = "pa"
)
cat("HASIL EFA")
## HASIL EFA
print(efa_result)
## Factor Analysis using method = pa
## Call: fa(r = data_scaled, nfactors = 1, rotate = "varimax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 h2 u2 com
## X1 0.05 0.0030 1.00 1
## X2 0.08 0.0059 0.99 1
## X3 0.17 0.0286 0.97 1
## X4 -0.12 0.0147 0.99 1
##
## PA1
## SS loadings 0.05
## Proportion Var 0.01
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 6 with the objective function = 0 with Chi Square = 10.51
## df of the model are 2 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.01
##
## The harmonic n.obs is 10000 with the empirical chi square 2 with prob < 0.37
## The total n.obs was 10000 with Likelihood Chi Square = 2.04 with prob < 0.36
##
## Tucker Lewis Index of factoring reliability = 0.972
## RMSEA index = 0.001 and the 90 % confidence intervals are 0 0.02
## BIC = -16.38
## Fit based upon off diagonal values = 0.81
## Measures of factor score adequacy
## PA1
## Correlation of (regression) scores with factors 0.22
## Multiple R square of scores with factors 0.05
## Minimum correlation of possible factor scores -0.90
13. FIRST ORDER CFA
model_first_order <- '
KP =~
X1 + X2 + X3 + X4
'
cat("MODEL FIRST ORDER CFA")
## MODEL FIRST ORDER CFA
cat(model_first_order)
##
##
## KP =~
## X1 + X2 + X3 + X4
fit_first <- cfa(
model_first_order,
data = data_scaled,
estimator = "ML",
std.lv = TRUE
)
inspect(fit_first, "converged")
## [1] TRUE
summary(
fit_first,
standardized = TRUE,
fit.measures = TRUE
)
## lavaan 0.6-21 ended normally after 39 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 8
##
## Number of observations 10000
##
## Model Test User Model:
##
## Test statistic 1.882
## Degrees of freedom 2
## P-value (Chi-square) 0.390
##
## Model Test Baseline Model:
##
## Test statistic 10.515
## Degrees of freedom 6
## P-value 0.105
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000
## Tucker-Lewis Index (TLI) 1.078
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -56751.225
## Loglikelihood unrestricted model (H1) -56750.284
##
## Akaike (AIC) 113518.449
## Bayesian (BIC) 113576.132
## Sample-size adjusted Bayesian (SABIC) 113550.709
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.019
## P-value H_0: RMSEA <= 0.050 1.000
## P-value H_0: RMSEA >= 0.080 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.004
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## KP =~
## X1 0.058 0.049 1.183 0.237 0.058 0.058
## X2 0.055 0.048 1.146 0.252 0.055 0.055
## X3 0.236 0.170 1.384 0.166 0.236 0.236
## X4 -0.090 0.067 -1.335 0.182 -0.090 -0.090
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .X1 0.996 0.015 65.644 0.000 0.996 0.997
## .X2 0.997 0.015 66.379 0.000 0.997 0.997
## .X3 0.944 0.081 11.635 0.000 0.944 0.945
## .X4 0.992 0.018 53.697 0.000 0.992 0.992
## KP 1.000 1.000 1.000
# GOF 1
fit_first_indices <- fitMeasures(
fit_first,
c(
"chisq",
"df",
"pvalue",
"cfi",
"tli",
"rmsea",
"srmr",
"gfi",
"agfi",
"nfi"
)
)
gof_first <- data.frame(
Indeks = names(fit_first_indices),
Nilai = round(
as.numeric(fit_first_indices),
3
)
)
cat("GOODNESS OF FIT")
## GOODNESS OF FIT
print(gof_first)
## Indeks Nilai
## 1 chisq 1.882
## 2 df 2.000
## 3 pvalue 0.390
## 4 cfi 1.000
## 5 tli 1.078
## 6 rmsea 0.000
## 7 srmr 0.004
## 8 gfi 1.000
## 9 agfi 1.000
## 10 nfi 0.821
# CMIN/DF 1
cmin_df <- fitMeasures(
fit_first,
"chisq"
) /
fitMeasures(
fit_first,
"df"
)
cat(
"\nCMIN/DF:",
round(cmin_df, 3),
"\n"
)
##
## CMIN/DF: 0.941
# LOADING FACTOR 1
std_solution <- standardizedSolution(
fit_first
)
loading_factor <- std_solution[
std_solution$op == "=~",
]
cat("LOADING FACTOR")
## LOADING FACTOR
print(
loading_factor[, c(
"lhs",
"rhs",
"est.std",
"se",
"z",
"pvalue"
)]
)
## lhs rhs est.std se z pvalue
## 1 KP X1 0.058 0.049 1.183 0.237
## 2 KP X2 0.055 0.048 1.146 0.252
## 3 KP X3 0.236 0.170 1.384 0.166
## 4 KP X4 -0.090 0.067 -1.335 0.182
# CR & AVE 1
cat("CR DAN AVE")
## CR DAN AVE
loading_data <- loading_factor
lambda <- loading_data$est.std
error_var <- 1 - lambda^2
# CONSTRUCT RELIABILITY
CR <- (sum(lambda)^2) /
(
(sum(lambda)^2) +
sum(error_var)
)
# AVERAGE VARIANCE EXTRACTED
AVE <- sum(lambda^2) /
(
sum(lambda^2) +
sum(error_var)
)
hasil_cr_ave <- data.frame(
Konstruk = "KepuasanPelanggan",
CR = round(CR, 3),
AVE = round(AVE, 3)
)
print(hasil_cr_ave)
## Konstruk CR AVE
## 1 KepuasanPelanggan 0.017 0.018
# --- MODIFICATION INDICES 1 ---
mi_first <- modificationIndices(
fit_first,
sort. = TRUE
)
cat("TOP MODIFICATION INDICES")
## TOP MODIFICATION INDICES
print(head(mi_first, 10))
## lhs op rhs mi epc sepc.lv sepc.all sepc.nox
## 14 X2 ~~ X4 1.607 -0.017 -0.017 -0.017 -0.017
## 11 X1 ~~ X3 1.607 0.048 0.048 0.049 0.049
## 13 X2 ~~ X3 1.580 -0.044 -0.044 -0.045 -0.045
## 12 X1 ~~ X4 1.580 0.018 0.018 0.018 0.018
## 15 X3 ~~ X4 0.000 -0.001 -0.001 -0.001 -0.001
## 10 X1 ~~ X2 0.000 0.000 0.000 0.000 0.000
semPaths(
fit_first,
what = "std",
whatLabels = "std",
layout = "tree",
style = "lisrel",
residuals = FALSE,
intercepts = FALSE,
edge.label.cex = 1.8,
sizeLat = 14,
sizeMan = 10,
label.cex = 2,
nCharNodes = 0,
fade = FALSE,
mar = c(6,6,6,6)
)
title("First Order - CFA")

14. SECOND ORDER CFA
model_second_order <- '
# FIRST ORDER LATENT
KlP =~
X1 + X2
KL =~
X3 + X4
# SECOND ORDER LATENT
KP =~
KlP +
KL
'
cat("MODEL SECOND ORDER CFA")
## MODEL SECOND ORDER CFA
cat(model_second_order)
##
##
## # FIRST ORDER LATENT
## KlP =~
## X1 + X2
##
## KL =~
## X3 + X4
##
## # SECOND ORDER LATENT
## KP =~
## KlP +
## KL
fit_second <- cfa(
model_second_order,
data = data_scaled,
estimator = "ML",
std.lv = TRUE
)
inspect(fit_second, "converged")
## [1] TRUE
summary(
fit_second,
standardized = TRUE,
fit.measures = TRUE
)
## lavaan 0.6-21 ended normally after 387 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 10
##
## Number of observations 10000
##
## Model Test User Model:
##
## Test statistic 1.882
## Degrees of freedom 0
##
## Model Test Baseline Model:
##
## Test statistic 10.515
## Degrees of freedom 6
## P-value 0.105
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.583
## Tucker-Lewis Index (TLI) 1.000
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -56751.225
## Loglikelihood unrestricted model (H1) -56750.284
##
## Akaike (AIC) 113522.449
## Bayesian (BIC) 113594.552
## Sample-size adjusted Bayesian (SABIC) 113562.774
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.000
## P-value H_0: RMSEA <= 0.050 NA
## P-value H_0: RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.004
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## KlP =~
## X1 0.007 NA 0.059 0.059
## X2 0.007 NA 0.056 0.056
## KL =~
## X3 0.031 NA 0.237 0.237
## X4 -0.012 NA -0.090 -0.090
## KP =~
## KlP 8.321 NA 0.993 0.993
## KL 7.531 NA 0.991 0.991
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .X1 0.996 NA 0.996 0.996
## .X2 0.997 NA 0.997 0.997
## .X3 0.944 NA 0.944 0.944
## .X4 0.992 NA 0.992 0.992
## .KlP 1.000 0.014 0.014
## .KL 1.000 0.017 0.017
## KP 1.000 1.000 1.000
# GOF 2
fit_second_indices <- fitMeasures(
fit_second,
c(
"chisq",
"df",
"pvalue",
"cfi",
"tli",
"rmsea",
"srmr",
"gfi",
"agfi",
"nfi"
)
)
gof_second <- data.frame(
Indeks = names(fit_second_indices),
Nilai = round(
as.numeric(fit_second_indices),
3
)
)
cat("GOODNESS OF FIT SECOND ORDER")
## GOODNESS OF FIT SECOND ORDER
print(gof_second)
## Indeks Nilai
## 1 chisq 1.882
## 2 df 0.000
## 3 pvalue NA
## 4 cfi 0.583
## 5 tli 1.000
## 6 rmsea 0.000
## 7 srmr 0.004
## 8 gfi 1.000
## 9 agfi 1.000
## 10 nfi 1.000
# CMIN/DF 2
cmin_df_second <- fitMeasures(
fit_second,
"chisq"
) /
fitMeasures(
fit_second,
"df"
)
cat(
"\nCMIN/DF :",
round(cmin_df_second, 3),
"\n"
)
##
## CMIN/DF : Inf
# LOADING FACTOR 2
std_second <- standardizedSolution(
fit_second
)
loading_second <- std_second[
std_second$op == "=~",
]
cat("LOADING FACTOR SECOND ORDER")
## LOADING FACTOR SECOND ORDER
print(
loading_second[, c(
"lhs",
"rhs",
"est.std",
"se",
"z",
"pvalue"
)]
)
## lhs rhs est.std se z pvalue
## 1 KlP X1 0.059 NA NA NA
## 2 KlP X2 0.056 NA NA NA
## 3 KL X3 0.237 NA NA NA
## 4 KL X4 -0.090 NA NA NA
## 5 KP KlP 0.993 NA NA NA
## 6 KP KL 0.991 NA NA NA
# CR & AVE 2
cat("CR DAN AVE SECOND ORDER")
## CR DAN AVE SECOND ORDER
loading_data <- loading_second
constructs <- unique(
loading_data$lhs
)
hasil_cr_ave_second <- data.frame()
for(k in constructs){
lambda <- loading_data$est.std[
loading_data$lhs == k
]
error_var <- 1 - lambda^2
# CONSTRUCT RELIABILITY
CR <- (sum(lambda)^2) /
(
(sum(lambda)^2) +
sum(error_var)
)
# AVERAGE VARIANCE EXTRACTED
AVE <- sum(lambda^2) /
(
sum(lambda^2) +
sum(error_var)
)
hasil_cr_ave_second <- rbind(
hasil_cr_ave_second,
data.frame(
Konstruk = k,
CR = round(CR, 3),
AVE = round(AVE, 3)
)
)
}
print(hasil_cr_ave_second)
## Konstruk CR AVE
## 1 KlP 0.007 0.003
## 2 KL 0.011 0.032
## 3 KP 0.992 0.984
cat("TOP MODIFICATION INDICES SECOND ORDER\n")
## TOP MODIFICATION INDICES SECOND ORDER
mi_second <- try(
modificationIndices(
fit_second,
sort. = TRUE
),
silent = TRUE
)
if(inherits(mi_second, "try-error")){
cat(
"Modification Indices tidak dapat dihitung karena information matrix singular.\n"
)
} else {
print(
head(mi_second, 10)
)
}
## Modification Indices tidak dapat dihitung karena information matrix singular.
semPaths(
fit_second,
what = "std",
whatLabels = "std",
layout = "tree",
style = "lisrel",
residuals = FALSE,
intercepts = FALSE,
edge.label.cex = 1.8,
sizeLat = 14,
sizeMan = 10,
label.cex = 2,
nCharNodes = 0,
fade = FALSE,
mar = c(6,6,6,6)
)
title("Second Order - CFA")
