packages <- c(
  "lavaan",
  "semPlot",
  "psych",
  "MVN",
  "car",
  "ggplot2",
  "dplyr",
  "corrplot",
  "reshape2",
  "FactoMineR",
  "factoextra"
)

installed <- packages %in% rownames(installed.packages())

if(any(!installed)){
  install.packages(
    packages[!installed],
    dependencies = TRUE
  )
}

library(lavaan)
library(semPlot)
library(psych)
library(MVN)
library(car)
library(ggplot2)
library(dplyr)
library(corrplot)
library(reshape2)
library(FactoMineR)
library(factoextra)

1. Load Dataset

data_customer <- read.csv(
  "D:/UAS tableau/customer_feedback_survey.csv"
)

cat("JUMLAH DATA:")
## JUMLAH DATA:
cat("Jumlah baris:", nrow(data_customer))
## Jumlah baris: 10000
cat("Jumlah kolom:", ncol(data_customer))
## Jumlah kolom: 12

2. Preprocessing

# NORMALISASI NAMA KOLOM
names(data_customer) <- gsub(
  "\\.",
  "_",
  names(data_customer)
)

names(data_customer) <- gsub(
  " ",
  "_",
  names(data_customer)
)

names(data_customer) <- gsub(
  "[^A-Za-z0-9_]",
  "",
  names(data_customer)
)

cat("NAMA KOLOM DATASET")
## NAMA KOLOM DATASET
print(names(data_customer))
##  [1] "Customer_ID"               "Overall_Satisfaction"     
##  [3] "Product_Quality"           "Service_Speed"            
##  [5] "Support_Helpfulness"       "Website_Ease_of_Use"      
##  [7] "Delivery_Speed"            "Price_Competitiveness"    
##  [9] "Recommendation_Likelihood" "Experience_with_Brand"    
## [11] "Feedback_Comments"         "Contact_Channel"

3. Konversi Data Kategorik

data_customer$Experience_with_Brand <-
  ifelse(
    data_customer$Experience_with_Brand == "Positive", 5,
    
    ifelse(
      data_customer$Experience_with_Brand == "Neutral", 3,
      
      ifelse(
        data_customer$Experience_with_Brand == "Negative", 1, NA
      )
    )
  )

data_customer$Experience_with_Brand <-
  as.numeric(data_customer$Experience_with_Brand)

4. Pemilihan Variabel

data <- data.frame(
  
  X1 = data_customer$Product_Quality,
  
  X2 = data_customer$Service_Speed,
  
  X3 = data_customer$Support_Helpfulness,
  
  X4 = data_customer$Website_Ease_of_Use
)

5. Cek Missing Value

cat("MISSING VALUE")
## MISSING VALUE
print(colSums(is.na(data)))
## X1 X2 X3 X4 
##  0  0  0  0
    # HAPUS MISSING VALUE
data <- na.omit(data)

cat("JUMLAH DATA SETELAH na.omit()")
## JUMLAH DATA SETELAH na.omit()
print(nrow(data))
## [1] 10000

6. Cek Struktur Data

cat("STRUKTUR DATA")
## STRUKTUR DATA
str(data)
## 'data.frame':    10000 obs. of  4 variables:
##  $ X1: int  2 5 3 4 1 1 5 5 2 1 ...
##  $ X2: int  4 1 5 1 4 4 5 2 5 2 ...
##  $ X3: int  1 3 5 1 4 2 4 2 1 2 ...
##  $ X4: int  1 5 5 4 4 3 1 5 3 4 ...

7. Standarisasi Data

data_scaled <- as.data.frame(
  scale(data)
)

8. Statistik Deskriptif

cat("STATISTIK DESKRIPTIF")
## STATISTIK DESKRIPTIF
desc <- describe(data_scaled)

print(round(
  desc[, c(
    "mean",
    "sd",
    "min",
    "max",
    "skew",
    "kurtosis"
  )], 3))
##    mean sd   min  max  skew kurtosis
## X1    0  1 -1.42 1.41  0.00    -1.30
## X2    0  1 -1.40 1.41  0.00    -1.31
## X3    0  1 -1.42 1.40 -0.01    -1.31
## X4    0  1 -1.42 1.42 -0.01    -1.28

9. Visualisasi

data_long <- melt(data_scaled)

p1 <- ggplot(
  data_long,
  aes(
    x = value,
    fill = variable
  )
) +
  
  geom_histogram(
    bins = 10,
    color = "white"
  ) +
  
  facet_wrap(
    ~variable,
    scales = "free"
  ) +
  
  theme_bw() +
  
  labs(
    title = "Distribusi Variabel",
    x = "Nilai",
    y = "Frekuensi"
  )

print(p1)

cor_matrix <- cor(data_scaled)

cat("MATRKS KORELASI")
## MATRKS KORELASI
print(round(cor_matrix, 3))
##       X1     X2     X3     X4
## X1 1.000  0.003  0.017  0.004
## X2 0.003  1.000  0.010 -0.014
## X3 0.017  0.010  1.000 -0.021
## X4 0.004 -0.014 -0.021  1.000
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "yellow",
  tl.col = "black",
  tl.srt = 45
)

10. Uji Asumsi

# KMO
kmo_result <- KMO(cor_matrix)

cat("KMO TEST")
## KMO TEST
cat(
  "MSA :",
  round(kmo_result$MSA, 3),
  "\n"
)
## MSA : 0.503
cat("MSA PER VARIABEL")
## MSA PER VARIABEL
print(round(kmo_result$MSAi, 3))
##    X1    X2    X3    X4 
## 0.497 0.511 0.502 0.502
    # BARTLETT TEST
bartlett_result <- cortest.bartlett(
  cor_matrix,
  n = nrow(data_scaled)
)

cat("BARTLETT TEST")
## BARTLETT TEST
cat(
  "Chi-Square :",
  round(bartlett_result$chisq, 3),
  "\n"
)
## Chi-Square : 10.512
cat(
  "df :",
  bartlett_result$df,
  "\n"
)
## df : 6
cat(
  "p-value :",
  round(bartlett_result$p.value, 4),
  "\n"
)
## p-value : 0.1047
    # NORMALITAS MULTIVARIAT
cat("UJI NORMALITAS MULTIVARIAT")
## UJI NORMALITAS MULTIVARIAT
result_mardia <- psych::mardia(
  data_scaled
)

print(result_mardia)
## Call: psych::mardia(x = data_scaled)
## 
## Mardia tests of multivariate skew and kurtosis
## Use describe(x) the to get univariate tests
## n.obs = 10000   num.vars =  4 
## b1p =  0.01   skew =  10.04  with probability  <=  0.97
##  small sample skew =  10.05  with probability <=  0.97
## b2p =  18.8   kurtosis =  -37.51  with probability <=  0
    # MULTIKOLINIERITAS
cat("UJI MULTIKOLINIERITAS")
## UJI MULTIKOLINIERITAS
model_vif <- lm(
  X1 ~ X2 + X3 + X4,
  data = data_scaled
)

vif_result <- vif(model_vif)
print(round(vif_result, 3))
##    X2    X3    X4 
## 1.000 1.001 1.001

11. PCA

pca_result <- PCA(
  data_scaled,
  scale.unit = TRUE,
  graph = FALSE
)

cat("EIGENVALUE PCA")
## EIGENVALUE PCA
print(round(pca_result$eig, 3))
##        eigenvalue percentage of variance cumulative percentage of variance
## comp 1      1.034                 25.840                            25.840
## comp 2      1.005                 25.135                            50.975
## comp 3      0.990                 24.758                            75.733
## comp 4      0.971                 24.267                           100.000
    # SCREE PLOT
fviz_eig(
  pca_result,
  addlabels = TRUE,
  ylim = c(0, 80)
)

12. EFA

eig_value <- pca_result$eig[,1]

n_factor <- sum(eig_value > 1)

cat("JUMLAH FAKTOR")
## JUMLAH FAKTOR
print(n_factor)
## [1] 2
efa_result <- fa(
  data_scaled,
  nfactors = 1,
  rotate = "varimax",
  fm = "pa"
)

cat("HASIL EFA")
## HASIL EFA
print(efa_result)
## Factor Analysis using method =  pa
## Call: fa(r = data_scaled, nfactors = 1, rotate = "varimax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA1     h2   u2 com
## X1  0.05 0.0030 1.00   1
## X2  0.08 0.0059 0.99   1
## X3  0.17 0.0286 0.97   1
## X4 -0.12 0.0147 0.99   1
## 
##                 PA1
## SS loadings    0.05
## Proportion Var 0.01
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  6  with the objective function =  0 with Chi Square =  10.51
## df of  the model are 2  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0.01 
## The df corrected root mean square of the residuals is  0.01 
## 
## The harmonic n.obs is  10000 with the empirical chi square  2  with prob <  0.37 
## The total n.obs was  10000  with Likelihood Chi Square =  2.04  with prob <  0.36 
## 
## Tucker Lewis Index of factoring reliability =  0.972
## RMSEA index =  0.001  and the 90 % confidence intervals are  0 0.02
## BIC =  -16.38
## Fit based upon off diagonal values = 0.81
## Measures of factor score adequacy             
##                                                     PA1
## Correlation of (regression) scores with factors    0.22
## Multiple R square of scores with factors           0.05
## Minimum correlation of possible factor scores     -0.90

13. FIRST ORDER CFA

model_first_order <- '

KP =~
X1 + X2 + X3 + X4
'

cat("MODEL FIRST ORDER CFA")
## MODEL FIRST ORDER CFA
cat(model_first_order)
## 
## 
## KP =~
## X1 + X2 + X3 + X4
fit_first <- cfa(
  model_first_order,
  data = data_scaled,
  estimator = "ML",
  std.lv = TRUE
)

inspect(fit_first, "converged")
## [1] TRUE
summary(
  fit_first,
  standardized = TRUE,
  fit.measures = TRUE
)
## lavaan 0.6-21 ended normally after 39 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         8
## 
##   Number of observations                         10000
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 1.882
##   Degrees of freedom                                 2
##   P-value (Chi-square)                           0.390
## 
## Model Test Baseline Model:
## 
##   Test statistic                                10.515
##   Degrees of freedom                                 6
##   P-value                                        0.105
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.078
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -56751.225
##   Loglikelihood unrestricted model (H1)     -56750.284
##                                                       
##   Akaike (AIC)                              113518.449
##   Bayesian (BIC)                            113576.132
##   Sample-size adjusted Bayesian (SABIC)     113550.709
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.019
##   P-value H_0: RMSEA <= 0.050                    1.000
##   P-value H_0: RMSEA >= 0.080                    0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.004
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   KP =~                                                                 
##     X1                0.058    0.049    1.183    0.237    0.058    0.058
##     X2                0.055    0.048    1.146    0.252    0.055    0.055
##     X3                0.236    0.170    1.384    0.166    0.236    0.236
##     X4               -0.090    0.067   -1.335    0.182   -0.090   -0.090
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .X1                0.996    0.015   65.644    0.000    0.996    0.997
##    .X2                0.997    0.015   66.379    0.000    0.997    0.997
##    .X3                0.944    0.081   11.635    0.000    0.944    0.945
##    .X4                0.992    0.018   53.697    0.000    0.992    0.992
##     KP                1.000                               1.000    1.000
    # GOF 1
fit_first_indices <- fitMeasures(
  fit_first,
  c(
    "chisq",
    "df",
    "pvalue",
    "cfi",
    "tli",
    "rmsea",
    "srmr",
    "gfi",
    "agfi",
    "nfi"
  )
)

gof_first <- data.frame(
  Indeks = names(fit_first_indices),
  Nilai = round(
    as.numeric(fit_first_indices),
    3
  )
)

cat("GOODNESS OF FIT")
## GOODNESS OF FIT
print(gof_first)
##    Indeks Nilai
## 1   chisq 1.882
## 2      df 2.000
## 3  pvalue 0.390
## 4     cfi 1.000
## 5     tli 1.078
## 6   rmsea 0.000
## 7    srmr 0.004
## 8     gfi 1.000
## 9    agfi 1.000
## 10    nfi 0.821
    # CMIN/DF 1
cmin_df <- fitMeasures(
  fit_first,
  "chisq"
) /
  fitMeasures(
    fit_first,
    "df"
  )

cat(
  "\nCMIN/DF:",
  round(cmin_df, 3),
  "\n"
)
## 
## CMIN/DF: 0.941
    # LOADING FACTOR 1
std_solution <- standardizedSolution(
  fit_first
)

loading_factor <- std_solution[
  std_solution$op == "=~",
]

cat("LOADING FACTOR")
## LOADING FACTOR
print(
  loading_factor[, c(
    "lhs",
    "rhs",
    "est.std",
    "se",
    "z",
    "pvalue"
  )]
)
##   lhs rhs est.std    se      z pvalue
## 1  KP  X1   0.058 0.049  1.183  0.237
## 2  KP  X2   0.055 0.048  1.146  0.252
## 3  KP  X3   0.236 0.170  1.384  0.166
## 4  KP  X4  -0.090 0.067 -1.335  0.182
    # CR & AVE 1
cat("CR DAN AVE")
## CR DAN AVE
loading_data <- loading_factor

lambda <- loading_data$est.std

error_var <- 1 - lambda^2

    # CONSTRUCT RELIABILITY
CR <- (sum(lambda)^2) /
  (
    (sum(lambda)^2) +
      sum(error_var)
  )

    # AVERAGE VARIANCE EXTRACTED
AVE <- sum(lambda^2) /
  (
    sum(lambda^2) +
      sum(error_var)
  )

hasil_cr_ave <- data.frame(
  Konstruk = "KepuasanPelanggan",
  CR = round(CR, 3),
  AVE = round(AVE, 3)
)

print(hasil_cr_ave)
##            Konstruk    CR   AVE
## 1 KepuasanPelanggan 0.017 0.018
# --- MODIFICATION INDICES 1 ---
mi_first <- modificationIndices(
  fit_first,
  sort. = TRUE
)

cat("TOP MODIFICATION INDICES")
## TOP MODIFICATION INDICES
print(head(mi_first, 10))
##    lhs op rhs    mi    epc sepc.lv sepc.all sepc.nox
## 14  X2 ~~  X4 1.607 -0.017  -0.017   -0.017   -0.017
## 11  X1 ~~  X3 1.607  0.048   0.048    0.049    0.049
## 13  X2 ~~  X3 1.580 -0.044  -0.044   -0.045   -0.045
## 12  X1 ~~  X4 1.580  0.018   0.018    0.018    0.018
## 15  X3 ~~  X4 0.000 -0.001  -0.001   -0.001   -0.001
## 10  X1 ~~  X2 0.000  0.000   0.000    0.000    0.000
semPaths(
  fit_first,
  
  what = "std",
  whatLabels = "std",
  
  layout = "tree",
  
  style = "lisrel",
  
  residuals = FALSE,
  
  intercepts = FALSE,
  
  edge.label.cex = 1.8,
  
  sizeLat = 14,
  sizeMan = 10,
  
  label.cex = 2,
  
  nCharNodes = 0,
  
  fade = FALSE,
  
  mar = c(6,6,6,6)
)

title("First Order - CFA")

14. SECOND ORDER CFA

model_second_order <- '

    # FIRST ORDER LATENT
KlP =~
X1 + X2

KL =~
X3 + X4

    # SECOND ORDER LATENT
KP =~
KlP +
KL
'

cat("MODEL SECOND ORDER CFA")
## MODEL SECOND ORDER CFA
cat(model_second_order)
## 
## 
##     # FIRST ORDER LATENT
## KlP =~
## X1 + X2
## 
## KL =~
## X3 + X4
## 
##     # SECOND ORDER LATENT
## KP =~
## KlP +
## KL
fit_second <- cfa(
  model_second_order,
  data = data_scaled,
  estimator = "ML",
  std.lv = TRUE
)

inspect(fit_second, "converged")
## [1] TRUE
summary(
  fit_second,
  standardized = TRUE,
  fit.measures = TRUE
)
## lavaan 0.6-21 ended normally after 387 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        10
## 
##   Number of observations                         10000
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 1.882
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                10.515
##   Degrees of freedom                                 6
##   P-value                                        0.105
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.583
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -56751.225
##   Loglikelihood unrestricted model (H1)     -56750.284
##                                                       
##   Akaike (AIC)                              113522.449
##   Bayesian (BIC)                            113594.552
##   Sample-size adjusted Bayesian (SABIC)     113562.774
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.004
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   KlP =~                                                                
##     X1                0.007       NA                      0.059    0.059
##     X2                0.007       NA                      0.056    0.056
##   KL =~                                                                 
##     X3                0.031       NA                      0.237    0.237
##     X4               -0.012       NA                     -0.090   -0.090
##   KP =~                                                                 
##     KlP               8.321       NA                      0.993    0.993
##     KL                7.531       NA                      0.991    0.991
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .X1                0.996       NA                      0.996    0.996
##    .X2                0.997       NA                      0.997    0.997
##    .X3                0.944       NA                      0.944    0.944
##    .X4                0.992       NA                      0.992    0.992
##    .KlP               1.000                               0.014    0.014
##    .KL                1.000                               0.017    0.017
##     KP                1.000                               1.000    1.000
    # GOF 2 
fit_second_indices <- fitMeasures(
  fit_second,
  c(
    "chisq",
    "df",
    "pvalue",
    "cfi",
    "tli",
    "rmsea",
    "srmr",
    "gfi",
    "agfi",
    "nfi"
  )
)

gof_second <- data.frame(
  Indeks = names(fit_second_indices),
  Nilai = round(
    as.numeric(fit_second_indices),
    3
  )
)

cat("GOODNESS OF FIT SECOND ORDER")
## GOODNESS OF FIT SECOND ORDER
print(gof_second)
##    Indeks Nilai
## 1   chisq 1.882
## 2      df 0.000
## 3  pvalue    NA
## 4     cfi 0.583
## 5     tli 1.000
## 6   rmsea 0.000
## 7    srmr 0.004
## 8     gfi 1.000
## 9    agfi 1.000
## 10    nfi 1.000
    # CMIN/DF 2
cmin_df_second <- fitMeasures(
  fit_second,
  "chisq"
) /
  fitMeasures(
    fit_second,
    "df"
  )

cat(
  "\nCMIN/DF :",
  round(cmin_df_second, 3),
  "\n"
)
## 
## CMIN/DF : Inf
    # LOADING FACTOR 2
std_second <- standardizedSolution(
  fit_second
)

loading_second <- std_second[
  std_second$op == "=~",
]

cat("LOADING FACTOR SECOND ORDER")
## LOADING FACTOR SECOND ORDER
print(
  loading_second[, c(
    "lhs",
    "rhs",
    "est.std",
    "se",
    "z",
    "pvalue"
  )]
)
##   lhs rhs est.std se  z pvalue
## 1 KlP  X1   0.059 NA NA     NA
## 2 KlP  X2   0.056 NA NA     NA
## 3  KL  X3   0.237 NA NA     NA
## 4  KL  X4  -0.090 NA NA     NA
## 5  KP KlP   0.993 NA NA     NA
## 6  KP  KL   0.991 NA NA     NA
    # CR & AVE 2
cat("CR DAN AVE SECOND ORDER")
## CR DAN AVE SECOND ORDER
loading_data <- loading_second

constructs <- unique(
  loading_data$lhs
)

hasil_cr_ave_second <- data.frame()

for(k in constructs){
  
  lambda <- loading_data$est.std[
    loading_data$lhs == k
  ]
  
  error_var <- 1 - lambda^2
  
  # CONSTRUCT RELIABILITY
  CR <- (sum(lambda)^2) /
    (
      (sum(lambda)^2) +
        sum(error_var)
    )
  
  # AVERAGE VARIANCE EXTRACTED
  AVE <- sum(lambda^2) /
    (
      sum(lambda^2) +
        sum(error_var)
    )
  
  hasil_cr_ave_second <- rbind(
    hasil_cr_ave_second,
    
    data.frame(
      Konstruk = k,
      CR = round(CR, 3),
      AVE = round(AVE, 3)
    )
  )
}

print(hasil_cr_ave_second)
##   Konstruk    CR   AVE
## 1      KlP 0.007 0.003
## 2       KL 0.011 0.032
## 3       KP 0.992 0.984
cat("TOP MODIFICATION INDICES SECOND ORDER\n")
## TOP MODIFICATION INDICES SECOND ORDER
mi_second <- try(
  modificationIndices(
    fit_second,
    sort. = TRUE
  ),
  silent = TRUE
)

if(inherits(mi_second, "try-error")){

  cat(
    "Modification Indices tidak dapat dihitung karena information matrix singular.\n"
  )

} else {

  print(
    head(mi_second, 10)
  )

}
## Modification Indices tidak dapat dihitung karena information matrix singular.
semPaths(
  fit_second,
  
  what = "std",
  whatLabels = "std",
  
  layout = "tree",
  
  style = "lisrel",
  
  residuals = FALSE,
  
  intercepts = FALSE,
  
  edge.label.cex = 1.8,
  
  sizeLat = 14,
  sizeMan = 10,
  
  label.cex = 2,
  
  nCharNodes = 0,
  
  fade = FALSE,
  
  mar = c(6,6,6,6)
)
  
title("Second Order - CFA")