Introduction

Suppose the SEM model is specified as follows:

In which A, B, C, D, E, and F are factors, with the numbers of items comprising them being 3, 3, 4, 4, 4, and 4, respectively. In addition, we propose nine research hypotheses as follows:

H1: A has a positive effect on D (A → D)

H2: B has a negative effect on D (B → D)

H3: C has a positive effect on D (C → D)

H4: A has a positive effect on E (A → E)

H5: B has a negative effect on E (B → E)

H6: C has a negative effect on E (C → E)

H7: E has a positive effect on D (E → D)

H8: D has a positive effect on F (D → F)

H9: E has a positive effect on F (E → F)

R Codes

#=============================
#  Stage 1: Data Simulation
#=============================

library(lavaan)
library(psych)
library(dplyr)

set.seed(20260305)

N <- 500

# ==========================================================
# 1) Population model (continuous -> later cut to Likert)
#    Designed so H1-H9 significant + correct signs
# ===========================================================
pop_model <- '
  # -------------------------
  # Measurement (high loadings -> alpha > .75)
  # -------------------------
  A  =~ 0.9*A1  + 0.85*A2  + 0.80*A3
  B  =~ 0.9*B1  + 0.85*B2  + 0.80*B3
  C  =~ 0.9*C1  + 0.85*C2  + 0.80*C3 + 0.75*C4

  D  =~ 0.9*D1  + 0.85*D2  + 0.80*D3 + 0.75*D4
  E  =~ 0.9*E1  + 0.85*E2  + 0.80*E3 + 0.75*E4
  F  =~ 0.9*F1  + 0.85*F2  + 0.80*F3 + 0.75*F4

  # -------------------------
  # Structural (H1-H9 in new notation)
  # -------------------------
  # H1-H3
  D ~ 0.55*A + (-0.45)*B + 0.35*C

  # H4-H7
  E ~ 0.40*A + (-0.35)*B + (-0.25)*C + 0.55*D

  # H8-H9
  F ~ 0.40*D + 0.60*E

  # Correlations among exogenous constructs
  A ~~ 0.25*B
  A ~~ 0.45*C
  B ~~ 0.65*C
'

# 2) Simulate continuous data: 
dat <- simulateData(pop_model,
                    sample.nobs  = N,
                    model.type   = "sem",
                    standardized = TRUE)

# =========================
# 3) Convert to Likert 5
# =========================

cut_to_likert <- function(x) {
  cut(x,
      breaks = c(-Inf, -1, -0.3, 0.3, 1, Inf),
      labels = 1:5,
      ordered_result = TRUE)
}

item_names <- c(paste0("A",1:3),
                paste0("B",1:3),
                paste0("C",1:4),
                paste0("D",1:4),
                paste0("E",1:4),
                paste0("F",1:4))

dat %>%
  mutate(across(all_of(item_names), cut_to_likert)) %>%
  mutate(across(all_of(item_names), as.integer)) -> data_Likert5

# Save data for using later: 

readr::write_csv(data_Likert5, "data_Likert5forPaper500.csv")

#=============================
#  Stage 2: SEM Estimation
#=============================

library(kableExtra) 

# --------------------------
# Measurement + Structural model (new notation)
# --------------------------
fit_model <- '
  A  =~ A1 + A2 + A3
  B  =~ B1 + B2 + B3
  C  =~ C1 + C2 + C3 + C4
  D  =~ D1 + D2 + D3 + D4
  E  =~ E1 + E2 + E3 + E4
  F  =~ F1 + F2 + F3 + F4

  D ~ A + B + C        # H1-H3
  E ~ A + B + C + D    # H4-H7
  F ~ D + E            # H8-H9

  A ~~ B + C
  B ~~ C
'

# SEM

fit_ord <- sem(fit_model,
               data      = data_Likert5,
               ordered   = item_names,
               estimator = "WLSMV",
               parameterization = "theta",
               std.lv    = TRUE)



# The SEM results show that hypotheses H1 through H9 are supported: 

pe <- parameterEstimates(fit_ord, standardized = TRUE)

pe %>% 
  filter(op == "~") %>% 
  select(-ci.lower, -ci.upper, -std.lv, -std.all) %>% 
  mutate_if(is.numeric, function(x) {round(x, 3)}) %>% 
  kbl(caption = "Table 1: The SEM results show that hypotheses H1 through H9 are supported (confirmed)") %>%
  kable_classic(full_width = TRUE, html_font = "Cambria")

Table 1: The SEM results show that hypotheses H1 through H9 are supported (confirmed)
lhs	op	rhs	est	se	z	pvalue
D	~	A	0.507	0.074	6.882	0.000
D	~	B	-0.473	0.081	-5.869	0.000
D	~	C	0.390	0.093	4.192	0.000
E	~	A	0.477	0.090	5.272	0.000
E	~	B	-0.301	0.089	-3.368	0.001
E	~	C	-0.297	0.106	-2.814	0.005
E	~	D	0.547	0.076	7.236	0.000
F	~	D	0.328	0.081	4.044	0.000
F	~	E	0.678	0.083	8.200	0.000

#  Fit model indicators: 

fitMeasures(fit_ord,
            fit.measures = c("cfi", "tli", "rmsea", "gfi", "chisq", "df", "fmin", "ifi"))

##     cfi     tli   rmsea     gfi   chisq      df    fmin     ifi 
##   1.000   1.004   0.000   0.997  94.542 197.000   0.095   1.004

#-------------------------------
#      Cronbach alpha section
#-------------------------------

cronbach_alpha <- function(var_prefix) {
  data_Likert5 %>%
    select(starts_with(var_prefix)) %>%
    psych::alpha() -> alpha_factor

  alpha_factor$total$raw_alpha
}

# Cronbach Alpha for A factor: 

cronbach_alpha("A")  # Factor A (A1-A3)

## [1] 0.8626427

# Not show results: 
cronbach_alpha("B")  # Factor B (B1-B3)
cronbach_alpha("C")  # Factor C (C1-C4)
cronbach_alpha("D")  # Factor D (D1-D4)
cronbach_alpha("E")  # Factor E (E1-E4)
cronbach_alpha("F")  # Factor F (F1-F4)

Structural Equation Modeling (SEM) using R

SEM Series

Nguyen Chi Dung

Introduction

R Codes