1. Import Library

library(lavaan)

## Warning: package 'lavaan' was built under R version 4.5.3

## This is lavaan 0.6-21
## lavaan is FREE software! Please report any bugs.

library(psych)

## Warning: package 'psych' was built under R version 4.5.3

## 
## Attaching package: 'psych'

## The following object is masked from 'package:lavaan':
## 
##     cor2cov

2. Import Dataset

data <- read.csv("student-por.csv", header = TRUE)
str(data)

## 'data.frame':    649 obs. of  33 variables:
##  $ school    : chr  "GP" "GP" "GP" "GP" ...
##  $ sex       : chr  "F" "F" "F" "F" ...
##  $ age       : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address   : chr  "U" "U" "U" "U" ...
##  $ famsize   : chr  "GT3" "GT3" "LE3" "GT3" ...
##  $ Pstatus   : chr  "A" "T" "T" "T" ...
##  $ Medu      : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu      : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ Mjob      : chr  "at_home" "at_home" "at_home" "health" ...
##  $ Fjob      : chr  "teacher" "other" "other" "services" ...
##  $ reason    : chr  "course" "course" "other" "home" ...
##  $ guardian  : chr  "mother" "father" "mother" "mother" ...
##  $ traveltime: int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ schoolsup : chr  "yes" "no" "yes" "no" ...
##  $ famsup    : chr  "no" "yes" "no" "yes" ...
##  $ paid      : chr  "no" "no" "no" "no" ...
##  $ activities: chr  "no" "no" "no" "yes" ...
##  $ nursery   : chr  "yes" "no" "yes" "yes" ...
##  $ higher    : chr  "yes" "yes" "yes" "yes" ...
##  $ internet  : chr  "no" "yes" "yes" "yes" ...
##  $ romantic  : chr  "no" "no" "no" "yes" ...
##  $ famrel    : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime  : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout     : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc      : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc      : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health    : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences  : int  4 2 6 0 0 6 0 2 0 0 ...
##  $ G1        : int  0 9 12 14 11 12 13 10 15 12 ...
##  $ G2        : int  11 11 13 14 13 12 12 13 16 12 ...
##  $ G3        : int  11 11 12 14 13 13 13 13 17 13 ...

head(data)

##   school sex age address famsize Pstatus Medu Fedu     Mjob     Fjob     reason
## 1     GP   F  18       U     GT3       A    4    4  at_home  teacher     course
## 2     GP   F  17       U     GT3       T    1    1  at_home    other     course
## 3     GP   F  15       U     LE3       T    1    1  at_home    other      other
## 4     GP   F  15       U     GT3       T    4    2   health services       home
## 5     GP   F  16       U     GT3       T    3    3    other    other       home
## 6     GP   M  16       U     LE3       T    4    3 services    other reputation
##   guardian traveltime studytime failures schoolsup famsup paid activities
## 1   mother          2         2        0       yes     no   no         no
## 2   father          1         2        0        no    yes   no         no
## 3   mother          1         2        0       yes     no   no         no
## 4   mother          1         3        0        no    yes   no        yes
## 5   father          1         2        0        no    yes   no         no
## 6   mother          1         2        0        no    yes   no        yes
##   nursery higher internet romantic famrel freetime goout Dalc Walc health
## 1     yes    yes       no       no      4        3     4    1    1      3
## 2      no    yes      yes       no      5        3     3    1    1      3
## 3     yes    yes      yes       no      4        3     2    2    3      3
## 4     yes    yes      yes      yes      3        2     2    1    1      5
## 5     yes    yes       no       no      4        3     2    1    2      5
## 6     yes    yes      yes       no      5        4     2    1    2      5
##   absences G1 G2 G3
## 1        4  0 11 11
## 2        2  9 11 11
## 3        6 12 13 12
## 4        0 14 14 14
## 5        0 11 13 13
## 6        6 12 12 13

4. Statistik Deskriptif

describe(data[, c("famrel", "studytime", "failures", "health", 
                  "Walc", "Dalc", "goout", "freetime", "romantic",
                  "G1", "G2", "G3")])

##           vars   n  mean   sd median trimmed  mad min max range  skew kurtosis
## famrel       1 649  3.93 0.96      4    4.05 1.48   1   5     4 -1.10     1.32
## studytime    2 649  1.93 0.83      2    1.85 1.48   1   4     3  0.70     0.02
## failures     3 649  0.22 0.59      0    0.07 0.00   0   3     3  3.08     9.70
## health       4 649  3.54 1.45      4    3.67 1.48   1   5     4 -0.50    -1.13
## Walc         5 649  2.28 1.28      2    2.14 1.48   1   5     4  0.63    -0.78
## Dalc         6 649  1.50 0.92      1    1.28 0.00   1   5     4  2.13     4.28
## goout        7 649  3.18 1.18      3    3.20 1.48   1   5     4 -0.01    -0.87
## freetime     8 649  3.18 1.05      3    3.19 1.48   1   5     4 -0.18    -0.41
## romantic*    9 649  1.37 0.48      1    1.34 0.00   1   2     1  0.55    -1.71
## G1          10 649 11.40 2.75     11   11.38 2.97   0  19    19  0.00     0.02
## G2          11 649 11.57 2.91     11   11.56 2.97   0  19    19 -0.36     1.63
## G3          12 649 11.91 3.23     12   12.04 2.97   0  19    19 -0.91     2.66
##             se
## famrel    0.04
## studytime 0.03
## failures  0.02
## health    0.06
## Walc      0.05
## Dalc      0.04
## goout     0.05
## freetime  0.04
## romantic* 0.02
## G1        0.11
## G2        0.11
## G3        0.13

5. Transformasi Variabel Kategorik

data$romantic_num <- ifelse(data$romantic == "yes", 1, 0)

6. Mengecek Missing Value

colSums(is.na(data))

##       school          sex          age      address      famsize      Pstatus 
##            0            0            0            0            0            0 
##         Medu         Fedu         Mjob         Fjob       reason     guardian 
##            0            0            0            0            0            0 
##   traveltime    studytime     failures    schoolsup       famsup         paid 
##            0            0            0            0            0            0 
##   activities      nursery       higher     internet     romantic       famrel 
##            0            0            0            0            0            0 
##     freetime        goout         Dalc         Walc       health     absences 
##            0            0            0            0            0            0 
##           G1           G2           G3 romantic_num 
##            0            0            0            0

7. Menyiapkan Data untuk SEM

data_sem <- data[, c("famrel", "freetime", "studytime", "failures", 
                     "goout", "health", "Walc", "Dalc", 
                     "G1", "G2", "G3", "romantic_num")]

8. Standardisasi Data

data_sem_std <- as.data.frame(scale(data_sem))

9. Membuat Model SEM

model_sederhana <- '
  # Measurement Model
  Social =~ 1*famrel + freetime
  StudyHabit =~ 1*studytime + failures
  Achievement =~ 1*G1 + G2 + G3
  
  # Structural Model
  Achievement ~ Social + StudyHabit
  
  # Covariance
  Social ~~ StudyHabit
'

10. Estimasi Model SEM

fit_sederhana <- sem(model_sederhana, 
                     data = data_sem_std, 
                     std.lv = TRUE)

## Warning: lavaan->lav_object_post_check():  
##    some estimated ov variances are negative

11. Hasil SEM

summary(fit_sederhana, 
        fit.measures = TRUE, 
        standardized = TRUE)

## lavaan 0.6-21 ended normally after 22 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        14
## 
##   Number of observations                           649
## 
## Model Test User Model:
##                                                       
##   Test statistic                               153.124
##   Degrees of freedom                                14
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              2321.025
##   Degrees of freedom                                21
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.940
##   Tucker-Lewis Index (TLI)                       0.909
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -5358.784
##   Loglikelihood unrestricted model (H1)      -5282.222
##                                                       
##   Akaike (AIC)                               10745.569
##   Bayesian (BIC)                             10808.225
##   Sample-size adjusted Bayesian (SABIC)      10763.775
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.124
##   90 Percent confidence interval - lower         0.106
##   90 Percent confidence interval - upper         0.142
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.166
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Social =~                                                             
##     famrel            1.000                               1.000    1.002
##     freetime          0.129    0.039    3.261    0.001    0.129    0.129
##   StudyHabit =~                                                         
##     studytime         1.000                               1.000    0.994
##     failures         -0.150    0.040   -3.761    0.000   -0.150   -0.150
##   Achievement =~                                                        
##     G1                1.000                               1.031    0.910
##     G2                1.051    0.019   56.709    0.000    1.084    0.983
##     G3                1.007    0.020   49.374    0.000    1.039    0.949
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Achievement ~                                                         
##     Social            0.077    0.040    1.927    0.054    0.075    0.075
##     StudyHabit        0.240    0.042    5.664    0.000    0.232    0.232
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Social ~~                                                             
##     StudyHabit       -0.004    0.039   -0.102    0.919   -0.004   -0.004
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .famrel           -0.005    0.055   -0.082    0.934   -0.005   -0.005
##    .freetime          0.982    0.055   18.013    0.000    0.982    0.983
##    .studytime         0.011    0.056    0.201    0.840    0.011    0.011
##    .failures          0.976    0.054   18.004    0.000    0.976    0.977
##    .G1                0.219    0.014   15.247    0.000    0.219    0.171
##    .G2                0.042    0.009    4.798    0.000    0.042    0.035
##    .G3                0.120    0.010   11.701    0.000    0.120    0.100
##     Social            1.000                               1.000    1.000
##     StudyHabit        1.000                               1.000    1.000
##    .Achievement       1.000                               0.941    0.941

13. Membuat Model Path Analysis

model_path <- '
  G3 ~ G1 + G2 + studytime + failures + famrel + freetime
'

14. Estimasi Model Path

fit_path <- sem(model_path, data = data)

15. Hasil Path Analysis

summary(fit_path, 
        fit.measures = TRUE, 
        standardized = TRUE)

## lavaan 0.6-21 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         7
## 
##   Number of observations                           649
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                              1231.666
##   Degrees of freedom                                 6
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1065.630
##   Loglikelihood unrestricted model (H1)      -1065.630
##                                                       
##   Akaike (AIC)                                2145.261
##   Bayesian (BIC)                              2176.589
##   Sample-size adjusted Bayesian (SABIC)       2154.364
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   G3 ~                                                                  
##     G1                0.133    0.036    3.685    0.000    0.133    0.113
##     G2                0.888    0.034   26.110    0.000    0.888    0.801
##     studytime         0.082    0.061    1.326    0.185    0.082    0.021
##     failures         -0.199    0.091   -2.198    0.028   -0.199   -0.037
##     famrel           -0.046    0.052   -0.879    0.379   -0.046   -0.014
##     freetime         -0.060    0.048   -1.253    0.210   -0.060   -0.019
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .G3                1.562    0.087   18.014    0.000    1.562    0.150

16. Mengecek Nilai R-Squared dan Koefisien Jalur

parameterEstimates(fit_path)[parameterEstimates(fit_path)$op == "~", ]

##   lhs op       rhs    est    se      z pvalue ci.lower ci.upper
## 1  G3  ~        G1  0.133 0.036  3.685  0.000    0.062    0.204
## 2  G3  ~        G2  0.888 0.034 26.110  0.000    0.821    0.955
## 3  G3  ~ studytime  0.082 0.061  1.326  0.185   -0.039    0.202
## 4  G3  ~  failures -0.199 0.091 -2.198  0.028   -0.377   -0.022
## 5  G3  ~    famrel -0.046 0.052 -0.879  0.379   -0.148    0.056
## 6  G3  ~  freetime -0.060 0.048 -1.253  0.210   -0.153    0.034

Implementasi Structural Equation Modeling (SEM) pada Faktor-Faktor yang Memengaruhi Prestasi Akademik Siswa

Tazkia Caecaria Marchanda (24031554002), Alya Ummi Faricha(24031554123), Fajar Syihabuddin (24031554142)

2026-05-28

1. Import Library

2. Import Dataset

4. Statistik Deskriptif

5. Transformasi Variabel Kategorik

6. Mengecek Missing Value

7. Menyiapkan Data untuk SEM

8. Standardisasi Data

9. Membuat Model SEM

10. Estimasi Model SEM

11. Hasil SEM

13. Membuat Model Path Analysis

14. Estimasi Model Path

15. Hasil Path Analysis

16. Mengecek Nilai R-Squared dan Koefisien Jalur