1. Import Library

library(lavaan)
## Warning: package 'lavaan' was built under R version 4.5.3
## This is lavaan 0.6-21
## lavaan is FREE software! Please report any bugs.
library(psych)
## Warning: package 'psych' was built under R version 4.5.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:lavaan':
## 
##     cor2cov

2. Import Dataset

data <- read.csv("student-por.csv", header = TRUE)
str(data)
## 'data.frame':    649 obs. of  33 variables:
##  $ school    : chr  "GP" "GP" "GP" "GP" ...
##  $ sex       : chr  "F" "F" "F" "F" ...
##  $ age       : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address   : chr  "U" "U" "U" "U" ...
##  $ famsize   : chr  "GT3" "GT3" "LE3" "GT3" ...
##  $ Pstatus   : chr  "A" "T" "T" "T" ...
##  $ Medu      : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu      : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ Mjob      : chr  "at_home" "at_home" "at_home" "health" ...
##  $ Fjob      : chr  "teacher" "other" "other" "services" ...
##  $ reason    : chr  "course" "course" "other" "home" ...
##  $ guardian  : chr  "mother" "father" "mother" "mother" ...
##  $ traveltime: int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ schoolsup : chr  "yes" "no" "yes" "no" ...
##  $ famsup    : chr  "no" "yes" "no" "yes" ...
##  $ paid      : chr  "no" "no" "no" "no" ...
##  $ activities: chr  "no" "no" "no" "yes" ...
##  $ nursery   : chr  "yes" "no" "yes" "yes" ...
##  $ higher    : chr  "yes" "yes" "yes" "yes" ...
##  $ internet  : chr  "no" "yes" "yes" "yes" ...
##  $ romantic  : chr  "no" "no" "no" "yes" ...
##  $ famrel    : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime  : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout     : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc      : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc      : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health    : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences  : int  4 2 6 0 0 6 0 2 0 0 ...
##  $ G1        : int  0 9 12 14 11 12 13 10 15 12 ...
##  $ G2        : int  11 11 13 14 13 12 12 13 16 12 ...
##  $ G3        : int  11 11 12 14 13 13 13 13 17 13 ...
head(data)
##   school sex age address famsize Pstatus Medu Fedu     Mjob     Fjob     reason
## 1     GP   F  18       U     GT3       A    4    4  at_home  teacher     course
## 2     GP   F  17       U     GT3       T    1    1  at_home    other     course
## 3     GP   F  15       U     LE3       T    1    1  at_home    other      other
## 4     GP   F  15       U     GT3       T    4    2   health services       home
## 5     GP   F  16       U     GT3       T    3    3    other    other       home
## 6     GP   M  16       U     LE3       T    4    3 services    other reputation
##   guardian traveltime studytime failures schoolsup famsup paid activities
## 1   mother          2         2        0       yes     no   no         no
## 2   father          1         2        0        no    yes   no         no
## 3   mother          1         2        0       yes     no   no         no
## 4   mother          1         3        0        no    yes   no        yes
## 5   father          1         2        0        no    yes   no         no
## 6   mother          1         2        0        no    yes   no        yes
##   nursery higher internet romantic famrel freetime goout Dalc Walc health
## 1     yes    yes       no       no      4        3     4    1    1      3
## 2      no    yes      yes       no      5        3     3    1    1      3
## 3     yes    yes      yes       no      4        3     2    2    3      3
## 4     yes    yes      yes      yes      3        2     2    1    1      5
## 5     yes    yes       no       no      4        3     2    1    2      5
## 6     yes    yes      yes       no      5        4     2    1    2      5
##   absences G1 G2 G3
## 1        4  0 11 11
## 2        2  9 11 11
## 3        6 12 13 12
## 4        0 14 14 14
## 5        0 11 13 13
## 6        6 12 12 13

4. Statistik Deskriptif

describe(data[, c("famrel", "studytime", "failures", "health", 
                  "Walc", "Dalc", "goout", "freetime", "romantic",
                  "G1", "G2", "G3")])
##           vars   n  mean   sd median trimmed  mad min max range  skew kurtosis
## famrel       1 649  3.93 0.96      4    4.05 1.48   1   5     4 -1.10     1.32
## studytime    2 649  1.93 0.83      2    1.85 1.48   1   4     3  0.70     0.02
## failures     3 649  0.22 0.59      0    0.07 0.00   0   3     3  3.08     9.70
## health       4 649  3.54 1.45      4    3.67 1.48   1   5     4 -0.50    -1.13
## Walc         5 649  2.28 1.28      2    2.14 1.48   1   5     4  0.63    -0.78
## Dalc         6 649  1.50 0.92      1    1.28 0.00   1   5     4  2.13     4.28
## goout        7 649  3.18 1.18      3    3.20 1.48   1   5     4 -0.01    -0.87
## freetime     8 649  3.18 1.05      3    3.19 1.48   1   5     4 -0.18    -0.41
## romantic*    9 649  1.37 0.48      1    1.34 0.00   1   2     1  0.55    -1.71
## G1          10 649 11.40 2.75     11   11.38 2.97   0  19    19  0.00     0.02
## G2          11 649 11.57 2.91     11   11.56 2.97   0  19    19 -0.36     1.63
## G3          12 649 11.91 3.23     12   12.04 2.97   0  19    19 -0.91     2.66
##             se
## famrel    0.04
## studytime 0.03
## failures  0.02
## health    0.06
## Walc      0.05
## Dalc      0.04
## goout     0.05
## freetime  0.04
## romantic* 0.02
## G1        0.11
## G2        0.11
## G3        0.13

5. Transformasi Variabel Kategorik

data$romantic_num <- ifelse(data$romantic == "yes", 1, 0)

6. Mengecek Missing Value

colSums(is.na(data))
##       school          sex          age      address      famsize      Pstatus 
##            0            0            0            0            0            0 
##         Medu         Fedu         Mjob         Fjob       reason     guardian 
##            0            0            0            0            0            0 
##   traveltime    studytime     failures    schoolsup       famsup         paid 
##            0            0            0            0            0            0 
##   activities      nursery       higher     internet     romantic       famrel 
##            0            0            0            0            0            0 
##     freetime        goout         Dalc         Walc       health     absences 
##            0            0            0            0            0            0 
##           G1           G2           G3 romantic_num 
##            0            0            0            0

7. Menyiapkan Data untuk SEM

data_sem <- data[, c("famrel", "freetime", "studytime", "failures", 
                     "goout", "health", "Walc", "Dalc", 
                     "G1", "G2", "G3", "romantic_num")]

8. Standardisasi Data

data_sem_std <- as.data.frame(scale(data_sem))

9. Membuat Model SEM

model_sederhana <- '
  # Measurement Model
  Social =~ 1*famrel + freetime
  StudyHabit =~ 1*studytime + failures
  Achievement =~ 1*G1 + G2 + G3
  
  # Structural Model
  Achievement ~ Social + StudyHabit
  
  # Covariance
  Social ~~ StudyHabit
'

10. Estimasi Model SEM

fit_sederhana <- sem(model_sederhana, 
                     data = data_sem_std, 
                     std.lv = TRUE)
## Warning: lavaan->lav_object_post_check():  
##    some estimated ov variances are negative

11. Hasil SEM

summary(fit_sederhana, 
        fit.measures = TRUE, 
        standardized = TRUE)
## lavaan 0.6-21 ended normally after 22 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        14
## 
##   Number of observations                           649
## 
## Model Test User Model:
##                                                       
##   Test statistic                               153.124
##   Degrees of freedom                                14
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              2321.025
##   Degrees of freedom                                21
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.940
##   Tucker-Lewis Index (TLI)                       0.909
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -5358.784
##   Loglikelihood unrestricted model (H1)      -5282.222
##                                                       
##   Akaike (AIC)                               10745.569
##   Bayesian (BIC)                             10808.225
##   Sample-size adjusted Bayesian (SABIC)      10763.775
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.124
##   90 Percent confidence interval - lower         0.106
##   90 Percent confidence interval - upper         0.142
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.166
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Social =~                                                             
##     famrel            1.000                               1.000    1.002
##     freetime          0.129    0.039    3.261    0.001    0.129    0.129
##   StudyHabit =~                                                         
##     studytime         1.000                               1.000    0.994
##     failures         -0.150    0.040   -3.761    0.000   -0.150   -0.150
##   Achievement =~                                                        
##     G1                1.000                               1.031    0.910
##     G2                1.051    0.019   56.709    0.000    1.084    0.983
##     G3                1.007    0.020   49.374    0.000    1.039    0.949
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Achievement ~                                                         
##     Social            0.077    0.040    1.927    0.054    0.075    0.075
##     StudyHabit        0.240    0.042    5.664    0.000    0.232    0.232
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Social ~~                                                             
##     StudyHabit       -0.004    0.039   -0.102    0.919   -0.004   -0.004
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .famrel           -0.005    0.055   -0.082    0.934   -0.005   -0.005
##    .freetime          0.982    0.055   18.013    0.000    0.982    0.983
##    .studytime         0.011    0.056    0.201    0.840    0.011    0.011
##    .failures          0.976    0.054   18.004    0.000    0.976    0.977
##    .G1                0.219    0.014   15.247    0.000    0.219    0.171
##    .G2                0.042    0.009    4.798    0.000    0.042    0.035
##    .G3                0.120    0.010   11.701    0.000    0.120    0.100
##     Social            1.000                               1.000    1.000
##     StudyHabit        1.000                               1.000    1.000
##    .Achievement       1.000                               0.941    0.941

13. Membuat Model Path Analysis

model_path <- '
  G3 ~ G1 + G2 + studytime + failures + famrel + freetime
'

14. Estimasi Model Path

fit_path <- sem(model_path, data = data)

15. Hasil Path Analysis

summary(fit_path, 
        fit.measures = TRUE, 
        standardized = TRUE)
## lavaan 0.6-21 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         7
## 
##   Number of observations                           649
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                              1231.666
##   Degrees of freedom                                 6
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1065.630
##   Loglikelihood unrestricted model (H1)      -1065.630
##                                                       
##   Akaike (AIC)                                2145.261
##   Bayesian (BIC)                              2176.589
##   Sample-size adjusted Bayesian (SABIC)       2154.364
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   G3 ~                                                                  
##     G1                0.133    0.036    3.685    0.000    0.133    0.113
##     G2                0.888    0.034   26.110    0.000    0.888    0.801
##     studytime         0.082    0.061    1.326    0.185    0.082    0.021
##     failures         -0.199    0.091   -2.198    0.028   -0.199   -0.037
##     famrel           -0.046    0.052   -0.879    0.379   -0.046   -0.014
##     freetime         -0.060    0.048   -1.253    0.210   -0.060   -0.019
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .G3                1.562    0.087   18.014    0.000    1.562    0.150

16. Mengecek Nilai R-Squared dan Koefisien Jalur

parameterEstimates(fit_path)[parameterEstimates(fit_path)$op == "~", ]
##   lhs op       rhs    est    se      z pvalue ci.lower ci.upper
## 1  G3  ~        G1  0.133 0.036  3.685  0.000    0.062    0.204
## 2  G3  ~        G2  0.888 0.034 26.110  0.000    0.821    0.955
## 3  G3  ~ studytime  0.082 0.061  1.326  0.185   -0.039    0.202
## 4  G3  ~  failures -0.199 0.091 -2.198  0.028   -0.377   -0.022
## 5  G3  ~    famrel -0.046 0.052 -0.879  0.379   -0.148    0.056
## 6  G3  ~  freetime -0.060 0.048 -1.253  0.210   -0.153    0.034