#install.packages("tmvnsim", dependencies = TRUE)
#install.packages("mnormt", dependencies = TRUE)
#install.packages("lavaan", dependencies = TRUE) #lavaan package depends on tmvnsim and mnormt 
#install.packages("semPlot", dependencies = TRUE)
library(lavaan)
library(semPlot) #package to visulize SEM
df <- read.csv("~/Dropbox/Labs/SLM637/esports viewership.csv") #note that you need to specify the full file address here in R Markdown (regardless your working directory setup)
#str(df)
#no missing value issue involved. If yes, please refer back to the PCA file.

model_spec = "
  # measurement model
    SKI =~ SKI_1 + SKI_2 + SKI_3 + SKI_4 + SKI_5 + SKI_6
    EXC =~ EXC_1 + EXC_2 + EXC_3 + EXC_4
  # relationship model
    Hours_Watch ~ SKI + EXC
"
#no need to specify the relationship between SKI and EXC. It will be automatically included in the estimation.

model <- sem(model_spec, df)

summary(model, fit.measures=TRUE, standardized = TRUE) #standardized=T gives the standardized factor loadings in the measurement model and standardized regression coefficients. So you can compare the loading values/coefficient values across different items/variables: compare factor loadings with factor loadings; compare coefficients with coefficients.
## lavaan 0.6.15 ended normally after 32 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        24
## 
##   Number of observations                           633
## 
## Model Test User Model:
##                                                       
##   Test statistic                               120.681
##   Degrees of freedom                                42
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              5458.952
##   Degrees of freedom                                55
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.985
##   Tucker-Lewis Index (TLI)                       0.981
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -9161.967
##   Loglikelihood unrestricted model (H1)      -9101.626
##                                                       
##   Akaike (AIC)                               18371.933
##   Bayesian (BIC)                             18478.744
##   Sample-size adjusted Bayesian (SABIC)      18402.547
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.054
##   90 Percent confidence interval - lower         0.043
##   90 Percent confidence interval - upper         0.066
##   P-value H_0: RMSEA <= 0.050                    0.248
##   P-value H_0: RMSEA >= 0.080                    0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.036
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   SKI =~                                                                
##     SKI_1             1.000                               1.152    0.840
##     SKI_2             0.945    0.036   25.898    0.000    1.088    0.836
##     SKI_3             0.980    0.043   22.542    0.000    1.129    0.764
##     SKI_4             1.011    0.037   27.573    0.000    1.164    0.869
##     SKI_5             0.883    0.039   22.878    0.000    1.016    0.772
##     SKI_6             0.984    0.037   26.454    0.000    1.134    0.848
##   EXC =~                                                                
##     EXC_1             1.000                               1.042    0.854
##     EXC_2             1.110    0.033   33.746    0.000    1.157    0.934
##     EXC_3             1.174    0.034   34.886    0.000    1.223    0.950
##     EXC_4             1.188    0.039   30.759    0.000    1.239    0.891
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Hours_Watch ~                                                         
##     SKI              -0.076    0.048   -1.572    0.116   -0.087   -0.067
##     EXC               0.182    0.053    3.451    0.001    0.190    0.146
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   SKI ~~                                                                
##     EXC               0.349    0.054    6.496    0.000    0.291    0.291
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .SKI_1             0.552    0.038   14.518    0.000    0.552    0.294
##    .SKI_2             0.508    0.035   14.623    0.000    0.508    0.300
##    .SKI_3             0.909    0.057   15.898    0.000    0.909    0.416
##    .SKI_4             0.438    0.032   13.563    0.000    0.438    0.244
##    .SKI_5             0.702    0.044   15.802    0.000    0.702    0.405
##    .SKI_6             0.504    0.035   14.317    0.000    0.504    0.282
##    .EXC_1             0.404    0.026   15.607    0.000    0.404    0.271
##    .EXC_2             0.197    0.017   11.886    0.000    0.197    0.128
##    .EXC_3             0.162    0.016    9.995    0.000    0.162    0.098
##    .EXC_4             0.396    0.027   14.590    0.000    0.396    0.205
##    .Hours_Watch       1.647    0.093   17.767    0.000    1.647    0.980
##     SKI               1.326    0.103   12.843    0.000    1.000    1.000
##     EXC               1.087    0.082   13.315    0.000    1.000    1.000
semPaths(model) #Model graph without estimation

semPaths(model, "std", residuals = T) #Model graph with standardized coefficients and residuals