1 FUNCTIONS

2 DATA PREPARATION

2.1 Load all sheets of an Excel workbook

# Download the workbook and read all of its sheets into a list
# (one element per sheet, accessed by sheet name further down).
Sheets <- rio::import_list(
  file = "https://drive.google.com/uc?id=1eWmXuL3uXAHMZesaCuiSQcuMD8djEKNc&export=download"
)
# Local-file alternative:
# Sheets <- rio::import_list("C:\\Users\\S\\Documents\\WXWork\\1688851821252719\\Cache\\File\\2024-09\\Matrix-overall.xlsx")
# as.data.frame(Sheets$Template)

# For every imported sheet, overwrite missing cells with the sentinel -999.
# No conversion to data.table is done at this point; only NAs are replaced.
Sheets <- lapply(Sheets, function(sheet) {
  missing_cells <- is.na(sheet)
  sheet[missing_cells] <- -999
  sheet
})

2.2 Convert all data to wide format within one table

# Run Matrix.toDT() on each sheet (empty title).
result_list <- lapply(Sheets, function(one_sheet) {
  Matrix.toDT(one_sheet, title = "")
})

# Stack the first element of every result into one data.table, tag each row
# with the name of the sheet it came from, and drop the two non-data sheets.
df <- bind_rows(lapply(result_list, `[[`, 1))
setDT(df)
df[, MatrixName := names(result_list)]
df <- df[MatrixName != "Sources" & MatrixName != "Template", ]

# Coerce every column except "MatrixName" to numeric.
cols_to_convert <- setdiff(names(df), "MatrixName")
df <- df[
  ,
  (cols_to_convert) := lapply(.SD, as.numeric),
  .SDcols = cols_to_convert
]

#MatrixName=df$MatrixName  

#df[] <- lapply(df, as.numeric)
#df=df[, names(df) := lapply(.SD, as.numeric)]

#names(df)
#unique(df$MatrixName)
#print_table(df)
#View(df)
# 查看合并后的数据框
#export(combined_df,"JPJSrow.xlsx")
#data <-read_sheet("https://docs.google.com/spreadsheets/d/1Q8OUSBQ9a1R0jJ14PtEH3w74aTmwyru3u3uIOst3DC4/edit?usp=sharing")
#View(data)

2.3 Convert all data to matrix format

# Rebuild the matrices from the wide table. The result is indexed by matrix
# name afterwards (e.g. cor_matrices$r, cor_matrices$`Sample size`).
cor_matrices <- Matrix.fromDT(
  as.data.frame(df),
  "A-ES",
  "Y-X",
  "MatrixName"
)
## 正在处理行:  r 
## 正在处理行:  C-r 
## 正在处理行:  SD-r 
## 正在处理行:  SD-cr 
## 正在处理行:  Sample size 
## 正在处理行:  K Size 
## 正在处理行:  95%CI-L 
## 正在处理行:  95%CI-U
# The `r` matrix is what the models below receive as sample.cov.
P <- cor_matrices[["r"]]
# Summary statistics (printed by Matrix.Stats) for the sample-size sheet.
Nnum <- Matrix.Stats(Sheets[["Sample size"]])
## Statistics of matrix
## ────────────────────────────
##    harmonic_mean         sum
## ────────────────────────────
## 1      33785.447 1101817.000
## ────────────────────────────
## Descriptive Statistics:
## ───────────────────────────────────────────────────────────────────────
##     N     Mean       SD |   Median      Min       Max Skewness Kurtosis
## ───────────────────────────────────────────────────────────────────────
##    21 52467.48 30517.75 | 54471.00 11856.00 106149.00     0.20    -1.42
## ───────────────────────────────────────────────────────────────────────
# Summary statistics (printed by Matrix.Stats) for the `K Size` sheet.
Knum <- Matrix.Stats(Sheets[["K Size"]])
## Statistics of matrix
## ─────────────────────────
##    harmonic_mean      sum
## ─────────────────────────
## 1         94.418 2496.000
## ─────────────────────────
## Descriptive Statistics:
## ──────────────────────────────────────────────────────────
##     N   Mean    SD | Median   Min    Max Skewness Kurtosis
## ──────────────────────────────────────────────────────────
##    21 118.86 59.32 | 114.00 38.00 312.00     1.40     2.84
## ──────────────────────────────────────────────────────────
# Sample size handed to sem(): sum of the sample-size matrix divided by the
# sum of the K-size matrix, rounded to a whole number.
# Despite the name, this is not Nnum$harmonic_mean.
Nhar <- round(Nnum$sum / Knum$sum, digits = 0)

3 OVERALL RESULTS

# lavaan-syntax model: X regressed on O, C, EX, A and ES; Y regressed on X;
# residual covariance between Y and X. The comment lines inside the string are
# part of the literal and are therefore left untouched.
# NOTE(review): IV.oceanS0 is not referenced again in the visible code; the
# fitted models are built from IV.S instead -- confirm whether it is still needed.
IV.oceanS0  <- '
  # X 由 ES、A、C、EX 和 O 预测
  X ~  O + C + EX + A + ES

  # Y 由 X 预测
  Y ~ X
  
  # Residual correlation
  Y ~~ X
'

# 1.1 Model specification, written in Mplus syntax.
IV.S <- "
  X ON O C EX A ES;
  Y ON X;
  Y WITH X;
"

# Convert to lavaan syntax. IV is overwritten by later sections, while IVb
# keeps a copy of the baseline specification.
IVb <- mplus2lavaan.modelSyntax(IV.S)
IV <- mplus2lavaan.modelSyntax(IV.S)
cat(IV)
## X ~ O + C + EX + A + ES
## Y ~ X
## Y ~~ X

3.1 Template: IVs are OCEAN

# 1. Model specification: baseline model converted from the Mplus string.
IV <- mplus2lavaan.modelSyntax(IV.S)

# 1.2 Model fit: matrix P as sample.cov, Nhar as the number of observations.
IV.F <- sem(
  model = IV,
  sample.cov = P,
  sample.nobs = Nhar
)
lavaan_summary(IV.F)
## 
## Fit Measures (lavaan):
## χ²(4, N = 441) = 8.531, p = 0.074 .  
## χ²/df = 2.133
## AIC = 2451.765 (Akaike Information Criterion)
## BIC = 2488.566 (Bayesian Information Criterion)
## CFI = 0.930 (Comparative Fit Index)
## TLI = 0.808 (Tucker-Lewis Index; Non-Normed Fit Index, NNFI)
## NFI = 0.887 (Normed Fit Index)
## IFI = 0.937 (Incremental Fit Index)
## GFI = 0.981 (Goodness-of-Fit Index)
## AGFI = 0.869 (Adjusted Goodness-of-Fit Index)
## RMSEA = 0.051, 90% CI [0.000, 0.098] (Root Mean Square Error of Approximation)
## SRMR = 0.021 (Standardized Root Mean Square Residual)
## 
## Model Estimates (lavaan):
## ────────────────────────────────────────────────────────────────────────
##                    Estimate    S.E.      z     p       LLCI  ULCI   Beta
## ────────────────────────────────────────────────────────────────────────
## Regression Paths:                                                       
##   X <- O             -0.047 (0.044) -1.064  .287     -0.133 0.039 -0.047
##   X <- C              0.032 (0.044)  0.725  .468     -0.055 0.119  0.032
##   X <- EX             0.109 (0.046)  2.393  .017 *    0.020 0.198  0.109
##   X <- A              0.150 (0.045)  3.362 <.001 ***  0.063 0.238  0.150
##   X <- ES             0.176 (0.046)  3.859 <.001 ***  0.087 0.266  0.176
##   Y <- X              0.595 (0.164)  3.620 <.001 ***  0.273 0.917  0.595
## ────────────────────────────────────────────────────────────────────────
## Note. Raw (Standard) Confidence Interval (CI) and SE.
# 2.1 Wald test on the parameters named "X~<predictor>" of the fitted model.
variables <- c("ES", "A", "C", "EX", "O")
par.names <- paste0("X~", variables)
Wald.test(fit = IV.F, par.names, method = "UIMASEM") %>%
  setDT() %>%
  print_table()
## ─────────────────────────────
##    Wald.Statistic Wald.pValue
## ─────────────────────────────
## 1          46.636       0.000
## ─────────────────────────────
# 2.3 R2, computed from P with the predictors currently stored in `variables`.
R2xzw.MASEM(
  P = P,
  method = "UIMASEM",
  y.nm = "Y",
  X.nm = "X",
  Z.nm = variables
)
## $R2x.z
##         [,1]
## [1,] 0.09853
# 3. X -> Y: draw the path diagram of IV.F with coefficient labels; 0.05 is
# passed as the `sig` argument.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
lavaanPlot(model = IV.F, coefs = TRUE, sig = 0.05)

3.2 IVs are OCEAN

# 1. Model specification: reuse the baseline lavaan syntax kept in IVb
#    and print it for reference.
IV.ocean <- IVb
cat(IV.ocean)
## X ~ O + C + EX + A + ES
## Y ~ X
## Y ~~ X
# 1.2 Model fit: matrix P as sample.cov, Nhar as the number of observations.
IV.oceanF <- sem(
  model = IV.ocean,
  sample.cov = P,
  sample.nobs = Nhar
)
lavaan_summary(IV.oceanF)
## 
## Fit Measures (lavaan):
## χ²(4, N = 441) = 8.531, p = 0.074 .  
## χ²/df = 2.133
## AIC = 2451.765 (Akaike Information Criterion)
## BIC = 2488.566 (Bayesian Information Criterion)
## CFI = 0.930 (Comparative Fit Index)
## TLI = 0.808 (Tucker-Lewis Index; Non-Normed Fit Index, NNFI)
## NFI = 0.887 (Normed Fit Index)
## IFI = 0.937 (Incremental Fit Index)
## GFI = 0.981 (Goodness-of-Fit Index)
## AGFI = 0.869 (Adjusted Goodness-of-Fit Index)
## RMSEA = 0.051, 90% CI [0.000, 0.098] (Root Mean Square Error of Approximation)
## SRMR = 0.021 (Standardized Root Mean Square Residual)
## 
## Model Estimates (lavaan):
## ────────────────────────────────────────────────────────────────────────
##                    Estimate    S.E.      z     p       LLCI  ULCI   Beta
## ────────────────────────────────────────────────────────────────────────
## Regression Paths:                                                       
##   X <- O             -0.047 (0.044) -1.064  .287     -0.133 0.039 -0.047
##   X <- C              0.032 (0.044)  0.725  .468     -0.055 0.119  0.032
##   X <- EX             0.109 (0.046)  2.393  .017 *    0.020 0.198  0.109
##   X <- A              0.150 (0.045)  3.362 <.001 ***  0.063 0.238  0.150
##   X <- ES             0.176 (0.046)  3.859 <.001 ***  0.087 0.266  0.176
##   Y <- X              0.595 (0.164)  3.620 <.001 ***  0.273 0.917  0.595
## ────────────────────────────────────────────────────────────────────────
## Note. Raw (Standard) Confidence Interval (CI) and SE.
# 2.1 Wald test on the parameters named "X~<predictor>" of IV.oceanF.
variables <- c("ES", "A", "C", "EX", "O")
par.names <- paste0("X~", variables)
Wald.test(fit = IV.oceanF, par.names, method = "UIMASEM") %>%
  setDT() %>%
  print_table()
## ─────────────────────────────
##    Wald.Statistic Wald.pValue
## ─────────────────────────────
## 1          46.636       0.000
## ─────────────────────────────
# 2.3 R2, computed from P with the predictors currently stored in `variables`.
R2xzw.MASEM(
  P = P,
  method = "UIMASEM",
  y.nm = "Y",
  X.nm = "X",
  Z.nm = variables
)
## $R2x.z
##         [,1]
## [1,] 0.09853
# 3. X -> Y: draw the path diagram of IV.oceanF with coefficient labels; 0.05
# is passed as the `sig` argument.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
lavaanPlot(model = IV.oceanF, coefs = TRUE, sig = 0.05)

3.3 IVs are OCEAN with direct effects

# 1. Model specification: start from the baseline Mplus string and add direct
#    paths from EX, A and ES to Y, then convert to lavaan syntax.
# Alternative kept for reference:
# IV <- gsub("X ON O C EX A ES;", "X ON EX A ES;", IV.S)
IV <- gsub(
  pattern = "Y ON X;",
  replacement = "Y ON X EX A ES;",
  x = IV.S
)
IV <- mplus2lavaan.modelSyntax(IV)

# 1.2 Model fit: matrix P as sample.cov, Nhar as the number of observations.
IV.F <- sem(
  model = IV,
  sample.cov = P,
  sample.nobs = Nhar
)
lavaan_summary(IV.F)
## 
## Fit Measures (lavaan):
## χ²(1, N = 441) = 0.007, p = 0.933    
## χ²/df = 0.007
## AIC = 2449.241 (Akaike Information Criterion)
## BIC = 2498.310 (Bayesian Information Criterion)
## CFI = 1.000 (Comparative Fit Index)
## TLI = 1.169 (Tucker-Lewis Index; Non-Normed Fit Index, NNFI)
## NFI = 1.000 (Normed Fit Index)
## IFI = 1.013 (Incremental Fit Index)
## GFI = 1.000 (Goodness-of-Fit Index)
## AGFI = 1.000 (Adjusted Goodness-of-Fit Index)
## RMSEA = 0.000, 90% CI [0.000, 0.036] (Root Mean Square Error of Approximation)
## SRMR = 0.001 (Standardized Root Mean Square Residual)
## 
## Model Estimates (lavaan):
## ────────────────────────────────────────────────────────────────────────
##                    Estimate    S.E.      z     p       LLCI  ULCI   Beta
## ────────────────────────────────────────────────────────────────────────
## Regression Paths:                                                       
##   X <- O             -0.070 (0.046) -1.502  .133     -0.161 0.021 -0.070
##   X <- C              0.053 (0.045)  1.171  .242     -0.036 0.142  0.053
##   X <- EX             0.130 (0.049)  2.650  .008 **   0.034 0.226  0.130
##   X <- A              0.172 (0.048)  3.605 <.001 ***  0.079 0.266  0.172
##   X <- ES             0.131 (0.048)  2.725  .006 **   0.037 0.226  0.131
##   Y <- X             -0.374 (0.661) -0.566  .571     -1.669 0.920 -0.374
##   Y <- EX             0.067 (0.095)  0.703  .482     -0.119 0.252  0.067
##   Y <- A              0.105 (0.130)  0.814  .416     -0.148 0.359  0.105
##   Y <- ES             0.248 (0.105)  2.361  .018 *    0.042 0.454  0.248
## ────────────────────────────────────────────────────────────────────────
## Note. Raw (Standard) Confidence Interval (CI) and SE.
# 2.1 Wald test on the parameters X~ES, X~A and X~EX of the fitted model.
# NOTE(review): in this section X is still regressed on O, C, EX, A and ES,
# but only three of those coefficients are tested -- confirm the subset is intended.
variables <- c("ES", "A", "EX")
par.names <- paste0("X~", variables)
Wald.test(fit = IV.F, par.names, method = "UIMASEM") %>%
  setDT() %>%
  print_table()
## ─────────────────────────────
##    Wald.Statistic Wald.pValue
## ─────────────────────────────
## 1          40.656       0.000
## ─────────────────────────────
# 2.3 R2, computed from P with the predictors currently stored in `variables`.
R2xzw.MASEM(
  P = P,
  method = "UIMASEM",
  y.nm = "Y",
  X.nm = "X",
  Z.nm = variables
)
## $R2x.z
##         [,1]
## [1,] 0.09242
# 3. X -> Y: draw the path diagram of IV.F with coefficient labels; 0.05 is
# passed as the `sig` argument.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
lavaanPlot(model = IV.F, coefs = TRUE, sig = 0.05)
# Chi-square difference test: baseline OCEAN model (IV.oceanF, fitted in
# section 3.2) versus the current model with direct effects (IV.F).
# The statistics are read from the fitted objects rather than copied by hand
# from the printed summaries, so they stay correct if the data or models change.
as.data.table(list(
  ChiSq_Diff = as.numeric(lavaan::fitMeasures(IV.oceanF, "chisq")) -
    as.numeric(lavaan::fitMeasures(IV.F, "chisq")),
  DF_Diff = as.numeric(lavaan::fitMeasures(IV.oceanF, "df")) -
    as.numeric(lavaan::fitMeasures(IV.F, "df"))
)) %>%
  .[, ChiSq_p := pchisq(ChiSq_Diff, df = DF_Diff, lower.tail = FALSE)] %>%
  print_table()
## ─────────────────────────────
##    ChiSq_Diff DF_Diff ChiSq_p
## ─────────────────────────────
## 1       8.524   3.000   0.036
## ─────────────────────────────

3.4 IVs are EAN

# 1. Model specification: X is predicted by EX, A and ES only; O and C are
#    moved to the Y equation. The edited Mplus string is then converted.
IV <- gsub(
  pattern = "X ON O C EX A ES;",
  replacement = "X ON EX A ES;",
  x = IV.S
)
IV <- gsub(
  pattern = "Y ON X;",
  replacement = "Y ON X O C;",
  x = IV
)
IV <- mplus2lavaan.modelSyntax(IV)

# 1.2 Model fit: matrix P as sample.cov, Nhar as the number of observations.
IV.F <- sem(
  model = IV,
  sample.cov = P,
  sample.nobs = Nhar
)
lavaan_summary(IV.F)
## 
## Fit Measures (lavaan):
## χ²(4, N = 441) = 9.464, p = 0.051 .  
## χ²/df = 2.366
## AIC = 2452.698 (Akaike Information Criterion)
## BIC = 2489.499 (Bayesian Information Criterion)
## CFI = 0.916 (Comparative Fit Index)
## TLI = 0.768 (Tucker-Lewis Index; Non-Normed Fit Index, NNFI)
## NFI = 0.875 (Normed Fit Index)
## IFI = 0.924 (Incremental Fit Index)
## GFI = 0.979 (Goodness-of-Fit Index)
## AGFI = 0.855 (Adjusted Goodness-of-Fit Index)
## RMSEA = 0.056, 90% CI [0.000, 0.103] (Root Mean Square Error of Approximation)
## SRMR = 0.023 (Standardized Root Mean Square Residual)
## 
## Model Estimates (lavaan):
## ────────────────────────────────────────────────────────────────────────
##                    Estimate    S.E.      z     p       LLCI  ULCI   Beta
## ────────────────────────────────────────────────────────────────────────
## Regression Paths:                                                       
##   X <- EX             0.095 (0.043)  2.196  .028 *    0.010 0.179  0.095
##   X <- A              0.149 (0.044)  3.375 <.001 ***  0.062 0.236  0.149
##   X <- ES             0.180 (0.044)  4.101 <.001 ***  0.094 0.266  0.180
##   Y <- X              0.646 (0.183)  3.537 <.001 ***  0.288 1.004  0.646
##   Y <- O              0.029 (0.048)  0.598  .550     -0.066 0.123  0.029
##   Y <- C             -0.022 (0.049) -0.439  .661     -0.118 0.075 -0.022
## ────────────────────────────────────────────────────────────────────────
## Note. Raw (Standard) Confidence Interval (CI) and SE.
# 2.1 Wald test on the parameters X~ES, X~A and X~EX of the fitted model.
variables <- c("ES", "A", "EX")
par.names <- paste0("X~", variables)
Wald.test(fit = IV.F, par.names, method = "UIMASEM") %>%
  setDT() %>%
  print_table()
## ─────────────────────────────
##    Wald.Statistic Wald.pValue
## ─────────────────────────────
## 1          43.500       0.000
## ─────────────────────────────
# 2.3 R2, computed from P with the predictors currently stored in `variables`.
R2xzw.MASEM(
  P = P,
  method = "UIMASEM",
  y.nm = "Y",
  X.nm = "X",
  Z.nm = variables
)
## $R2x.z
##         [,1]
## [1,] 0.09242
# 3. X -> Y: draw the path diagram of IV.F with coefficient labels; 0.05 is
# passed as the `sig` argument.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
lavaanPlot(model = IV.F, coefs = TRUE, sig = 0.05)

4 OBJECTIVE SOURCES RESULTS

4.1 Data preparation

Coding of X ->Y: https://docs.google.com/spreadsheets/d/1_shaky4FeekfqvwQpChzKuBw6JDUaxcD/edit?usp=sharing&ouid=115346063709313800668&rtpof=true&sd=true

Replace the X–Y entries of the matrices with the values coded for CaseID = 13.

# Start from the overall matrices and overwrite both symmetric X-Y cells.
P <- cor_matrices[["r"]]
P["X", "Y"] <- P["Y", "X"] <- 0.16

NP <- cor_matrices[["Sample size"]]
NP["X", "Y"] <- NP["Y", "X"] <- 5216

KP <- cor_matrices[["K Size"]]
KP["X", "Y"] <- KP["Y", "X"] <- 34

# Summary statistics (printed by Matrix.Stats) of the edited sample-size matrix.
Nnum <- Matrix.Stats(NP)
## Statistics of matrix
## ────────────────────────────
##    harmonic_mean         sum
## ────────────────────────────
## 1      26417.461 1052562.000
## ────────────────────────────
## Descriptive Statistics:
## ──────────────────────────────────────────────────────────────────────
##     N     Mean       SD |   Median     Min       Max Skewness Kurtosis
## ──────────────────────────────────────────────────────────────────────
##    21 50122.00 32202.35 | 41939.00 5216.00 106149.00     0.23    -1.47
## ──────────────────────────────────────────────────────────────────────
# Summary statistics (printed by Matrix.Stats) of the edited K-size matrix.
Knum <- Matrix.Stats(KP)
## Statistics of matrix
## ─────────────────────────
##    harmonic_mean      sum
## ─────────────────────────
## 1         84.465 2218.000
## ─────────────────────────
## Descriptive Statistics:
## ──────────────────────────────────────────────────────────
##     N   Mean    SD | Median   Min    Max Skewness Kurtosis
## ──────────────────────────────────────────────────────────
##    21 105.62 42.78 | 114.00 34.00 182.00    -0.11    -1.16
## ──────────────────────────────────────────────────────────
# Sample size handed to sem(): sum of the sample-size matrix divided by the
# sum of the K-size matrix, rounded to a whole number.
# Despite the name, this is not Nnum$harmonic_mean.
Nhar <- round(Nnum$sum / Knum$sum, digits = 0)

4.2 IVs are OCEAN

# 1. Model specification: baseline model converted from the Mplus string.
IV <- mplus2lavaan.modelSyntax(IV.S)

# 1.2 Model fit: matrix P as sample.cov, Nhar as the number of observations.
IV.F <- sem(
  model = IV,
  sample.cov = P,
  sample.nobs = Nhar
)
lavaan_summary(IV.F)
## 
## Fit Measures (lavaan):
## χ²(4, N = 475) = 8.998, p = 0.061 .  
## χ²/df = 2.250
## AIC = 2642.051 (Akaike Information Criterion)
## BIC = 2679.521 (Bayesian Information Criterion)
## CFI = 0.926 (Comparative Fit Index)
## TLI = 0.798 (Tucker-Lewis Index; Non-Normed Fit Index, NNFI)
## NFI = 0.886 (Normed Fit Index)
## IFI = 0.933 (Incremental Fit Index)
## GFI = 0.982 (Goodness-of-Fit Index)
## AGFI = 0.872 (Adjusted Goodness-of-Fit Index)
## RMSEA = 0.051, 90% CI [0.000, 0.097] (Root Mean Square Error of Approximation)
## SRMR = 0.021 (Standardized Root Mean Square Residual)
## 
## Model Estimates (lavaan):
## ────────────────────────────────────────────────────────────────────────
##                    Estimate    S.E.      z     p       LLCI  ULCI   Beta
## ────────────────────────────────────────────────────────────────────────
## Regression Paths:                                                       
##   X <- O             -0.046 (0.042) -1.094  .274     -0.128 0.036 -0.046
##   X <- C              0.031 (0.042)  0.744  .457     -0.051 0.114  0.031
##   X <- EX             0.108 (0.044)  2.484  .013 *    0.023 0.194  0.108
##   X <- A              0.149 (0.043)  3.492 <.001 ***  0.065 0.233  0.149
##   X <- ES             0.177 (0.044)  4.055 <.001 ***  0.092 0.263  0.177
##   Y <- X              0.598 (0.160)  3.727 <.001 ***  0.283 0.912  0.598
## ────────────────────────────────────────────────────────────────────────
## Note. Raw (Standard) Confidence Interval (CI) and SE.
# 2.1 Wald test on the parameters named "X~<predictor>" of the fitted model.
variables <- c("ES", "A", "C", "EX", "O")
par.names <- paste0("X~", variables)
Wald.test(fit = IV.F, par.names, method = "UIMASEM") %>%
  setDT() %>%
  print_table()
## ─────────────────────────────
##    Wald.Statistic Wald.pValue
## ─────────────────────────────
## 1          50.123       0.000
## ─────────────────────────────
# 2.3 R2, computed from P with the predictors currently stored in `variables`.
R2xzw.MASEM(
  P = P,
  method = "UIMASEM",
  y.nm = "Y",
  X.nm = "X",
  Z.nm = variables
)
## $R2x.z
##         [,1]
## [1,] 0.09853
# 3. X -> Y: draw the path diagram of IV.F with coefficient labels; 0.05 is
# passed as the `sig` argument.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
lavaanPlot(model = IV.F, coefs = TRUE, sig = 0.05)