1 LOAD PACKAGES & SET WORKING DIRECTORY

## ✔ Set working directory to "G:/我的云端硬盘/R/meta/NonLinearMata"
## ✔ Set working directory to "G:/我的云端硬盘/R/meta/NonLinearMata"

2 INFORMATION FORM PUBLISHED STUDY

2.1 Primary information

2.1.1 Mean and standard deviation

Describe(published)#, file ="A2.Describe.doc")
## Descriptive Statistics:
## ──────────────────────────────────────────────────────────────
##        N (NA)  Mean   SD | Median   Min  Max Skewness Kurtosis
## ──────────────────────────────────────────────────────────────
## X   7181   38 -0.00 1.00 |  -0.54 -3.06 1.98    -0.27     0.25
## W   7155   64 -0.00 1.00 |  -0.09 -2.12 1.93    -0.06    -0.67
## Y   7191   28  0.00 1.00 |   0.41 -3.02 1.40    -0.86     0.36
## C1  7176   43  0.00 1.00 |   0.53 -2.23 1.45    -0.45    -0.40
## C2  7129   90  0.00 1.00 |  -0.09 -3.18 1.46    -0.48    -0.34
## ──────────────────────────────────────────────────────────────
#Cr=data[,.(X,W,Y,C1,C2)]
#names(Cr)

2.1.2 Correlation matrix

Corr(published)
## Pearson's r and 95% confidence intervals:
## ───────────────────────────────────────
##           r     [95% CI]     p        N
## ───────────────────────────────────────
## X-W    0.17 [0.14, 0.19] <.001 *** 7122
## X-Y    0.49 [0.47, 0.50] <.001 *** 7159
## X-C1   0.34 [0.32, 0.36] <.001 *** 7146
## X-C2   0.17 [0.15, 0.19] <.001 *** 7096
## W-Y    0.23 [0.21, 0.26] <.001 *** 7134
## W-C1   0.30 [0.28, 0.32] <.001 *** 7121
## W-C2   0.06 [0.04, 0.09] <.001 *** 7098
## Y-C1   0.45 [0.43, 0.47] <.001 *** 7154
## Y-C2   0.23 [0.21, 0.26] <.001 *** 7108
## C1-C2  0.15 [0.13, 0.18] <.001 *** 7094
## ───────────────────────────────────────

## Correlation matrix is displayed in the RStudio `Plots` Pane.

2.2 Model information:

  • Regression coefficients

  • R-squared: R^2

  • Sample sizes: Num. obs.

Published<- lm(Y ~X*W+C1+C2, na.action = na.exclude,data=published)

2.3 Ture model

True<- lm(Y ~X*W, na.action = na.exclude,data=published)
model_summary(list(Published,True))
## 
## Model Summary
## 
## ───────────────────────────────────────
##              (1) Y         (2) Y       
## ───────────────────────────────────────
## (Intercept)     0.016         0.015    
##                (0.010)       (0.010)   
## X               0.358 ***     0.462 ***
##                (0.010)       (0.010)   
## W               0.078 ***     0.153 ***
##                (0.010)       (0.010)   
## C1              0.281 ***              
##                (0.011)                 
## C2              0.124 ***              
##                (0.010)                 
## X:W            -0.059 ***    -0.069 ***
##                (0.009)       (0.010)   
## ───────────────────────────────────────
## R^2             0.352         0.265    
## Adj. R^2        0.352         0.265    
## Num. obs.    7019          7103        
## ───────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.
interact_plot(True, pred = X, modx = W,#Basic setup
              modx.values = "plus-minus")

3 DATA REGENERATION APPROACH FOR MODEL RECOVERY

3.1 Data regeneration

library(MASS) # 用于生成相关性数据

# 设置种子以确保结果可重复
set.seed(123)

# 定义变量数和样本大小
n <- 7019

# 均值和标准差
means <- rep(0, 5) # 所有变量的均值为0
sds <- rep(1, 5) # 所有变量的标准差为1

# 定义相关矩阵
cor_matrix <- matrix(c(1, 0.17, 0.49, 0.34, 0.17,
                       0.17, 1, 0.23, 0.30, 0.06,
                       0.49, 0.23, 1, 0.45, 0.23,
                       0.34, 0.30, 0.45, 1, 0.15,
                       0.17, 0.06, 0.23, 0.15, 1), nrow = 5)

# 生成相关性数据
data <- mvrnorm(n, mu = means, Sigma = cor_matrix * (sds %*% t(sds)))

# 添加列名
colnames(data) <- c("X", "W", "C1", "C2", "Y")
#data$Y=NULL
# 计算Y,考虑到回归系数和交互项
data[, "Y"] <- with(as.data.frame(data), 0.016 + 0.358*X + 0.078*W + 0.281*C1 + 0.124*C2 - 0.059*(X*W))

# 添加符合指定R^2值的随机误差项
error_variance <- var(data[, "Y"]) * 0.648 # 根据R^2计算Y的误差方差
data[, "Y"] <- data[, "Y"] + rnorm(n, mean = 0, sd = sqrt(error_variance))

# 查看结果
data=as.data.table(data)

3.2 Modeling based on new data

SiumlatedClean<- lm(Y ~X*W, na.action = na.exclude,data=data)
model_summary(list(SiumlatedClean))
## 
## Model Summary
## 
## ─────────────────────────
##              (1) Y       
## ─────────────────────────
## (Intercept)     0.011    
##                (0.007)   
## X               0.526 ***
##                (0.007)   
## W               0.151 ***
##                (0.007)   
## X:W            -0.055 ***
##                (0.007)   
## ─────────────────────────
## R^2             0.475    
## Adj. R^2        0.475    
## Num. obs.    7019        
## ─────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.
## 
## # Check for Multicollinearity
## 
## Low Correlation
## 
##  Term  VIF       VIF 95% CI Increased SE Tolerance Tolerance 95% CI
##     X 1.02 [1.01,     1.07]         1.01      0.98     [0.94, 0.99]
##     W 1.02 [1.01,     1.07]         1.01      0.98     [0.94, 0.99]
##   X:W 1.00 [1.00,      Inf]         1.00      1.00     [0.00, 1.00]
interact_plot(True, pred = X, modx = W,#Basic setup
              modx.values = "plus-minus")

4 RESULTS COMPARISON

4.1 Regression

???set “SE” in data generation. ???R2 is NOT right

model_summary(list(True,SiumlatedClean))
## 
## Model Summary
## 
## ───────────────────────────────────────
##              (1) Y         (2) Y       
## ───────────────────────────────────────
## (Intercept)     0.015         0.011    
##                (0.010)       (0.007)   
## X               0.462 ***     0.526 ***
##                (0.010)       (0.007)   
## W               0.153 ***     0.151 ***
##                (0.010)       (0.007)   
## X:W            -0.069 ***    -0.055 ***
##                (0.010)       (0.007)   
## ───────────────────────────────────────
## R^2             0.265         0.475    
## Adj. R^2        0.265         0.475    
## Num. obs.    7103          7019        
## ───────────────────────────────────────
## Note. * p < .05, ** p < .01, *** p < .001.

4.2 Descriptive results

???set “Median Min Max Skewness Kurtosis” in data generation.

published$XW=published$X*published$W
#published=setorder(published, X, W, Y, C1, C2, XW)
data$XW=data$X*data$W
#data=setorder(data, X, W, Y, C1, C2, XW)
data <- data[, c("X", "W", "Y", "C1", "C2", "XW")]
Describe(published)
## Descriptive Statistics:
## ──────────────────────────────────────────────────────────────
##        N (NA)  Mean   SD | Median   Min  Max Skewness Kurtosis
## ──────────────────────────────────────────────────────────────
## X   7181   38 -0.00 1.00 |  -0.54 -3.06 1.98    -0.27     0.25
## W   7155   64 -0.00 1.00 |  -0.09 -2.12 1.93    -0.06    -0.67
## Y   7191   28  0.00 1.00 |   0.41 -3.02 1.40    -0.86     0.36
## C1  7176   43  0.00 1.00 |   0.53 -2.23 1.45    -0.45    -0.40
## C2  7129   90  0.00 1.00 |  -0.09 -3.18 1.46    -0.48    -0.34
## XW  7122   97  0.16 1.01 |   0.17 -5.92 6.48     0.43     5.10
## ──────────────────────────────────────────────────────────────
Describe(data)
## Descriptive Statistics:
## ─────────────────────────────────────────────────────────
##        N  Mean   SD | Median   Min  Max Skewness Kurtosis
## ─────────────────────────────────────────────────────────
## X   7019  0.01 1.00 |   0.00 -3.96 4.39     0.02    -0.00
## W   7019 -0.01 1.00 |  -0.02 -3.50 3.78     0.05    -0.03
## Y   7019  0.00 0.83 |   0.02 -2.92 2.99    -0.11    -0.08
## C1  7019  0.00 1.00 |   0.01 -3.96 3.83    -0.03     0.03
## C2  7019  0.01 1.00 |   0.03 -3.54 3.46    -0.04     0.04
## XW  7019  0.15 1.00 |   0.03 -4.17 8.65     1.07     5.97
## ─────────────────────────────────────────────────────────

4.3 Correlatioin

??? XW only same to the Y ??? Y is not the same in the original data

Corr(published)
## Pearson's r and 95% confidence intervals:
## ──────────────────────────────────────────
##            r       [95% CI]     p        N
## ──────────────────────────────────────────
## X-W     0.17 [ 0.14,  0.19] <.001 *** 7122
## X-Y     0.49 [ 0.47,  0.50] <.001 *** 7159
## X-C1    0.34 [ 0.32,  0.36] <.001 *** 7146
## X-C2    0.17 [ 0.15,  0.19] <.001 *** 7096
## X-XW    0.04 [ 0.02,  0.07] <.001 *** 7122
## W-Y     0.23 [ 0.21,  0.26] <.001 *** 7134
## W-C1    0.30 [ 0.28,  0.32] <.001 *** 7121
## W-C2    0.06 [ 0.04,  0.09] <.001 *** 7098
## W-XW   -0.04 [-0.06, -0.02] <.001 *** 7122
## Y-C1    0.45 [ 0.43,  0.47] <.001 *** 7154
## Y-C2    0.23 [ 0.21,  0.26] <.001 *** 7108
## Y-XW   -0.06 [-0.08, -0.03] <.001 *** 7103
## C1-C2   0.15 [ 0.13,  0.18] <.001 *** 7094
## C1-XW  -0.03 [-0.06, -0.01]  .006 **  7092
## C2-XW   0.01 [-0.01,  0.03]  .412     7065
## ──────────────────────────────────────────

## Correlation matrix is displayed in the RStudio `Plots` Pane.
Corr(data)
## Pearson's r and 95% confidence intervals:
## ──────────────────────────────────────────
##            r       [95% CI]     p        N
## ──────────────────────────────────────────
## X-W     0.15 [ 0.12,  0.17] <.001 *** 7019
## X-Y     0.66 [ 0.65,  0.67] <.001 *** 7019
## X-C1    0.49 [ 0.47,  0.50] <.001 *** 7019
## X-C2    0.33 [ 0.31,  0.35] <.001 *** 7019
## X-XW   -0.01 [-0.03,  0.02]  .651     7019
## W-Y     0.28 [ 0.25,  0.30] <.001 *** 7019
## W-C1    0.23 [ 0.20,  0.25] <.001 *** 7019
## W-C2    0.31 [ 0.29,  0.33] <.001 *** 7019
## W-XW    0.01 [-0.02,  0.03]  .646     7019
## Y-C1    0.64 [ 0.63,  0.66] <.001 *** 7019
## Y-C2    0.48 [ 0.46,  0.49] <.001 *** 7019
## Y-XW   -0.07 [-0.09, -0.05] <.001 *** 7019
## C1-C2   0.45 [ 0.43,  0.46] <.001 *** 7019
## C1-XW   0.00 [-0.02,  0.02]  .902     7019
## C2-XW  -0.02 [-0.05,  0.00]  .051 .   7019
## ──────────────────────────────────────────

## Correlation matrix is displayed in the RStudio `Plots` Pane.