Making Data
#創建數據,25個IV(T1 to T25),1個X,1個Y
library(data.table)
set.seed(123)
values <- c(0, 1, 2)
probabilities <- c(0.49, 0.42, 0.09)
# 生成 T1, T2, T3 各 165 個值
T1 <- sample(values, 165, replace = TRUE, prob = probabilities)
T2 <- sample(values, 165, replace = TRUE, prob = probabilities)
T3 <- sample(values, 165, replace = TRUE, prob = probabilities)
T4 <- sample(values, 165, replace = TRUE, prob = probabilities)
T5 <- sample(values, 165, replace = TRUE, prob = probabilities)
T6 <- sample(values, 165, replace = TRUE, prob = probabilities)
T7 <- sample(values, 165, replace = TRUE, prob = probabilities)
T8 <- sample(values, 165, replace = TRUE, prob = probabilities)
T9 <- sample(values, 165, replace = TRUE, prob = probabilities)
T10 <- sample(values, 165, replace = TRUE, prob = probabilities)
T11 <- sample(values, 165, replace = TRUE, prob = probabilities)
T12 <- sample(values, 165, replace = TRUE, prob = probabilities)
T13 <- sample(values, 165, replace = TRUE, prob = probabilities)
T14 <- sample(values, 165, replace = TRUE, prob = probabilities)
T15 <- sample(values, 165, replace = TRUE, prob = probabilities)
T16 <- sample(values, 165, replace = TRUE, prob = probabilities)
T17 <- sample(values, 165, replace = TRUE, prob = probabilities)
T18 <- sample(values, 165, replace = TRUE, prob = probabilities)
T19 <- sample(values, 165, replace = TRUE, prob = probabilities)
T20 <- sample(values, 165, replace = TRUE, prob = probabilities)
T21 <- sample(values, 165, replace = TRUE, prob = probabilities)
T22 <- sample(values, 165, replace = TRUE, prob = probabilities)
T23 <- sample(values, 165, replace = TRUE, prob = probabilities)
T24 <- sample(values, 165, replace = TRUE, prob = probabilities)
T25 <- sample(values, 165, replace = TRUE, prob = probabilities)
# 生成 X 和 Y,各 165 個值,從 N(0, 2) 中抽取
X <- rnorm(165, mean = 0, sd = sqrt(2))
Y <- rnorm(165, mean = 0, sd = sqrt(2))
# 創建 data.frame
whatifdat <- data.frame(T1 = T1, T2 = T2, T3 = T3, T4 = T4, T5 = T5,
T6 = T6, T7 = T7, T8 = T8, T9 = T9, T10 = T10,
T11 = T11, T12 = T12, T13 = T13, T14 = T14, T5 = T15,
T16 = T16, T17 = T17, T18 = T18, T19 = T19, T20 = T20,
T21 = T21, T22 = T22, T23 = T23, T24 = T24, T25 = T25,
X = X, Y = Y)
#T對X做迴歸
fit_X_T1 <- lm(X ~ T1, data = whatifdat)
fit_X_T2 <- lm(X ~ T2, data = whatifdat)
fit_X_T3 <- lm(X ~ T3, data = whatifdat)
fit_X_T4 <- lm(X ~ T4, data = whatifdat)
fit_X_T5 <- lm(X ~ T5, data = whatifdat)
fit_X_T6 <- lm(X ~ T6, data = whatifdat)
fit_X_T7 <- lm(X ~ T7, data = whatifdat)
fit_X_T8 <- lm(X ~ T8, data = whatifdat)
fit_X_T9 <- lm(X ~ T9, data = whatifdat)
fit_X_T10 <- lm(X ~ T10, data = whatifdat)
fit_X_T11 <- lm(X ~ T11, data = whatifdat)
fit_X_T12 <- lm(X ~ T12, data = whatifdat)
fit_X_T13 <- lm(X ~ T13, data = whatifdat)
fit_X_T14 <- lm(X ~ T14, data = whatifdat)
fit_X_T15 <- lm(X ~ T15, data = whatifdat)
fit_X_T16 <- lm(X ~ T16, data = whatifdat)
fit_X_T17 <- lm(X ~ T17, data = whatifdat)
fit_X_T18 <- lm(X ~ T18, data = whatifdat)
fit_X_T19 <- lm(X ~ T19, data = whatifdat)
fit_X_T20 <- lm(X ~ T20, data = whatifdat)
fit_X_T21 <- lm(X ~ T21, data = whatifdat)
fit_X_T22 <- lm(X ~ T22, data = whatifdat)
fit_X_T23 <- lm(X ~ T23, data = whatifdat)
fit_X_T24 <- lm(X ~ T24, data = whatifdat)
fit_X_T25 <- lm(X ~ T25, data = whatifdat)
#bx為迴歸斜率,bxse為迴歸後的se
bx <- c(coef(fit_X_T1)["T1"], coef(fit_X_T2)["T2"], coef(fit_X_T3)["T3"],
coef(fit_X_T4)["T4"], coef(fit_X_T5)["T5"], coef(fit_X_T6)["T6"],
coef(fit_X_T7)["T7"], coef(fit_X_T8)["T8"], coef(fit_X_T9)["T9"],
coef(fit_X_T10)["T10"], coef(fit_X_T11)["T11"], coef(fit_X_T12)["T12"],
coef(fit_X_T13)["T13"], coef(fit_X_T14)["T14"], coef(fit_X_T15)["T15"],
coef(fit_X_T16)["T16"], coef(fit_X_T17)["T17"], coef(fit_X_T18)["T18"],
coef(fit_X_T19)["T19"], coef(fit_X_T20)["T20"], coef(fit_X_T21)["T21"],
coef(fit_X_T22)["T22"], coef(fit_X_T23)["T23"], coef(fit_X_T24)["T24"],
coef(fit_X_T25)["T25"])
#簡化了後續分析,無需考慮這些關聯的方向
#取基因與暴露之間關聯(bx)的絕對值,確保所有基因與暴露估計都是正值
bxg = abs(bx)
bxse <- c(summary(fit_X_T1)$coefficients["T1", "Std. Error"],
summary(fit_X_T2)$coefficients["T2", "Std. Error"],
summary(fit_X_T3)$coefficients["T3", "Std. Error"],
summary(fit_X_T4)$coefficients["T4", "Std. Error"],
summary(fit_X_T5)$coefficients["T5", "Std. Error"],
summary(fit_X_T6)$coefficients["T6", "Std. Error"],
summary(fit_X_T7)$coefficients["T7", "Std. Error"],
summary(fit_X_T8)$coefficients["T8", "Std. Error"],
summary(fit_X_T9)$coefficients["T9", "Std. Error"],
summary(fit_X_T10)$coefficients["T10", "Std. Error"],
summary(fit_X_T11)$coefficients["T11", "Std. Error"],
summary(fit_X_T12)$coefficients["T12", "Std. Error"],
summary(fit_X_T13)$coefficients["T13", "Std. Error"],
summary(fit_X_T14)$coefficients["T14", "Std. Error"],
summary(fit_X_T15)$coefficients["T15", "Std. Error"],
summary(fit_X_T16)$coefficients["T16", "Std. Error"],
summary(fit_X_T17)$coefficients["T17", "Std. Error"],
summary(fit_X_T18)$coefficients["T18", "Std. Error"],
summary(fit_X_T19)$coefficients["T19", "Std. Error"],
summary(fit_X_T20)$coefficients["T20", "Std. Error"],
summary(fit_X_T21)$coefficients["T21", "Std. Error"],
summary(fit_X_T22)$coefficients["T22", "Std. Error"],
summary(fit_X_T23)$coefficients["T23", "Std. Error"],
summary(fit_X_T24)$coefficients["T24", "Std. Error"],
summary(fit_X_T25)$coefficients["T25", "Std. Error"])
#T對Y做迴歸
fit_Y_T1 <- lm(Y ~ T1, data = whatifdat)
fit_Y_T2 <- lm(Y ~ T2, data = whatifdat)
fit_Y_T3 <- lm(Y ~ T3, data = whatifdat)
fit_Y_T4 <- lm(Y ~ T4, data = whatifdat)
fit_Y_T5 <- lm(Y ~ T5, data = whatifdat)
fit_Y_T6 <- lm(Y ~ T6, data = whatifdat)
fit_Y_T7 <- lm(Y ~ T7, data = whatifdat)
fit_Y_T8 <- lm(Y ~ T8, data = whatifdat)
fit_Y_T9 <- lm(Y ~ T9, data = whatifdat)
fit_Y_T10 <- lm(Y ~ T10, data = whatifdat)
fit_Y_T11 <- lm(Y ~ T11, data = whatifdat)
fit_Y_T12 <- lm(Y ~ T12, data = whatifdat)
fit_Y_T13 <- lm(Y ~ T13, data = whatifdat)
fit_Y_T14 <- lm(Y ~ T14, data = whatifdat)
fit_Y_T15 <- lm(Y ~ T15, data = whatifdat)
fit_Y_T16 <- lm(Y ~ T16, data = whatifdat)
fit_Y_T17 <- lm(Y ~ T17, data = whatifdat)
fit_Y_T18 <- lm(Y ~ T18, data = whatifdat)
fit_Y_T19 <- lm(Y ~ T19, data = whatifdat)
fit_Y_T20 <- lm(Y ~ T20, data = whatifdat)
fit_Y_T21 <- lm(Y ~ T21, data = whatifdat)
fit_Y_T22 <- lm(Y ~ T22, data = whatifdat)
fit_Y_T23 <- lm(Y ~ T23, data = whatifdat)
fit_Y_T24 <- lm(Y ~ T24, data = whatifdat)
fit_Y_T25 <- lm(Y ~ T25, data = whatifdat)
#by為迴歸斜率,byse為迴歸後的se
by <- c(coef(fit_Y_T1)["T1"], coef(fit_Y_T2)["T2"], coef(fit_Y_T3)["T3"],
coef(fit_Y_T4)["T4"], coef(fit_Y_T5)["T5"], coef(fit_Y_T6)["T6"],
coef(fit_Y_T7)["T7"], coef(fit_Y_T8)["T8"], coef(fit_Y_T9)["T9"],
coef(fit_Y_T10)["T10"], coef(fit_Y_T11)["T11"], coef(fit_Y_T12)["T12"],
coef(fit_Y_T13)["T13"], coef(fit_Y_T14)["T14"], coef(fit_Y_T15)["T15"],
coef(fit_Y_T16)["T16"], coef(fit_Y_T17)["T17"], coef(fit_Y_T18)["T18"],
coef(fit_Y_T19)["T19"], coef(fit_Y_T20)["T20"], coef(fit_Y_T21)["T21"],
coef(fit_Y_T22)["T22"], coef(fit_Y_T23)["T23"], coef(fit_Y_T24)["T24"],
coef(fit_Y_T25)["T25"])
#根據基因與暴露之間關聯的符號(bx)調整基因與結果之間關聯(by)的方向
byg = by*sign(bx)
byse <- c(summary(fit_Y_T1)$coefficients["T1", "Std. Error"],
summary(fit_Y_T2)$coefficients["T2", "Std. Error"],
summary(fit_Y_T3)$coefficients["T3", "Std. Error"],
summary(fit_Y_T4)$coefficients["T4", "Std. Error"],
summary(fit_Y_T5)$coefficients["T5", "Std. Error"],
summary(fit_Y_T6)$coefficients["T6", "Std. Error"],
summary(fit_Y_T7)$coefficients["T7", "Std. Error"],
summary(fit_Y_T8)$coefficients["T8", "Std. Error"],
summary(fit_Y_T9)$coefficients["T9", "Std. Error"],
summary(fit_Y_T10)$coefficients["T10", "Std. Error"],
summary(fit_Y_T11)$coefficients["T11", "Std. Error"],
summary(fit_Y_T12)$coefficients["T12", "Std. Error"],
summary(fit_Y_T13)$coefficients["T13", "Std. Error"],
summary(fit_Y_T14)$coefficients["T14", "Std. Error"],
summary(fit_Y_T15)$coefficients["T15", "Std. Error"],
summary(fit_Y_T16)$coefficients["T16", "Std. Error"],
summary(fit_Y_T17)$coefficients["T17", "Std. Error"],
summary(fit_Y_T18)$coefficients["T18", "Std. Error"],
summary(fit_Y_T19)$coefficients["T19", "Std. Error"],
summary(fit_Y_T20)$coefficients["T20", "Std. Error"],
summary(fit_Y_T21)$coefficients["T21", "Std. Error"],
summary(fit_Y_T22)$coefficients["T22", "Std. Error"],
summary(fit_Y_T23)$coefficients["T23", "Std. Error"],
summary(fit_Y_T24)$coefficients["T24", "Std. Error"],
summary(fit_Y_T25)$coefficients["T25", "Std. Error"])
MR-Egger function
#輸入數據
library(MendelianRandomization)
MRInputObject <- mr_input(
bx = bxg,
bxse = bxse,
by = byg,
byse = byse
)
#MR-Egger做因果分析
EggerObject.wi <- mr_egger(
MRInputObject,
robust = FALSE,
penalized = FALSE,
correl = FALSE,
distribution = "normal",
alpha = 0.05
)
EggerObject.wi
##
## MR-Egger method
## (variants uncorrelated, random-effect model)
##
## Number of Variants = 25
##
## ------------------------------------------------------------------
## Method Estimate Std Error 95% CI p-value
## MR-Egger 0.139 0.367 -0.580, 0.858 0.705
## (intercept) -0.029 0.068 -0.162, 0.104 0.669
## ------------------------------------------------------------------
## Residual Standard Error : 1.132
## Heterogeneity test statistic = 29.4478 on 23 degrees of freedom, (p-value = 0.1659)
## I^2_GX statistic: 0.0%
MR-Egger by hands
weights <- 1/byse^2
data <- data.frame(bxg = bxg, byg = byg, weights = weights)
#小gamma 對 大gamma 做迴歸
fit <- lm(byg ~ bxg, data = data, weights = weights)
fit
##
## Call:
## lm(formula = byg ~ bxg, data = data, weights = weights)
##
## Coefficients:
## (Intercept) bxg
## -0.02901 0.13884
Other Example
library(MendelianRandomization)
#創建數據
#無相關
MRInputObject <- mr_input(bx = abs(ldlc),
bxse = ldlcse,
by = chdlodds*sign(ldlc),
byse = chdloddsse)
#有相關
MRInputObject.cor <- mr_input(bx = abs(calcium),
bxse = calciumse,
by = fastgluc*sign(calcium),
byse = fastglucse,
corr = calc.rho)
MRInputObject # example with uncorrelated variants
## SNP exposure.beta exposure.se outcome.beta outcome.se
## 1 snp_1 0.0260 0.004 0.0677 0.0286
## 2 snp_2 0.0440 0.004 0.1625 0.0300
## 3 snp_3 0.0380 0.004 0.1054 0.0310
## 4 snp_4 0.0230 0.003 0.0619 0.0243
## 5 snp_5 0.0170 0.003 0.0834 0.0222
## 6 snp_6 0.0310 0.006 0.1278 0.0667
## 7 snp_7 0.0180 0.004 0.0408 0.0373
## 8 snp_8 0.0460 0.007 0.0770 0.0543
## 9 snp_9 0.0590 0.004 0.1570 0.0306
## 10 snp_10 0.0040 0.003 -0.0305 0.0236
## 11 snp_11 0.0110 0.004 0.0100 0.0277
## 12 snp_12 0.0050 0.005 -0.1823 0.0403
## 13 snp_13 0.0040 0.005 -0.0408 0.0344
## 14 snp_14 0.0220 0.005 0.1989 0.0335
## 15 snp_15 0.0050 0.004 -0.0100 0.0378
## 16 snp_16 0.0020 0.004 -0.0488 0.0292
## 17 snp_17 0.0020 0.003 -0.0100 0.0253
## 18 snp_18 0.0040 0.004 -0.0408 0.0319
## 19 snp_19 0.0110 0.004 -0.0305 0.0316
## 20 snp_20 0.0090 0.003 -0.0408 0.0241
## 21 snp_21 0.0110 0.004 0.0202 0.0285
## 22 snp_22 0.0030 0.003 0.0619 0.0217
## 23 snp_23 0.0120 0.004 -0.0296 0.0298
## 24 snp_24 0.0003 0.003 0.0677 0.0239
## 25 snp_25 0.0150 0.003 0.0726 0.0220
## 26 snp_26 0.0080 0.004 0.0726 0.0246
## 27 snp_27 0.0090 0.003 0.0000 0.0255
## 28 snp_28 0.0360 0.007 -0.0198 0.0647
MRInputObject.cor # example with correlated variants
## SNP exposure.beta exposure.se outcome.beta outcome.se
## 1 snp_1 0.00625 0.00233 0.02805 0.0122
## 2 snp_2 0.00590 0.00338 0.00953 0.0198
## 3 snp_3 0.01822 0.00318 0.03646 0.0173
## 4 snp_4 0.00598 0.00233 0.01049 0.0119
## 5 snp_5 0.00825 0.00229 0.02357 0.0122
## 6 snp_6 0.00651 0.00352 0.00204 0.0179
without correlation
#用mr-egger計算
#無相關
EggerObject <- mr_egger(MRInputObject,
robust = FALSE,
penalized = FALSE,
correl = FALSE,
distribution = "normal",
alpha = 0.05)
EggerObject
##
## MR-Egger method
## (variants uncorrelated, random-effect model)
##
## Number of Variants = 28
##
## ------------------------------------------------------------------
## Method Estimate Std Error 95% CI p-value
## MR-Egger 3.253 0.770 1.743, 4.762 0.000
## (intercept) -0.011 0.015 -0.041, 0.018 0.451
## ------------------------------------------------------------------
## Residual Standard Error : 1.935
## Heterogeneity test statistic = 97.3975 on 26 degrees of freedom, (p-value = 0.0000)
## I^2_GX statistic: 91.9%
weights <- 1/chdloddsse^2
data <- data.frame(bx = abs(ldlc), by = chdlodds*sign(ldlc), weights = weights)
fit1 <- lm(by ~ bx, data = data,weights = weights)
fit1
##
## Call:
## lm(formula = by ~ bx, data = data, weights = weights)
##
## Coefficients:
## (Intercept) bx
## -0.01146 3.25289
with correlation
#有相關
EggerObject.cor <- mr_egger(MRInputObject.cor,
robust = FALSE,
penalized = FALSE,
correl = FALSE,
distribution = "normal",
alpha = 0.05)
EggerObject.cor
##
## MR-Egger method
## (variants correlated, random-effect model)
##
## Number of Variants = 6
##
## ------------------------------------------------------------------
## Method Estimate Std Error 95% CI p-value
## MR-Egger 1.302 1.518 -1.672, 4.276 0.391
## (intercept) 0.009 0.013 -0.017, 0.035 0.493
## ------------------------------------------------------------------
## Residual Standard Error : 0.629
## Residual standard error is set to 1 in calculation of confidence interval when its estimate is less than 1.
## Heterogeneity test statistic = 1.5825 on 4 degrees of freedom, (p-value = 0.8119)
weights <- 1/fastglucse^2
data <- data.frame(bx = abs(calcium), by = fastgluc*sign(calcium), weights = weights)
fit2 <- lm(by~bx, data = data, weights = weights)
fit2
##
## Call:
## lm(formula = by ~ bx, data = data, weights = weights)
##
## Coefficients:
## (Intercept) bx
## 0.005094 1.793699