library(tidyverse) # Data wrangling
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(TwoSampleMR) # MR
## TwoSampleMR version 0.6.11
##
## [>] New authentication requirements: https://mrcieu.github.io/ieugwasr/articles/guide.html#authentication.
## [>] Major upgrades to our servers completed to improve service and stability.
## [>] We need your help to shape our emerging roadmap!
## Please take 2 minutes to give us feedback -
## https://forms.office.com/e/eSr7EFAfCG
##
## Warning:
## You are running an old version of the TwoSampleMR package.
## This version: 0.6.11
## Latest version: 0.6.22
## Please consider updating using remotes::install_github('MRCIEU/TwoSampleMR')
library(LDlinkR) # LD and proxy snps
library(ieugwasr)
## OpenGWAS updates:
## Date: 2024-05-17
## [>] OpenGWAS is growing!
## [>] Please take 2 minutes to give us feedback -
## [>] It will help directly shape our emerging roadmap
## [>] https://forms.office.com/e/eSr7EFAfCG
##
## Attaching package: 'ieugwasr'
##
## The following object is masked from 'package:TwoSampleMR':
##
## ld_matrix
library(fixtuRes)
Importing datasets for Severe Respiratory COVID, Hospitalized COVID, and COVID vs. Population
exposure_path1 = "resources/A2eurGRCH38.tsv.gz"
exposure_c1 <- read_tsv(exposure_path1,comment = "##",
col_select = c(1,2,3,4,5,6,7,8,9,14,15))
colnames(exposure_c1)<- c("CHR","POS","effect_allele.exposure","other_allele.exposure","SNP","N","beta.exposure","se.exposure","pval","eaf.exposure","rsid")
exposure_c1$exposure <- rep(c("severe_respiratory_COVID19"),times=nrow(exposure_c1));
clump_id<- "a2grch"
exposure_c1$z.exposure <- exposure_c1$beta.exposure/exposure_c1$se.exposure;
exposure_c1$id.exposure <- rep(c(clump_id),times=nrow(exposure_c1));
exposure1 <- exposure_c1 %>%
format_data(.,
type = "exposure",
snps = NULL,
header = TRUE,
phenotype_col = "exposure",
snp_col ="rsid",
beta_col = "beta.exposure",
se_col = "se.exposure",
eaf_col = "eaf.exposure",
effect_allele_col = "effect_allele.exposure",
other_allele_col = "other_allele.exposure",
pval_col = "pval",
samplesize_col = "N",
z_col = "z.exposure",
chr_col = "CHR",
pos_col = "POS",
log_pval = FALSE
) %>%
as_tibble()
exposure_clump1 <-ld_clump(
#dplyr::tibble(rsid=exposure_c1$rsid, pval=exposure_c1$pval, id=exposure_c1$trait_id),
dat = exposure_c1,
plink_bin = genetics.binaRies::get_plink_binary(),
bfile = "1kgv3/EUR"
)
exposure_dat1 <- filter(exposure_clump1, pval < 5e-8)
write_csv(exposure_dat1, "data/exposure_A2GRCH38.csv", append = FALSE)
exposure_path2 = "resources/B2GRCH38.gz"
exposure_c2 <- read_tsv(exposure_path2,comment = "##",
col_select = c(1,2,3,4,5,6,7,8,9,14,15))
colnames(exposure_c2)<- c("CHR","POS","effect_allele.exposure","other_allele.exposure","SNP","N","beta.exposure","se.exposure","pval","eaf.exposure","rsid")
clump_id <- "b2grch"
exposure_c2$exposure <- rep(c("hospitalized_COVID19"),times=nrow(exposure_c2));
exposure_c2$z.exposure <- exposure_c2$beta.exposure/exposure_c2$se.exposure;
exposure_c2$id.exposure <- rep(c(clump_id),times=nrow(exposure_c2));
exposure2 <- exposure_c2 %>%
format_data(.,
type = "exposure",
snps = NULL,
header = TRUE,
phenotype_col = "exposure",
snp_col ="rsid",
beta_col = "beta.exposure",
se_col = "se.exposure",
eaf_col = "eaf.exposure",
effect_allele_col = "effect_allele.exposure",
other_allele_col = "other_allele.exposure",
pval_col = "pval",
samplesize_col = "N",
z_col = "z.exposure",
chr_col = "CHR",
pos_col = "POS",
log_pval = FALSE
) %>%
as_tibble()
exposure_clump2 <-ld_clump(
#dplyr::tibble(rsid=exposure_c1$rsid, pval=exposure_c1$pval, id=exposure_c1$trait_id),
dat = exposure_c2,
plink_bin = genetics.binaRies::get_plink_binary(),
bfile = "1kgv3/EUR"
)
exposure_dat2 <- filter(exposure_clump2, pval < 5e-8)
write_csv(exposure_dat2, "data/exposure_B2GRCH38.csv", append = FALSE)
exposure_path3 = "resources/C2GRCH38.gz"
exposure_c3 <- read_tsv(exposure_path3,comment = "##",
col_select = c(1,2,3,4,5,6,7,8,9,14,15))
colnames(exposure_c3)<- c("CHR","POS","effect_allele.exposure","other_allele.exposure","SNP","N","beta.exposure","se.exposure","pval","eaf.exposure","rsid")
clump_id <- "c2grch"
exposure_c3$exposure <- rep(c("COVID19"),times=nrow(exposure_c3));
exposure_c3$z.exposure <- exposure_c3$beta.exposure/exposure_c3$se.exposure;
exposure_c3$id.exposure <- rep(c(clump_id),times=nrow(exposure_c3));
exposure3 <- exposure_c3 %>%
format_data(.,
type = "exposure",
snps = NULL,
header = TRUE,
phenotype_col = "exposure",
snp_col ="rsid",
beta_col = "beta.exposure",
se_col = "se.exposure",
eaf_col = "eaf.exposure",
effect_allele_col = "effect_allele.exposure",
other_allele_col = "other_allele.exposure",
pval_col = "pval",
samplesize_col = "N",
z_col = "z.exposure",
chr_col = "CHR",
pos_col = "POS",
log_pval = FALSE
) %>%
as_tibble()
exposure_clump3 <-ld_clump(
#dplyr::tibble(rsid=exposure_c1$rsid, pval=exposure_c1$pval, id=exposure_c1$trait_id),
dat = exposure_c3,
plink_bin = genetics.binaRies::get_plink_binary(),
bfile = "1kgv3/EUR"
)
exposure_dat3 <- filter(exposure_clump3, pval < 5e-8)
write_csv(exposure_dat3, "data/exposure_C2GRCH38.csv", append = FALSE)
Importing GWAS data for Acute Kidney Injury from “A generalized linear mixed model association tool for biobank-scale data”, by Jiang L et al and “Efficiently controlling for case-control imbalance and sample relatedness in large-scale genetic association studies” by Zhou W et al.
a <- read_tsv('resources/JiangL2021.tsv')
b <- read_tsv('resources/zhouw2018_phecode5851.tsv', col_select = c(1,2,3,4,5,6,7,8,9,11))
colnames(a)<- c("CHR","SNP","POS","effect_allele.outcome","other_allele.outcome","N","eaf.outcome","T","SE_T","P_NoSpa","beta.outcome","se.outcome","pval.outcome","CONVERGE")
a$outcome <- rep(c("AKI"),times=nrow(a));
clump_id <- "jiangl";
a$id.outcome <- rep(c(clump_id),times=nrow(a));
colnames(b)<- c("CHR","POS","effect_allele.outcome","other_allele.outcome","beta.outcome","se.outcome","eaf.outcome","pval.outcome","SNP","N")
b$Z <- b$se.outcome/b$beta.outcome;
clump_id <- "zhou_w";
b$outcome <- rep(c("AKI"),times=nrow(b));
b$id.outcome <- rep(c(clump_id),times=nrow(b));
merged <- a %>%
inner_join(b, by = "SNP", suffix = c("_1", "_2"))
#merged <- inner_join(a,b, by = "SNP", suffix = c("_1", "_2"))
merged <- merged %>%
filter(effect_allele.outcome_1 == effect_allele.outcome_2 & other_allele.outcome_1 == other_allele.outcome_2)
merged <- merged %>%
mutate(
# Calculate the weights based on standard errors
w1 = 1 / (se.outcome_1^2),
w2 = 1 / (se.outcome_2^2),
ES_META = (beta.outcome_1 * w1 + beta.outcome_2 * w2) / (w1 + w2),
# Calculate the combined standard error
SE_META = sqrt(1 / (w1 + w2 )),
# Compute the Z statistic (Effect size divided by standard error)
Z_META = ES_META / SE_META,
#compute beta
AF_META= (eaf.outcome_1 * 456348 + eaf.outcome_2 * 402123) / (456348 + 402123),
# Compute the P-value (two-tailed)
P_META = 2 * pnorm(-abs(Z_META))
)
write.csv(merged, file = "meta_combined_results.csv")
meta_result <- merged %>%
select(SNP, effect_allele.outcome = effect_allele.outcome_1, other_allele.outcome = other_allele.outcome_1, ES_META, SE_META, P_META, outcome_1, id.outcome = id.outcome_1, AF_META)
meta_result$id.outcome = rep(c("akimet"),times=nrow(meta_result));
write.table(meta_result, file = "meta_combined_results.txt", sep = "\t", row.names = FALSE, quote = FALSE)
#write results to tsv file
aki_combined <- read_tsv("meta_combined_results.txt")
# calculate the effective N using case/control N
combined_effective_N = 4/(1/1199+1/455149) + 4/(1/4521 + 1/397602)
colnames(aki_combined)<- c("SNP","effect_allele.outcome","other_allele.outcome","beta.outcome","se.outcome","pval","outcome","id.outcome","eaf.outcome")
aki_combined$N = rep(c(combined_effective_N),times=nrow(aki_combined))
exposure_snps <- read_csv("data/exposure_A2GRCH38.csv")
#filter outcome on SNPs in the exposure
outcome_clumped <- filter(aki_combined, SNP %in% exposure_snps$rsid)
#harmonise data
mr_dat <- harmonise_data(dplyr::tibble(SNP=exposure_snps$rsid, beta.exposure=exposure_snps$beta.exposure, se.exposure=exposure_snps$se.exposure, effect_allele.exposure = exposure_snps$effect_allele.exposure, other_allele.exposure = exposure_snps$other_allele.exposure,eaf.exposure = exposure_snps$eaf.exposure, id.exposure = exposure_snps$id.exposure, exposure = exposure_snps$exposure), dplyr::tibble(SNP=outcome_clumped$SNP, beta.outcome=outcome_clumped$beta.outcome, se.outcome=outcome_clumped$se.outcome, effect_allele.outcome = outcome_clumped$effect_allele.outcome, other_allele.outcome = outcome_clumped$other_allele.outcome,eaf.outcome = outcome_clumped$eaf.outcome , outcome = outcome_clumped$outcome, id.outcome = outcome_clumped$id.outcome), action = 2)
#write_csv(mr_dat, "data/harmonized_data_combinedA2")
#Necessary R Packages
library(tidyverse) # Data wrangling
library(knitr)
library(TwoSampleMR) # MR
library(gt)
library(LDlinkR) # LD and proxy snps
library(RadialMR) # Radial MR sensetivity analysis
library(phenoscanner)
mr_dat <- read_csv("combined_results/A2/harmonized_data_combinedA2")
## Rows: 24 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): SNP, effect_allele.exposure, other_allele.exposure, effect_allele.o...
## dbl (8): beta.exposure, beta.outcome, eaf.exposure, eaf.outcome, se.outcome,...
## lgl (5): remove, palindromic, ambiguous, mr_keep, samplesize.outcome
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mr_res <- mr(mr_dat, method_list = c(
"mr_two_sample_ml", "mr_egger_regression", "mr_egger_regression_bootstrap",
"mr_simple_median", "mr_weighted_median", "mr_penalised_weighted_median",
"mr_ivw_fe", "mr_ivw_mre",
"mr_simple_mode", "mr_weighted_mode", "mr_weighted_mode_nome", "mr_simple_mode_nome"
))
## Analysing 'a2grch' on 'akimet'
res_single <- mr_singlesnp(mr_dat, all_method = c("mr_ivw_fe", "mr_egger_regression", "mr_weighted_median", "mr_weighted_mode")) %>% as_tibble()
View(res_single)
odd <- generate_odds_ratios(mr_res)
print("Finished Analyzing")
#write.csv(odd,"combined_results/final_combinedA2.csv")
#write.csv(res_single,"combined_results/A2/res_singleA2.csv")
scatter_p <- mr_scatter_plot(mr_res, mr_dat)
scatter_out_p <- scatter_p[[1]] + theme_bw() +
guides(color=guide_legend(ncol =1)) +
theme(
text = element_text(size = 8),
)
scatter_out_p
#ggsave("combined_results/A2/scatterplotA2combined.png", plot = scatter_out_p, units = 'in', height = 4, width = 9)
forrest_p <- mr_forest_plot(res_single)
forrest_p[[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_errorbarh()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#ggsave("combined_results/A2/forrestA2combined.png", plot =forrest_p[[1]] + theme(text = element_text(size = 8)), units = 'in', height = 9, width = 9)
res_loo <- mr_leaveoneout(mr_dat, method = mr_ivw_fe) %>% as_tibble()
loo_p <- mr_leaveoneout_plot(res_loo)
loo_p[[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_errorbarh()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#ggsave("combined_results/A2/lo9A2combined.png",plot = loo_p[[1]], units = 'in', height = 9, width = 9)
mr_dat <- read_csv("combined_results/B2/harmonized_data_combinedB2")
## Rows: 29 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): SNP, effect_allele.exposure, other_allele.exposure, effect_allele.o...
## dbl (8): beta.exposure, beta.outcome, eaf.exposure, eaf.outcome, se.outcome,...
## lgl (5): remove, palindromic, ambiguous, mr_keep, samplesize.outcome
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mr_res <- mr(mr_dat, method_list = c(
"mr_two_sample_ml", "mr_egger_regression", "mr_egger_regression_bootstrap",
"mr_simple_median", "mr_weighted_median", "mr_penalised_weighted_median",
"mr_ivw_fe", "mr_ivw_mre",
"mr_simple_mode", "mr_weighted_mode", "mr_weighted_mode_nome", "mr_simple_mode_nome"
))
## Analysing 'b2grch' on 'akimet'
res_single <- mr_singlesnp(mr_dat, all_method = c("mr_ivw_fe", "mr_egger_regression", "mr_weighted_median", "mr_weighted_mode")) %>% as_tibble()
View(res_single)
odd <- generate_odds_ratios(mr_res)
print("Finished Analyzing")
#write.csv(res_single,"combined_results/B2/res_singleB2.csv")
#write.csv(odd,"combined_results/final_combinedB2.csv")
scatter_p <- mr_scatter_plot(mr_res, mr_dat)
scatter_out_p <- scatter_p[[1]] + theme_bw() +
guides(color=guide_legend(ncol =1)) +
theme(
text = element_text(size = 8),
)
scatter_out_p
#ggsave("combined_results/B2/scatterplotB2combined.png", plot = scatter_out_p, units = 'in', height = 4, width = 9)
forrest_p <- mr_forest_plot(res_single)
forrest_p[[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_errorbarh()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#ggsave("combined_results/B2/forrestB2combined.png", plot =forrest_p[[1]] + theme(text = element_text(size = 8)), units = 'in', height = 9, width = 9)
res_loo <- mr_leaveoneout(mr_dat, method = mr_ivw_fe) %>% as_tibble()
loo_p <- mr_leaveoneout_plot(res_loo)
loo_p[[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_errorbarh()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#ggsave("combined_results/B2/looB2combined.png",plot = loo_p[[1]], units = 'in', height = 9, width = 9)
mr_dat <- read_csv("combined_results/C2/harmonized_data_combinedC2")
## Rows: 11 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): SNP, effect_allele.exposure, other_allele.exposure, effect_allele.o...
## dbl (8): beta.exposure, beta.outcome, eaf.exposure, eaf.outcome, se.outcome,...
## lgl (5): remove, palindromic, ambiguous, mr_keep, samplesize.outcome
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mr_res <- mr(mr_dat, method_list = c(
"mr_two_sample_ml", "mr_egger_regression", "mr_egger_regression_bootstrap",
"mr_simple_median", "mr_weighted_median", "mr_penalised_weighted_median",
"mr_ivw_fe", "mr_ivw_mre",
"mr_simple_mode", "mr_weighted_mode", "mr_weighted_mode_nome", "mr_simple_mode_nome"
))
## Analysing 'c2grch' on 'akimet'
res_single <- mr_singlesnp(mr_dat, all_method = c("mr_ivw_fe", "mr_egger_regression", "mr_weighted_median", "mr_weighted_mode")) %>% as_tibble()
#View(res_single)
odd <- generate_odds_ratios(mr_res)
print("Finished Analyzing")
#write.csv(odd,"combined_results/final_combinedC2.csv")
#write.csv(res_single,"combined_results/C2/res_singleC2.csv")
scatter_p <- mr_scatter_plot(mr_res, mr_dat)
scatter_out_p <- scatter_p[[1]] + theme_bw() +
guides(color=guide_legend(ncol =1)) +
theme(
text = element_text(size = 8),
)
scatter_out_p
#ggsave("combined_results/C2/scatterplotC2combined.png", plot = scatter_out_p, units = 'in', height = 4, width = 9)
forrest_p <- mr_forest_plot(res_single)
forrest_p[[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_errorbarh()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#ggsave("combined_results/C2/forrestC2combined.png", plot =forrest_p[[1]] + theme(text = element_text(size = 8)), units = 'in', height = 9, width = 9)
res_loo <- mr_leaveoneout(mr_dat, method = mr_ivw_fe) %>% as_tibble()
loo_p <- mr_leaveoneout_plot(res_loo)
loo_p[[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_errorbarh()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#ggsave("combined_results/C2/looC2combined.png",plot = loo_p[[1]], units = 'in', height = 9, width = 9)