Import Exposure
library(tidyverse)
── Attaching core tidyverse packages ────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2 ── Conflicts ──────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(TwoSampleMR)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
TwoSampleMR version 0.6.11
[>] New authentication requirements: https://mrcieu.github.io/ieugwasr/articles/guide.html#authentication.
[>] Major upgrades to our servers completed to improve service and stability.
[>] We need your help to shape our emerging roadmap!
Please take 2 minutes to give us feedback -
https://forms.office.com/e/eSr7EFAfCG
Warning:
You are running an old version of the TwoSampleMR package.
This version: 0.6.11
Latest version: 0.6.30
Please consider updating using remotes::install_github('MRCIEU/TwoSampleMR')
library(LDlinkR) # LD and proxy snps
library(ieugwasr)
OpenGWAS updates:
Date: 2024-05-17
[>] OpenGWAS is growing!
[>] Please take 2 minutes to give us feedback -
[>] It will help directly shape our emerging roadmap
[>] https://forms.office.com/e/eSr7EFAfCG
Attaching package: ‘ieugwasr’
The following object is masked from ‘package:TwoSampleMR’:
ld_matrix
library(fixtuRes)
exposure_path_A2 = "gwas_resources/A2EUR_GRCH38.tsv.gz"
exposure_c1 <- read_tsv(exposure_path_A2,comment = "##",
col_select = c(1,2,3,4,5,6,7,8,9,14,15))
colnames(exposure_c1)<- c("CHR","POS","effect_allele.exposure","other_allele.exposure","SNP","N","beta.exposure","se.exposure","pval","eaf.exposure","rsid")
exposure_c1$exposure <- rep(c("A2_severe_respiratory_COVID19"),times=nrow(exposure_c1));
clump_id<- "a2grch"
#calculate z value
exposure_c1$z.exposure <- exposure_c1$beta.exposure/exposure_c1$se.exposure;
exposure_c1$id.exposure <- rep(c(clump_id),times=nrow(exposure_c1));
exposure_dat1 <- filter(exposure_c1, pval < 5e-8)
exposure_clump1 <-ld_clump(
#dplyr::tibble(rsid=exposure_c1$rsid, pval=exposure_c1$pval, id=exposure_c1$trait_id),
dat = exposure_dat1,
plink_bin = genetics.binaRies::get_plink_binary(),
bfile = "1kgv3/EUR"
)
write_csv(exposure_clump1, "data/exposure_A2GRCH38.csv", append = FALSE)
exposure_path_B2 = "gwas_resources/B2EUR_GRCH38.tsv.gz"
exposure_c2 <- read_tsv(exposure_path_B2,comment = "##",
col_select = c(1,2,3,4,5,6,7,8,9,14,15))
colnames(exposure_c2)<- c("CHR","POS","effect_allele.exposure","other_allele.exposure","SNP","N","beta.exposure","se.exposure","pval","eaf.exposure","rsid")
exposure_c2$exposure <- rep(c("B2_hospitalized_COVID19"),times=nrow(exposure_c2));
clump_id<- "b2grch"
#calculate z value
exposure_c2$z.exposure <- exposure_c2$beta.exposure/exposure_c2$se.exposure;
exposure_c2$id.exposure <- rep(c(clump_id),times=nrow(exposure_c2));
exposure_dat2 <- filter(exposure_c2, pval < 5e-8)
exposure_clump2 <-ld_clump(
#dplyr::tibble(rsid=exposure_c1$rsid, pval=exposure_c1$pval, id=exposure_c1$trait_id),
dat = exposure_dat2,
plink_bin = genetics.binaRies::get_plink_binary(),
bfile = "1kgv3/EUR"
)
write_csv(exposure_clump2, "data/exposure_B2GRCH38.csv", append = FALSE)
exposure_path_C2 = "gwas_resources/C2EUR_GRCH38.tsv.gz"
exposure_c3 <- read_tsv(exposure_path_C2,comment = "##",
col_select = c(1,2,3,4,5,6,7,8,9,14,15))
colnames(exposure_c3)<- c("CHR","POS","effect_allele.exposure","other_allele.exposure","SNP","N","beta.exposure","se.exposure","pval","eaf.exposure","rsid")
exposure_c3$exposure <- rep(c("C2_COVID19"),times=nrow(exposure_c3));
clump_id<- "c2grch"
#calculate z value
exposure_c3$z.exposure <- exposure_c3$beta.exposure/exposure_c3$se.exposure;
exposure_c3$id.exposure <- rep(c(clump_id),times=nrow(exposure_c3));
exposure_dat3 <- filter(exposure_c3, pval < 5e-8)
exposure_clump3 <-ld_clump(
#dplyr::tibble(rsid=exposure_c1$rsid, pval=exposure_c1$pval, id=exposure_c1$trait_id),
dat = exposure_dat3,
plink_bin = genetics.binaRies::get_plink_binary(),
bfile = "1kgv3/EUR"
)
write_csv(exposure_clump3, "data/exposure_C2GRCH38.csv", append = FALSE)
Import Outcome
outcome1 <- read_tsv('gwas_resources/AKIJiangL2021h.tsv.gz', col_select = c(2,3,4,5,6,7,11,12,18, 21,22))
colnames(outcome1)<- c("rsid","CHR","POS","other_allele.outcome","effect_allele.outcome","beta.outcome","eaf.outcome","code","N","se.outcome","pval.outcome")
outcome1$outcome <- rep(c("PheCode 585.1"),times=nrow(outcome1));
clump_id <- "jiangl2021";
outcome1$id.outcome <- rep(c(clump_id),times=nrow(outcome1));
exposure_snps <- read_csv("data/exposure_A2GRCH38.csv")
#filter outcome on SNPs in the exposure
outcome_clumped <- filter(outcome1, rsid %in% exposure_snps$rsid)
#harmonise data
mr_dat <- harmonise_data(dplyr::tibble(SNP=exposure_snps$rsid, beta.exposure=exposure_snps$beta.exposure, se.exposure=exposure_snps$se.exposure, effect_allele.exposure = exposure_snps$effect_allele.exposure, other_allele.exposure = exposure_snps$other_allele.exposure,eaf.exposure = exposure_snps$eaf.exposure, id.exposure = exposure_snps$id.exposure, exposure = exposure_snps$exposure, pval.exposure = exposure_snps$pval), dplyr::tibble(SNP=outcome_clumped$rsid, beta.outcome=outcome_clumped$beta.outcome, se.outcome=outcome_clumped$se.outcome, effect_allele.outcome = outcome_clumped$effect_allele.outcome, other_allele.outcome = outcome_clumped$other_allele.outcome,eaf.outcome = outcome_clumped$eaf.outcome , outcome = outcome_clumped$outcome, id.outcome = outcome_clumped$id.outcome, pval.outcome= outcome_clumped$pval.outcome), action = 2) %>%
filter(pval.exposure < 5e-8 & SNP != "rs17885848")
write_csv(mr_dat, "harmonized/harmonized_A2_JiangL")
outcome2 <- read_tsv('gwas_resources/DouvilleNJModel1h.tsv.gz', col_select = c(1,2,3,4,5,6,7,8,9,13))
colnames(outcome2)<- c("CHR","POS","effect_allele.outcome","other_allele.outcome","beta.outcome","se.outcome","eaf.outcome","pval.outcome","rsid","N")
outcome2$outcome <- rep(c("Sepsis AKI"),times=nrow(outcome2));
clump_id <- "DouvilleNJ2025";
outcome2$id.outcome <- rep(c(clump_id),times=nrow(outcome2));
exposure_snps <- read_csv("data/exposure_A2GRCH38.csv")
#filter outcome on SNPs in the exposure
outcome_clumped <- filter(outcome2, rsid %in% exposure_snps$rsid)
#harmonise data
mr_dat <- harmonise_data(dplyr::tibble(SNP=exposure_snps$rsid, beta.exposure=exposure_snps$beta.exposure, se.exposure=exposure_snps$se.exposure, effect_allele.exposure = exposure_snps$effect_allele.exposure, other_allele.exposure = exposure_snps$other_allele.exposure,eaf.exposure = exposure_snps$eaf.exposure, id.exposure = exposure_snps$id.exposure, exposure = exposure_snps$exposure, pval.exposure = exposure_snps$pval), dplyr::tibble(SNP=outcome_clumped$rsid, beta.outcome=outcome_clumped$beta.outcome, se.outcome=outcome_clumped$se.outcome, effect_allele.outcome = outcome_clumped$effect_allele.outcome, other_allele.outcome = outcome_clumped$other_allele.outcome,eaf.outcome = outcome_clumped$eaf.outcome , outcome = outcome_clumped$outcome, id.outcome = outcome_clumped$id.outcome, pval.outcome= outcome_clumped$pval.outcome), action = 2) %>%
filter(pval.exposure < 5e-6)
write_csv(mr_dat, "harmonized/harmonized_A2_Douville")
outcome3 <- read_tsv('gwas_resources/AKIVermaAh.tsv.gz', col_select = c(1,2,3,4,5,6,7,8,9,13))
colnames(outcome3)<- c("CHR","POS","effect_allele.outcome","other_allele.outcome","oddsratio.outcome","se.outcome","eaf.outcome","pval.outcome","rsid","N")
outcome3$outcome <- rep(c("PheCode 585.1 (Verma)"),times=nrow(outcome3));
clump_id <- "vermaA2024";
outcome3$id.outcome <- rep(c(clump_id),times=nrow(outcome3));
outcome3$beta.outcome <- log(outcome3$oddsratio.outcome)
exposure_snps <- read_csv("data/exposure_A2GRCH38.csv")
#filter outcome on SNPs in the exposure
outcome_clumped <- filter(outcome3, rsid %in% exposure_snps$rsid)
#harmonise data
mr_dat <- harmonise_data(dplyr::tibble(SNP=exposure_snps$rsid, beta.exposure=exposure_snps$beta.exposure, se.exposure=exposure_snps$se.exposure, effect_allele.exposure = exposure_snps$effect_allele.exposure, other_allele.exposure = exposure_snps$other_allele.exposure,eaf.exposure = exposure_snps$eaf.exposure, id.exposure = exposure_snps$id.exposure, exposure = exposure_snps$exposure, pval.exposure = exposure_snps$pval), dplyr::tibble(SNP=outcome_clumped$rsid, beta.outcome=outcome_clumped$beta.outcome, se.outcome=outcome_clumped$se.outcome, effect_allele.outcome = outcome_clumped$effect_allele.outcome, other_allele.outcome = outcome_clumped$other_allele.outcome,eaf.outcome = outcome_clumped$eaf.outcome , outcome = outcome_clumped$outcome, id.outcome = outcome_clumped$id.outcome, pval.outcome= outcome_clumped$pval.outcome), action = 2) %>%
filter(pval.exposure < 5e-8)
write_csv(mr_dat, "harmonized/harmonized_A2_VermaA")