Instrument
Selection
# Set working directory
setwd("/Users/charleenadams/metsim_nacy1_bmi")
# Load ACY1 data
acy1 <- fread("phenocode-C1110.tsv")
# Retrieve TSS data for ACY1 using Ensembl
#ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", mirror = "useast")
gene_list <- unique(acy1$nearest_genes)
tss_data <- getBM(
attributes = c("ensembl_gene_id", "hgnc_symbol", "chromosome_name", "transcription_start_site"),
filters = "hgnc_symbol",
values = gene_list,
mart = ensembl)
# Rename columns for better clarity
colnames(tss_data) <- c("Ensembl_ID", "Gene", "Chromosome", "TSS")
ensembl_ACY1 <- tss_data[which(tss_data$Gene=="ACY1"),]
# Merge TSS Data with acy1 data
# Convert to data.table for merging
dat_dt <- as.data.table(acy1)
tss_data_dt <- as.data.table(tss_data)
# Ensure the 'Gene' column is of the same type
dat_dt[, Gene := as.character(nearest_genes)]
tss_data_dt[, Gene := as.character(Gene)]
# Remove duplicated rows in tss_data and keep only the first TSS for each gene
tss_data_unique <- tss_data_dt[!duplicated(tss_data_dt$Gene), ]
# Merge data
merged_data <- merge(dat_dt, tss_data_unique, by = "Gene", all.x = TRUE)
acy1 <- merged_data
# Define target position and window size
target_pos <- 51983340 # Target genomic position: 51983340
window_size <- 1000000 # 1mb upstream and downstream
# Calculate start and end positions for the 1MB window
start_pos <- target_pos - window_size
end_pos <- target_pos + window_size
# Ensure that start_pos is not negative
start_pos <- ifelse(start_pos < 1, 1, start_pos)
# Filter the data for SNPs within the 1MB window on chromosome 19
selected_data <- acy1[
chrom == "3" & pos >= start_pos & pos <= end_pos
]
output_file <- paste0("Selected_SNPs_chr3_", start_pos, "_", end_pos, ".csv")
fwrite(selected_data, output_file)
cat("Filtered SNPs saved to:", output_file, "\n")
acy1 <- fread("/Users/charleenadams/metsim_nacy1_bmi/Selected_SNPs_chr3_50983340_52983340.csv")
# Select instruments
agnostic_instruments <- acy1[acy1$pval < 5e-8, ]
exp_dat <- data.frame(
SNP = agnostic_instruments$rsid,
beta.exposure = agnostic_instruments$beta,
se.exposure = agnostic_instruments$sebeta,
eaf.exposure = agnostic_instruments$maf,
effect_allele.exposure = agnostic_instruments$alt,
other_allele.exposure = agnostic_instruments$ref,
pval.exposure = agnostic_instruments$pval,
samplesize.exposure = 6136,
chr.exposure = agnostic_instruments$chrom,
pos.exposure = agnostic_instruments$pos
)
exp_dat$exposure <- "ACY1"
exp_dat$id.exposure <- "ACY1"
# LD clumping
exp_dat_clumped <- clump_data(
exp_dat,
clump_kb = 500,
clump_r2 = 0.01, # 0.001 is too conservative
clump_p1 = 5e-8
)
# Load BMI data and prepare outcome dataset
bmi <- fread("/Users/charleenadams/acy1_bmi/GWAS_sumstats_EUR__invnorm_bmi__TOTALsample.tsv")
out_dat <- data.frame(
SNP = bmi$SNP,
beta.outcome = bmi$BETA,
se.outcome = bmi$SE,
eaf.outcome = bmi$A1FREQ,
effect_allele.outcome = bmi$ALLELE1,
other_allele.outcome = bmi$ALLELE0,
pval.outcome = bmi$P,
samplesize.outcome = 460000,
chr.outcome = bmi$CHR,
pos.outcome = bmi$BP
)
out_dat$outcome <- "BMI"
out_dat$id.outcome <- "BMI"
# Harmonize data
harmonized_data <- harmonise_data(
exposure_dat = exp_dat_clumped,
outcome_dat = out_dat
)
# Perform Steiger test
dat.steiger <- steiger_filtering(harmonized_data)
write.table(
dat.steiger,
file = "/Users/charleenadams/metsim_nacy1_bmi/harmonized_steiger_data.txt",
quote = FALSE,
row.names = FALSE,
col.names = TRUE, sep = "\t")
Two-instrument IVW
Analysis
Data and
Heterogeneity Test
dat.steiger <- fread("/Users/charleenadams/metsim_nacy1_bmi/harmonized_steiger_data.txt")
dat.steiger_rounded <- dat.steiger
dat.steiger_rounded <- as.data.table(dat.steiger_rounded)
# Identify numeric columns
num_cols <- sapply(dat.steiger_rounded, is.numeric)
# Round numeric columns to 3 decimal places
dat.steiger_rounded[, (names(dat.steiger_rounded)[num_cols]) := lapply(.SD, round, 3), .SDcols = num_cols]
# Display harmonized data
datatable(
dat.steiger_rounded,
options = list(pageLength = 10, scrollX = TRUE),
rownames = FALSE,
caption = "Harmonized Data with Steiger Information"
)
# Test for heterogeneity
het <- mr_heterogeneity(dat.steiger)
write.csv(het, "het.csv")
het_rounded <- het
het_rounded[, sapply(het_rounded, is.numeric)] <- lapply(
het_rounded[, sapply(het_rounded, is.numeric)],
round, 3
)
datatable(
het_rounded,
options = list(pageLength = 10, scrollX = TRUE),
rownames = FALSE,
caption = "Heterogeneity Test"
)
# # Test for pleiotropy
# plt <- mr_pleiotropy_test(dat.steiger)
# plt_rounded <- plt
# plt_rounded[, sapply(plt_rounded, is.numeric)] <- lapply(
# plt_rounded[, sapply(plt_rounded, is.numeric)],
# round, 3
# )
Results
# MR analysis
res <- mr(dat.steiger)
res_rounded <- res
res_rounded[, sapply(res_rounded, is.numeric)] <- lapply(
res_rounded[, sapply(res_rounded, is.numeric)],
round, 3
)
#res_rounded <- res_rounded[res_rounded$method != "Simple mode", ]
res_rounded <- res_rounded[order(res_rounded$method), ]
datatable(
res_rounded,
options = list(pageLength = 10, scrollX = TRUE),
rownames = FALSE,
caption = "Two-SNP IVW Results"
)
write.table(
res,
file = "/Users/charleenadams/metsim_nacy1_bmi/res.txt",
quote = FALSE,
row.names = FALSE,
col.names = TRUE, sep = "\t")
# Generate forest plot
ggplot(res_rounded, aes(x = method, y = b, ymin = b - 1.96 * se, ymax = b + 1.96 * se)) +
geom_pointrange() +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
coord_flip() +
labs(
title = "N-acetylalanine (ACY1 Region) on BMI",
x = "Method",
y = "Effect Size (Beta)"
) +
theme_minimal()

ggsave("forest_plot_acy1_bmi.png", width = 8, height = 6, dpi = 300)