1 Instrument Selection

# Set working directory
setwd("/Users/charleenadams/metsim_nacy1_bmi")

# Load ACY1 data
acy1 <- fread("phenocode-C1110.tsv")

# Retrieve TSS data for ACY1 using Ensembl
#ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", mirror = "useast")
gene_list <- unique(acy1$nearest_genes)
tss_data <- getBM(
  attributes = c("ensembl_gene_id", "hgnc_symbol", "chromosome_name", "transcription_start_site"),
  filters = "hgnc_symbol", 
  values = gene_list, 
  mart = ensembl)

# Rename columns for better clarity
colnames(tss_data) <- c("Ensembl_ID", "Gene", "Chromosome", "TSS")
ensembl_ACY1 <- tss_data[which(tss_data$Gene=="ACY1"),]

# Merge TSS Data with acy1 data
# Convert to data.table for merging
dat_dt <- as.data.table(acy1)
tss_data_dt <- as.data.table(tss_data)

# Ensure the 'Gene' column is of the same type
dat_dt[, Gene := as.character(nearest_genes)]
tss_data_dt[, Gene := as.character(Gene)]

# Remove duplicated rows in tss_data and keep only the first TSS for each gene
tss_data_unique <- tss_data_dt[!duplicated(tss_data_dt$Gene), ]

# Merge data
merged_data <- merge(dat_dt, tss_data_unique, by = "Gene", all.x = TRUE)
acy1 <- merged_data

# Define target position and window size
target_pos <- 51983340      # Target genomic position: 51983340
window_size <- 1000000       # 1mb upstream and downstream

# Calculate start and end positions for the 1MB window
start_pos <- target_pos - window_size
end_pos <- target_pos + window_size

# Ensure that start_pos is not negative
start_pos <- ifelse(start_pos < 1, 1, start_pos)

# Filter the data for SNPs within the 1MB window on chromosome 19
selected_data <- acy1[
  chrom == "3" & pos >= start_pos & pos <= end_pos
]

output_file <- paste0("Selected_SNPs_chr3_", start_pos, "_", end_pos, ".csv")
fwrite(selected_data, output_file)
cat("Filtered SNPs saved to:", output_file, "\n")

acy1 <- fread("/Users/charleenadams/metsim_nacy1_bmi/Selected_SNPs_chr3_50983340_52983340.csv")

# Select instruments
agnostic_instruments <- acy1[acy1$pval < 5e-8, ]
exp_dat <- data.frame(
  SNP = agnostic_instruments$rsid,
  beta.exposure = agnostic_instruments$beta,
  se.exposure = agnostic_instruments$sebeta,
  eaf.exposure = agnostic_instruments$maf,
  effect_allele.exposure = agnostic_instruments$alt,
  other_allele.exposure = agnostic_instruments$ref,
  pval.exposure = agnostic_instruments$pval,
  samplesize.exposure = 6136,
  chr.exposure = agnostic_instruments$chrom,
  pos.exposure = agnostic_instruments$pos
)
exp_dat$exposure <- "ACY1"
exp_dat$id.exposure <- "ACY1"

# LD clumping
exp_dat_clumped <- clump_data(
  exp_dat,
  clump_kb = 500,
  clump_r2 = 0.01, # 0.001 is too conservative
  clump_p1 = 5e-8
)

# Load BMI data and prepare outcome dataset
bmi <- fread("/Users/charleenadams/acy1_bmi/GWAS_sumstats_EUR__invnorm_bmi__TOTALsample.tsv")
out_dat <- data.frame(
  SNP = bmi$SNP,
  beta.outcome = bmi$BETA,
  se.outcome = bmi$SE,
  eaf.outcome = bmi$A1FREQ,
  effect_allele.outcome = bmi$ALLELE1,
  other_allele.outcome = bmi$ALLELE0,
  pval.outcome = bmi$P,
  samplesize.outcome = 460000,
  chr.outcome = bmi$CHR,
  pos.outcome = bmi$BP
)
out_dat$outcome <- "BMI"
out_dat$id.outcome <- "BMI"

# Harmonize data
harmonized_data <- harmonise_data(
  exposure_dat = exp_dat_clumped,
  outcome_dat = out_dat
)

# Perform Steiger test
dat.steiger <- steiger_filtering(harmonized_data)
write.table(
  dat.steiger,
  file = "/Users/charleenadams/metsim_nacy1_bmi/harmonized_steiger_data.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE, sep = "\t")

2 Two-instrument IVW Analysis

2.1 Data and Heterogeneity Test

dat.steiger <- fread("/Users/charleenadams/metsim_nacy1_bmi/harmonized_steiger_data.txt")
dat.steiger_rounded <- dat.steiger
dat.steiger_rounded <- as.data.table(dat.steiger_rounded)

# Identify numeric columns
num_cols <- sapply(dat.steiger_rounded, is.numeric)

# Round numeric columns to 3 decimal places
dat.steiger_rounded[, (names(dat.steiger_rounded)[num_cols]) := lapply(.SD, round, 3), .SDcols = num_cols]

# Display harmonized data
datatable(
  dat.steiger_rounded,
  options = list(pageLength = 10, scrollX = TRUE),
  rownames = FALSE,
  caption = "Harmonized Data with Steiger Information"
)
# Test for heterogeneity
het <- mr_heterogeneity(dat.steiger)
write.csv(het, "het.csv")
het_rounded <- het
het_rounded[, sapply(het_rounded, is.numeric)] <- lapply(
  het_rounded[, sapply(het_rounded, is.numeric)],
  round, 3
)

datatable(
  het_rounded,
  options = list(pageLength = 10, scrollX = TRUE),
  rownames = FALSE,
  caption = "Heterogeneity Test"
)
# # Test for pleiotropy
# plt <- mr_pleiotropy_test(dat.steiger)
# plt_rounded <- plt
# plt_rounded[, sapply(plt_rounded, is.numeric)] <- lapply(
#   plt_rounded[, sapply(plt_rounded, is.numeric)],
#   round, 3
# )

2.2 Results

# MR analysis
res <- mr(dat.steiger)
res_rounded <- res
res_rounded[, sapply(res_rounded, is.numeric)] <- lapply(
  res_rounded[, sapply(res_rounded, is.numeric)],
  round, 3
)
#res_rounded <- res_rounded[res_rounded$method != "Simple mode", ]
res_rounded <- res_rounded[order(res_rounded$method), ]

datatable(
  res_rounded,
  options = list(pageLength = 10, scrollX = TRUE),
  rownames = FALSE,
  caption = "Two-SNP IVW Results"
)
write.table(
  res,
  file = "/Users/charleenadams/metsim_nacy1_bmi/res.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE, sep = "\t")

# Generate forest plot
ggplot(res_rounded, aes(x = method, y = b, ymin = b - 1.96 * se, ymax = b + 1.96 * se)) +
  geom_pointrange() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  coord_flip() +
  labs(
    title = "N-acetylalanine (ACY1 Region) on BMI",
    x = "Method",
    y = "Effect Size (Beta)"
  ) +
  theme_minimal()

ggsave("forest_plot_acy1_bmi.png", width = 8, height = 6, dpi = 300)