Contents

1 Helpful bash

# Helpful commands

# Getting number of lines
wc -l nucs_total_maf_beta_se.txt

# Listing by file type
ls *.Rmd | head -n 5
ls *.csv | head -n 5
ls *_mr.csv | head -n 5

# Removing directories with protected files 
rm -f -R /n/home04/cdadams/MR-eqtlGen-long #rm a directory with a protected file

# Viewing hidden files
ls -dl .[^.]* ~

# Removing git to re-initialize
rm -rf .git
rm README.md 

# Moving
mv *_mr.csv /n/home04/cdadams/MR-eqtlGen-bc
mv *_clump3.csv /n/home04/cdadams/MR-eqtlGen-bc

2 Format eqtlGen GWAS

\[ \beta = \frac{z} {\sqrt2p(1-p)(n + z^2)} \] \[ SE = \frac{1} {\sqrt2p(1-p)(n + z^2)} \] where \[p = MAF\]

# README files for the eQTLGen GWAS
# This README accompanies the files with cis-eQTL results from eQTLGen
 
# Files
# -----
# File with full cis-eQTL results: 2019-12-11-cis-eQTLsFDR-ProbeLevel-CohortInfoRemoved-BonferroniAdded.txt.gz
# File with significant (FDR<0.05) cis-eQTL results: 2019-12-11-cis-eQTLsFDR0.05-ProbeLevel-CohortInfoRemoved-BonferroniAdded.txt.gz
 
# Column Names
# ------------
# Pvalue - P-value
# SNP - SNP rs ID
# SNPChr - SNP chromosome
# SNPPos - SNP position
# AssessedAllele - Assessed allele, the Z-score refers to this allele
# OtherAllele - Not assessed allele
# Zscore - Z-score
# Gene - ENSG name (Ensembl v71) of the eQTL gene
# GeneSymbol - HGNC name of the gene
# GeneChr - Gene chromosome
# GenePos - Centre of gene position
# NrCohorts - Total number of cohorts where this SNP-gene combination was tested
# NrSamples - Total number of samples where this SNP-gene combination was tested
# FDR - False discovery rate estimated based on permutations
# BonferroniP - P-value after Bonferroni correction
# 
# Additional information
# ----------------------
# These files contain all cis-eQTL results from eQTLGen, accompanying the article.
# 19,250 genes that showed expression in blood were tested.
# Every SNP-gene combination with a distance <1Mb from the center of the gene and  tested in at least 2 cohorts was included.
# Associations where SNP/proxy positioned in Illumina probe were not removed from combined analysis.
# UPDATE LOG
# ----------
# 2018-10-19: Initial data release
# 2018-12-19: In the current README, following file names have been fixed and updated:
# 2019-12-20: Cis-eQTLs are now updated to have a 2-cohort filter: every cis-eQTL must be tested in at least 2 cohorts to be reported

# On cluster, I gzipped the eqtlGen file: gzip 2019-12-11-cis-eQTLsFDR-ProbeLevel-CohortInfoRemoved-BonferroniAdded.txt
# this is because I kept getting error messages both here & on the cluster saying ther was no room on the disk.
# I moved unnecessary files to a folder in my personal computer DB: /Users/charleenadams/Dropbox/Harvard/longevity/survival/odyssey_transfer/
# This has the gz files for the protein GWAS, the eQTLGen SMR file, and the eQTLGen EAF file.
# I launched an interacted terminal session with increased memory: 
# salloc -p test  --mem 5000 -t 0-06:00

#-----------------------------------------------------------------------------------------------------------------------
# README file for the MAF eQTLGen cis-eQTL data 

# A README accompanies the file with allele frequencies from eQTLGen cis-eQTL analysis: 
# 2018-07-18_SNP_AF_for_AlleleB_combined_allele_counts_and_MAF_pos_added.txt.gz

# Column Names
# ------------
# SNP -         SNP rs ID
# hg19_chr -    chr number
# hg19_pos -    SNP position (hg19)
# AlleleA -     Other allele
# AlleleB -     Assessed allele
# allA_total -  Total allele count of genotype AA
# allAB_total - Total allele count of genotype AB
# allB_total -  Total allele count of genotype BB
# AlleleB_all - Allele frequency of assessed allele

# Additional information
# ----------------------
# The allele frequencies were calculated using reported allele counts from all cohorts except 
# Framingham Heart Study, because the related samples are present in this cohort.

#-----------------------------------------------------------------------------------------------------------------------
setwd('/n/home04/cdadams/MR-eqtlGen-long')
combo4=read.csv('/n/home04/cdadams/MR-eqtlGen-long/combo4.csv')

tbl=fread('cat 2019-12-11-cis-eQTLsFDR-ProbeLevel-CohortInfoRemoved-BonferroniAdded.txt.gz | gunzip')

# Extract NUCs
nucs=tbl[which(tbl$GeneSymbol %in% combo4$gene),]
head(tbl$Gene)

# Read-in MAFs
eqtl_MAF=read.table(file = "2018-07-18_SNP_AF_for_AlleleB_combined_allele_counts_and_MAF_pos_added.txt", 
                    sep = "\t", header=TRUE)
nucs_total <- merge(nucs,eqtl_MAF,by="SNP")

# Calculate betas and SEs
n=30596
nucs_total$Beta=nucs_total$Zscore/sqrt(2*(nucs_total$AlleleB_all)*
                  (1-nucs_total$AlleleB_all)*(n+nucs_total$Zscore^2))
nucs_total$SE=1/sqrt(2*(nucs_total$AlleleB_all)*
                  (1-nucs_total$AlleleB_all)*(n+nucs_total$Zscore^2))

write.table(nucs_total, file = "/n/home04/cdadams/MR-eqtlGen-long/nucs_total_maf_beta_se.txt", sep = "\t",
            row.names = FALSE, col.names = TRUE, quote=FALSE)

# gznucs <- gzfile("/n/home04/cdadams/MR-eqtlGen-long/nucs.gz", "w")
# write.csv(nucs, gznucs)
# close(gznucs)

# Read-in for MR formatting
exposure_nuc_eqtlGen <- read_exposure_data(
  filename = "/n/home04/cdadams/MR-eqtlGen-long/nucs_total_maf_beta_se.txt",
  sep = '\t',
  snp_col = 'SNP',
  beta_col = 'Beta',
  se_col = 'SE',
  effect_allele_col = 'AssessedAllele',
  #phenotype_col = 'tissue',
  #units_col = 'tissue',
  other_allele_col = 'OtherAllele',
  eaf_col = 'AlleleB_all',
  #samplesize_col = 'samplesize',
  #ncase_col = 'ncase',
  #ncontrol_col = 'ncontrol',
  gene_col = 'GeneSymbol',
  pval_col = 'Pvalue', 
  id_col = 'NUC eqtlGEN'
)

exposure_nuc_eqtlGen_p=exposure_nuc_eqtlGen[which(exposure_nuc_eqtlGen$pval.exposure<0.000005),]
write.csv(exposure_nuc_eqtlGen_p, 'exposure_nuc_eqtlGen_p.csv')

3 Look at the longevity README

# The files for the longevity data are available at: https://datashare.ed.ac.uk/handle/10283/3209
# They come with the following README:

# This dataset contains HRC-imputed GWAS summary statistics on parental survival, under the assumption of common effect sizes between fathers and mothers. 

# rsid - reference SNP cluster ID; 
# snpid - SNP chromosome and position ID; 
# chr - Chromosome; 
# pos - Base-pair position on chromosome (GRCh37); 
# a1 - the effect allele; 
# a0 - the reference allele; 
# n - Number of phenotypes analysed for the SNP. Note that combined parental residuals from� UK Biobank are counted once, and LifeGen meta-analysed father and mother statistics are counted once each, and as a result, the total number of phenotypes analysed is actually greater than the number listed. 

# freq1 - frequency of the effect allele; 
# beta1 - log hazard protection ratio (negation of log hazard ratio) for carrying one copy of the a1 allele in self; 
# se - Standard Error; 
# p - the P value for the Wald test of association between imputed dosage and cox model residual; 
# direction - direction of effect in UK Biobank and LifeGen; 
# info - imputation info score; 
# freq_se - standard error of effect allele frequency between UK Biobank and LifeGen; 
# min_freq1 - minimum effect allele frequency across UK Biobank and LifeGen; 
# max_freq1 - maximum effect allele frequency across UK Biobank and LifeGen.

4 Harmonize the SNPs

exposure_nuc_eqtlGen_p=read.csv('exposure_nuc_eqtlGen_p.csv')

long_outcome <- read_outcome_data(
  snps = exposure_nuc_eqtlGen_p$SNP,
  filename = "/n/home04/cdadams/MR-eqtlGen-long/long_for_mr.txt",
  sep = '\t',
  snp_col = 'rsid',
  beta_col = 'beta1',
  se_col = 'se',
  effect_allele_col = 'a1',
  #phenotype_col = 'tissue',#tissue
  #units_col = 'tissue',
  other_allele_col = 'a0',
  eaf_col = 'freq1',
  #samplesize_col = 'samplesize',
  #ncase_col = 'ncase',
  #ncontrol_col = 'ncontrol',
  gene_col = 'gene_name',
  pval_col = 'p', 
  id_col = 'longevity'
)

#Harmonize the alleles btw the two gwas
nuc_eqtlGen_long <- harmonise_data(exposure_nuc_eqtlGen_p, long_outcome , action = 2)
nuc_eqtlGen_long$samplesize.outcome="1012240"
nuc_eqtlGen_long$samplesize.exposure="30596"
nuc_eqtlGen_long$id.exposure=nuc_eqtlGen_long$gene.exposure

write.csv(nuc_eqtlGen_long, 'nuc_eqtlGen_long.csv') #can simply read this in for the MR

5 Split, loop, MR, & FDR correct results

setwd("/n/home04/cdadams/MR-eqtlGen-long")
nuc_eqtlGen_long=read.csv('/n/home04/cdadams/MR-eqtlGen-long/nuc_eqtlGen_long.csv')
freq_gene=table(nuc_eqtlGen_long$gene.exposure)
freq_gene=as.data.frame(freq_gene)
freq_gene

# Command to remove everything except "v"
#rm(list=(ls()[ls()!="v"]))

# Split the harmonized data into separate data frames for each genes
nuc_split <- split(nuc_eqtlGen_long, nuc_eqtlGen_long$id.exposure)
list2env(nuc_split, envir= .GlobalEnv) #split the list into separate datasets

# Remove the original nuc_eqtlGen_long and nuc_split
rm(list= ls()[(ls() %in% c("nuc_eqtlGen_long","nuc_split") )])

# Save each harmonized gene as a .csv
# for (i in 1:length(files)){
#   write.csv(files[[i]], paste(names(files[i]), ".csv", sep = ""))
# }

# Get them back together: not used here but useful
# dfs = sapply(.GlobalEnv, is.data.frame) 
# dfs
# 
# df=do.call(rbind, mget(names(dfs)[dfs]))
# head(df)
# dim(df)

# Combine the separated files into a list
files <- mget(ls())
length(files)

# Use a loop to clump each NUC dataframe in the list, run MR, & save results
for (i in 1:length(files)){
   files[[i]]=clump_data(files[[i]])
   write.csv(files[[i]], paste(names(files[i]), "_clump_May.csv", sep = ""))
   files[[i]]=mr(files[[i]])
   write.csv(files[[i]], paste(names(files[i]), "_mr_May.csv", sep = ""))
}

# Idea for reading all mr results in and saving:
# Idea from: https://statisticsglobe.com/merge-csv-files-in-r
# Sadly, didn't work. 
# data_all <- list.files(path = "/n/home04/cdadams/MR-eqtlGen-long",  
#   pattern = "*_mr.csv", full.names = TRUE) %>% 
#   lapply(read_csv) %>%                                                    
#   bind_rows                                                                
#   write.csv(data_all, "complete_NUC_MR_res.csv")

# Merged all of the clumped data 
tbl_fread_clump <- 
    list.files(pattern = "*_clump_May.csv") %>% 
    map_df(~fread(.))
tbl_fread_clump_df=as.data.frame(tbl_fread_clump)
head(tbl_fread_clump_df)
colnames(tbl_fread_clump_df)
df <- tbl_fread_clump_df[c(3:37)]
head(df)
write.csv(df, 'df_May.csv')

6 Table of data

# Merge all the MR results and save 
tbl_fread <- 
    list.files(pattern = "*_mr_May.csv") %>% 
    map_df(~fread(.))

tbl_fread_df=as.data.frame(tbl_fread)
myvars=c('id.exposure','id.outcome','method', 'nsnp','b','se','pval')
res=tbl_fread_df[myvars]  
res$id.outcome="longevity"
res=res[order(res$p),]

# FDR correction
p=res$p
fdr=p.adjust(p, method = "fdr", n = length(p))
res$fdr=fdr
res=res[order(res$fdr),]
head(res)
write.csv(res,'MR_eqtlGEN_NUC_long_May.csv')

# Select just the set with >2 SNPs
res=res[order(res$fdr),]
res_meta_set=res[which(res$nsnp>2),]
res_meta_set=res_meta_set[which(res_meta_set$method=='Inverse variance weighted'),]

# FDR for res_meta_set (IVWs)
p2=res_meta_set$p
fdr2=p.adjust(p2, method = "fdr", n = length(p2))
res_meta_set$fdr2=fdr2
res_meta_set$fdr=NULL

res_meta_set=res_meta_set[order(res_meta_set$fdr2),]
head(res_meta_set)
write.csv(res_meta_set,'MR_eqtlGEN_NUC_long_meta_set_May.csv')

7 Results

7.1 Top results ordered by p-value (not FDR)

This includes a list of all tests run - includes 1-SNP tests (Wald ratios) - inverse variance weighted (IVW) - main meta-analytic test - sensitivity estimators: weighted median, weighted mode, and MR-Egger

res=read.csv("/n/home04/cdadams/MR-eqtlGen-long/MR_eqtlGEN_NUC_long_May.csv")
head(res, n=5)
##     X id.exposure id.outcome                    method nsnp           b
## 1 133      EXOSC6  longevity           Weighted median    6  0.04521698
## 2 725      SRFBP1  longevity                Wald ratio    1  0.26379902
## 3 468     NSUN5P1  longevity                Wald ratio    1 -0.10806606
## 4 704       RRP1B  longevity Inverse variance weighted    2  0.06181689
## 5 100      EIF2S1  longevity Inverse variance weighted    2  0.09804917
##           se         pval        fdr
## 1 0.01061975 2.064308e-05 0.01680347
## 2 0.06759308 9.510509e-05 0.03870777
## 3 0.03037154 3.734969e-04 0.10134216
## 4 0.01950860 1.531223e-03 0.20573515
## 5 0.03120291 1.676214e-03 0.20573515

7.2 Top IVW results by FDR

res_meta_set=read.csv('/n/home04/cdadams/MR-eqtlGen-long/MR_eqtlGEN_NUC_long_May.csv')
res_meta_set=res_meta_set[which(res_meta_set$method=="Inverse variance weighted"),]

head(res_meta_set, n=5)
##      X id.exposure id.outcome                    method nsnp           b
## 4  704       RRP1B  longevity Inverse variance weighted    2  0.06181689
## 5  100      EIF2S1  longevity Inverse variance weighted    2  0.09804917
## 8  431        NOL8  longevity Inverse variance weighted    2 -0.05681910
## 12 454       NOP58  longevity Inverse variance weighted    2 -0.04543306
## 14 581      RPL13A  longevity Inverse variance weighted    4 -0.03602445
##            se        pval       fdr
## 4  0.01950860 0.001531223 0.2057351
## 5  0.03120291 0.001676214 0.2057351
## 8  0.01860941 0.002263799 0.2175967
## 12 0.01571007 0.003828331 0.2498247
## 14 0.01268964 0.004527114 0.2498247

7.3 Top IVW results: Just RPs

# Subset by just the RPs
rp_list_expanded=read.csv('/n/home04/cdadams/ld/ldsc/expanded_verified.csv')

rp_metas=res_meta_set[which(res_meta_set$id.exposure %in% rp_list_expanded$all_RP),]
dim(rp_metas)
## [1] 65  9
# FDR correction for RP set
rp_metas_p=rp_metas$p
rp_metas_fdr=p.adjust(rp_metas_p, method = "fdr", n = length(rp_metas_p))
rp_metas$rp_metas_fdr=rp_metas_fdr
rp_metas=rp_metas[order(rp_metas$rp_metas_fdr),]
rp_metas
##       X id.exposure id.outcome                    method nsnp             b
## 14  581      RPL13A  longevity Inverse variance weighted    4 -0.0360244501
## 24  388       MRPS7  longevity Inverse variance weighted    4 -0.0306515258
## 26  617       RPL31  longevity Inverse variance weighted    2  0.0389393578
## 87  679        RPS8  longevity Inverse variance weighted    3 -0.0247515701
## 89  308       MRPL4  longevity Inverse variance weighted    2  0.0344329766
## 97  288      MRPL34  longevity Inverse variance weighted    4  0.0196048469
## 100 304      MRPL38  longevity Inverse variance weighted    5 -0.0373179598
## 126 607       RPL28  longevity Inverse variance weighted    4 -0.0361798531
## 137 673       RPS3A  longevity Inverse variance weighted    2  0.0421098466
## 139 393       MRPS9  longevity Inverse variance weighted    4 -0.0182263949
## 148 340      MRPL53  longevity Inverse variance weighted    3 -0.0260347974
## 152 313      MRPL42  longevity Inverse variance weighted    3 -0.0155597587
## 161 293      MRPL35  longevity Inverse variance weighted    3 -0.0174653472
## 169 647       RPS13  longevity Inverse variance weighted    2 -0.0730079144
## 175 327      MRPL48  longevity Inverse variance weighted    5  0.0198541353
## 186 658       RPS23  longevity Inverse variance weighted    6 -0.0108013737
## 225 356      MRPS17  longevity Inverse variance weighted    2 -0.0504272760
## 226 265      MRPL17  longevity Inverse variance weighted    2  0.0341054097
## 243 602      RPL27A  longevity Inverse variance weighted    3  0.0288417638
## 248 345      MRPL54  longevity Inverse variance weighted    4  0.0163325792
## 276 379      MRPS35  longevity Inverse variance weighted    2  0.0229113805
## 292 262      MRPL14  longevity Inverse variance weighted    2 -0.0137860963
## 295 665       RPS26  longevity Inverse variance weighted    5 -0.0587782286
## 300 632        RPL9  longevity Inverse variance weighted    4 -0.0162304457
## 315 383       MRPS6  longevity Inverse variance weighted    4  0.0272259765
## 320 652       RPS18  longevity Inverse variance weighted    2 -0.0193754597
## 325 378      MRPS34  longevity Inverse variance weighted    2 -0.0255084174
## 347 282      MRPL27  longevity Inverse variance weighted    2 -0.0418791658
## 368 565      RPL10A  longevity Inverse variance weighted    4 -0.0176219089
## 389 671       RPS29  longevity Inverse variance weighted    2 -0.0161249175
## 399 648       RPS14  longevity Inverse variance weighted    2  0.0235710566
## 412 268      MRPL18  longevity Inverse variance weighted    4 -0.0130982568
## 418 684        RPS9  longevity Inverse variance weighted    3  0.0085972462
## 423 375      MRPS33  longevity Inverse variance weighted    3  0.0246100497
## 424 318      MRPL43  longevity Inverse variance weighted    4 -0.0098019290
## 427 271      MRPL19  longevity Inverse variance weighted    2 -0.0102308036
## 474 276      MRPL21  longevity Inverse variance weighted    5  0.0041289643
## 486 335      MRPL52  longevity Inverse variance weighted    4  0.0084905998
## 494 279      MRPL22  longevity Inverse variance weighted    2 -0.0139102881
## 520 259      MRPL10  longevity Inverse variance weighted    2  0.0082443980
## 539 367      MRPS23  longevity Inverse variance weighted    2 -0.0470261814
## 541 299      MRPL37  longevity Inverse variance weighted    3 -0.0092595557
## 542 619       RPL36  longevity Inverse variance weighted    2  0.0158896195
## 566 272       MRPL2  longevity Inverse variance weighted    2 -0.0129157376
## 590 332      MRPL51  longevity Inverse variance weighted    2  0.0083828802
## 621 612       RPL29  longevity Inverse variance weighted    3  0.0473555202
## 632 586       RPL14  longevity Inverse variance weighted    4 -0.0045271219
## 650 592       RPL17  longevity Inverse variance weighted    4 -0.0028918893
## 662 618       RPL32  longevity Inverse variance weighted    2  0.0198522608
## 663 650      RPS15A  longevity Inverse variance weighted    2 -0.0022626055
## 672 627        RPL8  longevity Inverse variance weighted    5  0.0027697094
## 676 571       RPL12  longevity Inverse variance weighted    4 -0.0026335445
## 678 296      MRPL36  longevity Inverse variance weighted    2  0.0028899729
## 694 661       RPS24  longevity Inverse variance weighted    2 -0.0115598550
## 714 363      MRPS21  longevity Inverse variance weighted    3 -0.0022057945
## 718 273      MRPL20  longevity Inverse variance weighted    2  0.0098581675
## 730 655       RPS20  longevity Inverse variance weighted    2  0.0057325634
## 753 369      MRPS25  longevity Inverse variance weighted    2  0.0008505426
## 761 284      MRPL32  longevity Inverse variance weighted    2 -0.0011937342
## 770 358     MRPS18B  longevity Inverse variance weighted    2 -0.0034524043
## 772 576       RPL13  longevity Inverse variance weighted    4 -0.0006514211
## 785 283      MRPL28  longevity Inverse variance weighted    2  0.0012932128
## 791 674        RPS5  longevity Inverse variance weighted    2 -0.0004154042
## 793 644       RPS10  longevity Inverse variance weighted    2 -0.0019855090
## 808 354      MRPS15  longevity Inverse variance weighted    2  0.0001835885
##              se        pval       fdr rp_metas_fdr
## 14  0.012689637 0.004527114 0.2498247    0.2684181
## 24  0.011831101 0.009576466 0.3188581    0.2684181
## 26  0.015570264 0.012388526 0.3844115    0.2684181
## 87  0.014582681 0.089634960 0.8049054    0.8197270
## 89  0.020500425 0.093030772 0.8049054    0.8197270
## 97  0.011855563 0.098200458 0.8049054    0.8197270
## 100 0.022636363 0.099232759 0.8049054    0.8197270
## 126 0.024207335 0.135023501 0.8604251    0.8197270
## 137 0.029339791 0.151216725 0.8604251    0.8197270
## 139 0.012731716 0.152265731 0.8604251    0.8197270
## 148 0.018485807 0.159022474 0.8604251    0.8197270
## 152 0.011210353 0.165142778 0.8604251    0.8197270
## 161 0.012852640 0.174180891 0.8604251    0.8197270
## 169 0.054978185 0.184196829 0.8604251    0.8197270
## 175 0.015139255 0.189711151 0.8604251    0.8197270
## 186 0.008461713 0.201778949 0.8604251    0.8197270
## 225 0.043455422 0.245870958 0.8604251    0.8542722
## 226 0.029485312 0.247398424 0.8604251    0.8542722
## 243 0.025689715 0.261566240 0.8604251    0.8542722
## 248 0.014764680 0.268643192 0.8604251    0.8542722
## 276 0.021897594 0.295424067 0.8674751    0.8542722
## 292 0.013663719 0.312995569 0.8725287    0.8542722
## 295 0.059207727 0.320833799 0.8805519    0.8542722
## 300 0.016579066 0.327593694 0.8805519    0.8542722
## 315 0.028602566 0.341161989 0.8805519    0.8542722
## 320 0.020567161 0.346162929 0.8805519    0.8542722
## 325 0.027570114 0.354851542 0.8875535    0.8542722
## 347 0.048090857 0.383844654 0.8911195    0.8864025
## 368 0.021413149 0.410537555 0.9047959    0.8864025
## 389 0.020687433 0.435711906 0.9047959    0.8864025
## 399 0.030850985 0.444849776 0.9054230    0.8864025
## 412 0.017867548 0.463512538 0.9128071    0.8864025
## 418 0.011908616 0.470334452 0.9128071    0.8864025
## 423 0.034739605 0.478687722 0.9211627    0.8864025
## 424 0.013956613 0.482483178 0.9262767    0.8864025
## 427 0.014852449 0.490930638 0.9309395    0.8864025
## 474 0.006810805 0.544357136 0.9309395    0.9453862
## 486 0.014499640 0.558162403 0.9309395    0.9453862
## 494 0.024313021 0.567231699 0.9309395    0.9453862
## 520 0.015982194 0.605959911 0.9461506    0.9557763
## 539 0.096924984 0.627547807 0.9471157    0.9557763
## 541 0.019340725 0.632109730 0.9481647    0.9557763
## 542 0.033206015 0.632282774 0.9481647    0.9557763
## 566 0.029832784 0.665060014 0.9488405    0.9824750
## 590 0.021443399 0.695848559 0.9582014    0.9920145
## 621 0.141953762 0.738682772 0.9678343    0.9920145
## 632 0.014496652 0.754822110 0.9721918    0.9920145
## 650 0.010568799 0.784372689 0.9806869    0.9920145
## 662 0.080787368 0.805887598 0.9901535    0.9920145
## 663 0.009256295 0.806890339 0.9901535    0.9920145
## 672 0.012713394 0.827540075 0.9997700    0.9920145
## 676 0.012566664 0.834006480 0.9997700    0.9920145
## 678 0.013897833 0.835272588 0.9997700    0.9920145
## 694 0.063273511 0.855036087 0.9997700    0.9920145
## 714 0.015283002 0.885239952 0.9997700    0.9920145
## 718 0.071241711 0.889942992 0.9997700    0.9920145
## 730 0.054288844 0.915904673 0.9997700    0.9920145
## 753 0.013608422 0.950163713 0.9997700    0.9920145
## 761 0.020901594 0.954455886 0.9997700    0.9920145
## 770 0.073938355 0.962757902 0.9997700    0.9920145
## 772 0.014395854 0.963907557 0.9997700    0.9920145
## 785 0.037918063 0.972793059 0.9997700    0.9920145
## 791 0.013924843 0.976201135 0.9997700    0.9920145
## 793 0.068136249 0.976752715 0.9997700    0.9920145
## 808 0.025995532 0.994365138 1.0000000    0.9943651

8 Wald ratios (all unclumped, single SNPs)

setwd('/n/home04/cdadams/MR-eqtlGen-long')
nuc_eqtlGen_long=read.csv('/n/home04/cdadams/MR-eqtlGen-long/nuc_eqtlGen_long.csv')
nuc_eqtlGen_long$id.outcome

# Split the harmonized data into separate data frames for each genes
nuc_split <- split(nuc_eqtlGen_long, nuc_eqtlGen_long$id.exposure)
list2env(nuc_split, envir= .GlobalEnv) #split the list into separate datasets

# Remove the original nuc_eqtlGen_long and nuc_split
rm(list= ls()[(ls() %in% c("nuc_eqtlGen_long","nuc_split") )])

# Combine the separated files into a list
files <- mget(ls())
length(files)

# Use a loop to run MR & save results
for (i in 1:length(files)){
   files[[i]]=mr_singlesnp(files[[i]], parameters = default_parameters(), single_method = "mr_wald_ratio")
   write.csv(files[[i]], paste(names(files[i]), "_Walds_mr_May.csv", sep = ""))
}

# Merge all the MR results and save 
Walds_res_tbl_fread <- 
    list.files(pattern = "*_Walds_mr_May.csv") %>% 
    map_df(~fread(.))
Walds_res_tbl_fread_df=as.data.frame(Walds_res_tbl_fread)
myvars=c('id.exposure','id.outcome', 'SNP','b','se','p')
Walds_res=Walds_res_tbl_fread_df[myvars]  
Walds_res$id.outcome="longevity"
Walds_res=Walds_res[order(Walds_res$p),]
write.csv(Walds_res,"Walds_includes_metas_res_May.csv")

# Tried  selecting "rs", but the "inverse variance weighted" includes "rs"
# exp1 <- Walds_res[Walds_res$SNP %like% "rs", ]  
Walds_res2=Walds_res %>% 
  filter(str_detect(Walds_res$SNP, "^rs"))

# FDR correction
p=Walds_res2$p
fdr=p.adjust(p, method = "fdr", n = length(p))
Walds_res2$fdr=fdr
Walds_res2=Walds_res2[order(Walds_res2$fdr),]
write.csv(Walds_res2,'Walds_MR_eqtlGEN_NUC_long_May.csv')

# Bonferroni correction 

p=Walds_res2$p
bonferroni=p.adjust(p, method = "bonferroni", n = length(p))
Walds_res2$bonferroni=bonferroni
Walds_res2=Walds_res2[order(Walds_res2$bonferroni),]
write.csv(Walds_res2,'Walds_MR_eqtlGEN_NUC_long_May.csv')

9 Top Wald ratio results

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

10 Rpubs

Link: https://rpubs.com/Charleen_D_Adams/744199