Contents

0.1 Install smr_linux

https://cnsgenomics.com/software/smr/#Download

0.2 Modify and move .bashrc

I created a bin folder in my directory under: /n/holylfs/LABS/lemos_lab/Users/cdadams/ and put in a FAS Help ticket to get help with exporting a path (see note below). After adding the bin folder and exporting it in the .bashrc, I moved smr_Linux into the bin folder and ran it. (FAS Help also told me, though, that lemos_lab isn’t backed up daily. They said that even though I could certainly have small scripts there, our home directories are backed up daily. So, if files are accidentally deleted under lemos_lab, they might not be recoverable. FAS Help pointed out that lemos_lab is for “big data”, but personal directories are good for small files. I’m going to keep things all in lemos_lab for now, since I routinely run out of space in my home directory, and I back a lot up with version tracking on GitHub.)

Nano, a text editor accessible through the command line, can be used to edit scripts such as .bashrc.

nano .bashrc
# .bashrc -- per-user settings read by bash

# Load the system-wide definitions first, when the file exists
if [[ -f /etc/bashrc ]]; then
    source /etc/bashrc
fi

# Disabled entry for the edirect tools, kept for reference
#export PATH=$PATH:/n/home04/cdadams/edirect

# Personal bin under the home directory, appended after the existing entries
export PATH="${PATH}:${HOME}/bin/"

# FAS RC ticket #149663
# Lab-storage bin, prepended so that it is searched before everything else
export PATH="/n/holylfs/LABS/lemos_lab/Users/cdadams/bin:${PATH}"

0.3 Data manage the lifespan GWAS for SMR

Each of the four lifespan-related GWAS summary-statistics files had to be formatted individually to work with SMR.

setwd("/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/")

## Lifespan
# Reshape the lifespan GWAS summary statistics into the eight columns written
# for SMR: SNP, A1, A2, freq, b, se, p, N.
long <- fread("/n/home04/cdadams/MR-eqtlGen-long/long_for_mr.txt", header = TRUE, sep = "\t")
long <- as.data.frame(long)

# Copy the source columns to the SMR column names; se and p are used as-is.
long$SNP <- long$rsid
long$A1 <- long$a1
long$A2 <- long$a0
long$freq <- long$freq1
long$b <- long$beta
# Sample size as an integer rather than the string "1012240", so it is never
# written wrapped in quotes.
long$N <- 1012240L

myvars <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
smr_lifespan <- long[myvars]

# quote = FALSE keeps the header and the character columns free of double
# quotes, so the file does not need a sed pass to strip them afterwards.
write.table(smr_lifespan, "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_lifespan.txt",
            sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)

## Healthspan
# Reshape the healthspan GWAS summary statistics into the eight columns written
# for SMR: SNP, A1, A2, freq, b, se, p, N.
hs <- fread("/n/home04/cdadams/metab_longevity/healthspan/healthspan_summary.csv")
hs <- as.data.frame(hs)

hs$SNP <- hs$SNPID
# A1 comes from EA. The original first assigned beta to A1 and then overwrote
# it on the next line; that dead assignment has been removed.
hs$A1 <- hs$EA
hs$A2 <- hs$RA
hs$freq <- hs$EAF
hs$b <- hs$beta

# Convert the -log10(p-value) to a p-value
# [[ ]] reads the awkwardly named column directly, so no rename or tibble
# round trip is needed.
hs$p <- 10^(-hs[["-log10(p-value)"]])
# Sample size as an integer rather than the string "300447", so it is never
# written wrapped in quotes.
hs$N <- 300447L

myvars <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
smr_healthspan <- hs[myvars]
# Smallest p-values first
smr_healthspan <- smr_healthspan[order(smr_healthspan$p), ]

# quote = FALSE keeps the header and the character columns free of double
# quotes, so the file can be passed to SMR without stripping them first.
write.table(smr_healthspan, "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_healthspan.txt",
            sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)

## Longevity 90
# Reshape the 90th-percentile longevity GWAS summary statistics into the eight
# columns written for SMR: SNP, A1, A2, freq, b, se, p, N.
long90 <- fread("/n/home04/cdadams/metab_longevity/extreme_longevity/NA_gone_longevity_90_rsid.csv")
long90 <- as.data.frame(long90)

# A1 must hold an allele. The original assigned EAF here, which put allele
# frequencies into the allele column.
# Assumes the effect-allele column is named EA (the counterpart of NEA and
# EAF) -- confirm against names(long90). The check stops early if it is absent.
stopifnot("EA" %in% names(long90))

long90$SNP <- long90$rsid_rs
long90$A1 <- long90$EA
long90$A2 <- long90$NEA
long90$freq <- long90$EAF
long90$b <- long90$Beta
long90$se <- long90$SE
long90$p <- long90$P.value
# Sample size as an integer rather than the string "36745", so it is never
# written wrapped in quotes.
long90$N <- 36745L

myvars <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
smr_long90 <- long90[myvars]

# quote = FALSE keeps the header and the character columns free of double
# quotes, so the file can be passed to SMR without stripping them first.
write.table(smr_long90, "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_long90.txt",
            sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)

## Longevity 99
# Reshape the 99th-percentile longevity GWAS summary statistics into the eight
# columns written for SMR: SNP, A1, A2, freq, b, se, p, N.
long99 <- fread("/n/home04/cdadams/metab_longevity/extreme_longevity99/NA_gone_longevity_99_rsid.csv")
long99 <- as.data.frame(long99)

# A1 must hold an allele. The original assigned EAF here, which put allele
# frequencies into the allele column.
# Assumes the effect-allele column is named EA (the counterpart of NEA and
# EAF) -- confirm against names(long99). The check stops early if it is absent.
stopifnot("EA" %in% names(long99))

long99$SNP <- long99$rsid_rs
long99$A1 <- long99$EA
long99$A2 <- long99$NEA
long99$freq <- long99$EAF
long99$b <- long99$Beta
long99$se <- long99$SE
long99$p <- long99$P.value
# Sample size as an integer rather than the string "28967", so it is never
# written wrapped in quotes.
long99$N <- 28967L

myvars <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
smr_long99 <- long99[myvars]

# quote = FALSE keeps the header and the character columns free of double
# quotes, so the file can be passed to SMR without stripping them first.
write.table(smr_long99, "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_long99.txt",
            sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
#smr_long992=fread("/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_long99.txt", sep="\t", header=TRUE)

0.4 Save lifespan snps as a txt file

Save the lifespan snps as a txt file (will need to do this for the three other GWAS too)

# Read back the SMR-formatted lifespan file and save its SNP identifiers.
life_ld <- fread("/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_lifespan.txt", sep = "\t")
# The column is written as "SNP" (upper case). Column lookup is case-sensitive,
# so the original `life_ld$snp` returned NULL instead of the identifiers.
snplist <- life_ld$SNP
write.table(snplist, "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/snplist.txt", sep = "\t", row.names = FALSE)

0.5 SED to remove quotes from snplist.txt & smr_lifespan.txt

# Work inside the SMR project directory
cd /n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen

# Delete every double-quote character; the cleaned copies go to new files and
# the originals are left untouched
sed 's/\"//g' < snplist.txt > snplist2.txt
sed 's/\"//g' < smr_lifespan.txt > smr_lifespan2

0.6 Attempt to get an LD matrix with MR-BASE

I tried to create a local LD matrix using the European reference panel from the 1000 Genomes Project, but kept running into big-file problems, so I just did it in PLINK. It turns out I didn’t need to generate the matrix at all once I had downloaded the European binary files.

# Get a path to plink
devtools::install_github("explodecomputer/genetics.binaRies")
genetics.binaRies::get_plink_binary()
# /n/home04/cdadams/R/ifxrstudio/RELEASE_3_12/genetics.binaRies/bin/plink_Linux

library(genetics.binaRies)
devtools::install_github("explodecomputer/plinkbinr")
library(plinkbinr)
get_plink_exe()

# Generate an LD matrix
# Two fixes relative to the original call:
#   * the SNP column is "SNP" (upper case); `life_ld$snp` is NULL, so no
#     variants were being passed;
#   * a stray `<yes-really>` token after the plink_bin string made the call a
#     syntax error and has been removed.
myld <- ieugwasr::ld_matrix_local(
  life_ld$SNP,
  plink_bin = "/n/home04/cdadams/R/ifxrstudio/RELEASE_3_12/genetics.binaRies/bin/plink_Linux",
  bfile = "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/EUR",
  with_alleles = TRUE
)

0.8 Fetch EUR reference genome binaries

So much trouble! But, in the process, I got the binaries for EUR LD reference panel:

# Download the archive of LD reference panels built from 1000 Genomes
wget http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz
# Unpack the gzipped tarball into the current directory, listing each file
tar -x -z -v -f 1kg.v3.tgz

0.9 Request a session with increased memory

# Interactive allocation: one task on the "test" partition, a 6-hour limit
# (days-hours:minutes) and --mem=9000 of memory
salloc --ntasks=1 --partition=test --time=0-06:00 --mem=9000

0.10 SMR

# Run SMR with the EUR reference files, the quote-stripped lifespan GWAS file
# and the eQTLGen cis-eQTL BESD summary data; results are written with the
# prefix "mysmr", using 10 threads.
# NOTE(review): the bare `smr` right after the binary name is kept exactly as
# in the original command -- confirm that the tool expects or ignores it.
smr_Linux smr \
  --bfile EUR \
  --gwas-summary smr_lifespan2 \
  --beqtl-summary cis-eQTLs-full_eQTLGen_AF_incl_nr_formatted_20191212.new.txt_besd-dense \
  --out mysmr \
  --thread-num 10

0.11 Fetch gene symbols & extract NUC genes from mysmr

The SMR results are for top SNPs & are reported with Ensembl IDs. I fetched the gene symbols, read-in the NUC genes, and extracted the NUC genes from mysmr.

# Load the SMR results table and preview its first rows
mysmr <- fread("/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/mysmr.smr")
head(mysmr)
##            probeID ProbeChr            Gene Probe_bp      topSNP topSNP_chr
## 1: ENSG00000237491        1 ENSG00000237491   729801  rs12184325          1
## 2: ENSG00000225880        1 ENSG00000225880   762244 rs114525117          1
## 3: ENSG00000228794        1 ENSG00000228794   778907  rs60320384          1
## 4: ENSG00000188976        1 ENSG00000188976   887136   rs7417106          1
## 5: ENSG00000187961        1 ENSG00000187961   898531  rs10465242          1
## 6: ENSG00000187583        1 ENSG00000187583   906561   rs9697711          1
##    topSNP_bp A1 A2      Freq      b_GWAS    se_GWAS    p_GWAS    b_eQTL
## 1:    754105  T  C 0.0407555  0.00363243 0.01146170 0.7513047  1.144180
## 2:    759036  A  G 0.0417495  0.00135738 0.01145560 0.9056792  0.921721
## 3:    769223  G  C 0.1222660 -0.00501340 0.00616004 0.4157260 -0.189018
## 4:    911595  A  G 0.2127240 -0.00028841 0.00526643 0.9563264 -0.247736
## 5:    886788  G  A 0.1013920  0.00193227 0.00728517 0.7908298 -0.252370
## 6:    900972  T  G 0.0447316  0.00238660 0.00908880 0.7928691 -0.361831
##      se_eQTL       p_eQTL       b_SMR    se_SMR     p_SMR   p_HEIDI nsnp_HEIDI
## 1: 0.0561593 2.856293e-92  0.00317471 0.0100186 0.7513340 0.3489219          6
## 2: 0.0549631 4.058683e-63  0.00147266 0.0124289 0.9056818 0.6420299          5
## 3: 0.0271718 3.490795e-12  0.02652340 0.0328120 0.4188920 0.7737176         19
## 4: 0.0175934 4.956084e-45  0.00116418 0.0212584 0.9563269 0.9038120         20
## 5: 0.0193135 5.083089e-39 -0.00765651 0.0288730 0.7908718 0.6765434         20
## 6: 0.0246616 9.771819e-49 -0.00659591 0.0251230 0.7929014 0.9119470         18
# Report the number of rows and columns in the SMR results table
dim(mysmr)
## [1] 15454    21
# Duplicate the Gene column under the name ENSEMBL so it can be used as the
# merge key against the annotation table
mysmr$ENSEMBL <- mysmr$Gene
# Drop exact duplicate rows; dim() shows whether any were removed
uni_smr <- unique(mysmr)
dim(uni_smr)
## [1] 15454    22
# long=fread("/n/home04/cdadams/MR-eqtlGen-long/long_for_mr.txt", header=TRUE, sep="\t")
# colnames(long)
# smr_long <- merge(mysmr,long,by="rsid")
# dim(smr_long)
# head(smr_long)
# smr_long$ENSEMBL=smr_long$Gene
# head(smr_long)

# Attach the human annotation database. library() stops with an error when the
# package is missing, whereas require() only returns FALSE with a warning and
# lets the script fail later at the first use of org.Hs.eg.db.
library(org.Hs.eg.db)
# List the identifier types the database can be queried by
keytypes(org.Hs.eg.db)
##  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT"  "ENSEMBLTRANS"
##  [6] "ENTREZID"     "ENZYME"       "EVIDENCE"     "EVIDENCEALL"  "GENENAME"    
## [11] "GO"           "GOALL"        "IPI"          "MAP"          "OMIM"        
## [16] "ONTOLOGY"     "ONTOLOGYALL"  "PATH"         "PFAM"         "PMID"        
## [21] "PROSITE"      "REFSEQ"       "SYMBOL"       "UCSCKG"       "UNIGENE"     
## [26] "UNIPROT"
# Build a lookup table of Entrez ID, gene symbol and Ensembl ID for every
# Entrez key in org.Hs.eg.db.
# select() and keys() are namespace-qualified because dplyr, whose functions
# are used earlier in these notes, also exports select(); unqualified, the
# call depends on which package was attached last.
annot <- AnnotationDbi::select(org.Hs.eg.db,
  keys = AnnotationDbi::keys(org.Hs.eg.db),
  columns = c("ENTREZID", "SYMBOL", "ENSEMBL"),
  keytype = "ENTREZID")

# Inner join on the Ensembl ID: SMR rows with no matching ENSEMBL value in
# annot are dropped (compare the row count below with dim(mysmr)).
fetched <- merge(mysmr, annot, by = "ENSEMBL")
dim(fetched)
## [1] 13338    24
head(fetched)
##            ENSEMBL         probeID ProbeChr            Gene  Probe_bp
## 1: ENSG00000000419 ENSG00000000419       20 ENSG00000000419  49563248
## 2: ENSG00000000457 ENSG00000000457        1 ENSG00000000457 169842606
## 3: ENSG00000000460 ENSG00000000460        1 ENSG00000000460 169727233
## 4: ENSG00000000971 ENSG00000000971        1 ENSG00000000971 196668821
## 5: ENSG00000001036 ENSG00000001036        6 ENSG00000001036 143824720
## 6: ENSG00000001084 ENSG00000001084        6 ENSG00000001084  53421953
##        topSNP topSNP_chr topSNP_bp A1 A2      Freq      b_GWAS    se_GWAS
## 1:  rs2426214         20  49573810  A  G 0.3479130  0.00621773 0.00396205
## 2: rs10753798          1 169844515  G  T 0.4731610  0.00124718 0.00387656
## 3: rs12142505          1 169773863  C  A 0.0964215  0.00575911 0.00658187
## 4:   rs203683          1 196678432  G  C 0.3966200  0.00154242 0.00389239
## 5:  rs4896659          6 143788753  C  T 0.2385690 -0.00505717 0.00464004
## 6:   rs661603          6  53370107  C  T 0.4115310 -0.00540383 0.00394217
##       p_GWAS     b_eQTL    se_eQTL        p_eQTL       b_SMR    se_SMR
## 1: 0.1165731 -0.0741559 0.00824705  2.432083e-19 -0.08384680 0.0542363
## 2: 0.7476626 -0.1929130 0.00787703 1.860251e-132 -0.00646498 0.0200966
## 3: 0.3815758  0.4038000 0.01353190 1.159037e-195  0.01426230 0.0163068
## 4: 0.6919090 -0.1028820 0.00808786  4.543568e-37 -0.01499210 0.0378517
## 5: 0.2757587 -0.1759580 0.00956877  1.615188e-75  0.02874070 0.0264164
## 6: 0.1704449 -0.1427030 0.00798096  1.676318e-71  0.03786770 0.0277061
##        p_SMR   p_HEIDI nsnp_HEIDI ENTREZID   SYMBOL
## 1: 0.1221159 0.1331619         20     8813     DPM1
## 2: 0.7476833 0.2143223         20    57147    SCYL3
## 3: 0.3817806 0.4451564          6    55732 C1orf112
## 4: 0.6920509 0.2555007         20     3075      CFH
## 5: 0.2766001 0.5134780         20     2519    FUCA2
## 6: 0.1716990 0.3952101         20     2729     GCLC
#table(fetched$SYMBOL)

# Load the NUC gene list; its `gene` column is matched against SYMBOL below
combo4 <- read.csv('/n/home04/cdadams/MR-eqtlGen-long/combo4.csv')

# Keep only the annotated SMR rows whose symbol is in the NUC list
is_nuc <- fetched$SYMBOL %in% combo4$gene
nucs_fetched <- fetched[which(is_nuc), ]
dim(nucs_fetched)
## [1] 379  24
# Tabulate how many rows each NUC symbol contributes and print the table
genes_tab <- as.data.frame(table(nucs_fetched$SYMBOL))
genes_tab
##          Var1 Freq
## 1        ABT1    1
## 2       APEX1    1
## 3        APOD    1
## 4        BAG6    1
## 5        BMS1    1
## 6        BMT2    1
## 7       BUD23    1
## 8        BYSL    1
## 9    C12orf65    1
## 10        C1D    1
## 11      C1QBP    1
## 12       CHD7    1
## 13       CKS2    1
## 14      CNOT6    1
## 15      CUL4A    1
## 16       DAP3    1
## 17     DCAF13    1
## 18      DDX10    1
## 19      DDX17    1
## 20      DDX18    1
## 21      DDX21    1
## 22      DDX27    1
## 23      DDX28    1
## 24      DDX31    1
## 25      DDX47    1
## 26      DDX49    1
## 27      DDX51    1
## 28      DDX54    1
## 29      DDX56    1
## 30       DENR    1
## 31      DHX37    1
## 32      DIMT1    1
## 33       DIS3    1
## 34       DRD4    1
## 35     DROSHA    1
## 36   EBNA1BP2    1
## 37       EFL1    1
## 38      EIF2A    1
## 39    EIF2AK2    1
## 40    EIF2AK4    1
## 41      EIF2D    1
## 42     EIF2S1    1
## 43      EIF3C    1
## 44      EIF3H    1
## 45      EIF3K    1
## 46     EIF4A3    1
## 47      EIF5A    1
## 48     EIF5A2    1
## 49       EIF6    1
## 50       EMG1    1
## 51      ERAL1    1
## 52      ERCC2    1
## 53       ERI1    1
## 54       ERI2    1
## 55       ESF1    1
## 56       ETF1    1
## 57     EXOSC1    1
## 58    EXOSC10    1
## 59     EXOSC2    1
## 60     EXOSC3    1
## 61     EXOSC5    1
## 62     EXOSC6    1
## 63     EXOSC7    1
## 64     EXOSC8    1
## 65     EXOSC9    1
## 66    FAM207A    1
## 67    FASTKD2    1
## 68        FBL    1
## 69      FBLL1    1
## 70       FCF1    1
## 71    FDXACB1    1
## 72       FRG1    1
## 73      FTSJ3    1
## 74       FXR2    1
## 75     GEMIN4    1
## 76     GEMIN5    1
## 77       GFM2    1
## 78       GLUL    1
## 79       GNL2    1
## 80      GTF3A    1
## 81    GTPBP10    1
## 82     GTPBP4    1
## 83     HEATR1    1
## 84     HEATR3    1
## 85       HELB    1
## 86       HELQ    1
## 87      HSPA5    1
## 88    IGHMBP2    1
## 89       IMP3    1
## 90       IMP4    1
## 91      ISG20    1
## 92    ISG20L2    1
## 93      KAT2B    1
## 94       KRI1    1
## 95       KRR1    1
## 96       LSG1    1
## 97       LSM6    1
## 98       LTO1    1
## 99       LTV1    1
## 100      LYAR    1
## 101     MAIP1    1
## 102     MAK16    1
## 103    MALSU1    1
## 104     MCRS1    1
## 105      MDN1    1
## 106   METTL15    1
## 107   METTL16    1
## 108   METTL17    1
## 109    METTL5    1
## 110 MPHOSPH10    1
## 111  MPHOSPH6    1
## 112   MPV17L2    1
## 113      MRM2    1
## 114      MRM3    1
## 115     MRPL1    1
## 116    MRPL10    1
## 117    MRPL12    1
## 118    MRPL13    1
## 119    MRPL14    1
## 120    MRPL15    1
## 121    MRPL16    1
## 122    MRPL17    1
## 123    MRPL18    1
## 124    MRPL19    1
## 125     MRPL2    1
## 126    MRPL20    1
## 127    MRPL21    1
## 128    MRPL22    1
## 129    MRPL23    1
## 130    MRPL24    1
## 131    MRPL27    1
## 132    MRPL28    1
## 133    MRPL32    1
## 134    MRPL33    1
## 135    MRPL34    1
## 136    MRPL35    1
## 137    MRPL36    1
## 138    MRPL37    1
## 139    MRPL38    1
## 140    MRPL39    1
## 141     MRPL4    1
## 142    MRPL40    1
## 143    MRPL41    1
## 144    MRPL42    1
## 145    MRPL43    1
## 146    MRPL44    1
## 147    MRPL47    1
## 148    MRPL48    1
## 149    MRPL49    1
## 150    MRPL50    1
## 151    MRPL51    1
## 152    MRPL52    1
## 153    MRPL53    1
## 154    MRPL54    1
## 155    MRPL55    1
## 156    MRPL58    1
## 157     MRPL9    1
## 158    MRPS10    1
## 159    MRPS11    1
## 160    MRPS12    1
## 161    MRPS14    1
## 162    MRPS15    1
## 163    MRPS16    1
## 164    MRPS17    1
## 165   MRPS18A    1
## 166   MRPS18C    1
## 167    MRPS22    1
## 168    MRPS23    1
## 169    MRPS24    1
## 170    MRPS25    1
## 171    MRPS26    1
## 172    MRPS30    1
## 173    MRPS31    1
## 174    MRPS33    1
## 175    MRPS34    1
## 176    MRPS35    1
## 177    MRPS36    1
## 178     MRPS6    1
## 179     MRPS7    1
## 180     MRPS9    1
## 181     MRTO4    1
## 182    MTERF3    1
## 183    MTERF4    1
## 184      MTG2    1
## 185     MTIF2    1
## 186     MTIF3    1
## 187      MTOR    1
## 188   MYBBP1A    1
## 189      NAF1    1
## 190     NAT10    1
## 191      NCK1    1
## 192    NDUFA7    1
## 193   NDUFAB1    1
## 194      NGDN    1
## 195      NIFK    1
## 196      NIP7    1
## 197      NLE1    1
## 198      NMD3    1
## 199      NOB1    1
## 200     NOC2L    1
## 201     NOC4L    1
## 202     NOL10    1
## 203      NOL6    1
## 204      NOL8    1
## 205      NOL9    1
## 206      NOM1    1
## 207     NOP10    1
## 208     NOP14    1
## 209     NOP16    1
## 210      NOP2    1
## 211     NOP53    1
## 212     NOP56    1
## 213     NOP58    1
## 214      NOP9    1
## 215      NPM3    1
## 216      NSA2    1
## 217     NSUN3    1
## 218     NSUN4    1
## 219   NSUN5P1    1
## 220   NSUN5P2    1
## 221    NUDT16    1
## 222    NUFIP1    1
## 223     NUP88    1
## 224       NVL    1
## 225     OXA1L    1
## 226   PAK1IP1    1
## 227    PDCD11    1
## 228      PELO    1
## 229      PES1    1
## 230    PIH1D1    1
## 231     PINX1    1
## 232     PNPT1    1
## 233    POLR1A    1
## 234    POLR1B    1
## 235      POP4    1
## 236      POP5    1
## 237      POP7    1
## 238      PPAN    1
## 239  PPARGC1A    1
## 240      PTEN    1
## 241      PWP1    1
## 242      PWP2    1
## 243      PYM1    1
## 244       RAN    1
## 245      RBFA    1
## 246      RBIS    1
## 247      RCL1    1
## 248    RICTOR    1
## 249     RIOK1    1
## 250     RIOK2    1
## 251     RIOK3    1
## 252     RIOX2    1
## 253    RNASEL    1
## 254      RPF1    1
## 255      RPF2    1
## 256    RPL10A    1
## 257     RPL11    1
## 258     RPL12    1
## 259     RPL13    1
## 260    RPL13A    1
## 261     RPL14    1
## 262     RPL15    1
## 263     RPL17    1
## 264    RPL18A    1
## 265     RPL21    1
## 266     RPL22    1
## 267    RPL27A    1
## 268     RPL28    1
## 269     RPL29    1
## 270      RPL3    1
## 271     RPL30    1
## 272     RPL31    1
## 273     RPL32    1
## 274     RPL36    1
## 275     RPL37    1
## 276    RPL37A    1
## 277     RPL3L    1
## 278      RPL4    1
## 279    RPL7L1    1
## 280      RPL8    1
## 281      RPL9    1
## 282     RPLP0    1
## 283     RPP25    1
## 284     RPP30    1
## 285     RPP38    1
## 286     RPP40    1
## 287     RPS10    1
## 288   RPS10P5    1
## 289     RPS11    1
## 290     RPS12    1
## 291     RPS13    1
## 292     RPS14    1
## 293     RPS15    1
## 294    RPS15A    1
## 295     RPS16    1
## 296     RPS18    1
## 297     RPS19    1
## 298      RPS2    1
## 299     RPS20    1
## 300     RPS23    1
## 301     RPS24    1
## 302     RPS25    1
## 303     RPS26    1
## 304    RPS27L    1
## 305     RPS28    1
## 306     RPS29    1
## 307      RPS3    1
## 308     RPS3A    1
## 309      RPS5    1
## 310      RPS6    1
## 311      RPS7    1
## 312      RPS8    1
## 313      RPS9    1
## 314      RPSA    1
## 315    RPUSD1    1
## 316    RPUSD2    1
## 317    RPUSD3    1
## 318    RPUSD4    1
## 319    RRNAD1    1
## 320      RRP1    1
## 321     RRP12    1
## 322     RRP15    1
## 323     RRP1B    1
## 324     RRP36    1
## 325     RRP7A    1
## 326    RRP7BP    1
## 327      RRP8    1
## 328    RSL1D1    1
## 329   RSL24D1    1
## 330     SART1    1
## 331      SBDS    1
## 332     SDAD1    1
## 333     SENP3    1
## 334     SERP1    1
## 335      SHFL    1
## 336      SHQ1    1
## 337     SNU13    1
## 338    SRFBP1    1
## 339     SRP19    1
## 340     SURF6    1
## 341      TBL3    1
## 342    TENT4B    1
## 343     TEX10    1
## 344     TFB1M    1
## 345     TFB2M    1
## 346   THUMPD1    1
## 347   TRMT112    1
## 348   TRMT61B    1
## 349      TSC1    1
## 350      TSR3    1
## 351     UBA52    1
## 352      URB1    1
## 353      URB2    1
## 354     USP36    1
## 355     UTP11    1
## 356    UTP14C    1
## 357     UTP15    1
## 358     UTP18    1
## 359     UTP20    1
## 360     UTP23    1
## 361      UTP4    1
## 362      UTP6    1
## 363     WBP11    1
## 364     WDR18    1
## 365      WDR3    1
## 366     WDR36    1
## 367     WDR43    1
## 368     WDR46    1
## 369     WDR55    1
## 370     WDR75    1
## 371       WRN    1
## 372      XPO1    1
## 373     XRCC5    1
## 374      XRN2    1
## 375      YBEY    1
## 376   ZC3H12A    1
## 377   ZNF354A    1
## 378    ZNF622    1
## 379    ZNHIT6    1
# Adjust the SMR p-values of the NUC genes with the "fdr" method; the number
# of comparisons defaults to the number of p-values supplied
p <- nucs_fetched$p_SMR
fdr <- p.adjust(p, method = "fdr")
nucs_fetched$fdr <- fdr
# Smallest adjusted p-values first
nucs_fetched <- nucs_fetched[order(nucs_fetched$fdr), ]
head(nucs_fetched)
##            ENSEMBL         probeID ProbeChr            Gene  Probe_bp
## 1: ENSG00000151304 ENSG00000151304        5 ENSG00000151304 121354460
## 2: ENSG00000223705 ENSG00000223705        7 ENSG00000223705  75042835
## 3: ENSG00000087269 ENSG00000087269        4 ENSG00000087269   2952386
## 4: ENSG00000134001 ENSG00000134001       14 ENSG00000134001  67839973
## 5: ENSG00000170473 ENSG00000170473       12 ENSG00000170473  56310799
## 6: ENSG00000188573 ENSG00000188573        5 ENSG00000188573 167956880
##        topSNP topSNP_chr topSNP_bp A1 A2      Freq     b_GWAS    se_GWAS
## 1: rs75965538          5 121254308  G  C 0.0377734 -0.0349702 0.00896040
## 2: rs58513914          7  75147934  A  G 0.0944334 -0.0226012 0.00635199
## 3:  rs1024323          4   3006043  T  C 0.4015900  0.0125863 0.00397296
## 4:  rs1885197         14  67247125  G  A 0.0168986 -0.0458829 0.01441740
## 5:  rs7308505         12  56328699  T  C 0.2017890  0.0164445 0.00485235
## 6: rs11557637          5 167924260  A  C 0.1799200  0.0152578 0.00488020
##          p_GWAS     b_eQTL    se_eQTL        p_eQTL      b_SMR    se_SMR
## 1: 9.510496e-05 -0.1347240 0.01943070  4.102610e-12  0.2595680 0.0763213
## 2: 3.734931e-04  0.3387670 0.01990220  5.679708e-65 -0.0667161 0.0191556
## 3: 1.535046e-03 -0.0997268 0.00816416  2.578702e-34 -0.1262070 0.0411564
## 4: 1.460309e-03 -0.4894320 0.03458350  1.810019e-45  0.0937473 0.0301931
## 5: 7.015272e-04  0.1101880 0.01171870  5.313011e-21  0.1492400 0.0468099
## 6: 1.769219e-03  0.2924120 0.01234140 4.197346e-124  0.0521790 0.0168341
##           p_SMR      p_HEIDI nsnp_HEIDI ENTREZID  SYMBOL       fdr
## 1: 0.0006714036 8.831948e-02         16   153443  SRFBP1 0.1272310
## 2: 0.0004960969 2.010156e-01         16   155400 NSUN5P1 0.1272310
## 3: 0.0021656100 1.038394e-05         20     8602   NOP14 0.1367944
## 4: 0.0019032930           NA         NA     1965  EIF2S1 0.1367944
## 5: 0.0014315090 6.738205e-01          9    84305    PYM1 0.1367944
## 6: 0.0019378500 4.536448e-01         20   345630   FBLL1 0.1367944
# Rows of the NUC table with no missing value in any column
nucs_fetched_no_na <- nucs_fetched[complete.cases(nucs_fetched), ]
dim(nucs_fetched_no_na)
## [1] 378  25
# Nominally significant SMR associations (p_SMR below 0.05)
sigs <- nucs_fetched[which(nucs_fetched$p_SMR < 0.05), ]
dim(sigs)
## [1] 43 25
# Sort by SMR p-value, then re-sort so the largest HEIDI p-values come first.
# NOTE(review): the second sort replaces the first; the p_SMR order can at
# most survive as a tie-break -- confirm that this is intended.
sigs <- sigs[order(sigs$p_SMR), ]
sigs <- sigs[order(sigs$p_HEIDI, decreasing = TRUE), ]
head(sigs, n = 20)
##             ENSEMBL         probeID ProbeChr            Gene  Probe_bp
##  1: ENSG00000142541 ENSG00000142541       19 ENSG00000142541  49993188
##  2: ENSG00000132591 ENSG00000132591       17 ENSG00000132591  27185020
##  3: ENSG00000175467 ENSG00000175467       11 ENSG00000175467  65738229
##  4: ENSG00000176731 ENSG00000176731        8 ENSG00000176731  86129480
##  5: ENSG00000136522 ENSG00000136522        3 ENSG00000136522 179314257
##  6: ENSG00000071082 ENSG00000071082        2 ENSG00000071082 101629335
##  7: ENSG00000137547 ENSG00000137547        8 ENSG00000137547  55054115
##  8: ENSG00000170473 ENSG00000170473       12 ENSG00000170473  56310799
##  9: ENSG00000164327 ENSG00000164327        5 ENSG00000164327  39006265
## 10: ENSG00000082898 ENSG00000082898        2 ENSG00000082898  61735372
## 11: ENSG00000132676 ENSG00000132676        1 ENSG00000132676 155683277
## 12: ENSG00000136271 ENSG00000136271        7 ENSG00000136271  44609833
## 13: ENSG00000197498 ENSG00000197498        6 ENSG00000197498 111325260
## 14: ENSG00000108651 ENSG00000108651       17 ENSG00000108651  30208353
## 15: ENSG00000135521 ENSG00000135521        6 ENSG00000135521 144174715
## 16: ENSG00000125445 ENSG00000125445       17 ENSG00000125445  73260104
## 17: ENSG00000188573 ENSG00000188573        5 ENSG00000188573 167956880
## 18: ENSG00000204822 ENSG00000204822        2 ENSG00000204822  74699767
## 19: ENSG00000158092 ENSG00000158092        3 ENSG00000158092 136624857
## 20: ENSG00000198000 ENSG00000198000        9 ENSG00000198000  95073779
##         topSNP topSNP_chr topSNP_bp A1 A2      Freq      b_GWAS    se_GWAS
##  1: rs76687611         19  49990465  A  G 0.0447316 -0.02433200 0.00861094
##  2:  rs4795457         17  27087929  T  C 0.1938370  0.00970722 0.00477176
##  3:   rs551659         11  65749645  T  C 0.4294230  0.00896488 0.00390577
##  4:  rs3808538          8  86121311  G  T 0.2723660 -0.00948914 0.00436446
##  5:  rs6788205          3 179330599  A  G 0.0795229  0.01596290 0.00734056
##  6: rs78362750          2 101629987  T  A 0.2882700 -0.00919611 0.00423930
##  7:  rs4506187          8  54962327  C  G 0.1829030  0.01227080 0.00548449
##  8:  rs7308505         12  56328699  T  C 0.2017890  0.01644450 0.00485235
##  9:  rs1051942          5  38935244  A  T 0.1669980 -0.01102300 0.00511768
## 10:  rs2694623          2  61553466  T  C 0.1928430 -0.00944413 0.00456075
## 11:  rs6662953          1 155754448  T  C 0.2972170 -0.01288790 0.00424651
## 12:   rs217374          7  44612347  G  C 0.4085490 -0.00874423 0.00392781
## 13: rs76509375          6 111353398  C  G 0.1948310 -0.01155290 0.00483660
## 14:  rs1034627         17  30219569  A  G 0.1729620  0.01549400 0.00534540
## 15:  rs9484759          6 143858683  G  A 0.1928430 -0.01029570 0.00482726
## 16:  rs7225349         17  73240009  G  A 0.2027830  0.01078780 0.00514495
## 17: rs11557637          5 167924260  A  C 0.1799200  0.01525780 0.00488020
## 18:  rs2301984          2  74721055  G  A 0.1590460  0.01127860 0.00554162
## 19: rs80201571          3 136659779  T  G 0.2375750  0.01143550 0.00473990
## 20: rs58391046          9  95036323  C  T 0.2167000  0.01316830 0.00470360
##           p_GWAS     b_eQTL    se_eQTL        p_eQTL      b_SMR     se_SMR
##  1: 0.0047177200  0.5767300 0.01997800 2.986154e-183 -0.0421895 0.01500200
##  2: 0.0419207900 -0.2173600 0.00975022 4.339723e-110 -0.0446597 0.02204450
##  3: 0.0217163900 -0.1041110 0.00803750  2.253588e-38 -0.0861085 0.03809970
##  4: 0.0296913000 -0.3025300 0.00911868 2.318872e-241  0.0313660 0.01445750
##  5: 0.0296594500  0.1306400 0.01583140  1.557693e-16  0.1221890 0.05810730
##  6: 0.0300634300 -0.4108380 0.01298380 9.617430e-220  0.0223838 0.01034290
##  7: 0.0252630100  0.1705060 0.01159780  6.295466e-49  0.0719667 0.03253630
##  8: 0.0007015272  0.1101880 0.01171870  5.313011e-21  0.1492400 0.04680990
##  9: 0.0312481500  0.1021070 0.01054020  3.411006e-22 -0.1079550 0.05134460
## 10: 0.0383828900 -0.0636818 0.00953664  2.428897e-11  0.1483020 0.07498230
## 11: 0.0024058420 -0.2028900 0.00870452 3.626828e-120  0.0635215 0.02110680
## 12: 0.0259983000  0.2799180 0.00810544 2.427362e-261 -0.0312385 0.01406110
## 13: 0.0169104700 -0.4537850 0.00974094  0.000000e+00  0.0254591 0.01067240
## 14: 0.0037486670 -0.1398660 0.01641540  1.590004e-17 -0.1107780 0.04036900
## 15: 0.0329398900  0.0686851 0.00994543  4.978090e-12 -0.1498960 0.07355620
## 16: 0.0360139900 -0.6990710 0.01101090  0.000000e+00 -0.0154316 0.00736370
## 17: 0.0017692190  0.2924120 0.01234140 4.197346e-124  0.0521790 0.01683410
## 18: 0.0418264100 -0.6397660 0.01112800  0.000000e+00 -0.0176292 0.00866738
## 19: 0.0158390400 -0.1248600 0.00982449  5.270412e-37 -0.0915869 0.03863980
## 20: 0.0051164010 -0.2441800 0.00987775 6.486934e-135 -0.0539285 0.01938600
##           p_SMR   p_HEIDI nsnp_HEIDI ENTREZID SYMBOL       fdr
##  1: 0.004919442 0.9900725         20    23521 RPL13A 0.1707185
##  2: 0.042776220 0.9616853         20    26284  ERAL1 0.3954192
##  3: 0.023816100 0.8474123         20     9092  SART1 0.3223679
##  4: 0.030042430 0.7948770         20   401466   RBIS 0.3535188
##  5: 0.035481590 0.7930796         14    57129 MRPL47 0.3802146
##  6: 0.030451120 0.7917817         20     6160  RPL31 0.3535188
##  7: 0.026974430 0.7006191         20    29088 MRPL15 0.3407770
##  8: 0.001431509 0.6738205          9    84305   PYM1 0.1367944
##  9: 0.035504550 0.6656190         20   253260 RICTOR 0.3802146
## 10: 0.047948380 0.5376992         20     7514   XPO1 0.4226148
## 11: 0.002616500 0.5103245         20     7818   DAP3 0.1416648
## 12: 0.026308710 0.4882582         20    54606  DDX56 0.3407770
## 13: 0.017055120 0.4744518         20    84154   RPF2 0.2776316
## 14: 0.006067170 0.4592981         20    55813   UTP6 0.1768813
## 15: 0.041564880 0.4577675         18    84946   LTV1 0.3954192
## 16: 0.036115370 0.4541091         20    51081  MRPS7 0.3802146
## 17: 0.001937850 0.4536448         20   345630  FBLL1 0.1367944
## 18: 0.041954610 0.4117115         20   116540 MRPL53 0.3954192
## 19: 0.017774960 0.3399252         20     4690   NCK1 0.2776316
## 20: 0.005405335 0.3227455         20    55035   NOL8 0.1707185
# Keep only the significant rows that have no missing value in any column.
# The original followed this with `sigs_no_na=complete.cases(sigs)`, which
# replaced the filtered table with a logical vector and made dim() print NULL;
# that overwrite has been removed.
sigs_no_na <- sigs[complete.cases(sigs), ]
dim(sigs_no_na)
# (The stale `## NULL` output from the overwritten object was dropped here;
# re-run the chunk to print the real dimensions.)

1 Look at / verify cis-eQTL top associations in eqtlGen

I read in the NUC MR results that were run previously (for comparison with the non-random gene sets), together with the data used to run them (nucs_MR_dat), and merged the two.

# Read-in the nucs on lifespan MR results
# The file's first column has no header, so read_csv names it X1 (see the
# warning printed below).
nucs_res <- read_csv("/n/holylfs/LABS/lemos_lab/Users/cdadams/MR-eqtlGen-long/bon_genesets_100/bon_nucs/results/nucs_res.csv")
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   exposure = col_character(),
##   outcome = col_character(),
##   id.exposure = col_character(),
##   id.outcome = col_character(),
##   samplesize = col_double(),
##   SNP = col_character(),
##   b = col_double(),
##   se = col_double(),
##   p = col_double(),
##   fdr = col_double(),
##   bon = col_double()
## )
# Convert the tibble returned by read_csv to a plain data frame and check its size
nucs_res <- as.data.frame(nucs_res)
dim(nucs_res)
## [1] 93370    12
# Load the data set that the NUC-on-lifespan MR was run on
nucs_MR_dat <- read_csv("/n/holylfs/LABS/lemos_lab/Users/cdadams/MR-eqtlGen-long/bon_genesets_100/bon_nucs/data/nucs_MR_dat.csv")
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   SNP = col_character(),
##   effect_allele.exposure = col_character(),
##   other_allele.exposure = col_character(),
##   effect_allele.outcome = col_character(),
##   other_allele.outcome = col_character(),
##   remove = col_logical(),
##   palindromic = col_logical(),
##   ambiguous = col_logical(),
##   id.outcome = col_character(),
##   outcome = col_character(),
##   mr_keep.outcome = col_logical(),
##   pval_origin.outcome = col_character(),
##   data_source.outcome = col_character(),
##   exposure = col_character(),
##   mr_keep.exposure = col_logical(),
##   pval_origin.exposure = col_character(),
##   id.exposure = col_character(),
##   data_source.exposure = col_character(),
##   mr_keep = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
# Convert the tibble returned by read_csv to a plain data frame and check its size
nucs_MR_dat <- as.data.frame(nucs_MR_dat)
dim(nucs_MR_dat)
## [1] 95222    34
# Attach the input data to each MR result, matching on SNP and exposure ID
nucs_life_dat_res <- merge(nucs_res, nucs_MR_dat, by = c("SNP", "id.exposure"))
dim(nucs_life_dat_res)
## [1] 93370    44
# Copy the exposure name from the data side into SYMBOL, then put the
# smallest FDR values first
nucs_life_dat_res$SYMBOL <- nucs_life_dat_res$exposure.y
nucs_life_dat_res <- nucs_life_dat_res[order(nucs_life_dat_res$fdr), ]

# Pull out the NSUN5P1 rows, ordered by the exposure-side p-value
NSUN5P1 <- nucs_life_dat_res[which(nucs_life_dat_res$id.exposure == "NSUN5P1"), ]
NSUN5P1 <- NSUN5P1[order(NSUN5P1$pval.exposure), ]
head(NSUN5P1)
##               SNP id.exposure  X1.x exposure.x outcome.x id.outcome.x
## 58952  rs58513914     NSUN5P1 57852    NSUN5P1 longevity       vtcEuL
## 64073  rs62477693     NSUN5P1 57855    NSUN5P1 longevity       vtcEuL
## 10141 rs113761643     NSUN5P1 57835    NSUN5P1 longevity       vtcEuL
## 25964 rs144108627     NSUN5P1 57845    NSUN5P1 longevity       vtcEuL
## 9538  rs113144856     NSUN5P1 57833    NSUN5P1 longevity       vtcEuL
## 64071  rs62476648     NSUN5P1 57853    NSUN5P1 longevity       vtcEuL
##       samplesize          b         se            p        fdr bon  X1.y
## 58952    1012240 -0.1080661 0.03037154 0.0003734969 0.08572288   1 93432
## 64073    1012240 -0.1107199 0.03052340 0.0002863137 0.08572288   1 93436
## 10141    1012240 -0.1062037 0.03024336 0.0004453582 0.08572288   1 93411
## 25964    1012240 -0.1125758 0.03104858 0.0002880733 0.08572288   1 93425
## 9538     1012240 -0.1109647 0.03143906 0.0004163185 0.08572288   1 93409
## 64071    1012240 -0.1145884 0.03236244 0.0003989390 0.08572288   1 93433
##       effect_allele.exposure other_allele.exposure effect_allele.outcome
## 58952                      A                     G                     A
## 64073                      A                     G                     A
## 10141                      C                     A                     C
## 25964                      T                     G                     T
## 9538                       C                     G                     C
## 64071                      A                     G                     A
##       other_allele.outcome beta.exposure beta.outcome eaf.exposure eaf.outcome
## 58952                    G     0.2091428  -0.02260124   0.12215594  0.11457501
## 64073                    G     0.2280545  -0.02525018   0.09517283  0.09529602
## 10141                    A     0.2272803  -0.02413800   0.09569872  0.09501164
## 25964                    G     0.2216668  -0.02495432   0.09591117  0.09573044
## 9538                     G     0.2245217  -0.02491398   0.09318104  0.09405488
## 64071                    G     0.2227190  -0.02552102   0.09336540  0.09599120
##       remove palindromic ambiguous id.outcome.y chr.outcome pos.outcome
## 58952  FALSE       FALSE     FALSE       vtcEuL           7    75147934
## 64073  FALSE       FALSE     FALSE       vtcEuL           7    75111938
## 10141  FALSE       FALSE     FALSE       vtcEuL           7    75155047
## 25964  FALSE       FALSE     FALSE       vtcEuL           7    75158596
## 9538   FALSE        TRUE     FALSE       vtcEuL           7    75105913
## 64071  FALSE       FALSE     FALSE       vtcEuL           7    75053626
##       se.outcome pval.outcome info.outcome outcome.y mr_keep.outcome
## 58952 0.00635199 0.0003734931     0.947414 longevity            TRUE
## 64073 0.00696100 0.0002863170     0.954058 longevity            TRUE
## 10141 0.00687372 0.0004453585     0.952117 longevity            TRUE
## 25964 0.00688244 0.0002880747     0.954134 longevity            TRUE
## 9538  0.00705875 0.0004163225     0.960383 longevity            TRUE
## 64071 0.00720773 0.0003989365     0.949082 longevity            TRUE
##       pval_origin.outcome data_source.outcome pval.exposure se.exposure
## 58952            reported            textfile    7.2326e-57  0.01228683
## 64073            reported            textfile    5.4356e-54  0.01371385
## 10141            reported            textfile    7.0512e-54  0.01368021
## 25964            reported            textfile    4.9498e-51  0.01366955
## 9538             reported            textfile    5.1302e-51  0.01384748
## 64071            reported            textfile    3.4209e-50  0.01383606
##       exposure.y mr_keep.exposure pval_origin.exposure data_source.exposure
## 58952    NSUN5P1             TRUE             reported             textfile
## 64073    NSUN5P1             TRUE             reported             textfile
## 10141    NSUN5P1             TRUE             reported             textfile
## 25964    NSUN5P1             TRUE             reported             textfile
## 9538     NSUN5P1             TRUE             reported             textfile
## 64071    NSUN5P1             TRUE             reported             textfile
##       action mr_keep samplesize.outcome samplesize.exposure  SYMBOL
## 58952      2    TRUE            1012240               30596 NSUN5P1
## 64073      2    TRUE            1012240               30596 NSUN5P1
## 10141      2    TRUE            1012240               30596 NSUN5P1
## 25964      2    TRUE            1012240               30596 NSUN5P1
## 9538       2    TRUE            1012240               30596 NSUN5P1
## 64071      2    TRUE            1012240               30596 NSUN5P1
# Re-sort the merged table by ascending exposure p-value.
nucs_life_dat_res <- nucs_life_dat_res[order(nucs_life_dat_res[["pval.exposure"]]), ]
# Count rows per exposure id; as.data.frame() on the table yields the columns
# Var1 (the id) and Freq (the row count).
nucs_life_dat_res_gene_tab <- as.data.frame(
  table(nucs_life_dat_res[["id.exposure"]])
)
nucs_life_dat_res_gene_tab # 316 distinct exposure ids
##          Var1 Freq
## 1        AATF   27
## 2        ABT1  454
## 3       APEX1   40
## 4        APOD   21
## 5        BAG6    8
## 6        BMS1  154
## 7        BYSL  284
## 8    C12orf65  330
## 9         C1D   43
## 10       CHD7  212
## 11       CKS2  273
## 12      CNOT6  496
## 13       DAP3  409
## 14     DCAF13  131
## 15      DDX10  247
## 16      DDX17  335
## 17      DDX18  263
## 18      DDX21   51
## 19      DDX28    9
## 20      DDX31  236
## 21      DDX47    7
## 22      DDX49   13
## 23      DDX51  161
## 24      DDX52  276
## 25      DDX56  449
## 26       DENR  177
## 27      DHX37   12
## 28      DIEXF  970
## 29      DIMT1  463
## 30       DIS3   96
## 31       DRD4  241
## 32     DROSHA   18
## 33   EBNA1BP2  400
## 34      EIF2A  267
## 35    EIF2AK2  597
## 36    EIF2AK4  257
## 37      EIF2D   69
## 38     EIF2S1  802
## 39      EIF3C  323
## 40      EIF3H    5
## 41      EIF3K  263
## 42     EIF4A3  437
## 43      EIF5A   62
## 44     EIF5A2   20
## 45       EIF6 1833
## 46       EMG1  156
## 47      ERAL1  241
## 48      ERCC2  111
## 49       ERI1  217
## 50       ERI2  142
## 51       ETF1  103
## 52     EXOSC1  508
## 53    EXOSC10  202
## 54     EXOSC2  114
## 55     EXOSC3  337
## 56     EXOSC6 1396
## 57     EXOSC7  348
## 58     EXOSC8  394
## 59     EXOSC9  360
## 60    FAM207A   66
## 61    FASTKD2   33
## 62        FBL   47
## 63      FBLL1  342
## 64       FCF1  115
## 65       FRG1   29
## 66      FTSJ3  679
## 67     GEMIN4  236
## 68       GFM2  129
## 69       GLUL  197
## 70       GNL2   93
## 71     GTF2H5  426
## 72      GTF3A  663
## 73    GTPBP10 1364
## 74     GTPBP4  434
## 75     HEATR1  369
## 76     HEATR3  684
## 77       HELB   99
## 78      HSPA5   17
## 79    IGHMBP2  964
## 80       IMP3  259
## 81       IMP4  465
## 82      ISG20  568
## 83    ISG20L2   63
## 84      KAT2B  523
## 85       KRI1  141
## 86       KRR1  279
## 87       LSG1  503
## 88       LSM6  327
## 89       LYAR  278
## 90      MAK16  375
## 91     MALSU1   26
## 92       MDN1  252
## 93    METTL16   17
## 94    METTL17  127
## 95     METTL5  540
## 96  MPHOSPH10   25
## 97   MPHOSPH6  810
## 98    MPV17L2  177
## 99      MRPL1   13
## 100    MRPL10  341
## 101    MRPL12  116
## 102    MRPL13  169
## 103    MRPL14  205
## 104    MRPL15  121
## 105    MRPL17  103
## 106    MRPL18  873
## 107    MRPL19  385
## 108     MRPL2  439
## 109    MRPL20  260
## 110    MRPL21 1209
## 111    MRPL22  234
## 112    MRPL23   11
## 113    MRPL24   61
## 114    MRPL27  178
## 115    MRPL28   77
## 116    MRPL32  204
## 117    MRPL33  262
## 118    MRPL34  508
## 119    MRPL35  876
## 120    MRPL36  327
## 121    MRPL37   93
## 122    MRPL38   43
## 123    MRPL39  555
## 124     MRPL4   37
## 125    MRPL40  349
## 126    MRPL41   13
## 127    MRPL42  555
## 128    MRPL43  164
## 129    MRPL44    1
## 130    MRPL45  107
## 131    MRPL47   49
## 132    MRPL48  961
## 133    MRPL49    8
## 134    MRPL50   73
## 135    MRPL51  199
## 136    MRPL52  168
## 137    MRPL53  469
## 138    MRPL54  180
## 139    MRPL55  397
## 140     MRPL9   20
## 141    MRPS10  105
## 142    MRPS11    4
## 143    MRPS15  148
## 144    MRPS16 1025
## 145    MRPS17   95
## 146   MRPS18C  282
## 147    MRPS21  907
## 148    MRPS24   55
## 149    MRPS25  271
## 150    MRPS26  116
## 151    MRPS30  261
## 152    MRPS31  257
## 153    MRPS33  531
## 154    MRPS34  463
## 155    MRPS35  195
## 156     MRPS6  653
## 157     MRPS7 1017
## 158     MRPS9  593
## 159     MRTO4  445
## 160     MTIF2   21
## 161     MTIF3  186
## 162      MTOR  233
## 163   MYBBP1A   42
## 164      NAF1   45
## 165     NAT10  290
## 166      NCK1  217
## 167    NDUFA7  172
## 168      NGDN  103
## 169      NIP7  635
## 170      NLE1  171
## 171      NMD3  849
## 172      NOB1   18
## 173     NOC2L  139
## 174     NOC4L   96
## 175     NOL10   14
## 176      NOL6  262
## 177      NOL8  550
## 178      NOL9  231
## 179      NOM1  519
## 180     NOP10  693
## 181     NOP14  116
## 182     NOP16   81
## 183     NOP56  215
## 184     NOP58  596
## 185      NPM3  492
## 186      NSA2  337
## 187     NSUN4 1687
## 188   NSUN5P1   35
## 189   NSUN5P2  536
## 190    NUFIP1   15
## 191     NUP88 1372
## 192       NVL  318
## 193     OXA1L   35
## 194   PAK1IP1    2
## 195      PELO  583
## 196      PES1  119
## 197    PIH1D1  130
## 198     PINX1    1
## 199     PNPT1  332
## 200    POLR1A  356
## 201    POLR1B  431
## 202      POP4    1
## 203      POP5  400
## 204      POP7   35
## 205  PPARGC1A  466
## 206      PWP1  797
## 207       RAN   65
## 208      RBFA  381
## 209      RCL1   46
## 210    RICTOR   33
## 211     RIOK1  285
## 212     RIOK2  104
## 213     RIOK3  101
## 214    RNASEL  250
## 215      RPF1   97
## 216      RPF2  311
## 217    RPL10A  531
## 218     RPL11   26
## 219     RPL12  526
## 220     RPL13  687
## 221    RPL13A  318
## 222     RPL14  694
## 223     RPL17  559
## 224     RPL21    1
## 225     RPL22   31
## 226    RPL27A  220
## 227     RPL28  221
## 228      RPL3   82
## 229     RPL31  318
## 230     RPL32   77
## 231     RPL36  128
## 232     RPL37   13
## 233    RPL37A   72
## 234     RPL3L   18
## 235    RPL7L1  151
## 236      RPL8  756
## 237      RPL9  377
## 238     RPLP0   42
## 239     RPP25    1
## 240     RPP38  134
## 241     RPP40  214
## 242     RPS10  312
## 243     RPS11   59
## 244     RPS12  166
## 245     RPS13   10
## 246     RPS14  100
## 247    RPS15A  274
## 248     RPS16  191
## 249     RPS18   59
## 250     RPS19  122
## 251      RPS2    5
## 252     RPS23 1297
## 253     RPS24    5
## 254     RPS25  112
## 255     RPS26  203
## 256    RPS27L   54
## 257     RPS28   64
## 258     RPS29  273
## 259      RPS3   13
## 260     RPS3A  253
## 261      RPS5  292
## 262      RPS8  573
## 263      RPS9  200
## 264    RPUSD1   33
## 265    RPUSD3   87
## 266    RPUSD4  479
## 267    RRNAD1   65
## 268      RRP1  148
## 269     RRP12 1134
## 270     RRP15   70
## 271     RRP1B  264
## 272     RRP36  258
## 273     RRP7A  647
## 274      RRP8  182
## 275   RSL24D1  620
## 276     SART1   81
## 277     SDAD1   26
## 278     SENP3   39
## 279     SERP1   60
## 280      SHQ1  125
## 281     SRP19  323
## 282     SURF6  410
## 283     TEX10   38
## 284     TFB1M  406
## 285     TFB2M   88
## 286   THUMPD1  742
## 287   TRMT112  565
## 288   TRMT61B  353
## 289      TSC1  103
## 290      TSR3   34
## 291     UBA52  367
## 292      URB1  204
## 293      URB2   18
## 294     USP36  422
## 295    UTP14C  815
## 296     UTP15  256
## 297     UTP18  269
## 298     UTP20   22
## 299     UTP23  278
## 300      UTP6   48
## 301     WBP11  161
## 302     WDR18  386
## 303      WDR3  247
## 304     WDR36   93
## 305     WDR43  129
## 306     WDR55 1082
## 307     WDR75  824
## 308       WRN  349
## 309     XRCC5  144
## 310      XRN2  907
## 311      YBEY 1676
## 312   ZC3H12A   13
## 313   ZNF354A  354
## 314    ZNF622   44
## 315    ZNHIT3  254
## 316    ZNHIT6  700
# Keep the rows of nucs_fetched whose SYMBOL also occurs as an exposure id in
# the merged MR table. `%in%` already returns a clean TRUE/FALSE mask, so it is
# used directly as the row selector.
genes_commmon_mr_smr <- nucs_fetched[
  nucs_fetched$SYMBOL %in% nucs_life_dat_res$id.exposure,
]
dim(genes_commmon_mr_smr)
## [1] 309  25
# FDR-adjust the SMR p-values over the genes retained above. method = "fdr" is
# p.adjust()'s alias for Benjamini-Hochberg. n is passed explicitly as the full
# length of p (the default n would exclude NA entries).
p <- genes_commmon_mr_smr[["p_SMR"]]
fdr <- p.adjust(p, method = "fdr", n = length(p))
genes_commmon_mr_smr[["fdr"]] <- fdr
# Smallest adjusted p-value first.
genes_commmon_mr_smr <- genes_commmon_mr_smr[order(genes_commmon_mr_smr[["fdr"]]), ]
# Duplicate topSNP under the name SNP so it can serve as a merge key below.
genes_commmon_mr_smr[["SNP"]] <- genes_commmon_mr_smr[["topSNP"]]

# Inner join of the SMR rows with the MR rows that share the same SNP and gene
# symbol. Both inputs carry an `fdr` column, which is why the result has fdr.x
# (from genes_commmon_mr_smr) and fdr.y (from nucs_life_dat_res).
heidi_mr <- merge(
  x = genes_commmon_mr_smr,
  y = nucs_life_dat_res,
  by = c("SNP", "SYMBOL")
)
dim(heidi_mr)
## [1] 304  69
head(heidi_mr)
##           SNP SYMBOL         ENSEMBL         probeID ProbeChr            Gene
## 1:    rs10072   RCL1 ENSG00000120158 ENSG00000120158        9 ENSG00000120158
## 2: rs10090927 MRPL13 ENSG00000172172 ENSG00000172172        8 ENSG00000172172
## 3:  rs1010878  DDX47 ENSG00000213782 ENSG00000213782       12 ENSG00000213782
## 4: rs10127637   URB2 ENSG00000135763 ENSG00000135763        1 ENSG00000135763
## 5: rs10169290 POLR1B ENSG00000125630 ENSG00000125630        2 ENSG00000125630
## 6: rs10188143  MTIF2 ENSG00000085760 ENSG00000085760        2 ENSG00000085760
##     Probe_bp     topSNP topSNP_chr topSNP_bp A1 A2      Freq      b_GWAS
## 1:   4826966    rs10072          9   4860964  C  T 0.3628230 -0.00591662
## 2: 121425321 rs10090927          8 121464984  T  C 0.3111330  0.00022228
## 3:  12974582  rs1010878         12  12977400  A  G 0.2902580 -0.00259857
## 4: 229778963 rs10127637          1 229788407  G  A 0.0536779  0.00450193
## 5: 113317063 rs10169290          2 113331702  G  C 0.0695825  0.00123118
## 6:  55480107 rs10188143          2  55505372  C  T 0.0238569 -0.00341465
##       se_GWAS    p_GWAS     b_eQTL    se_eQTL       p_eQTL       b_SMR
## 1: 0.00402872 0.1419389  0.1443490 0.00837709 1.543694e-66 -0.04098830
## 2: 0.00413305 0.9571091  0.0941883 0.00843905 6.327582e-29  0.00235995
## 3: 0.00444181 0.5585312  0.0777613 0.00924563 4.079184e-17 -0.03341730
## 4: 0.00904820 0.6188013 -0.1618780 0.01896680 1.403999e-17 -0.02781070
## 5: 0.00700717 0.8605278  0.9054120 0.02113480 0.000000e+00  0.00135980
## 6: 0.01398830 0.8071480 -0.2908040 0.02780580 1.341001e-25  0.01174210
##        se_SMR     p_SMR    p_HEIDI nsnp_HEIDI ENTREZID     fdr.x id.exposure
## 1: 0.02801080 0.1433839 0.44198620         20    10171 0.6415703        RCL1
## 2: 0.04388120 0.9571101 0.77152310         20    28998 0.9965244      MRPL13
## 3: 0.05725910 0.5594795 0.06924742         20    51202 0.8551116       DDX47
## 4: 0.05599020 0.6193961 0.93041140         16     9816 0.8748056        URB2
## 5: 0.00773927 0.8605285 0.73884140         20    84172 0.9670536      POLR1B
## 6: 0.04811530 0.8071995 0.99449170         14     4528 0.9376866       MTIF2
##     X1.x exposure.x outcome.x id.outcome.x samplesize            b         se
## 1: 64284       RCL1 longevity       vtcEuL    1012240 -0.040285038 0.02743072
## 2: 30288     MRPL13 longevity       vtcEuL    1012240  0.002351511 0.04372374
## 3:  4064      DDX47 longevity       vtcEuL    1012240 -0.033011244 0.05642706
## 4: 84464       URB2 longevity       vtcEuL    1012240 -0.030049715 0.06039539
## 5: 61697     POLR1B longevity       vtcEuL    1012240  0.001930047 0.01098472
## 6: 48766      MTIF2 longevity       vtcEuL    1012240  0.012558387 0.05144627
##            p     fdr.y bon  X1.y effect_allele.exposure other_allele.exposure
## 1: 0.1419385 0.6408509   1 40760                      C                     T
## 2: 0.9571096 0.9927599   1 47901                      T                     C
## 3: 0.5585309 0.8945395   1 54699                      A                     G
## 4: 0.6188014 0.9167828   1 61173                      G                     A
## 5: 0.8605274 0.9778189   1 67733                      G                     C
## 6: 0.8071480 0.9650099   1 76415                      C                     T
##    effect_allele.outcome other_allele.outcome beta.exposure beta.outcome
## 1:                     C                    T    0.14686892  -0.00591662
## 2:                     T                    C    0.09452644   0.00022228
## 3:                     A                    G    0.07871772  -0.00259857
## 4:                     G                    A   -0.14981606   0.00450193
## 5:                     G                    C    0.63790155   0.00123118
## 6:                     C                    T   -0.27190195  -0.00341465
##    eaf.exposure eaf.outcome remove palindromic ambiguous id.outcome.y
## 1:   0.33505581  0.35463096  FALSE       FALSE     FALSE       vtcEuL
## 2:   0.34804988  0.32426077  FALSE       FALSE     FALSE       vtcEuL
## 3:   0.24727930  0.26565132  FALSE       FALSE     FALSE       vtcEuL
## 4:   0.05605246  0.05400200  FALSE       FALSE     FALSE       vtcEuL
## 5:   0.07518683  0.08595268  FALSE        TRUE     FALSE       vtcEuL
## 6:   0.02470105  0.02322319  FALSE       FALSE     FALSE       vtcEuL
##    chr.outcome pos.outcome se.outcome pval.outcome info.outcome outcome.y
## 1:           9     4860964 0.00402872    0.1419389     0.995250 longevity
## 2:           8   121464984 0.00413305    0.9571091     0.987174 longevity
## 3:          12    12977400 0.00444181    0.5585312     0.974652 longevity
## 4:           1   229788407 0.00904820    0.6188013     0.968873 longevity
## 5:           2   113331702 0.00700717    0.8605278     0.995974 longevity
## 6:           2    55505372 0.01398834    0.8071480     0.949320 longevity
##    mr_keep.outcome pval_origin.outcome data_source.outcome pval.exposure
## 1:            TRUE            reported            textfile    1.9658e-58
## 2:            TRUE            reported            textfile    8.0577e-21
## 3:            TRUE            reported            textfile    5.1945e-09
## 4:            TRUE            reported            textfile    1.7879e-09
## 5:            TRUE            reported            textfile   1.0000e-200
## 6:            TRUE            reported            textfile    1.7077e-17
##    se.exposure exposure.y mr_keep.exposure pval_origin.exposure
## 1: 0.008523232       RCL1             TRUE             reported
## 2: 0.008469201     MRPL13             TRUE             reported
## 3: 0.009359236      DDX47             TRUE             reported
## 4: 0.017553553       URB2             TRUE             reported
## 5: 0.014890360     POLR1B             TRUE             reported
## 6: 0.025998675      MTIF2             TRUE             reported
##    data_source.exposure action mr_keep samplesize.outcome samplesize.exposure
## 1:             textfile      2    TRUE            1012240               30596
## 2:             textfile      2    TRUE            1012240               30596
## 3:             textfile      2    TRUE            1012240               30596
## 4:             textfile      2    TRUE            1012240               30596
## 5:             textfile      2    TRUE            1012240               30596
## 6:             textfile      2    TRUE            1012240               30596
# Negated membership operator: `x %notin% y` is TRUE where x has no match in y.
`%notin%` <- Negate(`%in%`)
# Genes that entered the merge but have no row in heidi_mr, i.e. those whose
# top SMR SNP found no matching SNP/SYMBOL pair in the MR table.
# The earlier version filtered with %in%, which returned the genes that DID
# match and left %notin% unused. The 304 x 26 dimension recorded below comes
# from that earlier filter; re-knit the document to refresh it.
notin <- genes_commmon_mr_smr[
  genes_commmon_mr_smr$SYMBOL %notin% heidi_mr$SYMBOL,
]
dim(notin)
## [1] 304  26
# Persist the merged SMR + MR table as CSV. Row names are written as well
# (write.csv default), so the file gains a leading unnamed index column.
write.csv(
  heidi_mr,
  file = "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/heidi_mr.csv"
)

1.1 Re-run process for the other longevity GWAS