Under conditions of nucleolar stress…
I created a bin folder in my directory under: /n/holylfs/LABS/lemos_lab/Users/cdadams/ and put in a FAS Help ticket to get help with exporting a path (see note below). After adding the bin folder and exporting it in the .bashrc, I moved smr_Linux into the bin folder and ran it. (FAS Help also told me, though, that lemos_lab isn’t backed up daily. They said that even though I could certainly have small scripts there, our home directories are backed up daily. So, if files are accidentally deleted under lemos_lab, they might not be recoverable. FAS Help pointed out that lemos_lab is for “big data”, but personal directories are good for small files. I’m going to keep things all in lemos_lab for now, since I routinely run out of space in my home directory, and I back a lot up with version tracking on GitHub.)
Nano, a text editor accessible through the command line, can be used to edit scripts.
# Open the shell start-up file in nano; the lines that follow are its contents.
nano .bashrc
# .bashrc
# Source global definitions
if [ -f /etc/bashrc ]; then
. /etc/bashrc
fi
# Disabled entry: would append the edirect directory to PATH.
#export PATH=$PATH:/n/home04/cdadams/edirect
# Append the home-directory bin folder to PATH.
export PATH=$PATH:$HOME/bin/
# FAS RC ticket #149663
# Prepend the lemos_lab bin folder (where smr_Linux was placed) so it is
# searched before the other PATH entries.
export PATH=/n/holylfs/LABS/lemos_lab/Users/cdadams/bin:${PATH}
Each of the four lifespan summary statistics GWAS had to be individually formatted to work with SMR
# Work from the SMR project directory.
setwd("/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/")
## Lifespan
# Read the lifespan GWAS and copy its columns to the names listed in `myvars`.
long <- as.data.frame(
  fread(
    "/n/home04/cdadams/MR-eqtlGen-long/long_for_mr.txt",
    header = TRUE,
    sep = "\t"
  )
)
long$SNP <- long$rsid
long$A1 <- long$a1
long$A2 <- long$a0
long$freq <- long$freq1
long$b <- long$beta
# Sample size is stored as a constant character value on every row.
long$N <- "1012240"
# `se` and `p` are used under the names they already have in the input.
myvars <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
smr_lifespan <- long[myvars]
write.table(
  smr_lifespan,
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_lifespan.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
## Healthspan
# Read the healthspan GWAS and copy its columns to the names listed in
# `myvars`, then write the table sorted by p-value.
hs <- as.data.frame(
  fread("/n/home04/cdadams/metab_longevity/healthspan/healthspan_summary.csv")
)
hs$SNP <- hs$SNPID
# A1 comes from the EA column. An earlier `hs$A1 = hs$beta` was removed: it
# put effect sizes into the allele column and was immediately overwritten.
hs$A1 <- hs$EA
hs$A2 <- hs$RA
hs$freq <- hs$EAF
hs$b <- hs$beta
# Convert the -log10(p-value) to a p-value
# The column is renamed in place with base R, so this step does not depend on
# which package's `rename()` is attached; fail early if the column is absent.
stopifnot("-log10(p-value)" %in% names(hs))
names(hs)[names(hs) == "-log10(p-value)"] <- "neg_log_p"
hs$p <- 10^-(hs$neg_log_p)
hs$N <- "300447"
# `se` is used under the name it already has in the input.
myvars <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
smr_healthspan <- hs[myvars]
smr_healthspan <- smr_healthspan[order(smr_healthspan$p), ]
write.table(
  smr_healthspan,
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_healthspan.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
## Longevity 90
# Read the 90th-percentile longevity GWAS and copy its columns to the names
# listed in `myvars`.
long90 <- as.data.frame(
  fread("/n/home04/cdadams/metab_longevity/extreme_longevity/NA_gone_longevity_90_rsid.csv")
)
# A1 must hold an allele. The previous code copied EAF (a frequency) into A1.
# NOTE(review): the effect-allele column is assumed to be named `EA`, by
# analogy with the `NEA`/`EAF` columns used below -- confirm against the file.
stopifnot("EA" %in% names(long90))
long90$SNP <- long90$rsid_rs
long90$A1 <- long90$EA
long90$A2 <- long90$NEA
long90$freq <- long90$EAF
long90$b <- long90$Beta
long90$se <- long90$SE
long90$p <- long90$P.value
long90$N <- "36745"
myvars <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
smr_long90 <- long90[myvars]
write.table(
  smr_long90,
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_long90.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
## Longevity 99
# Read the 99th-percentile longevity GWAS and copy its columns to the names
# listed in `myvars`.
long99 <- as.data.frame(
  fread("/n/home04/cdadams/metab_longevity/extreme_longevity99/NA_gone_longevity_99_rsid.csv")
)
# A1 must hold an allele. The previous code copied EAF (a frequency) into A1.
# NOTE(review): the effect-allele column is assumed to be named `EA`, by
# analogy with the `NEA`/`EAF` columns used below -- confirm against the file.
stopifnot("EA" %in% names(long99))
long99$SNP <- long99$rsid_rs
long99$A1 <- long99$EA
long99$A2 <- long99$NEA
long99$freq <- long99$EAF
long99$b <- long99$Beta
long99$se <- long99$SE
long99$p <- long99$P.value
long99$N <- "28967"
myvars <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
smr_long99 <- long99[myvars]
write.table(
  smr_long99,
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_long99.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
#smr_long992=fread("/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_long99.txt", sep="\t", header=TRUE)
Save the lifespan SNPs as a txt file (this will need to be done for the three other GWAS too).
# Re-read the formatted lifespan table and save its SNP identifiers as a
# plain one-per-line list.
life_ld <- fread(
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/smr_lifespan.txt",
  sep = "\t"
)
# The column is written as `SNP`; `$snp` does not match it and returned NULL,
# which left the list empty.
snplist <- life_ld$SNP
stopifnot(length(snplist) > 0)
# No header and no quoting: snplist.txt is passed directly to plink's
# --ld-snp-list, so it should contain only the identifiers.
write.table(
  snplist,
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/snplist.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)
# Move to the SMR working directory.
cd /n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen
# Remove every double-quote character from the files written by write.table.
# The de-quoted GWAS table is saved as smr_lifespan2 (no extension), which is
# the name later passed to --gwas-summary.
sed 's/\"//g' smr_lifespan.txt > smr_lifespan2
sed 's/\"//g' snplist.txt > snplist2.txt
I tried to create a local LD matrix using the European reference genome from 1000 genomes, but kept running into big-file problems. So, I just did it in Plink. Turns out I didn’t need to generate it at all after downloading the European binary files.
# Get a path to plink
devtools::install_github("explodecomputer/genetics.binaRies")
genetics.binaRies::get_plink_binary()
# /n/home04/cdadams/R/ifxrstudio/RELEASE_3_12/genetics.binaRies/bin/plink_Linux
library(genetics.binaRies)
devtools::install_github("explodecomputer/plinkbinr")
library(plinkbinr)
get_plink_exe()
# Generate an LD matrix
# Two fixes: the SNP column is `SNP` (`$snp` returned NULL), and a stray
# `<yes-really>` token after the plink path, which made the call unparseable,
# has been removed. The plink path is the one printed by get_plink_binary().
myld <- ieugwasr::ld_matrix_local(
  life_ld$SNP,
  plink_bin = "/n/home04/cdadams/R/ifxrstudio/RELEASE_3_12/genetics.binaRies/bin/plink_Linux",
  bfile = "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/EUR",
  with_alleles = TRUE
)
I was able to use Plink to get an LD matrix. However, I realized just afterwards that I didn’t need it.
# Load the cluster's plink 1.90 module.
module add plink/1.90-fasrc01
# Compute r2 between the SNPs listed in snplist.txt and variants in the EUR
# reference files, with the r2 reporting threshold set to 0.8; output files
# use the prefix samplesnp_ld.
# NOTE(review): this reads snplist.txt, not the de-quoted snplist2.txt made by
# the sed step -- confirm which list was intended.
plink --r2 --bfile EUR --ld-snp-list snplist.txt --ld-window-r2 0.8 --out samplesnp_ld
So much trouble! But, in the process, I got the binaries for the EUR LD reference panel:
# Get the ld reference panels from 1000 genomes
wget http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz
# Unpack the downloaded archive (verbose, gzip-compressed tar).
tar -zxvf 1kg.v3.tgz
# Request an interactive allocation: one task on the `test` partition for
# 6 hours with --mem=9000 (megabytes unless the site configures another unit).
salloc -n 1 -p test -t 0-06:00 --mem=9000
# Run SMR with the EUR reference files, the de-quoted lifespan GWAS table and
# the eQTLGen cis-eQTL BESD files; results are written with the prefix mysmr.
# NOTE(review): --thread-num 10 is requested while the allocation above asks
# for a single task (-n 1) -- confirm how many CPUs are actually available.
smr_Linux smr --bfile EUR --gwas-summary smr_lifespan2 --beqtl-summary cis-eQTLs-full_eQTLGen_AF_incl_nr_formatted_20191212.new.txt_besd-dense --out mysmr --thread-num 10
The SMR results are for top SNPs & are reported with Ensembl IDs. I fetched the gene symbols, read-in the NUC genes, and extracted the NUC genes from mysmr.
# Load the SMR output table and preview its first rows.
mysmr <- fread(
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/mysmr.smr"
)
head(mysmr)
## probeID ProbeChr Gene Probe_bp topSNP topSNP_chr
## 1: ENSG00000237491 1 ENSG00000237491 729801 rs12184325 1
## 2: ENSG00000225880 1 ENSG00000225880 762244 rs114525117 1
## 3: ENSG00000228794 1 ENSG00000228794 778907 rs60320384 1
## 4: ENSG00000188976 1 ENSG00000188976 887136 rs7417106 1
## 5: ENSG00000187961 1 ENSG00000187961 898531 rs10465242 1
## 6: ENSG00000187583 1 ENSG00000187583 906561 rs9697711 1
## topSNP_bp A1 A2 Freq b_GWAS se_GWAS p_GWAS b_eQTL
## 1: 754105 T C 0.0407555 0.00363243 0.01146170 0.7513047 1.144180
## 2: 759036 A G 0.0417495 0.00135738 0.01145560 0.9056792 0.921721
## 3: 769223 G C 0.1222660 -0.00501340 0.00616004 0.4157260 -0.189018
## 4: 911595 A G 0.2127240 -0.00028841 0.00526643 0.9563264 -0.247736
## 5: 886788 G A 0.1013920 0.00193227 0.00728517 0.7908298 -0.252370
## 6: 900972 T G 0.0447316 0.00238660 0.00908880 0.7928691 -0.361831
## se_eQTL p_eQTL b_SMR se_SMR p_SMR p_HEIDI nsnp_HEIDI
## 1: 0.0561593 2.856293e-92 0.00317471 0.0100186 0.7513340 0.3489219 6
## 2: 0.0549631 4.058683e-63 0.00147266 0.0124289 0.9056818 0.6420299 5
## 3: 0.0271718 3.490795e-12 0.02652340 0.0328120 0.4188920 0.7737176 19
## 4: 0.0175934 4.956084e-45 0.00116418 0.0212584 0.9563269 0.9038120 20
## 5: 0.0193135 5.083089e-39 -0.00765651 0.0288730 0.7908718 0.6765434 20
## 6: 0.0246616 9.771819e-49 -0.00659591 0.0251230 0.7929014 0.9119470 18
# Size of the SMR result table.
dim(mysmr)
## [1] 15454 21
# Copy the Gene column to ENSEMBL so it can serve as the merge key for the
# annotation table.
mysmr$ENSEMBL <- mysmr$Gene
# De-duplicate rows and check the size again.
uni_smr <- unique(mysmr)
dim(uni_smr)
## [1] 15454 22
# long=fread("/n/home04/cdadams/MR-eqtlGen-long/long_for_mr.txt", header=TRUE, sep="\t")
# colnames(long)
# smr_long <- merge(mysmr,long,by="rsid")
# dim(smr_long)
# head(smr_long)
# smr_long$ENSEMBL=smr_long$Gene
# head(smr_long)
# Attach the human annotation database. library() stops with an error if the
# package is missing, whereas require() only returns FALSE.
library(org.Hs.eg.db)
keytypes(org.Hs.eg.db)
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GO" "GOALL" "IPI" "MAP" "OMIM"
## [16] "ONTOLOGY" "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG" "UNIGENE"
## [26] "UNIPROT"
# Build an ENTREZID / SYMBOL / ENSEMBL lookup for every key in the database.
# select() and keys() are namespace-qualified so a same-named function from
# another attached package (e.g. dplyr::select) cannot be picked up instead.
annot <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = AnnotationDbi::keys(org.Hs.eg.db),
  columns = c("ENTREZID", "SYMBOL", "ENSEMBL"),
  keytype = "ENTREZID"
)
# Attach the annotation to the SMR results by Ensembl gene ID.
fetched <- merge(mysmr, annot, by = "ENSEMBL")
dim(fetched)
## [1] 13338 24
head(fetched)
## ENSEMBL probeID ProbeChr Gene Probe_bp
## 1: ENSG00000000419 ENSG00000000419 20 ENSG00000000419 49563248
## 2: ENSG00000000457 ENSG00000000457 1 ENSG00000000457 169842606
## 3: ENSG00000000460 ENSG00000000460 1 ENSG00000000460 169727233
## 4: ENSG00000000971 ENSG00000000971 1 ENSG00000000971 196668821
## 5: ENSG00000001036 ENSG00000001036 6 ENSG00000001036 143824720
## 6: ENSG00000001084 ENSG00000001084 6 ENSG00000001084 53421953
## topSNP topSNP_chr topSNP_bp A1 A2 Freq b_GWAS se_GWAS
## 1: rs2426214 20 49573810 A G 0.3479130 0.00621773 0.00396205
## 2: rs10753798 1 169844515 G T 0.4731610 0.00124718 0.00387656
## 3: rs12142505 1 169773863 C A 0.0964215 0.00575911 0.00658187
## 4: rs203683 1 196678432 G C 0.3966200 0.00154242 0.00389239
## 5: rs4896659 6 143788753 C T 0.2385690 -0.00505717 0.00464004
## 6: rs661603 6 53370107 C T 0.4115310 -0.00540383 0.00394217
## p_GWAS b_eQTL se_eQTL p_eQTL b_SMR se_SMR
## 1: 0.1165731 -0.0741559 0.00824705 2.432083e-19 -0.08384680 0.0542363
## 2: 0.7476626 -0.1929130 0.00787703 1.860251e-132 -0.00646498 0.0200966
## 3: 0.3815758 0.4038000 0.01353190 1.159037e-195 0.01426230 0.0163068
## 4: 0.6919090 -0.1028820 0.00808786 4.543568e-37 -0.01499210 0.0378517
## 5: 0.2757587 -0.1759580 0.00956877 1.615188e-75 0.02874070 0.0264164
## 6: 0.1704449 -0.1427030 0.00798096 1.676318e-71 0.03786770 0.0277061
## p_SMR p_HEIDI nsnp_HEIDI ENTREZID SYMBOL
## 1: 0.1221159 0.1331619 20 8813 DPM1
## 2: 0.7476833 0.2143223 20 57147 SCYL3
## 3: 0.3817806 0.4451564 6 55732 C1orf112
## 4: 0.6920509 0.2555007 20 3075 CFH
## 5: 0.2766001 0.5134780 20 2519 FUCA2
## 6: 0.1716990 0.3952101 20 2729 GCLC
#table(fetched$SYMBOL)
# Read-in NUC genes
combo4 <- read.csv("/n/home04/cdadams/MR-eqtlGen-long/combo4.csv")
# Extract NUCs
# Keep the annotated SMR rows whose symbol appears in combo4's `gene` column.
nucs_fetched <- fetched[which(fetched$SYMBOL %in% combo4$gene), ]
dim(nucs_fetched)
## [1] 379 24
# Tabulate how many rows each gene symbol has and show the table.
genes_tab <- as.data.frame(table(nucs_fetched$SYMBOL))
genes_tab
## Var1 Freq
## 1 ABT1 1
## 2 APEX1 1
## 3 APOD 1
## 4 BAG6 1
## 5 BMS1 1
## 6 BMT2 1
## 7 BUD23 1
## 8 BYSL 1
## 9 C12orf65 1
## 10 C1D 1
## 11 C1QBP 1
## 12 CHD7 1
## 13 CKS2 1
## 14 CNOT6 1
## 15 CUL4A 1
## 16 DAP3 1
## 17 DCAF13 1
## 18 DDX10 1
## 19 DDX17 1
## 20 DDX18 1
## 21 DDX21 1
## 22 DDX27 1
## 23 DDX28 1
## 24 DDX31 1
## 25 DDX47 1
## 26 DDX49 1
## 27 DDX51 1
## 28 DDX54 1
## 29 DDX56 1
## 30 DENR 1
## 31 DHX37 1
## 32 DIMT1 1
## 33 DIS3 1
## 34 DRD4 1
## 35 DROSHA 1
## 36 EBNA1BP2 1
## 37 EFL1 1
## 38 EIF2A 1
## 39 EIF2AK2 1
## 40 EIF2AK4 1
## 41 EIF2D 1
## 42 EIF2S1 1
## 43 EIF3C 1
## 44 EIF3H 1
## 45 EIF3K 1
## 46 EIF4A3 1
## 47 EIF5A 1
## 48 EIF5A2 1
## 49 EIF6 1
## 50 EMG1 1
## 51 ERAL1 1
## 52 ERCC2 1
## 53 ERI1 1
## 54 ERI2 1
## 55 ESF1 1
## 56 ETF1 1
## 57 EXOSC1 1
## 58 EXOSC10 1
## 59 EXOSC2 1
## 60 EXOSC3 1
## 61 EXOSC5 1
## 62 EXOSC6 1
## 63 EXOSC7 1
## 64 EXOSC8 1
## 65 EXOSC9 1
## 66 FAM207A 1
## 67 FASTKD2 1
## 68 FBL 1
## 69 FBLL1 1
## 70 FCF1 1
## 71 FDXACB1 1
## 72 FRG1 1
## 73 FTSJ3 1
## 74 FXR2 1
## 75 GEMIN4 1
## 76 GEMIN5 1
## 77 GFM2 1
## 78 GLUL 1
## 79 GNL2 1
## 80 GTF3A 1
## 81 GTPBP10 1
## 82 GTPBP4 1
## 83 HEATR1 1
## 84 HEATR3 1
## 85 HELB 1
## 86 HELQ 1
## 87 HSPA5 1
## 88 IGHMBP2 1
## 89 IMP3 1
## 90 IMP4 1
## 91 ISG20 1
## 92 ISG20L2 1
## 93 KAT2B 1
## 94 KRI1 1
## 95 KRR1 1
## 96 LSG1 1
## 97 LSM6 1
## 98 LTO1 1
## 99 LTV1 1
## 100 LYAR 1
## 101 MAIP1 1
## 102 MAK16 1
## 103 MALSU1 1
## 104 MCRS1 1
## 105 MDN1 1
## 106 METTL15 1
## 107 METTL16 1
## 108 METTL17 1
## 109 METTL5 1
## 110 MPHOSPH10 1
## 111 MPHOSPH6 1
## 112 MPV17L2 1
## 113 MRM2 1
## 114 MRM3 1
## 115 MRPL1 1
## 116 MRPL10 1
## 117 MRPL12 1
## 118 MRPL13 1
## 119 MRPL14 1
## 120 MRPL15 1
## 121 MRPL16 1
## 122 MRPL17 1
## 123 MRPL18 1
## 124 MRPL19 1
## 125 MRPL2 1
## 126 MRPL20 1
## 127 MRPL21 1
## 128 MRPL22 1
## 129 MRPL23 1
## 130 MRPL24 1
## 131 MRPL27 1
## 132 MRPL28 1
## 133 MRPL32 1
## 134 MRPL33 1
## 135 MRPL34 1
## 136 MRPL35 1
## 137 MRPL36 1
## 138 MRPL37 1
## 139 MRPL38 1
## 140 MRPL39 1
## 141 MRPL4 1
## 142 MRPL40 1
## 143 MRPL41 1
## 144 MRPL42 1
## 145 MRPL43 1
## 146 MRPL44 1
## 147 MRPL47 1
## 148 MRPL48 1
## 149 MRPL49 1
## 150 MRPL50 1
## 151 MRPL51 1
## 152 MRPL52 1
## 153 MRPL53 1
## 154 MRPL54 1
## 155 MRPL55 1
## 156 MRPL58 1
## 157 MRPL9 1
## 158 MRPS10 1
## 159 MRPS11 1
## 160 MRPS12 1
## 161 MRPS14 1
## 162 MRPS15 1
## 163 MRPS16 1
## 164 MRPS17 1
## 165 MRPS18A 1
## 166 MRPS18C 1
## 167 MRPS22 1
## 168 MRPS23 1
## 169 MRPS24 1
## 170 MRPS25 1
## 171 MRPS26 1
## 172 MRPS30 1
## 173 MRPS31 1
## 174 MRPS33 1
## 175 MRPS34 1
## 176 MRPS35 1
## 177 MRPS36 1
## 178 MRPS6 1
## 179 MRPS7 1
## 180 MRPS9 1
## 181 MRTO4 1
## 182 MTERF3 1
## 183 MTERF4 1
## 184 MTG2 1
## 185 MTIF2 1
## 186 MTIF3 1
## 187 MTOR 1
## 188 MYBBP1A 1
## 189 NAF1 1
## 190 NAT10 1
## 191 NCK1 1
## 192 NDUFA7 1
## 193 NDUFAB1 1
## 194 NGDN 1
## 195 NIFK 1
## 196 NIP7 1
## 197 NLE1 1
## 198 NMD3 1
## 199 NOB1 1
## 200 NOC2L 1
## 201 NOC4L 1
## 202 NOL10 1
## 203 NOL6 1
## 204 NOL8 1
## 205 NOL9 1
## 206 NOM1 1
## 207 NOP10 1
## 208 NOP14 1
## 209 NOP16 1
## 210 NOP2 1
## 211 NOP53 1
## 212 NOP56 1
## 213 NOP58 1
## 214 NOP9 1
## 215 NPM3 1
## 216 NSA2 1
## 217 NSUN3 1
## 218 NSUN4 1
## 219 NSUN5P1 1
## 220 NSUN5P2 1
## 221 NUDT16 1
## 222 NUFIP1 1
## 223 NUP88 1
## 224 NVL 1
## 225 OXA1L 1
## 226 PAK1IP1 1
## 227 PDCD11 1
## 228 PELO 1
## 229 PES1 1
## 230 PIH1D1 1
## 231 PINX1 1
## 232 PNPT1 1
## 233 POLR1A 1
## 234 POLR1B 1
## 235 POP4 1
## 236 POP5 1
## 237 POP7 1
## 238 PPAN 1
## 239 PPARGC1A 1
## 240 PTEN 1
## 241 PWP1 1
## 242 PWP2 1
## 243 PYM1 1
## 244 RAN 1
## 245 RBFA 1
## 246 RBIS 1
## 247 RCL1 1
## 248 RICTOR 1
## 249 RIOK1 1
## 250 RIOK2 1
## 251 RIOK3 1
## 252 RIOX2 1
## 253 RNASEL 1
## 254 RPF1 1
## 255 RPF2 1
## 256 RPL10A 1
## 257 RPL11 1
## 258 RPL12 1
## 259 RPL13 1
## 260 RPL13A 1
## 261 RPL14 1
## 262 RPL15 1
## 263 RPL17 1
## 264 RPL18A 1
## 265 RPL21 1
## 266 RPL22 1
## 267 RPL27A 1
## 268 RPL28 1
## 269 RPL29 1
## 270 RPL3 1
## 271 RPL30 1
## 272 RPL31 1
## 273 RPL32 1
## 274 RPL36 1
## 275 RPL37 1
## 276 RPL37A 1
## 277 RPL3L 1
## 278 RPL4 1
## 279 RPL7L1 1
## 280 RPL8 1
## 281 RPL9 1
## 282 RPLP0 1
## 283 RPP25 1
## 284 RPP30 1
## 285 RPP38 1
## 286 RPP40 1
## 287 RPS10 1
## 288 RPS10P5 1
## 289 RPS11 1
## 290 RPS12 1
## 291 RPS13 1
## 292 RPS14 1
## 293 RPS15 1
## 294 RPS15A 1
## 295 RPS16 1
## 296 RPS18 1
## 297 RPS19 1
## 298 RPS2 1
## 299 RPS20 1
## 300 RPS23 1
## 301 RPS24 1
## 302 RPS25 1
## 303 RPS26 1
## 304 RPS27L 1
## 305 RPS28 1
## 306 RPS29 1
## 307 RPS3 1
## 308 RPS3A 1
## 309 RPS5 1
## 310 RPS6 1
## 311 RPS7 1
## 312 RPS8 1
## 313 RPS9 1
## 314 RPSA 1
## 315 RPUSD1 1
## 316 RPUSD2 1
## 317 RPUSD3 1
## 318 RPUSD4 1
## 319 RRNAD1 1
## 320 RRP1 1
## 321 RRP12 1
## 322 RRP15 1
## 323 RRP1B 1
## 324 RRP36 1
## 325 RRP7A 1
## 326 RRP7BP 1
## 327 RRP8 1
## 328 RSL1D1 1
## 329 RSL24D1 1
## 330 SART1 1
## 331 SBDS 1
## 332 SDAD1 1
## 333 SENP3 1
## 334 SERP1 1
## 335 SHFL 1
## 336 SHQ1 1
## 337 SNU13 1
## 338 SRFBP1 1
## 339 SRP19 1
## 340 SURF6 1
## 341 TBL3 1
## 342 TENT4B 1
## 343 TEX10 1
## 344 TFB1M 1
## 345 TFB2M 1
## 346 THUMPD1 1
## 347 TRMT112 1
## 348 TRMT61B 1
## 349 TSC1 1
## 350 TSR3 1
## 351 UBA52 1
## 352 URB1 1
## 353 URB2 1
## 354 USP36 1
## 355 UTP11 1
## 356 UTP14C 1
## 357 UTP15 1
## 358 UTP18 1
## 359 UTP20 1
## 360 UTP23 1
## 361 UTP4 1
## 362 UTP6 1
## 363 WBP11 1
## 364 WDR18 1
## 365 WDR3 1
## 366 WDR36 1
## 367 WDR43 1
## 368 WDR46 1
## 369 WDR55 1
## 370 WDR75 1
## 371 WRN 1
## 372 XPO1 1
## 373 XRCC5 1
## 374 XRN2 1
## 375 YBEY 1
## 376 ZC3H12A 1
## 377 ZNF354A 1
## 378 ZNF622 1
## 379 ZNHIT6 1
# Adjust the SMR p-values of the extracted genes with the "fdr" method, store
# the result as a new column, and sort the table by it.
p <- nucs_fetched$p_SMR
fdr <- p.adjust(p, method = "fdr")
nucs_fetched$fdr <- fdr
nucs_fetched <- nucs_fetched[order(nucs_fetched$fdr), ]
head(nucs_fetched)
## ENSEMBL probeID ProbeChr Gene Probe_bp
## 1: ENSG00000151304 ENSG00000151304 5 ENSG00000151304 121354460
## 2: ENSG00000223705 ENSG00000223705 7 ENSG00000223705 75042835
## 3: ENSG00000087269 ENSG00000087269 4 ENSG00000087269 2952386
## 4: ENSG00000134001 ENSG00000134001 14 ENSG00000134001 67839973
## 5: ENSG00000170473 ENSG00000170473 12 ENSG00000170473 56310799
## 6: ENSG00000188573 ENSG00000188573 5 ENSG00000188573 167956880
## topSNP topSNP_chr topSNP_bp A1 A2 Freq b_GWAS se_GWAS
## 1: rs75965538 5 121254308 G C 0.0377734 -0.0349702 0.00896040
## 2: rs58513914 7 75147934 A G 0.0944334 -0.0226012 0.00635199
## 3: rs1024323 4 3006043 T C 0.4015900 0.0125863 0.00397296
## 4: rs1885197 14 67247125 G A 0.0168986 -0.0458829 0.01441740
## 5: rs7308505 12 56328699 T C 0.2017890 0.0164445 0.00485235
## 6: rs11557637 5 167924260 A C 0.1799200 0.0152578 0.00488020
## p_GWAS b_eQTL se_eQTL p_eQTL b_SMR se_SMR
## 1: 9.510496e-05 -0.1347240 0.01943070 4.102610e-12 0.2595680 0.0763213
## 2: 3.734931e-04 0.3387670 0.01990220 5.679708e-65 -0.0667161 0.0191556
## 3: 1.535046e-03 -0.0997268 0.00816416 2.578702e-34 -0.1262070 0.0411564
## 4: 1.460309e-03 -0.4894320 0.03458350 1.810019e-45 0.0937473 0.0301931
## 5: 7.015272e-04 0.1101880 0.01171870 5.313011e-21 0.1492400 0.0468099
## 6: 1.769219e-03 0.2924120 0.01234140 4.197346e-124 0.0521790 0.0168341
## p_SMR p_HEIDI nsnp_HEIDI ENTREZID SYMBOL fdr
## 1: 0.0006714036 8.831948e-02 16 153443 SRFBP1 0.1272310
## 2: 0.0004960969 2.010156e-01 16 155400 NSUN5P1 0.1272310
## 3: 0.0021656100 1.038394e-05 20 8602 NOP14 0.1367944
## 4: 0.0019032930 NA NA 1965 EIF2S1 0.1367944
## 5: 0.0014315090 6.738205e-01 9 84305 PYM1 0.1367944
## 6: 0.0019378500 4.536448e-01 20 345630 FBLL1 0.1367944
# Rows with no missing value in any column.
nucs_fetched_no_na <- nucs_fetched[complete.cases(nucs_fetched), ]
dim(nucs_fetched_no_na)
## [1] 378 25
# Genes with an SMR p-value below 0.05.
sigs <- nucs_fetched[which(nucs_fetched$p_SMR < 0.05), ]
dim(sigs)
## [1] 43 25
# Sort by SMR p-value, then re-sort by HEIDI p-value in descending order.
sigs <- sigs[order(sigs$p_SMR), ]
sigs <- sigs[order(sigs$p_HEIDI, decreasing = TRUE), ]
head(sigs, n = 20)
## ENSEMBL probeID ProbeChr Gene Probe_bp
## 1: ENSG00000142541 ENSG00000142541 19 ENSG00000142541 49993188
## 2: ENSG00000132591 ENSG00000132591 17 ENSG00000132591 27185020
## 3: ENSG00000175467 ENSG00000175467 11 ENSG00000175467 65738229
## 4: ENSG00000176731 ENSG00000176731 8 ENSG00000176731 86129480
## 5: ENSG00000136522 ENSG00000136522 3 ENSG00000136522 179314257
## 6: ENSG00000071082 ENSG00000071082 2 ENSG00000071082 101629335
## 7: ENSG00000137547 ENSG00000137547 8 ENSG00000137547 55054115
## 8: ENSG00000170473 ENSG00000170473 12 ENSG00000170473 56310799
## 9: ENSG00000164327 ENSG00000164327 5 ENSG00000164327 39006265
## 10: ENSG00000082898 ENSG00000082898 2 ENSG00000082898 61735372
## 11: ENSG00000132676 ENSG00000132676 1 ENSG00000132676 155683277
## 12: ENSG00000136271 ENSG00000136271 7 ENSG00000136271 44609833
## 13: ENSG00000197498 ENSG00000197498 6 ENSG00000197498 111325260
## 14: ENSG00000108651 ENSG00000108651 17 ENSG00000108651 30208353
## 15: ENSG00000135521 ENSG00000135521 6 ENSG00000135521 144174715
## 16: ENSG00000125445 ENSG00000125445 17 ENSG00000125445 73260104
## 17: ENSG00000188573 ENSG00000188573 5 ENSG00000188573 167956880
## 18: ENSG00000204822 ENSG00000204822 2 ENSG00000204822 74699767
## 19: ENSG00000158092 ENSG00000158092 3 ENSG00000158092 136624857
## 20: ENSG00000198000 ENSG00000198000 9 ENSG00000198000 95073779
## topSNP topSNP_chr topSNP_bp A1 A2 Freq b_GWAS se_GWAS
## 1: rs76687611 19 49990465 A G 0.0447316 -0.02433200 0.00861094
## 2: rs4795457 17 27087929 T C 0.1938370 0.00970722 0.00477176
## 3: rs551659 11 65749645 T C 0.4294230 0.00896488 0.00390577
## 4: rs3808538 8 86121311 G T 0.2723660 -0.00948914 0.00436446
## 5: rs6788205 3 179330599 A G 0.0795229 0.01596290 0.00734056
## 6: rs78362750 2 101629987 T A 0.2882700 -0.00919611 0.00423930
## 7: rs4506187 8 54962327 C G 0.1829030 0.01227080 0.00548449
## 8: rs7308505 12 56328699 T C 0.2017890 0.01644450 0.00485235
## 9: rs1051942 5 38935244 A T 0.1669980 -0.01102300 0.00511768
## 10: rs2694623 2 61553466 T C 0.1928430 -0.00944413 0.00456075
## 11: rs6662953 1 155754448 T C 0.2972170 -0.01288790 0.00424651
## 12: rs217374 7 44612347 G C 0.4085490 -0.00874423 0.00392781
## 13: rs76509375 6 111353398 C G 0.1948310 -0.01155290 0.00483660
## 14: rs1034627 17 30219569 A G 0.1729620 0.01549400 0.00534540
## 15: rs9484759 6 143858683 G A 0.1928430 -0.01029570 0.00482726
## 16: rs7225349 17 73240009 G A 0.2027830 0.01078780 0.00514495
## 17: rs11557637 5 167924260 A C 0.1799200 0.01525780 0.00488020
## 18: rs2301984 2 74721055 G A 0.1590460 0.01127860 0.00554162
## 19: rs80201571 3 136659779 T G 0.2375750 0.01143550 0.00473990
## 20: rs58391046 9 95036323 C T 0.2167000 0.01316830 0.00470360
## p_GWAS b_eQTL se_eQTL p_eQTL b_SMR se_SMR
## 1: 0.0047177200 0.5767300 0.01997800 2.986154e-183 -0.0421895 0.01500200
## 2: 0.0419207900 -0.2173600 0.00975022 4.339723e-110 -0.0446597 0.02204450
## 3: 0.0217163900 -0.1041110 0.00803750 2.253588e-38 -0.0861085 0.03809970
## 4: 0.0296913000 -0.3025300 0.00911868 2.318872e-241 0.0313660 0.01445750
## 5: 0.0296594500 0.1306400 0.01583140 1.557693e-16 0.1221890 0.05810730
## 6: 0.0300634300 -0.4108380 0.01298380 9.617430e-220 0.0223838 0.01034290
## 7: 0.0252630100 0.1705060 0.01159780 6.295466e-49 0.0719667 0.03253630
## 8: 0.0007015272 0.1101880 0.01171870 5.313011e-21 0.1492400 0.04680990
## 9: 0.0312481500 0.1021070 0.01054020 3.411006e-22 -0.1079550 0.05134460
## 10: 0.0383828900 -0.0636818 0.00953664 2.428897e-11 0.1483020 0.07498230
## 11: 0.0024058420 -0.2028900 0.00870452 3.626828e-120 0.0635215 0.02110680
## 12: 0.0259983000 0.2799180 0.00810544 2.427362e-261 -0.0312385 0.01406110
## 13: 0.0169104700 -0.4537850 0.00974094 0.000000e+00 0.0254591 0.01067240
## 14: 0.0037486670 -0.1398660 0.01641540 1.590004e-17 -0.1107780 0.04036900
## 15: 0.0329398900 0.0686851 0.00994543 4.978090e-12 -0.1498960 0.07355620
## 16: 0.0360139900 -0.6990710 0.01101090 0.000000e+00 -0.0154316 0.00736370
## 17: 0.0017692190 0.2924120 0.01234140 4.197346e-124 0.0521790 0.01683410
## 18: 0.0418264100 -0.6397660 0.01112800 0.000000e+00 -0.0176292 0.00866738
## 19: 0.0158390400 -0.1248600 0.00982449 5.270412e-37 -0.0915869 0.03863980
## 20: 0.0051164010 -0.2441800 0.00987775 6.486934e-135 -0.0539285 0.01938600
## p_SMR p_HEIDI nsnp_HEIDI ENTREZID SYMBOL fdr
## 1: 0.004919442 0.9900725 20 23521 RPL13A 0.1707185
## 2: 0.042776220 0.9616853 20 26284 ERAL1 0.3954192
## 3: 0.023816100 0.8474123 20 9092 SART1 0.3223679
## 4: 0.030042430 0.7948770 20 401466 RBIS 0.3535188
## 5: 0.035481590 0.7930796 14 57129 MRPL47 0.3802146
## 6: 0.030451120 0.7917817 20 6160 RPL31 0.3535188
## 7: 0.026974430 0.7006191 20 29088 MRPL15 0.3407770
## 8: 0.001431509 0.6738205 9 84305 PYM1 0.1367944
## 9: 0.035504550 0.6656190 20 253260 RICTOR 0.3802146
## 10: 0.047948380 0.5376992 20 7514 XPO1 0.4226148
## 11: 0.002616500 0.5103245 20 7818 DAP3 0.1416648
## 12: 0.026308710 0.4882582 20 54606 DDX56 0.3407770
## 13: 0.017055120 0.4744518 20 84154 RPF2 0.2776316
## 14: 0.006067170 0.4592981 20 55813 UTP6 0.1768813
## 15: 0.041564880 0.4577675 18 84946 LTV1 0.3954192
## 16: 0.036115370 0.4541091 20 51081 MRPS7 0.3802146
## 17: 0.001937850 0.4536448 20 345630 FBLL1 0.1367944
## 18: 0.041954610 0.4117115 20 116540 MRPL53 0.3954192
## 19: 0.017774960 0.3399252 20 4690 NCK1 0.2776316
## 20: 0.005405335 0.3227455 20 55035 NOL8 0.1707185
# Keep only the rows of `sigs` with no missing value in any column.
# A second assignment, `sigs_no_na = complete.cases(sigs)`, was removed: it
# replaced the filtered table with a logical vector, which is why dim()
# previously returned NULL.
sigs_no_na <- sigs[complete.cases(sigs), ]
dim(sigs_no_na)
## NULL
I read-in the NUC MR done previously for comparison with the non-random gene sets and the data used to run it (nucs_MR_dat) and merged them.
# Read-in the nucs on lifespan MR results
nucs_res <- read_csv(
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/MR-eqtlGen-long/bon_genesets_100/bon_nucs/results/nucs_res.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## exposure = col_character(),
## outcome = col_character(),
## id.exposure = col_character(),
## id.outcome = col_character(),
## samplesize = col_double(),
## SNP = col_character(),
## b = col_double(),
## se = col_double(),
## p = col_double(),
## fdr = col_double(),
## bon = col_double()
## )
# Convert the MR results to a plain data frame and check their size.
nucs_res <- as.data.frame(nucs_res)
dim(nucs_res)
## [1] 93370 12
# Read-in the nucs on lifespan dat
nucs_MR_dat <- read_csv(
  "/n/holylfs/LABS/lemos_lab/Users/cdadams/MR-eqtlGen-long/bon_genesets_100/bon_nucs/data/nucs_MR_dat.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## SNP = col_character(),
## effect_allele.exposure = col_character(),
## other_allele.exposure = col_character(),
## effect_allele.outcome = col_character(),
## other_allele.outcome = col_character(),
## remove = col_logical(),
## palindromic = col_logical(),
## ambiguous = col_logical(),
## id.outcome = col_character(),
## outcome = col_character(),
## mr_keep.outcome = col_logical(),
## pval_origin.outcome = col_character(),
## data_source.outcome = col_character(),
## exposure = col_character(),
## mr_keep.exposure = col_logical(),
## pval_origin.exposure = col_character(),
## id.exposure = col_character(),
## data_source.exposure = col_character(),
## mr_keep = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
# Convert the MR input data to a plain data frame and check its size.
nucs_MR_dat <- as.data.frame(nucs_MR_dat)
dim(nucs_MR_dat)
## [1] 95222 34
# Join results and input data on the SNP / exposure-id pair.
nucs_life_dat_res <- merge(
  nucs_res,
  nucs_MR_dat,
  by = c("SNP", "id.exposure")
)
dim(nucs_life_dat_res)
## [1] 93370 44
# Copy the exposure name into SYMBOL and sort the merged table by FDR.
nucs_life_dat_res$SYMBOL <- nucs_life_dat_res$exposure.y
nucs_life_dat_res <- nucs_life_dat_res[order(nucs_life_dat_res$fdr), ]
# Pull out the NSUN5P1 rows and sort them by the exposure p-value.
NSUN5P1 <- nucs_life_dat_res[
  which(nucs_life_dat_res$id.exposure == "NSUN5P1"),
]
NSUN5P1 <- NSUN5P1[order(NSUN5P1$pval.exposure), ]
head(NSUN5P1)
## SNP id.exposure X1.x exposure.x outcome.x id.outcome.x
## 58952 rs58513914 NSUN5P1 57852 NSUN5P1 longevity vtcEuL
## 64073 rs62477693 NSUN5P1 57855 NSUN5P1 longevity vtcEuL
## 10141 rs113761643 NSUN5P1 57835 NSUN5P1 longevity vtcEuL
## 25964 rs144108627 NSUN5P1 57845 NSUN5P1 longevity vtcEuL
## 9538 rs113144856 NSUN5P1 57833 NSUN5P1 longevity vtcEuL
## 64071 rs62476648 NSUN5P1 57853 NSUN5P1 longevity vtcEuL
## samplesize b se p fdr bon X1.y
## 58952 1012240 -0.1080661 0.03037154 0.0003734969 0.08572288 1 93432
## 64073 1012240 -0.1107199 0.03052340 0.0002863137 0.08572288 1 93436
## 10141 1012240 -0.1062037 0.03024336 0.0004453582 0.08572288 1 93411
## 25964 1012240 -0.1125758 0.03104858 0.0002880733 0.08572288 1 93425
## 9538 1012240 -0.1109647 0.03143906 0.0004163185 0.08572288 1 93409
## 64071 1012240 -0.1145884 0.03236244 0.0003989390 0.08572288 1 93433
## effect_allele.exposure other_allele.exposure effect_allele.outcome
## 58952 A G A
## 64073 A G A
## 10141 C A C
## 25964 T G T
## 9538 C G C
## 64071 A G A
## other_allele.outcome beta.exposure beta.outcome eaf.exposure eaf.outcome
## 58952 G 0.2091428 -0.02260124 0.12215594 0.11457501
## 64073 G 0.2280545 -0.02525018 0.09517283 0.09529602
## 10141 A 0.2272803 -0.02413800 0.09569872 0.09501164
## 25964 G 0.2216668 -0.02495432 0.09591117 0.09573044
## 9538 G 0.2245217 -0.02491398 0.09318104 0.09405488
## 64071 G 0.2227190 -0.02552102 0.09336540 0.09599120
## remove palindromic ambiguous id.outcome.y chr.outcome pos.outcome
## 58952 FALSE FALSE FALSE vtcEuL 7 75147934
## 64073 FALSE FALSE FALSE vtcEuL 7 75111938
## 10141 FALSE FALSE FALSE vtcEuL 7 75155047
## 25964 FALSE FALSE FALSE vtcEuL 7 75158596
## 9538 FALSE TRUE FALSE vtcEuL 7 75105913
## 64071 FALSE FALSE FALSE vtcEuL 7 75053626
## se.outcome pval.outcome info.outcome outcome.y mr_keep.outcome
## 58952 0.00635199 0.0003734931 0.947414 longevity TRUE
## 64073 0.00696100 0.0002863170 0.954058 longevity TRUE
## 10141 0.00687372 0.0004453585 0.952117 longevity TRUE
## 25964 0.00688244 0.0002880747 0.954134 longevity TRUE
## 9538 0.00705875 0.0004163225 0.960383 longevity TRUE
## 64071 0.00720773 0.0003989365 0.949082 longevity TRUE
## pval_origin.outcome data_source.outcome pval.exposure se.exposure
## 58952 reported textfile 7.2326e-57 0.01228683
## 64073 reported textfile 5.4356e-54 0.01371385
## 10141 reported textfile 7.0512e-54 0.01368021
## 25964 reported textfile 4.9498e-51 0.01366955
## 9538 reported textfile 5.1302e-51 0.01384748
## 64071 reported textfile 3.4209e-50 0.01383606
## exposure.y mr_keep.exposure pval_origin.exposure data_source.exposure
## 58952 NSUN5P1 TRUE reported textfile
## 64073 NSUN5P1 TRUE reported textfile
## 10141 NSUN5P1 TRUE reported textfile
## 25964 NSUN5P1 TRUE reported textfile
## 9538 NSUN5P1 TRUE reported textfile
## 64071 NSUN5P1 TRUE reported textfile
## action mr_keep samplesize.outcome samplesize.exposure SYMBOL
## 58952 2 TRUE 1012240 30596 NSUN5P1
## 64073 2 TRUE 1012240 30596 NSUN5P1
## 10141 2 TRUE 1012240 30596 NSUN5P1
## 25964 2 TRUE 1012240 30596 NSUN5P1
## 9538 2 TRUE 1012240 30596 NSUN5P1
## 64071 2 TRUE 1012240 30596 NSUN5P1
# Sort the merged table by the exposure p-value, then count rows per exposure
# id and show the counts.
nucs_life_dat_res <- nucs_life_dat_res[
  order(nucs_life_dat_res$pval.exposure),
]
nucs_life_dat_res_gene_tab <- as.data.frame(
  table(nucs_life_dat_res$id.exposure)
)
nucs_life_dat_res_gene_tab #316
## Var1 Freq
## 1 AATF 27
## 2 ABT1 454
## 3 APEX1 40
## 4 APOD 21
## 5 BAG6 8
## 6 BMS1 154
## 7 BYSL 284
## 8 C12orf65 330
## 9 C1D 43
## 10 CHD7 212
## 11 CKS2 273
## 12 CNOT6 496
## 13 DAP3 409
## 14 DCAF13 131
## 15 DDX10 247
## 16 DDX17 335
## 17 DDX18 263
## 18 DDX21 51
## 19 DDX28 9
## 20 DDX31 236
## 21 DDX47 7
## 22 DDX49 13
## 23 DDX51 161
## 24 DDX52 276
## 25 DDX56 449
## 26 DENR 177
## 27 DHX37 12
## 28 DIEXF 970
## 29 DIMT1 463
## 30 DIS3 96
## 31 DRD4 241
## 32 DROSHA 18
## 33 EBNA1BP2 400
## 34 EIF2A 267
## 35 EIF2AK2 597
## 36 EIF2AK4 257
## 37 EIF2D 69
## 38 EIF2S1 802
## 39 EIF3C 323
## 40 EIF3H 5
## 41 EIF3K 263
## 42 EIF4A3 437
## 43 EIF5A 62
## 44 EIF5A2 20
## 45 EIF6 1833
## 46 EMG1 156
## 47 ERAL1 241
## 48 ERCC2 111
## 49 ERI1 217
## 50 ERI2 142
## 51 ETF1 103
## 52 EXOSC1 508
## 53 EXOSC10 202
## 54 EXOSC2 114
## 55 EXOSC3 337
## 56 EXOSC6 1396
## 57 EXOSC7 348
## 58 EXOSC8 394
## 59 EXOSC9 360
## 60 FAM207A 66
## 61 FASTKD2 33
## 62 FBL 47
## 63 FBLL1 342
## 64 FCF1 115
## 65 FRG1 29
## 66 FTSJ3 679
## 67 GEMIN4 236
## 68 GFM2 129
## 69 GLUL 197
## 70 GNL2 93
## 71 GTF2H5 426
## 72 GTF3A 663
## 73 GTPBP10 1364
## 74 GTPBP4 434
## 75 HEATR1 369
## 76 HEATR3 684
## 77 HELB 99
## 78 HSPA5 17
## 79 IGHMBP2 964
## 80 IMP3 259
## 81 IMP4 465
## 82 ISG20 568
## 83 ISG20L2 63
## 84 KAT2B 523
## 85 KRI1 141
## 86 KRR1 279
## 87 LSG1 503
## 88 LSM6 327
## 89 LYAR 278
## 90 MAK16 375
## 91 MALSU1 26
## 92 MDN1 252
## 93 METTL16 17
## 94 METTL17 127
## 95 METTL5 540
## 96 MPHOSPH10 25
## 97 MPHOSPH6 810
## 98 MPV17L2 177
## 99 MRPL1 13
## 100 MRPL10 341
## 101 MRPL12 116
## 102 MRPL13 169
## 103 MRPL14 205
## 104 MRPL15 121
## 105 MRPL17 103
## 106 MRPL18 873
## 107 MRPL19 385
## 108 MRPL2 439
## 109 MRPL20 260
## 110 MRPL21 1209
## 111 MRPL22 234
## 112 MRPL23 11
## 113 MRPL24 61
## 114 MRPL27 178
## 115 MRPL28 77
## 116 MRPL32 204
## 117 MRPL33 262
## 118 MRPL34 508
## 119 MRPL35 876
## 120 MRPL36 327
## 121 MRPL37 93
## 122 MRPL38 43
## 123 MRPL39 555
## 124 MRPL4 37
## 125 MRPL40 349
## 126 MRPL41 13
## 127 MRPL42 555
## 128 MRPL43 164
## 129 MRPL44 1
## 130 MRPL45 107
## 131 MRPL47 49
## 132 MRPL48 961
## 133 MRPL49 8
## 134 MRPL50 73
## 135 MRPL51 199
## 136 MRPL52 168
## 137 MRPL53 469
## 138 MRPL54 180
## 139 MRPL55 397
## 140 MRPL9 20
## 141 MRPS10 105
## 142 MRPS11 4
## 143 MRPS15 148
## 144 MRPS16 1025
## 145 MRPS17 95
## 146 MRPS18C 282
## 147 MRPS21 907
## 148 MRPS24 55
## 149 MRPS25 271
## 150 MRPS26 116
## 151 MRPS30 261
## 152 MRPS31 257
## 153 MRPS33 531
## 154 MRPS34 463
## 155 MRPS35 195
## 156 MRPS6 653
## 157 MRPS7 1017
## 158 MRPS9 593
## 159 MRTO4 445
## 160 MTIF2 21
## 161 MTIF3 186
## 162 MTOR 233
## 163 MYBBP1A 42
## 164 NAF1 45
## 165 NAT10 290
## 166 NCK1 217
## 167 NDUFA7 172
## 168 NGDN 103
## 169 NIP7 635
## 170 NLE1 171
## 171 NMD3 849
## 172 NOB1 18
## 173 NOC2L 139
## 174 NOC4L 96
## 175 NOL10 14
## 176 NOL6 262
## 177 NOL8 550
## 178 NOL9 231
## 179 NOM1 519
## 180 NOP10 693
## 181 NOP14 116
## 182 NOP16 81
## 183 NOP56 215
## 184 NOP58 596
## 185 NPM3 492
## 186 NSA2 337
## 187 NSUN4 1687
## 188 NSUN5P1 35
## 189 NSUN5P2 536
## 190 NUFIP1 15
## 191 NUP88 1372
## 192 NVL 318
## 193 OXA1L 35
## 194 PAK1IP1 2
## 195 PELO 583
## 196 PES1 119
## 197 PIH1D1 130
## 198 PINX1 1
## 199 PNPT1 332
## 200 POLR1A 356
## 201 POLR1B 431
## 202 POP4 1
## 203 POP5 400
## 204 POP7 35
## 205 PPARGC1A 466
## 206 PWP1 797
## 207 RAN 65
## 208 RBFA 381
## 209 RCL1 46
## 210 RICTOR 33
## 211 RIOK1 285
## 212 RIOK2 104
## 213 RIOK3 101
## 214 RNASEL 250
## 215 RPF1 97
## 216 RPF2 311
## 217 RPL10A 531
## 218 RPL11 26
## 219 RPL12 526
## 220 RPL13 687
## 221 RPL13A 318
## 222 RPL14 694
## 223 RPL17 559
## 224 RPL21 1
## 225 RPL22 31
## 226 RPL27A 220
## 227 RPL28 221
## 228 RPL3 82
## 229 RPL31 318
## 230 RPL32 77
## 231 RPL36 128
## 232 RPL37 13
## 233 RPL37A 72
## 234 RPL3L 18
## 235 RPL7L1 151
## 236 RPL8 756
## 237 RPL9 377
## 238 RPLP0 42
## 239 RPP25 1
## 240 RPP38 134
## 241 RPP40 214
## 242 RPS10 312
## 243 RPS11 59
## 244 RPS12 166
## 245 RPS13 10
## 246 RPS14 100
## 247 RPS15A 274
## 248 RPS16 191
## 249 RPS18 59
## 250 RPS19 122
## 251 RPS2 5
## 252 RPS23 1297
## 253 RPS24 5
## 254 RPS25 112
## 255 RPS26 203
## 256 RPS27L 54
## 257 RPS28 64
## 258 RPS29 273
## 259 RPS3 13
## 260 RPS3A 253
## 261 RPS5 292
## 262 RPS8 573
## 263 RPS9 200
## 264 RPUSD1 33
## 265 RPUSD3 87
## 266 RPUSD4 479
## 267 RRNAD1 65
## 268 RRP1 148
## 269 RRP12 1134
## 270 RRP15 70
## 271 RRP1B 264
## 272 RRP36 258
## 273 RRP7A 647
## 274 RRP8 182
## 275 RSL24D1 620
## 276 SART1 81
## 277 SDAD1 26
## 278 SENP3 39
## 279 SERP1 60
## 280 SHQ1 125
## 281 SRP19 323
## 282 SURF6 410
## 283 TEX10 38
## 284 TFB1M 406
## 285 TFB2M 88
## 286 THUMPD1 742
## 287 TRMT112 565
## 288 TRMT61B 353
## 289 TSC1 103
## 290 TSR3 34
## 291 UBA52 367
## 292 URB1 204
## 293 URB2 18
## 294 USP36 422
## 295 UTP14C 815
## 296 UTP15 256
## 297 UTP18 269
## 298 UTP20 22
## 299 UTP23 278
## 300 UTP6 48
## 301 WBP11 161
## 302 WDR18 386
## 303 WDR3 247
## 304 WDR36 93
## 305 WDR43 129
## 306 WDR55 1082
## 307 WDR75 824
## 308 WRN 349
## 309 XRCC5 144
## 310 XRN2 907
## 311 YBEY 1676
## 312 ZC3H12A 13
## 313 ZNF354A 354
## 314 ZNF622 44
## 315 ZNHIT3 254
## 316 ZNHIT6 700
# Keep the rows of nucs_fetched whose SYMBOL also occurs among the
# id.exposure values of nucs_life_dat_res, then report the dimensions.
# `%in%` never returns NA, so the logical mask can index the rows directly.
genes_commmon_mr_smr <- nucs_fetched[
  nucs_fetched$SYMBOL %in% nucs_life_dat_res$id.exposure,
]
dim(genes_commmon_mr_smr)
## [1] 309 25
# Adjust the p_SMR values for multiple testing with p.adjust's "fdr" method
# and store the adjusted values alongside the originals.
p <- genes_commmon_mr_smr$p_SMR
fdr <- p.adjust(p, method = "fdr", n = length(p))
genes_commmon_mr_smr$fdr <- fdr

# Sort ascending by the adjusted p-value.
genes_commmon_mr_smr <- genes_commmon_mr_smr[
  order(genes_commmon_mr_smr$fdr),
]

# Copy topSNP into a column named SNP so it can serve as a merge key.
genes_commmon_mr_smr$SNP <- genes_commmon_mr_smr$topSNP

# Inner join on the (SNP, SYMBOL) pair; rows without a matching pair in
# nucs_life_dat_res are dropped.
heidi_mr <- merge(
  x = genes_commmon_mr_smr,
  y = nucs_life_dat_res,
  by = c("SNP", "SYMBOL")
)
dim(heidi_mr)
## [1] 304 69
# Preview the first six rows of the merged table.
head(heidi_mr, n = 6L)
## SNP SYMBOL ENSEMBL probeID ProbeChr Gene
## 1: rs10072 RCL1 ENSG00000120158 ENSG00000120158 9 ENSG00000120158
## 2: rs10090927 MRPL13 ENSG00000172172 ENSG00000172172 8 ENSG00000172172
## 3: rs1010878 DDX47 ENSG00000213782 ENSG00000213782 12 ENSG00000213782
## 4: rs10127637 URB2 ENSG00000135763 ENSG00000135763 1 ENSG00000135763
## 5: rs10169290 POLR1B ENSG00000125630 ENSG00000125630 2 ENSG00000125630
## 6: rs10188143 MTIF2 ENSG00000085760 ENSG00000085760 2 ENSG00000085760
## Probe_bp topSNP topSNP_chr topSNP_bp A1 A2 Freq b_GWAS
## 1: 4826966 rs10072 9 4860964 C T 0.3628230 -0.00591662
## 2: 121425321 rs10090927 8 121464984 T C 0.3111330 0.00022228
## 3: 12974582 rs1010878 12 12977400 A G 0.2902580 -0.00259857
## 4: 229778963 rs10127637 1 229788407 G A 0.0536779 0.00450193
## 5: 113317063 rs10169290 2 113331702 G C 0.0695825 0.00123118
## 6: 55480107 rs10188143 2 55505372 C T 0.0238569 -0.00341465
## se_GWAS p_GWAS b_eQTL se_eQTL p_eQTL b_SMR
## 1: 0.00402872 0.1419389 0.1443490 0.00837709 1.543694e-66 -0.04098830
## 2: 0.00413305 0.9571091 0.0941883 0.00843905 6.327582e-29 0.00235995
## 3: 0.00444181 0.5585312 0.0777613 0.00924563 4.079184e-17 -0.03341730
## 4: 0.00904820 0.6188013 -0.1618780 0.01896680 1.403999e-17 -0.02781070
## 5: 0.00700717 0.8605278 0.9054120 0.02113480 0.000000e+00 0.00135980
## 6: 0.01398830 0.8071480 -0.2908040 0.02780580 1.341001e-25 0.01174210
## se_SMR p_SMR p_HEIDI nsnp_HEIDI ENTREZID fdr.x id.exposure
## 1: 0.02801080 0.1433839 0.44198620 20 10171 0.6415703 RCL1
## 2: 0.04388120 0.9571101 0.77152310 20 28998 0.9965244 MRPL13
## 3: 0.05725910 0.5594795 0.06924742 20 51202 0.8551116 DDX47
## 4: 0.05599020 0.6193961 0.93041140 16 9816 0.8748056 URB2
## 5: 0.00773927 0.8605285 0.73884140 20 84172 0.9670536 POLR1B
## 6: 0.04811530 0.8071995 0.99449170 14 4528 0.9376866 MTIF2
## X1.x exposure.x outcome.x id.outcome.x samplesize b se
## 1: 64284 RCL1 longevity vtcEuL 1012240 -0.040285038 0.02743072
## 2: 30288 MRPL13 longevity vtcEuL 1012240 0.002351511 0.04372374
## 3: 4064 DDX47 longevity vtcEuL 1012240 -0.033011244 0.05642706
## 4: 84464 URB2 longevity vtcEuL 1012240 -0.030049715 0.06039539
## 5: 61697 POLR1B longevity vtcEuL 1012240 0.001930047 0.01098472
## 6: 48766 MTIF2 longevity vtcEuL 1012240 0.012558387 0.05144627
## p fdr.y bon X1.y effect_allele.exposure other_allele.exposure
## 1: 0.1419385 0.6408509 1 40760 C T
## 2: 0.9571096 0.9927599 1 47901 T C
## 3: 0.5585309 0.8945395 1 54699 A G
## 4: 0.6188014 0.9167828 1 61173 G A
## 5: 0.8605274 0.9778189 1 67733 G C
## 6: 0.8071480 0.9650099 1 76415 C T
## effect_allele.outcome other_allele.outcome beta.exposure beta.outcome
## 1: C T 0.14686892 -0.00591662
## 2: T C 0.09452644 0.00022228
## 3: A G 0.07871772 -0.00259857
## 4: G A -0.14981606 0.00450193
## 5: G C 0.63790155 0.00123118
## 6: C T -0.27190195 -0.00341465
## eaf.exposure eaf.outcome remove palindromic ambiguous id.outcome.y
## 1: 0.33505581 0.35463096 FALSE FALSE FALSE vtcEuL
## 2: 0.34804988 0.32426077 FALSE FALSE FALSE vtcEuL
## 3: 0.24727930 0.26565132 FALSE FALSE FALSE vtcEuL
## 4: 0.05605246 0.05400200 FALSE FALSE FALSE vtcEuL
## 5: 0.07518683 0.08595268 FALSE TRUE FALSE vtcEuL
## 6: 0.02470105 0.02322319 FALSE FALSE FALSE vtcEuL
## chr.outcome pos.outcome se.outcome pval.outcome info.outcome outcome.y
## 1: 9 4860964 0.00402872 0.1419389 0.995250 longevity
## 2: 8 121464984 0.00413305 0.9571091 0.987174 longevity
## 3: 12 12977400 0.00444181 0.5585312 0.974652 longevity
## 4: 1 229788407 0.00904820 0.6188013 0.968873 longevity
## 5: 2 113331702 0.00700717 0.8605278 0.995974 longevity
## 6: 2 55505372 0.01398834 0.8071480 0.949320 longevity
## mr_keep.outcome pval_origin.outcome data_source.outcome pval.exposure
## 1: TRUE reported textfile 1.9658e-58
## 2: TRUE reported textfile 8.0577e-21
## 3: TRUE reported textfile 5.1945e-09
## 4: TRUE reported textfile 1.7879e-09
## 5: TRUE reported textfile 1.0000e-200
## 6: TRUE reported textfile 1.7077e-17
## se.exposure exposure.y mr_keep.exposure pval_origin.exposure
## 1: 0.008523232 RCL1 TRUE reported
## 2: 0.008469201 MRPL13 TRUE reported
## 3: 0.009359236 DDX47 TRUE reported
## 4: 0.017553553 URB2 TRUE reported
## 5: 0.014890360 POLR1B TRUE reported
## 6: 0.025998675 MTIF2 TRUE reported
## data_source.exposure action mr_keep samplesize.outcome samplesize.exposure
## 1: textfile 2 TRUE 1012240 30596
## 2: textfile 2 TRUE 1012240 30596
## 3: textfile 2 TRUE 1012240 30596
## 4: textfile 2 TRUE 1012240 30596
## 5: textfile 2 TRUE 1012240 30596
## 6: textfile 2 TRUE 1012240 30596
# Negated membership operator: TRUE where the left-hand value is absent
# from the right-hand set.
`%notin%` <- Negate(`%in%`)

# Rows of genes_commmon_mr_smr whose SYMBOL does NOT appear in heidi_mr,
# i.e. the genes lost in the (SNP, SYMBOL) merge above.
# The earlier version used `%in%` here, which returned the 304 retained
# rows instead of the dropped ones and left `%notin%` unused.
notin <- genes_commmon_mr_smr[
  which(genes_commmon_mr_smr$SYMBOL %notin% heidi_mr$SYMBOL),
]
dim(notin)
# NOTE(review): the dimensions below are derived from the recorded counts
# (309 rows before the merge, 304 matched by `%in%`), not from a re-run.
## [1] 5 26
# Save the merged table as CSV (default row names are written).
write.csv(
  heidi_mr,
  file = "/n/holylfs/LABS/lemos_lab/Users/cdadams/smr_eqtlGen/heidi_mr.csv"
)