GTEx breast eQTLs

  • Read in the full GTEx v8 “eGene” data
  • Clean up column names
  • Extract breast eQTLs
  • Extract RP/NUC genes
  • Keep eQTLs with p < 5e-06, one per gene
# Work from the project directory; the relative output path used further down
# ('gtex_breast.csv') resolves against it.
setwd("/n/home04/cdadams/MR-GTEx-breast-bc")

# Master GTEx v8 eQTL table (it carries a `tissue` column that is filtered on
# below).
master_v8_n <- read.csv(file = "/n/home04/cdadams/ld/ldsc/v8_master_tissues_n.csv")

# Gene list; its `gene` column is what the breast eQTLs are matched against.
combo4 <- read.csv(file = "/n/home04/cdadams/ld/ldsc/combo4.csv")

# Look and clean
# Drop the `X` column (presumably the row-name column left by an earlier
# write.csv -- TODO confirm).
master_v8_n[["X"]] <- NULL

# Rename by position: column 2 becomes "chromosome", column 31 becomes "MAF".
# NOTE(review): positional renames silently break if the input layout changes.
names(master_v8_n)[c(2, 31)] <- c("chromosome", "MAF")

# Effect-allele frequency: MAF when ref_factor is 1, otherwise 1 - MAF.
master_v8_n$eaf <- ifelse(
  master_v8_n$ref_factor == "1",
  master_v8_n$MAF,
  1 - master_v8_n$MAF
)

# Keep only breast tissue rows; %in% treats a missing tissue as "no match",
# so rows with NA tissue are dropped.
gtex_breast <- master_v8_n[master_v8_n$tissue %in% "Breast_Mammary_Tissue", ]
write.csv(gtex_breast, file = "gtex_breast.csv")

# Restrict the breast eQTLs to genes present in the combo4 gene list.
gtex_breast_combo4 <- gtex_breast[gtex_breast$gene_name %in% combo4$gene, ]

# Keep associations with p < 5e-06; which() also drops rows whose p-value is NA.
gtex_breast_combo4_p_06 <- gtex_breast_combo4[which(gtex_breast_combo4$pval < 0.000005), ]

# Sort genes in decreasing name order (unchanged from before) and, within each
# gene, by ascending p-value. Previously rows were sorted by gene name only, so
# the row kept by !duplicated() below was whichever eQTL happened to come first
# in the input file. With the p-value tie-breaker the strongest eQTL per gene
# is retained as the single instrument.
gtex_breast_combo4_p_06 <- gtex_breast_combo4_p_06[
  order(
    -xtfrm(gtex_breast_combo4_p_06$gene_name),
    gtex_breast_combo4_p_06$pval
  ),
]

# One row (the first, i.e. lowest p-value) per gene.
gtex_breast_combo4_p_06Clean <- gtex_breast_combo4_p_06[
  !duplicated(gtex_breast_combo4_p_06$gene_name),
]
write.csv(gtex_breast_combo4_p_06Clean, '/n/home04/cdadams/MR-GTEx-breast-bc/gtex_breast_combo4_p_06Clean.csv')

Format & harmonize

# Load the filtered, one-row-per-gene breast eQTLs (the CSV written by the
# previous step) as the exposure data set.
# Phenotype, units, ncase and ncontrol columns are deliberately not mapped.
exposure_nuc_GTEx_p <- read_exposure_data(
  filename = "/n/home04/cdadams/MR-GTEx-breast-bc/gtex_breast_combo4_p_06Clean.csv",
  sep = ",",
  # Variant and gene identifiers
  snp_col = "SNP",
  gene_col = "gene_name",
  # Alleles and effect-allele frequency
  effect_allele_col = "effect_allele",
  other_allele_col = "other_allele",
  eaf_col = "eaf",
  # Association statistics
  beta_col = "beta",
  se_col = "se",
  pval_col = "pval",
  samplesize_col = "n"
)

# Load the breast cancer GWAS summary statistics as the outcome data set,
# restricted to the SNPs selected as exposure instruments.
# Phenotype, units, sample-size and gene columns are not mapped here; the
# original script noted 133384 / 113789 next to the unused ncase / ncontrol
# options (presumably the case / control counts -- TODO confirm).
bc_outcome <- read_outcome_data(
  snps = exposure_nuc_GTEx_p$SNP,
  filename = "/n/home04/cdadams/ld/ldsc/bcac_master3.txt",
  sep = "\t",
  # Variant identifier
  snp_col = "id",
  # Alleles and effect-allele frequency
  effect_allele_col = "Effect.Gwas",
  other_allele_col = "Baseline.Gwas",
  eaf_col = "Freq.Gwas",
  # Association statistics
  beta_col = "Beta",
  se_col = "SE.Gwas",
  pval_col = "P.value.Gwas",
  # NOTE(review): id_col receives the label "breast cancer" rather than an
  # obvious column name -- confirm this is intended.
  id_col = "breast cancer"
)

# Harmonise the alleles between the two GWAS (exposure eQTLs vs. breast cancer
# outcome). action = 2 is passed through unchanged; see the TwoSampleMR
# harmonise_data() documentation for its handling of palindromic SNPs.
nuc_GTEx_breast_bc <- harmonise_data(exposure_nuc_GTEx_p, bc_outcome, action = 2)

# Fixed sample sizes for the outcome and exposure studies. These are stored as
# numbers rather than character strings so that the columns are numeric when
# the data frame is used in the same session, not only after a CSV round trip.
nuc_GTEx_breast_bc$samplesize.outcome <- 247173
nuc_GTEx_breast_bc$samplesize.exposure <- 480

# Use the gene name as the exposure id so results are reported per gene.
nuc_GTEx_breast_bc$id.exposure <- nuc_GTEx_breast_bc$gene.exposure

# Saved so the MR step can simply read this file back in.
write.csv(nuc_GTEx_breast_bc, '/n/home04/cdadams/MR-GTEx-breast-bc/nuc_GTEx_breast_bc.csv')

MR Wald Ratios

# Reload the harmonised data written by the previous step and run the MR.
nuc_GTEx_breast_bc <- read.csv('/n/home04/cdadams/MR-GTEx-breast-bc/nuc_GTEx_breast_bc.csv')
mr_results <- mr(nuc_GTEx_breast_bc)

# FDR correction
# Use the full column name: `mr_results$p` only worked through `$` partial
# matching onto `pval`, which silently breaks if another column starting with
# "p" is added or if mr_results is a tibble.
p <- mr_results$pval
# n is kept explicit, exactly as in the original analysis.
fdr <- p.adjust(p, method = "fdr", n = length(p))
mr_results$fdr <- fdr

# Most significant (smallest FDR) results first.
mr_results <- mr_results[order(mr_results$fdr), ]

# Format for nice presentation of results
# Show the gene (stored in id.exposure) as the exposure label and a fixed
# outcome label, then drop the two id columns.
mr_results[["exposure"]] <- mr_results[["id.exposure"]]
mr_results[["outcome"]] <- "breast cancer"
mr_results[["id.outcome"]] <- NULL
mr_results[["id.exposure"]] <- NULL

# Top 20 rows (the table is already sorted by FDR).
head(mr_results, n = 20L)
##           outcome exposure     method nsnp           b         se         pval
## 72  breast cancer   MRPS30 Wald ratio    1  0.30014879 0.07567854 0.0000730577
## 77  breast cancer    MTIF2 Wald ratio    1 -0.29482578 0.10711837 0.0059171694
## 100 breast cancer    RPL10 Wald ratio    1  1.18379432 0.43981071 0.0071110274
## 24  breast cancer     ERI2 Wald ratio    1 -0.25051753 0.09657355 0.0094850097
## 7   breast cancer   DCAF13 Wald ratio    1 -0.32811729 0.15011902 0.0288365151
## 25  breast cancer  EXOSC10 Wald ratio    1 -0.16765193 0.08236812 0.0418108193
## 27  breast cancer    FBLL1 Wald ratio    1  0.04808987 0.02308314 0.0372208504
## 32  breast cancer    GTF3A Wald ratio    1  0.46264724 0.20404489 0.0233670418
## 34  breast cancer   GTPBP4 Wald ratio    1  0.34711125 0.16798103 0.0387933977
## 48  breast cancer   MRPL18 Wald ratio    1  0.14515565 0.06616533 0.0282474463
## 49  breast cancer   MRPL19 Wald ratio    1 -0.11303393 0.05446429 0.0379516188
## 87  breast cancer  NSUN5P1 Wald ratio    1  0.29176554 0.14004746 0.0372208504
## 122 breast cancer    RPS23 Wald ratio    1  0.09449224 0.04699876 0.0443755139
## 154 breast cancer    UTP15 Wald ratio    1 -0.25106295 0.10840078 0.0205548741
## 115 breast cancer    RPS11 Wald ratio    1  0.22401188 0.11571993 0.0528908764
## 83  breast cancer     NOL8 Wald ratio    1 -0.15302664 0.08113493 0.0592847208
## 10  breast cancer    DDX31 Wald ratio    1 -0.06368902 0.03519205 0.0703336189
## 22  breast cancer    EIF5A Wald ratio    1 -0.16360241 0.09468950 0.0840280729
## 45  breast cancer  METTL17 Wald ratio    1  0.24273652 0.14072761 0.0845513132
## 111 breast cancer     RPL8 Wald ratio    1  0.10638955 0.06040181 0.0781768756
##            fdr
## 72  0.01205452
## 77  0.39110651
## 100 0.39110651
## 24  0.39125665
## 7   0.52299713
## 25  0.52299713
## 27  0.52299713
## 32  0.52299713
## 34  0.52299713
## 48  0.52299713
## 49  0.52299713
## 87  0.52299713
## 122 0.52299713
## 154 0.52299713
## 115 0.58179964
## 83  0.61137368
## 10  0.68264983
## 22  0.69754833
## 45  0.69754833
## 111 0.69754833
# Save the formatted, FDR-sorted Wald ratio results.
write.csv(
  mr_results,
  file = "/n/home04/cdadams/MR-GTEx-breast-bc/Walds-GTEx-breast-bc.csv"
)