Author: Charleen D. Adams



Abbreviations and definitions





MR of decreased RP expression in breast on breast cancer



Code

####################################################################################
#### MR of decreased cytosolic RP expression in breast on breast cancer
####################################################################################
setwd('C:/Users/charl/Dropbox/Harvard/ribosomal_proteins/Main_Tables_Paper/v8/breast/verified/decreased_cytosolic')
load("C:/Users/charl/Dropbox/Harvard/ribosomal_proteins/Main_Tables_Paper/v8/forest_plots/tutorial/dpi_plots/no_tissue_ann/final_plots/atlas.RData")
breast_forest2 <- grepl.sub(data = forest2, pattern = "Breast", Var = "tissue")

#table(breast_forest2$n) #480
colnames(breast_forest2)[7]="eaf"

#format the eQTL data for MR
exposure_dat <- format_gtex_eqtl(breast_forest2)

exposure_dat$gene_tissue=exposure_dat$exposure
exposure_dat$exposure="decreased RP expression in breast"
exposure_dat$id.exposure="decreased RP expression in breast"
exposure_dat$tissue="breast"

#select the decreased expression eQTLs
dat_decrease=exposure_dat[which(exposure_dat$beta.exposure<0),]

#subset by just the cytosolic RPs
RPs <- dat_decrease[startsWith(as.character(dat_decrease$gene_tissue), 'R'),]
RPs$rsid=RPs$SNP
RPs$pval=RPs$pval.exposure

#clump for LD
#exposure_dat <- clump_data(RPs)
exposure_dat <- ieugwasr::ld_clump(RPs)

#extract the increased expression loci from BCAC
outcome_dat <- extract_outcome_data(exposure_dat$SNP, c('1126'),
  proxies = 1, rsq = 0.8, align_alleles = 1, palindromes = 1, maf_threshold = 0.3)
dat <- harmonise_data(exposure_dat, outcome_dat, action = 2)
dim(dat)

#run MR
mr_results<- mr(dat)
mr_results

#detect and remove outliers (outliers can indicate pleiotropy)
raddat <- format_radial(BXG=dat$beta.exposure, BYG=dat$beta.outcome, 
  seBXG=dat$se.exposure, seBYG=dat$se.outcome, RSID=dat$SNP)
ivwrad <- ivw_radial(raddat, alpha=0.05, weights=1)
ivwrad2 <- ivw_radial(raddat, alpha=0.05, weights=1)
dim(ivwrad$outliers)[1] 
outliers=ivwrad$outliers[1]
outliers
myvars=outliers$SNP
dat2 <- dat[ ! dat$SNP %in% myvars, ]
dim(dat2) 
dim(dat)
dat2$outcome="breast cancer (BCAC)"
dat2$id.outcome="breast cancer (BCAC)"

#run MR without outliers
mr_results <- mr(dat2)
mr_results

singlesnp_results=mr_singlesnp(dat2, parameters = default_parameters(),
  single_method = "mr_wald_ratio", 
  all_method = c("mr_ivw",
  "mr_egger_regression", 
  "mr_weighted_mode",
  "mr_weighted_median"))

#remove the snps that weren't used in the MR to have a record of the data source
singlesnp_results.2=singlesnp_results[1:10,]
dim(singlesnp_results.2)
dat2.1=dat2[which(dat2$SNP %in% singlesnp_results.2$SNP),]
dim(dat2.1)
dim(dat)
dat=dat2.1
dim(dat)

#r2 and Fstat
#removed 2 NA's that didn't have EAF
n=480
k=10
p=dat$eaf.exposure
r2.1<-(2*(dat$beta.exposure^2)*p*(1-p))/1
r2=sum(r2.1, na.rm = TRUE)
r2 
Fstat <- r2*(n-1-k)/((1-r2)*k)
Fstat

#Combine results
sin=singlesnp_results
res=mr_results
het<-mr_heterogeneity(dat)
plt<-mr_pleiotropy_test(dat)
all_res<-combine_all_mrresults(res,het,plt,sin,ao_slc=F,
  Exp=T,split.exposure=T,split.outcome=T)
all_res$lower_intercept=all_res$intercept-1.96*all_res$intercept_se
all_res$upper_intercept=all_res$intercept+1.96*all_res$intercept_se

head(all_res[,c("Method","outcome","exposure","nsnp","b","se","or", "or_lci95",  
  "or_uci95","pval","intercept","intercept_se","intercept_pval",
  "Q","Q_df","Q_pval")])
write.csv(all_res,"combined_results_decreased_cytosolic_breast_v8.csv")
write.csv(mr_results,'res_decreased_cytosolic_breast_v8.csv')
write.csv(singlesnp_results,'singlesnp_decreased_cytosolic_breast_v8.csv')
write.csv(dat, 'SIMEX.csv')


SNP characteristics (data sources)

SNP samplesize.exposure samplesize.outcome effect_allele.exposure other_allele.exposure effect_allele.outcome other_allele.outcome beta.exposure beta.outcome eaf.exposure eaf.outcome se.exposure se.outcome pval.exposure pval.outcome data_source.exposure data_source.outcome gene_tissue
1 rs10114375 480 228951 T G T G -0.167920 0.0143 0.154153 0.1012 0.0332558 0.0106 2e-07 0.1756000 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPS6 (Breast_Mammary_Tissue)
2 rs1057232 480 228951 A G A G -0.348225 -0.0027 NA 0.5433 0.0216214 0.0070 0e+00 0.7029990 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPS28 (Breast_Mammary_Tissue)
3 rs12126557 480 228951 T C T C -0.220478 -0.0186 0.049521 0.1368 0.0269593 0.0092 0e+00 0.0437200 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPS8 (Breast_Mammary_Tissue)
4 rs17215231 480 228951 T C T C -0.713962 -0.0271 0.039936 0.0776 0.0359101 0.0120 0e+00 0.0243501 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPS18 (Breast_Mammary_Tissue)
5 rs2280370 480 228951 G T G T -0.264306 -0.0182 NA 0.1888 0.0232047 0.0081 0e+00 0.0248702 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPL13 (Breast_Mammary_Tissue)
6 rs2687967 480 228951 A G A G -0.612739 -0.0139 0.692292 0.6960 0.0179954 0.0067 0e+00 0.0390904 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPL9 (Breast_Mammary_Tissue)
10 rs34629529 480 228951 A G A G -0.265553 0.0021 0.294129 0.3801 0.0199264 0.0071 0e+00 0.7639990 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPS9 (Breast_Mammary_Tissue)
12 rs4938621 480 228951 T C T C -0.143291 -0.0100 0.260383 0.2160 0.0236331 0.0075 0e+00 0.1831000 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPS25 (Breast_Mammary_Tissue)
13 rs62106040 480 228951 T A T A -0.212763 -0.0267 0.053914 0.0697 0.0369604 0.0153 0e+00 0.0808407 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPS7 (Breast_Mammary_Tissue)
14 rs731835 480 228951 C T C T -0.172328 0.0054 0.329473 0.3134 0.0195324 0.0071 0e+00 0.4480000 gtex_eqtl Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) RPS5 (Breast_Mammary_Tissue)


Results for decreased expression of cytosolic RPs in breast on breast cancer

  • The top plot displays the individual SNPs included in the analysis, their effect estimates and 95% confidence intervals, along with the meta-analyzed “causal” estimates (the term “causal” is used in MR to mean MR is less prone to confounding and reverse causation than observational designs).

  • The inverse-variance weighted (IVW) estimate is the approach that is used to test the causal effect. The weighted median, weighted mode, and MR-Egger are sensitivity estimators. If the directions and magnitudes of the sensitivity estimators’ findings comport with those of the IVW, this is evidence against pleiotropy. (The sensitivity estimators needn’t all be statistically signficiant. For instance, the MR-Egger estimate isn’t, but this only means that the MR-Egger test doesn’t provide additional support for the IVW. The direction and magnitude of the MR-Egger estimate does, however, comport with the IVW’s.)

  • The bottom (scatter) plot is a sensitivity analysis for heterogeneity in the effect estimates. For the test of decreased RP expression on breast cancer, it shows the increased risk (the rising regression lines), that the sensitivity estimators comport (the regression lines don’t criss-cross), and that there aren’t substantial outliers (there are no substantial outliers, since I detected and removed them before running the final MR; outliers can indicate pleiotropy).

  • The MR findings support an increased risk for breast cancer corresponding to decreased expression of cytosolic RPs in breast tissue. The IVW estimate’s confidence interval does not include 1.

  • The Breast Cancer Association Consortium (BCAC) data is the outcome data source (number of cases=122,977; number of controls 105,974).

## $`decreased RP expression in breast.breast cancer (BCAC)`

## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##                         id.exposure           id.outcome
## 1 decreased RP expression in breast breast cancer (BCAC)
## $`decreased RP expression in breast.breast cancer (BCAC)`

## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##                         id.exposure           id.outcome
## 1 decreased RP expression in breast breast cancer (BCAC)


Sensitivity analysis of increased expression of cytosolic RPs in breast on breast cancer

The top (forest) plot displays the null findings. The IVW estimate’s confidence interval crosses 1.

The bottom (scatter) plot is a sensitivity analysis. For the test of increased RP expression on breast cancer, it shows the null relationship (regression lines are fairly flat) and that the sensitivity estimators fail to comport (the regression lines criss-cross).

## $`increased RP expression in breast.breast cancer (BCAC)`

## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##                         id.exposure           id.outcome
## 1 increased RP expression in breast breast cancer (BCAC)
## $`increased RP expression in breast.breast cancer (BCAC)`

## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##                         id.exposure           id.outcome
## 1 increased RP expression in breast breast cancer (BCAC)

Prostate cancer

  • I’ve done the same analysis of decreased cytosolic RP expression in prostate on prostate cancer. There is a signal for an increased risk for prostate cancer, as well.

  • Only one of the individual SNPs used in the two MR analyses overlap, and for prostate only 5 RP SNPs were selected after LD pruning and removing outliers.

  • I can do a scan that looks at all decreased cytosolic RPs for breast and prostate without running the meta-analysis that LD-clumps to obtain individual Wald ratio estimates. This will let me see SNPs that were excluded and look at the patterns.