Decreased cytosolic RPs in breast on breast cancer

Abbreviations and definitions

Gene-Tissue Expression (GTex) Project
Ribosomal protein (RP)
expression quantitative trait loci (eQTL)
significance threshold for analyses here (Q-value for ‘eGene’ status <0.05 & nominal P<5x10-6)
Mendelian randomization (MR)

MR of decreased RP expression in breast on breast cancer

Code

####################################################################################
#### MR of decreased cytosolic RP expression in breast on breast cancer
####################################################################################
setwd('C:/Users/charl/Dropbox/Harvard/ribosomal_proteins/Main_Tables_Paper/v8/breast/verified/decreased_cytosolic')
load("C:/Users/charl/Dropbox/Harvard/ribosomal_proteins/Main_Tables_Paper/v8/forest_plots/tutorial/dpi_plots/no_tissue_ann/final_plots/atlas.RData")
breast_forest2 <- grepl.sub(data = forest2, pattern = "Breast", Var = "tissue")

#table(breast_forest2$n) #480
colnames(breast_forest2)[7]="eaf"

#format the eQTL data for MR
exposure_dat <- format_gtex_eqtl(breast_forest2)

exposure_dat$gene_tissue=exposure_dat$exposure
exposure_dat$exposure="decreased RP expression in breast"
exposure_dat$id.exposure="decreased RP expression in breast"
exposure_dat$tissue="breast"

#select the decreased expression eQTLs
dat_decrease=exposure_dat[which(exposure_dat$beta.exposure<0),]

#subset by just the cytosolic RPs
RPs <- dat_decrease[startsWith(as.character(dat_decrease$gene_tissue), 'R'),]
RPs$rsid=RPs$SNP
RPs$pval=RPs$pval.exposure

#clump for LD
#exposure_dat <- clump_data(RPs)
exposure_dat <- ieugwasr::ld_clump(RPs)

#extract the increased expression loci from BCAC
outcome_dat <- extract_outcome_data(exposure_dat$SNP, c('1126'),
  proxies = 1, rsq = 0.8, align_alleles = 1, palindromes = 1, maf_threshold = 0.3)
dat <- harmonise_data(exposure_dat, outcome_dat, action = 2)
dim(dat)

#run MR
mr_results<- mr(dat)
mr_results

#detect and remove outliers (outliers can indicate pleiotropy)
raddat <- format_radial(BXG=dat$beta.exposure, BYG=dat$beta.outcome, 
  seBXG=dat$se.exposure, seBYG=dat$se.outcome, RSID=dat$SNP)
ivwrad <- ivw_radial(raddat, alpha=0.05, weights=1)
ivwrad2 <- ivw_radial(raddat, alpha=0.05, weights=1)
dim(ivwrad$outliers)[1] 
outliers=ivwrad$outliers[1]
outliers
myvars=outliers$SNP
dat2 <- dat[ ! dat$SNP %in% myvars, ]
dim(dat2) 
dim(dat)
dat2$outcome="breast cancer (BCAC)"
dat2$id.outcome="breast cancer (BCAC)"

#run MR without outliers
mr_results <- mr(dat2)
mr_results

singlesnp_results=mr_singlesnp(dat2, parameters = default_parameters(),
  single_method = "mr_wald_ratio", 
  all_method = c("mr_ivw",
  "mr_egger_regression", 
  "mr_weighted_mode",
  "mr_weighted_median"))

#remove the snps that weren't used in the MR to have a record of the data source
singlesnp_results.2=singlesnp_results[1:10,]
dim(singlesnp_results.2)
dat2.1=dat2[which(dat2$SNP %in% singlesnp_results.2$SNP),]
dim(dat2.1)
dim(dat)
dat=dat2.1
dim(dat)

#r2 and Fstat
#removed 2 NA's that didn't have EAF
n=480
k=10
p=dat$eaf.exposure
r2.1<-(2*(dat$beta.exposure^2)*p*(1-p))/1
r2=sum(r2.1, na.rm = TRUE)
r2 
Fstat <- r2*(n-1-k)/((1-r2)*k)
Fstat

#Combine results
sin=singlesnp_results
res=mr_results
het<-mr_heterogeneity(dat)
plt<-mr_pleiotropy_test(dat)
all_res<-combine_all_mrresults(res,het,plt,sin,ao_slc=F,
  Exp=T,split.exposure=T,split.outcome=T)
all_res$lower_intercept=all_res$intercept-1.96*all_res$intercept_se
all_res$upper_intercept=all_res$intercept+1.96*all_res$intercept_se

head(all_res[,c("Method","outcome","exposure","nsnp","b","se","or", "or_lci95",  
  "or_uci95","pval","intercept","intercept_se","intercept_pval",
  "Q","Q_df","Q_pval")])
write.csv(all_res,"combined_results_decreased_cytosolic_breast_v8.csv")
write.csv(mr_results,'res_decreased_cytosolic_breast_v8.csv')
write.csv(singlesnp_results,'singlesnp_decreased_cytosolic_breast_v8.csv')
write.csv(dat, 'SIMEX.csv')

SNP characteristics (data sources)

	SNP	samplesize.exposure	samplesize.outcome	effect_allele.exposure	other_allele.exposure	effect_allele.outcome	other_allele.outcome	beta.exposure	beta.outcome	eaf.exposure	eaf.outcome	se.exposure	se.outcome	pval.exposure	pval.outcome	data_source.exposure	data_source.outcome	gene_tissue
1	rs10114375	480	228951	T	G	T	G	-0.167920	0.0143	0.154153	0.1012	0.0332558	0.0106	2e-07	0.1756000	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPS6 (Breast_Mammary_Tissue)
2	rs1057232	480	228951	A	G	A	G	-0.348225	-0.0027	NA	0.5433	0.0216214	0.0070	0e+00	0.7029990	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPS28 (Breast_Mammary_Tissue)
3	rs12126557	480	228951	T	C	T	C	-0.220478	-0.0186	0.049521	0.1368	0.0269593	0.0092	0e+00	0.0437200	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPS8 (Breast_Mammary_Tissue)
4	rs17215231	480	228951	T	C	T	C	-0.713962	-0.0271	0.039936	0.0776	0.0359101	0.0120	0e+00	0.0243501	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPS18 (Breast_Mammary_Tissue)
5	rs2280370	480	228951	G	T	G	T	-0.264306	-0.0182	NA	0.1888	0.0232047	0.0081	0e+00	0.0248702	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPL13 (Breast_Mammary_Tissue)
6	rs2687967	480	228951	A	G	A	G	-0.612739	-0.0139	0.692292	0.6960	0.0179954	0.0067	0e+00	0.0390904	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPL9 (Breast_Mammary_Tissue)
10	rs34629529	480	228951	A	G	A	G	-0.265553	0.0021	0.294129	0.3801	0.0199264	0.0071	0e+00	0.7639990	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPS9 (Breast_Mammary_Tissue)
12	rs4938621	480	228951	T	C	T	C	-0.143291	-0.0100	0.260383	0.2160	0.0236331	0.0075	0e+00	0.1831000	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPS25 (Breast_Mammary_Tissue)
13	rs62106040	480	228951	T	A	T	A	-0.212763	-0.0267	0.053914	0.0697	0.0369604	0.0153	0e+00	0.0808407	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPS7 (Breast_Mammary_Tissue)
14	rs731835	480	228951	C	T	C	T	-0.172328	0.0054	0.329473	0.3134	0.0195324	0.0071	0e+00	0.4480000	gtex_eqtl	Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis)	RPS5 (Breast_Mammary_Tissue)

Results for decreased expression of cytosolic RPs in breast on breast cancer

The top plot displays the individual SNPs included in the analysis, their effect estimates and 95% confidence intervals, along with the meta-analyzed “causal” estimates (the term “causal” is used in MR to mean MR is less prone to confounding and reverse causation than observational designs).
The inverse-variance weighted (IVW) estimate is the approach that is used to test the causal effect. The weighted median, weighted mode, and MR-Egger are sensitivity estimators. If the directions and magnitudes of the sensitivity estimators’ findings comport with those of the IVW, this is evidence against pleiotropy. (The sensitivity estimators needn’t all be statistically signficiant. For instance, the MR-Egger estimate isn’t, but this only means that the MR-Egger test doesn’t provide additional support for the IVW. The direction and magnitude of the MR-Egger estimate does, however, comport with the IVW’s.)
The bottom (scatter) plot is a sensitivity analysis for heterogeneity in the effect estimates. For the test of decreased RP expression on breast cancer, it shows the increased risk (the rising regression lines), that the sensitivity estimators comport (the regression lines don’t criss-cross), and that there aren’t substantial outliers (there are no substantial outliers, since I detected and removed them before running the final MR; outliers can indicate pleiotropy).
The MR findings support an increased risk for breast cancer corresponding to decreased expression of cytosolic RPs in breast tissue. The IVW estimate’s confidence interval does not include 1.
The Breast Cancer Association Consortium (BCAC) data is the outcome data source (number of cases=122,977; number of controls 105,974).

## $`decreased RP expression in breast.breast cancer (BCAC)`

## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##                         id.exposure           id.outcome
## 1 decreased RP expression in breast breast cancer (BCAC)

## $`decreased RP expression in breast.breast cancer (BCAC)`

## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##                         id.exposure           id.outcome
## 1 decreased RP expression in breast breast cancer (BCAC)

Sensitivity analysis of increased expression of cytosolic RPs in breast on breast cancer

The top (forest) plot displays the null findings. The IVW estimate’s confidence interval crosses 1.

The bottom (scatter) plot is a sensitivity analysis. For the test of increased RP expression on breast cancer, it shows the null relationship (regression lines are fairly flat) and that the sensitivity estimators fail to comport (the regression lines criss-cross).

## $`increased RP expression in breast.breast cancer (BCAC)`

## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##                         id.exposure           id.outcome
## 1 increased RP expression in breast breast cancer (BCAC)

## $`increased RP expression in breast.breast cancer (BCAC)`

## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##                         id.exposure           id.outcome
## 1 increased RP expression in breast breast cancer (BCAC)

Prostate cancer

I’ve done the same analysis of decreased cytosolic RP expression in prostate on prostate cancer. There is a signal for an increased risk for prostate cancer, as well.
Only one of the individual SNPs used in the two MR analyses overlap, and for prostate only 5 RP SNPs were selected after LD pruning and removing outliers.
I can do a scan that looks at all decreased cytosolic RPs for breast and prostate without running the meta-analysis that LD-clumps to obtain individual Wald ratio estimates. This will let me see SNPs that were excluded and look at the patterns.