Author: Charleen D. Adams
####################################################################################
#### MR of decreased cytosolic RP expression in breast on breast cancer
####################################################################################
setwd('C:/Users/charl/Dropbox/Harvard/ribosomal_proteins/Main_Tables_Paper/v8/breast/verified/decreased_cytosolic')
load("C:/Users/charl/Dropbox/Harvard/ribosomal_proteins/Main_Tables_Paper/v8/forest_plots/tutorial/dpi_plots/no_tissue_ann/final_plots/atlas.RData")
breast_forest2 <- grepl.sub(data = forest2, pattern = "Breast", Var = "tissue")
#table(breast_forest2$n) #480
colnames(breast_forest2)[7]="eaf"
#format the eQTL data for MR
exposure_dat <- format_gtex_eqtl(breast_forest2)
exposure_dat$gene_tissue=exposure_dat$exposure
exposure_dat$exposure="decreased RP expression in breast"
exposure_dat$id.exposure="decreased RP expression in breast"
exposure_dat$tissue="breast"
#select the decreased expression eQTLs
dat_decrease=exposure_dat[which(exposure_dat$beta.exposure<0),]
#subset by just the cytosolic RPs
RPs <- dat_decrease[startsWith(as.character(dat_decrease$gene_tissue), 'R'),]
RPs$rsid=RPs$SNP
RPs$pval=RPs$pval.exposure
#clump for LD
#exposure_dat <- clump_data(RPs)
exposure_dat <- ieugwasr::ld_clump(RPs)
#extract the increased expression loci from BCAC
outcome_dat <- extract_outcome_data(exposure_dat$SNP, c('1126'),
proxies = 1, rsq = 0.8, align_alleles = 1, palindromes = 1, maf_threshold = 0.3)
dat <- harmonise_data(exposure_dat, outcome_dat, action = 2)
dim(dat)
#run MR
mr_results<- mr(dat)
mr_results
#detect and remove outliers (outliers can indicate pleiotropy)
raddat <- format_radial(BXG=dat$beta.exposure, BYG=dat$beta.outcome,
seBXG=dat$se.exposure, seBYG=dat$se.outcome, RSID=dat$SNP)
ivwrad <- ivw_radial(raddat, alpha=0.05, weights=1)
ivwrad2 <- ivw_radial(raddat, alpha=0.05, weights=1)
dim(ivwrad$outliers)[1]
outliers=ivwrad$outliers[1]
outliers
myvars=outliers$SNP
dat2 <- dat[ ! dat$SNP %in% myvars, ]
dim(dat2)
dim(dat)
dat2$outcome="breast cancer (BCAC)"
dat2$id.outcome="breast cancer (BCAC)"
#run MR without outliers
mr_results <- mr(dat2)
mr_results
singlesnp_results=mr_singlesnp(dat2, parameters = default_parameters(),
single_method = "mr_wald_ratio",
all_method = c("mr_ivw",
"mr_egger_regression",
"mr_weighted_mode",
"mr_weighted_median"))
#remove the snps that weren't used in the MR to have a record of the data source
singlesnp_results.2=singlesnp_results[1:10,]
dim(singlesnp_results.2)
dat2.1=dat2[which(dat2$SNP %in% singlesnp_results.2$SNP),]
dim(dat2.1)
dim(dat)
dat=dat2.1
dim(dat)
#r2 and Fstat
#removed 2 NA's that didn't have EAF
n=480
k=10
p=dat$eaf.exposure
r2.1<-(2*(dat$beta.exposure^2)*p*(1-p))/1
r2=sum(r2.1, na.rm = TRUE)
r2
Fstat <- r2*(n-1-k)/((1-r2)*k)
Fstat
#Combine results
sin=singlesnp_results
res=mr_results
het<-mr_heterogeneity(dat)
plt<-mr_pleiotropy_test(dat)
all_res<-combine_all_mrresults(res,het,plt,sin,ao_slc=F,
Exp=T,split.exposure=T,split.outcome=T)
all_res$lower_intercept=all_res$intercept-1.96*all_res$intercept_se
all_res$upper_intercept=all_res$intercept+1.96*all_res$intercept_se
head(all_res[,c("Method","outcome","exposure","nsnp","b","se","or", "or_lci95",
"or_uci95","pval","intercept","intercept_se","intercept_pval",
"Q","Q_df","Q_pval")])
write.csv(all_res,"combined_results_decreased_cytosolic_breast_v8.csv")
write.csv(mr_results,'res_decreased_cytosolic_breast_v8.csv')
write.csv(singlesnp_results,'singlesnp_decreased_cytosolic_breast_v8.csv')
write.csv(dat, 'SIMEX.csv')
| SNP | samplesize.exposure | samplesize.outcome | effect_allele.exposure | other_allele.exposure | effect_allele.outcome | other_allele.outcome | beta.exposure | beta.outcome | eaf.exposure | eaf.outcome | se.exposure | se.outcome | pval.exposure | pval.outcome | data_source.exposure | data_source.outcome | gene_tissue | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | rs10114375 | 480 | 228951 | T | G | T | G | -0.167920 | 0.0143 | 0.154153 | 0.1012 | 0.0332558 | 0.0106 | 2e-07 | 0.1756000 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPS6 (Breast_Mammary_Tissue) |
| 2 | rs1057232 | 480 | 228951 | A | G | A | G | -0.348225 | -0.0027 | NA | 0.5433 | 0.0216214 | 0.0070 | 0e+00 | 0.7029990 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPS28 (Breast_Mammary_Tissue) |
| 3 | rs12126557 | 480 | 228951 | T | C | T | C | -0.220478 | -0.0186 | 0.049521 | 0.1368 | 0.0269593 | 0.0092 | 0e+00 | 0.0437200 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPS8 (Breast_Mammary_Tissue) |
| 4 | rs17215231 | 480 | 228951 | T | C | T | C | -0.713962 | -0.0271 | 0.039936 | 0.0776 | 0.0359101 | 0.0120 | 0e+00 | 0.0243501 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPS18 (Breast_Mammary_Tissue) |
| 5 | rs2280370 | 480 | 228951 | G | T | G | T | -0.264306 | -0.0182 | NA | 0.1888 | 0.0232047 | 0.0081 | 0e+00 | 0.0248702 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPL13 (Breast_Mammary_Tissue) |
| 6 | rs2687967 | 480 | 228951 | A | G | A | G | -0.612739 | -0.0139 | 0.692292 | 0.6960 | 0.0179954 | 0.0067 | 0e+00 | 0.0390904 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPL9 (Breast_Mammary_Tissue) |
| 10 | rs34629529 | 480 | 228951 | A | G | A | G | -0.265553 | 0.0021 | 0.294129 | 0.3801 | 0.0199264 | 0.0071 | 0e+00 | 0.7639990 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPS9 (Breast_Mammary_Tissue) |
| 12 | rs4938621 | 480 | 228951 | T | C | T | C | -0.143291 | -0.0100 | 0.260383 | 0.2160 | 0.0236331 | 0.0075 | 0e+00 | 0.1831000 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPS25 (Breast_Mammary_Tissue) |
| 13 | rs62106040 | 480 | 228951 | T | A | T | A | -0.212763 | -0.0267 | 0.053914 | 0.0697 | 0.0369604 | 0.0153 | 0e+00 | 0.0808407 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPS7 (Breast_Mammary_Tissue) |
| 14 | rs731835 | 480 | 228951 | C | T | C | T | -0.172328 | 0.0054 | 0.329473 | 0.3134 | 0.0195324 | 0.0071 | 0e+00 | 0.4480000 | gtex_eqtl | Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) | RPS5 (Breast_Mammary_Tissue) |
The top plot displays the individual SNPs included in the analysis, their effect estimates and 95% confidence intervals, along with the meta-analyzed “causal” estimates (the term “causal” is used in MR to mean MR is less prone to confounding and reverse causation than observational designs).
The inverse-variance weighted (IVW) estimate is the approach that is used to test the causal effect. The weighted median, weighted mode, and MR-Egger are sensitivity estimators. If the directions and magnitudes of the sensitivity estimators’ findings comport with those of the IVW, this is evidence against pleiotropy. (The sensitivity estimators needn’t all be statistically signficiant. For instance, the MR-Egger estimate isn’t, but this only means that the MR-Egger test doesn’t provide additional support for the IVW. The direction and magnitude of the MR-Egger estimate does, however, comport with the IVW’s.)
The bottom (scatter) plot is a sensitivity analysis for heterogeneity in the effect estimates. For the test of decreased RP expression on breast cancer, it shows the increased risk (the rising regression lines), that the sensitivity estimators comport (the regression lines don’t criss-cross), and that there aren’t substantial outliers (there are no substantial outliers, since I detected and removed them before running the final MR; outliers can indicate pleiotropy).
The MR findings support an increased risk for breast cancer corresponding to decreased expression of cytosolic RPs in breast tissue. The IVW estimate’s confidence interval does not include 1.
The Breast Cancer Association Consortium (BCAC) data is the outcome data source (number of cases=122,977; number of controls 105,974).
## $`decreased RP expression in breast.breast cancer (BCAC)`
##
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
## id.exposure id.outcome
## 1 decreased RP expression in breast breast cancer (BCAC)
## $`decreased RP expression in breast.breast cancer (BCAC)`
##
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
## id.exposure id.outcome
## 1 decreased RP expression in breast breast cancer (BCAC)
The top (forest) plot displays the null findings. The IVW estimate’s confidence interval crosses 1.
The bottom (scatter) plot is a sensitivity analysis. For the test of increased RP expression on breast cancer, it shows the null relationship (regression lines are fairly flat) and that the sensitivity estimators fail to comport (the regression lines criss-cross).
## $`increased RP expression in breast.breast cancer (BCAC)`
##
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
## id.exposure id.outcome
## 1 increased RP expression in breast breast cancer (BCAC)
## $`increased RP expression in breast.breast cancer (BCAC)`
##
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
## id.exposure id.outcome
## 1 increased RP expression in breast breast cancer (BCAC)
I’ve done the same analysis of decreased cytosolic RP expression in prostate on prostate cancer. There is a signal for an increased risk for prostate cancer, as well.
Only one of the individual SNPs used in the two MR analyses overlap, and for prostate only 5 RP SNPs were selected after LD pruning and removing outliers.
I can do a scan that looks at all decreased cytosolic RPs for breast and prostate without running the meta-analysis that LD-clumps to obtain individual Wald ratio estimates. This will let me see SNPs that were excluded and look at the patterns.