We perform a meta-analytic Mendelian randomization (MR) of increased cytosolic ribosomal protein (RP) expression on breast cancer and find that an increase in expression (using eQTLs increasing expression cytosolic RPs) is null.
# Create the directory to match the repository in GitHub
mkdir /n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast
cd /n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast
# MR of increased cytosolic RP expression in breast on breast cancer
setwd('/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast')
# Hand-curated RP list (no pseudogenes): 150
rp_list_expanded=read.csv('/n/home04/cdadams/ld/ldsc/expanded_verified.csv')
rp_list_expanded
# Look and clean:
master_v8_n=read.csv("/n/home04/cdadams/ld/ldsc/v8_master_tissues_n.csv")
master_v8_n$X=NULL
colnames(master_v8_n)[2] <- "chromosome"
colnames(master_v8_n)[31] <- "MAF"
master_v8_n$eaf=ifelse(master_v8_n$ref_factor=='1', master_v8_n$MAF, 1-master_v8_n$MAF)
gtex_breast=master_v8_n[which(master_v8_n$tissue=="Breast_Mammary_Tissue"),]
write.csv(gtex_breast, '/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast/gtex_breast.csv')
# Just read in the cleaned data
gtex_breast=read.csv('/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast/gtex_breast.csv')
gtex_breast$X=NULL
# Get the RP eqtls
gtex_breast_RP=gtex_breast[which(gtex_breast$gene_name %in% rp_list_expanded$all_RP),]
# Select the P<5x10-06 eqtls
gtex_breast_RP_p_06=gtex_breast_RP[which(gtex_breast_RP$pval<0.000005),]
# Order
gtex_breast_RP_p_06=gtex_breast_RP_p_06[order(gtex_breast_RP_p_06$gene_name, decreasing=TRUE),]
# Remove duplicate entries
gtex_breast_RP_p_06Clean <- gtex_breast_RP_p_06[!duplicated(gtex_breast_RP_p_06$gene_name),]
gtex_breast_RP_p_06Clean$tissue="Breast"
write.csv(gtex_breast_RP_p_06Clean, '/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast/gtex_breast_RP_p_06Clean.csv')
# MR format
# Use samplesize_col = 'position' to keep position. Change after.
exposure_dat <- read_exposure_data(
filename = "/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast/gtex_breast_RP_p_06Clean.csv",
sep = ',',
snp_col = 'SNP',
beta_col = 'beta',
se_col = 'se',
effect_allele_col = 'effect_allele',
phenotype_col = 'tissue',
other_allele_col = 'other_allele',
eaf_col = 'eaf',
samplesize_col = 'position',
gene_col = 'gene_name',
pval_col = 'pval',
)
# Format the colname names
exposure_dat$chr_name=exposure_dat$chr.exposure
exposure_dat$chrom_start=exposure_dat$samplesize.exposure
exposure_dat$samplesize.exposure=NULL
exposure_dat$gene_tissue=exposure_dat$exposure
exposure_dat$exposure="increased RP expression in breast"
exposure_dat$id.exposure="increased RP expression in breast"
# Select the increased expression eQTLs
dat_increase=exposure_dat[which(exposure_dat$beta.exposure>0),]
# Subset by just the cytosolic RPs
increase_RPs <- dat_increase[startsWith(as.character(dat_increase$gene.exposure), 'R'),]
# Clump for LD
increase_RPs_clump <- clump_data(increase_RPs)
# Extract the increased expression loci from BCAC
outcome_dat <- extract_outcome_data(increase_RPs_clump$SNP, c('1126'),
proxies = 1, rsq = 0.8, align_alleles = 1, palindromes = 1, maf_threshold = 0.3)
dat <- harmonise_data(increase_RPs_clump, outcome_dat, action = 2)
# Run MR
mr_results<- mr(dat)
mr_results
# Detect and remove outliers (outliers can indicate pleiotropy)
# No outliers
# raddat <- format_radial(BXG=dat$beta.exposure, BYG=dat$beta.outcome,
# seBXG=dat$se.exposure, seBYG=dat$se.outcome, RSID=dat$SNP)
# ivwrad <- ivw_radial(raddat, alpha=0.05, weights=1)
# ivwrad2 <- ivw_radial(raddat, alpha=0.05, weights=1)
# dim(ivwrad$outliers)[1]
# outliers=ivwrad$outliers[1]
# outliers
# myvars=outliers$SNP
# dat2 <- dat[! dat$SNP %in% myvars, ]
# dim(dat2)
# dim(dat)
dat$outcome="breast cancer (BCAC)"
dat$id.outcome="breast cancer (BCAC)"
# Run MR without outliers
# mr_results <- mr(dat2)
# mr_results
singlesnp_results=mr_singlesnp(dat, parameters = default_parameters(),
single_method = "mr_wald_ratio",
all_method = c("mr_ivw",
"mr_egger_regression",
"mr_weighted_mode",
"mr_weighted_median"))
# Remove the snps that weren't used in the MR to have a record of the data source
singlesnp_results.2=singlesnp_results[1:4,]
dim(singlesnp_results.2)
dat2.1=dat[which(dat$SNP %in% singlesnp_results.2$SNP),]
dim(dat2.1)
dim(dat)
dat=dat2.1
dim(dat)
# r2 and Fstat
n=480
k=4
p=dat$eaf.exposure
r2.1<-(2*(dat$beta.exposure^2)*p*(1-p))/1
r2=sum(r2.1, na.rm = TRUE)
r2
Fstat <- r2*(n-1-k)/((1-r2)*k)
Fstat
# Combine results
sin=singlesnp_results
res=mr_results
het<-mr_heterogeneity(dat)
plt<-mr_pleiotropy_test(dat)
# Bug in function for combining; save separately
# all_res<-combine_all_mrresults(res,het,plt,sin,ao_slc=F,Exp=T,split.exposure=T,split.outcome=T)
# all_res$lower_intercept=all_res$intercept-1.96*all_res$intercept_se
# all_res$upper_intercept=all_res$intercept+1.96*all_res$intercept_se
#
# head(all_res[,c("Method","outcome","exposure","nsnp","b","se","or", "or_lci95",
# "or_uci95","pval","intercept","intercept_se","intercept_pval",
# "Q","Q_df","Q_pval")])
#write.csv(all_res,"combined_results_increased_cytosolic_breast_v8.csv")
write.csv(mr_results,'res_increased_cytosolic_breast_v8.csv')
write.csv(singlesnp_results,'singlesnp_increased_cytosolic_breast_v8.csv')
write.csv(dat, 'increased_cytosolic_breast_v8_SIMEX.csv')
write.csv(plt,'plt_increased_cytosolic_breast_v8.csv')
write.csv(het,'het_increased_cytosolic_breast_v8.csv')
The top (forest) plot displays the null findings. The IVW estimate’s confidence interval crosses 1.
The bottom (scatter) plot is a sensitivity analysis. For the test of increased RP expression on breast cancer, it shows the null relationship.