Contents


0.1 Overview


0.2 Actions

# Create the directory to match the repository in GitHub
mkdir /n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast
cd /n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast

0.3 Code

# MR of increased cytosolic RP expression in breast on breast cancer
setwd('/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast')

# Hand-curated RP list (no pseudogenes): 150
rp_list_expanded=read.csv('/n/home04/cdadams/ld/ldsc/expanded_verified.csv')
rp_list_expanded

# Look and clean: 
master_v8_n=read.csv("/n/home04/cdadams/ld/ldsc/v8_master_tissues_n.csv")
master_v8_n$X=NULL
colnames(master_v8_n)[2] <- "chromosome"
colnames(master_v8_n)[31] <- "MAF"
master_v8_n$eaf=ifelse(master_v8_n$ref_factor=='1', master_v8_n$MAF, 1-master_v8_n$MAF)
gtex_breast=master_v8_n[which(master_v8_n$tissue=="Breast_Mammary_Tissue"),]
write.csv(gtex_breast, '/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast/gtex_breast.csv')

# Just read in the cleaned data 
gtex_breast=read.csv('/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast/gtex_breast.csv')
gtex_breast$X=NULL

# Get the RP eqtls 
gtex_breast_RP=gtex_breast[which(gtex_breast$gene_name %in% rp_list_expanded$all_RP),]

# Select the P<5x10-06 eqtls
gtex_breast_RP_p_06=gtex_breast_RP[which(gtex_breast_RP$pval<0.000005),]

# Order
gtex_breast_RP_p_06=gtex_breast_RP_p_06[order(gtex_breast_RP_p_06$gene_name, decreasing=TRUE),]

# Remove duplicate entries
gtex_breast_RP_p_06Clean <- gtex_breast_RP_p_06[!duplicated(gtex_breast_RP_p_06$gene_name),]
gtex_breast_RP_p_06Clean$tissue="Breast"

write.csv(gtex_breast_RP_p_06Clean, '/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast/gtex_breast_RP_p_06Clean.csv')

# MR format
# Use samplesize_col = 'position' to keep position. Change after.

exposure_dat <- read_exposure_data(
  filename = "/n/home04/cdadams/MR-dysreg-GTEx-incr-RP-BC-breast/gtex_breast_RP_p_06Clean.csv",
  sep = ',',
  snp_col = 'SNP',
  beta_col = 'beta',
  se_col = 'se',
  effect_allele_col = 'effect_allele',
  phenotype_col = 'tissue',
  other_allele_col = 'other_allele',
  eaf_col = 'eaf',
  samplesize_col = 'position',
  gene_col = 'gene_name',
  pval_col = 'pval', 
)

# Format the colname names
exposure_dat$chr_name=exposure_dat$chr.exposure
exposure_dat$chrom_start=exposure_dat$samplesize.exposure
exposure_dat$samplesize.exposure=NULL
exposure_dat$gene_tissue=exposure_dat$exposure
exposure_dat$exposure="increased RP expression in breast"
exposure_dat$id.exposure="increased RP expression in breast"

# Select the increased expression eQTLs
dat_increase=exposure_dat[which(exposure_dat$beta.exposure>0),]

# Subset by just the cytosolic RPs
increase_RPs <- dat_increase[startsWith(as.character(dat_increase$gene.exposure), 'R'),]

# Clump for LD
increase_RPs_clump <- clump_data(increase_RPs)

# Extract the increased expression loci from BCAC
outcome_dat <- extract_outcome_data(increase_RPs_clump$SNP, c('1126'),
  proxies = 1, rsq = 0.8, align_alleles = 1, palindromes = 1, maf_threshold = 0.3)
dat <- harmonise_data(increase_RPs_clump, outcome_dat, action = 2)

# Run MR
mr_results<- mr(dat)
mr_results

# Detect and remove outliers (outliers can indicate pleiotropy)
# No outliers
# raddat <- format_radial(BXG=dat$beta.exposure, BYG=dat$beta.outcome, 
#   seBXG=dat$se.exposure, seBYG=dat$se.outcome, RSID=dat$SNP)
# ivwrad <- ivw_radial(raddat, alpha=0.05, weights=1)
# ivwrad2 <- ivw_radial(raddat, alpha=0.05, weights=1)
# dim(ivwrad$outliers)[1] 
# outliers=ivwrad$outliers[1]
# outliers
# myvars=outliers$SNP
# dat2 <- dat[! dat$SNP %in% myvars, ]
# dim(dat2) 
# dim(dat)
dat$outcome="breast cancer (BCAC)"
dat$id.outcome="breast cancer (BCAC)"

# Run MR without outliers
# mr_results <- mr(dat2)
# mr_results

singlesnp_results=mr_singlesnp(dat, parameters = default_parameters(),
  single_method = "mr_wald_ratio", 
  all_method = c("mr_ivw",
  "mr_egger_regression", 
  "mr_weighted_mode",
  "mr_weighted_median"))

# Remove the snps that weren't used in the MR to have a record of the data source
singlesnp_results.2=singlesnp_results[1:4,]
dim(singlesnp_results.2)
dat2.1=dat[which(dat$SNP %in% singlesnp_results.2$SNP),]
dim(dat2.1)
dim(dat)
dat=dat2.1
dim(dat)

# r2 and Fstat

n=480
k=4
p=dat$eaf.exposure
r2.1<-(2*(dat$beta.exposure^2)*p*(1-p))/1
r2=sum(r2.1, na.rm = TRUE)
r2 
Fstat <- r2*(n-1-k)/((1-r2)*k)
Fstat

# Combine results
sin=singlesnp_results
res=mr_results
het<-mr_heterogeneity(dat)
plt<-mr_pleiotropy_test(dat)

# Bug in function for combining; save separately
# all_res<-combine_all_mrresults(res,het,plt,sin,ao_slc=F,Exp=T,split.exposure=T,split.outcome=T)
# all_res$lower_intercept=all_res$intercept-1.96*all_res$intercept_se
# all_res$upper_intercept=all_res$intercept+1.96*all_res$intercept_se
# 
# head(all_res[,c("Method","outcome","exposure","nsnp","b","se","or", "or_lci95",  
#   "or_uci95","pval","intercept","intercept_se","intercept_pval",
#   "Q","Q_df","Q_pval")])
#write.csv(all_res,"combined_results_increased_cytosolic_breast_v8.csv")

write.csv(mr_results,'res_increased_cytosolic_breast_v8.csv')
write.csv(singlesnp_results,'singlesnp_increased_cytosolic_breast_v8.csv')
write.csv(dat, 'increased_cytosolic_breast_v8_SIMEX.csv')
write.csv(plt,'plt_increased_cytosolic_breast_v8.csv')
write.csv(het,'het_increased_cytosolic_breast_v8.csv')

0.4 Data sources


0.5 Results


0.6 Interpretation

The top (forest) plot displays the null findings. The IVW estimate’s confidence interval crosses 1.

The bottom (scatter) plot is a sensitivity analysis. For the test of increased RP expression on breast cancer, it shows the null relationship.