Contents


0.1 Overview


0.2 Actions

# Create the directory to match the repository in GitHub
mkdir /n/home04/cdadams/MR-dysreg-GTEx-all-RP-BC-breast
cd /n/home04/cdadams/MR-dysreg-GTEx-all-RP-BC-breast

0.3 Code

# MR of decreased cytosolic RP expression in breast on breast cancer
setwd('/n/home04/cdadams/MR-dysreg-GTEx-all-decr-RP-BC-ERp-breast')

# Hand-curated RP list (no pseudogenes): 150
rp_list_expanded=read.csv('/n/home04/cdadams/ld/ldsc/expanded_verified.csv')
rp_list_expanded

# Look and clean: 
master_v8_n=read.csv("/n/home04/cdadams/ld/ldsc/v8_master_tissues_n.csv")
master_v8_n$X=NULL
colnames(master_v8_n)[2] <- "chromosome"
colnames(master_v8_n)[31] <- "MAF"
master_v8_n$eaf=ifelse(master_v8_n$ref_factor=='1', master_v8_n$MAF, 1-master_v8_n$MAF)
gtex_breast=master_v8_n[which(master_v8_n$tissue=="Breast_Mammary_Tissue"),]
write.csv(gtex_breast, '/n/home04/cdadams/MR-dysreg-GTEx-all-RP-BC-breast/gtex_breast.csv')

# Just read in the cleaned data 
gtex_breast=read.csv('/n/home04/cdadams/MR-dysreg-GTEx-all-RP-BC-breast/gtex_breast.csv')
gtex_breast$X=NULL

# Get the RP eqtls 
gtex_breast_RP=gtex_breast[which(gtex_breast$gene_name %in% rp_list_expanded$all_RP),]

# Select the P<5x10-06 eqtls
gtex_breast_RP_p_06=gtex_breast_RP[which(gtex_breast_RP$pval<0.000005),]

# Order
gtex_breast_RP_p_06=gtex_breast_RP_p_06[order(gtex_breast_RP_p_06$gene_name, decreasing=TRUE),]

# Remove duplicate entries
gtex_breast_RP_p_06Clean <- gtex_breast_RP_p_06[!duplicated(gtex_breast_RP_p_06$gene_name),]
gtex_breast_RP_p_06Clean$tissue="Breast"

write.csv(gtex_breast_RP_p_06Clean, '/n/home04/cdadams/MR-dysreg-GTEx-all-RP-BC-breast/gtex_breast_RP_p_06Clean.csv')

# MR format
# Use samplesize_col = 'position' to keep position. Change after.

exposure_dat <- read_exposure_data(
  filename = "/n/home04/cdadams/MR-dysreg-RP-BC/gtex_breast_RP_p_06Clean.csv",
  sep = ',',
  snp_col = 'SNP',
  beta_col = 'beta',
  se_col = 'se',
  effect_allele_col = 'effect_allele',
  phenotype_col = 'tissue',
  other_allele_col = 'other_allele',
  eaf_col = 'eaf',
  samplesize_col = 'position',
  gene_col = 'gene_name',
  pval_col = 'pval', 
)

# Format the colname names
exposure_dat$chr_name=exposure_dat$chr.exposure
exposure_dat$chrom_start=exposure_dat$samplesize.exposure
exposure_dat$samplesize.exposure=NULL
exposure_dat$gene_tissue=exposure_dat$exposure
exposure_dat$exposure="RP expression in breast"
exposure_dat$id.exposure="RP expression in breast"

# Select the decreased expression eQTLs
dat_decrease=exposure_dat[which(exposure_dat$beta.exposure<0),]

# # Subset by just the cytosolic RPs
# decrease_RPs <- dat_decrease[startsWith(as.character(dat_decrease$gene.exposure), 'R'),]

# Clump for LD
exposure_dat_clump <- clump_data(dat_decrease)

# Extract the increased expression loci from BCAC
outcome_dat <- extract_outcome_data(exposure_dat_clump$SNP, c('1127'),
  proxies = 1, rsq = 0.8, align_alleles = 1, palindromes = 1, maf_threshold = 0.3)
dat <- harmonise_data(exposure_dat_clump, outcome_dat, action = 2)

# Run MR
mr_results<- mr(dat)
mr_results

# Detect and remove outliers (outliers can indicate pleiotropy)
raddat <- format_radial(BXG=dat$beta.exposure, BYG=dat$beta.outcome, 
  seBXG=dat$se.exposure, seBYG=dat$se.outcome, RSID=dat$SNP)
ivwrad <- ivw_radial(raddat, alpha=0.05, weights=1)
ivwrad2 <- ivw_radial(raddat, alpha=0.05, weights=1)
dim(ivwrad$outliers)[1] 
outliers=ivwrad$outliers[1]
outliers
myvars=outliers$SNP
dat2 <- dat[! dat$SNP %in% myvars, ]
dim(dat2) 
dim(dat)
dat2$outcome="breast cancer (BCAC)"
dat2$id.outcome="breast cancer (BCAC)"

# Run MR without outliers
mr_results <- mr(dat2)
mr_results

singlesnp_results=mr_singlesnp(dat2, parameters = default_parameters(),
  single_method = "mr_wald_ratio", 
  all_method = c("mr_ivw",
  "mr_egger_regression", 
  "mr_weighted_mode",
  "mr_weighted_median"))

# Remove the snps that weren't used in the MR to have a record of the data source
singlesnp_results.2=singlesnp_results[1:18,]
dim(singlesnp_results.2)
dat2.1=dat2[which(dat2$SNP %in% singlesnp_results.2$SNP),]
dim(dat2.1)
dim(dat)
dat=dat2.1
dim(dat)

# r2 and Fstat

n=480
k=18
p=dat$eaf.exposure
r2.1<-(2*(dat$beta.exposure^2)*p*(1-p))/1
r2=sum(r2.1, na.rm = TRUE)
r2 
Fstat <- r2*(n-1-k)/((1-r2)*k)
Fstat

# Combine results
sin=singlesnp_results
res=mr_results
het<-mr_heterogeneity(dat)
plt<-mr_pleiotropy_test(dat)

# Bug in function for combining; save separately
# all_res<-combine_all_mrresults(res,het,plt,sin,ao_slc=F,Exp=T,split.exposure=T,split.outcome=T)
# all_res$lower_intercept=all_res$intercept-1.96*all_res$intercept_se
# all_res$upper_intercept=all_res$intercept+1.96*all_res$intercept_se
# 
# head(all_res[,c("Method","outcome","exposure","nsnp","b","se","or", "or_lci95",  
#   "or_uci95","pval","intercept","intercept_se","intercept_pval",
#   "Q","Q_df","Q_pval")])
#write.csv(all_res,"combined_results_all_RP_MRP_breast_v8.csv")

write.csv(mr_results,'res_all_RP_breast_ERp_v8_decr.csv')
write.csv(singlesnp_results,'singlesnp_all_RP_breast_ERp_v8_decr.csv')
write.csv(dat, 'all_RP_breast_v8_SIMEX_ERp_decr.csv')
write.csv(plt,'plt_all_RP_breast_ERp_v8_decr.csv')
write.csv(het,'het_all_RP_breast_v8_ERp_decr.csv')

0.4 Figures


0.5 IVW & sensitivity estimator results

##                      method nsnp    or    se or_lci95 or_uci95  pval
## 1                  MR Egger   18 1.003 0.009    0.986    1.021 0.720
## 2           Weighted median   18 1.004 0.007    0.991    1.018 0.530
## 3 Inverse variance weighted   18 1.004 0.005    0.994    1.014 0.400
## 4               Simple mode   18 1.015 0.009    0.997    1.033 0.129
## 5             Weighted mode   18 1.006 0.007    0.993    1.019 0.408

0.6 Wald ratio results