Contents


0.1 Overview


0.2 Actions

# Create the directory to match the repository in GitHub
mkdir /n/home04/cdadams/MR-dysreg-GTEx-all-RP-BC-ERn-breast
cd /n/home04/cdadams/MR-dysreg-GTEx-all-RP-BC-ERn-breast

cd /n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast

0.3 Code

# MR of RP expression in breast on ER- breast cancer
setwd('/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast')

# Hand-curated RP list (no pseudogenes): 150
rp_list_expanded=read.csv('/n/home04/cdadams/ld/ldsc/expanded_verified.csv')
rp_list_expanded

# Look and clean; highlighted out but very important

# master_v8_n=read.csv("/n/home04/cdadams/ld/ldsc/v8_master_tissues_n.csv")
# head(master_v8_n)
# master_v8_n$X=NULL
# colnames(master_v8_n)[2] <- "chromosome"
# colnames(master_v8_n)[31] <- "MAF"
# master_v8_n$eaf=ifelse(master_v8_n$ref_factor=='1', master_v8_n$MAF, 1-master_v8_n$MAF)
# gtex_breast=master_v8_n[which(master_v8_n$tissue=="Breast_Mammary_Tissue"),]
# write.csv(gtex_breast, '/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/gtex_breast.csv')

gtex_breast=read.csv('/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/gtex_breast.csv')
gtex_breast$X=NULL
str(gtex_breast)
str(rp_list_expanded)
rp_list_expanded$all_RP=as.factor(rp_list_expanded$all_RP)
gtex_breast$gene_name=as.factor(gtex_breast$gene_name )
gtex_breast_RP=gtex_breast[which(gtex_breast$gene_name %in% rp_list_expanded$all_RP),]
dim(gtex_breast_RP)

gtex_breast_RPClean <- gtex_breast_RP[!duplicated(gtex_breast_RP$gene_name),]
dim(gtex_breast_RPClean)
table(gtex_breast_RPClean$SNP)

gtex_breast_RPClean$gene_name=droplevels(gtex_breast_RPClean$gene_name)
freq1=table(gtex_breast_RPClean$gene_name)
freq1=as.data.frame(freq1)
freq1  #150!
dim(freq1)
sum(freq1$Freq) #150

# Get the RP eqtls
# Select the P<5x10-06 eqtls
gtex_breast_RPClean_p_06=gtex_breast_RPClean[which(gtex_breast_RPClean$pval<0.000005),]

freq2=table(gtex_breast_RPClean_p_06$gene_name)
freq2=as.data.frame(freq2)
freq2
sum(freq2$Freq) #69

# # Order
gtex_breast_RPClean_p_06=gtex_breast_RPClean_p_06[order(gtex_breast_RPClean_p_06$gene_name, decreasing=TRUE),]
write.csv(gtex_breast_RPClean_p_06, '/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/gtex_breast_RPClean_p_06.csv')

# MR format
# Use samplesize_col = 'position' to keep position. Change after.

exposure_dat <- read_exposure_data(
  filename = "/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/gtex_breast_RPClean_p_06.csv",
  sep = ',',
  snp_col = 'SNP',
  beta_col = 'beta',
  se_col = 'se',
  effect_allele_col = 'effect_allele',
  phenotype_col = 'tissue',
  other_allele_col = 'other_allele',
  eaf_col = 'eaf',
  samplesize_col = 'position',
  gene_col = 'gene_name',
  pval_col = 'pval', 
)

freq3=table(exposure_dat$gene.exposure)
freq3=as.data.frame(freq3)
dim(freq3)
sum(freq3$Freq)
freq3

# Format the colname names
exposure_dat$chr_name=exposure_dat$chr.exposure
exposure_dat$chrom_start=exposure_dat$samplesize.exposure
exposure_dat$samplesize.exposure=NULL
exposure_dat$gene_tissue=exposure_dat$exposure
exposure_dat$exposure="RP expression in breast"
exposure_dat$id.exposure="RP expression in breast"

# # Select the decreased expression eQTLs
# dat_decrease=exposure_dat[which(exposure_dat$beta.exposure<0),]
# 
# # Subset by just the cytosolic RPs
# decrease_RPs <- dat_decrease[startsWith(as.character(dat_decrease$gene.exposure), 'R'),]

# Extract the increased expression loci from BCAC
outcome_dat <- extract_outcome_data(exposure_dat$SNP, c('1128'),
  proxies = 1, rsq = 0.8, align_alleles = 1, palindromes = 1, maf_threshold = 0.3)
dat <- harmonise_data(exposure_dat, outcome_dat, action = 2)

freq4=table(dat$gene.exposure)
freq4=as.data.frame(freq4)
freq4
sum(freq4$Freq)
dim(freq4)

# Clump for LD
dat_clump <- clump_data(dat)

freq5=table(dat_clump$gene.exposure)
freq5=as.data.frame(freq5)
freq5
sum(freq5$Freq)
dim(freq5)

# Run MR
mr_results<- mr(dat_clump)
mr_results

dat=dat_clump

# Detect and remove outliers (outliers can indicate pleiotropy)
raddat <- format_radial(BXG=dat$beta.exposure, BYG=dat$beta.outcome, 
  seBXG=dat$se.exposure, seBYG=dat$se.outcome, RSID=dat$SNP)
ivwrad <- ivw_radial(raddat, alpha=0.05, weights=1)
ivwrad2 <- ivw_radial(raddat, alpha=0.05, weights=1)
dim(ivwrad$outliers)[1] 
outliers=ivwrad$outliers[1]
outliers
myvars=outliers$SNP
dat2 <- dat[! dat$SNP %in% myvars, ]
dim(dat2) 
dim(dat)
dat2$outcome="ER- breast cancer (BCAC)"
dat2$id.outcome="ER- breast cancer (BCAC)"

# Run MR without outliers
mr_results <- mr(dat2)
mr_results

singlesnp_results=mr_singlesnp(dat2, parameters = default_parameters(),
  single_method = "mr_wald_ratio", 
  all_method = c("mr_ivw",
  "mr_egger_regression", 
  "mr_weighted_mode",
  "mr_weighted_median"))

# Remove the snps that weren't used in the MR to have a record of the data source
singlesnp_results.2=singlesnp_results[1:35,]
head(singlesnp_results.2)
dim(singlesnp_results.2)
dat2.1=dat2[which(dat2$SNP %in% singlesnp_results.2$SNP),]
dim(dat2.1)
dim(dat)
dat=dat2.1
dim(dat)

# r2 and Fstat

n=480
k=35
summary(dat$eaf.exposure)

test=scale(dat$beta.exposure)
str(test)
test <- scale(dat$beta.exposure)
test3=as.data.frame(test)
dim(test3)
dat$scaled.dat=test3$V1
summary(dat$scaled.dat)

p=dat$eaf.exposure
r2.1<-(2*(dat$scaled.dat^2)*p*(1-p))/1 #dat$beta.exposure

r2=sum(r2.1, na.rm = TRUE)
r2 

#r2=mean(r2.1, na.rm=TRUE)
#r2

Fstat <- r2*(n-1-k)/((1-r2)*k)
Fstat

# Combine results
sin=singlesnp_results
res=mr_results
het<-mr_heterogeneity(dat)
plt<-mr_pleiotropy_test(dat)

# Bug in function for combining; save separately
# all_res<-combine_all_mrresults(res,het,plt,sin,ao_slc=F,Exp=T,split.exposure=T,split.outcome=T)
# all_res$lower_intercept=all_res$intercept-1.96*all_res$intercept_se
# all_res$upper_intercept=all_res$intercept+1.96*all_res$intercept_se
# 
# head(all_res[,c("Method","outcome","exposure","nsnp","b","se","or", "or_lci95",  
#   "or_uci95","pval","intercept","intercept_se","intercept_pval",
#   "Q","Q_df","Q_pval")])
#write.csv(all_res,"combined_results_all_RP_MRP_breast_v8.csv")

write.csv(mr_results,'/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/res_all_RP_breast_ERn_v8.csv')
write.csv(singlesnp_results,'/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/singlesnp_all_RP_breast_ERn_v8.csv')
write.csv(dat, '/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/all_RP_breast_v8_SIMEX_ERn.csv')
write.csv(plt,'/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/plt_all_RP_breast_ERn_v8.csv')
write.csv(het,'/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-ERn-breast/het_all_RP_breast_v8_ERn.csv')

0.4 Figures

FALSE Saving 8 x 5 in image

FALSE Saving 8 x 5 in image

0.5 IVW & sensitivity estimator results

##                      method nsnp    or    se or_lci95 or_uci95  pval
## 1                  MR Egger   35 1.006 0.012    0.982    1.030 0.641
## 2           Weighted median   35 1.008 0.009    0.989    1.026 0.419
## 3 Inverse variance weighted   35 1.009 0.007    0.996    1.022 0.173
## 4               Simple mode   35 1.009 0.013    0.983    1.036 0.485
## 5             Weighted mode   35 1.006 0.009    0.987    1.025 0.549

0.6 Wald ratio results


0.7 Rpubs: https://rpubs.com/Charleen_D_Adams/756480