Contents


0.1 Overview


0.2 Actions

# Create the directory to match the repository in GitHub
mkdir /n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood

cd /n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood

0.3 Code

# MR of any RP expression changes in blood on overall breast cancer
setwd('/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood')

# Hand-curated RP list (no pseudogenes): 150
rp_list_expanded=read.csv('/n/home04/cdadams/ld/ldsc/expanded_verified.csv')
rp_list_expanded

# Look and clean; highlighted out but very important

# master_v8_n=read.csv("/n/home04/cdadams/ld/ldsc/v8_master_tissues_n.csv")
# head(master_v8_n)
# master_v8_n$X=NULL
# colnames(master_v8_n)[2] <- "chromosome"
# colnames(master_v8_n)[31] <- "MAF"
# master_v8_n$eaf=ifelse(master_v8_n$ref_factor=='1', master_v8_n$MAF, 1-master_v8_n$MAF)
# gtex_blood=master_v8_n[which(master_v8_n$tissue=="Whole_Blood"),]
# write.csv(gtex_blood, '/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/gtex_blood.csv')

gtex_blood=read.csv('/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/gtex_blood.csv')
gtex_blood$X=NULL
str(gtex_blood)
str(rp_list_expanded)
rp_list_expanded$all_RP=as.factor(rp_list_expanded$all_RP)
gtex_blood$gene_name=as.factor(gtex_blood$gene_name )
gtex_blood_RP=gtex_blood[which(gtex_blood$gene_name %in% rp_list_expanded$all_RP),]
dim(gtex_blood_RP)

gtex_blood_RPClean <- gtex_blood_RP[!duplicated(gtex_blood_RP$gene_name),]
dim(gtex_blood_RPClean)
table(gtex_blood_RPClean$SNP)

gtex_blood_RPClean$gene_name=droplevels(gtex_blood_RPClean$gene_name)
freq1=table(gtex_blood_RPClean$gene_name)
freq1=as.data.frame(freq1)
freq1  #150!
dim(freq1)
sum(freq1$Freq) #150

# Get the RP eqtls
# Select the P<5x10-06 eqtls
gtex_blood_RPClean_p_06=gtex_blood_RPClean[which(gtex_blood_RPClean$pval<0.000005),]

freq2=table(gtex_blood_RPClean_p_06$gene_name)
freq2=as.data.frame(freq2)
freq2
sum(freq2$Freq) #69

# # Order
gtex_blood_RPClean_p_06=gtex_blood_RPClean_p_06[order(gtex_blood_RPClean_p_06$gene_name, decreasing=TRUE),]
write.csv(gtex_blood_RPClean_p_06, '/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/gtex_blood_RPClean_p_06.csv')

# MR format
# Use samplesize_col = 'position' to keep position. Change after.

exposure_dat <- read_exposure_data(
  filename = "/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/gtex_blood_RPClean_p_06.csv",
  sep = ',',
  snp_col = 'SNP',
  beta_col = 'beta',
  se_col = 'se',
  effect_allele_col = 'effect_allele',
  phenotype_col = 'tissue',
  other_allele_col = 'other_allele',
  eaf_col = 'eaf',
  samplesize_col = 'position',
  gene_col = 'gene_name',
  pval_col = 'pval', 
)

freq3=table(exposure_dat$gene.exposure)
freq3=as.data.frame(freq3)
dim(freq3)
sum(freq3$Freq)
freq3

# Format the colname names
exposure_dat$chr_name=exposure_dat$chr.exposure
exposure_dat$chrom_start=exposure_dat$samplesize.exposure
exposure_dat$samplesize.exposure=NULL
exposure_dat$gene_tissue=exposure_dat$exposure
exposure_dat$exposure="RP expression in blood"
exposure_dat$id.exposure="RP expression in blood"

# # Select the decreased expression eQTLs
# dat_decrease=exposure_dat[which(exposure_dat$beta.exposure<0),]
# 
# # Subset by just the cytosolic RPs
# decrease_RPs <- dat_decrease[startsWith(as.character(dat_decrease$gene.exposure), 'R'),]

# Extract the increased expression loci from BCAC
outcome_dat <- extract_outcome_data(exposure_dat$SNP, c('1126'),
  proxies = 1, rsq = 0.8, align_alleles = 1, palindromes = 1, maf_threshold = 0.3)
dat <- harmonise_data(exposure_dat, outcome_dat, action = 2)

freq4=table(dat$gene.exposure)
freq4=as.data.frame(freq4)
freq4
sum(freq4$Freq)
dim(freq4)

# Clump for LD
dat_clump <- clump_data(dat)

freq5=table(dat_clump$gene.exposure)
freq5=as.data.frame(freq5)
freq5
sum(freq5$Freq)
dim(freq5)


# Run MR
mr_results<- mr(dat_clump)
mr_results

dat=dat_clump

# Detect and remove outliers (outliers can indicate pleiotropy)
raddat <- format_radial(BXG=dat$beta.exposure, BYG=dat$beta.outcome, 
  seBXG=dat$se.exposure, seBYG=dat$se.outcome, RSID=dat$SNP)
ivwrad <- ivw_radial(raddat, alpha=0.05, weights=1)
ivwrad2 <- ivw_radial(raddat, alpha=0.05, weights=1)
dim(ivwrad$outliers)[1] 
outliers=ivwrad$outliers[1]
outliers
myvars=outliers$SNP
dat2 <- dat[! dat$SNP %in% myvars, ]
dim(dat2) 
dim(dat)
dat2$outcome="breast cancer (BCAC 2017)"
dat2$id.outcome="breast cancer (BCAC 2017)"

# Run MR without outliers
mr_results <- mr(dat2)
mr_results

singlesnp_results=mr_singlesnp(dat2, parameters = default_parameters(),
  single_method = "mr_wald_ratio", 
  all_method = c("mr_ivw",
  "mr_egger_regression", 
  "mr_weighted_mode",
  "mr_weighted_median"))

# Remove the snps that weren't used in the MR to have a record of the data source
singlesnp_results.2=singlesnp_results[1:34,]
dim(singlesnp_results.2)
dat2.1=dat2[which(dat2$SNP %in% singlesnp_results.2$SNP),]
dim(dat2.1)
dim(dat)
dat=dat2.1
dim(dat)

# r2 and Fstat

n=480
k=34
summary(dat$eaf.exposure)

test=scale(dat$beta.exposure)
str(test)
test <- scale(dat$beta.exposure)
test3=as.data.frame(test)
dim(test3)
dat$scaled.dat=test3$V1
summary(dat$scaled.dat)

p=dat$eaf.exposure
r2.1<-(2*(dat$scaled.dat^2)*p*(1-p))/1 #dat$beta.exposure

r2=sum(r2.1, na.rm = TRUE)
r2 

#r2=mean(r2.1, na.rm=TRUE)
#r2

Fstat <- r2*(n-1-k)/((1-r2)*k)
Fstat

# Combine results
sin=singlesnp_results
res=mr_results
het<-mr_heterogeneity(dat)
plt<-mr_pleiotropy_test(dat)

# Bug in function for combining; save separately
# all_res<-combine_all_mrresults(res,het,plt,sin,ao_slc=F,Exp=T,split.exposure=T,split.outcome=T)
# all_res$lower_intercept=all_res$intercept-1.96*all_res$intercept_se
# all_res$upper_intercept=all_res$intercept+1.96*all_res$intercept_se
# 
# head(all_res[,c("Method","outcome","exposure","nsnp","b","se","or", "or_lci95",  
#   "or_uci95","pval","intercept","intercept_se","intercept_pval",
#   "Q","Q_df","Q_pval")])
#write.csv(all_res,"combined_results_all_RP_MRP_blood_v8.csv")

write.csv(mr_results,'/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/res_all_RP_blood_v8.csv')
write.csv(singlesnp_results,'/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/singlesnp_all_RP_blood_v8.csv')
write.csv(dat, '/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/all_RP_blood_v8_SIMEX.csv')
write.csv(plt,'/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/plt_all_RP_blood_v8.csv')
write.csv(het,'/n/home04/cdadams/Aim2/MR-dysreg-GTEx-all-RP-BC-blood/het_all_RP_blood_v8.csv')

0.4 Figures

FALSE Saving 8 x 5 in image

FALSE Saving 8 x 5 in image

0.5 IVW & sensitivity estimator results

##                      method nsnp    or    se or_lci95 or_uci95  pval
## 1                  MR Egger   34 1.001 0.010    0.982    1.021 0.886
## 2           Weighted median   34 1.002 0.008    0.987    1.018 0.753
## 3 Inverse variance weighted   34 1.011 0.005    1.000    1.021 0.042
## 4               Simple mode   34 0.998 0.015    0.969    1.027 0.875
## 5             Weighted mode   34 1.000 0.007    0.986    1.015 0.956

0.6 Wald ratio results


0.7 Rpubs: https://rpubs.com/Charleen_D_Adams/756633