2016-05-17 50% liver dataset

rm(list = ls())
gc()

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 372365 19.9     750400 40.1   592000 31.7
## Vcells 630004  4.9    1308461 10.0   937364  7.2

# set the working directory
setwd("/Volumes/Transcend/Thesis_project/subsetted_liver")
# subset dataset 
sebsetn <- 15
mouse.liver.expression.eqtl <-read.table(file="2016-05-16 mouse.liver.expression.eqtl.txt",  header=T)
head(mouse.liver.expression.eqtl)

##       ProbeSet   BXD1  BXD11  BXD12  BXD13  BXD14  BXD15  BXD16  BXD18
## 1   1415670_at 10.090 10.200 10.300 10.208  9.830 10.238  9.914 10.348
## 2   1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3   1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4   1415673_at  7.535  7.382  7.566  7.162  7.403  7.342  7.213  7.581
## 5 1415674_a_at  9.757  9.972  9.269  9.873  9.354  9.918  9.459  9.655
## 6   1415675_at  9.029  9.009  9.245  9.282  9.415  9.098  9.060  8.937
##    BXD19   BXD2  BXD20  BXD21  BXD24 BXD24a  BXD27  BXD28  BXD29  BXD31
## 1  9.939  9.871 10.077 10.159  9.746  9.890 10.286 10.177  9.959  9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4  7.551  7.368  7.251  7.373  7.408  7.473  7.238  7.424  7.475  7.491
## 5  9.544  9.557  9.460  9.322  9.795  9.758  9.628  9.263  9.671  9.645
## 6  8.995  9.104  9.123  9.086  9.087  9.040  8.844  9.173  8.951  9.131
##    BXD32  BXD33  BXD34  BXD36  BXD38  BXD39  BXD40  BXD42   BXD5   BXD6
## 1 10.059 10.102 10.174 10.022 10.364  9.745 10.074  9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4  7.419  7.386  7.496  7.369  7.265  7.130  7.149  7.383  7.439  7.233
## 5  9.368  9.741  9.721  9.684  9.387  9.649  9.693  9.793  9.840  9.519
## 6  9.108  9.122  9.153  9.008  9.138  9.126  9.233  9.060  9.134  9.081
##     BXD8   BXD9
## 1  9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4  7.397  7.390
## 5  9.340  9.160
## 6  8.926  9.274

dim(mouse.liver.expression.eqtl)

## [1] 20634    31

set.seed(50)
sub.mouse.liver.expression.eqtl <- mouse.liver.expression.eqtl[, c(1, sample(2:dim(mouse.liver.expression.eqtl)[2],sebsetn, replace=FALSE))]

head(sub.mouse.liver.expression.eqtl)

##       ProbeSet  BXD36  BXD24  BXD15  BXD34 BXD24a  BXD11  BXD29  BXD27
## 1   1415670_at 10.022  9.746 10.238 10.174  9.890 10.200  9.959 10.286
## 2   1415671_at 10.923 11.104 11.120 11.059 10.979 11.088 11.071 10.905
## 3   1415672_at 11.545 11.499 11.318 11.490 11.531 11.417 11.502 11.666
## 4   1415673_at  7.369  7.408  7.342  7.496  7.473  7.382  7.475  7.238
## 5 1415674_a_at  9.684  9.795  9.918  9.721  9.758  9.972  9.671  9.628
## 6   1415675_at  9.008  9.087  9.098  9.153  9.040  9.009  8.951  8.844
##     BXD1  BXD12  BXD18   BXD6  BXD21  BXD40  BXD14
## 1 10.090 10.300 10.348 10.069 10.159 10.074  9.830
## 2 10.932 11.007 11.123 11.043 10.969 11.067 10.955
## 3 11.432 11.442 11.561 11.504 11.328 11.543 11.561
## 4  7.535  7.566  7.581  7.233  7.373  7.149  7.403
## 5  9.757  9.269  9.655  9.519  9.322  9.693  9.354
## 6  9.029  9.245  8.937  9.081  9.086  9.233  9.415

dim(sub.mouse.liver.expression.eqtl)

## [1] 20634    16

write.table(sub.mouse.liver.expression.eqtl,file="2016-05-09 sub.mouse.liver.expression.eqtl.txt", sep="\t", row.names=FALSE, quote=FALSE)

#subset liver snp expression data
BXD.geno.SNP.eqtl.for.liver <-read.table(file="2016-05-16 BXD.geno.SNP.eqtl.for.liver.txt",  header=T)
head(BXD.geno.SNP.eqtl.for.liver)

##        Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs13459051    0     1     0     1     0     1     0     0     0    0
## 2 rs13459060    0     0     0     1     1     0     1     0     1    0
## 3 rs13459062    1     0     0     1     1     0     1     0     1    0
## 4 rs13459063    0     0     0     1     1     0     1     0     1    0
## 5 rs13459064    1     1     0     1     1     0     1     1     1    1
## 6 rs13459069    0     1     1     0     0     1     0     0     0    0
##   BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1     1     0     1      1     1     1     1     0     1     1     1     0
## 2     0     1     1      1     0     0     0     1     1     0     0     1
## 3     0     1     1      1     0     0     0     1     1     0     0     1
## 4     0     1     1      1     0     0     0     1     1     0     0     1
## 5     1     1     1      1     0     0     1     1     1     0     0     1
## 6     0     1     0      0     1     1     0     0     0     0     1     1
##   BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1     0     1     0     0    1    1    0    0
## 2     0     0     1     1    1    0    0    1
## 3     0     0     1     1    1    0    1    1
## 4     0     0     1     1    1    0    1    1
## 5     1     1     1     1    1    0    0    0
## 6     1     1     0     1    1    0    0    1

dim(BXD.geno.SNP.eqtl.for.liver)

## [1] 3024   31

set.seed(50)
sub.BXD.geno.SNP.eqtl.for.liver <- BXD.geno.SNP.eqtl.for.liver[, c(1, sample(2:dim(BXD.geno.SNP.eqtl.for.liver)[2],sebsetn, replace=FALSE))]
head(sub.BXD.geno.SNP.eqtl.for.liver)

##        Locus BXD36 BXD24 BXD15 BXD34 BXD24a BXD11 BXD29 BXD27 BXD1 BXD12
## 1 rs13459051     0     1     1     1      1     1     1     1    0     0
## 2 rs13459060     1     1     0     0      1     0     0     0    0     0
## 3 rs13459062     1     1     0     0      1     0     0     0    1     0
## 4 rs13459063     1     1     0     0      1     0     0     0    0     0
## 5 rs13459064     1     1     0     0      1     1     1     0    1     0
## 6 rs13459069     1     0     1     1      0     1     0     1    0     1
##   BXD18 BXD6 BXD21 BXD40 BXD14
## 1     0    1     0     0     0
## 2     0    0     1     1     1
## 3     0    0     1     1     1
## 4     0    0     1     1     1
## 5     1    0     1     1     1
## 6     0    0     1     0     0

dim(sub.BXD.geno.SNP.eqtl.for.liver)

## [1] 3024   16

write.table(sub.BXD.geno.SNP.eqtl.for.liver,file="2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt", sep="\t", row.names=FALSE, quote=FALSE)

library(MatrixEQTL)
## Location of the package with the data files.
base.dir = "/Volumes/Transcend/Thesis_project/subsetted_liver";
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste(base.dir, "/2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt", sep="");
snps_location_file_name = paste(base.dir, "/2016-05-16 BXD.geno.loc.eqtl.for.liver.txt", sep="");
# Gene expression file name
expression_file_name = paste(base.dir, "/2016-05-09 sub.mouse.liver.expression.eqtl.txt", sep="");
gene_location_file_name = paste(base.dir, "/2016-05-16 liver.gene.loc.txt", sep="");
# Covariates file name
# Set to character() for no covariates
covariates_file_name = character() ;

# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();

# Only associations significant at this level will be saved
pvOutputThreshold_cis = 1;
pvOutputThreshold_tra = 0.000000000000005;

# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;


## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t";      # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;
snps$fileSkipColumns = 1;
snps$fileSliceSize = 2000;
snps$LoadFile(SNP_file_name);

## Rows read:  2,000 
## Rows read:  3024  done.

## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t";
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1;
gene$fileSkipColumns = 1;
gene$fileSliceSize = 2000;
gene$LoadFile(expression_file_name);

## Rows read:  2,000 
## Rows read:  4,000 
## Rows read:  6,000 
## Rows read:  8,000 
## Rows read:  10,000 
## Rows read:  12,000 
## Rows read:  14,000 
## Rows read:  16,000 
## Rows read:  18,000 
## Rows read:  20,000 
## Rows read:  20634  done.

## Load covariates
cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t";      # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1;          # one row of column labels
cvrt$fileSkipColumns = 1;       # one column of row labels
if(length(covariates_file_name)>0) {
cvrt$LoadFile(covariates_file_name);
}

## Run the analysis

snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
head(genepos)

##       probeset Chromosome     Start       End
## 1   1415670_at          6  87887814  87913595
## 2   1415671_at          8 105524465 105566047
## 3   1415672_at          8  23241353  23257074
## 4   1415673_at          5 129765558 129787449
## 5 1415674_a_at          9  44403761  44407548
## 6   1415675_at          2  32570858  32573579

me = Matrix_eQTL_main(
  snps = snps,
  gene = gene,
  output_file_name = output_file_name_tra,
  pvOutputThreshold = pvOutputThreshold_tra,
  useModel = useModel,
  errorCovariance = numeric(),
  verbose = TRUE,
  output_file_name.cis = output_file_name_cis,
  pvOutputThreshold.cis = pvOutputThreshold_cis,
  snpspos = snpspos,
  genepos = genepos,
  cisDist = cisDist,
  pvalue.hist = TRUE,
  min.pv.by.genesnp = FALSE,
  noFDRsaveMemory = FALSE);

## Matching data files and location files 
## 20634 of 20634  genes matched
## 3024 of 3024  SNPs matched
## Task finished in  0.026  seconds
## Reordering SNPs
##  
## Task finished in  0.095  seconds
## Reordering genes
##  
## Task finished in  0.127  seconds
## Processing covariates 
## Task finished in  0.001  seconds
## Processing gene expression data (imputation, residualization, etc.) 
## Task finished in  0.017  seconds
## Creating output file(s) 
## Task finished in  0.014  seconds
## Performing eQTL analysis 
##  4.54% done, 5,899 cis-eQTLs, 0 trans-eQTLs
##  9.09% done, 11,411 cis-eQTLs, 0 trans-eQTLs
## 13.63% done, 17,998 cis-eQTLs, 0 trans-eQTLs
## 18.18% done, 22,514 cis-eQTLs, 0 trans-eQTLs
## 22.72% done, 27,349 cis-eQTLs, 0 trans-eQTLs
## 27.27% done, 31,978 cis-eQTLs, 0 trans-eQTLs
## 31.81% done, 38,098 cis-eQTLs, 0 trans-eQTLs
## 36.36% done, 38,605 cis-eQTLs, 0 trans-eQTLs
## 40.90% done, 0 trans-eQTLs
## 45.45% done, 0 trans-eQTLs
## 50.00% done, 0 trans-eQTLs
## 54.54% done, 0 trans-eQTLs
## 59.09% done, 0 trans-eQTLs
## 63.63% done, 0 trans-eQTLs
## 68.18% done, 0 trans-eQTLs
## 72.72% done, 0 trans-eQTLs
## 77.27% done, 0 trans-eQTLs
## 81.81% done, 0 trans-eQTLs
## 86.36% done, 42,998 cis-eQTLs, 0 trans-eQTLs
## 90.90% done, 48,565 cis-eQTLs, 16 trans-eQTLs
## 95.45% done, 54,309 cis-eQTLs, 16 trans-eQTLs
## 100.00% done, 54,955 cis-eQTLs, 16 trans-eQTLs
## Task finished in  7.849  seconds
##

unlink(output_file_name_cis);
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')

## Analysis done in: 7.523  seconds

cat('Detected local eQTLs:','\n')

## Detected local eQTLs:

cis.eqtls<-me$cis$eqtls
head(cis.eqtls)

##        snps       gene statistic       pvalue          FDR      beta
## 1 rs4163042 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 2 rs4163058 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 3 rs4163391 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 4 rs4151923 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 5 rs3090019 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 6 rs4164131 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833

dim(cis.eqtls)

## [1] 54955     6

cis.eqtls$beta_se <-cis.eqtls$beta/cis.eqtls$statistic
write.table(cis.eqtls,file="2016-05-09 sub.mouseliver.cis.1M.eqtls.txt", sep="\t", row.names=FALSE, quote=FALSE)


# eqtl analysis for lung
## Location of the package with the data files.
base.dir = "/Volumes/Transcend/Thesis_project/subsetted_liver";
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste(base.dir, "/2016-05-16 BXD.geno.SNP.eqtl.for.lung.txt", sep="");
snps_location_file_name = paste(base.dir, "/2016-05-16 BXD.geno.loc.eqtl.for.lung.txt", sep="");
# Gene expression file name
expression_file_name = paste(base.dir, "/2016-05-16 mouse.lung.expression.eqtl.txt", sep="");
gene_location_file_name = paste(base.dir, "/2016-05-16 lung.gene.loc.txt", sep="");
# Covariates file name
# Set to character() for no covariates
covariates_file_name = character() ;

# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();

# Only associations significant at this level will be saved
pvOutputThreshold_cis = 1;
pvOutputThreshold_tra = 0.000000000000005;

# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;


## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t";      # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;
snps$fileSkipColumns = 1;
snps$fileSliceSize = 2000;
snps$LoadFile(SNP_file_name);

## Rows read:  2,000 
## Rows read:  3024  done.

## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t";
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1;
gene$fileSkipColumns = 1;
gene$fileSliceSize = 2000;
gene$LoadFile(expression_file_name);

## Rows read:  2,000 
## Rows read:  4,000 
## Rows read:  6,000 
## Rows read:  8,000 
## Rows read:  10,000 
## Rows read:  12,000 
## Rows read:  14,000 
## Rows read:  16,000 
## Rows read:  18,000 
## Rows read:  20,000 
## Rows read:  20634  done.

## Load covariates
cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t";      # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1;          # one row of column labels
cvrt$fileSkipColumns = 1;       # one column of row labels
if(length(covariates_file_name)>0) {
  cvrt$LoadFile(covariates_file_name);
}

## Run the analysis

snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
head(genepos)

##       probeset Chromosome     Start       End
## 1   1415670_at          6  87887814  87913595
## 2   1415671_at          8 105524465 105566047
## 3   1415672_at          8  23241353  23257074
## 4   1415673_at          5 129765558 129787449
## 5 1415674_a_at          9  44403761  44407548
## 6   1415675_at          2  32570858  32573579

me = Matrix_eQTL_main(
  snps = snps,
  gene = gene,
  output_file_name = output_file_name_tra,
  pvOutputThreshold = pvOutputThreshold_tra,
  useModel = useModel,
  errorCovariance = numeric(),
  verbose = TRUE,
  output_file_name.cis = output_file_name_cis,
  pvOutputThreshold.cis = pvOutputThreshold_cis,
  snpspos = snpspos,
  genepos = genepos,
  cisDist = cisDist,
  pvalue.hist = TRUE,
  min.pv.by.genesnp = FALSE,
  noFDRsaveMemory = FALSE);

## Matching data files and location files 
## 20634 of 20634  genes matched
## 3024 of 3024  SNPs matched
## Task finished in  0.016  seconds
## Reordering SNPs
##  
## Task finished in  0.379  seconds
## Reordering genes
##  
## Task finished in  0.249  seconds
## Processing covariates 
## Task finished in  0.003  seconds
## Processing gene expression data (imputation, residualization, etc.) 
## Task finished in  0.056  seconds
## Creating output file(s) 
## Task finished in  0.015  seconds
## Performing eQTL analysis 
##  4.54% done, 5,899 cis-eQTLs, 87 trans-eQTLs
##  9.09% done, 11,411 cis-eQTLs, 231 trans-eQTLs
## 13.63% done, 17,998 cis-eQTLs, 287 trans-eQTLs
## 18.18% done, 22,514 cis-eQTLs, 326 trans-eQTLs
## 22.72% done, 27,349 cis-eQTLs, 422 trans-eQTLs
## 27.27% done, 31,978 cis-eQTLs, 452 trans-eQTLs
## 31.81% done, 38,098 cis-eQTLs, 511 trans-eQTLs
## 36.36% done, 38,605 cis-eQTLs, 514 trans-eQTLs
## 40.90% done, 514 trans-eQTLs
## 45.45% done, 514 trans-eQTLs
## 50.00% done, 516 trans-eQTLs
## 54.54% done, 518 trans-eQTLs
## 59.09% done, 518 trans-eQTLs
## 63.63% done, 518 trans-eQTLs
## 68.18% done, 518 trans-eQTLs
## 72.72% done, 518 trans-eQTLs
## 77.27% done, 518 trans-eQTLs
## 81.81% done, 518 trans-eQTLs
## 86.36% done, 42,998 cis-eQTLs, 577 trans-eQTLs
## 90.90% done, 48,565 cis-eQTLs, 615 trans-eQTLs
## 95.45% done, 54,309 cis-eQTLs, 663 trans-eQTLs
## 100.00% done, 54,955 cis-eQTLs, 671 trans-eQTLs
## Task finished in  10.17  seconds
##

unlink(output_file_name_cis);
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')

## Analysis done in: 9.921  seconds

cat('Detected local eQTLs:','\n')

## Detected local eQTLs:

cis.eqtls<-me$cis$eqtls
head(cis.eqtls)

##         snps       gene statistic       pvalue          FDR      beta
## 1  rs3681670 1421144_at -75.53535 2.361797e-47 6.489628e-43 -3.093200
## 2  rs3666583 1421144_at -75.53535 2.361797e-47 6.489628e-43 -3.093200
## 3 rs13481364 1438758_at -50.66421 5.603003e-40 4.398757e-36 -2.167143
## 4  rs3708704 1438758_at -50.66421 5.603003e-40 4.398757e-36 -2.167143
## 5 rs13481365 1438758_at -50.66421 5.603003e-40 4.398757e-36 -2.167143
## 6  rs3685299 1438758_at -50.66421 5.603003e-40 4.398757e-36 -2.167143

dim(cis.eqtls)

## [1] 54955     6

cis.eqtls$beta_se <-cis.eqtls$beta/cis.eqtls$statistic
write.table(cis.eqtls,file="2016-05-16 mouselung.cis.1M.eqtls.txt", sep="\t", row.names=FALSE, quote=FALSE)


# load mouse lung cis eqtl result
lung.mouse.eQTL<-read.table(file="2016-05-16 mouselung.cis.1M.eqtls.txt",  header=T)
# load mouse liver cis eqtl result
liver.mouse.eQTL<-read.table(file="2016-05-09 sub.mouseliver.cis.1M.eqtls.txt",  header=T)
mouse4302ensembl_id<-read.table(file="2015-12-04 mouse4302ensembl_id.txt",  header=T)
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt",  header=T)
# Add ensemble id annoatation to the data 
lung.mouse.eQTL<-merge(lung.mouse.eQTL, mouse4302ensembl_id, by.x = "gene", by.y="probe_id")
liver.mouse.eQTL<-merge(liver.mouse.eQTL, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
head(lung.mouse.eQTL)

##         gene       snps  statistic    pvalue       FDR        beta
## 1 1415670_at rs13478880  1.1363521 0.2621028 0.5270176  0.06786667
## 2 1415670_at rs13478876  0.9133601 0.3661462 0.6302137  0.05583410
## 3 1415670_at  rs3713705  1.1363521 0.2621028 0.5270176  0.06786667
## 4 1415670_at rs13475374  1.0503967 0.2994035 0.5651655  0.06286667
## 5 1415672_at rs13479651 -1.3052534 0.1987480 0.4483475 -0.03993582
## 6 1415672_at  rs3661760 -1.3052534 0.1987480 0.4483475 -0.03993582
##      beta_se         ensembl_id
## 1 0.05972327 ENSMUSG00000030058
## 2 0.06113044 ENSMUSG00000030058
## 3 0.05972327 ENSMUSG00000030058
## 4 0.05985040 ENSMUSG00000030058
## 5 0.03059622 ENSMUSG00000015341
## 6 0.03059622 ENSMUSG00000015341

head(liver.mouse.eQTL)

##         gene       snps  statistic    pvalue       FDR         beta
## 1 1415670_at  rs3713705 -0.4855043 0.6353986 0.8827614 -0.045982143
## 2 1415670_at rs13475374 -0.4855043 0.6353986 0.8827614 -0.045982143
## 3 1415670_at rs13478880 -0.4855043 0.6353986 0.8827614 -0.045982143
## 4 1415670_at rs13478876 -0.4855043 0.6353986 0.8827614 -0.045982143
## 5 1415672_at  rs3661760 -0.1379165 0.8924198 0.9733830 -0.006732143
## 6 1415672_at rs13479651 -0.1379165 0.8924198 0.9733830 -0.006732143
##      beta_se         ensembl_id
## 1 0.09471005 ENSMUSG00000030058
## 2 0.09471005 ENSMUSG00000030058
## 3 0.09471005 ENSMUSG00000030058
## 4 0.09471005 ENSMUSG00000030058
## 5 0.04881319 ENSMUSG00000015341
## 6 0.04881319 ENSMUSG00000015341

library(data.table)
# Select Gene-SNP pair with minimum P value
lung.mouse.eQTL.min <- data.table(lung.mouse.eQTL, key=c('ensembl_id', "pvalue"))
lung.mouse.eQTL.min<-lung.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
lung.mouse.eQTL.min<-as.data.frame(lung.mouse.eQTL.min)

liver.mouse.eQTL.min <- data.table(liver.mouse.eQTL, key=c('ensembl_id', "pvalue"))
liver.mouse.eQTL.min<-liver.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
liver.mouse.eQTL.min<-as.data.frame(liver.mouse.eQTL.min)

library(plyr)
lung.mouse.eQTL.min<-rename(lung.mouse.eQTL.min, c("pvalue"="lung_pvalue", "beta"="lung.beta", "beta_se"="lung.beta_se"))
liver.mouse.eQTL.min<-rename(liver.mouse.eQTL.min, c("pvalue"="liver_pvalue", "beta"="liver.beta", "beta_se"="liver.beta_se"))

head(lung.mouse.eQTL.min)

##           gene       snps  statistic lung_pvalue        FDR   lung.beta
## 1   1428645_at  rs6259798 -1.4324741 0.159238340 0.39180403 -0.05980000
## 2   1416677_at  rs3707743 -0.1308388 0.896513064 0.95927760 -0.02366071
## 3   1451678_at rs13481264 -0.3904599 0.698124238 0.86317736 -0.01356126
## 4   1425955_at rs13478643 -1.0185410 0.314117648 0.58046119 -0.07813675
## 5 1426241_a_at  rs4224744  0.9192838 0.363075471 0.62709198  0.05284706
## 6 1439267_x_at  rs4136521  3.4048233 0.001444641 0.01001896  0.18997619
##   lung.beta_se         ensembl_id
## 1   0.04174596 ENSMUSG00000000001
## 2   0.18083869 ENSMUSG00000000049
## 3   0.03473152 ENSMUSG00000000056
## 4   0.07671439 ENSMUSG00000000058
## 5   0.05748721 ENSMUSG00000000085
## 6   0.05579620 ENSMUSG00000000088

head(liver.mouse.eQTL.min)

##           gene       snps  statistic liver_pvalue        FDR  liver.beta
## 1   1428645_at rs13477320 -1.3801032  0.190827641 0.56008906 -0.07366667
## 2   1416677_at  rs3670642  3.2114557  0.006815612 0.08201375  0.07875000
## 3   1425344_at rs13481264 -1.5163463  0.153365833 0.51030633 -0.24430000
## 4   1417327_at  rs3655269  1.1762017  0.260604313 0.63908151  0.11090000
## 5 1426241_a_at  rs3675629 -0.7745039  0.452489862 0.78891766 -0.01910000
## 6   1448153_at  rs3708863  4.6928846  0.000420588 0.01068581  0.23727778
##   liver.beta_se         ensembl_id
## 1    0.05337765 ENSMUSG00000000001
## 2    0.02452159 ENSMUSG00000000049
## 3    0.16111096 ENSMUSG00000000056
## 4    0.09428655 ENSMUSG00000000058
## 5    0.02466095 ENSMUSG00000000085
## 6    0.05056118 ENSMUSG00000000088

tail(liver.mouse.eQTL.min)

##               gene       snps  statistic liver_pvalue       FDR
## 10596   1434694_at rs13476408 -1.7224576   0.10867553 0.4321929
## 10597   1437645_at  rs3686133  2.0044772   0.06630646 0.3372992
## 10598 1449939_s_at rs13481642 -0.4164743   0.68385715 0.9044643
## 10599   1422547_at  rs4165065 -1.6637788   0.12006199 0.4543101
## 10600   1451476_at  rs4165069 -0.1824077   0.85807655 0.9624377
## 10601 1453995_a_at  rs4165065 -1.4418659   0.17299524 0.5369037
##        liver.beta liver.beta_se         ensembl_id
## 10596 -0.16160714    0.09382358 ENSMUSG00000099041
## 10597  0.10340000    0.05158452 ENSMUSG00000099083
## 10598 -0.01975000    0.04742189 ENSMUSG00000099116
## 10599 -0.10844444    0.06517960 ENSMUSG00000099164
## 10600 -0.01105556    0.06060904 ENSMUSG00000099262
## 10601 -0.05450000    0.03779824 ENSMUSG00000099305

dim(lung.mouse.eQTL.min)

## [1] 10601     8

dim(liver.mouse.eQTL.min)

## [1] 10601     8

# lung, liver eqtl with ensemble_id
merged.mouse.eQTL.min<-merge(lung.mouse.eQTL.min, liver.mouse.eQTL.min, by.x = "ensembl_id", by.y="ensembl_id")
head(merged.mouse.eQTL.min)

##           ensembl_id       gene.x     snps.x statistic.x lung_pvalue
## 1 ENSMUSG00000000001   1428645_at  rs6259798  -1.4324741 0.159238340
## 2 ENSMUSG00000000049   1416677_at  rs3707743  -0.1308388 0.896513064
## 3 ENSMUSG00000000056   1451678_at rs13481264  -0.3904599 0.698124238
## 4 ENSMUSG00000000058   1425955_at rs13478643  -1.0185410 0.314117648
## 5 ENSMUSG00000000085 1426241_a_at  rs4224744   0.9192838 0.363075471
## 6 ENSMUSG00000000088 1439267_x_at  rs4136521   3.4048233 0.001444641
##        FDR.x   lung.beta lung.beta_se       gene.y     snps.y statistic.y
## 1 0.39180403 -0.05980000   0.04174596   1428645_at rs13477320  -1.3801032
## 2 0.95927760 -0.02366071   0.18083869   1416677_at  rs3670642   3.2114557
## 3 0.86317736 -0.01356126   0.03473152   1425344_at rs13481264  -1.5163463
## 4 0.58046119 -0.07813675   0.07671439   1417327_at  rs3655269   1.1762017
## 5 0.62709198  0.05284706   0.05748721 1426241_a_at  rs3675629  -0.7745039
## 6 0.01001896  0.18997619   0.05579620   1448153_at  rs3708863   4.6928846
##   liver_pvalue      FDR.y  liver.beta liver.beta_se
## 1  0.190827641 0.56008906 -0.07366667    0.05337765
## 2  0.006815612 0.08201375  0.07875000    0.02452159
## 3  0.153365833 0.51030633 -0.24430000    0.16111096
## 4  0.260604313 0.63908151  0.11090000    0.09428655
## 5  0.452489862 0.78891766 -0.01910000    0.02466095
## 6  0.000420588 0.01068581  0.23727778    0.05056118

dim(merged.mouse.eQTL.min)

## [1] 10601    15

merged.mouse.eQTL.min<-data.frame(merged.mouse.eQTL.min)
merged.mouse.eQTL.min<-merged.mouse.eQTL.min[, c(1, 5, 7, 8, 12, 14, 15 )]
head(merged.mouse.eQTL.min)

##           ensembl_id lung_pvalue   lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.159238340 -0.05980000   0.04174596  0.190827641
## 2 ENSMUSG00000000049 0.896513064 -0.02366071   0.18083869  0.006815612
## 3 ENSMUSG00000000056 0.698124238 -0.01356126   0.03473152  0.153365833
## 4 ENSMUSG00000000058 0.314117648 -0.07813675   0.07671439  0.260604313
## 5 ENSMUSG00000000085 0.363075471  0.05284706   0.05748721  0.452489862
## 6 ENSMUSG00000000088 0.001444641  0.18997619   0.05579620  0.000420588
##    liver.beta liver.beta_se
## 1 -0.07366667    0.05337765
## 2  0.07875000    0.02452159
## 3 -0.24430000    0.16111096
## 4  0.11090000    0.09428655
## 5 -0.01910000    0.02466095
## 6  0.23727778    0.05056118

write.table(merged.mouse.eQTL.min,file="2016-05-09 mouse.liver.expression.min.txt", sep="\t", row.names=FALSE, quote=FALSE)

merged.mouse.eQTL.min.variance2<-read.table(file="2016-05-09 mouse.liver.expression.min.txt",  header=T)

head(merged.mouse.eQTL.min.variance2)

##           ensembl_id lung_pvalue   lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.159238340 -0.05980000   0.04174596  0.190827641
## 2 ENSMUSG00000000049 0.896513064 -0.02366071   0.18083869  0.006815612
## 3 ENSMUSG00000000056 0.698124238 -0.01356126   0.03473152  0.153365833
## 4 ENSMUSG00000000058 0.314117648 -0.07813675   0.07671439  0.260604313
## 5 ENSMUSG00000000085 0.363075471  0.05284706   0.05748721  0.452489862
## 6 ENSMUSG00000000088 0.001444641  0.18997619   0.05579620  0.000420588
##    liver.beta liver.beta_se
## 1 -0.07366667    0.05337765
## 2  0.07875000    0.02452159
## 3 -0.24430000    0.16111096
## 4  0.11090000    0.09428655
## 5 -0.01910000    0.02466095
## 6  0.23727778    0.05056118

# caculate the absolute value of live/lung beta
merged.mouse.eQTL.min.variance2$abs_liver.beta<-abs(merged.mouse.eQTL.min.variance2$liver.beta)
merged.mouse.eQTL.min.variance2$abs_lung.beta<-abs(merged.mouse.eQTL.min.variance2$lung.beta)
# caculate negative log lung p value
merged.mouse.eQTL.min.variance2$neg_log_lung_pvalue<--log10(merged.mouse.eQTL.min.variance2$lung_pvalue)

# Simple linear regression between abs_liver.beta and abs_lung.beta
# fit1<-summary(lm(abs_liver.beta ~ abs_lung.beta, data=merged.mouse.eQTL.min.variance2))
# fit1
# tau<-fit1$sigma**2
# check association between abs_liver.beta and abs.lung.beta
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.2.4

ggplot(merged.mouse.eQTL.min.variance2, aes(x=abs_lung.beta, y=abs_liver.beta)) +geom_point()+geom_smooth(method=lm)

cor(merged.mouse.eQTL.min.variance2$abs_lung.beta, merged.mouse.eQTL.min.variance2$abs_liver.beta)

## [1] 0.3408502

merged.mouse.eQTL<-merged.mouse.eQTL.min.variance2
# retrieve ensembl_id
markers<-merged.mouse.eQTL[, 1]
# Yg=Ag + Bg*Xsnp+V
# retrieve betas.hat (liver.beta)
betas.hat<-merged.mouse.eQTL$abs_liver.beta
# retrieve liver.beta_se
se<-merged.mouse.eQTL$liver.beta_se

# creat Z matrix with 2 columns: 1 for intercept,abs_lung.beta (merged.mouse.eQTL[,10])
Z<-as.matrix(merged.mouse.eQTL$abs_lung.beta)
Z<-replace(Z,is.na(Z),0)
Z<-data.frame(1,Z) 
Z<-as.matrix(Z)
rowLength<-length(markers)
# liver.betas=Z*gama+T^2

# Regression: abs_liver.beta = intercept + beta*abs_lung.beta + error 
lmsummary<-summary(lm(abs_liver.beta~-1+Z, data=merged.mouse.eQTL))
lmsummary

## 
## Call:
## lm(formula = abs_liver.beta ~ -1 + Z, data = merged.mouse.eQTL)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1122 -0.0765 -0.0414  0.0207  4.4632 
## 
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)    
## ZX1 0.081681   0.002218   36.83   <2e-16 ***
## ZZ  0.317374   0.008503   37.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1956 on 10599 degrees of freedom
## Multiple R-squared:  0.3488, Adjusted R-squared:  0.3487 
## F-statistic:  2839 on 2 and 10599 DF,  p-value: < 2.2e-16

# error ~ N(0, Tau)
tau<-lmsummary$sigma**2
tau

## [1] 0.0382713

# output coeffieients (gamma matrix)
# gamma matrix
gamma<-as.matrix(lmsummary$coefficients[,1])
# trasnpose Z matrix
Z_transpose<-t(Z)
# create identity matrix
identity<-diag(nrow=rowLength) 
# original betas.hat
betas.hat<-as.matrix(betas.hat)
#creat V matrix for liver_residual_variance
V <- matrix(0, rowLength, rowLength)
# V, liver residual variance
diag(V) <- merged.mouse.eQTL$liver.beta_se^2
# Creat Tau matrix
Tau<- diag(tau, rowLength, rowLength)
# follow Chen's paper and cacualte s
s <-V + Tau
# create inverse function for inversing diagnoal matrix 
diag.inverse <- function(x){diag(1/diag(x), nrow(x), ncol(x))}
# create multiplication function for multiplicating two diagnoal matrix 
diag.multi <- function(x,y){diag(diag(x)*diag(y), nrow(x), ncol(x))}
# inverse s
S <-diag.inverse(s)
# follow chen's paper to caculate omega
omega<-diag.multi(S, V)
# retrieve omega value from the matrix 
omega.diag<-diag(omega )
# summary the omega value
summary(omega.diag)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005392 0.041920 0.076700 0.127100 0.153300 0.974400

# betas.thea<- S %*% Z %*% gamma + (identity-S) %*% betas.hat
# caculate betas.tieda with the formula in Chen's paper
betas.tieda<- omega %*% Z %*% gamma + (identity-omega) %*% betas.hat
# crbetas.tieda<- cromega %*% Z %*% gamma + (identity-cromega) %*% betas.hat
head(betas.tieda)

##            [,1]
## [1,] 0.07553701
## [2,] 0.07891150
## [3,] 0.18031964
## [4,] 0.11006680
## [5,] 0.02034127
## [6,] 0.23131037

head(betas.hat)

##            [,1]
## [1,] 0.07366667
## [2,] 0.07875000
## [3,] 0.24430000
## [4,] 0.11090000
## [5,] 0.01910000
## [6,] 0.23727778

#regression beta
regbeta <-Z %*% gamma
head(regbeta)

##            [,1]
## [1,] 0.10066029
## [2,] 0.08919064
## [3,] 0.08598534
## [4,] 0.10647989
## [5,] 0.09845361
## [6,] 0.14197477

summary(regbeta)

##        V1         
##  Min.   :0.08168  
##  1st Qu.:0.09287  
##  Median :0.10364  
##  Mean   :0.12437  
##  3rd Qu.:0.12665  
##  Max.   :1.24358

markers1<-as.character(markers)
# combine ensemble_id, betas.hat and betas.tieda
outputVector<-c(markers1,betas.hat,betas.tieda)
write.table(matrix(outputVector,rowLength),file="2016-04-26_hm_tau_hmresults.txt",col.names=FALSE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian<-read.table(file="2016-04-26_hm_tau_hmresults.txt")
colnames(liver.mouse.eQTL.bayesian)<-c( "ensembl_id", "betas.hat","betas.tieda")
head(liver.mouse.eQTL.bayesian)

##           ensembl_id  betas.hat betas.tieda
## 1 ENSMUSG00000000001 0.07366667  0.07553701
## 2 ENSMUSG00000000049 0.07875000  0.07891150
## 3 ENSMUSG00000000056 0.24430000  0.18031964
## 4 ENSMUSG00000000058 0.11090000  0.11006680
## 5 ENSMUSG00000000085 0.01910000  0.02034127
## 6 ENSMUSG00000000088 0.23727778  0.23131037

# merge dataset with betas.hat and betas.tieda
liver.mouse.eQTL.bayesian.all<- merge(liver.mouse.eQTL.bayesian, merged.mouse.eQTL.min.variance2, by = "ensembl_id")
head(liver.mouse.eQTL.bayesian.all)

##           ensembl_id  betas.hat betas.tieda lung_pvalue   lung.beta
## 1 ENSMUSG00000000001 0.07366667  0.07553701 0.159238340 -0.05980000
## 2 ENSMUSG00000000049 0.07875000  0.07891150 0.896513064 -0.02366071
## 3 ENSMUSG00000000056 0.24430000  0.18031964 0.698124238 -0.01356126
## 4 ENSMUSG00000000058 0.11090000  0.11006680 0.314117648 -0.07813675
## 5 ENSMUSG00000000085 0.01910000  0.02034127 0.363075471  0.05284706
## 6 ENSMUSG00000000088 0.23727778  0.23131037 0.001444641  0.18997619
##   lung.beta_se liver_pvalue  liver.beta liver.beta_se abs_liver.beta
## 1   0.04174596  0.190827641 -0.07366667    0.05337765     0.07366667
## 2   0.18083869  0.006815612  0.07875000    0.02452159     0.07875000
## 3   0.03473152  0.153365833 -0.24430000    0.16111096     0.24430000
## 4   0.07671439  0.260604313  0.11090000    0.09428655     0.11090000
## 5   0.05748721  0.452489862 -0.01910000    0.02466095     0.01910000
## 6   0.05579620  0.000420588  0.23727778    0.05056118     0.23727778
##   abs_lung.beta neg_log_lung_pvalue
## 1    0.05980000          0.79795236
## 2    0.02366071          0.04744338
## 3    0.01356126          0.15606728
## 4    0.07813675          0.50290766
## 5    0.05284706          0.44000309
## 6    0.18997619          2.84024014

write.table(liver.mouse.eQTL.bayesian.all,file="2016-05-09_liver.mouse.eQTL.bayesian.all.txt")
liver.mouse.eQTL.bayesian<-read.table(file="2016-05-09_liver.mouse.eQTL.bayesian.all.txt")
head(liver.mouse.eQTL.bayesian)

##           ensembl_id  betas.hat betas.tieda lung_pvalue   lung.beta
## 1 ENSMUSG00000000001 0.07366667  0.07553701 0.159238340 -0.05980000
## 2 ENSMUSG00000000049 0.07875000  0.07891150 0.896513064 -0.02366071
## 3 ENSMUSG00000000056 0.24430000  0.18031964 0.698124238 -0.01356126
## 4 ENSMUSG00000000058 0.11090000  0.11006680 0.314117648 -0.07813675
## 5 ENSMUSG00000000085 0.01910000  0.02034127 0.363075471  0.05284706
## 6 ENSMUSG00000000088 0.23727778  0.23131037 0.001444641  0.18997619
##   lung.beta_se liver_pvalue  liver.beta liver.beta_se abs_liver.beta
## 1   0.04174596  0.190827641 -0.07366667    0.05337765     0.07366667
## 2   0.18083869  0.006815612  0.07875000    0.02452159     0.07875000
## 3   0.03473152  0.153365833 -0.24430000    0.16111096     0.24430000
## 4   0.07671439  0.260604313  0.11090000    0.09428655     0.11090000
## 5   0.05748721  0.452489862 -0.01910000    0.02466095     0.01910000
## 6   0.05579620  0.000420588  0.23727778    0.05056118     0.23727778
##   abs_lung.beta neg_log_lung_pvalue
## 1    0.05980000          0.79795236
## 2    0.02366071          0.04744338
## 3    0.01356126          0.15606728
## 4    0.07813675          0.50290766
## 5    0.05284706          0.44000309
## 6    0.18997619          2.84024014

liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", 
                                                                        "liver.beta_se", "betas.tieda", 
                                                                         "liver_pvalue", "abs_lung.beta",
                                                                        "abs_lung.beta", "neg_log_lung_pvalue"))

head(liver.mouse.eQTL.bayesian)

##           ensembl_id  betas.hat liver.beta_se betas.tieda liver_pvalue
## 1 ENSMUSG00000000001 0.07366667    0.05337765  0.07553701  0.190827641
## 2 ENSMUSG00000000049 0.07875000    0.02452159  0.07891150  0.006815612
## 3 ENSMUSG00000000056 0.24430000    0.16111096  0.18031964  0.153365833
## 4 ENSMUSG00000000058 0.11090000    0.09428655  0.11006680  0.260604313
## 5 ENSMUSG00000000085 0.01910000    0.02466095  0.02034127  0.452489862
## 6 ENSMUSG00000000088 0.23727778    0.05056118  0.23131037  0.000420588
##   abs_lung.beta abs_lung.beta.1 neg_log_lung_pvalue
## 1    0.05980000      0.05980000          0.79795236
## 2    0.02366071      0.02366071          0.04744338
## 3    0.01356126      0.01356126          0.15606728
## 4    0.07813675      0.07813675          0.50290766
## 5    0.05284706      0.05284706          0.44000309
## 6    0.18997619      0.18997619          2.84024014

# Caculate variance for beta.tieda by following Brian Kulis' lecture notes
# Invert Tau and V
Tau_invert<-diag.inverse(Tau)
V_invert<-diag.inverse(V)
PS_invert<-Tau_invert + V_invert

# PS_invert<-Tau_invert+V_invert%*% Z  %*% Z_transpose # previous wrong code
# S in Brian Kulis' lecture note:PS
PS <- diag.inverse(PS_invert)
# retrieve posterior variance 
ps<-diag(PS)
range(ps)

## [1] 0.0002063442 0.0372924672

library(reshape)

## 
## Attaching package: 'reshape'

## The following objects are masked from 'package:plyr':
## 
##     rename, round_any

## The following object is masked from 'package:data.table':
## 
##     melt

# reshape posterior variance to long format 
ps.long <- melt(ps)
head(ps.long)

##          value
## 1 0.0026517589
## 2 0.0005920069
## 3 0.0154667362
## 4 0.0072141860
## 5 0.0005986493
## 6 0.0023963615

# Caculate sd: square root on variance
ps.long$betas.tieda.se<-(ps.long$value)^0.5
# combine sd to the data.frame
liver.mouse.eQTL.bayesian<-cbind(liver.mouse.eQTL.bayesian,ps.long$betas.tieda.se)
# head(liver.mouse.eQTL.bayesian)
# rename betas.tieda.se
liver.mouse.eQTL.bayesian<-rename(liver.mouse.eQTL.bayesian, c("ps.long$betas.tieda.se"="betas.tieda.se", "liver.beta_se"="betas.hat.se"))

liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", "betas.hat.se", 
                                                                        "betas.tieda", "betas.tieda.se",                                                                         
                                                                        "liver_pvalue", "abs_lung.beta", "neg_log_lung_pvalue"))
# head(liver.mouse.eQTL.bayesian)

# library(tigerstats)
# pnormGC(0, region="below", mean=0.002352829,sd=0.09972950)

# caculate probability of betas.tieda below 0 based on betas.tieda and standard deviation
liver.mouse.eQTL.bayesian$p.below.0 <- pnorm(0,liver.mouse.eQTL.bayesian$betas.tieda, liver.mouse.eQTL.bayesian$betas.tieda.se)
head(liver.mouse.eQTL.bayesian)

##           ensembl_id  betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667   0.05337765  0.07553701     0.05149523
## 2 ENSMUSG00000000049 0.07875000   0.02452159  0.07891150     0.02433119
## 3 ENSMUSG00000000056 0.24430000   0.16111096  0.18031964     0.12436533
## 4 ENSMUSG00000000058 0.11090000   0.09428655  0.11006680     0.08493636
## 5 ENSMUSG00000000085 0.01910000   0.02466095  0.02034127     0.02446731
## 6 ENSMUSG00000000088 0.23727778   0.05056118  0.23131037     0.04895265
##   liver_pvalue abs_lung.beta neg_log_lung_pvalue    p.below.0
## 1  0.190827641    0.05980000          0.79795236 7.120518e-02
## 2  0.006815612    0.02366071          0.04744338 5.909266e-04
## 3  0.153365833    0.01356126          0.15606728 7.354057e-02
## 4  0.260604313    0.07813675          0.50290766 9.750950e-02
## 5  0.452489862    0.05284706          0.44000309 2.028837e-01
## 6  0.000420588    0.18997619          2.84024014 1.149522e-06

dim(liver.mouse.eQTL.bayesian)

## [1] 10601     9

summary(liver.mouse.eQTL.bayesian$betas.tieda.se)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01436 0.04006 0.05418 0.06249 0.07659 0.19310

range(liver.mouse.eQTL.bayesian$p.below.0)

## [1] 0.0000000 0.4803112

write.table(liver.mouse.eQTL.bayesian,file="2016-05-04_liver.mouse.eQTL.bayesian with beta.txt")
liver.mouse.eQTL.bayesian <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian with beta.txt")
# head(liver.mouse.eQTL.bayesian)
# summary(liver.mouse.eQTL.bayesian$liver_residual_variance)
liver.mouse.eQTL.bayesian.4tau <- liver.mouse.eQTL.bayesian


# colnames(liver.mouse.eQTL.bayesian.4tau) <- c("ensembl_id", "betas.hat", "betas.hat.se", "betas.tieda", "betas.tieda.se", "liver_residual_variance", "liver_pvalue", "abs_lung.beta", 
#                                              "neg_log_lung_pvalue", "p.below.0", "betas.tieda2m", "betas.tieda3rd", "betas.tieda4max")
# head(liver.mouse.eQTL.bayesian.4tau)

# Introduce weight (Tmm) to adjust Tau with neg_log_lung_pvalue
liver.mouse.eQTL.bayesian.4tau$fzm <- liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue
# caculate ratio_fzm
liver.mouse.eQTL.bayesian.4tau$ratio_fzm <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/liver.mouse.eQTL.bayesian.4tau$fzm
range(liver.mouse.eQTL.bayesian.4tau$ratio_fzm)

## [1]   1 Inf

# set up threshold for ratio_fzm and caculate updated ratio_fzm (nratio_fzm)
threshold <- 0.05
liver.mouse.eQTL.bayesian.4tau$nratio_fzm <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$ratio_fzm > max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))] <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))
# liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$nratio_fzm >= max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log(threshold))] <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
summary(liver.mouse.eQTL.bayesian.4tau$nratio_fzm)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   25.04   35.84   29.41   35.84   35.84

# compare bayesian prediction to the traditional method 
# evaluate the predition with alle specific expreesion in the liver: Sandrine Lagarrigue's paper
liver.ASE <- read.csv(file= "ASE.genetics.113.153882-6.csv")
dim(liver.ASE)

## [1] 1191   19

head(liver.ASE)

##           replicate chr startExon        geneID SNPperExon3
## 1 M.CH. DxB and BxD   1   9535488          Rrs1          15
## 2  M.HF DxB and BxD   1   9535488          Rrs1          14
## 3  F.HF DxB and BxD   1   9535488          Rrs1          15
## 4 M.CH. DxB and BxD   1  37473929 6330578E17Rik           2
## 5 M.CH. DxB and BxD   1  58169979          Aox3           3
## 6  M.HF DxB and BxD   1  58169979          Aox3           3
##   sumBperExon.DxB4 sumDperExon.DxB4 sumBperExon.BxD4 sumDperExon.BxD4
## 1               45               19               50               25
## 2               74               39               66               30
## 3               76               20               77               40
## 4               78               32               47               17
## 5              473               82              225               27
## 6              252               56              263               53
##   FCadd1.DxB5 FCadd1.BxD5 BonBD.DxB6 BonBD.BxD6 pvalBH.DxB7 pvalBH.BxD7
## 1        2.30        1.96       0.70       0.67     1.0e-02     3.7e-02
## 2        1.88        2.16       0.65       0.69     1.1e-02     3.3e-03
## 3        3.67        1.90       0.79       0.66     4.9e-07     5.1e-03
## 4        2.39        2.67       0.71       0.73     1.9e-04     2.2e-03
## 5        5.71        8.07       0.85       0.89     3.6e-64     5.4e-37
## 6        4.44        4.89       0.82       0.83     8.7e-28     1.1e-31
##   UTR5 UTR3 strand exonCount
## 1    0    0      +         1
## 2    0    0      +         1
## 3    0    0      +         1
## 4    0    0      -         3
## 5    0    0      +        35
## 6    0    0      +        35

# 440 unique gene ID
length(unique(liver.ASE$geneID))

## [1] 440

# verify ASE table
liver.ASE1 <- liver.ASE[which(liver.ASE$replicate == "M.CH. DxB and BxD"), ]
liver.ASE2 <- liver.ASE[which(liver.ASE$replicate == "M.HF DxB and BxD"), ]
liver.ASE3 <- liver.ASE[which(liver.ASE$replicate == "F.HF DxB and BxD"), ]
length(unique(liver.ASE1$geneID))

## [1] 272

length(unique(liver.ASE2$geneID))

## [1] 275

length(unique(liver.ASE3$geneID))

## [1] 304

(length(unique(liver.ASE1$geneID))+length(unique(liver.ASE2$geneID))+length(unique(liver.ASE3$geneID)))/3

## [1] 283.6667

# As claimed in the paper: averaged 284 ASE for each replicate
sub.liver.ASE <-liver.ASE1
summary(sub.liver.ASE$pvalBH.DxB7)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0000000 0.0000058 0.0084070 0.0031000 0.1000000

sub.liver.ASE1 <- subset(sub.liver.ASE, pvalBH.DxB7 < 0.00000000000001)
sub.liver.ASE2 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.00000000000001 & pvalBH.DxB7 < 0.0000058)
sub.liver.ASE3 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.0000058 & pvalBH.DxB7 < 0.0031000)
sub.liver.ASE4 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.0031000)
dim(sub.liver.ASE1)

## [1] 89 19

dim(sub.liver.ASE2)

## [1] 97 19

dim(sub.liver.ASE3)

## [1] 93 19

dim(sub.liver.ASE4)

## [1] 94 19

# Subset liver ASE with different conditions
# sub.liver.ASE <-liver.ASE[which(liver.ASE$pvalBH.DxB7 < 0.05 & liver.ASE$pvalBH.BxD7 < 0.05), ]
# sub.liver.ASE <- sub.liver.ASE[order(sub.liver.ASE$pvalBH.BxD7), ]
# summary(sub.liver.ASE$pvalBH.BxD7)
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.BxD7 <= 9.0e-10 ), ]

# sub.liver.ASE <- sub.liver.ASE[ sub.liver.ASE$geneID %in%  names(table(sub.liver.ASE$geneID))[table(sub.liver.ASE$geneID) >1] , ]
# check the remain gene number after subsetting
dim(sub.liver.ASE)

## [1] 373  19

liver.ASE.symbol <- unique(sub.liver.ASE$geneID)
liver.ASE.symbol1 <- unique(sub.liver.ASE1$geneID)
liver.ASE.symbol2 <- unique(sub.liver.ASE2$geneID)
liver.ASE.symbol3 <- unique(sub.liver.ASE3$geneID)
liver.ASE.symbol4 <- unique(sub.liver.ASE4$geneID)
length(liver.ASE.symbol)

## [1] 272

# Annoate gene symbol wiht ensemble.ID
library(biomaRt)
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
liver.ASE.ensembl <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                              "mgi_symbol", values =liver.ASE.symbol, mart=mouse)

liver.ASE.ensembl1 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                               "mgi_symbol", values =liver.ASE.symbol1, mart=mouse)
liver.ASE.ensembl2 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                               "mgi_symbol", values =liver.ASE.symbol2, mart=mouse)
liver.ASE.ensembl3 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                               "mgi_symbol", values =liver.ASE.symbol3, mart=mouse)
liver.ASE.ensembl4 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                               "mgi_symbol", values =liver.ASE.symbol4, mart=mouse)
dim(liver.ASE.ensembl)

## [1] 241   2

liver.ASE.ensembl <- unique(liver.ASE.ensembl)
# delete liver ASE ensemble ID which are not in the liver.mouse.eQTL.bayesian data frame
liver.ASE.ensembl <- liver.ASE.ensembl[liver.ASE.ensembl$ensembl_gene_id %in% liver.mouse.eQTL.bayesian.4tau$ensembl_id, ]
dim(liver.ASE.ensembl)

## [1] 195   2

# create indicator for ASE true or not
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl1$ensembl_gene_id] <- 1
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl2$ensembl_gene_id] <- 2
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl3$ensembl_gene_id] <- 3
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl4$ensembl_gene_id] <- 4
# liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 5
 liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 1
 liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 0

summary(liver.mouse.eQTL.bayesian.4tau$eqtl)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.00000 0.01839 0.00000 1.00000

liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue <- -log10(liver.mouse.eQTL.bayesian.4tau$liver_pvalue)
head(liver.mouse.eQTL.bayesian.4tau)

##           ensembl_id  betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667   0.05337765  0.07553701     0.05149523
## 2 ENSMUSG00000000049 0.07875000   0.02452159  0.07891150     0.02433119
## 3 ENSMUSG00000000056 0.24430000   0.16111096  0.18031964     0.12436533
## 4 ENSMUSG00000000058 0.11090000   0.09428655  0.11006680     0.08493636
## 5 ENSMUSG00000000085 0.01910000   0.02466095  0.02034127     0.02446731
## 6 ENSMUSG00000000088 0.23727778   0.05056118  0.23131037     0.04895265
##   liver_pvalue abs_lung.beta neg_log_lung_pvalue    p.below.0        fzm
## 1  0.190827641    0.05980000          0.79795236 7.120518e-02 0.79795236
## 2  0.006815612    0.02366071          0.04744338 5.909266e-04 0.04744338
## 3  0.153365833    0.01356126          0.15606728 7.354057e-02 0.15606728
## 4  0.260604313    0.07813675          0.50290766 9.750950e-02 0.50290766
## 5  0.452489862    0.05284706          0.44000309 2.028837e-01 0.44000309
## 6  0.000420588    0.18997619          2.84024014 1.149522e-06 2.84024014
##   ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1  58.43301   35.83834    0            0.7193587
## 2 982.78748   35.83834    0            2.1664952
## 3 298.76061   35.83834    0            0.8142714
## 4  92.71435   35.83834    0            0.5840184
## 5 105.96916   35.83834    0            0.3443911
## 6  16.41648   16.41648    0            3.3761431

by(liver.mouse.eQTL.bayesian.4tau[, c(1, 7, 9, 14)], liver.mouse.eQTL.bayesian.4tau[, "eqtl"], summary)

## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 0
##               ensembl_id    abs_lung.beta       p.below.0      
##  ENSMUSG00000000001:    1   Min.   :0.00000   Min.   :0.00000  
##  ENSMUSG00000000049:    1   1st Qu.:0.03487   1st Qu.:0.02077  
##  ENSMUSG00000000056:    1   Median :0.06815   Median :0.10012  
##  ENSMUSG00000000058:    1   Mean   :0.13143   Mean   :0.13523  
##  ENSMUSG00000000085:    1   3rd Qu.:0.13975   3rd Qu.:0.22312  
##  ENSMUSG00000000088:    1   Max.   :3.66099   Max.   :0.48031  
##  (Other)           :10400                                      
##  neg_log_liver_pvalue
##  Min.   : 0.0000     
##  1st Qu.: 0.2606     
##  Median : 0.5847     
##  Mean   : 0.9418     
##  3rd Qu.: 1.1517     
##  Max.   :15.6916     
##                      
## -------------------------------------------------------- 
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 1
##               ensembl_id  abs_lung.beta        p.below.0        
##  ENSMUSG00000000275:  1   Min.   :0.001944   Min.   :0.0000000  
##  ENSMUSG00000000673:  1   1st Qu.:0.076576   1st Qu.:0.0000000  
##  ENSMUSG00000001467:  1   Median :0.164283   Median :0.0008869  
##  ENSMUSG00000001473:  1   Mean   :0.299202   Mean   :0.0470796  
##  ENSMUSG00000001604:  1   3rd Qu.:0.360720   3rd Qu.:0.0524872  
##  ENSMUSG00000002395:  1   Max.   :3.659845   Max.   :0.3556079  
##  (Other)           :189                                         
##  neg_log_liver_pvalue
##  Min.   : 0.04328    
##  1st Qu.: 0.74359    
##  Median : 2.23718    
##  Mean   : 3.06327    
##  3rd Qu.: 4.71465    
##  Max.   :11.90806    
##

library(ggplot2)
boxplot(neg_log_liver_pvalue ~ eqtl,data=liver.mouse.eQTL.bayesian.4tau, main="liver.mouse.eQTL", 
        xlab="ASE cutoff by p value", ylab="liver neg log p")

boxplot(neg_log_lung_pvalue ~ eqtl,data=liver.mouse.eQTL.bayesian.4tau, main="lung.mouse.eQTL", 
        xlab="ASE cutoff by p value", ylab="lung neg log p")

liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]

plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl), xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue" )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue, , col="red", xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau$betas.hat, liver.mouse.eQTL.bayesian.4tau$betas.tieda,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$betas.hat, liver.mouse.eQTL.bayesian.4tau.ase$betas.tieda, col="red") 
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

cor(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue)

## [1] 0.39218

length(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue)

## [1] 195

plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$p.below.0,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$p.below.0, col="red") 
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

head(liver.mouse.eQTL.bayesian.4tau)

##           ensembl_id  betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667   0.05337765  0.07553701     0.05149523
## 2 ENSMUSG00000000049 0.07875000   0.02452159  0.07891150     0.02433119
## 3 ENSMUSG00000000056 0.24430000   0.16111096  0.18031964     0.12436533
## 4 ENSMUSG00000000058 0.11090000   0.09428655  0.11006680     0.08493636
## 5 ENSMUSG00000000085 0.01910000   0.02466095  0.02034127     0.02446731
## 6 ENSMUSG00000000088 0.23727778   0.05056118  0.23131037     0.04895265
##   liver_pvalue abs_lung.beta neg_log_lung_pvalue    p.below.0        fzm
## 1  0.190827641    0.05980000          0.79795236 7.120518e-02 0.79795236
## 2  0.006815612    0.02366071          0.04744338 5.909266e-04 0.04744338
## 3  0.153365833    0.01356126          0.15606728 7.354057e-02 0.15606728
## 4  0.260604313    0.07813675          0.50290766 9.750950e-02 0.50290766
## 5  0.452489862    0.05284706          0.44000309 2.028837e-01 0.44000309
## 6  0.000420588    0.18997619          2.84024014 1.149522e-06 2.84024014
##   ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1  58.43301   35.83834    0            0.7193587
## 2 982.78748   35.83834    0            2.1664952
## 3 298.76061   35.83834    0            0.8142714
## 4  92.71435   35.83834    0            0.5840184
## 5 105.96916   35.83834    0            0.3443911
## 6  16.41648   16.41648    0            3.3761431

# Optimizing rho and adjust the weight
library(reshape)
rho.optimization <- matrix(0, nrow=nrow(liver.mouse.eQTL.bayesian.4tau), ncol=8)
colnames(rho.optimization)<-c("class", "rho","tmm","tau", "omega","beta_tieda", "n.betas.tieda.se","p.below.0")
nomega.diag<-diag(omega )
rhoclass <- seq(1,11, by=2) 
rho <- rhoclass*tau
result <- NULL
for (i in 1:length(rho))  {
  rho.optimization[ ,1] <- rhoclass[i]
  rho.optimization[ ,2] <- rho[i]
  rho.optimization[ ,3] <- (rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm
  rho.optimization[ ,4] <-tau*((rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
  nTau<- diag(rho.optimization[ ,3], rowLength, rowLength)
  ns<-V + nTau
  nS <- diag.inverse(ns)
  nomega<-diag.multi(nS, V)
  # nomega <- diag(0, rowLength, rowLength) # set nomega to 0 for code checking
  # nomega <- diag(1, rowLength, rowLength) # set nomega to 1 for code checking
  rho.optimization[ ,5] <- diag(nomega )
  rho.optimization[ ,6] <- nomega %*% Z %*% gamma + (identity-nomega) %*% betas.hat
  nTau_invert<-diag.inverse(nTau)
  V_invert<-diag.inverse(V)
  nPS_invert<-nTau_invert+ V_invert
  # nPS_invert<-nTau_invert+ diag.multi(diag.multi(V_invert, Z_transpose),  Z) # previous wrong code
  nPS<-diag.inverse(nPS_invert)
  nps<-diag(nPS)
  nps.long <- melt(nps)
  rho.optimization[ ,7] <-(nps.long$value)^0.5
  rho.optimization[ ,8] <- pnorm(0, rho.optimization[ ,6], rho.optimization[ ,7])
  
  result <- rbind(result,rho.optimization)
}

dim(result)

## [1] 63606     8

head(result)

##      class       rho tmm       tau        omega beta_tieda
## [1,]     1 0.0382713   1 0.0382713 0.0028410791 0.07374336
## [2,]     1 0.0382713   1 0.0382713 0.0006009470 0.07875627
## [3,]     1 0.0382713   1 0.0382713 0.0253000343 0.24029463
## [4,]     1 0.0382713   1 0.0382713 0.0088116182 0.11086105
## [5,]     1 0.0382713   1 0.0382713 0.0006077927 0.01914823
## [6,]     1 0.0382713   1 0.0382713 0.0025499141 0.23703476
##      n.betas.tieda.se    p.below.0
## [1,]       0.05330177 8.325479e-02
## [2,]       0.02451422 6.575204e-04
## [3,]       0.15905985 6.543011e-02
## [4,]       0.09387022 1.188007e-01
## [5,]       0.02465345 2.186692e-01
## [6,]       0.05049667 1.339133e-06

write.table(result, file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt",col.names=TRUE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian.result <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt",  header=T)

result.df <-liver.mouse.eQTL.bayesian.result
result.df$class <- factor(result.df$class)
# combine liver.mouse.eqtl.bayesian and rho.optimization.result for ploting
a <-liver.mouse.eQTL.bayesian.4tau[, c(1:2, 6, 7)]
a <-rbind(a, a, a, a, a, a)
dim(a)

## [1] 63606     4

new.result.df<-cbind(a, result.df)
head(new.result.df)

##           ensembl_id  betas.hat liver_pvalue abs_lung.beta class       rho
## 1 ENSMUSG00000000001 0.07366667  0.190827641    0.05980000     1 0.0382713
## 2 ENSMUSG00000000049 0.07875000  0.006815612    0.02366071     1 0.0382713
## 3 ENSMUSG00000000056 0.24430000  0.153365833    0.01356126     1 0.0382713
## 4 ENSMUSG00000000058 0.11090000  0.260604313    0.07813675     1 0.0382713
## 5 ENSMUSG00000000085 0.01910000  0.452489862    0.05284706     1 0.0382713
## 6 ENSMUSG00000000088 0.23727778  0.000420588    0.18997619     1 0.0382713
##   tmm       tau        omega beta_tieda n.betas.tieda.se    p.below.0
## 1   1 0.0382713 0.0028410791 0.07374336       0.05330177 8.325479e-02
## 2   1 0.0382713 0.0006009470 0.07875627       0.02451422 6.575204e-04
## 3   1 0.0382713 0.0253000343 0.24029463       0.15905985 6.543011e-02
## 4   1 0.0382713 0.0088116182 0.11086105       0.09387022 1.188007e-01
## 5   1 0.0382713 0.0006077927 0.01914823       0.02465345 2.186692e-01
## 6   1 0.0382713 0.0025499141 0.23703476       0.05049667 1.339133e-06

new.result.df2 <- new.result.df
by(new.result.df2, new.result.df2[, "class"], head)

## new.result.df2[, "class"]: 1
##           ensembl_id  betas.hat liver_pvalue abs_lung.beta class       rho
## 1 ENSMUSG00000000001 0.07366667  0.190827641    0.05980000     1 0.0382713
## 2 ENSMUSG00000000049 0.07875000  0.006815612    0.02366071     1 0.0382713
## 3 ENSMUSG00000000056 0.24430000  0.153365833    0.01356126     1 0.0382713
## 4 ENSMUSG00000000058 0.11090000  0.260604313    0.07813675     1 0.0382713
## 5 ENSMUSG00000000085 0.01910000  0.452489862    0.05284706     1 0.0382713
## 6 ENSMUSG00000000088 0.23727778  0.000420588    0.18997619     1 0.0382713
##   tmm       tau        omega beta_tieda n.betas.tieda.se    p.below.0
## 1   1 0.0382713 0.0028410791 0.07374336       0.05330177 8.325479e-02
## 2   1 0.0382713 0.0006009470 0.07875627       0.02451422 6.575204e-04
## 3   1 0.0382713 0.0253000343 0.24029463       0.15905985 6.543011e-02
## 4   1 0.0382713 0.0088116182 0.11086105       0.09387022 1.188007e-01
## 5   1 0.0382713 0.0006077927 0.01914823       0.02465345 2.186692e-01
## 6   1 0.0382713 0.0025499141 0.23703476       0.05049667 1.339133e-06
## -------------------------------------------------------- 
## new.result.df2[, "class"]: 3
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta class
## 11000 ENSMUSG00000000001 0.07366667  0.190827641    0.05980000     3
## 21000 ENSMUSG00000000049 0.07875000  0.006815612    0.02366071     3
## 31000 ENSMUSG00000000056 0.24430000  0.153365833    0.01356126     3
## 41000 ENSMUSG00000000058 0.11090000  0.260604313    0.07813675     3
## 51000 ENSMUSG00000000085 0.01910000  0.452489862    0.05284706     3
## 61000 ENSMUSG00000000088 0.23727778  0.000420588    0.18997619     3
##             rho          tmm          tau        omega beta_tieda
## 11000 0.1148139 1.256708e+17 4.809586e+15 2.267172e-20 0.07366667
## 21000 0.1148139 1.256708e+17 4.809586e+15 4.784788e-21 0.07875000
## 31000 0.1148139 1.256708e+17 4.809586e+15 2.065455e-19 0.24430000
## 41000 0.1148139 1.256708e+17 4.809586e+15 7.073998e-20 0.11090000
## 51000 0.1148139 1.256708e+17 4.809586e+15 4.839327e-21 0.01910000
## 61000 0.1148139 6.802265e+07 2.603315e+06 3.758208e-11 0.23727778
##       n.betas.tieda.se    p.below.0
## 11000       0.05337765 8.377744e-02
## 21000       0.02452159 6.603217e-04
## 31000       0.16111096 6.471591e-02
## 41000       0.09428655 1.197571e-01
## 51000       0.02466095 2.193164e-01
## 61000       0.05056118 1.346897e-06
## -------------------------------------------------------- 
## new.result.df2[, "class"]: 5
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta class
## 11002 ENSMUSG00000000001 0.07366667  0.190827641    0.05980000     5
## 21002 ENSMUSG00000000049 0.07875000  0.006815612    0.02366071     5
## 31002 ENSMUSG00000000056 0.24430000  0.153365833    0.01356126     5
## 41002 ENSMUSG00000000058 0.11090000  0.260604313    0.07813675     5
## 51002 ENSMUSG00000000085 0.01910000  0.452489862    0.05284706     5
## 61002 ENSMUSG00000000088 0.23727778  0.000420588    0.18997619     5
##             rho          tmm          tau        omega beta_tieda
## 11002 0.1913565 1.121827e+25 4.293376e+23 2.539763e-28 0.07366667
## 21002 0.1913565 1.121827e+25 4.293376e+23 5.360082e-29 0.07875000
## 31002 0.1913565 1.121827e+25 4.293376e+23 2.313792e-27 0.24430000
## 41002 0.1913565 1.121827e+25 4.293376e+23 7.924533e-28 0.11090000
## 51002 0.1913565 1.121827e+25 4.293376e+23 5.421179e-29 0.01910000
## 61002 0.1913565 2.982841e+11 1.141572e+10 8.570464e-15 0.23727778
##       n.betas.tieda.se    p.below.0
## 11002       0.05337765 8.377744e-02
## 21002       0.02452159 6.603217e-04
## 31002       0.16111096 6.471591e-02
## 41002       0.09428655 1.197571e-01
## 51002       0.02466095 2.193164e-01
## 61002       0.05056118 1.346897e-06
## -------------------------------------------------------- 
## new.result.df2[, "class"]: 7
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta class
## 11004 ENSMUSG00000000001 0.07366667  0.190827641    0.05980000     7
## 21004 ENSMUSG00000000049 0.07875000  0.006815612    0.02366071     7
## 31004 ENSMUSG00000000056 0.24430000  0.153365833    0.01356126     7
## 41004 ENSMUSG00000000058 0.11090000  0.260604313    0.07813675     7
## 51004 ENSMUSG00000000085 0.01910000  0.452489862    0.05284706     7
## 61004 ENSMUSG00000000088 0.23727778  0.000420588    0.18997619     7
##             rho          tmm          tau        omega beta_tieda
## 11004 0.2678991 1.936031e+30 7.409440e+28 1.471657e-33 0.07366667
## 21004 0.2678991 1.936031e+30 7.409440e+28 3.105882e-34 0.07875000
## 31004 0.2678991 1.936031e+30 7.409440e+28 1.340719e-32 0.24430000
## 41004 0.2678991 1.936031e+30 7.409440e+28 4.591845e-33 0.11090000
## 51004 0.2678991 1.936031e+30 7.409440e+28 3.141284e-34 0.01910000
## 61004 0.2678991 7.473751e+13 2.860301e+12 3.420549e-17 0.23727778
##       n.betas.tieda.se    p.below.0
## 11004       0.05337765 8.377744e-02
## 21004       0.02452159 6.603217e-04
## 31004       0.16111096 6.471591e-02
## 41004       0.09428655 1.197571e-01
## 51004       0.02466095 2.193164e-01
## 61004       0.05056118 1.346897e-06
## -------------------------------------------------------- 
## new.result.df2[, "class"]: 9
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta class
## 11006 ENSMUSG00000000001 0.07366667  0.190827641    0.05980000     9
## 21006 ENSMUSG00000000049 0.07875000  0.006815612    0.02366071     9
## 31006 ENSMUSG00000000056 0.24430000  0.153365833    0.01356126     9
## 41006 ENSMUSG00000000058 0.11090000  0.260604313    0.07813675     9
## 51006 ENSMUSG00000000085 0.01910000  0.452489862    0.05284706     9
## 61006 ENSMUSG00000000088 0.23727778  0.000420588    0.18997619     9
##             rho          tmm          tau        omega beta_tieda
## 11006 0.3444417 1.579316e+34 6.044247e+32 1.804056e-37 0.07366667
## 21006 0.3444417 1.579316e+34 6.044247e+32 3.807398e-38 0.07875000
## 31006 0.3444417 1.579316e+34 6.044247e+32 1.643543e-36 0.24430000
## 41006 0.3444417 1.579316e+34 6.044247e+32 5.628990e-37 0.11090000
## 51006 0.3444417 1.579316e+34 6.044247e+32 3.850796e-38 0.01910000
## 61006 0.3444417 4.627081e+15 1.770844e+14 5.524937e-19 0.23727778
##       n.betas.tieda.se    p.below.0
## 11006       0.05337765 8.377744e-02
## 21006       0.02452159 6.603217e-04
## 31006       0.16111096 6.471591e-02
## 41006       0.09428655 1.197571e-01
## 51006       0.02466095 2.193164e-01
## 61006       0.05056118 1.346897e-06
## -------------------------------------------------------- 
## new.result.df2[, "class"]: 11
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta class
## 11008 ENSMUSG00000000001 0.07366667  0.190827641    0.05980000    11
## 21008 ENSMUSG00000000049 0.07875000  0.006815612    0.02366071    11
## 31008 ENSMUSG00000000056 0.24430000  0.153365833    0.01356126    11
## 41008 ENSMUSG00000000058 0.11090000  0.260604313    0.07813675    11
## 51008 ENSMUSG00000000085 0.01910000  0.452489862    0.05284706    11
## 61008 ENSMUSG00000000088 0.23727778  0.000420588    0.18997619    11
##             rho          tmm          tau        omega beta_tieda
## 11008 0.4209842 2.097910e+37 8.028972e+35 1.358101e-40 0.07366667
## 21008 0.4209842 2.097910e+37 8.028972e+35 2.866226e-41 0.07875000
## 31008 0.4209842 2.097910e+37 8.028972e+35 1.237267e-39 0.24430000
## 41008 0.4209842 2.097910e+37 8.028972e+35 4.237529e-40 0.11090000
## 51008 0.4209842 2.097910e+37 8.028972e+35 2.898896e-41 0.01910000
## 61008 0.4209842 1.247402e+17 4.773970e+15 2.049405e-20 0.23727778
##       n.betas.tieda.se    p.below.0
## 11008       0.05337765 8.377744e-02
## 21008       0.02452159 6.603217e-04
## 31008       0.16111096 6.471591e-02
## 41008       0.09428655 1.197571e-01
## 51008       0.02466095 2.193164e-01
## 61008       0.05056118 1.346897e-06

# choose different rho class for plotting 
new.result.df2.rho1 <- new.result.df2[new.result.df2$class == 1, ]
head(new.result.df2.rho1)

##           ensembl_id  betas.hat liver_pvalue abs_lung.beta class       rho
## 1 ENSMUSG00000000001 0.07366667  0.190827641    0.05980000     1 0.0382713
## 2 ENSMUSG00000000049 0.07875000  0.006815612    0.02366071     1 0.0382713
## 3 ENSMUSG00000000056 0.24430000  0.153365833    0.01356126     1 0.0382713
## 4 ENSMUSG00000000058 0.11090000  0.260604313    0.07813675     1 0.0382713
## 5 ENSMUSG00000000085 0.01910000  0.452489862    0.05284706     1 0.0382713
## 6 ENSMUSG00000000088 0.23727778  0.000420588    0.18997619     1 0.0382713
##   tmm       tau        omega beta_tieda n.betas.tieda.se    p.below.0
## 1   1 0.0382713 0.0028410791 0.07374336       0.05330177 8.325479e-02
## 2   1 0.0382713 0.0006009470 0.07875627       0.02451422 6.575204e-04
## 3   1 0.0382713 0.0253000343 0.24029463       0.15905985 6.543011e-02
## 4   1 0.0382713 0.0088116182 0.11086105       0.09387022 1.188007e-01
## 5   1 0.0382713 0.0006077927 0.01914823       0.02465345 2.186692e-01
## 6   1 0.0382713 0.0025499141 0.23703476       0.05049667 1.339133e-06

tail(new.result.df2.rho1)

##               ensembl_id  betas.hat liver_pvalue abs_lung.beta class
## 10596 ENSMUSG00000099041 0.16160714   0.10867553    0.10059091     1
## 10597 ENSMUSG00000099083 0.10340000   0.06630646    0.12891000     1
## 10598 ENSMUSG00000099116 0.01975000   0.68385715    0.03457738     1
## 10599 ENSMUSG00000099164 0.10844444   0.12006199    0.08603846     1
## 10600 ENSMUSG00000099262 0.01105556   0.85807655    0.18600427     1
## 10601 ENSMUSG00000099305 0.05450000   0.17299524    0.06246296     1
##             rho tmm       tau       omega beta_tieda n.betas.tieda.se
## 10596 0.0382713   1 0.0382713 0.008726050 0.16118828       0.09341333
## 10597 0.0382713   1 0.0382713 0.002653901 0.10345094       0.05151603
## 10598 0.0382713   1 0.0382713 0.002243790 0.01991358       0.04736866
## 10599 0.0382713   1 0.0382713 0.004230408 0.10844674       0.06504159
## 10600 0.0382713   1 0.0382713 0.003660011 0.01153011       0.06049802
## 10601 0.0382713   1 0.0382713 0.001426669 0.05456706       0.03777127
##        p.below.0
## 10596 0.04221525
## 10597 0.02231468
## 10598 0.33709818
## 10599 0.04772295
## 10600 0.42442478
## 10601 0.07427516

# rank with p.below.0: bayesian modeling
new.result.df2.rho1$rank.p.below.0 <- rank(new.result.df2.rho1$p.below.0)
# rank with liver p value: traditionsl linear regression (one step regression)
new.result.df2.rho1$rank.liver.pvalue <- rank(new.result.df2.rho1$liver_pvalue)
new.result.df2.rho1 <- new.result.df2.rho1[order(new.result.df2.rho1$rank.p.below.0), ]
head(new.result.df2.rho1)

##              ensembl_id betas.hat liver_pvalue abs_lung.beta class
## 2577 ENSMUSG00000022680  4.255833 2.034364e-16     2.7728167     1
## 9323 ENSMUSG00000057132  2.426200 7.421942e-14     3.0932000     1
## 2806 ENSMUSG00000023791  1.198018 4.042683e-13     0.4850804     1
## 612  ENSMUSG00000005161  1.872357 1.235782e-12     1.9872891     1
## 1891 ENSMUSG00000020803  0.465800 2.918690e-12     0.2315714     1
## 3371 ENSMUSG00000025178  1.418568 3.707316e-12     1.4921200     1
##            rho tmm       tau        omega beta_tieda n.betas.tieda.se
## 2577 0.0382713   1 0.0382713 0.0067767713  4.2335097       0.08232115
## 9323 0.0382713   1 0.0382713 0.0055026791  2.4187008       0.07418005
## 2806 0.0382713   1 0.0382713 0.0017539320  1.1963299       0.04187997
## 612  0.0382713   1 0.0382713 0.0050848675  1.8664589       0.07130826
## 1891 0.0382713   1 0.0382713 0.0003617983  0.4656876       0.01902100
## 3371 0.0382713   1 0.0382713 0.0034731501  1.4155697       0.05893344
##          p.below.0 rank.p.below.0 rank.liver.pvalue
## 2577  0.000000e+00              1                 1
## 9323 1.696164e-233              2                 2
## 2806 8.970068e-180              3                 3
## 612  2.592473e-151              4                 4
## 1891 1.125724e-132              5                 5
## 3371 8.634952e-128              6                 6

# caculate TPR: true positive rate
# caculate PPV: positive predictive rate
result.rho1 <- matrix(, nrow(new.result.df2.rho1), 8)
colnames(result.rho1)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR" )
for (i in 1:nrow(new.result.df2.rho1))
{
  newdata1.rho1 <- subset(new.result.df2.rho1, rank.p.below.0 <= i)
  overlap.newdata1.rho1 <- newdata1.rho1[newdata1.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  result.rho1[i, 1] <- i
  result.rho1[i, 2] <- nrow(overlap.newdata1.rho1)/nrow(newdata1.rho1)
  result.rho1[i, 3] <- nrow(overlap.newdata1.rho1)/nrow(liver.ASE.ensembl)
  newdata2.rho1 <- subset(new.result.df2.rho1, rank.liver.pvalue <= i)
  overlap.newdata2.rho1 <- newdata2.rho1[newdata2.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  result.rho1[i, 5] <- i
  result.rho1[i, 6] <- nrow(overlap.newdata2.rho1)/nrow(newdata2.rho1)
  result.rho1[i, 7] <- nrow(overlap.newdata2.rho1)/nrow(liver.ASE.ensembl)
}  
head(result.rho1)

##      bayrank    bayppv     bay_TPR bay_FPR orirank    orippv     ori_TPR
## [1,]       1 0.0000000 0.000000000      NA       1 0.0000000 0.000000000
## [2,]       2 0.0000000 0.000000000      NA       2 0.0000000 0.000000000
## [3,]       3 0.0000000 0.000000000      NA       3 0.0000000 0.000000000
## [4,]       4 0.2500000 0.005128205      NA       4 0.2500000 0.005128205
## [5,]       5 0.2000000 0.005128205      NA       5 0.2000000 0.005128205
## [6,]       6 0.1666667 0.005128205      NA       6 0.1666667 0.005128205
##      ori_FPR
## [1,]      NA
## [2,]      NA
## [3,]      NA
## [4,]      NA
## [5,]      NA
## [6,]      NA

tail(result.rho1)

##          bayrank     bayppv bay_TPR bay_FPR orirank     orippv ori_TPR
## [10596,]   10596 0.01840317       1      NA   10596 0.01840317       1
## [10597,]   10597 0.01840143       1      NA   10597 0.01840143       1
## [10598,]   10598 0.01839970       1      NA   10598 0.01839970       1
## [10599,]   10599 0.01839796       1      NA   10599 0.01839796       1
## [10600,]   10600 0.01839623       1      NA   10600 0.01839623       1
## [10601,]   10601 0.01839449       1      NA   10601 0.01839449       1
##          ori_FPR
## [10596,]      NA
## [10597,]      NA
## [10598,]      NA
## [10599,]      NA
## [10600,]      NA
## [10601,]      NA

# ploting "True positive rate"

plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 0.4) , xlim=c(0, 300))
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4), xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# ploting "positive predictive value"
plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# Retrieve MT-eQTLs result
MTeQTLs <-read.table(file="MT-eQTLs.txt",  header=T)
head(MTeQTLs)

##         SNP       gene isEQTL.Liver isEQTL.lung marginalP.Liver
## 1 rs6269442 1424963_at            0           0       0.9260306
## 2 rs6365999 1424963_at            0           0       0.9260306
## 3 rs6376963 1424963_at            0           0       0.9306985
## 4 rs3677817 1424963_at            0           0       0.9329117
## 5 rs6269442 1424964_at            0           0       0.9438114
## 6 rs6365999 1424964_at            0           0       0.9438114
##   marginalP.lung
## 1      0.9027543
## 2      0.9027543
## 3      0.9104129
## 4      0.9140456
## 5      0.9219186
## 6      0.9219186

mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt",  header=T)
MTeQTLs<-merge(MTeQTLs, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
MTeQTLs.min <- data.table(MTeQTLs, key=c('ensembl_id', "marginalP.Liver"))
MTeQTLs.min <-MTeQTLs.min[J(unique(ensembl_id)),mult="first"]
merged.eQTL <- merge(new.result.df2.rho1, MTeQTLs.min, by ="ensembl_id")
merged.eQTL$rank.marginalP.Liver <- rank(merged.eQTL$marginalP.Liver)

merged.result <- matrix(, nrow(merged.eQTL), 12)
colnames(merged.result)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR", "MTrank","MTppv","MT_TPR","MT_FPR" )
for (i in 1:nrow(merged.eQTL))
{
  newdata1 <- subset(merged.eQTL, rank.p.below.0 <= i)
  overlap.newdata1 <- newdata1[newdata1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  merged.result[i, 1] <- i
  merged.result[i, 2] <- nrow(overlap.newdata1)/nrow(newdata1)
  merged.result[i, 3] <- nrow(overlap.newdata1)/nrow(liver.ASE.ensembl)
  merged.result[i, 4] <- (nrow(newdata1)-nrow(overlap.newdata1)) / (nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
   
  newdata2 <- subset(merged.eQTL, rank.liver.pvalue <= i)
  overlap.newdata2 <- newdata2[newdata2$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  merged.result[i, 5] <- i
  merged.result[i, 6] <- nrow(overlap.newdata2)/nrow(newdata2)
  merged.result[i, 7] <- nrow(overlap.newdata2)/nrow(liver.ASE.ensembl)
  merged.result[i, 8] <- (nrow(newdata2)-nrow(overlap.newdata2)) / (nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
  
  
  newdata3 <- subset(merged.eQTL, rank.marginalP.Liver <= i)
  overlap.newdata3 <- newdata3[newdata3$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  merged.result[i, 9] <- i
  merged.result[i, 10] <- nrow(overlap.newdata3)/nrow(newdata3)
  merged.result[i, 11] <- nrow(overlap.newdata3)/nrow(liver.ASE.ensembl)
  merged.result[i, 12] <- (nrow(newdata3)-nrow(overlap.newdata3)) /(nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
  
} 

head(merged.result)

##      bayrank    bayppv     bay_TPR      bay_FPR orirank    orippv
## [1,]       1 0.0000000 0.000000000 0.0001019680       1 0.0000000
## [2,]       2 0.0000000 0.000000000 0.0002039360       2 0.0000000
## [3,]       3 0.0000000 0.000000000 0.0003059039       3 0.0000000
## [4,]       4 0.2500000 0.005128205 0.0003059039       4 0.2500000
## [5,]       5 0.2000000 0.005128205 0.0004078719       5 0.2000000
## [6,]       6 0.1666667 0.005128205 0.0005098399       6 0.1666667
##          ori_TPR      ori_FPR MTrank     MTppv      MT_TPR       MT_FPR
## [1,] 0.000000000 0.0001019680      1 0.0000000 0.000000000 0.0001019680
## [2,] 0.000000000 0.0002039360      2 0.5000000 0.005128205 0.0001019680
## [3,] 0.000000000 0.0003059039      3 0.6666667 0.010256410 0.0001019680
## [4,] 0.005128205 0.0003059039      4 0.5000000 0.010256410 0.0002039360
## [5,] 0.005128205 0.0004078719      5 0.6000000 0.015384615 0.0002039360
## [6,] 0.005128205 0.0005098399      6 0.5000000 0.015384615 0.0003059039

tail(merged.result)

##          bayrank     bayppv   bay_TPR   bay_FPR orirank     orippv
## [9997,]     9997 0.01939792 0.9384615 0.9433058    9997 0.01939998
## [9998,]     9998 0.01939587 0.9384615 0.9434078    9998 0.01939792
## [9999,]     9999 0.01939381 0.9384615 0.9435097    9999 0.01939587
## [10000,]   10000 0.01939176 0.9384615 0.9436117   10000 0.01939381
## [10001,]   10001 0.01938970 0.9384615 0.9437137   10001 0.01939176
## [10002,]   10002 0.01938970 0.9384615 0.9437137   10002 0.01938970
##            ori_TPR   ori_FPR MTrank      MTppv    MT_TPR   MT_FPR
## [9997,]  0.9384615 0.9432038   9997 0.01840552 0.9435897 1.000612
## [9998,]  0.9384615 0.9433058   9998 0.01840368 0.9435897 1.000714
## [9999,]  0.9384615 0.9434078   9999 0.01840184 0.9435897 1.000816
## [10000,] 0.9384615 0.9435097  10000 0.01840000 0.9435897 1.000918
## [10001,] 0.9384615 0.9436117  10001 0.01839816 0.9435897 1.001020
## [10002,] 0.9384615 0.9437137  10002 0.01839632 0.9435897 1.001122

# ploting "True positive rate"
plot(merged.result[, 1], merged.result[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( merged.result[, 1], merged.result[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
par(new=TRUE)
plot(merged.result[, 1], merged.result[, 11], type="l", col="blue", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

plot(merged.result[, 1], merged.result[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 1), xlim=c(0, 300) )
par(new=TRUE)
plot( merged.result[, 1], merged.result[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1),  xlim=c(0, 300))
par(new=TRUE)
plot(merged.result[, 1], merged.result[, 11], type="l", col="blue", xlab="Ranking", ylab="TPR", ylim=c(0, 1),  xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

plot(merged.result[, 4], merged.result[, 3], type="l", col="red",  xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1) )
par(new=TRUE)
plot( merged.result[, 8], merged.result[, 7], type="l", col="green", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1))
par(new=TRUE)
plot(merged.result[, 12], merged.result[, 11], type="l", col="blue", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)
title(main = "ROC curve")