2016-05-09 (50% subsetted dataset

rm(list = ls())
gc()

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 370865 19.9     750400 40.1   592000 31.7
## Vcells 623904  4.8    1308461 10.0  1007471  7.7

# set the working directory
setwd("/Volumes/Transcend/Thesis_project/subsetted_liver")
# subset dataset 
sebsetn <- 15
mouse.liver.expression.eqtl <-read.table(file="2015-12-07 mouse.liver.expression.eqtl.txt",  header=T)
head(mouse.liver.expression.eqtl)

##       ProbeSet   BXD1  BXD11  BXD12  BXD13  BXD14  BXD15  BXD16  BXD18
## 1   1415670_at 10.090 10.200 10.300 10.208  9.830 10.238  9.914 10.348
## 2   1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3   1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4   1415673_at  7.535  7.382  7.566  7.162  7.403  7.342  7.213  7.581
## 5 1415674_a_at  9.757  9.972  9.269  9.873  9.354  9.918  9.459  9.655
## 6   1415675_at  9.029  9.009  9.245  9.282  9.415  9.098  9.060  8.937
##    BXD19   BXD2  BXD20  BXD21  BXD24 BXD24a  BXD27  BXD28  BXD29  BXD31
## 1  9.939  9.871 10.077 10.159  9.746  9.890 10.286 10.177  9.959  9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4  7.551  7.368  7.251  7.373  7.408  7.473  7.238  7.424  7.475  7.491
## 5  9.544  9.557  9.460  9.322  9.795  9.758  9.628  9.263  9.671  9.645
## 6  8.995  9.104  9.123  9.086  9.087  9.040  8.844  9.173  8.951  9.131
##    BXD32  BXD33  BXD34  BXD36  BXD38  BXD39  BXD40  BXD42   BXD5   BXD6
## 1 10.059 10.102 10.174 10.022 10.364  9.745 10.074  9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4  7.419  7.386  7.496  7.369  7.265  7.130  7.149  7.383  7.439  7.233
## 5  9.368  9.741  9.721  9.684  9.387  9.649  9.693  9.793  9.840  9.519
## 6  9.108  9.122  9.153  9.008  9.138  9.126  9.233  9.060  9.134  9.081
##     BXD8   BXD9
## 1  9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4  7.397  7.390
## 5  9.340  9.160
## 6  8.926  9.274

dim(mouse.liver.expression.eqtl)

## [1] 20855    31

set.seed(50)
sub.mouse.liver.expression.eqtl <- mouse.liver.expression.eqtl[, c(1, sample(2:dim(mouse.liver.expression.eqtl)[2],sebsetn, replace=FALSE))]

head(sub.mouse.liver.expression.eqtl)

##       ProbeSet  BXD36  BXD24  BXD15  BXD34 BXD24a  BXD11  BXD29  BXD27
## 1   1415670_at 10.022  9.746 10.238 10.174  9.890 10.200  9.959 10.286
## 2   1415671_at 10.923 11.104 11.120 11.059 10.979 11.088 11.071 10.905
## 3   1415672_at 11.545 11.499 11.318 11.490 11.531 11.417 11.502 11.666
## 4   1415673_at  7.369  7.408  7.342  7.496  7.473  7.382  7.475  7.238
## 5 1415674_a_at  9.684  9.795  9.918  9.721  9.758  9.972  9.671  9.628
## 6   1415675_at  9.008  9.087  9.098  9.153  9.040  9.009  8.951  8.844
##     BXD1  BXD12  BXD18   BXD6  BXD21  BXD40  BXD14
## 1 10.090 10.300 10.348 10.069 10.159 10.074  9.830
## 2 10.932 11.007 11.123 11.043 10.969 11.067 10.955
## 3 11.432 11.442 11.561 11.504 11.328 11.543 11.561
## 4  7.535  7.566  7.581  7.233  7.373  7.149  7.403
## 5  9.757  9.269  9.655  9.519  9.322  9.693  9.354
## 6  9.029  9.245  8.937  9.081  9.086  9.233  9.415

dim(sub.mouse.liver.expression.eqtl)

## [1] 20855    16

write.table(sub.mouse.liver.expression.eqtl,file="2016-05-09 sub.mouse.liver.expression.eqtl.txt", sep="\t", row.names=FALSE, quote=FALSE)

#subset liver snp expression data
BXD.geno.SNP.eqtl.for.liver <-read.table(file="2015-12-07 BXD.geno.SNP.eqtl.for.liver.txt",  header=T)
head(BXD.geno.SNP.eqtl.for.liver)

##       Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs6269442    0     0     1     0     0     1     1     0     1    0
## 2 rs6365999    0     0     1     0     0     1     1     0     1    0
## 3 rs6376963    0     0     1     0     0     1     1     0     1    0
## 4 rs3677817    0     0     1     0     0     1     1     0     1    0
## 5 rs8236463    0     1     1     0     0     1     1     0     1    0
## 6 rs6333200    0     1     1     0     0     1     1     0     1    0
##   BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1     1     1     0      0     0     1     1     0     0     0     0     0
## 2     1     1     0      0     0     1     1     0     0     0     0     0
## 3     1     1     0      0     0     1     1     0     0     0     0     0
## 4     1     1     0      0     0     1     1     0     0     0     0     0
## 5     1     1     0      0     0     1     1     0     0     0     0     0
## 6     1     1     0      0     0     1     1     0     0     0     0     0
##   BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1     0     1     0     0    1    1    1    0
## 2     0     1     0     0    1    1    1    0
## 3     0     1     0     0    1    1    1    0
## 4     0     1     0     0    1    1    1    0
## 5     0     1     0     0    1    1    1    0
## 6     0     1     0     0    1    1    1    0

dim(BXD.geno.SNP.eqtl.for.liver)

## [1] 3811   31

set.seed(50)
sub.BXD.geno.SNP.eqtl.for.liver <- BXD.geno.SNP.eqtl.for.liver[, c(1, sample(2:dim(BXD.geno.SNP.eqtl.for.liver)[2],sebsetn, replace=FALSE))]
head(sub.BXD.geno.SNP.eqtl.for.liver)

##       Locus BXD36 BXD24 BXD15 BXD34 BXD24a BXD11 BXD29 BXD27 BXD1 BXD12
## 1 rs6269442     0     0     1     0      0     0     1     0    0     1
## 2 rs6365999     0     0     1     0      0     0     1     0    0     1
## 3 rs6376963     0     0     1     0      0     0     1     0    0     1
## 4 rs3677817     0     0     1     0      0     0     1     0    0     1
## 5 rs8236463     0     0     1     0      0     1     1     0    0     1
## 6 rs6333200     0     0     1     0      0     1     1     0    0     1
##   BXD18 BXD6 BXD21 BXD40 BXD14
## 1     0    1     1     0     0
## 2     0    1     1     0     0
## 3     0    1     1     0     0
## 4     0    1     1     0     0
## 5     0    1     1     0     0
## 6     0    1     1     0     0

dim(sub.BXD.geno.SNP.eqtl.for.liver)

## [1] 3811   16

write.table(sub.BXD.geno.SNP.eqtl.for.liver,file="2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt", sep="\t", row.names=FALSE, quote=FALSE)

library(MatrixEQTL)
## Location of the package with the data files.
base.dir = "/Volumes/Transcend/Thesis_project/subsetted_liver";
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste(base.dir, "/2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt", sep="");
snps_location_file_name = paste(base.dir, "/2015-12-07 BXD.geno.loc.eqtl.for.liver.txt", sep="");
# Gene expression file name
expression_file_name = paste(base.dir, "/2016-05-09 sub.mouse.liver.expression.eqtl.txt", sep="");
gene_location_file_name = paste(base.dir, "/2015-12-07 liver.gene.loc.txt", sep="");
# Covariates file name
# Set to character() for no covariates
covariates_file_name = character() ;

# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();

# Only associations significant at this level will be saved
pvOutputThreshold_cis = 1;
pvOutputThreshold_tra = 0.000000000000005;

# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;


## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t";      # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;
snps$fileSkipColumns = 1;
snps$fileSliceSize = 2000;
snps$LoadFile(SNP_file_name);

## Rows read:  2,000 
## Rows read:  3811  done.

## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t";
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1;
gene$fileSkipColumns = 1;
gene$fileSliceSize = 2000;
gene$LoadFile(expression_file_name);

## Rows read:  2,000 
## Rows read:  4,000 
## Rows read:  6,000 
## Rows read:  8,000 
## Rows read:  10,000 
## Rows read:  12,000 
## Rows read:  14,000 
## Rows read:  16,000 
## Rows read:  18,000 
## Rows read:  20,000 
## Rows read:  20855  done.

## Load covariates
cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t";      # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1;          # one row of column labels
cvrt$fileSkipColumns = 1;       # one column of row labels
if(length(covariates_file_name)>0) {
cvrt$LoadFile(covariates_file_name);
}

## Run the analysis

snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
head(genepos)

##       probe_id Chromosome start_location end_location
## 1   1415670_at          6       87887971     87890759
## 2   1415671_at          8      105524469    105566040
## 3   1415672_at          8       23241325     23257080
## 4   1415673_at          5      129765557    129787253
## 5 1415674_a_at          9       44403758     44407548
## 6   1415675_at          2       32570857     32573571

me = Matrix_eQTL_main(
  snps = snps,
  gene = gene,
  output_file_name = output_file_name_tra,
  pvOutputThreshold = pvOutputThreshold_tra,
  useModel = useModel,
  errorCovariance = numeric(),
  verbose = TRUE,
  output_file_name.cis = output_file_name_cis,
  pvOutputThreshold.cis = pvOutputThreshold_cis,
  snpspos = snpspos,
  genepos = genepos,
  cisDist = cisDist,
  pvalue.hist = TRUE,
  min.pv.by.genesnp = FALSE,
  noFDRsaveMemory = FALSE);

## Matching data files and location files 
## 20855 of 20855  genes matched
## 3811 of 3811  SNPs matched
## Task finished in  0.054  seconds
## Reordering genes
##  
## Task finished in  0.119  seconds
## Processing covariates 
## Task finished in  0.001  seconds
## Processing gene expression data (imputation, residualization, etc.) 
## Task finished in  0.013  seconds
## Creating output file(s) 
## Task finished in  0.011  seconds
## Performing eQTL analysis 
##  4.54% done, 7,118 cis-eQTLs, 0 trans-eQTLs
##  9.09% done, 13,788 cis-eQTLs, 0 trans-eQTLs
## 13.63% done, 21,752 cis-eQTLs, 0 trans-eQTLs
## 18.18% done, 27,806 cis-eQTLs, 0 trans-eQTLs
## 22.72% done, 34,278 cis-eQTLs, 0 trans-eQTLs
## 27.27% done, 35,915 cis-eQTLs, 0 trans-eQTLs
## 31.81% done, 0 trans-eQTLs
## 36.36% done, 0 trans-eQTLs
## 40.90% done, 0 trans-eQTLs
## 45.45% done, 0 trans-eQTLs
## 50.00% done, 0 trans-eQTLs
## 54.54% done, 0 trans-eQTLs
## 59.09% done, 0 trans-eQTLs
## 63.63% done, 0 trans-eQTLs
## 68.18% done, 0 trans-eQTLs
## 72.72% done, 0 trans-eQTLs
## 77.27% done, 40,035 cis-eQTLs, 0 trans-eQTLs
## 81.81% done, 47,465 cis-eQTLs, 0 trans-eQTLs
## 86.36% done, 53,735 cis-eQTLs, 0 trans-eQTLs
## 90.90% done, 60,292 cis-eQTLs, 17 trans-eQTLs
## 95.45% done, 67,748 cis-eQTLs, 17 trans-eQTLs
## 100.00% done, 69,174 cis-eQTLs, 17 trans-eQTLs
## Task finished in  8.867  seconds
##

unlink(output_file_name_cis);
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')

## Analysis done in: 8.404  seconds

cat('Detected local eQTLs:','\n')

## Detected local eQTLs:

cis.eqtls<-me$cis$eqtls
head(cis.eqtls)

##        snps       gene statistic       pvalue          FDR      beta
## 1 rs4163042 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 2  116Mit88 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 3 rs4163058 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 4 rs4163391 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 5 rs4151923 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 6 rs3090019 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833

dim(cis.eqtls)

## [1] 69174     6

cis.eqtls$beta_se <-cis.eqtls$beta/cis.eqtls$statistic
write.table(cis.eqtls,file="2016-05-09 sub.mouseliver.cis.1M.eqtls.txt", sep="\t", row.names=FALSE, quote=FALSE)


# load mouse lung cis eqtl result
lung.mouse.eQTL<-read.table(file="2015-12-04 mouselung.cis.1M.eqtls.txt",  header=T)
# load mouse liver cis eqtl result
liver.mouse.eQTL<-read.table(file="2016-05-09 sub.mouseliver.cis.1M.eqtls.txt",  header=T)
mouse4302ensembl_id<-read.table(file="2015-12-04 mouse4302ensembl_id.txt",  header=T)
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt",  header=T)
# Add ensemble id annoatation to the data 
lung.mouse.eQTL<-merge(lung.mouse.eQTL, mouse4302ensembl_id, by.x = "gene", by.y="probe_id")
liver.mouse.eQTL<-merge(liver.mouse.eQTL, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
head(lung.mouse.eQTL)

##         gene          snps  statistic    pvalue       FDR        beta
## 1 1415670_at    rs13475374  1.0503967 0.2994035 0.5712395  0.06286667
## 2 1415670_at    rs13478876  0.9133601 0.3661462 0.6356597  0.05583410
## 3 1415670_at     rs3713705  1.1363521 0.2621028 0.5310916  0.06786667
## 4 1415670_at    rs13478880  1.1363521 0.2621028 0.5310916  0.06786667
## 5 1415670_at gnf06.086.089  0.9133601 0.3661462 0.6356597  0.05583410
## 6 1415672_at    rs13479651 -1.3052534 0.1987480 0.4510718 -0.03993582
##      beta_se         ensembl_id
## 1 0.05985040 ENSMUSG00000030058
## 2 0.06113044 ENSMUSG00000030058
## 3 0.05972327 ENSMUSG00000030058
## 4 0.05972327 ENSMUSG00000030058
## 5 0.06113044 ENSMUSG00000030058
## 6 0.03059622 ENSMUSG00000015341

head(liver.mouse.eQTL)

##         gene          snps  statistic    pvalue       FDR        beta
## 1 1415670_at    rs13475374 -0.4855043 0.6353986 0.8846784 -0.04598214
## 2 1415670_at    rs13478880 -0.4855043 0.6353986 0.8846784 -0.04598214
## 3 1415670_at gnf06.086.089 -0.4855043 0.6353986 0.8846784 -0.04598214
## 4 1415670_at    rs13478876 -0.4855043 0.6353986 0.8846784 -0.04598214
## 5 1415670_at     rs3713705 -0.4855043 0.6353986 0.8846784 -0.04598214
## 6 1415671_at    rs13479962  0.3050090 0.7651877 0.9346530  0.01290000
##      beta_se         ensembl_id
## 1 0.09471005 ENSMUSG00000030058
## 2 0.09471005 ENSMUSG00000030058
## 3 0.09471005 ENSMUSG00000030058
## 4 0.09471005 ENSMUSG00000030058
## 5 0.09471005 ENSMUSG00000030058
## 6 0.04229383 ENSMUSG00000013160

library(data.table)
# Select Gene-SNP pair with minimum P value
lung.mouse.eQTL.min <- data.table(lung.mouse.eQTL, key=c('ensembl_id', "pvalue"))
lung.mouse.eQTL.min<-lung.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
lung.mouse.eQTL.min<-as.data.frame(lung.mouse.eQTL.min)

liver.mouse.eQTL.min <- data.table(liver.mouse.eQTL, key=c('ensembl_id', "pvalue"))
liver.mouse.eQTL.min<-liver.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
liver.mouse.eQTL.min<-as.data.frame(liver.mouse.eQTL.min)

library(plyr)
lung.mouse.eQTL.min<-rename(lung.mouse.eQTL.min, c("pvalue"="lung_pvalue", "beta"="lung.beta", "beta_se"="lung.beta_se"))
liver.mouse.eQTL.min<-rename(liver.mouse.eQTL.min, c("pvalue"="liver_pvalue", "beta"="liver.beta", "beta_se"="liver.beta_se"))

head(lung.mouse.eQTL.min)

##           gene             snps  statistic lung_pvalue       FDR
## 1   1428645_at        rs3702359 -1.4324741   0.1592383 0.3935788
## 2 1421514_a_at     gnfX.141.820 -0.1728806   0.8635565 0.9457289
## 3   1416677_at        rs3720981 -0.1308388   0.8965131 0.9603432
## 4   1451677_at CEL-11_120628029  0.4574870   0.6496243 0.8393760
## 5   1425955_at       rs13478643 -1.0185410   0.3141176 0.5866782
## 6 1426241_a_at        rs4224744  0.9192838   0.3630755 0.6327046
##      lung.beta lung.beta_se         ensembl_id
## 1 -0.059800000   0.04174596 ENSMUSG00000000001
## 2 -0.005911765   0.03419566 ENSMUSG00000000037
## 3 -0.023660714   0.18083869 ENSMUSG00000000049
## 4  0.016446429   0.03594950 ENSMUSG00000000056
## 5 -0.078136752   0.07671439 ENSMUSG00000000058
## 6  0.052847059   0.05748721 ENSMUSG00000000085

head(liver.mouse.eQTL.min)

##           gene             snps  statistic liver_pvalue        FDR
## 1   1428645_at       rs13477320 -1.3801032  0.190827641 0.57060083
## 2   1449320_at   CEL-X_71438949  1.2101767  0.247756346 0.63979906
## 3 1421514_a_at  CEL-X_154048891 -0.3183323  0.755289094 0.93238030
## 4   1416677_at        rs3670642  3.2114557  0.006815612 0.08431322
## 5   1425344_at CEL-11_120628029 -1.5163463  0.153365833 0.52379422
## 6   1417327_at  NAT_6_18.199327  1.1762017  0.260604313 0.65097975
##    liver.beta liver.beta_se         ensembl_id
## 1 -0.07366667    0.05337765 ENSMUSG00000000001
## 2  0.06350000    0.05247167 ENSMUSG00000000003
## 3 -0.01177273    0.03698251 ENSMUSG00000000037
## 4  0.07875000    0.02452159 ENSMUSG00000000049
## 5 -0.24430000    0.16111096 ENSMUSG00000000056
## 6  0.11090000    0.09428655 ENSMUSG00000000058

tail(liver.mouse.eQTL.min)

##               gene       snps  statistic liver_pvalue       FDR
## 10962   1434694_at rs13459062 -1.7224576   0.10867553 0.4460593
## 10963   1437645_at   115Mit16  2.0044772   0.06630646 0.3458785
## 10964 1449939_s_at rs13481642 -0.4164743   0.68385715 0.9063245
## 10965   1422547_at  rs4165065 -1.6637788   0.12006199 0.4695369
## 10966   1451476_at  rs4165065 -0.1824077   0.85807655 0.9631432
## 10967 1453995_a_at  rs4165065 -1.4418659   0.17299524 0.5490605
##        liver.beta liver.beta_se         ensembl_id
## 10962 -0.16160714    0.09382358 ENSMUSG00000099041
## 10963  0.10340000    0.05158452 ENSMUSG00000099083
## 10964 -0.01975000    0.04742189 ENSMUSG00000099116
## 10965 -0.10844444    0.06517960 ENSMUSG00000099164
## 10966 -0.01105556    0.06060904 ENSMUSG00000099262
## 10967 -0.05450000    0.03779824 ENSMUSG00000099305

dim(lung.mouse.eQTL.min)

## [1] 11015     8

dim(liver.mouse.eQTL.min)

## [1] 10967     8

# lung, liver eqtl with ensemble_id
merged.mouse.eQTL.min<-merge(lung.mouse.eQTL.min, liver.mouse.eQTL.min, by.x = "ensembl_id", by.y="ensembl_id")
head(merged.mouse.eQTL.min)

##           ensembl_id       gene.x           snps.x statistic.x lung_pvalue
## 1 ENSMUSG00000000001   1428645_at        rs3702359  -1.4324741   0.1592383
## 2 ENSMUSG00000000037 1421514_a_at     gnfX.141.820  -0.1728806   0.8635565
## 3 ENSMUSG00000000049   1416677_at        rs3720981  -0.1308388   0.8965131
## 4 ENSMUSG00000000056   1451677_at CEL-11_120628029   0.4574870   0.6496243
## 5 ENSMUSG00000000058   1425955_at       rs13478643  -1.0185410   0.3141176
## 6 ENSMUSG00000000085 1426241_a_at        rs4224744   0.9192838   0.3630755
##       FDR.x    lung.beta lung.beta_se       gene.y           snps.y
## 1 0.3935788 -0.059800000   0.04174596   1428645_at       rs13477320
## 2 0.9457289 -0.005911765   0.03419566 1421514_a_at  CEL-X_154048891
## 3 0.9603432 -0.023660714   0.18083869   1416677_at        rs3670642
## 4 0.8393760  0.016446429   0.03594950   1425344_at CEL-11_120628029
## 5 0.5866782 -0.078136752   0.07671439   1417327_at  NAT_6_18.199327
## 6 0.6327046  0.052847059   0.05748721 1426241_a_at    gnf04.117.102
##   statistic.y liver_pvalue      FDR.y  liver.beta liver.beta_se
## 1  -1.3801032  0.190827641 0.57060083 -0.07366667    0.05337765
## 2  -0.3183323  0.755289094 0.93238030 -0.01177273    0.03698251
## 3   3.2114557  0.006815612 0.08431322  0.07875000    0.02452159
## 4  -1.5163463  0.153365833 0.52379422 -0.24430000    0.16111096
## 5   1.1762017  0.260604313 0.65097975  0.11090000    0.09428655
## 6  -0.7745039  0.452489862 0.79168033 -0.01910000    0.02466095

dim(merged.mouse.eQTL.min)

## [1] 10422    15

merged.mouse.eQTL.min<-data.frame(merged.mouse.eQTL.min)
merged.mouse.eQTL.min<-merged.mouse.eQTL.min[, c(1, 5, 7, 8, 12, 14, 15 )]
head(merged.mouse.eQTL.min)

##           ensembl_id lung_pvalue    lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001   0.1592383 -0.059800000   0.04174596  0.190827641
## 2 ENSMUSG00000000037   0.8635565 -0.005911765   0.03419566  0.755289094
## 3 ENSMUSG00000000049   0.8965131 -0.023660714   0.18083869  0.006815612
## 4 ENSMUSG00000000056   0.6496243  0.016446429   0.03594950  0.153365833
## 5 ENSMUSG00000000058   0.3141176 -0.078136752   0.07671439  0.260604313
## 6 ENSMUSG00000000085   0.3630755  0.052847059   0.05748721  0.452489862
##    liver.beta liver.beta_se
## 1 -0.07366667    0.05337765
## 2 -0.01177273    0.03698251
## 3  0.07875000    0.02452159
## 4 -0.24430000    0.16111096
## 5  0.11090000    0.09428655
## 6 -0.01910000    0.02466095

write.table(merged.mouse.eQTL.min,file="2016-05-09 mouse.liver.expression.min.txt", sep="\t", row.names=FALSE, quote=FALSE)

merged.mouse.eQTL.min.variance2<-read.table(file="2016-05-09 mouse.liver.expression.min.txt",  header=T)

head(merged.mouse.eQTL.min.variance2)

##           ensembl_id lung_pvalue    lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001   0.1592383 -0.059800000   0.04174596  0.190827641
## 2 ENSMUSG00000000037   0.8635565 -0.005911765   0.03419566  0.755289094
## 3 ENSMUSG00000000049   0.8965131 -0.023660714   0.18083869  0.006815612
## 4 ENSMUSG00000000056   0.6496243  0.016446429   0.03594950  0.153365833
## 5 ENSMUSG00000000058   0.3141176 -0.078136752   0.07671439  0.260604313
## 6 ENSMUSG00000000085   0.3630755  0.052847059   0.05748721  0.452489862
##    liver.beta liver.beta_se
## 1 -0.07366667    0.05337765
## 2 -0.01177273    0.03698251
## 3  0.07875000    0.02452159
## 4 -0.24430000    0.16111096
## 5  0.11090000    0.09428655
## 6 -0.01910000    0.02466095

# caculate the absolute value of live/lung beta
merged.mouse.eQTL.min.variance2$abs_liver.beta<-abs(merged.mouse.eQTL.min.variance2$liver.beta)
merged.mouse.eQTL.min.variance2$abs_lung.beta<-abs(merged.mouse.eQTL.min.variance2$lung.beta)
# caculate negative log lung p value
merged.mouse.eQTL.min.variance2$neg_log_lung_pvalue<--log10(merged.mouse.eQTL.min.variance2$lung_pvalue)

# Simple linear regression between abs_liver.beta and abs_lung.beta
# fit1<-summary(lm(abs_liver.beta ~ abs_lung.beta, data=merged.mouse.eQTL.min.variance2))
# fit1
# tau<-fit1$sigma**2
# check association between abs_liver.beta and abs.lung.beta
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.2.4

ggplot(merged.mouse.eQTL.min.variance2, aes(x=abs_lung.beta, y=abs_liver.beta)) +geom_point()+geom_smooth(method=lm)

cor(merged.mouse.eQTL.min.variance2$abs_lung.beta, merged.mouse.eQTL.min.variance2$abs_liver.beta)

## [1] 0.3961265

merged.mouse.eQTL<-merged.mouse.eQTL.min.variance2
# retrieve ensembl_id
markers<-merged.mouse.eQTL[, 1]
# Yg=Ag + Bg*Xsnp+V
# retrieve betas.hat (liver.beta)
betas.hat<-merged.mouse.eQTL$abs_liver.beta
# retrieve liver.beta_se
se<-merged.mouse.eQTL$liver.beta_se

# creat Z matrix with 2 columns: 1 for intercept,abs_lung.beta (merged.mouse.eQTL[,10])
Z<-as.matrix(merged.mouse.eQTL$abs_lung.beta)
Z<-replace(Z,is.na(Z),0)
Z<-data.frame(1,Z) 
Z<-as.matrix(Z)
rowLength<-length(markers)
# liver.betas=Z*gama+T^2

# Regression: abs_liver.beta = intercept + beta*abs_lung.beta + error 
lmsummary<-summary(lm(abs_liver.beta~-1+Z, data=merged.mouse.eQTL))
lmsummary

## 
## Call:
## lm(formula = abs_liver.beta ~ -1 + Z, data = merged.mouse.eQTL)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2079 -0.0735 -0.0389  0.0218  4.2525 
## 
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)    
## ZX1 0.075463   0.002259   33.41   <2e-16 ***
## ZZ  0.365645   0.008303   44.04   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1987 on 10420 degrees of freedom
## Multiple R-squared:  0.3701, Adjusted R-squared:  0.3699 
## F-statistic:  3061 on 2 and 10420 DF,  p-value: < 2.2e-16

# error ~ N(0, Tau)
tau<-lmsummary$sigma**2
tau

## [1] 0.03950088

# output coeffieients (gamma matrix)
# gamma matrix
gamma<-as.matrix(lmsummary$coefficients[,1])
# trasnpose Z matrix
Z_transpose<-t(Z)
# create identity matrix
identity<-diag(nrow=rowLength) 
# original betas.hat
betas.hat<-as.matrix(betas.hat)
#creat V matrix for liver_residual_variance
V <- matrix(0, rowLength, rowLength)
# V, liver residual variance
diag(V) <- merged.mouse.eQTL$liver.beta_se^2
# Creat Tau matrix
Tau<- diag(tau, rowLength, rowLength)
# follow Chen's paper and cacualte s
s <-V + Tau
# create inverse function for inversing diagnoal matrix 
diag.inverse <- function(x){diag(1/diag(x), nrow(x), ncol(x))}
# create multiplication function for multiplicating two diagnoal matrix 
diag.multi <- function(x,y){diag(diag(x)*diag(y), nrow(x), ncol(x))}
# inverse s
S <-diag.inverse(s)
# follow chen's paper to caculate omega
omega<-diag.multi(S, V)
# retrieve omega value from the matrix 
omega.diag<-diag(omega )
# summary the omega value
summary(omega.diag)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005225 0.039240 0.071550 0.121400 0.145500 0.962900

# betas.thea<- S %*% Z %*% gamma + (identity-S) %*% betas.hat
# caculate betas.tieda with the formula in Chen's paper
betas.tieda<- omega %*% Z %*% gamma + (identity-omega) %*% betas.hat
# crbetas.tieda<- cromega %*% Z %*% gamma + (identity-cromega) %*% betas.hat
head(betas.tieda)

##            [,1]
## [1,] 0.07525859
## [2,] 0.01397654
## [3,] 0.07883044
## [4,] 0.17973373
## [5,] 0.10963860
## [6,] 0.02024762

head(betas.hat)

##            [,1]
## [1,] 0.07366667
## [2,] 0.01177273
## [3,] 0.07875000
## [4,] 0.24430000
## [5,] 0.11090000
## [6,] 0.01910000

#regression beta
regbeta <-Z %*% gamma
head(regbeta)

##            [,1]
## [1,] 0.09732903
## [2,] 0.07762505
## [3,] 0.08411487
## [4,] 0.08147700
## [5,] 0.10403378
## [6,] 0.09478672

summary(regbeta)

##        V1         
##  Min.   :0.07546  
##  1st Qu.:0.08888  
##  Median :0.10149  
##  Mean   :0.12590  
##  3rd Qu.:0.12863  
##  Max.   :2.21539

markers1<-as.character(markers)
# combine ensemble_id, betas.hat and betas.tieda
outputVector<-c(markers1,betas.hat,betas.tieda)
write.table(matrix(outputVector,rowLength),file="2016-04-26_hm_tau_hmresults.txt",col.names=FALSE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian<-read.table(file="2016-04-26_hm_tau_hmresults.txt")
colnames(liver.mouse.eQTL.bayesian)<-c( "ensembl_id", "betas.hat","betas.tieda")
head(liver.mouse.eQTL.bayesian)

##           ensembl_id  betas.hat betas.tieda
## 1 ENSMUSG00000000001 0.07366667  0.07525859
## 2 ENSMUSG00000000037 0.01177273  0.01397654
## 3 ENSMUSG00000000049 0.07875000  0.07883044
## 4 ENSMUSG00000000056 0.24430000  0.17973373
## 5 ENSMUSG00000000058 0.11090000  0.10963860
## 6 ENSMUSG00000000085 0.01910000  0.02024762

# merge dataset with betas.hat and betas.tieda
liver.mouse.eQTL.bayesian.all<- merge(liver.mouse.eQTL.bayesian, merged.mouse.eQTL.min.variance2, by = "ensembl_id")
head(liver.mouse.eQTL.bayesian.all)

##           ensembl_id  betas.hat betas.tieda lung_pvalue    lung.beta
## 1 ENSMUSG00000000001 0.07366667  0.07525859   0.1592383 -0.059800000
## 2 ENSMUSG00000000037 0.01177273  0.01397654   0.8635565 -0.005911765
## 3 ENSMUSG00000000049 0.07875000  0.07883044   0.8965131 -0.023660714
## 4 ENSMUSG00000000056 0.24430000  0.17973373   0.6496243  0.016446429
## 5 ENSMUSG00000000058 0.11090000  0.10963860   0.3141176 -0.078136752
## 6 ENSMUSG00000000085 0.01910000  0.02024762   0.3630755  0.052847059
##   lung.beta_se liver_pvalue  liver.beta liver.beta_se abs_liver.beta
## 1   0.04174596  0.190827641 -0.07366667    0.05337765     0.07366667
## 2   0.03419566  0.755289094 -0.01177273    0.03698251     0.01177273
## 3   0.18083869  0.006815612  0.07875000    0.02452159     0.07875000
## 4   0.03594950  0.153365833 -0.24430000    0.16111096     0.24430000
## 5   0.07671439  0.260604313  0.11090000    0.09428655     0.11090000
## 6   0.05748721  0.452489862 -0.01910000    0.02466095     0.01910000
##   abs_lung.beta neg_log_lung_pvalue
## 1   0.059800000          0.79795236
## 2   0.005911765          0.06370924
## 3   0.023660714          0.04744338
## 4   0.016446429          0.18733775
## 5   0.078136752          0.50290766
## 6   0.052847059          0.44000309

write.table(liver.mouse.eQTL.bayesian.all,file="2016-05-09_liver.mouse.eQTL.bayesian.all.txt")
liver.mouse.eQTL.bayesian<-read.table(file="2016-05-09_liver.mouse.eQTL.bayesian.all.txt")
head(liver.mouse.eQTL.bayesian)

##           ensembl_id  betas.hat betas.tieda lung_pvalue    lung.beta
## 1 ENSMUSG00000000001 0.07366667  0.07525859   0.1592383 -0.059800000
## 2 ENSMUSG00000000037 0.01177273  0.01397654   0.8635565 -0.005911765
## 3 ENSMUSG00000000049 0.07875000  0.07883044   0.8965131 -0.023660714
## 4 ENSMUSG00000000056 0.24430000  0.17973373   0.6496243  0.016446429
## 5 ENSMUSG00000000058 0.11090000  0.10963860   0.3141176 -0.078136752
## 6 ENSMUSG00000000085 0.01910000  0.02024762   0.3630755  0.052847059
##   lung.beta_se liver_pvalue  liver.beta liver.beta_se abs_liver.beta
## 1   0.04174596  0.190827641 -0.07366667    0.05337765     0.07366667
## 2   0.03419566  0.755289094 -0.01177273    0.03698251     0.01177273
## 3   0.18083869  0.006815612  0.07875000    0.02452159     0.07875000
## 4   0.03594950  0.153365833 -0.24430000    0.16111096     0.24430000
## 5   0.07671439  0.260604313  0.11090000    0.09428655     0.11090000
## 6   0.05748721  0.452489862 -0.01910000    0.02466095     0.01910000
##   abs_lung.beta neg_log_lung_pvalue
## 1   0.059800000          0.79795236
## 2   0.005911765          0.06370924
## 3   0.023660714          0.04744338
## 4   0.016446429          0.18733775
## 5   0.078136752          0.50290766
## 6   0.052847059          0.44000309

liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", 
                                                                        "liver.beta_se", "betas.tieda", 
                                                                         "liver_pvalue", "abs_lung.beta",
                                                                        "abs_lung.beta", "neg_log_lung_pvalue"))

head(liver.mouse.eQTL.bayesian)

##           ensembl_id  betas.hat liver.beta_se betas.tieda liver_pvalue
## 1 ENSMUSG00000000001 0.07366667    0.05337765  0.07525859  0.190827641
## 2 ENSMUSG00000000037 0.01177273    0.03698251  0.01397654  0.755289094
## 3 ENSMUSG00000000049 0.07875000    0.02452159  0.07883044  0.006815612
## 4 ENSMUSG00000000056 0.24430000    0.16111096  0.17973373  0.153365833
## 5 ENSMUSG00000000058 0.11090000    0.09428655  0.10963860  0.260604313
## 6 ENSMUSG00000000085 0.01910000    0.02466095  0.02024762  0.452489862
##   abs_lung.beta abs_lung.beta.1 neg_log_lung_pvalue
## 1   0.059800000     0.059800000          0.79795236
## 2   0.005911765     0.005911765          0.06370924
## 3   0.023660714     0.023660714          0.04744338
## 4   0.016446429     0.016446429          0.18733775
## 5   0.078136752     0.078136752          0.50290766
## 6   0.052847059     0.052847059          0.44000309

# Caculate variance for beta.tieda by following Brian Kulis' lecture notes
# Invert Tau and V
Tau_invert<-diag.inverse(Tau)
V_invert<-diag.inverse(V)
PS_invert<-Tau_invert + V_invert

# PS_invert<-Tau_invert+V_invert%*% Z  %*% Z_transpose # previous wrong code
# S in Brian Kulis' lecture note:PS
PS <- diag.inverse(PS_invert)
# retrieve posterior variance 
ps<-diag(PS)
range(ps)

## [1] 0.0002063788 0.0380352366

library(reshape)

## 
## Attaching package: 'reshape'

## The following objects are masked from 'package:plyr':
## 
##     rename, round_any

## The following object is masked from 'package:data.table':
## 
##     melt

# reshape posterior variance to long format 
ps.long <- melt(ps)
head(ps.long)

##          value
## 1 0.0026574907
## 2 0.0013219343
## 3 0.0005922921
## 4 0.0156637851
## 5 0.0072567664
## 6 0.0005989409

# Caculate sd: square root on variance
ps.long$betas.tieda.se<-(ps.long$value)^0.5
# combine sd to the data.frame
liver.mouse.eQTL.bayesian<-cbind(liver.mouse.eQTL.bayesian,ps.long$betas.tieda.se)
# head(liver.mouse.eQTL.bayesian)
# rename betas.tieda.se
liver.mouse.eQTL.bayesian<-rename(liver.mouse.eQTL.bayesian, c("ps.long$betas.tieda.se"="betas.tieda.se", "liver.beta_se"="betas.hat.se"))

liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", "betas.hat.se", 
                                                                        "betas.tieda", "betas.tieda.se",                                                                         
                                                                        "liver_pvalue", "abs_lung.beta", "neg_log_lung_pvalue"))
# head(liver.mouse.eQTL.bayesian)

# library(tigerstats)
# pnormGC(0, region="below", mean=0.002352829,sd=0.09972950)

# caculate probability of betas.tieda below 0 based on betas.tieda and standard deviation
liver.mouse.eQTL.bayesian$p.below.0 <- pnorm(0,liver.mouse.eQTL.bayesian$betas.tieda, liver.mouse.eQTL.bayesian$betas.tieda.se)
head(liver.mouse.eQTL.bayesian)

##           ensembl_id  betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667   0.05337765  0.07525859     0.05155085
## 2 ENSMUSG00000000037 0.01177273   0.03698251  0.01397654     0.03635841
## 3 ENSMUSG00000000049 0.07875000   0.02452159  0.07883044     0.02433705
## 4 ENSMUSG00000000056 0.24430000   0.16111096  0.17973373     0.12515504
## 5 ENSMUSG00000000058 0.11090000   0.09428655  0.10963860     0.08518666
## 6 ENSMUSG00000000085 0.01910000   0.02466095  0.02024762     0.02447327
##   liver_pvalue abs_lung.beta neg_log_lung_pvalue   p.below.0
## 1  0.190827641   0.059800000          0.79795236 0.072160112
## 2  0.755289094   0.005911765          0.06370924 0.350337299
## 3  0.006815612   0.023660714          0.04744338 0.000599512
## 4  0.153365833   0.016446429          0.18733775 0.075488568
## 5  0.260604313   0.078136752          0.50290766 0.099040250
## 6  0.452489862   0.052847059          0.44000309 0.204023322

dim(liver.mouse.eQTL.bayesian)

## [1] 10422     9

summary(liver.mouse.eQTL.bayesian$betas.tieda.se)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01437 0.03937 0.05316 0.06185 0.07581 0.19500

range(liver.mouse.eQTL.bayesian$p.below.0)

## [1] 0.0000000 0.4820141

write.table(liver.mouse.eQTL.bayesian,file="2016-05-04_liver.mouse.eQTL.bayesian with beta.txt")
liver.mouse.eQTL.bayesian <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian with beta.txt")
# head(liver.mouse.eQTL.bayesian)
# summary(liver.mouse.eQTL.bayesian$liver_residual_variance)
liver.mouse.eQTL.bayesian.4tau <- liver.mouse.eQTL.bayesian


# colnames(liver.mouse.eQTL.bayesian.4tau) <- c("ensembl_id", "betas.hat", "betas.hat.se", "betas.tieda", "betas.tieda.se", "liver_residual_variance", "liver_pvalue", "abs_lung.beta", 
#                                              "neg_log_lung_pvalue", "p.below.0", "betas.tieda2m", "betas.tieda3rd", "betas.tieda4max")
# head(liver.mouse.eQTL.bayesian.4tau)

# Introduce weight (Tmm) to adjust Tau with neg_log_lung_pvalue
liver.mouse.eQTL.bayesian.4tau$fzm <- liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue
# caculate ratio_fzm
liver.mouse.eQTL.bayesian.4tau$ratio_fzm <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/liver.mouse.eQTL.bayesian.4tau$fzm
range(liver.mouse.eQTL.bayesian.4tau$ratio_fzm)

## [1]   1 Inf

# set up threshold for ratio_fzm and caculate updated ratio_fzm (nratio_fzm)
threshold <- 0.05
liver.mouse.eQTL.bayesian.4tau$nratio_fzm <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$ratio_fzm > max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))] <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))
# liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$nratio_fzm >= max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log(threshold))] <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
summary(liver.mouse.eQTL.bayesian.4tau$nratio_fzm)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   24.16   35.84   29.18   35.84   35.84

# compare bayesian prediction to the traditional method 
# evaluate the predition with alle specific expreesion in the liver: Sandrine Lagarrigue's paper
liver.ASE <- read.csv(file= "ASE.genetics.113.153882-6.csv")
dim(liver.ASE)

## [1] 1191   19

head(liver.ASE)

##           replicate chr startExon        geneID SNPperExon3
## 1 M.CH. DxB and BxD   1   9535488          Rrs1          15
## 2  M.HF DxB and BxD   1   9535488          Rrs1          14
## 3  F.HF DxB and BxD   1   9535488          Rrs1          15
## 4 M.CH. DxB and BxD   1  37473929 6330578E17Rik           2
## 5 M.CH. DxB and BxD   1  58169979          Aox3           3
## 6  M.HF DxB and BxD   1  58169979          Aox3           3
##   sumBperExon.DxB4 sumDperExon.DxB4 sumBperExon.BxD4 sumDperExon.BxD4
## 1               45               19               50               25
## 2               74               39               66               30
## 3               76               20               77               40
## 4               78               32               47               17
## 5              473               82              225               27
## 6              252               56              263               53
##   FCadd1.DxB5 FCadd1.BxD5 BonBD.DxB6 BonBD.BxD6 pvalBH.DxB7 pvalBH.BxD7
## 1        2.30        1.96       0.70       0.67     1.0e-02     3.7e-02
## 2        1.88        2.16       0.65       0.69     1.1e-02     3.3e-03
## 3        3.67        1.90       0.79       0.66     4.9e-07     5.1e-03
## 4        2.39        2.67       0.71       0.73     1.9e-04     2.2e-03
## 5        5.71        8.07       0.85       0.89     3.6e-64     5.4e-37
## 6        4.44        4.89       0.82       0.83     8.7e-28     1.1e-31
##   UTR5 UTR3 strand exonCount
## 1    0    0      +         1
## 2    0    0      +         1
## 3    0    0      +         1
## 4    0    0      -         3
## 5    0    0      +        35
## 6    0    0      +        35

# 440 unique gene ID
length(unique(liver.ASE$geneID))

## [1] 440

# verify ASE table
liver.ASE1 <- liver.ASE[which(liver.ASE$replicate == "M.CH. DxB and BxD"), ]
liver.ASE2 <- liver.ASE[which(liver.ASE$replicate == "M.HF DxB and BxD"), ]
liver.ASE3 <- liver.ASE[which(liver.ASE$replicate == "F.HF DxB and BxD"), ]
length(unique(liver.ASE1$geneID))

## [1] 272

length(unique(liver.ASE2$geneID))

## [1] 275

length(unique(liver.ASE3$geneID))

## [1] 304

(length(unique(liver.ASE1$geneID))+length(unique(liver.ASE2$geneID))+length(unique(liver.ASE3$geneID)))/3

## [1] 283.6667

# As claimed in the paper: averaged 284 ASE for each replicate
sub.liver.ASE <-liver.ASE1
summary(sub.liver.ASE$pvalBH.DxB7)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0000000 0.0000058 0.0084070 0.0031000 0.1000000

sub.liver.ASE1 <- subset(sub.liver.ASE, pvalBH.DxB7 < 0.00000000000001)
sub.liver.ASE2 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.00000000000001 & pvalBH.DxB7 < 0.0000058)
sub.liver.ASE3 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.0000058 & pvalBH.DxB7 < 0.0031000)
sub.liver.ASE4 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.0031000)
dim(sub.liver.ASE1)

## [1] 89 19

dim(sub.liver.ASE2)

## [1] 97 19

dim(sub.liver.ASE3)

## [1] 93 19

dim(sub.liver.ASE4)

## [1] 94 19

# Subset liver ASE with different conditions
# sub.liver.ASE <-liver.ASE[which(liver.ASE$pvalBH.DxB7 < 0.05 & liver.ASE$pvalBH.BxD7 < 0.05), ]
# sub.liver.ASE <- sub.liver.ASE[order(sub.liver.ASE$pvalBH.BxD7), ]
# summary(sub.liver.ASE$pvalBH.BxD7)
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.BxD7 <= 9.0e-10 ), ]

# sub.liver.ASE <- sub.liver.ASE[ sub.liver.ASE$geneID %in%  names(table(sub.liver.ASE$geneID))[table(sub.liver.ASE$geneID) >1] , ]
# check the remain gene number after subsetting
dim(sub.liver.ASE)

## [1] 373  19

liver.ASE.symbol <- unique(sub.liver.ASE$geneID)
liver.ASE.symbol1 <- unique(sub.liver.ASE1$geneID)
liver.ASE.symbol2 <- unique(sub.liver.ASE2$geneID)
liver.ASE.symbol3 <- unique(sub.liver.ASE3$geneID)
liver.ASE.symbol4 <- unique(sub.liver.ASE4$geneID)
length(liver.ASE.symbol)

## [1] 272

# Annoate gene symbol wiht ensemble.ID
library(biomaRt)
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
liver.ASE.ensembl <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                              "mgi_symbol", values =liver.ASE.symbol, mart=mouse)

liver.ASE.ensembl1 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                               "mgi_symbol", values =liver.ASE.symbol1, mart=mouse)
liver.ASE.ensembl2 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                               "mgi_symbol", values =liver.ASE.symbol2, mart=mouse)
liver.ASE.ensembl3 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                               "mgi_symbol", values =liver.ASE.symbol3, mart=mouse)
liver.ASE.ensembl4 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                               "mgi_symbol", values =liver.ASE.symbol4, mart=mouse)
dim(liver.ASE.ensembl)

## [1] 241   2

liver.ASE.ensembl <- unique(liver.ASE.ensembl)
# delete liver ASE ensemble ID which are not in the liver.mouse.eQTL.bayesian data frame
liver.ASE.ensembl <- liver.ASE.ensembl[liver.ASE.ensembl$ensembl_gene_id %in% liver.mouse.eQTL.bayesian.4tau$ensembl_id, ]
dim(liver.ASE.ensembl)

## [1] 190   2

# create indicator for ASE true or not
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl1$ensembl_gene_id] <- 1
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl2$ensembl_gene_id] <- 2
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl3$ensembl_gene_id] <- 3
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl4$ensembl_gene_id] <- 4
# liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 5
 liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 1
 liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 0

summary(liver.mouse.eQTL.bayesian.4tau$eqtl)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.00000 0.01823 0.00000 1.00000

liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue <- -log10(liver.mouse.eQTL.bayesian.4tau$liver_pvalue)
head(liver.mouse.eQTL.bayesian.4tau)

##           ensembl_id  betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667   0.05337765  0.07525859     0.05155085
## 2 ENSMUSG00000000037 0.01177273   0.03698251  0.01397654     0.03635841
## 3 ENSMUSG00000000049 0.07875000   0.02452159  0.07883044     0.02433705
## 4 ENSMUSG00000000056 0.24430000   0.16111096  0.17973373     0.12515504
## 5 ENSMUSG00000000058 0.11090000   0.09428655  0.10963860     0.08518666
## 6 ENSMUSG00000000085 0.01910000   0.02466095  0.02024762     0.02447327
##   liver_pvalue abs_lung.beta neg_log_lung_pvalue   p.below.0        fzm
## 1  0.190827641   0.059800000          0.79795236 0.072160112 0.79795236
## 2  0.755289094   0.005911765          0.06370924 0.350337299 0.06370924
## 3  0.006815612   0.023660714          0.04744338 0.000599512 0.04744338
## 4  0.153365833   0.016446429          0.18733775 0.075488568 0.18733775
## 5  0.260604313   0.078136752          0.50290766 0.099040250 0.50290766
## 6  0.452489862   0.052847059          0.44000309 0.204023322 0.44000309
##   ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1  58.43301   35.83834    0            0.7193587
## 2 731.86805   35.83834    0            0.1218868
## 3 982.78748   35.83834    0            2.1664952
## 4 248.89142   35.83834    0            0.8142714
## 5  92.71435   35.83834    0            0.5840184
## 6 105.96916   35.83834    0            0.3443911

by(liver.mouse.eQTL.bayesian.4tau[, c(1, 7, 9, 14)], liver.mouse.eQTL.bayesian.4tau[, "eqtl"], summary)

## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 0
##               ensembl_id    abs_lung.beta       p.below.0      
##  ENSMUSG00000000001:    1   Min.   :0.00000   Min.   :0.00000  
##  ENSMUSG00000000037:    1   1st Qu.:0.03640   1st Qu.:0.01960  
##  ENSMUSG00000000049:    1   Median :0.07032   Median :0.09616  
##  ENSMUSG00000000056:    1   Mean   :0.13375   Mean   :0.13291  
##  ENSMUSG00000000058:    1   3rd Qu.:0.14167   3rd Qu.:0.21888  
##  ENSMUSG00000000085:    1   Max.   :4.17678   Max.   :0.48201  
##  (Other)           :10226                                      
##  neg_log_liver_pvalue
##  Min.   : 0.0000     
##  1st Qu.: 0.2730     
##  Median : 0.6021     
##  Mean   : 0.9621     
##  3rd Qu.: 1.1737     
##  Max.   :15.6916     
##                      
## -------------------------------------------------------- 
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 1
##               ensembl_id  abs_lung.beta        p.below.0        
##  ENSMUSG00000000275:  1   Min.   :0.001944   Min.   :0.0000000  
##  ENSMUSG00000000673:  1   1st Qu.:0.081911   1st Qu.:0.0000000  
##  ENSMUSG00000001467:  1   Median :0.180165   Median :0.0004881  
##  ENSMUSG00000001473:  1   Mean   :0.363517   Mean   :0.0412863  
##  ENSMUSG00000001604:  1   3rd Qu.:0.376020   3rd Qu.:0.0278189  
##  ENSMUSG00000002395:  1   Max.   :5.852455   Max.   :0.3579107  
##  (Other)           :184                                         
##  neg_log_liver_pvalue
##  Min.   : 0.04328    
##  1st Qu.: 0.83625    
##  Median : 2.37521    
##  Mean   : 3.16619    
##  3rd Qu.: 4.66122    
##  Max.   :13.80969    
##

library(ggplot2)
boxplot(neg_log_liver_pvalue ~ eqtl,data=liver.mouse.eQTL.bayesian.4tau, main="liver.mouse.eQTL", 
        xlab="ASE cutoff by p value", ylab="liver neg log p")

boxplot(neg_log_lung_pvalue ~ eqtl,data=liver.mouse.eQTL.bayesian.4tau, main="lung.mouse.eQTL", 
        xlab="ASE cutoff by p value", ylab="lung neg log p")

liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]

plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl), xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue" )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue, , col="red", xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau$betas.hat, liver.mouse.eQTL.bayesian.4tau$betas.tieda,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$betas.hat, liver.mouse.eQTL.bayesian.4tau.ase$betas.tieda, col="red") 
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

cor(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue)

## [1] 0.4786611

length(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue)

## [1] 190

plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$p.below.0,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$p.below.0, col="red") 
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

head(liver.mouse.eQTL.bayesian.4tau)

##           ensembl_id  betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667   0.05337765  0.07525859     0.05155085
## 2 ENSMUSG00000000037 0.01177273   0.03698251  0.01397654     0.03635841
## 3 ENSMUSG00000000049 0.07875000   0.02452159  0.07883044     0.02433705
## 4 ENSMUSG00000000056 0.24430000   0.16111096  0.17973373     0.12515504
## 5 ENSMUSG00000000058 0.11090000   0.09428655  0.10963860     0.08518666
## 6 ENSMUSG00000000085 0.01910000   0.02466095  0.02024762     0.02447327
##   liver_pvalue abs_lung.beta neg_log_lung_pvalue   p.below.0        fzm
## 1  0.190827641   0.059800000          0.79795236 0.072160112 0.79795236
## 2  0.755289094   0.005911765          0.06370924 0.350337299 0.06370924
## 3  0.006815612   0.023660714          0.04744338 0.000599512 0.04744338
## 4  0.153365833   0.016446429          0.18733775 0.075488568 0.18733775
## 5  0.260604313   0.078136752          0.50290766 0.099040250 0.50290766
## 6  0.452489862   0.052847059          0.44000309 0.204023322 0.44000309
##   ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1  58.43301   35.83834    0            0.7193587
## 2 731.86805   35.83834    0            0.1218868
## 3 982.78748   35.83834    0            2.1664952
## 4 248.89142   35.83834    0            0.8142714
## 5  92.71435   35.83834    0            0.5840184
## 6 105.96916   35.83834    0            0.3443911

# Optimizing rho and adjust the weight
library(reshape)
rho.optimization <- matrix(0, nrow=nrow(liver.mouse.eQTL.bayesian.4tau), ncol=7)
colnames(rho.optimization)<-c("rho","tmm","tau", "omega","beta_tieda", "n.betas.tieda.se","p.below.0" )
nomega.diag<-diag(omega )
rho <- seq(1,1.1, by=0.02)*tau # tau = 0.03940381

result <- NULL
for (i in 1:length(rho))  {
  rho.optimization[ ,1] <- rho[i]
  rho.optimization[ ,2] <- (rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm
  rho.optimization[ ,3] <-tau*((rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
  nTau<- diag(rho.optimization[ ,3], rowLength, rowLength)
  ns<-V + nTau
  nS <- diag.inverse(ns)
  nomega<-diag.multi(nS, V)
  # nomega <- diag(0, rowLength, rowLength) # set nomega to 0 for code checking
  # nomega <- diag(1, rowLength, rowLength) # set nomega to 1 for code checking
  rho.optimization[ ,4] <- diag(nomega )
  rho.optimization[ ,5] <- nomega %*% Z %*% gamma + (identity-nomega) %*% betas.hat
  nTau_invert<-diag.inverse(nTau)
  V_invert<-diag.inverse(V)
  nPS_invert<-nTau_invert+ V_invert
  # nPS_invert<-nTau_invert+ diag.multi(diag.multi(V_invert, Z_transpose),  Z) # previous wrong code
  nPS<-diag.inverse(nPS_invert)
  nps<-diag(nPS)
  nps.long <- melt(nps)
  rho.optimization[ ,6] <-(nps.long$value)^0.5
  rho.optimization[ ,7] <- pnorm(0, rho.optimization[ ,5], rho.optimization[ ,6])
  result <- rbind(result,rho.optimization)
}

dim(result)

## [1] 62532     7

head(result)

##             rho tmm        tau      omega beta_tieda n.betas.tieda.se
## [1,] 0.03950088   1 0.03950088 0.06727674 0.07525859       0.05155085
## [2,] 0.03950088   1 0.03950088 0.03346594 0.01397654       0.03635841
## [3,] 0.03950088   1 0.03950088 0.01499440 0.07883044       0.02433705
## [4,] 0.03950088   1 0.03950088 0.39654268 0.17973373       0.12515504
## [5,] 0.03950088   1 0.03950088 0.18371151 0.10963860       0.08518666
## [6,] 0.03950088   1 0.03950088 0.01516272 0.02024762       0.02447327
##        p.below.0
## [1,] 0.072160112
## [2,] 0.350337299
## [3,] 0.000599512
## [4,] 0.075488568
## [5,] 0.099040250
## [6,] 0.204023322

write.table(result, file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt",col.names=TRUE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian.result <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt",  header=T)

result.df <-liver.mouse.eQTL.bayesian.result
result.df$rho.class <- factor(result.df$rho/tau)
# combine liver.mouse.eqtl.bayesian and rho.optimization.result for ploting
a <-liver.mouse.eQTL.bayesian.4tau[, c(1:2, 6, 7)]
a <-rbind(a, a, a, a, a, a)
dim(a)

## [1] 62532     4

new.result.df<-cbind(a, result.df)
head(new.result.df)

##           ensembl_id  betas.hat liver_pvalue abs_lung.beta        rho tmm
## 1 ENSMUSG00000000001 0.07366667  0.190827641   0.059800000 0.03950088   1
## 2 ENSMUSG00000000037 0.01177273  0.755289094   0.005911765 0.03950088   1
## 3 ENSMUSG00000000049 0.07875000  0.006815612   0.023660714 0.03950088   1
## 4 ENSMUSG00000000056 0.24430000  0.153365833   0.016446429 0.03950088   1
## 5 ENSMUSG00000000058 0.11090000  0.260604313   0.078136752 0.03950088   1
## 6 ENSMUSG00000000085 0.01910000  0.452489862   0.052847059 0.03950088   1
##          tau      omega beta_tieda n.betas.tieda.se   p.below.0 rho.class
## 1 0.03950088 0.06727674 0.07525859       0.05155085 0.072160112         1
## 2 0.03950088 0.03346594 0.01397654       0.03635841 0.350337299         1
## 3 0.03950088 0.01499440 0.07883044       0.02433705 0.000599512         1
## 4 0.03950088 0.39654268 0.17973373       0.12515504 0.075488568         1
## 5 0.03950088 0.18371151 0.10963860       0.08518666 0.099040250         1
## 6 0.03950088 0.01516272 0.02024762       0.02447327 0.204023322         1

new.result.df2 <- new.result.df
head(new.result.df$rho.class)

## [1] 1 1 1 1 1 1
## Levels: 1 1.02 1.04 1.06 1.08 1.1

by(new.result.df2, new.result.df2[, "rho.class"], head)

## new.result.df2[, "rho.class"]: 1
##           ensembl_id  betas.hat liver_pvalue abs_lung.beta        rho tmm
## 1 ENSMUSG00000000001 0.07366667  0.190827641   0.059800000 0.03950088   1
## 2 ENSMUSG00000000037 0.01177273  0.755289094   0.005911765 0.03950088   1
## 3 ENSMUSG00000000049 0.07875000  0.006815612   0.023660714 0.03950088   1
## 4 ENSMUSG00000000056 0.24430000  0.153365833   0.016446429 0.03950088   1
## 5 ENSMUSG00000000058 0.11090000  0.260604313   0.078136752 0.03950088   1
## 6 ENSMUSG00000000085 0.01910000  0.452489862   0.052847059 0.03950088   1
##          tau      omega beta_tieda n.betas.tieda.se   p.below.0 rho.class
## 1 0.03950088 0.06727674 0.07525859       0.05155085 0.072160112         1
## 2 0.03950088 0.03346594 0.01397654       0.03635841 0.350337299         1
## 3 0.03950088 0.01499440 0.07883044       0.02433705 0.000599512         1
## 4 0.03950088 0.39654268 0.17973373       0.12515504 0.075488568         1
## 5 0.03950088 0.18371151 0.10963860       0.08518666 0.099040250         1
## 6 0.03950088 0.01516272 0.02024762       0.02447327 0.204023322         1
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.02
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta       rho
## 11000 ENSMUSG00000000001 0.07366667  0.190827641   0.059800000 0.0402909
## 21000 ENSMUSG00000000037 0.01177273  0.755289094   0.005911765 0.0402909
## 31000 ENSMUSG00000000049 0.07875000  0.006815612   0.023660714 0.0402909
## 41000 ENSMUSG00000000056 0.24430000  0.153365833   0.016446429 0.0402909
## 51000 ENSMUSG00000000058 0.11090000  0.260604313   0.078136752 0.0402909
## 61000 ENSMUSG00000000085 0.01910000  0.452489862   0.052847059 0.0402909
##            tmm        tau       omega beta_tieda n.betas.tieda.se
## 11000 2.033368 0.08031981 0.034257649 0.07447728       0.05245539
## 21000 2.033368 0.08031981 0.016743144 0.01287530       0.03667160
## 31000 2.033368 0.08031981 0.007430797 0.07878987       0.02443031
## 41000 2.033368 0.08031981 0.244237706 0.20453248       0.14006115
## 51000 2.033368 0.08031981 0.099652245 0.11021577       0.08946535
## 61000 2.033368 0.08031981 0.007514859 0.01966878       0.02456811
##          p.below.0 rho.class
## 11000 0.0778298406      1.02
## 21000 0.3627576457      1.02
## 31000 0.0006296736      1.02
## 41000 0.0721026583      1.02
## 51000 0.1089861262      1.02
## 61000 0.2116869787      1.02
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.04
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta        rho
## 11002 ENSMUSG00000000001 0.07366667  0.190827641   0.059800000 0.04108092
## 21002 ENSMUSG00000000037 0.01177273  0.755289094   0.005911765 0.04108092
## 31002 ENSMUSG00000000049 0.07875000  0.006815612   0.023660714 0.04108092
## 41002 ENSMUSG00000000056 0.24430000  0.153365833   0.016446429 0.04108092
## 51002 ENSMUSG00000000058 0.11090000  0.260604313   0.078136752 0.04108092
## 61002 ENSMUSG00000000085 0.01910000  0.452489862   0.052847059 0.04108092
##            tmm       tau       omega beta_tieda n.betas.tieda.se
## 11002 4.077994 0.1610844 0.017380054 0.07407792       0.05291177
## 21002 4.077994 0.1610844 0.008419134 0.01232715       0.03682650
## 31002 4.077994 0.1610844 0.003718996 0.07876995       0.02447595
## 41002 4.077994 0.1610844 0.138775590 0.22170414       0.14951448
## 51002 4.077994 0.1610844 0.052301741 0.11054088       0.09178776
## 61002 4.077994 0.1610844 0.003761227 0.01938467       0.02461453
##          p.below.0 rho.class
## 11002 0.0807525640      1.04
## 21002 0.3689121997      1.04
## 31002 0.0006448559      1.04
## 41002 0.0690601555      1.04
## 51002 0.1142349624      1.04
## 61002 0.2154858658      1.04
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.06
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta        rho
## 11004 ENSMUSG00000000001 0.07366667  0.190827641   0.059800000 0.04187093
## 21004 ENSMUSG00000000037 0.01177273  0.755289094   0.005911765 0.04187093
## 31004 ENSMUSG00000000049 0.07875000  0.006815612   0.023660714 0.04187093
## 41004 ENSMUSG00000000056 0.24430000  0.153365833   0.016446429 0.04187093
## 51004 ENSMUSG00000000058 0.11090000  0.260604313   0.078136752 0.04187093
## 61004 ENSMUSG00000000085 0.01910000  0.452489862   0.052847059 0.04187093
##            tmm       tau       omega beta_tieda n.betas.tieda.se
## 11004 8.070868 0.3188064 0.008857841 0.07387626       0.05314072
## 21004 8.070868 0.3188064 0.004271757 0.01205403       0.03690343
## 31004 8.070868 0.3188064 0.001882573 0.07876010       0.02449850
## 41004 8.070868 0.3188064 0.075288621 0.23204128       0.15492738
## 51004 8.070868 0.3188064 0.027128631 0.11071373       0.09299882
## 61004 8.070868 0.3188064 0.001903991 0.01924411       0.02463746
##          p.below.0 rho.class
## 11004 0.0822339814      1.06
## 21004 0.3719711664      1.06
## 31004 0.0006524609      1.06
## 41004 0.0671001186      1.06
## 51004 0.1169278548      1.06
## 61004 0.2173743737      1.06
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.08
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta        rho
## 11006 ENSMUSG00000000001 0.07366667  0.190827641   0.059800000 0.04266095
## 21006 ENSMUSG00000000037 0.01177273  0.755289094   0.005911765 0.04266095
## 31006 ENSMUSG00000000049 0.07875000  0.006815612   0.023660714 0.04266095
## 41006 ENSMUSG00000000056 0.24430000  0.153365833   0.016446429 0.04266095
## 51006 ENSMUSG00000000058 0.11090000  0.260604313   0.078136752 0.04266095
## 61006 ENSMUSG00000000085 0.01910000  0.452489862   0.052847059 0.04266095
##            tmm      tau        omega beta_tieda n.betas.tieda.se
## 11006 15.77074 0.622958 0.0045527985 0.07377440       0.05325601
## 21006 15.77074 0.622958 0.0021906929 0.01191699       0.03694198
## 31006 15.77074 0.622958 0.0009643163 0.07875517       0.02450976
## 41006 15.77074 0.622958 0.0400002352 0.23778704       0.15785584
## 51006 15.77074 0.622958 0.0140697672 0.11080339       0.09362090
## 61006 15.77074 0.622958 0.0009752972 0.01917382       0.02464892
##         p.below.0 rho.class
## 11006 0.082983669      1.08
## 21006 0.373504140      1.08
## 31006 0.000656287      1.08
## 41006 0.065987941      1.08
## 51006 0.118299061      1.08
## 61006 0.218320878      1.08
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.1
##               ensembl_id  betas.hat liver_pvalue abs_lung.beta        rho
## 11008 ENSMUSG00000000001 0.07366667  0.190827641   0.059800000 0.04345097
## 21008 ENSMUSG00000000037 0.01177273  0.755289094   0.005911765 0.04345097
## 31008 ENSMUSG00000000049 0.07875000  0.006815612   0.023660714 0.04345097
## 41008 ENSMUSG00000000056 0.24430000  0.153365833   0.016446429 0.04345097
## 51008 ENSMUSG00000000058 0.11090000  0.260604313   0.078136752 0.04345097
## 61008 ENSMUSG00000000085 0.01910000  0.452489862   0.052847059 0.04345097
##            tmm      tau        omega beta_tieda n.betas.tieda.se
## 11008 30.44004 1.202408 0.0023639545 0.07372260       0.05331452
## 21008 30.44004 1.202408 0.0011361797 0.01184755       0.03696149
## 31008 30.44004 1.202408 0.0004998367 0.07875268       0.02451546
## 41008 30.44004 1.202408 0.0211311295 0.24085937       0.15939964
## 51008 30.44004 1.202408 0.0073391942 0.11084961       0.09393992
## 61008 30.44004 1.202408 0.0005055312 0.01913826       0.02465471
##          p.below.0 rho.class
## 11008 0.0833651717       1.1
## 21008 0.3742804206       1.1
## 31008 0.0006582282       1.1
## 41008 0.0653890239       1.1
## 51008 0.1189990373       1.1
## 61008 0.2188002013       1.1

# choose different rho class for plotting 
new.result.df2.rho1 <- new.result.df2[new.result.df2$rho.class == 1, ]
head(new.result.df2.rho1)

##           ensembl_id  betas.hat liver_pvalue abs_lung.beta        rho tmm
## 1 ENSMUSG00000000001 0.07366667  0.190827641   0.059800000 0.03950088   1
## 2 ENSMUSG00000000037 0.01177273  0.755289094   0.005911765 0.03950088   1
## 3 ENSMUSG00000000049 0.07875000  0.006815612   0.023660714 0.03950088   1
## 4 ENSMUSG00000000056 0.24430000  0.153365833   0.016446429 0.03950088   1
## 5 ENSMUSG00000000058 0.11090000  0.260604313   0.078136752 0.03950088   1
## 6 ENSMUSG00000000085 0.01910000  0.452489862   0.052847059 0.03950088   1
##          tau      omega beta_tieda n.betas.tieda.se   p.below.0 rho.class
## 1 0.03950088 0.06727674 0.07525859       0.05155085 0.072160112         1
## 2 0.03950088 0.03346594 0.01397654       0.03635841 0.350337299         1
## 3 0.03950088 0.01499440 0.07883044       0.02433705 0.000599512         1
## 4 0.03950088 0.39654268 0.17973373       0.12515504 0.075488568         1
## 5 0.03950088 0.18371151 0.10963860       0.08518666 0.099040250         1
## 6 0.03950088 0.01516272 0.02024762       0.02447327 0.204023322         1

tail(new.result.df2.rho1)

##               ensembl_id  betas.hat liver_pvalue abs_lung.beta        rho
## 10417 ENSMUSG00000099041 0.16160714   0.10867553    0.10059091 0.03950088
## 10418 ENSMUSG00000099083 0.10340000   0.06630646    0.12891000 0.03950088
## 10419 ENSMUSG00000099116 0.01975000   0.68385715    0.03457738 0.03950088
## 10420 ENSMUSG00000099164 0.10844444   0.12006199    0.08603846 0.03950088
## 10421 ENSMUSG00000099262 0.01105556   0.85807655    0.18600427 0.03950088
## 10422 ENSMUSG00000099305 0.05450000   0.17299524    0.06246296 0.03950088
##       tmm        tau      omega beta_tieda n.betas.tieda.se  p.below.0
## 10417   1 0.03950088 0.18223979 0.15261122       0.08484475 0.03603218
## 10418   1 0.03950088 0.06311306 0.10461169       0.04993017 0.01807838
## 10419   1 0.03950088 0.05386470 0.02343200       0.04612703 0.30573070
## 10420   1 0.03950088 0.09710749 0.10829670       0.06193409 0.04018183
## 10421   1 0.03950088 0.08508424 0.02232237       0.05797329 0.35010178
## 10422   1 0.03950088 0.03490647 0.05602900       0.03713268 0.06566391
##       rho.class
## 10417         1
## 10418         1
## 10419         1
## 10420         1
## 10421         1
## 10422         1

# rank with p.below.0: bayesian modeling
new.result.df2.rho1$rank.p.below.0 <- rank(new.result.df2.rho1$p.below.0)
# rank with liver p value: traditionsl linear regression (one step regression)
new.result.df2.rho1$rank.liver.pvalue <- rank(new.result.df2.rho1$liver_pvalue)
new.result.df2.rho1 <- new.result.df2.rho1[order(new.result.df2.rho1$rank.p.below.0), ]
head(new.result.df2.rho1)

##               ensembl_id betas.hat liver_pvalue abs_lung.beta        rho
## 2524  ENSMUSG00000022680  4.255833 2.034364e-16     4.1767833 0.03950088
## 10012 ENSMUSG00000073411  5.304083 1.549934e-14     5.8524545 0.03950088
## 4677  ENSMUSG00000028656  2.154341 5.245966e-14     2.9349028 0.03950088
## 9191  ENSMUSG00000057132  2.426200 7.421942e-14     3.0932000 0.03950088
## 2758  ENSMUSG00000023791  1.198018 4.042683e-13     0.4850804 0.03950088
## 3125  ENSMUSG00000024735  2.858722 3.352312e-13     1.1153500 0.03950088
##       tmm        tau      omega beta_tieda n.betas.tieda.se     p.below.0
## 2524    1 0.03950088 0.14728921   3.865053       0.07627617  0.000000e+00
## 10012   1 0.03950088 0.34419291   4.240976       0.11660156 6.004650e-290
## 4677    1 0.03950088 0.09472791   2.059069       0.06117055 1.072073e-248
## 9191    1 0.03950088 0.12286551   2.276338       0.06966560 1.757179e-234
## 2758    1 0.03950088 0.04258612   1.157766       0.04101450 1.317748e-175
## 3125    1 0.03950088 0.19741575   2.389774       0.08830683 1.373629e-161
##       rho.class rank.p.below.0 rank.liver.pvalue
## 2524          1              1                 1
## 10012         1              2                 2
## 4677          1              3                 3
## 9191          1              4                 4
## 2758          1              5                 6
## 3125          1              6                 5

# caculate TPR: true positive rate
# caculate PPV: positive predictive rate
result.rho1 <- matrix(, nrow(new.result.df2.rho1), 8)
colnames(result.rho1)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR" )
for (i in 1:nrow(new.result.df2.rho1))
{
  newdata1.rho1 <- subset(new.result.df2.rho1, rank.p.below.0 <= i)
  overlap.newdata1.rho1 <- newdata1.rho1[newdata1.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  result.rho1[i, 1] <- i
  result.rho1[i, 2] <- nrow(overlap.newdata1.rho1)/nrow(newdata1.rho1)
  result.rho1[i, 3] <- nrow(overlap.newdata1.rho1)/nrow(liver.ASE.ensembl)
  newdata2.rho1 <- subset(new.result.df2.rho1, rank.liver.pvalue <= i)
  overlap.newdata2.rho1 <- newdata2.rho1[newdata2.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  result.rho1[i, 5] <- i
  result.rho1[i, 6] <- nrow(overlap.newdata2.rho1)/nrow(newdata2.rho1)
  result.rho1[i, 7] <- nrow(overlap.newdata2.rho1)/nrow(liver.ASE.ensembl)
}  
head(result.rho1)

##      bayrank    bayppv     bay_TPR bay_FPR orirank    orippv     ori_TPR
## [1,]       1 0.0000000 0.000000000      NA       1 0.0000000 0.000000000
## [2,]       2 0.5000000 0.005263158      NA       2 0.5000000 0.005263158
## [3,]       3 0.3333333 0.005263158      NA       3 0.3333333 0.005263158
## [4,]       4 0.2500000 0.005263158      NA       4 0.2500000 0.005263158
## [5,]       5 0.2000000 0.005263158      NA       5 0.4000000 0.010526316
## [6,]       6 0.3333333 0.010526316      NA       6 0.3333333 0.010526316
##      ori_FPR
## [1,]      NA
## [2,]      NA
## [3,]      NA
## [4,]      NA
## [5,]      NA
## [6,]      NA

tail(result.rho1)

##          bayrank     bayppv bay_TPR bay_FPR orirank     orippv ori_TPR
## [10417,]   10417 0.01823942       1      NA   10417 0.01823942       1
## [10418,]   10418 0.01823767       1      NA   10418 0.01823767       1
## [10419,]   10419 0.01823592       1      NA   10419 0.01823592       1
## [10420,]   10420 0.01823417       1      NA   10420 0.01823417       1
## [10421,]   10421 0.01823242       1      NA   10421 0.01823242       1
## [10422,]   10422 0.01823067       1      NA   10422 0.01823067       1
##          ori_FPR
## [10417,]      NA
## [10418,]      NA
## [10419,]      NA
## [10420,]      NA
## [10421,]      NA
## [10422,]      NA

# ploting "True positive rate"

plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 0.4) , xlim=c(0, 300))
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4), xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# ploting "positive predictive value"
plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# Retrieve MT-eQTLs result
MTeQTLs <-read.table(file="MT-eQTLs.txt",  header=T)
head(MTeQTLs)

##         SNP       gene isEQTL.Liver isEQTL.lung marginalP.Liver
## 1 rs6269442 1424963_at            0           0       0.9260306
## 2 rs6365999 1424963_at            0           0       0.9260306
## 3 rs6376963 1424963_at            0           0       0.9306985
## 4 rs3677817 1424963_at            0           0       0.9329117
## 5 rs6269442 1424964_at            0           0       0.9438114
## 6 rs6365999 1424964_at            0           0       0.9438114
##   marginalP.lung
## 1      0.9027543
## 2      0.9027543
## 3      0.9104129
## 4      0.9140456
## 5      0.9219186
## 6      0.9219186

mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt",  header=T)
MTeQTLs<-merge(MTeQTLs, mouse430aensembl_id, by.x = "gene", by.y="probe_id")


# Retrieve MT-eQTLs result
MTeQTLs <-read.table(file="MT-eQTLs.txt",  header=T)
head(MTeQTLs)

##         SNP       gene isEQTL.Liver isEQTL.lung marginalP.Liver
## 1 rs6269442 1424963_at            0           0       0.9260306
## 2 rs6365999 1424963_at            0           0       0.9260306
## 3 rs6376963 1424963_at            0           0       0.9306985
## 4 rs3677817 1424963_at            0           0       0.9329117
## 5 rs6269442 1424964_at            0           0       0.9438114
## 6 rs6365999 1424964_at            0           0       0.9438114
##   marginalP.lung
## 1      0.9027543
## 2      0.9027543
## 3      0.9104129
## 4      0.9140456
## 5      0.9219186
## 6      0.9219186

mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt",  header=T)
MTeQTLs<-merge(MTeQTLs, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
MTeQTLs.min <- data.table(MTeQTLs, key=c('ensembl_id', "marginalP.Liver"))
MTeQTLs.min <-MTeQTLs.min[J(unique(ensembl_id)),mult="first"]
merged.eQTL <- merge(new.result.df2.rho1, MTeQTLs.min, by ="ensembl_id")
merged.eQTL$rank.marginalP.Liver <- rank(merged.eQTL$marginalP.Liver)

merged.result <- matrix(, nrow(merged.eQTL), 12)
colnames(merged.result)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR", "MTrank","MTppv","MT_TPR","MT_FPR" )
for (i in 1:nrow(merged.eQTL))
{
  newdata1 <- subset(merged.eQTL, rank.p.below.0 <= i)
  overlap.newdata1 <- newdata1[newdata1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  merged.result[i, 1] <- i
  merged.result[i, 2] <- nrow(overlap.newdata1)/nrow(newdata1)
  merged.result[i, 3] <- nrow(overlap.newdata1)/nrow(liver.ASE.ensembl)
  merged.result[i, 4] <- (nrow(newdata1)-nrow(overlap.newdata1)) / (nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
   
  newdata2 <- subset(merged.eQTL, rank.liver.pvalue <= i)
  overlap.newdata2 <- newdata2[newdata2$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  merged.result[i, 5] <- i
  merged.result[i, 6] <- nrow(overlap.newdata2)/nrow(newdata2)
  merged.result[i, 7] <- nrow(overlap.newdata2)/nrow(liver.ASE.ensembl)
  merged.result[i, 8] <- (nrow(newdata2)-nrow(overlap.newdata2)) / (nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
  
  
  newdata3 <- subset(merged.eQTL, rank.liver.pvalue <= i)
  overlap.newdata3 <- newdata3[newdata3$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  merged.result[i, 9] <- i
  merged.result[i, 10] <- nrow(overlap.newdata3)/nrow(newdata3)
  merged.result[i, 11] <- nrow(overlap.newdata3)/nrow(liver.ASE.ensembl)
  merged.result[i, 12] <- (nrow(newdata3)-nrow(overlap.newdata3)) /(nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
  
} 

head(merged.result)

##      bayrank    bayppv     bay_TPR      bay_FPR orirank    orippv
## [1,]       1 0.0000000 0.000000000 0.0000977326       1 0.0000000
## [2,]       2 0.5000000 0.005263158 0.0000977326       2 0.5000000
## [3,]       3 0.3333333 0.005263158 0.0001954652       3 0.3333333
## [4,]       4 0.2500000 0.005263158 0.0002931978       4 0.2500000
## [5,]       5 0.2000000 0.005263158 0.0003909304       5 0.4000000
## [6,]       6 0.3333333 0.010526316 0.0003909304       6 0.3333333
##          ori_TPR      ori_FPR MTrank     MTppv      MT_TPR       MT_FPR
## [1,] 0.000000000 0.0000977326      1 0.0000000 0.000000000 0.0000977326
## [2,] 0.005263158 0.0000977326      2 0.5000000 0.005263158 0.0000977326
## [3,] 0.005263158 0.0001954652      3 0.3333333 0.005263158 0.0001954652
## [4,] 0.005263158 0.0002931978      4 0.2500000 0.005263158 0.0002931978
## [5,] 0.010526316 0.0002931978      5 0.4000000 0.010526316 0.0002931978
## [6,] 0.010526316 0.0003909304      6 0.3333333 0.010526316 0.0003909304

tail(merged.result)

##          bayrank     bayppv bay_TPR   bay_FPR orirank     orippv ori_TPR
## [10417,]   10417 0.01823942       1 0.9995113   10417 0.01823942       1
## [10418,]   10418 0.01823767       1 0.9996091   10418 0.01823767       1
## [10419,]   10419 0.01823592       1 0.9997068   10419 0.01823592       1
## [10420,]   10420 0.01823417       1 0.9998045   10420 0.01823417       1
## [10421,]   10421 0.01823242       1 0.9999023   10421 0.01823242       1
## [10422,]   10422 0.01823067       1 1.0000000   10422 0.01823067       1
##            ori_FPR MTrank      MTppv MT_TPR    MT_FPR
## [10417,] 0.9995113  10417 0.01823942      1 0.9995113
## [10418,] 0.9996091  10418 0.01823767      1 0.9996091
## [10419,] 0.9997068  10419 0.01823592      1 0.9997068
## [10420,] 0.9998045  10420 0.01823417      1 0.9998045
## [10421,] 0.9999023  10421 0.01823242      1 0.9999023
## [10422,] 1.0000000  10422 0.01823067      1 1.0000000

# ploting "True positive rate"

plot(merged.result[, 1], merged.result[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( merged.result[, 1], merged.result[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
par(new=TRUE)
plot(merged.result[, 1], merged.result[, 11], type="l", col="blue", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

plot(merged.result[, 1], merged.result[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 1), xlim=c(0, 300) )
par(new=TRUE)
plot( merged.result[, 1], merged.result[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1),  xlim=c(0, 300))
par(new=TRUE)
plot(merged.result[, 1], merged.result[, 11], type="l", col="blue", xlab="Ranking", ylab="TPR", ylim=c(0, 1),  xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

plot(merged.result[, 4], merged.result[, 3], type="l", col="red",  xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1) )
par(new=TRUE)
plot( merged.result[, 8], merged.result[, 7], type="l", col="green", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1))
par(new=TRUE)
plot(merged.result[, 12], merged.result[, 11], type="l", col="blue", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)
title(main = "ROC curve")