Figure 1. Weight effect of rho

Weight effect of rho

Figure 2. Weight effect of lung P value

Weight effect of lung P value

rm(list = ls())
gc()
##          used (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells 309130 16.6     592000    31.7     460000    24.6
## Vcells 532883  4.1 2375110180 18120.7 2184544708 16666.8
# set the working directory
setwd("/Volumes/Transcend/Thesis")
merged.mouse.eQTL.min.variance2<-read.table(file="2016-04-22-merged.mouse.eQTL.min.variance.txt",  header=T)
head(merged.mouse.eQTL.min.variance2)
##           ensembl_id   liver.beta liver.beta_se liver_residual_variance
## 1 ENSMUSG00000000001 -0.015986111   0.025624670             0.018910763
## 2 ENSMUSG00000000003  0.016880682   0.012653279             0.003757142
## 3 ENSMUSG00000000037 -0.003865079   0.013629289             0.004681090
## 4 ENSMUSG00000000049  0.042233333   0.007808461             0.001829162
## 5 ENSMUSG00000000056  0.063604072   0.051481198             0.078095912
## 6 ENSMUSG00000000058 -0.025611111   0.012982317             0.004853968
##   lung_pvalue   lung.beta lung.beta_se liver_pvalue
## 1  0.12293526 -0.03269000   0.02077547 5.377718e-01
## 2  0.06295461  0.02954960   0.01547936 1.929225e-01
## 3  0.30681836 -0.01759477   0.01701272 7.788138e-01
## 4  0.79280526 -0.02388393   0.09036397 9.084753e-06
## 5  0.21831618  0.02167500   0.01734983 2.269175e-01
## 6  0.29250998  0.02885470   0.02707600 5.846922e-02
# caculate the absolute value of live/lung beta
merged.mouse.eQTL.min.variance2$abs_liver.beta<-abs(merged.mouse.eQTL.min.variance2$liver.beta)
merged.mouse.eQTL.min.variance2$abs_lung.beta<-abs(merged.mouse.eQTL.min.variance2$lung.beta)
# caculate negative log lung p value
merged.mouse.eQTL.min.variance2$neg_log_lung_pvalue<--log10(merged.mouse.eQTL.min.variance2$lung_pvalue)

# Simple linear regression between abs_liver.beta and abs_lung.beta
# fit1<-summary(lm(abs_liver.beta ~ abs_lung.beta, data=merged.mouse.eQTL.min.variance2))
# fit1
# tau<-fit1$sigma**2
# check association between abs_liver.beta and abs.lung.beta
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
ggplot(merged.mouse.eQTL.min.variance2, aes(x=abs_lung.beta, y=abs_liver.beta)) +geom_point()+geom_smooth(method=lm)

cor(merged.mouse.eQTL.min.variance2$abs_lung.beta, merged.mouse.eQTL.min.variance2$abs_liver.beta)
## [1] 0.3984727
merged.mouse.eQTL<-merged.mouse.eQTL.min.variance2
# retrieve ensembl_id
markers<-merged.mouse.eQTL[, 1]
# Yg=Ag + Bg*Xsnp+V
# retrieve betas.hat (liver.beta)
betas.hat<-abs(merged.mouse.eQTL[,2])
# retrieve liver.beta_se
se<-(merged.mouse.eQTL[,3])
# retrieve liver_residua_variance
liver_residual_variance<-merged.mouse.eQTL[,4]
# creat Z matrix with 2 columns: 1 for intercept,abs_lung.beta (merged.mouse.eQTL[,10])
Z<-as.matrix(merged.mouse.eQTL[,10])
Z<-replace(Z,is.na(Z),0)
Z<-data.frame(1,Z) 
Z<-as.matrix(Z)

rowLength<-length(markers)
# liver.betas=Z*gama+T^2

# Regression: abs_liver.beta = intercept + beta*abs_lung.beta + error 
lmsummary<-summary(lm(abs_liver.beta~-1+Z, data=merged.mouse.eQTL))
lmsummary
## 
## Call:
## lm(formula = abs_liver.beta ~ -1 + Z, data = merged.mouse.eQTL)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59768 -0.03182 -0.01870  0.00623  2.04049 
## 
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)    
## ZX1 0.026589   0.001119   23.76   <2e-16 ***
## ZZ  0.489214   0.010733   45.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09849 on 11007 degrees of freedom
## Multiple R-squared:  0.3304, Adjusted R-squared:  0.3303 
## F-statistic:  2716 on 2 and 11007 DF,  p-value: < 2.2e-16
# error ~ N(0, Tau)
tau<-lmsummary$sigma**2
tau
## [1] 0.009699749
# output coeffieients (gamma matrix)
outputVector<-c(lmsummary$coefficients[,1],lmsummary$coefficients[,2])
write.table(matrix(outputVector,length(Z[1,])),file="2016-04-26_hm_tau_coefficients with intercept.txt",col.names=FALSE,row.names=FALSE,quote=FALSE)
hm_tau_coefficients<-read.table(file="2016-04-26_hm_tau_coefficients with intercept.txt",  header=T)
# gamma matrix
gamma<-as.matrix(lmsummary$coefficients[,1])
# trasnpose Z matrix
Z_transpose<-t(Z)
# create identity matrix
identity<-diag(nrow=rowLength) 
# original betas.hat
betas.hat<-as.matrix(betas.hat)
#creat V matrix for liver_residual_variance
V <- matrix(0, rowLength, rowLength)
# V, liver residual variance
diag(V) <-liver_residual_variance 
# Creat Tau matrix
Tau<- diag(tau, rowLength, rowLength)
# follow Chen's paper and cacualte s
s <-V + Tau
# create inverse function for inversing diagnoal matrix 

diag.inverse <- function(x){diag(1/diag(x), nrow(x), ncol(x))}
# create multiplication function for multiplicating two diagnoal matrix 
diag.multi <- function(x,y){diag(diag(x)*diag(y), nrow(x), ncol(x))}
# inverse s
S <-diag.inverse(s)
# follow chen's paper to caculate omega
omega<-diag.multi(S, V)
# retrieve omega value from the matrix 
omega.diag<-diag(omega )
# summary the omega value
summary(omega.diag)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.09131 0.36400 0.52520 0.54380 0.70830 0.99760
# betas.thea<- S %*% Z %*% gamma + (identity-S) %*% betas.hat
# caculate betas.tieda with the formula in Chen's paper
betas.tieda<- omega %*% Z %*% gamma + (identity-omega) %*% betas.hat
# crbetas.tieda<- cromega %*% Z %*% gamma + (identity-cromega) %*% betas.hat
head(betas.tieda)
##            [,1]
## [1,] 0.03356494
## [2,] 0.02362737
## [3,] 0.01406380
## [4,] 0.04160507
## [5,] 0.04011077
## [6,] 0.03064532
head(betas.hat)
##             [,1]
## [1,] 0.015986111
## [2,] 0.016880682
## [3,] 0.003865079
## [4,] 0.042233333
## [5,] 0.063604072
## [6,] 0.025611111
#regression beta
regbeta <-Z %*% gamma
head(regbeta)
##            [,1]
## [1,] 0.04258151
## [2,] 0.04104519
## [3,] 0.03519672
## [4,] 0.03827346
## [5,] 0.03719283
## [6,] 0.04070523
# summary(regbeta)

markers1<-as.character(markers)
# combine ensemble_id, betas.hat and betas.tieda
outputVector<-c(markers1,betas.hat,betas.tieda)
write.table(matrix(outputVector,rowLength),file="2016-04-26_hm_tau_hmresults.txt",col.names=FALSE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian<-read.table(file="2016-04-26_hm_tau_hmresults.txt")
colnames(liver.mouse.eQTL.bayesian)<-c( "ensembl_id", "betas.hat","betas.tieda")
head(liver.mouse.eQTL.bayesian)
##           ensembl_id   betas.hat betas.tieda
## 1 ENSMUSG00000000001 0.015986111  0.03356494
## 2 ENSMUSG00000000003 0.016880682  0.02362737
## 3 ENSMUSG00000000037 0.003865079  0.01406380
## 4 ENSMUSG00000000049 0.042233333  0.04160507
## 5 ENSMUSG00000000056 0.063604072  0.04011077
## 6 ENSMUSG00000000058 0.025611111  0.03064532
# merge dataset with betas.hat and betas.tieda
liver.mouse.eQTL.bayesian.all<- merge(liver.mouse.eQTL.bayesian, merged.mouse.eQTL.min.variance2, by = "ensembl_id")
head(liver.mouse.eQTL.bayesian.all)
##           ensembl_id   betas.hat betas.tieda   liver.beta liver.beta_se
## 1 ENSMUSG00000000001 0.015986111  0.03356494 -0.015986111   0.025624670
## 2 ENSMUSG00000000003 0.016880682  0.02362737  0.016880682   0.012653279
## 3 ENSMUSG00000000037 0.003865079  0.01406380 -0.003865079   0.013629289
## 4 ENSMUSG00000000049 0.042233333  0.04160507  0.042233333   0.007808461
## 5 ENSMUSG00000000056 0.063604072  0.04011077  0.063604072   0.051481198
## 6 ENSMUSG00000000058 0.025611111  0.03064532 -0.025611111   0.012982317
##   liver_residual_variance lung_pvalue   lung.beta lung.beta_se
## 1             0.018910763  0.12293526 -0.03269000   0.02077547
## 2             0.003757142  0.06295461  0.02954960   0.01547936
## 3             0.004681090  0.30681836 -0.01759477   0.01701272
## 4             0.001829162  0.79280526 -0.02388393   0.09036397
## 5             0.078095912  0.21831618  0.02167500   0.01734983
## 6             0.004853968  0.29250998  0.02885470   0.02707600
##   liver_pvalue abs_liver.beta abs_lung.beta neg_log_lung_pvalue
## 1 5.377718e-01    0.015986111    0.03269000           0.9103235
## 2 1.929225e-01    0.016880682    0.02954960           1.2009724
## 3 7.788138e-01    0.003865079    0.01759477           0.5131187
## 4 9.084753e-06    0.042233333    0.02388393           0.1008335
## 5 2.269175e-01    0.063604072    0.02167500           0.6609141
## 6 5.846922e-02    0.025611111    0.02885470           0.5338593
write.table(liver.mouse.eQTL.bayesian.all,file="2016-04-26_liver.mouse.eQTL.bayesian.all.txt")
liver.mouse.eQTL.bayesian<-read.table(file="2016-04-26_liver.mouse.eQTL.bayesian.all.txt")
head(liver.mouse.eQTL.bayesian)
##           ensembl_id   betas.hat betas.tieda   liver.beta liver.beta_se
## 1 ENSMUSG00000000001 0.015986111  0.03356494 -0.015986111   0.025624670
## 2 ENSMUSG00000000003 0.016880682  0.02362737  0.016880682   0.012653279
## 3 ENSMUSG00000000037 0.003865079  0.01406380 -0.003865079   0.013629289
## 4 ENSMUSG00000000049 0.042233333  0.04160507  0.042233333   0.007808461
## 5 ENSMUSG00000000056 0.063604072  0.04011077  0.063604072   0.051481198
## 6 ENSMUSG00000000058 0.025611111  0.03064532 -0.025611111   0.012982317
##   liver_residual_variance lung_pvalue   lung.beta lung.beta_se
## 1             0.018910763  0.12293526 -0.03269000   0.02077547
## 2             0.003757142  0.06295461  0.02954960   0.01547936
## 3             0.004681090  0.30681836 -0.01759477   0.01701272
## 4             0.001829162  0.79280526 -0.02388393   0.09036397
## 5             0.078095912  0.21831618  0.02167500   0.01734983
## 6             0.004853968  0.29250998  0.02885470   0.02707600
##   liver_pvalue abs_liver.beta abs_lung.beta neg_log_lung_pvalue
## 1 5.377718e-01    0.015986111    0.03269000           0.9103235
## 2 1.929225e-01    0.016880682    0.02954960           1.2009724
## 3 7.788138e-01    0.003865079    0.01759477           0.5131187
## 4 9.084753e-06    0.042233333    0.02388393           0.1008335
## 5 2.269175e-01    0.063604072    0.02167500           0.6609141
## 6 5.846922e-02    0.025611111    0.02885470           0.5338593
liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", 
                                                                        "liver.beta_se", "betas.tieda", 
                                                                        "liver_residual_variance", "liver_pvalue", "abs_lung.beta",
                                                                        "abs_lung.beta", "neg_log_lung_pvalue"))

head(liver.mouse.eQTL.bayesian)
##           ensembl_id   betas.hat liver.beta_se betas.tieda
## 1 ENSMUSG00000000001 0.015986111   0.025624670  0.03356494
## 2 ENSMUSG00000000003 0.016880682   0.012653279  0.02362737
## 3 ENSMUSG00000000037 0.003865079   0.013629289  0.01406380
## 4 ENSMUSG00000000049 0.042233333   0.007808461  0.04160507
## 5 ENSMUSG00000000056 0.063604072   0.051481198  0.04011077
## 6 ENSMUSG00000000058 0.025611111   0.012982317  0.03064532
##   liver_residual_variance liver_pvalue abs_lung.beta abs_lung.beta.1
## 1             0.018910763 5.377718e-01    0.03269000      0.03269000
## 2             0.003757142 1.929225e-01    0.02954960      0.02954960
## 3             0.004681090 7.788138e-01    0.01759477      0.01759477
## 4             0.001829162 9.084753e-06    0.02388393      0.02388393
## 5             0.078095912 2.269175e-01    0.02167500      0.02167500
## 6             0.004853968 5.846922e-02    0.02885470      0.02885470
##   neg_log_lung_pvalue
## 1           0.9103235
## 2           1.2009724
## 3           0.5131187
## 4           0.1008335
## 5           0.6609141
## 6           0.5338593
# Caculate variance for beta.tieda by following Brian Kulis' lecture notes
# Invert Tau and V
Tau_invert<-diag.inverse(Tau)
V_invert<-diag.inverse(V)
# Retrieve x information (SNP correspondting to the pair of gene and SNP with minimum P value)
snps <-read.table(file="2016-04-21-mouse.SNP.min.txt",  header=T)
head(snps)
##               snps BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19
## 1       rs13477320    0     2     2     0     0     2     0     0     0
## 2   CEL-X_71438949    0     0     0     0     0     0     0     0     0
## 3  CEL-X_154048891    0     0     0     2     0     2     0     0     2
## 4        rs3704454    2     0     0     0     0     0     0     2     2
## 5 CEL-11_121219118    2     0     2     2     0     2     2     0     0
## 6        rs3655269    0     2     2     2     2     0     2     2     0
##   BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34
## 1    2     0     2     0      0     0     2     0     0     0     2     2
## 2    0     2     0     0      0     0     2     0     0     2     0     0
## 3    0     0     0     2      2     2     0     0     0     2     0     0
## 4    2     2     2     2      2     0     0     0     0     2     0     2
## 5    2     0     2     2      2     2     0     0     0     2     0     0
## 6    0     0     0     2      2     2     2     0     0     2     2     2
##   BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1     2     2     2     0     0    0    0    2    0
## 2     0     2     0     2     2    0    2    0    2
## 3     0     2     0     0     2    0    0    0    0
## 4     0     0     0     0     2    2    2    2    2
## 5     2     0     0     0     2    2    2    2    2
## 6     0     0     0     2     2    0    2    2    2
nrow(snps)
## [1] 11009
# caculate XtX
xtxlist <- NULL
for (i in 1:nrow(snps))  {
  x <- matrix(as.numeric(snps[i, 2:31]),nrow = 30,ncol = 1)
  # not sure how to deal with NA
  # here I ajust them to 0
  x[is.na(x)] <- 0
  xtx <-t(x) %*% x
  xtxlist <- c(xtxlist, xtx)}

head(xtxlist) 
## [1] 48 32 36 60 68 72
length(xtxlist)
## [1] 11009
xtx<- diag(xtxlist, rowLength, rowLength)

dim(xtx)
## [1] 11009 11009
xtx[1:6, 1:6]
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]   48    0    0    0    0    0
## [2,]    0   32    0    0    0    0
## [3,]    0    0   36    0    0    0
## [4,]    0    0    0   60    0    0
## [5,]    0    0    0    0   68    0
## [6,]    0    0    0    0    0   72
# corrected code to caculate bayesian variance
# S0^-1 in Brian Kulis' lecture note: Tau_invert
# 1/sigma^2 in Brian Kulis' lecture note: V_invert
# XTX in Brian Kulis' lecture note: xtx
PS_invert<-Tau_invert + diag.multi(V_invert, xtx)

# PS_invert<-Tau_invert+V_invert%*% Z  %*% Z_transpose # previous wrong code
# S in Brian Kulis' lecture note:PS
PS <- diag.inverse(PS_invert)
# retrieve posterior variance 
ps<-diag(PS)
range(ps)
## [1] 1.520574e-05 8.614716e-03
library(reshape)
# reshape posterior variance to long format 
ps.long <- melt(ps)
head(ps.long)
##          value
## 1 3.785968e-04
## 2 1.160065e-04
## 3 1.283102e-04
## 4 3.039052e-05
## 5 1.026884e-03
## 6 6.695090e-05
# Caculate sd: square root on variance
ps.long$betas.tieda.se<-(ps.long$value)^0.5
# combine sd to the data.frame
liver.mouse.eQTL.bayesian<-cbind(liver.mouse.eQTL.bayesian,ps.long$betas.tieda.se)
# head(liver.mouse.eQTL.bayesian)
# rename betas.tieda.se
liver.mouse.eQTL.bayesian<-rename(liver.mouse.eQTL.bayesian, c("ps.long$betas.tieda.se"="betas.tieda.se", "liver.beta_se"="betas.hat.se"))

liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", "betas.hat.se", 
                                                                        "betas.tieda", "betas.tieda.se", "liver_residual_variance",                                                                        
                                                                        "liver_pvalue", "abs_lung.beta", "neg_log_lung_pvalue"))
# head(liver.mouse.eQTL.bayesian)

# library(tigerstats)
# pnormGC(0, region="below", mean=0.002352829,sd=0.09972950)

# caculate probability of betas.tieda below 0 based on betas.tieda and standard deviation
liver.mouse.eQTL.bayesian$p.below.0 <- pnorm(0,liver.mouse.eQTL.bayesian$betas.tieda, liver.mouse.eQTL.bayesian$betas.tieda.se)
head(liver.mouse.eQTL.bayesian)
##           ensembl_id   betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.015986111  0.025624670  0.03356494    0.019457564
## 2 ENSMUSG00000000003 0.016880682  0.012653279  0.02362737    0.010770630
## 3 ENSMUSG00000000037 0.003865079  0.013629289  0.01406380    0.011327409
## 4 ENSMUSG00000000049 0.042233333  0.007808461  0.04160507    0.005512759
## 5 ENSMUSG00000000056 0.063604072  0.051481198  0.04011077    0.032045034
## 6 ENSMUSG00000000058 0.025611111  0.012982317  0.03064532    0.008182353
##   liver_residual_variance liver_pvalue abs_lung.beta neg_log_lung_pvalue
## 1             0.018910763 5.377718e-01    0.03269000           0.9103235
## 2             0.003757142 1.929225e-01    0.02954960           1.2009724
## 3             0.004681090 7.788138e-01    0.01759477           0.5131187
## 4             0.001829162 9.084753e-06    0.02388393           0.1008335
## 5             0.078095912 2.269175e-01    0.02167500           0.6609141
## 6             0.004853968 5.846922e-02    0.02885470           0.5338593
##      p.below.0
## 1 4.226075e-02
## 2 1.412903e-02
## 3 1.071971e-01
## 4 2.226153e-14
## 5 1.053396e-01
## 6 9.009137e-05
dim(liver.mouse.eQTL.bayesian)
## [1] 11009    10
summary(liver.mouse.eQTL.bayesian$betas.tieda.se)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.003899 0.009691 0.013470 0.016720 0.019920 0.092820
range(liver.mouse.eQTL.bayesian$p.below.0)
## [1] 0.0000000 0.3644177
write.table(liver.mouse.eQTL.bayesian,file="2016-04-26_liver.mouse.eQTL.bayesian with beta.txt")
liver.mouse.eQTL.bayesian <- read.table(file="2016-04-26_liver.mouse.eQTL.bayesian with beta.txt")
# head(liver.mouse.eQTL.bayesian)
# summary(liver.mouse.eQTL.bayesian$liver_residual_variance)
liver.mouse.eQTL.bayesian.4tau <- liver.mouse.eQTL.bayesian


# colnames(liver.mouse.eQTL.bayesian.4tau) <- c("ensembl_id", "betas.hat", "betas.hat.se", "betas.tieda", "betas.tieda.se", "liver_residual_variance", "liver_pvalue", "abs_lung.beta", 
#                                              "neg_log_lung_pvalue", "p.below.0", "betas.tieda2m", "betas.tieda3rd", "betas.tieda4max")
# head(liver.mouse.eQTL.bayesian.4tau)

# Introduce weight (Tmm) to adjust Tau with neg_log_lung_pvalue
liver.mouse.eQTL.bayesian.4tau$fzm <- liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue
# caculate ratio_fzm
liver.mouse.eQTL.bayesian.4tau$ratio_fzm <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/liver.mouse.eQTL.bayesian.4tau$fzm
range(liver.mouse.eQTL.bayesian.4tau$ratio_fzm)
## [1] 1.000000e+00 1.990653e+16
# set up threshold for ratio_fzm and caculate updated ratio_fzm (nratio_fzm)
threshold <- 0.05
liver.mouse.eQTL.bayesian.4tau$nratio_fzm <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$ratio_fzm > max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))] <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))
# liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$nratio_fzm >= max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log(threshold))] <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
summary(liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   11.23   13.28   11.53   13.28   13.28
# compare bayesian prediction to the traditional method 
# evaluate the predition with alle specific expreesion in the liver: Sandrine Lagarrigue's paper
liver.ASE <- read.csv(file= "ASE.genetics.113.153882-6.csv")
dim(liver.ASE)
## [1] 1191   19
head(liver.ASE)
##           replicate chr startExon        geneID SNPperExon3
## 1 M.CH. DxB and BxD   1   9535488          Rrs1          15
## 2  M.HF DxB and BxD   1   9535488          Rrs1          14
## 3  F.HF DxB and BxD   1   9535488          Rrs1          15
## 4 M.CH. DxB and BxD   1  37473929 6330578E17Rik           2
## 5 M.CH. DxB and BxD   1  58169979          Aox3           3
## 6  M.HF DxB and BxD   1  58169979          Aox3           3
##   sumBperExon.DxB4 sumDperExon.DxB4 sumBperExon.BxD4 sumDperExon.BxD4
## 1               45               19               50               25
## 2               74               39               66               30
## 3               76               20               77               40
## 4               78               32               47               17
## 5              473               82              225               27
## 6              252               56              263               53
##   FCadd1.DxB5 FCadd1.BxD5 BonBD.DxB6 BonBD.BxD6 pvalBH.DxB7 pvalBH.BxD7
## 1        2.30        1.96       0.70       0.67     1.0e-02     3.7e-02
## 2        1.88        2.16       0.65       0.69     1.1e-02     3.3e-03
## 3        3.67        1.90       0.79       0.66     4.9e-07     5.1e-03
## 4        2.39        2.67       0.71       0.73     1.9e-04     2.2e-03
## 5        5.71        8.07       0.85       0.89     3.6e-64     5.4e-37
## 6        4.44        4.89       0.82       0.83     8.7e-28     1.1e-31
##   UTR5 UTR3 strand exonCount
## 1    0    0      +         1
## 2    0    0      +         1
## 3    0    0      +         1
## 4    0    0      -         3
## 5    0    0      +        35
## 6    0    0      +        35
# 440 unique gene ID
length(unique(liver.ASE$geneID))
## [1] 440
# verify ASE table
liver.ASE1 <- liver.ASE[which(liver.ASE$replicate == "M.CH. DxB and BxD"), ]
liver.ASE2 <- liver.ASE[which(liver.ASE$replicate == "M.HF DxB and BxD"), ]
liver.ASE3 <- liver.ASE[which(liver.ASE$replicate == "F.HF DxB and BxD"), ]
length(unique(liver.ASE1$geneID))
## [1] 272
length(unique(liver.ASE2$geneID))
## [1] 275
length(unique(liver.ASE3$geneID))
## [1] 304
(length(unique(liver.ASE1$geneID))+length(unique(liver.ASE2$geneID))+length(unique(liver.ASE3$geneID)))/3
## [1] 283.6667
# As claimed in the paper: averaged 284 ASE for each replicate
sub.liver.ASE <-liver.ASE
# Subset liver ASE with different conditions
# sub.liver.ASE <-liver.ASE[which(liver.ASE$pvalBH.DxB7 < 0.05 & liver.ASE$pvalBH.BxD7 < 0.05), ]
# sub.liver.ASE <- sub.liver.ASE[order(sub.liver.ASE$pvalBH.BxD7), ]
# summary(sub.liver.ASE$pvalBH.BxD7)
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.BxD7 <= 9.0e-10 ), ]

# sub.liver.ASE <- sub.liver.ASE[ sub.liver.ASE$geneID %in%  names(table(sub.liver.ASE$geneID))[table(sub.liver.ASE$geneID) >1] , ]
# check the remain gene number after subsetting
dim(sub.liver.ASE)
## [1] 1191   19
liver.ASE.symbol <- unique(sub.liver.ASE$geneID)
length(liver.ASE.symbol)
## [1] 440
# Annoate gene symbol wiht ensemble.ID
library(biomaRt)
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
liver.ASE.ensembl <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
                              "mgi_symbol"    , values =liver.ASE.symbol ,mart=mouse)
dim(liver.ASE.ensembl)
## [1] 391   2
liver.ASE.ensembl <- unique(liver.ASE.ensembl)
# delete liver ASE ensemble ID which are not in the liver.mouse.eQTL.bayesian data frame
liver.ASE.ensembl <- liver.ASE.ensembl[liver.ASE.ensembl$ensembl_gene_id %in% liver.mouse.eQTL.bayesian.4tau$ensembl_id, ]
dim(liver.ASE.ensembl)
## [1] 319   2
# create indicator for ASE true or not
liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 1
liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 0
summary(liver.mouse.eQTL.bayesian.4tau$eqtl)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.00000 0.02898 0.00000 1.00000
liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue <- -log10(liver.mouse.eQTL.bayesian.4tau$liver_pvalue)
head(liver.mouse.eQTL.bayesian.4tau)
##           ensembl_id   betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.015986111  0.025624670  0.03356494    0.019457564
## 2 ENSMUSG00000000003 0.016880682  0.012653279  0.02362737    0.010770630
## 3 ENSMUSG00000000037 0.003865079  0.013629289  0.01406380    0.011327409
## 4 ENSMUSG00000000049 0.042233333  0.007808461  0.04160507    0.005512759
## 5 ENSMUSG00000000056 0.063604072  0.051481198  0.04011077    0.032045034
## 6 ENSMUSG00000000058 0.025611111  0.012982317  0.03064532    0.008182353
##   liver_residual_variance liver_pvalue abs_lung.beta neg_log_lung_pvalue
## 1             0.018910763 5.377718e-01    0.03269000           0.9103235
## 2             0.003757142 1.929225e-01    0.02954960           1.2009724
## 3             0.004681090 7.788138e-01    0.01759477           0.5131187
## 4             0.001829162 9.084753e-06    0.02388393           0.1008335
## 5             0.078095912 2.269175e-01    0.02167500           0.6609141
## 6             0.004853968 5.846922e-02    0.02885470           0.5338593
##      p.below.0       fzm ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 4.226075e-02 0.9103235  18.97871    13.2793    0            0.2694020
## 2 1.412903e-02 1.2009724  14.38565    13.2793    0            0.7146171
## 3 1.071971e-01 0.5131187  33.67012    13.2793    0            0.1085664
## 4 2.226153e-14 0.1008335 171.33960    13.2793    0            5.0416869
## 5 1.053396e-01 0.6609141  26.14072    13.2793    0            0.6441321
## 6 9.009137e-05 0.5338593  32.36202    13.2793    0            1.2330727
by(liver.mouse.eQTL.bayesian.4tau[, c(1, 7, 9, 14)], liver.mouse.eQTL.bayesian.4tau[, "eqtl"], summary)
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 0
##               ensembl_id     liver_pvalue     neg_log_lung_pvalue
##  ENSMUSG00000000001:    1   Min.   :0.00000   Min.   : 0.0000    
##  ENSMUSG00000000003:    1   1st Qu.:0.04496   1st Qu.: 0.3454    
##  ENSMUSG00000000037:    1   Median :0.22711   Median : 0.7243    
##  ENSMUSG00000000049:    1   Mean   :0.30496   Mean   : 1.2496    
##  ENSMUSG00000000056:    1   3rd Qu.:0.51563   3rd Qu.: 1.5044    
##  ENSMUSG00000000058:    1   Max.   :1.00000   Max.   :17.2768    
##  (Other)           :10684                                        
##       eqtl  
##  Min.   :0  
##  1st Qu.:0  
##  Median :0  
##  Mean   :0  
##  3rd Qu.:0  
##  Max.   :0  
##             
## -------------------------------------------------------- 
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 1
##               ensembl_id   liver_pvalue       neg_log_lung_pvalue
##  ENSMUSG00000000275:  1   Min.   :0.0000000   Min.   : 0.03236   
##  ENSMUSG00000000673:  1   1st Qu.:0.0000001   1st Qu.: 0.67845   
##  ENSMUSG00000001089:  1   Median :0.0003878   Median : 1.65562   
##  ENSMUSG00000001435:  1   Mean   :0.0925145   Mean   : 2.65142   
##  ENSMUSG00000001467:  1   3rd Qu.:0.0718551   3rd Qu.: 3.91680   
##  ENSMUSG00000001473:  1   Max.   :0.9764010   Max.   :13.23882   
##  (Other)           :313                                          
##       eqtl  
##  Min.   :1  
##  1st Qu.:1  
##  Median :1  
##  Mean   :1  
##  3rd Qu.:1  
##  Max.   :1  
## 
liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]

plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl), xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue" )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau$betas.hat, liver.mouse.eQTL.bayesian.4tau$betas.tieda,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$betas.hat, liver.mouse.eQTL.bayesian.4tau.ase$betas.tieda, col="red") 
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

cor(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue)
## [1] 0.413809
length(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue)
## [1] 319
liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]
plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue, , col="red", xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$p.below.0, col="red") 
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$p.below.0,  col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

head(liver.mouse.eQTL.bayesian.4tau)
##           ensembl_id   betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.015986111  0.025624670  0.03356494    0.019457564
## 2 ENSMUSG00000000003 0.016880682  0.012653279  0.02362737    0.010770630
## 3 ENSMUSG00000000037 0.003865079  0.013629289  0.01406380    0.011327409
## 4 ENSMUSG00000000049 0.042233333  0.007808461  0.04160507    0.005512759
## 5 ENSMUSG00000000056 0.063604072  0.051481198  0.04011077    0.032045034
## 6 ENSMUSG00000000058 0.025611111  0.012982317  0.03064532    0.008182353
##   liver_residual_variance liver_pvalue abs_lung.beta neg_log_lung_pvalue
## 1             0.018910763 5.377718e-01    0.03269000           0.9103235
## 2             0.003757142 1.929225e-01    0.02954960           1.2009724
## 3             0.004681090 7.788138e-01    0.01759477           0.5131187
## 4             0.001829162 9.084753e-06    0.02388393           0.1008335
## 5             0.078095912 2.269175e-01    0.02167500           0.6609141
## 6             0.004853968 5.846922e-02    0.02885470           0.5338593
##      p.below.0       fzm ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 4.226075e-02 0.9103235  18.97871    13.2793    0            0.2694020
## 2 1.412903e-02 1.2009724  14.38565    13.2793    0            0.7146171
## 3 1.071971e-01 0.5131187  33.67012    13.2793    0            0.1085664
## 4 2.226153e-14 0.1008335 171.33960    13.2793    0            5.0416869
## 5 1.053396e-01 0.6609141  26.14072    13.2793    0            0.6441321
## 6 9.009137e-05 0.5338593  32.36202    13.2793    0            1.2330727
# Optimizing rho and adjust the weight
library(reshape)
rho.optimization <- matrix(0, nrow=nrow(liver.mouse.eQTL.bayesian.4tau), ncol=7)
colnames(rho.optimization)<-c("rho","tmm","tau", "omega","beta_tieda", "n.betas.tieda.se","p.below.0" )
nomega.diag<-diag(omega )
rho <- seq(1,1.1, by=0.02)*tau
rho
## [1] 0.009699749 0.009893744 0.010087739 0.010281734 0.010475729 0.010669724
result <- NULL
for (i in 1:length(rho))  {
  rho.optimization[ ,1] <- rho[i]
  rho.optimization[ ,2] <- (rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm
  rho.optimization[ ,3] <-tau*((rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
  nTau<- diag(rho.optimization[ ,3], rowLength, rowLength)
  ns<-V + nTau
  nS <- diag.inverse(ns)
  nomega<-diag.multi(nS, V)
  # nomega <- diag(0, rowLength, rowLength) # set nomega to 0 for code checking
  # nomega <- diag(1, rowLength, rowLength) # set nomega to 1 for code checking
  rho.optimization[ ,4] <- diag(nomega )
  rho.optimization[ ,5] <- nomega %*% Z %*% gamma + (identity-nomega) %*% betas.hat
  nTau_invert<-diag.inverse(nTau)
  V_invert<-diag.inverse(V)
  nPS_invert<-nTau_invert+ diag.multi(V_invert, xtx) 
  # nPS_invert<-nTau_invert+ diag.multi(diag.multi(V_invert, Z_transpose),  Z) # previous wrong code
  nPS<-diag.inverse(nPS_invert)
  nps<-diag(nPS)
  nps.long <- melt(nps)
  rho.optimization[ ,6] <-(nps.long$value)^0.5
  rho.optimization[ ,7] <- pnorm(0, rho.optimization[ ,5], rho.optimization[ ,6])
  result <- rbind(result,rho.optimization)
}

dim(result)
## [1] 66054     7
head(result)
##              rho tmm         tau     omega beta_tieda n.betas.tieda.se
## [1,] 0.009699749   1 0.009699749 0.6609725 0.03356494      0.019457564
## [2,] 0.009699749   1 0.009699749 0.2791983 0.02362737      0.010770630
## [3,] 0.009699749   1 0.009699749 0.3255088 0.01406380      0.011327409
## [4,] 0.009699749   1 0.009699749 0.1586587 0.04160507      0.005512759
## [5,] 0.009699749   1 0.009699749 0.8895190 0.04011077      0.032045034
## [6,] 0.009699749   1 0.009699749 0.3335208 0.03064532      0.008182353
##         p.below.0
## [1,] 4.226075e-02
## [2,] 1.412903e-02
## [3,] 1.071971e-01
## [4,] 2.226153e-14
## [5,] 1.053396e-01
## [6,] 9.009137e-05
write.table(result, file="2016-04-26_liver.mouse.eQTL.bayesian.result.txt",col.names=TRUE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian.result <- read.table(file="2016-04-26_liver.mouse.eQTL.bayesian.result.txt",  header=T)

result.df <-liver.mouse.eQTL.bayesian.result
result.df$rho.class <- factor(result.df$rho/tau)
# combine liver.mouse.eqtl.bayesian and rho.optimization.result for ploting
a <-liver.mouse.eQTL.bayesian.4tau[, c(1:2, 6, 7, 9)]
a <-rbind(a, a, a, a, a, a)
dim(a)
## [1] 66054     5
new.result.df<-cbind(a, result.df)
head(new.result.df)
##           ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 1 ENSMUSG00000000001 0.015986111             0.018910763 5.377718e-01
## 2 ENSMUSG00000000003 0.016880682             0.003757142 1.929225e-01
## 3 ENSMUSG00000000037 0.003865079             0.004681090 7.788138e-01
## 4 ENSMUSG00000000049 0.042233333             0.001829162 9.084753e-06
## 5 ENSMUSG00000000056 0.063604072             0.078095912 2.269175e-01
## 6 ENSMUSG00000000058 0.025611111             0.004853968 5.846922e-02
##   neg_log_lung_pvalue         rho tmm         tau     omega beta_tieda
## 1           0.9103235 0.009699749   1 0.009699749 0.6609725 0.03356494
## 2           1.2009724 0.009699749   1 0.009699749 0.2791983 0.02362737
## 3           0.5131187 0.009699749   1 0.009699749 0.3255088 0.01406380
## 4           0.1008335 0.009699749   1 0.009699749 0.1586587 0.04160507
## 5           0.6609141 0.009699749   1 0.009699749 0.8895190 0.04011077
## 6           0.5338593 0.009699749   1 0.009699749 0.3335208 0.03064532
##   n.betas.tieda.se    p.below.0 rho.class
## 1      0.019457564 4.226075e-02         1
## 2      0.010770630 1.412903e-02         1
## 3      0.011327409 1.071971e-01         1
## 4      0.005512759 2.226153e-14         1
## 5      0.032045034 1.053396e-01         1
## 6      0.008182353 9.009137e-05         1
new.result.df2 <- new.result.df
head(new.result.df$rho.class)
## [1] 1 1 1 1 1 1
## Levels: 1 1.02 1.03999999999999 1.06 1.08 1.1
by(new.result.df2, new.result.df2[, "rho.class"], head)
## new.result.df2[, "rho.class"]: 1
##           ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 1 ENSMUSG00000000001 0.015986111             0.018910763 5.377718e-01
## 2 ENSMUSG00000000003 0.016880682             0.003757142 1.929225e-01
## 3 ENSMUSG00000000037 0.003865079             0.004681090 7.788138e-01
## 4 ENSMUSG00000000049 0.042233333             0.001829162 9.084753e-06
## 5 ENSMUSG00000000056 0.063604072             0.078095912 2.269175e-01
## 6 ENSMUSG00000000058 0.025611111             0.004853968 5.846922e-02
##   neg_log_lung_pvalue         rho tmm         tau     omega beta_tieda
## 1           0.9103235 0.009699749   1 0.009699749 0.6609725 0.03356494
## 2           1.2009724 0.009699749   1 0.009699749 0.2791983 0.02362737
## 3           0.5131187 0.009699749   1 0.009699749 0.3255088 0.01406380
## 4           0.1008335 0.009699749   1 0.009699749 0.1586587 0.04160507
## 5           0.6609141 0.009699749   1 0.009699749 0.8895190 0.04011077
## 6           0.5338593 0.009699749   1 0.009699749 0.3335208 0.03064532
##   n.betas.tieda.se    p.below.0 rho.class
## 1      0.019457564 4.226075e-02         1
## 2      0.010770630 1.412903e-02         1
## 3      0.011327409 1.071971e-01         1
## 4      0.005512759 2.226153e-14         1
## 5      0.032045034 1.053396e-01         1
## 6      0.008182353 9.009137e-05         1
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.02
##               ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 11010 ENSMUSG00000000001 0.015986111             0.018910763 5.377718e-01
## 21000 ENSMUSG00000000003 0.016880682             0.003757142 1.929225e-01
## 31000 ENSMUSG00000000037 0.003865079             0.004681090 7.788138e-01
## 41000 ENSMUSG00000000049 0.042233333             0.001829162 9.084753e-06
## 51000 ENSMUSG00000000056 0.063604072             0.078095912 2.269175e-01
## 61000 ENSMUSG00000000058 0.025611111             0.004853968 5.846922e-02
##       neg_log_lung_pvalue         rho      tmm        tau     omega
## 11010           0.9103235 0.009893744 1.300781 0.01261725 0.5998082
## 21000           1.2009724 0.009893744 1.300781 0.01261725 0.2294523
## 31000           0.5131187 0.009893744 1.300781 0.01261725 0.2706092
## 41000           0.1008335 0.009893744 1.300781 0.01261725 0.1266170
## 51000           0.6609141 0.009893744 1.300781 0.01261725 0.8609105
## 61000           0.5338593 0.009893744 1.300781 0.01261725 0.2778265
##       beta_tieda n.betas.tieda.se    p.below.0 rho.class
## 11010 0.03193825      0.019545968 5.112867e-02      1.02
## 21000 0.02242528      0.010785554 1.879967e-02      1.02
## 31000 0.01234371      0.011344773 1.382860e-01      1.02
## 41000 0.04173195      0.005514757 1.904978e-14      1.02
## 51000 0.04086635      0.032444613 1.039117e-01      1.02
## 61000 0.02980466      0.008188890 1.365068e-04      1.02
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.03999999999999
##               ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 11013 ENSMUSG00000000001 0.015986111             0.018910763 5.377718e-01
## 21002 ENSMUSG00000000003 0.016880682             0.003757142 1.929225e-01
## 31002 ENSMUSG00000000037 0.003865079             0.004681090 7.788138e-01
## 41002 ENSMUSG00000000049 0.042233333             0.001829162 9.084753e-06
## 51002 ENSMUSG00000000056 0.063604072             0.078095912 2.269175e-01
## 61002 ENSMUSG00000000058 0.025611111             0.004853968 5.846922e-02
##       neg_log_lung_pvalue        rho      tmm        tau     omega
## 11013           0.9103235 0.01008774 1.683414 0.01632869 0.5366361
## 21002           1.2009724 0.01008774 1.683414 0.01632869 0.1870543
## 31002           0.5131187 0.01008774 1.683414 0.01632869 0.2228053
## 41002           0.1008335 0.01008774 1.683414 0.01632869 0.1007367
## 51002           0.6609141 0.01008774 1.683414 0.01632869 0.8270717
## 61002           0.5338593 0.01008774 1.683414 0.01632869 0.2291482
##       beta_tieda n.betas.tieda.se    p.below.0        rho.class
## 11013 0.03025816      0.019613579 6.144995e-02 1.03999999999999
## 21002 0.02140076      0.010796873 2.373260e-02 1.03999999999999
## 31002 0.01084593      0.011357948 1.698090e-01 1.03999999999999
## 41002 0.04183443      0.005516269 1.677537e-14 1.03999999999999
## 51002 0.04176008      0.032756685 1.011800e-01 1.03999999999999
## 61002 0.02906990      0.008193841 1.942501e-04 1.03999999999999
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.06
##               ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 11016 ENSMUSG00000000001 0.015986111             0.018910763 5.377718e-01
## 21004 ENSMUSG00000000003 0.016880682             0.003757142 1.929225e-01
## 31004 ENSMUSG00000000037 0.003865079             0.004681090 7.788138e-01
## 41004 ENSMUSG00000000049 0.042233333             0.001829162 9.084753e-06
## 51004 ENSMUSG00000000056 0.063604072             0.078095912 2.269175e-01
## 61004 ENSMUSG00000000058 0.025611111             0.004853968 5.846922e-02
##       neg_log_lung_pvalue        rho      tmm        tau      omega
## 11016           0.9103235 0.01028173 2.167925 0.02102833 0.47349009
## 21004           1.2009724 0.01028173 2.167925 0.02102833 0.15158647
## 31004           0.5131187 0.01028173 2.167925 0.02102833 0.18207686
## 41004           0.1008335 0.01028173 2.167925 0.02102833 0.08002463
## 51004           0.6609141 0.01028173 2.167925 0.02102833 0.78785889
## 61004           0.5338593 0.01028173 2.167925 0.02102833 0.18754011
##        beta_tieda n.betas.tieda.se    p.below.0 rho.class
## 11016 0.028578771      0.019665419 7.307717e-02      1.06
## 21004 0.020543694      0.010805497 2.863619e-02      1.06
## 31004 0.009569846      0.011367988 1.999432e-01      1.06
## 41004 0.041916446      0.005517418 1.514050e-14      1.06
## 51004 0.042795737      0.032999902 9.734237e-02      1.06
## 61004 0.028441864      0.008197608 2.606830e-04      1.06
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.08
##               ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 11019 ENSMUSG00000000001 0.015986111             0.018910763 5.377718e-01
## 21006 ENSMUSG00000000003 0.016880682             0.003757142 1.929225e-01
## 31006 ENSMUSG00000000037 0.003865079             0.004681090 7.788138e-01
## 41006 ENSMUSG00000000049 0.042233333             0.001829162 9.084753e-06
## 51006 ENSMUSG00000000056 0.063604072             0.078095912 2.269175e-01
## 61006 ENSMUSG00000000058 0.025611111             0.004853968 5.846922e-02
##       neg_log_lung_pvalue        rho      tmm        tau      omega
## 11019           0.9103235 0.01047573 2.778716 0.02695284 0.41232611
## 21006           1.2009724 0.01047573 2.778716 0.02695284 0.12234267
## 31006           0.5131187 0.01047573 2.778716 0.02695284 0.14797684
## 41006           0.1008335 0.01047573 2.778716 0.02695284 0.06355227
## 51006           0.6609141 0.01047573 2.778716 0.02695284 0.74342539
## 61006           0.5338593 0.01047573 2.778716 0.02695284 0.15260782
##        beta_tieda n.betas.tieda.se    p.below.0 rho.class
## 11019 0.026952090      0.019705289 8.569373e-02      1.08
## 21006 0.019837032      0.010812097 3.327454e-02      1.08
## 31006 0.008501437      0.011375674 2.274308e-01      1.08
## 41006 0.041981675      0.005518296 1.394817e-14      1.08
## 51006 0.043969281      0.033189345 9.261857e-02      1.08
## 61006 0.027914592      0.008200489 3.320149e-04      1.08
## -------------------------------------------------------- 
## new.result.df2[, "rho.class"]: 1.1
##               ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 11025 ENSMUSG00000000001 0.015986111             0.018910763 5.377718e-01
## 21008 ENSMUSG00000000003 0.016880682             0.003757142 1.929225e-01
## 31008 ENSMUSG00000000037 0.003865079             0.004681090 7.788138e-01
## 41008 ENSMUSG00000000049 0.042233333             0.001829162 9.084753e-06
## 51008 ENSMUSG00000000056 0.063604072             0.078095912 2.269175e-01
## 61008 ENSMUSG00000000058 0.025611111             0.004853968 5.846922e-02
##       neg_log_lung_pvalue        rho      tmm        tau      omega
## 11025           0.9103235 0.01066972 3.545405 0.03438954 0.35479651
## 21008           1.2009724 0.01066972 3.545405 0.03438954 0.09849196
## 31008           0.5131187 0.01066972 3.545405 0.03438954 0.11981095
## 41008           0.1008335 0.01066972 3.545405 0.03438954 0.05050324
## 51008           0.6609141 0.01066972 3.545405 0.03438954 0.69427564
## 61008           0.5338593 0.01066972 3.545405 0.03438954 0.12368843
##        beta_tieda n.betas.tieda.se    p.below.0 rho.class
## 11025 0.025422067      0.019736056 9.885510e-02       1.1
## 21008 0.019260691      0.010817171 3.749166e-02       1.1
## 31008 0.007618953      0.011381584 2.516168e-01       1.1
## 41008 0.042033347      0.005518970 1.306690e-14       1.1
## 51008 0.045267387      0.033336986 8.725202e-02       1.1
## 61008 0.027478079      0.008202702 4.042309e-04       1.1
# choose different rho class for plotting 
new.result.df2.rho1 <- new.result.df2[new.result.df2$rho.class == 1, ]
head(new.result.df2.rho1)
##           ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 1 ENSMUSG00000000001 0.015986111             0.018910763 5.377718e-01
## 2 ENSMUSG00000000003 0.016880682             0.003757142 1.929225e-01
## 3 ENSMUSG00000000037 0.003865079             0.004681090 7.788138e-01
## 4 ENSMUSG00000000049 0.042233333             0.001829162 9.084753e-06
## 5 ENSMUSG00000000056 0.063604072             0.078095912 2.269175e-01
## 6 ENSMUSG00000000058 0.025611111             0.004853968 5.846922e-02
##   neg_log_lung_pvalue         rho tmm         tau     omega beta_tieda
## 1           0.9103235 0.009699749   1 0.009699749 0.6609725 0.03356494
## 2           1.2009724 0.009699749   1 0.009699749 0.2791983 0.02362737
## 3           0.5131187 0.009699749   1 0.009699749 0.3255088 0.01406380
## 4           0.1008335 0.009699749   1 0.009699749 0.1586587 0.04160507
## 5           0.6609141 0.009699749   1 0.009699749 0.8895190 0.04011077
## 6           0.5338593 0.009699749   1 0.009699749 0.3335208 0.03064532
##   n.betas.tieda.se    p.below.0 rho.class
## 1      0.019457564 4.226075e-02         1
## 2      0.010770630 1.412903e-02         1
## 3      0.011327409 1.071971e-01         1
## 4      0.005512759 2.226153e-14         1
## 5      0.032045034 1.053396e-01         1
## 6      0.008182353 9.009137e-05         1
tail(new.result.df2.rho1)
##               ensembl_id   betas.hat liver_residual_variance liver_pvalue
## 11004 ENSMUSG00000099041 0.062044643             0.021588736   0.02861902
## 11005 ENSMUSG00000099083 0.030113122             0.006648153   0.05474021
## 11006 ENSMUSG00000099116 0.009430556             0.005630947   0.50556296
## 11007 ENSMUSG00000099164 0.039370813             0.015532369   0.10653819
## 11008 ENSMUSG00000099262 0.007846890             0.010872458   0.69418700
## 11009 ENSMUSG00000099305 0.043763889             0.022984334   0.13257346
##       neg_log_lung_pvalue         rho tmm         tau     omega beta_tieda
## 11004           0.7276229 0.009699749   1 0.009699749 0.6899898 0.04676365
## 11005           1.7000754 0.009699749   1 0.009699749 0.4066670 0.03896957
## 11006           0.4066951 0.009699749   1 0.009699749 0.3672989 0.02064434
## 11007           0.5751693 0.009699749   1 0.009699749 0.6155793 0.04087628
## 11008           1.1744631 0.009699749   1 0.009699749 0.5285022 0.03353317
## 11009           1.1413112 0.009699749   1 0.009699749 0.7032271 0.03977079
##       n.betas.tieda.se    p.below.0 rho.class
## 11004      0.018055118 4.798103e-03         1
## 11005      0.009838260 3.731477e-05         1
## 11006      0.008808068 9.544239e-03         1
## 11007      0.014147650 1.930732e-03         1
## 11008      0.011873474 2.369873e-03         1
## 11009      0.017579977 1.184012e-02         1
library(ggplot2)
dev.off()
## null device 
##           1
# plot Tmm
qplot(neg_log_lung_pvalue,tmm, data = new.result.df2.rho1)
# rank with p.below.0: bayesian modeling
new.result.df2.rho1$rank.p.below.0 <- rank(new.result.df2.rho1$p.below.0)
# rank with liver p value: traditionsl linear regression (one step regression)
new.result.df2.rho1$rank.liver.pvalue <- rank(new.result.df2.rho1$liver_pvalue)
# caculate TPR: true positive rate
# caculate PPV: positive predictive rate
result.rho1 <- matrix(, nrow(new.result.df2.rho1), 8)
colnames(result.rho1)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR" )
for (i in 1:nrow(new.result.df2.rho1))
{
  newdata1.rho1 <- subset(new.result.df2.rho1, rank.p.below.0 <= i)
  overlap.newdata1.rho1 <- newdata1.rho1[newdata1.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  result.rho1[i, 1] <- i
  result.rho1[i, 2] <- nrow(overlap.newdata1.rho1)/nrow(newdata1.rho1)
  result.rho1[i, 3] <- nrow(overlap.newdata1.rho1)/nrow(liver.ASE.ensembl)
  newdata2.rho1 <- subset(new.result.df2.rho1, rank.liver.pvalue <= i)
  overlap.newdata2.rho1 <- newdata2.rho1[newdata2.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ] 
  result.rho1[i, 5] <- i
  result.rho1[i, 6] <- nrow(overlap.newdata2.rho1)/nrow(newdata2.rho1)
  result.rho1[i, 7] <- nrow(overlap.newdata2.rho1)/nrow(liver.ASE.ensembl)
}  
head(result.rho1)
##      bayrank bayppv     bay_TPR bay_FPR orirank    orippv     ori_TPR
## [1,]       1    NaN 0.000000000      NA       1 0.0000000 0.000000000
## [2,]       2    NaN 0.000000000      NA       2 0.5000000 0.003134796
## [3,]       3    NaN 0.000000000      NA       3 0.6666667 0.006269592
## [4,]       4    0.5 0.009404389      NA       4 0.5000000 0.006269592
## [5,]       5    0.5 0.009404389      NA       5 0.4000000 0.006269592
## [6,]       6    0.5 0.009404389      NA       6 0.5000000 0.009404389
##      ori_FPR
## [1,]      NA
## [2,]      NA
## [3,]      NA
## [4,]      NA
## [5,]      NA
## [6,]      NA
tail(result.rho1)
##          bayrank     bayppv bay_TPR bay_FPR orirank     orippv ori_TPR
## [11004,]   11004 0.02898946       1      NA   11004 0.02898946       1
## [11005,]   11005 0.02898682       1      NA   11005 0.02898682       1
## [11006,]   11006 0.02898419       1      NA   11006 0.02898419       1
## [11007,]   11007 0.02898156       1      NA   11007 0.02898156       1
## [11008,]   11008 0.02897892       1      NA   11008 0.02897892       1
## [11009,]   11009 0.02897629       1      NA   11009 0.02897629       1
##          ori_FPR
## [11004,]      NA
## [11005,]      NA
## [11006,]      NA
## [11007,]      NA
## [11008,]      NA
## [11009,]      NA
# ploting "True positive rate"
plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red",  xlab="Ranking", ylab="TPR", ylim=c(0, 0.4) , xlim=c(0, 300))
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4), xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# ploting "positive predictive value"
plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)