Figure 2. Weight effect of lung P value

rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 309130 16.6 592000 31.7 460000 24.6
## Vcells 532883 4.1 2375110180 18120.7 2184544708 16666.8
# set the working directory
setwd("/Volumes/Transcend/Thesis")
merged.mouse.eQTL.min.variance2<-read.table(file="2016-04-22-merged.mouse.eQTL.min.variance.txt", header=T)
head(merged.mouse.eQTL.min.variance2)
## ensembl_id liver.beta liver.beta_se liver_residual_variance
## 1 ENSMUSG00000000001 -0.015986111 0.025624670 0.018910763
## 2 ENSMUSG00000000003 0.016880682 0.012653279 0.003757142
## 3 ENSMUSG00000000037 -0.003865079 0.013629289 0.004681090
## 4 ENSMUSG00000000049 0.042233333 0.007808461 0.001829162
## 5 ENSMUSG00000000056 0.063604072 0.051481198 0.078095912
## 6 ENSMUSG00000000058 -0.025611111 0.012982317 0.004853968
## lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 0.12293526 -0.03269000 0.02077547 5.377718e-01
## 2 0.06295461 0.02954960 0.01547936 1.929225e-01
## 3 0.30681836 -0.01759477 0.01701272 7.788138e-01
## 4 0.79280526 -0.02388393 0.09036397 9.084753e-06
## 5 0.21831618 0.02167500 0.01734983 2.269175e-01
## 6 0.29250998 0.02885470 0.02707600 5.846922e-02
# caculate the absolute value of live/lung beta
merged.mouse.eQTL.min.variance2$abs_liver.beta<-abs(merged.mouse.eQTL.min.variance2$liver.beta)
merged.mouse.eQTL.min.variance2$abs_lung.beta<-abs(merged.mouse.eQTL.min.variance2$lung.beta)
# caculate negative log lung p value
merged.mouse.eQTL.min.variance2$neg_log_lung_pvalue<--log10(merged.mouse.eQTL.min.variance2$lung_pvalue)
# Simple linear regression between abs_liver.beta and abs_lung.beta
# fit1<-summary(lm(abs_liver.beta ~ abs_lung.beta, data=merged.mouse.eQTL.min.variance2))
# fit1
# tau<-fit1$sigma**2
# check association between abs_liver.beta and abs.lung.beta
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
ggplot(merged.mouse.eQTL.min.variance2, aes(x=abs_lung.beta, y=abs_liver.beta)) +geom_point()+geom_smooth(method=lm)

cor(merged.mouse.eQTL.min.variance2$abs_lung.beta, merged.mouse.eQTL.min.variance2$abs_liver.beta)
## [1] 0.3984727
merged.mouse.eQTL<-merged.mouse.eQTL.min.variance2
# retrieve ensembl_id
markers<-merged.mouse.eQTL[, 1]
# Yg=Ag + Bg*Xsnp+V
# retrieve betas.hat (liver.beta)
betas.hat<-abs(merged.mouse.eQTL[,2])
# retrieve liver.beta_se
se<-(merged.mouse.eQTL[,3])
# retrieve liver_residua_variance
liver_residual_variance<-merged.mouse.eQTL[,4]
# creat Z matrix with 2 columns: 1 for intercept,abs_lung.beta (merged.mouse.eQTL[,10])
Z<-as.matrix(merged.mouse.eQTL[,10])
Z<-replace(Z,is.na(Z),0)
Z<-data.frame(1,Z)
Z<-as.matrix(Z)
rowLength<-length(markers)
# liver.betas=Z*gama+T^2
# Regression: abs_liver.beta = intercept + beta*abs_lung.beta + error
lmsummary<-summary(lm(abs_liver.beta~-1+Z, data=merged.mouse.eQTL))
lmsummary
##
## Call:
## lm(formula = abs_liver.beta ~ -1 + Z, data = merged.mouse.eQTL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59768 -0.03182 -0.01870 0.00623 2.04049
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## ZX1 0.026589 0.001119 23.76 <2e-16 ***
## ZZ 0.489214 0.010733 45.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09849 on 11007 degrees of freedom
## Multiple R-squared: 0.3304, Adjusted R-squared: 0.3303
## F-statistic: 2716 on 2 and 11007 DF, p-value: < 2.2e-16
# error ~ N(0, Tau)
tau<-lmsummary$sigma**2
tau
## [1] 0.009699749
# output coeffieients (gamma matrix)
outputVector<-c(lmsummary$coefficients[,1],lmsummary$coefficients[,2])
write.table(matrix(outputVector,length(Z[1,])),file="2016-04-26_hm_tau_coefficients with intercept.txt",col.names=FALSE,row.names=FALSE,quote=FALSE)
hm_tau_coefficients<-read.table(file="2016-04-26_hm_tau_coefficients with intercept.txt", header=T)
# gamma matrix
gamma<-as.matrix(lmsummary$coefficients[,1])
# trasnpose Z matrix
Z_transpose<-t(Z)
# create identity matrix
identity<-diag(nrow=rowLength)
# original betas.hat
betas.hat<-as.matrix(betas.hat)
#creat V matrix for liver_residual_variance
V <- matrix(0, rowLength, rowLength)
# V, liver residual variance
diag(V) <-liver_residual_variance
# Creat Tau matrix
Tau<- diag(tau, rowLength, rowLength)
# follow Chen's paper and cacualte s
s <-V + Tau
# create inverse function for inversing diagnoal matrix
diag.inverse <- function(x){diag(1/diag(x), nrow(x), ncol(x))}
# create multiplication function for multiplicating two diagnoal matrix
diag.multi <- function(x,y){diag(diag(x)*diag(y), nrow(x), ncol(x))}
# inverse s
S <-diag.inverse(s)
# follow chen's paper to caculate omega
omega<-diag.multi(S, V)
# retrieve omega value from the matrix
omega.diag<-diag(omega )
# summary the omega value
summary(omega.diag)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.09131 0.36400 0.52520 0.54380 0.70830 0.99760
# betas.thea<- S %*% Z %*% gamma + (identity-S) %*% betas.hat
# caculate betas.tieda with the formula in Chen's paper
betas.tieda<- omega %*% Z %*% gamma + (identity-omega) %*% betas.hat
# crbetas.tieda<- cromega %*% Z %*% gamma + (identity-cromega) %*% betas.hat
head(betas.tieda)
## [,1]
## [1,] 0.03356494
## [2,] 0.02362737
## [3,] 0.01406380
## [4,] 0.04160507
## [5,] 0.04011077
## [6,] 0.03064532
head(betas.hat)
## [,1]
## [1,] 0.015986111
## [2,] 0.016880682
## [3,] 0.003865079
## [4,] 0.042233333
## [5,] 0.063604072
## [6,] 0.025611111
#regression beta
regbeta <-Z %*% gamma
head(regbeta)
## [,1]
## [1,] 0.04258151
## [2,] 0.04104519
## [3,] 0.03519672
## [4,] 0.03827346
## [5,] 0.03719283
## [6,] 0.04070523
# summary(regbeta)
markers1<-as.character(markers)
# combine ensemble_id, betas.hat and betas.tieda
outputVector<-c(markers1,betas.hat,betas.tieda)
write.table(matrix(outputVector,rowLength),file="2016-04-26_hm_tau_hmresults.txt",col.names=FALSE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian<-read.table(file="2016-04-26_hm_tau_hmresults.txt")
colnames(liver.mouse.eQTL.bayesian)<-c( "ensembl_id", "betas.hat","betas.tieda")
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.tieda
## 1 ENSMUSG00000000001 0.015986111 0.03356494
## 2 ENSMUSG00000000003 0.016880682 0.02362737
## 3 ENSMUSG00000000037 0.003865079 0.01406380
## 4 ENSMUSG00000000049 0.042233333 0.04160507
## 5 ENSMUSG00000000056 0.063604072 0.04011077
## 6 ENSMUSG00000000058 0.025611111 0.03064532
# merge dataset with betas.hat and betas.tieda
liver.mouse.eQTL.bayesian.all<- merge(liver.mouse.eQTL.bayesian, merged.mouse.eQTL.min.variance2, by = "ensembl_id")
head(liver.mouse.eQTL.bayesian.all)
## ensembl_id betas.hat betas.tieda liver.beta liver.beta_se
## 1 ENSMUSG00000000001 0.015986111 0.03356494 -0.015986111 0.025624670
## 2 ENSMUSG00000000003 0.016880682 0.02362737 0.016880682 0.012653279
## 3 ENSMUSG00000000037 0.003865079 0.01406380 -0.003865079 0.013629289
## 4 ENSMUSG00000000049 0.042233333 0.04160507 0.042233333 0.007808461
## 5 ENSMUSG00000000056 0.063604072 0.04011077 0.063604072 0.051481198
## 6 ENSMUSG00000000058 0.025611111 0.03064532 -0.025611111 0.012982317
## liver_residual_variance lung_pvalue lung.beta lung.beta_se
## 1 0.018910763 0.12293526 -0.03269000 0.02077547
## 2 0.003757142 0.06295461 0.02954960 0.01547936
## 3 0.004681090 0.30681836 -0.01759477 0.01701272
## 4 0.001829162 0.79280526 -0.02388393 0.09036397
## 5 0.078095912 0.21831618 0.02167500 0.01734983
## 6 0.004853968 0.29250998 0.02885470 0.02707600
## liver_pvalue abs_liver.beta abs_lung.beta neg_log_lung_pvalue
## 1 5.377718e-01 0.015986111 0.03269000 0.9103235
## 2 1.929225e-01 0.016880682 0.02954960 1.2009724
## 3 7.788138e-01 0.003865079 0.01759477 0.5131187
## 4 9.084753e-06 0.042233333 0.02388393 0.1008335
## 5 2.269175e-01 0.063604072 0.02167500 0.6609141
## 6 5.846922e-02 0.025611111 0.02885470 0.5338593
write.table(liver.mouse.eQTL.bayesian.all,file="2016-04-26_liver.mouse.eQTL.bayesian.all.txt")
liver.mouse.eQTL.bayesian<-read.table(file="2016-04-26_liver.mouse.eQTL.bayesian.all.txt")
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.tieda liver.beta liver.beta_se
## 1 ENSMUSG00000000001 0.015986111 0.03356494 -0.015986111 0.025624670
## 2 ENSMUSG00000000003 0.016880682 0.02362737 0.016880682 0.012653279
## 3 ENSMUSG00000000037 0.003865079 0.01406380 -0.003865079 0.013629289
## 4 ENSMUSG00000000049 0.042233333 0.04160507 0.042233333 0.007808461
## 5 ENSMUSG00000000056 0.063604072 0.04011077 0.063604072 0.051481198
## 6 ENSMUSG00000000058 0.025611111 0.03064532 -0.025611111 0.012982317
## liver_residual_variance lung_pvalue lung.beta lung.beta_se
## 1 0.018910763 0.12293526 -0.03269000 0.02077547
## 2 0.003757142 0.06295461 0.02954960 0.01547936
## 3 0.004681090 0.30681836 -0.01759477 0.01701272
## 4 0.001829162 0.79280526 -0.02388393 0.09036397
## 5 0.078095912 0.21831618 0.02167500 0.01734983
## 6 0.004853968 0.29250998 0.02885470 0.02707600
## liver_pvalue abs_liver.beta abs_lung.beta neg_log_lung_pvalue
## 1 5.377718e-01 0.015986111 0.03269000 0.9103235
## 2 1.929225e-01 0.016880682 0.02954960 1.2009724
## 3 7.788138e-01 0.003865079 0.01759477 0.5131187
## 4 9.084753e-06 0.042233333 0.02388393 0.1008335
## 5 2.269175e-01 0.063604072 0.02167500 0.6609141
## 6 5.846922e-02 0.025611111 0.02885470 0.5338593
liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat",
"liver.beta_se", "betas.tieda",
"liver_residual_variance", "liver_pvalue", "abs_lung.beta",
"abs_lung.beta", "neg_log_lung_pvalue"))
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat liver.beta_se betas.tieda
## 1 ENSMUSG00000000001 0.015986111 0.025624670 0.03356494
## 2 ENSMUSG00000000003 0.016880682 0.012653279 0.02362737
## 3 ENSMUSG00000000037 0.003865079 0.013629289 0.01406380
## 4 ENSMUSG00000000049 0.042233333 0.007808461 0.04160507
## 5 ENSMUSG00000000056 0.063604072 0.051481198 0.04011077
## 6 ENSMUSG00000000058 0.025611111 0.012982317 0.03064532
## liver_residual_variance liver_pvalue abs_lung.beta abs_lung.beta.1
## 1 0.018910763 5.377718e-01 0.03269000 0.03269000
## 2 0.003757142 1.929225e-01 0.02954960 0.02954960
## 3 0.004681090 7.788138e-01 0.01759477 0.01759477
## 4 0.001829162 9.084753e-06 0.02388393 0.02388393
## 5 0.078095912 2.269175e-01 0.02167500 0.02167500
## 6 0.004853968 5.846922e-02 0.02885470 0.02885470
## neg_log_lung_pvalue
## 1 0.9103235
## 2 1.2009724
## 3 0.5131187
## 4 0.1008335
## 5 0.6609141
## 6 0.5338593
# Caculate variance for beta.tieda by following Brian Kulis' lecture notes
# Invert Tau and V
Tau_invert<-diag.inverse(Tau)
V_invert<-diag.inverse(V)
# Retrieve x information (SNP correspondting to the pair of gene and SNP with minimum P value)
snps <-read.table(file="2016-04-21-mouse.SNP.min.txt", header=T)
head(snps)
## snps BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19
## 1 rs13477320 0 2 2 0 0 2 0 0 0
## 2 CEL-X_71438949 0 0 0 0 0 0 0 0 0
## 3 CEL-X_154048891 0 0 0 2 0 2 0 0 2
## 4 rs3704454 2 0 0 0 0 0 0 2 2
## 5 CEL-11_121219118 2 0 2 2 0 2 2 0 0
## 6 rs3655269 0 2 2 2 2 0 2 2 0
## BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34
## 1 2 0 2 0 0 0 2 0 0 0 2 2
## 2 0 2 0 0 0 0 2 0 0 2 0 0
## 3 0 0 0 2 2 2 0 0 0 2 0 0
## 4 2 2 2 2 2 0 0 0 0 2 0 2
## 5 2 0 2 2 2 2 0 0 0 2 0 0
## 6 0 0 0 2 2 2 2 0 0 2 2 2
## BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1 2 2 2 0 0 0 0 2 0
## 2 0 2 0 2 2 0 2 0 2
## 3 0 2 0 0 2 0 0 0 0
## 4 0 0 0 0 2 2 2 2 2
## 5 2 0 0 0 2 2 2 2 2
## 6 0 0 0 2 2 0 2 2 2
nrow(snps)
## [1] 11009
# caculate XtX
xtxlist <- NULL
for (i in 1:nrow(snps)) {
x <- matrix(as.numeric(snps[i, 2:31]),nrow = 30,ncol = 1)
# not sure how to deal with NA
# here I ajust them to 0
x[is.na(x)] <- 0
xtx <-t(x) %*% x
xtxlist <- c(xtxlist, xtx)}
head(xtxlist)
## [1] 48 32 36 60 68 72
length(xtxlist)
## [1] 11009
xtx<- diag(xtxlist, rowLength, rowLength)
dim(xtx)
## [1] 11009 11009
xtx[1:6, 1:6]
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 48 0 0 0 0 0
## [2,] 0 32 0 0 0 0
## [3,] 0 0 36 0 0 0
## [4,] 0 0 0 60 0 0
## [5,] 0 0 0 0 68 0
## [6,] 0 0 0 0 0 72
# corrected code to caculate bayesian variance
# S0^-1 in Brian Kulis' lecture note: Tau_invert
# 1/sigma^2 in Brian Kulis' lecture note: V_invert
# XTX in Brian Kulis' lecture note: xtx
PS_invert<-Tau_invert + diag.multi(V_invert, xtx)
# PS_invert<-Tau_invert+V_invert%*% Z %*% Z_transpose # previous wrong code
# S in Brian Kulis' lecture note:PS
PS <- diag.inverse(PS_invert)
# retrieve posterior variance
ps<-diag(PS)
range(ps)
## [1] 1.520574e-05 8.614716e-03
library(reshape)
# reshape posterior variance to long format
ps.long <- melt(ps)
head(ps.long)
## value
## 1 3.785968e-04
## 2 1.160065e-04
## 3 1.283102e-04
## 4 3.039052e-05
## 5 1.026884e-03
## 6 6.695090e-05
# Caculate sd: square root on variance
ps.long$betas.tieda.se<-(ps.long$value)^0.5
# combine sd to the data.frame
liver.mouse.eQTL.bayesian<-cbind(liver.mouse.eQTL.bayesian,ps.long$betas.tieda.se)
# head(liver.mouse.eQTL.bayesian)
# rename betas.tieda.se
liver.mouse.eQTL.bayesian<-rename(liver.mouse.eQTL.bayesian, c("ps.long$betas.tieda.se"="betas.tieda.se", "liver.beta_se"="betas.hat.se"))
liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", "betas.hat.se",
"betas.tieda", "betas.tieda.se", "liver_residual_variance",
"liver_pvalue", "abs_lung.beta", "neg_log_lung_pvalue"))
# head(liver.mouse.eQTL.bayesian)
# library(tigerstats)
# pnormGC(0, region="below", mean=0.002352829,sd=0.09972950)
# caculate probability of betas.tieda below 0 based on betas.tieda and standard deviation
liver.mouse.eQTL.bayesian$p.below.0 <- pnorm(0,liver.mouse.eQTL.bayesian$betas.tieda, liver.mouse.eQTL.bayesian$betas.tieda.se)
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.015986111 0.025624670 0.03356494 0.019457564
## 2 ENSMUSG00000000003 0.016880682 0.012653279 0.02362737 0.010770630
## 3 ENSMUSG00000000037 0.003865079 0.013629289 0.01406380 0.011327409
## 4 ENSMUSG00000000049 0.042233333 0.007808461 0.04160507 0.005512759
## 5 ENSMUSG00000000056 0.063604072 0.051481198 0.04011077 0.032045034
## 6 ENSMUSG00000000058 0.025611111 0.012982317 0.03064532 0.008182353
## liver_residual_variance liver_pvalue abs_lung.beta neg_log_lung_pvalue
## 1 0.018910763 5.377718e-01 0.03269000 0.9103235
## 2 0.003757142 1.929225e-01 0.02954960 1.2009724
## 3 0.004681090 7.788138e-01 0.01759477 0.5131187
## 4 0.001829162 9.084753e-06 0.02388393 0.1008335
## 5 0.078095912 2.269175e-01 0.02167500 0.6609141
## 6 0.004853968 5.846922e-02 0.02885470 0.5338593
## p.below.0
## 1 4.226075e-02
## 2 1.412903e-02
## 3 1.071971e-01
## 4 2.226153e-14
## 5 1.053396e-01
## 6 9.009137e-05
dim(liver.mouse.eQTL.bayesian)
## [1] 11009 10
summary(liver.mouse.eQTL.bayesian$betas.tieda.se)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003899 0.009691 0.013470 0.016720 0.019920 0.092820
range(liver.mouse.eQTL.bayesian$p.below.0)
## [1] 0.0000000 0.3644177
write.table(liver.mouse.eQTL.bayesian,file="2016-04-26_liver.mouse.eQTL.bayesian with beta.txt")
liver.mouse.eQTL.bayesian <- read.table(file="2016-04-26_liver.mouse.eQTL.bayesian with beta.txt")
# head(liver.mouse.eQTL.bayesian)
# summary(liver.mouse.eQTL.bayesian$liver_residual_variance)
liver.mouse.eQTL.bayesian.4tau <- liver.mouse.eQTL.bayesian
# colnames(liver.mouse.eQTL.bayesian.4tau) <- c("ensembl_id", "betas.hat", "betas.hat.se", "betas.tieda", "betas.tieda.se", "liver_residual_variance", "liver_pvalue", "abs_lung.beta",
# "neg_log_lung_pvalue", "p.below.0", "betas.tieda2m", "betas.tieda3rd", "betas.tieda4max")
# head(liver.mouse.eQTL.bayesian.4tau)
# Introduce weight (Tmm) to adjust Tau with neg_log_lung_pvalue
liver.mouse.eQTL.bayesian.4tau$fzm <- liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue
# caculate ratio_fzm
liver.mouse.eQTL.bayesian.4tau$ratio_fzm <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/liver.mouse.eQTL.bayesian.4tau$fzm
range(liver.mouse.eQTL.bayesian.4tau$ratio_fzm)
## [1] 1.000000e+00 1.990653e+16
# set up threshold for ratio_fzm and caculate updated ratio_fzm (nratio_fzm)
threshold <- 0.05
liver.mouse.eQTL.bayesian.4tau$nratio_fzm <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$ratio_fzm > max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))] <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))
# liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$nratio_fzm >= max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log(threshold))] <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
summary(liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 11.23 13.28 11.53 13.28 13.28
# compare bayesian prediction to the traditional method
# evaluate the predition with alle specific expreesion in the liver: Sandrine Lagarrigue's paper
liver.ASE <- read.csv(file= "ASE.genetics.113.153882-6.csv")
dim(liver.ASE)
## [1] 1191 19
head(liver.ASE)
## replicate chr startExon geneID SNPperExon3
## 1 M.CH. DxB and BxD 1 9535488 Rrs1 15
## 2 M.HF DxB and BxD 1 9535488 Rrs1 14
## 3 F.HF DxB and BxD 1 9535488 Rrs1 15
## 4 M.CH. DxB and BxD 1 37473929 6330578E17Rik 2
## 5 M.CH. DxB and BxD 1 58169979 Aox3 3
## 6 M.HF DxB and BxD 1 58169979 Aox3 3
## sumBperExon.DxB4 sumDperExon.DxB4 sumBperExon.BxD4 sumDperExon.BxD4
## 1 45 19 50 25
## 2 74 39 66 30
## 3 76 20 77 40
## 4 78 32 47 17
## 5 473 82 225 27
## 6 252 56 263 53
## FCadd1.DxB5 FCadd1.BxD5 BonBD.DxB6 BonBD.BxD6 pvalBH.DxB7 pvalBH.BxD7
## 1 2.30 1.96 0.70 0.67 1.0e-02 3.7e-02
## 2 1.88 2.16 0.65 0.69 1.1e-02 3.3e-03
## 3 3.67 1.90 0.79 0.66 4.9e-07 5.1e-03
## 4 2.39 2.67 0.71 0.73 1.9e-04 2.2e-03
## 5 5.71 8.07 0.85 0.89 3.6e-64 5.4e-37
## 6 4.44 4.89 0.82 0.83 8.7e-28 1.1e-31
## UTR5 UTR3 strand exonCount
## 1 0 0 + 1
## 2 0 0 + 1
## 3 0 0 + 1
## 4 0 0 - 3
## 5 0 0 + 35
## 6 0 0 + 35
# 440 unique gene ID
length(unique(liver.ASE$geneID))
## [1] 440
# verify ASE table
liver.ASE1 <- liver.ASE[which(liver.ASE$replicate == "M.CH. DxB and BxD"), ]
liver.ASE2 <- liver.ASE[which(liver.ASE$replicate == "M.HF DxB and BxD"), ]
liver.ASE3 <- liver.ASE[which(liver.ASE$replicate == "F.HF DxB and BxD"), ]
length(unique(liver.ASE1$geneID))
## [1] 272
length(unique(liver.ASE2$geneID))
## [1] 275
length(unique(liver.ASE3$geneID))
## [1] 304
(length(unique(liver.ASE1$geneID))+length(unique(liver.ASE2$geneID))+length(unique(liver.ASE3$geneID)))/3
## [1] 283.6667
# As claimed in the paper: averaged 284 ASE for each replicate
sub.liver.ASE <-liver.ASE
# Subset liver ASE with different conditions
# sub.liver.ASE <-liver.ASE[which(liver.ASE$pvalBH.DxB7 < 0.05 & liver.ASE$pvalBH.BxD7 < 0.05), ]
# sub.liver.ASE <- sub.liver.ASE[order(sub.liver.ASE$pvalBH.BxD7), ]
# summary(sub.liver.ASE$pvalBH.BxD7)
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.BxD7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[ sub.liver.ASE$geneID %in% names(table(sub.liver.ASE$geneID))[table(sub.liver.ASE$geneID) >1] , ]
# check the remain gene number after subsetting
dim(sub.liver.ASE)
## [1] 1191 19
liver.ASE.symbol <- unique(sub.liver.ASE$geneID)
length(liver.ASE.symbol)
## [1] 440
# Annoate gene symbol wiht ensemble.ID
library(biomaRt)
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
liver.ASE.ensembl <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol" , values =liver.ASE.symbol ,mart=mouse)
dim(liver.ASE.ensembl)
## [1] 391 2
liver.ASE.ensembl <- unique(liver.ASE.ensembl)
# delete liver ASE ensemble ID which are not in the liver.mouse.eQTL.bayesian data frame
liver.ASE.ensembl <- liver.ASE.ensembl[liver.ASE.ensembl$ensembl_gene_id %in% liver.mouse.eQTL.bayesian.4tau$ensembl_id, ]
dim(liver.ASE.ensembl)
## [1] 319 2
# create indicator for ASE true or not
liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 1
liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 0
summary(liver.mouse.eQTL.bayesian.4tau$eqtl)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.02898 0.00000 1.00000
liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue <- -log10(liver.mouse.eQTL.bayesian.4tau$liver_pvalue)
head(liver.mouse.eQTL.bayesian.4tau)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.015986111 0.025624670 0.03356494 0.019457564
## 2 ENSMUSG00000000003 0.016880682 0.012653279 0.02362737 0.010770630
## 3 ENSMUSG00000000037 0.003865079 0.013629289 0.01406380 0.011327409
## 4 ENSMUSG00000000049 0.042233333 0.007808461 0.04160507 0.005512759
## 5 ENSMUSG00000000056 0.063604072 0.051481198 0.04011077 0.032045034
## 6 ENSMUSG00000000058 0.025611111 0.012982317 0.03064532 0.008182353
## liver_residual_variance liver_pvalue abs_lung.beta neg_log_lung_pvalue
## 1 0.018910763 5.377718e-01 0.03269000 0.9103235
## 2 0.003757142 1.929225e-01 0.02954960 1.2009724
## 3 0.004681090 7.788138e-01 0.01759477 0.5131187
## 4 0.001829162 9.084753e-06 0.02388393 0.1008335
## 5 0.078095912 2.269175e-01 0.02167500 0.6609141
## 6 0.004853968 5.846922e-02 0.02885470 0.5338593
## p.below.0 fzm ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 4.226075e-02 0.9103235 18.97871 13.2793 0 0.2694020
## 2 1.412903e-02 1.2009724 14.38565 13.2793 0 0.7146171
## 3 1.071971e-01 0.5131187 33.67012 13.2793 0 0.1085664
## 4 2.226153e-14 0.1008335 171.33960 13.2793 0 5.0416869
## 5 1.053396e-01 0.6609141 26.14072 13.2793 0 0.6441321
## 6 9.009137e-05 0.5338593 32.36202 13.2793 0 1.2330727
by(liver.mouse.eQTL.bayesian.4tau[, c(1, 7, 9, 14)], liver.mouse.eQTL.bayesian.4tau[, "eqtl"], summary)
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 0
## ensembl_id liver_pvalue neg_log_lung_pvalue
## ENSMUSG00000000001: 1 Min. :0.00000 Min. : 0.0000
## ENSMUSG00000000003: 1 1st Qu.:0.04496 1st Qu.: 0.3454
## ENSMUSG00000000037: 1 Median :0.22711 Median : 0.7243
## ENSMUSG00000000049: 1 Mean :0.30496 Mean : 1.2496
## ENSMUSG00000000056: 1 3rd Qu.:0.51563 3rd Qu.: 1.5044
## ENSMUSG00000000058: 1 Max. :1.00000 Max. :17.2768
## (Other) :10684
## eqtl
## Min. :0
## 1st Qu.:0
## Median :0
## Mean :0
## 3rd Qu.:0
## Max. :0
##
## --------------------------------------------------------
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 1
## ensembl_id liver_pvalue neg_log_lung_pvalue
## ENSMUSG00000000275: 1 Min. :0.0000000 Min. : 0.03236
## ENSMUSG00000000673: 1 1st Qu.:0.0000001 1st Qu.: 0.67845
## ENSMUSG00000001089: 1 Median :0.0003878 Median : 1.65562
## ENSMUSG00000001435: 1 Mean :0.0925145 Mean : 2.65142
## ENSMUSG00000001467: 1 3rd Qu.:0.0718551 3rd Qu.: 3.91680
## ENSMUSG00000001473: 1 Max. :0.9764010 Max. :13.23882
## (Other) :313
## eqtl
## Min. :1
## 1st Qu.:1
## Median :1
## Mean :1
## 3rd Qu.:1
## Max. :1
##
liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]
plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl), xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue" )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau$betas.hat, liver.mouse.eQTL.bayesian.4tau$betas.tieda, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$betas.hat, liver.mouse.eQTL.bayesian.4tau.ase$betas.tieda, col="red")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

cor(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue)
## [1] 0.413809
length(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue)
## [1] 319
liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]
plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue, , col="red", xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$p.below.0, col="red")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$p.below.0, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

head(liver.mouse.eQTL.bayesian.4tau)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.015986111 0.025624670 0.03356494 0.019457564
## 2 ENSMUSG00000000003 0.016880682 0.012653279 0.02362737 0.010770630
## 3 ENSMUSG00000000037 0.003865079 0.013629289 0.01406380 0.011327409
## 4 ENSMUSG00000000049 0.042233333 0.007808461 0.04160507 0.005512759
## 5 ENSMUSG00000000056 0.063604072 0.051481198 0.04011077 0.032045034
## 6 ENSMUSG00000000058 0.025611111 0.012982317 0.03064532 0.008182353
## liver_residual_variance liver_pvalue abs_lung.beta neg_log_lung_pvalue
## 1 0.018910763 5.377718e-01 0.03269000 0.9103235
## 2 0.003757142 1.929225e-01 0.02954960 1.2009724
## 3 0.004681090 7.788138e-01 0.01759477 0.5131187
## 4 0.001829162 9.084753e-06 0.02388393 0.1008335
## 5 0.078095912 2.269175e-01 0.02167500 0.6609141
## 6 0.004853968 5.846922e-02 0.02885470 0.5338593
## p.below.0 fzm ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 4.226075e-02 0.9103235 18.97871 13.2793 0 0.2694020
## 2 1.412903e-02 1.2009724 14.38565 13.2793 0 0.7146171
## 3 1.071971e-01 0.5131187 33.67012 13.2793 0 0.1085664
## 4 2.226153e-14 0.1008335 171.33960 13.2793 0 5.0416869
## 5 1.053396e-01 0.6609141 26.14072 13.2793 0 0.6441321
## 6 9.009137e-05 0.5338593 32.36202 13.2793 0 1.2330727
# Optimizing rho and adjust the weight
library(reshape)
rho.optimization <- matrix(0, nrow=nrow(liver.mouse.eQTL.bayesian.4tau), ncol=7)
colnames(rho.optimization)<-c("rho","tmm","tau", "omega","beta_tieda", "n.betas.tieda.se","p.below.0" )
nomega.diag<-diag(omega )
rho <- seq(1,1.1, by=0.02)*tau
rho
## [1] 0.009699749 0.009893744 0.010087739 0.010281734 0.010475729 0.010669724
result <- NULL
for (i in 1:length(rho)) {
rho.optimization[ ,1] <- rho[i]
rho.optimization[ ,2] <- (rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm
rho.optimization[ ,3] <-tau*((rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
nTau<- diag(rho.optimization[ ,3], rowLength, rowLength)
ns<-V + nTau
nS <- diag.inverse(ns)
nomega<-diag.multi(nS, V)
# nomega <- diag(0, rowLength, rowLength) # set nomega to 0 for code checking
# nomega <- diag(1, rowLength, rowLength) # set nomega to 1 for code checking
rho.optimization[ ,4] <- diag(nomega )
rho.optimization[ ,5] <- nomega %*% Z %*% gamma + (identity-nomega) %*% betas.hat
nTau_invert<-diag.inverse(nTau)
V_invert<-diag.inverse(V)
nPS_invert<-nTau_invert+ diag.multi(V_invert, xtx)
# nPS_invert<-nTau_invert+ diag.multi(diag.multi(V_invert, Z_transpose), Z) # previous wrong code
nPS<-diag.inverse(nPS_invert)
nps<-diag(nPS)
nps.long <- melt(nps)
rho.optimization[ ,6] <-(nps.long$value)^0.5
rho.optimization[ ,7] <- pnorm(0, rho.optimization[ ,5], rho.optimization[ ,6])
result <- rbind(result,rho.optimization)
}
dim(result)
## [1] 66054 7
head(result)
## rho tmm tau omega beta_tieda n.betas.tieda.se
## [1,] 0.009699749 1 0.009699749 0.6609725 0.03356494 0.019457564
## [2,] 0.009699749 1 0.009699749 0.2791983 0.02362737 0.010770630
## [3,] 0.009699749 1 0.009699749 0.3255088 0.01406380 0.011327409
## [4,] 0.009699749 1 0.009699749 0.1586587 0.04160507 0.005512759
## [5,] 0.009699749 1 0.009699749 0.8895190 0.04011077 0.032045034
## [6,] 0.009699749 1 0.009699749 0.3335208 0.03064532 0.008182353
## p.below.0
## [1,] 4.226075e-02
## [2,] 1.412903e-02
## [3,] 1.071971e-01
## [4,] 2.226153e-14
## [5,] 1.053396e-01
## [6,] 9.009137e-05
write.table(result, file="2016-04-26_liver.mouse.eQTL.bayesian.result.txt",col.names=TRUE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian.result <- read.table(file="2016-04-26_liver.mouse.eQTL.bayesian.result.txt", header=T)
result.df <-liver.mouse.eQTL.bayesian.result
result.df$rho.class <- factor(result.df$rho/tau)
# combine liver.mouse.eqtl.bayesian and rho.optimization.result for ploting
a <-liver.mouse.eQTL.bayesian.4tau[, c(1:2, 6, 7, 9)]
a <-rbind(a, a, a, a, a, a)
dim(a)
## [1] 66054 5
new.result.df<-cbind(a, result.df)
head(new.result.df)
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 1 ENSMUSG00000000001 0.015986111 0.018910763 5.377718e-01
## 2 ENSMUSG00000000003 0.016880682 0.003757142 1.929225e-01
## 3 ENSMUSG00000000037 0.003865079 0.004681090 7.788138e-01
## 4 ENSMUSG00000000049 0.042233333 0.001829162 9.084753e-06
## 5 ENSMUSG00000000056 0.063604072 0.078095912 2.269175e-01
## 6 ENSMUSG00000000058 0.025611111 0.004853968 5.846922e-02
## neg_log_lung_pvalue rho tmm tau omega beta_tieda
## 1 0.9103235 0.009699749 1 0.009699749 0.6609725 0.03356494
## 2 1.2009724 0.009699749 1 0.009699749 0.2791983 0.02362737
## 3 0.5131187 0.009699749 1 0.009699749 0.3255088 0.01406380
## 4 0.1008335 0.009699749 1 0.009699749 0.1586587 0.04160507
## 5 0.6609141 0.009699749 1 0.009699749 0.8895190 0.04011077
## 6 0.5338593 0.009699749 1 0.009699749 0.3335208 0.03064532
## n.betas.tieda.se p.below.0 rho.class
## 1 0.019457564 4.226075e-02 1
## 2 0.010770630 1.412903e-02 1
## 3 0.011327409 1.071971e-01 1
## 4 0.005512759 2.226153e-14 1
## 5 0.032045034 1.053396e-01 1
## 6 0.008182353 9.009137e-05 1
new.result.df2 <- new.result.df
head(new.result.df$rho.class)
## [1] 1 1 1 1 1 1
## Levels: 1 1.02 1.03999999999999 1.06 1.08 1.1
by(new.result.df2, new.result.df2[, "rho.class"], head)
## new.result.df2[, "rho.class"]: 1
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 1 ENSMUSG00000000001 0.015986111 0.018910763 5.377718e-01
## 2 ENSMUSG00000000003 0.016880682 0.003757142 1.929225e-01
## 3 ENSMUSG00000000037 0.003865079 0.004681090 7.788138e-01
## 4 ENSMUSG00000000049 0.042233333 0.001829162 9.084753e-06
## 5 ENSMUSG00000000056 0.063604072 0.078095912 2.269175e-01
## 6 ENSMUSG00000000058 0.025611111 0.004853968 5.846922e-02
## neg_log_lung_pvalue rho tmm tau omega beta_tieda
## 1 0.9103235 0.009699749 1 0.009699749 0.6609725 0.03356494
## 2 1.2009724 0.009699749 1 0.009699749 0.2791983 0.02362737
## 3 0.5131187 0.009699749 1 0.009699749 0.3255088 0.01406380
## 4 0.1008335 0.009699749 1 0.009699749 0.1586587 0.04160507
## 5 0.6609141 0.009699749 1 0.009699749 0.8895190 0.04011077
## 6 0.5338593 0.009699749 1 0.009699749 0.3335208 0.03064532
## n.betas.tieda.se p.below.0 rho.class
## 1 0.019457564 4.226075e-02 1
## 2 0.010770630 1.412903e-02 1
## 3 0.011327409 1.071971e-01 1
## 4 0.005512759 2.226153e-14 1
## 5 0.032045034 1.053396e-01 1
## 6 0.008182353 9.009137e-05 1
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.02
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 11010 ENSMUSG00000000001 0.015986111 0.018910763 5.377718e-01
## 21000 ENSMUSG00000000003 0.016880682 0.003757142 1.929225e-01
## 31000 ENSMUSG00000000037 0.003865079 0.004681090 7.788138e-01
## 41000 ENSMUSG00000000049 0.042233333 0.001829162 9.084753e-06
## 51000 ENSMUSG00000000056 0.063604072 0.078095912 2.269175e-01
## 61000 ENSMUSG00000000058 0.025611111 0.004853968 5.846922e-02
## neg_log_lung_pvalue rho tmm tau omega
## 11010 0.9103235 0.009893744 1.300781 0.01261725 0.5998082
## 21000 1.2009724 0.009893744 1.300781 0.01261725 0.2294523
## 31000 0.5131187 0.009893744 1.300781 0.01261725 0.2706092
## 41000 0.1008335 0.009893744 1.300781 0.01261725 0.1266170
## 51000 0.6609141 0.009893744 1.300781 0.01261725 0.8609105
## 61000 0.5338593 0.009893744 1.300781 0.01261725 0.2778265
## beta_tieda n.betas.tieda.se p.below.0 rho.class
## 11010 0.03193825 0.019545968 5.112867e-02 1.02
## 21000 0.02242528 0.010785554 1.879967e-02 1.02
## 31000 0.01234371 0.011344773 1.382860e-01 1.02
## 41000 0.04173195 0.005514757 1.904978e-14 1.02
## 51000 0.04086635 0.032444613 1.039117e-01 1.02
## 61000 0.02980466 0.008188890 1.365068e-04 1.02
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.03999999999999
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 11013 ENSMUSG00000000001 0.015986111 0.018910763 5.377718e-01
## 21002 ENSMUSG00000000003 0.016880682 0.003757142 1.929225e-01
## 31002 ENSMUSG00000000037 0.003865079 0.004681090 7.788138e-01
## 41002 ENSMUSG00000000049 0.042233333 0.001829162 9.084753e-06
## 51002 ENSMUSG00000000056 0.063604072 0.078095912 2.269175e-01
## 61002 ENSMUSG00000000058 0.025611111 0.004853968 5.846922e-02
## neg_log_lung_pvalue rho tmm tau omega
## 11013 0.9103235 0.01008774 1.683414 0.01632869 0.5366361
## 21002 1.2009724 0.01008774 1.683414 0.01632869 0.1870543
## 31002 0.5131187 0.01008774 1.683414 0.01632869 0.2228053
## 41002 0.1008335 0.01008774 1.683414 0.01632869 0.1007367
## 51002 0.6609141 0.01008774 1.683414 0.01632869 0.8270717
## 61002 0.5338593 0.01008774 1.683414 0.01632869 0.2291482
## beta_tieda n.betas.tieda.se p.below.0 rho.class
## 11013 0.03025816 0.019613579 6.144995e-02 1.03999999999999
## 21002 0.02140076 0.010796873 2.373260e-02 1.03999999999999
## 31002 0.01084593 0.011357948 1.698090e-01 1.03999999999999
## 41002 0.04183443 0.005516269 1.677537e-14 1.03999999999999
## 51002 0.04176008 0.032756685 1.011800e-01 1.03999999999999
## 61002 0.02906990 0.008193841 1.942501e-04 1.03999999999999
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.06
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 11016 ENSMUSG00000000001 0.015986111 0.018910763 5.377718e-01
## 21004 ENSMUSG00000000003 0.016880682 0.003757142 1.929225e-01
## 31004 ENSMUSG00000000037 0.003865079 0.004681090 7.788138e-01
## 41004 ENSMUSG00000000049 0.042233333 0.001829162 9.084753e-06
## 51004 ENSMUSG00000000056 0.063604072 0.078095912 2.269175e-01
## 61004 ENSMUSG00000000058 0.025611111 0.004853968 5.846922e-02
## neg_log_lung_pvalue rho tmm tau omega
## 11016 0.9103235 0.01028173 2.167925 0.02102833 0.47349009
## 21004 1.2009724 0.01028173 2.167925 0.02102833 0.15158647
## 31004 0.5131187 0.01028173 2.167925 0.02102833 0.18207686
## 41004 0.1008335 0.01028173 2.167925 0.02102833 0.08002463
## 51004 0.6609141 0.01028173 2.167925 0.02102833 0.78785889
## 61004 0.5338593 0.01028173 2.167925 0.02102833 0.18754011
## beta_tieda n.betas.tieda.se p.below.0 rho.class
## 11016 0.028578771 0.019665419 7.307717e-02 1.06
## 21004 0.020543694 0.010805497 2.863619e-02 1.06
## 31004 0.009569846 0.011367988 1.999432e-01 1.06
## 41004 0.041916446 0.005517418 1.514050e-14 1.06
## 51004 0.042795737 0.032999902 9.734237e-02 1.06
## 61004 0.028441864 0.008197608 2.606830e-04 1.06
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.08
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 11019 ENSMUSG00000000001 0.015986111 0.018910763 5.377718e-01
## 21006 ENSMUSG00000000003 0.016880682 0.003757142 1.929225e-01
## 31006 ENSMUSG00000000037 0.003865079 0.004681090 7.788138e-01
## 41006 ENSMUSG00000000049 0.042233333 0.001829162 9.084753e-06
## 51006 ENSMUSG00000000056 0.063604072 0.078095912 2.269175e-01
## 61006 ENSMUSG00000000058 0.025611111 0.004853968 5.846922e-02
## neg_log_lung_pvalue rho tmm tau omega
## 11019 0.9103235 0.01047573 2.778716 0.02695284 0.41232611
## 21006 1.2009724 0.01047573 2.778716 0.02695284 0.12234267
## 31006 0.5131187 0.01047573 2.778716 0.02695284 0.14797684
## 41006 0.1008335 0.01047573 2.778716 0.02695284 0.06355227
## 51006 0.6609141 0.01047573 2.778716 0.02695284 0.74342539
## 61006 0.5338593 0.01047573 2.778716 0.02695284 0.15260782
## beta_tieda n.betas.tieda.se p.below.0 rho.class
## 11019 0.026952090 0.019705289 8.569373e-02 1.08
## 21006 0.019837032 0.010812097 3.327454e-02 1.08
## 31006 0.008501437 0.011375674 2.274308e-01 1.08
## 41006 0.041981675 0.005518296 1.394817e-14 1.08
## 51006 0.043969281 0.033189345 9.261857e-02 1.08
## 61006 0.027914592 0.008200489 3.320149e-04 1.08
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.1
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 11025 ENSMUSG00000000001 0.015986111 0.018910763 5.377718e-01
## 21008 ENSMUSG00000000003 0.016880682 0.003757142 1.929225e-01
## 31008 ENSMUSG00000000037 0.003865079 0.004681090 7.788138e-01
## 41008 ENSMUSG00000000049 0.042233333 0.001829162 9.084753e-06
## 51008 ENSMUSG00000000056 0.063604072 0.078095912 2.269175e-01
## 61008 ENSMUSG00000000058 0.025611111 0.004853968 5.846922e-02
## neg_log_lung_pvalue rho tmm tau omega
## 11025 0.9103235 0.01066972 3.545405 0.03438954 0.35479651
## 21008 1.2009724 0.01066972 3.545405 0.03438954 0.09849196
## 31008 0.5131187 0.01066972 3.545405 0.03438954 0.11981095
## 41008 0.1008335 0.01066972 3.545405 0.03438954 0.05050324
## 51008 0.6609141 0.01066972 3.545405 0.03438954 0.69427564
## 61008 0.5338593 0.01066972 3.545405 0.03438954 0.12368843
## beta_tieda n.betas.tieda.se p.below.0 rho.class
## 11025 0.025422067 0.019736056 9.885510e-02 1.1
## 21008 0.019260691 0.010817171 3.749166e-02 1.1
## 31008 0.007618953 0.011381584 2.516168e-01 1.1
## 41008 0.042033347 0.005518970 1.306690e-14 1.1
## 51008 0.045267387 0.033336986 8.725202e-02 1.1
## 61008 0.027478079 0.008202702 4.042309e-04 1.1
# choose different rho class for plotting
new.result.df2.rho1 <- new.result.df2[new.result.df2$rho.class == 1, ]
head(new.result.df2.rho1)
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 1 ENSMUSG00000000001 0.015986111 0.018910763 5.377718e-01
## 2 ENSMUSG00000000003 0.016880682 0.003757142 1.929225e-01
## 3 ENSMUSG00000000037 0.003865079 0.004681090 7.788138e-01
## 4 ENSMUSG00000000049 0.042233333 0.001829162 9.084753e-06
## 5 ENSMUSG00000000056 0.063604072 0.078095912 2.269175e-01
## 6 ENSMUSG00000000058 0.025611111 0.004853968 5.846922e-02
## neg_log_lung_pvalue rho tmm tau omega beta_tieda
## 1 0.9103235 0.009699749 1 0.009699749 0.6609725 0.03356494
## 2 1.2009724 0.009699749 1 0.009699749 0.2791983 0.02362737
## 3 0.5131187 0.009699749 1 0.009699749 0.3255088 0.01406380
## 4 0.1008335 0.009699749 1 0.009699749 0.1586587 0.04160507
## 5 0.6609141 0.009699749 1 0.009699749 0.8895190 0.04011077
## 6 0.5338593 0.009699749 1 0.009699749 0.3335208 0.03064532
## n.betas.tieda.se p.below.0 rho.class
## 1 0.019457564 4.226075e-02 1
## 2 0.010770630 1.412903e-02 1
## 3 0.011327409 1.071971e-01 1
## 4 0.005512759 2.226153e-14 1
## 5 0.032045034 1.053396e-01 1
## 6 0.008182353 9.009137e-05 1
tail(new.result.df2.rho1)
## ensembl_id betas.hat liver_residual_variance liver_pvalue
## 11004 ENSMUSG00000099041 0.062044643 0.021588736 0.02861902
## 11005 ENSMUSG00000099083 0.030113122 0.006648153 0.05474021
## 11006 ENSMUSG00000099116 0.009430556 0.005630947 0.50556296
## 11007 ENSMUSG00000099164 0.039370813 0.015532369 0.10653819
## 11008 ENSMUSG00000099262 0.007846890 0.010872458 0.69418700
## 11009 ENSMUSG00000099305 0.043763889 0.022984334 0.13257346
## neg_log_lung_pvalue rho tmm tau omega beta_tieda
## 11004 0.7276229 0.009699749 1 0.009699749 0.6899898 0.04676365
## 11005 1.7000754 0.009699749 1 0.009699749 0.4066670 0.03896957
## 11006 0.4066951 0.009699749 1 0.009699749 0.3672989 0.02064434
## 11007 0.5751693 0.009699749 1 0.009699749 0.6155793 0.04087628
## 11008 1.1744631 0.009699749 1 0.009699749 0.5285022 0.03353317
## 11009 1.1413112 0.009699749 1 0.009699749 0.7032271 0.03977079
## n.betas.tieda.se p.below.0 rho.class
## 11004 0.018055118 4.798103e-03 1
## 11005 0.009838260 3.731477e-05 1
## 11006 0.008808068 9.544239e-03 1
## 11007 0.014147650 1.930732e-03 1
## 11008 0.011873474 2.369873e-03 1
## 11009 0.017579977 1.184012e-02 1
library(ggplot2)
dev.off()
## null device
## 1
# plot Tmm
qplot(neg_log_lung_pvalue,tmm, data = new.result.df2.rho1)
# rank with p.below.0: bayesian modeling
new.result.df2.rho1$rank.p.below.0 <- rank(new.result.df2.rho1$p.below.0)
# rank with liver p value: traditionsl linear regression (one step regression)
new.result.df2.rho1$rank.liver.pvalue <- rank(new.result.df2.rho1$liver_pvalue)
# caculate TPR: true positive rate
# caculate PPV: positive predictive rate
result.rho1 <- matrix(, nrow(new.result.df2.rho1), 8)
colnames(result.rho1)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR" )
for (i in 1:nrow(new.result.df2.rho1))
{
newdata1.rho1 <- subset(new.result.df2.rho1, rank.p.below.0 <= i)
overlap.newdata1.rho1 <- newdata1.rho1[newdata1.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
result.rho1[i, 1] <- i
result.rho1[i, 2] <- nrow(overlap.newdata1.rho1)/nrow(newdata1.rho1)
result.rho1[i, 3] <- nrow(overlap.newdata1.rho1)/nrow(liver.ASE.ensembl)
newdata2.rho1 <- subset(new.result.df2.rho1, rank.liver.pvalue <= i)
overlap.newdata2.rho1 <- newdata2.rho1[newdata2.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
result.rho1[i, 5] <- i
result.rho1[i, 6] <- nrow(overlap.newdata2.rho1)/nrow(newdata2.rho1)
result.rho1[i, 7] <- nrow(overlap.newdata2.rho1)/nrow(liver.ASE.ensembl)
}
head(result.rho1)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [1,] 1 NaN 0.000000000 NA 1 0.0000000 0.000000000
## [2,] 2 NaN 0.000000000 NA 2 0.5000000 0.003134796
## [3,] 3 NaN 0.000000000 NA 3 0.6666667 0.006269592
## [4,] 4 0.5 0.009404389 NA 4 0.5000000 0.006269592
## [5,] 5 0.5 0.009404389 NA 5 0.4000000 0.006269592
## [6,] 6 0.5 0.009404389 NA 6 0.5000000 0.009404389
## ori_FPR
## [1,] NA
## [2,] NA
## [3,] NA
## [4,] NA
## [5,] NA
## [6,] NA
tail(result.rho1)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [11004,] 11004 0.02898946 1 NA 11004 0.02898946 1
## [11005,] 11005 0.02898682 1 NA 11005 0.02898682 1
## [11006,] 11006 0.02898419 1 NA 11006 0.02898419 1
## [11007,] 11007 0.02898156 1 NA 11007 0.02898156 1
## [11008,] 11008 0.02897892 1 NA 11008 0.02897892 1
## [11009,] 11009 0.02897629 1 NA 11009 0.02897629 1
## ori_FPR
## [11004,] NA
## [11005,] NA
## [11006,] NA
## [11007,] NA
## [11008,] NA
## [11009,] NA
# ploting "True positive rate"
plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)
plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4) , xlim=c(0, 300))
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4), xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)
# ploting "positive predictive value"
plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)
plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)