rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 372365 19.9 750400 40.1 592000 31.7
## Vcells 630004 4.9 1308461 10.0 937364 7.2
# set the working directory
setwd("/Volumes/Transcend/Thesis_project/subsetted_liver")
# subset dataset
sebsetn <- 15
mouse.liver.expression.eqtl <-read.table(file="2016-05-16 mouse.liver.expression.eqtl.txt", header=T)
head(mouse.liver.expression.eqtl)
## ProbeSet BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18
## 1 1415670_at 10.090 10.200 10.300 10.208 9.830 10.238 9.914 10.348
## 2 1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3 1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4 1415673_at 7.535 7.382 7.566 7.162 7.403 7.342 7.213 7.581
## 5 1415674_a_at 9.757 9.972 9.269 9.873 9.354 9.918 9.459 9.655
## 6 1415675_at 9.029 9.009 9.245 9.282 9.415 9.098 9.060 8.937
## BXD19 BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31
## 1 9.939 9.871 10.077 10.159 9.746 9.890 10.286 10.177 9.959 9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4 7.551 7.368 7.251 7.373 7.408 7.473 7.238 7.424 7.475 7.491
## 5 9.544 9.557 9.460 9.322 9.795 9.758 9.628 9.263 9.671 9.645
## 6 8.995 9.104 9.123 9.086 9.087 9.040 8.844 9.173 8.951 9.131
## BXD32 BXD33 BXD34 BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6
## 1 10.059 10.102 10.174 10.022 10.364 9.745 10.074 9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4 7.419 7.386 7.496 7.369 7.265 7.130 7.149 7.383 7.439 7.233
## 5 9.368 9.741 9.721 9.684 9.387 9.649 9.693 9.793 9.840 9.519
## 6 9.108 9.122 9.153 9.008 9.138 9.126 9.233 9.060 9.134 9.081
## BXD8 BXD9
## 1 9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4 7.397 7.390
## 5 9.340 9.160
## 6 8.926 9.274
dim(mouse.liver.expression.eqtl)
## [1] 20634 31
set.seed(50)
sub.mouse.liver.expression.eqtl <- mouse.liver.expression.eqtl[, c(1, sample(2:dim(mouse.liver.expression.eqtl)[2],sebsetn, replace=FALSE))]
head(sub.mouse.liver.expression.eqtl)
## ProbeSet BXD36 BXD24 BXD15 BXD34 BXD24a BXD11 BXD29 BXD27
## 1 1415670_at 10.022 9.746 10.238 10.174 9.890 10.200 9.959 10.286
## 2 1415671_at 10.923 11.104 11.120 11.059 10.979 11.088 11.071 10.905
## 3 1415672_at 11.545 11.499 11.318 11.490 11.531 11.417 11.502 11.666
## 4 1415673_at 7.369 7.408 7.342 7.496 7.473 7.382 7.475 7.238
## 5 1415674_a_at 9.684 9.795 9.918 9.721 9.758 9.972 9.671 9.628
## 6 1415675_at 9.008 9.087 9.098 9.153 9.040 9.009 8.951 8.844
## BXD1 BXD12 BXD18 BXD6 BXD21 BXD40 BXD14
## 1 10.090 10.300 10.348 10.069 10.159 10.074 9.830
## 2 10.932 11.007 11.123 11.043 10.969 11.067 10.955
## 3 11.432 11.442 11.561 11.504 11.328 11.543 11.561
## 4 7.535 7.566 7.581 7.233 7.373 7.149 7.403
## 5 9.757 9.269 9.655 9.519 9.322 9.693 9.354
## 6 9.029 9.245 8.937 9.081 9.086 9.233 9.415
dim(sub.mouse.liver.expression.eqtl)
## [1] 20634 16
write.table(sub.mouse.liver.expression.eqtl,file="2016-05-09 sub.mouse.liver.expression.eqtl.txt", sep="\t", row.names=FALSE, quote=FALSE)
#subset liver snp expression data
BXD.geno.SNP.eqtl.for.liver <-read.table(file="2016-05-16 BXD.geno.SNP.eqtl.for.liver.txt", header=T)
head(BXD.geno.SNP.eqtl.for.liver)
## Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs13459051 0 1 0 1 0 1 0 0 0 0
## 2 rs13459060 0 0 0 1 1 0 1 0 1 0
## 3 rs13459062 1 0 0 1 1 0 1 0 1 0
## 4 rs13459063 0 0 0 1 1 0 1 0 1 0
## 5 rs13459064 1 1 0 1 1 0 1 1 1 1
## 6 rs13459069 0 1 1 0 0 1 0 0 0 0
## BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1 1 0 1 1 1 1 1 0 1 1 1 0
## 2 0 1 1 1 0 0 0 1 1 0 0 1
## 3 0 1 1 1 0 0 0 1 1 0 0 1
## 4 0 1 1 1 0 0 0 1 1 0 0 1
## 5 1 1 1 1 0 0 1 1 1 0 0 1
## 6 0 1 0 0 1 1 0 0 0 0 1 1
## BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1 0 1 0 0 1 1 0 0
## 2 0 0 1 1 1 0 0 1
## 3 0 0 1 1 1 0 1 1
## 4 0 0 1 1 1 0 1 1
## 5 1 1 1 1 1 0 0 0
## 6 1 1 0 1 1 0 0 1
dim(BXD.geno.SNP.eqtl.for.liver)
## [1] 3024 31
set.seed(50)
sub.BXD.geno.SNP.eqtl.for.liver <- BXD.geno.SNP.eqtl.for.liver[, c(1, sample(2:dim(BXD.geno.SNP.eqtl.for.liver)[2],sebsetn, replace=FALSE))]
head(sub.BXD.geno.SNP.eqtl.for.liver)
## Locus BXD36 BXD24 BXD15 BXD34 BXD24a BXD11 BXD29 BXD27 BXD1 BXD12
## 1 rs13459051 0 1 1 1 1 1 1 1 0 0
## 2 rs13459060 1 1 0 0 1 0 0 0 0 0
## 3 rs13459062 1 1 0 0 1 0 0 0 1 0
## 4 rs13459063 1 1 0 0 1 0 0 0 0 0
## 5 rs13459064 1 1 0 0 1 1 1 0 1 0
## 6 rs13459069 1 0 1 1 0 1 0 1 0 1
## BXD18 BXD6 BXD21 BXD40 BXD14
## 1 0 1 0 0 0
## 2 0 0 1 1 1
## 3 0 0 1 1 1
## 4 0 0 1 1 1
## 5 1 0 1 1 1
## 6 0 0 1 0 0
dim(sub.BXD.geno.SNP.eqtl.for.liver)
## [1] 3024 16
write.table(sub.BXD.geno.SNP.eqtl.for.liver,file="2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt", sep="\t", row.names=FALSE, quote=FALSE)
library(MatrixEQTL)
## Location of the package with the data files.
base.dir = "/Volumes/Transcend/Thesis_project/subsetted_liver";
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste(base.dir, "/2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt", sep="");
snps_location_file_name = paste(base.dir, "/2016-05-16 BXD.geno.loc.eqtl.for.liver.txt", sep="");
# Gene expression file name
expression_file_name = paste(base.dir, "/2016-05-09 sub.mouse.liver.expression.eqtl.txt", sep="");
gene_location_file_name = paste(base.dir, "/2016-05-16 liver.gene.loc.txt", sep="");
# Covariates file name
# Set to character() for no covariates
covariates_file_name = character() ;
# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();
# Only associations significant at this level will be saved
pvOutputThreshold_cis = 1;
pvOutputThreshold_tra = 0.000000000000005;
# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;
## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t"; # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;
snps$fileSkipColumns = 1;
snps$fileSliceSize = 2000;
snps$LoadFile(SNP_file_name);
## Rows read: 2,000
## Rows read: 3024 done.
## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t";
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1;
gene$fileSkipColumns = 1;
gene$fileSliceSize = 2000;
gene$LoadFile(expression_file_name);
## Rows read: 2,000
## Rows read: 4,000
## Rows read: 6,000
## Rows read: 8,000
## Rows read: 10,000
## Rows read: 12,000
## Rows read: 14,000
## Rows read: 16,000
## Rows read: 18,000
## Rows read: 20,000
## Rows read: 20634 done.
## Load covariates
cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t"; # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1; # one row of column labels
cvrt$fileSkipColumns = 1; # one column of row labels
if(length(covariates_file_name)>0) {
cvrt$LoadFile(covariates_file_name);
}
## Run the analysis
snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
head(genepos)
## probeset Chromosome Start End
## 1 1415670_at 6 87887814 87913595
## 2 1415671_at 8 105524465 105566047
## 3 1415672_at 8 23241353 23257074
## 4 1415673_at 5 129765558 129787449
## 5 1415674_a_at 9 44403761 44407548
## 6 1415675_at 2 32570858 32573579
me = Matrix_eQTL_main(
snps = snps,
gene = gene,
output_file_name = output_file_name_tra,
pvOutputThreshold = pvOutputThreshold_tra,
useModel = useModel,
errorCovariance = numeric(),
verbose = TRUE,
output_file_name.cis = output_file_name_cis,
pvOutputThreshold.cis = pvOutputThreshold_cis,
snpspos = snpspos,
genepos = genepos,
cisDist = cisDist,
pvalue.hist = TRUE,
min.pv.by.genesnp = FALSE,
noFDRsaveMemory = FALSE);
## Matching data files and location files
## 20634 of 20634 genes matched
## 3024 of 3024 SNPs matched
## Task finished in 0.026 seconds
## Reordering SNPs
##
## Task finished in 0.095 seconds
## Reordering genes
##
## Task finished in 0.127 seconds
## Processing covariates
## Task finished in 0.001 seconds
## Processing gene expression data (imputation, residualization, etc.)
## Task finished in 0.017 seconds
## Creating output file(s)
## Task finished in 0.014 seconds
## Performing eQTL analysis
## 4.54% done, 5,899 cis-eQTLs, 0 trans-eQTLs
## 9.09% done, 11,411 cis-eQTLs, 0 trans-eQTLs
## 13.63% done, 17,998 cis-eQTLs, 0 trans-eQTLs
## 18.18% done, 22,514 cis-eQTLs, 0 trans-eQTLs
## 22.72% done, 27,349 cis-eQTLs, 0 trans-eQTLs
## 27.27% done, 31,978 cis-eQTLs, 0 trans-eQTLs
## 31.81% done, 38,098 cis-eQTLs, 0 trans-eQTLs
## 36.36% done, 38,605 cis-eQTLs, 0 trans-eQTLs
## 40.90% done, 0 trans-eQTLs
## 45.45% done, 0 trans-eQTLs
## 50.00% done, 0 trans-eQTLs
## 54.54% done, 0 trans-eQTLs
## 59.09% done, 0 trans-eQTLs
## 63.63% done, 0 trans-eQTLs
## 68.18% done, 0 trans-eQTLs
## 72.72% done, 0 trans-eQTLs
## 77.27% done, 0 trans-eQTLs
## 81.81% done, 0 trans-eQTLs
## 86.36% done, 42,998 cis-eQTLs, 0 trans-eQTLs
## 90.90% done, 48,565 cis-eQTLs, 16 trans-eQTLs
## 95.45% done, 54,309 cis-eQTLs, 16 trans-eQTLs
## 100.00% done, 54,955 cis-eQTLs, 16 trans-eQTLs
## Task finished in 7.849 seconds
##
unlink(output_file_name_cis);
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')
## Analysis done in: 7.523 seconds
cat('Detected local eQTLs:','\n')
## Detected local eQTLs:
cis.eqtls<-me$cis$eqtls
head(cis.eqtls)
## snps gene statistic pvalue FDR beta
## 1 rs4163042 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 2 rs4163058 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 3 rs4163391 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 4 rs4151923 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 5 rs3090019 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
## 6 rs4164131 1452705_at -51.52246 2.034364e-16 1.597121e-12 -4.255833
dim(cis.eqtls)
## [1] 54955 6
cis.eqtls$beta_se <-cis.eqtls$beta/cis.eqtls$statistic
write.table(cis.eqtls,file="2016-05-09 sub.mouseliver.cis.1M.eqtls.txt", sep="\t", row.names=FALSE, quote=FALSE)
# eqtl analysis for lung
## Location of the package with the data files.
base.dir = "/Volumes/Transcend/Thesis_project/subsetted_liver";
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste(base.dir, "/2016-05-16 BXD.geno.SNP.eqtl.for.lung.txt", sep="");
snps_location_file_name = paste(base.dir, "/2016-05-16 BXD.geno.loc.eqtl.for.lung.txt", sep="");
# Gene expression file name
expression_file_name = paste(base.dir, "/2016-05-16 mouse.lung.expression.eqtl.txt", sep="");
gene_location_file_name = paste(base.dir, "/2016-05-16 lung.gene.loc.txt", sep="");
# Covariates file name
# Set to character() for no covariates
covariates_file_name = character() ;
# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();
# Only associations significant at this level will be saved
pvOutputThreshold_cis = 1;
pvOutputThreshold_tra = 0.000000000000005;
# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;
## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t"; # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;
snps$fileSkipColumns = 1;
snps$fileSliceSize = 2000;
snps$LoadFile(SNP_file_name);
## Rows read: 2,000
## Rows read: 3024 done.
## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t";
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1;
gene$fileSkipColumns = 1;
gene$fileSliceSize = 2000;
gene$LoadFile(expression_file_name);
## Rows read: 2,000
## Rows read: 4,000
## Rows read: 6,000
## Rows read: 8,000
## Rows read: 10,000
## Rows read: 12,000
## Rows read: 14,000
## Rows read: 16,000
## Rows read: 18,000
## Rows read: 20,000
## Rows read: 20634 done.
## Load covariates
cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t"; # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1; # one row of column labels
cvrt$fileSkipColumns = 1; # one column of row labels
if(length(covariates_file_name)>0) {
cvrt$LoadFile(covariates_file_name);
}
## Run the analysis
snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
head(genepos)
## probeset Chromosome Start End
## 1 1415670_at 6 87887814 87913595
## 2 1415671_at 8 105524465 105566047
## 3 1415672_at 8 23241353 23257074
## 4 1415673_at 5 129765558 129787449
## 5 1415674_a_at 9 44403761 44407548
## 6 1415675_at 2 32570858 32573579
me = Matrix_eQTL_main(
snps = snps,
gene = gene,
output_file_name = output_file_name_tra,
pvOutputThreshold = pvOutputThreshold_tra,
useModel = useModel,
errorCovariance = numeric(),
verbose = TRUE,
output_file_name.cis = output_file_name_cis,
pvOutputThreshold.cis = pvOutputThreshold_cis,
snpspos = snpspos,
genepos = genepos,
cisDist = cisDist,
pvalue.hist = TRUE,
min.pv.by.genesnp = FALSE,
noFDRsaveMemory = FALSE);
## Matching data files and location files
## 20634 of 20634 genes matched
## 3024 of 3024 SNPs matched
## Task finished in 0.016 seconds
## Reordering SNPs
##
## Task finished in 0.379 seconds
## Reordering genes
##
## Task finished in 0.249 seconds
## Processing covariates
## Task finished in 0.003 seconds
## Processing gene expression data (imputation, residualization, etc.)
## Task finished in 0.056 seconds
## Creating output file(s)
## Task finished in 0.015 seconds
## Performing eQTL analysis
## 4.54% done, 5,899 cis-eQTLs, 87 trans-eQTLs
## 9.09% done, 11,411 cis-eQTLs, 231 trans-eQTLs
## 13.63% done, 17,998 cis-eQTLs, 287 trans-eQTLs
## 18.18% done, 22,514 cis-eQTLs, 326 trans-eQTLs
## 22.72% done, 27,349 cis-eQTLs, 422 trans-eQTLs
## 27.27% done, 31,978 cis-eQTLs, 452 trans-eQTLs
## 31.81% done, 38,098 cis-eQTLs, 511 trans-eQTLs
## 36.36% done, 38,605 cis-eQTLs, 514 trans-eQTLs
## 40.90% done, 514 trans-eQTLs
## 45.45% done, 514 trans-eQTLs
## 50.00% done, 516 trans-eQTLs
## 54.54% done, 518 trans-eQTLs
## 59.09% done, 518 trans-eQTLs
## 63.63% done, 518 trans-eQTLs
## 68.18% done, 518 trans-eQTLs
## 72.72% done, 518 trans-eQTLs
## 77.27% done, 518 trans-eQTLs
## 81.81% done, 518 trans-eQTLs
## 86.36% done, 42,998 cis-eQTLs, 577 trans-eQTLs
## 90.90% done, 48,565 cis-eQTLs, 615 trans-eQTLs
## 95.45% done, 54,309 cis-eQTLs, 663 trans-eQTLs
## 100.00% done, 54,955 cis-eQTLs, 671 trans-eQTLs
## Task finished in 10.17 seconds
##
unlink(output_file_name_cis);
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')
## Analysis done in: 9.921 seconds
cat('Detected local eQTLs:','\n')
## Detected local eQTLs:
cis.eqtls<-me$cis$eqtls
head(cis.eqtls)
## snps gene statistic pvalue FDR beta
## 1 rs3681670 1421144_at -75.53535 2.361797e-47 6.489628e-43 -3.093200
## 2 rs3666583 1421144_at -75.53535 2.361797e-47 6.489628e-43 -3.093200
## 3 rs13481364 1438758_at -50.66421 5.603003e-40 4.398757e-36 -2.167143
## 4 rs3708704 1438758_at -50.66421 5.603003e-40 4.398757e-36 -2.167143
## 5 rs13481365 1438758_at -50.66421 5.603003e-40 4.398757e-36 -2.167143
## 6 rs3685299 1438758_at -50.66421 5.603003e-40 4.398757e-36 -2.167143
dim(cis.eqtls)
## [1] 54955 6
cis.eqtls$beta_se <-cis.eqtls$beta/cis.eqtls$statistic
write.table(cis.eqtls,file="2016-05-16 mouselung.cis.1M.eqtls.txt", sep="\t", row.names=FALSE, quote=FALSE)
# load mouse lung cis eqtl result
lung.mouse.eQTL<-read.table(file="2016-05-16 mouselung.cis.1M.eqtls.txt", header=T)
# load mouse liver cis eqtl result
liver.mouse.eQTL<-read.table(file="2016-05-09 sub.mouseliver.cis.1M.eqtls.txt", header=T)
mouse4302ensembl_id<-read.table(file="2015-12-04 mouse4302ensembl_id.txt", header=T)
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt", header=T)
# Add ensemble id annoatation to the data
lung.mouse.eQTL<-merge(lung.mouse.eQTL, mouse4302ensembl_id, by.x = "gene", by.y="probe_id")
liver.mouse.eQTL<-merge(liver.mouse.eQTL, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
head(lung.mouse.eQTL)
## gene snps statistic pvalue FDR beta
## 1 1415670_at rs13478880 1.1363521 0.2621028 0.5270176 0.06786667
## 2 1415670_at rs13478876 0.9133601 0.3661462 0.6302137 0.05583410
## 3 1415670_at rs3713705 1.1363521 0.2621028 0.5270176 0.06786667
## 4 1415670_at rs13475374 1.0503967 0.2994035 0.5651655 0.06286667
## 5 1415672_at rs13479651 -1.3052534 0.1987480 0.4483475 -0.03993582
## 6 1415672_at rs3661760 -1.3052534 0.1987480 0.4483475 -0.03993582
## beta_se ensembl_id
## 1 0.05972327 ENSMUSG00000030058
## 2 0.06113044 ENSMUSG00000030058
## 3 0.05972327 ENSMUSG00000030058
## 4 0.05985040 ENSMUSG00000030058
## 5 0.03059622 ENSMUSG00000015341
## 6 0.03059622 ENSMUSG00000015341
head(liver.mouse.eQTL)
## gene snps statistic pvalue FDR beta
## 1 1415670_at rs3713705 -0.4855043 0.6353986 0.8827614 -0.045982143
## 2 1415670_at rs13475374 -0.4855043 0.6353986 0.8827614 -0.045982143
## 3 1415670_at rs13478880 -0.4855043 0.6353986 0.8827614 -0.045982143
## 4 1415670_at rs13478876 -0.4855043 0.6353986 0.8827614 -0.045982143
## 5 1415672_at rs3661760 -0.1379165 0.8924198 0.9733830 -0.006732143
## 6 1415672_at rs13479651 -0.1379165 0.8924198 0.9733830 -0.006732143
## beta_se ensembl_id
## 1 0.09471005 ENSMUSG00000030058
## 2 0.09471005 ENSMUSG00000030058
## 3 0.09471005 ENSMUSG00000030058
## 4 0.09471005 ENSMUSG00000030058
## 5 0.04881319 ENSMUSG00000015341
## 6 0.04881319 ENSMUSG00000015341
library(data.table)
# Select Gene-SNP pair with minimum P value
lung.mouse.eQTL.min <- data.table(lung.mouse.eQTL, key=c('ensembl_id', "pvalue"))
lung.mouse.eQTL.min<-lung.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
lung.mouse.eQTL.min<-as.data.frame(lung.mouse.eQTL.min)
liver.mouse.eQTL.min <- data.table(liver.mouse.eQTL, key=c('ensembl_id', "pvalue"))
liver.mouse.eQTL.min<-liver.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
liver.mouse.eQTL.min<-as.data.frame(liver.mouse.eQTL.min)
library(plyr)
lung.mouse.eQTL.min<-rename(lung.mouse.eQTL.min, c("pvalue"="lung_pvalue", "beta"="lung.beta", "beta_se"="lung.beta_se"))
liver.mouse.eQTL.min<-rename(liver.mouse.eQTL.min, c("pvalue"="liver_pvalue", "beta"="liver.beta", "beta_se"="liver.beta_se"))
head(lung.mouse.eQTL.min)
## gene snps statistic lung_pvalue FDR lung.beta
## 1 1428645_at rs6259798 -1.4324741 0.159238340 0.39180403 -0.05980000
## 2 1416677_at rs3707743 -0.1308388 0.896513064 0.95927760 -0.02366071
## 3 1451678_at rs13481264 -0.3904599 0.698124238 0.86317736 -0.01356126
## 4 1425955_at rs13478643 -1.0185410 0.314117648 0.58046119 -0.07813675
## 5 1426241_a_at rs4224744 0.9192838 0.363075471 0.62709198 0.05284706
## 6 1439267_x_at rs4136521 3.4048233 0.001444641 0.01001896 0.18997619
## lung.beta_se ensembl_id
## 1 0.04174596 ENSMUSG00000000001
## 2 0.18083869 ENSMUSG00000000049
## 3 0.03473152 ENSMUSG00000000056
## 4 0.07671439 ENSMUSG00000000058
## 5 0.05748721 ENSMUSG00000000085
## 6 0.05579620 ENSMUSG00000000088
head(liver.mouse.eQTL.min)
## gene snps statistic liver_pvalue FDR liver.beta
## 1 1428645_at rs13477320 -1.3801032 0.190827641 0.56008906 -0.07366667
## 2 1416677_at rs3670642 3.2114557 0.006815612 0.08201375 0.07875000
## 3 1425344_at rs13481264 -1.5163463 0.153365833 0.51030633 -0.24430000
## 4 1417327_at rs3655269 1.1762017 0.260604313 0.63908151 0.11090000
## 5 1426241_a_at rs3675629 -0.7745039 0.452489862 0.78891766 -0.01910000
## 6 1448153_at rs3708863 4.6928846 0.000420588 0.01068581 0.23727778
## liver.beta_se ensembl_id
## 1 0.05337765 ENSMUSG00000000001
## 2 0.02452159 ENSMUSG00000000049
## 3 0.16111096 ENSMUSG00000000056
## 4 0.09428655 ENSMUSG00000000058
## 5 0.02466095 ENSMUSG00000000085
## 6 0.05056118 ENSMUSG00000000088
tail(liver.mouse.eQTL.min)
## gene snps statistic liver_pvalue FDR
## 10596 1434694_at rs13476408 -1.7224576 0.10867553 0.4321929
## 10597 1437645_at rs3686133 2.0044772 0.06630646 0.3372992
## 10598 1449939_s_at rs13481642 -0.4164743 0.68385715 0.9044643
## 10599 1422547_at rs4165065 -1.6637788 0.12006199 0.4543101
## 10600 1451476_at rs4165069 -0.1824077 0.85807655 0.9624377
## 10601 1453995_a_at rs4165065 -1.4418659 0.17299524 0.5369037
## liver.beta liver.beta_se ensembl_id
## 10596 -0.16160714 0.09382358 ENSMUSG00000099041
## 10597 0.10340000 0.05158452 ENSMUSG00000099083
## 10598 -0.01975000 0.04742189 ENSMUSG00000099116
## 10599 -0.10844444 0.06517960 ENSMUSG00000099164
## 10600 -0.01105556 0.06060904 ENSMUSG00000099262
## 10601 -0.05450000 0.03779824 ENSMUSG00000099305
dim(lung.mouse.eQTL.min)
## [1] 10601 8
dim(liver.mouse.eQTL.min)
## [1] 10601 8
# lung, liver eqtl with ensemble_id
merged.mouse.eQTL.min<-merge(lung.mouse.eQTL.min, liver.mouse.eQTL.min, by.x = "ensembl_id", by.y="ensembl_id")
head(merged.mouse.eQTL.min)
## ensembl_id gene.x snps.x statistic.x lung_pvalue
## 1 ENSMUSG00000000001 1428645_at rs6259798 -1.4324741 0.159238340
## 2 ENSMUSG00000000049 1416677_at rs3707743 -0.1308388 0.896513064
## 3 ENSMUSG00000000056 1451678_at rs13481264 -0.3904599 0.698124238
## 4 ENSMUSG00000000058 1425955_at rs13478643 -1.0185410 0.314117648
## 5 ENSMUSG00000000085 1426241_a_at rs4224744 0.9192838 0.363075471
## 6 ENSMUSG00000000088 1439267_x_at rs4136521 3.4048233 0.001444641
## FDR.x lung.beta lung.beta_se gene.y snps.y statistic.y
## 1 0.39180403 -0.05980000 0.04174596 1428645_at rs13477320 -1.3801032
## 2 0.95927760 -0.02366071 0.18083869 1416677_at rs3670642 3.2114557
## 3 0.86317736 -0.01356126 0.03473152 1425344_at rs13481264 -1.5163463
## 4 0.58046119 -0.07813675 0.07671439 1417327_at rs3655269 1.1762017
## 5 0.62709198 0.05284706 0.05748721 1426241_a_at rs3675629 -0.7745039
## 6 0.01001896 0.18997619 0.05579620 1448153_at rs3708863 4.6928846
## liver_pvalue FDR.y liver.beta liver.beta_se
## 1 0.190827641 0.56008906 -0.07366667 0.05337765
## 2 0.006815612 0.08201375 0.07875000 0.02452159
## 3 0.153365833 0.51030633 -0.24430000 0.16111096
## 4 0.260604313 0.63908151 0.11090000 0.09428655
## 5 0.452489862 0.78891766 -0.01910000 0.02466095
## 6 0.000420588 0.01068581 0.23727778 0.05056118
dim(merged.mouse.eQTL.min)
## [1] 10601 15
merged.mouse.eQTL.min<-data.frame(merged.mouse.eQTL.min)
merged.mouse.eQTL.min<-merged.mouse.eQTL.min[, c(1, 5, 7, 8, 12, 14, 15 )]
head(merged.mouse.eQTL.min)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.159238340 -0.05980000 0.04174596 0.190827641
## 2 ENSMUSG00000000049 0.896513064 -0.02366071 0.18083869 0.006815612
## 3 ENSMUSG00000000056 0.698124238 -0.01356126 0.03473152 0.153365833
## 4 ENSMUSG00000000058 0.314117648 -0.07813675 0.07671439 0.260604313
## 5 ENSMUSG00000000085 0.363075471 0.05284706 0.05748721 0.452489862
## 6 ENSMUSG00000000088 0.001444641 0.18997619 0.05579620 0.000420588
## liver.beta liver.beta_se
## 1 -0.07366667 0.05337765
## 2 0.07875000 0.02452159
## 3 -0.24430000 0.16111096
## 4 0.11090000 0.09428655
## 5 -0.01910000 0.02466095
## 6 0.23727778 0.05056118
write.table(merged.mouse.eQTL.min,file="2016-05-09 mouse.liver.expression.min.txt", sep="\t", row.names=FALSE, quote=FALSE)
merged.mouse.eQTL.min.variance2<-read.table(file="2016-05-09 mouse.liver.expression.min.txt", header=T)
head(merged.mouse.eQTL.min.variance2)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.159238340 -0.05980000 0.04174596 0.190827641
## 2 ENSMUSG00000000049 0.896513064 -0.02366071 0.18083869 0.006815612
## 3 ENSMUSG00000000056 0.698124238 -0.01356126 0.03473152 0.153365833
## 4 ENSMUSG00000000058 0.314117648 -0.07813675 0.07671439 0.260604313
## 5 ENSMUSG00000000085 0.363075471 0.05284706 0.05748721 0.452489862
## 6 ENSMUSG00000000088 0.001444641 0.18997619 0.05579620 0.000420588
## liver.beta liver.beta_se
## 1 -0.07366667 0.05337765
## 2 0.07875000 0.02452159
## 3 -0.24430000 0.16111096
## 4 0.11090000 0.09428655
## 5 -0.01910000 0.02466095
## 6 0.23727778 0.05056118
# caculate the absolute value of live/lung beta
merged.mouse.eQTL.min.variance2$abs_liver.beta<-abs(merged.mouse.eQTL.min.variance2$liver.beta)
merged.mouse.eQTL.min.variance2$abs_lung.beta<-abs(merged.mouse.eQTL.min.variance2$lung.beta)
# caculate negative log lung p value
merged.mouse.eQTL.min.variance2$neg_log_lung_pvalue<--log10(merged.mouse.eQTL.min.variance2$lung_pvalue)
# Simple linear regression between abs_liver.beta and abs_lung.beta
# fit1<-summary(lm(abs_liver.beta ~ abs_lung.beta, data=merged.mouse.eQTL.min.variance2))
# fit1
# tau<-fit1$sigma**2
# check association between abs_liver.beta and abs.lung.beta
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
ggplot(merged.mouse.eQTL.min.variance2, aes(x=abs_lung.beta, y=abs_liver.beta)) +geom_point()+geom_smooth(method=lm)

cor(merged.mouse.eQTL.min.variance2$abs_lung.beta, merged.mouse.eQTL.min.variance2$abs_liver.beta)
## [1] 0.3408502
merged.mouse.eQTL<-merged.mouse.eQTL.min.variance2
# retrieve ensembl_id
markers<-merged.mouse.eQTL[, 1]
# Yg=Ag + Bg*Xsnp+V
# retrieve betas.hat (liver.beta)
betas.hat<-merged.mouse.eQTL$abs_liver.beta
# retrieve liver.beta_se
se<-merged.mouse.eQTL$liver.beta_se
# creat Z matrix with 2 columns: 1 for intercept,abs_lung.beta (merged.mouse.eQTL[,10])
Z<-as.matrix(merged.mouse.eQTL$abs_lung.beta)
Z<-replace(Z,is.na(Z),0)
Z<-data.frame(1,Z)
Z<-as.matrix(Z)
rowLength<-length(markers)
# liver.betas=Z*gama+T^2
# Regression: abs_liver.beta = intercept + beta*abs_lung.beta + error
lmsummary<-summary(lm(abs_liver.beta~-1+Z, data=merged.mouse.eQTL))
lmsummary
##
## Call:
## lm(formula = abs_liver.beta ~ -1 + Z, data = merged.mouse.eQTL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1122 -0.0765 -0.0414 0.0207 4.4632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## ZX1 0.081681 0.002218 36.83 <2e-16 ***
## ZZ 0.317374 0.008503 37.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1956 on 10599 degrees of freedom
## Multiple R-squared: 0.3488, Adjusted R-squared: 0.3487
## F-statistic: 2839 on 2 and 10599 DF, p-value: < 2.2e-16
# error ~ N(0, Tau)
tau<-lmsummary$sigma**2
tau
## [1] 0.0382713
# output coeffieients (gamma matrix)
# gamma matrix
gamma<-as.matrix(lmsummary$coefficients[,1])
# trasnpose Z matrix
Z_transpose<-t(Z)
# create identity matrix
identity<-diag(nrow=rowLength)
# original betas.hat
betas.hat<-as.matrix(betas.hat)
#creat V matrix for liver_residual_variance
V <- matrix(0, rowLength, rowLength)
# V, liver residual variance
diag(V) <- merged.mouse.eQTL$liver.beta_se^2
# Creat Tau matrix
Tau<- diag(tau, rowLength, rowLength)
# follow Chen's paper and cacualte s
s <-V + Tau
# create inverse function for inversing diagnoal matrix
diag.inverse <- function(x){diag(1/diag(x), nrow(x), ncol(x))}
# create multiplication function for multiplicating two diagnoal matrix
diag.multi <- function(x,y){diag(diag(x)*diag(y), nrow(x), ncol(x))}
# inverse s
S <-diag.inverse(s)
# follow chen's paper to caculate omega
omega<-diag.multi(S, V)
# retrieve omega value from the matrix
omega.diag<-diag(omega )
# summary the omega value
summary(omega.diag)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005392 0.041920 0.076700 0.127100 0.153300 0.974400
# betas.thea<- S %*% Z %*% gamma + (identity-S) %*% betas.hat
# caculate betas.tieda with the formula in Chen's paper
betas.tieda<- omega %*% Z %*% gamma + (identity-omega) %*% betas.hat
# crbetas.tieda<- cromega %*% Z %*% gamma + (identity-cromega) %*% betas.hat
head(betas.tieda)
## [,1]
## [1,] 0.07553701
## [2,] 0.07891150
## [3,] 0.18031964
## [4,] 0.11006680
## [5,] 0.02034127
## [6,] 0.23131037
head(betas.hat)
## [,1]
## [1,] 0.07366667
## [2,] 0.07875000
## [3,] 0.24430000
## [4,] 0.11090000
## [5,] 0.01910000
## [6,] 0.23727778
#regression beta
regbeta <-Z %*% gamma
head(regbeta)
## [,1]
## [1,] 0.10066029
## [2,] 0.08919064
## [3,] 0.08598534
## [4,] 0.10647989
## [5,] 0.09845361
## [6,] 0.14197477
summary(regbeta)
## V1
## Min. :0.08168
## 1st Qu.:0.09287
## Median :0.10364
## Mean :0.12437
## 3rd Qu.:0.12665
## Max. :1.24358
markers1<-as.character(markers)
# combine ensemble_id, betas.hat and betas.tieda
outputVector<-c(markers1,betas.hat,betas.tieda)
write.table(matrix(outputVector,rowLength),file="2016-04-26_hm_tau_hmresults.txt",col.names=FALSE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian<-read.table(file="2016-04-26_hm_tau_hmresults.txt")
colnames(liver.mouse.eQTL.bayesian)<-c( "ensembl_id", "betas.hat","betas.tieda")
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.tieda
## 1 ENSMUSG00000000001 0.07366667 0.07553701
## 2 ENSMUSG00000000049 0.07875000 0.07891150
## 3 ENSMUSG00000000056 0.24430000 0.18031964
## 4 ENSMUSG00000000058 0.11090000 0.11006680
## 5 ENSMUSG00000000085 0.01910000 0.02034127
## 6 ENSMUSG00000000088 0.23727778 0.23131037
# merge dataset with betas.hat and betas.tieda
liver.mouse.eQTL.bayesian.all<- merge(liver.mouse.eQTL.bayesian, merged.mouse.eQTL.min.variance2, by = "ensembl_id")
head(liver.mouse.eQTL.bayesian.all)
## ensembl_id betas.hat betas.tieda lung_pvalue lung.beta
## 1 ENSMUSG00000000001 0.07366667 0.07553701 0.159238340 -0.05980000
## 2 ENSMUSG00000000049 0.07875000 0.07891150 0.896513064 -0.02366071
## 3 ENSMUSG00000000056 0.24430000 0.18031964 0.698124238 -0.01356126
## 4 ENSMUSG00000000058 0.11090000 0.11006680 0.314117648 -0.07813675
## 5 ENSMUSG00000000085 0.01910000 0.02034127 0.363075471 0.05284706
## 6 ENSMUSG00000000088 0.23727778 0.23131037 0.001444641 0.18997619
## lung.beta_se liver_pvalue liver.beta liver.beta_se abs_liver.beta
## 1 0.04174596 0.190827641 -0.07366667 0.05337765 0.07366667
## 2 0.18083869 0.006815612 0.07875000 0.02452159 0.07875000
## 3 0.03473152 0.153365833 -0.24430000 0.16111096 0.24430000
## 4 0.07671439 0.260604313 0.11090000 0.09428655 0.11090000
## 5 0.05748721 0.452489862 -0.01910000 0.02466095 0.01910000
## 6 0.05579620 0.000420588 0.23727778 0.05056118 0.23727778
## abs_lung.beta neg_log_lung_pvalue
## 1 0.05980000 0.79795236
## 2 0.02366071 0.04744338
## 3 0.01356126 0.15606728
## 4 0.07813675 0.50290766
## 5 0.05284706 0.44000309
## 6 0.18997619 2.84024014
write.table(liver.mouse.eQTL.bayesian.all,file="2016-05-09_liver.mouse.eQTL.bayesian.all.txt")
liver.mouse.eQTL.bayesian<-read.table(file="2016-05-09_liver.mouse.eQTL.bayesian.all.txt")
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.tieda lung_pvalue lung.beta
## 1 ENSMUSG00000000001 0.07366667 0.07553701 0.159238340 -0.05980000
## 2 ENSMUSG00000000049 0.07875000 0.07891150 0.896513064 -0.02366071
## 3 ENSMUSG00000000056 0.24430000 0.18031964 0.698124238 -0.01356126
## 4 ENSMUSG00000000058 0.11090000 0.11006680 0.314117648 -0.07813675
## 5 ENSMUSG00000000085 0.01910000 0.02034127 0.363075471 0.05284706
## 6 ENSMUSG00000000088 0.23727778 0.23131037 0.001444641 0.18997619
## lung.beta_se liver_pvalue liver.beta liver.beta_se abs_liver.beta
## 1 0.04174596 0.190827641 -0.07366667 0.05337765 0.07366667
## 2 0.18083869 0.006815612 0.07875000 0.02452159 0.07875000
## 3 0.03473152 0.153365833 -0.24430000 0.16111096 0.24430000
## 4 0.07671439 0.260604313 0.11090000 0.09428655 0.11090000
## 5 0.05748721 0.452489862 -0.01910000 0.02466095 0.01910000
## 6 0.05579620 0.000420588 0.23727778 0.05056118 0.23727778
## abs_lung.beta neg_log_lung_pvalue
## 1 0.05980000 0.79795236
## 2 0.02366071 0.04744338
## 3 0.01356126 0.15606728
## 4 0.07813675 0.50290766
## 5 0.05284706 0.44000309
## 6 0.18997619 2.84024014
liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat",
"liver.beta_se", "betas.tieda",
"liver_pvalue", "abs_lung.beta",
"abs_lung.beta", "neg_log_lung_pvalue"))
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat liver.beta_se betas.tieda liver_pvalue
## 1 ENSMUSG00000000001 0.07366667 0.05337765 0.07553701 0.190827641
## 2 ENSMUSG00000000049 0.07875000 0.02452159 0.07891150 0.006815612
## 3 ENSMUSG00000000056 0.24430000 0.16111096 0.18031964 0.153365833
## 4 ENSMUSG00000000058 0.11090000 0.09428655 0.11006680 0.260604313
## 5 ENSMUSG00000000085 0.01910000 0.02466095 0.02034127 0.452489862
## 6 ENSMUSG00000000088 0.23727778 0.05056118 0.23131037 0.000420588
## abs_lung.beta abs_lung.beta.1 neg_log_lung_pvalue
## 1 0.05980000 0.05980000 0.79795236
## 2 0.02366071 0.02366071 0.04744338
## 3 0.01356126 0.01356126 0.15606728
## 4 0.07813675 0.07813675 0.50290766
## 5 0.05284706 0.05284706 0.44000309
## 6 0.18997619 0.18997619 2.84024014
# Caculate variance for beta.tieda by following Brian Kulis' lecture notes
# Invert Tau and V
Tau_invert<-diag.inverse(Tau)
V_invert<-diag.inverse(V)
PS_invert<-Tau_invert + V_invert
# PS_invert<-Tau_invert+V_invert%*% Z %*% Z_transpose # previous wrong code
# S in Brian Kulis' lecture note:PS
PS <- diag.inverse(PS_invert)
# retrieve posterior variance
ps<-diag(PS)
range(ps)
## [1] 0.0002063442 0.0372924672
library(reshape)
##
## Attaching package: 'reshape'
## The following objects are masked from 'package:plyr':
##
## rename, round_any
## The following object is masked from 'package:data.table':
##
## melt
# reshape posterior variance to long format
ps.long <- melt(ps)
head(ps.long)
## value
## 1 0.0026517589
## 2 0.0005920069
## 3 0.0154667362
## 4 0.0072141860
## 5 0.0005986493
## 6 0.0023963615
# Caculate sd: square root on variance
ps.long$betas.tieda.se<-(ps.long$value)^0.5
# combine sd to the data.frame
liver.mouse.eQTL.bayesian<-cbind(liver.mouse.eQTL.bayesian,ps.long$betas.tieda.se)
# head(liver.mouse.eQTL.bayesian)
# rename betas.tieda.se
liver.mouse.eQTL.bayesian<-rename(liver.mouse.eQTL.bayesian, c("ps.long$betas.tieda.se"="betas.tieda.se", "liver.beta_se"="betas.hat.se"))
liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", "betas.hat.se",
"betas.tieda", "betas.tieda.se",
"liver_pvalue", "abs_lung.beta", "neg_log_lung_pvalue"))
# head(liver.mouse.eQTL.bayesian)
# library(tigerstats)
# pnormGC(0, region="below", mean=0.002352829,sd=0.09972950)
# caculate probability of betas.tieda below 0 based on betas.tieda and standard deviation
liver.mouse.eQTL.bayesian$p.below.0 <- pnorm(0,liver.mouse.eQTL.bayesian$betas.tieda, liver.mouse.eQTL.bayesian$betas.tieda.se)
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667 0.05337765 0.07553701 0.05149523
## 2 ENSMUSG00000000049 0.07875000 0.02452159 0.07891150 0.02433119
## 3 ENSMUSG00000000056 0.24430000 0.16111096 0.18031964 0.12436533
## 4 ENSMUSG00000000058 0.11090000 0.09428655 0.11006680 0.08493636
## 5 ENSMUSG00000000085 0.01910000 0.02466095 0.02034127 0.02446731
## 6 ENSMUSG00000000088 0.23727778 0.05056118 0.23131037 0.04895265
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0
## 1 0.190827641 0.05980000 0.79795236 7.120518e-02
## 2 0.006815612 0.02366071 0.04744338 5.909266e-04
## 3 0.153365833 0.01356126 0.15606728 7.354057e-02
## 4 0.260604313 0.07813675 0.50290766 9.750950e-02
## 5 0.452489862 0.05284706 0.44000309 2.028837e-01
## 6 0.000420588 0.18997619 2.84024014 1.149522e-06
dim(liver.mouse.eQTL.bayesian)
## [1] 10601 9
summary(liver.mouse.eQTL.bayesian$betas.tieda.se)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01436 0.04006 0.05418 0.06249 0.07659 0.19310
range(liver.mouse.eQTL.bayesian$p.below.0)
## [1] 0.0000000 0.4803112
write.table(liver.mouse.eQTL.bayesian,file="2016-05-04_liver.mouse.eQTL.bayesian with beta.txt")
liver.mouse.eQTL.bayesian <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian with beta.txt")
# head(liver.mouse.eQTL.bayesian)
# summary(liver.mouse.eQTL.bayesian$liver_residual_variance)
liver.mouse.eQTL.bayesian.4tau <- liver.mouse.eQTL.bayesian
# colnames(liver.mouse.eQTL.bayesian.4tau) <- c("ensembl_id", "betas.hat", "betas.hat.se", "betas.tieda", "betas.tieda.se", "liver_residual_variance", "liver_pvalue", "abs_lung.beta",
# "neg_log_lung_pvalue", "p.below.0", "betas.tieda2m", "betas.tieda3rd", "betas.tieda4max")
# head(liver.mouse.eQTL.bayesian.4tau)
# Introduce weight (Tmm) to adjust Tau with neg_log_lung_pvalue
liver.mouse.eQTL.bayesian.4tau$fzm <- liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue
# caculate ratio_fzm
liver.mouse.eQTL.bayesian.4tau$ratio_fzm <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/liver.mouse.eQTL.bayesian.4tau$fzm
range(liver.mouse.eQTL.bayesian.4tau$ratio_fzm)
## [1] 1 Inf
# set up threshold for ratio_fzm and caculate updated ratio_fzm (nratio_fzm)
threshold <- 0.05
liver.mouse.eQTL.bayesian.4tau$nratio_fzm <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$ratio_fzm > max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))] <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))
# liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$nratio_fzm >= max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log(threshold))] <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
summary(liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 25.04 35.84 29.41 35.84 35.84
# compare bayesian prediction to the traditional method
# evaluate the predition with alle specific expreesion in the liver: Sandrine Lagarrigue's paper
liver.ASE <- read.csv(file= "ASE.genetics.113.153882-6.csv")
dim(liver.ASE)
## [1] 1191 19
head(liver.ASE)
## replicate chr startExon geneID SNPperExon3
## 1 M.CH. DxB and BxD 1 9535488 Rrs1 15
## 2 M.HF DxB and BxD 1 9535488 Rrs1 14
## 3 F.HF DxB and BxD 1 9535488 Rrs1 15
## 4 M.CH. DxB and BxD 1 37473929 6330578E17Rik 2
## 5 M.CH. DxB and BxD 1 58169979 Aox3 3
## 6 M.HF DxB and BxD 1 58169979 Aox3 3
## sumBperExon.DxB4 sumDperExon.DxB4 sumBperExon.BxD4 sumDperExon.BxD4
## 1 45 19 50 25
## 2 74 39 66 30
## 3 76 20 77 40
## 4 78 32 47 17
## 5 473 82 225 27
## 6 252 56 263 53
## FCadd1.DxB5 FCadd1.BxD5 BonBD.DxB6 BonBD.BxD6 pvalBH.DxB7 pvalBH.BxD7
## 1 2.30 1.96 0.70 0.67 1.0e-02 3.7e-02
## 2 1.88 2.16 0.65 0.69 1.1e-02 3.3e-03
## 3 3.67 1.90 0.79 0.66 4.9e-07 5.1e-03
## 4 2.39 2.67 0.71 0.73 1.9e-04 2.2e-03
## 5 5.71 8.07 0.85 0.89 3.6e-64 5.4e-37
## 6 4.44 4.89 0.82 0.83 8.7e-28 1.1e-31
## UTR5 UTR3 strand exonCount
## 1 0 0 + 1
## 2 0 0 + 1
## 3 0 0 + 1
## 4 0 0 - 3
## 5 0 0 + 35
## 6 0 0 + 35
# 440 unique gene ID
length(unique(liver.ASE$geneID))
## [1] 440
# verify ASE table
liver.ASE1 <- liver.ASE[which(liver.ASE$replicate == "M.CH. DxB and BxD"), ]
liver.ASE2 <- liver.ASE[which(liver.ASE$replicate == "M.HF DxB and BxD"), ]
liver.ASE3 <- liver.ASE[which(liver.ASE$replicate == "F.HF DxB and BxD"), ]
length(unique(liver.ASE1$geneID))
## [1] 272
length(unique(liver.ASE2$geneID))
## [1] 275
length(unique(liver.ASE3$geneID))
## [1] 304
(length(unique(liver.ASE1$geneID))+length(unique(liver.ASE2$geneID))+length(unique(liver.ASE3$geneID)))/3
## [1] 283.6667
# As claimed in the paper: averaged 284 ASE for each replicate
sub.liver.ASE <-liver.ASE1
summary(sub.liver.ASE$pvalBH.DxB7)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0000000 0.0000058 0.0084070 0.0031000 0.1000000
sub.liver.ASE1 <- subset(sub.liver.ASE, pvalBH.DxB7 < 0.00000000000001)
sub.liver.ASE2 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.00000000000001 & pvalBH.DxB7 < 0.0000058)
sub.liver.ASE3 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.0000058 & pvalBH.DxB7 < 0.0031000)
sub.liver.ASE4 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.0031000)
dim(sub.liver.ASE1)
## [1] 89 19
dim(sub.liver.ASE2)
## [1] 97 19
dim(sub.liver.ASE3)
## [1] 93 19
dim(sub.liver.ASE4)
## [1] 94 19
# Subset liver ASE with different conditions
# sub.liver.ASE <-liver.ASE[which(liver.ASE$pvalBH.DxB7 < 0.05 & liver.ASE$pvalBH.BxD7 < 0.05), ]
# sub.liver.ASE <- sub.liver.ASE[order(sub.liver.ASE$pvalBH.BxD7), ]
# summary(sub.liver.ASE$pvalBH.BxD7)
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.BxD7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[ sub.liver.ASE$geneID %in% names(table(sub.liver.ASE$geneID))[table(sub.liver.ASE$geneID) >1] , ]
# check the remain gene number after subsetting
dim(sub.liver.ASE)
## [1] 373 19
liver.ASE.symbol <- unique(sub.liver.ASE$geneID)
liver.ASE.symbol1 <- unique(sub.liver.ASE1$geneID)
liver.ASE.symbol2 <- unique(sub.liver.ASE2$geneID)
liver.ASE.symbol3 <- unique(sub.liver.ASE3$geneID)
liver.ASE.symbol4 <- unique(sub.liver.ASE4$geneID)
length(liver.ASE.symbol)
## [1] 272
# Annoate gene symbol wiht ensemble.ID
library(biomaRt)
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
liver.ASE.ensembl <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol, mart=mouse)
liver.ASE.ensembl1 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol1, mart=mouse)
liver.ASE.ensembl2 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol2, mart=mouse)
liver.ASE.ensembl3 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol3, mart=mouse)
liver.ASE.ensembl4 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol4, mart=mouse)
dim(liver.ASE.ensembl)
## [1] 241 2
liver.ASE.ensembl <- unique(liver.ASE.ensembl)
# delete liver ASE ensemble ID which are not in the liver.mouse.eQTL.bayesian data frame
liver.ASE.ensembl <- liver.ASE.ensembl[liver.ASE.ensembl$ensembl_gene_id %in% liver.mouse.eQTL.bayesian.4tau$ensembl_id, ]
dim(liver.ASE.ensembl)
## [1] 195 2
# create indicator for ASE true or not
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl1$ensembl_gene_id] <- 1
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl2$ensembl_gene_id] <- 2
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl3$ensembl_gene_id] <- 3
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl4$ensembl_gene_id] <- 4
# liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 5
liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 1
liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 0
summary(liver.mouse.eQTL.bayesian.4tau$eqtl)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.01839 0.00000 1.00000
liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue <- -log10(liver.mouse.eQTL.bayesian.4tau$liver_pvalue)
head(liver.mouse.eQTL.bayesian.4tau)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667 0.05337765 0.07553701 0.05149523
## 2 ENSMUSG00000000049 0.07875000 0.02452159 0.07891150 0.02433119
## 3 ENSMUSG00000000056 0.24430000 0.16111096 0.18031964 0.12436533
## 4 ENSMUSG00000000058 0.11090000 0.09428655 0.11006680 0.08493636
## 5 ENSMUSG00000000085 0.01910000 0.02466095 0.02034127 0.02446731
## 6 ENSMUSG00000000088 0.23727778 0.05056118 0.23131037 0.04895265
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0 fzm
## 1 0.190827641 0.05980000 0.79795236 7.120518e-02 0.79795236
## 2 0.006815612 0.02366071 0.04744338 5.909266e-04 0.04744338
## 3 0.153365833 0.01356126 0.15606728 7.354057e-02 0.15606728
## 4 0.260604313 0.07813675 0.50290766 9.750950e-02 0.50290766
## 5 0.452489862 0.05284706 0.44000309 2.028837e-01 0.44000309
## 6 0.000420588 0.18997619 2.84024014 1.149522e-06 2.84024014
## ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 58.43301 35.83834 0 0.7193587
## 2 982.78748 35.83834 0 2.1664952
## 3 298.76061 35.83834 0 0.8142714
## 4 92.71435 35.83834 0 0.5840184
## 5 105.96916 35.83834 0 0.3443911
## 6 16.41648 16.41648 0 3.3761431
by(liver.mouse.eQTL.bayesian.4tau[, c(1, 7, 9, 14)], liver.mouse.eQTL.bayesian.4tau[, "eqtl"], summary)
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 0
## ensembl_id abs_lung.beta p.below.0
## ENSMUSG00000000001: 1 Min. :0.00000 Min. :0.00000
## ENSMUSG00000000049: 1 1st Qu.:0.03487 1st Qu.:0.02077
## ENSMUSG00000000056: 1 Median :0.06815 Median :0.10012
## ENSMUSG00000000058: 1 Mean :0.13143 Mean :0.13523
## ENSMUSG00000000085: 1 3rd Qu.:0.13975 3rd Qu.:0.22312
## ENSMUSG00000000088: 1 Max. :3.66099 Max. :0.48031
## (Other) :10400
## neg_log_liver_pvalue
## Min. : 0.0000
## 1st Qu.: 0.2606
## Median : 0.5847
## Mean : 0.9418
## 3rd Qu.: 1.1517
## Max. :15.6916
##
## --------------------------------------------------------
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 1
## ensembl_id abs_lung.beta p.below.0
## ENSMUSG00000000275: 1 Min. :0.001944 Min. :0.0000000
## ENSMUSG00000000673: 1 1st Qu.:0.076576 1st Qu.:0.0000000
## ENSMUSG00000001467: 1 Median :0.164283 Median :0.0008869
## ENSMUSG00000001473: 1 Mean :0.299202 Mean :0.0470796
## ENSMUSG00000001604: 1 3rd Qu.:0.360720 3rd Qu.:0.0524872
## ENSMUSG00000002395: 1 Max. :3.659845 Max. :0.3556079
## (Other) :189
## neg_log_liver_pvalue
## Min. : 0.04328
## 1st Qu.: 0.74359
## Median : 2.23718
## Mean : 3.06327
## 3rd Qu.: 4.71465
## Max. :11.90806
##
library(ggplot2)
boxplot(neg_log_liver_pvalue ~ eqtl,data=liver.mouse.eQTL.bayesian.4tau, main="liver.mouse.eQTL",
xlab="ASE cutoff by p value", ylab="liver neg log p")

boxplot(neg_log_lung_pvalue ~ eqtl,data=liver.mouse.eQTL.bayesian.4tau, main="lung.mouse.eQTL",
xlab="ASE cutoff by p value", ylab="lung neg log p")

liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]
plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl), xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue" )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue, , col="red", xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau$betas.hat, liver.mouse.eQTL.bayesian.4tau$betas.tieda, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$betas.hat, liver.mouse.eQTL.bayesian.4tau.ase$betas.tieda, col="red")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

cor(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue)
## [1] 0.39218
length(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue)
## [1] 195
plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$p.below.0, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$p.below.0, col="red")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

head(liver.mouse.eQTL.bayesian.4tau)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667 0.05337765 0.07553701 0.05149523
## 2 ENSMUSG00000000049 0.07875000 0.02452159 0.07891150 0.02433119
## 3 ENSMUSG00000000056 0.24430000 0.16111096 0.18031964 0.12436533
## 4 ENSMUSG00000000058 0.11090000 0.09428655 0.11006680 0.08493636
## 5 ENSMUSG00000000085 0.01910000 0.02466095 0.02034127 0.02446731
## 6 ENSMUSG00000000088 0.23727778 0.05056118 0.23131037 0.04895265
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0 fzm
## 1 0.190827641 0.05980000 0.79795236 7.120518e-02 0.79795236
## 2 0.006815612 0.02366071 0.04744338 5.909266e-04 0.04744338
## 3 0.153365833 0.01356126 0.15606728 7.354057e-02 0.15606728
## 4 0.260604313 0.07813675 0.50290766 9.750950e-02 0.50290766
## 5 0.452489862 0.05284706 0.44000309 2.028837e-01 0.44000309
## 6 0.000420588 0.18997619 2.84024014 1.149522e-06 2.84024014
## ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 58.43301 35.83834 0 0.7193587
## 2 982.78748 35.83834 0 2.1664952
## 3 298.76061 35.83834 0 0.8142714
## 4 92.71435 35.83834 0 0.5840184
## 5 105.96916 35.83834 0 0.3443911
## 6 16.41648 16.41648 0 3.3761431
# Optimizing rho and adjust the weight
library(reshape)
rho.optimization <- matrix(0, nrow=nrow(liver.mouse.eQTL.bayesian.4tau), ncol=8)
colnames(rho.optimization)<-c("class", "rho","tmm","tau", "omega","beta_tieda", "n.betas.tieda.se","p.below.0")
nomega.diag<-diag(omega )
rhoclass <- seq(1,11, by=2)
rho <- rhoclass*tau
result <- NULL
for (i in 1:length(rho)) {
rho.optimization[ ,1] <- rhoclass[i]
rho.optimization[ ,2] <- rho[i]
rho.optimization[ ,3] <- (rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm
rho.optimization[ ,4] <-tau*((rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
nTau<- diag(rho.optimization[ ,3], rowLength, rowLength)
ns<-V + nTau
nS <- diag.inverse(ns)
nomega<-diag.multi(nS, V)
# nomega <- diag(0, rowLength, rowLength) # set nomega to 0 for code checking
# nomega <- diag(1, rowLength, rowLength) # set nomega to 1 for code checking
rho.optimization[ ,5] <- diag(nomega )
rho.optimization[ ,6] <- nomega %*% Z %*% gamma + (identity-nomega) %*% betas.hat
nTau_invert<-diag.inverse(nTau)
V_invert<-diag.inverse(V)
nPS_invert<-nTau_invert+ V_invert
# nPS_invert<-nTau_invert+ diag.multi(diag.multi(V_invert, Z_transpose), Z) # previous wrong code
nPS<-diag.inverse(nPS_invert)
nps<-diag(nPS)
nps.long <- melt(nps)
rho.optimization[ ,7] <-(nps.long$value)^0.5
rho.optimization[ ,8] <- pnorm(0, rho.optimization[ ,6], rho.optimization[ ,7])
result <- rbind(result,rho.optimization)
}
dim(result)
## [1] 63606 8
head(result)
## class rho tmm tau omega beta_tieda
## [1,] 1 0.0382713 1 0.0382713 0.0028410791 0.07374336
## [2,] 1 0.0382713 1 0.0382713 0.0006009470 0.07875627
## [3,] 1 0.0382713 1 0.0382713 0.0253000343 0.24029463
## [4,] 1 0.0382713 1 0.0382713 0.0088116182 0.11086105
## [5,] 1 0.0382713 1 0.0382713 0.0006077927 0.01914823
## [6,] 1 0.0382713 1 0.0382713 0.0025499141 0.23703476
## n.betas.tieda.se p.below.0
## [1,] 0.05330177 8.325479e-02
## [2,] 0.02451422 6.575204e-04
## [3,] 0.15905985 6.543011e-02
## [4,] 0.09387022 1.188007e-01
## [5,] 0.02465345 2.186692e-01
## [6,] 0.05049667 1.339133e-06
write.table(result, file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt",col.names=TRUE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian.result <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt", header=T)
result.df <-liver.mouse.eQTL.bayesian.result
result.df$class <- factor(result.df$class)
# combine liver.mouse.eqtl.bayesian and rho.optimization.result for ploting
a <-liver.mouse.eQTL.bayesian.4tau[, c(1:2, 6, 7)]
a <-rbind(a, a, a, a, a, a)
dim(a)
## [1] 63606 4
new.result.df<-cbind(a, result.df)
head(new.result.df)
## ensembl_id betas.hat liver_pvalue abs_lung.beta class rho
## 1 ENSMUSG00000000001 0.07366667 0.190827641 0.05980000 1 0.0382713
## 2 ENSMUSG00000000049 0.07875000 0.006815612 0.02366071 1 0.0382713
## 3 ENSMUSG00000000056 0.24430000 0.153365833 0.01356126 1 0.0382713
## 4 ENSMUSG00000000058 0.11090000 0.260604313 0.07813675 1 0.0382713
## 5 ENSMUSG00000000085 0.01910000 0.452489862 0.05284706 1 0.0382713
## 6 ENSMUSG00000000088 0.23727778 0.000420588 0.18997619 1 0.0382713
## tmm tau omega beta_tieda n.betas.tieda.se p.below.0
## 1 1 0.0382713 0.0028410791 0.07374336 0.05330177 8.325479e-02
## 2 1 0.0382713 0.0006009470 0.07875627 0.02451422 6.575204e-04
## 3 1 0.0382713 0.0253000343 0.24029463 0.15905985 6.543011e-02
## 4 1 0.0382713 0.0088116182 0.11086105 0.09387022 1.188007e-01
## 5 1 0.0382713 0.0006077927 0.01914823 0.02465345 2.186692e-01
## 6 1 0.0382713 0.0025499141 0.23703476 0.05049667 1.339133e-06
new.result.df2 <- new.result.df
by(new.result.df2, new.result.df2[, "class"], head)
## new.result.df2[, "class"]: 1
## ensembl_id betas.hat liver_pvalue abs_lung.beta class rho
## 1 ENSMUSG00000000001 0.07366667 0.190827641 0.05980000 1 0.0382713
## 2 ENSMUSG00000000049 0.07875000 0.006815612 0.02366071 1 0.0382713
## 3 ENSMUSG00000000056 0.24430000 0.153365833 0.01356126 1 0.0382713
## 4 ENSMUSG00000000058 0.11090000 0.260604313 0.07813675 1 0.0382713
## 5 ENSMUSG00000000085 0.01910000 0.452489862 0.05284706 1 0.0382713
## 6 ENSMUSG00000000088 0.23727778 0.000420588 0.18997619 1 0.0382713
## tmm tau omega beta_tieda n.betas.tieda.se p.below.0
## 1 1 0.0382713 0.0028410791 0.07374336 0.05330177 8.325479e-02
## 2 1 0.0382713 0.0006009470 0.07875627 0.02451422 6.575204e-04
## 3 1 0.0382713 0.0253000343 0.24029463 0.15905985 6.543011e-02
## 4 1 0.0382713 0.0088116182 0.11086105 0.09387022 1.188007e-01
## 5 1 0.0382713 0.0006077927 0.01914823 0.02465345 2.186692e-01
## 6 1 0.0382713 0.0025499141 0.23703476 0.05049667 1.339133e-06
## --------------------------------------------------------
## new.result.df2[, "class"]: 3
## ensembl_id betas.hat liver_pvalue abs_lung.beta class
## 11000 ENSMUSG00000000001 0.07366667 0.190827641 0.05980000 3
## 21000 ENSMUSG00000000049 0.07875000 0.006815612 0.02366071 3
## 31000 ENSMUSG00000000056 0.24430000 0.153365833 0.01356126 3
## 41000 ENSMUSG00000000058 0.11090000 0.260604313 0.07813675 3
## 51000 ENSMUSG00000000085 0.01910000 0.452489862 0.05284706 3
## 61000 ENSMUSG00000000088 0.23727778 0.000420588 0.18997619 3
## rho tmm tau omega beta_tieda
## 11000 0.1148139 1.256708e+17 4.809586e+15 2.267172e-20 0.07366667
## 21000 0.1148139 1.256708e+17 4.809586e+15 4.784788e-21 0.07875000
## 31000 0.1148139 1.256708e+17 4.809586e+15 2.065455e-19 0.24430000
## 41000 0.1148139 1.256708e+17 4.809586e+15 7.073998e-20 0.11090000
## 51000 0.1148139 1.256708e+17 4.809586e+15 4.839327e-21 0.01910000
## 61000 0.1148139 6.802265e+07 2.603315e+06 3.758208e-11 0.23727778
## n.betas.tieda.se p.below.0
## 11000 0.05337765 8.377744e-02
## 21000 0.02452159 6.603217e-04
## 31000 0.16111096 6.471591e-02
## 41000 0.09428655 1.197571e-01
## 51000 0.02466095 2.193164e-01
## 61000 0.05056118 1.346897e-06
## --------------------------------------------------------
## new.result.df2[, "class"]: 5
## ensembl_id betas.hat liver_pvalue abs_lung.beta class
## 11002 ENSMUSG00000000001 0.07366667 0.190827641 0.05980000 5
## 21002 ENSMUSG00000000049 0.07875000 0.006815612 0.02366071 5
## 31002 ENSMUSG00000000056 0.24430000 0.153365833 0.01356126 5
## 41002 ENSMUSG00000000058 0.11090000 0.260604313 0.07813675 5
## 51002 ENSMUSG00000000085 0.01910000 0.452489862 0.05284706 5
## 61002 ENSMUSG00000000088 0.23727778 0.000420588 0.18997619 5
## rho tmm tau omega beta_tieda
## 11002 0.1913565 1.121827e+25 4.293376e+23 2.539763e-28 0.07366667
## 21002 0.1913565 1.121827e+25 4.293376e+23 5.360082e-29 0.07875000
## 31002 0.1913565 1.121827e+25 4.293376e+23 2.313792e-27 0.24430000
## 41002 0.1913565 1.121827e+25 4.293376e+23 7.924533e-28 0.11090000
## 51002 0.1913565 1.121827e+25 4.293376e+23 5.421179e-29 0.01910000
## 61002 0.1913565 2.982841e+11 1.141572e+10 8.570464e-15 0.23727778
## n.betas.tieda.se p.below.0
## 11002 0.05337765 8.377744e-02
## 21002 0.02452159 6.603217e-04
## 31002 0.16111096 6.471591e-02
## 41002 0.09428655 1.197571e-01
## 51002 0.02466095 2.193164e-01
## 61002 0.05056118 1.346897e-06
## --------------------------------------------------------
## new.result.df2[, "class"]: 7
## ensembl_id betas.hat liver_pvalue abs_lung.beta class
## 11004 ENSMUSG00000000001 0.07366667 0.190827641 0.05980000 7
## 21004 ENSMUSG00000000049 0.07875000 0.006815612 0.02366071 7
## 31004 ENSMUSG00000000056 0.24430000 0.153365833 0.01356126 7
## 41004 ENSMUSG00000000058 0.11090000 0.260604313 0.07813675 7
## 51004 ENSMUSG00000000085 0.01910000 0.452489862 0.05284706 7
## 61004 ENSMUSG00000000088 0.23727778 0.000420588 0.18997619 7
## rho tmm tau omega beta_tieda
## 11004 0.2678991 1.936031e+30 7.409440e+28 1.471657e-33 0.07366667
## 21004 0.2678991 1.936031e+30 7.409440e+28 3.105882e-34 0.07875000
## 31004 0.2678991 1.936031e+30 7.409440e+28 1.340719e-32 0.24430000
## 41004 0.2678991 1.936031e+30 7.409440e+28 4.591845e-33 0.11090000
## 51004 0.2678991 1.936031e+30 7.409440e+28 3.141284e-34 0.01910000
## 61004 0.2678991 7.473751e+13 2.860301e+12 3.420549e-17 0.23727778
## n.betas.tieda.se p.below.0
## 11004 0.05337765 8.377744e-02
## 21004 0.02452159 6.603217e-04
## 31004 0.16111096 6.471591e-02
## 41004 0.09428655 1.197571e-01
## 51004 0.02466095 2.193164e-01
## 61004 0.05056118 1.346897e-06
## --------------------------------------------------------
## new.result.df2[, "class"]: 9
## ensembl_id betas.hat liver_pvalue abs_lung.beta class
## 11006 ENSMUSG00000000001 0.07366667 0.190827641 0.05980000 9
## 21006 ENSMUSG00000000049 0.07875000 0.006815612 0.02366071 9
## 31006 ENSMUSG00000000056 0.24430000 0.153365833 0.01356126 9
## 41006 ENSMUSG00000000058 0.11090000 0.260604313 0.07813675 9
## 51006 ENSMUSG00000000085 0.01910000 0.452489862 0.05284706 9
## 61006 ENSMUSG00000000088 0.23727778 0.000420588 0.18997619 9
## rho tmm tau omega beta_tieda
## 11006 0.3444417 1.579316e+34 6.044247e+32 1.804056e-37 0.07366667
## 21006 0.3444417 1.579316e+34 6.044247e+32 3.807398e-38 0.07875000
## 31006 0.3444417 1.579316e+34 6.044247e+32 1.643543e-36 0.24430000
## 41006 0.3444417 1.579316e+34 6.044247e+32 5.628990e-37 0.11090000
## 51006 0.3444417 1.579316e+34 6.044247e+32 3.850796e-38 0.01910000
## 61006 0.3444417 4.627081e+15 1.770844e+14 5.524937e-19 0.23727778
## n.betas.tieda.se p.below.0
## 11006 0.05337765 8.377744e-02
## 21006 0.02452159 6.603217e-04
## 31006 0.16111096 6.471591e-02
## 41006 0.09428655 1.197571e-01
## 51006 0.02466095 2.193164e-01
## 61006 0.05056118 1.346897e-06
## --------------------------------------------------------
## new.result.df2[, "class"]: 11
## ensembl_id betas.hat liver_pvalue abs_lung.beta class
## 11008 ENSMUSG00000000001 0.07366667 0.190827641 0.05980000 11
## 21008 ENSMUSG00000000049 0.07875000 0.006815612 0.02366071 11
## 31008 ENSMUSG00000000056 0.24430000 0.153365833 0.01356126 11
## 41008 ENSMUSG00000000058 0.11090000 0.260604313 0.07813675 11
## 51008 ENSMUSG00000000085 0.01910000 0.452489862 0.05284706 11
## 61008 ENSMUSG00000000088 0.23727778 0.000420588 0.18997619 11
## rho tmm tau omega beta_tieda
## 11008 0.4209842 2.097910e+37 8.028972e+35 1.358101e-40 0.07366667
## 21008 0.4209842 2.097910e+37 8.028972e+35 2.866226e-41 0.07875000
## 31008 0.4209842 2.097910e+37 8.028972e+35 1.237267e-39 0.24430000
## 41008 0.4209842 2.097910e+37 8.028972e+35 4.237529e-40 0.11090000
## 51008 0.4209842 2.097910e+37 8.028972e+35 2.898896e-41 0.01910000
## 61008 0.4209842 1.247402e+17 4.773970e+15 2.049405e-20 0.23727778
## n.betas.tieda.se p.below.0
## 11008 0.05337765 8.377744e-02
## 21008 0.02452159 6.603217e-04
## 31008 0.16111096 6.471591e-02
## 41008 0.09428655 1.197571e-01
## 51008 0.02466095 2.193164e-01
## 61008 0.05056118 1.346897e-06
# choose different rho class for plotting
new.result.df2.rho1 <- new.result.df2[new.result.df2$class == 1, ]
head(new.result.df2.rho1)
## ensembl_id betas.hat liver_pvalue abs_lung.beta class rho
## 1 ENSMUSG00000000001 0.07366667 0.190827641 0.05980000 1 0.0382713
## 2 ENSMUSG00000000049 0.07875000 0.006815612 0.02366071 1 0.0382713
## 3 ENSMUSG00000000056 0.24430000 0.153365833 0.01356126 1 0.0382713
## 4 ENSMUSG00000000058 0.11090000 0.260604313 0.07813675 1 0.0382713
## 5 ENSMUSG00000000085 0.01910000 0.452489862 0.05284706 1 0.0382713
## 6 ENSMUSG00000000088 0.23727778 0.000420588 0.18997619 1 0.0382713
## tmm tau omega beta_tieda n.betas.tieda.se p.below.0
## 1 1 0.0382713 0.0028410791 0.07374336 0.05330177 8.325479e-02
## 2 1 0.0382713 0.0006009470 0.07875627 0.02451422 6.575204e-04
## 3 1 0.0382713 0.0253000343 0.24029463 0.15905985 6.543011e-02
## 4 1 0.0382713 0.0088116182 0.11086105 0.09387022 1.188007e-01
## 5 1 0.0382713 0.0006077927 0.01914823 0.02465345 2.186692e-01
## 6 1 0.0382713 0.0025499141 0.23703476 0.05049667 1.339133e-06
tail(new.result.df2.rho1)
## ensembl_id betas.hat liver_pvalue abs_lung.beta class
## 10596 ENSMUSG00000099041 0.16160714 0.10867553 0.10059091 1
## 10597 ENSMUSG00000099083 0.10340000 0.06630646 0.12891000 1
## 10598 ENSMUSG00000099116 0.01975000 0.68385715 0.03457738 1
## 10599 ENSMUSG00000099164 0.10844444 0.12006199 0.08603846 1
## 10600 ENSMUSG00000099262 0.01105556 0.85807655 0.18600427 1
## 10601 ENSMUSG00000099305 0.05450000 0.17299524 0.06246296 1
## rho tmm tau omega beta_tieda n.betas.tieda.se
## 10596 0.0382713 1 0.0382713 0.008726050 0.16118828 0.09341333
## 10597 0.0382713 1 0.0382713 0.002653901 0.10345094 0.05151603
## 10598 0.0382713 1 0.0382713 0.002243790 0.01991358 0.04736866
## 10599 0.0382713 1 0.0382713 0.004230408 0.10844674 0.06504159
## 10600 0.0382713 1 0.0382713 0.003660011 0.01153011 0.06049802
## 10601 0.0382713 1 0.0382713 0.001426669 0.05456706 0.03777127
## p.below.0
## 10596 0.04221525
## 10597 0.02231468
## 10598 0.33709818
## 10599 0.04772295
## 10600 0.42442478
## 10601 0.07427516
# rank with p.below.0: bayesian modeling
new.result.df2.rho1$rank.p.below.0 <- rank(new.result.df2.rho1$p.below.0)
# rank with liver p value: traditionsl linear regression (one step regression)
new.result.df2.rho1$rank.liver.pvalue <- rank(new.result.df2.rho1$liver_pvalue)
new.result.df2.rho1 <- new.result.df2.rho1[order(new.result.df2.rho1$rank.p.below.0), ]
head(new.result.df2.rho1)
## ensembl_id betas.hat liver_pvalue abs_lung.beta class
## 2577 ENSMUSG00000022680 4.255833 2.034364e-16 2.7728167 1
## 9323 ENSMUSG00000057132 2.426200 7.421942e-14 3.0932000 1
## 2806 ENSMUSG00000023791 1.198018 4.042683e-13 0.4850804 1
## 612 ENSMUSG00000005161 1.872357 1.235782e-12 1.9872891 1
## 1891 ENSMUSG00000020803 0.465800 2.918690e-12 0.2315714 1
## 3371 ENSMUSG00000025178 1.418568 3.707316e-12 1.4921200 1
## rho tmm tau omega beta_tieda n.betas.tieda.se
## 2577 0.0382713 1 0.0382713 0.0067767713 4.2335097 0.08232115
## 9323 0.0382713 1 0.0382713 0.0055026791 2.4187008 0.07418005
## 2806 0.0382713 1 0.0382713 0.0017539320 1.1963299 0.04187997
## 612 0.0382713 1 0.0382713 0.0050848675 1.8664589 0.07130826
## 1891 0.0382713 1 0.0382713 0.0003617983 0.4656876 0.01902100
## 3371 0.0382713 1 0.0382713 0.0034731501 1.4155697 0.05893344
## p.below.0 rank.p.below.0 rank.liver.pvalue
## 2577 0.000000e+00 1 1
## 9323 1.696164e-233 2 2
## 2806 8.970068e-180 3 3
## 612 2.592473e-151 4 4
## 1891 1.125724e-132 5 5
## 3371 8.634952e-128 6 6
# caculate TPR: true positive rate
# caculate PPV: positive predictive rate
result.rho1 <- matrix(, nrow(new.result.df2.rho1), 8)
colnames(result.rho1)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR" )
for (i in 1:nrow(new.result.df2.rho1))
{
newdata1.rho1 <- subset(new.result.df2.rho1, rank.p.below.0 <= i)
overlap.newdata1.rho1 <- newdata1.rho1[newdata1.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
result.rho1[i, 1] <- i
result.rho1[i, 2] <- nrow(overlap.newdata1.rho1)/nrow(newdata1.rho1)
result.rho1[i, 3] <- nrow(overlap.newdata1.rho1)/nrow(liver.ASE.ensembl)
newdata2.rho1 <- subset(new.result.df2.rho1, rank.liver.pvalue <= i)
overlap.newdata2.rho1 <- newdata2.rho1[newdata2.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
result.rho1[i, 5] <- i
result.rho1[i, 6] <- nrow(overlap.newdata2.rho1)/nrow(newdata2.rho1)
result.rho1[i, 7] <- nrow(overlap.newdata2.rho1)/nrow(liver.ASE.ensembl)
}
head(result.rho1)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [1,] 1 0.0000000 0.000000000 NA 1 0.0000000 0.000000000
## [2,] 2 0.0000000 0.000000000 NA 2 0.0000000 0.000000000
## [3,] 3 0.0000000 0.000000000 NA 3 0.0000000 0.000000000
## [4,] 4 0.2500000 0.005128205 NA 4 0.2500000 0.005128205
## [5,] 5 0.2000000 0.005128205 NA 5 0.2000000 0.005128205
## [6,] 6 0.1666667 0.005128205 NA 6 0.1666667 0.005128205
## ori_FPR
## [1,] NA
## [2,] NA
## [3,] NA
## [4,] NA
## [5,] NA
## [6,] NA
tail(result.rho1)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [10596,] 10596 0.01840317 1 NA 10596 0.01840317 1
## [10597,] 10597 0.01840143 1 NA 10597 0.01840143 1
## [10598,] 10598 0.01839970 1 NA 10598 0.01839970 1
## [10599,] 10599 0.01839796 1 NA 10599 0.01839796 1
## [10600,] 10600 0.01839623 1 NA 10600 0.01839623 1
## [10601,] 10601 0.01839449 1 NA 10601 0.01839449 1
## ori_FPR
## [10596,] NA
## [10597,] NA
## [10598,] NA
## [10599,] NA
## [10600,] NA
## [10601,] NA
# ploting "True positive rate"
plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4) , xlim=c(0, 300))
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4), xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# ploting "positive predictive value"
plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# Retrieve MT-eQTLs result
MTeQTLs <-read.table(file="MT-eQTLs.txt", header=T)
head(MTeQTLs)
## SNP gene isEQTL.Liver isEQTL.lung marginalP.Liver
## 1 rs6269442 1424963_at 0 0 0.9260306
## 2 rs6365999 1424963_at 0 0 0.9260306
## 3 rs6376963 1424963_at 0 0 0.9306985
## 4 rs3677817 1424963_at 0 0 0.9329117
## 5 rs6269442 1424964_at 0 0 0.9438114
## 6 rs6365999 1424964_at 0 0 0.9438114
## marginalP.lung
## 1 0.9027543
## 2 0.9027543
## 3 0.9104129
## 4 0.9140456
## 5 0.9219186
## 6 0.9219186
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt", header=T)
MTeQTLs<-merge(MTeQTLs, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
MTeQTLs.min <- data.table(MTeQTLs, key=c('ensembl_id', "marginalP.Liver"))
MTeQTLs.min <-MTeQTLs.min[J(unique(ensembl_id)),mult="first"]
merged.eQTL <- merge(new.result.df2.rho1, MTeQTLs.min, by ="ensembl_id")
merged.eQTL$rank.marginalP.Liver <- rank(merged.eQTL$marginalP.Liver)
merged.result <- matrix(, nrow(merged.eQTL), 12)
colnames(merged.result)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR", "MTrank","MTppv","MT_TPR","MT_FPR" )
for (i in 1:nrow(merged.eQTL))
{
newdata1 <- subset(merged.eQTL, rank.p.below.0 <= i)
overlap.newdata1 <- newdata1[newdata1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
merged.result[i, 1] <- i
merged.result[i, 2] <- nrow(overlap.newdata1)/nrow(newdata1)
merged.result[i, 3] <- nrow(overlap.newdata1)/nrow(liver.ASE.ensembl)
merged.result[i, 4] <- (nrow(newdata1)-nrow(overlap.newdata1)) / (nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
newdata2 <- subset(merged.eQTL, rank.liver.pvalue <= i)
overlap.newdata2 <- newdata2[newdata2$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
merged.result[i, 5] <- i
merged.result[i, 6] <- nrow(overlap.newdata2)/nrow(newdata2)
merged.result[i, 7] <- nrow(overlap.newdata2)/nrow(liver.ASE.ensembl)
merged.result[i, 8] <- (nrow(newdata2)-nrow(overlap.newdata2)) / (nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
newdata3 <- subset(merged.eQTL, rank.marginalP.Liver <= i)
overlap.newdata3 <- newdata3[newdata3$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
merged.result[i, 9] <- i
merged.result[i, 10] <- nrow(overlap.newdata3)/nrow(newdata3)
merged.result[i, 11] <- nrow(overlap.newdata3)/nrow(liver.ASE.ensembl)
merged.result[i, 12] <- (nrow(newdata3)-nrow(overlap.newdata3)) /(nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
}
head(merged.result)
## bayrank bayppv bay_TPR bay_FPR orirank orippv
## [1,] 1 0.0000000 0.000000000 0.0001019680 1 0.0000000
## [2,] 2 0.0000000 0.000000000 0.0002039360 2 0.0000000
## [3,] 3 0.0000000 0.000000000 0.0003059039 3 0.0000000
## [4,] 4 0.2500000 0.005128205 0.0003059039 4 0.2500000
## [5,] 5 0.2000000 0.005128205 0.0004078719 5 0.2000000
## [6,] 6 0.1666667 0.005128205 0.0005098399 6 0.1666667
## ori_TPR ori_FPR MTrank MTppv MT_TPR MT_FPR
## [1,] 0.000000000 0.0001019680 1 0.0000000 0.000000000 0.0001019680
## [2,] 0.000000000 0.0002039360 2 0.5000000 0.005128205 0.0001019680
## [3,] 0.000000000 0.0003059039 3 0.6666667 0.010256410 0.0001019680
## [4,] 0.005128205 0.0003059039 4 0.5000000 0.010256410 0.0002039360
## [5,] 0.005128205 0.0004078719 5 0.6000000 0.015384615 0.0002039360
## [6,] 0.005128205 0.0005098399 6 0.5000000 0.015384615 0.0003059039
tail(merged.result)
## bayrank bayppv bay_TPR bay_FPR orirank orippv
## [9997,] 9997 0.01939792 0.9384615 0.9433058 9997 0.01939998
## [9998,] 9998 0.01939587 0.9384615 0.9434078 9998 0.01939792
## [9999,] 9999 0.01939381 0.9384615 0.9435097 9999 0.01939587
## [10000,] 10000 0.01939176 0.9384615 0.9436117 10000 0.01939381
## [10001,] 10001 0.01938970 0.9384615 0.9437137 10001 0.01939176
## [10002,] 10002 0.01938970 0.9384615 0.9437137 10002 0.01938970
## ori_TPR ori_FPR MTrank MTppv MT_TPR MT_FPR
## [9997,] 0.9384615 0.9432038 9997 0.01840552 0.9435897 1.000612
## [9998,] 0.9384615 0.9433058 9998 0.01840368 0.9435897 1.000714
## [9999,] 0.9384615 0.9434078 9999 0.01840184 0.9435897 1.000816
## [10000,] 0.9384615 0.9435097 10000 0.01840000 0.9435897 1.000918
## [10001,] 0.9384615 0.9436117 10001 0.01839816 0.9435897 1.001020
## [10002,] 0.9384615 0.9437137 10002 0.01839632 0.9435897 1.001122
# ploting "True positive rate"
plot(merged.result[, 1], merged.result[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( merged.result[, 1], merged.result[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
par(new=TRUE)
plot(merged.result[, 1], merged.result[, 11], type="l", col="blue", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

plot(merged.result[, 1], merged.result[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 1), xlim=c(0, 300) )
par(new=TRUE)
plot( merged.result[, 1], merged.result[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1), xlim=c(0, 300))
par(new=TRUE)
plot(merged.result[, 1], merged.result[, 11], type="l", col="blue", xlab="Ranking", ylab="TPR", ylim=c(0, 1), xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

plot(merged.result[, 4], merged.result[, 3], type="l", col="red", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1) )
par(new=TRUE)
plot( merged.result[, 8], merged.result[, 7], type="l", col="green", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1))
par(new=TRUE)
plot(merged.result[, 12], merged.result[, 11], type="l", col="blue", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)
title(main = "ROC curve")
