rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 370865 19.9 750400 40.1 592000 31.7
## Vcells 623900 4.8 1308461 10.0 1007467 7.7
# set the working directory
setwd("/Volumes/Transcend/Thesis_project/subsetted_liver")
# subset dataset
sebsetn <- 30
mouse.liver.expression.eqtl <-read.table(file="2015-12-07 mouse.liver.expression.eqtl.txt", header=T)
head(mouse.liver.expression.eqtl)
## ProbeSet BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18
## 1 1415670_at 10.090 10.200 10.300 10.208 9.830 10.238 9.914 10.348
## 2 1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3 1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4 1415673_at 7.535 7.382 7.566 7.162 7.403 7.342 7.213 7.581
## 5 1415674_a_at 9.757 9.972 9.269 9.873 9.354 9.918 9.459 9.655
## 6 1415675_at 9.029 9.009 9.245 9.282 9.415 9.098 9.060 8.937
## BXD19 BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31
## 1 9.939 9.871 10.077 10.159 9.746 9.890 10.286 10.177 9.959 9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4 7.551 7.368 7.251 7.373 7.408 7.473 7.238 7.424 7.475 7.491
## 5 9.544 9.557 9.460 9.322 9.795 9.758 9.628 9.263 9.671 9.645
## 6 8.995 9.104 9.123 9.086 9.087 9.040 8.844 9.173 8.951 9.131
## BXD32 BXD33 BXD34 BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6
## 1 10.059 10.102 10.174 10.022 10.364 9.745 10.074 9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4 7.419 7.386 7.496 7.369 7.265 7.130 7.149 7.383 7.439 7.233
## 5 9.368 9.741 9.721 9.684 9.387 9.649 9.693 9.793 9.840 9.519
## 6 9.108 9.122 9.153 9.008 9.138 9.126 9.233 9.060 9.134 9.081
## BXD8 BXD9
## 1 9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4 7.397 7.390
## 5 9.340 9.160
## 6 8.926 9.274
dim(mouse.liver.expression.eqtl)
## [1] 20855 31
set.seed(50)
sub.mouse.liver.expression.eqtl <- mouse.liver.expression.eqtl[, c(1, sample(2:dim(mouse.liver.expression.eqtl)[2],sebsetn, replace=FALSE))]
head(sub.mouse.liver.expression.eqtl)
## ProbeSet BXD36 BXD24 BXD15 BXD34 BXD24a BXD11 BXD29 BXD27
## 1 1415670_at 10.022 9.746 10.238 10.174 9.890 10.200 9.959 10.286
## 2 1415671_at 10.923 11.104 11.120 11.059 10.979 11.088 11.071 10.905
## 3 1415672_at 11.545 11.499 11.318 11.490 11.531 11.417 11.502 11.666
## 4 1415673_at 7.369 7.408 7.342 7.496 7.473 7.382 7.475 7.238
## 5 1415674_a_at 9.684 9.795 9.918 9.721 9.758 9.972 9.671 9.628
## 6 1415675_at 9.008 9.087 9.098 9.153 9.040 9.009 8.951 8.844
## BXD1 BXD12 BXD18 BXD6 BXD21 BXD40 BXD14 BXD20 BXD31 BXD28
## 1 10.090 10.300 10.348 10.069 10.159 10.074 9.830 10.077 9.882 10.177
## 2 10.932 11.007 11.123 11.043 10.969 11.067 10.955 10.988 10.996 11.013
## 3 11.432 11.442 11.561 11.504 11.328 11.543 11.561 11.367 11.446 11.500
## 4 7.535 7.566 7.581 7.233 7.373 7.149 7.403 7.251 7.491 7.424
## 5 9.757 9.269 9.655 9.519 9.322 9.693 9.354 9.460 9.645 9.263
## 6 9.029 9.245 8.937 9.081 9.086 9.233 9.415 9.123 9.131 9.173
## BXD9 BXD39 BXD16 BXD38 BXD32 BXD33 BXD19 BXD13 BXD5 BXD42
## 1 10.142 9.745 9.914 10.364 10.059 10.102 9.939 10.208 10.160 9.961
## 2 10.982 11.030 11.012 11.053 10.854 11.084 10.922 11.020 10.878 10.891
## 3 11.700 11.516 11.461 11.496 11.548 11.511 11.575 11.555 11.374 11.457
## 4 7.390 7.130 7.213 7.265 7.419 7.386 7.551 7.162 7.439 7.383
## 5 9.160 9.649 9.459 9.387 9.368 9.741 9.544 9.873 9.840 9.793
## 6 9.274 9.126 9.060 9.138 9.108 9.122 8.995 9.282 9.134 9.060
## BXD8 BXD2
## 1 9.956 9.871
## 2 11.206 10.802
## 3 11.531 11.426
## 4 7.397 7.368
## 5 9.340 9.557
## 6 8.926 9.104
dim(sub.mouse.liver.expression.eqtl)
## [1] 20855 31
write.table(sub.mouse.liver.expression.eqtl,file="2016-05-09 sub.mouse.liver.expression.eqtl.txt", sep="\t", row.names=FALSE, quote=FALSE)
#subset liver snp expression data
BXD.geno.SNP.eqtl.for.liver <-read.table(file="2015-12-07 BXD.geno.SNP.eqtl.for.liver.txt", header=T)
head(BXD.geno.SNP.eqtl.for.liver)
## Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs6269442 0 0 1 0 0 1 1 0 1 0
## 2 rs6365999 0 0 1 0 0 1 1 0 1 0
## 3 rs6376963 0 0 1 0 0 1 1 0 1 0
## 4 rs3677817 0 0 1 0 0 1 1 0 1 0
## 5 rs8236463 0 1 1 0 0 1 1 0 1 0
## 6 rs6333200 0 1 1 0 0 1 1 0 1 0
## BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1 1 1 0 0 0 1 1 0 0 0 0 0
## 2 1 1 0 0 0 1 1 0 0 0 0 0
## 3 1 1 0 0 0 1 1 0 0 0 0 0
## 4 1 1 0 0 0 1 1 0 0 0 0 0
## 5 1 1 0 0 0 1 1 0 0 0 0 0
## 6 1 1 0 0 0 1 1 0 0 0 0 0
## BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1 0 1 0 0 1 1 1 0
## 2 0 1 0 0 1 1 1 0
## 3 0 1 0 0 1 1 1 0
## 4 0 1 0 0 1 1 1 0
## 5 0 1 0 0 1 1 1 0
## 6 0 1 0 0 1 1 1 0
dim(BXD.geno.SNP.eqtl.for.liver)
## [1] 3811 31
set.seed(50)
sub.BXD.geno.SNP.eqtl.for.liver <- BXD.geno.SNP.eqtl.for.liver[, c(1, sample(2:dim(BXD.geno.SNP.eqtl.for.liver)[2],sebsetn, replace=FALSE))]
head(sub.BXD.geno.SNP.eqtl.for.liver)
## Locus BXD36 BXD24 BXD15 BXD34 BXD24a BXD11 BXD29 BXD27 BXD1 BXD12
## 1 rs6269442 0 0 1 0 0 0 1 0 0 1
## 2 rs6365999 0 0 1 0 0 0 1 0 0 1
## 3 rs6376963 0 0 1 0 0 0 1 0 0 1
## 4 rs3677817 0 0 1 0 0 0 1 0 0 1
## 5 rs8236463 0 0 1 0 0 1 1 0 0 1
## 6 rs6333200 0 0 1 0 0 1 1 0 0 1
## BXD18 BXD6 BXD21 BXD40 BXD14 BXD20 BXD31 BXD28 BXD9 BXD39 BXD16 BXD38
## 1 0 1 1 0 0 1 0 1 0 1 1 0
## 2 0 1 1 0 0 1 0 1 0 1 1 0
## 3 0 1 1 0 0 1 0 1 0 1 1 0
## 4 0 1 1 0 0 1 0 1 0 1 1 0
## 5 0 1 1 0 0 1 0 1 0 1 1 0
## 6 0 1 1 0 0 1 0 1 0 1 1 0
## BXD32 BXD33 BXD19 BXD13 BXD5 BXD42 BXD8 BXD2
## 1 0 0 1 0 1 0 1 0
## 2 0 0 1 0 1 0 1 0
## 3 0 0 1 0 1 0 1 0
## 4 0 0 1 0 1 0 1 0
## 5 0 0 1 0 1 0 1 0
## 6 0 0 1 0 1 0 1 0
dim(sub.BXD.geno.SNP.eqtl.for.liver)
## [1] 3811 31
write.table(sub.BXD.geno.SNP.eqtl.for.liver,file="2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt", sep="\t", row.names=FALSE, quote=FALSE)
library(MatrixEQTL)
## Location of the package with the data files.
base.dir = "/Volumes/Transcend/Thesis_project/subsetted_liver";
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste(base.dir, "/2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt", sep="");
snps_location_file_name = paste(base.dir, "/2015-12-07 BXD.geno.loc.eqtl.for.liver.txt", sep="");
# Gene expression file name
expression_file_name = paste(base.dir, "/2016-05-09 sub.mouse.liver.expression.eqtl.txt", sep="");
gene_location_file_name = paste(base.dir, "/2015-12-07 liver.gene.loc.txt", sep="");
# Covariates file name
# Set to character() for no covariates
covariates_file_name = character() ;
# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();
# Only associations significant at this level will be saved
pvOutputThreshold_cis = 1;
pvOutputThreshold_tra = 0.000000000000005;
# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;
## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t"; # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;
snps$fileSkipColumns = 1;
snps$fileSliceSize = 2000;
snps$LoadFile(SNP_file_name);
## Rows read: 2,000
## Rows read: 3811 done.
## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t";
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1;
gene$fileSkipColumns = 1;
gene$fileSliceSize = 2000;
gene$LoadFile(expression_file_name);
## Rows read: 2,000
## Rows read: 4,000
## Rows read: 6,000
## Rows read: 8,000
## Rows read: 10,000
## Rows read: 12,000
## Rows read: 14,000
## Rows read: 16,000
## Rows read: 18,000
## Rows read: 20,000
## Rows read: 20855 done.
## Load covariates
cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t"; # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1; # one row of column labels
cvrt$fileSkipColumns = 1; # one column of row labels
if(length(covariates_file_name)>0) {
cvrt$LoadFile(covariates_file_name);
}
## Run the analysis
snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
head(genepos)
## probe_id Chromosome start_location end_location
## 1 1415670_at 6 87887971 87890759
## 2 1415671_at 8 105524469 105566040
## 3 1415672_at 8 23241325 23257080
## 4 1415673_at 5 129765557 129787253
## 5 1415674_a_at 9 44403758 44407548
## 6 1415675_at 2 32570857 32573571
me = Matrix_eQTL_main(
snps = snps,
gene = gene,
output_file_name = output_file_name_tra,
pvOutputThreshold = pvOutputThreshold_tra,
useModel = useModel,
errorCovariance = numeric(),
verbose = TRUE,
output_file_name.cis = output_file_name_cis,
pvOutputThreshold.cis = pvOutputThreshold_cis,
snpspos = snpspos,
genepos = genepos,
cisDist = cisDist,
pvalue.hist = TRUE,
min.pv.by.genesnp = FALSE,
noFDRsaveMemory = FALSE);
## Matching data files and location files
## 20855 of 20855 genes matched
## 3811 of 3811 SNPs matched
## Task finished in 0.024 seconds
## Reordering genes
##
## Task finished in 0.149 seconds
## Processing covariates
## Task finished in 0.001 seconds
## Processing gene expression data (imputation, residualization, etc.)
## Task finished in 0.028 seconds
## Creating output file(s)
## Task finished in 0.016 seconds
## Performing eQTL analysis
## 4.54% done, 7,118 cis-eQTLs, 67 trans-eQTLs
## 9.09% done, 13,788 cis-eQTLs, 79 trans-eQTLs
## 13.63% done, 21,752 cis-eQTLs, 137 trans-eQTLs
## 18.18% done, 27,806 cis-eQTLs, 154 trans-eQTLs
## 22.72% done, 34,278 cis-eQTLs, 272 trans-eQTLs
## 27.27% done, 35,915 cis-eQTLs, 272 trans-eQTLs
## 31.81% done, 272 trans-eQTLs
## 36.36% done, 272 trans-eQTLs
## 40.90% done, 272 trans-eQTLs
## 45.45% done, 272 trans-eQTLs
## 50.00% done, 272 trans-eQTLs
## 54.54% done, 272 trans-eQTLs
## 59.09% done, 272 trans-eQTLs
## 63.63% done, 272 trans-eQTLs
## 68.18% done, 272 trans-eQTLs
## 72.72% done, 282 trans-eQTLs
## 77.27% done, 40,035 cis-eQTLs, 283 trans-eQTLs
## 81.81% done, 47,465 cis-eQTLs, 318 trans-eQTLs
## 86.36% done, 53,735 cis-eQTLs, 348 trans-eQTLs
## 90.90% done, 60,292 cis-eQTLs, 355 trans-eQTLs
## 95.45% done, 67,748 cis-eQTLs, 395 trans-eQTLs
## 100.00% done, 69,174 cis-eQTLs, 395 trans-eQTLs
## Task finished in 10.219 seconds
##
unlink(output_file_name_cis);
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')
## Analysis done in: 9.8 seconds
cat('Detected local eQTLs:','\n')
## Detected local eQTLs:
cis.eqtls<-me$cis$eqtls
head(cis.eqtls)
## snps gene statistic pvalue FDR beta
## 1 rs4163042 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 2 116Mit88 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 3 rs4163058 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 4 rs4163391 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 5 rs4151923 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 6 rs3090019 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
dim(cis.eqtls)
## [1] 69174 6
cis.eqtls$beta_se <-cis.eqtls$beta/cis.eqtls$statistic
write.table(cis.eqtls,file="2016-05-09 sub.mouseliver.cis.1M.eqtls.txt", sep="\t", row.names=FALSE, quote=FALSE)
# load mouse lung cis eqtl result
lung.mouse.eQTL<-read.table(file="2015-12-04 mouselung.cis.1M.eqtls.txt", header=T)
# load mouse liver cis eqtl result
liver.mouse.eQTL<-read.table(file="2016-05-09 sub.mouseliver.cis.1M.eqtls.txt", header=T)
mouse4302ensembl_id<-read.table(file="2015-12-04 mouse4302ensembl_id.txt", header=T)
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt", header=T)
# Add ensemble id annoatation to the data
lung.mouse.eQTL<-merge(lung.mouse.eQTL, mouse4302ensembl_id, by.x = "gene", by.y="probe_id")
liver.mouse.eQTL<-merge(liver.mouse.eQTL, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
head(lung.mouse.eQTL)
## gene snps statistic pvalue FDR beta
## 1 1415670_at rs13475374 1.0503967 0.2994035 0.5712395 0.06286667
## 2 1415670_at rs13478876 0.9133601 0.3661462 0.6356597 0.05583410
## 3 1415670_at rs3713705 1.1363521 0.2621028 0.5310916 0.06786667
## 4 1415670_at rs13478880 1.1363521 0.2621028 0.5310916 0.06786667
## 5 1415670_at gnf06.086.089 0.9133601 0.3661462 0.6356597 0.05583410
## 6 1415672_at rs13479651 -1.3052534 0.1987480 0.4510718 -0.03993582
## beta_se ensembl_id
## 1 0.05985040 ENSMUSG00000030058
## 2 0.06113044 ENSMUSG00000030058
## 3 0.05972327 ENSMUSG00000030058
## 4 0.05972327 ENSMUSG00000030058
## 5 0.06113044 ENSMUSG00000030058
## 6 0.03059622 ENSMUSG00000015341
head(liver.mouse.eQTL)
## gene snps statistic pvalue FDR beta
## 1 1415670_at rs13478876 0.3577236 0.7232327 0.9060716 0.022464286
## 2 1415670_at rs13475374 0.1359233 0.8928544 0.9654697 0.008533333
## 3 1415670_at gnf06.086.089 0.3577236 0.7232327 0.9060716 0.022464286
## 4 1415670_at rs13478880 0.3577236 0.7232327 0.9060716 0.022464286
## 5 1415670_at rs3713705 0.3577236 0.7232327 0.9060716 0.022464286
## 6 1415671_at rs3675894 1.0520305 0.3017805 0.6480655 0.038909091
## beta_se ensembl_id
## 1 0.06279788 ENSMUSG00000030058
## 2 0.06278048 ENSMUSG00000030058
## 3 0.06279788 ENSMUSG00000030058
## 4 0.06279788 ENSMUSG00000030058
## 5 0.06279788 ENSMUSG00000030058
## 6 0.03698476 ENSMUSG00000013160
library(data.table)
# Select Gene-SNP pair with minimum P value
lung.mouse.eQTL.min <- data.table(lung.mouse.eQTL, key=c('ensembl_id', "pvalue"))
lung.mouse.eQTL.min<-lung.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
lung.mouse.eQTL.min<-as.data.frame(lung.mouse.eQTL.min)
liver.mouse.eQTL.min <- data.table(liver.mouse.eQTL, key=c('ensembl_id', "pvalue"))
liver.mouse.eQTL.min<-liver.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
liver.mouse.eQTL.min<-as.data.frame(liver.mouse.eQTL.min)
library(plyr)
lung.mouse.eQTL.min<-rename(lung.mouse.eQTL.min, c("pvalue"="lung_pvalue", "beta"="lung.beta", "beta_se"="lung.beta_se"))
liver.mouse.eQTL.min<-rename(liver.mouse.eQTL.min, c("pvalue"="liver_pvalue", "beta"="liver.beta", "beta_se"="liver.beta_se"))
head(lung.mouse.eQTL.min)
## gene snps statistic lung_pvalue FDR
## 1 1428645_at rs3702359 -1.4324741 0.1592383 0.3935788
## 2 1421514_a_at gnfX.141.820 -0.1728806 0.8635565 0.9457289
## 3 1416677_at rs3720981 -0.1308388 0.8965131 0.9603432
## 4 1451677_at CEL-11_120628029 0.4574870 0.6496243 0.8393760
## 5 1425955_at rs13478643 -1.0185410 0.3141176 0.5866782
## 6 1426241_a_at rs4224744 0.9192838 0.3630755 0.6327046
## lung.beta lung.beta_se ensembl_id
## 1 -0.059800000 0.04174596 ENSMUSG00000000001
## 2 -0.005911765 0.03419566 ENSMUSG00000000037
## 3 -0.023660714 0.18083869 ENSMUSG00000000049
## 4 0.016446429 0.03594950 ENSMUSG00000000056
## 5 -0.078136752 0.07671439 ENSMUSG00000000058
## 6 0.052847059 0.05748721 ENSMUSG00000000085
head(liver.mouse.eQTL.min)
## gene snps statistic liver_pvalue FDR
## 1 1428645_at rs13477321 -0.6238563 5.377718e-01 0.8222409897
## 2 1449320_at CEL-X_71438949 1.3340954 1.929225e-01 0.5174275252
## 3 1421514_a_at CEL-X_154048891 -0.2835863 7.788138e-01 0.9261630010
## 4 1416677_at rs3670642 5.4086629 9.084753e-06 0.0001847233
## 5 1451678_at CEL-11_121219118 1.2354816 2.269175e-01 0.5622059072
## 6 1425955_at NAT_6_18.199327 -1.9727688 5.846922e-02 0.2581607284
## liver.beta liver.beta_se ensembl_id
## 1 -0.031972222 0.05124934 ENSMUSG00000000001
## 2 0.033761364 0.02530656 ENSMUSG00000000003
## 3 -0.007730159 0.02725858 ENSMUSG00000000037
## 4 0.084466667 0.01561692 ENSMUSG00000000049
## 5 0.127208145 0.10296240 ENSMUSG00000000056
## 6 -0.051222222 0.02596463 ENSMUSG00000000058
tail(liver.mouse.eQTL.min)
## gene snps statistic liver_pvalue FDR
## 10962 1434694_at rs13459062 -2.3077250 0.02861902 0.1563120
## 10963 1437645_at rs4231033 2.0047987 0.05474021 0.2465394
## 10964 1449939_s_at rs13481636 0.6744392 0.50556296 0.8021978
## 10965 1422547_at rs4165065 -1.6676237 0.10653819 0.3695183
## 10966 1451476_at rs4165065 0.3972613 0.69418700 0.8956391
## 10967 1448115_at rs4165069 1.5491597 0.13257346 0.4209997
## liver.beta liver.beta_se ensembl_id
## 10962 -0.12408929 0.05377126 ENSMUSG00000099041
## 10963 0.06022624 0.03004104 ENSMUSG00000099083
## 10964 0.01886111 0.02796562 ENSMUSG00000099116
## 10965 -0.07874163 0.04721786 ENSMUSG00000099164
## 10966 0.01569378 0.03950493 ENSMUSG00000099262
## 10967 0.08752778 0.05650017 ENSMUSG00000099305
dim(lung.mouse.eQTL.min)
## [1] 11015 8
dim(liver.mouse.eQTL.min)
## [1] 10967 8
# lung, liver eqtl with ensemble_id
merged.mouse.eQTL.min<-merge(lung.mouse.eQTL.min, liver.mouse.eQTL.min, by.x = "ensembl_id", by.y="ensembl_id")
head(merged.mouse.eQTL.min)
## ensembl_id gene.x snps.x statistic.x lung_pvalue
## 1 ENSMUSG00000000001 1428645_at rs3702359 -1.4324741 0.1592383
## 2 ENSMUSG00000000037 1421514_a_at gnfX.141.820 -0.1728806 0.8635565
## 3 ENSMUSG00000000049 1416677_at rs3720981 -0.1308388 0.8965131
## 4 ENSMUSG00000000056 1451677_at CEL-11_120628029 0.4574870 0.6496243
## 5 ENSMUSG00000000058 1425955_at rs13478643 -1.0185410 0.3141176
## 6 ENSMUSG00000000085 1426241_a_at rs4224744 0.9192838 0.3630755
## FDR.x lung.beta lung.beta_se gene.y snps.y
## 1 0.3935788 -0.059800000 0.04174596 1428645_at rs13477321
## 2 0.9457289 -0.005911765 0.03419566 1421514_a_at CEL-X_154048891
## 3 0.9603432 -0.023660714 0.18083869 1416677_at rs3670642
## 4 0.8393760 0.016446429 0.03594950 1451678_at CEL-11_121219118
## 5 0.5866782 -0.078136752 0.07671439 1425955_at NAT_6_18.199327
## 6 0.6327046 0.052847059 0.05748721 1426241_a_at CEL-4_120039566
## statistic.y liver_pvalue FDR.y liver.beta liver.beta_se
## 1 -0.6238563 5.377718e-01 0.8222409897 -0.031972222 0.05124934
## 2 -0.2835863 7.788138e-01 0.9261630010 -0.007730159 0.02725858
## 3 5.4086629 9.084753e-06 0.0001847233 0.084466667 0.01561692
## 4 1.2354816 2.269175e-01 0.5622059072 0.127208145 0.10296240
## 5 -1.9727688 5.846922e-02 0.2581607284 -0.051222222 0.02596463
## 6 -2.2103759 3.542074e-02 0.1820043924 -0.072100000 0.03261889
dim(merged.mouse.eQTL.min)
## [1] 10422 15
merged.mouse.eQTL.min<-data.frame(merged.mouse.eQTL.min)
merged.mouse.eQTL.min<-merged.mouse.eQTL.min[, c(1, 5, 7, 8, 12, 14, 15 )]
head(merged.mouse.eQTL.min)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.1592383 -0.059800000 0.04174596 5.377718e-01
## 2 ENSMUSG00000000037 0.8635565 -0.005911765 0.03419566 7.788138e-01
## 3 ENSMUSG00000000049 0.8965131 -0.023660714 0.18083869 9.084753e-06
## 4 ENSMUSG00000000056 0.6496243 0.016446429 0.03594950 2.269175e-01
## 5 ENSMUSG00000000058 0.3141176 -0.078136752 0.07671439 5.846922e-02
## 6 ENSMUSG00000000085 0.3630755 0.052847059 0.05748721 3.542074e-02
## liver.beta liver.beta_se
## 1 -0.031972222 0.05124934
## 2 -0.007730159 0.02725858
## 3 0.084466667 0.01561692
## 4 0.127208145 0.10296240
## 5 -0.051222222 0.02596463
## 6 -0.072100000 0.03261889
write.table(merged.mouse.eQTL.min,file="2016-05-09 mouse.liver.expression.min.txt", sep="\t", row.names=FALSE, quote=FALSE)
merged.mouse.eQTL.min.variance2<-read.table(file="2016-05-09 mouse.liver.expression.min.txt", header=T)
head(merged.mouse.eQTL.min.variance2)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.1592383 -0.059800000 0.04174596 5.377718e-01
## 2 ENSMUSG00000000037 0.8635565 -0.005911765 0.03419566 7.788138e-01
## 3 ENSMUSG00000000049 0.8965131 -0.023660714 0.18083869 9.084753e-06
## 4 ENSMUSG00000000056 0.6496243 0.016446429 0.03594950 2.269175e-01
## 5 ENSMUSG00000000058 0.3141176 -0.078136752 0.07671439 5.846922e-02
## 6 ENSMUSG00000000085 0.3630755 0.052847059 0.05748721 3.542074e-02
## liver.beta liver.beta_se
## 1 -0.031972222 0.05124934
## 2 -0.007730159 0.02725858
## 3 0.084466667 0.01561692
## 4 0.127208145 0.10296240
## 5 -0.051222222 0.02596463
## 6 -0.072100000 0.03261889
# caculate the absolute value of live/lung beta
merged.mouse.eQTL.min.variance2$abs_liver.beta<-abs(merged.mouse.eQTL.min.variance2$liver.beta)
merged.mouse.eQTL.min.variance2$abs_lung.beta<-abs(merged.mouse.eQTL.min.variance2$lung.beta)
# caculate negative log lung p value
merged.mouse.eQTL.min.variance2$neg_log_lung_pvalue<--log10(merged.mouse.eQTL.min.variance2$lung_pvalue)
# Simple linear regression between abs_liver.beta and abs_lung.beta
# fit1<-summary(lm(abs_liver.beta ~ abs_lung.beta, data=merged.mouse.eQTL.min.variance2))
# fit1
# tau<-fit1$sigma**2
# check association between abs_liver.beta and abs.lung.beta
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
ggplot(merged.mouse.eQTL.min.variance2, aes(x=abs_lung.beta, y=abs_liver.beta)) +geom_point()+geom_smooth(method=lm)

cor(merged.mouse.eQTL.min.variance2$abs_lung.beta, merged.mouse.eQTL.min.variance2$abs_liver.beta)
## [1] 0.4248095
merged.mouse.eQTL<-merged.mouse.eQTL.min.variance2
# retrieve ensembl_id
markers<-merged.mouse.eQTL[, 1]
# Yg=Ag + Bg*Xsnp+V
# retrieve betas.hat (liver.beta)
betas.hat<-merged.mouse.eQTL$abs_liver.beta
# retrieve liver.beta_se
se<-merged.mouse.eQTL$liver.beta_se
# creat Z matrix with 2 columns: 1 for intercept,abs_lung.beta (merged.mouse.eQTL[,10])
Z<-as.matrix(merged.mouse.eQTL$abs_lung.beta)
Z<-replace(Z,is.na(Z),0)
Z<-data.frame(1,Z)
Z<-as.matrix(Z)
rowLength<-length(markers)
# liver.betas=Z*gama+T^2
# Regression: abs_liver.beta = intercept + beta*abs_lung.beta + error
lmsummary<-summary(lm(abs_liver.beta~-1+Z, data=merged.mouse.eQTL))
lmsummary
##
## Call:
## lm(formula = abs_liver.beta ~ -1 + Z, data = merged.mouse.eQTL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2837 -0.0631 -0.0365 0.0130 4.0819
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## ZX1 0.055966 0.002256 24.81 <2e-16 ***
## ZZ 0.397226 0.008293 47.90 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1985 on 10420 degrees of freedom
## Multiple R-squared: 0.3471, Adjusted R-squared: 0.347
## F-statistic: 2770 on 2 and 10420 DF, p-value: < 2.2e-16
# error ~ N(0, Tau)
tau<-lmsummary$sigma**2
tau
## [1] 0.03940381
# output coeffieients (gamma matrix)
# gamma matrix
gamma<-as.matrix(lmsummary$coefficients[,1])
# trasnpose Z matrix
Z_transpose<-t(Z)
# create identity matrix
identity<-diag(nrow=rowLength)
# original betas.hat
betas.hat<-as.matrix(betas.hat)
#creat V matrix for liver_residual_variance
V <- matrix(0, rowLength, rowLength)
# V, liver residual variance
diag(V) <- merged.mouse.eQTL$liver.beta_se^2
# Creat Tau matrix
Tau<- diag(tau, rowLength, rowLength)
# follow Chen's paper and cacualte s
s <-V + Tau
# create inverse function for inversing diagnoal matrix
diag.inverse <- function(x){diag(1/diag(x), nrow(x), ncol(x))}
# create multiplication function for multiplicating two diagnoal matrix
diag.multi <- function(x,y){diag(diag(x)*diag(y), nrow(x), ncol(x))}
# inverse s
S <-diag.inverse(s)
# follow chen's paper to caculate omega
omega<-diag.multi(S, V)
# retrieve omega value from the matrix
omega.diag<-diag(omega )
# summary the omega value
summary(omega.diag)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003302 0.019490 0.037230 0.071130 0.078250 0.932400
# betas.thea<- S %*% Z %*% gamma + (identity-S) %*% betas.hat
# caculate betas.tieda with the formula in Chen's paper
betas.tieda<- omega %*% Z %*% gamma + (identity-omega) %*% betas.hat
# crbetas.tieda<- cromega %*% Z %*% gamma + (identity-cromega) %*% betas.hat
head(betas.tieda)
## [,1]
## [1,] 0.034956015
## [2,] 0.008666362
## [3,] 0.084349163
## [4,] 0.113489573
## [5,] 0.051824117
## [6,] 0.072227735
head(betas.hat)
## [,1]
## [1,] 0.031972222
## [2,] 0.007730159
## [3,] 0.084466667
## [4,] 0.127208145
## [5,] 0.051222222
## [6,] 0.072100000
#regression beta
regbeta <-Z %*% gamma
head(regbeta)
## [,1]
## [1,] 0.07972016
## [2,] 0.05831437
## [3,] 0.06536471
## [4,] 0.06249901
## [5,] 0.08700399
## [6,] 0.07695827
summary(regbeta)
## V1
## Min. :0.05597
## 1st Qu.:0.07054
## Median :0.08424
## Mean :0.11076
## 3rd Qu.:0.11373
## Max. :2.38071
markers1<-as.character(markers)
# combine ensemble_id, betas.hat and betas.tieda
outputVector<-c(markers1,betas.hat,betas.tieda)
write.table(matrix(outputVector,rowLength),file="2016-04-26_hm_tau_hmresults.txt",col.names=FALSE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian<-read.table(file="2016-04-26_hm_tau_hmresults.txt")
colnames(liver.mouse.eQTL.bayesian)<-c( "ensembl_id", "betas.hat","betas.tieda")
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.tieda
## 1 ENSMUSG00000000001 0.031972222 0.034956015
## 2 ENSMUSG00000000037 0.007730159 0.008666362
## 3 ENSMUSG00000000049 0.084466667 0.084349163
## 4 ENSMUSG00000000056 0.127208145 0.113489573
## 5 ENSMUSG00000000058 0.051222222 0.051824117
## 6 ENSMUSG00000000085 0.072100000 0.072227735
# merge dataset with betas.hat and betas.tieda
liver.mouse.eQTL.bayesian.all<- merge(liver.mouse.eQTL.bayesian, merged.mouse.eQTL.min.variance2, by = "ensembl_id")
head(liver.mouse.eQTL.bayesian.all)
## ensembl_id betas.hat betas.tieda lung_pvalue lung.beta
## 1 ENSMUSG00000000001 0.031972222 0.034956015 0.1592383 -0.059800000
## 2 ENSMUSG00000000037 0.007730159 0.008666362 0.8635565 -0.005911765
## 3 ENSMUSG00000000049 0.084466667 0.084349163 0.8965131 -0.023660714
## 4 ENSMUSG00000000056 0.127208145 0.113489573 0.6496243 0.016446429
## 5 ENSMUSG00000000058 0.051222222 0.051824117 0.3141176 -0.078136752
## 6 ENSMUSG00000000085 0.072100000 0.072227735 0.3630755 0.052847059
## lung.beta_se liver_pvalue liver.beta liver.beta_se abs_liver.beta
## 1 0.04174596 5.377718e-01 -0.031972222 0.05124934 0.031972222
## 2 0.03419566 7.788138e-01 -0.007730159 0.02725858 0.007730159
## 3 0.18083869 9.084753e-06 0.084466667 0.01561692 0.084466667
## 4 0.03594950 2.269175e-01 0.127208145 0.10296240 0.127208145
## 5 0.07671439 5.846922e-02 -0.051222222 0.02596463 0.051222222
## 6 0.05748721 3.542074e-02 -0.072100000 0.03261889 0.072100000
## abs_lung.beta neg_log_lung_pvalue
## 1 0.059800000 0.79795236
## 2 0.005911765 0.06370924
## 3 0.023660714 0.04744338
## 4 0.016446429 0.18733775
## 5 0.078136752 0.50290766
## 6 0.052847059 0.44000309
write.table(liver.mouse.eQTL.bayesian.all,file="2016-05-09_liver.mouse.eQTL.bayesian.all.txt")
liver.mouse.eQTL.bayesian<-read.table(file="2016-05-09_liver.mouse.eQTL.bayesian.all.txt")
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.tieda lung_pvalue lung.beta
## 1 ENSMUSG00000000001 0.031972222 0.034956015 0.1592383 -0.059800000
## 2 ENSMUSG00000000037 0.007730159 0.008666362 0.8635565 -0.005911765
## 3 ENSMUSG00000000049 0.084466667 0.084349163 0.8965131 -0.023660714
## 4 ENSMUSG00000000056 0.127208145 0.113489573 0.6496243 0.016446429
## 5 ENSMUSG00000000058 0.051222222 0.051824117 0.3141176 -0.078136752
## 6 ENSMUSG00000000085 0.072100000 0.072227735 0.3630755 0.052847059
## lung.beta_se liver_pvalue liver.beta liver.beta_se abs_liver.beta
## 1 0.04174596 5.377718e-01 -0.031972222 0.05124934 0.031972222
## 2 0.03419566 7.788138e-01 -0.007730159 0.02725858 0.007730159
## 3 0.18083869 9.084753e-06 0.084466667 0.01561692 0.084466667
## 4 0.03594950 2.269175e-01 0.127208145 0.10296240 0.127208145
## 5 0.07671439 5.846922e-02 -0.051222222 0.02596463 0.051222222
## 6 0.05748721 3.542074e-02 -0.072100000 0.03261889 0.072100000
## abs_lung.beta neg_log_lung_pvalue
## 1 0.059800000 0.79795236
## 2 0.005911765 0.06370924
## 3 0.023660714 0.04744338
## 4 0.016446429 0.18733775
## 5 0.078136752 0.50290766
## 6 0.052847059 0.44000309
liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat",
"liver.beta_se", "betas.tieda",
"liver_pvalue", "abs_lung.beta",
"abs_lung.beta", "neg_log_lung_pvalue"))
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat liver.beta_se betas.tieda liver_pvalue
## 1 ENSMUSG00000000001 0.031972222 0.05124934 0.034956015 5.377718e-01
## 2 ENSMUSG00000000037 0.007730159 0.02725858 0.008666362 7.788138e-01
## 3 ENSMUSG00000000049 0.084466667 0.01561692 0.084349163 9.084753e-06
## 4 ENSMUSG00000000056 0.127208145 0.10296240 0.113489573 2.269175e-01
## 5 ENSMUSG00000000058 0.051222222 0.02596463 0.051824117 5.846922e-02
## 6 ENSMUSG00000000085 0.072100000 0.03261889 0.072227735 3.542074e-02
## abs_lung.beta abs_lung.beta.1 neg_log_lung_pvalue
## 1 0.059800000 0.059800000 0.79795236
## 2 0.005911765 0.005911765 0.06370924
## 3 0.023660714 0.023660714 0.04744338
## 4 0.016446429 0.016446429 0.18733775
## 5 0.078136752 0.078136752 0.50290766
## 6 0.052847059 0.052847059 0.44000309
# Caculate variance for beta.tieda by following Brian Kulis' lecture notes
# Invert Tau and V
Tau_invert<-diag.inverse(Tau)
V_invert<-diag.inverse(V)
PS_invert<-Tau_invert + V_invert
# PS_invert<-Tau_invert+V_invert%*% Z %*% Z_transpose # previous wrong code
# S in Brian Kulis' lecture note:PS
PS <- diag.inverse(PS_invert)
# retrieve posterior variance
ps<-diag(PS)
range(ps)
## [1] 0.0001301085 0.0367406659
library(reshape)
##
## Attaching package: 'reshape'
## The following objects are masked from 'package:plyr':
##
## rename, round_any
## The following object is masked from 'package:data.table':
##
## melt
# reshape posterior variance to long format
ps.long <- melt(ps)
head(ps.long)
## value
## 1 0.0024623639
## 2 0.0007292782
## 3 0.0002423880
## 4 0.0083537506
## 5 0.0006628220
## 6 0.0010360170
# Caculate sd: square root on variance
ps.long$betas.tieda.se<-(ps.long$value)^0.5
# combine sd to the data.frame
liver.mouse.eQTL.bayesian<-cbind(liver.mouse.eQTL.bayesian,ps.long$betas.tieda.se)
# head(liver.mouse.eQTL.bayesian)
# rename betas.tieda.se
liver.mouse.eQTL.bayesian<-rename(liver.mouse.eQTL.bayesian, c("ps.long$betas.tieda.se"="betas.tieda.se", "liver.beta_se"="betas.hat.se"))
liver.mouse.eQTL.bayesian<-subset(liver.mouse.eQTL.bayesian, select = c("ensembl_id", "betas.hat", "betas.hat.se",
"betas.tieda", "betas.tieda.se",
"liver_pvalue", "abs_lung.beta", "neg_log_lung_pvalue"))
# head(liver.mouse.eQTL.bayesian)
# library(tigerstats)
# pnormGC(0, region="below", mean=0.002352829,sd=0.09972950)
# caculate probability of betas.tieda below 0 based on betas.tieda and standard deviation
liver.mouse.eQTL.bayesian$p.below.0 <- pnorm(0,liver.mouse.eQTL.bayesian$betas.tieda, liver.mouse.eQTL.bayesian$betas.tieda.se)
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.031972222 0.05124934 0.034956015 0.04962221
## 2 ENSMUSG00000000037 0.007730159 0.02725858 0.008666362 0.02700515
## 3 ENSMUSG00000000049 0.084466667 0.01561692 0.084349163 0.01556882
## 4 ENSMUSG00000000056 0.127208145 0.10296240 0.113489573 0.09139885
## 5 ENSMUSG00000000058 0.051222222 0.02596463 0.051824117 0.02574533
## 6 ENSMUSG00000000085 0.072100000 0.03261889 0.072227735 0.03218722
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0
## 1 5.377718e-01 0.059800000 0.79795236 2.405785e-01
## 2 7.788138e-01 0.005911765 0.06370924 3.741374e-01
## 3 9.084753e-06 0.023660714 0.04744338 3.016370e-08
## 4 2.269175e-01 0.016446429 0.18733775 1.071744e-01
## 5 5.846922e-02 0.078136752 0.50290766 2.205983e-02
## 6 3.542074e-02 0.052847059 0.44000309 1.241658e-02
dim(liver.mouse.eQTL.bayesian)
## [1] 10422 9
summary(liver.mouse.eQTL.bayesian$betas.tieda.se)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01141 0.02771 0.03830 0.04602 0.05553 0.19170
range(liver.mouse.eQTL.bayesian$p.below.0)
## [1] 0.0000000 0.4868896
write.table(liver.mouse.eQTL.bayesian,file="2016-05-04_liver.mouse.eQTL.bayesian with beta.txt")
liver.mouse.eQTL.bayesian <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian with beta.txt")
# head(liver.mouse.eQTL.bayesian)
# summary(liver.mouse.eQTL.bayesian$liver_residual_variance)
liver.mouse.eQTL.bayesian.4tau <- liver.mouse.eQTL.bayesian
# colnames(liver.mouse.eQTL.bayesian.4tau) <- c("ensembl_id", "betas.hat", "betas.hat.se", "betas.tieda", "betas.tieda.se", "liver_residual_variance", "liver_pvalue", "abs_lung.beta",
# "neg_log_lung_pvalue", "p.below.0", "betas.tieda2m", "betas.tieda3rd", "betas.tieda4max")
# head(liver.mouse.eQTL.bayesian.4tau)
# Introduce weight (Tmm) to adjust Tau with neg_log_lung_pvalue
liver.mouse.eQTL.bayesian.4tau$fzm <- liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue
# caculate ratio_fzm
liver.mouse.eQTL.bayesian.4tau$ratio_fzm <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/liver.mouse.eQTL.bayesian.4tau$fzm
range(liver.mouse.eQTL.bayesian.4tau$ratio_fzm)
## [1] 1 Inf
# set up threshold for ratio_fzm and caculate updated ratio_fzm (nratio_fzm)
threshold <- 0.05
liver.mouse.eQTL.bayesian.4tau$nratio_fzm <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$ratio_fzm > max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))] <- max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log10(threshold))
# liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$nratio_fzm >= max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log(threshold))] <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
summary(liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 24.16 35.84 29.18 35.84 35.84
# compare bayesian prediction to the traditional method
# evaluate the predition with alle specific expreesion in the liver: Sandrine Lagarrigue's paper
liver.ASE <- read.csv(file= "ASE.genetics.113.153882-6.csv")
dim(liver.ASE)
## [1] 1191 19
head(liver.ASE)
## replicate chr startExon geneID SNPperExon3
## 1 M.CH. DxB and BxD 1 9535488 Rrs1 15
## 2 M.HF DxB and BxD 1 9535488 Rrs1 14
## 3 F.HF DxB and BxD 1 9535488 Rrs1 15
## 4 M.CH. DxB and BxD 1 37473929 6330578E17Rik 2
## 5 M.CH. DxB and BxD 1 58169979 Aox3 3
## 6 M.HF DxB and BxD 1 58169979 Aox3 3
## sumBperExon.DxB4 sumDperExon.DxB4 sumBperExon.BxD4 sumDperExon.BxD4
## 1 45 19 50 25
## 2 74 39 66 30
## 3 76 20 77 40
## 4 78 32 47 17
## 5 473 82 225 27
## 6 252 56 263 53
## FCadd1.DxB5 FCadd1.BxD5 BonBD.DxB6 BonBD.BxD6 pvalBH.DxB7 pvalBH.BxD7
## 1 2.30 1.96 0.70 0.67 1.0e-02 3.7e-02
## 2 1.88 2.16 0.65 0.69 1.1e-02 3.3e-03
## 3 3.67 1.90 0.79 0.66 4.9e-07 5.1e-03
## 4 2.39 2.67 0.71 0.73 1.9e-04 2.2e-03
## 5 5.71 8.07 0.85 0.89 3.6e-64 5.4e-37
## 6 4.44 4.89 0.82 0.83 8.7e-28 1.1e-31
## UTR5 UTR3 strand exonCount
## 1 0 0 + 1
## 2 0 0 + 1
## 3 0 0 + 1
## 4 0 0 - 3
## 5 0 0 + 35
## 6 0 0 + 35
# 440 unique gene ID
length(unique(liver.ASE$geneID))
## [1] 440
# verify ASE table
liver.ASE1 <- liver.ASE[which(liver.ASE$replicate == "M.CH. DxB and BxD"), ]
liver.ASE2 <- liver.ASE[which(liver.ASE$replicate == "M.HF DxB and BxD"), ]
liver.ASE3 <- liver.ASE[which(liver.ASE$replicate == "F.HF DxB and BxD"), ]
length(unique(liver.ASE1$geneID))
## [1] 272
length(unique(liver.ASE2$geneID))
## [1] 275
length(unique(liver.ASE3$geneID))
## [1] 304
(length(unique(liver.ASE1$geneID))+length(unique(liver.ASE2$geneID))+length(unique(liver.ASE3$geneID)))/3
## [1] 283.6667
# As claimed in the paper: averaged 284 ASE for each replicate
sub.liver.ASE <-liver.ASE1
summary(sub.liver.ASE$pvalBH.DxB7)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0000000 0.0000058 0.0084070 0.0031000 0.1000000
sub.liver.ASE1 <- subset(sub.liver.ASE, pvalBH.DxB7 < 0.00000000000001)
sub.liver.ASE2 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.00000000000001 & pvalBH.DxB7 < 0.0000058)
sub.liver.ASE3 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.0000058 & pvalBH.DxB7 < 0.0031000)
sub.liver.ASE4 <- subset(sub.liver.ASE, pvalBH.DxB7 >= 0.0031000)
dim(sub.liver.ASE1)
## [1] 89 19
dim(sub.liver.ASE2)
## [1] 97 19
dim(sub.liver.ASE3)
## [1] 93 19
dim(sub.liver.ASE4)
## [1] 94 19
# Subset liver ASE with different conditions
# sub.liver.ASE <-liver.ASE[which(liver.ASE$pvalBH.DxB7 < 0.05 & liver.ASE$pvalBH.BxD7 < 0.05), ]
# sub.liver.ASE <- sub.liver.ASE[order(sub.liver.ASE$pvalBH.BxD7), ]
# summary(sub.liver.ASE$pvalBH.BxD7)
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.BxD7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[ sub.liver.ASE$geneID %in% names(table(sub.liver.ASE$geneID))[table(sub.liver.ASE$geneID) >1] , ]
# check the remain gene number after subsetting
dim(sub.liver.ASE)
## [1] 373 19
liver.ASE.symbol <- unique(sub.liver.ASE$geneID)
liver.ASE.symbol1 <- unique(sub.liver.ASE1$geneID)
liver.ASE.symbol2 <- unique(sub.liver.ASE2$geneID)
liver.ASE.symbol3 <- unique(sub.liver.ASE3$geneID)
liver.ASE.symbol4 <- unique(sub.liver.ASE4$geneID)
length(liver.ASE.symbol)
## [1] 272
# Annoate gene symbol wiht ensemble.ID
library(biomaRt)
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
liver.ASE.ensembl <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol, mart=mouse)
liver.ASE.ensembl1 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol1, mart=mouse)
liver.ASE.ensembl2 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol2, mart=mouse)
liver.ASE.ensembl3 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol3, mart=mouse)
liver.ASE.ensembl4 <- getBM( attributes=c("ensembl_gene_id", "mgi_symbol") , filters=
"mgi_symbol", values =liver.ASE.symbol4, mart=mouse)
dim(liver.ASE.ensembl)
## [1] 241 2
liver.ASE.ensembl <- unique(liver.ASE.ensembl)
# delete liver ASE ensemble ID which are not in the liver.mouse.eQTL.bayesian data frame
liver.ASE.ensembl <- liver.ASE.ensembl[liver.ASE.ensembl$ensembl_gene_id %in% liver.mouse.eQTL.bayesian.4tau$ensembl_id, ]
dim(liver.ASE.ensembl)
## [1] 190 2
# create indicator for ASE true or not
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl1$ensembl_gene_id] <- 1
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl2$ensembl_gene_id] <- 2
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl3$ensembl_gene_id] <- 3
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl4$ensembl_gene_id] <- 4
# liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 5
liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 1
liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 0
summary(liver.mouse.eQTL.bayesian.4tau$eqtl)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.01823 0.00000 1.00000
liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue <- -log10(liver.mouse.eQTL.bayesian.4tau$liver_pvalue)
head(liver.mouse.eQTL.bayesian.4tau)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.031972222 0.05124934 0.034956015 0.04962221
## 2 ENSMUSG00000000037 0.007730159 0.02725858 0.008666362 0.02700515
## 3 ENSMUSG00000000049 0.084466667 0.01561692 0.084349163 0.01556882
## 4 ENSMUSG00000000056 0.127208145 0.10296240 0.113489573 0.09139885
## 5 ENSMUSG00000000058 0.051222222 0.02596463 0.051824117 0.02574533
## 6 ENSMUSG00000000085 0.072100000 0.03261889 0.072227735 0.03218722
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0 fzm
## 1 5.377718e-01 0.059800000 0.79795236 2.405785e-01 0.79795236
## 2 7.788138e-01 0.005911765 0.06370924 3.741374e-01 0.06370924
## 3 9.084753e-06 0.023660714 0.04744338 3.016370e-08 0.04744338
## 4 2.269175e-01 0.016446429 0.18733775 1.071744e-01 0.18733775
## 5 5.846922e-02 0.078136752 0.50290766 2.205983e-02 0.50290766
## 6 3.542074e-02 0.052847059 0.44000309 1.241658e-02 0.44000309
## ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 58.43301 35.83834 0 0.2694020
## 2 731.86805 35.83834 0 0.1085664
## 3 982.78748 35.83834 0 5.0416869
## 4 248.89142 35.83834 0 0.6441321
## 5 92.71435 35.83834 0 1.2330727
## 6 105.96916 35.83834 0 1.4507424
by(liver.mouse.eQTL.bayesian.4tau[, c(1, 7, 9, 14)], liver.mouse.eQTL.bayesian.4tau[, "eqtl"], summary)
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 0
## ensembl_id abs_lung.beta p.below.0
## ENSMUSG00000000001: 1 Min. :0.00000 Min. :0.00000
## ENSMUSG00000000037: 1 1st Qu.:0.03640 1st Qu.:0.01409
## ENSMUSG00000000049: 1 Median :0.07032 Median :0.09311
## ENSMUSG00000000056: 1 Mean :0.13375 Mean :0.13351
## ENSMUSG00000000058: 1 3rd Qu.:0.14167 3rd Qu.:0.22674
## ENSMUSG00000000085: 1 Max. :4.17678 Max. :0.48689
## (Other) :10226
## neg_log_liver_pvalue
## Min. : 0.0000
## 1st Qu.: 0.2931
## Median : 0.6614
## Mean : 1.3702
## 3rd Qu.: 1.3957
## Max. :33.3106
##
## --------------------------------------------------------
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 1
## ensembl_id abs_lung.beta p.below.0
## ENSMUSG00000000275: 1 Min. :0.001944 Min. :0.0000000
## ENSMUSG00000000673: 1 1st Qu.:0.081911 1st Qu.:0.0000000
## ENSMUSG00000001467: 1 Median :0.180165 Median :0.0000009
## ENSMUSG00000001473: 1 Mean :0.363517 Mean :0.0313941
## ENSMUSG00000001604: 1 3rd Qu.:0.376020 3rd Qu.:0.0113437
## ENSMUSG00000002395: 1 Max. :5.852455 Max. :0.4182454
## (Other) :184
## neg_log_liver_pvalue
## Min. : 0.02767
## 1st Qu.: 1.43790
## Median : 4.39619
## Mean : 5.86726
## 3rd Qu.: 8.95591
## Max. :30.93157
##
library(ggplot2)
boxplot(neg_log_liver_pvalue ~ eqtl,data=liver.mouse.eQTL.bayesian.4tau, main="liver.mouse.eQTL",
xlab="ASE cutoff by p value", ylab="liver neg log p")

boxplot(neg_log_lung_pvalue ~ eqtl,data=liver.mouse.eQTL.bayesian.4tau, main="lung.mouse.eQTL",
xlab="ASE cutoff by p value", ylab="lung neg log p")

liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]
plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl), xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue" )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue, , col="red", xlab="neg_log_liver_pvalue", ylab="neg_log_lung_pvalue")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau$betas.hat, liver.mouse.eQTL.bayesian.4tau$betas.tieda, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$betas.hat, liver.mouse.eQTL.bayesian.4tau.ase$betas.tieda, col="red")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

cor(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue)
## [1] 0.526559
length(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue)
## [1] 190
plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$p.below.0, col=factor(liver.mouse.eQTL.bayesian.4tau$eqtl) )
legend("topright", cex = .75, inset=.05, c("ASE","others"), text.col = c("red", "black"), horiz=TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$p.below.0, col="red")
legend("topright", cex = .75, inset=.05, c("ASE"), text.col = c("red"), horiz=TRUE)

head(liver.mouse.eQTL.bayesian.4tau)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.031972222 0.05124934 0.034956015 0.04962221
## 2 ENSMUSG00000000037 0.007730159 0.02725858 0.008666362 0.02700515
## 3 ENSMUSG00000000049 0.084466667 0.01561692 0.084349163 0.01556882
## 4 ENSMUSG00000000056 0.127208145 0.10296240 0.113489573 0.09139885
## 5 ENSMUSG00000000058 0.051222222 0.02596463 0.051824117 0.02574533
## 6 ENSMUSG00000000085 0.072100000 0.03261889 0.072227735 0.03218722
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0 fzm
## 1 5.377718e-01 0.059800000 0.79795236 2.405785e-01 0.79795236
## 2 7.788138e-01 0.005911765 0.06370924 3.741374e-01 0.06370924
## 3 9.084753e-06 0.023660714 0.04744338 3.016370e-08 0.04744338
## 4 2.269175e-01 0.016446429 0.18733775 1.071744e-01 0.18733775
## 5 5.846922e-02 0.078136752 0.50290766 2.205983e-02 0.50290766
## 6 3.542074e-02 0.052847059 0.44000309 1.241658e-02 0.44000309
## ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 58.43301 35.83834 0 0.2694020
## 2 731.86805 35.83834 0 0.1085664
## 3 982.78748 35.83834 0 5.0416869
## 4 248.89142 35.83834 0 0.6441321
## 5 92.71435 35.83834 0 1.2330727
## 6 105.96916 35.83834 0 1.4507424
# Optimizing rho and adjust the weight
library(reshape)
rho.optimization <- matrix(0, nrow=nrow(liver.mouse.eQTL.bayesian.4tau), ncol=7)
colnames(rho.optimization)<-c("rho","tmm","tau", "omega","beta_tieda", "n.betas.tieda.se","p.below.0" )
nomega.diag<-diag(omega )
rho <- seq(1,1.1, by=0.02)*tau # tau = 0.03940381
result <- NULL
for (i in 1:length(rho)) {
rho.optimization[ ,1] <- rho[i]
rho.optimization[ ,2] <- (rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm
rho.optimization[ ,3] <-tau*((rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
nTau<- diag(rho.optimization[ ,3], rowLength, rowLength)
ns<-V + nTau
nS <- diag.inverse(ns)
nomega<-diag.multi(nS, V)
# nomega <- diag(0, rowLength, rowLength) # set nomega to 0 for code checking
# nomega <- diag(1, rowLength, rowLength) # set nomega to 1 for code checking
rho.optimization[ ,4] <- diag(nomega )
rho.optimization[ ,5] <- nomega %*% Z %*% gamma + (identity-nomega) %*% betas.hat
nTau_invert<-diag.inverse(nTau)
V_invert<-diag.inverse(V)
nPS_invert<-nTau_invert+ V_invert
# nPS_invert<-nTau_invert+ diag.multi(diag.multi(V_invert, Z_transpose), Z) # previous wrong code
nPS<-diag.inverse(nPS_invert)
nps<-diag(nPS)
nps.long <- melt(nps)
rho.optimization[ ,6] <-(nps.long$value)^0.5
rho.optimization[ ,7] <- pnorm(0, rho.optimization[ ,5], rho.optimization[ ,6])
result <- rbind(result,rho.optimization)
}
dim(result)
## [1] 62532 7
head(result)
## rho tmm tau omega beta_tieda n.betas.tieda.se
## [1,] 0.03940381 1 0.03940381 0.062490502 0.034956015 0.04962221
## [2,] 0.03940381 1 0.03940381 0.018507810 0.008666362 0.02700515
## [3,] 0.03940381 1 0.03940381 0.006151385 0.084349163 0.01556882
## [4,] 0.03940381 1 0.03940381 0.212003624 0.113489573 0.09139885
## [5,] 0.03940381 1 0.03940381 0.016821267 0.051824117 0.02574533
## [6,] 0.03940381 1 0.03940381 0.026292305 0.072227735 0.03218722
## p.below.0
## [1,] 2.405785e-01
## [2,] 3.741374e-01
## [3,] 3.016370e-08
## [4,] 1.071744e-01
## [5,] 2.205983e-02
## [6,] 1.241658e-02
write.table(result, file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt",col.names=TRUE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian.result <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt", header=T)
result.df <-liver.mouse.eQTL.bayesian.result
result.df$rho.class <- factor(result.df$rho/tau)
# combine liver.mouse.eqtl.bayesian and rho.optimization.result for ploting
a <-liver.mouse.eQTL.bayesian.4tau[, c(1:2, 6, 7)]
a <-rbind(a, a, a, a, a, a)
dim(a)
## [1] 62532 4
new.result.df<-cbind(a, result.df)
head(new.result.df)
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho tmm
## 1 ENSMUSG00000000001 0.031972222 5.377718e-01 0.059800000 0.03940381 1
## 2 ENSMUSG00000000037 0.007730159 7.788138e-01 0.005911765 0.03940381 1
## 3 ENSMUSG00000000049 0.084466667 9.084753e-06 0.023660714 0.03940381 1
## 4 ENSMUSG00000000056 0.127208145 2.269175e-01 0.016446429 0.03940381 1
## 5 ENSMUSG00000000058 0.051222222 5.846922e-02 0.078136752 0.03940381 1
## 6 ENSMUSG00000000085 0.072100000 3.542074e-02 0.052847059 0.03940381 1
## tau omega beta_tieda n.betas.tieda.se p.below.0
## 1 0.03940381 0.062490502 0.034956015 0.04962221 2.405785e-01
## 2 0.03940381 0.018507810 0.008666362 0.02700515 3.741374e-01
## 3 0.03940381 0.006151385 0.084349163 0.01556882 3.016370e-08
## 4 0.03940381 0.212003624 0.113489573 0.09139885 1.071744e-01
## 5 0.03940381 0.016821267 0.051824117 0.02574533 2.205983e-02
## 6 0.03940381 0.026292305 0.072227735 0.03218722 1.241658e-02
## rho.class
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
new.result.df2 <- new.result.df
head(new.result.df$rho.class)
## [1] 1 1 1 1 1 1
## Levels: 1 1.02 1.04 1.06 1.08 1.1
by(new.result.df2, new.result.df2[, "rho.class"], head)
## new.result.df2[, "rho.class"]: 1
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho tmm
## 1 ENSMUSG00000000001 0.031972222 5.377718e-01 0.059800000 0.03940381 1
## 2 ENSMUSG00000000037 0.007730159 7.788138e-01 0.005911765 0.03940381 1
## 3 ENSMUSG00000000049 0.084466667 9.084753e-06 0.023660714 0.03940381 1
## 4 ENSMUSG00000000056 0.127208145 2.269175e-01 0.016446429 0.03940381 1
## 5 ENSMUSG00000000058 0.051222222 5.846922e-02 0.078136752 0.03940381 1
## 6 ENSMUSG00000000085 0.072100000 3.542074e-02 0.052847059 0.03940381 1
## tau omega beta_tieda n.betas.tieda.se p.below.0
## 1 0.03940381 0.062490502 0.034956015 0.04962221 2.405785e-01
## 2 0.03940381 0.018507810 0.008666362 0.02700515 3.741374e-01
## 3 0.03940381 0.006151385 0.084349163 0.01556882 3.016370e-08
## 4 0.03940381 0.212003624 0.113489573 0.09139885 1.071744e-01
## 5 0.03940381 0.016821267 0.051824117 0.02574533 2.205983e-02
## 6 0.03940381 0.026292305 0.072227735 0.03218722 1.241658e-02
## rho.class
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.02
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11000 ENSMUSG00000000001 0.031972222 5.377718e-01 0.059800000 0.04019189
## 21000 ENSMUSG00000000037 0.007730159 7.788138e-01 0.005911765 0.04019189
## 31000 ENSMUSG00000000049 0.084466667 9.084753e-06 0.023660714 0.04019189
## 41000 ENSMUSG00000000056 0.127208145 2.269175e-01 0.016446429 0.04019189
## 51000 ENSMUSG00000000058 0.051222222 5.846922e-02 0.078136752 0.04019189
## 61000 ENSMUSG00000000085 0.072100000 3.542074e-02 0.052847059 0.04019189
## tmm tau omega beta_tieda n.betas.tieda.se
## 11000 2.033368 0.08012243 0.031740531 0.03348777 0.05042944
## 21000 2.033368 0.08012243 0.009188473 0.00819495 0.02713306
## 31000 2.033368 0.08012243 0.003034707 0.08440870 0.01559321
## 41000 2.033368 0.08012243 0.116852122 0.11964674 0.09675989
## 51000 2.033368 0.08012243 0.008343944 0.05152078 0.02585608
## 61000 2.033368 0.08012243 0.013105539 0.07216367 0.03240444
## p.below.0 rho.class
## 11000 2.533285e-01 1.02
## 21000 3.813153e-01 1.02
## 31000 3.095917e-08 1.02
## 41000 1.081303e-01 1.02
## 51000 2.315274e-02 1.02
## 61000 1.297468e-02 1.02
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.04
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11002 ENSMUSG00000000001 0.031972222 5.377718e-01 0.059800000 0.04097996
## 21002 ENSMUSG00000000037 0.007730159 7.788138e-01 0.005911765 0.04097996
## 31002 ENSMUSG00000000049 0.084466667 9.084753e-06 0.023660714 0.04097996
## 41002 ENSMUSG00000000056 0.127208145 2.269175e-01 0.016446429 0.04097996
## 51002 ENSMUSG00000000058 0.051222222 5.846922e-02 0.078136752 0.04097996
## 61002 ENSMUSG00000000085 0.072100000 3.542074e-02 0.052847059 0.04097996
## tmm tau omega beta_tieda n.betas.tieda.se
## 11002 4.077994 0.1606885 0.016082384 0.032740123 0.05083556
## 21002 4.077994 0.1606885 0.004602756 0.007962986 0.02719577
## 31002 4.077994 0.1606885 0.001515470 0.084437718 0.01560508
## 41002 4.077994 0.1606885 0.061890764 0.123203247 0.09972530
## 51002 4.077994 0.1606885 0.004177932 0.051371716 0.02591034
## 61002 4.077994 0.1606885 0.006577900 0.072131957 0.03251143
## p.below.0 rho.class
## 11002 2.597748e-01 1.04
## 21002 3.848366e-01 1.04
## 31002 3.135250e-08 1.04
## 41002 1.083360e-01 1.04
## 51002 2.370201e-02 1.04
## 61002 1.325477e-02 1.04
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.06
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11004 ENSMUSG00000000001 0.031972222 5.377718e-01 0.059800000 0.04176804
## 21004 ENSMUSG00000000037 0.007730159 7.788138e-01 0.005911765 0.04176804
## 31004 ENSMUSG00000000049 0.084466667 9.084753e-06 0.023660714 0.04176804
## 41004 ENSMUSG00000000056 0.127208145 2.269175e-01 0.016446429 0.04176804
## 51004 ENSMUSG00000000058 0.051222222 5.846922e-02 0.078136752 0.04176804
## 61004 ENSMUSG00000000085 0.072100000 3.542074e-02 0.052847059 0.04176804
## tmm tau omega beta_tieda n.betas.tieda.se
## 11004 8.070868 0.318023 0.0081911723 0.032363334 0.05103901
## 21004 8.070868 0.318023 0.0023309579 0.007848068 0.02722679
## 31004 8.070868 0.318023 0.0007663012 0.084452029 0.01561094
## 41004 8.070868 0.318023 0.0322595076 0.125120660 0.10128802
## 51004 8.070868 0.318023 0.0021153698 0.051297914 0.02593716
## 61004 8.070868 0.318023 0.0033344886 0.072116200 0.03256446
## p.below.0 rho.class
## 11004 2.630110e-01 1.06
## 21004 3.865784e-01 1.06
## 31004 3.154781e-08 1.06
## 41004 1.083602e-01 1.06
## 51004 2.397694e-02 1.06
## 61004 1.339487e-02 1.06
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.08
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11006 ENSMUSG00000000001 0.031972222 5.377718e-01 0.059800000 0.04255612
## 21006 ENSMUSG00000000037 0.007730159 7.788138e-01 0.005911765 0.04255612
## 31006 ENSMUSG00000000049 0.084466667 9.084753e-06 0.023660714 0.04255612
## 41006 ENSMUSG00000000056 0.127208145 2.269175e-01 0.016446429 0.04255612
## 51006 ENSMUSG00000000058 0.051222222 5.846922e-02 0.078136752 0.04255612
## 61006 ENSMUSG00000000085 0.072100000 3.542074e-02 0.052847059 0.04255612
## tmm tau omega beta_tieda n.betas.tieda.se
## 11006 15.77074 0.6214271 0.0042087649 0.032173182 0.05114138
## 21006 15.77074 0.6214271 0.0011942555 0.007790569 0.02724230
## 31006 15.77074 0.6214271 0.0003923108 0.084459173 0.01561386
## 41006 15.77074 0.6214271 0.0167733853 0.126122754 0.10209523
## 51006 15.77074 0.6214271 0.0010836857 0.051260998 0.02595056
## 61006 15.77074 0.6214271 0.0017092482 0.072108304 0.03259100
## p.below.0 rho.class
## 11006 2.646409e-01 1.08
## 21006 3.874493e-01 1.08
## 31006 3.164564e-08 1.08
## 41006 1.083512e-01 1.08
## 51006 2.411519e-02 1.08
## 61006 1.346531e-02 1.08
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.1
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11008 ENSMUSG00000000001 0.031972222 5.377718e-01 0.059800000 0.04334419
## 21008 ENSMUSG00000000037 0.007730159 7.788138e-01 0.005911765 0.04334419
## 31008 ENSMUSG00000000049 0.084466667 9.084753e-06 0.023660714 0.04334419
## 41008 ENSMUSG00000000056 0.127208145 2.269175e-01 0.016446429 0.04334419
## 51008 ENSMUSG00000000058 0.051222222 5.846922e-02 0.078136752 0.04334419
## 61008 ENSMUSG00000000085 0.072100000 3.542074e-02 0.052847059 0.04334419
## tmm tau omega beta_tieda n.betas.tieda.se
## 11008 30.44004 1.199453 0.0021849585 0.032076549 0.05119332
## 21008 30.44004 1.199453 0.0006190904 0.007761475 0.02725014
## 31008 30.44004 1.199453 0.0002032915 0.084462783 0.01561533
## 41008 30.44004 1.199453 0.0087609715 0.126641230 0.10251038
## 51008 30.44004 1.199453 0.0005617421 0.051242322 0.02595734
## 61008 30.44004 1.199453 0.0008862776 0.072104306 0.03260443
## p.below.0 rho.class
## 11008 2.654683e-01 1.1
## 21008 3.878898e-01 1.1
## 31008 3.169517e-08 1.1
## 41008 1.083410e-01 1.1
## 51008 2.418533e-02 1.1
## 61008 1.350103e-02 1.1
# choose different rho class for plotting
new.result.df2.rho1 <- new.result.df2[new.result.df2$rho.class == 1, ]
head(new.result.df2.rho1)
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho tmm
## 1 ENSMUSG00000000001 0.031972222 5.377718e-01 0.059800000 0.03940381 1
## 2 ENSMUSG00000000037 0.007730159 7.788138e-01 0.005911765 0.03940381 1
## 3 ENSMUSG00000000049 0.084466667 9.084753e-06 0.023660714 0.03940381 1
## 4 ENSMUSG00000000056 0.127208145 2.269175e-01 0.016446429 0.03940381 1
## 5 ENSMUSG00000000058 0.051222222 5.846922e-02 0.078136752 0.03940381 1
## 6 ENSMUSG00000000085 0.072100000 3.542074e-02 0.052847059 0.03940381 1
## tau omega beta_tieda n.betas.tieda.se p.below.0
## 1 0.03940381 0.062490502 0.034956015 0.04962221 2.405785e-01
## 2 0.03940381 0.018507810 0.008666362 0.02700515 3.741374e-01
## 3 0.03940381 0.006151385 0.084349163 0.01556882 3.016370e-08
## 4 0.03940381 0.212003624 0.113489573 0.09139885 1.071744e-01
## 5 0.03940381 0.016821267 0.051824117 0.02574533 2.205983e-02
## 6 0.03940381 0.026292305 0.072227735 0.03218722 1.241658e-02
## rho.class
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
tail(new.result.df2.rho1)
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 10417 ENSMUSG00000099041 0.12408929 0.02861902 0.10059091 0.03940381
## 10418 ENSMUSG00000099083 0.06022624 0.05474021 0.12891000 0.03940381
## 10419 ENSMUSG00000099116 0.01886111 0.50556296 0.03457738 0.03940381
## 10420 ENSMUSG00000099164 0.07874163 0.10653819 0.08603846 0.03940381
## 10421 ENSMUSG00000099262 0.01569378 0.69418700 0.18600427 0.03940381
## 10422 ENSMUSG00000099305 0.08752778 0.13257346 0.06246296 0.03940381
## tmm tau omega beta_tieda n.betas.tieda.se p.below.0
## 10417 1 0.03940381 0.06836122 0.12216383 0.05190079 0.009291429
## 10418 1 0.03940381 0.02239017 0.06127738 0.02970283 0.019555601
## 10419 1 0.03940381 0.01946146 0.01985053 0.02769216 0.236739937
## 10420 1 0.03940381 0.05355148 0.07935217 0.04593618 0.042043971
## 10421 1 0.03940381 0.03809742 0.02004290 0.03874511 0.302472852
## 10422 1 0.03940381 0.07494278 0.08702193 0.05434180 0.054646275
## rho.class
## 10417 1
## 10418 1
## 10419 1
## 10420 1
## 10421 1
## 10422 1
# rank with p.below.0: bayesian modeling
new.result.df2.rho1$rank.p.below.0 <- rank(new.result.df2.rho1$p.below.0)
# rank with liver p value: traditionsl linear regression (one step regression)
new.result.df2.rho1$rank.liver.pvalue <- rank(new.result.df2.rho1$liver_pvalue)
new.result.df2.rho1 <- new.result.df2.rho1[order(new.result.df2.rho1$rank.p.below.0), ]
head(new.result.df2.rho1)
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 2524 ENSMUSG00000022680 4.190552 4.891311e-34 4.176783 0.03940381
## 3125 ENSMUSG00000024735 2.742250 1.117845e-27 1.115350 0.03940381
## 4677 ENSMUSG00000028656 2.129254 4.552986e-26 2.934903 0.03940381
## 9191 ENSMUSG00000057132 2.458249 7.377910e-27 3.093200 0.03940381
## 10012 ENSMUSG00000073411 5.526800 1.170670e-31 5.852455 0.03940381
## 573 ENSMUSG00000005161 1.978946 5.933016e-24 1.987289 0.03940381
## tmm tau omega beta_tieda n.betas.tieda.se p.below.0
## 2524 1 0.03940381 0.07103865 4.014699 0.05290741 0.000000e+00
## 3125 1 0.03940381 0.08592142 2.549508 0.05818618 0.000000e+00
## 4677 1 0.03940381 0.06903123 2.066610 0.05215452 0.000000e+00
## 9191 1 0.03940381 0.07970032 2.364714 0.05604013 0.000000e+00
## 10012 1 0.03940381 0.16468321 5.008692 0.08055524 0.000000e+00
## 573 1 0.03940381 0.08371173 1.884052 0.05743310 2.557521e-236
## rho.class rank.p.below.0 rank.liver.pvalue
## 2524 1 3 1
## 3125 1 3 3
## 4677 1 3 5
## 9191 1 3 4
## 10012 1 3 2
## 573 1 6 6
# caculate TPR: true positive rate
# caculate PPV: positive predictive rate
result.rho1 <- matrix(, nrow(new.result.df2.rho1), 8)
colnames(result.rho1)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR" )
for (i in 1:nrow(new.result.df2.rho1))
{
newdata1.rho1 <- subset(new.result.df2.rho1, rank.p.below.0 <= i)
overlap.newdata1.rho1 <- newdata1.rho1[newdata1.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
result.rho1[i, 1] <- i
result.rho1[i, 2] <- nrow(overlap.newdata1.rho1)/nrow(newdata1.rho1)
result.rho1[i, 3] <- nrow(overlap.newdata1.rho1)/nrow(liver.ASE.ensembl)
newdata2.rho1 <- subset(new.result.df2.rho1, rank.liver.pvalue <= i)
overlap.newdata2.rho1 <- newdata2.rho1[newdata2.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
result.rho1[i, 5] <- i
result.rho1[i, 6] <- nrow(overlap.newdata2.rho1)/nrow(newdata2.rho1)
result.rho1[i, 7] <- nrow(overlap.newdata2.rho1)/nrow(liver.ASE.ensembl)
}
head(result.rho1)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [1,] 1 NaN 0.00000000 NA 1 0.0000000 0.000000000
## [2,] 2 NaN 0.00000000 NA 2 0.5000000 0.005263158
## [3,] 3 0.4 0.01052632 NA 3 0.6666667 0.010526316
## [4,] 4 0.4 0.01052632 NA 4 0.5000000 0.010526316
## [5,] 5 0.4 0.01052632 NA 5 0.4000000 0.010526316
## [6,] 6 0.5 0.01578947 NA 6 0.5000000 0.015789474
## ori_FPR
## [1,] NA
## [2,] NA
## [3,] NA
## [4,] NA
## [5,] NA
## [6,] NA
tail(result.rho1)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [10417,] 10417 0.01823942 1 NA 10417 0.01823942 1
## [10418,] 10418 0.01823767 1 NA 10418 0.01823767 1
## [10419,] 10419 0.01823592 1 NA 10419 0.01823592 1
## [10420,] 10420 0.01823417 1 NA 10420 0.01823417 1
## [10421,] 10421 0.01823242 1 NA 10421 0.01823242 1
## [10422,] 10422 0.01823067 1 NA 10422 0.01823067 1
## ori_FPR
## [10417,] NA
## [10418,] NA
## [10419,] NA
## [10420,] NA
## [10421,] NA
## [10422,] NA
# ploting "True positive rate"
plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4) , xlim=c(0, 300))
par(new=TRUE)
plot( result.rho1[, 1], result.rho1[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4), xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# ploting "positive predictive value"
plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

plot(result.rho1[, 1], result.rho1[, 2], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
par(new=TRUE)
plot(result.rho1[, 5], result.rho1[, 6], type="l", col="green", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# Retrieve MT-eQTLs result
MTeQTLs <-read.table(file="MT-eQTLs.txt", header=T)
head(MTeQTLs)
## SNP gene isEQTL.Liver isEQTL.lung marginalP.Liver
## 1 rs6269442 1424963_at 0 0 0.9260306
## 2 rs6365999 1424963_at 0 0 0.9260306
## 3 rs6376963 1424963_at 0 0 0.9306985
## 4 rs3677817 1424963_at 0 0 0.9329117
## 5 rs6269442 1424964_at 0 0 0.9438114
## 6 rs6365999 1424964_at 0 0 0.9438114
## marginalP.lung
## 1 0.9027543
## 2 0.9027543
## 3 0.9104129
## 4 0.9140456
## 5 0.9219186
## 6 0.9219186
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt", header=T)
MTeQTLs<-merge(MTeQTLs, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
# Retrieve MT-eQTLs result
MTeQTLs <-read.table(file="MT-eQTLs.txt", header=T)
head(MTeQTLs)
## SNP gene isEQTL.Liver isEQTL.lung marginalP.Liver
## 1 rs6269442 1424963_at 0 0 0.9260306
## 2 rs6365999 1424963_at 0 0 0.9260306
## 3 rs6376963 1424963_at 0 0 0.9306985
## 4 rs3677817 1424963_at 0 0 0.9329117
## 5 rs6269442 1424964_at 0 0 0.9438114
## 6 rs6365999 1424964_at 0 0 0.9438114
## marginalP.lung
## 1 0.9027543
## 2 0.9027543
## 3 0.9104129
## 4 0.9140456
## 5 0.9219186
## 6 0.9219186
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt", header=T)
MTeQTLs<-merge(MTeQTLs, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
MTeQTLs.min <- data.table(MTeQTLs, key=c('ensembl_id', "marginalP.Liver"))
MTeQTLs.min <-MTeQTLs.min[J(unique(ensembl_id)),mult="first"]
merged.eQTL <- merge(new.result.df2.rho1, MTeQTLs.min, by ="ensembl_id")
merged.eQTL$rank.marginalP.Liver <- rank(merged.eQTL$marginalP.Liver)
merged.result <- matrix(, nrow(merged.eQTL), 12)
colnames(merged.result)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR", "MTrank","MTppv","MT_TPR","MT_FPR" )
for (i in 1:nrow(merged.eQTL))
{
newdata1 <- subset(merged.eQTL, rank.p.below.0 <= i)
overlap.newdata1 <- newdata1[newdata1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
merged.result[i, 1] <- i
merged.result[i, 2] <- nrow(overlap.newdata1)/nrow(newdata1)
merged.result[i, 3] <- nrow(overlap.newdata1)/nrow(liver.ASE.ensembl)
merged.result[i, 4] <- (nrow(newdata1)-nrow(overlap.newdata1)) / (nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
newdata2 <- subset(merged.eQTL, rank.liver.pvalue <= i)
overlap.newdata2 <- newdata2[newdata2$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
merged.result[i, 5] <- i
merged.result[i, 6] <- nrow(overlap.newdata2)/nrow(newdata2)
merged.result[i, 7] <- nrow(overlap.newdata2)/nrow(liver.ASE.ensembl)
merged.result[i, 8] <- (nrow(newdata2)-nrow(overlap.newdata2)) / (nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
newdata3 <- subset(merged.eQTL, rank.liver.pvalue <= i)
overlap.newdata3 <- newdata3[newdata3$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id, ]
merged.result[i, 9] <- i
merged.result[i, 10] <- nrow(overlap.newdata3)/nrow(newdata3)
merged.result[i, 11] <- nrow(overlap.newdata3)/nrow(liver.ASE.ensembl)
merged.result[i, 12] <- (nrow(newdata3)-nrow(overlap.newdata3)) /(nrow(merged.eQTL)-nrow(liver.ASE.ensembl))
}
head(merged.result)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [1,] 1 NaN 0.00000000 0.0000000000 1 0.0000000 0.000000000
## [2,] 2 NaN 0.00000000 0.0000000000 2 0.5000000 0.005263158
## [3,] 3 0.4 0.01052632 0.0002931978 3 0.6666667 0.010526316
## [4,] 4 0.4 0.01052632 0.0002931978 4 0.5000000 0.010526316
## [5,] 5 0.4 0.01052632 0.0002931978 5 0.4000000 0.010526316
## [6,] 6 0.5 0.01578947 0.0002931978 6 0.5000000 0.015789474
## ori_FPR MTrank MTppv MT_TPR MT_FPR
## [1,] 0.0000977326 1 0.0000000 0.000000000 0.0000977326
## [2,] 0.0000977326 2 0.5000000 0.005263158 0.0000977326
## [3,] 0.0000977326 3 0.6666667 0.010526316 0.0000977326
## [4,] 0.0001954652 4 0.5000000 0.010526316 0.0001954652
## [5,] 0.0002931978 5 0.4000000 0.010526316 0.0002931978
## [6,] 0.0002931978 6 0.5000000 0.015789474 0.0002931978
tail(merged.result)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [10417,] 10417 0.01823942 1 0.9995113 10417 0.01823942 1
## [10418,] 10418 0.01823767 1 0.9996091 10418 0.01823767 1
## [10419,] 10419 0.01823592 1 0.9997068 10419 0.01823592 1
## [10420,] 10420 0.01823417 1 0.9998045 10420 0.01823417 1
## [10421,] 10421 0.01823242 1 0.9999023 10421 0.01823242 1
## [10422,] 10422 0.01823067 1 1.0000000 10422 0.01823067 1
## ori_FPR MTrank MTppv MT_TPR MT_FPR
## [10417,] 0.9995113 10417 0.01823942 1 0.9995113
## [10418,] 0.9996091 10418 0.01823767 1 0.9996091
## [10419,] 0.9997068 10419 0.01823592 1 0.9997068
## [10420,] 0.9998045 10420 0.01823417 1 0.9998045
## [10421,] 0.9999023 10421 0.01823242 1 0.9999023
## [10422,] 1.0000000 10422 0.01823067 1 1.0000000
# ploting "True positive rate"
plot(merged.result[, 1], merged.result[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 1) )
par(new=TRUE)
plot( merged.result[, 1], merged.result[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
par(new=TRUE)
plot(merged.result[, 1], merged.result[, 11], type="l", col="blue", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

plot(merged.result[, 1], merged.result[, 3], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 1), xlim=c(0, 300) )
par(new=TRUE)
plot( merged.result[, 1], merged.result[, 7], type="l", col="green", xlab="Ranking", ylab="TPR", ylim=c(0, 1), xlim=c(0, 300))
par(new=TRUE)
plot(merged.result[, 1], merged.result[, 11], type="l", col="blue", xlab="Ranking", ylab="TPR", ylim=c(0, 1), xlim=c(0, 300))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

plot(merged.result[, 4], merged.result[, 3], type="l", col="red", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1) )
par(new=TRUE)
plot( merged.result[, 8], merged.result[, 7], type="l", col="green", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1))
par(new=TRUE)
plot(merged.result[, 12], merged.result[, 11], type="l", col="blue", xlab="False positive rate", ylab="True positive rate", ylim=c(0, 1))
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)
title(main = "ROC curve")
