# Start from an empty global environment and run a garbage collection.
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 370865 19.9 750400 40.1 592000 31.7
## Vcells 623904 4.8 1308461 10.0 1007471 7.7
# All relative file names used below are resolved against this directory.
setwd("/Volumes/Transcend/Thesis_project/subsetted_liver")
# Number of sample columns to keep when subsetting each dataset.
sebsetn <- 15
# Read the liver expression table; the first line of the file holds the
# column names.
mouse.liver.expression.eqtl <- read.table(
  file = "2015-12-07 mouse.liver.expression.eqtl.txt",
  header = TRUE
)
# Preview the first rows.
head(mouse.liver.expression.eqtl)
## ProbeSet BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18
## 1 1415670_at 10.090 10.200 10.300 10.208 9.830 10.238 9.914 10.348
## 2 1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3 1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4 1415673_at 7.535 7.382 7.566 7.162 7.403 7.342 7.213 7.581
## 5 1415674_a_at 9.757 9.972 9.269 9.873 9.354 9.918 9.459 9.655
## 6 1415675_at 9.029 9.009 9.245 9.282 9.415 9.098 9.060 8.937
## BXD19 BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31
## 1 9.939 9.871 10.077 10.159 9.746 9.890 10.286 10.177 9.959 9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4 7.551 7.368 7.251 7.373 7.408 7.473 7.238 7.424 7.475 7.491
## 5 9.544 9.557 9.460 9.322 9.795 9.758 9.628 9.263 9.671 9.645
## 6 8.995 9.104 9.123 9.086 9.087 9.040 8.844 9.173 8.951 9.131
## BXD32 BXD33 BXD34 BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6
## 1 10.059 10.102 10.174 10.022 10.364 9.745 10.074 9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4 7.419 7.386 7.496 7.369 7.265 7.130 7.149 7.383 7.439 7.233
## 5 9.368 9.741 9.721 9.684 9.387 9.649 9.693 9.793 9.840 9.519
## 6 9.108 9.122 9.153 9.008 9.138 9.126 9.233 9.060 9.134 9.081
## BXD8 BXD9
## 1 9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4 7.397 7.390
## 5 9.340 9.160
## 6 8.926 9.274
# Size of the full expression table (rows, columns).
dim(mouse.liver.expression.eqtl)
## [1] 20855 31
# Keep column 1 plus `sebsetn` of the remaining columns, drawn without
# replacement. The fixed seed makes the draw reproducible.
set.seed(50)
sub.mouse.liver.expression.eqtl <- mouse.liver.expression.eqtl[
  ,
  c(
    1,
    sample(2:ncol(mouse.liver.expression.eqtl), size = sebsetn, replace = FALSE)
  )
]
head(sub.mouse.liver.expression.eqtl)
## ProbeSet BXD36 BXD24 BXD15 BXD34 BXD24a BXD11 BXD29 BXD27
## 1 1415670_at 10.022 9.746 10.238 10.174 9.890 10.200 9.959 10.286
## 2 1415671_at 10.923 11.104 11.120 11.059 10.979 11.088 11.071 10.905
## 3 1415672_at 11.545 11.499 11.318 11.490 11.531 11.417 11.502 11.666
## 4 1415673_at 7.369 7.408 7.342 7.496 7.473 7.382 7.475 7.238
## 5 1415674_a_at 9.684 9.795 9.918 9.721 9.758 9.972 9.671 9.628
## 6 1415675_at 9.008 9.087 9.098 9.153 9.040 9.009 8.951 8.844
## BXD1 BXD12 BXD18 BXD6 BXD21 BXD40 BXD14
## 1 10.090 10.300 10.348 10.069 10.159 10.074 9.830
## 2 10.932 11.007 11.123 11.043 10.969 11.067 10.955
## 3 11.432 11.442 11.561 11.504 11.328 11.543 11.561
## 4 7.535 7.566 7.581 7.233 7.373 7.149 7.403
## 5 9.757 9.269 9.655 9.519 9.322 9.693 9.354
## 6 9.029 9.245 8.937 9.081 9.086 9.233 9.415
# Size of the expression subset.
dim(sub.mouse.liver.expression.eqtl)
## [1] 20855 16
# Save the expression subset as a tab-separated file, without row names or
# quoting.
write.table(
  sub.mouse.liver.expression.eqtl,
  file = "2016-05-09 sub.mouse.liver.expression.eqtl.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
# Subset the liver SNP genotype data: read the full genotype table first.
BXD.geno.SNP.eqtl.for.liver <- read.table(
  file = "2015-12-07 BXD.geno.SNP.eqtl.for.liver.txt",
  header = TRUE
)
head(BXD.geno.SNP.eqtl.for.liver)
## Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs6269442 0 0 1 0 0 1 1 0 1 0
## 2 rs6365999 0 0 1 0 0 1 1 0 1 0
## 3 rs6376963 0 0 1 0 0 1 1 0 1 0
## 4 rs3677817 0 0 1 0 0 1 1 0 1 0
## 5 rs8236463 0 1 1 0 0 1 1 0 1 0
## 6 rs6333200 0 1 1 0 0 1 1 0 1 0
## BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1 1 1 0 0 0 1 1 0 0 0 0 0
## 2 1 1 0 0 0 1 1 0 0 0 0 0
## 3 1 1 0 0 0 1 1 0 0 0 0 0
## 4 1 1 0 0 0 1 1 0 0 0 0 0
## 5 1 1 0 0 0 1 1 0 0 0 0 0
## 6 1 1 0 0 0 1 1 0 0 0 0 0
## BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1 0 1 0 0 1 1 1 0
## 2 0 1 0 0 1 1 1 0
## 3 0 1 0 0 1 1 1 0
## 4 0 1 0 0 1 1 1 0
## 5 0 1 0 0 1 1 1 0
## 6 0 1 0 0 1 1 1 0
# Size of the full genotype table (rows, columns).
dim(BXD.geno.SNP.eqtl.for.liver)
## [1] 3811 31
# Draw the genotype columns with the same seed and the same call that were
# used for the expression table, so both subsets are meant to hold the same
# strains in the same order.
set.seed(50)
sub.BXD.geno.SNP.eqtl.for.liver <- BXD.geno.SNP.eqtl.for.liver[
  ,
  c(1, sample(2:ncol(BXD.geno.SNP.eqtl.for.liver), sebsetn, replace = FALSE))
]
# Re-using the seed only yields matching strains when both input files list
# their sample columns in the same order. Fail fast here instead of carrying
# mismatched samples into the eQTL analysis.
# NOTE(review): presumably MatrixEQTL pairs samples by column position --
# confirm against its documentation.
stopifnot(
  identical(
    names(sub.BXD.geno.SNP.eqtl.for.liver)[-1],
    names(sub.mouse.liver.expression.eqtl)[-1]
  )
)
head(sub.BXD.geno.SNP.eqtl.for.liver)
## Locus BXD36 BXD24 BXD15 BXD34 BXD24a BXD11 BXD29 BXD27 BXD1 BXD12
## 1 rs6269442 0 0 1 0 0 0 1 0 0 1
## 2 rs6365999 0 0 1 0 0 0 1 0 0 1
## 3 rs6376963 0 0 1 0 0 0 1 0 0 1
## 4 rs3677817 0 0 1 0 0 0 1 0 0 1
## 5 rs8236463 0 0 1 0 0 1 1 0 0 1
## 6 rs6333200 0 0 1 0 0 1 1 0 0 1
## BXD18 BXD6 BXD21 BXD40 BXD14
## 1 0 1 1 0 0
## 2 0 1 1 0 0
## 3 0 1 1 0 0
## 4 0 1 1 0 0
## 5 0 1 1 0 0
## 6 0 1 1 0 0
# Size of the genotype subset.
dim(sub.BXD.geno.SNP.eqtl.for.liver)
## [1] 3811 16
# Save the genotype subset as a tab-separated file, without row names or
# quoting.
write.table(
  sub.BXD.geno.SNP.eqtl.for.liver,
  file = "2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
library(MatrixEQTL)
## Directory that holds the input data files.
base.dir <- "/Volumes/Transcend/Thesis_project/subsetted_liver"
## Settings
# Linear model to use: modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel <- modelLINEAR
# Genotype file and SNP location file
SNP_file_name <- paste0(base.dir, "/2016-05-09 sub.BXD.geno.SNP.eqtl.for.liver.txt")
snps_location_file_name <- paste0(base.dir, "/2015-12-07 BXD.geno.loc.eqtl.for.liver.txt")
# Gene expression file and gene location file
expression_file_name <- paste0(base.dir, "/2016-05-09 sub.mouse.liver.expression.eqtl.txt")
gene_location_file_name <- paste0(base.dir, "/2015-12-07 liver.gene.loc.txt")
# Covariates file name; character() means no covariates
covariates_file_name <- character()
# Output files for cis and trans results (temporary files)
output_file_name_cis <- tempfile()
output_file_name_tra <- tempfile()
# Only associations significant at these levels will be saved
pvOutputThreshold_cis <- 1
pvOutputThreshold_tra <- 0.000000000000005
# Error covariance matrix; numeric() means identity
errorCovariance <- numeric()
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Maximum distance for local (cis) gene-SNP pairs
cisDist <- 1e6
## Load genotype data
snps <- SlicedData$new()
snps$fileDelimiter <- "\t"       # columns are separated by the TAB character
snps$fileOmitCharacters <- "NA"  # marker for missing values
snps$fileSkipRows <- 1           # skip one leading row
snps$fileSkipColumns <- 1        # skip one leading column
snps$fileSliceSize <- 2000       # matches the 2,000-row steps in the log below
snps$LoadFile(SNP_file_name)
## Rows read: 2,000
## Rows read: 3811 done.
## Load gene expression data (same file layout settings as the genotypes)
gene <- SlicedData$new()
gene$fileDelimiter <- "\t"
gene$fileOmitCharacters <- "NA"  # marker for missing values
gene$fileSkipRows <- 1
gene$fileSkipColumns <- 1
gene$fileSliceSize <- 2000
gene$LoadFile(expression_file_name)
## Rows read: 2,000
## Rows read: 4,000
## Rows read: 6,000
## Rows read: 8,000
## Rows read: 10,000
## Rows read: 12,000
## Rows read: 14,000
## Rows read: 16,000
## Rows read: 18,000
## Rows read: 20,000
## Rows read: 20855 done.
## Load covariates
cvrt <- SlicedData$new()
cvrt$fileDelimiter <- "\t"       # the TAB character
cvrt$fileOmitCharacters <- "NA"  # marker for missing values
cvrt$fileSkipRows <- 1           # one row of column labels
cvrt$fileSkipColumns <- 1        # one column of row labels
# A covariate file is only read when a file name was configured.
if (length(covariates_file_name) > 0) {
  cvrt$LoadFile(covariates_file_name)
}
## Run the analysis
# Location tables for the SNPs and the genes; strings stay character vectors.
snpspos <- read.table(
  snps_location_file_name,
  header = TRUE,
  stringsAsFactors = FALSE
)
genepos <- read.table(
  gene_location_file_name,
  header = TRUE,
  stringsAsFactors = FALSE
)
head(genepos)
## probe_id Chromosome start_location end_location
## 1 1415670_at 6 87887971 87890759
## 2 1415671_at 8 105524469 105566040
## 3 1415672_at 8 23241325 23257080
## 4 1415673_at 5 129765557 129787253
## 5 1415674_a_at 9 44403758 44407548
## 6 1415675_at 2 32570857 32573571
# Run the combined cis/trans eQTL scan.
# The covariates object and the error covariance configured earlier in the
# script are now passed through. Previously `cvrt` was never handed over and
# `errorCovariance` was hard-coded to numeric(), so changing either setting
# had no effect. With the current settings (no covariate file, numeric()
# error covariance) the same values are passed as before.
me <- Matrix_eQTL_main(
  snps = snps,
  gene = gene,
  cvrt = cvrt,
  output_file_name = output_file_name_tra,
  pvOutputThreshold = pvOutputThreshold_tra,
  useModel = useModel,
  errorCovariance = errorCovariance,
  verbose = TRUE,
  output_file_name.cis = output_file_name_cis,
  pvOutputThreshold.cis = pvOutputThreshold_cis,
  snpspos = snpspos,
  genepos = genepos,
  cisDist = cisDist,
  pvalue.hist = TRUE,
  min.pv.by.genesnp = FALSE,
  noFDRsaveMemory = FALSE
)
## Matching data files and location files
## 20855 of 20855 genes matched
## 3811 of 3811 SNPs matched
## Task finished in 0.054 seconds
## Reordering genes
##
## Task finished in 0.119 seconds
## Processing covariates
## Task finished in 0.001 seconds
## Processing gene expression data (imputation, residualization, etc.)
## Task finished in 0.013 seconds
## Creating output file(s)
## Task finished in 0.011 seconds
## Performing eQTL analysis
## 4.54% done, 7,118 cis-eQTLs, 0 trans-eQTLs
## 9.09% done, 13,788 cis-eQTLs, 0 trans-eQTLs
## 13.63% done, 21,752 cis-eQTLs, 0 trans-eQTLs
## 18.18% done, 27,806 cis-eQTLs, 0 trans-eQTLs
## 22.72% done, 34,278 cis-eQTLs, 0 trans-eQTLs
## 27.27% done, 35,915 cis-eQTLs, 0 trans-eQTLs
## 31.81% done, 0 trans-eQTLs
## 36.36% done, 0 trans-eQTLs
## 40.90% done, 0 trans-eQTLs
## 45.45% done, 0 trans-eQTLs
## 50.00% done, 0 trans-eQTLs
## 54.54% done, 0 trans-eQTLs
## 59.09% done, 0 trans-eQTLs
## 63.63% done, 0 trans-eQTLs
## 68.18% done, 0 trans-eQTLs
## 72.72% done, 0 trans-eQTLs
## 77.27% done, 40,035 cis-eQTLs, 0 trans-eQTLs
## 81.81% done, 47,465 cis-eQTLs, 0 trans-eQTLs
## 86.36% done, 53,735 cis-eQTLs, 0 trans-eQTLs
## 90.90% done, 60,292 cis-eQTLs, 17 trans-eQTLs
## 95.45% done, 67,748 cis-eQTLs, 17 trans-eQTLs
## 100.00% done, 69,174 cis-eQTLs, 17 trans-eQTLs
## Task finished in 8.867 seconds
##
# Remove both temporary result files. The results used below are read from
# `me`, and previously only the cis file was deleted, leaving the trans file
# behind.
unlink(output_file_name_cis)
unlink(output_file_name_tra)
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')
## Analysis done in: 8.404 seconds
cat('Detected local eQTLs:','\n')
## Detected local eQTLs:
# Table of local (cis) eQTLs returned by the analysis.
cis.eqtls <- me$cis$eqtls
head(cis.eqtls)
## snps gene statistic pvalue FDR beta
## 1 rs4163042 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 2 116Mit88 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 3 rs4163058 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 4 rs4163391 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 5 rs4151923 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
## 6 rs3090019 1452705_at -51.52246 2.034364e-16 1.759064e-12 -4.255833
dim(cis.eqtls)
## [1] 69174 6
# Standard error of each effect estimate: beta divided by its test statistic.
cis.eqtls$beta_se <- cis.eqtls$beta / cis.eqtls$statistic
write.table(
  cis.eqtls,
  file = "2016-05-09 sub.mouseliver.cis.1M.eqtls.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
# Load the mouse lung cis-eQTL result
lung.mouse.eQTL <- read.table(
  file = "2015-12-04 mouselung.cis.1M.eqtls.txt",
  header = TRUE
)
# Load the mouse liver cis-eQTL result written just above
liver.mouse.eQTL <- read.table(
  file = "2016-05-09 sub.mouseliver.cis.1M.eqtls.txt",
  header = TRUE
)
# Probe-to-Ensembl ID tables, one per result set
mouse4302ensembl_id <- read.table(
  file = "2015-12-04 mouse4302ensembl_id.txt",
  header = TRUE
)
mouse430aensembl_id <- read.table(
  file = "2015-12-07 mouse430aensembl_id.txt",
  header = TRUE
)
# Add the Ensembl ID annotation: join each result's `gene` column to the
# matching table's `probe_id` column.
lung.mouse.eQTL <- merge(
  lung.mouse.eQTL, mouse4302ensembl_id,
  by.x = "gene", by.y = "probe_id"
)
liver.mouse.eQTL <- merge(
  liver.mouse.eQTL, mouse430aensembl_id,
  by.x = "gene", by.y = "probe_id"
)
head(lung.mouse.eQTL)
## gene snps statistic pvalue FDR beta
## 1 1415670_at rs13475374 1.0503967 0.2994035 0.5712395 0.06286667
## 2 1415670_at rs13478876 0.9133601 0.3661462 0.6356597 0.05583410
## 3 1415670_at rs3713705 1.1363521 0.2621028 0.5310916 0.06786667
## 4 1415670_at rs13478880 1.1363521 0.2621028 0.5310916 0.06786667
## 5 1415670_at gnf06.086.089 0.9133601 0.3661462 0.6356597 0.05583410
## 6 1415672_at rs13479651 -1.3052534 0.1987480 0.4510718 -0.03993582
## beta_se ensembl_id
## 1 0.05985040 ENSMUSG00000030058
## 2 0.06113044 ENSMUSG00000030058
## 3 0.05972327 ENSMUSG00000030058
## 4 0.05972327 ENSMUSG00000030058
## 5 0.06113044 ENSMUSG00000030058
## 6 0.03059622 ENSMUSG00000015341
# Preview the liver cis-eQTL table after the Ensembl ID merge.
head(liver.mouse.eQTL)
## gene snps statistic pvalue FDR beta
## 1 1415670_at rs13475374 -0.4855043 0.6353986 0.8846784 -0.04598214
## 2 1415670_at rs13478880 -0.4855043 0.6353986 0.8846784 -0.04598214
## 3 1415670_at gnf06.086.089 -0.4855043 0.6353986 0.8846784 -0.04598214
## 4 1415670_at rs13478876 -0.4855043 0.6353986 0.8846784 -0.04598214
## 5 1415670_at rs3713705 -0.4855043 0.6353986 0.8846784 -0.04598214
## 6 1415671_at rs13479962 0.3050090 0.7651877 0.9346530 0.01290000
## beta_se ensembl_id
## 1 0.09471005 ENSMUSG00000030058
## 2 0.09471005 ENSMUSG00000030058
## 3 0.09471005 ENSMUSG00000030058
## 4 0.09471005 ENSMUSG00000030058
## 5 0.09471005 ENSMUSG00000030058
## 6 0.04229383 ENSMUSG00000013160
library(data.table)
# For every Ensembl ID keep the gene-SNP pair with the smallest p-value:
# key the table by (ensembl_id, pvalue), then take the first row per ID.
lung.mouse.eQTL.min <- data.table(
  lung.mouse.eQTL,
  key = c("ensembl_id", "pvalue")
)
lung.mouse.eQTL.min <- as.data.frame(
  lung.mouse.eQTL.min[J(unique(ensembl_id)), mult = "first"]
)
liver.mouse.eQTL.min <- data.table(
  liver.mouse.eQTL,
  key = c("ensembl_id", "pvalue")
)
liver.mouse.eQTL.min <- as.data.frame(
  liver.mouse.eQTL.min[J(unique(ensembl_id)), mult = "first"]
)
library(plyr)
# Prefix the statistics with their tissue so both tables can be merged later.
lung.mouse.eQTL.min <- rename(
  lung.mouse.eQTL.min,
  c("pvalue" = "lung_pvalue", "beta" = "lung.beta", "beta_se" = "lung.beta_se")
)
liver.mouse.eQTL.min <- rename(
  liver.mouse.eQTL.min,
  c("pvalue" = "liver_pvalue", "beta" = "liver.beta", "beta_se" = "liver.beta_se")
)
head(lung.mouse.eQTL.min)
## gene snps statistic lung_pvalue FDR
## 1 1428645_at rs3702359 -1.4324741 0.1592383 0.3935788
## 2 1421514_a_at gnfX.141.820 -0.1728806 0.8635565 0.9457289
## 3 1416677_at rs3720981 -0.1308388 0.8965131 0.9603432
## 4 1451677_at CEL-11_120628029 0.4574870 0.6496243 0.8393760
## 5 1425955_at rs13478643 -1.0185410 0.3141176 0.5866782
## 6 1426241_a_at rs4224744 0.9192838 0.3630755 0.6327046
## lung.beta lung.beta_se ensembl_id
## 1 -0.059800000 0.04174596 ENSMUSG00000000001
## 2 -0.005911765 0.03419566 ENSMUSG00000000037
## 3 -0.023660714 0.18083869 ENSMUSG00000000049
## 4 0.016446429 0.03594950 ENSMUSG00000000056
## 5 -0.078136752 0.07671439 ENSMUSG00000000058
## 6 0.052847059 0.05748721 ENSMUSG00000000085
# Preview the liver table that holds one row per Ensembl ID (renamed columns).
head(liver.mouse.eQTL.min)
## gene snps statistic liver_pvalue FDR
## 1 1428645_at rs13477320 -1.3801032 0.190827641 0.57060083
## 2 1449320_at CEL-X_71438949 1.2101767 0.247756346 0.63979906
## 3 1421514_a_at CEL-X_154048891 -0.3183323 0.755289094 0.93238030
## 4 1416677_at rs3670642 3.2114557 0.006815612 0.08431322
## 5 1425344_at CEL-11_120628029 -1.5163463 0.153365833 0.52379422
## 6 1417327_at NAT_6_18.199327 1.1762017 0.260604313 0.65097975
## liver.beta liver.beta_se ensembl_id
## 1 -0.07366667 0.05337765 ENSMUSG00000000001
## 2 0.06350000 0.05247167 ENSMUSG00000000003
## 3 -0.01177273 0.03698251 ENSMUSG00000000037
## 4 0.07875000 0.02452159 ENSMUSG00000000049
## 5 -0.24430000 0.16111096 ENSMUSG00000000056
## 6 0.11090000 0.09428655 ENSMUSG00000000058
# Also inspect the last rows of the per-Ensembl-ID liver table.
tail(liver.mouse.eQTL.min)
## gene snps statistic liver_pvalue FDR
## 10962 1434694_at rs13459062 -1.7224576 0.10867553 0.4460593
## 10963 1437645_at 115Mit16 2.0044772 0.06630646 0.3458785
## 10964 1449939_s_at rs13481642 -0.4164743 0.68385715 0.9063245
## 10965 1422547_at rs4165065 -1.6637788 0.12006199 0.4695369
## 10966 1451476_at rs4165065 -0.1824077 0.85807655 0.9631432
## 10967 1453995_a_at rs4165065 -1.4418659 0.17299524 0.5490605
## liver.beta liver.beta_se ensembl_id
## 10962 -0.16160714 0.09382358 ENSMUSG00000099041
## 10963 0.10340000 0.05158452 ENSMUSG00000099083
## 10964 -0.01975000 0.04742189 ENSMUSG00000099116
## 10965 -0.10844444 0.06517960 ENSMUSG00000099164
## 10966 -0.01105556 0.06060904 ENSMUSG00000099262
## 10967 -0.05450000 0.03779824 ENSMUSG00000099305
# Number of Ensembl IDs with a result in lung and in liver.
dim(lung.mouse.eQTL.min)
## [1] 11015 8
dim(liver.mouse.eQTL.min)
## [1] 10967 8
# Combine the lung and liver eQTL tables on their shared Ensembl ID; only
# IDs present in both tables are kept.
merged.mouse.eQTL.min <- merge(
  lung.mouse.eQTL.min,
  liver.mouse.eQTL.min,
  by = "ensembl_id"
)
head(merged.mouse.eQTL.min)
## ensembl_id gene.x snps.x statistic.x lung_pvalue
## 1 ENSMUSG00000000001 1428645_at rs3702359 -1.4324741 0.1592383
## 2 ENSMUSG00000000037 1421514_a_at gnfX.141.820 -0.1728806 0.8635565
## 3 ENSMUSG00000000049 1416677_at rs3720981 -0.1308388 0.8965131
## 4 ENSMUSG00000000056 1451677_at CEL-11_120628029 0.4574870 0.6496243
## 5 ENSMUSG00000000058 1425955_at rs13478643 -1.0185410 0.3141176
## 6 ENSMUSG00000000085 1426241_a_at rs4224744 0.9192838 0.3630755
## FDR.x lung.beta lung.beta_se gene.y snps.y
## 1 0.3935788 -0.059800000 0.04174596 1428645_at rs13477320
## 2 0.9457289 -0.005911765 0.03419566 1421514_a_at CEL-X_154048891
## 3 0.9603432 -0.023660714 0.18083869 1416677_at rs3670642
## 4 0.8393760 0.016446429 0.03594950 1425344_at CEL-11_120628029
## 5 0.5866782 -0.078136752 0.07671439 1417327_at NAT_6_18.199327
## 6 0.6327046 0.052847059 0.05748721 1426241_a_at gnf04.117.102
## statistic.y liver_pvalue FDR.y liver.beta liver.beta_se
## 1 -1.3801032 0.190827641 0.57060083 -0.07366667 0.05337765
## 2 -0.3183323 0.755289094 0.93238030 -0.01177273 0.03698251
## 3 3.2114557 0.006815612 0.08431322 0.07875000 0.02452159
## 4 -1.5163463 0.153365833 0.52379422 -0.24430000 0.16111096
## 5 1.1762017 0.260604313 0.65097975 0.11090000 0.09428655
## 6 -0.7745039 0.452489862 0.79168033 -0.01910000 0.02466095
dim(merged.mouse.eQTL.min)
## [1] 10422 15
merged.mouse.eQTL.min <- data.frame(merged.mouse.eQTL.min)
# Keep the ID plus p-value, effect size and standard error for each tissue.
# The columns are selected by name (previously by positions 1, 5, 7, 8, 12,
# 14, 15), so the selection no longer depends on the column order produced
# by the merge.
merged.mouse.eQTL.min <- merged.mouse.eQTL.min[
  ,
  c(
    "ensembl_id",
    "lung_pvalue", "lung.beta", "lung.beta_se",
    "liver_pvalue", "liver.beta", "liver.beta_se"
  )
]
head(merged.mouse.eQTL.min)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.1592383 -0.059800000 0.04174596 0.190827641
## 2 ENSMUSG00000000037 0.8635565 -0.005911765 0.03419566 0.755289094
## 3 ENSMUSG00000000049 0.8965131 -0.023660714 0.18083869 0.006815612
## 4 ENSMUSG00000000056 0.6496243 0.016446429 0.03594950 0.153365833
## 5 ENSMUSG00000000058 0.3141176 -0.078136752 0.07671439 0.260604313
## 6 ENSMUSG00000000085 0.3630755 0.052847059 0.05748721 0.452489862
## liver.beta liver.beta_se
## 1 -0.07366667 0.05337765
## 2 -0.01177273 0.03698251
## 3 0.07875000 0.02452159
## 4 -0.24430000 0.16111096
## 5 0.11090000 0.09428655
## 6 -0.01910000 0.02466095
# Save the reduced lung/liver table, then read it back under a new name.
write.table(
  merged.mouse.eQTL.min,
  file = "2016-05-09 mouse.liver.expression.min.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
merged.mouse.eQTL.min.variance2 <- read.table(
  file = "2016-05-09 mouse.liver.expression.min.txt",
  header = TRUE
)
head(merged.mouse.eQTL.min.variance2)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.1592383 -0.059800000 0.04174596 0.190827641
## 2 ENSMUSG00000000037 0.8635565 -0.005911765 0.03419566 0.755289094
## 3 ENSMUSG00000000049 0.8965131 -0.023660714 0.18083869 0.006815612
## 4 ENSMUSG00000000056 0.6496243 0.016446429 0.03594950 0.153365833
## 5 ENSMUSG00000000058 0.3141176 -0.078136752 0.07671439 0.260604313
## 6 ENSMUSG00000000085 0.3630755 0.052847059 0.05748721 0.452489862
## liver.beta liver.beta_se
## 1 -0.07366667 0.05337765
## 2 -0.01177273 0.03698251
## 3 0.07875000 0.02452159
## 4 -0.24430000 0.16111096
## 5 0.11090000 0.09428655
## 6 -0.01910000 0.02466095
# Absolute value of the liver and lung effect sizes
merged.mouse.eQTL.min.variance2[["abs_liver.beta"]] <-
  abs(merged.mouse.eQTL.min.variance2[["liver.beta"]])
merged.mouse.eQTL.min.variance2[["abs_lung.beta"]] <-
  abs(merged.mouse.eQTL.min.variance2[["lung.beta"]])
# Negative log10 of the lung p-value
merged.mouse.eQTL.min.variance2[["neg_log_lung_pvalue"]] <-
  -log10(merged.mouse.eQTL.min.variance2[["lung_pvalue"]])
# Simple linear regression between abs_liver.beta and abs_lung.beta
# (kept for reference, not run):
# fit1<-summary(lm(abs_liver.beta ~ abs_lung.beta, data=merged.mouse.eQTL.min.variance2))
# fit1
# tau<-fit1$sigma**2
# Check the association between abs_liver.beta and abs_lung.beta:
# scatter plot with a fitted linear trend, then the correlation coefficient.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
ggplot(
  merged.mouse.eQTL.min.variance2,
  aes(x = abs_lung.beta, y = abs_liver.beta)
) +
  geom_point() +
  geom_smooth(method = lm)

with(merged.mouse.eQTL.min.variance2, cor(abs_lung.beta, abs_liver.beta))
## [1] 0.3961265
merged.mouse.eQTL <- merged.mouse.eQTL.min.variance2
# Ensembl IDs (first column)
markers <- merged.mouse.eQTL[, 1]
# Yg=Ag + Bg*Xsnp+V
# Observed effect sizes: absolute liver beta
betas.hat <- merged.mouse.eQTL$abs_liver.beta
# Standard errors of the liver beta
se <- merged.mouse.eQTL$liver.beta_se
# Design matrix Z with two columns: a constant 1 for the intercept and
# abs_lung.beta, with missing values replaced by 0.
Z <- as.matrix(merged.mouse.eQTL$abs_lung.beta)
Z[is.na(Z)] <- 0
Z <- as.matrix(data.frame(1, Z))
rowLength <- length(markers)
# liver.betas=Z*gama+T^2
# Regression: abs_liver.beta = intercept + beta*abs_lung.beta + error.
# The formula drops lm's own intercept because Z already carries the
# constant column.
lmsummary <- summary(lm(abs_liver.beta ~ -1 + Z, data = merged.mouse.eQTL))
lmsummary
##
## Call:
## lm(formula = abs_liver.beta ~ -1 + Z, data = merged.mouse.eQTL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2079 -0.0735 -0.0389 0.0218 4.2525
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## ZX1 0.075463 0.002259 33.41 <2e-16 ***
## ZZ 0.365645 0.008303 44.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1987 on 10420 degrees of freedom
## Multiple R-squared: 0.3701, Adjusted R-squared: 0.3699
## F-statistic: 3061 on 2 and 10420 DF, p-value: < 2.2e-16
# error ~ N(0, Tau): tau is the squared residual standard error of the fit
tau <- lmsummary$sigma^2
tau
## [1] 0.03950088
# Gamma: the estimated regression coefficients as a one-column matrix
gamma <- as.matrix(lmsummary$coefficients[, 1])
# Transpose of the Z matrix
Z_transpose <- t(Z)
# Identity matrix with one row per marker
identity <- diag(rowLength)
# Observed betas.hat as a one-column matrix
betas.hat <- as.matrix(betas.hat)
# V: diagonal matrix holding the squared liver standard errors
V <- diag(merged.mouse.eQTL$liver.beta_se^2, rowLength, rowLength)
# Tau: diagonal matrix with tau on every diagonal entry
Tau <- diag(tau, rowLength, rowLength)
# Following Chen's paper, s is the sum of both variance matrices
s <- V + Tau
# Inverse of a diagonal matrix: takes the reciprocal of each diagonal entry
# of `x` and returns a matrix with the dimensions of `x` that carries those
# reciprocals on its diagonal and zeros elsewhere. Off-diagonal entries of
# `x` are not read.
diag.inverse <- function(x) {
  reciprocal <- 1 / diag(x)
  diag(reciprocal, nrow(x), ncol(x))
}
# Product of two diagonal matrices: multiplies the diagonals of `x` and `y`
# entry by entry and returns a matrix with the dimensions of `x` that carries
# the products on its diagonal and zeros elsewhere.
diag.multi <- function(x, y) {
  product <- diag(x) * diag(y)
  diag(product, nrow(x), ncol(x))
}
# S: inverse of the diagonal matrix s
S <- diag.inverse(s)
# Following Chen's paper, omega is the product of S and V
omega <- diag.multi(S, V)
# Diagonal entries of omega as a plain vector
omega.diag <- diag(omega)
# Distribution of the omega values
summary(omega.diag)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005225 0.039240 0.071550 0.121400 0.145500 0.962900
# betas.thea<- S %*% Z %*% gamma + (identity-S) %*% betas.hat
# Calculate betas.tieda with the formula in Chen's paper:
#   omega %*% Z %*% gamma + (identity - omega) %*% betas.hat
# omega is built as a diagonal matrix, so both matrix products reduce to
# row-wise scaling by its diagonal. Doing that elementwise avoids allocating
# another rowLength x rowLength matrix for (identity - omega) and the dense
# products on it. The result is still a one-column matrix and agrees with the
# matrix form up to floating-point rounding.
betas.tieda <- diag(omega) * (Z %*% gamma) + (1 - diag(omega)) * betas.hat
# crbetas.tieda<- cromega %*% Z %*% gamma + (identity-cromega) %*% betas.hat
head(betas.tieda)
## [,1]
## [1,] 0.07525859
## [2,] 0.01397654
## [3,] 0.07883044
## [4,] 0.17973373
## [5,] 0.10963860
## [6,] 0.02024762
# Observed effect sizes, for comparison with betas.tieda printed above.
head(betas.hat)
## [,1]
## [1,] 0.07366667
## [2,] 0.01177273
## [3,] 0.07875000
## [4,] 0.24430000
## [5,] 0.11090000
## [6,] 0.01910000
# Regression prediction: the design matrix times the fitted coefficients.
regbeta <-Z %*% gamma
head(regbeta)
## [,1]
## [1,] 0.09732903
## [2,] 0.07762505
## [3,] 0.08411487
## [4,] 0.08147700
## [5,] 0.10403378
## [6,] 0.09478672
# Distribution of the regression-predicted values.
summary(regbeta)
## V1
## Min. :0.07546
## 1st Qu.:0.08888
## Median :0.10149
## Mean :0.12590
## 3rd Qu.:0.12863
## Max. :2.21539
# Ensembl IDs as character strings
markers1 <- as.character(markers)
# Stack ensembl_id, betas.hat and betas.tieda into one character vector and
# lay it out as a three-column matrix with one row per marker.
outputVector <- c(markers1, betas.hat, betas.tieda)
write.table(
  matrix(outputVector, nrow = rowLength),
  file = "2016-04-26_hm_tau_hmresults.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)
# Read the table back; the file has no header, so name the columns here.
liver.mouse.eQTL.bayesian <- read.table(file = "2016-04-26_hm_tau_hmresults.txt")
colnames(liver.mouse.eQTL.bayesian) <- c("ensembl_id", "betas.hat", "betas.tieda")
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.tieda
## 1 ENSMUSG00000000001 0.07366667 0.07525859
## 2 ENSMUSG00000000037 0.01177273 0.01397654
## 3 ENSMUSG00000000049 0.07875000 0.07883044
## 4 ENSMUSG00000000056 0.24430000 0.17973373
## 5 ENSMUSG00000000058 0.11090000 0.10963860
## 6 ENSMUSG00000000085 0.01910000 0.02024762
# Attach betas.hat and betas.tieda to the lung/liver summary table, matching
# rows on the Ensembl ID.
liver.mouse.eQTL.bayesian.all <- merge(
  liver.mouse.eQTL.bayesian,
  merged.mouse.eQTL.min.variance2,
  by.x = "ensembl_id",
  by.y = "ensembl_id"
)
head(liver.mouse.eQTL.bayesian.all)
## ensembl_id betas.hat betas.tieda lung_pvalue lung.beta
## 1 ENSMUSG00000000001 0.07366667 0.07525859 0.1592383 -0.059800000
## 2 ENSMUSG00000000037 0.01177273 0.01397654 0.8635565 -0.005911765
## 3 ENSMUSG00000000049 0.07875000 0.07883044 0.8965131 -0.023660714
## 4 ENSMUSG00000000056 0.24430000 0.17973373 0.6496243 0.016446429
## 5 ENSMUSG00000000058 0.11090000 0.10963860 0.3141176 -0.078136752
## 6 ENSMUSG00000000085 0.01910000 0.02024762 0.3630755 0.052847059
## lung.beta_se liver_pvalue liver.beta liver.beta_se abs_liver.beta
## 1 0.04174596 0.190827641 -0.07366667 0.05337765 0.07366667
## 2 0.03419566 0.755289094 -0.01177273 0.03698251 0.01177273
## 3 0.18083869 0.006815612 0.07875000 0.02452159 0.07875000
## 4 0.03594950 0.153365833 -0.24430000 0.16111096 0.24430000
## 5 0.07671439 0.260604313 0.11090000 0.09428655 0.11090000
## 6 0.05748721 0.452489862 -0.01910000 0.02466095 0.01910000
## abs_lung.beta neg_log_lung_pvalue
## 1 0.059800000 0.79795236
## 2 0.005911765 0.06370924
## 3 0.023660714 0.04744338
## 4 0.016446429 0.18733775
## 5 0.078136752 0.50290766
## 6 0.052847059 0.44000309
# Save the combined table with write.table's default settings, then read it
# back into liver.mouse.eQTL.bayesian.
write.table(
  liver.mouse.eQTL.bayesian.all,
  file = "2016-05-09_liver.mouse.eQTL.bayesian.all.txt"
)
liver.mouse.eQTL.bayesian <- read.table(
  file = "2016-05-09_liver.mouse.eQTL.bayesian.all.txt"
)
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.tieda lung_pvalue lung.beta
## 1 ENSMUSG00000000001 0.07366667 0.07525859 0.1592383 -0.059800000
## 2 ENSMUSG00000000037 0.01177273 0.01397654 0.8635565 -0.005911765
## 3 ENSMUSG00000000049 0.07875000 0.07883044 0.8965131 -0.023660714
## 4 ENSMUSG00000000056 0.24430000 0.17973373 0.6496243 0.016446429
## 5 ENSMUSG00000000058 0.11090000 0.10963860 0.3141176 -0.078136752
## 6 ENSMUSG00000000085 0.01910000 0.02024762 0.3630755 0.052847059
## lung.beta_se liver_pvalue liver.beta liver.beta_se abs_liver.beta
## 1 0.04174596 0.190827641 -0.07366667 0.05337765 0.07366667
## 2 0.03419566 0.755289094 -0.01177273 0.03698251 0.01177273
## 3 0.18083869 0.006815612 0.07875000 0.02452159 0.07875000
## 4 0.03594950 0.153365833 -0.24430000 0.16111096 0.24430000
## 5 0.07671439 0.260604313 0.11090000 0.09428655 0.11090000
## 6 0.05748721 0.452489862 -0.01910000 0.02466095 0.01910000
## abs_lung.beta neg_log_lung_pvalue
## 1 0.059800000 0.79795236
## 2 0.005911765 0.06370924
## 3 0.023660714 0.04744338
## 4 0.016446429 0.18733775
## 5 0.078136752 0.50290766
## 6 0.052847059 0.44000309
# Keep only the columns needed for the posterior calculations. Each column is
# listed once: "abs_lung.beta" used to appear twice in `select`, which added
# a redundant `abs_lung.beta.1` copy to the table.
liver.mouse.eQTL.bayesian <- subset(
  liver.mouse.eQTL.bayesian,
  select = c(
    "ensembl_id", "betas.hat",
    "liver.beta_se", "betas.tieda",
    "liver_pvalue", "abs_lung.beta",
    "neg_log_lung_pvalue"
  )
)
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat liver.beta_se betas.tieda liver_pvalue
## 1 ENSMUSG00000000001 0.07366667 0.05337765 0.07525859 0.190827641
## 2 ENSMUSG00000000037 0.01177273 0.03698251 0.01397654 0.755289094
## 3 ENSMUSG00000000049 0.07875000 0.02452159 0.07883044 0.006815612
## 4 ENSMUSG00000000056 0.24430000 0.16111096 0.17973373 0.153365833
## 5 ENSMUSG00000000058 0.11090000 0.09428655 0.10963860 0.260604313
## 6 ENSMUSG00000000085 0.01910000 0.02466095 0.02024762 0.452489862
## abs_lung.beta neg_log_lung_pvalue
## 1 0.059800000 0.79795236
## 2 0.005911765 0.06370924
## 3 0.023660714 0.04744338
## 4 0.016446429 0.18733775
## 5 0.078136752 0.50290766
## 6 0.052847059 0.44000309
# Calculate the variance of betas.tieda following Brian Kulis' lecture notes.
# Invert V and Tau; the inverse posterior variance is the sum of both inverses.
V_invert <- diag.inverse(V)
Tau_invert <- diag.inverse(Tau)
PS_invert <- Tau_invert + V_invert
# PS_invert<-Tau_invert+V_invert%*% Z %*% Z_transpose # previous wrong code
# PS corresponds to S in Brian Kulis' lecture notes
PS <- diag.inverse(PS_invert)
# Posterior variances: the diagonal of PS
ps <- diag(PS)
range(ps)
## [1] 0.0002063788 0.0380352366
library(reshape)
##
## Attaching package: 'reshape'
## The following objects are masked from 'package:plyr':
##
## rename, round_any
## The following object is masked from 'package:data.table':
##
## melt
# Reshape the posterior variance vector to long format: a data frame with a
# `value` column, as the printed preview below shows.
# `melt` is the version from the reshape package attached just above, which
# masks data.table's `melt` (see the attach messages).
ps.long <- melt(ps)
head(ps.long)
## value
## 1 0.0026574907
## 2 0.0013219343
## 3 0.0005922921
## 4 0.0156637851
## 5 0.0072567664
## 6 0.0005989409
# Posterior standard deviation: square root of the posterior variance
ps.long[["betas.tieda.se"]] <- (ps.long[["value"]])^0.5
# Append the standard deviation as a new column of the data frame
liver.mouse.eQTL.bayesian <- cbind(
  liver.mouse.eQTL.bayesian,
  ps.long$betas.tieda.se
)
# head(liver.mouse.eQTL.bayesian)
# Give the appended column a proper name and rename the liver standard error
liver.mouse.eQTL.bayesian <- rename(
  liver.mouse.eQTL.bayesian,
  c(
    "ps.long$betas.tieda.se" = "betas.tieda.se",
    "liver.beta_se" = "betas.hat.se"
  )
)
# Keep the final set of columns in this order
liver.mouse.eQTL.bayesian <- liver.mouse.eQTL.bayesian[
  ,
  c(
    "ensembl_id", "betas.hat", "betas.hat.se",
    "betas.tieda", "betas.tieda.se",
    "liver_pvalue", "abs_lung.beta", "neg_log_lung_pvalue"
  )
]
# head(liver.mouse.eQTL.bayesian)
# library(tigerstats)
# pnormGC(0, region="below", mean=0.002352829,sd=0.09972950)
# Probability that the effect lies below 0 under a normal distribution with
# mean betas.tieda and standard deviation betas.tieda.se
liver.mouse.eQTL.bayesian$p.below.0 <- with(
  liver.mouse.eQTL.bayesian,
  pnorm(0, mean = betas.tieda, sd = betas.tieda.se)
)
head(liver.mouse.eQTL.bayesian)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667 0.05337765 0.07525859 0.05155085
## 2 ENSMUSG00000000037 0.01177273 0.03698251 0.01397654 0.03635841
## 3 ENSMUSG00000000049 0.07875000 0.02452159 0.07883044 0.02433705
## 4 ENSMUSG00000000056 0.24430000 0.16111096 0.17973373 0.12515504
## 5 ENSMUSG00000000058 0.11090000 0.09428655 0.10963860 0.08518666
## 6 ENSMUSG00000000085 0.01910000 0.02466095 0.02024762 0.02447327
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0
## 1 0.190827641 0.059800000 0.79795236 0.072160112
## 2 0.755289094 0.005911765 0.06370924 0.350337299
## 3 0.006815612 0.023660714 0.04744338 0.000599512
## 4 0.153365833 0.016446429 0.18733775 0.075488568
## 5 0.260604313 0.078136752 0.50290766 0.099040250
## 6 0.452489862 0.052847059 0.44000309 0.204023322
# Size of the final table (rows, columns).
dim(liver.mouse.eQTL.bayesian)
## [1] 10422 9
# Distribution of the posterior standard deviations.
summary(liver.mouse.eQTL.bayesian$betas.tieda.se)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01437 0.03937 0.05316 0.06185 0.07581 0.19500
# Smallest and largest probability of the effect lying below 0.
range(liver.mouse.eQTL.bayesian$p.below.0)
## [1] 0.0000000 0.4820141
# Save the table with the posterior columns, then read it back.
write.table(
  liver.mouse.eQTL.bayesian,
  file = "2016-05-04_liver.mouse.eQTL.bayesian with beta.txt"
)
liver.mouse.eQTL.bayesian <- read.table(
  file = "2016-05-04_liver.mouse.eQTL.bayesian with beta.txt"
)
# head(liver.mouse.eQTL.bayesian)
# summary(liver.mouse.eQTL.bayesian$liver_residual_variance)
# Work on a copy for the weighted-tau calculations
liver.mouse.eQTL.bayesian.4tau <- liver.mouse.eQTL.bayesian
# colnames(liver.mouse.eQTL.bayesian.4tau) <- c("ensembl_id", "betas.hat", "betas.hat.se", "betas.tieda", "betas.tieda.se", "liver_residual_variance", "liver_pvalue", "abs_lung.beta",
# "neg_log_lung_pvalue", "p.below.0", "betas.tieda2m", "betas.tieda3rd", "betas.tieda4max")
# head(liver.mouse.eQTL.bayesian.4tau)
# Introduce a weight (Tmm) to adjust Tau with neg_log_lung_pvalue
liver.mouse.eQTL.bayesian.4tau[["fzm"]] <-
  liver.mouse.eQTL.bayesian.4tau[["neg_log_lung_pvalue"]]
# ratio_fzm: the largest fzm divided by each gene's own fzm
liver.mouse.eQTL.bayesian.4tau[["ratio_fzm"]] <- with(
  liver.mouse.eQTL.bayesian.4tau,
  max(fzm) / fzm
)
range(liver.mouse.eQTL.bayesian.4tau$ratio_fzm)
## [1] 1 Inf
# Cap the ratio: any value above max(fzm) / -log10(threshold) is replaced by
# that bound. The capped ratio is stored as nratio_fzm.
threshold <- 0.05
liver.mouse.eQTL.bayesian.4tau[["nratio_fzm"]] <- with(
  liver.mouse.eQTL.bayesian.4tau,
  replace(
    ratio_fzm,
    ratio_fzm > max(fzm) / (-log10(threshold)),
    max(fzm) / (-log10(threshold))
  )
)
# liver.mouse.eQTL.bayesian.4tau$nratio_fzm[liver.mouse.eQTL.bayesian.4tau$nratio_fzm >= max(liver.mouse.eQTL.bayesian.4tau$fzm)/(-log(threshold))] <- liver.mouse.eQTL.bayesian.4tau$ratio_fzm
summary(liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 24.16 35.84 29.18 35.84 35.84
# Compare the Bayesian prediction with the traditional method.
# Predictions are evaluated against allele-specific expression (ASE) in the
# liver, taken from Sandrine Lagarrigue's paper.
liver.ASE <- read.csv("ASE.genetics.113.153882-6.csv")
dim(liver.ASE)
## [1] 1191 19
head(liver.ASE, n = 6L)
## replicate chr startExon geneID SNPperExon3
## 1 M.CH. DxB and BxD 1 9535488 Rrs1 15
## 2 M.HF DxB and BxD 1 9535488 Rrs1 14
## 3 F.HF DxB and BxD 1 9535488 Rrs1 15
## 4 M.CH. DxB and BxD 1 37473929 6330578E17Rik 2
## 5 M.CH. DxB and BxD 1 58169979 Aox3 3
## 6 M.HF DxB and BxD 1 58169979 Aox3 3
## sumBperExon.DxB4 sumDperExon.DxB4 sumBperExon.BxD4 sumDperExon.BxD4
## 1 45 19 50 25
## 2 74 39 66 30
## 3 76 20 77 40
## 4 78 32 47 17
## 5 473 82 225 27
## 6 252 56 263 53
## FCadd1.DxB5 FCadd1.BxD5 BonBD.DxB6 BonBD.BxD6 pvalBH.DxB7 pvalBH.BxD7
## 1 2.30 1.96 0.70 0.67 1.0e-02 3.7e-02
## 2 1.88 2.16 0.65 0.69 1.1e-02 3.3e-03
## 3 3.67 1.90 0.79 0.66 4.9e-07 5.1e-03
## 4 2.39 2.67 0.71 0.73 1.9e-04 2.2e-03
## 5 5.71 8.07 0.85 0.89 3.6e-64 5.4e-37
## 6 4.44 4.89 0.82 0.83 8.7e-28 1.1e-31
## UTR5 UTR3 strand exonCount
## 1 0 0 + 1
## 2 0 0 + 1
## 3 0 0 + 1
## 4 0 0 - 3
## 5 0 0 + 35
## 6 0 0 + 35
# Number of distinct gene IDs in the whole ASE table (440).
length(unique(liver.ASE[["geneID"]]))
## [1] 440
# Verify the ASE table: split it by replicate label and count genes in each.
liver.ASE1 <- subset(liver.ASE, replicate == "M.CH. DxB and BxD")
liver.ASE2 <- subset(liver.ASE, replicate == "M.HF DxB and BxD")
liver.ASE3 <- subset(liver.ASE, replicate == "F.HF DxB and BxD")
length(unique(liver.ASE1[["geneID"]]))
## [1] 272
length(unique(liver.ASE2[["geneID"]]))
## [1] 275
length(unique(liver.ASE3[["geneID"]]))
## [1] 304
sum(
  length(unique(liver.ASE1[["geneID"]])),
  length(unique(liver.ASE2[["geneID"]])),
  length(unique(liver.ASE3[["geneID"]]))
) / 3
## [1] 283.6667
# As claimed in the paper: on average 284 ASE genes per replicate.
# The first replicate is the one carried forward.
sub.liver.ASE <- liver.ASE1
summary(sub.liver.ASE[["pvalBH.DxB7"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0000000 0.0000058 0.0084070 0.0031000 0.1000000
# Stratify the selected replicate into four bands of pvalBH.DxB7; the cut
# points are hard-coded (1e-14, 5.8e-06 and 0.0031).
sub.liver.ASE1 <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 < 1e-14), ]
sub.liver.ASE2 <- sub.liver.ASE[
  which(sub.liver.ASE$pvalBH.DxB7 >= 1e-14 & sub.liver.ASE$pvalBH.DxB7 < 5.8e-06),
]
sub.liver.ASE3 <- sub.liver.ASE[
  which(sub.liver.ASE$pvalBH.DxB7 >= 5.8e-06 & sub.liver.ASE$pvalBH.DxB7 < 0.0031),
]
sub.liver.ASE4 <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 >= 0.0031), ]
# Size of each band.
dim(sub.liver.ASE1)
## [1] 89 19
dim(sub.liver.ASE2)
## [1] 97 19
dim(sub.liver.ASE3)
## [1] 93 19
dim(sub.liver.ASE4)
## [1] 94 19
# Subset liver ASE with different conditions
# sub.liver.ASE <-liver.ASE[which(liver.ASE$pvalBH.DxB7 < 0.05 & liver.ASE$pvalBH.BxD7 < 0.05), ]
# sub.liver.ASE <- sub.liver.ASE[order(sub.liver.ASE$pvalBH.BxD7), ]
# summary(sub.liver.ASE$pvalBH.BxD7)
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.DxB7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[which(sub.liver.ASE$pvalBH.BxD7 <= 9.0e-10 ), ]
# sub.liver.ASE <- sub.liver.ASE[ sub.liver.ASE$geneID %in% names(table(sub.liver.ASE$geneID))[table(sub.liver.ASE$geneID) >1] , ]
# Check how many rows remain after subsetting.
dim(sub.liver.ASE)
## [1] 373 19
# Distinct gene symbols: overall, then for each p-value band.
liver.ASE.symbol <- unique(sub.liver.ASE[["geneID"]])
liver.ASE.symbol1 <- unique(sub.liver.ASE1[["geneID"]])
liver.ASE.symbol2 <- unique(sub.liver.ASE2[["geneID"]])
liver.ASE.symbol3 <- unique(sub.liver.ASE3[["geneID"]])
liver.ASE.symbol4 <- unique(sub.liver.ASE4[["geneID"]])
length(liver.ASE.symbol)
## [1] 272
# Annotate gene symbols with Ensembl gene IDs.
library(biomaRt)
mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")
# One BioMart query per symbol set: returns the ensembl_gene_id / mgi_symbol
# pairs for the MGI symbols passed in.
symbols.to.ensembl <- function(symbols) {
  getBM(
    attributes = c("ensembl_gene_id", "mgi_symbol"),
    filters = "mgi_symbol",
    values = symbols,
    mart = mouse
  )
}
liver.ASE.ensembl <- symbols.to.ensembl(liver.ASE.symbol)
liver.ASE.ensembl1 <- symbols.to.ensembl(liver.ASE.symbol1)
liver.ASE.ensembl2 <- symbols.to.ensembl(liver.ASE.symbol2)
liver.ASE.ensembl3 <- symbols.to.ensembl(liver.ASE.symbol3)
liver.ASE.ensembl4 <- symbols.to.ensembl(liver.ASE.symbol4)
dim(liver.ASE.ensembl)
## [1] 241 2
liver.ASE.ensembl <- unique(liver.ASE.ensembl)
# Drop liver ASE Ensembl IDs that are absent from the Bayesian eQTL table.
liver.ASE.ensembl <- subset(
  liver.ASE.ensembl,
  ensembl_gene_id %in% liver.mouse.eQTL.bayesian.4tau$ensembl_id
)
dim(liver.ASE.ensembl)
## [1] 190 2
# create indicator for ASE true or not
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl1$ensembl_gene_id] <- 1
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl2$ensembl_gene_id] <- 2
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl3$ensembl_gene_id] <- 3
# liver.mouse.eQTL.bayesian.4tau$eqtl[liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl4$ensembl_gene_id] <- 4
# liver.mouse.eQTL.bayesian.4tau$eqtl[!liver.mouse.eQTL.bayesian.4tau$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id] <- 5
# ASE indicator: 1 when the gene's Ensembl ID is in the ASE set, 0 otherwise.
liver.mouse.eQTL.bayesian.4tau[["eqtl"]] <- as.numeric(
  liver.mouse.eQTL.bayesian.4tau[["ensembl_id"]] %in%
    liver.ASE.ensembl[["ensembl_gene_id"]]
)
summary(liver.mouse.eQTL.bayesian.4tau[["eqtl"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.01823 0.00000 1.00000
# Liver p-values on the -log10 scale, for comparison and plotting.
liver.mouse.eQTL.bayesian.4tau[["neg_log_liver_pvalue"]] <-
  -log10(liver.mouse.eQTL.bayesian.4tau[["liver_pvalue"]])
head(liver.mouse.eQTL.bayesian.4tau)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667 0.05337765 0.07525859 0.05155085
## 2 ENSMUSG00000000037 0.01177273 0.03698251 0.01397654 0.03635841
## 3 ENSMUSG00000000049 0.07875000 0.02452159 0.07883044 0.02433705
## 4 ENSMUSG00000000056 0.24430000 0.16111096 0.17973373 0.12515504
## 5 ENSMUSG00000000058 0.11090000 0.09428655 0.10963860 0.08518666
## 6 ENSMUSG00000000085 0.01910000 0.02466095 0.02024762 0.02447327
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0 fzm
## 1 0.190827641 0.059800000 0.79795236 0.072160112 0.79795236
## 2 0.755289094 0.005911765 0.06370924 0.350337299 0.06370924
## 3 0.006815612 0.023660714 0.04744338 0.000599512 0.04744338
## 4 0.153365833 0.016446429 0.18733775 0.075488568 0.18733775
## 5 0.260604313 0.078136752 0.50290766 0.099040250 0.50290766
## 6 0.452489862 0.052847059 0.44000309 0.204023322 0.44000309
## ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 58.43301 35.83834 0 0.7193587
## 2 731.86805 35.83834 0 0.1218868
## 3 982.78748 35.83834 0 2.1664952
## 4 248.89142 35.83834 0 0.8142714
## 5 92.71435 35.83834 0 0.5840184
## 6 105.96916 35.83834 0 0.3443911
# Per-group summaries (ASE vs. other genes) of the ID, lung effect size,
# p.below.0 and -log10 liver p-value columns, selected by name.
by(liver.mouse.eQTL.bayesian.4tau[, c("ensembl_id", "abs_lung.beta", "p.below.0", "neg_log_liver_pvalue")], liver.mouse.eQTL.bayesian.4tau[, "eqtl"], summary)
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 0
## ensembl_id abs_lung.beta p.below.0
## ENSMUSG00000000001: 1 Min. :0.00000 Min. :0.00000
## ENSMUSG00000000037: 1 1st Qu.:0.03640 1st Qu.:0.01960
## ENSMUSG00000000049: 1 Median :0.07032 Median :0.09616
## ENSMUSG00000000056: 1 Mean :0.13375 Mean :0.13291
## ENSMUSG00000000058: 1 3rd Qu.:0.14167 3rd Qu.:0.21888
## ENSMUSG00000000085: 1 Max. :4.17678 Max. :0.48201
## (Other) :10226
## neg_log_liver_pvalue
## Min. : 0.0000
## 1st Qu.: 0.2730
## Median : 0.6021
## Mean : 0.9621
## 3rd Qu.: 1.1737
## Max. :15.6916
##
## --------------------------------------------------------
## liver.mouse.eQTL.bayesian.4tau[, "eqtl"]: 1
## ensembl_id abs_lung.beta p.below.0
## ENSMUSG00000000275: 1 Min. :0.001944 Min. :0.0000000
## ENSMUSG00000000673: 1 1st Qu.:0.081911 1st Qu.:0.0000000
## ENSMUSG00000001467: 1 Median :0.180165 Median :0.0004881
## ENSMUSG00000001473: 1 Mean :0.363517 Mean :0.0412863
## ENSMUSG00000001604: 1 3rd Qu.:0.376020 3rd Qu.:0.0278189
## ENSMUSG00000002395: 1 Max. :5.852455 Max. :0.3579107
## (Other) :184
## neg_log_liver_pvalue
## Min. : 0.04328
## 1st Qu.: 0.83625
## Median : 2.37521
## Mean : 3.16619
## 3rd Qu.: 4.66122
## Max. :13.80969
##
library(ggplot2)
# Distribution of -log10 p-values in liver and lung, split by the ASE
# indicator (eqtl: 0 = other genes, 1 = ASE genes).
boxplot(neg_log_liver_pvalue ~ eqtl, data = liver.mouse.eQTL.bayesian.4tau,
        main = "liver.mouse.eQTL",
        xlab = "ASE cutoff by p value", ylab = "liver neg log p")

boxplot(neg_log_lung_pvalue ~ eqtl, data = liver.mouse.eQTL.bayesian.4tau,
        main = "lung.mouse.eQTL",
        xlab = "ASE cutoff by p value", ylab = "lung neg log p")

# Rows flagged as ASE genes, used for the ASE-only versions of each scatter.
liver.mouse.eQTL.bayesian.4tau.ase <- liver.mouse.eQTL.bayesian.4tau[liver.mouse.eQTL.bayesian.4tau$eqtl == 1, ]
# Liver vs. lung -log10 p-values. col = factor(eqtl) maps eqtl 0 to palette
# colour 1 (black) and eqtl 1 to colour 2 (red), matching the legend text.
plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$neg_log_lung_pvalue, col = factor(liver.mouse.eQTL.bayesian.4tau$eqtl), xlab = "neg_log_liver_pvalue", ylab = "neg_log_lung_pvalue")
legend("topright", cex = .75, inset = .05, c("ASE", "others"), text.col = c("red", "black"), horiz = TRUE)

# Same scatter restricted to ASE genes (the stray empty positional argument
# of the original call has been removed).
plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue, col = "red", xlab = "neg_log_liver_pvalue", ylab = "neg_log_lung_pvalue")
legend("topright", cex = .75, inset = .05, c("ASE"), text.col = c("red"), horiz = TRUE)

# betas.hat against betas.tieda, all genes and then ASE genes only.
plot(liver.mouse.eQTL.bayesian.4tau$betas.hat, liver.mouse.eQTL.bayesian.4tau$betas.tieda, col = factor(liver.mouse.eQTL.bayesian.4tau$eqtl))
legend("topright", cex = .75, inset = .05, c("ASE", "others"), text.col = c("red", "black"), horiz = TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$betas.hat, liver.mouse.eQTL.bayesian.4tau.ase$betas.tieda, col = "red")
legend("topright", cex = .75, inset = .05, c("ASE"), text.col = c("red"), horiz = TRUE)

# Correlation between liver and lung -log10 p-values among ASE genes, and
# the number of ASE genes it is based on.
cor(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$neg_log_lung_pvalue)
## [1] 0.4786611
length(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue)
## [1] 190
# -log10 liver p-value against p.below.0, all genes and then ASE genes only.
plot(liver.mouse.eQTL.bayesian.4tau$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau$p.below.0, col = factor(liver.mouse.eQTL.bayesian.4tau$eqtl))
legend("topright", cex = .75, inset = .05, c("ASE", "others"), text.col = c("red", "black"), horiz = TRUE)

plot(liver.mouse.eQTL.bayesian.4tau.ase$neg_log_liver_pvalue, liver.mouse.eQTL.bayesian.4tau.ase$p.below.0, col = "red")
legend("topright", cex = .75, inset = .05, c("ASE"), text.col = c("red"), horiz = TRUE)

head(liver.mouse.eQTL.bayesian.4tau)
## ensembl_id betas.hat betas.hat.se betas.tieda betas.tieda.se
## 1 ENSMUSG00000000001 0.07366667 0.05337765 0.07525859 0.05155085
## 2 ENSMUSG00000000037 0.01177273 0.03698251 0.01397654 0.03635841
## 3 ENSMUSG00000000049 0.07875000 0.02452159 0.07883044 0.02433705
## 4 ENSMUSG00000000056 0.24430000 0.16111096 0.17973373 0.12515504
## 5 ENSMUSG00000000058 0.11090000 0.09428655 0.10963860 0.08518666
## 6 ENSMUSG00000000085 0.01910000 0.02466095 0.02024762 0.02447327
## liver_pvalue abs_lung.beta neg_log_lung_pvalue p.below.0 fzm
## 1 0.190827641 0.059800000 0.79795236 0.072160112 0.79795236
## 2 0.755289094 0.005911765 0.06370924 0.350337299 0.06370924
## 3 0.006815612 0.023660714 0.04744338 0.000599512 0.04744338
## 4 0.153365833 0.016446429 0.18733775 0.075488568 0.18733775
## 5 0.260604313 0.078136752 0.50290766 0.099040250 0.50290766
## 6 0.452489862 0.052847059 0.44000309 0.204023322 0.44000309
## ratio_fzm nratio_fzm eqtl neg_log_liver_pvalue
## 1 58.43301 35.83834 0 0.7193587
## 2 731.86805 35.83834 0 0.1218868
## 3 982.78748 35.83834 0 2.1664952
## 4 248.89142 35.83834 0 0.8142714
## 5 92.71435 35.83834 0 0.5840184
## 6 105.96916 35.83834 0 0.3443911
# Optimizing rho and adjusting the weight.
# For each candidate rho, the per-gene prior variance is rescaled as
# tau * (rho / tau)^nratio_fzm, then the shrinkage weight (omega), posterior
# mean, posterior SD and P(beta < 0) are recomputed. One 7-column slab per rho
# is stacked into `result`.
# Relies on objects created earlier in the script: omega, tau, V, Z, gamma,
# identity, betas.hat, rowLength, diag.inverse() and diag.multi().
library(reshape)
rho.optimization <- matrix(0, nrow=nrow(liver.mouse.eQTL.bayesian.4tau), ncol=7)
colnames(rho.optimization)<-c("rho","tmm","tau", "omega","beta_tieda", "n.betas.tieda.se","p.below.0" )
nomega.diag<-diag(omega )
# Candidate rho values: tau scaled by 1, 1.02, ..., 1.1. In the recorded run
# tau was 0.03950088 (the rho printed for multiplier 1 in the output below).
rho <- seq(1,1.1, by=0.02)*tau
# V does not depend on rho, so its inverse is computed once, outside the loop.
V_invert<-diag.inverse(V)
# Collect one slab per rho and bind once at the end, instead of growing
# `result` with rbind() on every pass.
result.parts <- vector("list", length(rho))
for (i in seq_along(rho)) {
  rho.optimization[ ,1] <- rho[i]
  # tmm: per-gene multiplier applied to tau
  rho.optimization[ ,2] <- (rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm
  # adjusted per-gene tau
  rho.optimization[ ,3] <-tau*((rho[i]/tau)^liver.mouse.eQTL.bayesian.4tau$nratio_fzm)
  nTau<- diag(rho.optimization[ ,3], rowLength, rowLength)
  ns<-V + nTau
  nS <- diag.inverse(ns)
  nomega<-diag.multi(nS, V)
  # nomega <- diag(0, rowLength, rowLength) # set nomega to 0 for code checking
  # nomega <- diag(1, rowLength, rowLength) # set nomega to 1 for code checking
  rho.optimization[ ,4] <- diag(nomega )
  # posterior mean: omega-weighted blend of Z %*% gamma and betas.hat
  rho.optimization[ ,5] <- nomega %*% Z %*% gamma + (identity-nomega) %*% betas.hat
  nTau_invert<-diag.inverse(nTau)
  nPS_invert<-nTau_invert+ V_invert
  # nPS_invert<-nTau_invert+ diag.multi(diag.multi(V_invert, Z_transpose), Z) # previous wrong code
  nPS<-diag.inverse(nPS_invert)
  nps<-diag(nPS)
  nps.long <- melt(nps)
  # posterior SD, then the normal probability mass below zero
  rho.optimization[ ,6] <-(nps.long$value)^0.5
  rho.optimization[ ,7] <- pnorm(0, rho.optimization[ ,5], rho.optimization[ ,6])
  result.parts[[i]] <- rho.optimization
}
result <- do.call(rbind, result.parts)
dim(result)
## [1] 62532 7
head(result)
## rho tmm tau omega beta_tieda n.betas.tieda.se
## [1,] 0.03950088 1 0.03950088 0.06727674 0.07525859 0.05155085
## [2,] 0.03950088 1 0.03950088 0.03346594 0.01397654 0.03635841
## [3,] 0.03950088 1 0.03950088 0.01499440 0.07883044 0.02433705
## [4,] 0.03950088 1 0.03950088 0.39654268 0.17973373 0.12515504
## [5,] 0.03950088 1 0.03950088 0.18371151 0.10963860 0.08518666
## [6,] 0.03950088 1 0.03950088 0.01516272 0.02024762 0.02447327
## p.below.0
## [1,] 0.072160112
## [2,] 0.350337299
## [3,] 0.000599512
## [4,] 0.075488568
## [5,] 0.099040250
## [6,] 0.204023322
# Persist the stacked rho results, reload them as a data frame, and label each
# row with its rho multiplier (rho / tau) as a factor.
write.table(result, file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt",col.names=TRUE,row.names=FALSE,quote=FALSE)
liver.mouse.eQTL.bayesian.result <- read.table(file="2016-05-04_liver.mouse.eQTL.bayesian.result.txt", header=TRUE)
result.df <-liver.mouse.eQTL.bayesian.result
result.df$rho.class <- factor(result.df$rho/tau)
# Combine liver.mouse.eQTL.bayesian.4tau with the rho optimisation results
# for plotting. The gene-level columns are selected by name, then stacked once
# per rho value so the rows line up with the slabs in result.df.
a <- liver.mouse.eQTL.bayesian.4tau[, c("ensembl_id", "betas.hat", "liver_pvalue", "abs_lung.beta")]
a <- do.call(rbind, rep(list(a), length(rho)))
dim(a)
## [1] 62532 4
# cbind() pairs rows by position, so the two tables must be the same height.
stopifnot(nrow(a) == nrow(result.df))
new.result.df<-cbind(a, result.df)
head(new.result.df)
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho tmm
## 1 ENSMUSG00000000001 0.07366667 0.190827641 0.059800000 0.03950088 1
## 2 ENSMUSG00000000037 0.01177273 0.755289094 0.005911765 0.03950088 1
## 3 ENSMUSG00000000049 0.07875000 0.006815612 0.023660714 0.03950088 1
## 4 ENSMUSG00000000056 0.24430000 0.153365833 0.016446429 0.03950088 1
## 5 ENSMUSG00000000058 0.11090000 0.260604313 0.078136752 0.03950088 1
## 6 ENSMUSG00000000085 0.01910000 0.452489862 0.052847059 0.03950088 1
## tau omega beta_tieda n.betas.tieda.se p.below.0 rho.class
## 1 0.03950088 0.06727674 0.07525859 0.05155085 0.072160112 1
## 2 0.03950088 0.03346594 0.01397654 0.03635841 0.350337299 1
## 3 0.03950088 0.01499440 0.07883044 0.02433705 0.000599512 1
## 4 0.03950088 0.39654268 0.17973373 0.12515504 0.075488568 1
## 5 0.03950088 0.18371151 0.10963860 0.08518666 0.099040250 1
## 6 0.03950088 0.01516272 0.02024762 0.02447327 0.204023322 1
# Copy of the combined table; the later plotting and ranking steps work on it.
new.result.df2 <- new.result.df
# Confirm the rho multipliers became factor levels (one level per rho value).
head(new.result.df$rho.class)
## [1] 1 1 1 1 1 1
## Levels: 1 1.02 1.04 1.06 1.08 1.1
# First six rows of every rho class, to eyeball how tmm, tau, omega and the
# posterior columns change with rho.
by(new.result.df2, new.result.df2[, "rho.class"], head)
## new.result.df2[, "rho.class"]: 1
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho tmm
## 1 ENSMUSG00000000001 0.07366667 0.190827641 0.059800000 0.03950088 1
## 2 ENSMUSG00000000037 0.01177273 0.755289094 0.005911765 0.03950088 1
## 3 ENSMUSG00000000049 0.07875000 0.006815612 0.023660714 0.03950088 1
## 4 ENSMUSG00000000056 0.24430000 0.153365833 0.016446429 0.03950088 1
## 5 ENSMUSG00000000058 0.11090000 0.260604313 0.078136752 0.03950088 1
## 6 ENSMUSG00000000085 0.01910000 0.452489862 0.052847059 0.03950088 1
## tau omega beta_tieda n.betas.tieda.se p.below.0 rho.class
## 1 0.03950088 0.06727674 0.07525859 0.05155085 0.072160112 1
## 2 0.03950088 0.03346594 0.01397654 0.03635841 0.350337299 1
## 3 0.03950088 0.01499440 0.07883044 0.02433705 0.000599512 1
## 4 0.03950088 0.39654268 0.17973373 0.12515504 0.075488568 1
## 5 0.03950088 0.18371151 0.10963860 0.08518666 0.099040250 1
## 6 0.03950088 0.01516272 0.02024762 0.02447327 0.204023322 1
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.02
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11000 ENSMUSG00000000001 0.07366667 0.190827641 0.059800000 0.0402909
## 21000 ENSMUSG00000000037 0.01177273 0.755289094 0.005911765 0.0402909
## 31000 ENSMUSG00000000049 0.07875000 0.006815612 0.023660714 0.0402909
## 41000 ENSMUSG00000000056 0.24430000 0.153365833 0.016446429 0.0402909
## 51000 ENSMUSG00000000058 0.11090000 0.260604313 0.078136752 0.0402909
## 61000 ENSMUSG00000000085 0.01910000 0.452489862 0.052847059 0.0402909
## tmm tau omega beta_tieda n.betas.tieda.se
## 11000 2.033368 0.08031981 0.034257649 0.07447728 0.05245539
## 21000 2.033368 0.08031981 0.016743144 0.01287530 0.03667160
## 31000 2.033368 0.08031981 0.007430797 0.07878987 0.02443031
## 41000 2.033368 0.08031981 0.244237706 0.20453248 0.14006115
## 51000 2.033368 0.08031981 0.099652245 0.11021577 0.08946535
## 61000 2.033368 0.08031981 0.007514859 0.01966878 0.02456811
## p.below.0 rho.class
## 11000 0.0778298406 1.02
## 21000 0.3627576457 1.02
## 31000 0.0006296736 1.02
## 41000 0.0721026583 1.02
## 51000 0.1089861262 1.02
## 61000 0.2116869787 1.02
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.04
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11002 ENSMUSG00000000001 0.07366667 0.190827641 0.059800000 0.04108092
## 21002 ENSMUSG00000000037 0.01177273 0.755289094 0.005911765 0.04108092
## 31002 ENSMUSG00000000049 0.07875000 0.006815612 0.023660714 0.04108092
## 41002 ENSMUSG00000000056 0.24430000 0.153365833 0.016446429 0.04108092
## 51002 ENSMUSG00000000058 0.11090000 0.260604313 0.078136752 0.04108092
## 61002 ENSMUSG00000000085 0.01910000 0.452489862 0.052847059 0.04108092
## tmm tau omega beta_tieda n.betas.tieda.se
## 11002 4.077994 0.1610844 0.017380054 0.07407792 0.05291177
## 21002 4.077994 0.1610844 0.008419134 0.01232715 0.03682650
## 31002 4.077994 0.1610844 0.003718996 0.07876995 0.02447595
## 41002 4.077994 0.1610844 0.138775590 0.22170414 0.14951448
## 51002 4.077994 0.1610844 0.052301741 0.11054088 0.09178776
## 61002 4.077994 0.1610844 0.003761227 0.01938467 0.02461453
## p.below.0 rho.class
## 11002 0.0807525640 1.04
## 21002 0.3689121997 1.04
## 31002 0.0006448559 1.04
## 41002 0.0690601555 1.04
## 51002 0.1142349624 1.04
## 61002 0.2154858658 1.04
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.06
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11004 ENSMUSG00000000001 0.07366667 0.190827641 0.059800000 0.04187093
## 21004 ENSMUSG00000000037 0.01177273 0.755289094 0.005911765 0.04187093
## 31004 ENSMUSG00000000049 0.07875000 0.006815612 0.023660714 0.04187093
## 41004 ENSMUSG00000000056 0.24430000 0.153365833 0.016446429 0.04187093
## 51004 ENSMUSG00000000058 0.11090000 0.260604313 0.078136752 0.04187093
## 61004 ENSMUSG00000000085 0.01910000 0.452489862 0.052847059 0.04187093
## tmm tau omega beta_tieda n.betas.tieda.se
## 11004 8.070868 0.3188064 0.008857841 0.07387626 0.05314072
## 21004 8.070868 0.3188064 0.004271757 0.01205403 0.03690343
## 31004 8.070868 0.3188064 0.001882573 0.07876010 0.02449850
## 41004 8.070868 0.3188064 0.075288621 0.23204128 0.15492738
## 51004 8.070868 0.3188064 0.027128631 0.11071373 0.09299882
## 61004 8.070868 0.3188064 0.001903991 0.01924411 0.02463746
## p.below.0 rho.class
## 11004 0.0822339814 1.06
## 21004 0.3719711664 1.06
## 31004 0.0006524609 1.06
## 41004 0.0671001186 1.06
## 51004 0.1169278548 1.06
## 61004 0.2173743737 1.06
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.08
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11006 ENSMUSG00000000001 0.07366667 0.190827641 0.059800000 0.04266095
## 21006 ENSMUSG00000000037 0.01177273 0.755289094 0.005911765 0.04266095
## 31006 ENSMUSG00000000049 0.07875000 0.006815612 0.023660714 0.04266095
## 41006 ENSMUSG00000000056 0.24430000 0.153365833 0.016446429 0.04266095
## 51006 ENSMUSG00000000058 0.11090000 0.260604313 0.078136752 0.04266095
## 61006 ENSMUSG00000000085 0.01910000 0.452489862 0.052847059 0.04266095
## tmm tau omega beta_tieda n.betas.tieda.se
## 11006 15.77074 0.622958 0.0045527985 0.07377440 0.05325601
## 21006 15.77074 0.622958 0.0021906929 0.01191699 0.03694198
## 31006 15.77074 0.622958 0.0009643163 0.07875517 0.02450976
## 41006 15.77074 0.622958 0.0400002352 0.23778704 0.15785584
## 51006 15.77074 0.622958 0.0140697672 0.11080339 0.09362090
## 61006 15.77074 0.622958 0.0009752972 0.01917382 0.02464892
## p.below.0 rho.class
## 11006 0.082983669 1.08
## 21006 0.373504140 1.08
## 31006 0.000656287 1.08
## 41006 0.065987941 1.08
## 51006 0.118299061 1.08
## 61006 0.218320878 1.08
## --------------------------------------------------------
## new.result.df2[, "rho.class"]: 1.1
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 11008 ENSMUSG00000000001 0.07366667 0.190827641 0.059800000 0.04345097
## 21008 ENSMUSG00000000037 0.01177273 0.755289094 0.005911765 0.04345097
## 31008 ENSMUSG00000000049 0.07875000 0.006815612 0.023660714 0.04345097
## 41008 ENSMUSG00000000056 0.24430000 0.153365833 0.016446429 0.04345097
## 51008 ENSMUSG00000000058 0.11090000 0.260604313 0.078136752 0.04345097
## 61008 ENSMUSG00000000085 0.01910000 0.452489862 0.052847059 0.04345097
## tmm tau omega beta_tieda n.betas.tieda.se
## 11008 30.44004 1.202408 0.0023639545 0.07372260 0.05331452
## 21008 30.44004 1.202408 0.0011361797 0.01184755 0.03696149
## 31008 30.44004 1.202408 0.0004998367 0.07875268 0.02451546
## 41008 30.44004 1.202408 0.0211311295 0.24085937 0.15939964
## 51008 30.44004 1.202408 0.0073391942 0.11084961 0.09393992
## 61008 30.44004 1.202408 0.0005055312 0.01913826 0.02465471
## p.below.0 rho.class
## 11008 0.0833651717 1.1
## 21008 0.3742804206 1.1
## 31008 0.0006582282 1.1
## 41008 0.0653890239 1.1
## 51008 0.1189990373 1.1
## 61008 0.2188002013 1.1
# Choose a rho class for plotting: here the rows whose rho multiplier is 1.
new.result.df2.rho1 <- new.result.df2[new.result.df2[["rho.class"]] == "1", ]
head(new.result.df2.rho1, n = 6L)
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho tmm
## 1 ENSMUSG00000000001 0.07366667 0.190827641 0.059800000 0.03950088 1
## 2 ENSMUSG00000000037 0.01177273 0.755289094 0.005911765 0.03950088 1
## 3 ENSMUSG00000000049 0.07875000 0.006815612 0.023660714 0.03950088 1
## 4 ENSMUSG00000000056 0.24430000 0.153365833 0.016446429 0.03950088 1
## 5 ENSMUSG00000000058 0.11090000 0.260604313 0.078136752 0.03950088 1
## 6 ENSMUSG00000000085 0.01910000 0.452489862 0.052847059 0.03950088 1
## tau omega beta_tieda n.betas.tieda.se p.below.0 rho.class
## 1 0.03950088 0.06727674 0.07525859 0.05155085 0.072160112 1
## 2 0.03950088 0.03346594 0.01397654 0.03635841 0.350337299 1
## 3 0.03950088 0.01499440 0.07883044 0.02433705 0.000599512 1
## 4 0.03950088 0.39654268 0.17973373 0.12515504 0.075488568 1
## 5 0.03950088 0.18371151 0.10963860 0.08518666 0.099040250 1
## 6 0.03950088 0.01516272 0.02024762 0.02447327 0.204023322 1
# Last rows of the rho-class-1 subset, to confirm it runs through the final gene.
tail(new.result.df2.rho1)
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 10417 ENSMUSG00000099041 0.16160714 0.10867553 0.10059091 0.03950088
## 10418 ENSMUSG00000099083 0.10340000 0.06630646 0.12891000 0.03950088
## 10419 ENSMUSG00000099116 0.01975000 0.68385715 0.03457738 0.03950088
## 10420 ENSMUSG00000099164 0.10844444 0.12006199 0.08603846 0.03950088
## 10421 ENSMUSG00000099262 0.01105556 0.85807655 0.18600427 0.03950088
## 10422 ENSMUSG00000099305 0.05450000 0.17299524 0.06246296 0.03950088
## tmm tau omega beta_tieda n.betas.tieda.se p.below.0
## 10417 1 0.03950088 0.18223979 0.15261122 0.08484475 0.03603218
## 10418 1 0.03950088 0.06311306 0.10461169 0.04993017 0.01807838
## 10419 1 0.03950088 0.05386470 0.02343200 0.04612703 0.30573070
## 10420 1 0.03950088 0.09710749 0.10829670 0.06193409 0.04018183
## 10421 1 0.03950088 0.08508424 0.02232237 0.05797329 0.35010178
## 10422 1 0.03950088 0.03490647 0.05602900 0.03713268 0.06566391
## rho.class
## 10417 1
## 10418 1
## 10419 1
## 10420 1
## 10421 1
## 10422 1
# Rank genes by p.below.0: the Bayesian model.
new.result.df2.rho1[["rank.p.below.0"]] <-
  rank(new.result.df2.rho1[["p.below.0"]])
# Rank genes by liver p-value: traditional linear regression (one-step
# regression).
new.result.df2.rho1[["rank.liver.pvalue"]] <-
  rank(new.result.df2.rho1[["liver_pvalue"]])
# Sort the table by the Bayesian rank.
new.result.df2.rho1 <-
  new.result.df2.rho1[with(new.result.df2.rho1, order(rank.p.below.0)), ]
head(new.result.df2.rho1)
## ensembl_id betas.hat liver_pvalue abs_lung.beta rho
## 2524 ENSMUSG00000022680 4.255833 2.034364e-16 4.1767833 0.03950088
## 10012 ENSMUSG00000073411 5.304083 1.549934e-14 5.8524545 0.03950088
## 4677 ENSMUSG00000028656 2.154341 5.245966e-14 2.9349028 0.03950088
## 9191 ENSMUSG00000057132 2.426200 7.421942e-14 3.0932000 0.03950088
## 2758 ENSMUSG00000023791 1.198018 4.042683e-13 0.4850804 0.03950088
## 3125 ENSMUSG00000024735 2.858722 3.352312e-13 1.1153500 0.03950088
## tmm tau omega beta_tieda n.betas.tieda.se p.below.0
## 2524 1 0.03950088 0.14728921 3.865053 0.07627617 0.000000e+00
## 10012 1 0.03950088 0.34419291 4.240976 0.11660156 6.004650e-290
## 4677 1 0.03950088 0.09472791 2.059069 0.06117055 1.072073e-248
## 9191 1 0.03950088 0.12286551 2.276338 0.06966560 1.757179e-234
## 2758 1 0.03950088 0.04258612 1.157766 0.04101450 1.317748e-175
## 3125 1 0.03950088 0.19741575 2.389774 0.08830683 1.373629e-161
## rho.class rank.p.below.0 rank.liver.pvalue
## 2524 1 1 1
## 10012 1 2 2
## 4677 1 3 3
## 9191 1 4 4
## 2758 1 5 6
## 3125 1 6 5
# Calculate, for every rank cutoff i = 1..n, how well the top-ranked genes
# recover the ASE gene set:
#   PPV (positive predictive value) = ASE genes selected / genes selected
#   TPR (true positive rate)        = ASE genes selected / all ASE genes
#   FPR (false positive rate)       = non-ASE genes selected / all non-ASE genes
# A gene is "selected" at cutoff i when its rank is <= i. With tied (averaged)
# ranks the number selected can differ from i, and PPV is NaN while nothing is
# selected.
# The bay_FPR / ori_FPR columns used to be left as NA; they are now filled.

# Cumulative selection metrics for one ranking.
#   ranks      numeric ranks, one per gene
#   is.hit     logical, TRUE where the gene belongs to the ASE set
#   n.positive denominator for TPR
#   n.negative denominator for FPR
# Returns a 4-column matrix: cutoff, PPV, TPR, FPR.
rank.cutoff.metrics <- function(ranks, is.hit, n.positive, n.negative) {
  cutoffs <- seq_along(ranks)
  by.rank <- order(ranks)
  # genes with rank <= cutoff, for every cutoff at once
  n.selected <- findInterval(cutoffs, ranks[by.rank])
  # ASE genes among them (leading 0 covers "nothing selected")
  n.hits <- c(0, cumsum(is.hit[by.rank]))[n.selected + 1L]
  cbind(cutoffs, n.hits / n.selected, n.hits / n.positive,
        (n.selected - n.hits) / n.negative)
}

is.ase.rho1 <- new.result.df2.rho1$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id
n.ase.rho1 <- nrow(liver.ASE.ensembl)
n.other.rho1 <- nrow(new.result.df2.rho1) - n.ase.rho1
result.rho1 <- cbind(
  # Bayesian ranking (p.below.0)
  rank.cutoff.metrics(new.result.df2.rho1$rank.p.below.0, is.ase.rho1,
                      n.ase.rho1, n.other.rho1),
  # original ranking (liver p-value)
  rank.cutoff.metrics(new.result.df2.rho1$rank.liver.pvalue, is.ase.rho1,
                      n.ase.rho1, n.other.rho1)
)
colnames(result.rho1)<-c("bayrank","bayppv","bay_TPR","bay_FPR", "orirank","orippv","ori_TPR","ori_FPR" )
# NOTE: the echoed output below predates this change and still shows NA in the
# two FPR columns.
head(result.rho1)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [1,] 1 0.0000000 0.000000000 NA 1 0.0000000 0.000000000
## [2,] 2 0.5000000 0.005263158 NA 2 0.5000000 0.005263158
## [3,] 3 0.3333333 0.005263158 NA 3 0.3333333 0.005263158
## [4,] 4 0.2500000 0.005263158 NA 4 0.2500000 0.005263158
## [5,] 5 0.2000000 0.005263158 NA 5 0.4000000 0.010526316
## [6,] 6 0.3333333 0.010526316 NA 6 0.3333333 0.010526316
## ori_FPR
## [1,] NA
## [2,] NA
## [3,] NA
## [4,] NA
## [5,] NA
## [6,] NA
# Last cutoffs: with every gene selected, TPR is 1 and PPV equals the overall
# share of ASE genes (see the output below).
tail(result.rho1)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [10417,] 10417 0.01823942 1 NA 10417 0.01823942 1
## [10418,] 10418 0.01823767 1 NA 10418 0.01823767 1
## [10419,] 10419 0.01823592 1 NA 10419 0.01823592 1
## [10420,] 10420 0.01823417 1 NA 10420 0.01823417 1
## [10421,] 10421 0.01823242 1 NA 10421 0.01823242 1
## [10422,] 10422 0.01823067 1 NA 10422 0.01823067 1
## ori_FPR
## [10417,] NA
## [10418,] NA
## [10419,] NA
## [10420,] NA
## [10421,] NA
## [10422,] NA
# Plotting "true positive rate" against rank cutoff: Bayesian ranking in red,
# original liver p-value ranking in green. The second curve is added with
# lines() so both share one set of axes (no par(new = TRUE) overlay).
plot(result.rho1[, "bayrank"], result.rho1[, "bay_TPR"], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 1))
lines(result.rho1[, "bayrank"], result.rho1[, "ori_TPR"], col="green")
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# Zoom on the first 300 cutoffs.
plot(result.rho1[, "bayrank"], result.rho1[, "bay_TPR"], type="l", col="red", xlab="Ranking", ylab="TPR", ylim=c(0, 0.4), xlim=c(0, 300))
lines(result.rho1[, "bayrank"], result.rho1[, "ori_TPR"], col="green")
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# Plotting "positive predictive value" against rank cutoff.
plot(result.rho1[, "bayrank"], result.rho1[, "bayppv"], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1))
lines(result.rho1[, "orirank"], result.rho1[, "orippv"], col="green")
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# Zoom on the first 500 cutoffs.
plot(result.rho1[, "bayrank"], result.rho1[, "bayppv"], type="l", col="red", xlab="Ranking", ylab="PPV", ylim=c(0, 1), xlim=c(0, 500))
lines(result.rho1[, "orirank"], result.rho1[, "orippv"], col="green")
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original"), text.col = c("red", "green"), horiz=TRUE)

# Retrieve MT-eQTLs result
MTeQTLs <-read.table(file="MT-eQTLs.txt", header=T)
head(MTeQTLs)
## SNP gene isEQTL.Liver isEQTL.lung marginalP.Liver
## 1 rs6269442 1424963_at 0 0 0.9260306
## 2 rs6365999 1424963_at 0 0 0.9260306
## 3 rs6376963 1424963_at 0 0 0.9306985
## 4 rs3677817 1424963_at 0 0 0.9329117
## 5 rs6269442 1424964_at 0 0 0.9438114
## 6 rs6365999 1424964_at 0 0 0.9438114
## marginalP.lung
## 1 0.9027543
## 2 0.9027543
## 3 0.9104129
## 4 0.9140456
## 5 0.9219186
## 6 0.9219186
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt", header=T)
MTeQTLs<-merge(MTeQTLs, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
# Retrieve MT-eQTLs result
MTeQTLs <-read.table(file="MT-eQTLs.txt", header=T)
head(MTeQTLs)
## SNP gene isEQTL.Liver isEQTL.lung marginalP.Liver
## 1 rs6269442 1424963_at 0 0 0.9260306
## 2 rs6365999 1424963_at 0 0 0.9260306
## 3 rs6376963 1424963_at 0 0 0.9306985
## 4 rs3677817 1424963_at 0 0 0.9329117
## 5 rs6269442 1424964_at 0 0 0.9438114
## 6 rs6365999 1424964_at 0 0 0.9438114
## marginalP.lung
## 1 0.9027543
## 2 0.9027543
## 3 0.9104129
## 4 0.9140456
## 5 0.9219186
## 6 0.9219186
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt", header=T)
MTeQTLs<-merge(MTeQTLs, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
MTeQTLs.min <- data.table(MTeQTLs, key=c('ensembl_id', "marginalP.Liver"))
MTeQTLs.min <-MTeQTLs.min[J(unique(ensembl_id)),mult="first"]
merged.eQTL <- merge(new.result.df2.rho1, MTeQTLs.min, by ="ensembl_id")
merged.eQTL$rank.marginalP.Liver <- rank(merged.eQTL$marginalP.Liver)
# Evaluate three gene rankings against the liver ASE gene set.
#
# For every cutoff i = 1..nrow(merged.eQTL) and for each ranking, the genes
# whose rank is <= i are "selected" and compared with liver.ASE.ensembl:
#   rank : the cutoff i
#   ppv  : selected genes found in the ASE set / selected genes
#   TPR  : selected genes found in the ASE set / nrow(liver.ASE.ensembl)
#   FPR  : selected genes not in the ASE set /
#          (nrow(merged.eQTL) - nrow(liver.ASE.ensembl))
# Rankings used: rank.p.below.0 ("bay"), rank.liver.pvalue ("ori") and
# rank.marginalP.Liver ("MT").
n.genes <- nrow(merged.eQTL)
n.ase <- nrow(liver.ASE.ensembl)
n.non.ase <- n.genes - n.ase
# TRUE for rows of merged.eQTL whose gene is listed in the ASE set.
is.ase <- merged.eQTL$ensembl_id %in% liver.ASE.ensembl$ensembl_gene_id
# seq_len() yields an empty vector (not c(1, 0)) when there are no rows.
cutoffs <- seq_len(n.genes)

# Returns the four columns (rank, ppv, TPR, FPR) for one vector of ranks.
rank.cutoff.metrics <- function(ranks) {
  # findInterval(i, v) on a sorted v counts the entries of v that are <= i,
  # i.e. how many genes are selected at cutoff i. sort() drops NA ranks, which
  # never satisfy "rank <= i" either.
  n.selected <- findInterval(cutoffs, sort(ranks))
  n.hits <- findInterval(cutoffs, sort(ranks[is.ase]))
  cbind(
    cutoffs,
    n.hits / n.selected,
    n.hits / n.ase,
    (n.selected - n.hits) / n.non.ase
  )
}

merged.result <- cbind(
  rank.cutoff.metrics(merged.eQTL[["rank.p.below.0"]]),
  rank.cutoff.metrics(merged.eQTL[["rank.liver.pvalue"]]),
  # The MT columns must be based on the MT ranking, not on rank.liver.pvalue.
  rank.cutoff.metrics(merged.eQTL[["rank.marginalP.Liver"]])
)
colnames(merged.result) <- c(
  "bayrank", "bayppv", "bay_TPR", "bay_FPR",
  "orirank", "orippv", "ori_TPR", "ori_FPR",
  "MTrank", "MTppv", "MT_TPR", "MT_FPR"
)
# Print the first rows of merged.result (rank, ppv, TPR and FPR columns for
# the bay, ori and MT rankings). The "##" lines are console output captured
# from an earlier run; in that output the MT columns are identical to the ori
# columns in every row shown, so regenerate it whenever the computation of
# merged.result changes.
head(merged.result)
## bayrank bayppv bay_TPR bay_FPR orirank orippv
## [1,] 1 0.0000000 0.000000000 0.0000977326 1 0.0000000
## [2,] 2 0.5000000 0.005263158 0.0000977326 2 0.5000000
## [3,] 3 0.3333333 0.005263158 0.0001954652 3 0.3333333
## [4,] 4 0.2500000 0.005263158 0.0002931978 4 0.2500000
## [5,] 5 0.2000000 0.005263158 0.0003909304 5 0.4000000
## [6,] 6 0.3333333 0.010526316 0.0003909304 6 0.3333333
## ori_TPR ori_FPR MTrank MTppv MT_TPR MT_FPR
## [1,] 0.000000000 0.0000977326 1 0.0000000 0.000000000 0.0000977326
## [2,] 0.005263158 0.0000977326 2 0.5000000 0.005263158 0.0000977326
## [3,] 0.005263158 0.0001954652 3 0.3333333 0.005263158 0.0001954652
## [4,] 0.005263158 0.0002931978 4 0.2500000 0.005263158 0.0002931978
## [5,] 0.010526316 0.0002931978 5 0.4000000 0.010526316 0.0002931978
## [6,] 0.010526316 0.0003909304 6 0.3333333 0.010526316 0.0003909304
# Print the last rows of merged.result. The "##" lines are console output
# captured from an earlier run: at the largest cutoffs the three rankings
# report the same values, with TPR equal to 1 and FPR reaching 1 in the final
# row (cutoff 10422).
tail(merged.result)
## bayrank bayppv bay_TPR bay_FPR orirank orippv ori_TPR
## [10417,] 10417 0.01823942 1 0.9995113 10417 0.01823942 1
## [10418,] 10418 0.01823767 1 0.9996091 10418 0.01823767 1
## [10419,] 10419 0.01823592 1 0.9997068 10419 0.01823592 1
## [10420,] 10420 0.01823417 1 0.9998045 10420 0.01823417 1
## [10421,] 10421 0.01823242 1 0.9999023 10421 0.01823242 1
## [10422,] 10422 0.01823067 1 1.0000000 10422 0.01823067 1
## ori_FPR MTrank MTppv MT_TPR MT_FPR
## [10417,] 0.9995113 10417 0.01823942 1 0.9995113
## [10418,] 0.9996091 10418 0.01823767 1 0.9996091
## [10419,] 0.9997068 10419 0.01823592 1 0.9997068
## [10420,] 0.9998045 10420 0.01823417 1 0.9998045
## [10421,] 0.9999023 10421 0.01823242 1 0.9999023
## [10422,] 1.0000000 10422 0.01823067 1 1.0000000
# Plot the true positive rate against the rank cutoff for the three rankings
# (column 1 = cutoff; columns 3, 7 and 11 = Bayesian, original and MT TPR).
# The axes are drawn once by plot(); the other curves are added with lines()
# so that axes and labels are not redrawn on top of each other.
plot(
  merged.result[, 1], merged.result[, 3],
  type = "l", col = "red",
  xlab = "Ranking", ylab = "TPR", ylim = c(0, 1)
)
lines(merged.result[, 1], merged.result[, 7], col = "green")
lines(merged.result[, 1], merged.result[, 11], col = "blue")
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

# Same TPR-versus-rank plot, restricted to rank cutoffs 0..300 on the x axis.
# plot() sets up the axes once; lines() adds the other two curves to the same
# coordinate system instead of overplotting a new figure.
plot(
  merged.result[, 1], merged.result[, 3],
  type = "l", col = "red",
  xlab = "Ranking", ylab = "TPR", ylim = c(0, 1), xlim = c(0, 300)
)
lines(merged.result[, 1], merged.result[, 7], col = "green")
lines(merged.result[, 1], merged.result[, 11], col = "blue")
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)

# ROC curves: true positive rate against false positive rate for the three
# rankings (FPR in columns 4, 8, 12; TPR in columns 3, 7, 11).
# Each curve has its own FPR values, so all of them are drawn in a single
# coordinate system with a fixed [0, 1] x axis; overlaying separate plot()
# calls would let every curve pick its own x-axis range.
plot(
  merged.result[, 4], merged.result[, 3],
  type = "l", col = "red",
  xlab = "False positive rate", ylab = "True positive rate",
  xlim = c(0, 1), ylim = c(0, 1)
)
lines(merged.result[, 8], merged.result[, 7], col = "green")
lines(merged.result[, 12], merged.result[, 11], col = "blue")
legend("bottomright", cex = .75, inset=.05, c("Bayesian","Original", "MT"), text.col = c("red", "green", "blue"), horiz=TRUE)
title(main = "ROC curve")
