# Matrix eQTL by Andrey A. Shabalin
# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
#
# Be sure to use an up to date version of R and Matrix eQTL.
# source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
rm(list = ls())
setwd("/Volumes/Transcend/Thesis_project/2.2.lung.eqtl.analysis")
# sorted.GE.test<-read.table("sortedGE.txt", header = TRUE, stringsAsFactors = FALSE);
# sorted.GE.test<-sorted.GE.test[1:4,]
# sorted.GE.test
# write.table(sorted.GE.test,file="sorted.GE.test.txt", sep="\t", row.names=FALSE, quote=FALSE)
library(MatrixEQTL)
## Location of the package with the data files.
base.dir = "/Volumes/Transcend/Thesis_project/2.2.lung.eqtl.analysis";
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste(base.dir, "/2015-12-07 BXD.geno.SNP.eqtl.for.lung.txt", sep="");
snps_location_file_name = paste(base.dir, "/2015-12-07 BXD.geno.loc.eqtl.for.lung.txt", sep="");
# Gene expression file name
expression_file_name = paste(base.dir, "/2015-12-07 mouse.lung.expression.eqtl.txt", sep="");
gene_location_file_name = paste(base.dir, "/2015-12-07 lung.gene.loc.txt", sep="");
# Covariates file name
# Set to character() for no covariates
# covariates_file_name = paste(base.dir, "/Covariates.txt", sep="");
# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();
# Only associations significant at this level will be saved
pvOutputThreshold_cis = 1;
pvOutputThreshold_tra = 0.000000000000005;
# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;
## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t"; # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;
snps$fileSkipColumns = 1;
snps$fileSliceSize = 2000;
snps$LoadFile(SNP_file_name);
## Rows read: 2,000
## Rows read: 3811 done.
## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t";
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1;
gene$fileSkipColumns = 1;
gene$fileSliceSize = 2000;
gene$LoadFile(expression_file_name);
## Rows read: 2,000
## Rows read: 4,000
## Rows read: 6,000
## Rows read: 8,000
## Rows read: 10,000
## Rows read: 12,000
## Rows read: 14,000
## Rows read: 16,000
## Rows read: 18,000
## Rows read: 20,000
## Rows read: 22,000
## Rows read: 24,000
## Rows read: 26,000
## Rows read: 28,000
## Rows read: 30,000
## Rows read: 32,000
## Rows read: 34,000
## Rows read: 34470 done.
## Load covariates
#cvrt = SlicedData$new();
#cvrt$fileDelimiter = "\t"; # the TAB character
#cvrt$fileOmitCharacters = "NA"; # denote missing values;
#cvrt$fileSkipRows = 1; # one row of column labels
#cvrt$fileSkipColumns = 1; # one column of row labels
#if(length(covariates_file_name)>0) {
# cvrt$LoadFile(covariates_file_name);
#}
## Run the analysis
snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
head(genepos)
## ProbeSet Chr Blat.Mb.Start Blat.Mb.End
## 1 1415670_at 6 87859681 87859682
## 2 1415671_at 8 108048521 108048893
## 3 1415672_at 8 24351869 24352391
## 4 1415673_at 5 130271465 130272427
## 5 1415674_a_at 9 44212489 44213429
## 6 1415675_at 2 32428524 32428891
me = Matrix_eQTL_main(
snps = snps,
gene = gene,
output_file_name = output_file_name_tra,
pvOutputThreshold = pvOutputThreshold_tra,
useModel = useModel,
errorCovariance = numeric(),
verbose = TRUE,
output_file_name.cis = output_file_name_cis,
pvOutputThreshold.cis = pvOutputThreshold_cis,
snpspos = snpspos,
genepos = genepos,
cisDist = cisDist,
pvalue.hist = TRUE,
min.pv.by.genesnp = FALSE,
noFDRsaveMemory = FALSE);
## Matching data files and location files
## 34470 of 34470 genes matched
## 3811 of 3811 SNPs matched
## Task finished in 0.065 seconds
## Reordering genes
##
## Task finished in 0.611 seconds
## Processing covariates
## Task finished in 0.004 seconds
## Processing gene expression data (imputation, residualization, etc.)
## Task finished in 0.228 seconds
## Creating output file(s)
## Task finished in 0.021 seconds
## Performing eQTL analysis
## 2.77% done, 7,963 cis-eQTLs, 169 trans-eQTLs
## 5.55% done, 14,855 cis-eQTLs, 357 trans-eQTLs
## 8.33% done, 21,896 cis-eQTLs, 548 trans-eQTLs
## 11.11% done, 27,963 cis-eQTLs, 701 trans-eQTLs
## 13.88% done, 36,530 cis-eQTLs, 762 trans-eQTLs
## 16.66% done, 42,280 cis-eQTLs, 813 trans-eQTLs
## 19.44% done, 49,292 cis-eQTLs, 851 trans-eQTLs
## 22.22% done, 54,841 cis-eQTLs, 1,008 trans-eQTLs
## 25.00% done, 58,664 cis-eQTLs, 1,083 trans-eQTLs
## 27.77% done, 1,083 trans-eQTLs
## 30.55% done, 1,083 trans-eQTLs
## 33.33% done, 1,083 trans-eQTLs
## 36.11% done, 1,083 trans-eQTLs
## 38.88% done, 1,083 trans-eQTLs
## 41.66% done, 1,083 trans-eQTLs
## 44.44% done, 1,083 trans-eQTLs
## 47.22% done, 1,083 trans-eQTLs
## 50.00% done, 1,083 trans-eQTLs
## 52.77% done, 1,083 trans-eQTLs
## 55.55% done, 1,083 trans-eQTLs
## 58.33% done, 1,092 trans-eQTLs
## 61.11% done, 1,099 trans-eQTLs
## 63.88% done, 1,099 trans-eQTLs
## 66.66% done, 1,106 trans-eQTLs
## 69.44% done, 1,106 trans-eQTLs
## 72.22% done, 1,106 trans-eQTLs
## 75.00% done, 59,989 cis-eQTLs, 1,135 trans-eQTLs
## 77.77% done, 65,774 cis-eQTLs, 1,174 trans-eQTLs
## 80.55% done, 72,273 cis-eQTLs, 1,316 trans-eQTLs
## 83.33% done, 79,875 cis-eQTLs, 1,392 trans-eQTLs
## 86.11% done, 85,785 cis-eQTLs, 1,462 trans-eQTLs
## 88.88% done, 92,077 cis-eQTLs, 1,561 trans-eQTLs
## 91.66% done, 98,470 cis-eQTLs, 1,659 trans-eQTLs
## 94.44% done, 105,124 cis-eQTLs, 1,711 trans-eQTLs
## 97.22% done, 112,224 cis-eQTLs, 1,772 trans-eQTLs
## 100.00% done, 112,835 cis-eQTLs, 1,774 trans-eQTLs
## Task finished in 20.858 seconds
##
unlink(output_file_name_cis);
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')
## Analysis done in: 20.507 seconds
cat('Detected local eQTLs:','\n')
## Detected local eQTLs:
cis.eqtls<-me$cis$eqtls
head(cis.eqtls)
## snps gene statistic pvalue FDR beta
## 1 rs3681670 1421144_at -75.53535 2.361797e-47 1.332467e-42 -3.093200
## 2 rs3666583 1421144_at -75.53535 2.361797e-47 1.332467e-42 -3.093200
## 3 rs3713848 1437904_at 58.54734 1.217939e-42 3.435655e-38 3.170574
## 4 rs3669217 1437904_at 58.54734 1.217939e-42 3.435655e-38 3.170574
## 5 rs13481364 1438758_at -50.66421 5.603003e-40 6.322148e-36 -2.167143
## 6 rs3708704 1438758_at -50.66421 5.603003e-40 6.322148e-36 -2.167143
dim(cis.eqtls)
## [1] 112835 6
cis.eqtls$beta_se <-cis.eqtls$beta/cis.eqtls$statistic
write.table(cis.eqtls,file="2015-12-04 mouselung.cis.1M.eqtls.txt", sep="\t", row.names=FALSE, quote=FALSE)