# Matrix eQTL by Andrey A. Shabalin
# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
#
# Be sure to use an up to date version of R and Matrix eQTL.
# source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
rm(list = ls())
setwd("/Volumes/Transcend/Thesis_project/2.1.liver.eqtl.analysis")
library(MatrixEQTL)
## Location of the package with the data files.
base.dir = "/Volumes/Transcend/Thesis_project/2.1.liver.eqtl.analysis";
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name = paste(base.dir, "/2015-12-07 BXD.geno.SNP.eqtl.for.liver.txt", sep="");
snps_location_file_name = paste(base.dir, "/2015-12-07 BXD.geno.loc.eqtl.for.liver.txt", sep="");
# Gene expression file name
expression_file_name = paste(base.dir, "/2015-12-07 mouse.liver.expression.eqtl.txt", sep="");
gene_location_file_name = paste(base.dir, "/2015-12-07 liver.gene.loc.txt", sep="");
# Covariates file name
# Set to character() for no covariates
# covariates_file_name = paste(base.dir, "/Covariates.txt", sep="");
# Output file name
output_file_name_cis = tempfile();
output_file_name_tra = tempfile();
# Only associations significant at this level will be saved
pvOutputThreshold_cis = 1;
pvOutputThreshold_tra = 0.000000000000005;
# Error covariance matrix
# Set to numeric() for identity.
errorCovariance = numeric();
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
# Distance for local gene-SNP pairs
cisDist = 1e6;
## Load genotype data
snps = SlicedData$new();
snps$fileDelimiter = "\t"; # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;
snps$fileSkipColumns = 1;
snps$fileSliceSize = 2000;
snps$LoadFile(SNP_file_name);
## Rows read: 2,000
## Rows read: 3811 done.
## Load gene expression data
gene = SlicedData$new();
gene$fileDelimiter = "\t";
gene$fileOmitCharacters = "NA"; # denote missing values;
gene$fileSkipRows = 1;
gene$fileSkipColumns = 1;
gene$fileSliceSize = 2000;
gene$LoadFile(expression_file_name);
## Rows read: 2,000
## Rows read: 4,000
## Rows read: 6,000
## Rows read: 8,000
## Rows read: 10,000
## Rows read: 12,000
## Rows read: 14,000
## Rows read: 16,000
## Rows read: 18,000
## Rows read: 20,000
## Rows read: 20855 done.
## Load covariates
#cvrt = SlicedData$new();
#cvrt$fileDelimiter = "\t"; # the TAB character
#cvrt$fileOmitCharacters = "NA"; # denote missing values;
#cvrt$fileSkipRows = 1; # one row of column labels
#cvrt$fileSkipColumns = 1; # one column of row labels
#if(length(covariates_file_name)>0) {
# cvrt$LoadFile(covariates_file_name);
#}
## Run the analysis
snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
head(genepos)
## probe_id Chromosome start_location end_location
## 1 1415670_at 6 87887971 87890759
## 2 1415671_at 8 105524469 105566040
## 3 1415672_at 8 23241325 23257080
## 4 1415673_at 5 129765557 129787253
## 5 1415674_a_at 9 44403758 44407548
## 6 1415675_at 2 32570857 32573571
me = Matrix_eQTL_main(
snps = snps,
gene = gene,
output_file_name = output_file_name_tra,
pvOutputThreshold = pvOutputThreshold_tra,
useModel = useModel,
errorCovariance = numeric(),
verbose = TRUE,
output_file_name.cis = output_file_name_cis,
pvOutputThreshold.cis = pvOutputThreshold_cis,
snpspos = snpspos,
genepos = genepos,
cisDist = cisDist,
pvalue.hist = TRUE,
min.pv.by.genesnp = FALSE,
noFDRsaveMemory = FALSE);
## Matching data files and location files
## 20855 of 20855 genes matched
## 3811 of 3811 SNPs matched
## Task finished in 0.025 seconds
## Reordering genes
##
## Task finished in 0.231 seconds
## Processing covariates
## Task finished in 0.002 seconds
## Processing gene expression data (imputation, residualization, etc.)
## Task finished in 0.04 seconds
## Creating output file(s)
## Task finished in 0.015 seconds
## Performing eQTL analysis
## 4.54% done, 7,118 cis-eQTLs, 67 trans-eQTLs
## 9.09% done, 13,788 cis-eQTLs, 79 trans-eQTLs
## 13.63% done, 21,752 cis-eQTLs, 137 trans-eQTLs
## 18.18% done, 27,806 cis-eQTLs, 154 trans-eQTLs
## 22.72% done, 34,278 cis-eQTLs, 272 trans-eQTLs
## 27.27% done, 35,915 cis-eQTLs, 272 trans-eQTLs
## 31.81% done, 272 trans-eQTLs
## 36.36% done, 272 trans-eQTLs
## 40.90% done, 272 trans-eQTLs
## 45.45% done, 272 trans-eQTLs
## 50.00% done, 272 trans-eQTLs
## 54.54% done, 272 trans-eQTLs
## 59.09% done, 272 trans-eQTLs
## 63.63% done, 272 trans-eQTLs
## 68.18% done, 272 trans-eQTLs
## 72.72% done, 282 trans-eQTLs
## 77.27% done, 40,035 cis-eQTLs, 283 trans-eQTLs
## 81.81% done, 47,465 cis-eQTLs, 318 trans-eQTLs
## 86.36% done, 53,735 cis-eQTLs, 348 trans-eQTLs
## 90.90% done, 60,292 cis-eQTLs, 355 trans-eQTLs
## 95.45% done, 67,748 cis-eQTLs, 395 trans-eQTLs
## 100.00% done, 69,174 cis-eQTLs, 395 trans-eQTLs
## Task finished in 10.289 seconds
##
unlink(output_file_name_cis);
## Results:
cat('Analysis done in:', me$time.in.sec, ' seconds', '\n')
## Analysis done in: 9.876 seconds
cat('Detected local eQTLs:','\n')
## Detected local eQTLs:
cis.eqtls<-me$cis$eqtls
head(cis.eqtls)
## snps gene statistic pvalue FDR beta
## 1 rs4163042 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 2 116Mit88 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 3 rs4163058 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 4 rs4163391 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 5 rs4151923 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
## 6 rs3090019 1452705_at -76.34025 4.891311e-34 4.229394e-30 -4.190552
dim(cis.eqtls)
## [1] 69174 6
cis.eqtls$beta_se <-cis.eqtls$beta/cis.eqtls$statistic
write.table(cis.eqtls,file="2015-12-07 mouseliver.cis.1M.eqtls.txt", sep="\t", row.names=FALSE, quote=FALSE)