See: http://jtleek.com/genstats/inst/doc/04_10_eQTL.html

eQTL

Dependencies

# install.packages(c("devtools","MatrixEQTL"))
# source("http://www.bioconductor.org/biocLite.R")
# biocLite(c("Biobase"))
library(devtools)
library(broom)
library(Biobase)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
library(MatrixEQTL)

Download the data

Here we are going to follow along with the tutorial on MatrixEQTL. First we find the files http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/runit.html

base.dir = find.package("MatrixEQTL")
SNP_file_name = paste(base.dir, "/data/SNP.txt", sep = "");
expression_file_name = paste(base.dir, "/data/GE.txt", sep = "")
covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep = "")
output_file_name = tempfile()

Next we load the data so we can see it

expr = read.table(expression_file_name,
                  sep = "\t",
                  header = T, 
                  row.names = 1)
expr[1,]
##         Sam_01 Sam_02 Sam_03 Sam_04 Sam_05 Sam_06 Sam_07 Sam_08 Sam_09
## Gene_01   4.91   4.63   5.18   5.07   5.74   5.09   5.31   5.29   4.73
##         Sam_10 Sam_11 Sam_12 Sam_13 Sam_14 Sam_15 Sam_16
## Gene_01   5.72   4.75   4.54   5.01   5.03   4.84   4.44
snps = read.table(SNP_file_name,
                  sep = "\t",
                  header = T,
                  row.names = 1)
snps[1,]
##        Sam_01 Sam_02 Sam_03 Sam_04 Sam_05 Sam_06 Sam_07 Sam_08 Sam_09
## Snp_01      2      0      2      0      2      1      2      1      1
##        Sam_10 Sam_11 Sam_12 Sam_13 Sam_14 Sam_15 Sam_16
## Snp_01      1      2      2      1      2      2      1
cvrt = read.table(covariates_file_name,
                  sep = "\t",
                  header = T,
                  row.names = 1)

eQTL is linear regession

The simplest eQTL analysis just computes linear regression models for each SNP/gene pair.

e1 = as.numeric(expr[1,])
s1 = as.numeric(snps[1,])
lm1 = lm(e1 ~ s1)
tidy(lm1)
##          term   estimate std.error  statistic      p.value
## 1 (Intercept) 4.92967742 0.2137508 23.0627348 1.546033e-12
## 2          s1 0.06387097 0.1386998  0.4604979 6.522304e-01

We can visualize the data and the model fits

plot(e1 ~ jitter(s1),
     col = (s1 + 1),
     xaxt = "n",
     xlab = "Genotype",
     ylab = "Expression")

axis(1, at = c(0:2), labels = c("AA", "Aa", "aa"))

lines(lm1$fitted ~ s1, type = "b", pch = 15, col = "darkgrey")

Fitting many eQTL models with MatrixEQTL

Set general parameters

We need to set up the p-value cutoff and the error model (in this case assuming independent errors)

pvOutputThreshold = 1e-2
errorCovariance = numeric()
useModel = modelLINEAR

Set the data up

Now we need to set up the snp and gene expression data in the special format required by the MatrixEQTL package

snps = SlicedData$new()
snps$fileDelimiter = "\t"       # the TAB character
snps$fileOmitCharacters = "NA"  # denote missing values;
snps$fileSkipRows = 1           # one row of column labels
snps$fileSkipColumns = 1        # one column of row labels
snps$fileSliceSize = 2000       # read file in pieces of 2,000 rows
snps$LoadFile( SNP_file_name )
## Rows read:  15  done.
gene = SlicedData$new()
gene$fileDelimiter = "\t"           # the TAB character
gene$fileOmitCharacters = "NA"      # denote missing values;
gene$fileSkipRows = 1               # one row of column labels
gene$fileSkipColumns = 1            # one column of row labels
gene$fileSliceSize = 2000           # read file in pieces of 2,000 rows
gene$LoadFile(expression_file_name)
## Rows read:  10  done.
cvrt = SlicedData$new()

Running MatrixEQTL

We can now run the code to calculate the eQTL that we are interested in

me = Matrix_eQTL_engine(snps = snps,
                        gene = gene,
                        cvrt = cvrt,
                        output_file_name = NULL,
                        # vOutputThreshold = pvOutputThreshold,
                        useModel = useModel,
                        errorCovariance = errorCovariance,
                        verbose = TRUE,
                        pvalue.hist = TRUE,
                        min.pv.by.genesnp = FALSE,
                        noFDRsaveMemory = FALSE)
## Processing covariates 
## Task finished in  0  seconds
## Processing gene expression data (imputation, residualization, etc.) 
## Task finished in  0.002  seconds
## Creating output file(s) 
## Task finished in  0.008  seconds
## Performing eQTL analysis 
## 100.00% done, 0 eQTLs
## No significant associations were found.
## Task finished in  0.004  seconds
## 

Understanding the results

We can make a plot of all the p-values from the tests

plot(me)

We can also figure look at the number and type of eQTL

me$all$neqtls
## [1] 0
me$all$eqtls
## [1] snps      gene      beta      statistic pvalue    FDR      
## <0 rows> (or 0-length row.names)

More information

eQTL is an entire field of research.

MatrixEQTL package * http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/

MatrixEQTL paper * http://bioinformatics.oxfordjournals.org/content/28/10/1353.abstract?keytype=ref&ijkey=zjMWpHTAUk7OJFw

Surrogate variables applied to eQTL * http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.0030161

A Bayesian sva framework for eQTL * http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000770

Bioconductor eQTL workflow * http://www.bioconductor.org/help/workflows/eQTL/

Session information

devtools::session_info()
## Session info -------------------------------------------------------------
##  setting  value                       
##  version  R version 3.4.1 (2017-06-30)
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language en_US                       
##  collate  en_US.UTF-8                 
##  tz       America/New_York            
##  date     2017-07-25
## Packages -----------------------------------------------------------------
##  package      * version date       source        
##  assertthat     0.2.0   2017-04-11 CRAN (R 3.4.1)
##  backports      1.1.0   2017-05-22 CRAN (R 3.4.1)
##  base         * 3.4.1   2017-07-08 local         
##  bindr          0.1     2016-11-13 CRAN (R 3.4.1)
##  bindrcpp       0.2     2017-06-17 CRAN (R 3.4.1)
##  Biobase      * 2.36.2  2017-07-25 Bioconductor  
##  BiocGenerics * 0.22.0  2017-07-25 Bioconductor  
##  broom        * 0.4.2   2017-02-13 CRAN (R 3.4.1)
##  compiler       3.4.1   2017-07-08 local         
##  datasets     * 3.4.1   2017-07-08 local         
##  devtools     * 1.13.2  2017-06-02 CRAN (R 3.4.1)
##  digest         0.6.12  2017-01-27 CRAN (R 3.4.1)
##  dplyr          0.7.2   2017-07-20 CRAN (R 3.4.1)
##  evaluate       0.10.1  2017-06-24 CRAN (R 3.4.1)
##  foreign        0.8-69  2017-06-21 CRAN (R 3.4.1)
##  glue           1.1.1   2017-06-21 CRAN (R 3.4.1)
##  graphics     * 3.4.1   2017-07-08 local         
##  grDevices    * 3.4.1   2017-07-08 local         
##  grid           3.4.1   2017-07-08 local         
##  htmltools      0.3.6   2017-04-28 CRAN (R 3.4.1)
##  knitr          1.16    2017-05-18 CRAN (R 3.4.1)
##  lattice        0.20-35 2017-03-25 CRAN (R 3.4.1)
##  magrittr       1.5     2014-11-22 CRAN (R 3.4.1)
##  MatrixEQTL   * 2.1.1   2015-02-03 CRAN (R 3.4.1)
##  memoise        1.1.0   2017-04-21 CRAN (R 3.4.1)
##  methods      * 3.4.1   2017-07-08 local         
##  mnormt         1.5-5   2016-10-15 CRAN (R 3.4.1)
##  nlme           3.1-131 2017-02-06 CRAN (R 3.4.1)
##  parallel     * 3.4.1   2017-07-08 local         
##  pkgconfig      2.0.1   2017-03-21 CRAN (R 3.4.1)
##  plyr           1.8.4   2016-06-08 CRAN (R 3.4.1)
##  psych          1.7.5   2017-05-03 CRAN (R 3.4.1)
##  R6             2.2.2   2017-06-17 CRAN (R 3.4.1)
##  Rcpp           0.12.12 2017-07-15 CRAN (R 3.4.1)
##  reshape2       1.4.2   2016-10-22 CRAN (R 3.4.1)
##  rlang          0.1.1   2017-05-18 CRAN (R 3.4.1)
##  rmarkdown      1.6     2017-06-15 CRAN (R 3.4.1)
##  rprojroot      1.2     2017-01-16 CRAN (R 3.4.1)
##  stats        * 3.4.1   2017-07-08 local         
##  stringi        1.1.5   2017-04-07 CRAN (R 3.4.1)
##  stringr        1.2.0   2017-02-18 CRAN (R 3.4.1)
##  tibble         1.3.3   2017-05-28 CRAN (R 3.4.1)
##  tidyr          0.6.3   2017-05-15 CRAN (R 3.4.1)
##  tools          3.4.1   2017-07-08 local         
##  utils        * 3.4.1   2017-07-08 local         
##  withr          1.0.2   2016-06-20 CRAN (R 3.4.1)
##  yaml           2.1.14  2016-11-12 CRAN (R 3.4.1)