1 Introduction

This document shows the steps used to process the LUAD DNA methylation data from GDC.

6 Read and process IDAT files

`GDCprepare` uses `openSesame` from the sesame package with its default arguments. The source of the internal function that reads the IDAT files is printed below.

## function (files, barcode, summarizedExperiment, platform, legacy) 
## {
##     if (!requireNamespace("sesame", quietly = TRUE)) {
##         stop("sesame package is needed for this function to work. Please install it.", 
##             call. = FALSE)
##     }
##     moved.files <- sapply(files, USE.NAMES = FALSE, function(x) {
##         if (grepl("Raw_intensities", dirname(dirname(x)))) {
##             return(file.path(dirname(dirname(x)), basename(x)))
##         }
##         return(x)
##     })
##     plyr::a_ply(files, 1, function(x) {
##         if (grepl("Raw_intensities", dirname(dirname(x)))) {
##             tryCatch(move(x, file.path(dirname(dirname(x)), basename(x)), 
##                 keep.copy = FALSE), error = function(e) {
##             })
##         }
##     })
##     samples <- unique(gsub("_Grn.idat|_Red.idat", "", moved.files))
##     message("Processing  IDATs with Sesame - http://bioconductor.org/packages/sesame/")
##     message("Running opensesame - applying quality masking and nondetection masking (threshold P-value 0.05)")
##     message("Please cite: doi: 10.1093/nar/gky691 and 10.1093/nar/gkt090")
##     betas <- sesame::openSesame(samples)
##     barcode <- unique(data.frame(file = gsub("_Grn.idat|_Red.idat", 
##         "", basename(moved.files)), barcode = barcode))
##     colnames(betas) <- barcode$barcode[match(basename(samples), 
##         barcode$file)]
##     if (summarizedExperiment) {
##         met.platform <- "EPIC"
##         if (grepl("450", platform)) 
##             met.platform <- "450K"
##         if (grepl("27", platform)) 
##             met.platform <- "27K"
##         betas <- makeSEFromDNAMethylationMatrix(betas, genome = ifelse(legacy, 
##             "hg19", "hg38"), met.platform = met.platform)
##         colData(betas) <- DataFrame(colDataPrepare(colnames(betas)))
##     }
##     return(betas)
## }
## <bytecode: 0x55b2b31d1740>
## <environment: namespace:TCGAbiolinks>
## Raw data has 485577 probes
## Raw data has 507 samples

7 Sample filtering

Remove FFPE samples. The list of FFPE cases is available at: http://gdac.broadinstitute.org/runs/stddata__2016_01_28/samples_report/FFPE_Cases.html

## 
## FALSE  TRUE 
##   496    11

Remove sample replicates. For example, if a patient has two samples (-01A and -01B), we keep -01A.

##      TCGA-44-6775-01                TCGA-44-5645-01               
## [1,] "TCGA-44-6775-01A-11D-1856-05" "TCGA-44-5645-01A-01D-1626-05"
## [2,] "TCGA-44-6775-01A-11D-A276-05" "TCGA-44-5645-01A-01D-A276-05"
##      TCGA-44-6146-01                TCGA-44-6147-01               
## [1,] "TCGA-44-6146-01A-11D-1756-05" "TCGA-44-6147-01A-11D-1756-05"
## [2,] "TCGA-44-6146-01A-11D-A276-05" "TCGA-44-6147-01A-11D-A276-05"
## 
## FALSE 
##   492

9 Final dataset

## 
##   Primary solid Tumor Recurrent Solid Tumor   Solid Tissue Normal 
##                   458                     2                    32
## Raw data has 408956 probes
## Raw data has 492 samples

10 Samples metadata

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

12 Session information

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.1 (2019-07-05)
##  os       Ubuntu 19.10                
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language en_US                       
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2019-12-09                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  ! package                * version   date       lib source        
##    annotate                 1.64.0    2019-10-29 [1] Bioconductor  
##    AnnotationDbi            1.48.0    2019-10-29 [1] Bioconductor  
##    AnnotationHub          * 2.18.0    2019-10-29 [1] Bioconductor  
##    aroma.light              3.16.0    2019-10-29 [1] Bioconductor  
##    askpass                  1.1       2019-01-13 [2] CRAN (R 3.6.1)
##    assertthat               0.2.1     2019-03-21 [2] CRAN (R 3.6.1)
##    backports                1.1.5     2019-10-02 [2] CRAN (R 3.6.1)
##    Biobase                * 2.46.0    2019-10-29 [1] Bioconductor  
##    BiocFileCache          * 1.10.0    2019-10-29 [1] Bioconductor  
##    BiocGenerics           * 0.32.0    2019-10-29 [1] Bioconductor  
##    BiocManager              1.30.10   2019-11-16 [2] CRAN (R 3.6.1)
##    BiocParallel           * 1.20.0    2019-10-30 [1] Bioconductor  
##    BiocVersion              3.10.1    2019-06-06 [1] Bioconductor  
##    biomaRt                  2.42.0    2019-10-29 [1] Bioconductor  
##    Biostrings               2.54.0    2019-10-29 [1] Bioconductor  
##    bit                      1.1-14    2018-05-29 [2] CRAN (R 3.6.1)
##    bit64                    0.9-7     2017-05-08 [2] CRAN (R 3.6.1)
##    bitops                   1.0-6     2013-08-17 [2] CRAN (R 3.6.1)
##    blob                     1.2.0     2019-07-09 [2] CRAN (R 3.6.1)
##    broom                    0.5.2     2019-04-07 [2] CRAN (R 3.6.1)
##    callr                    3.4.0     2019-12-09 [2] CRAN (R 3.6.1)
##    cli                      2.0.0     2019-12-09 [2] CRAN (R 3.6.1)
##    codetools                0.2-16    2018-12-24 [4] CRAN (R 3.6.0)
##    colorspace               1.4-1     2019-03-18 [2] CRAN (R 3.6.1)
##    crayon                   1.3.4     2017-09-16 [2] CRAN (R 3.6.1)
##    crosstalk                1.0.0     2016-12-21 [2] CRAN (R 3.6.1)
##    curl                     4.3       2019-12-02 [2] CRAN (R 3.6.1)
##    data.table               1.12.6    2019-10-18 [1] CRAN (R 3.6.1)
##    DBI                      1.0.0     2018-05-02 [2] CRAN (R 3.6.1)
##    dbplyr                 * 1.4.2     2019-06-17 [2] CRAN (R 3.6.1)
##    DelayedArray           * 0.12.0    2019-10-29 [1] Bioconductor  
##    desc                     1.2.0     2018-05-01 [2] CRAN (R 3.6.1)
##    DESeq                    1.38.0    2019-10-29 [1] Bioconductor  
##    devtools                 2.2.1     2019-09-24 [2] CRAN (R 3.6.1)
##    digest                   0.6.22    2019-10-21 [1] CRAN (R 3.6.1)
##    DNAcopy                  1.60.0    2019-10-29 [1] Bioconductor  
##    doParallel               1.0.15    2019-08-02 [2] CRAN (R 3.6.1)
##    downloader               0.4       2015-07-09 [2] CRAN (R 3.6.1)
##    dplyr                  * 0.8.3     2019-07-04 [2] CRAN (R 3.6.1)
##    DT                       0.10      2019-11-12 [2] CRAN (R 3.6.1)
##    EDASeq                   2.20.0    2019-10-29 [1] Bioconductor  
##    edgeR                    3.28.0    2019-10-29 [1] Bioconductor  
##    ellipsis                 0.3.0     2019-09-20 [2] CRAN (R 3.6.1)
##    evaluate                 0.14      2019-05-28 [2] CRAN (R 3.6.1)
##    ExperimentHub          * 1.12.0    2019-10-29 [1] Bioconductor  
##    fansi                    0.4.0     2018-10-05 [2] CRAN (R 3.6.1)
##    fastmap                  1.0.1     2019-10-08 [2] CRAN (R 3.6.1)
##    foreach                  1.4.7     2019-07-27 [2] CRAN (R 3.6.1)
##    fs                       1.3.1     2019-05-06 [2] CRAN (R 3.6.1)
##    genefilter               1.68.0    2019-10-29 [1] Bioconductor  
##    geneplotter              1.64.0    2019-10-29 [1] Bioconductor  
##    generics                 0.0.2     2018-11-29 [2] CRAN (R 3.6.1)
##    GenomeInfoDb           * 1.22.0    2019-10-29 [1] Bioconductor  
##    GenomeInfoDbData         1.2.2     2019-11-06 [1] Bioconductor  
##    GenomicAlignments        1.22.0    2019-10-29 [1] Bioconductor  
##    GenomicFeatures          1.38.0    2019-10-29 [1] Bioconductor  
##    GenomicRanges          * 1.38.0    2019-10-29 [1] Bioconductor  
##    ggplot2                  3.2.1     2019-08-10 [2] CRAN (R 3.6.1)
##    ggpubr                   0.2.4     2019-11-14 [2] CRAN (R 3.6.1)
##    ggrepel                  0.8.1     2019-05-07 [2] CRAN (R 3.6.1)
##    ggsignif                 0.6.0     2019-08-08 [2] CRAN (R 3.6.1)
##    ggthemes                 4.2.0     2019-05-13 [2] CRAN (R 3.6.1)
##    glue                     1.3.1     2019-03-12 [2] CRAN (R 3.6.1)
##    gridExtra                2.3       2017-09-09 [2] CRAN (R 3.6.1)
##    gtable                   0.3.0     2019-03-25 [1] CRAN (R 3.6.1)
##    hms                      0.5.2     2019-10-30 [2] CRAN (R 3.6.1)
##    htmltools                0.4.0     2019-10-04 [2] CRAN (R 3.6.1)
##    htmlwidgets              1.5.1     2019-10-08 [2] CRAN (R 3.6.1)
##    httpuv                   1.5.2     2019-09-11 [2] CRAN (R 3.6.1)
##    httr                     1.4.1     2019-08-05 [2] CRAN (R 3.6.1)
##    hwriter                  1.3.2     2014-09-10 [2] CRAN (R 3.6.1)
##    interactiveDisplayBase   1.24.0    2019-10-29 [1] Bioconductor  
##    IRanges                * 2.20.0    2019-10-29 [1] Bioconductor  
##    iterators                1.0.12    2019-07-26 [2] CRAN (R 3.6.1)
##    jsonlite                 1.6       2018-12-07 [2] CRAN (R 3.6.1)
##    km.ci                    0.5-2     2009-08-30 [2] CRAN (R 3.6.1)
##    KMsurv                   0.1-5     2012-12-03 [2] CRAN (R 3.6.1)
##    knitr                    1.26      2019-11-12 [2] CRAN (R 3.6.1)
##    later                    1.0.0     2019-10-04 [2] CRAN (R 3.6.1)
##    lattice                  0.20-38   2018-11-04 [4] CRAN (R 3.6.0)
##    latticeExtra             0.6-28    2016-02-09 [2] CRAN (R 3.6.1)
##    lazyeval                 0.2.2     2019-03-15 [2] CRAN (R 3.6.1)
##    lifecycle                0.1.0     2019-08-01 [2] CRAN (R 3.6.1)
##    limma                    3.42.0    2019-10-29 [1] Bioconductor  
##    locfit                   1.5-9.1   2013-04-20 [2] CRAN (R 3.6.1)
##    magrittr                 1.5       2014-11-22 [2] CRAN (R 3.6.1)
##    Matrix                   1.2-18    2019-11-27 [2] CRAN (R 3.6.1)
##    matrixStats            * 0.55.0    2019-09-07 [2] CRAN (R 3.6.1)
##    memoise                  1.1.0     2017-04-21 [2] CRAN (R 3.6.1)
##    mgcv                     1.8-31    2019-11-09 [4] CRAN (R 3.6.1)
##    mime                     0.7       2019-06-11 [2] CRAN (R 3.6.1)
##    munsell                  0.5.0     2018-06-12 [2] CRAN (R 3.6.1)
##    nlme                     3.1-142   2019-11-07 [4] CRAN (R 3.6.1)
##    openssl                  1.4.1     2019-07-18 [2] CRAN (R 3.6.1)
##    parsetools               0.1.1     2019-07-11 [2] CRAN (R 3.6.1)
##    pillar                   1.4.2     2019-06-29 [2] CRAN (R 3.6.1)
##    pkgbuild                 1.0.6     2019-10-09 [2] CRAN (R 3.6.1)
##    pkgcond                  0.1.0     2018-12-03 [2] CRAN (R 3.6.1)
##    pkgconfig                2.0.3     2019-09-22 [2] CRAN (R 3.6.1)
##    pkgload                  1.0.2     2018-10-29 [2] CRAN (R 3.6.1)
##    plyr                     1.8.4     2016-06-08 [2] CRAN (R 3.6.1)
##    postlogic                0.1.0     2018-11-26 [2] CRAN (R 3.6.1)
##    preprocessCore           1.48.0    2019-10-29 [1] Bioconductor  
##    prettyunits              1.0.2     2015-07-13 [2] CRAN (R 3.6.1)
##    processx                 3.4.1     2019-07-18 [2] CRAN (R 3.6.1)
##    progress                 1.2.2     2019-05-16 [2] CRAN (R 3.6.1)
##    promises                 1.1.0     2019-10-04 [2] CRAN (R 3.6.1)
##    ps                       1.3.0     2018-12-21 [2] CRAN (R 3.6.1)
##    purrr                    0.3.3     2019-10-18 [1] CRAN (R 3.6.1)
##    purrrogress              0.1.1     2019-07-22 [2] CRAN (R 3.6.1)
##    R.methodsS3              1.7.1     2016-02-16 [2] CRAN (R 3.6.1)
##    R.oo                     1.23.0    2019-11-03 [1] CRAN (R 3.6.1)
##    R.utils                  2.9.2     2019-12-08 [2] CRAN (R 3.6.1)
##    R6                       2.4.1     2019-11-12 [2] CRAN (R 3.6.1)
##    randomForest             4.6-14    2018-03-25 [2] CRAN (R 3.6.1)
##    rappdirs                 0.3.1     2016-03-28 [2] CRAN (R 3.6.1)
##    RColorBrewer             1.1-2     2014-12-07 [1] CRAN (R 3.6.1)
##    Rcpp                     1.0.3     2019-11-08 [2] CRAN (R 3.6.1)
##    RCurl                    1.95-4.12 2019-03-04 [2] CRAN (R 3.6.1)
##    readr                    1.3.1     2018-12-21 [2] CRAN (R 3.6.1)
##    remotes                  2.1.0     2019-06-24 [2] CRAN (R 3.6.1)
##    rlang                    0.4.2     2019-11-23 [2] CRAN (R 3.6.1)
##    rmarkdown                1.16      2019-10-01 [1] CRAN (R 3.6.1)
##    rprojroot                1.3-2     2018-01-03 [2] CRAN (R 3.6.1)
##    Rsamtools                2.2.0     2019-10-29 [1] Bioconductor  
##    RSQLite                  2.1.4     2019-12-04 [2] CRAN (R 3.6.1)
##    rtracklayer              1.46.0    2019-10-29 [1] Bioconductor  
##    rvest                    0.3.5     2019-11-08 [2] CRAN (R 3.6.1)
##    S4Vectors              * 0.24.0    2019-10-29 [1] Bioconductor  
##    scales                   1.1.0     2019-11-18 [2] CRAN (R 3.6.1)
##    selectr                  0.4-2     2019-11-20 [2] CRAN (R 3.6.1)
##    sesame                 * 1.4.0     2019-10-29 [1] Bioconductor  
##    sesameData             * 1.4.0     2019-11-05 [1] Bioconductor  
##    sessioninfo              1.1.1     2018-11-05 [2] CRAN (R 3.6.1)
##    shiny                    1.4.0     2019-10-10 [2] CRAN (R 3.6.1)
##    ShortRead                1.44.0    2019-10-29 [1] Bioconductor  
##    stringi                  1.4.3     2019-03-12 [2] CRAN (R 3.6.1)
##    stringr                  1.4.0     2019-02-10 [2] CRAN (R 3.6.1)
##    SummarizedExperiment   * 1.16.0    2019-10-29 [1] Bioconductor  
##    survival                 3.1-8     2019-12-03 [4] CRAN (R 3.6.1)
##    survminer                0.4.6     2019-09-03 [2] CRAN (R 3.6.1)
##    survMisc                 0.5.5     2018-07-05 [2] CRAN (R 3.6.1)
##    sva                      3.34.0    2019-10-29 [1] Bioconductor  
##  R TCGAbiolinks           * 2.15.2    <NA>       [2] <NA>          
##    TCGAbiolinksGUI.data   * 1.7.0     2019-12-04 [2] Bioconductor  
##    testextra                0.1.0     2019-01-18 [2] CRAN (R 3.6.1)
##    testthat                 2.3.1     2019-12-01 [2] CRAN (R 3.6.1)
##    tibble                   2.1.3     2019-06-06 [2] CRAN (R 3.6.1)
##    tidyr                    1.0.0     2019-09-11 [2] CRAN (R 3.6.1)
##    tidyselect               0.2.5     2018-10-11 [2] CRAN (R 3.6.1)
##    usethis                  1.5.1     2019-07-04 [2] CRAN (R 3.6.1)
##    vctrs                    0.2.0     2019-07-05 [2] CRAN (R 3.6.1)
##    wheatmap                 0.1.0     2018-03-15 [2] CRAN (R 3.6.1)
##    withr                    2.1.2     2018-03-15 [2] CRAN (R 3.6.1)
##    xfun                     0.10      2019-10-01 [1] CRAN (R 3.6.1)
##    XML                      3.98-1.20 2019-06-06 [2] CRAN (R 3.6.1)
##    xml2                     1.2.2     2019-08-09 [2] CRAN (R 3.6.1)
##    xtable                   1.8-4     2019-04-21 [2] CRAN (R 3.6.1)
##    XVector                  0.26.0    2019-10-29 [1] Bioconductor  
##    yaml                     2.2.0     2018-07-25 [2] CRAN (R 3.6.1)
##    zeallot                  0.1.0     2018-01-28 [2] CRAN (R 3.6.1)
##    zlibbioc                 1.32.0    2019-10-29 [1] Bioconductor  
##    zoo                      1.8-6     2019-05-28 [2] CRAN (R 3.6.1)
## 
## [1] /home/tiagochst/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library
## 
##  R ── Package was removed from disk.