Data

library(MultiAssayExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(ELMER.data)
library(ELMER)
# get distal probes that are 2kb away from TSS on chromosome 1
distal.probes <- get.feature.probe(
  genome = "hg19", 
  met.platform = "450K"
)
## Accessing DNAm annotation from sesame package for: hg19 - 450K
## snapshotDate(): 2022-04-26
## see ?sesameData and browseVignettes('sesameData') for documentation
## loading from cache
## Returning distal probes: 165935
data(LUSC_RNA_refined,package = "ELMER.data") # GeneExp
data(LUSC_meth_refined,package = "ELMER.data") # Meth

# remove genes with 0 expression for more than 66% of samples
is.gene.expressed.in.at.least.66.percent.of.samples <- rowSums(GeneExp == 0) < ncol(GeneExp)/3

mae <- createMAE(
  exp = GeneExp, 
  met = Meth,
  save = FALSE,
  linearize.exp = TRUE,
  filter.probes = distal.probes,
  filter.genes = is.gene.expressed.in.at.least.66.percent.of.samples[is.gene.expressed.in.at.least.66.percent.of.samples],
  met.platform = "450K",
  genome = "hg19",
  TCGA = TRUE
)
## =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
## Creating a SummarizedExperiment from gene expression input
## =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
## Creating a SummarizedExperiment from DNA methylation input
## Accessing DNAm annotation from sesame package for: hg19 - 450K
## snapshotDate(): 2022-04-26
## see ?sesameData and browseVignettes('sesameData') for documentation
## loading from cache
## Checking if samples have both DNA methylation and Gene expression and if they are in the same order...
## Starting to add information to samples
##  => Add clinical information to samples
##  => Adding TCGA molecular information from marker papers
##  => Information will have prefix 'paper_'
## lusc subtype information from:doi:10.1038/nature11404
## Creating MultiAssayExperiment
mae
## A MultiAssayExperiment object of 2 listed
##  experiments with user-defined names and respective classes.
##  Containing an ExperimentList class object of length 2:
##  [1] DNA methylation: RangedSummarizedExperiment with 1642 rows and 234 columns
##  [2] Gene expression: RangedSummarizedExperiment with 3432 rows and 234 columns
## Functionality:
##  experiments() - obtain the ExperimentList instance
##  colData() - the primary/phenotype DataFrame
##  sampleMap() - the sample coordination DataFrame
##  `$`, `[`, `[[` - extract colData columns, subset, or experiment
##  *Format() - convert into a long or wide DataFrame
##  assays() - convert ExperimentList to a SimpleList of matrices
##  exportClass() - save data to flat files

Analysis

group.col <- "definition"
group1 <-  "Primary solid Tumor"
group2 <- "Solid Tissue Normal"
dir.out <- "result"
diff.dir <-  "hypo" # Search for hypomethylated probes in group 1

sig.diff <- get.diff.meth(
  data = mae, 
  group.col = group.col,
  group1 = group1,
  group2 = group2,
  mode = "supervised",
  sig.dif = 0.3,
  diff.dir = diff.dir,
  cores = 2, 
  dir.out = dir.out,
  pvalue = 0.01
)
nearGenes <- GetNearGenes(
  data = mae, 
  probes = sig.diff$probe, 
  numFlankingGenes = 20
) # 10 upstream and 10 dowstream genes
pair.positive.correlated <- get.pair(
  data = mae,
  group.col = group.col,
  group1 = group1,
  mode = "supervised",
  correlation = "positive", 
  group2 = group2,
  Pe = 0.001,
  nearGenes = nearGenes,
  diff.dir = diff.dir,
  filter.probes = TRUE, # See preAssociationProbeFiltering function
  filter.percentage = 0.05,
  filter.portion = 0.3,
  dir.out = dir.out,
  cores = 2,
  label = diff.dir
)
scatter.plot(
    data = mae,
    byPair = list(probe = pair.positive.correlated$Probe[4], gene =pair.positive.correlated$GeneID[4]), 
    category = "definition", save = FALSE, lm_line = TRUE,correlation = TRUE
) 
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
## Warning in is.na(x): is.na() applied to non-(list or vector) of type 'language'