library(MultiAssayExperiment)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(biobroom)
## Loading required package: broom
# Load the pre-prepared CCLE MultiAssayExperiment object (MAEO) from disk
x <- readRDS("~/BigData/MultiAssayExperiment/ccleMAEO.rds")
# Show a summary of the experiments it contains
x
## A "MultiAssayExperiment" object of 3 listed
##  experiments with user-defined names and respective classes. 
##  Containing an "Elist" class object of length 3: 
##  [1] CNA: "RangedSummarizedExperiment" - 21217 rows, 477 columns 
##  [2] Mutations: "RangedRaggedAssay" - 61534 rows, 451 columns 
##  [3] mRNA: "ExpressionSet" - 18988 rows, 480 columns 
## To access slots use: 
##  Elist() - to obtain the "Elist" of experiment instances 
##  pData() - for the primary/phenotype "DataFrame" 
##  sampleMap() - for the sample availability "DataFrame" 
##  metadata() - for the metadata object of 'ANY' class 
## See also: subsetByAssay(), subsetByRow(), subsetByColumn()
# Names of the cell lines whose tissue of origin is lung.
# `%in%` is used rather than `==`: `==` returns NA for any sample with a
# missing TissueOrigin, and indexing with NA would inject NA entries into
# lung_names (which are then handed to subsetByColumn below).
lung_names <- rownames(pData(x))[pData(x)$TissueOrigin %in% "LUNG"]

# Subset the data to the lung cell lines
lung_data <- subsetByColumn(x, lung_names)

# Specify some cancer genes of interest
my_genes <- c("PTEN", "TP53", "BRAF", "KRAS", "SMARCA4", "SMARCA2")

# Use piping to combine the subset by column (cell line) and by row (gene) -
# this is pretty quick!
y <- x %>%
  subsetByColumn(lung_names) %>%
  subsetByRow(my_genes)

# What does this look like?
y
## A "MultiAssayExperiment" object of 3 listed
##  experiments with user-defined names and respective classes. 
##  Containing an "Elist" class object of length 3: 
##  [1] CNA: "RangedSummarizedExperiment" - 6 rows, 90 columns 
##  [2] Mutations: "RangedRaggedAssay" - 135 rows, 87 columns 
##  [3] mRNA: "ExpressionSet" - 0 rows, 91 columns 
## To access slots use: 
##  Elist() - to obtain the "Elist" of experiment instances 
##  pData() - for the primary/phenotype "DataFrame" 
##  sampleMap() - for the sample availability "DataFrame" 
##  metadata() - for the metadata object of 'ANY' class 
## See also: subsetByAssay(), subsetByRow(), subsetByColumn()
# Look at the Elist, i.e. the list of experiment instances held by the
# subsetted object
Elist(y)
## "Elist" class object of length 3: 
##  [1] CNA: "RangedSummarizedExperiment" - 6 rows, 90 columns 
##  [2] Mutations: "RangedRaggedAssay" - 135 rows, 87 columns 
##  [3] mRNA: "ExpressionSet" - 0 rows, 91 columns
# Pull the copy number (CNA) experiment out of the Elist and display it
Elist(y)[["CNA"]]
## class: RangedSummarizedExperiment 
## dim: 6 90 
## metadata(0):
## assays(1): counts
## rownames(6): PTEN TP53 ... SMARCA4 SMARCA2
## rowData names(0):
## colnames(90): A549_LUNG CAL12T_LUNG ... SW900_LUNG VMRCLCD_LUNG
## colData names(0):
# Use biobroom's tidy() to turn the copy number experiment into a tidy
# (long-format) data frame
tidy(Elist(y)[["CNA"]])
## Source: local data frame [540 x 3]
## 
##       gene      sample   value
##      (chr)       (chr)   (dbl)
## 1     PTEN   A549_LUNG  0.1261
## 2     TP53   A549_LUNG  0.4549
## 3     BRAF   A549_LUNG  0.1121
## 4     KRAS   A549_LUNG  0.4009
## 5  SMARCA4   A549_LUNG -0.5486
## 6  SMARCA2   A549_LUNG  0.0908
## 7     PTEN CAL12T_LUNG  0.3261
## 8     TP53 CAL12T_LUNG -0.0048
## 9     BRAF CAL12T_LUNG -0.2783
## 10    KRAS CAL12T_LUNG -0.2664
## ..     ...         ...     ...
# Same again for the mutations experiment
tidy(Elist(y)[["Mutations"]])
## Source: local data frame [135 x 7]
## 
##        start       end width   names strand seqname        item
##        (int)     (int) (int)   (chr)  (chr)   (chr)       (chr)
## 1   25398285  25398285     1    KRAS      +   chr12   A549_LUNG
## 2   11121117  11121139    23 SMARCA4      +   chr19   A549_LUNG
## 3    7578526   7578526     1    TP53      +   chr17 CAL12T_LUNG
## 4   11144114  11144114     1 SMARCA4      +   chr19 CAL12T_LUNG
## 5  140481411 140481411     1    BRAF      +    chr7 CAL12T_LUNG
## 6   25398285  25398285     1    KRAS      +   chr12  CALU1_LUNG
## 7    7577570   7577570     1    TP53      +   chr17  CALU3_LUNG
## 8   25380277  25380278     2    KRAS      +   chr12  CALU6_LUNG
## 9    7578263   7578263     1    TP53      +   chr17  CALU6_LUNG
## 10  25398284  25398284     1    KRAS      +   chr12 CORL23_LUNG
## ..       ...       ...   ...     ...    ...     ...         ...
# Row names of the expression data are probe IDs, not gene symbols, so the
# subset by gene symbol leaves an empty ExpressionSet here
Elist(y)[["mRNA"]]
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 0 features, 91 samples 
##   element names: exprs 
## protocolData: none
## phenoData: none
## featureData
##   featureNames:
##   fvarLabels: annoteFeatures
##   fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation: