movics

library(MOVICS)

Registered S3 methods overwritten by 'ggpp':
  method                  from   
  heightDetails.titleGrob ggplot2
  widthDetails.titleGrob  ggplot2

load(system.file("extdata", "brca.tcga.RData", package = "MOVICS", mustWork = TRUE))
load(system.file("extdata", "brca.yau.RData",  package = "MOVICS", mustWork = TRUE))

names(brca.tcga)

[1] "mRNA.expr"   "lncRNA.expr" "meth.beta"   "mut.status"  "count"      
[6] "fpkm"        "maf"         "segment"     "clin.info"

names(brca.yau)

[1] "mRNA.expr" "clin.info"

# extract multi-omics data
mo.data   <- brca.tcga[1:4]

## Extracting data for downstream analysis
count     <- brca.tcga$count
fpkm      <- brca.tcga$fpkm
maf       <- brca.tcga$maf
segment   <- brca.tcga$segment
surv.info <- brca.tcga$clin.info

getElites() function to filter out features that meet some stringent requirements, and those features that are preserved in this procedure are considered elites by MOVICS. Five filtering methods are provided here, namely mad for median absolute deviation, sd for standard deviation, pca for principal components analysis, cox for univariate Cox proportional hazards regression, and freq for binary omics data. This function also handles missing values coded in NA
 by removing them directly or imputing them by k
 nearest neighbors using a Euclidean metric through argument of na.action

# ident optimal clustering number 
optk.brca <- getClustNum(data        = mo.data,
                         is.binary   = c(F,F,F,T),
                         try.N.clust = 2:8, # trying cluster number from 2 to 8
                         fig.name    = "CLUSTER NUMBER OF TCGA-BRCA")

calculating Cluster Prediction Index...

5% complete
5% complete

10% complete
10% complete

15% complete
15% complete

20% complete

25% complete
25% complete

30% complete
30% complete

35% complete
35% complete

40% complete

45% complete
45% complete

50% complete
50% complete

55% complete
55% complete

60% complete

65% complete
65% complete

70% complete
70% complete

75% complete
75% complete

80% complete

85% complete
85% complete

90% complete
90% complete

95% complete
95% complete

100% complete

calculating Gap-statistics...

visualization done...

--the imputed optimal cluster number is 3 arbitrarily, but it would be better referring to other priori knowledge.

#  iClusterBayes 
iClusterBayes.res <- getiClusterBayes(data        = mo.data,
                                      N.clust     = 5,
                                      type        = c("gaussian","gaussian","gaussian","binomial"),
                                      n.burnin    = 1800,
                                      n.draw      = 1200,
                                      prior.gamma = c(0.5, 0.5, 0.5, 0.5),
                                      sdev        = 0.05,
                                      thin        = 3)

clustering done...

feature selection done...

# convert beta value to M value for stronger signal
indata <- mo.data
indata$meth.beta <- log2(indata$meth.beta / (1 - indata$meth.beta))

# data normalization for heatmap
plotdata <- getStdiz(data       = indata,
                     halfwidth  = c(2,2,2,NA), # no truncation for mutation
                     centerFlag = c(T,T,T,F), # no center for mutation
                     scaleFlag  = c(T,T,T,F)) # no scale for mutation

feat   <- iClusterBayes.res$feat.res
feat1  <- feat[which(feat$dataset == "mRNA.expr"),][1:10,"feature"] 
feat2  <- feat[which(feat$dataset == "lncRNA.expr"),][1:10,"feature"]
feat3  <- feat[which(feat$dataset == "meth.beta"),][1:10,"feature"]
feat4  <- feat[which(feat$dataset == "mut.status"),][1:10,"feature"]
annRow <- list(feat1, feat2, feat3, feat4)

mRNA.col   <- c("#00FF00", "#008000", "#000000", "#800000", "#FF0000")
lncRNA.col <- c("#6699CC", "white"  , "#FF3C38")
meth.col   <- c("#0074FE", "#96EBF9", "#FEE900", "#F00003")
mut.col    <- c("grey90" , "black")
col.list   <- list(mRNA.col, lncRNA.col, meth.col, mut.col)

# comprehensive heatmap (
getMoHeatmap(data          = plotdata,
             row.title     = c("mRNA","lncRNA","Methylation","Mutation"),
             is.binary     = c(F,F,F,T), 
             legend.name   = c("mRNA.FPKM","lncRNA.FPKM","M value","Mutated"),
             clust.res     = iClusterBayes.res$clust.res, 
             clust.dend    = NULL, # no dendrogram
             show.rownames = c(F,F,F,F),
             show.colnames = FALSE, 
             annRow        = annRow, 
             color         = col.list,
             annCol        = NULL,
             annColors     = NULL,
             width         = 10, 
             height        = 5, 
             fig.name      = "COMPREHENSIVE HEATMAP OF ICLUSTERBAYES")

After identification of cancer subtypes, it is essential to further characterize each subtype by discovering their difference from multiple aspects. To this end, MOVICS provides commonly used downstream analyses in cancer subtyping researches for easily cohesion with results derived from GET Module.

# survival comparison
surv.brca <- compSurv(moic.res         = iClusterBayes.res,
                      surv.info        = surv.info,
                      convt.time       = "m", #m for month as unit
                      surv.median.line = "h", 
                      xyrs.est         = c(5,10), # 5 and 10-year survival
                      fig.name         = "KAPLAN-MEIER CURVE OF CONSENSUSMOIC")

--a total of 643 samples are identified.

--removed missing values.

--leaving 642 observations.

Warning in geom_segment(aes(x = 0, y = max(y2), xend = max(x1), yend = max(y2)), : All aesthetics have length 1, but the data has 4 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
All aesthetics have length 1, but the data has 4 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
All aesthetics have length 1, but the data has 4 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
All aesthetics have length 1, but the data has 4 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
All aesthetics have length 1, but the data has 4 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
All aesthetics have length 1, but the data has 4 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.

print(surv.brca)

$fitd
Call:
survdiff(formula = Surv(futime, fustat) ~ Subtype, data = mosurv.res, 
    na.action = na.exclude)

              N Observed Expected (O-E)^2/E (O-E)^2/V
Subtype=CS1 113       16    14.65     0.124     0.156
Subtype=CS2  69       13     5.61     9.755    10.786
Subtype=CS3 205       23    25.50     0.246     0.376
Subtype=CS4 148        7    18.95     7.536    10.163
Subtype=CS5 107       16    10.29     3.169     3.689

 Chisq= 21.2  on 4 degrees of freedom, p= 3e-04 

$fit
Call: survfit(formula = Surv(futime, fustat) ~ Subtype, data = mosurv.res, 
    na.action = na.exclude, error = "greenwood", type = "kaplan-meier", 
    conf.type = "plain")

      n events median 0.95LCL 0.95UCL
CS1 113     16     NA    97.2      NA
CS2  69     13   71.9    49.4      NA
CS3 205     23  122.6    94.0      NA
CS4 148      7  216.2   113.5      NA
CS5 107     16  102.5    68.8      NA

$xyrs.est
Call: survfit(formula = Surv(futime, fustat) ~ Subtype, data = mosurv.res)

                Subtype=CS1 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
 1825     25      12    0.823  0.0497        0.731        0.926
 3650      3       4    0.504  0.1601        0.270        0.939

                Subtype=CS2 
        time       n.risk      n.event     survival      std.err lower 95% CI 
    1825.000        8.000       12.000        0.563        0.107        0.387 
upper 95% CI 
       0.818 

                Subtype=CS3 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
 1825     42      12    0.837  0.0464        0.751        0.933
 3650      8       8    0.558  0.0926        0.403        0.772

                Subtype=CS4 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
 1825     27       4    0.924  0.0403        0.848            1
 3650      4       2    0.616  0.1799        0.348            1

                Subtype=CS5 
 time n.risk n.event survival std.err lower 95% CI upper 95% CI
 1825     17      10    0.772  0.0708        0.645        0.924
 3650      3       6    0.401  0.1256        0.217        0.741


$overall.p
[1] 0.00029119

$pairwise.p

    Pairwise comparisons using Log-Rank test 

data:  mosurv.res and Subtype 

    CS1    CS2     CS3    CS4   
CS2 0.1538 -       -      -     
CS3 0.6218 0.0079  -      -     
CS4 0.0122 3.9e-05 0.0830 -     
CS5 0.4563 0.4904  0.1538 0.0019

P value adjustment method: BH

## Comparing clin. features

clin.brca <- compClinvar(moic.res      = iClusterBayes.res,
                         var2comp      = surv.info, 
                         strata        = "Subtype",
                         factorVars    = c("PAM50","pstage","fustat"), 
                         nonnormalVars = "futime",
                         exactVars     = "pstage",
                         doWord        = TRUE, 
                         tab.name      = "SUMMARIZATION OF CLINICAL FEATURES")

--all samples matched.

Registered S3 methods overwritten by 'proxy':
  method               from    
  print.registry_field registry
  print.registry_entry registry

print(clin.brca$compTab)

                          level                      CS1
1                      n                             113
2             fustat (%)      0               97 (85.8) 
3                             1               16 (14.2) 
4  futime (median [IQR])        785.00 [470.00, 1605.00]
5              PAM50 (%)  Basal              107 (94.7) 
6                          Her2                1 ( 0.9) 
7                          LumA                1 ( 0.9) 
8                          LumB                0 ( 0.0) 
9                        Normal                4 ( 3.5) 
10            pstage (%)     T1               24 (21.2) 
11                           T2               72 (63.7) 
12                           T3               13 (11.5) 
13                           T4                3 ( 2.7) 
14                           TX                1 ( 0.9) 
15                  age                    55.73 ± 12.14
                        CS2                      CS3                      CS4
1                        69                      205                      148
2                56 (81.2)               182 (88.8)               141 (95.3) 
3                13 (18.8)                23 (11.2)                 7 ( 4.7) 
4  738.00 [433.00, 1272.00] 746.00 [435.00, 1471.00] 883.00 [477.50, 1587.75]
5                 2 ( 2.9)                 2 ( 1.0)                 0 ( 0.0) 
6                33 (47.8)                 2 ( 1.0)                 0 ( 0.0) 
7                14 (20.3)               121 (59.0)               132 (89.2) 
8                13 (18.8)                80 (39.0)                 1 ( 0.7) 
9                 7 (10.1)                 0 ( 0.0)                15 (10.1) 
10               21 (30.4)                59 (28.8)                40 (27.0) 
11               38 (55.1)               121 (59.0)                76 (51.4) 
12                5 ( 7.2)                20 ( 9.8)                32 (21.6) 
13                5 ( 7.2)                 5 ( 2.4)                 0 ( 0.0) 
14                0 ( 0.0)                 0 ( 0.0)                 0 ( 0.0) 
15            58.59 ± 13.19            56.99 ± 14.04            58.13 ± 11.60
                        CS5      p    test
1                       108               
2                91 (84.3)   0.013        
3                17 (15.7)                
4  672.00 [391.00, 1153.00]  0.219 nonnorm
5                 0 ( 0.0)  <0.001        
6                 2 ( 1.9)                
7                76 (70.4)                
8                30 (27.8)                
9                 0 ( 0.0)                
10               28 (25.9)   0.007   exact
11               60 (55.6)                
12               14 (13.0)                
13                6 ( 5.6)                
14                0 ( 0.0)                
15            62.60 ± 12.70  0.001

# drug sensitivity comparison
drug.brca <- compDrugsen(moic.res    = iClusterBayes.res,
                         norm.expr   = fpkm[,iClusterBayes.res$clust.res$samID], 
                         drugs       = c("Cisplatin", "Paclitaxel"), 
                         tissueType  = "breast", 
                         test.method = "nonparametric", # statistical testing 
                         prefix      = "BOXVIOLIN OF ESTIMATED IC50")

--all samples matched.

--log2 transformation done for expression data.

Cisplatin done...

Cisplatin: Kruskal-Wallis rank sum test p value = 1.34e-57
post-hoc pairwise wilcoxon rank sum test with Benjamini-Hochberg adjustment presents below:
    CS1        CS2        CS3        CS4       
CS2 "3.86e-02" " NA"      " NA"      " NA"     
CS3 "2.67e-35" "1.03e-21" " NA"      " NA"     
CS4 "7.31e-18" "6.89e-09" "2.02e-12" " NA"     
CS5 "1.08e-28" "1.45e-19" "5.45e-01" "3.87e-11"

Paclitaxel done...

Paclitaxel: Kruskal-Wallis rank sum test p value = 4.48e-16
post-hoc pairwise wilcoxon rank sum test with Benjamini-Hochberg adjustment presents below:
    CS1        CS2        CS3        CS4       
CS2 "3.29e-08" " NA"      " NA"      " NA"     
CS3 "2.14e-01" "2.72e-13" " NA"      " NA"     
CS4 "2.30e-03" "3.92e-15" "1.64e-02" " NA"     
CS5 "4.87e-01" "1.18e-09" "6.70e-01" "2.14e-02"