Contents

1 Enumerate mutations by symbol in BRCA, couple to overall survival

suppressMessages({
bmut = curatedTCGAData("BRCA", "Mutation", dry.run=FALSE, version="2.0.1")
})
sampleTables(bmut)
## $`BRCA_Mutation-20160128`
## 
##  01  06 
## 988   5

1.1 Filter only for primary solid tumors (code 01)

bmut.srv <- TCGAprimaryTumors(bmut)
## harmonizing input:
##   removing 5 sampleMap rows with 'colname' not in colnames of experiments
sampleTables(bmut.srv)
## $`BRCA_Mutation-20160128`
## 
##  01 
## 988

1.2 Check for replicates

anyReplicated(bmut.srv)
## BRCA_Mutation-20160128 
##                   TRUE
## Obtain names of replicate samples by colData rowname
replicates <- Filter(length,
    lapply(replicated(bmut.srv)[[1]], function(x) colnames(bmut.srv[[1]])[x])
)

1.3 Total number of patients with replicates

sum(sapply(replicated(bmut.srv), any))
## [1] 9
## OR
length(replicates)
## [1] 9

1.4 Remove replicate observations

bmut.surv <- bmut.srv[,
    list("BRCA_Mutation-20160128" =
        !colnames(bmut.srv)[[1]] %in% unlist(sapply(replicates, tail, -1)))
]
## harmonizing input:
##   removing 11 sampleMap rows with 'colname' not in colnames of experiments
stopifnot(!anyReplicated(bmut.surv))

1.5 See mutation classifications

table(mcols(bmut.surv[[1]])$Variant_Classification)
## 
##   Frame_Shift_Del   Frame_Shift_Ins      In_Frame_Del      In_Frame_Ins 
##              3162              2567               627               161 
## Missense_Mutation Nonsense_Mutation  Nonstop_Mutation               RNA 
##             55063              4841               133              4474 
##            Silent       Splice_Site 
##             17901              1561

1.6 Select only non-silent mutations

## mcolsFilter (?)
ragex <- bmut.surv[[1]]
ragex <- ragex[mcols(bmut.surv[[1]])$Variant_Classification != "Silent", ]
table(mcols(ragex)$Variant_Classification)
## 
##   Frame_Shift_Del   Frame_Shift_Ins      In_Frame_Del      In_Frame_Ins 
##              3162              2567               627               161 
## Missense_Mutation Nonsense_Mutation  Nonstop_Mutation               RNA 
##             55063              4841               133              4474 
##       Splice_Site 
##              1561
bmut[[1]] <- ragex
## harmonizing input:
##   removing 16 sampleMap rows with 'colname' not in colnames of experiments

1.7 Set up survival time

colData(bmut.surv) <- colData(bmut)[!is.na(bmut$OS.Time), ]
## harmonizing input:
##   removing 184 sampleMap rows with 'primary' not in colData
osurv <- Surv(bmut.surv$OS.Time/365.25, bmut.surv$OS.event)

1.8 Obtain Hugo_Symbols for each sample

system.time(
  mutsyms <- as(
    lapply(as(bmut.surv[[1L]], "GRangesList"), function(x) x$Hugo_Symbol),
  "CharacterList")
)
##    user  system elapsed 
##   3.359   0.004   3.364
mutsyms
## CharacterList of length 793
## [["TCGA-A1-A0SB-01A-11D-A142-09"]] ABLIM1 ADAMTS20 CADM2 ... ZNF574 ZNF777
## [["TCGA-A1-A0SD-01A-11D-A10Y-09"]] ANK3 CASK CNTFR ... ZFP91 ZNF544 ZNF740
## [["TCGA-A1-A0SE-01A-11D-A099-09"]] APLF ARRDC4 B3GNT1 ... WDR91 ZFHX4 ZNF541
## [["TCGA-A1-A0SF-01A-11D-A142-09"]] ACRBP ARL6IP6 BEST3 ... TPH1 UBE2QL1 ZNF91
## [["TCGA-A1-A0SG-01A-11D-A142-09"]] ARC BPIFB3 C2orf73 ... TMEM99 TYSND1 ZNF217
## [["TCGA-A1-A0SH-01A-11D-A099-09"]] ACSL4 AHCTF1 ALPK3 ... WDR87 ZFHX4 ZNF606
## [["TCGA-A1-A0SI-01A-11D-A142-09"]] ABAT ABCA8 ACAN ... ZNF673 ZNF701 ZNF790
## [["TCGA-A1-A0SJ-01A-11D-A099-09"]] ALG1 AMZ2 ASCL3 ... UBAP1L ZBTB11 ZNF543
## [["TCGA-A1-A0SK-01A-12D-A099-09"]] ABCA11P ACBD5 AHNAK ... ZNF66P ZNF717 ZNF777
## [["TCGA-A1-A0SM-01A-11D-A099-09"]] AHDC1 ATP1A4 CLEC12A ... ZAN ZNF229 ZNF804B
## ...
## <783 more elements>

1.9 Sample mutations tally

system.time({
  print(table(sapply(mutsyms, function(x) sum(duplicated(x)))))
})
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   14   15   16 
##  354  184   80   45   24   18    7    5    6    1    2    1    4    1    1    2 
##   17   20   21   22   23   24   25   26   27   28   29   30   31   32   33   37 
##    2    2    2    2    1    2    2    1    1    2    2    2    1    2    3    1 
##   39   48   49   51   52   57   64   66   70   72   73   79   83   90   91   95 
##    1    1    1    2    1    2    1    1    1    1    1    1    1    1    1    1 
##   96  102  104  136  166  170  221  264  726 1009 1469 
##    1    2    1    1    1    1    1    1    1    1    1
##    user  system elapsed 
##   0.015   0.000   0.016

1.10 Plot overall survival curve

plot(survfit(osurv~1), main = "Overall BRCA Survival", xlab = "Years")
## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

1.11 Compare to GDC Data Portal plot

The display, based on 793 observations with non-missing TCGA OS.Time, is consistent with the display at the GDC Data Portal, based on 1077 cases.

2 Obtain gene-stratified survival

gstrat <- function(sym="TTN", mutlist, survdat) {
  stopifnot(inherits(survdat, "Surv"))
  stopifnot(length(survdat) == length(mutlist))
  hassym <- unlist(list(sym) %in% mutlist)
  plot(survfit(survdat~hassym), main=sym, lty=1:2, xlab="Years")
}
gstrat("TTN", mutsyms, osurv)
## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

gstrat("TP53", mutsyms, osurv)
## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

2.1 Frequencies

commut <- tail(sort(table(unlist(mutsyms))), 30)
commut
## 
##   MUC17    APOB   XIRP2  AHNAK2     NEB   OBSCN   AHNAK   MUC5B   HMCN1    RYR3 
##      43      45      45      46      47      47      48      48      50      50 
##   SYNE2   FCGBP   USH2A     DMD    SPEN     FLG CROCCP2     DST    RYR2    CDH1 
##      50      51      51      52      52      56      58      58      62      64 
##   SYNE1    MUC4    MLL3   MUC12  MAP3K1   GATA3   MUC16    TP53     TTN  PIK3CA 
##      65      69      75      78      91      97     134     272     300     312

2.2 Log mutation count per tumor

hist(log(sapply(mutsyms,length)), main="Log mutation count per tumor")

3 Combinations of mutations

hasmut <- function(sym="TP53", mutlist) {
 sapply(mutlist, function(x) sym %in% x)
}
table( hasmut("TP53", mutsyms), hasmut("PIK3CA", mutsyms) )
##        
##         FALSE TRUE
##   FALSE   341  198
##   TRUE    192   62

3.1 Assemble pairs

common_pairs <- combn(names(commut),2)
common_pairs[,1:4]
##      [,1]    [,2]    [,3]     [,4]   
## [1,] "MUC17" "MUC17" "MUC17"  "MUC17"
## [2,] "APOB"  "XIRP2" "AHNAK2" "NEB"

3.2 Greedy search for deleterious pairs

indicate_pair <- function(sym1, sym2, mutlist)
  hasmut(sym1, mutlist) & hasmut(sym2, mutlist)
chk <- apply(common_pairs,2,function(z) indicate_pair(z[1], z[2], mutsyms))
chkp.inds <- which(apply(chk,2,sum)>20) # disallow very rare combos
dim(chk)
## [1] 793 435
dim(chk[,chkp.inds])
## [1] 793  15
chisqs <- apply(chk[,chkp.inds],2,function(z)survdiff(osurv~z)$chisq)
hist(chisqs)

cpr <- common_pairs[,chkp.inds][, which(chisqs>5)]
plot(survfit(osurv~chk[,chkp.inds[which(chisqs>5)]]), lty=1:2, main=paste(cpr, collapse=" & " ))
## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

## Warning: partial match of 'std' to 'std.err'

4 Using domain classification of mutations

dom <- assay(bmut.surv[[1L]], "domain_WU")
length(grep("Znf", na.omit(as.character(dom)))) # frequently noted
## [1] 1857
register(MulticoreParam(parallel::detectCores() - 1L))
system.time(
    mutdoms <- bplapply(
        seq_len(ncol(dom)),
        function(x) as.character(na.omit(dom[,x]))
    )
)
##    user  system elapsed 
## 259.998  97.413  17.471

4.1 Isolate individuals with mutations

4.1.1 Znf domain

noz <- sapply(mutdoms, function(x) length(grep("Znf", x))==0)
table(noz)
## noz
## FALSE  TRUE 
##   520   273
survdiff(osurv~noz)
## Call:
## survdiff(formula = osurv ~ noz)
## 
##             N Observed Expected (O-E)^2/E (O-E)^2/V
## noz=FALSE 520       51     50.1    0.0168    0.0395
## noz=TRUE  273       37     37.9    0.0221    0.0395
## 
##  Chisq= 0  on 1 degrees of freedom, p= 0.8

4.1.2 SH3

nosh3 <- sapply(mutdoms, function(x) length(grep("SH3", x))==0)
survdiff(osurv~nosh3)
## Call:
## survdiff(formula = osurv ~ nosh3)
## 
##               N Observed Expected (O-E)^2/E (O-E)^2/V
## nosh3=FALSE 109        8     8.32   0.01215    0.0136
## nosh3=TRUE  684       80    79.68   0.00127    0.0136
## 
##  Chisq= 0  on 1 degrees of freedom, p= 0.9