suppressMessages({
bmut = curatedTCGAData("BRCA", "Mutation", dry.run=FALSE, version="2.0.1")
})
sampleTables(bmut)## $`BRCA_Mutation-20160128`
##
## 01 06
## 988 5
bmut.srv <- TCGAprimaryTumors(bmut)## harmonizing input:
## removing 5 sampleMap rows with 'colname' not in colnames of experiments
sampleTables(bmut.srv)## $`BRCA_Mutation-20160128`
##
## 01
## 988
anyReplicated(bmut.srv)## BRCA_Mutation-20160128
## TRUE
## Obtain names of replicate samples by colData rowname
replicates <- Filter(length,
lapply(replicated(bmut.srv)[[1]], function(x) colnames(bmut.srv[[1]])[x])
)sum(sapply(replicated(bmut.srv), any))## [1] 9
## OR
length(replicates)## [1] 9
bmut.surv <- bmut.srv[,
list("BRCA_Mutation-20160128" =
!colnames(bmut.srv)[[1]] %in% unlist(sapply(replicates, tail, -1)))
]## harmonizing input:
## removing 11 sampleMap rows with 'colname' not in colnames of experiments
stopifnot(!anyReplicated(bmut.surv))table(mcols(bmut.surv[[1]])$Variant_Classification)##
## Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins
## 3162 2567 627 161
## Missense_Mutation Nonsense_Mutation Nonstop_Mutation RNA
## 55063 4841 133 4474
## Silent Splice_Site
## 17901 1561
## mcolsFilter (?)
ragex <- bmut.surv[[1]]
ragex <- ragex[mcols(bmut.surv[[1]])$Variant_Classification != "Silent", ]
table(mcols(ragex)$Variant_Classification)##
## Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins
## 3162 2567 627 161
## Missense_Mutation Nonsense_Mutation Nonstop_Mutation RNA
## 55063 4841 133 4474
## Splice_Site
## 1561
bmut[[1]] <- ragex## harmonizing input:
## removing 16 sampleMap rows with 'colname' not in colnames of experiments
colData(bmut.surv) <- colData(bmut)[!is.na(bmut$OS.Time), ]## harmonizing input:
## removing 184 sampleMap rows with 'primary' not in colData
osurv <- Surv(bmut.surv$OS.Time/365.25, bmut.surv$OS.event)system.time(
mutsyms <- as(
lapply(as(bmut.surv[[1L]], "GRangesList"), function(x) x$Hugo_Symbol),
"CharacterList")
)## user system elapsed
## 3.359 0.004 3.364
mutsyms## CharacterList of length 793
## [["TCGA-A1-A0SB-01A-11D-A142-09"]] ABLIM1 ADAMTS20 CADM2 ... ZNF574 ZNF777
## [["TCGA-A1-A0SD-01A-11D-A10Y-09"]] ANK3 CASK CNTFR ... ZFP91 ZNF544 ZNF740
## [["TCGA-A1-A0SE-01A-11D-A099-09"]] APLF ARRDC4 B3GNT1 ... WDR91 ZFHX4 ZNF541
## [["TCGA-A1-A0SF-01A-11D-A142-09"]] ACRBP ARL6IP6 BEST3 ... TPH1 UBE2QL1 ZNF91
## [["TCGA-A1-A0SG-01A-11D-A142-09"]] ARC BPIFB3 C2orf73 ... TMEM99 TYSND1 ZNF217
## [["TCGA-A1-A0SH-01A-11D-A099-09"]] ACSL4 AHCTF1 ALPK3 ... WDR87 ZFHX4 ZNF606
## [["TCGA-A1-A0SI-01A-11D-A142-09"]] ABAT ABCA8 ACAN ... ZNF673 ZNF701 ZNF790
## [["TCGA-A1-A0SJ-01A-11D-A099-09"]] ALG1 AMZ2 ASCL3 ... UBAP1L ZBTB11 ZNF543
## [["TCGA-A1-A0SK-01A-12D-A099-09"]] ABCA11P ACBD5 AHNAK ... ZNF66P ZNF717 ZNF777
## [["TCGA-A1-A0SM-01A-11D-A099-09"]] AHDC1 ATP1A4 CLEC12A ... ZAN ZNF229 ZNF804B
## ...
## <783 more elements>
system.time({
print(table(sapply(mutsyms, function(x) sum(duplicated(x)))))
})##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 14 15 16
## 354 184 80 45 24 18 7 5 6 1 2 1 4 1 1 2
## 17 20 21 22 23 24 25 26 27 28 29 30 31 32 33 37
## 2 2 2 2 1 2 2 1 1 2 2 2 1 2 3 1
## 39 48 49 51 52 57 64 66 70 72 73 79 83 90 91 95
## 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1
## 96 102 104 136 166 170 221 264 726 1009 1469
## 1 2 1 1 1 1 1 1 1 1 1
## user system elapsed
## 0.015 0.000 0.016
plot(survfit(osurv~1), main = "Overall BRCA Survival", xlab = "Years")## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
The display, based on 793 observations with non-missing TCGA OS.Time, is consistent with the display at the GDC Data Portal, based on 1077 cases.
gstrat <- function(sym="TTN", mutlist, survdat) {
stopifnot(inherits(survdat, "Surv"))
stopifnot(length(survdat) == length(mutlist))
hassym <- unlist(list(sym) %in% mutlist)
plot(survfit(survdat~hassym), main=sym, lty=1:2, xlab="Years")
}
gstrat("TTN", mutsyms, osurv)## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
gstrat("TP53", mutsyms, osurv)## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
commut <- tail(sort(table(unlist(mutsyms))), 30)
commut##
## MUC17 APOB XIRP2 AHNAK2 NEB OBSCN AHNAK MUC5B HMCN1 RYR3
## 43 45 45 46 47 47 48 48 50 50
## SYNE2 FCGBP USH2A DMD SPEN FLG CROCCP2 DST RYR2 CDH1
## 50 51 51 52 52 56 58 58 62 64
## SYNE1 MUC4 MLL3 MUC12 MAP3K1 GATA3 MUC16 TP53 TTN PIK3CA
## 65 69 75 78 91 97 134 272 300 312
hist(log(sapply(mutsyms,length)), main="Log mutation count per tumor")hasmut <- function(sym="TP53", mutlist) {
sapply(mutlist, function(x) sym %in% x)
}
table( hasmut("TP53", mutsyms), hasmut("PIK3CA", mutsyms) )##
## FALSE TRUE
## FALSE 341 198
## TRUE 192 62
common_pairs <- combn(names(commut),2)
common_pairs[,1:4]## [,1] [,2] [,3] [,4]
## [1,] "MUC17" "MUC17" "MUC17" "MUC17"
## [2,] "APOB" "XIRP2" "AHNAK2" "NEB"
indicate_pair <- function(sym1, sym2, mutlist)
hasmut(sym1, mutlist) & hasmut(sym2, mutlist)
chk <- apply(common_pairs,2,function(z) indicate_pair(z[1], z[2], mutsyms))
chkp.inds <- which(apply(chk,2,sum)>20) # disallow very rare combos
dim(chk)## [1] 793 435
dim(chk[,chkp.inds])## [1] 793 15
chisqs <- apply(chk[,chkp.inds],2,function(z)survdiff(osurv~z)$chisq)
hist(chisqs)cpr <- common_pairs[,chkp.inds][, which(chisqs>5)]
plot(survfit(osurv~chk[,chkp.inds[which(chisqs>5)]]), lty=1:2, main=paste(cpr, collapse=" & " ))## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
## Warning: partial match of 'std' to 'std.err'
dom <- assay(bmut.surv[[1L]], "domain_WU")
length(grep("Znf", na.omit(as.character(dom)))) # frequently noted## [1] 1857
register(MulticoreParam(parallel::detectCores() - 1L))
system.time(
mutdoms <- bplapply(
seq_len(ncol(dom)),
function(x) as.character(na.omit(dom[,x]))
)
)## user system elapsed
## 259.998 97.413 17.471
noz <- sapply(mutdoms, function(x) length(grep("Znf", x))==0)
table(noz)## noz
## FALSE TRUE
## 520 273
survdiff(osurv~noz)## Call:
## survdiff(formula = osurv ~ noz)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## noz=FALSE 520 51 50.1 0.0168 0.0395
## noz=TRUE 273 37 37.9 0.0221 0.0395
##
## Chisq= 0 on 1 degrees of freedom, p= 0.8
nosh3 <- sapply(mutdoms, function(x) length(grep("SH3", x))==0)
survdiff(osurv~nosh3)## Call:
## survdiff(formula = osurv ~ nosh3)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## nosh3=FALSE 109 8 8.32 0.01215 0.0136
## nosh3=TRUE 684 80 79.68 0.00127 0.0136
##
## Chisq= 0 on 1 degrees of freedom, p= 0.9