library(Biobase)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
library(GEOquery)
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
#Author: Abigail E. Moore

#This code reproduces the CIT validation dataset info from Table 1 of the following source:
#Marisa, L., Reyniès, A. D., Duval, A., Selves, J., Gaub, M. P., Vescovo, L., . . . Boige, V. (2013). 
#Gene Expression Classification of Colon Cancer into Molecular Subtypes: Characterization, Validation, and Prognostic Value. 
#PLoS Medicine, 10(5). doi:10.1371/journal.pmed.1001453

#GSM 972400:972522 are the validation dataset


###Loop through .soft files to get the necessary data for this analysis into a dataframe

# Build CITValidationDF with one row per validation sample
# (GSM972400 .. GSM972522). Each column holds the raw "key: value" string
# found at a fixed position of the sample's characteristics_ch1 metadata.
# NOTE(review): selecting characteristics by position assumes every sample
# lists its characteristics in the same order -- confirm against the .soft files.
CharacteristicIdx <- c(1, 2, 3, 4, 8, 9, 11, 14, 15, 16, 18, 22, 26)

# Reproduce the column names that data.frame() derives from expressions such
# as Meta(GSM)$characteristics_ch1[3] ("Meta.GSM..characteristics_ch1.3."),
# because the rest of the script addresses the columns by those names.
CharacteristicCols <- paste0("Meta.GSM..characteristics_ch1.", CharacteristicIdx, ".")

# Collect the per-sample rows in a list and bind them once, instead of
# re-copying the whole data frame with rbind() on every iteration.
CITValidationRows <- lapply(paste0("GSM", 972400:972522), function(GSMName) {
  # destdir = "." keeps the .soft files in the working directory, so reruns
  # report "Using locally cached version" for each sample
  GSM <- getGEO(GSMName, destdir = ".")
  # Read the metadata once per sample rather than once per column
  Characteristics <- Meta(GSM)$characteristics_ch1
  # Keep the values as plain strings; they are compared and parsed as text
  as.data.frame(
    setNames(as.list(Characteristics[CharacteristicIdx]), CharacteristicCols),
    stringsAsFactors = FALSE
  )
})
CITValidationDF <- do.call(rbind, CITValidationRows)
## Using locally cached version of GSM972400 found here:
## ./GSM972400.soft
## Using locally cached version of GSM972401 found here:
## ./GSM972401.soft
## Using locally cached version of GSM972402 found here:
## ./GSM972402.soft
## Using locally cached version of GSM972403 found here:
## ./GSM972403.soft
## Using locally cached version of GSM972404 found here:
## ./GSM972404.soft
## Using locally cached version of GSM972405 found here:
## ./GSM972405.soft
## Using locally cached version of GSM972406 found here:
## ./GSM972406.soft
## Using locally cached version of GSM972407 found here:
## ./GSM972407.soft
## Using locally cached version of GSM972408 found here:
## ./GSM972408.soft
## Using locally cached version of GSM972409 found here:
## ./GSM972409.soft
## Using locally cached version of GSM972410 found here:
## ./GSM972410.soft
## Using locally cached version of GSM972411 found here:
## ./GSM972411.soft
## Using locally cached version of GSM972412 found here:
## ./GSM972412.soft
## Using locally cached version of GSM972413 found here:
## ./GSM972413.soft
## Using locally cached version of GSM972414 found here:
## ./GSM972414.soft
## Using locally cached version of GSM972415 found here:
## ./GSM972415.soft
## Using locally cached version of GSM972416 found here:
## ./GSM972416.soft
## Using locally cached version of GSM972417 found here:
## ./GSM972417.soft
## Using locally cached version of GSM972418 found here:
## ./GSM972418.soft
## Using locally cached version of GSM972419 found here:
## ./GSM972419.soft
## Using locally cached version of GSM972420 found here:
## ./GSM972420.soft
## Using locally cached version of GSM972421 found here:
## ./GSM972421.soft
## Using locally cached version of GSM972422 found here:
## ./GSM972422.soft
## Using locally cached version of GSM972423 found here:
## ./GSM972423.soft
## Using locally cached version of GSM972424 found here:
## ./GSM972424.soft
## Using locally cached version of GSM972425 found here:
## ./GSM972425.soft
## Using locally cached version of GSM972426 found here:
## ./GSM972426.soft
## Using locally cached version of GSM972427 found here:
## ./GSM972427.soft
## Using locally cached version of GSM972428 found here:
## ./GSM972428.soft
## Using locally cached version of GSM972429 found here:
## ./GSM972429.soft
## Using locally cached version of GSM972430 found here:
## ./GSM972430.soft
## Using locally cached version of GSM972431 found here:
## ./GSM972431.soft
## Using locally cached version of GSM972432 found here:
## ./GSM972432.soft
## Using locally cached version of GSM972433 found here:
## ./GSM972433.soft
## Using locally cached version of GSM972434 found here:
## ./GSM972434.soft
## Using locally cached version of GSM972435 found here:
## ./GSM972435.soft
## Using locally cached version of GSM972436 found here:
## ./GSM972436.soft
## Using locally cached version of GSM972437 found here:
## ./GSM972437.soft
## Using locally cached version of GSM972438 found here:
## ./GSM972438.soft
## Using locally cached version of GSM972439 found here:
## ./GSM972439.soft
## Using locally cached version of GSM972440 found here:
## ./GSM972440.soft
## Using locally cached version of GSM972441 found here:
## ./GSM972441.soft
## Using locally cached version of GSM972442 found here:
## ./GSM972442.soft
## Using locally cached version of GSM972443 found here:
## ./GSM972443.soft
## Using locally cached version of GSM972444 found here:
## ./GSM972444.soft
## Using locally cached version of GSM972445 found here:
## ./GSM972445.soft
## Using locally cached version of GSM972446 found here:
## ./GSM972446.soft
## Using locally cached version of GSM972447 found here:
## ./GSM972447.soft
## Using locally cached version of GSM972448 found here:
## ./GSM972448.soft
## Using locally cached version of GSM972449 found here:
## ./GSM972449.soft
## Using locally cached version of GSM972450 found here:
## ./GSM972450.soft
## Using locally cached version of GSM972451 found here:
## ./GSM972451.soft
## Using locally cached version of GSM972452 found here:
## ./GSM972452.soft
## Using locally cached version of GSM972453 found here:
## ./GSM972453.soft
## Using locally cached version of GSM972454 found here:
## ./GSM972454.soft
## Using locally cached version of GSM972455 found here:
## ./GSM972455.soft
## Using locally cached version of GSM972456 found here:
## ./GSM972456.soft
## Using locally cached version of GSM972457 found here:
## ./GSM972457.soft
## Using locally cached version of GSM972458 found here:
## ./GSM972458.soft
## Using locally cached version of GSM972459 found here:
## ./GSM972459.soft
## Using locally cached version of GSM972460 found here:
## ./GSM972460.soft
## Using locally cached version of GSM972461 found here:
## ./GSM972461.soft
## Using locally cached version of GSM972462 found here:
## ./GSM972462.soft
## Using locally cached version of GSM972463 found here:
## ./GSM972463.soft
## Using locally cached version of GSM972464 found here:
## ./GSM972464.soft
## Using locally cached version of GSM972465 found here:
## ./GSM972465.soft
## Using locally cached version of GSM972466 found here:
## ./GSM972466.soft
## Using locally cached version of GSM972467 found here:
## ./GSM972467.soft
## Using locally cached version of GSM972468 found here:
## ./GSM972468.soft
## Using locally cached version of GSM972469 found here:
## ./GSM972469.soft
## Using locally cached version of GSM972470 found here:
## ./GSM972470.soft
## Using locally cached version of GSM972471 found here:
## ./GSM972471.soft
## Using locally cached version of GSM972472 found here:
## ./GSM972472.soft
## Using locally cached version of GSM972473 found here:
## ./GSM972473.soft
## Using locally cached version of GSM972474 found here:
## ./GSM972474.soft
## Using locally cached version of GSM972475 found here:
## ./GSM972475.soft
## Using locally cached version of GSM972476 found here:
## ./GSM972476.soft
## Using locally cached version of GSM972477 found here:
## ./GSM972477.soft
## Using locally cached version of GSM972478 found here:
## ./GSM972478.soft
## Using locally cached version of GSM972479 found here:
## ./GSM972479.soft
## Using locally cached version of GSM972480 found here:
## ./GSM972480.soft
## Using locally cached version of GSM972481 found here:
## ./GSM972481.soft
## Using locally cached version of GSM972482 found here:
## ./GSM972482.soft
## Using locally cached version of GSM972483 found here:
## ./GSM972483.soft
## Using locally cached version of GSM972484 found here:
## ./GSM972484.soft
## Using locally cached version of GSM972485 found here:
## ./GSM972485.soft
## Using locally cached version of GSM972486 found here:
## ./GSM972486.soft
## Using locally cached version of GSM972487 found here:
## ./GSM972487.soft
## Using locally cached version of GSM972488 found here:
## ./GSM972488.soft
## Using locally cached version of GSM972489 found here:
## ./GSM972489.soft
## Using locally cached version of GSM972490 found here:
## ./GSM972490.soft
## Using locally cached version of GSM972491 found here:
## ./GSM972491.soft
## Using locally cached version of GSM972492 found here:
## ./GSM972492.soft
## Using locally cached version of GSM972493 found here:
## ./GSM972493.soft
## Using locally cached version of GSM972494 found here:
## ./GSM972494.soft
## Using locally cached version of GSM972495 found here:
## ./GSM972495.soft
## Using locally cached version of GSM972496 found here:
## ./GSM972496.soft
## Using locally cached version of GSM972497 found here:
## ./GSM972497.soft
## Using locally cached version of GSM972498 found here:
## ./GSM972498.soft
## Using locally cached version of GSM972499 found here:
## ./GSM972499.soft
## Using locally cached version of GSM972500 found here:
## ./GSM972500.soft
## Using locally cached version of GSM972501 found here:
## ./GSM972501.soft
## Using locally cached version of GSM972502 found here:
## ./GSM972502.soft
## Using locally cached version of GSM972503 found here:
## ./GSM972503.soft
## Using locally cached version of GSM972504 found here:
## ./GSM972504.soft
## Using locally cached version of GSM972505 found here:
## ./GSM972505.soft
## Using locally cached version of GSM972506 found here:
## ./GSM972506.soft
## Using locally cached version of GSM972507 found here:
## ./GSM972507.soft
## Using locally cached version of GSM972508 found here:
## ./GSM972508.soft
## Using locally cached version of GSM972509 found here:
## ./GSM972509.soft
## Using locally cached version of GSM972510 found here:
## ./GSM972510.soft
## Using locally cached version of GSM972511 found here:
## ./GSM972511.soft
## Using locally cached version of GSM972512 found here:
## ./GSM972512.soft
## Using locally cached version of GSM972513 found here:
## ./GSM972513.soft
## Using locally cached version of GSM972514 found here:
## ./GSM972514.soft
## Using locally cached version of GSM972515 found here:
## ./GSM972515.soft
## Using locally cached version of GSM972516 found here:
## ./GSM972516.soft
## Using locally cached version of GSM972517 found here:
## ./GSM972517.soft
## Using locally cached version of GSM972518 found here:
## ./GSM972518.soft
## Using locally cached version of GSM972519 found here:
## ./GSM972519.soft
## Using locally cached version of GSM972520 found here:
## ./GSM972520.soft
## Using locally cached version of GSM972521 found here:
## ./GSM972521.soft
## Using locally cached version of GSM972522 found here:
## ./GSM972522.soft
###Clean up the age and OS.delay columns: the values are stored as
###"key: value" strings, and the summary statistics need plain numbers

#Strip everything up to and including the "age.at.diagnosis: " label
AgeVecTrun <- sub("^.*?age.at.diagnosis: ", "", CITValidationDF$Meta.GSM..characteristics_ch1.3.)

#Convert to numeric; entries that do not hold a number become NA
AgeVecNumeric <- as.numeric(AgeVecTrun)

#Drop NAs so the statistics only use recorded ages
AgeVecNumeric <- AgeVecNumeric[!is.na(AgeVecNumeric)]


###Now, clean up the OS.delay column the same way
#Strip everything up to and including the "os.delay: " label
OSVecTrun <- sub("^.*?os.delay: ", "", CITValidationDF$Meta.GSM..characteristics_ch1.14.)

#Convert to numeric; entries that do not hold a number become NA
OSVecNumeric <- as.numeric(OSVecTrun)

#Drop NAs so the statistics only use recorded follow-up times
OSVecNumeric <- OSVecNumeric[!is.na(OSVecNumeric)]
###Age at diagnosis in years: mean, standard deviation and range,
###each rounded to a whole number
round(mean(AgeVecNumeric))
## [1] 68
round(sd(AgeVecNumeric))
## [1] 12
round(c(min(AgeVecNumeric), max(AgeVecNumeric)))
## [1] 42 90
###Find # of M/F and %

#Count each sex once and divide by the actual number of samples instead of a
#hard-coded 123, so the percentages stay right if the sample set changes
SexVec <- CITValidationDF$Meta.GSM..characteristics_ch1.2.
NSamples <- nrow(CITValidationDF)
NMale <- sum(SexVec == "Sex: M", na.rm = TRUE)
NFemale <- sum(SexVec == "Sex: F", na.rm = TRUE)

#Male #
NMale
## [1] 73
#Female #
NFemale
## [1] 50
#Male %
round(NMale / NSamples * 100, 0)
## [1] 59
#Female %
round(NFemale / NSamples * 100, 0)
## [1] 41
###Find # and % of stage I, II, III, IV individuals

#Percentages use the actual number of samples rather than a hard-coded 123
StageVec <- CITValidationDF$Meta.GSM..characteristics_ch1.4.
NSamples <- nrow(CITValidationDF)

#Stage 1 #, note that the authors count stage 0 samples as stage I, so this
#figure is the combined stage 0 + stage I count (the number of true stage I
#samples is smaller)
#%in% treats a missing stage as "no match", so no na.rm is needed
NStage1 <- sum(StageVec %in% c("tnm.stage: 0", "tnm.stage: 1"))
NStage1
## [1] 10
#Stage 1 %
round(NStage1 / NSamples * 100, 0)
## [1] 8
#Stage 2 #
NStage2 <- sum(StageVec == "tnm.stage: 2", na.rm = TRUE)
NStage2
## [1] 66
#Stage 2 %
round(NStage2 / NSamples * 100, 0)
## [1] 54
#Stage 3 #
NStage3 <- sum(StageVec == "tnm.stage: 3", na.rm = TRUE)
NStage3
## [1] 41
#Stage 3 %
round(NStage3 / NSamples * 100, 0)
## [1] 33
#Stage 4 #
NStage4 <- sum(StageVec == "tnm.stage: 4", na.rm = TRUE)
NStage4
## [1] 6
#Stage 4 %
round(NStage4 / NSamples * 100, 0)
## [1] 5
###Find # located at proximal or distal & %

#Percentages use the actual number of samples rather than a hard-coded 123
LocationVec <- CITValidationDF$Meta.GSM..characteristics_ch1.8.
NSamples <- nrow(CITValidationDF)
NProximal <- sum(LocationVec == "tumor.location: proximal", na.rm = TRUE)
NDistal <- sum(LocationVec == "tumor.location: distal", na.rm = TRUE)

#Proximal #
NProximal
## [1] 48
#Proximal %
round(NProximal / NSamples * 100, 0)
## [1] 39
#Distal #
NDistal
## [1] 75
#Distal %
round(NDistal / NSamples * 100, 0)
## [1] 61
###Find # and % w/ adjuvant chemo (stage II and III patients only)

ChemoVec <- CITValidationDF$Meta.GSM..characteristics_ch1.9.
#TRUE for stage II/III samples; %in% yields FALSE (not NA) for a missing stage
IsStage2or3 <- CITValidationDF$Meta.GSM..characteristics_ch1.4. %in%
  c("tnm.stage: 2", "tnm.stage: 3")
NChemoY <- sum(ChemoVec == "chemotherapy.adjuvant: Y" & IsStage2or3, na.rm = TRUE)
NChemoN <- sum(ChemoVec == "chemotherapy.adjuvant: N" & IsStage2or3, na.rm = TRUE)

#Percentages are taken over the stage II-III patients whose chemo status is
#recorded as Y or N (42 + 64 = 106). Dividing by all 107 stage II-III patients
#would count the one patient with no recorded status in the total and give
#39% for Y unless the result is forced up with ceiling()
NChemoKnown <- NChemoY + NChemoN

#Adjuvant chemo # Y
NChemoY
## [1] 42
#Adjuvant chemo % Y
round(NChemoY / NChemoKnown * 100, 0)
## [1] 40
#Adjuvant chemo # N
NChemoN
## [1] 64
#Adjuvant chemo % N
round(NChemoN / NChemoKnown * 100, 0)
## [1] 60
###Follow-up in months: median, standard deviation and range
#Note: this median comes out as 59 whereas the authors report 58; the cause
#of the difference has not been identified
median(OSVecNumeric)
## [1] 59
round(sd(OSVecNumeric))
## [1] 37
c(min(OSVecNumeric), max(OSVecNumeric))
## [1]   0 146
###Find relapse # & % (stage II and III patients only)
###Note: the authors provide data for distant/locoregional/both in the paper,
###but that info does not appear to be in the metadata, so it is not evaluated

RfsVec <- CITValidationDF$Meta.GSM..characteristics_ch1.11.
#TRUE for stage II/III samples; %in% yields FALSE (not NA) for a missing stage
IsStage2or3 <- CITValidationDF$Meta.GSM..characteristics_ch1.4. %in%
  c("tnm.stage: 2", "tnm.stage: 3")
NRelapseY <- sum(RfsVec == "rfs.event: 1" & IsStage2or3, na.rm = TRUE)
NRelapseN <- sum(RfsVec == "rfs.event: 0" & IsStage2or3, na.rm = TRUE)
NRelapseNA <- sum(RfsVec == "rfs.event: NA" & IsStage2or3, na.rm = TRUE)

#Percentages are taken over patients with a recorded relapse status
#(30 + 72 = 102), i.e. the 5 "rfs.event: NA" patients are left out of the
#denominator. This reproduces the authors' 29% / 71%; dividing by all 107
#stage II-III patients gives 28% / 67% instead
NRelapseKnown <- NRelapseY + NRelapseN

#No. Y
NRelapseY
## [1] 30
#% Y
round(NRelapseY / NRelapseKnown * 100, 0)
## [1] 29
#No. N
NRelapseN
## [1] 72
#% N
round(NRelapseN / NRelapseKnown * 100, 0)
## [1] 71
#No. NA
NRelapseNA
## [1] 5
###Find # and % dMMR

MmrVec <- CITValidationDF$Meta.GSM..characteristics_ch1.15.

#No. dMMR
NdMMR <- sum(MmrVec == "mmr.status: dMMR", na.rm = TRUE)
NdMMR
## [1] 14
#% dMMR
#First, subtract NAs from total sample size (taken from the data frame rather
#than hard-coded as 123)
NMmrKnown <- nrow(CITValidationDF) - sum(MmrVec == "mmr.status: NA", na.rm = TRUE)
NMmrKnown
## [1] 110
#Now find %
round(NdMMR / NMmrKnown * 100, 0)
## [1] 13
###Find # and % CIMP+

CimpVec <- CITValidationDF$Meta.GSM..characteristics_ch1.16.

#No. CIMP+
NCimpPos <- sum(CimpVec == "cimp.status: +", na.rm = TRUE)
NCimpPos
## [1] 17
#% CIMP+
#First, subtract NAs from total sample size (taken from the data frame rather
#than hard-coded as 123)
NCimpKnown <- nrow(CITValidationDF) - sum(CimpVec == "cimp.status: NA", na.rm = TRUE)
NCimpKnown
## [1] 116
#Now find %
round(NCimpPos / NCimpKnown * 100, 0)
## [1] 15
###Find # and % KRAS-mutant

KrasVec <- CITValidationDF$Meta.GSM..characteristics_ch1.22.

#No. KRAS-mutant
NKrasMut <- sum(KrasVec == "kras.mutation: M", na.rm = TRUE)
NKrasMut
## [1] 45
#% KRAS-mutant
#First, subtract NAs from total sample size (taken from the data frame rather
#than hard-coded as 123)
#Note: the authors mistakenly switched the total sample size for the kras and BRAF mutations
NKrasKnown <- nrow(CITValidationDF) - sum(KrasVec == "kras.mutation: NA", na.rm = TRUE)
NKrasKnown
## [1] 121
#Now find %
round(NKrasMut / NKrasKnown * 100, 0)
## [1] 37
###Find # and % BRAF-mutant

BrafVec <- CITValidationDF$Meta.GSM..characteristics_ch1.26.

#No. BRAF-mutant
NBrafMut <- sum(BrafVec == "braf.mutation: M", na.rm = TRUE)
NBrafMut
## [1] 7
#% BRAF-mutant
#First, subtract NAs from total sample size (taken from the data frame rather
#than hard-coded as 123)
NBrafKnown <- nrow(CITValidationDF) - sum(BrafVec == "braf.mutation: NA", na.rm = TRUE)
NBrafKnown
## [1] 120
#Now find %
round(NBrafMut / NBrafKnown * 100, 0)
## [1] 6
###Find # and % TP53-mutant

Tp53Vec <- CITValidationDF$Meta.GSM..characteristics_ch1.18.

#No. TP53-mutant
NTp53Mut <- sum(Tp53Vec == "tp53.mutation: M", na.rm = TRUE)
NTp53Mut
## [1] 55
#% TP53-mutant
#First, subtract NAs from total sample size (taken from the data frame rather
#than hard-coded as 123)
NTp53Known <- nrow(CITValidationDF) - sum(Tp53Vec == "tp53.mutation: NA", na.rm = TRUE)
NTp53Known
## [1] 106
#Now find %
round(NTp53Mut / NTp53Known * 100, 0)
## [1] 52