1. Download mutation data from TCGA paper on lung adenocarcinomas (Nature, 2014)

# Fetch the curated somatic MAF published with the TCGA LUAD (2014) paper and
# load it as a data frame. The file name is defined once so the URL, the
# download destination and the path that is read cannot drift apart.
maf_file <- "AN_TCGA_LUAD_PAIR_capture_freeze_FINAL_230.aggregated.capture.tcga.uuid.curated.somatic.maf"
maf_url <- paste0(
  "https://tcga-data.nci.nih.gov/docs/publications/luad_2014/",
  maf_file
)
download.file(url = maf_url, destfile = maf_file)

# Tab-separated with a header row; quote = "" disables quote processing so
# quote characters inside fields are read literally.
tcga_maf <- read.table(
  file = maf_file,
  header = TRUE,
  quote = "",
  stringsAsFactors = FALSE,
  sep = "\t"
)

2. Keep only TCGA-LUAD mutations classified as Somatic, exclude silent mutations, and create a tumor-normal sample ID

# Reduce the MAF to the annotation columns used in the steps below.
tcga_maf <- dplyr::select(
  tcga_maf,
  Hugo_Symbol,
  NCBI_Build,
  Variant_Classification,
  Mutation_Status,
  Sequence_Source,
  Tumor_Sample_UUID,
  Matched_Norm_Sample_UUID
)

# Keep somatic calls that are not silent variants.
tcga_maf <- dplyr::filter(
  tcga_maf,
  Mutation_Status == "Somatic",
  Variant_Classification != "Silent"
)

# Tumor-normal pair identifier: "<tumor UUID>_<matched normal UUID>".
tcga_maf[["SampleID"]] <- paste(
  tcga_maf[["Tumor_Sample_UUID"]],
  tcga_maf[["Matched_Norm_Sample_UUID"]],
  sep = "_"
)

3. Load kinome gene set (612 protein-coding genes)

# Read the kinome gene list from kinome_genes.txt (read without a header row)
# and name its column Hugo_Symbol so it can serve as a join key against the
# MAF table.
kinome_genes <- read.table(file = "kinome_genes.txt", stringsAsFactors = FALSE)
colnames(kinome_genes) <- c("Hugo_Symbol")

4. Only include TCGA-LUAD mutations in the kinome

# Keep only mutations whose gene symbol appears in the kinome list.
# semi_join() is a filtering join: each mutation row is returned at most once,
# so a duplicated symbol in kinome_genes cannot multiply rows and inflate the
# per-sample counts (inner_join() would repeat the row once per match).
tcga_maf_kinome <- dplyr::semi_join(tcga_maf, kinome_genes, by = "Hugo_Symbol")

5. Coding mutations per tumor-normal ID

# Count kinome mutations per tumor-normal pair (one row per SampleID, count in
# column n). Every call is namespace-qualified, matching the rest of the
# script, so this line does not depend on dplyr/magrittr being attached for
# `%>%` or n().
mutrate_kinome <- dplyr::summarise(
  dplyr::group_by(tcga_maf_kinome, SampleID),
  n = dplyr::n()
)

6. Mutation rate TCGA-LUAD (2014) - kinome

Median coding mutations per tumor: 9

Mean coding mutations per tumor: 13.18