Software

library(ggplot2)
library(dplyr)
library(tidyr)
library(knitr)

Data

## CanFam 3.1
setwd("C:/Users/edlarsen/Documents/StructuralVariants")
vcf_Cfam3 <- read.csv("PTCL_StructuralVariants_CanFam3.filtered.ann.csv")

valid_chromosomes <- c(as.character(1:38), "X")
vcf_Cfam3 <- vcf_Cfam3[vcf_Cfam3$X.CHROM %in% valid_chromosomes, ]

## CanFam 4
vcf_Cfam4 <- read.csv("PTCL_StructuralVariants_CanFam4.filtered.ann.csv")
vcf_Cfam4 <- vcf_Cfam4[vcf_Cfam4$X.CHROM %in% valid_chromosomes, ]

Number of variants per chromosome

# CanFam 3.1
variant_counts_Cfam3 <- table(vcf_Cfam3$X.CHROM)
variant_df_Cfam3 <- as.data.frame(variant_counts_Cfam3)
colnames(variant_df_Cfam3) <- c("Chromosome", "VariantCount")

print(ggplot(variant_df_Cfam3, aes(x = Chromosome, y = VariantCount)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Structural Variant Calls per Chromosome (CanFam 3.1)",
    x = "Chromosome",
    y = "Number of Variants"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)))

# CanFam 4
variant_counts_Cfam4 <- table(vcf_Cfam4$X.CHROM)
variant_df_Cfam4 <- as.data.frame(variant_counts_Cfam4)
colnames(variant_df_Cfam4) <- c("Chromosome", "VariantCount")

print(ggplot(variant_df_Cfam4, aes(x = Chromosome, y = VariantCount)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Structural Variant Calls per Chromosome (CanFam4)",
    x = "Chromosome",
    y = "Number of Variants"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)))

Types of variants by chromosome

## CanFam 3.1
# Extract the variant type (SVTYPE) from the INFO column
vcf_Cfam3$SVTYPE <- sapply(vcf_Cfam3$INFO, function(info) {
  match <- regmatches(info, regexpr("SVTYPE=[^;]+", info))
  if (length(match) > 0) {
    sub("SVTYPE=", "", match)
  } else {
    NA
  }
})

vcf_Cfam3 <- vcf_Cfam3[!is.na(vcf_Cfam3$SVTYPE),] # remove NAs

# Summarize the counts of each variant type per chromosome
variant_summary_Cfam3 <- vcf_Cfam3 %>%
  group_by(X.CHROM, SVTYPE) %>%
  summarise(Count = n()) %>%
  ungroup()

# Rename chromosome column for better usability in plotting
colnames(variant_summary_Cfam3)[1] <- "Chromosome"

# Ensure the chromosome column is ordered correctly
variant_summary_Cfam3$Chromosome <- factor(variant_summary_Cfam3$Chromosome, levels = c(as.character(1:38), "X"))

# Plot the number of variants per chromosome by type
ggplot(variant_summary_Cfam3, aes(x = Chromosome, y = Count, fill = SVTYPE)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(
    title = "Structural Variants per Chromosome by Type (CanFam 3.1)",
    x = "Chromosome",
    y = "Number of Variants",
    fill = "Variant Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_brewer(palette = "Set2")

kable(variant_summary_Cfam3, caption = "Variant count per chromosome (CanFam 3.1)")
Variant count per chromosome (CanFam 3.1)
Chromosome SVTYPE Count
1 BND 156
1 DEL 2275
1 DUP 268
1 INS 3552
1 INV 25
10 BND 46
10 DEL 1269
10 DUP 141
10 INS 1828
10 INV 10
11 BND 91
11 DEL 1396
11 DUP 149
11 INS 2025
11 INV 11
12 BND 83
12 DEL 1684
12 DUP 180
12 INS 2594
12 INV 14
13 BND 56
13 DEL 1224
13 DUP 161
13 INS 1955
13 INV 7
14 BND 44
14 DEL 1213
14 DUP 118
14 INS 1841
14 INV 13
15 BND 61
15 DEL 1344
15 DUP 187
15 INS 2085
15 INV 4
16 BND 67
16 DEL 1320
16 DUP 161
16 INS 1927
16 INV 8
17 BND 56
17 DEL 1369
17 DUP 137
17 INS 2060
17 INV 13
18 BND 69
18 DEL 1199
18 DUP 142
18 INS 1710
18 INV 16
19 BND 71
19 DEL 1203
19 DUP 160
19 INS 2015
19 INV 17
2 BND 47
2 DEL 1587
2 DUP 163
2 INS 2478
2 INV 10
20 BND 60
20 DEL 1179
20 DUP 88
20 INS 1729
20 INV 12
21 BND 61
21 DEL 1173
21 DUP 126
21 INS 1702
21 INV 5
22 BND 92
22 DEL 1208
22 DUP 144
22 INS 1811
22 INV 11
23 BND 69
23 DEL 1056
23 DUP 115
23 INS 1761
23 INV 6
24 BND 40
24 DEL 970
24 DUP 78
24 INS 1475
24 INV 5
25 BND 68
25 DEL 977
25 DUP 135
25 INS 1501
25 INV 2
26 BND 53
26 DEL 1038
26 DUP 78
26 INS 1489
26 INV 9
27 BND 47
27 DEL 1132
27 DUP 92
27 INS 1826
27 INV 10
28 BND 47
28 DEL 734
28 DUP 65
28 INS 1010
28 INV 3
29 BND 45
29 DEL 979
29 DUP 117
29 INS 1496
29 INV 9
3 BND 90
3 DEL 1774
3 DUP 215
3 INS 2681
3 INV 8
30 BND 41
30 DEL 810
30 DUP 60
30 INS 1378
30 INV 6
31 BND 51
31 DEL 869
31 DUP 106
31 INS 1389
31 INV 2
32 BND 58
32 DEL 1114
32 DUP 84
32 INS 1541
32 INV 7
33 BND 49
33 DEL 781
33 DUP 128
33 INS 1222
33 INV 6
34 BND 49
34 DEL 915
34 DUP 97
34 INS 1324
34 INV 9
35 BND 23
35 DEL 648
35 DUP 74
35 INS 950
36 BND 19
36 DEL 650
36 DUP 67
36 INS 1040
36 INV 6
37 BND 20
37 DEL 598
37 DUP 64
37 INS 972
37 INV 4
38 BND 10
38 DEL 578
38 DUP 60
38 INS 849
38 INV 1
4 BND 119
4 DEL 1669
4 DUP 196
4 INS 2433
4 INV 7
5 BND 63
5 DEL 1589
5 DUP 181
5 INS 2416
5 INV 7
6 BND 55
6 DEL 1519
6 DUP 175
6 INS 2216
6 INV 9
7 BND 63
7 DEL 1476
7 DUP 163
7 INS 2313
7 INV 11
8 BND 175
8 DEL 1737
8 DUP 148
8 INS 2697
8 INV 16
9 BND 58
9 DEL 1271
9 DUP 113
9 INS 1933
9 INV 13
X BND 218
X DEL 1634
X DUP 133
X INS 2278
X INV 18
total_variant_summary_Cfam3 <- vcf_Cfam3 %>%
  group_by(SVTYPE) %>%
  summarise(Total_Count = n()) %>%
  ungroup()

kable(total_variant_summary_Cfam3, caption = "Total variant count by type (CanFam 3.1)")
Total variant count by type (CanFam 3.1)
SVTYPE Total_Count
BND 2590
DEL 47161
DUP 5069
INS 71502
INV 350
## CanFam4
# Extract the variant type (SVTYPE) from the INFO column
vcf_Cfam4$SVTYPE <- sapply(vcf_Cfam4$INFO, function(info) {
  match <- regmatches(info, regexpr("SVTYPE=[^;]+", info))
  if (length(match) > 0) {
    sub("SVTYPE=", "", match)
  } else {
    NA
  }
})

vcf_Cfam4 <- vcf_Cfam4[!is.na(vcf_Cfam4$SVTYPE),] # remove NAs

# Summarize the counts of each variant type per chromosome
variant_summary_Cfam4 <- vcf_Cfam4 %>%
  group_by(X.CHROM, SVTYPE) %>%
  summarise(Count = n()) %>%
  ungroup()

# Rename chromosome column for better usability in plotting
colnames(variant_summary_Cfam4)[1] <- "Chromosome"

# Ensure the chromosome column is ordered correctly
variant_summary_Cfam4$Chromosome <- factor(variant_summary_Cfam4$Chromosome, levels = c(as.character(1:38), "X"))

# Plot the number of variants per chromosome by type
ggplot(variant_summary_Cfam4, aes(x = Chromosome, y = Count, fill = SVTYPE)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(
    title = "Structural Variants per Chromosome by Type (CanFam4)",
    x = "Chromosome",
    y = "Number of Variants",
    fill = "Variant Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_brewer(palette = "Set2")

kable(variant_summary_Cfam4, caption = "Variant count per chromosome (CanFam4)")
Variant count per chromosome (CanFam4)
Chromosome SVTYPE Count
1 BND 157
1 DEL 2401
1 DUP 268
1 INS 3386
1 INV 26
10 BND 48
10 DEL 1379
10 DUP 174
10 INS 1836
10 INV 11
11 BND 88
11 DEL 1475
11 DUP 141
11 INS 2044
11 INV 8
12 BND 78
12 DEL 1764
12 DUP 202
12 INS 2590
12 INV 12
13 BND 75
13 DEL 1304
13 DUP 155
13 INS 1895
13 INV 10
14 BND 61
14 DEL 1238
14 DUP 124
14 INS 1827
14 INV 10
15 BND 69
15 DEL 1397
15 DUP 192
15 INS 2044
15 INV 9
16 BND 56
16 DEL 1262
16 DUP 131
16 INS 1739
16 INV 8
17 BND 77
17 DEL 1372
17 DUP 131
17 INS 1979
17 INV 10
18 BND 108
18 DEL 1278
18 DUP 155
18 INS 1709
18 INV 18
19 BND 100
19 DEL 1371
19 DUP 195
19 INS 1956
19 INV 15
2 BND 69
2 DEL 1626
2 DUP 178
2 INS 2348
2 INV 12
20 BND 59
20 DEL 1181
20 DUP 111
20 INS 1741
20 INV 9
21 BND 45
21 DEL 1225
21 DUP 130
21 INS 1685
21 INV 6
22 BND 68
22 DEL 1286
22 DUP 161
22 INS 1738
22 INV 15
23 BND 72
23 DEL 1163
23 DUP 118
23 INS 1699
23 INV 8
24 BND 39
24 DEL 1006
24 DUP 83
24 INS 1475
24 INV 6
25 BND 38
25 DEL 1034
25 DUP 153
25 INS 1492
25 INV 4
26 BND 27
26 DEL 1032
26 DUP 102
26 INS 1382
26 INV 10
27 BND 44
27 DEL 1220
27 DUP 118
27 INS 1775
27 INV 7
28 BND 35
28 DEL 816
28 DUP 73
28 INS 971
28 INV 6
29 BND 48
29 DEL 1099
29 DUP 131
29 INS 1457
29 INV 5
3 BND 142
3 DEL 1814
3 DUP 228
3 INS 2727
3 INV 9
30 BND 34
30 DEL 926
30 DUP 87
30 INS 1311
30 INV 6
31 BND 27
31 DEL 1076
31 DUP 127
31 INS 1340
31 INV 5
32 BND 41
32 DEL 1239
32 DUP 116
32 INS 1570
32 INV 7
33 BND 31
33 DEL 993
33 DUP 159
33 INS 1158
33 INV 7
34 BND 45
34 DEL 934
34 DUP 125
34 INS 1368
34 INV 10
35 BND 39
35 DEL 780
35 DUP 104
35 INS 1069
35 INV 1
36 BND 33
36 DEL 744
36 DUP 110
36 INS 1037
36 INV 8
37 BND 20
37 DEL 648
37 DUP 74
37 INS 945
37 INV 3
38 BND 20
38 DEL 712
38 DUP 75
38 INS 850
38 INV 5
4 BND 128
4 DEL 1708
4 DUP 175
4 INS 2393
4 INV 11
5 BND 76
5 DEL 1584
5 DUP 192
5 INS 2337
5 INV 5
6 BND 65
6 DEL 1549
6 DUP 197
6 INS 2162
6 INV 6
7 BND 95
7 DEL 1558
7 DUP 151
7 INS 2289
7 INV 9
8 BND 110
8 DEL 1660
8 DUP 157
8 INS 2551
8 INV 18
9 BND 54
9 DEL 1144
9 DUP 106
9 INS 1925
9 INV 10
X BND 246
X DEL 1667
X DUP 146
X INS 2042
X INV 23
total_variant_summary_Cfam4 <- vcf_Cfam4 %>%
  group_by(SVTYPE) %>%
  summarise(Total_Count = n()) %>%
  ungroup()

kable(total_variant_summary_Cfam4, caption = "Total variant count by type (CanFam4)")
Total variant count by type (CanFam4)
SVTYPE Total_Count
BND 2667
DEL 49665
DUP 5555
INS 69842
INV 368

Identify shared fusion partners with RNA-seq data

## CanFam 3.1
# read in csv file(s) of RNA fusion calls
star_rna_fusions <- read.csv("CanFam31_StarFusionSummary.csv")
fc_rna_fusions <- read.csv("fusionCatcher-results-ptcl-no-ctrls.csv")

# get list of implicated genes
star_genes_3 <- star_rna_fusions$X3_prime_gene
star_genes_5 <- star_rna_fusions$X5_prime_gene
fc_genes_3 <- fc_rna_fusions$X3_prime_gene
fc_genes_5 <- fc_rna_fusions$X5_prime_gene

all_star_genes <- c(star_genes_3, star_genes_5)
all_star_genes <- unique(all_star_genes)
all_fc_genes <- c(fc_genes_3, fc_genes_5)
all_fc_genes <- unique(all_fc_genes)

# compare to genes in INFO column of BND variant file
bnd_vcf <- read.csv("PTCL_BND_Variants.CanFam3.filtered.ann.bcsq.csv")

extract_gene <- function(info) {
  # Find the BCSQ field
  bcsc_field <- stringr::str_extract(info, "BCSQ=[^;]+")
  # Extract the gene name (second element after "|")
  if (!is.na(bcsc_field)) {
    gene <- strsplit(bcsc_field, "\\|")[[1]][2]
    return(gene)
  }
  return(NA)
}

bnd_vcf <- bnd_vcf %>%
  mutate(Gene = sapply(INFO, extract_gene))

shared_star_bnd_Cfam3 <- bnd_vcf %>%
  filter(Gene %in% all_star_genes)

shared_fc_bnd_Cfam3 <- bnd_vcf %>%
  filter(Gene %in% all_fc_genes)

# print summary
print(unique(shared_star_bnd_Cfam3$Gene))
##  [1] "PALM2AKAP2"         "DLA-64"             "ENSCAFG00000014478"
##  [4] "ENSCAFG00000048749" "FRG1"               "CPT1A"             
##  [7] "MITF"               "DOCK3"              "NBEA"              
## [10] "CPXM2"              "ST8SIA4"            "HIVEP1"            
## [13] "ATXN1"              "ENSCAFG00000043983" "UIMC1"             
## [16] "ENSCAFG00000047976" "ENSCAFG00000045242" "ENSCAFG00000031023"
## [19] "ENSCAFG00000043621" "ENSCAFG00000046168" "ENSCAFG00000043094"
## [22] "TRAV24"             "ENSCAFG00000043049" "ENSCAFG00000025128"
## [25] "ENSCAFG00000030258" "ENSCAFG00000028509" "ENSCAFG00000028642"
## [28] "ENSCAFG00000029953" "ENSCAFG00000029046" "ENSCAFG00000044010"
## [31] "STAG2"              "SH2D1A"
print(unique(shared_fc_bnd_Cfam3$Gene))
##  [1] "SERPINB5" "MYCT1"    "SAE1"     "KDM4C"    "DNAH8"    "KCNQ5"   
##  [7] "VPS13B"   "SND1"     "GATB"     "TRBV28"   "MAGI2"    "IGHMBP2" 
## [13] "CPT1A"    "INPP4B"   "VPS13D"   "MITF"     "NISCH"    "TBC1D4"  
## [19] "ANKRD28"  "SHLD1"    "TTC28"    "LCOR"     "ZFYVE27"  "ATE1"    
## [25] "CPXM2"    "RALYL"    "CERT1"    "TCF12"    "SETD4"    "GOLGB1"  
## [31] "UBE2E3"   "PDGFD"    "DYNC2H1"  "RABGAP1L" "PTPRM"    "RGS6"    
## [37] "ACACA"    "BCLAF3"
# export
write.csv(shared_star_bnd_Cfam3, file = "shared_PacBio_STARFusion_calls.CanFam3.csv")
write.csv(shared_fc_bnd_Cfam3, file = "shared_PacBio_FusionCatcher_calls.CanFam3.csv")
## CanFam4
# read in csv file of RNA fusion calls
star_rna_fusions <- read.csv("CanFam4_StarFusionSummary.csv")

# get list of implicated genes
genes_3 <- star_rna_fusions$X3_prime_gene
genes_5 <- star_rna_fusions$X5_prime_gene
all_genes <- c(genes_3, genes_5)
all_genes <- unique(all_genes)

# compare to genes in INFO column of BND variant file
bnd_vcf <- read.csv("PTCL_BND_Variants.CanFam4.filtered.ann.bcsq.csv")

extract_gene <- function(info) {
  # Find the BCSQ field
  bcsc_field <- stringr::str_extract(info, "BCSQ=[^;]+")
  # Extract the gene name (second element after "|")
  if (!is.na(bcsc_field)) {
    gene <- strsplit(bcsc_field, "\\|")[[1]][2]
    return(gene)
  }
  return(NA)
}

bnd_vcf <- bnd_vcf %>%
  mutate(Gene = sapply(INFO, extract_gene))

shared_bnd_Cfam4 <- bnd_vcf %>%
  filter(Gene %in% all_genes)

# print summary
print(unique(shared_bnd_Cfam4$Gene))
##  [1] "PALM2AKAP2"         "FBXW7"              "EXOC6B"            
##  [4] "ENSCAFG00805017629" "TSN"                "ENSCAFG00805002052"
##  [7] "TNFRSF1B"           "SUMF1"              "DOCK3"             
## [10] "ENSCAFG00805023894" "ENSCAFG00805023901" "PLEKHA5"           
## [13] "AEBP2"              "TMEM117"            "CPXM2"             
## [16] "ENSCAFG00805004162" "ST8SIA4"            "PDLIM5"            
## [19] "CASR"               "UIMC1"              "ENSCAFG00805008215"
## [22] "ENSCAFG00805007848" "ENSCAFG00805008376" "ENSCAFG00805008332"
## [25] "ENSCAFG00805008471" "ENSCAFG00805008844" "TRAV24"            
## [28] "ENSCAFG00805009255" "ENSCAFG00805009486" "ENSCAFG00805005681"
## [31] "STAG2"              "SH2D1A"
# export
write.csv(shared_bnd_Cfam4, file = "shared_PacBio_STARFusion_calls.CanFam4.csv")

Identify structural variants implicating cancer-associated genes

# import vcf file of only variants with a BCSQ tag in the INFO field
ann_vcf_Cfam3 <- read.csv("PTCL_StructuralVariants_CanFam3.filtered.ann.bcsq.csv")
ann_vcf_Cfam4 <- read.csv("PTCL_StructuralVariants_CanFam4.filtered.ann.bcsq.csv")

# list of cancer-associated genes from OncoKB
oncokb <- read.csv("cancerGeneList.csv")
oncokb_genes <- oncokb$Hugo.Symbol

extract_gene <- function(info) {
  # Find the BCSQ field
  bcsc_field <- stringr::str_extract(info, "BCSQ=[^;]+")
  # Extract the gene name (second element after "|")
  if (!is.na(bcsc_field)) {
    gene <- strsplit(bcsc_field, "\\|")[[1]][2]
    return(gene)
  }
  return(NA)
}

onco_vcf_Cfam3 <- ann_vcf_Cfam3 %>%
  mutate(Gene = sapply(INFO, extract_gene))

onco_vcf_Cfam3 <- onco_vcf_Cfam3 %>%
  filter(Gene %in% oncokb_genes)

onco_vcf_Cfam4 <- ann_vcf_Cfam4 %>%
  mutate(Gene = sapply(INFO, extract_gene))

onco_vcf_Cfam4 <- onco_vcf_Cfam4 %>%
  filter(Gene %in% oncokb_genes)

# export
write.csv(onco_vcf_Cfam3, file = "CanFam3_SVs_in_Cancer_Genes.csv")
write.csv(onco_vcf_Cfam4, file = "CanFam4_SVs_in_Cancer_Genes.csv")

Identify shared variable genes between CanFam3.1 and CanFam4 alignments

# Extract BCSQ gene annotation and SVTYPE from INFO field and place in separate columns called 'Gene' and 'SVTYPE'
extract_gene <- function(info) {
  # Find the BCSQ field
  bcsc_field <- stringr::str_extract(info, "BCSQ=[^;]+")
  # Extract the gene name (second element after "|")
  if (!is.na(bcsc_field)) {
    gene <- strsplit(bcsc_field, "\\|")[[1]][2]
    return(gene)
  }
  return(NA)
}

extract_variant_type <- function(info) {
  # Find the SVTYPE field
  svtype_field <- stringr::str_extract(info, "SVTYPE=[^;]+")
  # Extract the variant type (first element after "=")
  if (!is.na(svtype_field)) {
    var_type <- strsplit(svtype_field, "=")[[1]][2]
    return(var_type)
  }
  return(NA)
}

Cfam3_vcf_alt <- ann_vcf_Cfam3 %>%
  mutate(Gene = sapply(INFO, extract_gene))
Cfam3_vcf_alt <- Cfam3_vcf_alt %>%
  mutate(SVTYPE = sapply(INFO, extract_variant_type))

Cfam4_vcf_alt <- ann_vcf_Cfam4 %>%
  mutate(Gene = sapply(INFO, extract_gene))
Cfam4_vcf_alt <- Cfam4_vcf_alt %>%
  mutate(SVTYPE = sapply(INFO, extract_variant_type))

# Combine Gene and SVTYPE info into one column
Cfam3_vcf_alt <- Cfam3_vcf_alt %>%
  mutate(
    SVTYPE_Gene = paste(SVTYPE, Gene, sep = "_")
  )

Cfam4_vcf_alt <- Cfam4_vcf_alt %>%
  mutate(
    SVTYPE_Gene = paste(SVTYPE, Gene, sep = "_")
  )


# Create vector of SVTYPE_Gene column contents
Cfam3_variantGenes <- unique(Cfam3_vcf_alt$SVTYPE_Gene)
Cfam4_variantGenes <- unique(Cfam4_vcf_alt$SVTYPE_Gene)

# Filter variant calls of each alignment to include only those of the same variant type in shared genes
Cfam3_shared <- Cfam3_vcf_alt %>%
  filter(SVTYPE_Gene %in% Cfam4_variantGenes) %>%
  filter(Gene != "NA")

Cfam4_shared <- Cfam4_vcf_alt %>%
  filter(SVTYPE_Gene %in% Cfam3_variantGenes) %>%
  filter(Gene != "NA")

# print summary of total number of shared variable genes
paste("Number of genes with the same variant type called in CanFam3.1 and CanFam4: ", length(unique(Cfam3_shared$Gene)), sep = " ")
## [1] "Number of genes with the same variant type called in CanFam3.1 and CanFam4:  4717"
# export
write.csv(Cfam3_shared, file = "CanFam3_PacBio_Variants_in_Shared_Genes_with_CanFam4.csv")
write.csv(Cfam4_shared, file = "CanFam4_PacBio_Variants_in_Shared_Genes_with_CanFam3.csv")

# Limit to OncoKB cancer-associated genes
Cfam3_shared_oncokb <- Cfam3_shared %>%
  filter(Gene %in% oncokb_genes)

Cfam4_shared_oncokb <- Cfam4_shared %>%
  filter(Gene %in% oncokb_genes)

paste("Number of cancer-associated genes with same variant type called in CanFam3.1 and CanFam4:", length(unique(Cfam3_shared_oncokb$Gene)), sep = " ")
## [1] "Number of cancer-associated genes with same variant type called in CanFam3.1 and CanFam4: 419"
# export
write.csv(Cfam3_shared_oncokb, file = "CanFam3_PacBio_Variants_in_Shared_OncoKB_Genes_with_CanFam4.csv")
write.csv(Cfam4_shared_oncokb, file = "CanFam4_PacBio_Variants_in_Shared_OncoKB_Genes_with_CanFam3.csv")

Citations

sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Denver
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] knitr_1.49    tidyr_1.3.1   dplyr_1.1.4   ggplot2_3.5.1
## 
## loaded via a namespace (and not attached):
##  [1] vctrs_0.6.5        cli_3.6.2          rlang_1.1.3        xfun_0.49         
##  [5] stringi_1.8.4      purrr_1.0.2        generics_0.1.3     jsonlite_1.8.9    
##  [9] labeling_0.4.3     glue_1.8.0         colorspace_2.1-1   htmltools_0.5.8.1 
## [13] sass_0.4.9         scales_1.3.0       rmarkdown_2.29     grid_4.4.0        
## [17] evaluate_1.0.3     munsell_0.5.1      jquerylib_0.1.4    tibble_3.2.1      
## [21] fastmap_1.2.0      yaml_2.3.10        lifecycle_1.0.4    stringr_1.5.1     
## [25] compiler_4.4.0     codetools_0.2-20   RColorBrewer_1.1-3 pkgconfig_2.0.3   
## [29] rstudioapi_0.17.1  farver_2.1.2       digest_0.6.35      R6_2.5.1          
## [33] tidyselect_1.2.1   pillar_1.10.1      magrittr_2.0.3     bslib_0.8.0       
## [37] withr_3.0.2        tools_4.4.0        gtable_0.3.6       cachem_1.1.0
citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.