# Load supplementary table 2 (one row per exon) into a data frame.
supp2 <- read.csv(file = "~/Documents/alternativeSplicing/supp2.csv")
# Mutually exclusive exons ----
# Keep the rows flagged MX == 1 and add a `coordinate` column formatted as
# "<chromosome>:<start>-<end>". substring(Chromosome, 4) keeps everything from
# the 4th character onward (presumably dropping a "chr" prefix -- confirm
# against the input file).
mx <- supp2 %>%
  filter(MX == 1) %>%
  mutate(
    coordinate = paste(
      substring(Chromosome, 4),
      paste(Start.position, End.position, sep = "-"),
      sep = ":"
    )
  )
# Count how many mutually exclusive exon rows each gene symbol has.
gene_and_mx_exon_count <- summarise(
  group_by(mx, Gene.Symbol),
  count = n()
)
# Keep only genes with more than one mutually exclusive exon: take the gene
# symbols whose count exceeds 1, then join back to `mx` to recover every exon
# row belonging to those genes.
gene_with_more_than_one_exon <- inner_join(
  select(filter(gene_and_mx_exon_count, count > 1), Gene.Symbol),
  mx,
  by = "Gene.Symbol"
)
# Cassette exons ----
# Keep the rows flagged CASSETTE == 1 and add a `coordinate` column formatted
# as "<chromosome>:<start>-<end>". substring(Chromosome, 4) keeps everything
# from the 4th character onward (presumably dropping a "chr" prefix --
# confirm against the input file).
cassette <- supp2 %>%
  filter(CASSETTE == 1) %>%
  mutate(
    coordinate = paste(
      substring(Chromosome, 4),
      paste(Start.position, End.position, sep = "-"),
      sep = ":"
    )
  )
Number of genes with multiple mutually exclusive exons: 209
Number of total mutually exclusive exons: 559
Number of genes with cassette exons: 3032
Number of total cassette exons: 5051
- Extract nucleotide sequences using the chromosome coordinates of these exons.
- Run tblastx against lamprey, spotted gar, zebrafish, fugu, and coelacanth (e-value threshold: 0.1).
- The CDS sequences of these five species are available at https://uswest.ensembl.org/info/about/species.html
| Species | Mutually exclusive exons | Cassette exons |
|---|---|---|
| lamprey | 321 | 2751 |
| spotted gar | 380 | 3337 |
| zebrafish | 337 | 2934 |
| fugu | 353 | 3042 |
| coelacanth | 354 | 3145 |
# Exon hits per species: mutually exclusive exons ----
# Each list element holds the `qseqid` column of one species' hit table; the
# UpSet plot shows how those ID sets intersect, ordered by intersection size
# and including empty intersections.
listInput <- list(
  lamprey = lamprey$qseqid,
  spotted_gar = spotted_gar$qseqid,
  zebrafish = zebrafish$qseqid,
  fugu = fugu$qseqid,
  coelacanth = coelacanth$qseqid
)
upset(
  fromList(listInput),
  order.by = "freq",
  empty.intersections = "on"
)
437 mutually exclusive exons are conserved in at least 1 species
194 genes have mutually exclusive exons conserved in at least 1 species
239 mutually exclusive exons are conserved in all 5 species
118 genes have mutually exclusive exons conserved in all 5 species
# Exon hits per species: cassette exons ----
# Each list element holds the `qseqid` column of one species' cassette hit
# table; the UpSet plot shows how those ID sets intersect, ordered by
# intersection size and including empty intersections.
listInput_cassette <- list(
  lamprey_cassette = lamprey_cassette$qseqid,
  spotted_gar_cassette = spotted_gar_cassette$qseqid,
  zebrafish_cassette = zebrafish_cassette$qseqid,
  fugu_cassette = fugu_cassette$qseqid,
  coelacanth_cassette = coelacanth_cassette$qseqid
)
upset(
  fromList(listInput_cassette),
  order.by = "freq",
  empty.intersections = "on"
)
3818 cassette exons are conserved in at least 1 species
2468 genes have cassette exons conserved in at least 1 species
2029 cassette exons are conserved in all 5 species
1433 genes have cassette exons conserved in all 5 species