# Lookup table of gene families: each row pairs a three-character gene-name
# prefix (first column) with its descriptive family name (second column).
gene.types.matrix <- matrix(
  data = c(
    "atp", "ATP synthase",
    "ccb", "cytochrome c biogenesis",
    "ccm", "cytochrome c maturation",
    "clp", "maturases (clp)",
    "cob", "complex III (ubichinol cytochrome c reductase)",
    "cox", "complex IV (cytochrome c oxidase)",
    "mat", "maturases",
    "mtt", "membrane targeting and translocation",
    "nad", "complex I (NADH dehydrogenase)",
    "ND5", "complex I (NADH dehydrogenase 5)",
    "ori", "origin of replication",
    "orf", "open reading frames",
    "rpl", "ribosomal proteins (LSU)",
    "rpo", "RNA polymerase",
    "rps", "ribosomal proteins (SSU)",
    "rrn", "ribosomal RNAs",
    "sdh", "complex II (succinate dehydrogenase)",
    "trn", "transfer RNAs"
  ),
  ncol = 2,
  byrow = TRUE
)

# Same table as a data frame with named columns, indexed by the short name
# so that a family can be looked up directly by its prefix.
gene.types <- data.frame(gene.types.matrix)
names(gene.types) <- c("Short.name", "Full.name")
row.names(gene.types) <- gene.types$Short.name
# Genome size used as the denominator of the Proportion column
# (presumably in bp, like Size -- confirm against the assembly).
genome.size <- 6017035

# Read the annotation and keep only the mRNA and tRNA features.
gff.all <- readGff3("pg29mt-concat.gff")
gff <- gff.all[gff.all$type %in% c("mRNA", "tRNA"), ]

# One row per retained feature: its type, its Name attribute and its size.
data <- droplevels(data.frame(
  Type = gff$type,
  Name = getGffAttribute(gff, "Name"),
  Size = size(gff),
  stringsAsFactors = FALSE
))

# The first three characters of the name select the gene family. A prefix
# that is not listed in gene.types becomes NA and stops the script here.
data$Family <- factor(substr(data$Name, 1, 3),
  levels = gene.types$Short.name,
  labels = gene.types$Full.name
)
stopifnot(!is.na(data$Family))

# Per (Type, Family): number of features, total size and the fraction of the
# genome covered; families totalling less than 1000 are dropped.
# Plain verb calls are used instead of chain(), which is no longer part of
# dplyr; these calls work the same way in old and current versions.
data.grouped <- group_by(data, Type, Family)
data.grouped <- summarise(data.grouped,
  Count = n(),
  Size = sum(Size),
  Proportion = Size / genome.size
)
data.grouped <- droplevels(filter(data.grouped, Size >= 1000))

# Summary table with one row per family plus a "Sum" row of column totals.
# Convert to a base data.frame first: row names and appending a row by name
# are not supported on the tibble that dplyr returns.
# Row names must be unique, so this relies on each family occurring under a
# single Type.
tab <- as.data.frame(data.grouped)[c("Count", "Size", "Proportion")]
rownames(tab) <- as.character(data.grouped$Family)
tab["Sum", ] <- colSums(tab)
kable(tab)
| id | Count | Size | Proportion |
|---|---|---|---|
| ATP synthase | 7 | 4520 | 0.0008 |
| cytochrome c maturation | 3 | 3102 | 0.0005 |
| complex III (ubichinol cytochrome c reductase) | 1 | 1193 | 0.0002 |
| complex IV (cytochrome c oxidase) | 3 | 2739 | 0.0005 |
| maturases | 1 | 2144 | 0.0004 |
| complex I (NADH dehydrogenase) | 6 | 10328 | 0.0017 |
| complex I (NADH dehydrogenase 5) | 1 | 2451 | 0.0004 |
| ribosomal proteins (SSU) | 10 | 8689 | 0.0014 |
| transfer RNAs | 23 | 1690 | 0.0003 |
| Sum | 55 | 36856 | 0.0061 |
# Feature sizes per family, one panel per feature type stacked in a single
# column; each panel gets its own axis limits. Points are jittered.
# `jitter.data` is spelled out rather than relying on partial matching.
stripplot(Family ~ Size | Type, data,
  jitter.data = TRUE,
  layout = c(1, nlevels(data$Type)),
  scales = list(relation = "free"),
  xlab = "Size (bp)"
)

# Number of features per family, bars starting at zero.
barchart(Family ~ Count, data.grouped, origin = 0)

# Number of features per type, stacked by family (legend on the right).
# `groups` is spelled out rather than relying on partial matching.
barchart(Type ~ Count, data.grouped,
  groups = Family,
  stack = TRUE,
  auto.key = list(space = "right")
)

# Number of features as a single bar stacked by family.
barchart(~Count, data.grouped,
  groups = Family,
  stack = TRUE,
  auto.key = list(space = "right")
)

# Total size per family in kbp, bars starting at zero.
barchart(Family ~ Size / 1000, data.grouped, origin = 0, xlab = "Size (kbp)")

# Total size in kbp as a single bar stacked by family.
barchart(~ Size / 1000, data.grouped,
  groups = Family,
  stack = TRUE,
  auto.key = list(space = "right"),
  xlab = "Size (kbp)"
)