Visualize repeat content annotated by RepeatMasker

# Genome size used as the denominator for the repeat proportions below
# (presumably in bp, matching the feature sizes -- confirm).
genome.size <- 6017035

# Read the RepeatMasker GFF3 annotation and keep only the features whose
# type is "match".
gff.all <- readGff3("query.masked.gff.orig")
is.match <- gff.all$type == "match"
gff <- gff.all[is.match, ]

# One row per annotated repeat: its name (URL-encoded "%2F" decoded to "/"
# and a single trailing "/" removed) and its size.
data <- data.frame(
  Name = sub("/$", "", gsub("%2F", "/", getGffAttribute(gff, "Name"))),
  Size = size(gff),
  stringsAsFactors = FALSE
)

# Family is the token following "genus:" in the name (names without a
# "genus:" tag are kept as-is by sub()); Class is the part of the family
# before the first "/".
data$Family <- sub(".*genus:([^ ;]*).*", "\\1", data$Name)
data$Class <- factor(sub("/.*", "", data$Family))
data$Family <- factor(data$Family)

# Summarise per (Class, Family) and keep only families totalling at least
# 1000 in Size; droplevels() then discards factor levels of the families
# that were filtered out.
# The pipeline uses %>% rather than chain(): chain() was deprecated in
# dplyr 0.2 and has since been removed, so the old form fails on current
# dplyr. Inside summarise(), Proportion is computed from the already-summed
# Size because summary expressions are evaluated in order.
data.grouped <- droplevels(
  data %>%
    group_by(Class, Family) %>%
    summarise(
      Count = n(),
      Size = sum(Size),
      Proportion = Size / genome.size
    ) %>%
    filter(Size >= 1000)
)

# Per-family summary table with a trailing "Sum" row, rendered with kable().
# data.grouped is coerced to a plain data.frame first: with current dplyr it
# is a tibble, on which setting row names is deprecated and adding a row by
# name is not supported. For a plain data.frame the coercion is a no-op.
tab <- as.data.frame(data.grouped)[c("Count", "Size", "Proportion")]
rownames(tab) <- data.grouped$Family
# Column totals across all retained families.
tab["Sum", ] <- colSums(tab)
kable(tab)
id Count Size Proportion
LINE 6 1089 0.0002
LINE/Jockey 105 31778 0.0053
LINE/R1 42 7081 0.0012
Low_complexity 293 12781 0.0021
LTR/Copia 71 17145 0.0028
LTR/Gypsy 185 74162 0.0123
NHF 119 23363 0.0039
rRNA 60 8298 0.0014
Simple_repeat 3079 141376 0.0235
UNK 7 1754 0.0003
Unknown 633 72525 0.0121
Sum 4600 391352 0.0650
# Strip plot of individual repeat sizes per family, restricted to the
# families retained in data.grouped. The jitter argument is spelled out as
# jitter.data (the name panel.stripplot() defines) instead of relying on
# partial matching of "jitter".
stripplot(Family ~ Size, data,
  subset = Family %in% data.grouped$Family,
  jitter.data = TRUE
)

plot of chunk Stripplot of repeat class vs size

# Horizontal bar chart of the total size per family, in kbp, with bars
# starting at zero.
barchart(
  Family ~ Size/1000,
  data = data.grouped,
  origin = 0,
  xlab = "Size (kbp)"
)

plot of chunk Barchart repeat class vs size


# Stacked bar chart of total size (kbp) per class, one segment per family,
# with the family legend placed to the right. The grouping argument is
# spelled out as "groups" instead of relying on partial matching of "group".
barchart(Class ~ Size/1000, data.grouped,
  groups = Family, stack = TRUE,
  auto.key = list(space = "right"),
  xlab = "Size (kbp)"
)

plot of chunk Barchart repeat class vs size


# Single stacked bar of total size (kbp) with one segment per family and
# the family legend placed to the right. The grouping argument is spelled
# out as "groups" instead of relying on partial matching of "group".
barchart(~Size/1000, data.grouped,
  groups = Family, stack = TRUE,
  auto.key = list(space = "right"),
  xlab = "Size (kbp)"
)

plot of chunk Barchart repeat class vs size