# plot Gene Body plots
getwd()
## [1] "D:/Aedes_Illumina_Genomes/Analysis/Variants/ROCKv4_SNPs/All_Libs/rproj/src"
# should be "D:/Aedes_Illumina_Genomes/Analysis/Variants/ROCKv4_SNPs/All_Libs/rproj"
setwd("D:/Aedes_Illumina_Genomes/Analysis/Variants/ROCKv4_SNPs/All_Libs/rproj")
library(GenomicFeatures)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
#library(GenomicRanges)
library(AnnotationFilter)
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
gff3_file = "D:/Aedes_Illumina_Genomes/Analysis/Variants/ROCKv4_SNPs/All_Libs/ROCKv4.curated.L5-Liftoff_Run4.AGAT.FixCDS.gff3"
txdb.filename <- "ROCKv4.VectorBase.sqlite"
txdb <- loadDb(txdb.filename)
gr.all <- genes(txdb)
# range(gr.all)
# start(gr.all)
# end(gr.all)
zoom.chr <- 3
zoom.range <- c(340000000, 370000000)
# Make window for subsetting
win <- GRanges("3:340000000-360000000")
# Get genes in the window
genes.window <- subsetByOverlaps(gr.all, win)
####
# Set up for plotting
require("gggenes")
## Loading required package: gggenes
## Warning: package 'gggenes' was built under R version 4.2.3
#### Aedes plotting
# 1) Make data frame of genes in window
genes.plot <- data.frame(genes.window)
str(genes.plot)
## 'data.frame': 218 obs. of 6 variables:
## $ seqnames: Factor w/ 54 levels "1","2","3","MT",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ start : int 357795498 357724548 357721103 357744012 357673343 352980816 353608665 353558627 353752522 353144157 ...
## $ end : int 358104745 357741950 357724244 357758664 357718416 353012278 353629816 353589703 353784810 353146396 ...
## $ width : int 309248 17403 3142 14653 45074 31463 21152 31077 32289 2240 ...
## $ strand : Factor w/ 3 levels "+","-","*": 2 2 2 1 1 1 2 2 2 1 ...
## $ gene_id : chr "AAEL001299" "AAEL001300" "AAEL001314" "AAEL001317" ...
# 2) Start building the ggplot object
# We need strand info to be TRUE for positive or FALSE for negative
genes.plot <- genes.plot %>%
mutate(forward=ifelse(strand=="+", TRUE, FALSE))
# 3) Create basic ggplot object
plot <- ggplot(genes.plot, aes(
xmin=start,
xmax=end,
y=seqnames,
forward=forward
))
# 4) Add the gene arrow geometry from gggenes
plot + geom_gene_arrow(fill="grey") +
theme_genes()

## 5) Dropping all the smallest genes to declutter the plot
length(genes.plot$gene_id) # 218 genes
## [1] 218
genes.plot <- filter(genes.plot, width > 1790)
length(genes.plot$gene_id) # 163 genes
## [1] 163
# Look at the distribution of gene body lengths now --
summary(genes.plot$width)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1797 7470 17890 70286 50513 877541
# What's the longest gene body on the plot?
genes.plot[genes.plot$width==max(genes.plot$width),]
## seqnames start end width strand gene_id forward
## 150 3 354739427 355616967 877541 + AAEL025111 TRUE
# It's AAEL025111 which is a sax3-like or 'robo' like transmembrane protein, or "Roundabout"
## Get the sodium channel gene
## gene_id == AAEL023266
vgsc <- filter(genes.plot, gene_id=="AAEL023266")
vgsc$gene_name="vgsc"
vgsc
## seqnames start end width strand gene_id forward gene_name
## 1 3 346240008 346637458 397451 - AAEL023266 FALSE vgsc
## I'm going to make a "features" table to plot a couple
## of gene names using the features options; vgsc and robo
# features <- filter(genes.plot, gene_id %in% c("AAEL023266", "AAEL025111"))
# features$gene_name <- c("vgsc", "robo-2-like")
# features
## 5) Make the full plot, with cuter arrows and the gene name labels
gene_body_plot <- plot +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1, "mm"),
fill="grey") +
geom_feature(data=vgsc,
aes(x=end, y=seqnames, forward=forward),
# Set the text label siE
feature_height=unit(15, "points")) +
geom_feature_label(data=vgsc,
aes(x=end, y=seqnames, forward=forward, label=gene_name),
size=20, #feature arrow height
feature_height=unit(18, "points")) + # label position
theme_genes() +
labs(y="ROCK Chromosome", x="Coordinate")
png(filename="GeneBodyMap.png", width=1280, height=720)
gene_body_plot
dev.off()
## png
## 2
gene_body_plot

### Garbage dump
## autoplot?
#autoplot(genes.window) + theme_minimal()
## That works but it's really ugly.
### Try gggenes
#install.packages("gggenes")