# plot Gene Body plots
getwd()
## [1] "D:/Aedes_Illumina_Genomes/Analysis/Variants/ROCKv4_SNPs/All_Libs/rproj/src"
# should be "D:/Aedes_Illumina_Genomes/Analysis/Variants/ROCKv4_SNPs/All_Libs/rproj"
setwd("D:/Aedes_Illumina_Genomes/Analysis/Variants/ROCKv4_SNPs/All_Libs/rproj")



library(GenomicFeatures)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
#library(GenomicRanges)
library(AnnotationFilter)
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:GenomicRanges':
## 
##     shift
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
gff3_file = "D:/Aedes_Illumina_Genomes/Analysis/Variants/ROCKv4_SNPs/All_Libs/ROCKv4.curated.L5-Liftoff_Run4.AGAT.FixCDS.gff3"
txdb.filename <- "ROCKv4.VectorBase.sqlite"
txdb <- loadDb(txdb.filename)

gr.all <- genes(txdb)
# range(gr.all)
# start(gr.all)
# end(gr.all)

zoom.chr <- 3
zoom.range <- c(340000000, 370000000)

# Make window for subsetting
win <- GRanges("3:340000000-360000000")
# Get genes in the window
genes.window <- subsetByOverlaps(gr.all, win)
####

# Set up for plotting
require("gggenes")
## Loading required package: gggenes
## Warning: package 'gggenes' was built under R version 4.2.3
#### Aedes plotting 

# 1) Make data frame of genes in window
genes.plot <- data.frame(genes.window)
str(genes.plot)
## 'data.frame':    218 obs. of  6 variables:
##  $ seqnames: Factor w/ 54 levels "1","2","3","MT",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ start   : int  357795498 357724548 357721103 357744012 357673343 352980816 353608665 353558627 353752522 353144157 ...
##  $ end     : int  358104745 357741950 357724244 357758664 357718416 353012278 353629816 353589703 353784810 353146396 ...
##  $ width   : int  309248 17403 3142 14653 45074 31463 21152 31077 32289 2240 ...
##  $ strand  : Factor w/ 3 levels "+","-","*": 2 2 2 1 1 1 2 2 2 1 ...
##  $ gene_id : chr  "AAEL001299" "AAEL001300" "AAEL001314" "AAEL001317" ...
# 2) Start building the ggplot object
# We need strand info to be TRUE for positive or FALSE for negative
genes.plot <- genes.plot %>% 
  mutate(forward=ifelse(strand=="+", TRUE, FALSE))

# 3) Create basic ggplot object
plot <- ggplot(genes.plot, aes(
  xmin=start, 
  xmax=end,
  y=seqnames, 
  forward=forward
))

# 4) Add the gene arrow geometry from gggenes

plot + geom_gene_arrow(fill="grey") + 
  theme_genes()

## 5) Dropping all the smallest genes to declutter the plot
length(genes.plot$gene_id) # 218 genes
## [1] 218
genes.plot <- filter(genes.plot, width > 1790)
length(genes.plot$gene_id) # 163 genes
## [1] 163
# Look at the distribution of gene body lengths now -- 
summary(genes.plot$width)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1797    7470   17890   70286   50513  877541
# What's the longest gene body on the plot?  
genes.plot[genes.plot$width==max(genes.plot$width),]
##     seqnames     start       end  width strand    gene_id forward
## 150        3 354739427 355616967 877541      + AAEL025111    TRUE
# It's AAEL025111 which is a sax3-like or 'robo' like transmembrane protein, or "Roundabout"
## Get the sodium channel gene
## gene_id == AAEL023266
vgsc <- filter(genes.plot, gene_id=="AAEL023266") 
vgsc$gene_name="vgsc"
vgsc
##   seqnames     start       end  width strand    gene_id forward gene_name
## 1        3 346240008 346637458 397451      - AAEL023266   FALSE      vgsc
## I'm going to make a "features" table to plot a couple 
## of gene names using the features options; vgsc and robo
# features <- filter(genes.plot, gene_id %in% c("AAEL023266", "AAEL025111"))
# features$gene_name <- c("vgsc", "robo-2-like")
# features

## 5) Make the full plot, with cuter arrows and the gene name labels
gene_body_plot <- plot + 
  geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1, "mm"), 
                  fill="grey") +
  geom_feature(data=vgsc, 
               aes(x=end, y=seqnames, forward=forward), 
               # Set the text label siE
               feature_height=unit(15, "points")) + 
  geom_feature_label(data=vgsc, 
                     aes(x=end, y=seqnames, forward=forward, label=gene_name),
                     size=20, #feature arrow height
                     feature_height=unit(18, "points")) + # label position
  theme_genes() + 
  labs(y="ROCK Chromosome", x="Coordinate") 

png(filename="GeneBodyMap.png", width=1280, height=720)
gene_body_plot
dev.off()
## png 
##   2
gene_body_plot

### Garbage dump 

## autoplot? 

#autoplot(genes.window) + theme_minimal()
## That works but it's really ugly. 

### Try gggenes
#install.packages("gggenes")