Load Libraries… Load coordinates

library(gggenes)
library(ggplot2)
library(RColorBrewer)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
setwd("/home/prakki/sw/prokka2gggenes/Analysis1")

coords <- read.table("for_gggenes.coords",header = FALSE, sep = "\t")
head(coords)
##                                                V1   V2   V3 V4   V5      V6
## 1 batch8_28062019_ENT866_assembly_3.fa_prefix.gff    1  885  1 Hypo forward
## 2 batch8_28062019_ENT866_assembly_3.fa_prefix.gff 1555 1893  1 Hypo forward
## 3 batch8_28062019_ENT866_assembly_3.fa_prefix.gff 2001 2534  1  ssb forward
## 4 batch8_28062019_ENT866_assembly_3.fa_prefix.gff 2564 3349 -1 Hypo reverse
## 5 batch8_28062019_ENT866_assembly_3.fa_prefix.gff 3529 3951  1 Hypo forward
## 6 batch8_28062019_ENT866_assembly_3.fa_prefix.gff 3969 4259  1 Hypo forward
names(coords) <- c("molecule","start","end","source","gene","direction")
head(coords)
##                                          molecule start  end source gene
## 1 batch8_28062019_ENT866_assembly_3.fa_prefix.gff     1  885      1 Hypo
## 2 batch8_28062019_ENT866_assembly_3.fa_prefix.gff  1555 1893      1 Hypo
## 3 batch8_28062019_ENT866_assembly_3.fa_prefix.gff  2001 2534      1  ssb
## 4 batch8_28062019_ENT866_assembly_3.fa_prefix.gff  2564 3349     -1 Hypo
## 5 batch8_28062019_ENT866_assembly_3.fa_prefix.gff  3529 3951      1 Hypo
## 6 batch8_28062019_ENT866_assembly_3.fa_prefix.gff  3969 4259      1 Hypo
##   direction
## 1   forward
## 2   forward
## 3   forward
## 4   reverse
## 5   forward
## 6   forward
#coords

c25 <- c(
  "dodgerblue2", "#E31A1C", # red
  "green4",
  "#6A3D9A", # purple
  "#FF7F00", # orange
  "black", "gold1",
  "skyblue2", "#FB9A99", # lt pink
  "palegreen2",
  "#CAB2D6", # lt purple
  "#FDBF6F", # lt orange
  "gray70", "khaki2",
  "maroon", "orchid1", "deeppink1", "blue1", "steelblue4",
  "darkturquoise", "green1", "yellow4", "yellow3",
  "darkorange4", "brown"
)

target=c("blaNDM-1")

match = which(coords$gene %in% target)
#match
getThese = unique(as.vector(mapply(seq,match-5,match+5)))
#getThese
getThese = getThese[getThese > 0 & getThese <= nrow(coords)]
#getThese
updown_stream_coords <- coords[getThese,]
#updown_stream_coords

dummies <- make_alignment_dummies(
  updown_stream_coords,
  aes(xmin = start, xmax = end, y = molecule, id = gene),
  on = "blaNDM-1"
)

Plot Align according to specific gene

ggplot(updown_stream_coords, aes(xmin = start, xmax = end, y = molecule, fill = gene, label = gene)) +
  geom_gene_arrow() +
  geom_gene_label(align = "left") +
  geom_blank(data = dummies) +
  facet_wrap(~ molecule, scales = "free", ncol = 1) +
  scale_fill_manual(values = c25) +
  theme_genes()

Align according to specific gene + change the arrow sizes and arrow heads

ggplot(updown_stream_coords, aes(xmin = start, xmax = end, y =molecule, fill = gene, label = gene)) +
  geom_gene_arrow(arrowhead_height = unit(5, "mm"), arrowhead_width = unit(3, "mm")) +
  geom_gene_label(align = "left") +
  geom_blank(data = dummies) +
  facet_wrap(~ molecule, scales = "free", ncol = 1) +
  scale_fill_manual(values = c25) +
  theme_genes()

Plot labels above genes using geom_text

ggplot(updown_stream_coords, aes(xmin = start, xmax = end, y = molecule, fill = gene)) +
  geom_gene_arrow() +
  facet_wrap(~ molecule, scales = "free", ncol = 1) +
  scale_fill_manual(values = c25) +
  theme_genes() + 
  geom_text(data=updown_stream_coords %>% mutate(start = (start + end)/2), aes(x=start, label = gene), nudge_y = 0.2 )

Align by a single gene as well as plot labels above genes using geom_text

ggplot(updown_stream_coords, aes(xmin = start, xmax = end, y = molecule, fill = gene)) +
  geom_gene_arrow() +
  geom_blank(data = dummies) +
  facet_wrap(~ molecule, scales = "free", ncol = 1) +
  scale_fill_manual(values = c25) +
  theme_genes() + 
  geom_text(data=updown_stream_coords %>% mutate(start = (start + end)/2), aes(x=start, label = gene), nudge_y = 0.4 )

Align by a single gene + plot labels above genes using geom_text + Cluster similar genomes according to genes

# Creating a presence/absence matrix for example genes
PA_matrix <- as.data.frame(with(updown_stream_coords, table(molecule, gene)) > 0L) +0L

# Sorting the presence/absence matrix for example genes
sorted_PA_matrix <- PA_matrix[do.call(order,as.data.frame(PA_matrix)),]
#sorted_PA_matrix
sorted_genomes <- row.names(sorted_PA_matrix)

# Creating sorted_dummies and sorted_updown_stream_coords which the final output figure should reflect
sorted_dummies <- dummies[order(unlist(sapply(dummies$molecule, function(x) which(sorted_genomes == x)))),]
sorted_updown_stream_coords <- updown_stream_coords[order(unlist(sapply(updown_stream_coords$molecule, function(x) which(sorted_genomes == x)))),]

# Convert molecule variable to a factor
sorted_updown_stream_coords$molecule <- factor(sorted_updown_stream_coords$molecule, levels = unique(sorted_updown_stream_coords$molecule))
sorted_dummies$molecule <- factor(sorted_dummies$molecule, levels = unique(sorted_dummies$molecule))

ggplot(sorted_updown_stream_coords, aes(xmin = start, xmax = end, y = factor(molecule), fill = gene)) +
  geom_gene_arrow() +
  geom_blank(data = sorted_dummies) +
  facet_wrap(~ molecule, scales = "free", ncol = 1) +
  scale_fill_manual(values = c25) +
  theme_genes() + 
  geom_text(data=sorted_updown_stream_coords %>% mutate(start = (start + end)/2), aes(x=start, label = gene), nudge_y = 0.4 )

Align by a single gene + plot labels above genes using geom_text + Cluster similar genomes according to genes + Change size of labels + Change grey line in the center

ggplot(sorted_updown_stream_coords, aes(xmin = start, xmax = end, y = factor(molecule), fill = gene)) +
  geom_gene_arrow() +
  geom_blank(data = sorted_dummies) +
  facet_wrap(~ molecule, scales = "free", ncol = 1) +
  scale_fill_manual(values = c25) +
  theme_genes()  %+replace% theme(panel.grid.major.y = element_line(colour = "red")) + 
  geom_text(data=sorted_updown_stream_coords %>% mutate(start = (start + end)/2), aes(x=start, label = gene), nudge_y = 0.4, size =3 )

Keep only genome if the gene content is exactly same for multiple

# Creating a presence/absence matrix for example genes
PA_matrix <- as.data.frame(with(updown_stream_coords, table(molecule, gene)) > 0L) +0L

# Sorting the presence/absence matrix for example genes
sorted_PA_matrix <- PA_matrix[do.call(order,as.data.frame(PA_matrix)),]
#sorted_PA_matrix
distict_sorted_PA_matrix <- distinct(sorted_PA_matrix)
#distict_sorted_PA_matrix
sorted_genomes <- row.names(distict_sorted_PA_matrix)

# Creating sorted_dummies and sorted_updown_stream_coords which the final output figure should reflect
sorted_dummies <- dummies[order(unlist(sapply(dummies$molecule, function(x) which(sorted_genomes == x)))),]
sorted_updown_stream_coords <- updown_stream_coords[order(unlist(sapply(updown_stream_coords$molecule, function(x) which(sorted_genomes == x)))),]

# Convert molecule variable to a factor
sorted_updown_stream_coords$molecule <- factor(sorted_updown_stream_coords$molecule, levels = unique(sorted_updown_stream_coords$molecule))
sorted_dummies$molecule <- factor(sorted_dummies$molecule, levels = unique(sorted_dummies$molecule))

ggplot(sorted_updown_stream_coords, aes(xmin = start, xmax = end, y = factor(molecule), fill = gene)) +
  geom_gene_arrow() +
  geom_blank(data = sorted_dummies) +
  facet_wrap(~ molecule, scales = "free", ncol = 1) +
  scale_fill_manual(values = c25) +
  theme_genes()  %+replace% theme(panel.grid.major.y = element_line(colour = "red")) + 
  geom_text(data=sorted_updown_stream_coords %>% mutate(start = (start + end)/2), aes(x=start, label = gene), nudge_y = 0.4, size =3 )

in progress –> Aligned Gene blaNDM-1 + Label on top + sorted genomes based on genes + forward reverse on the same strand

# sorted_updown_stream_coords %>%
#   mutate(direction = sample(c(-1, 1), 110, T)) %>%
#   ggplot(aes(xmin = start, xmax = end, y = factor(molecule), fill = gene, forward = direction)) +
#   geom_gene_arrow() +
#   geom_blank(data = sorted_dummies) +
#   facet_wrap(~ molecule, scales = "free", ncol = 1) +
#   scale_fill_manual(values = c25) +
#   theme_genes() +
#   geom_text(data=sorted_updown_stream_coords %>% mutate(start = (start + end)/2), aes(x=start, label = gene), nudge_y = 0.4 )
#