your caption

your caption

See the end of the document for a function-by-function heatmap.

Final oxygenase/hydroxylase count has been saved as “Arhodomonas.KEGG.oxygenases.hydroxylases.csv”

Oxygenases and hydroxylases in Arhodomonas

Which oxygenases are present in the Arhodomonas genomes? How do the number and types of oxygenase systems in A.BTEX compare to other Arhodomonads? Are there any types that are consistent with degradation of petroleum components?

  1. define a set of oxygenase functional annotations in the KEGG system which encapsulate most types of oxygenases found in Bacteria
  2. quantify these in the three protein sets using existing annotations (generated using GhostKoala)

Quick summary

  • Make a list of oxygenase (480) and hydroxylase (165) gene family annotations in the KEGG system
  • Count each gene family in the three Arhodomonas genome/protein set kegg annotation

Defining a set of oxygenase functional annotations

There are 480 “oxygenase” and 165 “hydroxylase” gene families in the KEGG system: https://www.genome.jp/dbget-bin/www_bfind_sub?mode=bfind&max_hit=1000&locale=en&serv=kegg&dbkey=orthology&keywords=oxygenase&page=1

Import these and parse:

# Import the KEGG keyword-search listings for oxygenases and hydroxylases.
# Both files are read as headerless, tab-separated text. In the first column
# the lines alternate: a KO identifier, then the description of that KO.
oxy.kegg.all <- read.csv("Arhodomonas.oxygenases/KEGG.all.oxygenase.txt",
                         stringsAsFactors = FALSE, sep = "\t",
                         header = FALSE)
hydroxy.kegg.all <- read.csv("../../../KEGG/KEGG.hydroxylases.txt",
                             stringsAsFactors = FALSE, sep = "\t",
                             header = FALSE)

# Turn one raw listing into a two-column (KO, description) table by pairing
# consecutive lines. A trailing line without a partner is dropped.
pair.kegg.lines <- function(raw) {
  n.pairs <- nrow(raw) %/% 2
  ko.rows <- seq(1, by = 2, length.out = n.pairs)
  data.frame(KO = raw[ko.rows, 1],
             description = raw[ko.rows + 1, 1],
             stringsAsFactors = FALSE)
}

# Pair the lines of each file separately, so that an odd line count in one
# file cannot shift the KO/description alignment of the other.
oxy.KEGG.all.table <- rbind(pair.kegg.lines(oxy.kegg.all),
                            pair.kegg.lines(hydroxy.kegg.all))

# Keep the combined raw listing and its row count available, as before.
oxy.kegg.all <- rbind(oxy.kegg.all, hydroxy.kegg.all)
rows <- nrow(oxy.kegg.all)

# A KO that matches both keywords would otherwise appear twice and be
# double-counted when the genome annotations are joined onto this table.
oxy.KEGG.all.table <-
  oxy.KEGG.all.table[!duplicated(oxy.KEGG.all.table$KO), ]
rownames(oxy.KEGG.all.table) <- NULL

# Flag misparsed input early: every identifier should look like "K00446".
bad.ko <- !grepl("^K[0-9]{5}$", oxy.KEGG.all.table$KO)
if (any(bad.ko)) {
  warning(sum(bad.ko), " entries in the KO column do not look like KO ",
          "identifiers; check the line pairing of the input files.",
          call. = FALSE)
}

head(oxy.KEGG.all.table)
##       KO
## 1 K00446
## 2 K00448
## 3 K00449
## 4 K00450
## 5 K00451
## 6 K00452
##                                                           description
## 1                 dmpB, xylE; catechol 2,3-dioxygenase [EC:1.13.11.2]
## 2 pcaG; protocatechuate 3,4-dioxygenase, alpha subunit [EC:1.13.11.3]
## 3  pcaH; protocatechuate 3,4-dioxygenase, beta subunit [EC:1.13.11.3]
## 4                E1.13.11.4; gentisate 1,2-dioxygenase [EC:1.13.11.4]
## 5             HGD, hmgA; homogentisate 1,2-dioxygenase [EC:1.13.11.5]
## 6          HAAO; 3-hydroxyanthranilate 3,4-dioxygenase [EC:1.13.11.6]

This will cast a very wide net, so the resulting list will require hand curation.

Filter out oxygenases and hydroxylases in the Arhodomonas genomes

Read in the annotation tables / KO lists (from EggNOG) and limit it to KO numbers and locus names

# A.BTEX: headerless, tab-separated table of locus name and KO number
kegg.btex <- read.csv(
  file = "A.BTEX/user_ko.txt",
  header = FALSE,
  sep = "\t",
  stringsAsFactors = FALSE
)
names(kegg.btex) <- c("A.BTEX", "KO")
head(kegg.btex)
##        A.BTEX     KO
## 1 BTEX1_00001 K03304
## 2 BTEX1_00002 K01011
## 3 BTEX1_00003 K03321
## 4 BTEX1_00004       
## 5 BTEX1_00005       
## 6 BTEX1_00006 K08303
# A. aquaeolei: headerless, comma-separated table of locus name and KO number
kegg.aqua <- read.csv(
  file = "A.aquaeolei/user_ko.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
names(kegg.aqua) <- c("A.aqua", "KO")
head(kegg.aqua)
##       A.aqua KO
## 1 Aaqu_00001   
## 2 Aaqu_00002   
## 3 Aaqu_00003   
## 4 Aaqu_00004   
## 5 Aaqu_00005   
## 6 Aaqu_00006
# Arhodomonas 2007: headerless, comma-separated table of locus name and KO
kegg.2007 <- read.csv(
  file = "Arhodomonas_2007/user_ko.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
names(kegg.2007) <- c("A.2007", "KO")
head(kegg.2007)
##           A.2007 KO
## 1    Orf1_3_1185   
## 2 Orf2_1178_1768   
## 3 Orf3_2076_3251   
## 4 Orf4_3347_3757   
## 5 Orf5_3864_4136   
## 6 Orf6_4329_4694

Merge the annotations onto the full KO oxygenase list, count the number of occurrences of each KO number, and join it all together.

library(dplyr)
# Attach the A. aquaeolei loci to every KO of the reference list. KOs with no
# matching locus get NA from the left join and are removed by complete.cases().
KO.oxy.aqua <- oxy.KEGG.all.table %>%
  dplyr::left_join(kegg.aqua, by = "KO")
by.KO.aqua <- dplyr::group_by(KO.oxy.aqua, KO)
by.KO.aqua <- by.KO.aqua[complete.cases(by.KO.aqua), ]

# One row per KO with the number of loci carrying it, most frequent first.
KO.count.oxy.aqua <- by.KO.aqua %>%
  dplyr::summarise(n = dplyr::n())
KO.count.oxy.aqua <- KO.count.oxy.aqua[order(-KO.count.oxy.aqua$n), ]
names(KO.count.oxy.aqua)[2] <- "A.aquaeoli"
KO.count.oxy.aqua %>%
  head(n = 20) %>%
  kable()
KO A.aquaeoli
K00446 2
K00471 2
K05712 2
K00451 1
K00457 1
K00459 1
K01633 1
K03185 1
K03863 1
K06134 1
K07215 1
K09461 1
K09906 1
K10674 1
K16242 1
K16243 1
K16244 1
K16245 1
K16246 1
K16249 1
# Attach the Arhodomonas 2007 loci to every KO of the reference list. KOs with
# no matching locus get NA from the left join and are removed by
# complete.cases().
KO.oxy.2007 <- oxy.KEGG.all.table %>%
  dplyr::left_join(kegg.2007, by = "KO")
by.KO.2007 <- dplyr::group_by(KO.oxy.2007, KO)
by.KO.2007 <- by.KO.2007[complete.cases(by.KO.2007), ]

# One row per KO with the number of loci carrying it, most frequent first.
KO.count.oxy.2007 <- by.KO.2007 %>%
  dplyr::summarise(n = dplyr::n())
KO.count.oxy.2007 <- KO.count.oxy.2007[order(-KO.count.oxy.2007$n), ]
names(KO.count.oxy.2007)[2] <- "A.2007"
KO.count.oxy.2007 %>%
  head(n = 20) %>%
  kable()
KO A.2007
K00446 2
K00453 2
K00471 2
K00481 2
K10674 2
K16242 2
K16243 2
K16244 2
K16245 2
K16246 2
K16249 2
K00449 1
K00451 1
K00457 1
K01633 1
K03185 1
K05712 1
K06134 1
K07336 1
K09461 1
# Attach the A.BTEX loci to every KO of the reference list. KOs with no
# matching locus get NA from the left join and are removed by complete.cases().
KO.oxy.BTEX <- oxy.KEGG.all.table %>%
  dplyr::left_join(kegg.btex, by = "KO")
by.KO.BTEX <- dplyr::group_by(KO.oxy.BTEX, KO)
by.KO.BTEX <- by.KO.BTEX[complete.cases(by.KO.BTEX), ]

# One row per KO with the number of loci carrying it, most frequent first.
KO.count.oxy.BTEX <- by.KO.BTEX %>%
  dplyr::summarise(n = dplyr::n())
KO.count.oxy.BTEX <- KO.count.oxy.BTEX[order(-KO.count.oxy.BTEX$n), ]
names(KO.count.oxy.BTEX)[2] <- "A.BTEX"
KO.count.oxy.BTEX %>%
  head(n = 20) %>%
  kable()
KO A.BTEX
K00446 2
K00471 2
K05712 2
K06134 2
K00451 1
K00455 1
K00457 1
K00459 1
K01633 1
K03185 1
K03863 1
K07215 1
K09461 1
K09906 1
K10674 1
K16242 1
K16243 1
K16244 1
K16245 1
K16246 1
# Combine the three per-genome count tables into one table keyed on KO, then
# add the KO descriptions. Every join names its key explicitly so that no
# other shared column can silently become part of the key.
KO.count.oxy <- dplyr::full_join(KO.count.oxy.aqua, KO.count.oxy.2007,
                                 by = "KO")
KO.count.oxy <- dplyr::full_join(KO.count.oxy, KO.count.oxy.BTEX, by = "KO")
KO.count.oxy <- dplyr::left_join(KO.count.oxy, oxy.KEGG.all.table, by = "KO")

# A KO absent from a genome is NA after the full joins; record it as 0.
# Only the count columns are touched, so the character columns are never
# overwritten with a number.
count.cols <- c("A.aquaeoli", "A.2007", "A.BTEX")
for (genome in count.cols) {
  counts <- KO.count.oxy[[genome]]
  counts[is.na(counts)] <- 0
  KO.count.oxy[[genome]] <- counts
}

# Total occurrences across the three genomes; most abundant KOs first.
KO.count.oxy$sum <- rowSums(KO.count.oxy[count.cols])
KO.count.oxy <- KO.count.oxy[order(-KO.count.oxy$sum), ]
write.csv(KO.count.oxy, "Arhodomonas.KEGG.oxygenases.hydroxylases.csv",
          row.names = FALSE)
head(KO.count.oxy)
## # A tibble: 6 x 6
##   KO     A.aquaeoli A.2007 A.BTEX description                           sum
##   <chr>       <dbl>  <dbl>  <dbl> <chr>                               <dbl>
## 1 K00446          2      2      2 dmpB, xylE; catechol 2,3-dioxygena…     6
## 2 K00471          2      2      2 E1.14.11.1; gamma-butyrobetaine di…     6
## 3 K05712          2      1      2 mhpA; 3-(3-hydroxy-phenyl)propiona…     5
## 4 K06134          1      1      2 COQ7; 3-demethoxyubiquinol 3-hydro…     4
## 5 K10674          1      2      1 ectD; ectoine hydroxylase [EC:1.14…     4
## 6 K16242          1      2      1 dmpN, poxD; phenol hydroxylase P3 …     4

This has been written to a file named “Arhodomonas.KEGG.oxygenases.hydroxylases.csv”.

Oxygenase heatmaps

library(heatmaply)
# Convert to a plain data frame so the KO descriptions can be used as row
# names, which label the heatmap rows.
KO.count.oxy <- data.frame(KO.count.oxy, stringsAsFactors = FALSE)
rownames(KO.count.oxy) <- KO.count.oxy[["description"]]

# Heatmap of the three per-genome count columns (columns 2 to 4), without
# dendrograms.
O.H.heat <- heatmaply(
  KO.count.oxy[2:4],
  fontsize_row = 8,
  grid_color = "white",
  dendrogram = "none"
)

# Same heatmap with a smaller row font, passed the list of output files.
heatmaply(
  KO.count.oxy[2:4],
  fontsize_row = 6,
  grid_color = "white",
  dendrogram = "none",
  file = c(
    "O.H.heatmap.html",
    "O.H.heatmap.pdf",
    "O.H.heatmap.png",
    "O.H.heatmap.jpeg"
  )
)

It is important to note that the KEGG system has extensive annotation for aromatic dioxygenases (initial attack) and alkane monooxygenases: none were detected here, suggesting that Arhodomonas has no capacity to degrade primary petroleum components.

It is also important to note that this survey is not comprehensive. Because it is a text-based search of KO names, it relies upon clear naming of reactions. A more sound strategy would start rather with EC categories for the respective reaction classes (i.e. 1.14.x.x for oxygenase reactions involving dioxygen), but I have yet to locate a clean map of EC to any orthology system.

Venn Diagram

library(VennDiagram)
# KO column of each genome's filtered annotation table, as a one-element list.
O.A.2007 <- as.list(by.KO.2007[1])
O.A.aqua <- as.list(by.KO.aqua[1])
O.A.btex <- as.list(by.KO.BTEX[1])

# Build the named input list directly from the KO vectors. Concatenating the
# one-element lists with c() would name the sets "A.2007.KO", "A.aquaeoli.KO"
# and "A.BTEX.KO", and those names become the category labels on the diagram.
# Each KO is counted once per genome.
venn.sets <- list(
  "A.2007" = unique(O.A.2007[[1]]),
  "A.aquaeoli" = unique(O.A.aqua[[1]]),
  "A.BTEX" = unique(O.A.btex[[1]])
)

# filename = NULL returns the grid objects instead of writing a file, so the
# same diagram can be drawn onto several graphics devices.
vd <- venn.diagram(venn.sets, filename = NULL, resolution = 150,
                   height = 800, width = 800,
                   fill = c("cornflowerblue", "green", "darkorchid1"),
                   main = "KEGG Oxygenases and hydroxylases in Arhodomonas Genomes",
                   alpha = 0.2, main.cex = 1, cex = 1.4, cat.cex = 1,
                   cat.dist = c(.05, .05, .04), main.pos = c(0.5, 1.1))

# NOTE(review): the file name below spells "hyroxylase"; it is kept as is in
# case other documents link to it -- confirm before renaming.
jpeg("oxygenase.hyroxylase.vd.jpg")
grid.draw(vd)
dev.off()
## png 
##   2
# Draw the Venn diagram stored in `vd` onto a PNG device, then close the
# device so the file is written.
grDevices::png(filename = "oxygenase.hydroxylase.cd.png")
grid::grid.draw(vd)
grDevices::dev.off()
## png 
##   2