your caption
Which oxygenases are present in the Arhodomonas genomes? How do the number and types of oxygenase systems in A.BTEX compare to other Arhodomonads? Are there any types that are consistent with degradation of petroleum components?
There are 480 “oxygenase” and 165 “hydroxylase” gene families in the KEGG system: https://www.genome.jp/dbget-bin/www_bfind_sub?mode=bfind&max_hit=1000&locale=en&serv=kegg&dbkey=orthology&keywords=oxygenase&page=1
Import these and parse:
# Load the KEGG orthology search results for "oxygenase" and "hydroxylase".
# Both files are tab-separated with no header row; only their first column is
# used below, where a KO identifier line is followed by its description line.
oxy.kegg.all <- read.csv("Arhodomonas.oxygenases/KEGG.all.oxygenase.txt",
                         stringsAsFactors = FALSE, sep = "\t", header = FALSE)
hydroxy.kegg.all <- read.csv("../../../KEGG/KEGG.hydroxylases.txt",
                             stringsAsFactors = FALSE, sep = "\t",
                             header = FALSE)
oxy.kegg.all <- rbind(oxy.kegg.all, hydroxy.kegg.all)
rows <- nrow(oxy.kegg.all)
# The pairing below assumes strictly alternating KO / description lines. An
# odd line count means at least one pair is incomplete, so say so instead of
# silently ignoring the trailing line.
if (rows %% 2 != 0) {
  warning("KEGG list has an odd number of lines (", rows,
          "); the last line is ignored and KO/description pairs may be ",
          "misaligned.", call. = FALSE)
}
# Odd-numbered lines are KO identifiers, even-numbered lines descriptions.
# seq_len() yields an empty index (rather than c(0, -1)) when there are no
# complete pairs.
pair.index <- seq_len(rows %/% 2)
ko.ids <- oxy.kegg.all[2 * pair.index - 1, 1]
ko.descriptions <- oxy.kegg.all[2 * pair.index, 1]
# A KO returned by both keyword searches would otherwise appear twice and be
# double-counted by every later join on KO; keep its first occurrence only.
first.hit <- !duplicated(ko.ids)
oxy.KEGG.all.table <- data.frame(
  KO = ko.ids[first.hit],
  description = ko.descriptions[first.hit],
  stringsAsFactors = FALSE
)
head(oxy.KEGG.all.table)
## KO
## 1 K00446
## 2 K00448
## 3 K00449
## 4 K00450
## 5 K00451
## 6 K00452
## description
## 1 dmpB, xylE; catechol 2,3-dioxygenase [EC:1.13.11.2]
## 2 pcaG; protocatechuate 3,4-dioxygenase, alpha subunit [EC:1.13.11.3]
## 3 pcaH; protocatechuate 3,4-dioxygenase, beta subunit [EC:1.13.11.3]
## 4 E1.13.11.4; gentisate 1,2-dioxygenase [EC:1.13.11.4]
## 5 HGD, hmgA; homogentisate 1,2-dioxygenase [EC:1.13.11.5]
## 6 HAAO; 3-hydroxyanthranilate 3,4-dioxygenase [EC:1.13.11.6]
This will cast a very wide net, so the results will require hand curation.
Read in the annotation tables / KO lists (from EggNOG) and limit them to KO numbers and locus names.
# A.BTEX annotations: tab-separated, no header row. Column 1 is named for the
# genome (it holds the locus names) and column 2 holds the assigned KO number.
kegg.btex <- read.delim("A.BTEX/user_ko.txt", header = FALSE,
                        stringsAsFactors = FALSE)
names(kegg.btex) <- c("A.BTEX", "KO")
head(kegg.btex)
## A.BTEX KO
## 1 BTEX1_00001 K03304
## 2 BTEX1_00002 K01011
## 3 BTEX1_00003 K03321
## 4 BTEX1_00004
## 5 BTEX1_00005
## 6 BTEX1_00006 K08303
# A. aquaeolei annotations: comma-separated, no header row. Column 1 is named
# for the genome (locus names) and column 2 holds the assigned KO number.
kegg.aqua <- setNames(
  read.csv("A.aquaeolei/user_ko.csv", header = FALSE,
           stringsAsFactors = FALSE),
  c("A.aqua", "KO")
)
head(kegg.aqua)
## A.aqua KO
## 1 Aaqu_00001
## 2 Aaqu_00002
## 3 Aaqu_00003
## 4 Aaqu_00004
## 5 Aaqu_00005
## 6 Aaqu_00006
# Arhodomonas 2007 annotations: comma-separated, no header row. Column 1 is
# named for the genome (locus names) and column 2 holds the assigned KO number.
kegg.2007 <- setNames(
  read.csv("Arhodomonas_2007/user_ko.csv", header = FALSE,
           stringsAsFactors = FALSE),
  c("A.2007", "KO")
)
head(kegg.2007)
## A.2007 KO
## 1 Orf1_3_1185
## 2 Orf2_1178_1768
## 3 Orf3_2076_3251
## 4 Orf4_3347_3757
## 5 Orf5_3864_4136
## 6 Orf6_4329_4694
Merge the annotations onto the full KO oxygenase list, count the number of occurrences of each KO number, and join it all together.
library(dplyr)
# Attach the A. aquaeolei locus names to every KO in the oxygenase/hydroxylase
# list. KOs with no matching locus get NA in the locus column.
KO.oxy.aqua <- oxy.KEGG.all.table %>%
  dplyr::left_join(kegg.aqua, by = "KO")
# Group by KO, then keep only rows without any NA, i.e. KOs that matched at
# least one locus.
by.KO.aqua <- dplyr::group_by(KO.oxy.aqua, KO)
by.KO.aqua <- by.KO.aqua[complete.cases(by.KO.aqua), ]
# One row per KO with its number of loci, most frequent first.
KO.count.oxy.aqua <- by.KO.aqua %>%
  dplyr::summarise(n = n()) %>%
  dplyr::arrange(dplyr::desc(n))
# Name the count column after the genome so the per-genome tables can be
# joined side by side later.
names(KO.count.oxy.aqua)[2] <- "A.aquaeoli"
kable(head(KO.count.oxy.aqua, n = 20))
| KO | A.aquaeoli |
|---|---|
| K00446 | 2 |
| K00471 | 2 |
| K05712 | 2 |
| K00451 | 1 |
| K00457 | 1 |
| K00459 | 1 |
| K01633 | 1 |
| K03185 | 1 |
| K03863 | 1 |
| K06134 | 1 |
| K07215 | 1 |
| K09461 | 1 |
| K09906 | 1 |
| K10674 | 1 |
| K16242 | 1 |
| K16243 | 1 |
| K16244 | 1 |
| K16245 | 1 |
| K16246 | 1 |
| K16249 | 1 |
# Attach the Arhodomonas 2007 locus names to every KO in the
# oxygenase/hydroxylase list. KOs with no matching locus get NA in the locus
# column.
KO.oxy.2007 <- oxy.KEGG.all.table %>%
  dplyr::left_join(kegg.2007, by = "KO")
# Group by KO, then keep only rows without any NA, i.e. KOs that matched at
# least one locus.
by.KO.2007 <- dplyr::group_by(KO.oxy.2007, KO)
by.KO.2007 <- by.KO.2007[complete.cases(by.KO.2007), ]
# One row per KO with its number of loci, most frequent first.
KO.count.oxy.2007 <- by.KO.2007 %>%
  dplyr::summarise(n = n()) %>%
  dplyr::arrange(dplyr::desc(n))
# Name the count column after the genome so the per-genome tables can be
# joined side by side later.
names(KO.count.oxy.2007)[2] <- "A.2007"
kable(head(KO.count.oxy.2007, n = 20))
| KO | A.2007 |
|---|---|
| K00446 | 2 |
| K00453 | 2 |
| K00471 | 2 |
| K00481 | 2 |
| K10674 | 2 |
| K16242 | 2 |
| K16243 | 2 |
| K16244 | 2 |
| K16245 | 2 |
| K16246 | 2 |
| K16249 | 2 |
| K00449 | 1 |
| K00451 | 1 |
| K00457 | 1 |
| K01633 | 1 |
| K03185 | 1 |
| K05712 | 1 |
| K06134 | 1 |
| K07336 | 1 |
| K09461 | 1 |
# Attach the A.BTEX locus names to every KO in the oxygenase/hydroxylase list.
# KOs with no matching locus get NA in the locus column.
KO.oxy.BTEX <- oxy.KEGG.all.table %>%
  dplyr::left_join(kegg.btex, by = "KO")
# Group by KO, then keep only rows without any NA, i.e. KOs that matched at
# least one locus.
by.KO.BTEX <- dplyr::group_by(KO.oxy.BTEX, KO)
by.KO.BTEX <- by.KO.BTEX[complete.cases(by.KO.BTEX), ]
# One row per KO with its number of loci, most frequent first.
KO.count.oxy.BTEX <- by.KO.BTEX %>%
  dplyr::summarise(n = n()) %>%
  dplyr::arrange(dplyr::desc(n))
# Name the count column after the genome so the per-genome tables can be
# joined side by side later.
names(KO.count.oxy.BTEX)[2] <- "A.BTEX"
kable(head(KO.count.oxy.BTEX, n = 20))
| KO | A.BTEX |
|---|---|
| K00446 | 2 |
| K00471 | 2 |
| K05712 | 2 |
| K06134 | 2 |
| K00451 | 1 |
| K00455 | 1 |
| K00457 | 1 |
| K00459 | 1 |
| K01633 | 1 |
| K03185 | 1 |
| K03863 | 1 |
| K07215 | 1 |
| K09461 | 1 |
| K09906 | 1 |
| K10674 | 1 |
| K16242 | 1 |
| K16243 | 1 |
| K16244 | 1 |
| K16245 | 1 |
| K16246 | 1 |
# Combine the per-genome KO counts into one table with one row per KO found
# in at least one genome, add the KO descriptions, and total the counts.
KO.count.oxy <- dplyr::full_join(KO.count.oxy.aqua, KO.count.oxy.2007,
                                 by = "KO")
KO.count.oxy <- dplyr::full_join(KO.count.oxy, KO.count.oxy.BTEX, by = "KO")
# At this point every column except the key is a per-genome count.
count.cols <- setdiff(names(KO.count.oxy), "KO")
# A KO missing from a genome's table comes out of the full join as NA, which
# here means zero copies. Only the count columns are filled, so a missing
# description can never be overwritten with 0.
KO.count.oxy[count.cols] <- lapply(
  KO.count.oxy[count.cols],
  function(counts) replace(counts, is.na(counts), 0)
)
# Explicit key: do not let dplyr guess the join columns (and print a message).
KO.count.oxy <- dplyr::left_join(KO.count.oxy, oxy.KEGG.all.table, by = "KO")
KO.count.oxy$sum <- rowSums(KO.count.oxy[count.cols])
# Most abundant KOs across the three genomes first.
KO.count.oxy <- KO.count.oxy[order(-KO.count.oxy$sum), ]
write.csv(KO.count.oxy, "Arhodomonas.KEGG.oxygenases.hydroxylases.csv",
          row.names = FALSE)
head(KO.count.oxy)
## # A tibble: 6 x 6
## KO A.aquaeoli A.2007 A.BTEX description sum
## <chr> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 K00446 2 2 2 dmpB, xylE; catechol 2,3-dioxygena… 6
## 2 K00471 2 2 2 E1.14.11.1; gamma-butyrobetaine di… 6
## 3 K05712 2 1 2 mhpA; 3-(3-hydroxy-phenyl)propiona… 5
## 4 K06134 1 1 2 COQ7; 3-demethoxyubiquinol 3-hydro… 4
## 5 K10674 1 2 1 ectD; ectoine hydroxylase [EC:1.14… 4
## 6 K16242 1 2 1 dmpN, poxD; phenol hydroxylase P3 … 4
This has been written to a file named “Arhodomonas.KEGG.oxygenases.hydroxylases.csv”.
library(heatmaply)
# Convert the count table to a plain data frame and use the KO descriptions as
# row names -- presumably so the heat map rows are labelled with them.
KO.count.oxy <- data.frame(KO.count.oxy, stringsAsFactors = FALSE)
rownames(KO.count.oxy) <- KO.count.oxy[["description"]]
# Heat map of the three per-genome count columns (columns 2 to 4), without
# dendrograms; the result is kept in O.H.heat.
O.H.heat <- heatmaply(KO.count.oxy[2:4],
                      dendrogram = "none",
                      grid_color = "white",
                      fontsize_row = 8)
# Same plot with a smaller row font, passed the list of output files below.
heatmaply(KO.count.oxy[2:4],
          dendrogram = "none",
          grid_color = "white",
          fontsize_row = 6,
          file = c("O.H.heatmap.html","O.H.heatmap.pdf","O.H.heatmap.png","O.H.heatmap.jpeg"))
It is important to note that the KEGG system has extensive annotation for aromatic dioxygenases (initial attack) and alkane monooxygenases: none were detected here, suggesting that Arhodomonas has no capacity to degrade primary petroleum components.
It is also important to note that this survey is not comprehensive. Because it is a text-based search of KO names, it relies upon clear naming of reactions. A sounder strategy would instead start with EC categories for the respective reaction classes (e.g., 1.14.x.x for oxygenase reactions involving dioxygen), but I have yet to locate a clean map of EC numbers to any orthology system.
library(VennDiagram)
# Distinct KO numbers detected in each genome. The by.KO.* tables hold one row
# per annotated locus, so a multi-copy KO is repeated there; unique() keeps
# each set's size consistent with the set intersections drawn in the diagram.
# Each object stays a one-element list named "KO", as before.
O.A.2007 <- list(KO = unique(by.KO.2007[["KO"]]))
O.A.aqua <- list(KO = unique(by.KO.aqua[["KO"]]))
O.A.btex <- list(KO = unique(by.KO.BTEX[["KO"]]))
# Pass one plain vector per genome in a named list. Concatenating the
# one-element lists with c() would name the sets "A.2007.KO", "A.aquaeoli.KO",
# ... and those names are what the diagram shows as category labels.
# filename = NULL makes venn.diagram() return the grid object instead of
# writing a file.
vd <- venn.diagram(
  list("A.2007" = O.A.2007$KO,
       "A.aquaeoli" = O.A.aqua$KO,
       "A.BTEX" = O.A.btex$KO),
  filename = NULL, resolution = 150, height = 800, width = 800,
  fill = c("cornflowerblue", "green", "darkorchid1"),
  main = "KEGG Oxygenases and hydroxylases in Arhodomonas Genomes",
  alpha = 0.2, main.cex = 1, cex = 1.4, cat.cex = 1,
  cat.dist = c(.05, .05, .04), main.pos = c(0.5, 1.1)
)
# NOTE(review): the file name below spells "hyroxylase"; left unchanged in case
# other documents link to this file.
jpeg("oxygenase.hyroxylase.vd.jpg")
grid.draw(vd)
dev.off()
## png
## 2
# Draw the same Venn diagram object again, this time to a PNG device.
png(filename = "oxygenase.hydroxylase.cd.png")
grid::grid.draw(vd)
dev.off()
## png
## 2