ユーザー定義関数
## 1.サンバーストチャート用のデータフレームへの変換
## input : kraken2krona.pyで変換済みのbracken_reportファイル
## output: サンバーストチャート用のデータフレーム
sunburst_k2 <- function(x, n){
# fun1
mmatch_sub <- function (pat, x, y, missing = NA) {
# Arguments check
if (length(pat) != length(y)) {
stop("the length of matched patterns and 'subst' must be the same")
}
# subfunction
mmatch <- function (pat, x, na.rm = FALSE) {
res <- sapply(pat, function(s) {
if (any(x %in% s)) {
which(x %in% s)
}
else {
NA
}
}, simplify = F)
if (na.rm == T)
res <- res[!is.na(res)]
res
}
# multiple match
x <- ifelse(x %in% pat, x, missing)
pos <- mmatch(pat, x, na.rm = F)
y <- y[!is.na(pos)]
pos <- pos[!is.na(pos)]
new_x <- x
invisible(lapply(seq_along(pos), function(i) new_x[pos[[i]]] <<- y[i]))
new_x
}
# fun2
mgrep <- function (pat, x, na.rm = FALSE, value = FALSE){
if (!is.atomic(x) | !is.atomic(pat)) stop("'x' and 'pat' are must be atomic.")
if (value == T) {
res <- sapply(pat, function(s) {
if (any(grep(s, x))) {
grep(s, x, value = T)
}
else {
NA
}
}, simplify = F)
}
else {
res <- sapply(pat, function(s) {
if (any(grep(s, x))) {
grep(s, x)
}
else {
NA
}
}, simplify = F)
}
if (na.rm == T) res <- res[!is.na(res)]
res
}
# library
suppressMessages(library(dplyr))
# Main
krna <- readLines(x)
rank <- c("kingdom","phylum","class","order","family","genus","species")
value <- sapply(krna, function(x)strsplit(x,"\t")) %>%
setNames(., NULL) %>%
.[sapply(., function(x)x[[2]]!="Unclassified")] %>%
sapply(., "[", 1) %>% as.numeric()
lineage <- sapply(krna, function(x)strsplit(x,"\t")) %>%
setNames(., NULL) %>%
.[sapply(., function(x)x[[2]]!="Unclassified")] %>%
sapply(., "[", -1) %>%
lapply(., function(x){
vstr <- c("p__Unclassified","c__Unclassified","o__Unclassified",
"f__Unclassified","g__Unclassified","s__Unclassified")
w <- c(x, vstr[is.na(mgrep(c("p__","c__","o__","f__","g__", "s__"),x))])
idx <-unlist(mgrep(c("k__", "p__","c__","o__","f__","g__", "s__"), w))
w[idx]
}) %>%
{data.frame(value=value, Reduce(rbind, .), row.names = NULL)} %>%
setNames(., c("value", rank)) %>%
filter(value > n)
## phylum
x <- lineage
punc <- unique(grep("Unclassified$",
paste(x$kingdom,x$phylum,sep=";"), value = T))
mx <- x
if(length(punc)>1){
vpunc <- paste0(punc, seq_along(punc))
mx$phylum <- sapply(
strsplit(mmatch_sub(punc, mx$phylum, vpunc, mx$phylum),";"),
function(x){x[length(x)]})
}
## class
cunc <- unique(grep("Unclassified$",
paste(x$kingdom,x$phylum,x$class, sep=";"), value = T))
if(length(cunc)>1){
vcunc <- paste0(cunc, seq_along(cunc))
mx <- mx %>% mutate(class = paste(kingdom,phylum,class,sep=";"))
mx$class <- sapply(
strsplit(mmatch_sub(cunc, mx$class, vcunc, mx$class),";"),
function(x){x[length(x)]})
}
## order
ounc <- unique(grep("Unclassified$",
paste(mx$kingdom,mx$phylum,mx$class,mx$order,sep=";"),value = T))
if(length(ounc)>1){
vounc <- paste0(ounc, seq_along(ounc))
mx <- mx %>% mutate(order = paste(kingdom,phylum,class,order,sep=";"))
mx$order <- sapply(
strsplit(mmatch_sub(ounc, mx$order, vounc, mx$order),";"),
function(x){x[length(x)]})
}
## family
func<-unique(grep("Unclassified$",
paste(mx$kingdom,mx$phylum,mx$class,mx$order,mx$family,sep=";"),value=T))
if(length(func)>1){
vfunc <- paste0(func, seq_along(func))
mx <- mx%>%mutate(family=paste(kingdom,phylum,class,order,family,sep=";"))
mx$family<-sapply(
strsplit(mmatch_sub(func,mx$family,vfunc,mx$family),";"),
function(x){x[length(x)]})
}
## genus
gunc<-unique(grep("Unclassified$",
paste(mx$kingdom,mx$phylum,mx$class,mx$order,mx$family,mx$genus,sep=";"),
value=T))
if(length(gunc)>1){
vgunc <- paste0(gunc, seq_along(gunc))
mx <- mx %>%
mutate(genus = paste(kingdom,phylum,class,order,family,genus,sep=";"))
mx$genus <- sapply(
strsplit(mmatch_sub(gunc, mx$genus, vgunc, mx$genus),";"),
function(x){x[length(x)]})
}
## species
sunc<-unique(grep("Unclassified$",
paste(mx$kingdom,mx$phylum,mx$class,mx$order,
mx$family,mx$genus,mx$species, sep=";"), value = T))
if(length(sunc)>1){
vsunc <- paste0(sunc, seq_along(sunc))
mx <- mx %>%
mutate(species=paste(kingdom,phylum,class,
order,family,genus,species,sep=";"))
mx$species <- sapply(
strsplit(mmatch_sub(sunc, mx$species, vsunc, mx$species),";"),
function(x){x[length(x)]})
}
# Create data-frame for sunburst
mx %>%
select(c("value","kingdom")) %>%
{tapply(.$value, .$kingdom, sum)} %>%
stack() %>%
mutate(parents=rep("", nrow(.)),
labels=sub("k__","Kingdom:",ind),
parents_phy=rep("", nrow(.))) %>%
select(c("values","labels","parents","parents_phy")) -> x0
mx %>% select(c("value","kingdom","phylum")) %>%
mutate(taxon=paste(kingdom,phylum,sep=";")) %>%
select(c("value","taxon")) %>%
{tapply(.$value, .$taxon, sum)} %>%
stack() %>%
mutate(ind=as.character(ind)) %>%
mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
mutate(parents=sub("k__", "Kingdom:", parents),
parents_phy=sub("p__", "Phylum:", parents_phy),
labels=sub("p__", "Phylum:", labels)) %>%
select(c("values","labels","parents","parents_phy")) -> x1
mx %>% select(c("value","kingdom","phylum","class")) %>%
mutate(taxon=paste(kingdom,phylum,class,sep=";")) %>%
select(c("value","taxon")) %>%
{tapply(.$value, .$taxon, sum)} %>%
stack() %>%
mutate(ind=as.character(ind)) %>%
mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
mutate(parents=sub("p__", "Phylum:", parents),
parents_phy=sub("p__", "Phylum:", parents_phy),
labels=sub("c__", "Class:", labels)) %>%
select(c("values","labels","parents","parents_phy")) -> x2
mx %>% select(c("value","kingdom","phylum","class","order")) %>%
mutate(taxon=paste(kingdom,phylum,class,order,sep=";")) %>%
select(c("value","taxon")) %>%
{tapply(.$value, .$taxon, sum)} %>%
stack() %>%
mutate(ind=as.character(ind)) %>%
mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
mutate(parents=sub("c__", "Class:", parents),
parents_phy=sub("p__", "Phylum:", parents_phy),
labels=sub("o__", "Order:", labels)) %>%
select(c("values","labels","parents","parents_phy")) -> x3
mx %>% select(c("value","kingdom","phylum","class","order","family")) %>%
mutate(taxon=paste(kingdom,phylum,class,order,family,sep=";")) %>%
select(c("value","taxon")) %>%
{tapply(.$value, .$taxon, sum)} %>%
stack() %>%
mutate(ind=as.character(ind)) %>%
mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
mutate(parents=sub("o__", "Order:", parents),
parents_phy=sub("p__", "Phylum:", parents_phy),
labels=sub("f__", "Family:", labels)) %>%
select(c("values","labels","parents","parents_phy")) -> x4
mx %>% select(c("value","kingdom","phylum","class","order","family","genus")) %>%
mutate(taxon=paste(kingdom,phylum,class,order,family,genus,sep=";")) %>%
select(c("value","taxon")) %>%
{tapply(.$value, .$taxon, sum)} %>%
stack() %>%
mutate(ind=as.character(ind)) %>%
mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
mutate(parents=sub("f__", "Family:", parents),
parents_phy=sub("p__", "Phylum:", parents_phy),
labels=sub("g__", "Genus:", labels)) %>%
select(c("values","labels","parents","parents_phy")) -> x5
mx %>% select(c("value","kingdom","phylum","class","order","family","genus","species")) %>%
mutate(taxon=paste(kingdom,phylum,class,order,family,genus,species,sep=";")) %>%
select(c("value","taxon")) %>%
{tapply(.$value, .$taxon, sum)} %>%
stack() %>%
mutate(ind=as.character(ind)) %>%
mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
mutate(parents=sub("g__", "Genus:", parents),
parents_phy=sub("p__", "Phylum:", parents_phy),
labels=sub("s__", "Species:", labels)) %>%
select(c("values","labels","parents","parents_phy")) -> x6
rbind(x0,x1,x2,x3,x4,x5,x6) %>% mutate(Rank=sub(":.*$","",labels))
}
## 2. plotlyでサンバーストチャート作成
sunburst_plty <- function(x, main){
res_plty <- plotly::plot_ly(labels=x$labels,
parents=x$parents,
values=x$values,
marker = list(colors = x$color),
type='sunburst',
branchvalues = 'total') %>%
plotly::layout(title = main)
return(res_plty)
}