1. サンバーストチャート用のデータフレーム

dat <- data.frame(
  values=c(1000,600,300,100,250,150,200,250,50,100), 
  labels=c("Domain_A","Phylum_A","Phylum_B","Phylum_C",
           "Class_A","Class_B","Class_C","Class_D","Class_E","Class_F"), 
  parents=c("",rep("Domain_A",3),rep(c("Phylum_A","Phylum_B","Phylum_C"),c(3,2,1))), 
  color=c("#E5E5E5","#FFA500","#0000FF", "#FF0000", rep("",6))
  )
dat
##    values   labels  parents   color
## 1    1000 Domain_A          #E5E5E5
## 2     600 Phylum_A Domain_A #FFA500
## 3     300 Phylum_B Domain_A #0000FF
## 4     100 Phylum_C Domain_A #FF0000
## 5     250  Class_A Phylum_A        
## 6     150  Class_B Phylum_A        
## 7     200  Class_C Phylum_A        
## 8     250  Class_D Phylum_B        
## 9      50  Class_E Phylum_B        
## 10    100  Class_F Phylum_C

2. plotlyで描画

library(dplyr)
x <- plotly::plot_ly(labels=dat$labels, 
                parents=dat$parents, 
                values=dat$values, 
                marker = list(colors = dat$color),
                type='sunburst', 
                branchvalues = 'total') 
x %>%  plotly::layout(x, title = "Sample")
# 複数のplotlyオブジェクトを描画する場合
# l <- htmltools::tagList()
# for (i in seq_along(dats)) {
#   x <- function_plty(dats[[i]], main = titles[i])
#   l[[i]] <- plotly::as_widget(x[[1]])
# }
# l

3. 系統組成表からサンバーストチャート作成

ユーザー定義関数

## 1.サンバーストチャート用のデータフレームへの変換
##   input : kraken2krona.pyで変換済みのbracken_reportファイル
##   output: サンバーストチャート用のデータフレーム
sunburst_k2 <- function(x, n){
  # fun1
  mmatch_sub <- function (pat, x, y, missing = NA) {
    # Arguments check
    if (length(pat) != length(y)) {
      stop("the length of matched patterns and 'subst' must be the same")
    }
    # subfunction
    mmatch <- function (pat, x, na.rm = FALSE) {
      res <- sapply(pat, function(s) {
        if (any(x %in% s)) {
          which(x %in% s)
        }
        else {
          NA
        }
      }, simplify = F)
      if (na.rm == T)
        res <- res[!is.na(res)]
      res
    }
    # multiple match
    x <- ifelse(x %in% pat, x, missing)
    pos <- mmatch(pat, x, na.rm = F)
    y <- y[!is.na(pos)]
    pos <- pos[!is.na(pos)]
    new_x <- x
    invisible(lapply(seq_along(pos), function(i) new_x[pos[[i]]] <<- y[i]))
    new_x
  }
  # fun2
  mgrep <- function (pat, x, na.rm = FALSE, value = FALSE){
    if (!is.atomic(x) | !is.atomic(pat)) stop("'x' and 'pat' are must be atomic.")
    if (value == T) {
        res <- sapply(pat, function(s) {
            if (any(grep(s, x))) {
                grep(s, x, value = T)
            }
            else {
                NA
            }
        }, simplify = F)
    }
    else {
        res <- sapply(pat, function(s) {
            if (any(grep(s, x))) {
                grep(s, x)
            }
            else {
                NA
            }
        }, simplify = F)
    }
    if (na.rm == T) res <- res[!is.na(res)]
    res
  }
  # library
  suppressMessages(library(dplyr))
  # Main
  krna <- readLines(x)
  rank <- c("kingdom","phylum","class","order","family","genus","species")
  value <- sapply(krna, function(x)strsplit(x,"\t")) %>%
    setNames(., NULL) %>%
   .[sapply(., function(x)x[[2]]!="Unclassified")] %>%
    sapply(., "[", 1) %>% as.numeric()

  lineage <- sapply(krna, function(x)strsplit(x,"\t")) %>%
    setNames(., NULL) %>%
   .[sapply(., function(x)x[[2]]!="Unclassified")] %>%
    sapply(., "[", -1) %>%
    lapply(., function(x){
      vstr <- c("p__Unclassified","c__Unclassified","o__Unclassified",
                "f__Unclassified","g__Unclassified","s__Unclassified")
      w <- c(x, vstr[is.na(mgrep(c("p__","c__","o__","f__","g__", "s__"),x))])
      idx <-unlist(mgrep(c("k__", "p__","c__","o__","f__","g__", "s__"), w))
      w[idx]
    }) %>%
    {data.frame(value=value, Reduce(rbind, .), row.names = NULL)} %>%
    setNames(., c("value", rank)) %>%
    filter(value > n)

  ## phylum
  x <- lineage
  punc <- unique(grep("Unclassified$",
            paste(x$kingdom,x$phylum,sep=";"), value = T))
  mx <- x
  if(length(punc)>1){
    vpunc <- paste0(punc, seq_along(punc))
    mx$phylum <- sapply(
      strsplit(mmatch_sub(punc, mx$phylum, vpunc, mx$phylum),";"),
      function(x){x[length(x)]})
  }
  ## class
  cunc <- unique(grep("Unclassified$",
            paste(x$kingdom,x$phylum,x$class, sep=";"), value = T))
  if(length(cunc)>1){
    vcunc <- paste0(cunc, seq_along(cunc))
    mx <- mx %>% mutate(class = paste(kingdom,phylum,class,sep=";"))
    mx$class <- sapply(
      strsplit(mmatch_sub(cunc, mx$class, vcunc, mx$class),";"),
      function(x){x[length(x)]})
  }
  ## order
  ounc <- unique(grep("Unclassified$",
            paste(mx$kingdom,mx$phylum,mx$class,mx$order,sep=";"),value = T))
  if(length(ounc)>1){
    vounc <- paste0(ounc, seq_along(ounc))
    mx <- mx %>% mutate(order = paste(kingdom,phylum,class,order,sep=";"))
    mx$order <- sapply(
      strsplit(mmatch_sub(ounc, mx$order, vounc, mx$order),";"),
      function(x){x[length(x)]})
  }
  ## family
  func<-unique(grep("Unclassified$",
    paste(mx$kingdom,mx$phylum,mx$class,mx$order,mx$family,sep=";"),value=T))
  if(length(func)>1){
   vfunc <- paste0(func, seq_along(func))
   mx <- mx%>%mutate(family=paste(kingdom,phylum,class,order,family,sep=";"))
   mx$family<-sapply(
     strsplit(mmatch_sub(func,mx$family,vfunc,mx$family),";"),
     function(x){x[length(x)]})
  }
  ## genus
  gunc<-unique(grep("Unclassified$",
   paste(mx$kingdom,mx$phylum,mx$class,mx$order,mx$family,mx$genus,sep=";"),
   value=T))
  if(length(gunc)>1){
   vgunc <- paste0(gunc, seq_along(gunc))
   mx <- mx %>%
     mutate(genus = paste(kingdom,phylum,class,order,family,genus,sep=";"))
   mx$genus <- sapply(
      strsplit(mmatch_sub(gunc, mx$genus, vgunc, mx$genus),";"),
      function(x){x[length(x)]})
  }
  ## species
  sunc<-unique(grep("Unclassified$",
   paste(mx$kingdom,mx$phylum,mx$class,mx$order,
         mx$family,mx$genus,mx$species, sep=";"), value = T))
  if(length(sunc)>1){
    vsunc <- paste0(sunc, seq_along(sunc))
    mx <- mx %>%
      mutate(species=paste(kingdom,phylum,class,
                           order,family,genus,species,sep=";"))
    mx$species <- sapply(
      strsplit(mmatch_sub(sunc, mx$species, vsunc, mx$species),";"),
      function(x){x[length(x)]})
  }

  # Create data-frame for sunburst
  mx %>%
    select(c("value","kingdom")) %>%
    {tapply(.$value, .$kingdom, sum)} %>%
    stack() %>%
    mutate(parents=rep("", nrow(.)),
           labels=sub("k__","Kingdom:",ind),
           parents_phy=rep("", nrow(.))) %>%
    select(c("values","labels","parents","parents_phy")) -> x0

  mx %>% select(c("value","kingdom","phylum")) %>%
    mutate(taxon=paste(kingdom,phylum,sep=";")) %>%
    select(c("value","taxon")) %>%
    {tapply(.$value, .$taxon, sum)} %>%
    stack() %>%
    mutate(ind=as.character(ind)) %>%
    mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
           parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
           labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
    mutate(parents=sub("k__", "Kingdom:", parents),
           parents_phy=sub("p__", "Phylum:", parents_phy),
           labels=sub("p__", "Phylum:", labels)) %>%
    select(c("values","labels","parents","parents_phy")) -> x1

  mx %>% select(c("value","kingdom","phylum","class")) %>%
    mutate(taxon=paste(kingdom,phylum,class,sep=";")) %>%
    select(c("value","taxon")) %>%
    {tapply(.$value, .$taxon, sum)} %>%
    stack() %>%
    mutate(ind=as.character(ind)) %>%
    mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
           parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
           labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
    mutate(parents=sub("p__", "Phylum:", parents),
           parents_phy=sub("p__", "Phylum:", parents_phy),
           labels=sub("c__", "Class:", labels)) %>%
    select(c("values","labels","parents","parents_phy")) -> x2

  mx %>% select(c("value","kingdom","phylum","class","order")) %>%
    mutate(taxon=paste(kingdom,phylum,class,order,sep=";")) %>%
    select(c("value","taxon")) %>%
    {tapply(.$value, .$taxon, sum)} %>%
    stack() %>%
    mutate(ind=as.character(ind)) %>%
    mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
           parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
           labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
    mutate(parents=sub("c__", "Class:", parents),
           parents_phy=sub("p__", "Phylum:", parents_phy),
           labels=sub("o__", "Order:", labels)) %>%
    select(c("values","labels","parents","parents_phy")) -> x3

  mx %>% select(c("value","kingdom","phylum","class","order","family")) %>%
    mutate(taxon=paste(kingdom,phylum,class,order,family,sep=";")) %>%
    select(c("value","taxon")) %>%
    {tapply(.$value, .$taxon, sum)} %>%
    stack() %>%
    mutate(ind=as.character(ind)) %>%
    mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
           parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
           labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
    mutate(parents=sub("o__", "Order:", parents),
           parents_phy=sub("p__", "Phylum:", parents_phy),
           labels=sub("f__", "Family:", labels)) %>%
    select(c("values","labels","parents","parents_phy")) -> x4

  mx %>% select(c("value","kingdom","phylum","class","order","family","genus")) %>%
    mutate(taxon=paste(kingdom,phylum,class,order,family,genus,sep=";")) %>%
    select(c("value","taxon")) %>%
    {tapply(.$value, .$taxon, sum)} %>%
    stack() %>%
    mutate(ind=as.character(ind)) %>%
    mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
           parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
           labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
    mutate(parents=sub("f__", "Family:", parents),
           parents_phy=sub("p__", "Phylum:", parents_phy),
           labels=sub("g__", "Genus:", labels)) %>%
    select(c("values","labels","parents","parents_phy")) -> x5
  
    mx %>% select(c("value","kingdom","phylum","class","order","family","genus","species")) %>%
    mutate(taxon=paste(kingdom,phylum,class,order,family,genus,species,sep=";")) %>%
    select(c("value","taxon")) %>%
    {tapply(.$value, .$taxon, sum)} %>%
    stack() %>%
    mutate(ind=as.character(ind)) %>%
    mutate(parents=sapply(strsplit(ind,";"),function(x){x[length(x)-1]}),
           parents_phy=sapply(strsplit(ind,";"),function(x){x[2]}),
           labels=sapply(strsplit(ind,";"),function(x){x[length(x)]})) %>%
    mutate(parents=sub("g__", "Genus:", parents),
           parents_phy=sub("p__", "Phylum:", parents_phy),
           labels=sub("s__", "Species:", labels)) %>%
    select(c("values","labels","parents","parents_phy")) -> x6

    rbind(x0,x1,x2,x3,x4,x5,x6) %>%  mutate(Rank=sub(":.*$","",labels))

}

## 2. plotlyでサンバーストチャート作成
sunburst_plty <- function(x, main){
  res_plty <- plotly::plot_ly(labels=x$labels,
                              parents=x$parents,
                              values=x$values,
                              marker = list(colors = x$color),
                              type='sunburst',
                              branchvalues = 'total') %>%
    plotly::layout(title = main)
  return(res_plty)
}

データ

  • kraken2のレポートファイルをあらかじめkreport2krona.pyで変換しておく.
    • kreport2krona.py --no-intermediate-ranks -r k2.report -o input.krona
in_k2 <- "./data/input.krona"
k2dat <- readLines(in_k2)
head(k2dat)
## [1] "12228\tUnclassified"                                                                  
## [2] "30075\tk__Bacteria"                                                                   
## [3] "12484\tk__Bacteria\tp__Bacillota"                                                     
## [4] "63\tk__Bacteria\tp__Bacillota\tc__Clostridia"                                         
## [5] "13216\tk__Bacteria\tp__Bacillota\tc__Clostridia\to__Eubacteriales"                    
## [6] "14848\tk__Bacteria\tp__Bacillota\tc__Clostridia\to__Eubacteriales\tf__Lachnospiraceae"

plotly用に変換

# plotlyに読み込ませるようなdfに変換する
sb_dat <- sunburst_k2(in_k2, 10)
head(sb_dat)
##    values                  labels          parents             parents_phy
## 1 1889125        Kingdom:Bacteria                                         
## 2  249036   Phylum:Actinomycetota Kingdom:Bacteria   Phylum:Actinomycetota
## 3  982918        Phylum:Bacillota Kingdom:Bacteria        Phylum:Bacillota
## 4  280841     Phylum:Bacteroidota Kingdom:Bacteria     Phylum:Bacteroidota
## 5      39 Phylum:Campylobacterota Kingdom:Bacteria Phylum:Campylobacterota
## 6  267322   Phylum:Pseudomonadota Kingdom:Bacteria   Phylum:Pseudomonadota
##      Rank
## 1 Kingdom
## 2  Phylum
## 3  Phylum
## 4  Phylum
## 5  Phylum
## 6  Phylum

plotly

sunburst_plty(sb_dat, main="sample")

krona

  • kronaにインポート
    • ktImportText input.krona -o output.html
  • htmlファイルの相対パスを記述 output.html