library(rcdk)
## Loading required package: rcdklibs
## Loading required package: rJava
# 目的:计算“芳香环数”
# 思路:
# 1) 用 CDK Cycles API 找到一组环(默认 SSSR,也可选 relevant)
# 2) 对每个环检查:环内所有原子是否都已被标记为 aromatic
# 3) 满足者计为 1 个芳香环
#
# 注意:
# - 该函数假定 mol 已经完成:
#   set.atom.types(mol)
#   do.isotopes(mol)
#   do.aromaticity(mol)
# - 如果你更在意 fused/bridged 系统中的“唯一性”,可把 method 改成 "relevant"
aromatic_ring_count <- function(mol, method = c("sssr", "relevant")) {
  method <- match.arg(method)
  
  # 目的:通过 CDK Cycles API 获取环集合
  cycles <- switch(
    method,
    sssr = rJava::.jcall(
      "org/openscience/cdk/graph/Cycles",
      "Lorg/openscience/cdk/graph/Cycles;",
      "sssr",
      mol
    ),
    relevant = rJava::.jcall(
      "org/openscience/cdk/graph/Cycles",
      "Lorg/openscience/cdk/graph/Cycles;",
      "relevant",
      mol
    )
  )
  
  # 目的:把 cycles 转成 IRingSet,便于逐个 ring 读取
  ring_set <- rJava::.jcall(
    cycles,
    "Lorg/openscience/cdk/interfaces/IRingSet;",
    "toRingSet"
  )
  
  # 目的:获取环总数;如果没有环,直接返回 0
  n_rings <- rJava::.jcall(ring_set, "I", "getAtomContainerCount")
  if (n_rings == 0) return(0L)
  
  # 目的:逐个环检查“该环是否为芳香环”
  aromatic_n <- 0L
  for (i in seq_len(n_rings)) {
    
    # 目的:取出第 i 个环;Java 下标从 0 开始,所以要减 1
    ring <- rJava::.jcall(
      ring_set,
      "Lorg/openscience/cdk/interfaces/IAtomContainer;",
      "getAtomContainer",
      as.integer(i - 1)
    )
    
    # 目的:获取该环中的全部原子
    ring_atoms <- get.atoms(ring)
    
    # 目的:如果环中所有原子都被标记为 aromatic,则记作一个芳香环
    # 这里直接依赖 rcdk::is.aromatic(atom)
    if (all(vapply(ring_atoms, function(a) isTRUE(is.aromatic(a)), logical(1)))) {
      aromatic_n <- aromatic_n + 1L
    }
  }
  
  aromatic_n
}


check_druglikeness <- function(
    smiles,
    keep_largest_fragment = TRUE,
    allowed_elements = c("C", "H", "O", "N", "S", "F", "Cl", "Br", "I", "P"),
    aromatic_ring_method = c("sssr", "relevant")
) {
  aromatic_ring_method <- match.arg(aromatic_ring_method)
  
  analyze_one <- function(smi) {
    
    # 目的:构造统一的 NA 输出,保证报错时也能返回相同列结构
    na_row <- function(msg = NA_character_) {
      data.frame(
        SMILES = smi,
        Error = msg,
        PresentElements = NA_character_,
        pass_elements = NA,
        ExactMW = NA_real_,
        XLogP = NA_real_,
        HBD = NA_integer_,
        HBA = NA_integer_,
        HBD_HBA = NA_integer_,
        TPSA = NA_real_,
        RotBonds = NA_integer_,
        AromaticAtoms = NA_integer_,
        AromaticRings = NA_integer_,
        Fsp3 = NA_real_,
        FormalCharge = NA_integer_,
        TotalCharge = NA_real_,
        IsNeutral = NA,
        Ro5_violations = NA_integer_,
        pass_Ro5 = NA,
        pass_Veber_TPSA = NA,
        pass_Veber_HB = NA,
        pass_Veber = NA,
        Pfizer_3_75_alert = NA,
        pass_GSK_4_400 = NA,
        stringsAsFactors = FALSE
      )
    }
    
    # 目的:检查输入是否为空
    if (is.na(smi) || !nzchar(trimws(smi))) {
      return(na_row("Empty SMILES"))
    }
    
    # 目的:把 SMILES 解析成分子对象
    mol <- parse.smiles(smi, omit.nulls = FALSE)[[1]]
    if (is.null(mol)) {
      return(na_row("Invalid SMILES"))
    }
    
    out <- tryCatch({
      
      # 目的:如果是盐型/多组分结构,只保留最大连通组分
      if (keep_largest_fragment && !is.connected(mol)) {
        mol <- get.largest.component(mol)
      }
      
      # 目的:分子预处理,为后续 aromaticity / descriptor 计算做准备
      set.atom.types(mol)
      do.isotopes(mol)
      do.aromaticity(mol)
      
      # 目的:将隐式氢转为显式氢,提升部分描述符计算稳定性
      convert.implicit.to.explicit(mol)
      
      # 目的:显式加氢后重新配置
      set.atom.types(mol)
      do.isotopes(mol)
      do.aromaticity(mol)
      
      # 目的:提取原子与元素信息,用于元素过滤
      atoms <- get.atoms(mol)
      symbols <- vapply(atoms, get.symbol, character(1))
      present_elements <- paste(sort(unique(symbols)), collapse = ",")
      pass_elements <- all(symbols %in% allowed_elements)
      
      # 目的:集中计算常用描述符
      desc_names <- c(
        XLogP         = "org.openscience.cdk.qsar.descriptors.molecular.XLogPDescriptor",
        HBD           = "org.openscience.cdk.qsar.descriptors.molecular.HBondDonorCountDescriptor",
        HBA           = "org.openscience.cdk.qsar.descriptors.molecular.HBondAcceptorCountDescriptor",
        TPSA          = "org.openscience.cdk.qsar.descriptors.molecular.TPSADescriptor",
        RotBonds      = "org.openscience.cdk.qsar.descriptors.molecular.RotatableBondsCountDescriptor",
        AromaticAtoms = "org.openscience.cdk.qsar.descriptors.molecular.AromaticAtomsCountDescriptor",
        Fsp3          = "org.openscience.cdk.qsar.descriptors.molecular.FractionalCSP3Descriptor"
      )
      
      desc <- eval.desc(list(mol), unname(desc_names))
      names(desc) <- names(desc_names)
      
      # 目的:转成普通数值,便于规则判断
      mw      <- as.numeric(get.mol.weight(mol))
      xlogp   <- as.numeric(desc$XLogP[1])
      hbd     <- as.integer(desc$HBD[1])
      hba     <- as.integer(desc$HBA[1])
      tpsa    <- as.numeric(desc$TPSA[1])
      rotb    <- as.integer(desc$RotBonds[1])
      aromatm <- as.integer(desc$AromaticAtoms[1])
      fsp3    <- as.numeric(desc$Fsp3[1])
      hbsum   <- if (all(!is.na(c(hbd, hba)))) hbd + hba else NA_integer_
      
      # 目的:真正计算“芳香环数”
      aromring <- aromatic_ring_count(mol, method = aromatic_ring_method)
      
      # 目的:计算电荷信息
      formal_charge <- as.integer(get.total.formal.charge(mol))
      total_charge  <- as.numeric(get.total.charge(mol))
      is_neutral    <- isTRUE(is.neutral(mol))
      
      # 目的:统计 Ro5 违反项数量
      ro5_violations <- if (all(!is.na(c(mw, xlogp, hbd, hba)))) {
        sum(c(mw > 500, xlogp > 5, hbd > 5, hba > 10))
      } else {
        NA_integer_
      }
      
      # 目的:Lipinski Rule of Five
      pass_Ro5 <- if (!is.na(ro5_violations)) {
        ro5_violations == 0
      } else {
        NA
      }
      
      # 目的:Veber(TPSA 版本)
      pass_Veber_TPSA <- if (all(!is.na(c(rotb, tpsa)))) {
        rotb <= 10 && tpsa <= 140
      } else {
        NA
      }
      
      # 目的:Veber(HBD+HBA 替代表达)
      pass_Veber_HB <- if (all(!is.na(c(rotb, hbsum)))) {
        rotb <= 10 && hbsum <= 12
      } else {
        NA
      }
      
      # 目的:给出总的 Veber 判断
      pass_Veber <- if (all(!is.na(c(rotb, tpsa, hbsum)))) {
        rotb <= 10 && (tpsa <= 140 || hbsum <= 12)
      } else {
        NA
      }
      
      # 目的:Pfizer 3/75 预警
      Pfizer_3_75_alert <- if (all(!is.na(c(xlogp, tpsa)))) {
        xlogp > 3 && tpsa < 75
      } else {
        NA
      }
      
      # 目的:GSK 4/400
      pass_GSK_4_400 <- if (all(!is.na(c(mw, xlogp)))) {
        mw <= 400 && xlogp <= 4
      } else {
        NA
      }
      
      # 目的:汇总输出
      data.frame(
        SMILES = smi,
        Error = NA_character_,
        PresentElements = present_elements,
        pass_elements = pass_elements,
        ExactMW = round(mw, 4),
        XLogP = round(xlogp, 3),
        HBD = hbd,
        HBA = hba,
        HBD_HBA = hbsum,
        TPSA = round(tpsa, 2),
        RotBonds = rotb,
        AromaticAtoms = aromatm,
        AromaticRings = aromring,
        Fsp3 = round(fsp3, 3),
        FormalCharge = formal_charge,
        TotalCharge = round(total_charge, 3),
        IsNeutral = is_neutral,
        Ro5_violations = ro5_violations,
        pass_Ro5 = pass_Ro5,
        pass_Veber_TPSA = pass_Veber_TPSA,
        pass_Veber_HB = pass_Veber_HB,
        pass_Veber = pass_Veber,
        Pfizer_3_75_alert = Pfizer_3_75_alert,
        pass_GSK_4_400 = pass_GSK_4_400,
        stringsAsFactors = FALSE
      )
      
    }, error = function(e) {
      na_row(conditionMessage(e))
    })
    
    out
  }
  
  # 目的:支持单个或多个 SMILES 批量输入
  if (!is.character(smiles) || length(smiles) < 1) {
    stop("Please provide one or more SMILES strings.")
  }
  
  res <- lapply(smiles, analyze_one)
  do.call(rbind, res)
}
######################################################################
check_druglikeness("c1ccccc1")                   # benzene
##     SMILES                                    Error PresentElements
## 1 c1ccccc1 could not find function "get.mol.weight"            <NA>
##   pass_elements ExactMW XLogP HBD HBA HBD_HBA TPSA RotBonds AromaticAtoms
## 1            NA      NA    NA  NA  NA      NA   NA       NA            NA
##   AromaticRings Fsp3 FormalCharge TotalCharge IsNeutral Ro5_violations pass_Ro5
## 1            NA   NA           NA          NA        NA             NA       NA
##   pass_Veber_TPSA pass_Veber_HB pass_Veber Pfizer_3_75_alert pass_GSK_4_400
## 1              NA            NA         NA                NA             NA
check_druglikeness("c1ccc2ccccc2c1")             # naphthalene
##           SMILES                                    Error PresentElements
## 1 c1ccc2ccccc2c1 could not find function "get.mol.weight"            <NA>
##   pass_elements ExactMW XLogP HBD HBA HBD_HBA TPSA RotBonds AromaticAtoms
## 1            NA      NA    NA  NA  NA      NA   NA       NA            NA
##   AromaticRings Fsp3 FormalCharge TotalCharge IsNeutral Ro5_violations pass_Ro5
## 1            NA   NA           NA          NA        NA             NA       NA
##   pass_Veber_TPSA pass_Veber_HB pass_Veber Pfizer_3_75_alert pass_GSK_4_400
## 1              NA            NA         NA                NA             NA
check_druglikeness(
  c("c1ccccc1", "c1ccc2ccccc2c1"),
  aromatic_ring_method = "sssr"
)
##           SMILES                                    Error PresentElements
## 1       c1ccccc1 could not find function "get.mol.weight"            <NA>
## 2 c1ccc2ccccc2c1 could not find function "get.mol.weight"            <NA>
##   pass_elements ExactMW XLogP HBD HBA HBD_HBA TPSA RotBonds AromaticAtoms
## 1            NA      NA    NA  NA  NA      NA   NA       NA            NA
## 2            NA      NA    NA  NA  NA      NA   NA       NA            NA
##   AromaticRings Fsp3 FormalCharge TotalCharge IsNeutral Ro5_violations pass_Ro5
## 1            NA   NA           NA          NA        NA             NA       NA
## 2            NA   NA           NA          NA        NA             NA       NA
##   pass_Veber_TPSA pass_Veber_HB pass_Veber Pfizer_3_75_alert pass_GSK_4_400
## 1              NA            NA         NA                NA             NA
## 2              NA            NA         NA                NA             NA
# 如果你更想要“更唯一”的环集合定义:
check_druglikeness(
  "c1ccc2ccccc2c1",
  aromatic_ring_method = "relevant"
)
##           SMILES                                    Error PresentElements
## 1 c1ccc2ccccc2c1 could not find function "get.mol.weight"            <NA>
##   pass_elements ExactMW XLogP HBD HBA HBD_HBA TPSA RotBonds AromaticAtoms
## 1            NA      NA    NA  NA  NA      NA   NA       NA            NA
##   AromaticRings Fsp3 FormalCharge TotalCharge IsNeutral Ro5_violations pass_Ro5
## 1            NA   NA           NA          NA        NA             NA       NA
##   pass_Veber_TPSA pass_Veber_HB pass_Veber Pfizer_3_75_alert pass_GSK_4_400
## 1              NA            NA         NA                NA             NA
smiles_list <- c("CC(=O)OC1=CC=CC=C1C(=O)O", "CC(=O)OC1", "CCO") 


check_druglikeness(
  smiles_list,
  aromatic_ring_method = "relevant"
)
## Warning in parse.smiles(smi, omit.nulls = FALSE): 1 out of 1 SMILES were not
## successfully parsed, resulting in NULLs.
##                     SMILES                                    Error
## 1 CC(=O)OC1=CC=CC=C1C(=O)O could not find function "get.mol.weight"
## 2                CC(=O)OC1                           Invalid SMILES
## 3                      CCO could not find function "get.mol.weight"
##   PresentElements pass_elements ExactMW XLogP HBD HBA HBD_HBA TPSA RotBonds
## 1            <NA>            NA      NA    NA  NA  NA      NA   NA       NA
## 2            <NA>            NA      NA    NA  NA  NA      NA   NA       NA
## 3            <NA>            NA      NA    NA  NA  NA      NA   NA       NA
##   AromaticAtoms AromaticRings Fsp3 FormalCharge TotalCharge IsNeutral
## 1            NA            NA   NA           NA          NA        NA
## 2            NA            NA   NA           NA          NA        NA
## 3            NA            NA   NA           NA          NA        NA
##   Ro5_violations pass_Ro5 pass_Veber_TPSA pass_Veber_HB pass_Veber
## 1             NA       NA              NA            NA         NA
## 2             NA       NA              NA            NA         NA
## 3             NA       NA              NA            NA         NA
##   Pfizer_3_75_alert pass_GSK_4_400
## 1                NA             NA
## 2                NA             NA
## 3                NA             NA
###################################################
#Rule                       ColumnQuestion           Answered
#Element Filter             pass_elements            Does the molecule contain only conventional medicinal elements? Molecules containing rare or heavy metals are generally unsuitable for drug development.
#Lipinski Rule of Five      pass_Ro5                 Can the molecule be passively absorbed in the gastrointestinal tract? The classic predictor of oral bioavailability.
#Veber Rules                pass_Veber               Does the molecule have sufficient membrane permeability and metabolic stability in vivo? Complements Ro5, especially for macrocycles and peptide-like structures.
#Pfizer 3/75               AlertPfizer_3_75_alert    Does the molecule fall into the "high lipophilicity + low polarity" danger zone? This region is strongly associated with elevated cardiotoxicity and hepatotoxicity risk.
#GSK 4/400 Rule            pass_GSK_4_400            Is the molecule sufficiently small and light? Lighter molecules tend to exhibit better selectivity and lower toxicity risk.
####################################################