# NOTE(review): rm(list = ls()) removes every (non-hidden) object from the
# environment this script is run or sourced in -- confirm that is intended.
rm(list = ls())
# rcdk supplies parse.smiles(), get.atoms() and get.smiles(), which this
# script calls.
library(rcdk)
## Warning: package 'rcdk' was built under R version 4.4.3
## Loading required package: rcdklibs
## Loading required package: rJava
#
#' Keep only the largest component of a multi-component SMILES string
#'
#' Splits `smiles` on "." (the SMILES component separator), parses every
#' component with rcdk and returns the SMILES of the component that has the
#' most atoms as reported by `get.atoms()`. When several components share the
#' maximum atom count, the first one in the input wins (`which.max()`).
#'
#' @param smiles A single SMILES string. Non-character input is coerced with
#'   `as.character()`. If more than one element is supplied only the first is
#'   processed and a warning is raised.
#' @return A length-one character vector: the SMILES generated by
#'   `get.smiles()` for the largest component, or `NA_character_` when the
#'   input is missing, empty, or cannot be parsed.
desalt_smiles <- function(smiles) {
  smiles <- as.character(smiles)
  if (length(smiles) == 0L) {
    return(NA_character_)
  }
  if (length(smiles) > 1L) {
    warning(
      "desalt_smiles() expects a single SMILES string; using the first of ",
      length(smiles), " elements.",
      call. = FALSE
    )
    smiles <- smiles[[1L]]
  }
  # Never hand a missing or empty string to the Java parser.
  if (is.na(smiles) || !nzchar(smiles)) {
    return(NA_character_)
  }

  # A failed parse appears to be reported as NULL or as NA depending on the
  # rcdk version (TODO confirm), so treat both as "not a molecule".
  # is.atomic() guards is.na() from being applied to a Java object reference.
  parse_failed <- function(mol) {
    is.null(mol) || (is.atomic(mol) && all(is.na(mol)))
  }

  # Validate the complete string first: if it is not valid SMILES as a whole,
  # give up instead of salvaging individual pieces.
  whole <- parse.smiles(smiles)
  if (length(whole) == 0L || parse_failed(whole[[1L]])) {
    return(NA_character_)
  }

  fragments <- strsplit(smiles, ".", fixed = TRUE)[[1L]]
  # Consecutive dots produce empty pieces; they carry no atoms, so drop them.
  fragments <- fragments[nzchar(fragments)]

  parsed <- lapply(fragments, function(frag) parse.smiles(frag)[[1L]])
  parsed <- Filter(Negate(parse_failed), parsed)
  if (length(parsed) == 0L) {
    return(NA_character_)
  }

  atom_counts <- vapply(
    parsed,
    function(mol) length(get.atoms(mol)),
    integer(1)
  )
  get.smiles(parsed[[which.max(atom_counts)]])
}
# Example: apply desalt_smiles() to each of four SMILES strings, three of
# which contain more than one "."-separated component.
smiles_vec <- c("CC[NH+](C)C.Cl", "CN.CN.Cl", "C1=CC=CC=C1", "C(=O)(O)[O-].[Na+]")
# sapply() names each result after its input string, so the inputs become the
# row names of the printed data frame; the column name is derived from the
# call expression itself.
data.frame(sapply(smiles_vec, desalt_smiles))
# Captured console output of the call above (input on the left, result on the
# right):
## sapply.smiles_vec..desalt_smiles.
## CC[NH+](C)C.Cl CC[NH+](C)C
## CN.CN.Cl CN
## C1=CC=CC=C1 C1=CC=CC=C1
## C(=O)(O)[O-].[Na+] C(=O)(O)[O-]