# NOTE(review): rm(list = ls()) deletes every (non-hidden) object in the
# environment this script is run in. It does not detach packages or reset
# options, so it is not a true "fresh session"; consider restarting R instead
# and confirm nothing that sources this file relies on existing objects.
rm(list = ls())
# rcdk supplies the cheminformatics functions called by desalt_smiles() below.
library(rcdk)
## Warning: package 'rcdk' was built under R version 4.4.3
## Loading required package: rcdklibs
## Loading required package: rJava
#
#' Strip counter-ions from a SMILES string by keeping its largest component
#'
#' The input is parsed once with `parse.smiles()` and the largest connected
#' component of the resulting molecule is written back out with
#' `get.smiles()`. Working on the parsed molecular graph (instead of splitting
#' the text on ".") keeps SMILES valid when ring-closure digits span a dot,
#' e.g. "C1.C1", where the individual text fragments cannot be parsed alone.
#'
#' Charges are left untouched: the function only removes the smaller
#' components, it does not neutralise the one that is kept.
#'
#' @param smiles A character string holding one SMILES. Only the first element
#'   is used; map over a vector with `vapply()` to process several.
#' @return A length-one character vector with the SMILES of the largest
#'   component, or `NA_character_` when the input is empty, missing, or cannot
#'   be parsed. `parse.smiles()` additionally emits its own warning for an
#'   unparseable string.
desalt_smiles <- function(smiles) {
  if (length(smiles) == 0L) {
    return(NA_character_)
  }

  # Only the first element is considered; this also avoids parsing entries
  # that would be discarded anyway.
  smiles <- smiles[[1L]]
  if (is.na(smiles) || !nzchar(smiles)) {
    return(NA_character_)
  }

  # parse.smiles() yields a NULL entry for a string it cannot parse.
  mol <- parse.smiles(smiles)[[1L]]
  if (is.null(mol)) {
    return(NA_character_)
  }

  # get.largest.component() returns the molecule itself when it is already
  # connected, otherwise the component with the most atoms.
  get.smiles(get.largest.component(mol))
}

# Example: three salts plus one molecule that is already salt-free.
smiles_vec <- c(
  "CC[NH+](C)C.Cl",
  "CN.CN.Cl",
  "C1=CC=CC=C1",
  "C(=O)(O)[O-].[Na+]"
)

# vapply() pins the result to exactly one string per input (sapply() may
# silently change type); as.character() keeps an NA result within that
# contract. Naming the column avoids the auto-generated header that
# data.frame() would otherwise derive from the call expression.
data.frame(
  desalted = vapply(
    smiles_vec,
    function(s) as.character(desalt_smiles(s)),
    character(1)
  )
)
##                        desalted
## CC[NH+](C)C.Cl      CC[NH+](C)C
## CN.CN.Cl                     CN
## C1=CC=CC=C1         C1=CC=CC=C1
## C(=O)(O)[O-].[Na+] C(=O)(O)[O-]