diff --git a/Cargo.toml b/Cargo.toml index 1086859..93aeeb6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "rdkit" version = "0.4.12" -edition = "2021" +edition = "2024" authors = ["Xavier Lange ", "Javier Pineda = SMILES_SET + .iter() + .map(|s| ROMol::from_smiles(s).unwrap()) + .collect(); + + b.iter(|| { + for mol in &mols { + let n = mol.num_atoms(true); + for i in 0..n { + let atom = mol.atom_ref(i); + test::black_box(atom.symbol()); + test::black_box(atom.get_atomic_num()); + test::black_box(atom.get_formal_charge()); + test::black_box(atom.get_is_aromatic()); + test::black_box(atom.get_hybridization_type()); + test::black_box(atom.get_degree()); + test::black_box(atom.get_total_num_hs()); + } + } + }); +} + +/// Same workload via atom_with_idx (&mut self). +/// Regression guard: should be the same speed as atom_ref. +#[bench] +fn bench_atom_mut_all_properties(b: &mut test::bench::Bencher) { + let mut mols: Vec = SMILES_SET + .iter() + .map(|s| ROMol::from_smiles(s).unwrap()) + .collect(); + + b.iter(|| { + for mol in &mut mols { + let n = mol.num_atoms(true); + for i in 0..n { + let atom = mol.atom_with_idx(i); + test::black_box(atom.symbol()); + test::black_box(atom.get_atomic_num()); + test::black_box(atom.get_formal_charge()); + test::black_box(atom.get_is_aromatic()); + test::black_box(atom.get_hybridization_type()); + test::black_box(atom.get_degree()); + test::black_box(atom.get_total_num_hs()); + } + } + }); +} + +/// Clone cost alone. Useful for understanding the cost of cloning +/// molecules when only &ROMol is available but mutation is needed. +#[bench] +fn bench_clone_molecules(b: &mut test::bench::Bencher) { + let mols: Vec = SMILES_SET + .iter() + .map(|s| ROMol::from_smiles(s).unwrap()) + .collect(); + + b.iter(|| { + for mol in &mols { + test::black_box(mol.clone()); + } + }); +} diff --git a/rdkit-sys/Cargo.toml b/rdkit-sys/Cargo.toml index 722c75a..553f31b 100644 --- a/rdkit-sys/Cargo.toml +++ b/rdkit-sys/Cargo.toml @@ -2,7 +2,7 @@ name = "rdkit-sys" authors = ["Xavier Lange (xrlange@gmail.com)", "chrissly31415"] version = "0.4.12" -edition = "2021" +edition = "2024" license = "MIT" description = "RDKit CFFI library builder and bindings" repository = "https://github.com/rdkit-rs/rdkit/tree/main/rdkit-sys" @@ -14,9 +14,9 @@ exclude = ["rdkit-*", "*.tar.gz", "examples/"] cxx = "1.0.109" [build-dependencies] -env_logger = "0.10.0" +env_logger = "0.11" cxx-build = "1.0.109" -which = "4.4.2" +which = "8" [features] default = [] diff --git a/rdkit-sys/build.rs b/rdkit-sys/build.rs index 782861d..d5ba17e 100644 --- a/rdkit-sys/build.rs +++ b/rdkit-sys/build.rs @@ -1,4 +1,4 @@ -const CPP_VERSION_FLAG: &str = "-std=c++17"; +const CPP_VERSION_FLAG: &str = "-std=c++20"; fn main() { if std::env::var("DOCS_RS").is_ok() { diff --git a/rdkit-sys/rustfmt.toml b/rdkit-sys/rustfmt.toml index 7de3b73..6edb4d8 100644 --- a/rdkit-sys/rustfmt.toml +++ b/rdkit-sys/rustfmt.toml @@ -48,8 +48,8 @@ trailing_comma = "Vertical" match_block_trailing_comma = false blank_lines_upper_bound = 1 blank_lines_lower_bound = 0 -edition = "2021" -version = "One" +edition = "2024" +style_edition = "2024" inline_attribute_width = 0 format_generated_files = true merge_derives = true diff --git a/rdkit-sys/src/bridge/descriptors.rs b/rdkit-sys/src/bridge/descriptors.rs index ec0b2c1..a79dac4 100644 --- a/rdkit-sys/src/bridge/descriptors.rs +++ b/rdkit-sys/src/bridge/descriptors.rs @@ -15,5 +15,27 @@ pub mod ffi { properties: &SharedPtr, mol: &SharedPtr, ) -> UniquePtr>; + + // Targeted descriptors + pub fn calc_exact_mw(mol: &SharedPtr) -> f64; + pub fn calc_amw(mol: &SharedPtr) -> f64; + pub fn calc_mol_formula(mol: &SharedPtr) -> String; + pub fn calc_num_heavy_atoms(mol: &SharedPtr) -> u32; + pub fn calc_fraction_csp3(mol: &SharedPtr) -> f64; + pub fn calc_labute_asa(mol: &SharedPtr) -> f64; + pub fn calc_tpsa(mol: &SharedPtr) -> f64; + pub fn calc_clog_p(mol: &SharedPtr) -> f64; + pub fn calc_num_hbd(mol: &SharedPtr) -> u32; + pub fn calc_num_hba(mol: &SharedPtr) -> u32; + pub fn calc_num_rotatable_bonds(mol: &SharedPtr) -> u32; + pub fn calc_num_amide_bonds(mol: &SharedPtr) -> u32; + pub fn calc_num_heteroatoms(mol: &SharedPtr) -> u32; + pub fn calc_num_aromatic_rings(mol: &SharedPtr) -> u32; + pub fn calc_num_aliphatic_rings(mol: &SharedPtr) -> u32; + pub fn calc_num_saturated_rings(mol: &SharedPtr) -> u32; + pub fn calc_num_heterocycles(mol: &SharedPtr) -> u32; + pub fn calc_num_aromatic_heterocycles(mol: &SharedPtr) -> u32; + pub fn calc_num_spiro_atoms(mol: &SharedPtr) -> u32; + pub fn calc_num_bridgehead_atoms(mol: &SharedPtr) -> u32; } } diff --git a/rdkit-sys/src/bridge/mod.rs b/rdkit-sys/src/bridge/mod.rs index 1593316..24d4ba8 100644 --- a/rdkit-sys/src/bridge/mod.rs +++ b/rdkit-sys/src/bridge/mod.rs @@ -11,7 +11,10 @@ mod mol_standardize; pub use mol_standardize::ffi as mol_standardize_ffi; mod periodic_table; -pub use periodic_table::{ffi as periodic_table_ffi, PeriodicTableOps}; +pub use periodic_table::{PeriodicTableOps, ffi as periodic_table_ffi}; + +mod ring_info; +pub use ring_info::ffi as ring_info_ffi; mod ro_mol; pub use ro_mol::ffi as ro_mol_ffi; diff --git a/rdkit-sys/src/bridge/periodic_table.rs b/rdkit-sys/src/bridge/periodic_table.rs index b08b1fb..21e0dfa 100644 --- a/rdkit-sys/src/bridge/periodic_table.rs +++ b/rdkit-sys/src/bridge/periodic_table.rs @@ -43,7 +43,7 @@ pub trait PeriodicTableOps { fn getElementName(self, atomic_number: u32) -> String; fn getValenceList(self, atomic_number: u32) -> &'static CxxVector; } -impl<'a> PeriodicTableOps for UniquePtr { +impl PeriodicTableOps for UniquePtr { fn getElementSymbol(self, atomic_number: u32) -> String { ffi::getElementSymbol(atomic_number) } diff --git a/rdkit-sys/src/bridge/ring_info.rs b/rdkit-sys/src/bridge/ring_info.rs new file mode 100644 index 0000000..79a86be --- /dev/null +++ b/rdkit-sys/src/bridge/ring_info.rs @@ -0,0 +1,30 @@ +#[cxx::bridge(namespace = "RDKit")] +pub mod ffi { + unsafe extern "C++" { + include!("wrapper/include/ring_info.h"); + + pub type ROMol = crate::ro_mol_ffi::ROMol; + + pub fn mol_num_rings(mol: &SharedPtr) -> u32; + pub fn mol_is_atom_in_ring_of_size( + mol: &SharedPtr, + atom_idx: u32, + size: u32, + ) -> bool; + pub fn mol_is_bond_in_ring_of_size( + mol: &SharedPtr, + bond_idx: u32, + size: u32, + ) -> bool; + pub fn mol_num_atom_rings(mol: &SharedPtr, atom_idx: u32) -> u32; + pub fn mol_num_bond_rings(mol: &SharedPtr, bond_idx: u32) -> u32; + pub fn mol_atom_ring_sizes( + mol: &SharedPtr, + atom_idx: u32, + ) -> UniquePtr>; + pub fn mol_bond_ring_sizes( + mol: &SharedPtr, + bond_idx: u32, + ) -> UniquePtr>; + } +} diff --git a/rdkit-sys/src/bridge/ro_mol.rs b/rdkit-sys/src/bridge/ro_mol.rs index fffa0e7..2e5ac6e 100644 --- a/rdkit-sys/src/bridge/ro_mol.rs +++ b/rdkit-sys/src/bridge/ro_mol.rs @@ -59,6 +59,7 @@ pub mod ffi { pub fn get_num_atoms(mol: &SharedPtr, onlyExplicit: bool) -> u32; pub fn get_atom_with_idx(mol: &mut SharedPtr, idx: u32) -> Pin<&mut Atom>; + pub fn get_atom_with_idx_const(mol: &SharedPtr, idx: u32) -> Pin<&Atom>; pub fn get_symbol(atom: Pin<&Atom>) -> String; pub fn get_is_aromatic(atom: Pin<&Atom>) -> bool; pub fn get_atomic_num(atom: Pin<&Atom>) -> i32; diff --git a/rdkit-sys/tests/test_atoms.rs b/rdkit-sys/tests/test_atoms.rs index f5d9d5c..fe12146 100644 --- a/rdkit-sys/tests/test_atoms.rs +++ b/rdkit-sys/tests/test_atoms.rs @@ -17,7 +17,9 @@ fn test_atoms() { assert_eq!( &atoms, - &["C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"] + &[ + "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C" + ] ); let mut atom = rdkit_sys::ro_mol_ffi::get_atom_with_idx(&mut romol, 2); diff --git a/rdkit-sys/tests/test_ro_mol.rs b/rdkit-sys/tests/test_ro_mol.rs index 50b6bc8..db77dc7 100644 --- a/rdkit-sys/tests/test_ro_mol.rs +++ b/rdkit-sys/tests/test_ro_mol.rs @@ -69,7 +69,10 @@ fn mol_to_molblock_test() { cxx::let_cxx_string!(smiles = "CC"); let romol = rdkit_sys::ro_mol_ffi::smiles_to_mol(&smiles).unwrap(); let molblock = rdkit_sys::ro_mol_ffi::mol_to_molblock(&romol); - assert_eq!(molblock, "\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n"); + assert_eq!( + molblock, + "\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n" + ); } #[test] diff --git a/rdkit-sys/tests/test_rw_mol.rs b/rdkit-sys/tests/test_rw_mol.rs index f31ee43..c42dfeb 100644 --- a/rdkit-sys/tests/test_rw_mol.rs +++ b/rdkit-sys/tests/test_rw_mol.rs @@ -1,4 +1,4 @@ -use cxx::{let_cxx_string, SharedPtr}; +use cxx::{SharedPtr, let_cxx_string}; use rdkit_sys::{ro_mol_ffi::ROMol, rw_mol_ffi::RWMol}; #[test] @@ -191,7 +191,10 @@ CC(=O)OC(CC(=O)[O-])C[N+](C)(C)C let ro_mol = unsafe { std::mem::transmute::, SharedPtr>(rw_mol) }; let smiles = rdkit_sys::ro_mol_ffi::mol_to_smiles(&ro_mol); - assert_eq!("[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]", &smiles); + assert_eq!( + "[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]", + &smiles + ); } #[test] diff --git a/rdkit-sys/wrapper/include/descriptors.h b/rdkit-sys/wrapper/include/descriptors.h index c7a87e1..d835107 100644 --- a/rdkit-sys/wrapper/include/descriptors.h +++ b/rdkit-sys/wrapper/include/descriptors.h @@ -1,6 +1,7 @@ #pragma once #include "rust/cxx.h" +#include #include namespace RDKit { @@ -10,4 +11,25 @@ std::shared_ptr new_properties(); std::unique_ptr> get_property_names(const std::shared_ptr &props); std::unique_ptr> compute_properties(const std::shared_ptr &props, const std::shared_ptr &mol); +// Targeted descriptors +double calc_exact_mw(const std::shared_ptr &mol); +double calc_amw(const std::shared_ptr &mol); +rust::String calc_mol_formula(const std::shared_ptr &mol); +unsigned int calc_num_heavy_atoms(const std::shared_ptr &mol); +double calc_fraction_csp3(const std::shared_ptr &mol); +double calc_labute_asa(const std::shared_ptr &mol); +double calc_tpsa(const std::shared_ptr &mol); +double calc_clog_p(const std::shared_ptr &mol); +unsigned int calc_num_hbd(const std::shared_ptr &mol); +unsigned int calc_num_hba(const std::shared_ptr &mol); +unsigned int calc_num_rotatable_bonds(const std::shared_ptr &mol); +unsigned int calc_num_amide_bonds(const std::shared_ptr &mol); +unsigned int calc_num_heteroatoms(const std::shared_ptr &mol); +unsigned int calc_num_aromatic_rings(const std::shared_ptr &mol); +unsigned int calc_num_aliphatic_rings(const std::shared_ptr &mol); +unsigned int calc_num_saturated_rings(const std::shared_ptr &mol); +unsigned int calc_num_heterocycles(const std::shared_ptr &mol); +unsigned int calc_num_aromatic_heterocycles(const std::shared_ptr &mol); +unsigned int calc_num_spiro_atoms(const std::shared_ptr &mol); +unsigned int calc_num_bridgehead_atoms(const std::shared_ptr &mol); } // namespace RDKit \ No newline at end of file diff --git a/rdkit-sys/wrapper/include/ring_info.h b/rdkit-sys/wrapper/include/ring_info.h new file mode 100644 index 0000000..336c019 --- /dev/null +++ b/rdkit-sys/wrapper/include/ring_info.h @@ -0,0 +1,17 @@ +#pragma once + +#include "rust/cxx.h" +#include +#include + +namespace RDKit { + +unsigned int mol_num_rings(const std::shared_ptr &mol); +bool mol_is_atom_in_ring_of_size(const std::shared_ptr &mol, unsigned int atom_idx, unsigned int size); +bool mol_is_bond_in_ring_of_size(const std::shared_ptr &mol, unsigned int bond_idx, unsigned int size); +unsigned int mol_num_atom_rings(const std::shared_ptr &mol, unsigned int atom_idx); +unsigned int mol_num_bond_rings(const std::shared_ptr &mol, unsigned int bond_idx); +std::unique_ptr> mol_atom_ring_sizes(const std::shared_ptr &mol, unsigned int atom_idx); +std::unique_ptr> mol_bond_ring_sizes(const std::shared_ptr &mol, unsigned int bond_idx); + +} // namespace RDKit diff --git a/rdkit-sys/wrapper/include/ro_mol.h b/rdkit-sys/wrapper/include/ro_mol.h index 6cbe18d..6fbe6ea 100644 --- a/rdkit-sys/wrapper/include/ro_mol.h +++ b/rdkit-sys/wrapper/include/ro_mol.h @@ -29,6 +29,8 @@ unsigned int atom_sanitize_exception_get_atom_idx(const MolSanitizeExceptionUniq unsigned int get_num_atoms(const std::shared_ptr &mol, bool only_explicit); Atom &get_atom_with_idx(std::shared_ptr &mol, unsigned int idx); +const Atom &get_atom_with_idx_const(const std::shared_ptr &mol, + unsigned int idx); rust::String get_symbol(const Atom &atom); bool get_is_aromatic(const Atom &atom); int get_atomic_num(const Atom &atom); diff --git a/rdkit-sys/wrapper/src/descriptors.cc b/rdkit-sys/wrapper/src/descriptors.cc index 2e5b2fa..39412e7 100644 --- a/rdkit-sys/wrapper/src/descriptors.cc +++ b/rdkit-sys/wrapper/src/descriptors.cc @@ -1,4 +1,8 @@ #include "rust/cxx.h" +#include +#include +#include +#include #include #include @@ -19,4 +23,59 @@ std::unique_ptr> compute_properties(const std::shared_ptr(computed); return std::unique_ptr>(computed_heap); } +double calc_exact_mw(const std::shared_ptr &mol) { return Descriptors::calcExactMW(*mol); } + +double calc_amw(const std::shared_ptr &mol) { return Descriptors::calcAMW(*mol); } + +rust::String calc_mol_formula(const std::shared_ptr &mol) { return Descriptors::calcMolFormula(*mol); } + +unsigned int calc_num_heavy_atoms(const std::shared_ptr &mol) { return mol->getNumHeavyAtoms(); } + +double calc_fraction_csp3(const std::shared_ptr &mol) { return Descriptors::calcFractionCSP3(*mol); } + +double calc_labute_asa(const std::shared_ptr &mol) { return Descriptors::calcLabuteASA(*mol); } + +double calc_tpsa(const std::shared_ptr &mol) { return Descriptors::calcTPSA(*mol); } + +double calc_clog_p(const std::shared_ptr &mol) { + double logp, mr; + Descriptors::calcCrippenDescriptors(*mol, logp, mr); + return logp; +} + +unsigned int calc_num_hbd(const std::shared_ptr &mol) { return Descriptors::calcNumHBD(*mol); } + +unsigned int calc_num_hba(const std::shared_ptr &mol) { return Descriptors::calcNumHBA(*mol); } + +unsigned int calc_num_rotatable_bonds(const std::shared_ptr &mol) { + return Descriptors::calcNumRotatableBonds(*mol); +} + +unsigned int calc_num_amide_bonds(const std::shared_ptr &mol) { return Descriptors::calcNumAmideBonds(*mol); } + +unsigned int calc_num_heteroatoms(const std::shared_ptr &mol) { return Descriptors::calcNumHeteroatoms(*mol); } + +unsigned int calc_num_aromatic_rings(const std::shared_ptr &mol) { + return Descriptors::calcNumAromaticRings(*mol); +} + +unsigned int calc_num_aliphatic_rings(const std::shared_ptr &mol) { + return Descriptors::calcNumAliphaticRings(*mol); +} + +unsigned int calc_num_saturated_rings(const std::shared_ptr &mol) { + return Descriptors::calcNumSaturatedRings(*mol); +} + +unsigned int calc_num_heterocycles(const std::shared_ptr &mol) { return Descriptors::calcNumHeterocycles(*mol); } + +unsigned int calc_num_aromatic_heterocycles(const std::shared_ptr &mol) { + return Descriptors::calcNumAromaticHeterocycles(*mol); +} + +unsigned int calc_num_spiro_atoms(const std::shared_ptr &mol) { return Descriptors::calcNumSpiroAtoms(*mol); } + +unsigned int calc_num_bridgehead_atoms(const std::shared_ptr &mol) { + return Descriptors::calcNumBridgeheadAtoms(*mol); +} } // namespace RDKit \ No newline at end of file diff --git a/rdkit-sys/wrapper/src/ring_info.cc b/rdkit-sys/wrapper/src/ring_info.cc new file mode 100644 index 0000000..e62c475 --- /dev/null +++ b/rdkit-sys/wrapper/src/ring_info.cc @@ -0,0 +1,35 @@ +#include "rust/cxx.h" +#include +#include + +namespace RDKit { + +unsigned int mol_num_rings(const std::shared_ptr &mol) { return mol->getRingInfo()->numRings(); } + +bool mol_is_atom_in_ring_of_size(const std::shared_ptr &mol, unsigned int atom_idx, unsigned int size) { + return mol->getRingInfo()->isAtomInRingOfSize(atom_idx, size); +} + +bool mol_is_bond_in_ring_of_size(const std::shared_ptr &mol, unsigned int bond_idx, unsigned int size) { + return mol->getRingInfo()->isBondInRingOfSize(bond_idx, size); +} + +unsigned int mol_num_atom_rings(const std::shared_ptr &mol, unsigned int atom_idx) { + return mol->getRingInfo()->numAtomRings(atom_idx); +} + +unsigned int mol_num_bond_rings(const std::shared_ptr &mol, unsigned int bond_idx) { + return mol->getRingInfo()->numBondRings(bond_idx); +} + +std::unique_ptr> mol_atom_ring_sizes(const std::shared_ptr &mol, unsigned int atom_idx) { + auto sizes = mol->getRingInfo()->atomRingSizes(atom_idx); + return std::make_unique>(sizes.begin(), sizes.end()); +} + +std::unique_ptr> mol_bond_ring_sizes(const std::shared_ptr &mol, unsigned int bond_idx) { + auto sizes = mol->getRingInfo()->bondRingSizes(bond_idx); + return std::make_unique>(sizes.begin(), sizes.end()); +} + +} // namespace RDKit diff --git a/rdkit-sys/wrapper/src/ro_mol.cc b/rdkit-sys/wrapper/src/ro_mol.cc index 2442a0b..d3ee865 100644 --- a/rdkit-sys/wrapper/src/ro_mol.cc +++ b/rdkit-sys/wrapper/src/ro_mol.cc @@ -69,6 +69,10 @@ unsigned int get_num_atoms(const std::shared_ptr &mol, bool only_explicit Atom &get_atom_with_idx(std::shared_ptr &mol, unsigned int idx) { return *mol->getAtomWithIdx(idx); } +const Atom &get_atom_with_idx_const(const std::shared_ptr &mol, unsigned int idx) { + return *mol->getAtomWithIdx(idx); +} + rust::String get_symbol(const Atom &atom) { return atom.getSymbol(); } bool get_is_aromatic(const Atom &atom) { return atom.getIsAromatic(); } diff --git a/rustfmt.toml b/rustfmt.toml index 7de3b73..6edb4d8 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -48,8 +48,8 @@ trailing_comma = "Vertical" match_block_trailing_comma = false blank_lines_upper_bound = 1 blank_lines_lower_bound = 0 -edition = "2021" -version = "One" +edition = "2024" +style_edition = "2024" inline_attribute_width = 0 format_generated_files = true merge_derives = true diff --git a/src/descriptors.rs b/src/descriptors.rs index a3272c9..e4d882f 100644 --- a/src/descriptors.rs +++ b/src/descriptors.rs @@ -4,6 +4,88 @@ use cxx::SharedPtr; use crate::ROMol; +impl ROMol { + pub fn calc_exact_mw(&self) -> f64 { + rdkit_sys::descriptors_ffi::calc_exact_mw(&self.ptr) + } + + pub fn calc_amw(&self) -> f64 { + rdkit_sys::descriptors_ffi::calc_amw(&self.ptr) + } + + pub fn calc_mol_formula(&self) -> String { + rdkit_sys::descriptors_ffi::calc_mol_formula(&self.ptr) + } + + pub fn calc_num_heavy_atoms(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_heavy_atoms(&self.ptr) + } + + pub fn calc_fraction_csp3(&self) -> f64 { + rdkit_sys::descriptors_ffi::calc_fraction_csp3(&self.ptr) + } + + pub fn calc_labute_asa(&self) -> f64 { + rdkit_sys::descriptors_ffi::calc_labute_asa(&self.ptr) + } + + pub fn calc_tpsa(&self) -> f64 { + rdkit_sys::descriptors_ffi::calc_tpsa(&self.ptr) + } + + pub fn calc_clog_p(&self) -> f64 { + rdkit_sys::descriptors_ffi::calc_clog_p(&self.ptr) + } + + pub fn calc_num_hbd(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_hbd(&self.ptr) + } + + pub fn calc_num_hba(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_hba(&self.ptr) + } + + pub fn calc_num_rotatable_bonds(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_rotatable_bonds(&self.ptr) + } + + pub fn calc_num_amide_bonds(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_amide_bonds(&self.ptr) + } + + pub fn calc_num_heteroatoms(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_heteroatoms(&self.ptr) + } + + pub fn calc_num_aromatic_rings(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_aromatic_rings(&self.ptr) + } + + pub fn calc_num_aliphatic_rings(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_aliphatic_rings(&self.ptr) + } + + pub fn calc_num_saturated_rings(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_saturated_rings(&self.ptr) + } + + pub fn calc_num_heterocycles(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_heterocycles(&self.ptr) + } + + pub fn calc_num_aromatic_heterocycles(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_aromatic_heterocycles(&self.ptr) + } + + pub fn calc_num_spiro_atoms(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_spiro_atoms(&self.ptr) + } + + pub fn calc_num_bridgehead_atoms(&self) -> u32 { + rdkit_sys::descriptors_ffi::calc_num_bridgehead_atoms(&self.ptr) + } +} + pub struct Properties { ptr: SharedPtr, } @@ -25,7 +107,7 @@ impl Properties { let names = rdkit_sys::descriptors_ffi::get_property_names(&self.ptr); let computed = rdkit_sys::descriptors_ffi::compute_properties(&self.ptr, &ro_mol.ptr); - assert!(names.len() != 0); + assert!(!names.is_empty()); assert!(computed.len() == names.len()); names diff --git a/src/graphmol/atom_ref.rs b/src/graphmol/atom_ref.rs new file mode 100644 index 0000000..c6422d7 --- /dev/null +++ b/src/graphmol/atom_ref.rs @@ -0,0 +1,88 @@ +use std::{fmt::Formatter, pin::Pin}; + +use rdkit_sys::ro_mol_ffi; +pub use rdkit_sys::ro_mol_ffi::HybridizationType; + +/// Read-only view of an atom within a molecule. +/// +/// Unlike [`Atom`](crate::Atom), this borrows the parent molecule immutably +/// (`&self`), so multiple `AtomRef`s can coexist and no clone is needed. +pub struct AtomRef<'a> { + ptr: Pin<&'a ro_mol_ffi::Atom>, +} + +impl<'a> std::fmt::Display for AtomRef<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let symbol = self.symbol(); + f.write_str(&symbol) + } +} + +impl<'a> std::fmt::Debug for AtomRef<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let symbol = self.symbol(); + f.write_str(&symbol) + } +} + +impl<'a> AtomRef<'a> { + pub fn from_ptr(ptr: Pin<&'a ro_mol_ffi::Atom>) -> Self { + Self { ptr } + } + + pub fn symbol(&self) -> String { + ro_mol_ffi::get_symbol(self.ptr) + } + + pub fn get_is_aromatic(&self) -> bool { + ro_mol_ffi::get_is_aromatic(self.ptr) + } + + pub fn get_atomic_num(&self) -> i32 { + ro_mol_ffi::get_atomic_num(self.ptr) + } + + pub fn get_formal_charge(&self) -> i32 { + ro_mol_ffi::get_formal_charge(self.ptr) + } + + pub fn get_total_num_hs(&self) -> u32 { + ro_mol_ffi::get_total_num_hs(self.ptr) + } + + pub fn get_total_valence(&self) -> u32 { + ro_mol_ffi::get_total_valence(self.ptr) + } + + pub fn get_hybridization_type(&self) -> HybridizationType { + ro_mol_ffi::atom_get_hybridization(self.ptr) + } + + pub fn get_num_radical_electrons(&self) -> u32 { + ro_mol_ffi::get_num_radical_electrons(self.ptr) + } + + pub fn get_degree(&self) -> u32 { + ro_mol_ffi::get_degree(self.ptr) + } + + pub fn get_int_prop(&self, key: &str) -> Result { + cxx::let_cxx_string!(key = key); + ro_mol_ffi::get_int_prop(self.ptr, &key) + } + + pub fn get_float_prop(&self, key: &str) -> Result { + cxx::let_cxx_string!(key = key); + ro_mol_ffi::get_float_prop(self.ptr, &key) + } + + pub fn get_bool_prop(&self, key: &str) -> Result { + cxx::let_cxx_string!(key = key); + ro_mol_ffi::get_bool_prop(self.ptr, &key) + } + + pub fn get_prop(&self, key: &str) -> Result { + cxx::let_cxx_string!(key = key); + ro_mol_ffi::get_prop(self.ptr, &key) + } +} diff --git a/src/graphmol/mod.rs b/src/graphmol/mod.rs index f23a721..5824ec1 100644 --- a/src/graphmol/mod.rs +++ b/src/graphmol/mod.rs @@ -1,6 +1,11 @@ mod atom; pub use atom::*; +mod atom_ref; +pub use atom_ref::*; + +mod ring_info; + mod mol_ops; pub use mol_ops::*; diff --git a/src/graphmol/ring_info.rs b/src/graphmol/ring_info.rs new file mode 100644 index 0000000..2d3e85e --- /dev/null +++ b/src/graphmol/ring_info.rs @@ -0,0 +1,33 @@ +use crate::ROMol; + +impl ROMol { + pub fn num_rings(&self) -> u32 { + rdkit_sys::ring_info_ffi::mol_num_rings(&self.ptr) + } + + pub fn is_atom_in_ring_of_size(&self, atom_idx: u32, size: u32) -> bool { + rdkit_sys::ring_info_ffi::mol_is_atom_in_ring_of_size(&self.ptr, atom_idx, size) + } + + pub fn is_bond_in_ring_of_size(&self, bond_idx: u32, size: u32) -> bool { + rdkit_sys::ring_info_ffi::mol_is_bond_in_ring_of_size(&self.ptr, bond_idx, size) + } + + pub fn num_atom_rings(&self, atom_idx: u32) -> u32 { + rdkit_sys::ring_info_ffi::mol_num_atom_rings(&self.ptr, atom_idx) + } + + pub fn num_bond_rings(&self, bond_idx: u32) -> u32 { + rdkit_sys::ring_info_ffi::mol_num_bond_rings(&self.ptr, bond_idx) + } + + pub fn atom_ring_sizes(&self, atom_idx: u32) -> Vec { + let sizes = rdkit_sys::ring_info_ffi::mol_atom_ring_sizes(&self.ptr, atom_idx); + sizes.iter().copied().collect() + } + + pub fn bond_ring_sizes(&self, bond_idx: u32) -> Vec { + let sizes = rdkit_sys::ring_info_ffi::mol_bond_ring_sizes(&self.ptr, bond_idx); + sizes.iter().copied().collect() + } +} diff --git a/src/graphmol/ro_mol.rs b/src/graphmol/ro_mol.rs index bcdd30c..045d2ed 100644 --- a/src/graphmol/ro_mol.rs +++ b/src/graphmol/ro_mol.rs @@ -3,7 +3,7 @@ use std::fmt::{Debug, Formatter}; use cxx::let_cxx_string; use rdkit_sys::*; -use crate::{Atom, Fingerprint, RWMol}; +use crate::{Atom, AtomRef, Fingerprint, RWMol}; pub struct ROMol { pub(crate) ptr: cxx::SharedPtr, @@ -89,11 +89,20 @@ impl ROMol { ro_mol_ffi::get_num_atoms(&self.ptr, only_explicit) } - pub fn atom_with_idx(&mut self, idx: u32) -> Atom { + pub fn atom_with_idx(&mut self, idx: u32) -> Atom<'_> { let ptr = ro_mol_ffi::get_atom_with_idx(&mut self.ptr, idx); Atom::from_ptr(ptr) } + /// Returns a read-only reference to the atom at `idx`. + /// + /// Unlike [`atom_with_idx`](Self::atom_with_idx), this takes `&self`, + /// so no mutable borrow (or clone) is needed for read-only access. + pub fn atom_ref(&self, idx: u32) -> AtomRef<'_> { + let ptr = ro_mol_ffi::get_atom_with_idx_const(&self.ptr, idx); + AtomRef::from_ptr(ptr) + } + pub fn update_property_cache(&mut self, strict: bool) { ro_mol_ffi::ro_mol_update_property_cache(&mut self.ptr, strict) } diff --git a/src/graphmol/rw_mol.rs b/src/graphmol/rw_mol.rs index 6467120..b640d50 100644 --- a/src/graphmol/rw_mol.rs +++ b/src/graphmol/rw_mol.rs @@ -1,6 +1,6 @@ use std::fmt::Formatter; -use cxx::{let_cxx_string, SharedPtr}; +use cxx::{SharedPtr, let_cxx_string}; use rdkit_sys::*; use crate::ROMol; diff --git a/src/periodic_table.rs b/src/periodic_table.rs index 60fc473..cf66658 100644 --- a/src/periodic_table.rs +++ b/src/periodic_table.rs @@ -1,4 +1,4 @@ -use cxx::{let_cxx_string, CxxVector}; +use cxx::{CxxVector, let_cxx_string}; use rdkit_sys::PeriodicTableOps; pub struct PeriodicTable {} @@ -18,8 +18,8 @@ impl PeriodicTable { /// * `atom` - The symbol of the element pub fn get_most_common_isotope_mass(atom: &str) -> f64 { let_cxx_string!(atom_cxx_string = atom); - rdkit_sys::periodic_table_ffi::get_periodic_table() - .getMostCommonIsotopeMass(&atom_cxx_string) + let pt = rdkit_sys::periodic_table_ffi::get_periodic_table(); + pt.getMostCommonIsotopeMass(&atom_cxx_string) } /// Returns the atomic weight of the atom @@ -32,7 +32,8 @@ impl PeriodicTable { /// * `atom` - The symbol of the element pub fn get_atomic_number(atom: &str) -> i32 { let_cxx_string!(atom_cxx_string = atom); - rdkit_sys::periodic_table_ffi::get_periodic_table().getAtomicNumber(&atom_cxx_string) + let pt = rdkit_sys::periodic_table_ffi::get_periodic_table(); + pt.getAtomicNumber(&atom_cxx_string) } /// Returns the symbol of the element diff --git a/tests/test_atom_ref.rs b/tests/test_atom_ref.rs new file mode 100644 index 0000000..55c80e9 --- /dev/null +++ b/tests/test_atom_ref.rs @@ -0,0 +1,81 @@ +/// Verify AtomRef returns identical results to Atom for all read-only methods. +#[test] +fn test_atom_ref_parity() { + let mut romol = rdkit::ROMol::from_smiles("[NH4+]").unwrap(); + + // Read via mutable Atom + let atom = romol.atom_with_idx(0); + let symbol = atom.symbol(); + let is_aromatic = atom.get_is_aromatic(); + let atomic_num = atom.get_atomic_num(); + let hybridization = atom.get_hybridization_type(); + let formal_charge = atom.get_formal_charge(); + let total_num_hs = atom.get_total_num_hs(); + let total_valence = atom.get_total_valence(); + let num_radical_electrons = atom.get_num_radical_electrons(); + let degree = atom.get_degree(); + + // Read via immutable AtomRef — must match exactly + let atom_ref = romol.atom_ref(0); + assert_eq!(atom_ref.symbol(), symbol); + assert_eq!(atom_ref.get_is_aromatic(), is_aromatic); + assert_eq!(atom_ref.get_atomic_num(), atomic_num); + assert_eq!(atom_ref.get_hybridization_type(), hybridization); + assert_eq!(atom_ref.get_formal_charge(), formal_charge); + assert_eq!(atom_ref.get_total_num_hs(), total_num_hs); + assert_eq!(atom_ref.get_total_valence(), total_valence); + assert_eq!(atom_ref.get_num_radical_electrons(), num_radical_electrons); + assert_eq!(atom_ref.get_degree(), degree); +} + +/// Verify AtomRef property getters work. +#[test] +fn test_atom_ref_properties() { + let mut romol = rdkit::ROMol::from_smiles("CC").unwrap(); + + // Set properties via mutable Atom + { + let mut carbon = romol.atom_with_idx(0); + carbon.set_prop("int_key", 42); + carbon.set_prop("float_key", 3.14); + carbon.set_prop("bool_key", true); + carbon.set_prop("str_key", "hello"); + } + + // Read back via immutable AtomRef + let atom_ref = romol.atom_ref(0); + assert_eq!(atom_ref.get_int_prop("int_key").unwrap(), 42); + assert_eq!(atom_ref.get_float_prop("float_key").unwrap(), 3.14); + assert_eq!(atom_ref.get_bool_prop("bool_key").unwrap(), true); + assert_eq!(atom_ref.get_prop("str_key").unwrap(), "hello"); +} + +/// Verify multiple AtomRefs can coexist (no &mut self needed). +#[test] +fn test_atom_ref_no_clone_needed() { + let romol = rdkit::ROMol::from_smiles("CCO").unwrap(); + + // This would not compile with atom_with_idx since it needs &mut self. + // With atom_ref, we can hold multiple references simultaneously. + let c1 = romol.atom_ref(0); + let c2 = romol.atom_ref(1); + let o = romol.atom_ref(2); + + assert_eq!(c1.symbol(), "C"); + assert_eq!(c2.symbol(), "C"); + assert_eq!(o.symbol(), "O"); +} + +/// Verify AtomRef works across all atoms in a molecule. +#[test] +fn test_atom_ref_iteration() { + let romol = rdkit::ROMol::from_smiles("c1ccccc1").unwrap(); + let n = romol.num_atoms(true); + assert_eq!(n, 6); + + for i in 0..n { + let atom = romol.atom_ref(i); + assert_eq!(atom.symbol(), "C"); + assert!(atom.get_is_aromatic()); + } +} diff --git a/tests/test_extended_descriptors.rs b/tests/test_extended_descriptors.rs new file mode 100644 index 0000000..9a38d6d --- /dev/null +++ b/tests/test_extended_descriptors.rs @@ -0,0 +1,60 @@ +use rdkit::ROMol; + +#[test] +fn test_exact_mw_water() { + let mol = ROMol::from_smiles("O").unwrap(); + assert!((mol.calc_exact_mw() - 18.010565).abs() < 0.001); +} + +#[test] +fn test_mol_formula() { + let mol = ROMol::from_smiles("c1ccccc1").unwrap(); + assert_eq!(mol.calc_mol_formula(), "C6H6"); +} + +#[test] +fn test_num_heavy_atoms() { + let mol = ROMol::from_smiles("CCO").unwrap(); + assert_eq!(mol.calc_num_heavy_atoms(), 3); +} + +#[test] +fn test_fraction_csp3() { + let mol = ROMol::from_smiles("CCO").unwrap(); + assert_eq!(mol.calc_fraction_csp3(), 1.0); + let mol = ROMol::from_smiles("c1ccccc1").unwrap(); + assert_eq!(mol.calc_fraction_csp3(), 0.0); +} + +#[test] +fn test_tpsa() { + let mol = ROMol::from_smiles("CCO").unwrap(); + assert!(mol.calc_tpsa() > 0.0); +} + +#[test] +fn test_clog_p() { + let mol = ROMol::from_smiles("c1ccccc1").unwrap(); + assert!(mol.calc_clog_p() > 0.0); +} + +#[test] +fn test_hbd_hba() { + let mol = ROMol::from_smiles("CCO").unwrap(); + assert_eq!(mol.calc_num_hbd(), 1); + assert_eq!(mol.calc_num_hba(), 1); +} + +#[test] +fn test_aromatic_rings() { + let mol = ROMol::from_smiles("c1ccccc1").unwrap(); + assert_eq!(mol.calc_num_aromatic_rings(), 1); + assert_eq!(mol.calc_num_aliphatic_rings(), 0); +} + +#[test] +fn test_heterocycles() { + let mol = ROMol::from_smiles("c1ccncc1").unwrap(); + assert_eq!(mol.calc_num_heterocycles(), 1); + assert_eq!(mol.calc_num_aromatic_heterocycles(), 1); +} diff --git a/tests/test_graphmol.rs b/tests/test_graphmol.rs index 64587f1..e167672 100644 --- a/tests/test_graphmol.rs +++ b/tests/test_graphmol.rs @@ -1,7 +1,7 @@ use rdkit::{ - detect_chemistry_problems, fragment_parent, substruct_match, CleanupParameters, - MolSanitizeException, ROMol, ROMolError, RWMol, SmilesParserParams, SubstructMatchParameters, - TautomerEnumerator, Uncharger, + CleanupParameters, MolSanitizeException, ROMol, ROMolError, RWMol, SmilesParserParams, + SubstructMatchParameters, TautomerEnumerator, Uncharger, detect_chemistry_problems, + fragment_parent, substruct_match, }; #[test] @@ -15,7 +15,10 @@ fn test_neutralize() { let romol = ROMol::from_smiles(smiles).unwrap(); let uncharger = Uncharger::new(false); let uncharged_mol = uncharger.uncharge(&romol); - assert_eq!("CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1.CO.Nc1nc2ncc(CNc3ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc3)nc2c(=O)[nH]1", uncharged_mol.as_smiles()); + assert_eq!( + "CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1.CO.Nc1nc2ncc(CNc3ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc3)nc2c(=O)[nH]1", + uncharged_mol.as_smiles() + ); } #[test] @@ -29,7 +32,10 @@ fn test_fragment_parent() { "Nc1nc2ncc(CNc3ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc3)nc2c(=O)[nH]1", parent_rwmol.as_smiles() ); - assert_eq!("CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1.CO.Nc1nc2ncc(CNc3ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc3)nc2c(=O)[nH]1", rwmol.as_smiles()); + assert_eq!( + "CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1.CO.Nc1nc2ncc(CNc3ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc3)nc2c(=O)[nH]1", + rwmol.as_smiles() + ); } #[test] @@ -271,7 +277,10 @@ CC(=O)OC(CC(=O)[O-])C[N+](C)(C)C "#; let rw_mol = RWMol::from_mol_block(mol_block, false, false, false).unwrap(); - assert_eq!("[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]", &rw_mol.as_smiles()); + assert_eq!( + "[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]", + &rw_mol.as_smiles() + ); } #[test] @@ -326,5 +335,8 @@ fn mol_to_molblock_test() { let smiles = "CC"; let romol = ROMol::from_smiles(&smiles).unwrap(); let molblock = romol.to_molblock(); - assert_eq!(molblock, "\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n"); + assert_eq!( + molblock, + "\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n" + ); } diff --git a/tests/test_mol_ops.rs b/tests/test_mol_ops.rs index 42e4cb3..56dc92c 100644 --- a/tests/test_mol_ops.rs +++ b/tests/test_mol_ops.rs @@ -1,4 +1,4 @@ -use rdkit::{add_hs, clean_up, remove_hs, set_hybridization, ROMol, RemoveHsParameters}; +use rdkit::{ROMol, RemoveHsParameters, add_hs, clean_up, remove_hs, set_hybridization}; #[test] fn test_remove_hs() { diff --git a/tests/test_ring_info.rs b/tests/test_ring_info.rs new file mode 100644 index 0000000..8990e4f --- /dev/null +++ b/tests/test_ring_info.rs @@ -0,0 +1,47 @@ +use rdkit::ROMol; + +#[test] +fn test_benzene_ring_count() { + let mol = ROMol::from_smiles("c1ccccc1").unwrap(); + assert_eq!(mol.num_rings(), 1); +} + +#[test] +fn test_naphthalene_ring_count() { + let mol = ROMol::from_smiles("c1ccc2ccccc2c1").unwrap(); + assert_eq!(mol.num_rings(), 2); +} + +#[test] +fn test_no_rings() { + let mol = ROMol::from_smiles("CCCC").unwrap(); + assert_eq!(mol.num_rings(), 0); +} + +#[test] +fn test_atom_in_ring_of_size() { + let mol = ROMol::from_smiles("c1ccccc1").unwrap(); + for i in 0..6 { + assert!(mol.is_atom_in_ring_of_size(i, 6)); + assert!(!mol.is_atom_in_ring_of_size(i, 5)); + } +} + +#[test] +fn test_atom_ring_sizes() { + let mol = ROMol::from_smiles("c1ccccc1").unwrap(); + assert_eq!(mol.atom_ring_sizes(0), vec![6]); +} + +#[test] +fn test_naphthalene_junction_atoms() { + let mol = ROMol::from_smiles("c1ccc2ccccc2c1").unwrap(); + let mut found = false; + for i in 0..mol.num_atoms(true) { + if mol.num_atom_rings(i) == 2 { + found = true; + break; + } + } + assert!(found); +} diff --git a/tests/test_substruct.rs b/tests/test_substruct.rs index 851e0bc..dbe2b7f 100644 --- a/tests/test_substruct.rs +++ b/tests/test_substruct.rs @@ -1,4 +1,4 @@ -use rdkit::{substruct_match, ROMol, SubstructMatchItem, SubstructMatchParameters}; +use rdkit::{ROMol, SubstructMatchItem, SubstructMatchParameters, substruct_match}; #[test] fn test_substruct_match() {