Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "rdkit"
version = "0.4.12"
edition = "2021"
edition = "2024"
authors = ["Xavier Lange <xrlange@gmail.com>", "Javier Pineda <javierjpineda13@gmail.com>"]
license = "MIT"
description = "High level RDKit functionality for rust"
Expand All @@ -17,7 +17,7 @@ cxx = "1"
flate2 = "1"
log = "0.4"
rdkit-sys = { path = "rdkit-sys", version = "0.4.9" }
thiserror = "1"
thiserror = "2"

[dev-dependencies]
env_logger = "0.9.0"
env_logger = "0.11"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ RDKit
---

A high-level library for performing common RDKit tasks such as SMILES parsing, molecule normalization, etc. Uses
the C++ API via bindings from [rdkit-sys](https://crates.io/crate/rdkit-sys).
the C++ API via bindings from [rdkit-sys](https://crates.io/crates/rdkit-sys).

Notice: Requires RDKit 2023.09.1 or higher (for example, the version available on Ubuntu Noble 24.04)

Expand Down
102 changes: 102 additions & 0 deletions benches/atom_iteration_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#![allow(soft_unstable)]
#![feature(test)]
extern crate test;

use rdkit::ROMol;

/// Benchmark input: SMILES strings for drug-like molecules of increasing size.
///
/// The entries range from small pharmaceuticals to larger natural-product-like
/// structures so that the benchmarks see a realistic spread of atom counts.
/// The label above each entry is the name the benchmark author gave it.
const SMILES_SET: &[&str] = &[
    // Aspirin
    "CC(=O)Oc1ccccc1C(=O)O",
    // Ibuprofen
    "CC(C)Cc1ccc(cc1)C(C)C(=O)O",
    // Caffeine
    "Cn1c(=O)c2c(ncn2C)n(C)c1=O",
    // Diazepam
    "O=C1CN=C(c2ccccc2)c2cc(Cl)ccc2N1C",
    // Atorvastatin (Lipitor)
    "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CC[C@@H](O)C[C@@H](O)CC(=O)O",
    // Taxol core
    "CC1=C2C(OC(=O)c3ccccc3)C(O)C4(OC(=O)C(O)(CC(OC(=O)c5ccccc5)C1O)C24C)C(=O)c1ccc(OC)cc1",
    // Vancomycin fragment
    "OC1C(O)C(OC2C(O)C(O)C(O)C(CO)O2)OC(CO)C1NC(=O)C1CC(O)CN1C(=O)C(NC(=O)C1CC(=O)NC(=O)C1O)C(O)c1ccc(O)cc1",
];

/// Reference measurement: the cost of parsing every `SMILES_SET` entry into an
/// `ROMol`. Each parsed molecule is passed through `black_box` so the parse
/// cannot be optimized away.
#[bench]
fn bench_parse_smiles(b: &mut test::bench::Bencher) {
    b.iter(|| {
        SMILES_SET.iter().for_each(|smiles| {
            test::black_box(ROMol::from_smiles(smiles).unwrap());
        })
    });
}

/// Featurization-style workload over shared references: for every atom of
/// every molecule, fetch it with `atom_ref` and read seven properties.
///
/// The molecules are parsed once up front, so only the per-atom access and
/// property reads are inside the measured closure.
#[bench]
fn bench_atom_ref_all_properties(b: &mut test::bench::Bencher) {
    // Setup (not measured): parse the whole input set.
    let mols = SMILES_SET
        .iter()
        .map(|smiles| ROMol::from_smiles(smiles).unwrap())
        .collect::<Vec<ROMol>>();

    b.iter(|| {
        for mol in mols.iter() {
            // `true` is forwarded as-is to `num_atoms`, exactly as before.
            for idx in 0..mol.num_atoms(true) {
                let atom = mol.atom_ref(idx);
                test::black_box(atom.symbol());
                test::black_box(atom.get_atomic_num());
                test::black_box(atom.get_formal_charge());
                test::black_box(atom.get_is_aromatic());
                test::black_box(atom.get_hybridization_type());
                test::black_box(atom.get_degree());
                test::black_box(atom.get_total_num_hs());
            }
        }
    });
}

/// The same seven-property workload, but reaching each atom through
/// `atom_with_idx`, which is called on mutably borrowed molecules.
///
/// Intended as a regression guard: the expectation is that this runs at the
/// same speed as the `atom_ref` benchmark.
#[bench]
fn bench_atom_mut_all_properties(b: &mut test::bench::Bencher) {
    // Setup (not measured): parse the whole input set.
    let mut mols = SMILES_SET
        .iter()
        .map(|smiles| ROMol::from_smiles(smiles).unwrap())
        .collect::<Vec<ROMol>>();

    b.iter(|| {
        for mol in mols.iter_mut() {
            // The atom count is read before the loop body takes its borrow of `mol`.
            for idx in 0..mol.num_atoms(true) {
                let atom = mol.atom_with_idx(idx);
                test::black_box(atom.symbol());
                test::black_box(atom.get_atomic_num());
                test::black_box(atom.get_formal_charge());
                test::black_box(atom.get_is_aromatic());
                test::black_box(atom.get_hybridization_type());
                test::black_box(atom.get_degree());
                test::black_box(atom.get_total_num_hs());
            }
        }
    });
}

/// Measures only the cost of cloning each pre-parsed molecule.
///
/// Helps judge how expensive it is to clone a molecule when a caller holds
/// just a `&ROMol` but needs something it can mutate.
#[bench]
fn bench_clone_molecules(b: &mut test::bench::Bencher) {
    // Setup (not measured): parse the whole input set.
    let mols = SMILES_SET
        .iter()
        .map(|smiles| ROMol::from_smiles(smiles).unwrap())
        .collect::<Vec<ROMol>>();

    b.iter(|| {
        mols.iter().for_each(|mol| {
            test::black_box(mol.clone());
        })
    });
}
6 changes: 3 additions & 3 deletions rdkit-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "rdkit-sys"
authors = ["Xavier Lange (xrlange@gmail.com)", "chrissly31415"]
version = "0.4.12"
edition = "2021"
edition = "2024"
license = "MIT"
description = "RDKit CFFI library builder and bindings"
repository = "https://github.com/rdkit-rs/rdkit/tree/main/rdkit-sys"
Expand All @@ -14,9 +14,9 @@ exclude = ["rdkit-*", "*.tar.gz", "examples/"]
cxx = "1.0.109"

[build-dependencies]
env_logger = "0.10.0"
env_logger = "0.11"
cxx-build = "1.0.109"
which = "4.4.2"
which = "8"

[features]
default = []
Expand Down
2 changes: 1 addition & 1 deletion rdkit-sys/build.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const CPP_VERSION_FLAG: &str = "-std=c++17";
const CPP_VERSION_FLAG: &str = "-std=c++20";

fn main() {
if std::env::var("DOCS_RS").is_ok() {
Expand Down
4 changes: 2 additions & 2 deletions rdkit-sys/rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ trailing_comma = "Vertical"
match_block_trailing_comma = false
blank_lines_upper_bound = 1
blank_lines_lower_bound = 0
edition = "2021"
version = "One"
edition = "2024"
style_edition = "2024"
inline_attribute_width = 0
format_generated_files = true
merge_derives = true
Expand Down
22 changes: 22 additions & 0 deletions rdkit-sys/src/bridge/descriptors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,27 @@ pub mod ffi {
properties: &SharedPtr<Properties>,
mol: &SharedPtr<ROMol>,
) -> UniquePtr<CxxVector<f64>>;

// Targeted descriptors
//
// One binding per descriptor, as opposed to `compute_properties` above, which
// returns a whole vector of values. Every function borrows the molecule as
// `&SharedPtr<ROMol>` and returns a single `f64`, `u32` or `String`.
// The matching C++ prototypes live in `wrapper/include/descriptors.h`.
// NOTE(review): the C++ definitions are not visible in this view; the names
// suggest thin wrappers over RDKit's `Descriptors` functions — confirm against
// the wrapper implementation before documenting exact semantics or units.

// Molecular weight / formula (presumably exact and average molecular weight).
pub fn calc_exact_mw(mol: &SharedPtr<ROMol>) -> f64;
pub fn calc_amw(mol: &SharedPtr<ROMol>) -> f64;
pub fn calc_mol_formula(mol: &SharedPtr<ROMol>) -> String;
pub fn calc_num_heavy_atoms(mol: &SharedPtr<ROMol>) -> u32;
// Floating-point descriptors.
pub fn calc_fraction_csp3(mol: &SharedPtr<ROMol>) -> f64;
pub fn calc_labute_asa(mol: &SharedPtr<ROMol>) -> f64;
pub fn calc_tpsa(mol: &SharedPtr<ROMol>) -> f64;
pub fn calc_clog_p(mol: &SharedPtr<ROMol>) -> f64;
// Count descriptors (all returned as `u32`).
pub fn calc_num_hbd(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_hba(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_rotatable_bonds(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_amide_bonds(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_heteroatoms(mol: &SharedPtr<ROMol>) -> u32;
// Ring-related counts.
pub fn calc_num_aromatic_rings(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_aliphatic_rings(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_saturated_rings(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_heterocycles(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_aromatic_heterocycles(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_spiro_atoms(mol: &SharedPtr<ROMol>) -> u32;
pub fn calc_num_bridgehead_atoms(mol: &SharedPtr<ROMol>) -> u32;
}
}
5 changes: 4 additions & 1 deletion rdkit-sys/src/bridge/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ mod mol_standardize;
pub use mol_standardize::ffi as mol_standardize_ffi;

mod periodic_table;
pub use periodic_table::{ffi as periodic_table_ffi, PeriodicTableOps};
pub use periodic_table::{PeriodicTableOps, ffi as periodic_table_ffi};

mod ring_info;
pub use ring_info::ffi as ring_info_ffi;

mod ro_mol;
pub use ro_mol::ffi as ro_mol_ffi;
Expand Down
2 changes: 1 addition & 1 deletion rdkit-sys/src/bridge/periodic_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub trait PeriodicTableOps {
fn getElementName(self, atomic_number: u32) -> String;
fn getValenceList(self, atomic_number: u32) -> &'static CxxVector<i32>;
}
impl<'a> PeriodicTableOps for UniquePtr<PeriodicTable> {
impl PeriodicTableOps for UniquePtr<PeriodicTable> {
fn getElementSymbol(self, atomic_number: u32) -> String {
ffi::getElementSymbol(atomic_number)
}
Expand Down
30 changes: 30 additions & 0 deletions rdkit-sys/src/bridge/ring_info.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// cxx bridge for the ring-query helpers declared in
// `wrapper/include/ring_info.h` (C++ namespace `RDKit`).
//
// Every function borrows the molecule as `&SharedPtr<ROMol>`; none of the
// signatures take it mutably. Atom and bond indices, as well as ring sizes
// passed in, are `u32`; ring sizes returned in vectors are `i32`.
// NOTE(review): the C++ definitions are not visible in this view, so the
// per-function notes describe the signatures and what the names suggest —
// confirm behavior (e.g. for out-of-range indices) in the wrapper source.
#[cxx::bridge(namespace = "RDKit")]
pub mod ffi {
unsafe extern "C++" {
include!("wrapper/include/ring_info.h");

// Reuse the `ROMol` type from the `ro_mol` bridge instead of declaring a
// second opaque type for the same C++ class.
pub type ROMol = crate::ro_mol_ffi::ROMol;

/// Returns a ring count for the whole molecule (presumably the number of
/// rings RDKit's ring info holds for `mol`).
pub fn mol_num_rings(mol: &SharedPtr<ROMol>) -> u32;
/// Returns whether the atom at `atom_idx` is in a ring of exactly `size`
/// members (per the name — confirm).
pub fn mol_is_atom_in_ring_of_size(
mol: &SharedPtr<ROMol>,
atom_idx: u32,
size: u32,
) -> bool;
/// Bond counterpart of `mol_is_atom_in_ring_of_size`, keyed by `bond_idx`.
pub fn mol_is_bond_in_ring_of_size(
mol: &SharedPtr<ROMol>,
bond_idx: u32,
size: u32,
) -> bool;
/// Returns a per-atom ring count for the atom at `atom_idx`.
pub fn mol_num_atom_rings(mol: &SharedPtr<ROMol>, atom_idx: u32) -> u32;
/// Returns a per-bond ring count for the bond at `bond_idx`.
pub fn mol_num_bond_rings(mol: &SharedPtr<ROMol>, bond_idx: u32) -> u32;
/// Returns an owned C++ vector of `i32` values for the atom at `atom_idx`
/// (presumably the sizes of the rings containing it).
pub fn mol_atom_ring_sizes(
mol: &SharedPtr<ROMol>,
atom_idx: u32,
) -> UniquePtr<CxxVector<i32>>;
/// Bond counterpart of `mol_atom_ring_sizes`, keyed by `bond_idx`.
pub fn mol_bond_ring_sizes(
mol: &SharedPtr<ROMol>,
bond_idx: u32,
) -> UniquePtr<CxxVector<i32>>;
}
}
1 change: 1 addition & 0 deletions rdkit-sys/src/bridge/ro_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub mod ffi {

pub fn get_num_atoms(mol: &SharedPtr<ROMol>, onlyExplicit: bool) -> u32;
pub fn get_atom_with_idx(mol: &mut SharedPtr<ROMol>, idx: u32) -> Pin<&mut Atom>;
/// Shared-borrow counterpart of `get_atom_with_idx`: takes the molecule as
/// `&SharedPtr<ROMol>` and yields a `Pin<&Atom>` rather than `Pin<&mut Atom>`,
/// so callers holding only a shared reference can still read atoms.
pub fn get_atom_with_idx_const(mol: &SharedPtr<ROMol>, idx: u32) -> Pin<&Atom>;
pub fn get_symbol(atom: Pin<&Atom>) -> String;
pub fn get_is_aromatic(atom: Pin<&Atom>) -> bool;
pub fn get_atomic_num(atom: Pin<&Atom>) -> i32;
Expand Down
4 changes: 3 additions & 1 deletion rdkit-sys/tests/test_atoms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ fn test_atoms() {

assert_eq!(
&atoms,
&["C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"]
&[
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C"
]
);

let mut atom = rdkit_sys::ro_mol_ffi::get_atom_with_idx(&mut romol, 2);
Expand Down
5 changes: 4 additions & 1 deletion rdkit-sys/tests/test_ro_mol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ fn mol_to_molblock_test() {
cxx::let_cxx_string!(smiles = "CC");
let romol = rdkit_sys::ro_mol_ffi::smiles_to_mol(&smiles).unwrap();
let molblock = rdkit_sys::ro_mol_ffi::mol_to_molblock(&romol);
assert_eq!(molblock, "\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n");
assert_eq!(
molblock,
"\n RDKit 2D\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\nM END\n"
);
}

#[test]
Expand Down
7 changes: 5 additions & 2 deletions rdkit-sys/tests/test_rw_mol.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cxx::{let_cxx_string, SharedPtr};
use cxx::{SharedPtr, let_cxx_string};
use rdkit_sys::{ro_mol_ffi::ROMol, rw_mol_ffi::RWMol};

#[test]
Expand Down Expand Up @@ -191,7 +191,10 @@ CC(=O)OC(CC(=O)[O-])C[N+](C)(C)C
let ro_mol = unsafe { std::mem::transmute::<SharedPtr<RWMol>, SharedPtr<ROMol>>(rw_mol) };

let smiles = rdkit_sys::ro_mol_ffi::mol_to_smiles(&ro_mol);
assert_eq!("[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]", &smiles);
assert_eq!(
"[H]C([H])([H])C(=O)OC([H])(C([H])([H])C(=O)[O-])C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])C([H])([H])[H]",
&smiles
);
}

#[test]
Expand Down
22 changes: 22 additions & 0 deletions rdkit-sys/wrapper/include/descriptors.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include "rust/cxx.h"
#include <GraphMol/Descriptors/MolDescriptors.h>
#include <GraphMol/Descriptors/Property.h>

namespace RDKit {
Expand All @@ -10,4 +11,25 @@ std::shared_ptr<Properties> new_properties();
std::unique_ptr<std::vector<std::string>> get_property_names(const std::shared_ptr<Properties> &props);
std::unique_ptr<std::vector<double>> compute_properties(const std::shared_ptr<Properties> &props,
const std::shared_ptr<ROMol> &mol);
// Targeted descriptors
//
// One free function per descriptor; each takes the molecule by const reference
// to a shared_ptr and returns a single scalar or a rust::String. These
// prototypes are the C++ side of the bindings in
// rdkit-sys/src/bridge/descriptors.rs and must stay in sync with them.
// NOTE(review): the bridge declares the counts as `u32` while these use
// `unsigned int`; this presumably relies on `unsigned int` being 32 bits on
// the supported platforms — confirm.
double calc_exact_mw(const std::shared_ptr<ROMol> &mol);
double calc_amw(const std::shared_ptr<ROMol> &mol);
rust::String calc_mol_formula(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_heavy_atoms(const std::shared_ptr<ROMol> &mol);
double calc_fraction_csp3(const std::shared_ptr<ROMol> &mol);
double calc_labute_asa(const std::shared_ptr<ROMol> &mol);
double calc_tpsa(const std::shared_ptr<ROMol> &mol);
double calc_clog_p(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_hbd(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_hba(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_rotatable_bonds(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_amide_bonds(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_heteroatoms(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_aromatic_rings(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_aliphatic_rings(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_saturated_rings(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_heterocycles(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_aromatic_heterocycles(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_spiro_atoms(const std::shared_ptr<ROMol> &mol);
unsigned int calc_num_bridgehead_atoms(const std::shared_ptr<ROMol> &mol);
} // namespace RDKit
17 changes: 17 additions & 0 deletions rdkit-sys/wrapper/include/ring_info.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "rust/cxx.h"
#include <GraphMol/GraphMol.h>
#include <GraphMol/RingInfo.h>

// Ring-query helpers exposed to Rust through the cxx bridge in
// rdkit-sys/src/bridge/ring_info.rs; the prototypes here must stay in sync
// with the declarations there. All functions take the molecule by const
// reference to a shared_ptr.
// NOTE(review): definitions are not visible in this header; presumably they
// forward to the molecule's RDKit RingInfo — confirm in the wrapper source.
namespace RDKit {

// Ring count for the whole molecule.
unsigned int mol_num_rings(const std::shared_ptr<ROMol> &mol);
// Ring-membership tests for one atom / one bond and a given ring size.
bool mol_is_atom_in_ring_of_size(const std::shared_ptr<ROMol> &mol, unsigned int atom_idx, unsigned int size);
bool mol_is_bond_in_ring_of_size(const std::shared_ptr<ROMol> &mol, unsigned int bond_idx, unsigned int size);
// Per-atom / per-bond ring counts.
unsigned int mol_num_atom_rings(const std::shared_ptr<ROMol> &mol, unsigned int atom_idx);
unsigned int mol_num_bond_rings(const std::shared_ptr<ROMol> &mol, unsigned int bond_idx);
// Per-atom / per-bond vectors of int32_t, returned by unique_ptr so cxx can
// hand ownership to Rust as UniquePtr<CxxVector<i32>>.
std::unique_ptr<std::vector<int32_t>> mol_atom_ring_sizes(const std::shared_ptr<ROMol> &mol, unsigned int atom_idx);
std::unique_ptr<std::vector<int32_t>> mol_bond_ring_sizes(const std::shared_ptr<ROMol> &mol, unsigned int bond_idx);

} // namespace RDKit
2 changes: 2 additions & 0 deletions rdkit-sys/wrapper/include/ro_mol.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ unsigned int atom_sanitize_exception_get_atom_idx(const MolSanitizeExceptionUniq

unsigned int get_num_atoms(const std::shared_ptr<ROMol> &mol, bool only_explicit);
Atom &get_atom_with_idx(std::shared_ptr<ROMol> &mol, unsigned int idx);
// Const counterpart of get_atom_with_idx: takes the molecule by const
// reference and returns a const Atom reference, matching the
// `&SharedPtr<ROMol>` -> `Pin<&Atom>` binding on the Rust side.
const Atom &get_atom_with_idx_const(const std::shared_ptr<ROMol> &mol,
unsigned int idx);
rust::String get_symbol(const Atom &atom);
bool get_is_aromatic(const Atom &atom);
int get_atomic_num(const Atom &atom);
Expand Down
Loading