import json
from types import ModuleType


def create_encoder(registry, user_encoder_cls=None):
    """Build a ``JSONEncoder`` subclass that converts registered types lazily.

    The returned class lets ``json.dumps`` walk plain data (dicts, lists,
    strings, numbers) on its own and only calls back into Python, through
    ``default``, for objects the JSON encoder cannot serialize itself.

    Args:
        registry: an object exposing ``typenames`` (a mapping of class to
            typename) and ``convert_to_dict`` (a mapping of typename to a
            callable that turns an instance into a serializable value).
        user_encoder_cls: optional ``json.JSONEncoder`` subclass to use as
            the base class; ``json.JSONEncoder`` is used when omitted.

    Returns:
        a new encoder class bound to `registry`, suitable for
        ``json.dumps(..., cls=...)``.
    """
    base = user_encoder_cls or json.JSONEncoder

    class SpiffEncoder(base):
        """Encoder that consults the registry before the base encoder."""

        def default(self, obj):
            """Return a JSON-serializable stand-in for `obj`.

            Raises whatever the base encoder's ``default`` raises (a
            ``TypeError`` for ``json.JSONEncoder``) when `obj` is not handled
            here.
            """
            # Single lookup; a typename without a converter falls through to
            # the base encoder instead of raising KeyError.
            to_dict = registry.convert_to_dict.get(registry.typenames.get(type(obj)))
            if to_dict is not None:
                return to_dict(obj)
            # Callables and modules are not serializable; they are emitted
            # as JSON null rather than aborting the whole dump.
            if callable(obj) or isinstance(obj, ModuleType):
                return None
            if isinstance(obj, set):
                return list(obj)
            return super().default(obj)

    return SpiffEncoder
def _convert_for_encoder(self, obj):
    """Shallow conversion used while the JSON encoder drives traversal.

    Only `obj` itself is converted; nested values are returned untouched so
    that the JSON encoder can walk them and call back for registered types.

    Args:
        obj: the object to convert

    Returns:
        the registered dictionary representation of the preprocessed object
        if its class has a converter, otherwise the preprocessed object
    """
    # Run the preprocessing hook on every object, exactly as `convert` does,
    # so that an overridden `clean` is honoured for non-dict objects too.
    cleaned = self.clean(obj)
    to_dict = self.convert_to_dict.get(self.typenames.get(cleaned.__class__))
    if to_dict is not None:
        return to_dict(cleaned)
    return cleaned
def serialize_json(self, workflow, use_gzip=False):
    """Serialize the dictionary representation of the workflow to JSON.

    Args:
        workflow: the workflow to serialize
        use_gzip (bool): optionally gzip the resulting JSON string

    Returns:
        a JSON dump of the dictionary representation or a gzipped version of it
    """
    registry = self.registry
    # Remember the current mode instead of assuming False, so that a nested
    # call sharing this registry does not switch encoder mode off for the
    # outer call. getattr tolerates a custom registry without the attribute.
    previous_mode = getattr(registry, '_encoder_mode', False)
    registry._encoder_mode = True
    try:
        dct = self.to_dict(workflow)
        dct[self.VERSION_KEY] = self.VERSION
        json_str = json.dumps(dct, cls=self._encoder_cls)
    finally:
        registry._encoder_mode = previous_mode
    return gzip.compress(json_str.encode('utf-8')) if use_gzip else json_str
deserialization start_deserialize = time.time() - restored_workflow = self.serializer.from_dict(state) + restored_workflow = self.serializer.deserialize_json(state) end_deserialize = time.time() deserialize_time = end_deserialize - start_deserialize @@ -119,13 +119,13 @@ def test_performance_100_items(self): # Measure serialization start_serialize = time.time() - state = self.serializer.to_dict(workflow) + state = self.serializer.serialize_json(workflow) end_serialize = time.time() serialize_time = end_serialize - start_serialize # Measure deserialization start_deserialize = time.time() - restored_workflow = self.serializer.from_dict(state) + restored_workflow = self.serializer.deserialize_json(state) end_deserialize = time.time() deserialize_time = end_deserialize - start_deserialize @@ -158,13 +158,13 @@ def test_performance_200_items(self): # Measure serialization start_serialize = time.time() - state = self.serializer.to_dict(workflow) + state = self.serializer.serialize_json(workflow) end_serialize = time.time() serialize_time = end_serialize - start_serialize # Measure deserialization start_deserialize = time.time() - restored_workflow = self.serializer.from_dict(state) + restored_workflow = self.serializer.deserialize_json(state) end_deserialize = time.time() deserialize_time = end_deserialize - start_deserialize @@ -197,13 +197,13 @@ def test_performance_300_items(self): # Measure serialization start_serialize = time.time() - state = self.serializer.to_dict(workflow) + state = self.serializer.serialize_json(workflow) end_serialize = time.time() serialize_time = end_serialize - start_serialize # Measure deserialization start_deserialize = time.time() - restored_workflow = self.serializer.from_dict(state) + restored_workflow = self.serializer.deserialize_json(state) end_deserialize = time.time() deserialize_time = end_deserialize - start_deserialize @@ -237,7 +237,7 @@ def did_complete_task(task): # Serialize at checkpoints if tasks_completed % 
checkpoint_interval == 0: start_serialize = time.time() - state = self.serializer.to_dict(workflow) + state = self.serializer.serialize_json(workflow) end_serialize = time.time() serialize_time = end_serialize - start_serialize