From c756409bf1f33154ff8bd8e7f69f168c0fc050e9 Mon Sep 17 00:00:00 2001 From: magnurud Date: Tue, 5 Aug 2025 11:08:19 +0200 Subject: [PATCH 1/4] Use required information from model field config --- .../preprocess/preprocess/make_predictions.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/docs/workflows/transitions/preprocess/preprocess/make_predictions.py b/docs/workflows/transitions/preprocess/preprocess/make_predictions.py index 907e4e3..1696e33 100644 --- a/docs/workflows/transitions/preprocess/preprocess/make_predictions.py +++ b/docs/workflows/transitions/preprocess/preprocess/make_predictions.py @@ -52,6 +52,16 @@ def get_num_pages(las_client, document_id, max_prediction_pages): return min(len(pdf.pages), max_prediction_pages) +def add_info_from_field_config(form_config, field_config): + try: + for key, value in field_config.items(): + form_config['config']['fields'][key]['required'] = value.get('required', True) + return form_config + except Exception as e: + logging.error(f'Error adding info from field config: {e}') + return form_config + + @las.transition_handler def make_predictions(las_client, event): document_id = event['documentId'] @@ -84,6 +94,8 @@ def make_predictions(las_client, event): form_config = create_form_config_from_model(model_field_config, form_config) logging.info(f'\nlabels in fieldConfig does not match form_config. 
Updated form_config used is: {form_config}') + form_config = add_info_from_field_config(form_config, model_field_config) + no_empty_prediction_fields = set() if not (predictions := event.get('predictions')): @@ -191,9 +203,12 @@ def make_predictions(las_client, event): prediction['confidence'] = 0.0 if not above_threshold_or_optional(prediction, field_config): all_above_threshold_or_optional = False - - has_all_required_labels = required_labels(field_config) <= set(map(lambda p: p['label'], top1_preds)) + _required_labels = required_labels(model_field_config) + _top_1_labels = set(map(lambda p: p['label'], top1_preds)) + has_all_required_labels = _required_labels <= _top_1_labels needs_validation = not has_all_required_labels or not all_above_threshold_or_optional + logging.info(f'required labels: {_required_labels}') + logging.info(f"existing labels: {_top_1_labels}") logging.info(f'All predictions above threshold (or optional): {all_above_threshold_or_optional}') logging.info(f'All required labels exist: {has_all_required_labels}') From f378b168989c7ff69c3387a446cb696b69a77438 Mon Sep 17 00:00:00 2001 From: magnurud Date: Tue, 5 Aug 2025 19:45:12 +0200 Subject: [PATCH 2/4] fix test and comment --- .../transitions/preprocess/preprocess/make_predictions.py | 2 +- docs/workflows/transitions/preprocess/tests/test_handler.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/workflows/transitions/preprocess/preprocess/make_predictions.py b/docs/workflows/transitions/preprocess/preprocess/make_predictions.py index 1696e33..588d292 100644 --- a/docs/workflows/transitions/preprocess/preprocess/make_predictions.py +++ b/docs/workflows/transitions/preprocess/preprocess/make_predictions.py @@ -203,7 +203,7 @@ def make_predictions(las_client, event): prediction['confidence'] = 0.0 if not above_threshold_or_optional(prediction, field_config): all_above_threshold_or_optional = False - _required_labels = required_labels(model_field_config) + 
_required_labels = required_labels(field_config) _top_1_labels = set(map(lambda p: p['label'], top1_preds)) has_all_required_labels = _required_labels <= _top_1_labels needs_validation = not has_all_required_labels or not all_above_threshold_or_optional diff --git a/docs/workflows/transitions/preprocess/tests/test_handler.py b/docs/workflows/transitions/preprocess/tests/test_handler.py index 3681076..3bf18ba 100644 --- a/docs/workflows/transitions/preprocess/tests/test_handler.py +++ b/docs/workflows/transitions/preprocess/tests/test_handler.py @@ -77,7 +77,7 @@ def simple_model_field_config(): return { 'total_amount': {}, 'due_date': {}, - 'invoice_id': {}, + 'invoice_id': {'required': False}, 'currency': {}, 'line_items': { 'type': 'lines', From b9a7bb0e329b4a9fbe74da0255209fbf1c033984 Mon Sep 17 00:00:00 2001 From: magnurud Date: Tue, 5 Aug 2025 19:55:19 +0200 Subject: [PATCH 3/4] Attempt to fix the readme checks --- .github/workflows/markdown_link_check_config.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/markdown_link_check_config.json b/.github/workflows/markdown_link_check_config.json index fe66697..3503d2c 100644 --- a/.github/workflows/markdown_link_check_config.json +++ b/.github/workflows/markdown_link_check_config.json @@ -1,5 +1,8 @@ { "ignorePatterns": [ + { + "pattern": "^https://docs.lucidtech.ai/getting-started/tutorials/" + }, { "pattern": "^https://sourcey.com/moxygen" }, From c60648aca36900703b40125ec2f234536297f9b2 Mon Sep 17 00:00:00 2001 From: magnurud Date: Wed, 6 Aug 2025 07:57:03 +0200 Subject: [PATCH 4/4] Make sure high-confidence empty values for optional fields also get accepted --- docs/workflows/transitions/preprocess/preprocess/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/workflows/transitions/preprocess/preprocess/utils.py b/docs/workflows/transitions/preprocess/preprocess/utils.py index e4e9ea4..5b047f5 100644 --- 
a/docs/workflows/transitions/preprocess/preprocess/utils.py +++ b/docs/workflows/transitions/preprocess/preprocess/utils.py @@ -105,14 +105,15 @@ def patch_and_filter_predictions(predictions, field_config, labels, merge_contin def above_threshold_or_optional(prediction, field_config): - label, confidence = prediction['label'], prediction.get('confidence') + label, confidence, value = prediction['label'], prediction.get('confidence'), prediction.get('value') if label not in field_config: return False threshold = field_config[label]['confidenceLevels'] is_optional = not field_config[label].get('required', True) + valid_optional_prediction = confidence < threshold['low'] or not value - return (threshold['automated'] <= confidence) or (is_optional and confidence < threshold['low']) + return (threshold['automated'] <= confidence) or (is_optional and valid_optional_prediction) def threshold_is_zero_for_all(field_config):