From c756409bf1f33154ff8bd8e7f69f168c0fc050e9 Mon Sep 17 00:00:00 2001
From: magnurud <mag.aars@gmail.com>
Date: Tue, 5 Aug 2025 11:08:19 +0200
Subject: [PATCH 1/4] Use required information from model field config

---
 .../preprocess/preprocess/make_predictions.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/docs/workflows/transitions/preprocess/preprocess/make_predictions.py b/docs/workflows/transitions/preprocess/preprocess/make_predictions.py
index 907e4e3..1696e33 100644
--- a/docs/workflows/transitions/preprocess/preprocess/make_predictions.py
+++ b/docs/workflows/transitions/preprocess/preprocess/make_predictions.py
@@ -52,6 +52,16 @@ def get_num_pages(las_client, document_id, max_prediction_pages):
     return min(len(pdf.pages), max_prediction_pages)
 
 
+def add_info_from_field_config(form_config, field_config):
+    try:
+        for key, value in field_config.items():
+            form_config['config']['fields'][key]['required'] = value.get('required', True)
+        return form_config
+    except Exception as e:
+        logging.error(f'Error adding info from field config: {e}')
+        return form_config
+
+
 @las.transition_handler
 def make_predictions(las_client, event):
     document_id = event['documentId']
@@ -84,6 +94,8 @@ def make_predictions(las_client, event):
         form_config = create_form_config_from_model(model_field_config, form_config)
         logging.info(f'\nlabels in fieldConfig does not match form_config. Updated form_config used is: {form_config}')
 
+    form_config = add_info_from_field_config(form_config, model_field_config)
+
     no_empty_prediction_fields = set()
 
     if not (predictions := event.get('predictions')):
@@ -191,9 +203,12 @@ def make_predictions(las_client, event):
                         prediction['confidence'] = 0.0
                     if not above_threshold_or_optional(prediction, field_config):
                         all_above_threshold_or_optional = False
-
-                has_all_required_labels = required_labels(field_config) <= set(map(lambda p: p['label'], top1_preds))
+                _required_labels = required_labels(model_field_config)
+                _top_1_labels = set(map(lambda p: p['label'], top1_preds))
+                has_all_required_labels = _required_labels <= _top_1_labels
                 needs_validation = not has_all_required_labels or not all_above_threshold_or_optional
+                logging.info(f'required labels: {_required_labels}')
+                logging.info(f"existing labels: {_top_1_labels}")
 
             logging.info(f'All predictions above threshold (or optional): {all_above_threshold_or_optional}')
             logging.info(f'All required labels exist: {has_all_required_labels}')

From f378b168989c7ff69c3387a446cb696b69a77438 Mon Sep 17 00:00:00 2001
From: magnurud <mag.aars@gmail.com>
Date: Tue, 5 Aug 2025 19:45:12 +0200
Subject: [PATCH 2/4] fix test and comment

---
 .../transitions/preprocess/preprocess/make_predictions.py       | 2 +-
 docs/workflows/transitions/preprocess/tests/test_handler.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/workflows/transitions/preprocess/preprocess/make_predictions.py b/docs/workflows/transitions/preprocess/preprocess/make_predictions.py
index 1696e33..588d292 100644
--- a/docs/workflows/transitions/preprocess/preprocess/make_predictions.py
+++ b/docs/workflows/transitions/preprocess/preprocess/make_predictions.py
@@ -203,7 +203,7 @@ def make_predictions(las_client, event):
                         prediction['confidence'] = 0.0
                     if not above_threshold_or_optional(prediction, field_config):
                         all_above_threshold_or_optional = False
-                _required_labels = required_labels(model_field_config)
+                _required_labels = required_labels(field_config)
                 _top_1_labels = set(map(lambda p: p['label'], top1_preds))
                 has_all_required_labels = _required_labels <= _top_1_labels
                 needs_validation = not has_all_required_labels or not all_above_threshold_or_optional
diff --git a/docs/workflows/transitions/preprocess/tests/test_handler.py b/docs/workflows/transitions/preprocess/tests/test_handler.py
index 3681076..3bf18ba 100644
--- a/docs/workflows/transitions/preprocess/tests/test_handler.py
+++ b/docs/workflows/transitions/preprocess/tests/test_handler.py
@@ -77,7 +77,7 @@ def simple_model_field_config():
     return {
         'total_amount': {},
         'due_date': {},
-        'invoice_id': {},
+        'invoice_id': {'required': False},
         'currency': {},
         'line_items': {
             'type': 'lines',

From b9a7bb0e329b4a9fbe74da0255209fbf1c033984 Mon Sep 17 00:00:00 2001
From: magnurud <mag.aars@gmail.com>
Date: Tue, 5 Aug 2025 19:55:19 +0200
Subject: [PATCH 3/4] Attempt to fix the README checks

---
 .github/workflows/markdown_link_check_config.json | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/markdown_link_check_config.json b/.github/workflows/markdown_link_check_config.json
index fe66697..3503d2c 100644
--- a/.github/workflows/markdown_link_check_config.json
+++ b/.github/workflows/markdown_link_check_config.json
@@ -1,5 +1,8 @@
 {
   "ignorePatterns": [
+    {
+      "pattern": "^https://docs.lucidtech.ai/getting-started/tutorials/"
+    },
     {
       "pattern": "^https://sourcey.com/moxygen"
     },

From c60648aca36900703b40125ec2f234536297f9b2 Mon Sep 17 00:00:00 2001
From: magnurud <mag.aars@gmail.com>
Date: Wed, 6 Aug 2025 07:57:03 +0200
Subject: [PATCH 4/4] Make sure high-confidence empty values for optional
 fields also get accepted

---
 docs/workflows/transitions/preprocess/preprocess/utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/workflows/transitions/preprocess/preprocess/utils.py b/docs/workflows/transitions/preprocess/preprocess/utils.py
index e4e9ea4..5b047f5 100644
--- a/docs/workflows/transitions/preprocess/preprocess/utils.py
+++ b/docs/workflows/transitions/preprocess/preprocess/utils.py
@@ -105,14 +105,15 @@ def patch_and_filter_predictions(predictions, field_config, labels, merge_contin
 
 
 def above_threshold_or_optional(prediction, field_config):
-    label, confidence = prediction['label'], prediction.get('confidence')
+    label, confidence, value = prediction['label'], prediction.get('confidence'), prediction.get('value')
     if label not in field_config:
         return False
 
     threshold = field_config[label]['confidenceLevels']
     is_optional = not field_config[label].get('required', True)
+    valid_optional_prediction = confidence < threshold['low'] or not value
 
-    return (threshold['automated'] <= confidence) or (is_optional and confidence < threshold['low'])
+    return (threshold['automated'] <= confidence) or (is_optional and valid_optional_prediction)
 
 
 def threshold_is_zero_for_all(field_config):