diff --git a/contrib/workflow_with_ai_parse_document/.gitignore b/contrib/job_with_ai_parse_document/.gitignore similarity index 100% rename from contrib/workflow_with_ai_parse_document/.gitignore rename to contrib/job_with_ai_parse_document/.gitignore diff --git a/contrib/workflow_with_ai_parse_document/README.md b/contrib/job_with_ai_parse_document/README.md similarity index 99% rename from contrib/workflow_with_ai_parse_document/README.md rename to contrib/job_with_ai_parse_document/README.md index 2e0e921e..f899e1d0 100644 --- a/contrib/workflow_with_ai_parse_document/README.md +++ b/contrib/job_with_ai_parse_document/README.md @@ -96,7 +96,7 @@ Uses `ai_parse_document` to extract text, tables, and metadata from PDFs/images: Extracts clean concatenated text using `transform()`: - Reads from previous task's table via streaming -- Handles both parser v1.0 and v2.0 formats +- Extracts text from parsed document elements - Uses `transform()` for efficient text extraction - Includes error handling for failed parses diff --git a/contrib/workflow_with_ai_parse_document/assets/document_summary.png b/contrib/job_with_ai_parse_document/assets/document_summary.png similarity index 100% rename from contrib/workflow_with_ai_parse_document/assets/document_summary.png rename to contrib/job_with_ai_parse_document/assets/document_summary.png diff --git a/contrib/workflow_with_ai_parse_document/assets/figure_description.png b/contrib/job_with_ai_parse_document/assets/figure_description.png similarity index 100% rename from contrib/workflow_with_ai_parse_document/assets/figure_description.png rename to contrib/job_with_ai_parse_document/assets/figure_description.png diff --git a/contrib/workflow_with_ai_parse_document/assets/page1_bounding_boxes.png b/contrib/job_with_ai_parse_document/assets/page1_bounding_boxes.png similarity index 100% rename from contrib/workflow_with_ai_parse_document/assets/page1_bounding_boxes.png rename to contrib/job_with_ai_parse_document/assets/page1_bounding_boxes.png diff --git a/contrib/workflow_with_ai_parse_document/assets/page1_elements_list.png b/contrib/job_with_ai_parse_document/assets/page1_elements_list.png similarity index 100% rename from contrib/workflow_with_ai_parse_document/assets/page1_elements_list.png rename to contrib/job_with_ai_parse_document/assets/page1_elements_list.png diff --git a/contrib/workflow_with_ai_parse_document/assets/page2_contents_table.png b/contrib/job_with_ai_parse_document/assets/page2_contents_table.png similarity index 100% rename from contrib/workflow_with_ai_parse_document/assets/page2_contents_table.png rename to contrib/job_with_ai_parse_document/assets/page2_contents_table.png diff --git a/contrib/workflow_with_ai_parse_document/assets/table_extraction.png b/contrib/job_with_ai_parse_document/assets/table_extraction.png similarity index 100% rename from contrib/workflow_with_ai_parse_document/assets/table_extraction.png rename to contrib/job_with_ai_parse_document/assets/table_extraction.png diff --git a/contrib/workflow_with_ai_parse_document/databricks.yml b/contrib/job_with_ai_parse_document/databricks.yml similarity index 100% rename from contrib/workflow_with_ai_parse_document/databricks.yml rename to contrib/job_with_ai_parse_document/databricks.yml diff --git a/contrib/workflow_with_ai_parse_document/resources/ai_parse_document_job.job.yml b/contrib/job_with_ai_parse_document/resources/ai_parse_document_job.job.yml similarity index 100% rename from contrib/workflow_with_ai_parse_document/resources/ai_parse_document_job.job.yml rename to contrib/job_with_ai_parse_document/resources/ai_parse_document_job.job.yml diff --git a/contrib/workflow_with_ai_parse_document/src/explorations/ai_parse_document -- debug output.py b/contrib/job_with_ai_parse_document/src/explorations/ai_parse_document -- debug output.py similarity index 100% rename from contrib/workflow_with_ai_parse_document/src/explorations/ai_parse_document -- debug output.py rename to contrib/job_with_ai_parse_document/src/explorations/ai_parse_document -- debug output.py diff --git a/contrib/workflow_with_ai_parse_document/src/transformations/01_parse_documents.py b/contrib/job_with_ai_parse_document/src/transformations/01_parse_documents.py similarity index 100% rename from contrib/workflow_with_ai_parse_document/src/transformations/01_parse_documents.py rename to contrib/job_with_ai_parse_document/src/transformations/01_parse_documents.py diff --git a/contrib/workflow_with_ai_parse_document/src/transformations/02_extract_text.py b/contrib/job_with_ai_parse_document/src/transformations/02_extract_text.py similarity index 87% rename from contrib/workflow_with_ai_parse_document/src/transformations/02_extract_text.py rename to contrib/job_with_ai_parse_document/src/transformations/02_extract_text.py index 29fa9097..28a22449 100644 --- a/contrib/workflow_with_ai_parse_document/src/transformations/02_extract_text.py +++ b/contrib/job_with_ai_parse_document/src/transformations/02_extract_text.py @@ -47,11 +47,7 @@ "\n\n", expr(""" transform( - CASE - WHEN try_cast(parsed:metadata:version AS STRING) = '1.0' - THEN try_cast(parsed:document:pages AS ARRAY) - ELSE try_cast(parsed:document:elements AS ARRAY) - END, + try_cast(parsed:document:elements AS ARRAY), element -> try_cast(element:content AS STRING) ) """), diff --git a/contrib/workflow_with_ai_parse_document/src/transformations/03_extract_structured_data.py b/contrib/job_with_ai_parse_document/src/transformations/03_extract_structured_data.py similarity index 100% rename from contrib/workflow_with_ai_parse_document/src/transformations/03_extract_structured_data.py rename to contrib/job_with_ai_parse_document/src/transformations/03_extract_structured_data.py