@app.exception_handler(errors.ApiValidationError)
def handle_api_validation_error(
    request: fastapi.Request, exc: errors.ApiValidationError
) -> fastapi.responses.JSONResponse:
    """Turn an ``ApiValidationError`` into an HTTP 422 JSON response.

    The exception message is returned verbatim under the ``detail`` key.
    """
    return fastapi.responses.JSONResponse(
        status_code=422,
        content={"detail": str(exc)},
    )


@app.exception_handler(NotImplementedError)
def handle_not_implemented_error(
    request: fastapi.Request, exc: NotImplementedError
) -> fastapi.responses.JSONResponse:
    """Turn a ``NotImplementedError`` into an HTTP 501 JSON response.

    The exception message is returned under the ``detail`` key.
    """
    # A bare `raise NotImplementedError` stringifies to "", which would give
    # the client an empty, useless detail; fall back to a fixed message.
    detail = str(exc) or "Not implemented."
    return fastapi.responses.JSONResponse(
        status_code=501,
        content={"detail": detail},
    )
from __future__ import annotations

from typing import Annotated

import pydantic

# A string that must contain at least one character.
NonEmptyStr = Annotated[str, pydantic.StringConstraints(min_length=1)]


class _BaseModel(pydantic.BaseModel):
    """Shared base for all filter-query models; unknown keys are rejected."""

    model_config = {"extra": "forbid"}


# --- Leaf argument models ---


class KeyExists(_BaseModel):
    """Arguments of the ``key_exists`` predicate."""

    key: NonEmptyStr


class ValueContains(_BaseModel):
    """Arguments of the ``value_contains`` predicate."""

    key: NonEmptyStr
    value_substring: NonEmptyStr


class ValueIn(_BaseModel):
    """Arguments of the ``value_in`` predicate; ``values`` must be non-empty."""

    key: NonEmptyStr
    values: list[NonEmptyStr] = pydantic.Field(min_length=1)


class ValueEquals(_BaseModel):
    """Arguments of the ``value_equals`` predicate."""

    key: NonEmptyStr
    # Deliberately a plain `str`: an empty value is accepted here.
    value: str


class TimeRange(_BaseModel):
    """At least one of start_time or end_time is required.

    Valid combinations: start+end (range), start-only (after), end-only (before).
    AwareDatetime requires timezone info (e.g. "2024-01-01T00:00:00Z").
    Naive datetimes like "2024-01-01T00:00:00" are rejected, preventing
    ambiguous timestamps that could silently resolve to the wrong timezone.
    When both bounds are given, start_time must not be later than end_time.
    """

    key: NonEmptyStr
    start_time: pydantic.AwareDatetime | None = None
    end_time: pydantic.AwareDatetime | None = None

    @pydantic.model_validator(mode="after")
    def _at_least_one_time_bound(self) -> TimeRange:
        """Reject a TimeRange that specifies neither bound."""
        if self.start_time is None and self.end_time is None:
            raise ValueError(
                "TimeRange requires at least one of 'start_time' or 'end_time'."
            )
        return self

    @pydantic.model_validator(mode="after")
    def _bounds_in_order(self) -> TimeRange:
        """Reject an inverted range, which could never match anything."""
        if (
            self.start_time is not None
            and self.end_time is not None
            and self.start_time > self.end_time
        ):
            raise ValueError("TimeRange 'start_time' must not be after 'end_time'.")
        return self


# --- Predicate wrapper models (one field each) ---


class KeyExistsPredicate(_BaseModel):
    """Wrapper object ``{"key_exists": {...}}``."""

    key_exists: KeyExists


class ValueContainsPredicate(_BaseModel):
    """Wrapper object ``{"value_contains": {...}}``."""

    value_contains: ValueContains


class ValueInPredicate(_BaseModel):
    """Wrapper object ``{"value_in": {...}}``."""

    value_in: ValueIn


class ValueEqualsPredicate(_BaseModel):
    """Wrapper object ``{"value_equals": {...}}``."""

    value_equals: ValueEquals


class TimeRangePredicate(_BaseModel):
    """Wrapper object ``{"time_range": {...}}``."""

    time_range: TimeRange


LeafPredicate = (
    KeyExistsPredicate
    | ValueContainsPredicate
    | ValueInPredicate
    | ValueEqualsPredicate
    | TimeRangePredicate
)


class NotPredicate(_BaseModel):
    """Wrapper object ``{"not": <leaf predicate>}``; only leaves may be negated."""

    not_: LeafPredicate = pydantic.Field(alias="not")


class AndPredicate(_BaseModel):
    """Wrapper object ``{"and": [<predicate>, ...]}`` with a non-empty list."""

    and_: list["Predicate"] = pydantic.Field(alias="and", min_length=1)


class OrPredicate(_BaseModel):
    """Wrapper object ``{"or": [<predicate>, ...]}`` with a non-empty list."""

    or_: list["Predicate"] = pydantic.Field(alias="or", min_length=1)


# Built from LeafPredicate so the two unions cannot drift apart; the member
# order is the same as listing every leaf wrapper explicitly.
Predicate = LeafPredicate | NotPredicate | AndPredicate | OrPredicate

# Resolve forward reference to "Predicate" in recursive and/or models
AndPredicate.model_rebuild()
OrPredicate.model_rebuild()


class FilterQuery(_BaseModel):
    """Root: must be exactly one of {"and": [...]} or {"or": [...]}."""

    and_: list[Predicate] | None = pydantic.Field(None, alias="and", min_length=1)
    or_: list[Predicate] | None = pydantic.Field(None, alias="or", min_length=1)

    @pydantic.model_validator(mode="after")
    def _exactly_one_root_operator(self) -> FilterQuery:
        """Reject a root that has both or neither of 'and' / 'or'."""
        has_and = self.and_ is not None
        has_or = self.or_ is not None
        # Equal flags mean either both operators are present or both are absent.
        if has_and == has_or:
            raise ValueError("FilterQuery root must have exactly one of 'and' or 'or'.")
        return self
import pydantic
import pytest

from cloud_pipelines_backend import filter_query_models


class TestFilterQuery:
    """End-to-end parsing of a complete root query."""

    def test_full_example_from_design_doc(self):
        json_str = """
        {
            "and": [
                {"key_exists": {"key": "team"}},
                {"value_equals": {"key": "env", "value": "prod"}},
                {"not": {"key_exists": {"key": "deprecated"}}},
                {"or": [{"value_contains": {"key": "name", "value_substring": "nightly"}}]}
            ]
        }
        """
        result = filter_query_models.FilterQuery.model_validate_json(json_str)
        assert len(result.and_) == 4
        assert isinstance(result.and_[0], filter_query_models.KeyExistsPredicate)
        assert isinstance(result.and_[1], filter_query_models.ValueEqualsPredicate)
        assert isinstance(result.and_[2], filter_query_models.NotPredicate)
        assert isinstance(result.and_[3], filter_query_models.OrPredicate)


class TestLeafPredicates:
    """Each leaf predicate wrapper parses its happy-path JSON."""

    def test_key_exists(self):
        json_str = '{"key_exists": {"key": "team"}}'
        result = filter_query_models.KeyExistsPredicate.model_validate_json(json_str)
        assert result.key_exists == filter_query_models.KeyExists(key="team")

    def test_value_equals(self):
        json_str = '{"value_equals": {"key": "env", "value": "prod"}}'
        result = filter_query_models.ValueEqualsPredicate.model_validate_json(json_str)
        assert result.value_equals == filter_query_models.ValueEquals(
            key="env", value="prod"
        )

    def test_value_contains(self):
        json_str = '{"value_contains": {"key": "name", "value_substring": "nightly"}}'
        result = filter_query_models.ValueContainsPredicate.model_validate_json(
            json_str
        )
        assert result.value_contains == filter_query_models.ValueContains(
            key="name", value_substring="nightly"
        )

    def test_value_in(self):
        json_str = '{"value_in": {"key": "env", "values": ["prod", "staging"]}}'
        result = filter_query_models.ValueInPredicate.model_validate_json(json_str)
        assert result.value_in == filter_query_models.ValueIn(
            key="env", values=["prod", "staging"]
        )

    def test_time_range_with_both_times(self):
        json_str = '{"time_range": {"key": "system/pipeline_run.date.created_at", "start_time": "2024-01-01T00:00:00Z", "end_time": "2024-12-31T23:59:59Z"}}'
        result = filter_query_models.TimeRangePredicate.model_validate_json(json_str)
        assert result.time_range.key == "system/pipeline_run.date.created_at"
        assert result.time_range.start_time is not None
        assert result.time_range.end_time is not None

    def test_time_range_start_only(self):
        json_str = '{"time_range": {"key": "system/pipeline_run.date.created_at", "start_time": "2024-01-01T00:00:00Z"}}'
        result = filter_query_models.TimeRangePredicate.model_validate_json(json_str)
        assert result.time_range.key == "system/pipeline_run.date.created_at"
        assert result.time_range.start_time is not None
        assert result.time_range.end_time is None

    def test_time_range_end_only(self):
        json_str = '{"time_range": {"key": "system/pipeline_run.date.created_at", "end_time": "2024-12-31T23:59:59Z"}}'
        result = filter_query_models.TimeRangePredicate.model_validate_json(json_str)
        assert result.time_range.key == "system/pipeline_run.date.created_at"
        assert result.time_range.start_time is None
        assert result.time_range.end_time is not None

    def test_time_range_rejects_both_none(self):
        json_str = '{"time_range": {"key": "system/pipeline_run.date.created_at"}}'
        with pytest.raises(pydantic.ValidationError, match="at least one"):
            filter_query_models.TimeRangePredicate.model_validate_json(json_str)

    def test_time_range_rejects_naive_datetime(self):
        json_str = '{"time_range": {"key": "k", "start_time": "2024-01-01T00:00:00"}}'
        with pytest.raises(pydantic.ValidationError, match="timezone"):
            filter_query_models.TimeRangePredicate.model_validate_json(json_str)


class TestEmptyStringRejections:
    """Empty strings and empty lists are rejected wherever non-empty is required."""

    def test_key_exists_empty_key(self):
        json_str = '{"key_exists": {"key": ""}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.KeyExistsPredicate.model_validate_json(json_str)

    def test_value_equals_empty_key(self):
        json_str = '{"value_equals": {"key": "", "value": "prod"}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.ValueEqualsPredicate.model_validate_json(json_str)

    def test_value_equals_empty_value_allowed(self):
        json_str = '{"value_equals": {"key": "env", "value": ""}}'
        result = filter_query_models.ValueEqualsPredicate.model_validate_json(json_str)
        assert result.value_equals.value == ""

    def test_value_contains_empty_key(self):
        json_str = '{"value_contains": {"key": "", "value_substring": "nightly"}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.ValueContainsPredicate.model_validate_json(json_str)

    def test_value_contains_empty_substring(self):
        json_str = '{"value_contains": {"key": "name", "value_substring": ""}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.ValueContainsPredicate.model_validate_json(json_str)

    def test_value_in_empty_key(self):
        json_str = '{"value_in": {"key": "", "values": ["prod"]}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.ValueInPredicate.model_validate_json(json_str)

    def test_value_in_empty_values_list(self):
        json_str = '{"value_in": {"key": "env", "values": []}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.ValueInPredicate.model_validate_json(json_str)

    def test_value_in_empty_string_in_list(self):
        json_str = '{"value_in": {"key": "env", "values": ["prod", ""]}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.ValueInPredicate.model_validate_json(json_str)

    def test_time_range_empty_key(self):
        json_str = '{"time_range": {"key": "", "start_time": "2024-01-01T00:00:00Z"}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.TimeRangePredicate.model_validate_json(json_str)

    def test_time_range_empty_start_time(self):
        json_str = '{"time_range": {"key": "k", "start_time": ""}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.TimeRangePredicate.model_validate_json(json_str)


class TestLogicalOperators:
    """The not/and/or wrappers parse, including nested combinations."""

    def test_not_predicate(self):
        json_str = '{"not": {"key_exists": {"key": "deprecated"}}}'
        result = filter_query_models.NotPredicate.model_validate_json(json_str)
        assert isinstance(result.not_, filter_query_models.KeyExistsPredicate)

    def test_and_predicate(self):
        json_str = '{"and": [{"key_exists": {"key": "team"}}, {"value_equals": {"key": "env", "value": "prod"}}]}'
        result = filter_query_models.AndPredicate.model_validate_json(json_str)
        assert len(result.and_) == 2

    def test_or_predicate(self):
        json_str = (
            '{"or": [{"key_exists": {"key": "a"}}, {"key_exists": {"key": "b"}}]}'
        )
        result = filter_query_models.OrPredicate.model_validate_json(json_str)
        assert len(result.or_) == 2

    def test_nested_and_or(self):
        json_str = """
        {
            "and": [
                {"or": [
                    {"key_exists": {"key": "a"}},
                    {"key_exists": {"key": "b"}},
                    {"key_exists": {"key": "c"}}
                ]},
                {"value_equals": {"key": "d", "value": "e"}}
            ]
        }
        """
        result = filter_query_models.AndPredicate.model_validate_json(json_str)
        assert len(result.and_) == 2
        assert isinstance(result.and_[0], filter_query_models.OrPredicate)
        assert len(result.and_[0].or_) == 3

    def test_deeply_nested(self):
        json_str = """
        {
            "and": [
                {"or": [
                    {"and": [
                        {"key_exists": {"key": "deep"}}
                    ]}
                ]}
            ]
        }
        """
        result = filter_query_models.AndPredicate.model_validate_json(json_str)
        assert len(result.and_) == 1
        inner_or = result.and_[0]
        assert isinstance(inner_or, filter_query_models.OrPredicate)
        assert len(inner_or.or_) == 1
        inner_and = inner_or.or_[0]
        assert isinstance(inner_and, filter_query_models.AndPredicate)
        assert len(inner_and.and_) == 1


class TestValidationRejections:
    """Malformed queries are rejected with a pydantic ValidationError."""

    def test_two_keys_in_one_predicate_rejected(self):
        # The two-key object is nested inside a valid root so that it is
        # validated as a predicate. Passed directly as the root, it would be
        # rejected for root-shape reasons and never exercise predicate rules.
        json_str = '{"and": [{"key_exists": {"key": "team"}, "value_equals": {"key": "env", "value": "prod"}}]}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.FilterQuery.model_validate_json(json_str)

    def test_root_must_be_and_or_or(self):
        json_str = '{"key_exists": {"key": "team"}}'
        with pytest.raises(
            pydantic.ValidationError, match="Extra inputs are not permitted"
        ):
            filter_query_models.FilterQuery.model_validate_json(json_str)

    def test_missing_required_field(self):
        json_str = '{"value_equals": {"key": "env"}}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.ValueEqualsPredicate.model_validate_json(json_str)

    def test_root_both_and_or_rejected(self):
        json_str = '{"and": [{"key_exists": {"key": "a"}}], "or": [{"key_exists": {"key": "b"}}]}'
        with pytest.raises(pydantic.ValidationError, match="exactly one"):
            filter_query_models.FilterQuery.model_validate_json(json_str)

    def test_empty_object_rejected(self):
        json_str = "{}"
        with pytest.raises(pydantic.ValidationError, match="exactly one"):
            filter_query_models.FilterQuery.model_validate_json(json_str)

    def test_empty_and_list_rejected(self):
        json_str = '{"and": []}'
        with pytest.raises(pydantic.ValidationError, match="too_short"):
            filter_query_models.FilterQuery.model_validate_json(json_str)

    def test_empty_or_list_rejected(self):
        json_str = '{"or": []}'
        with pytest.raises(pydantic.ValidationError, match="too_short"):
            filter_query_models.FilterQuery.model_validate_json(json_str)

    def test_nested_empty_and_rejected(self):
        json_str = '{"and": [{"and": []}]}'
        with pytest.raises(pydantic.ValidationError):
            filter_query_models.FilterQuery.model_validate_json(json_str)