From 60f351cd2e6ac595f84721c30f0725c530760421 Mon Sep 17 00:00:00 2001
From: Adam Sachs <adam@ethyca.com>
Date: Wed, 10 Dec 2025 07:37:28 -0500
Subject: [PATCH 1/2] initial commit for mssql query hints

---
 src/fides/api/graph/config.py                 |   3 +
 src/fides/api/models/datasetconfig.py         |  20 +++
 src/fides/api/schemas/query_hints/__init__.py |  11 ++
 src/fides/api/schemas/query_hints/base.py     | 100 +++++++++++++++
 .../schemas/query_hints/mssql_query_hints.py  |  64 ++++++++++
 .../microsoft_sql_server_query_config.py      |  26 ++++
 tests/ops/schemas/test_query_hints.py         | 116 +++++++++++++++++
 .../connectors/test_mssql_query_config.py     | 120 ++++++++++++++++++
 8 files changed, 460 insertions(+)
 create mode 100644 src/fides/api/schemas/query_hints/__init__.py
 create mode 100644 src/fides/api/schemas/query_hints/base.py
 create mode 100644 src/fides/api/schemas/query_hints/mssql_query_hints.py
 create mode 100644 tests/ops/schemas/test_query_hints.py
 create mode 100644 tests/ops/service/connectors/test_mssql_query_config.py

diff --git a/src/fides/api/graph/config.py b/src/fides/api/graph/config.py
index 3716676fb65..b2cbbe94535 100644
--- a/src/fides/api/graph/config.py
+++ b/src/fides/api/graph/config.py
@@ -99,6 +99,7 @@
 from fides.api.schemas.partitioning.time_based_partitioning import (
     validate_partitioning_list,
 )
+from fides.api.schemas.query_hints.base import QueryHints
 from fides.api.util.collection_util import merge_dicts
 from fides.api.util.querytoken import QueryToken
 
@@ -464,6 +465,8 @@ class Collection(BaseModel):
     data_categories: Set[FidesKey] = set()
     masking_strategy_override: Optional[MaskingStrategyOverride] = None
     partitioning: Optional[Union[List[TimeBasedPartitioning], Dict[str, Any]]] = None
+    # Query hints for optimizing database queries (e.g., MAXDOP for MSSQL)
+    query_hints: Optional[QueryHints] = None
 
     @property
     def field_dict(self) -> Dict[FieldPath, Field]:
diff --git a/src/fides/api/models/datasetconfig.py b/src/fides/api/models/datasetconfig.py
index 835c9d91762..1727e4ac152 100644
--- a/src/fides/api/models/datasetconfig.py
+++ b/src/fides/api/models/datasetconfig.py
@@ -19,6 +19,7 @@
 )
 from fides.api.graph.data_type import parse_data_type_string
 from fides.api.models.connectionconfig import ConnectionConfig, ConnectionType
+from fides.api.schemas.query_hints.base import QueryHints
 from fides.api.service.masking.strategy.masking_strategy import MaskingStrategy
 from fides.api.util.saas_util import merge_datasets
 
@@ -336,6 +337,24 @@ def convert_dataset_to_graph(
         if collection.fides_meta and collection.fides_meta.partitioning:
             collection_partitioning = collection.fides_meta.partitioning
 
+        # Extract query hints from collection metadata if present
+        collection_query_hints = None
+        if (
+            collection.fides_meta
+            and hasattr(collection.fides_meta, "query_hints")
+            and collection.fides_meta.query_hints
+        ):
+            try:
+                collection_query_hints = QueryHints(
+                    hints=collection.fides_meta.query_hints
+                )
+            except Exception:
+                logger.warning(
+                    "Invalid query_hints on collection {}.{}, ignoring",
+                    dataset_name,
+                    collection.name,
+                )
+
         graph_collection = Collection(
             name=collection.name,
             fields=graph_fields,
@@ -347,6 +366,7 @@ def convert_dataset_to_graph(
                 set(collection.data_categories) if collection.data_categories else set()
             ),
             partitioning=collection_partitioning,
+            query_hints=collection_query_hints,
         )
         graph_collections.append(graph_collection)
     logger.debug(
diff --git a/src/fides/api/schemas/query_hints/__init__.py b/src/fides/api/schemas/query_hints/__init__.py
new file mode 100644
index 00000000000..acd6472368c
--- /dev/null
+++ b/src/fides/api/schemas/query_hints/__init__.py
@@ -0,0 +1,11 @@
+"""Query hints schemas for database-specific query optimization."""
+
+from fides.api.schemas.query_hints.base import QueryHint, QueryHints
+from fides.api.schemas.query_hints.mssql_query_hints import MSSQLHintType, MSSQLQueryHint
+
+__all__ = [
+    "QueryHint",
+    "QueryHints",
+    "MSSQLHintType",
+    "MSSQLQueryHint",
+]
diff --git a/src/fides/api/schemas/query_hints/base.py b/src/fides/api/schemas/query_hints/base.py
new file mode 100644
index 00000000000..f07dc8c8451
--- /dev/null
+++ b/src/fides/api/schemas/query_hints/base.py
@@ -0,0 +1,100 @@
+"""Base classes for query hints."""
+
+from abc import ABC, abstractmethod
+from typing import Any, ClassVar, Dict, List, Optional, Set, Type
+
+from loguru import logger
+from pydantic import BaseModel
+
+
+class QueryHint(BaseModel, ABC):
+    """
+    Abstract parent of every database-specific query hint model.
+
+    A concrete subclass is expected to provide:
+    - An enum of the hint types it accepts
+    - Validation of the values given for those hints
+    - The SQL rendering of a hint, via to_sql_option()
+    """
+
+    # Maps a connection type to the subclass registered for it
+    _implementations: ClassVar[Dict[str, Type["QueryHint"]]] = {}
+
+    # Connection types a subclass handles; empty here on the base class
+    connection_types: ClassVar[Set[str]] = set()
+
+    def __init_subclass__(cls, **kwargs: Any) -> None:
+        super().__init_subclass__(**kwargs)
+        # Register the new subclass under each connection type it declares
+        cls._implementations.update(dict.fromkeys(cls.connection_types, cls))
+
+    @classmethod
+    def get_implementation(cls, connection_type: str) -> Optional[Type["QueryHint"]]:
+        """Look up the registered hint class for a connection type, if any."""
+        return cls._implementations.get(connection_type)
+
+    @classmethod
+    def get_supported_connection_types(cls) -> Set[str]:
+        """Return the connection types that have a registered hint class."""
+        return set(cls._implementations)
+
+    @abstractmethod
+    def to_sql_option(self) -> str:
+        """
+        Produce the SQL text of this hint for use inside an OPTION clause.
+
+        The OPTION() wrapper itself is not included, e.g., "MAXDOP 1"
+        """
+
+
+class QueryHints(BaseModel):
+    """
+    Holds the raw query hints declared on a Collection.
+
+    Example YAML:
+        fides_meta:
+          query_hints:
+            - hint_type: maxdop
+              value: 1
+    """
+
+    hints: List[Dict[str, Any]] = []
+
+    def get_hints_for_connection_type(self, connection_type: str) -> List[QueryHint]:
+        """
+        Validate the raw hints against the hint class for a connection type.
+        Hints that fail validation are dropped; unsupported types yield [].
+        """
+        implementation = QueryHint.get_implementation(connection_type)
+        if implementation is None:
+            return []
+
+        valid_hints: List[QueryHint] = []
+        for hint_dict in self.hints:
+            try:
+                hint = implementation.model_validate(hint_dict)
+            except Exception as exc:
+                # Best effort: a hint that does not validate is skipped
+                logger.debug(
+                    "Skipping invalid query hint for connection type {}: {}",
+                    connection_type,
+                    exc,
+                )
+            else:
+                valid_hints.append(hint)
+
+        return valid_hints
+
+    def to_sql_option_clause(self, connection_type: str) -> Optional[str]:
+        """
+        Build the complete SQL OPTION clause for a connection type.
+
+        Returns None when no hint validates for the connection type,
+        otherwise e.g., "OPTION (MAXDOP 1)" for MSSQL.
+        """
+        hints = self.get_hints_for_connection_type(connection_type)
+        if not hints:
+            return None
+
+        hint_parts = [hint.to_sql_option() for hint in hints]
+        return f"OPTION ({', '.join(hint_parts)})"
diff --git a/src/fides/api/schemas/query_hints/mssql_query_hints.py b/src/fides/api/schemas/query_hints/mssql_query_hints.py
new file mode 100644
index 00000000000..9aee619af38
--- /dev/null
+++ b/src/fides/api/schemas/query_hints/mssql_query_hints.py
@@ -0,0 +1,64 @@
+"""Microsoft SQL Server specific query hints."""
+
+from enum import Enum
+from typing import ClassVar, Optional, Set
+
+from pydantic import model_validator
+
+from fides.api.schemas.query_hints.base import QueryHint
+
+
+class MSSQLHintType(str, Enum):
+    """
+    Query hint types accepted for Microsoft SQL Server.
+
+    Reference: https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-query
+
+    Only safe, performance-related hints are enumerated here. Restricting
+    hints to known types keeps arbitrary text out of the generated SQL.
+    """
+
+    # Parallelism hints
+    MAXDOP = "maxdop"
+
+    # Candidate hints that are not supported yet; add them here as needed:
+    # RECOMPILE = "recompile"
+    # OPTIMIZE_FOR_UNKNOWN = "optimize_for_unknown"
+    # FAST = "fast"
+    # MAXRECURSION = "maxrecursion"
+
+
+class MSSQLQueryHint(QueryHint):
+    """
+    A single query hint for Microsoft SQL Server.
+
+    How it is declared in Dataset YAML:
+        fides_meta:
+          query_hints:
+            - hint_type: maxdop
+              value: 1
+    """
+
+    connection_types: ClassVar[Set[str]] = {"mssql"}
+
+    hint_type: MSSQLHintType
+    value: Optional[int] = None
+
+    @model_validator(mode="after")
+    def validate_hint_value(self) -> "MSSQLQueryHint":
+        """Check that the value supplied is acceptable for the hint type."""
+        if self.hint_type != MSSQLHintType.MAXDOP:
+            return self
+        if self.value is None:
+            raise ValueError("MAXDOP hint requires a value")
+        if not (isinstance(self.value, int) and 0 <= self.value <= 64):
+            raise ValueError("MAXDOP value must be an integer between 0 and 64")
+        return self
+
+    def to_sql_option(self) -> str:
+        """Return this hint as one entry of an OPTION (...) clause."""
+        if self.hint_type != MSSQLHintType.MAXDOP:
+            # Rendering for additional hint types would be added here
+            raise ValueError(f"Unknown hint type: {self.hint_type}")
+
+        return f"MAXDOP {self.value}"
diff --git a/src/fides/api/service/connectors/query_configs/microsoft_sql_server_query_config.py b/src/fides/api/service/connectors/query_configs/microsoft_sql_server_query_config.py
index 427f2b2e223..631e85dd274 100644
--- a/src/fides/api/service/connectors/query_configs/microsoft_sql_server_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/microsoft_sql_server_query_config.py
@@ -1,3 +1,5 @@
+from typing import List
+
 from fides.api.service.connectors.query_configs.query_config import (
     QueryStringWithoutTuplesOverrideQueryConfig,
 )
@@ -7,3 +9,27 @@ class MicrosoftSQLServerQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig)
     """
     Generates SQL valid for SQLServer.
     """
+
+    def get_formatted_query_string(
+        self,
+        field_list: str,
+        clauses: List[str],
+    ) -> str:
+        """
+        Build the SELECT statement, adding MSSQL query hints when configured.
+
+        When the collection carries query_hints that are valid for MSSQL, an
+        OPTION clause is appended to the statement. Example output:
+            SELECT a, b FROM table WHERE x IN (:x) OPTION (MAXDOP 1)
+        """
+        collection = self.node.collection
+        where_clause = " OR ".join(clauses)
+        query = f"SELECT {field_list} FROM {collection.name} WHERE {where_clause}"
+
+        # No hints configured on the collection: return the plain query
+        if not collection.query_hints:
+            return query
+
+        # None here means no configured hint was valid for MSSQL
+        option_clause = collection.query_hints.to_sql_option_clause("mssql")
+        return f"{query} {option_clause}" if option_clause else query
diff --git a/tests/ops/schemas/test_query_hints.py b/tests/ops/schemas/test_query_hints.py
new file mode 100644
index 00000000000..ebdfc8d15d1
--- /dev/null
+++ b/tests/ops/schemas/test_query_hints.py
@@ -0,0 +1,116 @@
+"""Tests for query hints schemas."""
+
+import pytest
+from pydantic import ValidationError
+
+from fides.api.schemas.query_hints.base import QueryHint, QueryHints
+from fides.api.schemas.query_hints.mssql_query_hints import MSSQLHintType, MSSQLQueryHint
+
+
+class TestMSSQLQueryHint:
+    """Unit tests covering the MSSQLQueryHint model."""
+
+    def test_maxdop_hint_valid(self):
+        """An in-range MAXDOP hint is accepted and renders as SQL."""
+        maxdop = MSSQLQueryHint(hint_type=MSSQLHintType.MAXDOP, value=1)
+        assert maxdop.to_sql_option() == "MAXDOP 1"
+        assert maxdop.value == 1
+        assert maxdop.hint_type == MSSQLHintType.MAXDOP
+
+    def test_maxdop_hint_zero(self):
+        """The lowest accepted value, MAXDOP 0, renders as SQL."""
+        maxdop = MSSQLQueryHint(hint_type=MSSQLHintType.MAXDOP, value=0)
+        assert maxdop.to_sql_option() == "MAXDOP 0"
+
+    def test_maxdop_hint_max_value(self):
+        """The highest accepted value, MAXDOP 64, renders as SQL."""
+        maxdop = MSSQLQueryHint(hint_type=MSSQLHintType.MAXDOP, value=64)
+        assert maxdop.to_sql_option() == "MAXDOP 64"
+
+    def test_maxdop_hint_missing_value(self):
+        """A MAXDOP hint without a value fails validation."""
+        with pytest.raises(ValidationError) as raised:
+            MSSQLQueryHint(hint_type=MSSQLHintType.MAXDOP)
+        assert "MAXDOP hint requires a value" in str(raised.value)
+
+    def test_maxdop_hint_negative_value(self):
+        """A MAXDOP value below 0 fails validation."""
+        with pytest.raises(ValidationError) as raised:
+            MSSQLQueryHint(hint_type=MSSQLHintType.MAXDOP, value=-1)
+        assert "MAXDOP value must be an integer between 0 and 64" in str(raised.value)
+
+    def test_maxdop_hint_value_too_high(self):
+        """A MAXDOP value above 64 fails validation."""
+        with pytest.raises(ValidationError) as raised:
+            MSSQLQueryHint(hint_type=MSSQLHintType.MAXDOP, value=65)
+        assert "MAXDOP value must be an integer between 0 and 64" in str(raised.value)
+
+    def test_from_dict(self):
+        """A hint can be built from a plain dict (as would come from YAML)."""
+        raw_hint = {"hint_type": "maxdop", "value": 1}
+        parsed = MSSQLQueryHint.model_validate(raw_hint)
+        assert parsed.value == 1
+        assert parsed.hint_type == MSSQLHintType.MAXDOP
+
+
+class TestQueryHint:
+    """Unit tests for the QueryHint implementation registry."""
+
+    def test_mssql_implementation_registered(self):
+        """The "mssql" connection type resolves to MSSQLQueryHint."""
+        registered = QueryHint.get_implementation("mssql")
+        assert registered == MSSQLQueryHint
+
+    def test_unknown_connection_type_returns_none(self):
+        """Looking up an unregistered connection type gives None."""
+        registered = QueryHint.get_implementation("unknown_db")
+        assert registered is None
+
+    def test_supported_connection_types(self):
+        """The supported connection types include "mssql"."""
+        connection_types = QueryHint.get_supported_connection_types()
+        assert "mssql" in connection_types
+
+
+class TestQueryHints:
+    """Unit tests covering the QueryHints container model."""
+
+    def test_empty_hints(self):
+        """A container with no hints yields no hints and no OPTION clause."""
+        container = QueryHints(hints=[])
+        assert container.to_sql_option_clause("mssql") is None
+        assert container.get_hints_for_connection_type("mssql") == []
+
+    def test_single_mssql_hint(self):
+        """One valid hint is parsed into one MSSQL hint."""
+        container = QueryHints(hints=[{"hint_type": "maxdop", "value": 1}])
+        parsed = container.get_hints_for_connection_type("mssql")
+        assert len(parsed) == 1
+        assert parsed[0].to_sql_option() == "MAXDOP 1"
+
+    def test_to_sql_option_clause(self):
+        """A valid hint is rendered inside a full OPTION clause."""
+        container = QueryHints(hints=[{"hint_type": "maxdop", "value": 1}])
+        option_clause = container.to_sql_option_clause("mssql")
+        assert option_clause == "OPTION (MAXDOP 1)"
+
+    def test_invalid_hints_skipped(self):
+        """Hints that fail validation are left out of the parsed result."""
+        container = QueryHints(
+            hints=[
+                {"hint_type": "maxdop", "value": 1},  # Valid
+                {"hint_type": "invalid_hint", "value": 99},  # Unknown hint type
+                {"hint_type": "maxdop", "value": -1},  # Value out of range
+            ]
+        )
+        parsed = container.get_hints_for_connection_type("mssql")
+        # The single valid hint is the only one that survives
+        assert len(parsed) == 1
+        assert parsed[0].value == 1
+
+    def test_hints_for_unsupported_connection_type(self):
+        """A connection type without a hint class yields nothing."""
+        container = QueryHints(hints=[{"hint_type": "maxdop", "value": 1}])
+        parsed = container.get_hints_for_connection_type("postgres")
+        assert parsed == []
+        assert container.to_sql_option_clause("postgres") is None
diff --git a/tests/ops/service/connectors/test_mssql_query_config.py b/tests/ops/service/connectors/test_mssql_query_config.py
new file mode 100644
index 00000000000..a7b3b1d517e
--- /dev/null
+++ b/tests/ops/service/connectors/test_mssql_query_config.py
@@ -0,0 +1,120 @@
+"""Tests for Microsoft SQL Server query configuration."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from fides.api.graph.config import Collection, FieldPath, ScalarField
+from fides.api.schemas.query_hints.base import QueryHints
+from fides.api.service.connectors.query_configs.microsoft_sql_server_query_config import (
+    MicrosoftSQLServerQueryConfig,
+)
+
+
+@pytest.fixture
+def mock_execution_node():
+    """Provide a mocked execution node whose collection has no query hints."""
+    table_fields = [
+        ScalarField(name="id", primary_key=True),
+        ScalarField(name="email"),
+        ScalarField(name="name"),
+    ]
+
+    execution_node = MagicMock()
+    execution_node.collection = Collection(name="test_table", fields=table_fields)
+    # Give the node an address whose value is a plain string
+    execution_node.address = MagicMock()
+    execution_node.address.value = "test_dataset:test_table"
+    return execution_node
+
+
+@pytest.fixture
+def mock_execution_node_with_hints():
+    """Provide a mocked execution node whose collection has a MAXDOP 1 hint."""
+    maxdop_hints = QueryHints(hints=[{"hint_type": "maxdop", "value": 1}])
+    table_fields = [
+        ScalarField(name="id", primary_key=True),
+        ScalarField(name="email"),
+        ScalarField(name="name"),
+    ]
+    execution_node = MagicMock()
+    execution_node.collection = Collection(
+        name="test_table", fields=table_fields, query_hints=maxdop_hints
+    )
+    execution_node.address = MagicMock()
+    execution_node.address.value = "test_dataset:test_table"
+    return execution_node
+
+
+class TestMicrosoftSQLServerQueryConfig:
+    """Unit tests for MSSQL query string generation."""
+
+    def test_get_formatted_query_string_without_hints(self, mock_execution_node):
+        """Without query hints the plain SELECT statement is produced."""
+        query_config = MicrosoftSQLServerQueryConfig(mock_execution_node)
+
+        sql = query_config.get_formatted_query_string(
+            field_list="id, email, name",
+            clauses=["email = :email"],
+        )
+
+        assert "OPTION" not in sql
+        assert sql == "SELECT id, email, name FROM test_table WHERE email = :email"
+
+    def test_get_formatted_query_string_with_maxdop_hint(
+        self, mock_execution_node_with_hints
+    ):
+        """A MAXDOP hint is appended to the statement as an OPTION clause."""
+        query_config = MicrosoftSQLServerQueryConfig(mock_execution_node_with_hints)
+
+        sql = query_config.get_formatted_query_string(
+            field_list="id, email, name",
+            clauses=["email = :email"],
+        )
+
+        # The hint follows the WHERE clause, separated by a single space
+        base_sql = "SELECT id, email, name FROM test_table WHERE email = :email"
+        option_clause = "OPTION (MAXDOP 1)"
+        assert sql == f"{base_sql} {option_clause}"
+
+    def test_get_formatted_query_string_with_multiple_clauses(
+        self, mock_execution_node_with_hints
+    ):
+        """Several WHERE clauses are OR-ed together ahead of the OPTION clause."""
+        query_config = MicrosoftSQLServerQueryConfig(mock_execution_node_with_hints)
+
+        sql = query_config.get_formatted_query_string(
+            field_list="id, email, name",
+            clauses=["email = :email", "id IN (:id_0, :id_1)"],
+        )
+
+        where = "email = :email OR id IN (:id_0, :id_1)"
+        assert sql == f"SELECT id, email, name FROM test_table WHERE {where} OPTION (MAXDOP 1)"
+
+    def test_get_formatted_query_string_empty_hints(self, mock_execution_node):
+        """An empty hint list leaves the statement without an OPTION clause."""
+        mock_execution_node.collection.query_hints = QueryHints(hints=[])
+        query_config = MicrosoftSQLServerQueryConfig(mock_execution_node)
+
+        sql = query_config.get_formatted_query_string(
+            field_list="id, email",
+            clauses=["email = :email"],
+        )
+
+        assert "OPTION" not in sql
+
+    def test_get_formatted_query_string_invalid_hints_ignored(self, mock_execution_node):
+        """Hints that fail validation leave the generated statement unchanged."""
+        # "invalid" is not a member of MSSQLHintType
+        unknown_hint = {"hint_type": "invalid", "value": 999}
+        mock_execution_node.collection.query_hints = QueryHints(hints=[unknown_hint])
+        query_config = MicrosoftSQLServerQueryConfig(mock_execution_node)
+
+        sql = query_config.get_formatted_query_string(
+            field_list="id, email",
+            clauses=["email = :email"],
+        )
+
+        # With no valid hint left, the statement carries no OPTION clause
+        assert "OPTION" not in sql
+        assert sql == "SELECT id, email FROM test_table WHERE email = :email"

From aed7f95d6a504792ffc5b54d039b036c879427a1 Mon Sep 17 00:00:00 2001
From: Adam Sachs <adam@ethyca.com>
Date: Wed, 10 Dec 2025 21:55:40 -0500
Subject: [PATCH 2/2] use fideslang alpha tag with query hint support

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 9a634e40946..dffde2befbb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -77,4 +77,4 @@ toml==0.10.2
 twilio==7.15.0
 typing-extensions==4.12.2
 versioneer==0.19
-fideslang==3.1.2
+fideslang==3.1.3a0