Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- **`aicertify demo` rewritten for the canonical rich-UX flow.** The previous demo runner produced plain `print()` output; it now mirrors [`examples/quickstart.py`](examples/quickstart.py) exactly — uses the high-level `application.create()` + `app.evaluate()` API and wraps each step in `print_banner`, `spinner`, `MessageGroup`, and `success` markers from `aicertify.utils.logging_config`. Visually identical to the canonical SDK experience.
- **CLI default verbosity now WARNING, not INFO.** `aicertify demo` and `aicertify evaluate` no longer flood the terminal with INFO-level chatter from `langfair`, `deepeval`, the OPA policy loader, etc. Pass `--verbose` to opt back in (it raises the root logger to INFO and the `aicertify` namespace logger to DEBUG).
- **OPA `policy_loader` no longer warns on `helper_functions/`** — those `.rego` files are shared library code (reporting helpers, validation helpers), not policies, and were always meant to be skipped silently. Same for dot-prefixed config directories.

### Added

- **`docs/demo.cast` + `docs/demo.gif`** — an asciinema recording of `aicertify demo` running end-to-end, plus a GIF export of it embedded near the top of the README so visitors see the rich UX before installing anything.

## [0.7.1] — 2026-05-14

### Added
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ aicertify demo

`aicertify demo` loads a bundled sample contract, evaluates it against the EU AI Act policy set via OPA, and writes `aicertify_demo_report.md` to the current directory. Open the report — that's what your audit deliverable looks like.

<p align="center">
<img src="docs/demo.gif" alt="aicertify demo recording — banner, spinners, evaluation progress, generated report path" width="85%" />
</p>

For richer evaluations (LangFair fairness metrics, DeepEval content-safety scoring, PDF reports), see [`examples/quickstart.py`](examples/quickstart.py) and the [forkable example bots](examples/) — each ships an `input_contract.json`, a `policy_config.yaml`, and a `run.py`.

### For development
Expand Down
257 changes: 176 additions & 81 deletions aicertify/_demo/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,25 @@
vendored policy folder, and writes a Markdown report to the user's CWD.

Designed to work after ``pip install aicertify`` with no extra configuration
beyond the OPA binary on PATH. Heavy ML-based evaluators are skipped by
default; the OPA verdict is the substance.
beyond the OPA binary on PATH. Mirrors ``examples/quickstart.py`` exactly:
banner + spinners + MessageGroup + success markers via
``aicertify.utils.logging_config``.

The evaluation runs through the canonical ``application.create() +
app.evaluate()`` API. Heavy ML evaluators (DeepEval, LangFair) skip
gracefully if OPENAI_API_KEY is unset; the OPA verdict is the substance.
"""

from __future__ import annotations

import contextlib
import json
import logging
import os
import platform
import shutil
import sys
import tempfile
from importlib.resources import files
from pathlib import Path
from typing import Optional
Expand All @@ -26,7 +34,8 @@
DEFAULT_REPORT_NAME = "aicertify_demo_report.md"

# Map friendly framework names to the bundled directory under aicertify/opa_policies/
# that we use to verify the framework is present in the wheel.
# that we use as an existence probe (so the demo fails fast with a clear
# message if the wheel was stripped or the framework name is unknown).
_BUNDLED_POLICY_PROBE_PATH = {
"eu_ai_act": ("international", "eu_ai_act", "v1"),
"nist": ("international", "nist", "v1"),
Expand Down Expand Up @@ -89,13 +98,10 @@ def bundled_policy_path(policy: str) -> Path:
Used only as an existence probe so the demo can fail fast with a friendly
message if the wheel was stripped or the framework name is unknown. The
actual evaluation passes the friendly framework name (e.g. ``eu_ai_act``)
to the lib's ``find_matching_policy_folders``, which then resolves it to
the absolute directory and recurses for ``.rego`` files.
to the high-level ``application.evaluate()`` API, which resolves it.
"""
probe = _BUNDLED_POLICY_PROBE_PATH.get(policy)
if probe is None:
# Unknown friendly name; fall back to treating the input as a
# path relative to opa_policies/.
probe = ("opa_policies", *policy.split("/"))
else:
probe = ("opa_policies", *probe)
Expand All @@ -111,87 +117,176 @@ async def run_demo(
policy: str = DEFAULT_POLICY,
) -> int:
"""Run the bundled demo. Returns a shell-style exit code."""
# The OPA-binary check uses only stdlib (shutil.which) so it's safe to
# run BEFORE the stderr redirect — failure messages stay visible.
if opa_binary_path() is None:
print_opa_install_instructions()
return 1

contract_file = bundled_contract_path()
if not contract_file.exists():
print(
f"✗ Bundled sample contract missing at {contract_file}. "
f"This is a packaging bug — please file an issue.",
file=sys.stderr,
)
return 1

policy_dir = bundled_policy_path(policy)
if not policy_dir.exists():
print(
f"✗ Bundled policy directory {policy} not found at {policy_dir}. "
f"Try one of: international/eu_ai_act/v1, global/v1, "
f"international/nist/v1",
file=sys.stderr,
)
return 1

# Load sample contract as an AiCertifyContract
from aicertify.api import load_contract

contract_data = json.loads(contract_file.read_text())
# load_contract accepts a path; serialise the bundled JSON to a tmp file
# via the API's existing path-based loader so we don't reimplement.
import tempfile

with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
json.dump(contract_data, tmp)
tmp_path = tmp.name

try:
contract = load_contract(tmp_path)
finally:
Path(tmp_path).unlink(missing_ok=True)

output_path = Path(output).resolve()
output_dir = output_path.parent

print(
f"→ Running AICertify demo:\n"
f" contract: {contract.application_name} "
f"({len(contract.interactions)} interactions)\n"
f" policy: {policy}\n"
f" report: {report_format}\n"
# Don't expose CUDA — matches examples/quickstart.py to keep behaviour
# reproducible across machines with and without GPUs.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")

# The downstream evaluators emit a lot of WARNING / ERROR log chatter —
# much of it expected in the no-API-key demo path. Capture stderr to a
# tempfile and only surface it on failure. Started BEFORE any
# ``importlib.resources.files("aicertify…")`` or ``from aicertify`` call
# — those trigger the aicertify package init which eagerly imports the
# OPA policy_loader and emits "Skipping policy file…" warnings.
logging.getLogger().setLevel(logging.WARNING)
saved_stderr_fd = os.dup(2)
captured_stderr = tempfile.NamedTemporaryFile(
mode="w+b", prefix="aicertify-demo-stderr-", delete=False
)
os.dup2(captured_stderr.fileno(), 2)

from aicertify.api import aicertify_app_for_policy
exit_code = 0
try:
# Remaining bundled-resource probes now run quietly (aicertify package
# init lives in the captured-stderr window).
contract_file = bundled_contract_path()
if not contract_file.exists():
print(
f"✗ Bundled sample contract missing at {contract_file}. "
f"This is a packaging bug — please file an issue.",
file=sys.__stderr__,
)
exit_code = 1
return exit_code

policy_dir = bundled_policy_path(policy)
if not policy_dir.exists():
print(
f"✗ Bundled policy directory '{policy}' not found at "
f"{policy_dir}. Try one of: eu_ai_act, nist, global",
file=sys.__stderr__,
)
exit_code = 1
return exit_code

# Deferred imports happen inside the capture so eager-import-time
# warnings go to the tempfile, not the user's terminal.
from aicertify import application, regulations
from aicertify.utils.logging_config import (
AIC_LOGO,
MessageGroup,
error,
info,
spinner,
success,
print_banner,
)

# Pass the relative policy name (not the absolute path); the library's
# find_matching_policy_folders() rejects absolute patterns.
results = await aicertify_app_for_policy(
contract=contract,
policy_folder=policy,
output_dir=str(output_dir),
report_format=report_format,
generate_report=True,
)
print_banner()
info(
"Self-contained demo: bundled sample contract → "
f"{policy} policy set → {report_format} report.",
category="EVALUATION",
)

# The API writes a timestamped report; surface the path it produced.
report_path = results.get("report_path")
if report_path:
print(f"\n✓ Report written to: {report_path}")
print(
f"\nOpen the report to see what an AICertify audit deliverable "
f"looks like.\n"
# Step 1: regulations set
with spinner("Creating regulations set", emoji="🔍"):
regs_set = regulations.create("aicertify-demo")

try:
with spinner(f"Adding {policy} regulations", emoji="⚖️"):
regs_set.add(policy)
success(f"Loaded {policy} policy set")
except ValueError as exc:
error(f"Could not add regulation '{policy}': {exc}")
exit_code = 2
return exit_code

# Step 2: application + interactions from the bundled fixture
contract_data = json.loads(contract_file.read_text())
model_info = contract_data.get("model_info", {})

info(
f"Building application from bundled fixture: " f"{contract_file.name}",
category="APPLICATION",
)
with spinner(
f"Creating application: {contract_data['application_name']}",
emoji="🤖",
):
app = application.create(
name=contract_data["application_name"],
model_name=model_info.get("model_name", "demo-model"),
model_version=model_info.get("model_version", "v1"),
model_metadata=model_info.get("metadata", {}),
)
success(f"Created application: {contract_data['application_name']}")

interactions = contract_data.get("interactions", [])
with spinner(f"Loading {len(interactions)} bundled interactions", emoji="💬"):
for ix in interactions:
app.add_interaction(
input_text=ix["input_text"],
output_text=ix["output_text"],
)
success(f"Added {len(interactions)} interactions to the application")

# Step 3: evaluate
output_path = Path(output).resolve()
output_dir = output_path.parent
output_dir.mkdir(parents=True, exist_ok=True)

info(
f"\n{AIC_LOGO} Starting evaluation against {policy}",
category="EVALUATION",
)
with MessageGroup("Evaluation progress") as eval_group:
with spinner(
f"Evaluating {contract_data['application_name']} against {policy}",
emoji="🧪",
):
eval_group.add("Initializing evaluators")
eval_group.add("Loading policy files")
eval_group.add("Running OPA policy evaluation")
await app.evaluate(
regulations=regs_set,
report_format=report_format,
output_dir=str(output_dir),
)
eval_group.add(f"Writing {report_format} report")
success("Evaluation complete")

# Step 4: surface the produced report path
reports = app.get_report()
if not reports:
error("Evaluation finished but no report path was returned.")
exit_code = 3
return exit_code

for reg_name, report_path in reports.items():
success(f"Report for {reg_name}: {report_path}")

success("\n🎉 Demo complete 🎉")
info(
"Open the report above to see what an AICertify audit deliverable "
"looks like — generated, not handwritten."
)
return 0

err = results.get("error")
if err:
print(f"\n✗ Demo failed: {err}", file=sys.stderr)
return 2

print(
"\n⚠ Demo completed but no report path was returned. "
"Check logs above for details.",
file=sys.stderr,
)
return 3
except Exception:
exit_code = 99
raise
finally:
# Restore real stderr
sys.stderr.flush()
os.dup2(saved_stderr_fd, 2)
os.close(saved_stderr_fd)
captured_stderr.flush()
captured_stderr.close()
try:
if exit_code != 0:
# Demo failed — replay the captured chatter for debugging
with open(captured_stderr.name, "rb") as f:
data = f.read()
if data:
sys.stderr.write(
"\n--- captured downstream output (demo failed) ---\n"
)
sys.stderr.write(data.decode("utf-8", errors="replace"))
finally:
with contextlib.suppress(FileNotFoundError):
Path(captured_stderr.name).unlink()
8 changes: 7 additions & 1 deletion aicertify/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,13 @@ def _inject_evaluate_for_legacy_invocation(argv: list) -> list:


def main() -> int:
# Quiet by default. CLI tools should not flood the terminal with INFO-level
# chatter from downstream libraries (langfair, deepeval, transformers, the
# OPA policy loader, …) unless the user opts in via --verbose. Note: this
# runs BEFORE argparse so it's in effect when the (deferred) aicertify
# package imports happen inside the subcommand handlers.
logging.basicConfig(
level=logging.INFO,
level=logging.WARNING,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

Expand All @@ -222,6 +227,7 @@ def main() -> int:
args = parser.parse_args()

if args.verbose:
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("aicertify").setLevel(logging.DEBUG)

if not hasattr(args, "func"):
Expand Down
16 changes: 13 additions & 3 deletions aicertify/opa_core/policy_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,19 @@ def _load_policies(self) -> Dict[str, Dict[str, Dict[str, List[str]]]]:

# Skip legacy paths or unexpected structures
if parts[0] not in policies:
logging.warning(
f"Skipping policy file in unrecognized category: {policy_file}"
)
# ``helper_functions/`` holds shared Rego helpers
# (reporting.rego, validation.rego, …) — not policies,
# so don't warn on them. Same for the version-tracking
# ``.github/``, ``.regal/`` etc. dotfile dirs.
if parts[0] in ("helper_functions",) or parts[0].startswith("."):
logging.debug(
f"Skipping shared-helper / config file (not a policy): "
f"{policy_file}"
)
else:
logging.warning(
f"Skipping policy file in unrecognized category: {policy_file}"
)
continue

category = parts[0] # global, international, etc.
Expand Down
Loading
Loading