From 5da045d993d6c8f6c62593a06a4b3a8da7b9f77c Mon Sep 17 00:00:00 2001 From: Ivar Vong Date: Tue, 30 Jun 2026 09:13:27 -0400 Subject: [PATCH 1/3] fix(deps): make pyex installable as a bare dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pyex referenced three deps in lib/ that a fresh consumer wouldn't have, so `{:pyex, "~> x"}` failed to compile for anyone — it only worked in-repo because the deps were present transitively. Found while composing pyex with another library; it's also a release blocker. - decimal: used pervasively in the numeric tower but never declared. Now a real runtime dependency (it isn't optional — it's core). - postgrex (the `sql` backend): lib/pyex/stdlib/sql.ex pattern-matches %Postgrex.Error{} structs — a hard compile-time requirement. The module is now wrapped in `if Code.ensure_loaded?(Postgrex)`. - explorer (the `pandas` backend): scattered Explorer.Series/DataFrame calls, type specs, and struct patterns across core modules. Fixed idiomatically: `@compile {:no_warn_undefined, ...}` for the calls, `term()` for the specs, `is_struct(x, Explorer.Series)` guards (a runtime atom check, no compile-time struct) for the patterns, and the producer module wrapped in `Code.ensure_loaded?(Explorer)`. postgrex/explorer stay `optional: true`: `import sql`/`import pandas` now raise a clean Python ImportError when the backend isn't installed (Pyex.Stdlib.fetch/1 and module_names/0 degrade gracefully), so the core library carries no heavy native deps. Proven by `scripts/consumer_smoke.sh` + a `consumer-smoke` CI job: a throwaway project depending on pyex and nothing else compiles and runs, with the optional features degrading to ImportError — the regression class pyex's own build (where the optional deps ARE present) cannot catch. Full suite (6185) + Dialyzer green with the deps present. 
Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_019NokzcR7BiAigPgC78zpk9 --- .github/workflows/ci.yml | 15 + README.md | 13 + lib/pyex/builtins.ex | 1 + lib/pyex/interpreter.ex | 7 +- lib/pyex/interpreter/binary_ops.ex | 9 +- lib/pyex/methods.ex | 1 + lib/pyex/stdlib.ex | 24 +- lib/pyex/stdlib/pandas.ex | 224 +++++++-------- lib/pyex/stdlib/sql.ex | 436 +++++++++++++++-------------- mix.exs | 1 + scripts/consumer_smoke.sh | 56 ++++ 11 files changed, 450 insertions(+), 337 deletions(-) create mode 100755 scripts/consumer_smoke.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b914937..7349094 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -214,3 +214,18 @@ jobs: - name: Run library conformance tests run: mix test --include library_conformance test/pyex/library_conformance/ + + consumer-smoke: + name: Consumer smoke (installable as a bare dependency) + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - uses: erlef/setup-beam@v1 + with: + otp-version: "28.1" + elixir-version: "1.19.5" + + - name: Compile + run pyex as a bare dependency (no optional backends) + run: bash scripts/consumer_smoke.sh diff --git a/README.md b/README.md index ac7667f..95485ed 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,19 @@ Pyex.run(source, limits: [timeout: 5_000, max_memory_bytes: 50_000_000]) ``` +### Optional backends + +Two stdlib modules light up only when you add their backend dependency, so +the core library stays small and free of heavy native deps: + +- **`import pandas`** needs [`:explorer`](https://hex.pm/packages/explorer) + (Polars). +- **`import sql`** needs [`:postgrex`](https://hex.pm/packages/postgrex). + +Without them, importing the module raises a normal Python `ImportError` — +nothing else is affected. A CI job (`consumer-smoke`) proves pyex compiles +and runs as a bare dependency with neither installed. 
+ ## Sandbox model Pyex is a tree-walking interpreter, not an `eval`. Python source diff --git a/lib/pyex/builtins.ex b/lib/pyex/builtins.ex index a8098a0..527d18e 100644 --- a/lib/pyex/builtins.ex +++ b/lib/pyex/builtins.ex @@ -1,4 +1,5 @@ defmodule Pyex.Builtins do + @compile {:no_warn_undefined, [Explorer.Series, Explorer.DataFrame]} @moduledoc """ Built-in Python functions available without import. diff --git a/lib/pyex/interpreter.ex b/lib/pyex/interpreter.ex index 3260d66..7b07d90 100644 --- a/lib/pyex/interpreter.ex +++ b/lib/pyex/interpreter.ex @@ -1,4 +1,5 @@ defmodule Pyex.Interpreter do + @compile {:no_warn_undefined, [Explorer.Series, Explorer.DataFrame]} @moduledoc """ Tree-walking evaluator for the Pyex AST. @@ -73,9 +74,9 @@ defmodule Pyex.Interpreter do | {:generator_error, [pyvalue()], String.t()} | {:iterator, non_neg_integer()} | {:super_proxy, pyvalue(), [pyvalue()]} - | {:pandas_series, Explorer.Series.t()} - | {:pandas_rolling, Explorer.Series.t(), pos_integer()} - | {:pandas_dataframe, Explorer.DataFrame.t()} + | {:pandas_series, term()} + | {:pandas_rolling, term(), pos_integer()} + | {:pandas_dataframe, term()} | {:py_dict, %{optional(pyvalue()) => pyvalue()}, [pyvalue()]} | {:pyex_decimal, Decimal.t()} | {:object, integer()} diff --git a/lib/pyex/interpreter/binary_ops.ex b/lib/pyex/interpreter/binary_ops.ex index ec61a9f..a4884c5 100644 --- a/lib/pyex/interpreter/binary_ops.ex +++ b/lib/pyex/interpreter/binary_ops.ex @@ -1,4 +1,5 @@ defmodule Pyex.Interpreter.BinaryOps do + @compile {:no_warn_undefined, [Explorer.Series]} @moduledoc """ Binary-operation evaluation for `Pyex.Interpreter`. 
@@ -233,7 +234,7 @@ defmodule Pyex.Interpreter.BinaryOps do case result do {:exception, _} = err -> err - %Explorer.Series{} = s -> {:pandas_series, s} + s when is_struct(s, Explorer.Series) -> {:pandas_series, s} end end @@ -957,11 +958,13 @@ defmodule Pyex.Interpreter.BinaryOps do defp series_unwrap(true), do: 1 defp series_unwrap(false), do: 0 - defp series_bool_and(%Explorer.Series{} = l, %Explorer.Series{} = r) do + defp series_bool_and(l, r) + when is_struct(l, Explorer.Series) and is_struct(r, Explorer.Series) do Explorer.Series.and(l, r) end - defp series_bool_or(%Explorer.Series{} = l, %Explorer.Series{} = r) do + defp series_bool_or(l, r) + when is_struct(l, Explorer.Series) and is_struct(r, Explorer.Series) do Explorer.Series.or(l, r) end diff --git a/lib/pyex/methods.ex b/lib/pyex/methods.ex index e8513e7..f0b35ca 100644 --- a/lib/pyex/methods.ex +++ b/lib/pyex/methods.ex @@ -1,4 +1,5 @@ defmodule Pyex.Methods do + @compile {:no_warn_undefined, [Explorer.Series, Explorer.DataFrame]} @moduledoc """ Method dispatch for Python built-in types. diff --git a/lib/pyex/stdlib.ex b/lib/pyex/stdlib.ex index ee11cf9..6bd3c36 100644 --- a/lib/pyex/stdlib.ex +++ b/lib/pyex/stdlib.ex @@ -77,8 +77,19 @@ defmodule Pyex.Stdlib do @spec fetch(String.t()) :: {:ok, Pyex.Stdlib.Module.module_value()} | :unknown_module def fetch(name) do case Map.fetch(@modules, name) do - {:ok, mod} -> {:ok, mod.module_value()} - :error -> :unknown_module + # Some stdlib modules are compiled only when their optional backend dep is + # present (`sql` needs :postgrex, `pandas` needs :explorer). When the dep + # isn't installed the module isn't defined, so `import` it like any other + # absent module — a clean ImportError, not a host crash. 
+ {:ok, mod} -> + if Code.ensure_loaded?(mod) and function_exported?(mod, :module_value, 0) do + {:ok, mod.module_value()} + else + :unknown_module + end + + :error -> + :unknown_module end end @@ -86,5 +97,12 @@ defmodule Pyex.Stdlib do Returns a sorted list of all available stdlib module names. """ @spec module_names() :: [String.t()] - def module_names, do: @modules |> Map.keys() |> Enum.sort() + def module_names do + @modules + |> Enum.filter(fn {_name, mod} -> + Code.ensure_loaded?(mod) and function_exported?(mod, :module_value, 0) + end) + |> Enum.map(&elem(&1, 0)) + |> Enum.sort() + end end diff --git a/lib/pyex/stdlib/pandas.ex b/lib/pyex/stdlib/pandas.ex index 3c4a2b4..90c1a39 100644 --- a/lib/pyex/stdlib/pandas.ex +++ b/lib/pyex/stdlib/pandas.ex @@ -1,121 +1,123 @@ -defmodule Pyex.Stdlib.Pandas do - @moduledoc """ - Python `pandas` module backed by Explorer (Polars/Rust). - - Provides `pd.Series()` for creating series from lists and - `pd.DataFrame()` for creating DataFrames from dicts. - Series and DataFrame methods (`.mean()`, `.rolling()`, - `.sum()`, etc.) are dispatched via `Pyex.Methods`. - - Requires the optional `explorer` dependency at runtime. - All heavy computation runs in Rust via Polars NIFs -- - no interpreter loop overhead for numeric operations. 
- - ## Supported API - - ### Module-level - - import pandas as pd - s = pd.Series([1, 2, 3]) - df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) - - ### Series methods - - s.sum(), s.mean(), s.std(), s.min(), s.max(), s.median() - s.cumsum(), s.diff(), s.shift(n), s.abs() - s.rolling(window) # returns Rolling object - s.tolist() # convert back to Python list - len(s) # via __len__ - - ### Rolling methods - - s.rolling(50).mean() - s.rolling(50).sum() - s.rolling(50).min() - s.rolling(50).max() - s.rolling(50).std() - - ### DataFrame methods - - df["col"] # column access returns Series - df.columns # list of column names - len(df) # number of rows - - ### Vectorized operations - - s + s, s - s, s * s, s / s # element-wise arithmetic - s > 0, s < 0, s >= 0, s <= 0 # element-wise comparison (returns bool Series) - s[bool_series] # boolean indexing - """ - - @behaviour Pyex.Stdlib.Module - - @impl Pyex.Stdlib.Module - @spec module_value() :: Pyex.Stdlib.Module.module_value() - def module_value do - if Code.ensure_loaded?(Explorer.Series) do - %{ - "Series" => {:builtin, &do_series/1}, - "DataFrame" => {:builtin, &do_dataframe/1} - } - else - %{ - "Series" => - {:builtin, - fn _ -> - {:exception, - "ImportError: pandas requires the :explorer dependency. " <> - "Add {:explorer, \"~> 0.10\"} to your mix.exs deps."} - end}, - "DataFrame" => - {:builtin, - fn _ -> - {:exception, - "ImportError: pandas requires the :explorer dependency. " <> - "Add {:explorer, \"~> 0.10\"} to your mix.exs deps."} - end} - } +if Code.ensure_loaded?(Explorer) do + defmodule Pyex.Stdlib.Pandas do + @moduledoc """ + Python `pandas` module backed by Explorer (Polars/Rust). + + Provides `pd.Series()` for creating series from lists and + `pd.DataFrame()` for creating DataFrames from dicts. + Series and DataFrame methods (`.mean()`, `.rolling()`, + `.sum()`, etc.) are dispatched via `Pyex.Methods`. + + Requires the optional `explorer` dependency at runtime. 
+ All heavy computation runs in Rust via Polars NIFs -- + no interpreter loop overhead for numeric operations. + + ## Supported API + + ### Module-level + + import pandas as pd + s = pd.Series([1, 2, 3]) + df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + + ### Series methods + + s.sum(), s.mean(), s.std(), s.min(), s.max(), s.median() + s.cumsum(), s.diff(), s.shift(n), s.abs() + s.rolling(window) # returns Rolling object + s.tolist() # convert back to Python list + len(s) # via __len__ + + ### Rolling methods + + s.rolling(50).mean() + s.rolling(50).sum() + s.rolling(50).min() + s.rolling(50).max() + s.rolling(50).std() + + ### DataFrame methods + + df["col"] # column access returns Series + df.columns # list of column names + len(df) # number of rows + + ### Vectorized operations + + s + s, s - s, s * s, s / s # element-wise arithmetic + s > 0, s < 0, s >= 0, s <= 0 # element-wise comparison (returns bool Series) + s[bool_series] # boolean indexing + """ + + @behaviour Pyex.Stdlib.Module + + @impl Pyex.Stdlib.Module + @spec module_value() :: Pyex.Stdlib.Module.module_value() + def module_value do + if Code.ensure_loaded?(Explorer.Series) do + %{ + "Series" => {:builtin, &do_series/1}, + "DataFrame" => {:builtin, &do_dataframe/1} + } + else + %{ + "Series" => + {:builtin, + fn _ -> + {:exception, + "ImportError: pandas requires the :explorer dependency. " <> + "Add {:explorer, \"~> 0.10\"} to your mix.exs deps."} + end}, + "DataFrame" => + {:builtin, + fn _ -> + {:exception, + "ImportError: pandas requires the :explorer dependency. 
" <> + "Add {:explorer, \"~> 0.10\"} to your mix.exs deps."} + end} + } + end end - end - @spec do_series([term()]) :: {:pandas_series, Explorer.Series.t()} - defp do_series([{:py_list, reversed, _}]) do - {:pandas_series, Explorer.Series.from_list(coerce_values(Enum.reverse(reversed)))} - end + @spec do_series([term()]) :: {:pandas_series, term()} + defp do_series([{:py_list, reversed, _}]) do + {:pandas_series, Explorer.Series.from_list(coerce_values(Enum.reverse(reversed)))} + end - defp do_series([values]) when is_list(values) do - {:pandas_series, Explorer.Series.from_list(coerce_values(values))} - end + defp do_series([values]) when is_list(values) do + {:pandas_series, Explorer.Series.from_list(coerce_values(values))} + end - @spec do_dataframe([term()]) :: {:pandas_dataframe, Explorer.DataFrame.t()} - defp do_dataframe([{:py_dict, _, _} = dict]) do - columns = - Enum.map(Pyex.PyDict.items(dict), fn {name, values} -> - {name, coerce_values(normalize_list(values))} - end) + @spec do_dataframe([term()]) :: {:pandas_dataframe, term()} + defp do_dataframe([{:py_dict, _, _} = dict]) do + columns = + Enum.map(Pyex.PyDict.items(dict), fn {name, values} -> + {name, coerce_values(normalize_list(values))} + end) - {:pandas_dataframe, Explorer.DataFrame.new(columns)} - end + {:pandas_dataframe, Explorer.DataFrame.new(columns)} + end - defp do_dataframe([dict]) when is_map(dict) do - columns = - Enum.map(dict, fn {name, values} -> - {name, coerce_values(normalize_list(values))} - end) + defp do_dataframe([dict]) when is_map(dict) do + columns = + Enum.map(dict, fn {name, values} -> + {name, coerce_values(normalize_list(values))} + end) - {:pandas_dataframe, Explorer.DataFrame.new(columns)} - end + {:pandas_dataframe, Explorer.DataFrame.new(columns)} + end - @spec normalize_list(term()) :: [term()] - defp normalize_list({:py_list, reversed, _}), do: Enum.reverse(reversed) - defp normalize_list(list) when is_list(list), do: list - defp normalize_list(other), do: [other] - 
- @spec coerce_values([term()]) :: [number()] | [String.t()] - defp coerce_values(values) do - Enum.map(values, fn - nil -> nil - v -> v - end) + @spec normalize_list(term()) :: [term()] + defp normalize_list({:py_list, reversed, _}), do: Enum.reverse(reversed) + defp normalize_list(list) when is_list(list), do: list + defp normalize_list(other), do: [other] + + @spec coerce_values([term()]) :: [number()] | [String.t()] + defp coerce_values(values) do + Enum.map(values, fn + nil -> nil + v -> v + end) + end end end diff --git a/lib/pyex/stdlib/sql.ex b/lib/pyex/stdlib/sql.ex index b48d424..65b4b4c 100644 --- a/lib/pyex/stdlib/sql.ex +++ b/lib/pyex/stdlib/sql.ex @@ -1,248 +1,250 @@ -defmodule Pyex.Stdlib.SQL do - @moduledoc """ - Python `sql` module backed by Postgrex. - - Provides `sql.query(sql, params)` for parameterized queries - against a PostgreSQL database. The connection URL is read from - `DATABASE_URL` in the execution context's environ. - - Returns a list of dicts (one per row) with column names as keys. - - import sql - rows = sql.query("SELECT id, name FROM users WHERE id = $1", [42]) - # [{"id": 42, "name": "Alice"}] - """ - - @behaviour Pyex.Stdlib.Module - - alias Pyex.PyDict - - @doc """ - Returns the module value -- a map with callable attributes. - """ - @impl Pyex.Stdlib.Module - @spec module_value() :: Pyex.Stdlib.Module.module_value() - def module_value do - %{ - "query" => {:builtin, &do_query/1} - } - end +if Code.ensure_loaded?(Postgrex) do + defmodule Pyex.Stdlib.SQL do + @moduledoc """ + Python `sql` module backed by Postgrex. + + Provides `sql.query(sql, params)` for parameterized queries + against a PostgreSQL database. The connection URL is read from + `DATABASE_URL` in the execution context's environ. + + Returns a list of dicts (one per row) with column names as keys. 
+ + import sql + rows = sql.query("SELECT id, name FROM users WHERE id = $1", [42]) + # [{"id": 42, "name": "Alice"}] + """ + + @behaviour Pyex.Stdlib.Module + + alias Pyex.PyDict + + @doc """ + Returns the module value -- a map with callable attributes. + """ + @impl Pyex.Stdlib.Module + @spec module_value() :: Pyex.Stdlib.Module.module_value() + def module_value do + %{ + "query" => {:builtin, &do_query/1} + } + end - @spec do_query([Pyex.Interpreter.pyvalue()]) :: - {:io_call, (Pyex.Env.t(), Pyex.Ctx.t() -> {term(), Pyex.Env.t(), Pyex.Ctx.t()})} - | {:exception, String.t()} - defp do_query([sql, {:py_list, params, _}]) when is_binary(sql) and is_list(params) do - do_query([sql, Enum.reverse(params)]) - end + @spec do_query([Pyex.Interpreter.pyvalue()]) :: + {:io_call, (Pyex.Env.t(), Pyex.Ctx.t() -> {term(), Pyex.Env.t(), Pyex.Ctx.t()})} + | {:exception, String.t()} + defp do_query([sql, {:py_list, params, _}]) when is_binary(sql) and is_list(params) do + do_query([sql, Enum.reverse(params)]) + end - defp do_query([sql, params]) when is_binary(sql) and is_list(params) do - Pyex.Ctx.guarded_io_call(:sql, fn env, ctx -> - case Map.fetch(ctx.env, "DATABASE_URL") do - {:ok, url} when is_binary(url) -> - run_query(sql, params, url, env, ctx) + defp do_query([sql, params]) when is_binary(sql) and is_list(params) do + Pyex.Ctx.guarded_io_call(:sql, fn env, ctx -> + case Map.fetch(ctx.env, "DATABASE_URL") do + {:ok, url} when is_binary(url) -> + run_query(sql, params, url, env, ctx) - _ -> - {{:exception, "sql.query: DATABASE_URL not set in environ"}, env, ctx} - end - end) - end + _ -> + {{:exception, "sql.query: DATABASE_URL not set in environ"}, env, ctx} + end + end) + end - defp do_query([sql]) when is_binary(sql) do - do_query([sql, []]) - end + defp do_query([sql]) when is_binary(sql) do + do_query([sql, []]) + end - defp do_query(_args) do - {:exception, "TypeError: sql.query(sql_string, params_list)"} - end + defp do_query(_args) do + {:exception, "TypeError: 
sql.query(sql_string, params_list)"} + end + + @spec run_query( + String.t(), + [Pyex.Interpreter.pyvalue()], + String.t(), + Pyex.Env.t(), + Pyex.Ctx.t() + ) :: + {term(), Pyex.Env.t(), Pyex.Ctx.t()} + defp run_query(sql, params, url, env, ctx) do + start_mono = System.monotonic_time() + telemetry_meta = %{statement: sql} + + :telemetry.execute( + [:pyex, :query, :start], + %{system_time: System.system_time()}, + telemetry_meta + ) + + case parse_url(url) do + {:ok, opts} -> + conn = Postgrex.start_link(opts) + + case conn do + {:ok, conn} -> + try do + pg_params = Enum.map(params, &to_pg/1) + result = Postgrex.query(conn, sql, pg_params, timeout: 15_000) + + case result do + {:ok, %Postgrex.Result{columns: nil}} -> + duration = System.monotonic_time() - start_mono + + :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ + statement: sql, + rows_returned: 0 + }) + + {[], env, ctx} + + {:ok, %Postgrex.Result{columns: cols, rows: rows, num_rows: n}} -> + duration = System.monotonic_time() - start_mono + + :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ + statement: sql, + rows_returned: n + }) + + result = Enum.map(rows, fn row -> row_to_dict(cols, row) end) + {result, env, ctx} + + {:error, %Postgrex.Error{postgres: %{message: msg}}} -> + duration = System.monotonic_time() - start_mono - @spec run_query( - String.t(), - [Pyex.Interpreter.pyvalue()], - String.t(), - Pyex.Env.t(), - Pyex.Ctx.t() - ) :: - {term(), Pyex.Env.t(), Pyex.Ctx.t()} - defp run_query(sql, params, url, env, ctx) do - start_mono = System.monotonic_time() - telemetry_meta = %{statement: sql} - - :telemetry.execute( - [:pyex, :query, :start], - %{system_time: System.system_time()}, - telemetry_meta - ) - - case parse_url(url) do - {:ok, opts} -> - conn = Postgrex.start_link(opts) - - case conn do - {:ok, conn} -> - try do - pg_params = Enum.map(params, &to_pg/1) - result = Postgrex.query(conn, sql, pg_params, timeout: 15_000) - - case result do - {:ok, 
%Postgrex.Result{columns: nil}} -> - duration = System.monotonic_time() - start_mono - - :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ - statement: sql, - rows_returned: 0 - }) - - {[], env, ctx} - - {:ok, %Postgrex.Result{columns: cols, rows: rows, num_rows: n}} -> - duration = System.monotonic_time() - start_mono - - :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ - statement: sql, - rows_returned: n - }) - - result = Enum.map(rows, fn row -> row_to_dict(cols, row) end) - {result, env, ctx} - - {:error, %Postgrex.Error{postgres: %{message: msg}}} -> - duration = System.monotonic_time() - start_mono - - :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ - statement: sql, - error: msg - }) - - {{:exception, "sql.DatabaseError: #{msg}"}, env, ctx} - - {:error, reason} -> - duration = System.monotonic_time() - start_mono - - :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ - statement: sql, - error: inspect(reason) - }) - - {{:exception, "sql.DatabaseError: #{inspect(reason)}"}, env, ctx} + :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ + statement: sql, + error: msg + }) + + {{:exception, "sql.DatabaseError: #{msg}"}, env, ctx} + + {:error, reason} -> + duration = System.monotonic_time() - start_mono + + :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ + statement: sql, + error: inspect(reason) + }) + + {{:exception, "sql.DatabaseError: #{inspect(reason)}"}, env, ctx} + end + after + GenServer.stop(conn) end - after - GenServer.stop(conn) - end - {:error, reason} -> - duration = System.monotonic_time() - start_mono + {:error, reason} -> + duration = System.monotonic_time() - start_mono - :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ - statement: sql, - error: inspect(reason) - }) + :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ + statement: sql, + error: inspect(reason) + }) - {{:exception, 
"sql.ConnectionError: #{inspect(reason)}"}, env, ctx} - end + {{:exception, "sql.ConnectionError: #{inspect(reason)}"}, env, ctx} + end - {:error, msg} -> - duration = System.monotonic_time() - start_mono + {:error, msg} -> + duration = System.monotonic_time() - start_mono - :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ - statement: sql, - error: msg - }) + :telemetry.execute([:pyex, :query, :stop], %{duration: duration}, %{ + statement: sql, + error: msg + }) - {{:exception, msg}, env, ctx} + {{:exception, msg}, env, ctx} + end end - end - @spec parse_url(String.t()) :: {:ok, keyword()} | {:error, String.t()} - defp parse_url(url) do - case URI.parse(url) do - %URI{scheme: scheme, host: host, port: port, path: path, userinfo: userinfo} - when scheme in ["postgres", "postgresql"] and is_binary(host) -> - database = - case path do - "/" <> db -> db - _ -> nil - end + @spec parse_url(String.t()) :: {:ok, keyword()} | {:error, String.t()} + defp parse_url(url) do + case URI.parse(url) do + %URI{scheme: scheme, host: host, port: port, path: path, userinfo: userinfo} + when scheme in ["postgres", "postgresql"] and is_binary(host) -> + database = + case path do + "/" <> db -> db + _ -> nil + end - {username, password} = parse_userinfo(userinfo) + {username, password} = parse_userinfo(userinfo) - opts = - [ - hostname: host, - port: port || 5432, - database: database, - username: username, - password: password, - show_sensitive_data_on_connection_error: false - ] - |> Enum.reject(fn {_k, v} -> is_nil(v) end) + opts = + [ + hostname: host, + port: port || 5432, + database: database, + username: username, + password: password, + show_sensitive_data_on_connection_error: false + ] + |> Enum.reject(fn {_k, v} -> is_nil(v) end) - {:ok, opts} + {:ok, opts} - _ -> - {:error, "sql.ConnectionError: invalid DATABASE_URL"} + _ -> + {:error, "sql.ConnectionError: invalid DATABASE_URL"} + end end - end - @spec parse_userinfo(String.t() | nil) :: {String.t() | 
nil, String.t() | nil} - defp parse_userinfo(nil), do: {nil, nil} + @spec parse_userinfo(String.t() | nil) :: {String.t() | nil, String.t() | nil} + defp parse_userinfo(nil), do: {nil, nil} - defp parse_userinfo(info) do - case String.split(info, ":", parts: 2) do - [user, pass] -> {URI.decode(user), URI.decode(pass)} - [user] -> {URI.decode(user), nil} + defp parse_userinfo(info) do + case String.split(info, ":", parts: 2) do + [user, pass] -> {URI.decode(user), URI.decode(pass)} + [user] -> {URI.decode(user), nil} + end end - end - @spec to_pg(Pyex.Interpreter.pyvalue()) :: term() - defp to_pg(nil), do: nil - defp to_pg(val) when is_binary(val), do: val - defp to_pg(val) when is_integer(val), do: val - defp to_pg(val) when is_float(val), do: Decimal.from_float(val) - defp to_pg(true), do: true - defp to_pg(false), do: false - defp to_pg(val), do: to_string(val) - - @spec row_to_dict([String.t()], [term()]) :: PyDict.t() - defp row_to_dict(columns, values) do - columns - |> Enum.zip(values) - |> Enum.map(fn {col, val} -> {col, from_pg(val)} end) - |> PyDict.from_pairs() - end + @spec to_pg(Pyex.Interpreter.pyvalue()) :: term() + defp to_pg(nil), do: nil + defp to_pg(val) when is_binary(val), do: val + defp to_pg(val) when is_integer(val), do: val + defp to_pg(val) when is_float(val), do: Decimal.from_float(val) + defp to_pg(true), do: true + defp to_pg(false), do: false + defp to_pg(val), do: to_string(val) + + @spec row_to_dict([String.t()], [term()]) :: PyDict.t() + defp row_to_dict(columns, values) do + columns + |> Enum.zip(values) + |> Enum.map(fn {col, val} -> {col, from_pg(val)} end) + |> PyDict.from_pairs() + end - @spec from_pg(term()) :: Pyex.Interpreter.pyvalue() - defp from_pg(nil), do: nil + @spec from_pg(term()) :: Pyex.Interpreter.pyvalue() + defp from_pg(nil), do: nil - defp from_pg(val) when is_binary(val) do - if byte_size(val) == 16 and not String.printable?(val) do - <<a::32, b::16, c::16, d::16, e::48>> = val + defp from_pg(val) when is_binary(val) do + if byte_size(val) == 16 
and not String.printable?(val) do + <<a::32, b::16, c::16, d::16, e::48>> = val - :io_lib.format( - "~8.16.0b-~4.16.0b-~4.16.0b-~4.16.0b-~12.16.0b", - [a, b, c, d, e] - ) - |> to_string() - else - val + :io_lib.format( + "~8.16.0b-~4.16.0b-~4.16.0b-~4.16.0b-~12.16.0b", + [a, b, c, d, e] + ) + |> to_string() + else + val + end end - end - defp from_pg(val) when is_integer(val), do: val - defp from_pg(val) when is_float(val), do: val - defp from_pg(true), do: true - defp from_pg(false), do: false - defp from_pg(%Decimal{} = d), do: Decimal.to_float(d) - defp from_pg(%Date{} = d), do: Date.to_iso8601(d) - defp from_pg(%Time{} = t), do: Time.to_iso8601(t) - defp from_pg(%NaiveDateTime{} = dt), do: NaiveDateTime.to_iso8601(dt) - defp from_pg(%DateTime{} = dt), do: DateTime.to_iso8601(dt) - defp from_pg(list) when is_list(list), do: Enum.map(list, &from_pg/1) - defp from_pg(%Postgrex.INET{address: addr}), do: :inet.ntoa(addr) |> to_string() - - defp from_pg(val) when is_map(val) do - pairs = Enum.map(val, fn {k, v} -> {to_string(k), from_pg(v)} end) - PyDict.from_pairs(pairs) - end + defp from_pg(val) when is_integer(val), do: val + defp from_pg(val) when is_float(val), do: val + defp from_pg(true), do: true + defp from_pg(false), do: false + defp from_pg(%Decimal{} = d), do: Decimal.to_float(d) + defp from_pg(%Date{} = d), do: Date.to_iso8601(d) + defp from_pg(%Time{} = t), do: Time.to_iso8601(t) + defp from_pg(%NaiveDateTime{} = dt), do: NaiveDateTime.to_iso8601(dt) + defp from_pg(%DateTime{} = dt), do: DateTime.to_iso8601(dt) + defp from_pg(list) when is_list(list), do: Enum.map(list, &from_pg/1) + defp from_pg(%Postgrex.INET{address: addr}), do: :inet.ntoa(addr) |> to_string() + + defp from_pg(val) when is_map(val) do + pairs = Enum.map(val, fn {k, v} -> {to_string(k), from_pg(v)} end) + PyDict.from_pairs(pairs) + end - defp from_pg(val), do: to_string(val) + defp from_pg(val), do: to_string(val) + end end diff --git a/mix.exs b/mix.exs index 903641d..d3ec6a8 100644 --- a/mix.exs +++ b/mix.exs @@ 
-41,6 +41,7 @@ defmodule Pyex.MixProject do {:jason, "~> 1.4"}, {:nimble_parsec, "~> 1.4"}, {:req, "~> 0.5"}, + {:decimal, "~> 2.0"}, {:vfs, "~> 0.1.0"}, {:postgrex, "~> 0.22", optional: true}, {:yaml_elixir, "~> 2.12", only: :test}, diff --git a/scripts/consumer_smoke.sh b/scripts/consumer_smoke.sh new file mode 100755 index 0000000..c31020c --- /dev/null +++ b/scripts/consumer_smoke.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# +# Proves pyex compiles AND runs as a *bare* dependency — with none of its +# optional backends (:postgrex for `sql`, :explorer for `pandas`) installed. +# +# pyex's own build always has the optional deps present (they're `optional: true`, +# which still fetches them in the defining project), so its own compile/tests +# cannot catch the "references an undeclared or optional dep at compile time" +# regression class. A throwaway consumer that depends on pyex and nothing else +# can. This is the guard that keeps `{:pyex, "~> x"}` actually installable. +set -euo pipefail + +PYEX_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT +cd "$WORK" + +cat > mix.exs < 1.18", + deps: [{:pyex, path: "$PYEX_DIR"}] + ] + end + + def application, do: [extra_applications: [:logger]] +end +EOF +mkdir -p lib config +cp "$PYEX_DIR/.tool-versions" . 2>/dev/null || true + +echo "==> resolving + compiling pyex as a bare dependency" +mix deps.get +mix compile + +echo "==> asserting behavior with no optional backends present" +mix run -e ' + # The optional backends must genuinely be absent for this to mean anything. + false = Code.ensure_loaded?(Explorer) + false = Code.ensure_loaded?(Postgrex) + + # Core interpreter runs. + {:ok, [1, 2, 3], _} = Pyex.run("sorted([3, 1, 2])") + {:ok, ~s({"a": 1}), _} = Pyex.run("import json\njson.dumps({\"a\": 1})") + + # Optional features degrade to a clean ImportError — never a host crash. 
+ {:error, %Pyex.Error{kind: :import}} = Pyex.run("import pandas") + {:error, %Pyex.Error{kind: :import}} = Pyex.run("import sql") + + IO.puts("consumer smoke: OK — pyex compiles + runs with no optional deps") +' From 7e4854d1773d9693ab01f97e3b0bd746795b2ce1 Mon Sep 17 00:00:00 2001 From: Ivar Vong Date: Tue, 30 Jun 2026 09:30:38 -0400 Subject: [PATCH 2/3] docs(mix): document the optional-backend strategy at the deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture why pandas/sql are optional and how to add another optional backend, citing the idiomatic shapes (the same ones :explorer uses for its own optional :nx — @compile {:no_warn_undefined} + is_struct guards), with the consumer-smoke CI job as the regression guard. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_019NokzcR7BiAigPgC78zpk9 --- mix.exs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/mix.exs b/mix.exs index d3ec6a8..fcb9189 100644 --- a/mix.exs +++ b/mix.exs @@ -43,11 +43,29 @@ defmodule Pyex.MixProject do {:req, "~> 0.5"}, {:decimal, "~> 2.0"}, {:vfs, "~> 0.1.0"}, + # Optional backends. Two stdlib modules need a heavy/native dependency the + # core has no reason to carry, so they're optional: the feature lights up + # when the caller adds the dep, and `import sql`/`import pandas` raise a + # clean Python ImportError otherwise (`Pyex.Stdlib.fetch/1` degrades). For + # a consumer that doesn't add them, pyex must still COMPILE without them — + # `scripts/consumer_smoke.sh` (the `consumer-smoke` CI job) proves it does, + # the regression class this project's own build can't catch since the + # optional deps are present here. + # + # Adding another optional backend? 
Use the same shapes — they're the + # idiomatic ones (and exactly how `:explorer` itself treats *its* optional + # `:nx`: `@compile {:no_warn_undefined, Nx}` + `is_struct(x, Nx.Tensor)`): + # - `optional: true` here; + # - wrap the producer module in `if Code.ensure_loaded?(Dep) do …` + # (struct patterns/expansion are a hard compile-time requirement); + # - `@compile {:no_warn_undefined, [Dep.Mod]}` for scattered calls + # elsewhere, and `is_struct(x, Dep.Struct)` (a runtime atom check, no + # compile-time struct) instead of `%Dep.Struct{}` in patterns. {:postgrex, "~> 0.22", optional: true}, + {:explorer, "~> 0.11.1", optional: true}, {:yaml_elixir, "~> 2.12", only: :test}, {:telemetry, "~> 0.4 or ~> 1.0"}, {:ex_doc, "~> 0.35", only: :dev, runtime: false}, - {:explorer, "~> 0.11.1", optional: true}, {:stream_data, "~> 1.1", only: :test}, {:bypass, "~> 2.1", only: :test}, {:tz, "~> 0.28"}, From b5c9102bd31016e9e4724901dff7cc2c8133d34f Mon Sep 17 00:00:00 2001 From: Ivar Vong Date: Tue, 30 Jun 2026 10:39:47 -0400 Subject: [PATCH 3/3] test: consumer-smoke tests the built hex package, not a path dep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A path dep ships the whole working tree, so it can't catch a compile-time file (an @external_resource, a data dir) left out of `package/0`'s `:files`. Verified the gap exists in principle, then closed it: consumer_smoke.sh now runs `mix hex.build`, unpacks the package, and compiles *that* as a bare dependency — the true "installs from hex" gate. Still asserts core runs and pandas/sql degrade to ImportError with no optional backends present. (The package was already complete — 145 files, incl. the spreadsheet.py @external_resource — so this is the guard, not a fix.) 
Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_019NokzcR7BiAigPgC78zpk9 --- .github/workflows/ci.yml | 4 ++-- scripts/consumer_smoke.sh | 42 +++++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7349094..b3db8b6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -216,7 +216,7 @@ jobs: run: mix test --include library_conformance test/pyex/library_conformance/ consumer-smoke: - name: Consumer smoke (installable as a bare dependency) + name: Consumer smoke (installs from the hex package) runs-on: ubuntu-latest steps: @@ -227,5 +227,5 @@ jobs: otp-version: "28.1" elixir-version: "1.19.5" - - name: Compile + run pyex as a bare dependency (no optional backends) + - name: Build the hex package + compile it as a bare dependency (no optional backends) run: bash scripts/consumer_smoke.sh diff --git a/scripts/consumer_smoke.sh b/scripts/consumer_smoke.sh index c31020c..7d56761 100755 --- a/scripts/consumer_smoke.sh +++ b/scripts/consumer_smoke.sh @@ -1,21 +1,31 @@ #!/usr/bin/env bash # -# Proves pyex compiles AND runs as a *bare* dependency — with none of its +# Proves pyex installs and runs the way a real consumer gets it: from the built +# HEX PACKAGE (only the files in `package/0`'s `:files` list), with none of its # optional backends (:postgrex for `sql`, :explorer for `pandas`) installed. # -# pyex's own build always has the optional deps present (they're `optional: true`, -# which still fetches them in the defining project), so its own compile/tests -# cannot catch the "references an undeclared or optional dep at compile time" -# regression class. A throwaway consumer that depends on pyex and nothing else -# can. This is the guard that keeps `{:pyex, "~> x"}` actually installable. +# This catches two regression classes pyex's own build cannot: +# 1. 
Uses-but-doesn't-declare / can't-compile-without an optional dep — pyex's +# own build always has the optional deps present. +# 2. A compile-time file (an @external_resource, a data dir) left out of the +# package's `:files` — invisible to a path dep, which ships the whole repo. set -euo pipefail PYEX_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" WORK="$(mktemp -d)" -trap 'rm -rf "$WORK"' EXIT -cd "$WORK" +trap 'rm -rf "$WORK" "$PYEX_DIR"/pyex-*.tar' EXIT + +# 1. Build the package exactly as `mix hex.publish` would, and unpack the +# contents so we depend on the shipped files, not the working tree. +cd "$PYEX_DIR" +mix deps.get >/dev/null +mix hex.build >/dev/null +tar xf pyex-*.tar -C "$WORK" +mkdir -p "$WORK/pkg" +tar xzf "$WORK/contents.tar.gz" -C "$WORK/pkg" -cat > mix.exs <<EOF +cat > "$WORK/mix.exs" <<EOF elixir: "~> 1.18", - deps: [{:pyex, path: "$PYEX_DIR"}] + deps: [{:pyex, path: "$WORK/pkg"}] ] end def application, do: [extra_applications: [:logger]] end EOF -mkdir -p lib config -cp "$PYEX_DIR/.tool-versions" . 2>/dev/null || true +mkdir -p "$WORK/lib" "$WORK/config" +cp "$PYEX_DIR/.tool-versions" "$WORK/" 2>/dev/null || true -echo "==> resolving + compiling pyex as a bare dependency" +cd "$WORK" +echo "==> resolving + compiling the packaged pyex as a bare dependency" mix deps.get mix compile @@ -44,13 +55,14 @@ mix run -e ' false = Code.ensure_loaded?(Explorer) false = Code.ensure_loaded?(Postgrex) - # Core interpreter runs. + # Core interpreter runs (incl. a stdlib that touches zoneinfo data). {:ok, [1, 2, 3], _} = Pyex.run("sorted([3, 1, 2])") {:ok, ~s({"a": 1}), _} = Pyex.run("import json\njson.dumps({\"a\": 1})") + {:ok, _, _} = Pyex.run("from datetime import datetime, timezone\ndatetime.now(timezone.utc).year") # Optional features degrade to a clean ImportError — never a host crash. 
{:error, %Pyex.Error{kind: :import}} = Pyex.run("import pandas") {:error, %Pyex.Error{kind: :import}} = Pyex.run("import sql") - IO.puts("consumer smoke: OK — pyex compiles + runs with no optional deps") + IO.puts("consumer smoke: OK — the hex package compiles + runs with no optional deps") '