Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,18 @@ jobs:

- name: Run library conformance tests
run: mix test --include library_conformance test/pyex/library_conformance/

# Smoke test from a consumer's point of view: build the hex package and
# compile it as a plain dependency with none of the optional backends present.
consumer-smoke:
name: Consumer smoke (installs from the hex package)
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- uses: erlef/setup-beam@v1
with:
# Quoted so YAML keeps the versions as strings rather than parsing them as numbers.
otp-version: "28.1"
elixir-version: "1.19.5"

# The packaging and bare-dependency compile steps live in the script itself.
- name: Build the hex package + compile it as a bare dependency (no optional backends)
run: bash scripts/consumer_smoke.sh
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,19 @@ Pyex.run(source,
limits: [timeout: 5_000, max_memory_bytes: 50_000_000])
```

### Optional backends

Two stdlib modules light up only when you add their backend dependency, so
the core library stays small and free of heavy native deps:

- **`import pandas`** needs [`:explorer`](https://hex.pm/packages/explorer)
(Polars).
- **`import sql`** needs [`:postgrex`](https://hex.pm/packages/postgrex).

Without them, importing the module raises a normal Python `ImportError` —
nothing else is affected. A CI job (`consumer-smoke`) proves pyex compiles
and runs as a bare dependency with neither installed.

## Sandbox model

Pyex is a tree-walking interpreter, not an `eval`. Python source
Expand Down
1 change: 1 addition & 0 deletions lib/pyex/builtins.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
defmodule Pyex.Builtins do
@compile {:no_warn_undefined, [Explorer.Series, Explorer.DataFrame]}
@moduledoc """
Built-in Python functions available without import.

Expand Down
7 changes: 4 additions & 3 deletions lib/pyex/interpreter.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
defmodule Pyex.Interpreter do
@compile {:no_warn_undefined, [Explorer.Series, Explorer.DataFrame]}
@moduledoc """

Tree-walking evaluator for the Pyex AST.
Expand Down Expand Up @@ -73,9 +74,9 @@ defmodule Pyex.Interpreter do
| {:generator_error, [pyvalue()], String.t()}
| {:iterator, non_neg_integer()}
| {:super_proxy, pyvalue(), [pyvalue()]}
| {:pandas_series, Explorer.Series.t()}
| {:pandas_rolling, Explorer.Series.t(), pos_integer()}
| {:pandas_dataframe, Explorer.DataFrame.t()}
| {:pandas_series, term()}
| {:pandas_rolling, term(), pos_integer()}
| {:pandas_dataframe, term()}
| {:py_dict, %{optional(pyvalue()) => pyvalue()}, [pyvalue()]}
| {:pyex_decimal, Decimal.t()}
| {:object, integer()}
Expand Down
9 changes: 6 additions & 3 deletions lib/pyex/interpreter/binary_ops.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
defmodule Pyex.Interpreter.BinaryOps do
@compile {:no_warn_undefined, [Explorer.Series]}
@moduledoc """
Binary-operation evaluation for `Pyex.Interpreter`.

Expand Down Expand Up @@ -233,7 +234,7 @@ defmodule Pyex.Interpreter.BinaryOps do

case result do
{:exception, _} = err -> err
%Explorer.Series{} = s -> {:pandas_series, s}
s when is_struct(s, Explorer.Series) -> {:pandas_series, s}
end
end

Expand Down Expand Up @@ -957,11 +958,13 @@ defmodule Pyex.Interpreter.BinaryOps do
defp series_unwrap(true), do: 1
defp series_unwrap(false), do: 0

# Element-wise boolean AND of two Explorer series.
#
# The heads are guarded with `is_struct/2` instead of matching
# `%Explorer.Series{}`: a struct pattern is expanded at compile time and needs
# Explorer to be present, whereas the guard only compares the struct name, so
# this module still compiles when the optional :explorer dependency is missing.
defp series_bool_and(l, r)
     when is_struct(l, Explorer.Series) and is_struct(r, Explorer.Series) do
  Explorer.Series.and(l, r)
end

# Element-wise boolean OR of two Explorer series.
#
# The heads are guarded with `is_struct/2` instead of matching
# `%Explorer.Series{}`: a struct pattern is expanded at compile time and needs
# Explorer to be present, whereas the guard only compares the struct name, so
# this module still compiles when the optional :explorer dependency is missing.
defp series_bool_or(l, r)
     when is_struct(l, Explorer.Series) and is_struct(r, Explorer.Series) do
  Explorer.Series.or(l, r)
end

Expand Down
1 change: 1 addition & 0 deletions lib/pyex/methods.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
defmodule Pyex.Methods do
@compile {:no_warn_undefined, [Explorer.Series, Explorer.DataFrame]}
@moduledoc """

Method dispatch for Python built-in types.
Expand Down
24 changes: 21 additions & 3 deletions lib/pyex/stdlib.ex
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,32 @@ defmodule Pyex.Stdlib do
@spec fetch(String.t()) :: {:ok, Pyex.Stdlib.Module.module_value()} | :unknown_module
def fetch(name) do
  case Map.fetch(@modules, name) do
    # Some stdlib modules are compiled only when their optional backend dep is
    # present (`sql` needs :postgrex, `pandas` needs :explorer). When the dep
    # isn't installed the module isn't defined, so treat the import like that
    # of any other absent module: a clean ImportError, not a host crash.
    #
    # This must be the only `{:ok, mod}` clause: an unconditional clause placed
    # before it would call `mod.module_value()` on an undefined module and make
    # the availability check unreachable.
    {:ok, mod} ->
      if Code.ensure_loaded?(mod) and function_exported?(mod, :module_value, 0) do
        {:ok, mod.module_value()}
      else
        :unknown_module
      end

    :error ->
      :unknown_module
  end
end

@doc """
Returns a sorted list of the stdlib module names that can currently be imported.

A registered name is included only when its implementing module can be loaded
and exports `module_value/0`. Modules that were not compiled (for example
because an optional backend dependency is not installed) are left out.
"""
@spec module_names() :: [String.t()]
def module_names do
  loadable =
    for {name, mod} <- @modules,
        Code.ensure_loaded?(mod) and function_exported?(mod, :module_value, 0),
        do: name

  Enum.sort(loadable)
end
end
224 changes: 113 additions & 111 deletions lib/pyex/stdlib/pandas.ex
Original file line number Diff line number Diff line change
@@ -1,121 +1,123 @@
# The module is defined only when Explorer is available at compile time, so a
# project that depends on pyex without :explorer never compiles code that
# references Explorer functions.
if Code.ensure_loaded?(Explorer) do
  defmodule Pyex.Stdlib.Pandas do
    @moduledoc """
    Python `pandas` module backed by Explorer (Polars/Rust).

    Provides `pd.Series()` for creating series from lists and
    `pd.DataFrame()` for creating DataFrames from dicts.
    Series and DataFrame methods (`.mean()`, `.rolling()`,
    `.sum()`, etc.) are dispatched via `Pyex.Methods`.

    Requires the optional `explorer` dependency at runtime.
    All heavy computation runs in Rust via Polars NIFs --
    no interpreter loop overhead for numeric operations.

    ## Supported API

    ### Module-level

        import pandas as pd
        s = pd.Series([1, 2, 3])
        df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    ### Series methods

        s.sum(), s.mean(), s.std(), s.min(), s.max(), s.median()
        s.cumsum(), s.diff(), s.shift(n), s.abs()
        s.rolling(window)        # returns Rolling object
        s.tolist()               # convert back to Python list
        len(s)                   # via __len__

    ### Rolling methods

        s.rolling(50).mean()
        s.rolling(50).sum()
        s.rolling(50).min()
        s.rolling(50).max()
        s.rolling(50).std()

    ### DataFrame methods

        df["col"]                # column access returns Series
        df.columns               # list of column names
        len(df)                  # number of rows

    ### Vectorized operations

        s + s, s - s, s * s, s / s    # element-wise arithmetic
        s > 0, s < 0, s >= 0, s <= 0  # element-wise comparison (returns bool Series)
        s[bool_series]                # boolean indexing
    """

    @behaviour Pyex.Stdlib.Module

    # Message returned by the stub constructors when Explorer cannot be loaded.
    @missing_explorer_message "ImportError: pandas requires the :explorer dependency. " <>
                                "Add {:explorer, \"~> 0.10\"} to your mix.exs deps."

    @impl Pyex.Stdlib.Module
    @spec module_value() :: Pyex.Stdlib.Module.module_value()
    def module_value do
      if Code.ensure_loaded?(Explorer.Series) do
        %{
          "Series" => {:builtin, &do_series/1},
          "DataFrame" => {:builtin, &do_dataframe/1}
        }
      else
        # Reached only if this module was compiled with Explorer but
        # Explorer.Series cannot be loaded when the module value is built.
        # Both constructors then return the same ImportError.
        missing = {:builtin, fn _ -> {:exception, @missing_explorer_message} end}

        %{"Series" => missing, "DataFrame" => missing}
      end
    end

    # Builds a series from a single argument: either an interpreter list tuple
    # (whose elements are stored reversed) or a plain Elixir list.
    @spec do_series([term()]) :: {:pandas_series, term()}
    defp do_series([{:py_list, reversed, _}]) do
      {:pandas_series, Explorer.Series.from_list(coerce_values(Enum.reverse(reversed)))}
    end

    defp do_series([values]) when is_list(values) do
      {:pandas_series, Explorer.Series.from_list(coerce_values(values))}
    end

    # Builds a DataFrame from a single argument: either an interpreter dict
    # tuple or a plain Elixir map of column name => values.
    @spec do_dataframe([term()]) :: {:pandas_dataframe, term()}
    defp do_dataframe([{:py_dict, _, _} = dict]), do: build_dataframe(Pyex.PyDict.items(dict))
    defp do_dataframe([dict]) when is_map(dict), do: build_dataframe(dict)

    # Normalizes every column's values and hands the columns to Explorer.
    @spec build_dataframe(Enumerable.t()) :: {:pandas_dataframe, term()}
    defp build_dataframe(pairs) do
      columns =
        Enum.map(pairs, fn {name, values} ->
          {name, coerce_values(normalize_list(values))}
        end)

      {:pandas_dataframe, Explorer.DataFrame.new(columns)}
    end

    # Turns a column value into a plain list; a scalar becomes a one-element list.
    @spec normalize_list(term()) :: [term()]
    defp normalize_list({:py_list, reversed, _}), do: Enum.reverse(reversed)
    defp normalize_list(list) when is_list(list), do: list
    defp normalize_list(other), do: [other]

    # Currently returns every element unchanged, `nil` included, which is why
    # the spec is `[term()]` and not a list of numbers or strings.
    @spec coerce_values([term()]) :: [term()]
    defp coerce_values(values) do
      Enum.map(values, fn
        nil -> nil
        v -> v
      end)
    end
  end
end
Loading
Loading