diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bbbb95..a2c9c3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +## 0.2.0 + +Targets powerio C ABI version 4 (powerio v0.3.0); an older library or artifact +is refused at first use. Breaking. + +- `to_dense` renames: `reference_bus → ref_bus_index`, `n_components → + n_islands` (both topology scalars; `ref_bus_index` is a dense 0-based index, + unlike the 1-based ids in `branch.from`/`to`). +- `PowerIO.warnings(net)`: the fidelity warnings a lossy reader attaches to the + handle, sized exactly. Unexported: `text, warnings = convert_file(...)` stays + the documented destructuring idiom. +- Zero-copy `to_arrow(...; copy=false)` columns are `ArrowColumn`s that root + the shared producer buffers, so a column extracted from its `ArrowTable` + stays valid on its own. `close(t)` releases the buffers eagerly; the table is + no longer a finalizable object, so 0.1.0 code calling `finalize(t)` must move + to `close(t)`. +- Use after free hardening: every helper that lowers a `NetworkHandle` to a raw + pointer runs its ccalls under `GC.@preserve`; dense extractors pass caps and + verify the returned totals. + ## 0.1.0 - `read_gridfm` / `read_gridfm_scenarios` read a gridfm-datakit Parquet dataset diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ef032e7..a3c15e6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,6 +40,10 @@ After each powerio binary release: regenerates Artifacts.toml from the release tarballs and opens a PR when anything changed. If the release bumped `PIO_ABI_VERSION`, update the constant and the affected ccalls in that PR (see "ABI lockstep" above). + Never register a version whose `PIO_ABI_VERSION` does not match the ABI of + the tarballs Artifacts.toml points at: the bundled artifact would fail the + load handshake, so the registered package errors at first use for everyone + on the default install path. 2. 
Merge the artifacts PR if one was opened, then dispatch "Register Package" with the new version (no leading v, or major/minor/patch to bump). It commits the Project.toml bump to main and posts the diff --git a/Project.toml b/Project.toml index 3c71df3..6d07632 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PowerIO" uuid = "05ed8b54-f668-4096-9d0d-e8c3dd9dc169" authors = ["Sam Talkington "] -version = "0.1.0" +version = "0.2.0" [deps] JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" diff --git a/README.md b/README.md index 17596d9..0f88d18 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Supported formats (each reads and writes, so any pair converts): - [PowerWorld](https://www.powerworld.com/WebHelp/Content/MainDocumentation_HTML/Case_Formats.htm) `.aux` - [PowerModels.jl](https://github.com/lanl-ansi/PowerModels.jl) network data JSON - [egret](https://pypi.org/project/gridx-egret/) `ModelData` JSON +- [pandapower](https://www.pandapower.org/) network JSON A same-format round trip is byte exact; cross-format conversion reports fields the target cannot represent as warnings. @@ -77,6 +78,7 @@ to_matpower(net) # ::String, byte exact when the input was MAT to_json(net) # the JSON transport to_format(net, "powermodels-json") # (text, warnings) from_json(to_json(net)) # Network with a live handle +PowerIO.warnings(net) # fidelity warnings the reader attached to net ``` `to_normalized` derives a computation-ready copy: powers per unit, angles in @@ -96,21 +98,23 @@ d = to_dense(net) # or to_dense("case14.m") for parse + extract d.n, d.m, d.ng # bus / branch / generator counts d.bus_ids # 1-based ids; row k of every per-bus table is bus_ids[k] d.branch.from, d.branch.x # branch endpoints and reactances -d.reference_bus, d.n_components, d.is_radial +d.ref_bus_index, d.n_islands, d.is_radial ``` `to_arrow` brings one table across the Arrow C Data Interface (needs the library built with `--features arrow`; `arrow_available()` reports it). 
By default it returns a NamedTuple of **owned** Julia Vectors (Tables.jl-shaped, flows into `Arrow.write`, `DataFrame`, etc.), so there is no lifetime caveat. `copy=false` -returns a zero-copy `ArrowTable` whose columns view the producer's memory; keep it -alive while reading them. For the numeric tables alone, `to_dense` is a copy-free, -`unsafe_wrap`-free fast path (the C ABI fills Julia-owned buffers). +returns a zero-copy `ArrowTable` whose columns view the producer's memory; each +column roots the shared buffers, so a column extracted from the table stays +valid on its own, and `close(z)` frees the buffers without waiting for GC. For +the numeric tables alone, `to_dense` is a copy-free, `unsafe_wrap`-free fast +path (the C ABI fills Julia-owned buffers). ```julia t = to_arrow(net, :branch) # :bus, :branch, :gen, :load, :shunt; owned columns t.from, t.x, t.tap -z = to_arrow(net, :branch; copy=false) # zero-copy views; keep `z` alive while reading +z = to_arrow(net, :branch; copy=false) # zero-copy views; close(z) releases eagerly ``` `read_gridfm` reads a gridfm-datakit Parquet dataset back into a `Network` — the diff --git a/docs/binary.md b/docs/binary.md index e1fe026..b796854 100644 --- a/docs/binary.md +++ b/docs/binary.md @@ -7,7 +7,7 @@ per-platform binary; users never compile it. 1. A version tag on [eigenergy/powerio](https://github.com/eigenergy/powerio) triggers its `release-binaries` workflow, which builds - `libpowerio_capi..tar.gz` with the `arrow` feature for Linux glibc + `libpowerio_capi..tar.gz` with the `arrow` and `gridfm` features for Linux glibc (x86_64, aarch64), macOS (x86_64, arm64), and Windows (x86_64), and attaches the five tarballs to the GitHub release. 
Each tarball holds the cdylib under `lib/` (`bin/` on Windows), the C header under diff --git a/gen/build_tarballs.jl b/gen/build_tarballs.jl index 8a22027..9804121 100644 --- a/gen/build_tarballs.jl +++ b/gen/build_tarballs.jl @@ -15,17 +15,19 @@ using BinaryBuilder name = "PowerIO" -# Tracks the powerio-capi *crate* version (the binary), unified with the -# PowerIO.jl *package* version for the first release. -version = v"0.0.1" +# Tracks the powerio-capi *crate* version (the binary), not the PowerIO.jl +# package version: the binding is 0.2.0 while the v4 binary ships as powerio +# v0.3.0. +version = v"0.3.0" -# Must pin a commit reporting `PIO_ABI_VERSION` 3: the binding's load-time -# handshake refuses anything else, and `to_arrow` needs the Arrow export. Pinned -# to the v0.0.1 release tag (the right long-term anchor for a reproducible -# build and the Yggdrasil submission). +# Must pin a commit reporting `PIO_ABI_VERSION` 4: the binding's load-time +# handshake refuses anything else. Pinned to the powerio#116 head (the ABI v4 +# lockstep partner of this release); move to the v0.3.0 release tag once it is +# cut (the right long-term anchor for a reproducible build and the Yggdrasil +# submission). sources = [ GitSource("https://github.com/eigenergy/powerio.git", - "db944afe3819b2efd67060eda056003846f8e8ad"), + "b92d9abee9a8e22df5f79d3a9d41caf30813fa1f"), ] # `cargo build` writes the cdylib under target/${rust_target}/release. Names differ @@ -33,11 +35,10 @@ sources = [ # (windows, no lib prefix, ships under bin/). `install_license` keeps AutoMerge happy. script = raw""" cd $WORKSPACE/srcdir/powerio -# --features arrow ships pio_export_arrow (the zero-copy Arrow C Data Interface -# export to_arrow calls); --features gridfm ships pio_read_gridfm (the +# --features arrow ships pio_to_arrow (the zero-copy Arrow C Data Interface +# export to_arrow calls); --features gridfm ships pio_read_dir / pio_scenario_ids (the # gridfm-datakit Parquet reader read_gridfm calls). 
The base ABI is identical -# without them. NOTE: the `sources` commit above must include pio_read_gridfm -# (powerio PR #70 and later) before re-cutting the artifact with gridfm enabled. +# without them. cargo build --release -p powerio-capi --target ${rust_target} --features arrow,gridfm out=target/${rust_target}/release if [[ "${target}" == *-mingw32* ]]; then diff --git a/src/PowerIO.jl b/src/PowerIO.jl index 0c6d1a5..efe2932 100644 --- a/src/PowerIO.jl +++ b/src/PowerIO.jl @@ -37,6 +37,8 @@ using JSON3 using LazyArtifacts import Libdl +# `warnings` stays unexported (accessor surface, like `n_buses`): the documented +# destructuring convention `text, warnings = convert_file(...)` would shadow it. export Network, parse_file, parse_str, from_json, convert_file, to_format, to_normalized, to_json, to_dense, to_matpower, to_arrow, ArrowTable, to_powermodels, from_powermodels, to_powerdata, parse_ac_power_data, @@ -71,7 +73,11 @@ Point PowerIO at a locally built `libpowerio_capi` (`cargo build -p powerio-capi --release` in the PowerIO Rust tree → `target/release/libpowerio_capi.{dylib,so}`). A development override that wins over the bundled artifact. """ -set_library!(path::AbstractString) = (_LIBRARY[] = String(path)) +function set_library!(path::AbstractString) + _LIBRARY[] = String(path) + _ABI_OK[] = false # the new library must pass its own handshake + return +end function _lib() isempty(_LIBRARY[]) || return _LIBRARY[] @@ -122,12 +128,17 @@ end # # The C ABI carries an integer ABI version (`pio_abi_version`, added alongside the # typed extractors). This binding targets exactly `PIO_ABI_VERSION`; bump the two in -# lockstep when an existing `pio_*` signature or the JSON transport schema changes. +# lockstep when an existing `pio_*` signature or the snapshot schema changes. 
# Checking it once at first use turns "library predates this binding" and "library is # from an incompatible commit" into a clear error at the boundary, instead of a # cryptic ccall fault (a wrong signature) or silently wrong numbers deep in a solver. +# +# ABI 4 is the naming-grammar freeze: format strings instead of format symbols +# (the JSON transport is the `powerio-json` format through pio_to_format / +# pio_parse_str), cap/count array extractors, byte-length pio_warnings, and the +# bus/node/branch vocabulary. See powerio-capi's header preamble. -const PIO_ABI_VERSION = UInt32(3) +const PIO_ABI_VERSION = UInt32(4) const _ABI_OK = Ref{Bool}(false) """ @@ -186,10 +197,47 @@ function library_available() end end +# Shared probe behind `arrow_available`/`gridfm_available`: true if the resolved +# library exports `sym` (the feature-gated entry points come and go with cargo +# features, not the ABI version). +function _exports_symbol(sym::Symbol) + try + handle = Libdl.dlopen(_lib()) + try + return Libdl.dlsym(handle, sym; throw_error=false) !== nothing + finally + Libdl.dlclose(handle) + end + catch e + @debug "PowerIO: $sym probe failed" exception = (e, catch_backtrace()) + return false + end +end + const _ERRLEN = 512 +# Per-call fidelity warnings (pio_to_format / pio_convert_file) can run long on a +# lossy conversion; give them headroom. Overflow truncates silently (the C side +# cuts on a UTF-8 boundary, no signal). Handle-attached warnings size exactly via +# pio_warnings instead. +const _WARNLEN = 4096 # --- handle layer ------------------------------------------------------- +# The allocating library's `pio_network_free`, memoized per resolved path: +# resolving `_lib()` at finalization time would cross allocators after a +# `set_library!` swap. The un-dlclosed handle deliberately pins the library so +# the pointer stays valid for every outstanding finalizer. 
+const _FREE_FN = Ref{Ptr{Cvoid}}(C_NULL) +const _FREE_FN_LIB = Ref{String}("") +function _network_free_fn() + lib = _lib() + if _FREE_FN[] == C_NULL || _FREE_FN_LIB[] != lib + _FREE_FN[] = Libdl.dlsym(Libdl.dlopen(lib), :pio_network_free) + _FREE_FN_LIB[] = lib + end + return _FREE_FN[] +end + """ NetworkHandle @@ -200,9 +248,12 @@ mutable struct NetworkHandle ptr::Ptr{Cvoid} function NetworkHandle(ptr::Ptr{Cvoid}) ptr == C_NULL && error("PowerIO: null network handle") + # Resolve before `new`: a failed lookup must not strand a handle with + # no finalizer attached. + free = _network_free_fn() h = new(ptr) finalizer(h) do x - x.ptr == C_NULL || ccall((:pio_network_free, _lib()), Cvoid, (Ptr{Cvoid},), x.ptr) + x.ptr == C_NULL || ccall(free, Cvoid, (Ptr{Cvoid},), x.ptr) x.ptr = C_NULL end return h @@ -216,6 +267,18 @@ _lib_call_error(e) = error( "(`cargo build -p powerio-capi --release` in a sibling powerio checkout) " * "or set POWERIO_CAPI / call `set_library!`. Underlying: $e") +# Sibling of `_lib_call_error` for the feature-gated entry points: the ccall threw +# because the resolved library lacks `sym`. Anything other than the missing +# symbol/library ErrorException (e.g. an ArgumentError from argument conversion) +# is not a toolchain problem — rethrow it untouched. +function _feature_call_error(fname::AbstractString, sym::AbstractString, + feature::AbstractString, e) + e isa ErrorException || throw(e) + error("PowerIO.$fname: could not call $sym: the C ABI at \"$(_lib())\" was " * + "built without the $feature feature. Rebuild with " * + "`cargo build -p powerio-capi --release --features $feature`. Underlying: $e") +end + function _parse_handle(path::AbstractString; from=nothing) _ensure_compatible() err = zeros(UInt8, _ERRLEN) @@ -233,8 +296,10 @@ function _parse_handle(path::AbstractString; from=nothing) end # In-memory sibling of `_parse_handle`: parse `text` under an explicit `format` -# (no path, so no extension to infer from) via `pio_parse_str`. 
-function _parse_handle_str(text::AbstractString, format::AbstractString) +# (no path, so no extension to infer from) via `pio_parse_str`. `what` labels the +# error with the public entry point (`from_json` also lands here). +function _parse_handle_str(text::AbstractString, format::AbstractString, + what::AbstractString="parse_str") _ensure_compatible() err = zeros(UInt8, _ERRLEN) ptr = try @@ -244,35 +309,30 @@ function _parse_handle_str(text::AbstractString, format::AbstractString) catch e _lib_call_error(e) end - ptr == C_NULL && error("PowerIO.parse_str: " * _cstr(err)) + ptr == C_NULL && error("PowerIO.$what: " * _cstr(err)) return NetworkHandle(ptr) end -function _from_json_handle(text::AbstractString) - _ensure_compatible() - err = zeros(UInt8, _ERRLEN) - ptr = try - ccall((:pio_from_json, _lib()), Ptr{Cvoid}, - (Cstring, Ptr{UInt8}, Csize_t), - String(text), err, length(err)) - catch e - _lib_call_error(e) - end - ptr == C_NULL && error("PowerIO.from_json: " * _cstr(err)) - return NetworkHandle(ptr) +# `buf` must stay rooted across the unsafe_string read; without the preserve the +# compiler may drop the buffer after `pointer(buf)` and a GC mid-copy dangles. +_cstr(buf::Vector{UInt8}) = GC.@preserve buf unsafe_string(pointer(buf)) + +# Split a `\n`-joined warn buffer into owned Strings (a SubString would pin the +# whole buffer-sized parent). `capped`: the fixed-size per-call channel truncates +# silently on a UTF-8 boundary at the cap, so a fill within 4 bytes of it is the +# truncation signature — surface it rather than under-count fidelity warnings. +function _warn_lines(buf::Vector{UInt8}; capped::Bool=false) + s = _cstr(buf) + warns = String.(filter(!isempty, split(s, '\n'))) + capped && ncodeunits(s) >= length(buf) - 4 && + push!(warns, "... 
warning list truncated at $(length(buf)) bytes") + return warns end -_cstr(buf::Vector{UInt8}) = unsafe_string(pointer(buf)) - -function _to_json(h::NetworkHandle) - err = zeros(UInt8, _ERRLEN) - s = ccall((:pio_to_json, _lib()), Cstring, (Ptr{Cvoid}, Ptr{UInt8}, Csize_t), - h.ptr, err, length(err)) - s == C_NULL && error("PowerIO: to_json failed: " * _cstr(err)) - json = unsafe_string(s) - ccall((:pio_string_free, _lib()), Cvoid, (Cstring,), s) - return json -end +# Serialize a live handle to the canonical `powerio-json` snapshot — the rich +# transport the accessors materialize from. A format string under ABI 4, not a +# symbol; the snapshot is lossless, so the warnings it returns are empty. +_to_json(h::NetworkHandle) = first(_format_from_handle(h, "powerio-json", "to_json"; warn=false)) # --- public surface ----------------------------------------------------- @@ -310,8 +370,11 @@ From a file `path` the format is inferred from the extension unless `from` is gi `parse_file(IOBuffer(text), "matpower")`. Accepted format tokens (case-insensitive): `"matpower"`/`"m"`, `"powermodels-json"`/ -`"powermodels"`/`"pm"`, `"egret-json"`/`"egret"`, `"psse"`/`"raw"`, -`"powerworld"`/`"aux"`. +`"powermodels"`/`"pm"`, `"egret-json"`/`"egret"`, `"pandapower-json"`/ +`"pandapower"`/`"pp"`, `"psse"`/`"raw"`, `"powerworld"`/`"aux"`, +`"powerio-json"`/`"json"` (the canonical snapshot [`to_json`](@ref) writes). +A PyPSA CSV folder is a directory, not text, so it enters only through the +`path` method: `parse_file(dir; from="pypsa-csv")`. """ function parse_file(path::AbstractString; from=nothing) h = _parse_handle(path; from=from) @@ -334,11 +397,12 @@ parse_str(text::AbstractString, format::AbstractString="matpower") = """ from_json(text) -> Network -Rebuild a live [`Network`](@ref) from the JSON transport produced by -[`to_json`](@ref). The result has a Rust handle, so `to_*` transforms work on it. 
+Rebuild a live [`Network`](@ref) from the snapshot produced by [`to_json`](@ref) +(the `powerio-json` format, validated on read). The result has a Rust handle, so +`to_*` transforms work on it. """ function from_json(text::AbstractString) - h = _from_json_handle(text) + h = _parse_handle_str(text, "powerio-json", "from_json") return Network(JSON3.read(_to_json(h)), h) end @@ -352,12 +416,16 @@ function _live_handle(net::Network, fname::AbstractString) return h end -# Derive a normalized handle from a live one via `pio_to_normalized` (a read-only -# borrow of the source case, so the source handle stays valid). +# Derive a normalized handle from a live one via `pio_normalize` (a read-only +# borrow of the source case, so the source handle stays valid). GC.@preserve: +# Julia frees an object after its last use, not at end of call, so without it a +# GC triggered between extracting `h.ptr` and the ccall could finalize `h` and +# hand the Rust side a freed pointer. Every helper that lowers a handle to a raw +# pointer carries the same guard. function _normalize_handle(h::NetworkHandle) err = zeros(UInt8, _ERRLEN) - ptr = ccall((:pio_to_normalized, _lib()), Ptr{Cvoid}, - (Ptr{Cvoid}, Ptr{UInt8}, Csize_t), h.ptr, err, length(err)) + ptr = GC.@preserve h ccall((:pio_normalize, _lib()), Ptr{Cvoid}, + (Ptr{Cvoid}, Ptr{UInt8}, Csize_t), h.ptr, err, length(err)) ptr == C_NULL && error("PowerIO.to_normalized: " * _cstr(err)) return NetworkHandle(ptr) end @@ -385,75 +453,99 @@ end Serialize `net` to the C ABI's JSON transport, the same text [`from_json`](@ref) reads back. Uses the live handle when present, else the cached `net.data`. """ -to_json(net::Network) = net.handle === nothing ? JSON3.write(net.data) : _to_json(net.handle) - -# Serialize a live handle to MATPOWER `.m` text. 
-function _matpower_from_handle(p::Ptr{Cvoid}, what::AbstractString) - err = zeros(UInt8, _ERRLEN) - s = ccall((:pio_to_matpower, _lib()), Cstring, - (Ptr{Cvoid}, Ptr{UInt8}, Csize_t), - p, err, length(err)) - s == C_NULL && error("PowerIO.to_matpower: " * _cstr(err) * " ($what)") - out = unsafe_string(s) - ccall((:pio_string_free, _lib()), Cvoid, (Cstring,), s) - return out +function to_json(net::Network) + h = net.handle + # A finalized handle (explicit `finalize(net.handle)`) is non-`nothing` but + # null; the cached-data fallback covers it like the handleless case. + return (h === nothing || h.ptr == C_NULL) ? JSON3.write(net.data) : _to_json(h) end -function _format_from_handle(p::Ptr{Cvoid}, to::AbstractString, what::AbstractString) - warn = zeros(UInt8, _ERRLEN) +# Serialize a live handle to the named format via `pio_to_format` — the one text +# serializer; every format is a string under ABI 4. Takes the handle (not a raw +# pointer) and preserves it across the ccall; see `_normalize_handle`. `warn=false` +# skips the warning channel (length 0 discards it, per the header) for callers +# that drop the warnings anyway. +function _format_from_handle(h::NetworkHandle, to::AbstractString, what::AbstractString; + warn::Bool=true) + warnbuf = zeros(UInt8, warn ? _WARNLEN : 0) err = zeros(UInt8, _ERRLEN) - s = ccall((:pio_to_format, _lib()), Cstring, - (Ptr{Cvoid}, Cstring, Ptr{UInt8}, Csize_t, Ptr{UInt8}, Csize_t), - p, String(to), warn, length(warn), err, length(err)) + s = GC.@preserve h ccall((:pio_to_format, _lib()), Cstring, + (Ptr{Cvoid}, Cstring, Ptr{UInt8}, Csize_t, Ptr{UInt8}, Csize_t), + h.ptr, String(to), warnbuf, length(warnbuf), err, length(err)) s == C_NULL && error("PowerIO.to_format: " * _cstr(err) * " ($what)") text = unsafe_string(s) ccall((:pio_string_free, _lib()), Cvoid, (Cstring,), s) - warnings = filter(!isempty, split(_cstr(warn), '\n')) - return (text, warnings) + return (text, warn ? 
_warn_lines(warnbuf; capped=true) : String[]) end """ to_matpower(net::Network) -> String -Serialize `net` to MATPOWER `.m` text, byte exact when the input was MATPOWER. For a -file in one shot use [`convert_file`](@ref)`(path, "matpower")`. +Serialize `net` to MATPOWER `.m` text, byte exact when the input was MATPOWER — +a convenience over [`to_format`](@ref)`(net, "matpower")` that drops the +warnings. For a file in one shot use [`convert_file`](@ref)`(path, "matpower")`. """ to_matpower(net::Network) = - _matpower_from_handle(_live_handle(net, "to_matpower").ptr, repr(network_name(net))) + first(_format_from_handle(_live_handle(net, "to_matpower"), "matpower", + repr(network_name(net)); warn=false)) """ to_format(net::Network, to) -> (text, warnings) Serialize a parsed network to format `to` without reparsing the input file. -Returns the target text and any fidelity warnings. +Returns the target text and any fidelity warnings. `"powerio-json"` (alias +`"json"`) is the canonical lossless snapshot [`from_json`](@ref) reads back. """ to_format(net::Network, to::AbstractString) = - _format_from_handle(_live_handle(net, "to_format").ptr, to, repr(network_name(net))) + _format_from_handle(_live_handle(net, "to_format"), to, repr(network_name(net))) + +""" + PowerIO.warnings(net::Network) -> Vector{String} + +The fidelity warnings attached to `net`'s handle by whichever constructor built +it (a lossy reader itemizes what it ignored; total readers attach none). Sized +exactly via the byte-length query of `pio_warnings`. Unexported, because +`text, warnings = convert_file(...)` is the documented destructuring idiom and +would shadow it. 
+""" +function warnings(net::Network) + h = _live_handle(net, "warnings") + GC.@preserve h begin + len = ccall((:pio_warnings, _lib()), Csize_t, + (Ptr{Cvoid}, Ptr{UInt8}, Csize_t), h.ptr, C_NULL, 0) + len == 0 && return String[] + buf = zeros(UInt8, len + 1) + ccall((:pio_warnings, _lib()), Csize_t, + (Ptr{Cvoid}, Ptr{UInt8}, Csize_t), h.ptr, buf, length(buf)) + return _warn_lines(buf) + end +end """ convert_file(path, to; from=nothing) -> (text, warnings) -Convert `path` to format `to`. All five formats read and write, so any pair -converts. A same-format conversion is byte exact; a cross-format one is -maximal fidelity and reports whatever the target can't carry in `warnings`. Tokens -(case-insensitive): `"matpower"`/`"m"`, `"powermodels-json"`/`"powermodels"`/`"pm"`, -`"egret-json"`/`"egret"`, `"psse"`/`"raw"`, `"powerworld"`/`"aux"`. `from` overrides -extension inference (needed to tell egret and PowerModels `.json` apart). +Convert `path` to format `to`; every reader/writer pair converts. A same-format +conversion is byte exact; a cross-format one is maximal fidelity and reports +whatever the target can't carry in `warnings`. Takes the same format tokens as +[`parse_file`](@ref), except `"pypsa-csv"`: PyPSA folders are directories, not +text documents. `from` overrides extension inference (needed to tell the +`.json` formats apart). """ function convert_file(path::AbstractString, to::AbstractString; from=nothing) _ensure_compatible() - warn = zeros(UInt8, _ERRLEN) + warnbuf = zeros(UInt8, _WARNLEN) err = zeros(UInt8, _ERRLEN) # Pass the format hint as a `String` (ccall roots it) or `C_NULL` for inference. + # ABI 4 argument order: (path, from, to) — the converters read as + # "convert PATH from FROM to TO". + fromc = from === nothing ? 
C_NULL : String(from) s = ccall((:pio_convert_file, _lib()), Cstring, (Cstring, Cstring, Cstring, Ptr{UInt8}, Csize_t, Ptr{UInt8}, Csize_t), - path, to, fromc, warn, length(warn), err, length(err)) + path, fromc, to, warnbuf, length(warnbuf), err, length(err)) s == C_NULL && error("PowerIO.convert_file: " * _cstr(err)) text = unsafe_string(s) ccall((:pio_string_free, _lib()), Cvoid, (Cstring,), s) - warnings = filter(!isempty, split(_cstr(warn), '\n')) - return (text, warnings) + return (text, _warn_lines(warnbuf; capped=true)) end # --- accessor surface --------------------------------------------------- @@ -523,7 +615,9 @@ n_gens(net::Network) = length(net.data.generators) The format the case was read from, verbatim from the Rust `SourceFormat` enum: one of `"Matpower"`, `"PowerModelsJson"`, `"EgretJson"`, `"Psse"`, `"PowerWorld"`, -`"InMemory"`, `"Normalized"` (the last is the output of [`to_normalized`](@ref)). +`"PandapowerJson"`, `"PowerWorldBinary"`, `"PypsaCsv"`, `"Gridfm"` (the +[`read_gridfm`](@ref) reconstruction), `"InMemory"`, or `"Normalized"` (the +output of [`to_normalized`](@ref)). """ source_format(net::Network) = String(net.data.source_format) @@ -532,7 +626,7 @@ source_format(net::Network) = String(net.data.source_format) The 1-based id of the reference (slack) bus, or `nothing` unless exactly one bus has `kind == "REF"`. This mirrors the "exactly one" rule of the C ABI's -`pio_reference_bus` (which returns a dense 0-based index, not an id), but returns +`pio_ref_bus_index` (which returns a dense 0-based index, not an id), but returns the 1-based id space the other accessors use. """ function reference_bus_id(net::Network) @@ -563,71 +657,93 @@ end # --- dense numeric surface ---------------------------------------------- # -# The JSON transport above is the rich, lossless view (every field + extras). 
For -# the matrix-assembly path a consumer wants the numeric tables as dense typed -# arrays without parsing JSON: the C ABI fills caller-allocated buffers -# (`pio_bus_ids` / `pio_branches` / `pio_gens` / `pio_nodal_*`) straight from the -# IndexCore the handle built once at parse, and answers the topology scalars -# (`pio_n_components` / `pio_is_radial` / `pio_reference_bus`) off the same core. +# The snapshot above is the rich, lossless view (every field + extras). For the +# matrix-assembly path a consumer wants the numeric tables as dense typed arrays +# without parsing JSON: the C ABI fills caller-allocated buffers (`pio_bus_ids` / +# `pio_branches` / `pio_gens` / `pio_bus_demand` / `pio_bus_shunt`) straight from +# the IndexCore the handle built once at parse, and answers the topology scalars +# (`pio_n_islands` / `pio_is_radial` / `pio_ref_bus_index`) off the same core. +# Every extractor takes a cap and returns the total available (asserted below: a +# count drifting between the n_* query and the fill would mean a torn handle). # Raw MATPOWER units throughout: 1-based bus ids in `bus_ids`, branch `from`/`to`, # and gen `bus` (the same id space — invert `bus_ids` to map an endpoint to a dense # row), degrees for `shift`, total line charging in `b`, raw `tap` (0 means 1). +# +# Every helper takes the NetworkHandle and preserves it across its ccalls (the +# raw pointer never travels alone); see `_normalize_handle` for why. 
+ +_expect_total(total, expected::Int, what::AbstractString) = + Int(total) == expected || + error("PowerIO.to_dense: $what count changed under us ($total vs $expected)") -function _branch_tables(p::Ptr{Cvoid}, m::Int) +function _branch_tables(h::NetworkHandle, m::Int) from = Vector{Int64}(undef, m); to = Vector{Int64}(undef, m) r = Vector{Float64}(undef, m); x = Vector{Float64}(undef, m); b = Vector{Float64}(undef, m) tap = Vector{Float64}(undef, m); shift = Vector{Float64}(undef, m) insvc = Vector{UInt8}(undef, m) - ccall((:pio_branches, _lib()), Cvoid, + total = GC.@preserve h ccall((:pio_branches, _lib()), Csize_t, (Ptr{Cvoid}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}, - Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{UInt8}), - p, from, to, r, x, b, tap, shift, insvc) + Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{UInt8}, Csize_t), + h.ptr, from, to, r, x, b, tap, shift, insvc, m) + _expect_total(total, m, "branch") return (; from, to, r, x, b, tap, shift, in_service = insvc) end -function _gen_tables(p::Ptr{Cvoid}, ng::Int) +function _gen_tables(h::NetworkHandle, ng::Int) bus = Vector{Int64}(undef, ng); pg = Vector{Float64}(undef, ng) pmax = Vector{Float64}(undef, ng); pmin = Vector{Float64}(undef, ng) insvc = Vector{UInt8}(undef, ng) - ccall((:pio_gens, _lib()), Cvoid, - (Ptr{Cvoid}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{UInt8}), - p, bus, pg, pmax, pmin, insvc) + total = GC.@preserve h ccall((:pio_gens, _lib()), Csize_t, + (Ptr{Cvoid}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{UInt8}, Csize_t), + h.ptr, bus, pg, pmax, pmin, insvc, ng) + _expect_total(total, ng, "gen") return (; bus, pg, pmax, pmin, in_service = insvc) end -function _nodal_demand(p::Ptr{Cvoid}, n::Int) +function _bus_demand(h::NetworkHandle, n::Int) pd = Vector{Float64}(undef, n); qd = Vector{Float64}(undef, n) - ccall((:pio_nodal_demand, _lib()), Cvoid, (Ptr{Cvoid}, Ptr{Float64}, Ptr{Float64}), p, pd, qd) + total = GC.@preserve h 
ccall((:pio_bus_demand, _lib()), Csize_t, + (Ptr{Cvoid}, Ptr{Float64}, Ptr{Float64}, Csize_t), h.ptr, pd, qd, n) + _expect_total(total, n, "bus demand") return (pd, qd) end -function _nodal_shunt(p::Ptr{Cvoid}, n::Int) +function _bus_shunt(h::NetworkHandle, n::Int) gs = Vector{Float64}(undef, n); bs = Vector{Float64}(undef, n) - ccall((:pio_nodal_shunt, _lib()), Cvoid, (Ptr{Cvoid}, Ptr{Float64}, Ptr{Float64}), p, gs, bs) + total = GC.@preserve h ccall((:pio_bus_shunt, _lib()), Csize_t, + (Ptr{Cvoid}, Ptr{Float64}, Ptr{Float64}, Csize_t), h.ptr, gs, bs, n) + _expect_total(total, n, "bus shunt") return (gs, bs) end -# Dense numeric extraction off a live handle, shared by the Network and path methods. -function _dense_from_handle(p::Ptr{Cvoid}) - n = Int(ccall((:pio_n_buses, _lib()), Csize_t, (Ptr{Cvoid},), p)) - m = Int(ccall((:pio_n_branches, _lib()), Csize_t, (Ptr{Cvoid},), p)) - ng = Int(ccall((:pio_n_gens, _lib()), Csize_t, (Ptr{Cvoid},), p)) - bus_ids = Vector{Int64}(undef, n) - ccall((:pio_bus_ids, _lib()), Cvoid, (Ptr{Cvoid}, Ptr{Int64}), p, bus_ids) - pd, qd = _nodal_demand(p, n) - gs, bs = _nodal_shunt(p, n) - return (; - n, m, ng, - base_mva = ccall((:pio_base_mva, _lib()), Cdouble, (Ptr{Cvoid},), p), - bus_ids, - branch = _branch_tables(p, m), - gen = _gen_tables(p, ng), - demand = (; pd, qd), - shunt = (; gs, bs), - reference_bus = Int(ccall((:pio_reference_bus, _lib()), Cptrdiff_t, (Ptr{Cvoid},), p)), - n_components = Int(ccall((:pio_n_components, _lib()), Csize_t, (Ptr{Cvoid},), p)), - is_radial = ccall((:pio_is_radial, _lib()), Cint, (Ptr{Cvoid},), p) != 0, - ) +# Dense numeric extraction off a live handle, shared by the Network and path +# methods. The whole body runs under GC.@preserve h: a dozen ccalls with Julia +# allocations between them, exactly the shape where a finalizer racing the raw +# pointer would be a use after free. 
+function _dense_from_handle(h::NetworkHandle) + GC.@preserve h begin + p = h.ptr + n = Int(ccall((:pio_n_buses, _lib()), Csize_t, (Ptr{Cvoid},), p)) + m = Int(ccall((:pio_n_branches, _lib()), Csize_t, (Ptr{Cvoid},), p)) + ng = Int(ccall((:pio_n_gens, _lib()), Csize_t, (Ptr{Cvoid},), p)) + bus_ids = Vector{Int64}(undef, n) + total = ccall((:pio_bus_ids, _lib()), Csize_t, (Ptr{Cvoid}, Ptr{Int64}, Csize_t), p, bus_ids, n) + _expect_total(total, n, "bus id") + pd, qd = _bus_demand(h, n) + gs, bs = _bus_shunt(h, n) + return (; + n, m, ng, + base_mva = ccall((:pio_base_mva, _lib()), Cdouble, (Ptr{Cvoid},), p), + bus_ids, + branch = _branch_tables(h, m), + gen = _gen_tables(h, ng), + demand = (; pd, qd), + shunt = (; gs, bs), + ref_bus_index = Int(ccall((:pio_ref_bus_index, _lib()), Int64, (Ptr{Cvoid},), p)), + n_islands = Int(ccall((:pio_n_islands, _lib()), Csize_t, (Ptr{Cvoid},), p)), + is_radial = ccall((:pio_is_radial, _lib()), Cint, (Ptr{Cvoid},), p) != 0, + ) + end end """ @@ -649,18 +765,21 @@ builds the JSON view). Fields: - `gen` — NamedTuple of `bus` (1-based id, one row per machine), `pg, pmax, pmin` (MW), `in_service`. - `demand`, `shunt` — NamedTuples of per-bus `(pd, qd)` and `(gs, bs)` in dense order. -- `reference_bus::Int` — dense 0-based index of the single reference bus, or `-1`. -- `n_components::Int`, `is_radial::Bool` — connectivity of the in-service topology. +- `ref_bus_index::Int` — dense 0-based INDEX of the single reference bus, or `-1` + (an index into `bus_ids` order, unlike the 1-based ids in `branch.from`/`to`). + `-1` covers both no reference anywhere and several (e.g. one per island; check + `n_islands`); the C ABI's `pio_ref_bus_indices` disambiguates, not yet bound. +- `n_islands::Int`, `is_radial::Bool` — connectivity of the in-service topology. For the rich, lossless element tables (costs, extras, storage, HVDC) use the accessors on a [`parse_file`](@ref) `Network`; for self-describing columnar export use [`to_arrow`](@ref). 
""" -to_dense(net::Network) = _dense_from_handle(_live_handle(net, "to_dense").ptr) +to_dense(net::Network) = _dense_from_handle(_live_handle(net, "to_dense")) function to_dense(path::AbstractString; from=nothing) h = _parse_handle(path; from=from) try - return _dense_from_handle(h.ptr) + return _dense_from_handle(h) finally finalize(h) # buffers are copied out; free the handle now rather than at GC end diff --git a/src/arrow.jl b/src/arrow.jl index af2f731..0b69e19 100644 --- a/src/arrow.jl +++ b/src/arrow.jl @@ -1,21 +1,22 @@ # Columnar export over the Arrow C Data Interface. # -# `pio_export_arrow` (powerio-capi built `--features arrow`) lends one raw network +# `pio_to_arrow` (powerio-capi built `--features arrow`) lends one raw network # table as an Arrow struct array across the C Data Interface: self-describing, the -# in-memory sibling of the JSON transport and the dense extractors. Arrow.jl is an -# IPC-format reader and does not import the C Data Interface, so we decode the two -# FFI structs here directly: read the schema's child fields and, per column, either -# copy the data buffer into an owned Julia Vector (the default) or wrap it in place -# (`copy=false`, zero copy). +# in-memory sibling of the powerio-json snapshot and the dense extractors. Arrow.jl +# is an IPC-format reader and does not import the C Data Interface, so we decode the +# two FFI structs here directly: read the schema's child fields and, per column, +# either copy the data buffer into an owned Julia Vector (the default) or wrap it in +# place (`copy=false`, zero copy). # # Reading a foreign buffer is inherently one unsafe op; the design keeps it bounded. # `copy=true` (default) memcpys each column out while the producer is provably alive, # then releases it before returning: only Julia-owned memory escapes, so there is no # finalizer and no use after free if a column outlives the call. 
`copy=false` returns -# zero-copy views in an `ArrowTable` that holds the producer alive and frees it on -# finalize; the views then carry the standard keep-the-owner-alive caveat. For the -# numeric tables alone, `to_dense` is the copy-free, `unsafe_wrap`-free fast path -# (the C ABI fills Julia-owned buffers directly). +# zero-copy `ArrowColumn` views that each root the shared `ArrowBuffers` owner, so a +# column extracted from its `ArrowTable` keeps the producer alive on its own; the +# buffers free once nothing references them. For the numeric tables alone, +# `to_dense` is the copy-free, `unsafe_wrap`-free fast path (the C ABI fills +# Julia-owned buffers directly). # # The powerio export is the simple case the decoder is scoped to: every column is a # non-nullable primitive (Int64 "l", Float64 "g", UInt8 "C") with no null buffer, so @@ -64,44 +65,84 @@ end const _ARROW_TABLE_IDS = (bus = Cint(0), branch = Cint(1), gen = Cint(2), load = Cint(3), shunt = Cint(4)) +# Release the producer's array and schema (frees the columnar buffers). Each +# release callback NULLs its own struct's `release`, so a second call is a no-op — +# the explicit-finalize-then-GC-finalize path is safe. +function _release_ffi!(arr::Base.RefValue{CArrowArray}, sch::Base.RefValue{CArrowSchema}) + arr[].release == C_NULL || ccall(arr[].release, Cvoid, (Ptr{CArrowArray},), arr) + sch[].release == C_NULL || ccall(sch[].release, Cvoid, (Ptr{CArrowSchema},), sch) + return +end + +# The producer-owned FFI structs and their release callbacks. The one owner the +# zero-copy table AND each of its columns root, so whichever of them stays +# reachable keeps the buffers alive; the finalizer releases once nothing does. 
+mutable struct ArrowBuffers + array::Base.RefValue{CArrowArray} + schema::Base.RefValue{CArrowSchema} + function ArrowBuffers(array, schema) + b = new(array, schema) + finalizer(x -> _release_ffi!(x.array, x.schema), b) + return b + end +end + """ - ArrowTable + ArrowColumn{T} <: AbstractVector{T} -The zero-copy result of `to_arrow(...; copy=false)`. Its `columns` are a -NamedTuple of vectors that view the producer's buffers directly; the table holds -the producer alive and releases it (frees the buffers) when finalized. (The -default `copy=true` returns a plain NamedTuple of owned Vectors instead, no -`ArrowTable` involved.) +One zero-copy column of `to_arrow(...; copy=false)`: a view into the producer's +buffer that roots the shared `ArrowBuffers` owner, so the column alone keeps the +memory alive — extracting it from its [`ArrowTable`](@ref) is safe. `collect` it +for a plain owned `Vector`. +""" +struct ArrowColumn{T} <: AbstractVector{T} + data::Vector{T} # unsafe_wrap view into the producer's buffer + buffers::ArrowBuffers # roots the producer for the column's lifetime +end +Base.size(c::ArrowColumn) = size(getfield(c, :data)) +Base.IndexStyle(::Type{<:ArrowColumn}) = IndexLinear() +# The raw view must not escape its rooting wrapper: a bare `c.data` does not root +# `buffers`, which is exactly the use after free this type exists to prevent. +Base.getproperty(c::ArrowColumn, name::Symbol) = error( + "PowerIO.ArrowColumn has no public fields; `collect(c)` copies it to an owned Vector") +Base.propertynames(::ArrowColumn) = () +# Preserve `c` (hence its ArrowBuffers) across the read: the wrapped Vector's +# memory is the producer's, not Julia's, so `c` being collectible mid-read would +# let the release finalizer free it. 
+Base.@propagate_inbounds Base.getindex(c::ArrowColumn, i::Int) = + GC.@preserve c getfield(c, :data)[i] + +""" + ArrowTable -Keep the `ArrowTable` reachable while you use its columns: a column kept after -the table is garbage collected points into freed memory. Copy a column -(`collect(t.x)`) to outlive the table. +The zero-copy result of `to_arrow(...; copy=false)`: a NamedTuple of +[`ArrowColumn`](@ref) views into the producer's buffers, behind property access +(`t.id`, `t.from`, ...). Every property name resolves to a column — including +`t.columns`, which would look up a column called `columns` — so the NamedTuple +itself comes from the unexported accessor `PowerIO.columns(t)`. The columns and +the table each root the shared buffer owner, which frees the buffers once none +of them is reachable; a column extracted from the table is safe on its own. +`close(t)` frees the buffers eagerly instead of waiting for GC. (The default +`copy=true` returns a plain NamedTuple of owned Vectors instead, no `ArrowTable` +involved.) """ -mutable struct ArrowTable +struct ArrowTable columns::NamedTuple - _array::Base.RefValue{CArrowArray} - _schema::Base.RefValue{CArrowSchema} - function ArrowTable(columns, array, schema) - t = new(columns, array, schema) - finalizer(_release!, t) - return t - end + _buffers::ArrowBuffers end columns(t::ArrowTable) = getfield(t, :columns) Base.getproperty(t::ArrowTable, name::Symbol) = getfield(getfield(t, :columns), name) Base.propertynames(t::ArrowTable) = propertynames(getfield(t, :columns)) -# Release the producer's array and schema (frees the columnar buffers). Each -# release callback NULLs its own struct's `release`, so a second call is a no-op — -# the explicit-finalize-then-GC-finalize path is safe. 
-function _release_ffi!(arr::Base.RefValue{CArrowArray}, sch::Base.RefValue{CArrowSchema}) - arr[].release == C_NULL || ccall(arr[].release, Cvoid, (Ptr{CArrowArray},), arr) - sch[].release == C_NULL || ccall(sch[].release, Cvoid, (Ptr{CArrowSchema},), sch) - return -end +""" + close(t::ArrowTable) -_release!(t::ArrowTable) = _release_ffi!(getfield(t, :_array), getfield(t, :_schema)) +Release the producer's buffers now instead of at GC. Every [`ArrowColumn`](@ref) +of `t` is invalid afterwards; reading one is undefined behavior. Idempotent (the +release callbacks NULL themselves). +""" +Base.close(t::ArrowTable) = (finalize(getfield(t, :_buffers)); nothing) # Read one primitive column. The data pointer is borrowed from the producer (valid # until release). `copy=true` memcpys it into an owned Vector under `GC.@preserve` so @@ -146,8 +187,9 @@ function _decode_arrow(arr::Base.RefValue{CArrowArray}, sch::Base.RefValue{CArro end # Export one table off a live handle over the Arrow C Data Interface, shared by the -# Network and path methods of `to_arrow`. -function _arrow_from_handle(p::Ptr{Cvoid}, table::Symbol, copy::Bool) +# Network and path methods of `to_arrow`. Takes the handle and preserves it across +# the ccall (see `_normalize_handle` for why the raw pointer never travels alone). 
+function _arrow_from_handle(h::NetworkHandle, table::Symbol, copy::Bool) id = get(_ARROW_TABLE_IDS, table, nothing) id === nothing && throw(ArgumentError( "PowerIO.to_arrow: unknown table $(repr(table)); expected one of $(keys(_ARROW_TABLE_IDS))")) @@ -155,29 +197,33 @@ function _arrow_from_handle(p::Ptr{Cvoid}, table::Symbol, copy::Bool) sch = Ref(_zero(CArrowSchema)) err = zeros(UInt8, _ERRLEN) rc = try - ccall((:pio_export_arrow, _lib()), Cint, + GC.@preserve h ccall((:pio_to_arrow, _lib()), Cint, (Ptr{Cvoid}, Cint, Ptr{CArrowArray}, Ptr{CArrowSchema}, Ptr{UInt8}, Csize_t), - p, id, arr, sch, err, length(err)) + h.ptr, id, arr, sch, err, length(err)) catch e - error("PowerIO.to_arrow: could not call pio_export_arrow: the C ABI at " * - "\"$(_lib())\" was built without the arrow feature. Rebuild with " * - "`cargo build -p powerio-capi --release --features arrow`. Underlying: $e") + _feature_call_error("to_arrow", "pio_to_arrow", "arrow", e) end rc == 0 || error("PowerIO.to_arrow: " * _cstr(err)) - # Export succeeded, so the producer set live release callbacks. If decoding - # throws (a contract violation: unknown format code, child count mismatch), - # release here so the buffers don't leak. - cols = try - _decode_arrow(arr, sch; copy=copy) - catch + if copy + # The columns are owned copies: release the producer before returning, and + # on a decode error (a contract violation: unknown format code, child count + # mismatch) release too so the buffers don't leak. + cols = try + _decode_arrow(arr, sch; copy=true) + catch + _release_ffi!(arr, sch) + rethrow() + end _release_ffi!(arr, sch) - rethrow() + return cols end - # copy=true: the columns are owned, so free the producer now and hand back plain - # Vectors. copy=false: the ArrowTable owns the producer and releases on finalize. 
- copy || return ArrowTable(cols, arr, sch) - _release_ffi!(arr, sch) - return cols + # Zero copy: hand ownership to ArrowBuffers FIRST — from here its finalizer + # releases the producer even if decoding throws — then wrap each view so every + # column roots the owner on its own. + buffers = ArrowBuffers(arr, sch) + cols = _decode_arrow(arr, sch; copy=false) + rooted = map(v -> ArrowColumn(v, buffers), cols) + return ArrowTable(rooted, buffers) end """ @@ -192,18 +238,21 @@ powerio-capi built `--features arrow`; see [`arrow_available`](@ref). `copy=true` (default) returns a NamedTuple of **owned** Julia Vectors and releases the producer before returning: plain arrays, no lifetime caveat. `copy=false` -returns a zero-copy [`ArrowTable`](@ref) whose columns view the producer's buffers; -keep it alive while reading them. Both support `result.` access and are -Tables.jl-shaped. For the numeric tables alone, [`to_dense`](@ref) is a copy-free, +returns a zero-copy [`ArrowTable`](@ref) of [`ArrowColumn`](@ref) views; each +column roots the shared buffers, so columns may outlive the table, and +`close(t)` frees the buffers eagerly. Both support `result.` access, +but only the `copy=true` NamedTuple is Tables.jl-shaped (flows into +`Arrow.write`, `DataFrame`, etc.); `collect` a zero-copy column for an owned +Vector. For the numeric tables alone, [`to_dense`](@ref) is a copy-free, `unsafe_wrap`-free fast path. """ function to_arrow(net::Network, table::Symbol; copy::Bool=true) - return _arrow_from_handle(_live_handle(net, "to_arrow").ptr, table, copy) + return _arrow_from_handle(_live_handle(net, "to_arrow"), table, copy) end function to_arrow(path::AbstractString, table::Symbol; from=nothing, copy::Bool=true) h = _parse_handle(path; from=from) try - return _arrow_from_handle(h.ptr, table, copy) + return _arrow_from_handle(h, table, copy) finally # The exported buffers are owned by the Arrow array, independent of the # network handle — free the handle now. 
@@ -214,19 +263,7 @@ end """ arrow_available() -> Bool -True if the resolved C library exports `pio_export_arrow` (built `--features +True if the resolved C library exports `pio_to_arrow` (built `--features arrow`). """ -function arrow_available() - try - handle = Libdl.dlopen(_lib()) - try - return Libdl.dlsym(handle, :pio_export_arrow; throw_error=false) !== nothing - finally - Libdl.dlclose(handle) - end - catch e - @debug "PowerIO: arrow_available probe failed" exception = (e, catch_backtrace()) - return false - end -end +arrow_available() = _exports_symbol(:pio_to_arrow) diff --git a/src/gridfm.jl b/src/gridfm.jl index 72b4fe2..5b3e3b1 100644 --- a/src/gridfm.jl +++ b/src/gridfm.jl @@ -1,12 +1,12 @@ # gridfm-datakit Parquet reader over the C ABI. # -# `pio_read_gridfm` (powerio-capi built `--features gridfm`) reads a gridfm-datakit -# Parquet dataset back into a network handle — the inverse of the gridfm writer, the -# ML→classical return leg. The reader itself lives in powerio-matrix, so it ships only -# when the C ABI is built with the gridfm feature; `gridfm_available()` probes the symbol -# (the mirror of `arrow_available`). The read is lossy but power-flow-complete; what the -# schema couldn't round-trip comes back in `warnings`, the same `\n`-joined warn buffer -# `convert_file` uses. +# `pio_read_dir` with the `gridfm` format string (powerio-capi built `--features +# gridfm`) reads a gridfm-datakit Parquet dataset back into a network handle — the +# inverse of the gridfm writer, the ML→classical return leg. The reader itself lives +# in powerio-matrix, so it ships only when the C ABI is built with the gridfm feature; +# `gridfm_available()` probes the symbol (the mirror of `arrow_available`). The read is +# lossy but power-flow-complete; what the schema couldn't round-trip attaches to the +# handle, like every constructor's warnings (query with `warnings(net)`). 
""" read_gridfm(dir; scenario=0) -> (; network::Network, scenario::Int, warnings::Vector{String}) @@ -18,10 +18,11 @@ the parquet files, a `/` directory with a `raw/` child, or a parent with o default). The read is lossy but power-flow-complete: it recovers bus types, voltages and limits, -nodal load and shunt totals, generator dispatch and bounds, branch +per-bus load and shunt totals, generator dispatch and bounds, branch `r/x/b/tap/shift/rate_a`/angle-limits, and `base_mva` — enough to write a runnable case — but not original bus ids (synthesized `1..n`), per-element load/shunt granularity, -piecewise/cubic costs, or HVDC/storage. What it can't recover is listed in `warnings`. +piecewise/cubic costs, or HVDC/storage. What it can't recover is listed in `warnings` +(also available later as [`warnings`](@ref)`(network)`). The returned `network` carries a live Rust handle, so the `to_*` transforms work on it. Needs powerio-capi built `--features gridfm`; see [`gridfm_available`](@ref). For every @@ -29,22 +30,18 @@ scenario in a batch use [`read_gridfm_scenarios`](@ref). """ function read_gridfm(dir::AbstractString; scenario::Integer=0) _ensure_compatible() - warn = zeros(UInt8, _ERRLEN) err = zeros(UInt8, _ERRLEN) ptr = try - ccall((:pio_read_gridfm, _lib()), Ptr{Cvoid}, - (Cstring, Int64, Ptr{UInt8}, Csize_t, Ptr{UInt8}, Csize_t), - String(dir), Int64(scenario), warn, length(warn), err, length(err)) + ccall((:pio_read_dir, _lib()), Ptr{Cvoid}, + (Cstring, Cstring, Int64, Ptr{UInt8}, Csize_t), + String(dir), "gridfm", Int64(scenario), err, length(err)) catch e - error("PowerIO.read_gridfm: could not call pio_read_gridfm: the C ABI at " * - "\"$(_lib())\" was built without the gridfm feature. Rebuild with " * - "`cargo build -p powerio-capi --release --features gridfm`. 
Underlying: $e") + _feature_call_error("read_gridfm", "pio_read_dir", "gridfm", e) end ptr == C_NULL && error("PowerIO.read_gridfm: " * _cstr(err)) h = NetworkHandle(ptr) net = Network(JSON3.read(_to_json(h)), h) - warnings = String.(filter(!isempty, split(_cstr(warn), '\n'))) - return (; network = net, scenario = Int(scenario), warnings = warnings) + return (; network = net, scenario = Int(scenario), warnings = warnings(net)) end """ @@ -60,28 +57,31 @@ function read_gridfm_scenarios(dir::AbstractString) return [read_gridfm(dir; scenario = id) for id in _gridfm_scenario_ids(dir)] end -# The dataset's distinct scenario ids (ascending), via `pio_gridfm_scenario_ids`: a -# zero-capacity probe returns the count, then a second call fills the buffer (the count / -# caller-buffer pattern the dense extractors use). +# The dataset's distinct scenario ids (ascending), via `pio_scenario_ids` with the +# `gridfm` format string: a zero-capacity probe returns the count, then a second +# call fills the buffer (the cap/count pattern every extractor uses). function _gridfm_scenario_ids(dir::AbstractString) err = zeros(UInt8, _ERRLEN) d = String(dir) count = try - ccall((:pio_gridfm_scenario_ids, _lib()), Cptrdiff_t, - (Cstring, Ptr{Int64}, Csize_t, Ptr{UInt8}, Csize_t), - d, C_NULL, 0, err, length(err)) + ccall((:pio_scenario_ids, _lib()), Cptrdiff_t, + (Cstring, Cstring, Ptr{Int64}, Csize_t, Ptr{UInt8}, Csize_t), + d, "gridfm", C_NULL, 0, err, length(err)) catch e - error("PowerIO.read_gridfm_scenarios: could not call pio_gridfm_scenario_ids: the " * - "C ABI at \"$(_lib())\" was built without the gridfm feature. Rebuild with " * - "`cargo build -p powerio-capi --release --features gridfm`. 
Underlying: $e") + _feature_call_error("read_gridfm_scenarios", "pio_scenario_ids", "gridfm", e) end count < 0 && error("PowerIO.read_gridfm_scenarios: " * _cstr(err)) ids = Vector{Int64}(undef, count) if count > 0 - n = ccall((:pio_gridfm_scenario_ids, _lib()), Cptrdiff_t, - (Cstring, Ptr{Int64}, Csize_t, Ptr{UInt8}, Csize_t), - d, ids, length(ids), err, length(err)) + n = ccall((:pio_scenario_ids, _lib()), Cptrdiff_t, + (Cstring, Cstring, Ptr{Int64}, Csize_t, Ptr{UInt8}, Csize_t), + d, "gridfm", ids, length(ids), err, length(err)) n < 0 && error("PowerIO.read_gridfm_scenarios: " * _cstr(err)) + # Unlike the dense extractors' immutable handle, both calls re-read the + # filesystem, so the count genuinely can change between probe and fill — + # and a short fill would leave heap garbage in the tail of `ids`. + Int(n) == count || error("PowerIO.read_gridfm_scenarios: scenario count " * + "changed between probe and fill ($n vs $count)") end return ids end @@ -89,18 +89,6 @@ end """ gridfm_available() -> Bool -True if the resolved C library exports `pio_read_gridfm` (built `--features gridfm`). +True if the resolved C library exports `pio_read_dir` (built `--features gridfm`). """ -function gridfm_available() - try - handle = Libdl.dlopen(_lib()) - try - return Libdl.dlsym(handle, :pio_read_gridfm; throw_error=false) !== nothing - finally - Libdl.dlclose(handle) - end - catch e - @debug "PowerIO: gridfm_available probe failed" exception = (e, catch_backtrace()) - return false - end -end +gridfm_available() = _exports_symbol(:pio_read_dir) diff --git a/test/runtests.jl b/test/runtests.jl index e95426e..7b3d9ee 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -74,6 +74,11 @@ using JSON3 @test PowerIO.n_buses(net_from_json) == 14 @test PowerIO.source_format(net_from_json) == "Matpower" + # The snapshot is also a first-class format name (ABI 4: no format + # symbols), and the MATPOWER reader is total: no handle warnings. 
+ @test PowerIO.n_buses(parse_str(to_json(net), "powerio-json")) == 14 + @test isempty(PowerIO.warnings(net)) + # EGRET and PowerModels both use .json (fixtures produced by convert_file). # The positive cases confirm each fixture parses under its own format; the # negative cases prove `from` overrides inference, since forcing the wrong @@ -242,8 +247,8 @@ using JSON3 @test (d.n, d.m, d.ng) == (14, 20, 5) @test d.base_mva == 100.0 @test d.bus_ids == collect(1:14) # case14 buses are 1..14 - @test d.reference_bus == 0 # dense 0-based index of the REF bus - @test d.n_components == 1 + @test d.ref_bus_index == 0 # dense 0-based INDEX of the REF bus + @test d.n_islands == 1 @test d.is_radial == false # case14 has loops @test length(d.branch.from) == 20 && length(d.branch.x) == 20 @test all(>(0), d.branch.x) # reactances are positive @@ -289,14 +294,34 @@ using JSON3 @test_throws ArgumentError to_arrow(m, :nonesuch) - # copy=false: the zero-copy ArrowTable path, same values; releasing it - # (finalizer) must not fault, and a second finalize is a no-op. + # copy=false: the zero-copy ArrowTable path, same values. A column + # extracted from the table roots the shared buffers on its own, so + # it survives the table being collected (the old footgun). z = to_arrow(m, :bus; copy=false) @test z isa ArrowTable @test z.id == collect(1:14) - finalize(z) - finalize(z) - @test true + @test z.id isa PowerIO.ArrowColumn{Int64} + @test PowerIO.columns(z) isa NamedTuple + # The raw unsafe_wrap view must not escape its rooting wrapper. + @test_throws ErrorException z.id.data + @test collect(z.id) isa Vector{Int64} + col = z.id + z = nothing + GC.gc(); GC.gc() + @test col == collect(1:14) + col = nothing + GC.gc() + + # close releases the producer eagerly: both release callbacks NULL + # themselves, so a second close (and the later GC finalize) is a + # no-op. 
+ z2 = to_arrow(m, :bus; copy=false) + b = getfield(z2, :_buffers) + @test b.array[].release != C_NULL + close(z2) + @test b.array[].release == C_NULL + @test b.schema[].release == C_NULL + close(z2) end end @@ -314,6 +339,9 @@ using JSON3 @test r.scenario == 0 @test r.warnings isa Vector{String} @test !isempty(r.warnings) + # ABI 4: the lossy read's warnings attach to the handle itself, and + # the documented bus id synthesis is itemized there. + @test any(w -> occursin("synthesized bus ids", w), PowerIO.warnings(r.network)) @test PowerIO.n_buses(r.network) == 14 @test PowerIO.n_branches(r.network) == 20 @test PowerIO.n_gens(r.network) == 5