-
Notifications
You must be signed in to change notification settings - Fork 8
Add native Polars DataFrame support #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4868db9
82492e5
0030291
cef9107
1bd2198
6761de0
441cddb
a0a86ce
bf8e984
00d81cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -55,6 +55,11 @@ try: | |||||||||||
| except ImportError: | ||||||||||||
| PIL = None | ||||||||||||
|
|
||||||||||||
| try: | ||||||||||||
| import polars as pl | ||||||||||||
| except ImportError: | ||||||||||||
| pl = None | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| # Various utility helper functions for doing things that are problematic in PYX files | ||||||||||||
| include "sbdf_helpers.pxi" | ||||||||||||
|
|
@@ -420,6 +425,18 @@ cdef class _ImportContext: | |||||||||||
| """ | ||||||||||||
| return _valuetype_id_to_spotfire_typename(self.value_type.id) | ||||||||||||
|
|
||||||||||||
    cpdef bint is_object_numpy_type(self):
        """Return True if the numpy type for this column is NPY_OBJECT.

        :return: True if the numpy type is object, False otherwise

        .. note:: ``numpy_type_num`` is a ``cdef`` attribute and is therefore inaccessible
            from Python-level code. This ``cpdef`` wrapper exposes the comparison so that
            :func:`_import_build_polars_dataframe` can branch on it without touching the
            Cython-only attribute directly.
        """
        # NPY_OBJECT arrays hold boxed Python objects rather than raw C values, so
        # downstream consumers must convert them differently (e.g. via tolist()).
        return self.numpy_type_num == np_c.NPY_OBJECT
|
|
||||||||||||
|
|
||||||||||||
| # Individual functions for importing each value type. | ||||||||||||
| ctypedef int(*importer_fn)(_ImportContext, sbdf_c.sbdf_columnslice*) | ||||||||||||
|
|
@@ -654,13 +671,87 @@ cdef dict _import_metadata(sbdf_c.sbdf_metadata_head* md, int column_num): | |||||||||||
| return metadata | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| def import_data(sbdf_file): | ||||||||||||
| """Import data from an SBDF file and create a 'pandas' DataFrame. | ||||||||||||
cdef object _import_polars_dtype(_ImportContext context):
    """Return the Polars dtype corresponding to the SBDF value type in the import context.

    :param context: import context for a column
    :return: the Polars dtype object
    :raise SBDFError: if the SBDF value type has no Polars equivalent
    """
    # Table-driven lookup from SBDF value type id to Polars dtype; assumes the caller
    # has already verified that polars is importable (``pl`` is not None).
    dtype_by_id = {
        sbdf_c.SBDF_BOOLTYPEID: pl.Boolean,
        sbdf_c.SBDF_INTTYPEID: pl.Int32,
        sbdf_c.SBDF_LONGTYPEID: pl.Int64,
        sbdf_c.SBDF_FLOATTYPEID: pl.Float32,
        sbdf_c.SBDF_DOUBLETYPEID: pl.Float64,
        sbdf_c.SBDF_STRINGTYPEID: pl.Utf8,
        sbdf_c.SBDF_DATETIMETYPEID: pl.Datetime,
        sbdf_c.SBDF_DATETYPEID: pl.Date,
        sbdf_c.SBDF_TIMETYPEID: pl.Time,
        sbdf_c.SBDF_TIMESPANTYPEID: pl.Duration,
        sbdf_c.SBDF_BINARYTYPEID: pl.Binary,
        sbdf_c.SBDF_DECIMALTYPEID: pl.Decimal,
    }
    vt_id = context.value_type.id
    polars_dtype = dtype_by_id.get(vt_id)
    if polars_dtype is None:
        raise SBDFError(f"unsupported SBDF value type id {vt_id} for Polars output")
    return polars_dtype
|
|
||||||||||||
|
|
||||||||||||
cdef object _import_build_polars_dataframe(column_names, importer_contexts):
    """Build a Polars DataFrame directly from import context data, with no Pandas intermediary.

    :param column_names: list of column name strings
    :param importer_contexts: list of _ImportContext objects
    :return: a Polars DataFrame
    """
    columns = []
    for col_name, ctx in zip(column_names, importer_contexts):
        raw_values = ctx.get_values_array()
        null_mask = ctx.get_invalid_array()
        polars_dtype = _import_polars_dtype(ctx)
        has_nulls = null_mask.any()

        if ctx.is_object_numpy_type():
            # Object arrays hold Python objects (str, date, datetime, etc.); Polars cannot
            # construct a typed series from a numpy object array directly — use a Python list,
            # substituting None at every invalid position.
            data = raw_values.tolist()
            if has_nulls:
                for pos in np.where(null_mask)[0]:
                    data[pos] = None
            series = pl.Series(name=col_name, values=data, dtype=polars_dtype)
        else:
            # Numeric arrays: numpy → Polars Series directly, then scatter nulls if needed.
            series = pl.Series(name=col_name, values=raw_values, dtype=polars_dtype)
            if has_nulls:
                series = series.scatter(np.where(null_mask)[0].tolist(), None)

        columns.append(series)

    return pl.DataFrame(columns)
|
|
||||||||||||
|
|
||||||||||||
| def import_data(sbdf_file, output_format="pandas"): | ||||||||||||
| """Import data from an SBDF file and create a DataFrame. | ||||||||||||
|
|
||||||||||||
| :param sbdf_file: the filename of the SBDF file to import | ||||||||||||
| :param output_format: the format of the returned DataFrame; either 'pandas' (default) or 'polars' | ||||||||||||
| :return: the DataFrame containing the imported data | ||||||||||||
| :raises SBDFError: if a problem is encountered during import | ||||||||||||
| """ | ||||||||||||
| # Validate output_format before opening the file so we fail fast on bad input. | ||||||||||||
| if output_format not in ("pandas", "polars"): | ||||||||||||
| raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 'polars'") | ||||||||||||
|
|
||||||||||||
| cdef int error, i | ||||||||||||
| cdef stdio.FILE* input_file = NULL | ||||||||||||
| cdef int major_v, minor_v | ||||||||||||
|
|
@@ -774,7 +865,16 @@ def import_data(sbdf_file): | |||||||||||
| if error != sbdf_c.SBDF_OK and error != sbdf_c.SBDF_TABLEEND: | ||||||||||||
| raise SBDFError(f"error reading '{sbdf_file}': {sbdf_c.sbdf_err_get_str(error).decode('utf-8')}") | ||||||||||||
|
|
||||||||||||
| # Build a new DataFrame with the results | ||||||||||||
| # Short-circuit before pd.concat to avoid the Pandas intermediary entirely. | ||||||||||||
| # This keeps the import zero-copy for large DataFrames: numpy arrays collected | ||||||||||||
| # by each _ImportContext go straight into Polars Series without ever becoming | ||||||||||||
| # a Pandas DataFrame. | ||||||||||||
| if output_format == "polars": | ||||||||||||
| if pl is None: | ||||||||||||
| raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'") | ||||||||||||
| return _import_build_polars_dataframe(column_names, importer_contexts) | ||||||||||||
|
|
||||||||||||
| # Build a new Pandas DataFrame with the results | ||||||||||||
| imported_columns = [] | ||||||||||||
| for i in range(num_columns): | ||||||||||||
| column_series = pd.Series(importer_contexts[i].get_values_array(), | ||||||||||||
|
|
@@ -1030,6 +1130,149 @@ cdef _export_obj_series(obj, default_column_name): | |||||||||||
| return {}, [column_name], [column_metadata], [context] | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
    """Determine a value type for a data set based on the Polars dtype for the series.

    :param dtype: the Polars dtype object
    :param series_description: description of series (for error reporting)
    :return: the integer value type id representing the type of series
    :raise SBDFError: if the dtype is unknown
    """
    # Use __class__.__name__ rather than isinstance() checks. Polars dtype objects are
    # not ordinary Python classes resolvable at Cython compile time, so isinstance() would
    # require importing the exact dtype class — which breaks when Polars isn't installed.
    # Class name strings are stable across the Polars versions we support (>= 0.20).
    dtype_name = dtype.__class__.__name__

    # Dtypes that map straight to an SBDF value type with no caveats.
    direct_map = {
        "Boolean": sbdf_c.SBDF_BOOLTYPEID,
        "Int8": sbdf_c.SBDF_INTTYPEID,
        "Int16": sbdf_c.SBDF_INTTYPEID,
        "Int32": sbdf_c.SBDF_INTTYPEID,
        "UInt8": sbdf_c.SBDF_INTTYPEID,
        "UInt16": sbdf_c.SBDF_INTTYPEID,
        "Int64": sbdf_c.SBDF_LONGTYPEID,
        "UInt32": sbdf_c.SBDF_LONGTYPEID,
        "Float32": sbdf_c.SBDF_FLOATTYPEID,
        "Float64": sbdf_c.SBDF_DOUBLETYPEID,
        "Utf8": sbdf_c.SBDF_STRINGTYPEID,
        "String": sbdf_c.SBDF_STRINGTYPEID,
        "Date": sbdf_c.SBDF_DATETYPEID,
        "Duration": sbdf_c.SBDF_TIMESPANTYPEID,
        "Time": sbdf_c.SBDF_TIMETYPEID,
        "Binary": sbdf_c.SBDF_BINARYTYPEID,
        # pl.Series([None, None]) has dtype Null when no type can be inferred. Export as
        # String; _export_polars_series_to_numpy produces a placeholder array and the
        # invalids mask marks every row missing, so the stored values are never read.
        "Null": sbdf_c.SBDF_STRINGTYPEID,
    }
    if dtype_name in direct_map:
        return direct_map[dtype_name]

    # Remaining dtypes are lossy or experimental and emit a warning before mapping.
    if dtype_name == "UInt64":
        warnings.warn(f"Polars UInt64 type in {series_description} will be exported as LongInteger (signed "
                      f"64-bit); values above 9,223,372,036,854,775,807 will overflow", SBDFWarning)
        return sbdf_c.SBDF_LONGTYPEID
    if dtype_name == "Datetime":
        if getattr(dtype, 'time_zone', None) is not None:
            warnings.warn(f"Polars Datetime type in {series_description} has timezone '{dtype.time_zone}'; "
                          f"timezone information will not be preserved in SBDF", SBDFWarning)
        return sbdf_c.SBDF_DATETIMETYPEID
    if dtype_name == "Decimal":
        warnings.warn(f"Polars Decimal type in {series_description} export is experimental; "
                      f"precision may not be fully preserved", SBDFWarning)
        return sbdf_c.SBDF_DECIMALTYPEID
    if dtype_name in ("Categorical", "Enum"):
        # SBDF has no categorical type; export as String
        warnings.warn(f"Polars {dtype_name} type in {series_description} will be exported as String; "
                      f"category information will not be preserved", SBDFWarning)
        return sbdf_c.SBDF_STRINGTYPEID

    raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}")
|
|
||||||||||||
|
|
||||||||||||
| cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series): | ||||||||||||
| """Convert a Polars Series to a NumPy array suitable for the SBDF exporter. | ||||||||||||
|
|
||||||||||||
| :param context: export context holding the resolved value type | ||||||||||||
| :param series: Polars Series to convert | ||||||||||||
| :return: NumPy ndarray of values | ||||||||||||
| """ | ||||||||||||
| dtype_name = series.dtype.__class__.__name__ | ||||||||||||
| if dtype_name == "Null": | ||||||||||||
| # A Null-dtype series has no values to convert; return a same-length placeholder array. | ||||||||||||
| # The invalids mask (set by the caller via series.is_null()) marks every row as missing, | ||||||||||||
| # so the placeholder values are never read by the SBDF writer. | ||||||||||||
| return np.full(len(series), None, dtype=object) | ||||||||||||
| if dtype_name in ("Categorical", "Enum"): | ||||||||||||
| # Cast to String so .to_numpy() returns plain Python strings | ||||||||||||
| series = series.cast(pl.Utf8) | ||||||||||||
| dtype_name = "Utf8" | ||||||||||||
| if dtype_name in ("Date", "Time"): | ||||||||||||
| # The Date/Time exporters require Python date/time objects; | ||||||||||||
| # Polars .to_numpy() returns numpy datetime64/int64 which those exporters do not accept. | ||||||||||||
| return np.asarray(series.to_list(), dtype=object) | ||||||||||||
| if dtype_name in ("Datetime", "Duration"): | ||||||||||||
| # Keep native datetime64/timedelta64 arrays; the invalids mask handles nulls (NaT cells | ||||||||||||
| # are marked invalid and ignored by the SBDF writer). Boxing to object would be slower. | ||||||||||||
| return series.to_numpy(allow_copy=True) | ||||||||||||
| na_value = context.get_numpy_na_value() | ||||||||||||
| if na_value is not None: | ||||||||||||
| return np.asarray(series.fill_null(na_value).to_numpy(allow_copy=True), | ||||||||||||
| dtype=context.get_numpy_dtype()) | ||||||||||||
| else: | ||||||||||||
|
||||||||||||
| else: | |
| else: | |
| # For Datetime/Duration, keep native NumPy datetime64/timedelta64 dtypes instead of boxing to object. | |
| if dtype_name in ("Datetime", "Duration"): | |
| return series.to_numpy(allow_copy=True) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Datetime and Duration series are now converted to NumPy arrays early, before the generic fill-null conversion path.
Copilot
AI
Mar 24, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the Polars export path, invalids are derived from series.is_null(), which does not mark floating-point NaN values as invalid. In the existing Pandas path pd.isnull() treats NaN as missing, so exporting a Polars float column containing NaN will write NaNs as real values instead of SBDF invalids (behavior mismatch vs Pandas and likely incorrect for Spotfire missing-values semantics). Consider treating NaN as invalid for Float32/Float64 columns (e.g., combine is_null() with is_nan() when applicable).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added `series.is_null()` combined with `series.is_nan()` for float columns so that NaN values are treated as missing, matching the Pandas export behavior.
Uh oh!
There was an error while loading. Please reload this page.