From 4868db9bc4408063ccf0968525de49e41f730859 Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 20:06:54 -0500 Subject: [PATCH 01/10] feat: polars functionality --- .gitignore | 7 +++ pyproject.toml | 6 +- spotfire/sbdf.pyi | 2 +- spotfire/sbdf.pyx | 126 ++++++++++++++++++++++++++++++++++++- spotfire/test/test_sbdf.py | 77 +++++++++++++++++++++++ 5 files changed, 213 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 8b22a61..9f0e1c7 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,13 @@ __pycache__/ # virtual environments /venv/ +/.venv/ + +# uv lock file (this is a library; lock files are for applications) +/uv.lock + +# Claude Code +/.claude # PyCharm project files /.idea diff --git a/pyproject.toml b/pyproject.toml index 9b68bf9..4588961 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,9 +54,13 @@ plot-seaborn = [ "seaborn >= 0.13.2", ] plot = [ "spotfire[plot-matplotlib,plot-pil,plot-seaborn]" ] +# Polars support +polars = [ + "polars >= 0.20.0", +] # Development requirements dev = [ - "spotfire[geo,plot]", + "spotfire[geo,plot,polars]", "Cython >= 3.0.4", "html-testRunner", ] diff --git a/spotfire/sbdf.pyi b/spotfire/sbdf.pyi index 625aff6..80d8fc4 100644 --- a/spotfire/sbdf.pyi +++ b/spotfire/sbdf.pyi @@ -13,6 +13,6 @@ class SBDFError(Exception): ... class SBDFWarning(Warning): ... def spotfire_typename_to_valuetype_id(typename: str) -> typing.Optional[int]: ... -def import_data(sbdf_file: _FilenameLike): ... +def import_data(sbdf_file: _FilenameLike, output_format: str = "pandas") -> typing.Any: ... def export_data(obj: typing.Any, sbdf_file: _FilenameLike, default_column_name: str = "x", rows_per_slice: int = 0, encoding_rle: bool = True) -> None: ... 
diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 2f005bf..ff10672 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -55,6 +55,11 @@ try: except ImportError: PIL = None +try: + import polars as pl +except ImportError: + pl = None + # Various utility helper functions for doing things that are problematic in PYX files include "sbdf_helpers.pxi" @@ -654,10 +659,11 @@ cdef dict _import_metadata(sbdf_c.sbdf_metadata_head* md, int column_num): return metadata -def import_data(sbdf_file): - """Import data from an SBDF file and create a 'pandas' DataFrame. +def import_data(sbdf_file, output_format="pandas"): + """Import data from an SBDF file and create a DataFrame. :param sbdf_file: the filename of the SBDF file to import + :param output_format: the format of the returned DataFrame; either 'pandas' (default) or 'polars' :return: the DataFrame containing the imported data :raises SBDFError: if a problem is encountered during import """ @@ -812,6 +818,10 @@ def import_data(sbdf_file): with warnings.catch_warnings(): warnings.simplefilter("ignore") dataframe.spotfire_table_metadata = table_metadata + if output_format == "polars": + if pl is None: + raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'") + return pl.from_pandas(dataframe) return dataframe finally: @@ -1030,6 +1040,110 @@ cdef _export_obj_series(obj, default_column_name): return {}, [column_name], [column_metadata], [context] +cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description): + """Determine a value type for a data set based on the Polars dtype for the series. 
+ + :param dtype: the Polars dtype object + :param series_description: description of series (for error reporting) + :return: the integer value type id representing the type of series + :raise SBDFError: if the dtype is unknown + """ + dtype_name = dtype.__class__.__name__ + if dtype_name == "Boolean": + return sbdf_c.SBDF_BOOLTYPEID + elif dtype_name in ("Int8", "Int16", "Int32", "UInt8", "UInt16"): + return sbdf_c.SBDF_INTTYPEID + elif dtype_name in ("Int64", "UInt32", "UInt64"): + return sbdf_c.SBDF_LONGTYPEID + elif dtype_name == "Float32": + return sbdf_c.SBDF_FLOATTYPEID + elif dtype_name == "Float64": + return sbdf_c.SBDF_DOUBLETYPEID + elif dtype_name in ("Utf8", "String"): + return sbdf_c.SBDF_STRINGTYPEID + elif dtype_name == "Date": + return sbdf_c.SBDF_DATETYPEID + elif dtype_name == "Datetime": + return sbdf_c.SBDF_DATETIMETYPEID + elif dtype_name == "Duration": + return sbdf_c.SBDF_TIMESPANTYPEID + elif dtype_name == "Time": + return sbdf_c.SBDF_TIMETYPEID + elif dtype_name == "Binary": + return sbdf_c.SBDF_BINARYTYPEID + elif dtype_name == "Decimal": + return sbdf_c.SBDF_DECIMALTYPEID + elif dtype_name == "Categorical": + return _export_infer_valuetype_from_polars_dtype(dtype.categories, series_description) + else: + raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}") + + +cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series): + """Convert a Polars Series to a NumPy array suitable for the SBDF exporter. + + :param context: export context holding the resolved value type + :param series: Polars Series to convert + :return: NumPy ndarray of values + """ + dtype_name = series.dtype.__class__.__name__ + if dtype_name in ("Date", "Time"): + # The Date/Time exporters require Python date/time objects; + # Polars .to_numpy() returns numpy datetime64/int64 which those exporters do not accept. 
+ return np.asarray(series.to_list(), dtype=object) + na_value = context.get_numpy_na_value() + if na_value is not None: + return np.asarray(series.fill_null(na_value).to_numpy(allow_copy=True), + dtype=context.get_numpy_dtype()) + else: + return np.asarray(series.to_numpy(allow_copy=True), dtype=object) + + +cdef _export_obj_polars_dataframe(obj): + """Extract column information for a Polars ``DataFrame``. + + :param obj: Polars DataFrame object to export + :return: tuple containing dictionary of table metadata, list of column names, list of dictionaries of column + metadata, and list of export context objects + """ + if len(set(obj.columns)) != len(obj.columns): + raise SBDFError("obj does not have unique column names") + + column_names = [] + column_metadata = [] + exporter_contexts = [] + for col in obj.columns: + series = obj[col] + column_names.append(col) + context = _ExportContext() + context.set_valuetype_id(_export_infer_valuetype_from_polars_dtype(series.dtype, f"column '{col}'")) + invalids = series.is_null().to_numpy() + context.set_arrays(_export_polars_series_to_numpy(context, series), invalids) + column_metadata.append({}) + exporter_contexts.append(context) + + return {}, column_names, column_metadata, exporter_contexts + + +cdef _export_obj_polars_series(obj, default_column_name): + """Extract column information for a Polars ``Series``. 
+ + :param obj: Polars Series object to export + :param default_column_name: column name to use when obj does not have a name + :return: tuple containing dict of table metadata, list of column names, list of dicts of column metadata, and + list of export context objects + """ + column_name = obj.name if obj.name else default_column_name + description = f"series '{obj.name}'" if obj.name else "series" + + context = _ExportContext() + context.set_valuetype_id(_export_infer_valuetype_from_polars_dtype(obj.dtype, description)) + invalids = obj.is_null().to_numpy() + context.set_arrays(_export_polars_series_to_numpy(context, obj), invalids) + + return {}, [column_name], [{}], [context] + + cdef _export_obj_numpy(np_c.ndarray obj, default_column_name): """Extract column information for a NumPy ``ndarray``. @@ -1801,8 +1915,14 @@ def export_data(obj, sbdf_file, default_column_name="x", Py_ssize_t rows_per_sli try: # Extract data and metadata from obj + # Polars DataFrames (tabular) + if pl is not None and isinstance(obj, pl.DataFrame): + exported = _export_obj_polars_dataframe(obj) + # Polars Series (columnar) + elif pl is not None and isinstance(obj, pl.Series): + exported = _export_obj_polars_series(obj, default_column_name) # Pandas DataFrames (tabular) - if isinstance(obj, pd.DataFrame): + elif isinstance(obj, pd.DataFrame): exported = _export_obj_dataframe(obj) # Pandas Series (columnar) elif isinstance(obj, pd.Series): diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index de89774..13d2035 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -18,6 +18,11 @@ from packaging import version import spotfire + +try: + import polars as pl +except ImportError: + pl = None from spotfire import sbdf from spotfire.test import utils @@ -539,3 +544,75 @@ def _assert_dataframe_shape(self, dataframe: pd.DataFrame, rows: int, column_nam def _assert_is_png_image(self, expr: bytes) -> None: """Assert that a bytes object represents PNG image 
data.""" self.assertEqual(expr[0:8], b'\x89PNG\x0d\x0a\x1a\x0a') + + +@unittest.skipIf(pl is None, "polars not installed") +class SbdfPolarsTest(unittest.TestCase): + """Unit tests for Polars DataFrame support in 'spotfire.sbdf' module.""" + + def test_write_polars_dataframe_basic(self): + """Exporting a Polars DataFrame with common types should produce a valid SBDF file.""" + df = pl.DataFrame({ + "flag": [True, False, True], + "count": [1, 2, 3], + "value": [1.1, 2.2, 3.3], + "label": ["a", "b", "c"], + }) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/output.sbdf" + sbdf.export_data(df, path) + result = sbdf.import_data(path) + self.assertEqual(len(result), 3) + self.assertEqual(list(result.columns), ["flag", "count", "value", "label"]) + self.assertEqual(result["flag"].tolist(), [True, False, True]) + self.assertEqual(result["count"].dropna().astype(int).tolist(), [1, 2, 3]) + self.assertAlmostEqual(result["value"][0], 1.1) + self.assertEqual(result["label"].tolist(), ["a", "b", "c"]) + + def test_write_polars_dataframe_nulls(self): + """Exporting a Polars DataFrame with null values should preserve nulls.""" + df = pl.DataFrame({ + "ints": [1, None, 3], + "floats": [1.0, None, 3.0], + "strings": ["x", None, "z"], + }) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/output.sbdf" + sbdf.export_data(df, path) + result = sbdf.import_data(path) + self.assertTrue(pd.isnull(result["ints"][1])) + self.assertTrue(pd.isnull(result["floats"][1])) + self.assertTrue(pd.isnull(result["strings"][1])) + + def test_write_polars_series(self): + """Exporting a Polars Series should produce a valid SBDF file.""" + series = pl.Series("vals", [10, 20, 30]) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/output.sbdf" + sbdf.export_data(series, path) + result = sbdf.import_data(path) + self.assertEqual(len(result), 3) + self.assertEqual(result.columns[0], "vals") + 
self.assertEqual(result["vals"].dropna().astype(int).tolist(), [10, 20, 30]) + + def test_import_as_polars(self): + """Importing an SBDF file with output_format='polars' should return a Polars DataFrame.""" + dataframe = sbdf.import_data(utils.get_test_data_file("sbdf/1.sbdf"), output_format="polars") + self.assertIsInstance(dataframe, pl.DataFrame) + self.assertIn("Boolean", dataframe.columns) + self.assertIn("Integer", dataframe.columns) + + def test_polars_roundtrip(self): + """A Polars DataFrame should survive an export/import roundtrip.""" + original = pl.DataFrame({ + "integers": [1, 2, 3], + "floats": [1.5, 2.5, 3.5], + "strings": ["foo", "bar", "baz"], + }) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/roundtrip.sbdf" + sbdf.export_data(original, path) + result = sbdf.import_data(path, output_format="polars") + self.assertIsInstance(result, pl.DataFrame) + self.assertEqual(result["strings"].to_list(), ["foo", "bar", "baz"]) + self.assertAlmostEqual(result["floats"][0], 1.5) From 82492e5d3f2429f1988e196f2246f5cb919a3283 Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 20:45:54 -0500 Subject: [PATCH 02/10] linting and testing --- spotfire/sbdf.pyx | 87 +++++++++++++++++++++++++++++++++++--- spotfire/test/test_sbdf.py | 19 +++++---- 2 files changed, 93 insertions(+), 13 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index ff10672..234b588 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -425,6 +425,13 @@ cdef class _ImportContext: """ return _valuetype_id_to_spotfire_typename(self.value_type.id) + cpdef bint is_object_numpy_type(self): + """Return True if the numpy type for this column is NPY_OBJECT. + + :return: True if the numpy type is object, False otherwise + """ + return self.numpy_type_num == np_c.NPY_OBJECT + # Individual functions for importing each value type. 
ctypedef int(*importer_fn)(_ImportContext, sbdf_c.sbdf_columnslice*) @@ -659,6 +666,74 @@ cdef dict _import_metadata(sbdf_c.sbdf_metadata_head* md, int column_num): return metadata +cdef object _import_polars_dtype(_ImportContext context): + """Return the Polars dtype corresponding to the SBDF value type in the import context. + + :param context: import context for a column + :return: the Polars dtype object + """ + vt_id = context.value_type.id + if vt_id == sbdf_c.SBDF_BOOLTYPEID: + return pl.Boolean + elif vt_id == sbdf_c.SBDF_INTTYPEID: + return pl.Int32 + elif vt_id == sbdf_c.SBDF_LONGTYPEID: + return pl.Int64 + elif vt_id == sbdf_c.SBDF_FLOATTYPEID: + return pl.Float32 + elif vt_id == sbdf_c.SBDF_DOUBLETYPEID: + return pl.Float64 + elif vt_id == sbdf_c.SBDF_STRINGTYPEID: + return pl.Utf8 + elif vt_id == sbdf_c.SBDF_DATETIMETYPEID: + return pl.Datetime + elif vt_id == sbdf_c.SBDF_DATETYPEID: + return pl.Date + elif vt_id == sbdf_c.SBDF_TIMETYPEID: + return pl.Time + elif vt_id == sbdf_c.SBDF_TIMESPANTYPEID: + return pl.Duration + elif vt_id == sbdf_c.SBDF_BINARYTYPEID: + return pl.Binary + elif vt_id == sbdf_c.SBDF_DECIMALTYPEID: + return pl.Decimal + else: + return pl.Utf8 + + +cdef object _import_build_polars_dataframe(column_names, importer_contexts): + """Build a Polars DataFrame directly from import context data, with no Pandas intermediary. + + :param column_names: list of column name strings + :param importer_contexts: list of _ImportContext objects + :return: a Polars DataFrame + """ + series_list = [] + for i, name in enumerate(column_names): + context = importer_contexts[i] + values = context.get_values_array() + invalids = context.get_invalid_array() + polars_dtype = _import_polars_dtype(context) + + if context.is_object_numpy_type(): + # Object arrays hold Python objects (str, date, datetime, etc.); Polars cannot + # construct a typed series from a numpy object array directly — use a Python list. 
+ values_list = values.tolist() + if invalids.any(): + for idx in np.where(invalids)[0]: + values_list[idx] = None + col = pl.Series(name=name, values=values_list, dtype=polars_dtype) + else: + # Numeric arrays: numpy → Polars Series directly, then scatter nulls if needed. + col = pl.Series(name=name, values=values, dtype=polars_dtype) + if invalids.any(): + col = col.scatter(np.where(invalids)[0].tolist(), None) + + series_list.append(col) + + return pl.DataFrame(series_list) + + def import_data(sbdf_file, output_format="pandas"): """Import data from an SBDF file and create a DataFrame. @@ -780,7 +855,13 @@ def import_data(sbdf_file, output_format="pandas"): if error != sbdf_c.SBDF_OK and error != sbdf_c.SBDF_TABLEEND: raise SBDFError(f"error reading '{sbdf_file}': {sbdf_c.sbdf_err_get_str(error).decode('utf-8')}") - # Build a new DataFrame with the results + # Build a Polars DataFrame directly if requested, with no Pandas intermediary + if output_format == "polars": + if pl is None: + raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'") + return _import_build_polars_dataframe(column_names, importer_contexts) + + # Build a new Pandas DataFrame with the results imported_columns = [] for i in range(num_columns): column_series = pd.Series(importer_contexts[i].get_values_array(), @@ -818,10 +899,6 @@ def import_data(sbdf_file, output_format="pandas"): with warnings.catch_warnings(): warnings.simplefilter("ignore") dataframe.spotfire_table_metadata = table_metadata - if output_format == "polars": - if pl is None: - raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'") - return pl.from_pandas(dataframe) return dataframe finally: diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 13d2035..c9e9e79 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -22,7 +22,7 @@ try: import polars as pl except ImportError: - pl = None + pl = None # type: 
ignore[assignment] from spotfire import sbdf from spotfire.test import utils @@ -550,9 +550,9 @@ def _assert_is_png_image(self, expr: bytes) -> None: class SbdfPolarsTest(unittest.TestCase): """Unit tests for Polars DataFrame support in 'spotfire.sbdf' module.""" - def test_write_polars_dataframe_basic(self): + def test_write_polars_basic(self): """Exporting a Polars DataFrame with common types should produce a valid SBDF file.""" - df = pl.DataFrame({ + polars_df = pl.DataFrame({ "flag": [True, False, True], "count": [1, 2, 3], "value": [1.1, 2.2, 3.3], @@ -560,7 +560,7 @@ def test_write_polars_dataframe_basic(self): }) with tempfile.TemporaryDirectory() as tempdir: path = f"{tempdir}/output.sbdf" - sbdf.export_data(df, path) + sbdf.export_data(polars_df, path) result = sbdf.import_data(path) self.assertEqual(len(result), 3) self.assertEqual(list(result.columns), ["flag", "count", "value", "label"]) @@ -569,16 +569,16 @@ def test_write_polars_dataframe_basic(self): self.assertAlmostEqual(result["value"][0], 1.1) self.assertEqual(result["label"].tolist(), ["a", "b", "c"]) - def test_write_polars_dataframe_nulls(self): + def test_write_polars_nulls(self): """Exporting a Polars DataFrame with null values should preserve nulls.""" - df = pl.DataFrame({ + polars_df = pl.DataFrame({ "ints": [1, None, 3], "floats": [1.0, None, 3.0], "strings": ["x", None, "z"], }) with tempfile.TemporaryDirectory() as tempdir: path = f"{tempdir}/output.sbdf" - sbdf.export_data(df, path) + sbdf.export_data(polars_df, path) result = sbdf.import_data(path) self.assertTrue(pd.isnull(result["ints"][1])) self.assertTrue(pd.isnull(result["floats"][1])) @@ -596,11 +596,14 @@ def test_write_polars_series(self): self.assertEqual(result["vals"].dropna().astype(int).tolist(), [10, 20, 30]) def test_import_as_polars(self): - """Importing an SBDF file with output_format='polars' should return a Polars DataFrame.""" + """Importing an SBDF file with output_format='polars' should return a native Polars 
DataFrame.""" dataframe = sbdf.import_data(utils.get_test_data_file("sbdf/1.sbdf"), output_format="polars") self.assertIsInstance(dataframe, pl.DataFrame) + self.assertNotIsInstance(dataframe, pd.DataFrame) self.assertIn("Boolean", dataframe.columns) self.assertIn("Integer", dataframe.columns) + # Verify nulls are preserved natively + self.assertIsNone(dataframe["Long"][0]) def test_polars_roundtrip(self): """A Polars DataFrame should survive an export/import roundtrip.""" From 003029192d2499296b577ed89c4f01b295515dc0 Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 21:07:12 -0500 Subject: [PATCH 03/10] Fix Polars edge cases: Categorical/Enum, UInt64 overflow, tz-aware Datetime, scatter compat - Fix Categorical/Enum dtype: was incorrectly trying to recurse into dtype.categories (which doesn't exist on the dtype object); now casts series to Utf8 and maps to SBDF_STRINGTYPEID directly - Add Enum dtype support (previously raised SBDFError) - Warn on UInt64 export: values above Int64 max will overflow silently - Warn on timezone-aware Datetime export: tz info is not preserved in SBDF - Warn on Decimal export: marked experimental, precision may be lost - Fix scatter() compatibility: add AttributeError fallback to set_at_idx() for older Polars versions within the supported range - Add tests for all of the above Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 24 +++++++++++++++++++++--- spotfire/test/test_sbdf.py | 27 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 234b588..4b0097f 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -727,7 +727,12 @@ cdef object _import_build_polars_dataframe(column_names, importer_contexts): # Numeric arrays: numpy → Polars Series directly, then scatter nulls if needed. 
col = pl.Series(name=name, values=values, dtype=polars_dtype) if invalids.any(): - col = col.scatter(np.where(invalids)[0].tolist(), None) + indices = np.where(invalids)[0].tolist() + try: + col = col.scatter(indices, None) + except AttributeError: + # Fallback for older Polars versions that use set_at_idx + col = col.set_at_idx(indices, None) series_list.append(col) @@ -1131,6 +1136,9 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description): elif dtype_name in ("Int8", "Int16", "Int32", "UInt8", "UInt16"): return sbdf_c.SBDF_INTTYPEID elif dtype_name in ("Int64", "UInt32", "UInt64"): + if dtype_name == "UInt64": + warnings.warn(f"Polars UInt64 type in {series_description} will be exported as LongInteger (signed " + f"64-bit); values above 9,223,372,036,854,775,807 will overflow", SBDFWarning) return sbdf_c.SBDF_LONGTYPEID elif dtype_name == "Float32": return sbdf_c.SBDF_FLOATTYPEID @@ -1141,6 +1149,9 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description): elif dtype_name == "Date": return sbdf_c.SBDF_DATETYPEID elif dtype_name == "Datetime": + if getattr(dtype, 'time_zone', None) is not None: + warnings.warn(f"Polars Datetime type in {series_description} has timezone '{dtype.time_zone}'; " + f"timezone information will not be preserved in SBDF", SBDFWarning) return sbdf_c.SBDF_DATETIMETYPEID elif dtype_name == "Duration": return sbdf_c.SBDF_TIMESPANTYPEID @@ -1149,9 +1160,12 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description): elif dtype_name == "Binary": return sbdf_c.SBDF_BINARYTYPEID elif dtype_name == "Decimal": + warnings.warn(f"Polars Decimal type in {series_description} export is experimental; " + f"precision may not be fully preserved", SBDFWarning) return sbdf_c.SBDF_DECIMALTYPEID - elif dtype_name == "Categorical": - return _export_infer_valuetype_from_polars_dtype(dtype.categories, series_description) + elif dtype_name in ("Categorical", "Enum"): + # SBDF has no categorical 
type; export as String + return sbdf_c.SBDF_STRINGTYPEID else: raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}") @@ -1164,6 +1178,10 @@ cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series) :return: NumPy ndarray of values """ dtype_name = series.dtype.__class__.__name__ + if dtype_name in ("Categorical", "Enum"): + # Cast to String so .to_numpy() returns plain Python strings + series = series.cast(pl.Utf8) + dtype_name = "Utf8" if dtype_name in ("Date", "Time"): # The Date/Time exporters require Python date/time objects; # Polars .to_numpy() returns numpy datetime64/int64 which those exporters do not accept. diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index c9e9e79..8c2a709 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -605,6 +605,33 @@ def test_import_as_polars(self): # Verify nulls are preserved natively self.assertIsNone(dataframe["Long"][0]) + def test_write_polars_categorical(self): + """Exporting a Polars Categorical column should export as String.""" + polars_df = pl.DataFrame({"cat": pl.Series(["a", "b", "a"]).cast(pl.Categorical)}) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/output.sbdf" + sbdf.export_data(polars_df, path) + result = sbdf.import_data(path) + self.assertEqual(result["cat"].tolist(), ["a", "b", "a"]) + + def test_write_polars_uint64_warns(self): + """Exporting a Polars UInt64 column should emit a warning about overflow risk.""" + polars_df = pl.DataFrame({"big": pl.Series([1, 2, 3], dtype=pl.UInt64)}) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/output.sbdf" + with self.assertWarns(sbdf.SBDFWarning): + sbdf.export_data(polars_df, path) + + def test_write_polars_datetime_tz(self): + """Exporting a timezone-aware Polars Datetime column should warn about timezone loss.""" + polars_df = pl.DataFrame({ + "ts": pl.Series([datetime.datetime(2024, 1, 1)]).dt.replace_time_zone("UTC") + }) 
+ with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/output.sbdf" + with self.assertWarns(sbdf.SBDFWarning): + sbdf.export_data(polars_df, path) + def test_polars_roundtrip(self): """A Polars DataFrame should survive an export/import roundtrip.""" original = pl.DataFrame({ From cef91075583b311f7bc56c898d362c18b3f2abfd Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 21:20:04 -0500 Subject: [PATCH 04/10] Add polars to CI test requirements and document in README - Add polars to test_requirements_default.txt so SbdfPolarsTest is actually executed in CI (previously skipped due to missing import) - Add spotfire[polars] row to extras table in README - Add usage note explaining Spotfire's bundled Python lacks Polars and that SPKs bundling Polars will be ~44 MB larger than typical packages Co-Authored-By: Claude Sonnet 4.6 --- README.md | 9 +++++++++ test_requirements_default.txt | 1 + 2 files changed, 10 insertions(+) diff --git a/README.md b/README.md index 14b0297..62dab02 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,16 @@ simply `spotfire`) to include the required Python packages to support optional f | `spotfire[plot-matplotlib]` | Plotting support using just `matplotlib` | | `spotfire[plot-pil]` | Plotting support using just `Pillow` | | `spotfire[plot-seaborn]` | Plotting support using just `seaborn` | +| `spotfire[polars]` | Polars DataFrame support | | `spotfire[dev,lint]` | Internal development | +Once installed, `export_data()` accepts `polars.DataFrame` and `polars.Series` directly, and +`import_data()` can return a `polars.DataFrame` via `output_format="polars"`. + +> **Note for Spotfire data functions:** Spotfire's bundled Python interpreter does not include +> Polars. To use Polars inside a data function, configure Spotfire to use a custom Python +> environment that has `polars` installed. 
Polars is a large binary package (~44 MB), so +> Spotfire Packages (SPKs) that bundle it will be significantly larger than typical packages. + ### License BSD-type 3-Clause License. See the file ```LICENSE``` included in the package. \ No newline at end of file diff --git a/test_requirements_default.txt b/test_requirements_default.txt index 73ab30d..7468679 100644 --- a/test_requirements_default.txt +++ b/test_requirements_default.txt @@ -2,5 +2,6 @@ html-testRunner geopandas matplotlib pillow +polars seaborn shapely \ No newline at end of file From 1bd219849b44847aad92e6cbac0a2da978f396cc Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 21:36:05 -0500 Subject: [PATCH 05/10] Harden Polars support: validation, warnings, and edge case tests - Raise SBDFError for unknown output_format values (previously fell through silently to Pandas) - Emit SBDFWarning when Categorical/Enum columns are exported as String, consistent with existing UInt64 and timezone warnings - Add test_invalid_output_format: verifies bad output_format raises - Add test_write_polars_empty: verifies empty DataFrame exports cleanly - Add test_write_polars_series_nulls: verifies null preservation in Series - Add test_polars_categorical_warns: verifies Categorical warning fires Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 6 ++++++ spotfire/test/test_sbdf.py | 40 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 4b0097f..faea3b6 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -860,6 +860,10 @@ def import_data(sbdf_file, output_format="pandas"): if error != sbdf_c.SBDF_OK and error != sbdf_c.SBDF_TABLEEND: raise SBDFError(f"error reading '{sbdf_file}': {sbdf_c.sbdf_err_get_str(error).decode('utf-8')}") + # Validate output_format before doing anything with it + if output_format not in ("pandas", "polars"): + raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 
'polars'") + # Build a Polars DataFrame directly if requested, with no Pandas intermediary if output_format == "polars": if pl is None: @@ -1165,6 +1169,8 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description): return sbdf_c.SBDF_DECIMALTYPEID elif dtype_name in ("Categorical", "Enum"): # SBDF has no categorical type; export as String + warnings.warn(f"Polars {dtype_name} type in {series_description} will be exported as String; " + f"category information will not be preserved", SBDFWarning) return sbdf_c.SBDF_STRINGTYPEID else: raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}") diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 8c2a709..eb4cf17 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -646,3 +646,43 @@ def test_polars_roundtrip(self): self.assertIsInstance(result, pl.DataFrame) self.assertEqual(result["strings"].to_list(), ["foo", "bar", "baz"]) self.assertAlmostEqual(result["floats"][0], 1.5) + + def test_invalid_output_format(self): + """Passing an unknown output_format should raise SBDFError immediately.""" + polars_df = pl.DataFrame({"x": [1, 2, 3]}) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/output.sbdf" + sbdf.export_data(polars_df, path) + with self.assertRaises(sbdf.SBDFError): + sbdf.import_data(path, output_format="numpy") + + def test_write_polars_empty(self): + """Exporting an empty Polars DataFrame should produce a valid (empty) SBDF file.""" + polars_df = pl.DataFrame({"a": pl.Series([], dtype=pl.Int32), + "b": pl.Series([], dtype=pl.Utf8)}) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/empty.sbdf" + sbdf.export_data(polars_df, path) + result = sbdf.import_data(path) + self.assertEqual(len(result), 0) + self.assertIn("a", result.columns) + self.assertIn("b", result.columns) + + def test_write_polars_series_nulls(self): + """Exporting a Polars Series with null values should preserve those 
nulls.""" + series = pl.Series("vals", [1, None, 3], dtype=pl.Int32) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/series_nulls.sbdf" + sbdf.export_data(series, path) + result = sbdf.import_data(path) + self.assertTrue(pd.isnull(result["vals"][1])) + self.assertEqual(int(result["vals"][0]), 1) + self.assertEqual(int(result["vals"][2]), 3) + + def test_polars_categorical_warns(self): + """Exporting a Polars Categorical column should emit a SBDFWarning.""" + polars_df = pl.DataFrame({"cat": pl.Series(["x", "y", "x"]).cast(pl.Categorical)}) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/cat_warn.sbdf" + with self.assertWarns(sbdf.SBDFWarning): + sbdf.export_data(polars_df, path) From 6761de013f0f01d956dbe3f57d9de4f1dfa80bb6 Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 21:44:50 -0500 Subject: [PATCH 06/10] Handle Polars Null dtype on export A Polars Series of [None, None, None] has dtype pl.Null (no type can be inferred). Previously this raised SBDFError with "unknown dtype". Now it exports as an all-invalid String column, consistent with how all-None Pandas columns are handled. 
Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 6 ++++++ spotfire/test/test_sbdf.py | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index faea3b6..b247a5b 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -1172,6 +1172,9 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description): warnings.warn(f"Polars {dtype_name} type in {series_description} will be exported as String; " f"category information will not be preserved", SBDFWarning) return sbdf_c.SBDF_STRINGTYPEID + elif dtype_name == "Null": + # All-null series with no inferred type; export as an all-invalid String column + return sbdf_c.SBDF_STRINGTYPEID else: raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}") @@ -1184,6 +1187,9 @@ cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series) :return: NumPy ndarray of values """ dtype_name = series.dtype.__class__.__name__ + if dtype_name == "Null": + # All-null series: produce an object array of Nones; invalids mask will cover all rows + return np.full(len(series), None, dtype=object) if dtype_name in ("Categorical", "Enum"): # Cast to String so .to_numpy() returns plain Python strings series = series.cast(pl.Utf8) diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index eb4cf17..ce1008b 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -686,3 +686,15 @@ def test_polars_categorical_warns(self): path = f"{tempdir}/cat_warn.sbdf" with self.assertWarns(sbdf.SBDFWarning): sbdf.export_data(polars_df, path) + + def test_write_polars_null_dtype(self): + """Exporting a Polars all-null Series (dtype=Null) should produce an all-invalid column.""" + polars_df = pl.DataFrame({"nothing": pl.Series([None, None, None])}) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/null_dtype.sbdf" + sbdf.export_data(polars_df, path) + result = sbdf.import_data(path) + 
self.assertEqual(len(result), 3) + self.assertTrue(pd.isnull(result["nothing"][0])) + self.assertTrue(pd.isnull(result["nothing"][1])) + self.assertTrue(pd.isnull(result["nothing"][2])) From 441cddbe0cd8bf1c6fe4e6b217f359bafa198b2b Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 22:00:56 -0500 Subject: [PATCH 07/10] Fix mypy error for polars import in test file CI static analysis runs mypy without polars installed; add type: ignore[import-not-found] so mypy skips the missing stub. Co-Authored-By: Claude Sonnet 4.6 --- spotfire/test/test_sbdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index ce1008b..4cf944b 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -20,7 +20,7 @@ import spotfire try: - import polars as pl + import polars as pl # type: ignore[import-not-found] except ImportError: pl = None # type: ignore[assignment] from spotfire import sbdf From a0a86ceb851b338ef4a92d25604555430ede25db Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 22:12:13 -0500 Subject: [PATCH 08/10] Add reviewer-facing comments to Polars implementation Explain non-obvious choices that would otherwise prompt review questions: - Why dtype.__class__.__name__ instead of isinstance() - Why scatter()/set_at_idx() try/except exists and which versions it covers - Why is_object_numpy_type() cpdef wrapper is needed for a cdef attribute - Why the output_format polars path short-circuits before pd.concat - Why the Null dtype path returns a placeholder array Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index b247a5b..20890d6 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -429,6 +429,11 @@ cdef class _ImportContext: """Return True if the numpy type for this column is NPY_OBJECT. 
:return: True if the numpy type is object, False otherwise + + .. note:: ``numpy_type_num`` is a ``cdef`` attribute, so it is reachable only through a + reference statically typed as ``_ImportContext``, not through an untyped Python object. + This ``cpdef`` wrapper exposes it so that :func:`_import_build_polars_dataframe` can + branch on it without touching the Cython-only attribute directly. """ return self.numpy_type_num == np_c.NPY_OBJECT @@ -729,10 +734,9 @@ cdef object _import_build_polars_dataframe(column_names, importer_contexts): if invalids.any(): indices = np.where(invalids)[0].tolist() try: - col = col.scatter(indices, None) + col = col.scatter(indices, None) # Polars >= 0.19 except AttributeError: - # Fallback for older Polars versions that use set_at_idx - col = col.set_at_idx(indices, None) + col = col.set_at_idx(indices, None) # Polars < 0.19 API series_list.append(col) @@ -864,7 +868,10 @@ def import_data(sbdf_file, output_format="pandas"): if output_format not in ("pandas", "polars"): raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 'polars'") - # Build a Polars DataFrame directly if requested, with no Pandas intermediary + # Short-circuit before pd.concat to avoid the Pandas intermediary entirely. + # This keeps the import zero-copy for large DataFrames: numpy arrays collected + # by each _ImportContext go straight into Polars Series without ever becoming + # a Pandas DataFrame. if output_format == "polars": if pl is None: raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'") @@ -1134,6 +1141,10 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description): :return: the integer value type id representing the type of series :raise SBDFError: if the dtype is unknown """ + # Use __class__.__name__ rather than isinstance() checks.
Polars dtype objects are + # not ordinary Python classes resolvable at Cython compile time, so isinstance() would + # require importing the exact dtype class — which breaks when Polars isn't installed. + # Class name strings are stable across the Polars versions we support (>= 0.20). dtype_name = dtype.__class__.__name__ if dtype_name == "Boolean": return sbdf_c.SBDF_BOOLTYPEID @@ -1173,7 +1184,9 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description): f"category information will not be preserved", SBDFWarning) return sbdf_c.SBDF_STRINGTYPEID elif dtype_name == "Null": - # All-null series with no inferred type; export as an all-invalid String column + # pl.Series([None, None]) has dtype Null when no type can be inferred. Export as + # String; _export_polars_series_to_numpy produces a placeholder array and the + # invalids mask marks every row missing, so the stored values are never read. return sbdf_c.SBDF_STRINGTYPEID else: raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}") @@ -1188,7 +1201,9 @@ cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series) """ dtype_name = series.dtype.__class__.__name__ if dtype_name == "Null": - # All-null series: produce an object array of Nones; invalids mask will cover all rows + # A Null-dtype series has no values to convert; return a same-length placeholder array. + # The invalids mask (set by the caller via series.is_null()) marks every row as missing, + # so the placeholder values are never read by the SBDF writer. 
return np.full(len(series), None, dtype=object) if dtype_name in ("Categorical", "Enum"): # Cast to String so .to_numpy() returns plain Python strings From bf8e984ded4cc10385b08303cee1bcb23346cf5e Mon Sep 17 00:00:00 2001 From: stewjb Date: Mon, 23 Mar 2026 22:15:56 -0500 Subject: [PATCH 09/10] Remove set_at_idx fallback; scatter() is available in all supported Polars versions (>= 0.20) Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 20890d6..7b90a09 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -733,10 +733,7 @@ cdef object _import_build_polars_dataframe(column_names, importer_contexts): col = pl.Series(name=name, values=values, dtype=polars_dtype) if invalids.any(): indices = np.where(invalids)[0].tolist() - try: - col = col.scatter(indices, None) # Polars >= 0.19 - except AttributeError: - col = col.set_at_idx(indices, None) # Polars < 0.19 API + col = col.scatter(indices, None) series_list.append(col) From 00d81cff097e7d3593cc80b43e702525b354ada6 Mon Sep 17 00:00:00 2001 From: stewjb Date: Tue, 24 Mar 2026 05:23:24 -0500 Subject: [PATCH 10/10] Address Copilot review comments - Move output_format validation to top of import_data() for fail-fast behaviour before the file is opened - Raise SBDFError in _import_polars_dtype fallback instead of silently returning Utf8 for unknown SBDF type IDs - Treat NaN as invalid (missing) for Float32/Float64 columns, matching Pandas pd.isnull() behaviour; add test_write_polars_float_nan - Keep native datetime64/timedelta64 arrays for Datetime/Duration columns instead of boxing to object dtype (avoids unnecessary copy) - Add @overload signatures to sbdf.pyi so callers get pd.DataFrame for the default output_format="pandas" and Any for output_format="polars" Co-Authored-By: Claude Sonnet 4.6 --- spotfire/sbdf.pyi | 4 ++++ spotfire/sbdf.pyx | 24 +++++++++++++++++------- spotfire/test/test_sbdf.py | 
11 +++++++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/spotfire/sbdf.pyi b/spotfire/sbdf.pyi index 80d8fc4..9bd2812 100644 --- a/spotfire/sbdf.pyi +++ b/spotfire/sbdf.pyi @@ -13,6 +13,10 @@ class SBDFError(Exception): ... class SBDFWarning(Warning): ... def spotfire_typename_to_valuetype_id(typename: str) -> typing.Optional[int]: ... +@typing.overload +def import_data(sbdf_file: _FilenameLike, output_format: typing.Literal["pandas"] = ...) -> pd.DataFrame: ... +@typing.overload +def import_data(sbdf_file: _FilenameLike, output_format: typing.Literal["polars"]) -> typing.Any: ... def import_data(sbdf_file: _FilenameLike, output_format: str = "pandas") -> typing.Any: ... def export_data(obj: typing.Any, sbdf_file: _FilenameLike, default_column_name: str = "x", rows_per_slice: int = 0, encoding_rle: bool = True) -> None: ... diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx index 7b90a09..28770f5 100644 --- a/spotfire/sbdf.pyx +++ b/spotfire/sbdf.pyx @@ -703,7 +703,7 @@ cdef object _import_polars_dtype(_ImportContext context): elif vt_id == sbdf_c.SBDF_DECIMALTYPEID: return pl.Decimal else: - return pl.Utf8 + raise SBDFError(f"unsupported SBDF value type id {vt_id} for Polars output") cdef object _import_build_polars_dataframe(column_names, importer_contexts): @@ -748,6 +748,10 @@ def import_data(sbdf_file, output_format="pandas"): :return: the DataFrame containing the imported data :raises SBDFError: if a problem is encountered during import """ + # Validate output_format before opening the file so we fail fast on bad input. 
+ if output_format not in ("pandas", "polars"): + raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 'polars'") + cdef int error, i cdef stdio.FILE* input_file = NULL cdef int major_v, minor_v @@ -861,10 +865,6 @@ def import_data(sbdf_file, output_format="pandas"): if error != sbdf_c.SBDF_OK and error != sbdf_c.SBDF_TABLEEND: raise SBDFError(f"error reading '{sbdf_file}': {sbdf_c.sbdf_err_get_str(error).decode('utf-8')}") - # Validate output_format before doing anything with it - if output_format not in ("pandas", "polars"): - raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 'polars'") - # Short-circuit before pd.concat to avoid the Pandas intermediary entirely. # This keeps the import zero-copy for large DataFrames: numpy arrays collected # by each _ImportContext go straight into Polars Series without ever becoming @@ -1210,6 +1210,10 @@ cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series) # The Date/Time exporters require Python date/time objects; # Polars .to_numpy() returns numpy datetime64/int64 which those exporters do not accept. return np.asarray(series.to_list(), dtype=object) + if dtype_name in ("Datetime", "Duration"): + # Keep native datetime64/timedelta64 arrays; the invalids mask handles nulls (NaT cells + # are marked invalid and ignored by the SBDF writer). Boxing to object would be slower. 
+ return series.to_numpy(allow_copy=True) na_value = context.get_numpy_na_value() if na_value is not None: return np.asarray(series.fill_null(na_value).to_numpy(allow_copy=True), @@ -1236,7 +1240,10 @@ cdef _export_obj_polars_dataframe(obj): column_names.append(col) context = _ExportContext() context.set_valuetype_id(_export_infer_valuetype_from_polars_dtype(series.dtype, f"column '{col}'")) - invalids = series.is_null().to_numpy() + if series.dtype.__class__.__name__ in ("Float32", "Float64"): + invalids = (series.is_null() | series.is_nan()).to_numpy() + else: + invalids = series.is_null().to_numpy() context.set_arrays(_export_polars_series_to_numpy(context, series), invalids) column_metadata.append({}) exporter_contexts.append(context) @@ -1257,7 +1264,10 @@ cdef _export_obj_polars_series(obj, default_column_name): context = _ExportContext() context.set_valuetype_id(_export_infer_valuetype_from_polars_dtype(obj.dtype, description)) - invalids = obj.is_null().to_numpy() + if obj.dtype.__class__.__name__ in ("Float32", "Float64"): + invalids = (obj.is_null() | obj.is_nan()).to_numpy() + else: + invalids = obj.is_null().to_numpy() context.set_arrays(_export_polars_series_to_numpy(context, obj), invalids) return {}, [column_name], [{}], [context] diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py index 4cf944b..b048ac5 100644 --- a/spotfire/test/test_sbdf.py +++ b/spotfire/test/test_sbdf.py @@ -698,3 +698,14 @@ def test_write_polars_null_dtype(self): self.assertTrue(pd.isnull(result["nothing"][0])) self.assertTrue(pd.isnull(result["nothing"][1])) self.assertTrue(pd.isnull(result["nothing"][2])) + + def test_write_polars_float_nan(self): + """NaN in a Polars float column should be treated as invalid (missing), not a real value.""" + polars_df = pl.DataFrame({"vals": pl.Series([1.0, float("nan"), 3.0])}) + with tempfile.TemporaryDirectory() as tempdir: + path = f"{tempdir}/float_nan.sbdf" + sbdf.export_data(polars_df, path) + result = 
sbdf.import_data(path) + self.assertAlmostEqual(result["vals"][0], 1.0) + self.assertTrue(pd.isnull(result["vals"][1])) + self.assertAlmostEqual(result["vals"][2], 3.0)