From 4868db9bc4408063ccf0968525de49e41f730859 Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 20:06:54 -0500
Subject: [PATCH 01/10] feat: polars functionality

---
 .gitignore                 |   7 +++
 pyproject.toml             |   6 +-
 spotfire/sbdf.pyi          |   2 +-
 spotfire/sbdf.pyx          | 126 ++++++++++++++++++++++++++++++++++++-
 spotfire/test/test_sbdf.py |  77 +++++++++++++++++++++++
 5 files changed, 213 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8b22a61..9f0e1c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,13 @@ __pycache__/
 
 # virtual environments
 /venv/
+/.venv/
+
+# uv lock file (this is a library; lock files are for applications)
+/uv.lock
+
+# Claude Code
+/.claude
 
 # PyCharm project files
 /.idea
diff --git a/pyproject.toml b/pyproject.toml
index 9b68bf9..4588961 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,9 +54,13 @@ plot-seaborn = [
     "seaborn >= 0.13.2",
 ]
 plot = [ "spotfire[plot-matplotlib,plot-pil,plot-seaborn]" ]
+# Polars support
+polars = [
+    "polars >= 0.20.0",
+]
 # Development requirements
 dev = [
-    "spotfire[geo,plot]",
+    "spotfire[geo,plot,polars]",
     "Cython >= 3.0.4",
     "html-testRunner",
 ]
diff --git a/spotfire/sbdf.pyi b/spotfire/sbdf.pyi
index 625aff6..80d8fc4 100644
--- a/spotfire/sbdf.pyi
+++ b/spotfire/sbdf.pyi
@@ -13,6 +13,6 @@ class SBDFError(Exception): ...
 class SBDFWarning(Warning): ...
 
 def spotfire_typename_to_valuetype_id(typename: str) -> typing.Optional[int]: ...
-def import_data(sbdf_file: _FilenameLike): ...
+def import_data(sbdf_file: _FilenameLike, output_format: str = "pandas") -> typing.Any: ...
 def export_data(obj: typing.Any, sbdf_file: _FilenameLike, default_column_name: str = "x",
                 rows_per_slice: int = 0, encoding_rle: bool = True) -> None: ...
diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx
index 2f005bf..ff10672 100644
--- a/spotfire/sbdf.pyx
+++ b/spotfire/sbdf.pyx
@@ -55,6 +55,11 @@ try:
 except ImportError:
     PIL = None
 
+try:
+    import polars as pl
+except ImportError:
+    pl = None
+
 
 # Various utility helper functions for doing things that are problematic in PYX files
 include "sbdf_helpers.pxi"
@@ -654,10 +659,11 @@ cdef dict _import_metadata(sbdf_c.sbdf_metadata_head* md, int column_num):
     return metadata
 
 
-def import_data(sbdf_file):
-    """Import data from an SBDF file and create a 'pandas' DataFrame.
+def import_data(sbdf_file, output_format="pandas"):
+    """Import data from an SBDF file and create a DataFrame.
 
     :param sbdf_file: the filename of the SBDF file to import
+    :param output_format: the format of the returned DataFrame; either 'pandas' (default) or 'polars'
     :return: the DataFrame containing the imported data
     :raises SBDFError: if a problem is encountered during import
     """
@@ -812,6 +818,10 @@ def import_data(sbdf_file):
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             dataframe.spotfire_table_metadata = table_metadata
+        if output_format == "polars":
+            if pl is None:
+                raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'")
+            return pl.from_pandas(dataframe)
         return dataframe
 
     finally:
@@ -1030,6 +1040,110 @@ cdef _export_obj_series(obj, default_column_name):
     return {}, [column_name], [column_metadata], [context]
 
 
+cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
+    """Determine a value type for a data set based on the Polars dtype for the series.
+
+    :param dtype: the Polars dtype object
+    :param series_description: description of series (for error reporting)
+    :return: the integer value type id representing the type of series
+    :raise SBDFError: if the dtype is unknown
+    """
+    dtype_name = dtype.__class__.__name__
+    if dtype_name == "Boolean":
+        return sbdf_c.SBDF_BOOLTYPEID
+    elif dtype_name in ("Int8", "Int16", "Int32", "UInt8", "UInt16"):
+        return sbdf_c.SBDF_INTTYPEID
+    elif dtype_name in ("Int64", "UInt32", "UInt64"):
+        return sbdf_c.SBDF_LONGTYPEID
+    elif dtype_name == "Float32":
+        return sbdf_c.SBDF_FLOATTYPEID
+    elif dtype_name == "Float64":
+        return sbdf_c.SBDF_DOUBLETYPEID
+    elif dtype_name in ("Utf8", "String"):
+        return sbdf_c.SBDF_STRINGTYPEID
+    elif dtype_name == "Date":
+        return sbdf_c.SBDF_DATETYPEID
+    elif dtype_name == "Datetime":
+        return sbdf_c.SBDF_DATETIMETYPEID
+    elif dtype_name == "Duration":
+        return sbdf_c.SBDF_TIMESPANTYPEID
+    elif dtype_name == "Time":
+        return sbdf_c.SBDF_TIMETYPEID
+    elif dtype_name == "Binary":
+        return sbdf_c.SBDF_BINARYTYPEID
+    elif dtype_name == "Decimal":
+        return sbdf_c.SBDF_DECIMALTYPEID
+    elif dtype_name == "Categorical":
+        return _export_infer_valuetype_from_polars_dtype(dtype.categories, series_description)
+    else:
+        raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}")
+
+
+cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series):
+    """Convert a Polars Series to a NumPy array suitable for the SBDF exporter.
+
+    :param context: export context holding the resolved value type
+    :param series: Polars Series to convert
+    :return: NumPy ndarray of values
+    """
+    dtype_name = series.dtype.__class__.__name__
+    if dtype_name in ("Date", "Time"):
+        # The Date/Time exporters require Python date/time objects;
+        # Polars .to_numpy() returns numpy datetime64/int64 which those exporters do not accept.
+        return np.asarray(series.to_list(), dtype=object)
+    na_value = context.get_numpy_na_value()
+    if na_value is not None:
+        return np.asarray(series.fill_null(na_value).to_numpy(allow_copy=True),
+                          dtype=context.get_numpy_dtype())
+    else:
+        return np.asarray(series.to_numpy(allow_copy=True), dtype=object)
+
+
+cdef _export_obj_polars_dataframe(obj):
+    """Extract column information for a Polars ``DataFrame``.
+
+    :param obj: Polars DataFrame object to export
+    :return: tuple containing dictionary of table metadata, list of column names, list of dictionaries of column
+              metadata, and list of export context objects
+    """
+    if len(set(obj.columns)) != len(obj.columns):
+        raise SBDFError("obj does not have unique column names")
+
+    column_names = []
+    column_metadata = []
+    exporter_contexts = []
+    for col in obj.columns:
+        series = obj[col]
+        column_names.append(col)
+        context = _ExportContext()
+        context.set_valuetype_id(_export_infer_valuetype_from_polars_dtype(series.dtype, f"column '{col}'"))
+        invalids = series.is_null().to_numpy()
+        context.set_arrays(_export_polars_series_to_numpy(context, series), invalids)
+        column_metadata.append({})
+        exporter_contexts.append(context)
+
+    return {}, column_names, column_metadata, exporter_contexts
+
+
+cdef _export_obj_polars_series(obj, default_column_name):
+    """Extract column information for a Polars ``Series``.
+
+    :param obj: Polars Series object to export
+    :param default_column_name: column name to use when obj does not have a name
+    :return: tuple containing dict of table metadata, list of column names, list of dicts of column metadata, and
+              list of export context objects
+    """
+    column_name = obj.name if obj.name else default_column_name
+    description = f"series '{obj.name}'" if obj.name else "series"
+
+    context = _ExportContext()
+    context.set_valuetype_id(_export_infer_valuetype_from_polars_dtype(obj.dtype, description))
+    invalids = obj.is_null().to_numpy()
+    context.set_arrays(_export_polars_series_to_numpy(context, obj), invalids)
+
+    return {}, [column_name], [{}], [context]
+
+
 cdef _export_obj_numpy(np_c.ndarray obj, default_column_name):
     """Extract column information for a NumPy ``ndarray``.
 
@@ -1801,8 +1915,14 @@ def export_data(obj, sbdf_file, default_column_name="x", Py_ssize_t rows_per_sli
 
     try:
         # Extract data and metadata from obj
+        # Polars DataFrames (tabular)
+        if pl is not None and isinstance(obj, pl.DataFrame):
+            exported = _export_obj_polars_dataframe(obj)
+        # Polars Series (columnar)
+        elif pl is not None and isinstance(obj, pl.Series):
+            exported = _export_obj_polars_series(obj, default_column_name)
         # Pandas DataFrames (tabular)
-        if isinstance(obj, pd.DataFrame):
+        elif isinstance(obj, pd.DataFrame):
             exported = _export_obj_dataframe(obj)
         # Pandas Series (columnar)
         elif isinstance(obj, pd.Series):
diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py
index de89774..13d2035 100644
--- a/spotfire/test/test_sbdf.py
+++ b/spotfire/test/test_sbdf.py
@@ -18,6 +18,11 @@
 from packaging import version
 
 import spotfire
+
+try:
+    import polars as pl
+except ImportError:
+    pl = None
 from spotfire import sbdf
 from spotfire.test import utils
 
@@ -539,3 +544,75 @@ def _assert_dataframe_shape(self, dataframe: pd.DataFrame, rows: int, column_nam
     def _assert_is_png_image(self, expr: bytes) -> None:
         """Assert that a bytes object represents PNG image data."""
         self.assertEqual(expr[0:8], b'\x89PNG\x0d\x0a\x1a\x0a')
+
+
+@unittest.skipIf(pl is None, "polars not installed")
+class SbdfPolarsTest(unittest.TestCase):
+    """Unit tests for Polars DataFrame support in 'spotfire.sbdf' module."""
+
+    def test_write_polars_dataframe_basic(self):
+        """Exporting a Polars DataFrame with common types should produce a valid SBDF file."""
+        df = pl.DataFrame({
+            "flag": [True, False, True],
+            "count": [1, 2, 3],
+            "value": [1.1, 2.2, 3.3],
+            "label": ["a", "b", "c"],
+        })
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/output.sbdf"
+            sbdf.export_data(df, path)
+            result = sbdf.import_data(path)
+        self.assertEqual(len(result), 3)
+        self.assertEqual(list(result.columns), ["flag", "count", "value", "label"])
+        self.assertEqual(result["flag"].tolist(), [True, False, True])
+        self.assertEqual(result["count"].dropna().astype(int).tolist(), [1, 2, 3])
+        self.assertAlmostEqual(result["value"][0], 1.1)
+        self.assertEqual(result["label"].tolist(), ["a", "b", "c"])
+
+    def test_write_polars_dataframe_nulls(self):
+        """Exporting a Polars DataFrame with null values should preserve nulls."""
+        df = pl.DataFrame({
+            "ints": [1, None, 3],
+            "floats": [1.0, None, 3.0],
+            "strings": ["x", None, "z"],
+        })
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/output.sbdf"
+            sbdf.export_data(df, path)
+            result = sbdf.import_data(path)
+        self.assertTrue(pd.isnull(result["ints"][1]))
+        self.assertTrue(pd.isnull(result["floats"][1]))
+        self.assertTrue(pd.isnull(result["strings"][1]))
+
+    def test_write_polars_series(self):
+        """Exporting a Polars Series should produce a valid SBDF file."""
+        series = pl.Series("vals", [10, 20, 30])
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/output.sbdf"
+            sbdf.export_data(series, path)
+            result = sbdf.import_data(path)
+        self.assertEqual(len(result), 3)
+        self.assertEqual(result.columns[0], "vals")
+        self.assertEqual(result["vals"].dropna().astype(int).tolist(), [10, 20, 30])
+
+    def test_import_as_polars(self):
+        """Importing an SBDF file with output_format='polars' should return a Polars DataFrame."""
+        dataframe = sbdf.import_data(utils.get_test_data_file("sbdf/1.sbdf"), output_format="polars")
+        self.assertIsInstance(dataframe, pl.DataFrame)
+        self.assertIn("Boolean", dataframe.columns)
+        self.assertIn("Integer", dataframe.columns)
+
+    def test_polars_roundtrip(self):
+        """A Polars DataFrame should survive an export/import roundtrip."""
+        original = pl.DataFrame({
+            "integers": [1, 2, 3],
+            "floats": [1.5, 2.5, 3.5],
+            "strings": ["foo", "bar", "baz"],
+        })
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/roundtrip.sbdf"
+            sbdf.export_data(original, path)
+            result = sbdf.import_data(path, output_format="polars")
+        self.assertIsInstance(result, pl.DataFrame)
+        self.assertEqual(result["strings"].to_list(), ["foo", "bar", "baz"])
+        self.assertAlmostEqual(result["floats"][0], 1.5)

From 82492e5d3f2429f1988e196f2246f5cb919a3283 Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 20:45:54 -0500
Subject: [PATCH 02/10] native Polars import path; linting and testing

---
 spotfire/sbdf.pyx          | 87 +++++++++++++++++++++++++++++++++++---
 spotfire/test/test_sbdf.py | 19 +++++----
 2 files changed, 93 insertions(+), 13 deletions(-)

diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx
index ff10672..234b588 100644
--- a/spotfire/sbdf.pyx
+++ b/spotfire/sbdf.pyx
@@ -425,6 +425,13 @@ cdef class _ImportContext:
         """
         return _valuetype_id_to_spotfire_typename(self.value_type.id)
 
+    cpdef bint is_object_numpy_type(self):
+        """Return True if the numpy type for this column is NPY_OBJECT.
+
+        :return: True if the numpy type is object, False otherwise
+        """
+        return self.numpy_type_num == np_c.NPY_OBJECT
+
 
 # Individual functions for importing each value type.
 ctypedef int(*importer_fn)(_ImportContext, sbdf_c.sbdf_columnslice*)
@@ -659,6 +666,74 @@ cdef dict _import_metadata(sbdf_c.sbdf_metadata_head* md, int column_num):
     return metadata
 
 
+cdef object _import_polars_dtype(_ImportContext context):
+    """Return the Polars dtype corresponding to the SBDF value type in the import context.
+
+    :param context: import context for a column
+    :return: the Polars dtype object
+    """
+    vt_id = context.value_type.id
+    if vt_id == sbdf_c.SBDF_BOOLTYPEID:
+        return pl.Boolean
+    elif vt_id == sbdf_c.SBDF_INTTYPEID:
+        return pl.Int32
+    elif vt_id == sbdf_c.SBDF_LONGTYPEID:
+        return pl.Int64
+    elif vt_id == sbdf_c.SBDF_FLOATTYPEID:
+        return pl.Float32
+    elif vt_id == sbdf_c.SBDF_DOUBLETYPEID:
+        return pl.Float64
+    elif vt_id == sbdf_c.SBDF_STRINGTYPEID:
+        return pl.Utf8
+    elif vt_id == sbdf_c.SBDF_DATETIMETYPEID:
+        return pl.Datetime
+    elif vt_id == sbdf_c.SBDF_DATETYPEID:
+        return pl.Date
+    elif vt_id == sbdf_c.SBDF_TIMETYPEID:
+        return pl.Time
+    elif vt_id == sbdf_c.SBDF_TIMESPANTYPEID:
+        return pl.Duration
+    elif vt_id == sbdf_c.SBDF_BINARYTYPEID:
+        return pl.Binary
+    elif vt_id == sbdf_c.SBDF_DECIMALTYPEID:
+        return pl.Decimal
+    else:
+        return pl.Utf8
+
+
+cdef object _import_build_polars_dataframe(column_names, importer_contexts):
+    """Build a Polars DataFrame directly from import context data, with no Pandas intermediary.
+
+    :param column_names: list of column name strings
+    :param importer_contexts: list of _ImportContext objects
+    :return: a Polars DataFrame
+    """
+    series_list = []
+    for i, name in enumerate(column_names):
+        context = importer_contexts[i]
+        values = context.get_values_array()
+        invalids = context.get_invalid_array()
+        polars_dtype = _import_polars_dtype(context)
+
+        if context.is_object_numpy_type():
+            # Object arrays hold Python objects (str, date, datetime, etc.); Polars cannot
+            # construct a typed series from a numpy object array directly — use a Python list.
+            values_list = values.tolist()
+            if invalids.any():
+                for idx in np.where(invalids)[0]:
+                    values_list[idx] = None
+            col = pl.Series(name=name, values=values_list, dtype=polars_dtype)
+        else:
+            # Numeric arrays: numpy → Polars Series directly, then scatter nulls if needed.
+            col = pl.Series(name=name, values=values, dtype=polars_dtype)
+            if invalids.any():
+                col = col.scatter(np.where(invalids)[0].tolist(), None)
+
+        series_list.append(col)
+
+    return pl.DataFrame(series_list)
+
+
 def import_data(sbdf_file, output_format="pandas"):
     """Import data from an SBDF file and create a DataFrame.
 
@@ -780,7 +855,13 @@ def import_data(sbdf_file, output_format="pandas"):
         if error != sbdf_c.SBDF_OK and error != sbdf_c.SBDF_TABLEEND:
             raise SBDFError(f"error reading '{sbdf_file}': {sbdf_c.sbdf_err_get_str(error).decode('utf-8')}")
 
-        # Build a new DataFrame with the results
+        # Build a Polars DataFrame directly if requested, with no Pandas intermediary
+        if output_format == "polars":
+            if pl is None:
+                raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'")
+            return _import_build_polars_dataframe(column_names, importer_contexts)
+
+        # Build a new Pandas DataFrame with the results
         imported_columns = []
         for i in range(num_columns):
             column_series = pd.Series(importer_contexts[i].get_values_array(),
@@ -818,10 +899,6 @@ def import_data(sbdf_file, output_format="pandas"):
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             dataframe.spotfire_table_metadata = table_metadata
-        if output_format == "polars":
-            if pl is None:
-                raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'")
-            return pl.from_pandas(dataframe)
         return dataframe
 
     finally:
diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py
index 13d2035..c9e9e79 100644
--- a/spotfire/test/test_sbdf.py
+++ b/spotfire/test/test_sbdf.py
@@ -22,7 +22,7 @@
 try:
     import polars as pl
 except ImportError:
-    pl = None
+    pl = None  # type: ignore[assignment]
 from spotfire import sbdf
 from spotfire.test import utils
 
@@ -550,9 +550,9 @@ def _assert_is_png_image(self, expr: bytes) -> None:
 class SbdfPolarsTest(unittest.TestCase):
     """Unit tests for Polars DataFrame support in 'spotfire.sbdf' module."""
 
-    def test_write_polars_dataframe_basic(self):
+    def test_write_polars_basic(self):
         """Exporting a Polars DataFrame with common types should produce a valid SBDF file."""
-        df = pl.DataFrame({
+        polars_df = pl.DataFrame({
             "flag": [True, False, True],
             "count": [1, 2, 3],
             "value": [1.1, 2.2, 3.3],
@@ -560,7 +560,7 @@ def test_write_polars_dataframe_basic(self):
         })
         with tempfile.TemporaryDirectory() as tempdir:
             path = f"{tempdir}/output.sbdf"
-            sbdf.export_data(df, path)
+            sbdf.export_data(polars_df, path)
             result = sbdf.import_data(path)
         self.assertEqual(len(result), 3)
         self.assertEqual(list(result.columns), ["flag", "count", "value", "label"])
@@ -569,16 +569,16 @@ def test_write_polars_dataframe_basic(self):
         self.assertAlmostEqual(result["value"][0], 1.1)
         self.assertEqual(result["label"].tolist(), ["a", "b", "c"])
 
-    def test_write_polars_dataframe_nulls(self):
+    def test_write_polars_nulls(self):
         """Exporting a Polars DataFrame with null values should preserve nulls."""
-        df = pl.DataFrame({
+        polars_df = pl.DataFrame({
             "ints": [1, None, 3],
             "floats": [1.0, None, 3.0],
             "strings": ["x", None, "z"],
         })
         with tempfile.TemporaryDirectory() as tempdir:
             path = f"{tempdir}/output.sbdf"
-            sbdf.export_data(df, path)
+            sbdf.export_data(polars_df, path)
             result = sbdf.import_data(path)
         self.assertTrue(pd.isnull(result["ints"][1]))
         self.assertTrue(pd.isnull(result["floats"][1]))
@@ -596,11 +596,14 @@ def test_write_polars_series(self):
         self.assertEqual(result["vals"].dropna().astype(int).tolist(), [10, 20, 30])
 
     def test_import_as_polars(self):
-        """Importing an SBDF file with output_format='polars' should return a Polars DataFrame."""
+        """Importing an SBDF file with output_format='polars' should return a native Polars DataFrame."""
         dataframe = sbdf.import_data(utils.get_test_data_file("sbdf/1.sbdf"), output_format="polars")
         self.assertIsInstance(dataframe, pl.DataFrame)
+        self.assertNotIsInstance(dataframe, pd.DataFrame)
         self.assertIn("Boolean", dataframe.columns)
         self.assertIn("Integer", dataframe.columns)
+        # Verify nulls are preserved natively
+        self.assertIsNone(dataframe["Long"][0])
 
     def test_polars_roundtrip(self):
         """A Polars DataFrame should survive an export/import roundtrip."""

From 003029192d2499296b577ed89c4f01b295515dc0 Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 21:07:12 -0500
Subject: [PATCH 03/10] Fix Polars edge cases: Categorical/Enum, UInt64
 overflow, tz-aware Datetime, scatter compat

- Fix Categorical/Enum dtype: was incorrectly trying to recurse into
  dtype.categories (which doesn't exist on the dtype object); now casts
  series to Utf8 and maps to SBDF_STRINGTYPEID directly
- Add Enum dtype support (previously raised SBDFError)
- Warn on UInt64 export: values above Int64 max will overflow silently
- Warn on timezone-aware Datetime export: tz info is not preserved in SBDF
- Warn on Decimal export: marked experimental, precision may be lost
- Fix scatter() compatibility: add AttributeError fallback to set_at_idx()
  for older Polars versions that do not provide Series.scatter()
- Add tests for all of the above

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 spotfire/sbdf.pyx          | 24 +++++++++++++++++++++---
 spotfire/test/test_sbdf.py | 27 +++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx
index 234b588..4b0097f 100644
--- a/spotfire/sbdf.pyx
+++ b/spotfire/sbdf.pyx
@@ -727,7 +727,12 @@ cdef object _import_build_polars_dataframe(column_names, importer_contexts):
             # Numeric arrays: numpy → Polars Series directly, then scatter nulls if needed.
             col = pl.Series(name=name, values=values, dtype=polars_dtype)
             if invalids.any():
-                col = col.scatter(np.where(invalids)[0].tolist(), None)
+                indices = np.where(invalids)[0].tolist()
+                try:
+                    col = col.scatter(indices, None)
+                except AttributeError:
+                    # Fallback for older Polars versions that use set_at_idx
+                    col = col.set_at_idx(indices, None)
 
         series_list.append(col)
 
@@ -1131,6 +1136,9 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
     elif dtype_name in ("Int8", "Int16", "Int32", "UInt8", "UInt16"):
         return sbdf_c.SBDF_INTTYPEID
     elif dtype_name in ("Int64", "UInt32", "UInt64"):
+        if dtype_name == "UInt64":
+            warnings.warn(f"Polars UInt64 type in {series_description} will be exported as LongInteger (signed "
+                          f"64-bit); values above 9,223,372,036,854,775,807 will overflow", SBDFWarning)
         return sbdf_c.SBDF_LONGTYPEID
     elif dtype_name == "Float32":
         return sbdf_c.SBDF_FLOATTYPEID
@@ -1141,6 +1149,9 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
     elif dtype_name == "Date":
         return sbdf_c.SBDF_DATETYPEID
     elif dtype_name == "Datetime":
+        if getattr(dtype, 'time_zone', None) is not None:
+            warnings.warn(f"Polars Datetime type in {series_description} has timezone '{dtype.time_zone}'; "
+                          f"timezone information will not be preserved in SBDF", SBDFWarning)
         return sbdf_c.SBDF_DATETIMETYPEID
     elif dtype_name == "Duration":
         return sbdf_c.SBDF_TIMESPANTYPEID
@@ -1149,9 +1160,12 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
     elif dtype_name == "Binary":
         return sbdf_c.SBDF_BINARYTYPEID
     elif dtype_name == "Decimal":
+        warnings.warn(f"Polars Decimal type in {series_description} export is experimental; "
+                      f"precision may not be fully preserved", SBDFWarning)
         return sbdf_c.SBDF_DECIMALTYPEID
-    elif dtype_name == "Categorical":
-        return _export_infer_valuetype_from_polars_dtype(dtype.categories, series_description)
+    elif dtype_name in ("Categorical", "Enum"):
+        # SBDF has no categorical type; export as String
+        return sbdf_c.SBDF_STRINGTYPEID
     else:
         raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}")
 
@@ -1164,6 +1178,10 @@ cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series)
     :return: NumPy ndarray of values
     """
     dtype_name = series.dtype.__class__.__name__
+    if dtype_name in ("Categorical", "Enum"):
+        # Cast to String so .to_numpy() returns plain Python strings
+        series = series.cast(pl.Utf8)
+        dtype_name = "Utf8"
     if dtype_name in ("Date", "Time"):
         # The Date/Time exporters require Python date/time objects;
         # Polars .to_numpy() returns numpy datetime64/int64 which those exporters do not accept.
diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py
index c9e9e79..8c2a709 100644
--- a/spotfire/test/test_sbdf.py
+++ b/spotfire/test/test_sbdf.py
@@ -605,6 +605,33 @@ def test_import_as_polars(self):
         # Verify nulls are preserved natively
         self.assertIsNone(dataframe["Long"][0])
 
+    def test_write_polars_categorical(self):
+        """Exporting a Polars Categorical column should export as String."""
+        polars_df = pl.DataFrame({"cat": pl.Series(["a", "b", "a"]).cast(pl.Categorical)})
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/output.sbdf"
+            sbdf.export_data(polars_df, path)
+            result = sbdf.import_data(path)
+        self.assertEqual(result["cat"].tolist(), ["a", "b", "a"])
+
+    def test_write_polars_uint64_warns(self):
+        """Exporting a Polars UInt64 column should emit a warning about overflow risk."""
+        polars_df = pl.DataFrame({"big": pl.Series([1, 2, 3], dtype=pl.UInt64)})
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/output.sbdf"
+            with self.assertWarns(sbdf.SBDFWarning):
+                sbdf.export_data(polars_df, path)
+
+    def test_write_polars_datetime_tz(self):
+        """Exporting a timezone-aware Polars Datetime column should warn about timezone loss."""
+        polars_df = pl.DataFrame({
+            "ts": pl.Series([datetime.datetime(2024, 1, 1)]).dt.replace_time_zone("UTC")
+        })
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/output.sbdf"
+            with self.assertWarns(sbdf.SBDFWarning):
+                sbdf.export_data(polars_df, path)
+
     def test_polars_roundtrip(self):
         """A Polars DataFrame should survive an export/import roundtrip."""
         original = pl.DataFrame({

From cef91075583b311f7bc56c898d362c18b3f2abfd Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 21:20:04 -0500
Subject: [PATCH 04/10] Add polars to CI test requirements and document in
 README

- Add polars to test_requirements_default.txt so SbdfPolarsTest is
  actually executed in CI (previously skipped due to missing import)
- Add spotfire[polars] row to extras table in README
- Add usage note explaining Spotfire's bundled Python lacks Polars and
  that SPKs bundling Polars will be ~44 MB larger than typical packages

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md                     | 9 +++++++++
 test_requirements_default.txt | 1 +
 2 files changed, 10 insertions(+)

diff --git a/README.md b/README.md
index 14b0297..62dab02 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,16 @@ simply `spotfire`) to include the required Python packages to support optional f
 | `spotfire[plot-matplotlib]` | Plotting support using just `matplotlib`     |
 | `spotfire[plot-pil]`        | Plotting support using just `Pillow`         |
 | `spotfire[plot-seaborn]`    | Plotting support using just `seaborn`        |
+| `spotfire[polars]`          | Polars DataFrame support                     |
 | `spotfire[dev,lint]`        | Internal development                         |
 
+Once installed, `export_data()` accepts `polars.DataFrame` and `polars.Series` directly, and
+`import_data()` can return a `polars.DataFrame` via `output_format="polars"`.
+
+> **Note for Spotfire data functions:** Spotfire's bundled Python interpreter does not include
+> Polars. To use Polars inside a data function, configure Spotfire to use a custom Python
+> environment that has `polars` installed. Polars is a large binary package (~44 MB), so
+> Spotfire Packages (SPKs) that bundle it will be significantly larger than typical packages.
+
 ### License
 BSD-type 3-Clause License.  See the file ```LICENSE``` included in the package.
\ No newline at end of file
diff --git a/test_requirements_default.txt b/test_requirements_default.txt
index 73ab30d..7468679 100644
--- a/test_requirements_default.txt
+++ b/test_requirements_default.txt
@@ -2,5 +2,6 @@ html-testRunner
 geopandas
 matplotlib
 pillow
+polars
 seaborn
 shapely
\ No newline at end of file

From 1bd219849b44847aad92e6cbac0a2da978f396cc Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 21:36:05 -0500
Subject: [PATCH 05/10] Harden Polars support: validation, warnings, and edge
 case tests

- Raise SBDFError for unknown output_format values (previously fell
  through silently to Pandas)
- Emit SBDFWarning when Categorical/Enum columns are exported as String,
  consistent with existing UInt64 and timezone warnings
- Add test_invalid_output_format: verifies bad output_format raises
- Add test_write_polars_empty: verifies empty DataFrame exports cleanly
- Add test_write_polars_series_nulls: verifies null preservation in Series
- Add test_polars_categorical_warns: verifies Categorical warning fires

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 spotfire/sbdf.pyx          |  6 ++++++
 spotfire/test/test_sbdf.py | 40 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx
index 4b0097f..faea3b6 100644
--- a/spotfire/sbdf.pyx
+++ b/spotfire/sbdf.pyx
@@ -860,6 +860,10 @@ def import_data(sbdf_file, output_format="pandas"):
         if error != sbdf_c.SBDF_OK and error != sbdf_c.SBDF_TABLEEND:
             raise SBDFError(f"error reading '{sbdf_file}': {sbdf_c.sbdf_err_get_str(error).decode('utf-8')}")
 
+        # Validate output_format before doing anything with it
+        if output_format not in ("pandas", "polars"):
+            raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 'polars'")
+
         # Build a Polars DataFrame directly if requested, with no Pandas intermediary
         if output_format == "polars":
             if pl is None:
@@ -1165,6 +1169,8 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
         return sbdf_c.SBDF_DECIMALTYPEID
     elif dtype_name in ("Categorical", "Enum"):
         # SBDF has no categorical type; export as String
+        warnings.warn(f"Polars {dtype_name} type in {series_description} will be exported as String; "
+                      f"category information will not be preserved", SBDFWarning)
         return sbdf_c.SBDF_STRINGTYPEID
     else:
         raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}")
diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py
index 8c2a709..eb4cf17 100644
--- a/spotfire/test/test_sbdf.py
+++ b/spotfire/test/test_sbdf.py
@@ -646,3 +646,43 @@ def test_polars_roundtrip(self):
         self.assertIsInstance(result, pl.DataFrame)
         self.assertEqual(result["strings"].to_list(), ["foo", "bar", "baz"])
         self.assertAlmostEqual(result["floats"][0], 1.5)
+
+    def test_invalid_output_format(self):
+        """Passing an unknown output_format should raise SBDFError immediately."""
+        polars_df = pl.DataFrame({"x": [1, 2, 3]})
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/output.sbdf"
+            sbdf.export_data(polars_df, path)
+            with self.assertRaises(sbdf.SBDFError):
+                sbdf.import_data(path, output_format="numpy")
+
+    def test_write_polars_empty(self):
+        """Exporting an empty Polars DataFrame should produce a valid (empty) SBDF file."""
+        polars_df = pl.DataFrame({"a": pl.Series([], dtype=pl.Int32),
+                                  "b": pl.Series([], dtype=pl.Utf8)})
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/empty.sbdf"
+            sbdf.export_data(polars_df, path)
+            result = sbdf.import_data(path)
+        self.assertEqual(len(result), 0)
+        self.assertIn("a", result.columns)
+        self.assertIn("b", result.columns)
+
+    def test_write_polars_series_nulls(self):
+        """Exporting a Polars Series with null values should preserve those nulls."""
+        series = pl.Series("vals", [1, None, 3], dtype=pl.Int32)
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/series_nulls.sbdf"
+            sbdf.export_data(series, path)
+            result = sbdf.import_data(path)
+        self.assertTrue(pd.isnull(result["vals"][1]))
+        self.assertEqual(int(result["vals"][0]), 1)
+        self.assertEqual(int(result["vals"][2]), 3)
+
+    def test_polars_categorical_warns(self):
+        """Exporting a Polars Categorical column should emit a SBDFWarning."""
+        polars_df = pl.DataFrame({"cat": pl.Series(["x", "y", "x"]).cast(pl.Categorical)})
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/cat_warn.sbdf"
+            with self.assertWarns(sbdf.SBDFWarning):
+                sbdf.export_data(polars_df, path)

From 6761de013f0f01d956dbe3f57d9de4f1dfa80bb6 Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 21:44:50 -0500
Subject: [PATCH 06/10] Handle Polars Null dtype on export

A Polars Series of [None, None, None] has dtype pl.Null (no type can
be inferred). Previously this raised SBDFError ("unknown Polars dtype
'Null' in ..."). Now it exports as an all-invalid String column,
consistent with how all-None Pandas columns are handled.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 spotfire/sbdf.pyx          |  6 ++++++
 spotfire/test/test_sbdf.py | 12 ++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx
index faea3b6..b247a5b 100644
--- a/spotfire/sbdf.pyx
+++ b/spotfire/sbdf.pyx
@@ -1172,6 +1172,9 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
         warnings.warn(f"Polars {dtype_name} type in {series_description} will be exported as String; "
                       f"category information will not be preserved", SBDFWarning)
         return sbdf_c.SBDF_STRINGTYPEID
+    elif dtype_name == "Null":
+        # All-null series with no inferred type; export as an all-invalid String column
+        return sbdf_c.SBDF_STRINGTYPEID
     else:
         raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}")
 
@@ -1184,6 +1187,9 @@ cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series)
     :return: NumPy ndarray of values
     """
     dtype_name = series.dtype.__class__.__name__
+    if dtype_name == "Null":
+        # All-null series: produce an object array of Nones; invalids mask will cover all rows
+        return np.full(len(series), None, dtype=object)
     if dtype_name in ("Categorical", "Enum"):
         # Cast to String so .to_numpy() returns plain Python strings
         series = series.cast(pl.Utf8)
diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py
index eb4cf17..ce1008b 100644
--- a/spotfire/test/test_sbdf.py
+++ b/spotfire/test/test_sbdf.py
@@ -686,3 +686,15 @@ def test_polars_categorical_warns(self):
             path = f"{tempdir}/cat_warn.sbdf"
             with self.assertWarns(sbdf.SBDFWarning):
                 sbdf.export_data(polars_df, path)
+
+    def test_write_polars_null_dtype(self):
+        """Exporting a Polars all-null Series (dtype=Null) should produce an all-invalid column."""
+        polars_df = pl.DataFrame({"nothing": pl.Series([None, None, None])})
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/null_dtype.sbdf"
+            sbdf.export_data(polars_df, path)
+            result = sbdf.import_data(path)
+        self.assertEqual(len(result), 3)
+        self.assertTrue(pd.isnull(result["nothing"][0]))
+        self.assertTrue(pd.isnull(result["nothing"][1]))
+        self.assertTrue(pd.isnull(result["nothing"][2]))

From 441cddbe0cd8bf1c6fe4e6b217f359bafa198b2b Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 22:00:56 -0500
Subject: [PATCH 07/10] Fix mypy error for polars import in test file

CI static analysis runs mypy without polars installed; add
type: ignore[import-not-found] to the import so mypy does not report
the missing module.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 spotfire/test/test_sbdf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py
index ce1008b..4cf944b 100644
--- a/spotfire/test/test_sbdf.py
+++ b/spotfire/test/test_sbdf.py
@@ -20,7 +20,7 @@
 import spotfire
 
 try:
-    import polars as pl
+    import polars as pl  # type: ignore[import-not-found]
 except ImportError:
     pl = None  # type: ignore[assignment]
 from spotfire import sbdf

From a0a86ceb851b338ef4a92d25604555430ede25db Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 22:12:13 -0500
Subject: [PATCH 08/10] Add reviewer-facing comments to Polars implementation

Explain non-obvious choices that would otherwise prompt review questions:
- Why dtype.__class__.__name__ instead of isinstance()
- Why scatter()/set_at_idx() try/except exists and which versions it covers
- Why is_object_numpy_type() cpdef wrapper is needed for a cdef attribute
- Why the output_format polars path short-circuits before pd.concat
- Why the Null dtype path returns a placeholder array

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 spotfire/sbdf.pyx | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx
index b247a5b..20890d6 100644
--- a/spotfire/sbdf.pyx
+++ b/spotfire/sbdf.pyx
@@ -429,6 +429,11 @@ cdef class _ImportContext:
         """Return True if the numpy type for this column is NPY_OBJECT.
 
         :return: True if the numpy type is object, False otherwise
+
+        .. note:: ``numpy_type_num`` is a ``cdef`` attribute, so it cannot be reached through
+                  Python attribute lookup on an untyped object reference.  This ``cpdef``
+                  wrapper exposes it so that :func:`_import_build_polars_dataframe` can branch
+                  on it without touching the Cython-only attribute directly.
         """
         return self.numpy_type_num == np_c.NPY_OBJECT
 
@@ -729,10 +734,9 @@ cdef object _import_build_polars_dataframe(column_names, importer_contexts):
             if invalids.any():
                 indices = np.where(invalids)[0].tolist()
                 try:
-                    col = col.scatter(indices, None)
+                    col = col.scatter(indices, None)  # Polars >= 0.19
                 except AttributeError:
-                    # Fallback for older Polars versions that use set_at_idx
-                    col = col.set_at_idx(indices, None)
+                    col = col.set_at_idx(indices, None)  # Polars < 0.19 API
 
         series_list.append(col)
 
@@ -864,7 +868,10 @@ def import_data(sbdf_file, output_format="pandas"):
         if output_format not in ("pandas", "polars"):
             raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 'polars'")
 
-        # Build a Polars DataFrame directly if requested, with no Pandas intermediary
+        # Short-circuit before pd.concat to avoid the Pandas intermediary entirely.
+        # This keeps the import zero-copy for large DataFrames: numpy arrays collected
+        # by each _ImportContext go straight into Polars Series without ever becoming
+        # a Pandas DataFrame.
         if output_format == "polars":
             if pl is None:
                 raise SBDFError("polars is not installed; install it with 'pip install spotfire[polars]'")
@@ -1134,6 +1141,10 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
     :return: the integer value type id representing the type of series
     :raise SBDFError: if the dtype is unknown
     """
+    # Dispatch on __class__.__name__ rather than isinstance() checks.  Polars is an optional
+    # dependency (the module-level ``pl`` may be None), and isinstance() would have to name
+    # the dtype classes through that import.
+    # Class name strings are stable across the Polars versions we support (>= 0.20).
     dtype_name = dtype.__class__.__name__
     if dtype_name == "Boolean":
         return sbdf_c.SBDF_BOOLTYPEID
@@ -1173,7 +1184,9 @@ cdef int _export_infer_valuetype_from_polars_dtype(dtype, series_description):
                       f"category information will not be preserved", SBDFWarning)
         return sbdf_c.SBDF_STRINGTYPEID
     elif dtype_name == "Null":
-        # All-null series with no inferred type; export as an all-invalid String column
+        # pl.Series([None, None]) has dtype Null when no type can be inferred.  Export as
+        # String; _export_polars_series_to_numpy produces a placeholder array and the
+        # invalids mask marks every row missing, so the stored values are never read.
         return sbdf_c.SBDF_STRINGTYPEID
     else:
         raise SBDFError(f"unknown Polars dtype '{dtype_name}' in {series_description}")
@@ -1188,7 +1201,9 @@ cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series)
     """
     dtype_name = series.dtype.__class__.__name__
     if dtype_name == "Null":
-        # All-null series: produce an object array of Nones; invalids mask will cover all rows
+        # A Null-dtype series has no values to convert; return a same-length placeholder array.
+        # The invalids mask (set by the caller via series.is_null()) marks every row as missing,
+        # so the placeholder values are never read by the SBDF writer.
         return np.full(len(series), None, dtype=object)
     if dtype_name in ("Categorical", "Enum"):
         # Cast to String so .to_numpy() returns plain Python strings

From bf8e984ded4cc10385b08303cee1bcb23346cf5e Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Mon, 23 Mar 2026 22:15:56 -0500
Subject: [PATCH 09/10] Remove set_at_idx fallback; scatter() is available in
 all supported Polars versions (>= 0.20)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 spotfire/sbdf.pyx | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx
index 20890d6..7b90a09 100644
--- a/spotfire/sbdf.pyx
+++ b/spotfire/sbdf.pyx
@@ -733,10 +733,7 @@ cdef object _import_build_polars_dataframe(column_names, importer_contexts):
             col = pl.Series(name=name, values=values, dtype=polars_dtype)
             if invalids.any():
                 indices = np.where(invalids)[0].tolist()
-                try:
-                    col = col.scatter(indices, None)  # Polars >= 0.19
-                except AttributeError:
-                    col = col.set_at_idx(indices, None)  # Polars < 0.19 API
+                col = col.scatter(indices, None)
 
         series_list.append(col)
 

From 00d81cff097e7d3593cc80b43e702525b354ada6 Mon Sep 17 00:00:00 2001
From: stewjb <jeffreyrs@gmail.com>
Date: Tue, 24 Mar 2026 05:23:24 -0500
Subject: [PATCH 10/10] Address Copilot review comments

- Move output_format validation to top of import_data() for fail-fast
  behaviour before the file is opened
- Raise SBDFError in _import_polars_dtype fallback instead of silently
  returning Utf8 for unknown SBDF type IDs
- Treat NaN as invalid (missing) for Float32/Float64 columns, matching
  Pandas pd.isnull() behaviour; add test_write_polars_float_nan
- Keep native datetime64/timedelta64 arrays for Datetime/Duration columns
  instead of boxing to object dtype (avoids unnecessary copy)
- Add @overload signatures to sbdf.pyi so callers get pd.DataFrame for
  the default output_format="pandas" and Any for output_format="polars"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 spotfire/sbdf.pyi          |  4 ++++
 spotfire/sbdf.pyx          | 24 +++++++++++++++++-------
 spotfire/test/test_sbdf.py | 11 +++++++++++
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/spotfire/sbdf.pyi b/spotfire/sbdf.pyi
index 80d8fc4..9bd2812 100644
--- a/spotfire/sbdf.pyi
+++ b/spotfire/sbdf.pyi
@@ -13,6 +13,10 @@ class SBDFError(Exception): ...
 class SBDFWarning(Warning): ...
 
 def spotfire_typename_to_valuetype_id(typename: str) -> typing.Optional[int]: ...
+@typing.overload
+def import_data(sbdf_file: _FilenameLike, output_format: typing.Literal["pandas"] = ...) -> pd.DataFrame: ...
+@typing.overload
+def import_data(sbdf_file: _FilenameLike, output_format: typing.Literal["polars"]) -> typing.Any: ...
 def import_data(sbdf_file: _FilenameLike, output_format: str = "pandas") -> typing.Any: ...
 def export_data(obj: typing.Any, sbdf_file: _FilenameLike, default_column_name: str = "x",
                 rows_per_slice: int = 0, encoding_rle: bool = True) -> None: ...
diff --git a/spotfire/sbdf.pyx b/spotfire/sbdf.pyx
index 7b90a09..28770f5 100644
--- a/spotfire/sbdf.pyx
+++ b/spotfire/sbdf.pyx
@@ -703,7 +703,7 @@ cdef object _import_polars_dtype(_ImportContext context):
     elif vt_id == sbdf_c.SBDF_DECIMALTYPEID:
         return pl.Decimal
     else:
-        return pl.Utf8
+        raise SBDFError(f"unsupported SBDF value type id {vt_id} for Polars output")
 
 
 cdef object _import_build_polars_dataframe(column_names, importer_contexts):
@@ -748,6 +748,10 @@ def import_data(sbdf_file, output_format="pandas"):
     :return: the DataFrame containing the imported data
     :raises SBDFError: if a problem is encountered during import
     """
+    # Validate output_format before opening the file so we fail fast on bad input.
+    if output_format not in ("pandas", "polars"):
+        raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 'polars'")
+
     cdef int error, i
     cdef stdio.FILE* input_file = NULL
     cdef int major_v, minor_v
@@ -861,10 +865,6 @@ def import_data(sbdf_file, output_format="pandas"):
         if error != sbdf_c.SBDF_OK and error != sbdf_c.SBDF_TABLEEND:
             raise SBDFError(f"error reading '{sbdf_file}': {sbdf_c.sbdf_err_get_str(error).decode('utf-8')}")
 
-        # Validate output_format before doing anything with it
-        if output_format not in ("pandas", "polars"):
-            raise SBDFError(f"unknown output_format {output_format!r}; expected 'pandas' or 'polars'")
-
         # Short-circuit before pd.concat to avoid the Pandas intermediary entirely.
         # This keeps the import zero-copy for large DataFrames: numpy arrays collected
         # by each _ImportContext go straight into Polars Series without ever becoming
@@ -1210,6 +1210,10 @@ cdef np_c.ndarray _export_polars_series_to_numpy(_ExportContext context, series)
         # The Date/Time exporters require Python date/time objects;
         # Polars .to_numpy() returns numpy datetime64/int64 which those exporters do not accept.
         return np.asarray(series.to_list(), dtype=object)
+    if dtype_name in ("Datetime", "Duration"):
+        # Keep native datetime64/timedelta64 arrays; the invalids mask handles nulls (NaT cells
+        # are marked invalid and ignored by the SBDF writer).  Boxing to object would be slower.
+        return series.to_numpy(allow_copy=True)
     na_value = context.get_numpy_na_value()
     if na_value is not None:
         return np.asarray(series.fill_null(na_value).to_numpy(allow_copy=True),
@@ -1236,7 +1240,10 @@ cdef _export_obj_polars_dataframe(obj):
         column_names.append(col)
         context = _ExportContext()
         context.set_valuetype_id(_export_infer_valuetype_from_polars_dtype(series.dtype, f"column '{col}'"))
-        invalids = series.is_null().to_numpy()
+        if series.dtype.__class__.__name__ in ("Float32", "Float64"):
+            invalids = (series.is_null() | series.is_nan()).to_numpy()
+        else:
+            invalids = series.is_null().to_numpy()
         context.set_arrays(_export_polars_series_to_numpy(context, series), invalids)
         column_metadata.append({})
         exporter_contexts.append(context)
@@ -1257,7 +1264,10 @@ cdef _export_obj_polars_series(obj, default_column_name):
 
     context = _ExportContext()
     context.set_valuetype_id(_export_infer_valuetype_from_polars_dtype(obj.dtype, description))
-    invalids = obj.is_null().to_numpy()
+    if obj.dtype.__class__.__name__ in ("Float32", "Float64"):
+        invalids = (obj.is_null() | obj.is_nan()).to_numpy()
+    else:
+        invalids = obj.is_null().to_numpy()
     context.set_arrays(_export_polars_series_to_numpy(context, obj), invalids)
 
     return {}, [column_name], [{}], [context]
diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py
index 4cf944b..b048ac5 100644
--- a/spotfire/test/test_sbdf.py
+++ b/spotfire/test/test_sbdf.py
@@ -698,3 +698,14 @@ def test_write_polars_null_dtype(self):
         self.assertTrue(pd.isnull(result["nothing"][0]))
         self.assertTrue(pd.isnull(result["nothing"][1]))
         self.assertTrue(pd.isnull(result["nothing"][2]))
+
+    def test_write_polars_float_nan(self):
+        """NaN in a Polars float column should be treated as invalid (missing), not a real value."""
+        polars_df = pl.DataFrame({"vals": pl.Series([1.0, float("nan"), 3.0])})
+        with tempfile.TemporaryDirectory() as tempdir:
+            path = f"{tempdir}/float_nan.sbdf"
+            sbdf.export_data(polars_df, path)
+            result = sbdf.import_data(path)
+        self.assertAlmostEqual(result["vals"][0], 1.0)
+        self.assertTrue(pd.isnull(result["vals"][1]))
+        self.assertAlmostEqual(result["vals"][2], 3.0)