From dc45edd1e74095db4225b792b6cbb64208cd0f72 Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Wed, 22 Apr 2026 12:06:25 +0200 Subject: [PATCH 01/11] Add NaN behavior coverage --- tests/core/CMakeLists.txt | 1 + tests/core/test_codecs_end_to_end.cc | 78 ++++++++++++++++++++++++++++ tests/core/test_comparator.cc | 33 ++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 tests/core/test_comparator.cc diff --git a/tests/core/CMakeLists.txt b/tests/core/CMakeLists.txt index 1a07dbc6..b6e3facc 100644 --- a/tests/core/CMakeLists.txt +++ b/tests/core/CMakeLists.txt @@ -25,6 +25,7 @@ list( APPEND _core_odc_tests test_text_reader test_table_iterator test_initial_missing + test_comparator ) foreach( _test ${_core_odc_tests} ) diff --git a/tests/core/test_codecs_end_to_end.cc b/tests/core/test_codecs_end_to_end.cc index 2a244ec9..bdcc28b2 100644 --- a/tests/core/test_codecs_end_to_end.cc +++ b/tests/core/test_codecs_end_to_end.cc @@ -8,6 +8,9 @@ * does it submit to any jurisdiction. */ +#include +#include + #include "eckit/io/Buffer.h" #include "eckit/io/DataHandle.h" #include "eckit/io/MemoryHandle.h" @@ -15,6 +18,7 @@ #include "odc/Reader.h" #include "odc/Writer.h" +#include "odc/api/ColumnType.h" #include "odc/codec/Integer.h" #include "odc/codec/String.h" #include "odc/core/MetaData.h" @@ -304,6 +308,80 @@ CASE("Missing values are encoded and decoded correctly") { } +CASE("NaN before real values preserves correct min/max in column metadata") { + + const double NaN = std::numeric_limits::quiet_NaN(); + + eckit::Buffer buf(4096); + eckit::MemoryHandle writeDH(buf); + + { + odc::Writer<> oda(writeDH); + odc::Writer<>::iterator writer = oda.begin(); + + writer->setNumberOfColumns(1); + writer->setColumn(0, "col0", odc::api::DOUBLE); + writer->writeHeader(); + + (*writer)[0] = NaN; + ++writer; + (*writer)[0] = 1.1; + ++writer; + (*writer)[0] = 3.3; + ++writer; + } + + eckit::MemoryHandle dh(buf.data(), static_cast(writeDH.position())); + dh.openForRead(); + odc::Reader oda(dh); + odc::Reader::iterator it = oda.begin(); + + EXPECT(it->columns()[0]->min() == 1.1); + EXPECT(it->columns()[0]->max() == 3.3); +} + + +CASE("NaN are encoded and decoded correctly") { + + const double NaN = std::numeric_limits::quiet_NaN(); + + for (auto type : {odc::api::DOUBLE, odc::api::REAL, odc::api::INTEGER, odc::api::BITFIELD}) { + + eckit::Buffer buf(4096); + eckit::MemoryHandle writeDH(buf); + + { + odc::Writer<> oda(writeDH); + odc::Writer<>::iterator writer = oda.begin(); + + writer->setNumberOfColumns(1); + writer->setColumn(0, "col0", type); + writer->writeHeader(); + + (*writer)[0] = 1.0; + ++writer; + (*writer)[0] = NaN; + ++writer; + (*writer)[0] = 3.0; + ++writer; + } + + eckit::MemoryHandle dh(buf.data(), static_cast(writeDH.position())); + dh.openForRead(); + odc::Reader oda(dh); + + odc::Reader::iterator it = oda.begin(); + EXPECT((*it)[0] == 1.0); + ++it; + EXPECT(std::isnan((*it)[0])); + ++it; + EXPECT((*it)[0] == 3.0); + ++it; + EXPECT(it == oda.end()); + } +} + + // ------------------------------------------------------------------------------------------------------ int main(int argc, char* argv[]) { diff --git a/tests/core/test_comparator.cc b/tests/core/test_comparator.cc new file mode 100644 index 00000000..4a8a043c --- /dev/null +++ b/tests/core/test_comparator.cc @@ -0,0 +1,33 @@ +/* + * (C) Copyright 1996-2012 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +#include + +#include "eckit/testing/Test.h" + +#include "odc/Comparator.h" + +using namespace eckit::testing; + +// ------------------------------------------------------------------------------------------------------ + +CASE("Comparator treats two NaNs as equal") { + const double NaN = std::numeric_limits::quiet_NaN(); + + EXPECT(odc::Comparator::same(NaN, NaN) == 1); + EXPECT(odc::Comparator::same(NaN, 1.0) == 0); + EXPECT(odc::Comparator::same(1.0, NaN) == 0); +} + +// ------------------------------------------------------------------------------------------------------ + +int main(int argc, char* argv[]) { + return run_tests(argc, argv); +} From 91a9fc62cac8d39add572abb5fac1d479f11f86b Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Thu, 23 Apr 2026 09:35:49 +0200 Subject: [PATCH 02/11] Add explicit handling of NaN comparisons --- src/odc/Comparator.cc | 8 +++----- src/odc/Comparator.h | 6 ++++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/odc/Comparator.cc b/src/odc/Comparator.cc index 2f2641fe..1089a019 100644 --- a/src/odc/Comparator.cc +++ b/src/odc/Comparator.cc @@ -8,7 +8,6 @@ * does it submit to any jurisdiction. */ -#include "eckit/config/Resource.h" #include "eckit/exception/Exceptions.h" #include "eckit/filesystem/PathName.h" #include "eckit/log/Log.h" @@ -42,7 +41,7 @@ class ValuesDifferent : public Exception { namespace odc { Comparator::Comparator(bool skipTestingHaveMissing) : - skipTestingHaveMissing_(skipTestingHaveMissing), nRow_(0), NaN_isOK_(Resource("$odc_NAN_IS_OK", false)) {} + skipTestingHaveMissing_(skipTestingHaveMissing), nRow_(0) {} void Comparator::compare(const PathName& p1, const PathName& p2) { @@ -168,12 +167,11 @@ void Comparator::compare(int nCols, const double* data1, const double* data2, co case INTEGER: case BITFIELD: case DOUBLE: - if (!(same(*pdata1, *pdata2) || (NaN_isOK_ && (::isnan(*pdata1) && ::isnan(*pdata2))))) + if (!same(*pdata1, *pdata2)) raiseNotEqual(column, *pdata1, *pdata2); break; case REAL: - if (!(same(float(*pdata1), float(*pdata2)) || - (NaN_isOK_ && (::isnan(*pdata1) && ::isnan(*pdata2))))) + if (!same(float(*pdata1), float(*pdata2))) raiseNotEqual(column, *pdata1, *pdata2); break; case IGNORE: diff --git a/src/odc/Comparator.h b/src/odc/Comparator.h index 51ec3460..e4bde737 100644 --- a/src/odc/Comparator.h +++ b/src/odc/Comparator.h @@ -77,7 +77,10 @@ class Comparator { return relativeError; } - inline static int same(double A, double B) { return err(A, B) < maxRelativeError; } + inline static int same(double A, double B) { + if (std::isnan(A) || std::isnan(B)) return std::isnan(A) && std::isnan(B); + return err(A, B) < maxRelativeError; + } void raiseNotEqual(const core::Column&, double, double); @@ -85,7 +88,6 @@ class Comparator { bool skipTestingHaveMissing_; long nRow_; - bool NaN_isOK_; }; template From 25db7887c7c27f3585b2917f05d0ba491c3520d5 Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Thu, 23 Apr 2026 09:37:38 +0200 Subject: [PATCH 03/11] Handle NaN gracefully during gather stats for REAL/DOUBLE --- src/odc/core/Codec.cc | 8 ++++++++ src/odc/core/Codec.h | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/src/odc/core/Codec.cc b/src/odc/core/Codec.cc index 303e5505..17e6258d 100644 --- a/src/odc/core/Codec.cc +++ b/src/odc/core/Codec.cc @@ -10,6 +10,8 @@ #include "odc/core/Codec.h" +#include + #include "eckit/exception/Exceptions.h" #include "odc/core/CodecFactory.h" @@ -27,6 +29,7 @@ Codec::Codec(const std::string& name, api::ColumnType type) : missingValue_(odc::MDI::realMDI()), min_(missingValue_), max_(missingValue_), + hasNaN_(false), type_(type) {} std::unique_ptr Codec::clone() { @@ -35,6 +38,7 @@ std::unique_ptr Codec::clone() { c->missingValue_ = missingValue_; c->min_ = min_; c->max_ = max_; + c->hasNaN_ = hasNaN_; return c; } @@ -98,6 +102,10 @@ void Codec::missingValue(double v) { } void Codec::gatherStats(const double& v) { + if (std::isnan(v)) { + hasNaN_ = true; + return; + } if (v == missingValue_) { hasMissing_ = 1; } diff --git a/src/odc/core/Codec.h b/src/odc/core/Codec.h index 108c7117..71c76383 100644 --- a/src/odc/core/Codec.h +++ b/src/odc/core/Codec.h @@ -62,6 +62,7 @@ class Codec { void resetStats() { min_ = max_ = missingValue_; hasMissing_ = false; + hasNaN_ = false; } virtual void gatherStats(const double& v); @@ -69,6 +70,9 @@ class Codec { void hasMissing(bool h) { hasMissing_ = h; } int32_t hasMissing() const { return hasMissing_; } + void hasNaN(bool s) { hasNaN_ = s; } + bool hasNaN() const { return hasNaN_; } + void min(double m) { min_ = m; } double min() const { return min_; } @@ -106,6 +110,7 @@ class Codec { double missingValue_; double min_; double max_; + bool hasNaN_; api::ColumnType type_; From 644a17f8fe739b6800cbf2ee72627c852f2bf23b Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Thu, 23 Apr 2026 09:37:48 +0200 Subject: [PATCH 04/11] Coerce NaN values to Missing value for INT/BITFIELD --- src/odc/codec/Integer.h | 13 ++++++++ src/odc/codec/IntegerMissing.h | 13 +++++--- tests/core/test_codecs_end_to_end.cc | 45 ++++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/src/odc/codec/Integer.h b/src/odc/codec/Integer.h index 683f1721..3aea4bf7 100644 --- a/src/odc/codec/Integer.h +++ b/src/odc/codec/Integer.h @@ -11,6 +11,10 @@ #ifndef odc_core_codec_Integer_H #define odc_core_codec_Integer_H +#include + +#include "eckit/log/Log.h" + #include "odc/core/Codec.h" /// @note We have some strange behaviour in here. In particular, we support BOTH decoding @@ -65,6 +69,15 @@ class BaseCodecInteger : public core::DataStreamCodec { void gatherStats(const double& v) override { static_assert(sizeof(ValueType) == sizeof(v), "unsafe casting check"); + if (std::isnan(v)) { + if (!this->hasNaN_) { + eckit::Log::warning() << "odc: NaN value found in INTEGER/BITFIELD column (codec '" + << this->name() << "'); coerced to missing." << std::endl; + } + this->hasNaN_ = true; + this->hasMissing_ = 1; + return; + } const ValueType& val(reinterpret_cast(v)); core::Codec::gatherStats(val); } diff --git a/src/odc/codec/IntegerMissing.h b/src/odc/codec/IntegerMissing.h index b6312d13..3924dfce 100644 --- a/src/odc/codec/IntegerMissing.h +++ b/src/odc/codec/IntegerMissing.h @@ -38,14 +38,19 @@ class BaseCodecMissing : public BaseCodecInteger { unsigned char* encode(unsigned char* p, const double& d) override { static_assert(sizeof(ValueType) == sizeof(d), "unsafe casting check"); - const ValueType& val(reinterpret_cast(d)); InternalValueType s; - if (val == this->missingValue_) { + if (std::isnan(d)) { s = DerivedCodec::missingMarker; } else { - s = val - this->min_; - ASSERT(s != DerivedCodec::missingMarker); + const ValueType& val(reinterpret_cast(d)); + if (val == this->missingValue_) { + s = DerivedCodec::missingMarker; + } + else { + s = val - this->min_; + ASSERT(s != DerivedCodec::missingMarker); + } } ByteOrder::swap(s); ::memcpy(p, &s, sizeof(s)); diff --git a/tests/core/test_codecs_end_to_end.cc b/tests/core/test_codecs_end_to_end.cc index bdcc28b2..790a59e9 100644 --- a/tests/core/test_codecs_end_to_end.cc +++ b/tests/core/test_codecs_end_to_end.cc @@ -341,11 +341,11 @@ CASE("NaN before real values preserves correct min/max in column metadata") { } -CASE("NaN are encoded and decoded correctly") { +CASE("NaN round-trips as NaN in REAL/DOUBLE columns") { const double NaN = std::numeric_limits::quiet_NaN(); - for (auto type : {odc::api::DOUBLE, odc::api::REAL, odc::api::INTEGER, odc::api::BITFIELD}) { + for (auto type : {odc::api::DOUBLE, odc::api::REAL}) { eckit::Buffer buf(4096); eckit::MemoryHandle writeDH(buf); @@ -382,6 +382,47 @@ CASE("NaN are encoded and decoded correctly") { } +CASE("NaN is coerced to missing in INTEGER/BITFIELD columns") { + + const double NaN = std::numeric_limits::quiet_NaN(); + + for (auto type : {odc::api::INTEGER, odc::api::BITFIELD}) { + + eckit::Buffer buf(4096); + eckit::MemoryHandle writeDH(buf); + + { + odc::Writer<> oda(writeDH); + odc::Writer<>::iterator writer = oda.begin(); + + writer->setNumberOfColumns(1); + writer->setColumn(0, "col0", type); + writer->writeHeader(); + + (*writer)[0] = 1.0; + ++writer; + (*writer)[0] = NaN; + ++writer; + (*writer)[0] = 3.0; + ++writer; + } + + eckit::MemoryHandle dh(buf.data(), static_cast(writeDH.position())); + dh.openForRead(); + odc::Reader oda(dh); + + odc::Reader::iterator it = oda.begin(); + EXPECT((*it)[0] == 1.0); + ++it; + EXPECT((*it)[0] == odc::MDI::integerMDI()); + ++it; + EXPECT((*it)[0] == 3.0); + ++it; + EXPECT(it == oda.end()); + } +} + + // ------------------------------------------------------------------------------------------------------ int main(int argc, char* argv[]) { From 4296f89f2e529bb5546c335a9bc9d31021b23495 Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Thu, 23 Apr 2026 09:42:28 +0200 Subject: [PATCH 05/11] Apply clang-format --- src/odc/Comparator.cc | 3 +-- src/odc/Comparator.h | 3 ++- src/odc/codec/Integer.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/odc/Comparator.cc b/src/odc/Comparator.cc index 1089a019..44a12610 100644 --- a/src/odc/Comparator.cc +++ b/src/odc/Comparator.cc @@ -40,8 +40,7 @@ class ValuesDifferent : public Exception { namespace odc { -Comparator::Comparator(bool skipTestingHaveMissing) : - skipTestingHaveMissing_(skipTestingHaveMissing), nRow_(0) {} +Comparator::Comparator(bool skipTestingHaveMissing) : skipTestingHaveMissing_(skipTestingHaveMissing), nRow_(0) {} void Comparator::compare(const PathName& p1, const PathName& p2) { diff --git a/src/odc/Comparator.h b/src/odc/Comparator.h index e4bde737..089284ef 100644 --- a/src/odc/Comparator.h +++ b/src/odc/Comparator.h @@ -78,7 +78,8 @@ class Comparator { } inline static int same(double A, double B) { - if (std::isnan(A) || std::isnan(B)) return std::isnan(A) && std::isnan(B); + if (std::isnan(A) || std::isnan(B)) + return std::isnan(A) && std::isnan(B); return err(A, B) < maxRelativeError; } diff --git a/src/odc/codec/Integer.h b/src/odc/codec/Integer.h index 3aea4bf7..53cc708e 100644 --- a/src/odc/codec/Integer.h +++ b/src/odc/codec/Integer.h @@ -71,8 +71,8 @@ class BaseCodecInteger : public core::DataStreamCodec { static_assert(sizeof(ValueType) == sizeof(v), "unsafe casting check"); if (std::isnan(v)) { if (!this->hasNaN_) { - eckit::Log::warning() << "odc: NaN value found in INTEGER/BITFIELD column (codec '" - << this->name() << "'); coerced to missing." << std::endl; + eckit::Log::warning() << "odc: NaN value found in INTEGER/BITFIELD column (codec '" << this->name() + << "'); coerced to missing." << std::endl; } this->hasNaN_ = true; this->hasMissing_ = 1; From 104bace0213981f3c8bc185a5339d628a6975dc9 Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Thu, 23 Apr 2026 10:42:25 +0200 Subject: [PATCH 06/11] Revert NaN coercion to Missing value --- src/odc/codec/Integer.h | 13 ------------- src/odc/codec/IntegerMissing.h | 13 ++++--------- tests/core/test_codecs_end_to_end.cc | 3 ++- 3 files changed, 6 insertions(+), 23 deletions(-) diff --git a/src/odc/codec/Integer.h b/src/odc/codec/Integer.h index 53cc708e..683f1721 100644 --- a/src/odc/codec/Integer.h +++ b/src/odc/codec/Integer.h @@ -11,10 +11,6 @@ #ifndef odc_core_codec_Integer_H #define odc_core_codec_Integer_H -#include - -#include "eckit/log/Log.h" - #include "odc/core/Codec.h" /// @note We have some strange behaviour in here. In particular, we support BOTH decoding @@ -69,15 +65,6 @@ class BaseCodecInteger : public core::DataStreamCodec { void gatherStats(const double& v) override { static_assert(sizeof(ValueType) == sizeof(v), "unsafe casting check"); - if (std::isnan(v)) { - if (!this->hasNaN_) { - eckit::Log::warning() << "odc: NaN value found in INTEGER/BITFIELD column (codec '" << this->name() - << "'); coerced to missing." << std::endl; - } - this->hasNaN_ = true; - this->hasMissing_ = 1; - return; - } const ValueType& val(reinterpret_cast(v)); core::Codec::gatherStats(val); } diff --git a/src/odc/codec/IntegerMissing.h b/src/odc/codec/IntegerMissing.h index 3924dfce..b6312d13 100644 --- a/src/odc/codec/IntegerMissing.h +++ b/src/odc/codec/IntegerMissing.h @@ -38,19 +38,14 @@ class BaseCodecMissing : public BaseCodecInteger { unsigned char* encode(unsigned char* p, const double& d) override { static_assert(sizeof(ValueType) == sizeof(d), "unsafe casting check"); + const ValueType& val(reinterpret_cast(d)); InternalValueType s; - if (std::isnan(d)) { + if (val == this->missingValue_) { s = DerivedCodec::missingMarker; } else { - const ValueType& val(reinterpret_cast(d)); - if (val == this->missingValue_) { - s = DerivedCodec::missingMarker; - } - else { - s = val - this->min_; - ASSERT(s != DerivedCodec::missingMarker); - } + s = val - this->min_; + ASSERT(s != DerivedCodec::missingMarker); } ByteOrder::swap(s); ::memcpy(p, &s, sizeof(s)); diff --git a/tests/core/test_codecs_end_to_end.cc b/tests/core/test_codecs_end_to_end.cc index 790a59e9..3dfc1234 100644 --- a/tests/core/test_codecs_end_to_end.cc +++ b/tests/core/test_codecs_end_to_end.cc @@ -381,7 +381,7 @@ CASE("NaN round-trips as NaN in REAL/DOUBLE columns") { } } - +#if 0 CASE("NaN is coerced to missing in INTEGER/BITFIELD columns") { const double NaN = std::numeric_limits::quiet_NaN(); @@ -421,6 +421,7 @@ CASE("NaN is coerced to missing in INTEGER/BITFIELD columns") { EXPECT(it == oda.end()); } } +#endif // ------------------------------------------------------------------------------------------------------ From 6015bc5d9c045c13a2a1afd34276fe512343de46 Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Fri, 24 Apr 2026 14:25:54 +0200 Subject: [PATCH 07/11] Coerce NaN to Missing for INT/BITFIELD when integersAsDoubles --- src/odc/codec/Integer.h | 17 +++++++++++++ src/odc/codec/IntegerMissing.h | 16 +++++++++--- tests/core/test_codecs_end_to_end.cc | 38 ++++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/src/odc/codec/Integer.h b/src/odc/codec/Integer.h index 683f1721..3bad7754 100644 --- a/src/odc/codec/Integer.h +++ b/src/odc/codec/Integer.h @@ -11,6 +11,11 @@ #ifndef odc_core_codec_Integer_H #define odc_core_codec_Integer_H +#include + +#include "eckit/log/Log.h" + +#include "odc/ODBAPISettings.h" #include "odc/core/Codec.h" /// @note We have some strange behaviour in here. In particular, we support BOTH decoding @@ -65,6 +70,18 @@ class BaseCodecInteger : public core::DataStreamCodec { void gatherStats(const double& v) override { static_assert(sizeof(ValueType) == sizeof(v), "unsafe casting check"); + // n.b. NaN has no meaningful integer bit pattern. Coerce to missing and flag + // hasMissing_ so the optimiser picks an appropriate codec. Gated on + // integersAsDoubles: int64 values (e.g. -1) overlap the NaN bit pattern. + if (ODBAPISettings::instance().integersAsDoubles() && std::isnan(v)) { + if (!this->hasNaN_) { + eckit::Log::warning() << "odc: NaN value in INTEGER/BITFIELD column (codec '" << this->name() + << "'); coerced to missing." << std::endl; + } + this->hasNaN_ = true; + this->hasMissing_ = 1; + return; + } const ValueType& val(reinterpret_cast(v)); core::Codec::gatherStats(val); } diff --git a/src/odc/codec/IntegerMissing.h b/src/odc/codec/IntegerMissing.h index b6312d13..f11a3985 100644 --- a/src/odc/codec/IntegerMissing.h +++ b/src/odc/codec/IntegerMissing.h @@ -11,6 +11,9 @@ #ifndef odc_core_codec_IntegerMissing_H #define odc_core_codec_IntegerMissing_H +#include + +#include "odc/ODBAPISettings.h" #include "odc/codec/Integer.h" namespace odc { @@ -38,14 +41,19 @@ class BaseCodecMissing : public BaseCodecInteger { unsigned char* encode(unsigned char* p, const double& d) override { static_assert(sizeof(ValueType) == sizeof(d), "unsafe casting check"); - const ValueType& val(reinterpret_cast(d)); InternalValueType s; - if (val == this->missingValue_) { + if (ODBAPISettings::instance().integersAsDoubles() && std::isnan(d)) { s = DerivedCodec::missingMarker; } else { - s = val - this->min_; - ASSERT(s != DerivedCodec::missingMarker); + const ValueType& val(reinterpret_cast(d)); + if (val == this->missingValue_) { + s = DerivedCodec::missingMarker; + } + else { + s = val - this->min_; + ASSERT(s != DerivedCodec::missingMarker); + } } ByteOrder::swap(s); ::memcpy(p, &s, sizeof(s)); diff --git a/tests/core/test_codecs_end_to_end.cc b/tests/core/test_codecs_end_to_end.cc index 3dfc1234..fc47dfe1 100644 --- a/tests/core/test_codecs_end_to_end.cc +++ b/tests/core/test_codecs_end_to_end.cc @@ -16,9 +16,11 @@ #include "eckit/io/MemoryHandle.h" #include "eckit/testing/Test.h" +#include "odc/MDI.h" #include "odc/Reader.h" #include "odc/Writer.h" #include "odc/api/ColumnType.h" +#include "odc/api/Odb.h" #include "odc/codec/Integer.h" #include "odc/codec/String.h" #include "odc/core/MetaData.h" @@ -381,7 +383,6 @@ CASE("NaN round-trips as NaN in REAL/DOUBLE columns") { } } -#if 0 CASE("NaN is coerced to missing in INTEGER/BITFIELD columns") { const double NaN = std::numeric_limits::quiet_NaN(); @@ -421,7 +422,40 @@ CASE("NaN is coerced to missing in INTEGER/BITFIELD columns") { EXPECT(it == oda.end()); } } -#endif + + +CASE("api::encode coerces NaN to Missing for INTEGER/BITFIELD columns") { + + const double NaN = std::numeric_limits::quiet_NaN(); + std::vector values{1.0, NaN, 3.0}; + + for (auto type : {odc::api::INTEGER, odc::api::BITFIELD}) { + + eckit::Buffer buf(4096); + eckit::MemoryHandle writeDH(buf); + writeDH.openForWrite(0); + + { + std::vector columns{{"col0", type, sizeof(double), {}}}; + std::vector strides{ + {values.data(), values.size(), sizeof(double), sizeof(double)}}; + odc::api::encode(writeDH, columns, strides); + } + + eckit::MemoryHandle dh(buf.data(), static_cast(writeDH.position())); + dh.openForRead(); + odc::Reader oda(dh); + + odc::Reader::iterator it = oda.begin(); + EXPECT((*it)[0] == 1.0); + ++it; + EXPECT((*it)[0] == odc::MDI::integerMDI()); + ++it; + EXPECT((*it)[0] == 3.0); + ++it; + EXPECT(it == oda.end()); + } +} // ------------------------------------------------------------------------------------------------------ From 879da35d109cf9bd261ae703154be33bff039dc5 Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Thu, 30 Apr 2026 17:27:28 +0200 Subject: [PATCH 08/11] Swap bitfield/int NaN coercion logic for UserError; address pr comments --- src/odc/Comparator.h | 6 +-- src/odc/codec/Integer.h | 22 ++++------ src/odc/codec/IntegerMissing.h | 16 ++----- src/odc/core/Codec.cc | 6 +-- src/odc/core/Codec.h | 5 --- tests/core/test_codecs_end_to_end.cc | 62 +++++++--------------------- tests/core/test_comparator.cc | 18 ++++++-- 7 files changed, 46 insertions(+), 89 deletions(-) diff --git a/src/odc/Comparator.h b/src/odc/Comparator.h index 089284ef..950491f0 100644 --- a/src/odc/Comparator.h +++ b/src/odc/Comparator.h @@ -77,10 +77,8 @@ class Comparator { return relativeError; } - inline static int same(double A, double B) { - if (std::isnan(A) || std::isnan(B)) - return std::isnan(A) && std::isnan(B); - return err(A, B) < maxRelativeError; + inline static bool same(double A, double B) { + return err(A, B) < maxRelativeError || (std::isnan(A) && std::isnan(B)); } void raiseNotEqual(const core::Column&, double, double); diff --git a/src/odc/codec/Integer.h b/src/odc/codec/Integer.h index 3bad7754..623a5c3d 100644 --- a/src/odc/codec/Integer.h +++ b/src/odc/codec/Integer.h @@ -13,7 +13,7 @@ #include -#include "eckit/log/Log.h" +#include "eckit/exception/Exceptions.h" #include "odc/ODBAPISettings.h" #include "odc/core/Codec.h" @@ -41,7 +41,9 @@ class BaseCodecInteger : public core::DataStreamCodec { public: // methods BaseCodecInteger(api::ColumnType type, const std::string& name, double minmaxmissing = odc::MDI::integerMDI()) : - core::DataStreamCodec(name, type), castedMissingValue_(static_cast(minmaxmissing)) { + core::DataStreamCodec(name, type), + castedMissingValue_(static_cast(minmaxmissing)), + rejectNaN_(ODBAPISettings::instance().integersAsDoubles()) { this->min_ = minmaxmissing; this->max_ = minmaxmissing; @@ -70,17 +72,9 @@ class BaseCodecInteger : public core::DataStreamCodec { void gatherStats(const double& v) override { static_assert(sizeof(ValueType) == sizeof(v), "unsafe casting check"); - // n.b. NaN has no meaningful integer bit pattern. Coerce to missing and flag - // hasMissing_ so the optimiser picks an appropriate codec. Gated on - // integersAsDoubles: int64 values (e.g. -1) overlap the NaN bit pattern. - if (ODBAPISettings::instance().integersAsDoubles() && std::isnan(v)) { - if (!this->hasNaN_) { - eckit::Log::warning() << "odc: NaN value in INTEGER/BITFIELD column (codec '" << this->name() - << "'); coerced to missing." << std::endl; - } - this->hasNaN_ = true; - this->hasMissing_ = 1; - return; + if (rejectNaN_ && std::isnan(v)) { + throw eckit::UserError( + "NaN is not a valid value in INTEGER/BITFIELD column '" + this->name() + "'"); } const ValueType& val(reinterpret_cast(v)); core::Codec::gatherStats(val); @@ -92,6 +86,8 @@ class BaseCodecInteger : public core::DataStreamCodec { /// directly where needed is to work around a Cray 8.7 compiler bug, where /// where the punned version gets optimised out ValueType castedMissingValue_; + + const bool rejectNaN_; }; diff --git a/src/odc/codec/IntegerMissing.h b/src/odc/codec/IntegerMissing.h index f11a3985..b6312d13 100644 --- a/src/odc/codec/IntegerMissing.h +++ b/src/odc/codec/IntegerMissing.h @@ -11,9 +11,6 @@ #ifndef odc_core_codec_IntegerMissing_H #define odc_core_codec_IntegerMissing_H -#include - -#include "odc/ODBAPISettings.h" #include "odc/codec/Integer.h" namespace odc { @@ -41,19 +38,14 @@ class BaseCodecMissing : public BaseCodecInteger { unsigned char* encode(unsigned char* p, const double& d) override { static_assert(sizeof(ValueType) == sizeof(d), "unsafe casting check"); + const ValueType& val(reinterpret_cast(d)); InternalValueType s; - if (ODBAPISettings::instance().integersAsDoubles() && std::isnan(d)) { + if (val == this->missingValue_) { s = DerivedCodec::missingMarker; } else { - const ValueType& val(reinterpret_cast(d)); - if (val == this->missingValue_) { - s = DerivedCodec::missingMarker; - } - else { - s = val - this->min_; - ASSERT(s != DerivedCodec::missingMarker); - } + s = val - this->min_; + ASSERT(s != DerivedCodec::missingMarker); } ByteOrder::swap(s); ::memcpy(p, &s, sizeof(s)); diff --git a/src/odc/core/Codec.cc b/src/odc/core/Codec.cc index 17e6258d..12c969bb 100644 --- a/src/odc/core/Codec.cc +++ b/src/odc/core/Codec.cc @@ -29,7 +29,6 @@ Codec::Codec(const std::string& name, api::ColumnType type) : missingValue_(odc::MDI::realMDI()), min_(missingValue_), max_(missingValue_), - hasNaN_(false), type_(type) {} std::unique_ptr Codec::clone() { @@ -38,7 +37,6 @@ std::unique_ptr Codec::clone() { c->missingValue_ = missingValue_; c->min_ = min_; c->max_ = max_; - c->hasNaN_ = hasNaN_; return c; } @@ -102,10 +100,8 @@ void Codec::missingValue(double v) { } void Codec::gatherStats(const double& v) { - if (std::isnan(v)) { - hasNaN_ = true; + if (std::isnan(v)) return; - } if (v == missingValue_) { hasMissing_ = 1; } diff --git a/src/odc/core/Codec.h b/src/odc/core/Codec.h index 71c76383..108c7117 100644 --- a/src/odc/core/Codec.h +++ b/src/odc/core/Codec.h @@ -62,7 +62,6 @@ class Codec { void resetStats() { min_ = max_ = missingValue_; hasMissing_ = false; - hasNaN_ = false; } virtual void gatherStats(const double& v); @@ -70,9 +69,6 @@ class Codec { void hasMissing(bool h) { hasMissing_ = h; } int32_t hasMissing() const { return hasMissing_; } - void hasNaN(bool s) { hasNaN_ = s; } - bool hasNaN() const { return hasNaN_; } - void min(double m) { min_ = m; } double min() const { return min_; } @@ -110,7 +106,6 @@ class Codec { double missingValue_; double min_; double max_; - bool hasNaN_; api::ColumnType type_; diff --git a/tests/core/test_codecs_end_to_end.cc b/tests/core/test_codecs_end_to_end.cc index fc47dfe1..11f8dce9 100644 --- a/tests/core/test_codecs_end_to_end.cc +++ b/tests/core/test_codecs_end_to_end.cc @@ -383,7 +383,7 @@ CASE("NaN round-trips as NaN in REAL/DOUBLE columns") { } } -CASE("NaN is coerced to missing in INTEGER/BITFIELD columns") { +CASE("NaN is rejected in INTEGER/BITFIELD columns") { const double NaN = std::numeric_limits::quiet_NaN(); @@ -392,39 +392,22 @@ CASE("NaN is coerced to missing in INTEGER/BITFIELD columns") { eckit::Buffer buf(4096); eckit::MemoryHandle writeDH(buf); - { - odc::Writer<> oda(writeDH); - odc::Writer<>::iterator writer = oda.begin(); - - writer->setNumberOfColumns(1); - writer->setColumn(0, "col0", type); - writer->writeHeader(); - - (*writer)[0] = 1.0; - ++writer; - (*writer)[0] = NaN; - ++writer; - (*writer)[0] = 3.0; - ++writer; - } + odc::Writer<> oda(writeDH); + odc::Writer<>::iterator writer = oda.begin(); - eckit::MemoryHandle dh(buf.data(), static_cast(writeDH.position())); - dh.openForRead(); - odc::Reader oda(dh); + writer->setNumberOfColumns(1); + writer->setColumn(0, "col0", type); + writer->writeHeader(); - odc::Reader::iterator it = oda.begin(); - EXPECT((*it)[0] == 1.0); - ++it; - EXPECT((*it)[0] == odc::MDI::integerMDI()); - ++it; - EXPECT((*it)[0] == 3.0); - ++it; - EXPECT(it == oda.end()); + (*writer)[0] = 1.0; + ++writer; + (*writer)[0] = NaN; + EXPECT_THROWS_AS(++writer, eckit::UserError); } } -CASE("api::encode coerces NaN to Missing for INTEGER/BITFIELD columns") { +CASE("api::encode rejects NaN in INTEGER/BITFIELD columns") { const double NaN = std::numeric_limits::quiet_NaN(); std::vector values{1.0, NaN, 3.0}; @@ -435,25 +418,10 @@ CASE("api::encode coerces NaN to Missing for INTEGER/BITFIELD columns") { eckit::MemoryHandle writeDH(buf); writeDH.openForWrite(0); - { - std::vector columns{{"col0", type, sizeof(double), {}}}; - std::vector strides{ - {values.data(), values.size(), sizeof(double), sizeof(double)}}; - odc::api::encode(writeDH, columns, strides); - } - - eckit::MemoryHandle dh(buf.data(), static_cast(writeDH.position())); - dh.openForRead(); - odc::Reader oda(dh); - - odc::Reader::iterator it = oda.begin(); - EXPECT((*it)[0] == 1.0); - ++it; - EXPECT((*it)[0] == odc::MDI::integerMDI()); - ++it; - EXPECT((*it)[0] == 3.0); - ++it; - EXPECT(it == oda.end()); + std::vector columns{{"col0", type, sizeof(double), {}}}; + std::vector strides{ + {values.data(), values.size(), sizeof(double), sizeof(double)}}; + EXPECT_THROWS_AS(odc::api::encode(writeDH, columns, strides), eckit::UserError); } } diff --git a/tests/core/test_comparator.cc b/tests/core/test_comparator.cc index 4a8a043c..c59423d9 100644 --- a/tests/core/test_comparator.cc +++ b/tests/core/test_comparator.cc @@ -21,9 +21,21 @@ using namespace eckit::testing; CASE("Comparator treats two NaNs as equal") { const double NaN = std::numeric_limits::quiet_NaN(); - EXPECT(odc::Comparator::same(NaN, NaN) == 1); - EXPECT(odc::Comparator::same(NaN, 1.0) == 0); - EXPECT(odc::Comparator::same(1.0, NaN) == 0); + EXPECT(odc::Comparator::same(NaN, NaN)); + EXPECT(!odc::Comparator::same(NaN, 1.0)); + EXPECT(!odc::Comparator::same(1.0, NaN)); +} + +CASE("Comparator treats identical values as equal") { + EXPECT(odc::Comparator::same(0.0, 0.0)); + EXPECT(odc::Comparator::same(1.0, 1.0)); + EXPECT(odc::Comparator::same(-1.0, -1.0)); +} + +CASE("Comparator treats different values as unequal") { + EXPECT(!odc::Comparator::same(0.0, 1.0)); + EXPECT(!odc::Comparator::same(1.0, 2.0)); + EXPECT(!odc::Comparator::same(1.0, -1.0)); } // ------------------------------------------------------------------------------------------------------ From 33a0742f38a46b779603d23990e328f1ea5f94ee Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Mon, 4 May 2026 10:54:32 +0200 Subject: [PATCH 09/11] Swap EXPECT for EXPECT_EQUAL --- tests/core/test_codecs_end_to_end.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/core/test_codecs_end_to_end.cc b/tests/core/test_codecs_end_to_end.cc index 11f8dce9..8435ce07 100644 --- a/tests/core/test_codecs_end_to_end.cc +++ b/tests/core/test_codecs_end_to_end.cc @@ -338,8 +338,8 @@ CASE("NaN before real values preserves correct min/max in column metadata") { odc::Reader oda(dh); odc::Reader::iterator it = oda.begin(); - EXPECT(it->columns()[0]->min() == 1.1); - EXPECT(it->columns()[0]->max() == 3.3); + EXPECT_EQUAL(it->columns()[0]->min(), 1.1); + EXPECT_EQUAL(it->columns()[0]->max(), 3.3); } @@ -373,13 +373,13 @@ CASE("NaN round-trips as NaN in REAL/DOUBLE columns") { odc::Reader oda(dh); odc::Reader::iterator it = oda.begin(); - EXPECT((*it)[0] == 1.0); + EXPECT_EQUAL((*it)[0], 1.0); ++it; EXPECT(std::isnan((*it)[0])); ++it; - EXPECT((*it)[0] == 3.0); + EXPECT_EQUAL((*it)[0], 3.0); ++it; - EXPECT(it == oda.end()); + EXPECT_EQUAL(it, oda.end()); } } From 0697ddcb4bc32cbaa22ab97bde2e63e9b3cca750 Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Mon, 4 May 2026 10:54:42 +0200 Subject: [PATCH 10/11] rename cached settings --- src/odc/codec/Integer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/odc/codec/Integer.h b/src/odc/codec/Integer.h index 623a5c3d..22470c1c 100644 --- a/src/odc/codec/Integer.h +++ b/src/odc/codec/Integer.h @@ -43,7 +43,7 @@ class BaseCodecInteger : public core::DataStreamCodec { BaseCodecInteger(api::ColumnType type, const std::string& name, double minmaxmissing = odc::MDI::integerMDI()) : core::DataStreamCodec(name, type), castedMissingValue_(static_cast(minmaxmissing)), - rejectNaN_(ODBAPISettings::instance().integersAsDoubles()) { + integersAsDoubles_(ODBAPISettings::instance().integersAsDoubles()) { this->min_ = minmaxmissing; this->max_ = minmaxmissing; @@ -72,7 +72,7 @@ class BaseCodecInteger : public core::DataStreamCodec { void gatherStats(const double& v) override { static_assert(sizeof(ValueType) == sizeof(v), "unsafe casting check"); - if (rejectNaN_ && std::isnan(v)) { + if (integersAsDoubles_ && std::isnan(v)) { throw eckit::UserError( "NaN is not a valid value in INTEGER/BITFIELD column '" + this->name() + "'"); } @@ -87,7 +87,7 @@ class BaseCodecInteger : public core::DataStreamCodec { /// where the punned version gets optimised out ValueType castedMissingValue_; - const bool rejectNaN_; + const bool integersAsDoubles_; }; From 7fbc235a8418fa8b4d6697a3122fd7cb5a29da73 Mon Sep 17 00:00:00 2001 From: Joshua Harwood Date: Mon, 4 May 2026 11:25:45 +0200 Subject: [PATCH 11/11] format --- src/odc/codec/Integer.h | 3 +-- tests/core/test_codecs_end_to_end.cc | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/odc/codec/Integer.h b/src/odc/codec/Integer.h index 22470c1c..f2226d53 100644 --- a/src/odc/codec/Integer.h +++ b/src/odc/codec/Integer.h @@ -73,8 +73,7 @@ class BaseCodecInteger : public core::DataStreamCodec { void gatherStats(const double& v) override { static_assert(sizeof(ValueType) == sizeof(v), "unsafe casting check"); if (integersAsDoubles_ && std::isnan(v)) { - throw eckit::UserError( - "NaN is not a valid value in INTEGER/BITFIELD column '" + this->name() + "'"); + throw eckit::UserError("NaN is not a valid value in INTEGER/BITFIELD column '" + this->name() + "'"); } const ValueType& val(reinterpret_cast(v)); core::Codec::gatherStats(val); diff --git a/tests/core/test_codecs_end_to_end.cc b/tests/core/test_codecs_end_to_end.cc index 8435ce07..b1c48bdd 100644 --- a/tests/core/test_codecs_end_to_end.cc +++ b/tests/core/test_codecs_end_to_end.cc @@ -419,8 +419,7 @@ CASE("api::encode rejects NaN in INTEGER/BITFIELD columns") { writeDH.openForWrite(0); std::vector columns{{"col0", type, sizeof(double), {}}}; - std::vector strides{ - {values.data(), values.size(), sizeof(double), sizeof(double)}}; + std::vector strides{{values.data(), values.size(), sizeof(double), sizeof(double)}}; EXPECT_THROWS_AS(odc::api::encode(writeDH, columns, strides), eckit::UserError); } }