diff --git a/ManiVault/src/plugins/PointData/CMakeLists.txt b/ManiVault/src/plugins/PointData/CMakeLists.txt index e3f86f6ff..e5e54bf64 100644 --- a/ManiVault/src/plugins/PointData/CMakeLists.txt +++ b/ManiVault/src/plugins/PointData/CMakeLists.txt @@ -30,6 +30,8 @@ set(POINTS_SOURCES src/PointDataRange.h src/PointView.h src/RandomAccessRange.h + src/SparseMatrix.cpp + src/SparseMatrix.h ) set(POINTS_HEADERS @@ -53,6 +55,8 @@ set(POINTS_HEADERS src/DimensionsPickerProxyModel.h src/ModelResetter.h src/CreateSetFromSelectionAction.h + src/SparseMatrix.cpp + src/SparseMatrix.h ) # Note: This nesting of folders is intentional to allow uniform header inclusion in all projects diff --git a/ManiVault/src/plugins/PointData/src/PointData.cpp b/ManiVault/src/plugins/PointData/src/PointData.cpp index 0518529f1..85929558d 100644 --- a/ManiVault/src/plugins/PointData/src/PointData.cpp +++ b/ManiVault/src/plugins/PointData/src/PointData.cpp @@ -49,7 +49,10 @@ mv::Dataset PointData::createDataSet(const QString& guid /*= ""*/) std::uint32_t PointData::getNumPoints() const { - return static_cast(getSizeOfVector() / _numDimensions); + if (_isDense) + return static_cast(getSizeOfVector() / _numDimensions); + else + return _numRows; } std::uint32_t PointData::getNumDimensions() const @@ -65,8 +68,15 @@ std::uint64_t PointData::getNumberOfElements() const std::uint64_t PointData::getRawDataSize() const { - std::uint64_t elementSize = std::visit([](auto& vec) { return vec.empty() ? 0u : sizeof(vec[0]); }, _variantOfVectors); - return elementSize * getNumberOfElements(); + if (_isDense) + { + std::uint64_t elementSize = std::visit([](auto& vec) { return vec.empty() ? 0u : sizeof(vec[0]); }, _variantOfVectors); + return elementSize * getNumberOfElements(); + } + else + { + return (_numRows + 4) * sizeof(size_t) + _sparseData.getNumNonZeros() * 2 * sizeof(uint16_t); + } } void* PointData::getDataVoidPtr() @@ -101,6 +111,18 @@ void PointData::setDimensionNames(const std::vector& dimNames) qWarning() << "PointData: Number of dimension names does not equal the number of data dimensions"; } +std::vector PointData::row(size_t rowIndex) const +{ + if (!_isDense) + { + return _sparseData.getDenseRow(rowIndex); + } + else + { + qWarning() << ".row() not implemented for dense data"; + } +} + float PointData::getValueAt(const std::size_t index) const { return std::visit([index](const auto& vec) @@ -125,37 +147,97 @@ void PointData::fromVariantMap(const QVariantMap& variantMap) variantMapMustContain(variantMap, "Data"); variantMapMustContain(variantMap, "NumberOfPoints"); variantMapMustContain(variantMap, "NumberOfDimensions"); + variantMapMustContain(variantMap, "Dense"); const auto data = variantMap["Data"].toMap(); const auto numberOfPoints = static_cast(variantMap["NumberOfPoints"].toInt()); - const auto numberOfDimensions =variantMap["NumberOfDimensions"].toUInt(); + const auto numberOfDimensions = variantMap["NumberOfDimensions"].toUInt(); const auto numberOfElements = numberOfPoints * numberOfDimensions; const auto elementTypeIndex = static_cast(data["TypeIndex"].toInt()); const auto rawData = data["Raw"].toMap(); + const bool isDense = variantMap["Dense"].toBool(); + _isDense = isDense; - setElementTypeSpecifier(elementTypeIndex); - resizeVector(numberOfElements); - populateDataBufferFromVariantMap(rawData, (char*)getDataVoidPtr()); - _numDimensions = numberOfDimensions; - + if (_isDense) + { + setElementTypeSpecifier(elementTypeIndex); + resizeVector(numberOfElements); + populateDataBufferFromVariantMap(rawData, (char*)getDataVoidPtr()); + _numDimensions = numberOfDimensions; + } + else + { + variantMapMustContain(variantMap, "NumberOfNonZeroElements"); + + const auto numberOfNonZeroElements = variantMap["NumberOfNonZeroElements"].toULongLong(); + + std::vector bytes((numberOfPoints + 1) * sizeof(size_t) + numberOfNonZeroElements * sizeof(uint16_t) * 2); + + populateDataBufferFromVariantMap(rawData, bytes.data()); + _numRows = numberOfPoints; // FIXME should be redundant + _numDimensions = numberOfDimensions; + + size_t offset = 0; + std::vector rowPointers(numberOfPoints + 1); + std::memcpy(rowPointers.data(), bytes.data() + offset, rowPointers.size() * sizeof(size_t)); + + offset += rowPointers.size() * sizeof(size_t); + std::vector colIndices(numberOfNonZeroElements); + std::memcpy(colIndices.data(), bytes.data() + offset, colIndices.size() * sizeof(uint16_t)); + + offset += colIndices.size() * sizeof(uint16_t); + std::vector values(numberOfNonZeroElements); + std::memcpy(values.data(), bytes.data() + offset, values.size() * sizeof(uint16_t)); + + _sparseData.setData(numberOfPoints, numberOfDimensions, rowPointers, colIndices, values); + + qDebug() << "Loaded sparse data with" << _numRows << "points and" << _numDimensions << "dimensions."; + } } QVariantMap PointData::toVariantMap() const { - const auto typeSpecifier = getElementTypeSpecifier(); - const auto typeSpecifierName = getElementTypeNames()[static_cast(typeSpecifier)]; - const auto typeIndex = static_cast(typeSpecifier); - const auto numberOfElements = getNumberOfElements(); - - QVariantMap rawData = rawDataToVariantMap((const char*)getDataConstVoidPtr(), getRawDataSize(), true); - - return { - { "TypeIndex", QVariant::fromValue(typeIndex) }, - { "TypeName", QVariant(typeSpecifierName) }, - { "Raw", QVariant::fromValue(rawData) }, - { "NumberOfElements", QVariant::fromValue(numberOfElements) } - }; + if (_isDense) + { + const auto typeSpecifier = getElementTypeSpecifier(); + const auto typeSpecifierName = getElementTypeNames()[static_cast(typeSpecifier)]; + const auto typeIndex = static_cast(typeSpecifier); + const auto numberOfElements = getNumberOfElements(); + + QVariantMap rawData = rawDataToVariantMap((const char*)getDataConstVoidPtr(), getRawDataSize(), true); + + return { + { "TypeIndex", QVariant::fromValue(typeIndex) }, + { "TypeName", QVariant(typeSpecifierName) }, + { "Raw", QVariant::fromValue(rawData) }, + { "NumberOfElements", QVariant::fromValue(numberOfElements) } + }; + } + else + { + const auto numberOfElements = getNumberOfElements(); + + std::vector bytes; + + const std::vector& indexPointers = _sparseData.getIndexPointers(); + const std::vector& colIndices = _sparseData.getColIndices(); + const std::vector& values = _sparseData.getValues(); + + const char* indexPointersBytes = (const char*) (indexPointers.data()); + const char* colIndicesBytes = (const char*) (colIndices.data()); + const char* valuesBytes = (const char*) (values.data()); + + bytes.insert(bytes.end(), indexPointersBytes, indexPointersBytes + indexPointers.size() * sizeof(size_t)); + bytes.insert(bytes.end(), colIndicesBytes, colIndicesBytes + colIndices.size() * sizeof(uint16_t)); + bytes.insert(bytes.end(), valuesBytes, valuesBytes + values.size() * sizeof(uint16_t)); + + QVariantMap rawData = rawDataToVariantMap(bytes.data(), bytes.size(), true); + + return { + { "Raw", QVariant::fromValue(rawData) } + }; + } } void PointData::extractFullDataForDimension(std::vector& result, const int dimensionIndex) const @@ -180,23 +262,38 @@ void PointData::extractFullDataForDimension(std::vector& result, const in void PointData::extractFullDataForDimensions(std::vector& result, const int dimensionIndex1, const int dimensionIndex2) const { - CheckDimensionIndex(dimensionIndex1); - CheckDimensionIndex(dimensionIndex2); - - result.resize(getNumPoints()); + if (_isDense) + { + CheckDimensionIndex(dimensionIndex1); + CheckDimensionIndex(dimensionIndex2); - std::visit( - [&result, this, dimensionIndex1, dimensionIndex2](const auto& vec) - { - const auto resultSize = result.size(); + result.resize(getNumPoints()); - for (std::size_t i{}; i < resultSize; ++i) + std::visit( + [&result, this, dimensionIndex1, dimensionIndex2](const auto& vec) { - const auto n = i * _numDimensions; - result[i].set(vec[n + dimensionIndex1], vec[n + dimensionIndex2]); - } - }, - _variantOfVectors); + const auto resultSize = result.size(); + + for (std::size_t i{}; i < resultSize; ++i) + { + const auto n = i * _numDimensions; + result[i].set(vec[n + dimensionIndex1], vec[n + dimensionIndex2]); + } + }, + _variantOfVectors); + } + else + { + result.resize(getNumPoints()); + + std::vector col1 = _sparseData.getDenseCol(dimensionIndex1); + std::vector col2 = _sparseData.getDenseCol(dimensionIndex2); + + for (size_t i = 0; i < result.size(); i++) + { + result[i].set(col1[i], col2[i]); + } + } } @@ -723,6 +820,11 @@ void Points::setDimensionNames(const std::vector& dimNames) mv::events().notifyDatasetDataDimensionsChanged(this); } +std::vector Points::row(size_t rowIndex) const +{ + return getRawData()->row(rowIndex); +} + float Points::getValueAt(const std::size_t index) const { return getRawData()->getValueAt(index); @@ -1056,6 +1158,11 @@ QVariantMap Points::toVariantMap() const variantMap["DimensionNames"] = (dimensionNames.size() > 1000) ? rawDataToVariantMap((char*)dimensionsByteArray.data(), dimensionsByteArray.size(), true) : QVariant::fromValue(dimensionNames); variantMap["NumberOfDimensions"] = getNumDimensions(); variantMap["Dimensions"] = _dimensionsPickerAction->toVariantMap(); + variantMap["Dense"] = getRawData()->isDense(); + if (!getRawData()->isDense()) + { + variantMap["NumberOfNonZeroElements"] = QVariant::fromValue(getRawData()->getNumNonZeroElements()); + } return variantMap; } diff --git a/ManiVault/src/plugins/PointData/src/PointData.h b/ManiVault/src/plugins/PointData/src/PointData.h index 1812993b7..7c1f39135 100644 --- a/ManiVault/src/plugins/PointData/src/PointData.h +++ b/ManiVault/src/plugins/PointData/src/PointData.h @@ -11,6 +11,7 @@ #include "LinkedData.h" #include "PointDataRange.h" #include "Set.h" +#include "SparseMatrix.h" #include "event/EventListener.h" @@ -216,6 +217,8 @@ class POINTDATA_EXPORT PointData : public mv::plugin::RawData std::uint64_t getNumberOfElements() const; + SparseMatrix& getSparseData() { return _sparseData; } + /** * Get amount of data occupied by the raw data * @return Size of the raw data in bytes @@ -269,7 +272,7 @@ class POINTDATA_EXPORT PointData : public mv::plugin::RawData void extractFullDataForDimension(std::vector& result, const int dimensionIndex) const; void extractFullDataForDimensions(std::vector& result, const int dimensionIndex1, const int dimensionIndex2) const; void extractDataForDimensions(std::vector& result, const int dimensionIndex1, const int dimensionIndex2, const std::vector& indices) const; - + template void populateFullDataForDimensions(ResultContainer& resultContainer, const DimensionIndices& dimensionIndices) const { @@ -399,8 +402,38 @@ class POINTDATA_EXPORT PointData : public mv::plugin::RawData _numDimensions = static_cast(numDimensions); } + template + void setSparseData(size_t numRows, size_t numCols, std::vector rowPointers, std::vector colIndices, std::vector values) + { + _numRows = numRows; + _numDimensions = numCols; + _sparseData.setData(numRows, numCols, rowPointers, colIndices, values); + + _isDense = false; + } + + void setSparseData(size_t numRows, size_t numCols) + { + _numRows = numRows; + _numDimensions = numCols; + + _isDense = false; + } + + bool isDense() const + { + return _isDense; + } + + size_t getNumNonZeroElements() + { + return _sparseData.getNumNonZeros(); + } + void setDimensionNames(const std::vector& dimNames); + std::vector row(size_t rowIndex) const; + // Returns the value of the element at the specified position in the current // data vector, converted to float. // Will work fine, even when the internal data element type is not float. @@ -432,6 +465,12 @@ class POINTDATA_EXPORT PointData : public mv::plugin::RawData unsigned int _numDimensions = 1; std::vector _dimNames; + + // Sparse data + unsigned int _numRows = 0; + SparseMatrix _sparseData; + + bool _isDense = true; }; // ============================================================================= @@ -634,6 +673,11 @@ class POINTDATA_EXPORT Points : public mv::DatasetImpl mv::events().notifyDatasetDataDimensionsChanged(this); } + SparseMatrix& getSparseData() + { + return getRawData()->getSparseData(); + } + /// Just calls the corresponding member function of its PointData. void setData(std::nullptr_t data, std::size_t numPoints, std::size_t numDimensions); @@ -661,6 +705,17 @@ class POINTDATA_EXPORT Points : public mv::DatasetImpl mv::events().notifyDatasetDataDimensionsChanged(this); } + template + void setSparseData(size_t numRows, size_t numCols, std::vector rowPointers, std::vector colIndices, std::vector values) + { + getRawData()->setSparseData(numRows, numCols, rowPointers, colIndices, values); + } + + void setSparseData(size_t numRows, size_t numCols) + { + getRawData()->setSparseData(numRows, numCols); + } + void extractDataForDimension(std::vector& result, const int dimensionIndex) const; void extractDataForDimensions(std::vector& result, const int dimensionIndex1, const int dimensionIndex2) const; @@ -766,10 +821,13 @@ class POINTDATA_EXPORT Points : public mv::DatasetImpl std::uint64_t getRawDataSize() const override { if (isProxy()) return 0; - else + else { return getRawData()->getRawDataSize(); + } } + std::vector row(size_t rowIndex) const; + // Returns the value of the element at the specified position in the current // data vector, converted to float. // Will work fine, even when the internal data element type is not float. diff --git a/ManiVault/src/plugins/PointData/src/SparseMatrix.cpp b/ManiVault/src/plugins/PointData/src/SparseMatrix.cpp new file mode 100644 index 000000000..68f36a528 --- /dev/null +++ b/ManiVault/src/plugins/PointData/src/SparseMatrix.cpp @@ -0,0 +1,89 @@ +#pragma once + +#include "SparseMatrix.h" + +template +SparseMatrix::SparseMatrix() +{ + +} + +template +SparseMatrix::SparseMatrix(size_t numRows, size_t numCols, size_t numNonZero) : + _numRows(numRows), + _numCols(numCols), + _numNonZero(numNonZero) +{ + _rowPointers.resize(numRows + 1); + _colIndices.resize(numNonZero); + _values.resize(numNonZero); +} + +template +void SparseMatrix::setData(size_t numRows, size_t numCols, std::vector rowPointers, std::vector colIndices, std::vector values) +{ + _numRows = numRows; + _numCols = numCols; + _numNonZero = colIndices.size(); + qDebug() << "Num non zero: " << _numNonZero; + qDebug() << "CSR vector sizes: " << rowPointers.size() << colIndices.size() << values.size(); + _rowPointers = rowPointers; + _colIndices = colIndices; + _values = values; +} + +template +SparseRow SparseMatrix::getSparseRow(unsigned int rowIndex) +{ + size_t nzStart = _rowPointers[rowIndex]; + size_t nzEnd = _rowPointers[rowIndex + 1]; + + SparseRow row; + row._colIndices = std::vector(_colIndices.begin() + nzStart, _colIndices.begin() + nzEnd); + row._values = std::vector(_values.begin() + nzStart, _values.begin() + nzEnd); + + return row; +} + +template +std::vector SparseMatrix::getDenseRow(unsigned int rowIndex) const +{ + size_t nzStart = _rowPointers[rowIndex]; + size_t nzEnd = _rowPointers[rowIndex + 1]; + + std::vector row(_numCols, 0); + + for (size_t nzIndex = nzStart; nzIndex < nzEnd; nzIndex++) + { + ColIndexType col = _colIndices[nzIndex]; + ValueType val = _values[nzIndex]; + row[col] = static_cast(val); + } + + return row; +} + +//SparseRow SparseMatrix::getSparseCol(unsigned int colIndex) +//{ +// +//} + +template +std::vector SparseMatrix::getDenseCol(unsigned int colIndex) const +{ + std::vector col(_numRows, 0); + + for (size_t rowIndex = 0; rowIndex < _numRows; rowIndex++) + { + size_t nzStart = _rowPointers[rowIndex]; + size_t nzEnd = _rowPointers[rowIndex + 1]; + + for (size_t nzIndex = nzStart; nzIndex < nzEnd; nzIndex++) + { + if (_colIndices[nzIndex] == colIndex) + col[rowIndex] = static_cast(_values[nzIndex]); + } + } + + return col; +} diff --git a/ManiVault/src/plugins/PointData/src/SparseMatrix.h b/ManiVault/src/plugins/PointData/src/SparseMatrix.h new file mode 100644 index 000000000..5ebf93932 --- /dev/null +++ b/ManiVault/src/plugins/PointData/src/SparseMatrix.h @@ -0,0 +1,56 @@ +#pragma once + +#include + +#include + +template +class SparseRow +{ +public: + std::vector _colIndices; + std::vector _values; +}; + +//template +//class SparseCol +//{ +//public: +// std::vector _colIndices; +// std::vector _values; +//}; + +template +class SparseMatrix +{ +public: + SparseMatrix(); + SparseMatrix(size_t numRows, size_t numCols, size_t numNonZero); + + size_t getNumRows() const { return _numRows; } + size_t getNumCols() const { return _numCols; } + size_t getNumNonZeros() const { return _numNonZero; } + + const std::vector& getIndexPointers() const { return _rowPointers; } + const std::vector& getColIndices() const { return _colIndices; } + const std::vector& getValues() const { return _values; } + + void setData(size_t numRows, size_t numCols, std::vector rowPointers, std::vector colIndices, std::vector values); + + SparseRow getSparseRow(unsigned int rowIndex); + std::vector getDenseRow(unsigned int rowIndex) const; + + //SparseRow getSparseCol(unsigned int colIndex); + std::vector getDenseCol(unsigned int colIndex) const; + +private: + size_t _numRows; + size_t _numCols; + size_t _numNonZero; + + std::vector _rowPointers; + std::vector _colIndices; + std::vector _values; +}; + +#include "SparseMatrix.cpp"