From ee9e94f555c3efe82f575b0520f1f9f804d69e2b Mon Sep 17 00:00:00 2001
From: fjobs
Date: Wed, 7 Jan 2026 13:39:06 +0100
Subject: [PATCH 01/24] Second skeleton for ColGroupDDCLZW using the
 IMapToDataGroup interface and extending APreAgg like ColGroupDDC for easier
 implementation.

Idea: store only the compressed version of the _data vector and important
metadata. If decompression is needed, we reconstruct the _data vector using
the metadata and the compressed _data vector. Decompression takes place at
most once. This is just an idea and there are other ways of implementing it.
---
 .../compress/colgroup/ColGroupDDCLZW.java     | 1283 +++++++++++++++++
 1 file changed, 1283 insertions(+)
 create mode 100644 src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
new file mode 100644
index 00000000000..01a87aafd7b
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -0,0 +1,1283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.colgroup;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorSpecies;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.runtime.compress.DMLCompressionException;
+import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.IdentityDictionary;
+import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary;
+import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
+import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
+import org.apache.sysds.runtime.compress.colgroup.indexes.RangeIndex;
+import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
+import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
+import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator;
+import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
+import org.apache.sysds.runtime.compress.colgroup.scheme.DDCScheme;
+import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme;
+import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator;
+import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
+import org.apache.sysds.runtime.compress.estim.EstimationFactors;
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
+import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
+import org.apache.sysds.runtime.data.DenseBlock;
+import org.apache.sysds.runtime.data.SparseBlock;
+import org.apache.sysds.runtime.data.SparseBlockMCSR;
+import org.apache.sysds.runtime.data.SparseRow;
+import org.apache.sysds.runtime.functionobjects.Builtin;
+import org.apache.sysds.runtime.functionobjects.Minus;
+import org.apache.sysds.runtime.functionobjects.Plus;
+import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
+import org.apache.sysds.runtime.matrix.operators.RightScalarOperator;
+import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
+import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
+import org.jboss.netty.handler.codec.compression.CompressionException;
+import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList;
+import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap;
+
+/**
+ * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC) whose
+ * mapping vector is additionally lzw compressed.
+ *

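+ * A tiny worked example of the LZW layer (illustrative values, not taken from the code): the mapping
+ * [0, 1, 0, 1, 0, 1] with nUnique = 2 reserves codes {0, 1} for the raw symbols and encodes to the
+ * code stream [0, 1, 2, 2], learning the phrase codes (0,1)->2, (1,0)->3 and (2,0)->4 along the way.
+ *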
+ * Idea: + * - The dictionary (_dict) is stored exactly like a normal DDC group (in the super class APreAgg). + * - The mapping vector (row -> dictionary-id) is NOT stored directly as an AMapToData field permanently. + * - Instead, the mapping is stored in compressed form as a byte[] (_dataLZW). + * - When an operation needs the mapping, it is decoded on-demand into a transient cached AMapToData (_data). + */ +public class ColGroupDDCLZW extends APreAgg implements IMapToDataGroup { + private static final long serialVersionUID = -5769772089913918987L; + + private transient volatile AMapToData _data; // Decoded mapping cache + private final int[] _dataLZW; // LZW compressed representation of the mapping (TODO optimize!) + + private final int _nRows; // Number of rows in the mapping vector + private final int _nUnique; // Number of unique values in the mapping vector + + // Compresses a decoded mapping (AMapToData) into an LZW-compressed byte array. + private static int[] compress(final AMapToData data) { + if (data == null) + throw new IllegalArgumentException("Invalid input: data is null"); + + final int nRows = data.size(); + if (nRows <= 0) { + throw new IllegalArgumentException("Invalid input: data has no rows"); + } + + final int nUnique = data.getUnique(); + if (nUnique <= 0) { + throw new IllegalArgumentException("Invalid input: data has no unique values"); + } + + // Extract _data values as int array. + final int[] dataIntVals = new int[nRows]; + for (int i = 0; i < nRows; i++) { + dataIntVals[i] = data.getIndex(i); + } + + // LZW dictionary. Maps (prefixCode, nextSymbol) to a new code. + // Using fastutil keeps lookups fast. + final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); + dict.defaultReturnValue(-1); + + // Codes {0,...,nUnique - 1} are reserved for the original symbols. + int nextCode = nUnique; + + // Output buffer. + IntArrayList out = new IntArrayList(); + + // Initialize w with the first input symbol. + int w = data.getIndex(0); + + // Process the remaining input symbols. + for (int i = 1; i < nRows; i++) { + int k = data.getIndex(i); // next input symbol + long key = packKey(w, k); // encode (w,k) into long key + + int wk = dict.get(key); // look if wk exists in dict + if (wk != -1) { + w = wk; // wk exists in dict so replace w by wk and continue. + } else { + // wk does not exist in dict. + out.add(w); + dict.put(key, nextCode++); + w = k; // Start new phrase with k + } + } + + out.add(w); + return out.toIntArray(); + } + + /** + * Builds a packed 64-bit key for (prefixCode, nextSymbol) pairs used in the LZW dictionary. + * Upper 32 bits: prefixCode (current pattern code w) + * Lower 32 bits: nextSymbol (k) + */ + private static long packKey(int prefixCode, int nextSymbol) { + return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); + } + + // Lazily decode the mapping from _dataLZW into an AMapToData instance and cache it in _data. + private AMapToData decompress() { + /*AMapToData d = _data; + if (d == null) { + synchronized (this) { + d = _data; + if (d == null) { + d = decode(_dataLZW, _nRows, _nUnique); + _data = d; + } + } + }*/ + return null; + } + + // Build Constructor: Used when creating a new DDCLZW instance during compression/build time. 
(TODO) + private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + super(colIndexes, dict, cachedCounts); + + // Derive metadata + _nRows = data.size(); + _nUnique = dict.getNumberOfValues(colIndexes.size()); + + // Compress mapping to LZW + _dataLZW = compress(data); + + if (CompressedMatrixBlock.debug) { + _data = data; + if (getNumValues() == 0) + throw new DMLCompressionException("Invalid construction with empty dictionary"); + if (data.size() == 0) + throw new DMLCompressionException("Invalid length of the data. is zero"); + + if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " + + dict.getNumberOfValues(colIndexes.size())); + int[] c = getCounts(); + if (c.length != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid DDC Construction"); + data.verify(); + } else { + _data = null; + } + } + + // Read Constructor: Used when creating this group from a serialized form (e.g., reading a compressed matrix from disk/memory stream). (TODO) + private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, int nRows, int nUnique, int[] cachedCounts) { + super(colIndexes, dict, cachedCounts); + + _dataLZW = dataLZW; + _data = null; + _nRows = nRows; + _nUnique = nUnique; + + if (CompressedMatrixBlock.debug) { + final AMapToData d = decompress(); + if (getNumValues() == 0) + throw new DMLCompressionException("Invalid construction with empty dictionary"); + if (d.size() == 0) + throw new DMLCompressionException("Invalid length of the data. is zero"); + + if (d.getUnique() != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid map to dict Map has:" + d.getUnique() + " while dict has " + + dict.getNumberOfValues(colIndexes.size())); + int[] c = getCounts(); + if (c.length != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid DDC Construction"); + d.verify(); + } + } + + // Factory method for creating a column group. 
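+	// A hedged usage sketch (hypothetical call site, mirroring how plain DDC groups are built):
+	//   AMapToData map = ...;   // row -> dictionary-id mapping from the compression builder
+	//   IDictionary dict = ...; // shared dictionary, stored exactly as in ColGroupDDC
+	//   AColGroup g = ColGroupDDCLZW.create(colIndexes, dict, map, null);
+	// Degenerate inputs fall back to cheaper groups (a null dictionary yields ColGroupEmpty and a
+	// single unique value yields ColGroupConst), so callers do not need to special-case them.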
+ public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + if (dict == null) + return new ColGroupEmpty(colIndexes); + else if (data.getUnique() == 1) + return ColGroupConst.create(colIndexes, dict); + else + return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts); + } + + public CompressionType getCompType() { + return CompressionType.DDCLZW; // TODO add new compression type DDCLZW + } + + public ColGroupType getColGroupType() { + return ColGroupType.DDCLZW; // TODO add new ColGroup type DDCLZW + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + AMapToData d = decompress(); + for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = d.getIndex(r); + if (sb.isEmpty(vr)) + continue; + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); + } + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { + final int idxSize = _colIndexes.size(); + if (db.isContiguous()) { + final int nColOut = db.getDim(1); + if (idxSize == 1 && nColOut == 1) + decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); + else if (idxSize == 1) + decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); + else if (idxSize == nColOut) // offC == 0 implied + decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); + else if (offC == 0 && offR == 0) + decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); + else if (offC == 0) + decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); + else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } + + private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + AMapToData d = decompress(); + final double[] c = db.values(0); + final int nCols = db.getDim(1); + final int colOff = _colIndexes.get(0) + offC; + for (int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) + c[offT] += values[d.getIndex(i)]; + + } + + // TODO: implement necessary logic in decompress to ensure correctness of getMapToData() + @Override + public AMapToData getMapToData() { + return decompress(); + } + + private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + AMapToData d = decompress(); + final double[] c = db.values(0); + decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, d); + } + + private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, + double[] values, AMapToData data) { + data.decompressToRange(c, rl, ru, offR, values); + + } + + private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol) { + AMapToData d = decompress(); + final double[] c = db.values(0); + for (int r = rl; r < ru; r++) { + final int start = d.getIndex(r) * nCol; + final int offStart = (offR + r) * nCol; + LibMatrixMult.vectAdd(values, c, start, offStart, nCol); + } + 
} + + private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol, int colOut) { + AMapToData d = decompress(); + int off = (rl + offR) * colOut; + for (int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { + final double[] c = db.values(offT); + final int rowIndex = d.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { + AMapToData d = decompress(); + final int nCol = _colIndexes.size(); + final int nColU = db.getDim(1); + final double[] c = db.values(0); + for (int i = rl; i < ru; i++) { + final int off = i * nColU; + final int rowIndex = d.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + AMapToData d = decompress(); + for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int rowIndex = d.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { + AMapToData d = decompress(); + for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = d.getIndex(r); + if (sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for (int j = apos; j < alen; j++) + ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); + } + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); + } + + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + AMapToData d = decompress(); + for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final int rowIndex = d.getIndex(i) * nCol; + for (int j = 0; j < nCol; j++) + ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); + } + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { + AMapToData d = decompress(); + for (int i = rl; i < ru; i++) { + final int vr = d.getIndex(i); + if (sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for (int j = apos; j < alen; j++) { + final int rowOut = _colIndexes.get(aix[j]); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + c[off + i] += aval[j]; + } + } + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + AMapToData d = decompress(); + final int nCol = _colIndexes.size(); + for (int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + for (int i = rl; i < ru; i++) { + final double v = dict[d.getIndex(i) * nCol + j]; + c[off + i] += v; + } + } + } + + @Override + 
protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { + AMapToData d = decompress(); + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for (int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + for (int i = 0; i < d.size(); i++) { + int di = d.getIndex(i); + if (sb.isEmpty(di)) + continue; + + final int apos = sb.pos(di); + final int alen = sb.size(di) + apos; + final int[] aix = sb.indexes(di); + final double[] aval = sb.values(di); + + for (int j = apos; j < alen; j++) { + sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); + } + } + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { + AMapToData d = decompress(); + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for (int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + final int nCol = _colIndexes.size(); + for (int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + SparseRow r = sbr.get(rowOut); + + for (int i = 0; i < d.size(); i++) { + final double v = dict[d.getIndex(i) * nCol + j]; + r = r.append(i, v); + } + sbr.set(rowOut, r, false); + } + } + + @Override + public double getIdx(int r, int colIdx) { + AMapToData d = decompress(); + return _dict.getValue(d.getIndex(r), colIdx, _colIndexes.size()); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + AMapToData d = decompress(); + for (int rix = rl; rix < ru; rix++) + c[rix] += preAgg[d.getIndex(rix)]; + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + AMapToData d = decompress(); + for (int i = rl; i < ru; i++) + c[i] = builtin.execute(c[i], preAgg[d.getIndex(i)]); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + AMapToData d = decompress(); + for (int rix = rl; rix < ru; rix++) + c[rix] *= preAgg[d.getIndex(rix)]; + } + + @Override + public int[] getCounts(int[] counts) { + return decompress().getCounts(counts); + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if (_colIndexes.size() == 1) + leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); + else + lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final DenseBlock retV = result.getDenseBlock(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
+ if (matrix.isEmpty()) + return; + else if (matrix.isInSparseFormat()) { + AMapToData d = decompress(); + if (cl != 0 || cu != d.size()) + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); + } else if (!matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, + cu); + else + lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru) { + + if (retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + + for (int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for (int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if (retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + // final int offR = r * nColRet; + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = 
decompress(); + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; + } + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if (retV.isContiguous()) + lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, + int nColRet, double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; + } + } + + private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + + if (matrix.isInSparseFormat()) + lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + final DenseBlock db = result.getDenseBlock(); + final SparseBlock sb = matrix.getSparseBlock(); + + if (cl != 0 || cu != d.size()) { + // sub part + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final double[] retV = db.values(r); + final int pos = db.pos(r); + lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); + } + } else { + for (int r = rl; r < ru; r++) + d.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); + } + } + + private final void 
lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { + AMapToData d = decompress(); + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + return; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + _dict.multiplyScalar(aval[i], retV, offR, d.getIndex(aix[i]), _colIndexes); + } + + private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if (matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + AMapToData d = decompress(); + final double[] retV = result.getDenseBlockValues(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] mV = matrix.getDenseBlockValues(); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, d.getIndex(c), _colIndexes); + } + } + + private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, + int cu) { + AMapToData d = decompress(); + final double[] retV = result.getDenseBlockValues(); + final int nColRet = result.getNumColumns(); + for (int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, d.getIndex(c), _colIndexes); + } + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + d.preAggregateDense(m, preAgg, rl, ru, cl, cu); + } + + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + DenseBlock db = that.getDenseBlock(); + DenseBlock retDB = ret.getDenseBlock(); + for (int i = rl; i < ru; i++) + leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); + } + + @Override + public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { + if (_dict instanceof IdentityDictionary) + identityRightDecompressingMult(right, ret, rl, ru, crl, cru); + else + defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); + } + + private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); // TODO: SPECIES Vector in ColGroupDDC. What do ? 
+ final int vLen = SPECIES.length(); + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + AMapToData d = decompress(); + for (int i = rl; i < ru; i++) { + int k = d.getIndex(i); + final int offOut = i * jd + crl; + final double aa = 1; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + + private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + AMapToData d = decompress(); + final double[] a = _dict.getValues(); + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int kd = _colIndexes.size(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + + final int blkzI = 32; + final int blkzK = 24; + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for (int bi = rl; bi < ru; bi += blkzI) { + final int bie = Math.min(ru, bi + blkzI); + for (int bk = 0; bk < kd; bk += blkzK) { + final int bke = Math.min(kd, bk + blkzK); + for (int i = bi; i < bie; i++) { + int offi = d.getIndex(i) * kd; + final int offOut = i * jd + crl; + for (int k = bk; k < bke; k++) { + final double aa = a[offi + k]; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + } + } + } + + final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { + vVec = vVec.broadcast(aa); + final int offj = k * jd; + final int end = endT + offj; + for (int j = offj + crl; j < end; j += vLen, offOut += vLen) { + DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); + DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); + res = vVec.fma(bVec, res); + res.intoArray(c, offOut); + } + for (int j = end; j < cru + offj; j++, offOut++) { + double bb = b[j]; + c[offOut] += bb * aa; + } + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + AMapToData d = decompress(); + if (cl != 0 || cu != d.size()) { + throw new NotImplementedException(); + } + d.preAggregateSparse(sb, preAgg, rl, ru); + } + + @Override + public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + AMapToData d = decompress(); + try { + + d.preAggregateDDC_DDC(that.d, that._dict, ret, that._colIndexes.size()); + } catch (Exception e) { + throw new CompressionException(that.toString(), e); + } + } + + @Override + public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + AMapToData d = decompress(); + d.preAggregateDDC_SDCZ(that.d, that._dict, that._indexes, ret, that._colIndexes.size()); + } + + @Override + public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + final AOffsetIterator itThat = that._indexes.getOffsetIterator(); + final int nCol = that._colIndexes.size(); + final int finalOff = that._indexes.getOffsetToLast(); + final double[] v = ret.getValues(); + AMapToData d = decompress(); + while (true) { + final int to = d.getIndex(itThat.value()); + that._dict.addToEntry(v, 0, to, nCol); + if (itThat.value() == finalOff) + break; + itThat.next(); + } + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + AMapToData d = decompress(); + d.preAggregateDDC_RLE(that._ptr, that.d, that._dict, ret, that._colIndexes.size()); + } + + 
@Override + public boolean sameIndexStructure(AColGroupCompressed that) { + AMapToData d = decompress(); + return that instanceof ColGroupDDC && ((ColGroupDDC) that).d == d; + } + + @Override + public long estimateInMemorySize() { + AMapToData d = decompress(); + long size = super.estimateInMemorySize(); + size += d.getInMemorySize(); + return size; + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + AMapToData d = decompress(); + if ((op.fn instanceof Plus || op.fn instanceof Minus)) { + final double v0 = op.executeScalar(0); + if (v0 == 0) + return this; + final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); + return ColGroupDDCFOR.create(_colIndexes, _dict, d, getCachedCounts(), reference); + } + return create(_colIndexes, _dict.applyScalarOp(op), d, getCachedCounts()); + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + AMapToData d = decompress(); + return create(_colIndexes, _dict.applyUnaryOp(op), d, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); + AMapToData d = decompress(); + return create(_colIndexes, ret, d, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + if ((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && + ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { + AMapToData d = decompress(); + final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); + return ColGroupDDCFOR.create(_colIndexes, _dict, d, getCachedCounts(), reference); + } + final IDictionary ret; + if (_colIndexes.size() == 1) + ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); + else + ret = _dict.binOpRight(op, v, _colIndexes); + AMapToData d = decompress(); + return create(_colIndexes, ret, d, getCachedCounts()); + } + + // TODO + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + AMapToData d = decompress(); + d.write(out); + } + + // TODO + public static ColGroupDDC read(DataInput in) throws IOException { + IColIndex cols = ColIndexFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); + AMapToData data = MapToFactory.readIn(in); + return new ColGroupDDC(cols, dict, data, null); + } + + @Override + public long getExactSizeOnDisk() { + AMapToData d = decompress(); + long ret = super.getExactSizeOnDisk(); + ret += d.getExactSizeOnDisk(); + return ret; + } + + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + final int nVals = getNumValues(); + final int nCols = getNumCols(); + return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); + } + + @Override + protected int numRowsToMultiply() { + AMapToData d = decompress(); + return d.size(); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + public boolean containsValue(double pattern) { + return _dict.containsValue(pattern); + } + + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + if (preAgg != null) { + AMapToData d = decompress(); + return create(colIndexes, preAgg, d, getCachedCounts()); + } 
else + return null; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + try { + AMapToData d = decompress(); + return ColGroupDDC.create(_colIndexes, _dict, d.slice(rl, ru), null); + } catch (Exception e) { + throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); + } + } + + @Override + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + AMapToData d = decompress(); + return create(colIndexes, newDictionary, d, getCachedCounts()); + } + + @Override + public AColGroup append(AColGroup g) { + if (g instanceof ColGroupDDC) { + if (g.getColIndices().equals(_colIndexes)) { + + ColGroupDDC gDDC = (ColGroupDDC) g; + if (gDDC._dict.equals(_dict)) { + AMapToData d = decompress(); + AMapToData nd = d.append(gDDC.d); + return create(_colIndexes, _dict, nd, null); + } else + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + } else + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); + } else + LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + @Override + public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { + for (int i = 1; i < g.length; i++) { + if (!_colIndexes.equals(g[i]._colIndexes)) { + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); + return null; + } + + if (!(g[i] instanceof ColGroupDDC)) { + LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + final ColGroupDDC gDDC = (ColGroupDDC) g[i]; + if (!gDDC._dict.equals(_dict)) { + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + return null; + } + } + AMapToData d = decompress(); + AMapToData nd = d.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); + return create(_colIndexes, _dict, nd, null); + } + + @Override + public ICLAScheme getCompressionScheme() { + throw new NotImplementedException(); + } + + @Override + public AColGroup recompress() { + throw new NotImplementedException(); + } + + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + try { + + IEncode enc = getEncoding(); + AMapToData d = decompress(); + EstimationFactors ef = new EstimationFactors(d.getUnique(), d.size(), d.size(), + _dict.getSparsity()); + return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); + } catch (Exception e) { + throw new DMLCompressionException(this.toString(), e); + } + } + + @Override + public IEncode getEncoding() { + AMapToData d = decompress(); + return EncodingFactory.create(d); + } + + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + AMapToData d = decompress(); + return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), d, getCachedCounts()); + } + + @Override + public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + final SparseBlock sb = selection.getSparseBlock(); + final SparseBlock retB = ret.getSparseBlock(); + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + @Override + protected void denseSelection(MatrixBlock selection, P[] points, 
MatrixBlock ret, int rl, int ru) { + // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; + final SparseBlock sb = selection.getSparseBlock(); + final DenseBlock retB = ret.getDenseBlock(); + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, + int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + if (_colIndexes instanceof RangeIndex) + leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); + else { + + pos += cl; // left side matrix position offset. + if (a.withEmpty()) { + final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; + for (int rc = cl; rc < cu; rc++, pos++) { + final int idx = _data.getIndex(rc); + if (idx != nVal) + values2[pos2 + _colIndexes.get(idx)] += values[pos]; + } + } else { + AMapToData d = decompress(); + for (int rc = cl; rc < cu; rc++, pos++) + values2[pos2 + _colIndexes.get(d.getIndex(rc))] += values[pos]; + } + } + } + + private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, + int cl, int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + + final int firstCol = pos2 + _colIndexes.get(0); + pos += cl; // left side matrix position offset. + AMapToData d = decompress(); + if (a.withEmpty()) { + final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; + for (int rc = cl; rc < cu; rc++, pos++) { + final int idx = d.getIndex(rc); + if (idx != nVal) + values2[firstCol + idx] += values[pos]; + } + } else { + for (int rc = cl; rc < cu; rc++, pos++) + values2[firstCol + d.getIndex(rc)] += values[pos]; + } + } + + @Override + public AColGroup morph(CompressionType ct, int nRow) { + // return this; + if (ct == getCompType()) + return this; + else if (ct == CompressionType.SDC) { + // return this; + AMapToData d = decompress(); + int[] counts = getCounts(); + int maxId = maxIndex(counts); + double[] def = _dict.getRow(maxId, _colIndexes.size()); + + int offsetSize = nRow - counts[maxId]; + int[] offsets = new int[offsetSize]; + AMapToData reducedData = MapToFactory.create(offsetSize, d.getUnique()); + int o = 0; + for (int i = 0; i < nRow; i++) { + int v = d.getIndex(i); + if (v != maxId) { + offsets[o] = i; + reducedData.set(o, v); + o++; + } + } + + return ColGroupSDC.create(_colIndexes, d.size(), _dict, def, OffsetFactory.createOffset(offsets), + reducedData, null); + } else if (ct == CompressionType.CONST) { + // if(1 < getNumValues()) { + String thisS = this.toString(); + if (thisS.length() > 10000) + thisS = thisS.substring(0, 10000) + "..."; + LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); + return this; + // } + } else if (ct == CompressionType.DDCFOR) + return this; // it does not make sense to change to FOR. 
+	else
+		return super.morph(ct, nRow);
+	}
+
+	private static int maxIndex(int[] counts) {
+		int id = 0;
+		for (int i = 1; i < counts.length; i++) {
+			if (counts[i] > counts[id]) {
+				id = i;
+			}
+		}
+		return id;
+	}
+
+	@Override
+	public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List<AColGroup> right) {
+		final IDictionary combined = combineDictionaries(nCol, right);
+		final IColIndex combinedColIndex = combineColIndexes(nCol, right);
+		AMapToData d = decompress();
+		return new ColGroupDDC(combinedColIndex, combined, d, getCachedCounts());
+	}
+
+	@Override
+	public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) {
+		IDictionary b = ((ColGroupDDC) right).getDictionary();
+		IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols());
+		IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol));
+		AMapToData d = decompress();
+		return new ColGroupDDC(combinedColIndex, combined, d, getCachedCounts());
+	}
+
+	@Override
+	public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
+		AMapToData d = decompress();
+		AMapToData[] maps = d.splitReshapeDDC(multiplier);
+		AColGroup[] res = new AColGroup[multiplier];
+		for (int i = 0; i < multiplier; i++) {
+			final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg);
+			res[i] = create(ci, _dict, maps[i], null);
+		}
+		return res;
+	}
+
+	@Override
+	public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool)
+		throws Exception {
+		AMapToData d = decompress();
+		AMapToData[] maps = d.splitReshapeDDCPushDown(multiplier, pool);
+		AColGroup[] res = new AColGroup[multiplier];
+		for (int i = 0; i < multiplier; i++) {
+			final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg);
+			res[i] = create(ci, _dict, maps[i], null);
+		}
+		return res;
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append(super.toString());
+		sb.append(String.format("\n%15s", "Data: "));
+		AMapToData d = decompress();
+		sb.append(d);
+		return sb.toString();
+	}
+
+	@Override
+	protected boolean allowShallowIdentityRightMult() {
+		return true;
+	}
+}

From 007611c60c2bfadf2d4ed13e89c1eb01b734b814 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Thu, 8 Jan 2026 15:48:50 +0100
Subject: [PATCH 02/24] Idea: DDCLZW stores the mapping vector exclusively in
 compressed form.

- No persistent MapToData cache is maintained.
- Sequential operations decode on-the-fly, while operations requiring random
  access explicitly materialize and fall back to DDC.
---
 .../compress/colgroup/ColGroupDDCLZW.java | 1120 +----------------
 1 file changed, 31 insertions(+), 1089 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 01a87aafd7b..d83120e1345 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -74,24 +74,27 @@
 /**
  * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC) whose
  * mapping vector is additionally lzw compressed.
- *

* Idea: - * - The dictionary (_dict) is stored exactly like a normal DDC group (in the super class APreAgg). - * - The mapping vector (row -> dictionary-id) is NOT stored directly as an AMapToData field permanently. - * - Instead, the mapping is stored in compressed form as a byte[] (_dataLZW). - * - When an operation needs the mapping, it is decoded on-demand into a transient cached AMapToData (_data). + * - DDCLZW stores the mapping vector exclusively in compressed form. + * - No persistent MapToData cache is maintained. + * - Sequential operations decode on-the-fly, while operations requiring random access explicitly materialize and fall back to DDC. */ public class ColGroupDDCLZW extends APreAgg implements IMapToDataGroup { private static final long serialVersionUID = -5769772089913918987L; - private transient volatile AMapToData _data; // Decoded mapping cache - private final int[] _dataLZW; // LZW compressed representation of the mapping (TODO optimize!) + private final int[] _dataLZW; // LZW compressed representation of the mapping private final int _nRows; // Number of rows in the mapping vector private final int _nUnique; // Number of unique values in the mapping vector - // Compresses a decoded mapping (AMapToData) into an LZW-compressed byte array. - private static int[] compress(final AMapToData data) { + + // Builds a packed 64-bit key for (prefixCode(w), nextSymbol(k)) pairs used in the LZW dictionary. (TODO) + private static long packKey(int prefixCode, int nextSymbol) { + return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); + } + + // Compresses a mapping (AMapToData) into an LZW-compressed byte/integer/? array. (TODO) + private static int[] compress(AMapToData data) { if (data == null) throw new IllegalArgumentException("Invalid input: data is null"); @@ -145,35 +148,18 @@ private static int[] compress(final AMapToData data) { return out.toIntArray(); } - /** - * Builds a packed 64-bit key for (prefixCode, nextSymbol) pairs used in the LZW dictionary. - * Upper 32 bits: prefixCode (current pattern code w) - * Lower 32 bits: nextSymbol (k) - */ - private static long packKey(int prefixCode, int nextSymbol) { - return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); - } - - // Lazily decode the mapping from _dataLZW into an AMapToData instance and cache it in _data. - private AMapToData decompress() { - /*AMapToData d = _data; - if (d == null) { - synchronized (this) { - d = _data; - if (d == null) { - d = decode(_dataLZW, _nRows, _nUnique); - _data = d; - } - } - }*/ + // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. (TODO) + private AMapToData decompress(int[] _dataLZW) { + AMapToData d = null; return null; } + // Build Constructor: Used when creating a new DDCLZW instance during compression/build time. (TODO) private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, cachedCounts); - // Derive metadata + // Derive metadadata _nRows = data.size(); _nUnique = dict.getNumberOfValues(colIndexes.size()); @@ -181,12 +167,10 @@ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, _dataLZW = compress(data); if (CompressedMatrixBlock.debug) { - _data = data; if (getNumValues() == 0) throw new DMLCompressionException("Invalid construction with empty dictionary"); - if (data.size() == 0) + if (_nRows == 0) throw new DMLCompressionException("Invalid length of the data. 
is zero"); - if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " + dict.getNumberOfValues(colIndexes.size())); @@ -194,8 +178,6 @@ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, if (c.length != dict.getNumberOfValues(colIndexes.size())) throw new DMLCompressionException("Invalid DDC Construction"); data.verify(); - } else { - _data = null; } } @@ -204,28 +186,28 @@ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, in super(colIndexes, dict, cachedCounts); _dataLZW = dataLZW; - _data = null; _nRows = nRows; _nUnique = nUnique; if (CompressedMatrixBlock.debug) { - final AMapToData d = decompress(); if (getNumValues() == 0) throw new DMLCompressionException("Invalid construction with empty dictionary"); - if (d.size() == 0) + if (_nRows <= 0) throw new DMLCompressionException("Invalid length of the data. is zero"); - - if (d.getUnique() != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + d.getUnique() + " while dict has " + if (_nUnique <= dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid map to dict Map has:" + _nUnique + " while dict has " + dict.getNumberOfValues(colIndexes.size())); int[] c = getCounts(); if (c.length != dict.getNumberOfValues(colIndexes.size())) throw new DMLCompressionException("Invalid DDC Construction"); - d.verify(); + + // Optional: validate that decoding works (expensive) + // AMapToData decoded = decode(_dataLZW, _nRows, _nUnique); + // decoded.verify(); } } - // Factory method for creating a column group. + // Factory method for creating a column group. 
(AColGroup g = ColGroupDDCLZW.create(...);) public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { if (dict == null) return new ColGroupEmpty(colIndexes); @@ -235,1049 +217,9 @@ else if (data.getUnique() == 1) return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts); } - public CompressionType getCompType() { - return CompressionType.DDCLZW; // TODO add new compression type DDCLZW - } - - public ColGroupType getColGroupType() { - return ColGroupType.DDCLZW; // TODO add new ColGroup type DDCLZW - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock sb) { - AMapToData d = decompress(); - for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = d.getIndex(r); - if (sb.isEmpty(vr)) - continue; - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); - } - } - - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values) { - final int idxSize = _colIndexes.size(); - if (db.isContiguous()) { - final int nColOut = db.getDim(1); - if (idxSize == 1 && nColOut == 1) - decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); - else if (idxSize == 1) - decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); - else if (idxSize == nColOut) // offC == 0 implied - decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); - else if (offC == 0 && offR == 0) - decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); - else if (offC == 0) - decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); - else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } - - private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - AMapToData d = decompress(); - final double[] c = db.values(0); - final int nCols = db.getDim(1); - final int colOff = _colIndexes.get(0) + offC; - for (int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) - c[offT] += values[d.getIndex(i)]; - - } - - // TODO: implement necessary logic in decompress to ensure correctness of getMapToData() - @Override - public AMapToData getMapToData() { - return decompress(); - } - - private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - AMapToData d = decompress(); - final double[] c = db.values(0); - decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, d); - } - - private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, - double[] values, AMapToData data) { - data.decompressToRange(c, rl, ru, offR, values); - - } - - private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol) { - AMapToData d = decompress(); - final double[] c = db.values(0); - for (int r = rl; r < ru; r++) { - final int start = d.getIndex(r) * nCol; - final int offStart = (offR + r) * nCol; - LibMatrixMult.vectAdd(values, c, start, offStart, nCol); - } - } - 
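+	// Hedged sketch of the still-missing decode step for the materialize-and-fall-back path.
+	// Assumptions mirror compress(): codes 0.._nUnique-1 are literal symbols and every emitted
+	// code registers one new phrase (prefix, firstSymbolOfNextPhrase) in insertion order. The
+	// helper names decode/firstSymbol/emit are illustrative, not existing API. A sequential
+	// on-the-fly variant would keep the same tables but buffer only one phrase at a time.
+	private static AMapToData decode(int[] codes, int nRows, int nUnique) {
+		final AMapToData out = MapToFactory.create(nRows, nUnique);
+		// Phrase table as parent pointers: phrase(c) = phrase(prefix[c]) + symbol[c].
+		final int maxCodes = nUnique + codes.length;
+		final int[] prefix = new int[maxCodes];
+		final int[] symbol = new int[maxCodes];
+		final int[] length = new int[maxCodes];
+		for (int c = 0; c < nUnique; c++) {
+			prefix[c] = -1; // literals have no prefix
+			symbol[c] = c;
+			length[c] = 1;
+		}
+		int nextCode = nUnique;
+		int prev = codes[0];
+		int o = emit(out, 0, prev, prefix, symbol, length);
+		for (int i = 1; i < codes.length; i++) {
+			final int cur = codes[i];
+			// Register the phrase the encoder learned in this step. If cur is the code being
+			// created right now (the classic LZW corner case), its first symbol is that of prev.
+			final int first = cur < nextCode ? firstSymbol(cur, prefix, symbol) : firstSymbol(prev, prefix, symbol);
+			prefix[nextCode] = prev;
+			symbol[nextCode] = first;
+			length[nextCode] = length[prev] + 1;
+			nextCode++;
+			o = emit(out, o, cur, prefix, symbol, length);
+			prev = cur;
+		}
+		if (o != nRows)
+			throw new DMLCompressionException("Invalid LZW decode: expected " + nRows + " rows but wrote " + o);
+		return out;
+	}
+
+	// Walk the parent pointers of a phrase down to its first symbol.
+	private static int firstSymbol(int code, int[] prefix, int[] symbol) {
+		while (prefix[code] >= 0)
+			code = prefix[code];
+		return symbol[code];
+	}
+
+	// Write the phrase of a code into out at position o (filled back to front) and return the new position.
+	private static int emit(AMapToData out, int o, int code, int[] prefix, int[] symbol, int[] length) {
+		int idx = o + length[code] - 1;
+		for (int c = code; c >= 0; c = prefix[c])
+			out.set(idx--, symbol[c]);
+		return o + length[code];
+	}
+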
- private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol, int colOut) { - AMapToData d = decompress(); - int off = (rl + offR) * colOut; - for (int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { - final double[] c = db.values(offT); - final int rowIndex = d.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { - AMapToData d = decompress(); - final int nCol = _colIndexes.size(); - final int nColU = db.getDim(1); - final double[] c = db.values(0); - for (int i = rl; i < ru; i++) { - final int off = i * nColU; - final int rowIndex = d.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - AMapToData d = decompress(); - for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - final int rowIndex = d.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - SparseBlock sb) { - AMapToData d = decompress(); - for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = d.getIndex(r); - if (sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for (int j = apos; j < alen; j++) - ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); - } - } - - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); - } - - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - AMapToData d = decompress(); - for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final int rowIndex = d.getIndex(i) * nCol; - for (int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); - } - } - - @Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { - AMapToData d = decompress(); - for (int i = rl; i < ru; i++) { - final int vr = d.getIndex(i); - if (sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for (int j = apos; j < alen; j++) { - final int rowOut = _colIndexes.get(aix[j]); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - c[off + i] += aval[j]; - } - } - } - - @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { - AMapToData d = decompress(); - final int nCol = _colIndexes.size(); - for (int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - for (int i = rl; i < ru; i++) { - final double v = dict[d.getIndex(i) * nCol + j]; - c[off + i] += v; - } - } - } - - @Override - protected 
void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { - AMapToData d = decompress(); - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for (int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - for (int i = 0; i < d.size(); i++) { - int di = d.getIndex(i); - if (sb.isEmpty(di)) - continue; - - final int apos = sb.pos(di); - final int alen = sb.size(di) + apos; - final int[] aix = sb.indexes(di); - final double[] aval = sb.values(di); - - for (int j = apos; j < alen; j++) { - sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); - } - } - - } - - @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { - AMapToData d = decompress(); - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for (int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - final int nCol = _colIndexes.size(); - for (int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - SparseRow r = sbr.get(rowOut); - - for (int i = 0; i < d.size(); i++) { - final double v = dict[d.getIndex(i) * nCol + j]; - r = r.append(i, v); - } - sbr.set(rowOut, r, false); - } - } - - @Override - public double getIdx(int r, int colIdx) { - AMapToData d = decompress(); - return _dict.getValue(d.getIndex(r), colIdx, _colIndexes.size()); - } - - @Override - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { - AMapToData d = decompress(); - for (int rix = rl; rix < ru; rix++) - c[rix] += preAgg[d.getIndex(rix)]; - } - - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - AMapToData d = decompress(); - for (int i = rl; i < ru; i++) - c[i] = builtin.execute(c[i], preAgg[d.getIndex(i)]); - } - - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - AMapToData d = decompress(); - for (int rix = rl; rix < ru; rix++) - c[rix] *= preAgg[d.getIndex(rix)]; - } - - @Override - public int[] getCounts(int[] counts) { - return decompress().getCounts(counts); - } - - @Override - public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if (_colIndexes.size() == 1) - leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); - else - lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final DenseBlock retV = result.getDenseBlock(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
- if (matrix.isEmpty()) - return; - else if (matrix.isInSparseFormat()) { - AMapToData d = decompress(); - if (cl != 0 || cu != d.size()) - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); - } else if (!matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, - cu); - else - lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru) { - - if (retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - - for (int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for (int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if (retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - // final int offR = r * nColRet; - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = 
decompress(); - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[d.getIndex(aix[i])]; - } - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if (retV.isContiguous()) - lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, - int nColRet, double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[d.getIndex(c)]; - } - } - - private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - - if (matrix.isInSparseFormat()) - lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - final DenseBlock db = result.getDenseBlock(); - final SparseBlock sb = matrix.getSparseBlock(); - - if (cl != 0 || cu != d.size()) { - // sub part - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final double[] retV = db.values(r); - final int pos = db.pos(r); - lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); - } - } else { - for (int r = rl; r < ru; r++) - d.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); - } - } - - private final void 
lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { - AMapToData d = decompress(); - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - return; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - _dict.multiplyScalar(aval[i], retV, offR, d.getIndex(aix[i]), _colIndexes); - } - - private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if (matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - AMapToData d = decompress(); - final double[] retV = result.getDenseBlockValues(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] mV = matrix.getDenseBlockValues(); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, d.getIndex(c), _colIndexes); - } - } - - private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, - int cu) { - AMapToData d = decompress(); - final double[] retV = result.getDenseBlockValues(); - final int nColRet = result.getNumColumns(); - for (int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, d.getIndex(c), _colIndexes); - } - } - - @Override - public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - d.preAggregateDense(m, preAgg, rl, ru, cl, cu); - } - - @Override - public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - DenseBlock db = that.getDenseBlock(); - DenseBlock retDB = ret.getDenseBlock(); - for (int i = rl; i < ru; i++) - leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); - } - - @Override - public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { - if (_dict instanceof IdentityDictionary) - identityRightDecompressingMult(right, ret, rl, ru, crl, cru); - else - defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); - } - - private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); // TODO: SPECIES Vector in ColGroupDDC. What do ? 
- final int vLen = SPECIES.length(); - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - AMapToData d = decompress(); - for (int i = rl; i < ru; i++) { - int k = d.getIndex(i); - final int offOut = i * jd + crl; - final double aa = 1; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - - private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - AMapToData d = decompress(); - final double[] a = _dict.getValues(); - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int kd = _colIndexes.size(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - - final int blkzI = 32; - final int blkzK = 24; - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for (int bi = rl; bi < ru; bi += blkzI) { - final int bie = Math.min(ru, bi + blkzI); - for (int bk = 0; bk < kd; bk += blkzK) { - final int bke = Math.min(kd, bk + blkzK); - for (int i = bi; i < bie; i++) { - int offi = d.getIndex(i) * kd; - final int offOut = i * jd + crl; - for (int k = bk; k < bke; k++) { - final double aa = a[offi + k]; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - } - } - } - - final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { - vVec = vVec.broadcast(aa); - final int offj = k * jd; - final int end = endT + offj; - for (int j = offj + crl; j < end; j += vLen, offOut += vLen) { - DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); - DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); - res = vVec.fma(bVec, res); - res.intoArray(c, offOut); - } - for (int j = end; j < cru + offj; j++, offOut++) { - double bb = b[j]; - c[offOut] += bb * aa; - } - } - - @Override - public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - AMapToData d = decompress(); - if (cl != 0 || cu != d.size()) { - throw new NotImplementedException(); - } - d.preAggregateSparse(sb, preAgg, rl, ru); - } - - @Override - public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - AMapToData d = decompress(); - try { - - d.preAggregateDDC_DDC(that.d, that._dict, ret, that._colIndexes.size()); - } catch (Exception e) { - throw new CompressionException(that.toString(), e); - } - } - - @Override - public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - AMapToData d = decompress(); - d.preAggregateDDC_SDCZ(that.d, that._dict, that._indexes, ret, that._colIndexes.size()); - } - - @Override - public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - final AOffsetIterator itThat = that._indexes.getOffsetIterator(); - final int nCol = that._colIndexes.size(); - final int finalOff = that._indexes.getOffsetToLast(); - final double[] v = ret.getValues(); - AMapToData d = decompress(); - while (true) { - final int to = d.getIndex(itThat.value()); - that._dict.addToEntry(v, 0, to, nCol); - if (itThat.value() == finalOff) - break; - itThat.next(); - } - } - - @Override - protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - AMapToData d = decompress(); - d.preAggregateDDC_RLE(that._ptr, that.d, that._dict, ret, that._colIndexes.size()); - } - - 
@Override - public boolean sameIndexStructure(AColGroupCompressed that) { - AMapToData d = decompress(); - return that instanceof ColGroupDDC && ((ColGroupDDC) that).d == d; - } - - @Override - public long estimateInMemorySize() { - AMapToData d = decompress(); - long size = super.estimateInMemorySize(); - size += d.getInMemorySize(); - return size; - } - - @Override - public AColGroup scalarOperation(ScalarOperator op) { - AMapToData d = decompress(); - if ((op.fn instanceof Plus || op.fn instanceof Minus)) { - final double v0 = op.executeScalar(0); - if (v0 == 0) - return this; - final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); - return ColGroupDDCFOR.create(_colIndexes, _dict, d, getCachedCounts(), reference); - } - return create(_colIndexes, _dict.applyScalarOp(op), d, getCachedCounts()); - } - - @Override - public AColGroup unaryOperation(UnaryOperator op) { - AMapToData d = decompress(); - return create(_colIndexes, _dict.applyUnaryOp(op), d, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); - AMapToData d = decompress(); - return create(_colIndexes, ret, d, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - if ((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && - ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { - AMapToData d = decompress(); - final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); - return ColGroupDDCFOR.create(_colIndexes, _dict, d, getCachedCounts(), reference); - } - final IDictionary ret; - if (_colIndexes.size() == 1) - ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); - else - ret = _dict.binOpRight(op, v, _colIndexes); - AMapToData d = decompress(); - return create(_colIndexes, ret, d, getCachedCounts()); - } - - // TODO - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - AMapToData d = decompress(); - d.write(out); - } - - // TODO - public static ColGroupDDC read(DataInput in) throws IOException { - IColIndex cols = ColIndexFactory.read(in); - IDictionary dict = DictionaryFactory.read(in); - AMapToData data = MapToFactory.readIn(in); - return new ColGroupDDC(cols, dict, data, null); - } - - @Override - public long getExactSizeOnDisk() { - AMapToData d = decompress(); - long ret = super.getExactSizeOnDisk(); - ret += d.getExactSizeOnDisk(); - return ret; - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - final int nVals = getNumValues(); - final int nCols = getNumCols(); - return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); - } - - @Override - protected int numRowsToMultiply() { - AMapToData d = decompress(); - return d.size(); - } - - @Override - protected double computeMxx(double c, Builtin builtin) { - return _dict.aggregate(c, builtin); - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - _dict.aggregateCols(c, builtin, _colIndexes); - } - - @Override - public boolean containsValue(double pattern) { - return _dict.containsValue(pattern); - } - - @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - if (preAgg != null) { - AMapToData d = decompress(); - return create(colIndexes, preAgg, d, getCachedCounts()); - } 
else - return null; - } - - @Override - public AColGroup sliceRows(int rl, int ru) { - try { - AMapToData d = decompress(); - return ColGroupDDC.create(_colIndexes, _dict, d.slice(rl, ru), null); - } catch (Exception e) { - throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); - } - } - - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - AMapToData d = decompress(); - return create(colIndexes, newDictionary, d, getCachedCounts()); - } - - @Override - public AColGroup append(AColGroup g) { - if (g instanceof ColGroupDDC) { - if (g.getColIndices().equals(_colIndexes)) { - - ColGroupDDC gDDC = (ColGroupDDC) g; - if (gDDC._dict.equals(_dict)) { - AMapToData d = decompress(); - AMapToData nd = d.append(gDDC.d); - return create(_colIndexes, _dict, nd, null); - } else - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - } else - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); - } else - LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - @Override - public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { - for (int i = 1; i < g.length; i++) { - if (!_colIndexes.equals(g[i]._colIndexes)) { - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); - return null; - } - - if (!(g[i] instanceof ColGroupDDC)) { - LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - final ColGroupDDC gDDC = (ColGroupDDC) g[i]; - if (!gDDC._dict.equals(_dict)) { - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - return null; - } - } - AMapToData d = decompress(); - AMapToData nd = d.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); - return create(_colIndexes, _dict, nd, null); - } - - @Override - public ICLAScheme getCompressionScheme() { - throw new NotImplementedException(); - } - - @Override - public AColGroup recompress() { - throw new NotImplementedException(); - } - - @Override - public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - try { - - IEncode enc = getEncoding(); - AMapToData d = decompress(); - EstimationFactors ef = new EstimationFactors(d.getUnique(), d.size(), d.size(), - _dict.getSparsity()); - return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); - } catch (Exception e) { - throw new DMLCompressionException(this.toString(), e); - } - } - - @Override - public IEncode getEncoding() { - AMapToData d = decompress(); - return EncodingFactory.create(d); - } - - @Override - protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - AMapToData d = decompress(); - return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), d, getCachedCounts()); - } - - @Override - public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - final SparseBlock sb = selection.getSparseBlock(); - final SparseBlock retB = ret.getSparseBlock(); - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - @Override - protected void denseSelection(MatrixBlock selection, P[] points, 
MatrixBlock ret, int rl, int ru) { - // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; - final SparseBlock sb = selection.getSparseBlock(); - final DenseBlock retB = ret.getDenseBlock(); - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, - int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - if (_colIndexes instanceof RangeIndex) - leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); - else { - - pos += cl; // left side matrix position offset. - if (a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for (int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if (idx != nVal) - values2[pos2 + _colIndexes.get(idx)] += values[pos]; - } - } else { - AMapToData d = decompress(); - for (int rc = cl; rc < cu; rc++, pos++) - values2[pos2 + _colIndexes.get(d.getIndex(rc))] += values[pos]; - } - } - } - - private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, - int cl, int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - - final int firstCol = pos2 + _colIndexes.get(0); - pos += cl; // left side matrix position offset. - AMapToData d = decompress(); - if (a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for (int rc = cl; rc < cu; rc++, pos++) { - final int idx = d.getIndex(rc); - if (idx != nVal) - values2[firstCol + idx] += values[pos]; - } - } else { - for (int rc = cl; rc < cu; rc++, pos++) - values2[firstCol + d.getIndex(rc)] += values[pos]; - } - } - - @Override - public AColGroup morph(CompressionType ct, int nRow) { - // return this; - if (ct == getCompType()) - return this; - else if (ct == CompressionType.SDC) { - // return this; - AMapToData d = decompress(); - int[] counts = getCounts(); - int maxId = maxIndex(counts); - double[] def = _dict.getRow(maxId, _colIndexes.size()); - - int offsetSize = nRow - counts[maxId]; - int[] offsets = new int[offsetSize]; - AMapToData reducedData = MapToFactory.create(offsetSize, d.getUnique()); - int o = 0; - for (int i = 0; i < nRow; i++) { - int v = d.getIndex(i); - if (v != maxId) { - offsets[o] = i; - reducedData.set(o, v); - o++; - } - } - - return ColGroupSDC.create(_colIndexes, d.size(), _dict, def, OffsetFactory.createOffset(offsets), - reducedData, null); - } else if (ct == CompressionType.CONST) { - // if(1 < getNumValues()) { - String thisS = this.toString(); - if (thisS.length() > 10000) - thisS = thisS.substring(0, 10000) + "..."; - LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); - return this; - // } - } else if (ct == CompressionType.DDCFOR) - return this; // it does not make sense to change to FOR. 
- else - return super.morph(ct, nRow); - } - - private static int maxIndex(int[] counts) { - int id = 0; - for (int i = 1; i < counts.length; i++) { - if (counts[i] > counts[id]) { - id = i; - } - } - return id; - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List<AColGroup> right) { - final IDictionary combined = combineDictionaries(nCol, right); - final IColIndex combinedColIndex = combineColIndexes(nCol, right); - AMapToData d = decompress(); - return new ColGroupDDC(combinedColIndex, combined, d, getCachedCounts()); - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) { - IDictionary b = ((ColGroupDDC) right).getDictionary(); - IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols()); - IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol)); - AMapToData d = decompress(); - return new ColGroupDDC(combinedColIndex, combined, d, getCachedCounts()); - } - - @Override - public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { - AMapToData d = decompress(); - AMapToData[] maps = d.splitReshapeDDC(multiplier); - AColGroup[] res = new AColGroup[multiplier]; - for (int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool) - throws Exception { - AMapToData d = decompress(); - AMapToData[] maps = d.splitReshapeDDCPushDown(multiplier, pool); - AColGroup[] res = new AColGroup[multiplier]; - for (int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(super.toString()); - sb.append(String.format("\n%15s", "Data: ")); - AMapToData d = decompress(); - sb.append(d); - return sb.toString(); - } - - @Override - protected boolean allowShallowIdentityRightMult() { - return true; - } + /* + * TODO: Operations with complex access patterns shall be decompressed to the DDC format, i.e., + * ... return ColGroupDDC.create(...,decompress(_dataLZW),...). We need to decide which methods are + * suitable for sequential access and which are not; for those that are not, we materialize and fall back to DDC. + * */ }

From b1bf90696636a5d36c7579ad69beb19adecd0664 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Fri, 9 Jan 2026 10:06:39 +0100
Subject: [PATCH 03/24] Wrote more TODOs and cleaned up the project.

--- .../sysds/runtime/compress/colgroup/ColGroupDDCLZW.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index d83120e1345..12ac52186a9 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -114,8 +114,10 @@ private static int[] compress(AMapToData data) { dataIntVals[i] = data.getIndex(i); } + // TODO: Populate the dictionary with the unique values. + // LZW dictionary. Maps (prefixCode, nextSymbol) to a new code. - // Using fastutil keeps lookups fast. + // Using fastutil keeps lookups fast. (TODO Dictionary)
final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); dict.defaultReturnValue(-1);

From 80274581c1353a103ffe248696818a0ab5d4ab1 Mon Sep 17 00:00:00 2001
From: Annika Lehmann
Date: Sat, 10 Jan 2026 08:46:40 +0100
Subject: [PATCH 04/24] Dictionary initialization for compress and a rudimentary implementation of decompress
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

--- .../compress/colgroup/ColGroupDDCLZW.java | 132 +++++++++++++++++- 1 file changed, 126 insertions(+), 6 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 12ac52186a9..9cc25cdb99d 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -70,6 +70,9 @@ import org.jboss.netty.handler.codec.compression.CompressionException; import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList; import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap; +import java.util.Map; +import java.util.HashMap; +import java.util.Stack; /** * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC) whose
@@ -114,6 +117,10 @@ private static int[] compress(AMapToData data) { dataIntVals[i] = data.getIndex(i); } + // Output buffer. + IntArrayList out = new IntArrayList(); + out.add(nUnique); + // TODO: Populate the dictionary with the unique values. // LZW dictionary. Maps (prefixCode, nextSymbol) to a new code.
@@ -121,12 +128,26 @@ private static int[] compress(AMapToData data) { // Using fastutil keeps lookups fast. (TODO Dictionary)
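[Editor's aside: the compress loop and the helpers added later in this patch communicate through packKey, whose definition lies outside the excerpted hunks. A minimal sketch of what it is assumed to do, consistent with the unpackfirst (key >>> 32) and unpacksecond ((int) key) helpers below — the name and body here are a hypothetical reconstruction, not quoted from the patch:

	// Packs a (prefix, symbol) pair into one long so a Long2Int map can index
	// phrase extensions without allocating pair objects: prefix in the upper
	// 32 bits, symbol in the lower 32 bits.
	private static long packKey(int prefix, int symbol) {
		return ((long) prefix << 32) | (symbol & 0xFFFFFFFFL);
	}

Both halves round-trip exactly, including the sentinel prefix -1 used during dictionary initialization: unpackfirst(packKey(-1, s)) == -1 and unpacksecond(packKey(p, s)) == s.]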
final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); dict.defaultReturnValue(-1); + // Populate the dictionary + // Store the symbols in the output stream + int index = 0; + for (int i = 0; i < nRows; i++) { + if (index == nUnique){ + break; + } + int ct = dict.get(dataIntVals[i]); + if (ct == -1) { + dict.put(dataIntVals[i], index++); + out.add(dataIntVals[i]); + } + } + if (index != nUnique) { + throw new IllegalArgumentException("Not enough symbols found for number of unique values"); + } + // Codes {0,...,nUnique - 1} are reserved for the original symbols. int nextCode = nUnique; - // Output buffer. - IntArrayList out = new IntArrayList(); - // Initialize w with the first input symbol. int w = data.getIndex(0);
@@ -150,10 +171,108 @@ private static int[] compress(AMapToData data) { return out.toIntArray(); } + private static int unpackfirst(long key){ + return (int)(key >>> 32); + } + + private static int unpacksecond(long key){ + return (int)(key); + } + + // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. (TODO) - private AMapToData decompress(int[] _dataLZW) { - AMapToData d = null; - return null; + private static int[] packint(int[] arr, int last){ + int[] result = Arrays.copyOf(arr, arr.length+1); + result[arr.length] = last; + return result; + } + + private static int[] unpack(int code, int alphabetSize, Map<Integer, Long> dict) { + + Stack<Integer> stack = new Stack<>(); + + int c = code; + + while (c >= alphabetSize) { + long key = dict.get(c); + int symbol = unpacksecond(key); + stack.push(symbol); + c = unpackfirst(key); + } + + // Base symbol + stack.push(c); + int [] outarray = new int[stack.size()]; + int i = 0; + // write the phrase to the output in the correct order + while (!stack.isEmpty()) { + outarray[i++] = stack.pop(); + } + return outarray; + } + + private static void addtoOutput(IntArrayList outarray, int[] code) { + for (int i = 0; i < code.length; i++) { + outarray.add(code[i]); + } + } + + private static IntArrayList decompress(int[] code) { //TODO: return AMapToData + + Map<Integer, Long> dict = new HashMap<>(); + + //HashMap dict = new HashMap<>(); + int alphabetSize = code[0]; + //int nextCode = 0; + + + // Fill dictionary with values 0-255 + for (int i = 0; i < alphabetSize; i++) { + //dict.put(i, new int[]{code[1+i]}); // TODO: take the value directly if < alphabetSize? + //_dict.put(List.of(i), nextCode++); + dict.put(i, packKey(-1, code[i])); + } + + // Result of the decompression + IntArrayList o = new IntArrayList(); + //List o = new ArrayList<>(); + + int old = code[1+alphabetSize]; + //long next = dict.get(old); + int[] next = unpack(old, alphabetSize, dict); + addtoOutput(o, next); + int c = next[0]; + + + for (int i = alphabetSize+2; i < code.length; i++) { + int key = code[i]; + if (! dict.containsKey(key)) { + int[] oldnext = unpack(old, alphabetSize, dict); + int first = oldnext[0]; + next = packint(oldnext, first); + } else { + next = unpack(key, alphabetSize, dict); + } + for (int inh : next){ // TODO: extract into a separate method + o.add(inh); + } + int first = next[0]; + long s = packKey(old, first); + dict.put(alphabetSize+i, s); // count instead of alphabetSize + //count++; + old = key; + } + return o; + /*AMapToData d = _data; + if (d == null) { + synchronized (this) { + d = _data; + if (d == null) { + d = decode(_dataLZW, _nRows, _nUnique); + _data = d; + } + } + }*/ + //return null; }
@@ -225,3 +344,4 @@ else if (data.getUnique() == 1) * suitable for sequential access and which are not; for those that are not, we materialize and fall back to DDC. * */ } +

From ef3b8347b7974ccccdc160d2a315306abae82e51 Mon Sep 17 00:00:00 2001
From: Annika Lehmann
Date: Sat, 10 Jan 2026 08:54:56 +0100
Subject: [PATCH 05/24] Improved readability

--- .../compress/colgroup/ColGroupDDCLZW.java | 29 ++++--------------- 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 9cc25cdb99d..383a90d4cd2 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -121,7 +121,6 @@ private static int[] compress(AMapToData data) { IntArrayList out = new IntArrayList(); out.add(nUnique); - // TODO: Populate the dictionary with the unique values. // LZW dictionary. Maps (prefixCode, nextSymbol) to a new code. // Using fastutil keeps lookups fast. (TODO Dictionary)
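[Editor's aside: a self-contained sketch of the encoder this patch implements, written against plain JDK collections instead of the fastutil map used above, and assuming the pending phrase w is flushed after the loop (that line falls outside the quoted hunks). Encoding the mapping [2,0,2,3,0,2,1,0,2] with nUnique = 4 yields [2,0,2,3,5,1,5]; new codes are assigned in encounter order, 4=(2,0), 5=(0,2), 6=(2,3), 7=(3,0), 8=(5,1), 9=(1,0), so nine input symbols shrink to seven codes.

	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.List;
	import java.util.Map;

	public class LzwEncodeSketch {
		// Same bit-packing idea as in the patch: prefix code in the upper 32 bits.
		static long packKey(int prefix, int symbol) {
			return ((long) prefix << 32) | (symbol & 0xFFFFFFFFL);
		}

		public static void main(String[] args) {
			int[] data = {2, 0, 2, 3, 0, 2, 1, 0, 2};
			int nUnique = 4; // base codes 0..3 are implicit
			Map<Long, Integer> dict = new HashMap<>();
			List<Integer> out = new ArrayList<>();
			int nextCode = nUnique;
			int w = data[0];
			for (int i = 1; i < data.length; i++) {
				int k = data[i];
				Integer wk = dict.get(packKey(w, k));
				if (wk != null)
					w = wk; // the phrase w+k is known; keep extending it
				else {
					out.add(w); // emit the longest known phrase
					dict.put(packKey(w, k), nextCode++); // register w+k under a new code
					w = k; // restart from the mismatching symbol
				}
			}
			out.add(w); // flush the pending phrase
			System.out.println(out); // prints [2, 0, 2, 3, 5, 1, 5]
		}
	}
]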
@@ -217,27 +216,23 @@ private static void addtoOutput(IntArrayList outarray, int[] code) { } private static IntArrayList decompress(int[] code) { //TODO: return AMapToData - + // Dictionary Map<Integer, Long> dict = new HashMap<>(); - //HashMap dict = new HashMap<>(); + // Extract alphabet size int alphabetSize = code[0]; - //int nextCode = 0; - // Fill dictionary with values 0-255 + // Dictionary initialization for (int i = 0; i < alphabetSize; i++) { - //dict.put(i, new int[]{code[1+i]}); // TODO: take the value directly if < alphabetSize? - //_dict.put(List.of(i), nextCode++); dict.put(i, packKey(-1, code[i])); } // Result of the decompression IntArrayList o = new IntArrayList(); - //List o = new ArrayList<>(); + // Decompression int old = code[1+alphabetSize]; - //long next = dict.get(old); int[] next = unpack(old, alphabetSize, dict); addtoOutput(o, next); int c = next[0];
@@ -252,27 +247,15 @@ private static IntArrayList decompress(int[] code) { //TODO: return AMapToData } else { next = unpack(key, alphabetSize, dict); } - for (int inh : next){ // TODO: extract into a separate method + for (int inh : next){ // TODO: efficiency o.add(inh); } int first = next[0]; long s = packKey(old, first); - dict.put(alphabetSize+i, s); // count instead of alphabetSize - //count++; + dict.put(alphabetSize+i, s); old = key; } return o; - /*AMapToData d = _data; - if (d == null) { - synchronized (this) { - d = _data; - if (d == null) { - d = decode(_dataLZW, _nRows, _nUnique); - _data = d; - } - } - }*/ - //return null; }

From 988682114c176b64abc41abc93b18cdd4736487d Mon Sep 17 00:00:00 2001
From: fjobs
Date: Sun, 11 Jan 2026 11:25:24 +0100
Subject: [PATCH 06/24] Minor error fixing. Redesigned compress method.

--- .../compress/colgroup/ColGroupDDCLZW.java | 40 +++++-------------- 1 file changed, 11 insertions(+), 29 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 383a90d4cd2..4ad6fff6feb 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -111,38 +111,18 @@ private static int[] compress(AMapToData data) { throw new IllegalArgumentException("Invalid input: data has no unique values"); } - // Extract _data values as int array. - final int[] dataIntVals = new int[nRows]; - for (int i = 0; i < nRows; i++) { - dataIntVals[i] = data.getIndex(i); - } - - // Output buffer. - IntArrayList out = new IntArrayList(); - out.add(nUnique); + // Fast-path: single symbol + if (nRows == 1) + return new int[]{data.getIndex(0)}; - // LZW dictionary. Maps (prefixCode, nextSymbol) to a new code. + // LZW dictionary. Maps (prefixCode, nextSymbol) -> newCode (to a new code). // Using fastutil keeps lookups fast. (TODO Dictionary)
final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); dict.defaultReturnValue(-1); - // Populate the dictionary - // Store the symbols in the output stream - int index = 0; - for (int i = 0; i < nRows; i++) { - if (index == nUnique){ - break; - } - int ct = dict.get(dataIntVals[i]); - if (ct == -1) { - dict.put(dataIntVals[i], index++); - out.add(dataIntVals[i]); - } - } - if (index != nUnique) { - throw new IllegalArgumentException("Not enough symbols found for number of unique values"); - } + // Output buffer (heuristic capacity; avoids frequent reallocs) + final IntArrayList out = new IntArrayList(Math.max(16, nRows / 2)); // Codes {0,...,nUnique - 1} are reserved for the original symbols. int nextCode = nUnique; // Initialize w with the first input symbol. int w = data.getIndex(0); // Process the remaining input symbols. + // Example: _data = [2,0,2,3,0,2,1,0,2]. for (int i = 1; i < nRows; i++) { - int k = data.getIndex(i); // next input symbol - long key = packKey(w, k); // encode (w,k) into long key + final int k = data.getIndex(i); // next input symbol + final long key = packKey(w, k); // encode (w,k) into long key int wk = dict.get(key); // look if wk exists in dict if (wk != -1) { w = wk; // wk exists in dict so replace w by wk and continue. } else { - // wk does not exist in dict. + // wk does not exist in dict. Output the current phrase, add the new phrase, and restart at k. out.add(w); dict.put(key, nextCode++); w = k; // Start new phrase with k

From e0d5d75d99880b53b799d370dbf698587a3158b4 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Sun, 11 Jan 2026 11:30:58 +0100
Subject: [PATCH 07/24] Added read/write methods to serialize and deserialize from stream.

--- .../compress/colgroup/ColGroupDDCLZW.java | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 4ad6fff6feb..4b182c8b653 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -117,7 +117,7 @@ private static int[] compress(AMapToData data) { // LZW dictionary. Maps (prefixCode, nextSymbol) -> newCode (to a new code). - // Using fastutil keeps lookups fast. (TODO Dictionary) + // Using fastutil keeps lookups fast. (TODO improve time/space complexity) final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); dict.defaultReturnValue(-1);
@@ -308,5 +307,38 @@ else if (data.getUnique() == 1) * suitable for sequential access and which are not; for those that are not, we materialize and fall back to DDC. * */ + + // Deserialize a ColGroupDDCLZW object from a binary stream. + public static ColGroupDDCLZW read(DataInput in) throws IOException { + final IColIndex colIndexes = ColIndexFactory.read(in); + final IDictionary dict = DictionaryFactory.read(in); + + // Metadata for the lzw mapping.
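+ // (Editor's sketch) Assumed stream layout, mirroring write() below:
+ // colIndexes | dictionary | nRows:int | nUnique:int | length:int | LZW codes (length x int).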
+ final int nRows = in.readInt(); + final int nUnique = in.readInt(); + + // Read the compressed mapping array. + final int len = in.readInt(); + if (len < 0) + throw new IOException("Invalid LZW data length: " + len); + + final int[] dataLZW = new int[len]; + for (int i = 0; i < len; i++) + dataLZW[i] = in.readInt(); + + // cachedCounts is currently not serialized (mirrors ColGroupDDC.read, which passes null). + return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null); + } + + // Serialize a ColGroupDDCLZW object into a binary stream. + @Override + public void write(DataOutput out) throws IOException { + _colIndexes.write(out); + _dict.write(out); + out.writeInt(_nRows); + out.writeInt(_nUnique); + out.writeInt(_dataLZW.length); + for (int i : _dataLZW) out.writeInt(i); + } }

From beb4613b990e513f85ac6e35360d00fc9fe7d13a Mon Sep 17 00:00:00 2001
From: fjobs
Date: Sun, 11 Jan 2026 12:16:24 +0100
Subject: [PATCH 08/24] Commented code, error handling for compress. Next step: make compress and decompress and the data structures they use compatible.

--- .../runtime/compress/colgroup/ColGroupDDCLZW.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 4b182c8b653..8e8bc7dfbb1 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -128,12 +128,18 @@ private static int[] compress(AMapToData data) { int nextCode = nUnique; // Initialize w with the first input symbol. + // AMapToData stores dictionary indices, not actual data values. + // Since indices reference positions in an IDictionary, they are always in the valid index range 0 ... nUnique-1. int w = data.getIndex(0); // Process the remaining input symbols. // Example: _data = [2,0,2,3,0,2,1,0,2]. for (int i = 1; i < nRows; i++) { final int k = data.getIndex(i); // next input symbol + + if(k < 0 || k >= nUnique) + throw new IllegalArgumentException("Symbol out of range: " + k + " (nUnique=" + nUnique + ")"); + final long key = packKey(w, k); // encode (w,k) into long key int wk = dict.get(key); // look if wk exists in dict
@@ -151,21 +157,24 @@ private static int[] compress(AMapToData data) { return out.toIntArray(); } + // Unpack the upper 32 bits (w) of a (w,k) key pair. private static int unpackfirst(long key){ return (int)(key >>> 32); } + // Unpack the lower 32 bits (k) of a (w,k) key pair. private static int unpacksecond(long key){ return (int)(key); } - // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. (TODO) + // Append a symbol to the end of an int array. private static int[] packint(int[] arr, int last){ int[] result = Arrays.copyOf(arr, arr.length+1); result[arr.length] = last; return result; } + // Reconstruct the phrase for an LZW code. private static int[] unpack(int code, int alphabetSize, Map<Integer, Long> dict) { Stack<Integer> stack = new Stack<>();
@@ -190,12 +199,15 @@ private static int[] unpack(int code, int alphabetSize, Map<Integer, Long> dict) { return outarray; } + // Append a phrase to the output. private static void addtoOutput(IntArrayList outarray, int[] code) { for (int i = 0; i < code.length; i++) { outarray.add(code[i]); } } + // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. + // TODO: Compatibility with compress() and the data structures used. Improve time/space complexity.
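+ // (Editor's note) The two sides index the dictionary in opposite directions: compress keys it
+ // by packKey(prefixCode, nextSymbol) -> code, while the decoder needs code -> packKey(prefixCode, lastSymbol)
+ // so that unpack() can rebuild each phrase back-to-front without storing full int[] phrases.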
private static IntArrayList decompress(int[] code) { //TODO: return AMapToData // Dictionary Map<Integer, Long> dict = new HashMap<>();

From 620e03aa0abc953af90f05480e053d2d03928b17 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Sun, 11 Jan 2026 18:18:56 +0100
Subject: [PATCH 09/24] Added first stages of tests. Improved the compression and decompression algorithms and made them compatible.

--- .../compress/colgroup/ColGroupDDCLZW.java | 122 +++--- .../compress/colgroup/ColGroupDDCTest.java | 363 +++++++++--------- 2 files changed, 261 insertions(+), 224 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 8e8bc7dfbb1..80fc69a7371 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -70,6 +70,8 @@ import org.jboss.netty.handler.codec.compression.CompressionException; import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList; import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap; + + import java.util.Map; import java.util.HashMap; import java.util.Stack;
@@ -136,8 +138,8 @@ private static int[] compress(AMapToData data) { for (int i = 1; i < nRows; i++) { final int k = data.getIndex(i); // next input symbol - - if(k < 0 || k >= nUnique) + + if (k < 0 || k >= nUnique) throw new IllegalArgumentException("Symbol out of range: " + k + " (nUnique=" + nUnique + ")"); final long key = packKey(w, k); // encode (w,k) into long key
@@ -158,31 +160,36 @@ private static int[] compress(AMapToData data) { return out.toIntArray(); } // Unpack the upper 32 bits (w) of a (w,k) key pair. - private static int unpackfirst(long key){ - return (int)(key >>> 32); + private static int unpackfirst(long key) { + return (int) (key >>> 32); } // Unpack the lower 32 bits (k) of a (w,k) key pair. - private static int unpacksecond(long key){ - return (int)(key); + private static int unpacksecond(long key) { + return (int) (key); } // Append a symbol to the end of an int array. - private static int[] packint(int[] arr, int last){ - int[] result = Arrays.copyOf(arr, arr.length+1); + private static int[] packint(int[] arr, int last) { + int[] result = Arrays.copyOf(arr, arr.length + 1); result[arr.length] = last; return result; } // Reconstruct the phrase for an LZW code. - private static int[] unpack(int code, int alphabetSize, Map<Integer, Long> dict) { + private static int[] unpack(int code, int nUnique, Map<Integer, Long> dict) { + // Base symbol (implicit alphabet) + if (code < nUnique) + return new int[]{code}; Stack<Integer> stack = new Stack<>(); - int c = code; - while (c >= alphabetSize) { - long key = dict.get(c); + int c = code; + while (c >= nUnique) { + Long key = dict.get(c); + if (key == null) + throw new IllegalStateException("Missing dictionary entry for code: " + c); + int symbol = unpacksecond(key); stack.push(symbol); c = unpackfirst(key); } // Base symbol stack.push(c); - int [] outarray = new int[stack.size()]; + int[] outarray = new int[stack.size()]; int i = 0; // write the phrase to the output in the correct order while (!stack.isEmpty()) { outarray[i++] = stack.pop(); } return outarray; } - // Append a phrase to the output.
- private static void addtoOutput(IntArrayList outarray, int[] code) { - for (int i = 0; i < code.length; i++) { - outarray.add(code[i]); - } - } - // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. // TODO: Compatibility with compress() and used data structures. Improve time/space complexity. - private static IntArrayList decompress(int[] code) { //TODO: return AMapToData - // Dictionary - Map dict = new HashMap<>(); - - // Extract alphabet size - int alphabetSize = code[0]; + private static AMapToData decompress(int[] codes, int nUnique, int nRows) { + // Validate input arguments. + if (codes == null) + throw new IllegalArgumentException("codes is null"); + if (codes.length == 0) + throw new IllegalArgumentException("codes is empty"); + if (nUnique <= 0) + throw new IllegalArgumentException("Invalid alphabet size: " + nUnique); + if (nRows <= 0) { + throw new IllegalArgumentException("Invalid nRows: " + nRows); + } + // Maps: code -> packKey(prefixCode, lastSymbolOfPhrase). + // Base symbols (0..nUnique-1) are implicit and not stored here. + final Map dict = new HashMap<>(); - // Dictionary Initalisierung - for (int i = 0; i < alphabetSize; i++) { - dict.put(i, packKey(-1, code[i])); - } + // Output mapping that will be reconstructed. + AMapToData out = MapToFactory.create(nRows, nUnique); + int outPos = 0; // Current write position in the output mapping. - // Result der Decompression - IntArrayList o = new IntArrayList(); + // Decode the first code. The first code always expands to a valid phrase without needing + // any dictionary entries. + int old = codes[0]; + int[] oldPhrase = unpack(old, nUnique, dict); + for (int v : oldPhrase) + out.set(outPos++, v); - // Decompression - int old = code[1+alphabetSize]; - int[] next = unpack(old, alphabetSize, dict); - addtoOutput(o, next); - int c = next[0]; + // Next free dictionary code. Codes 0..nUnique-1 are reserved for base symbols. + int nextCode = nUnique; + // Process remaining codes. + for (int i = 1; i < codes.length; i++) { + int key = codes[i]; - for (int i = alphabetSize+2; i < code.length; i++) { - int key = code[i]; - if (! dict.containsKey(key)) { - int[] oldnext = unpack(old, alphabetSize, dict); - int first = oldnext[0]; - next = packint(oldnext, first); + int[] next; + if (key < nUnique || dict.containsKey(key)) { + // Normal case: The code is either a base symbol or already present in the dictionary. + next = unpack(key, nUnique, dict); } else { - next = unpack(key, alphabetSize, dict); - } - for (int inh : next){ // TODO: effizienz - o.add(inh); + // KwKwK special case: The current code refers to a phrase that is being defined right now. + // next = oldPhrase + first(oldPhrase). + int first = oldPhrase[0]; + next = packint(oldPhrase, first); } + + // Append the reconstructed phrase to the output mapping. + for (int v : next) out.set(outPos++, v); + + // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)). int first = next[0]; - long s = packKey(old, first); - dict.put(alphabetSize+i, s); + dict.put(nextCode++, packKey(old, first)); + + // Advance. old = key; + oldPhrase = next; } - return o; + + // Safety check: decoder must produce exactly nRows symbols. + if (outPos != nRows) + throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + nRows); + + // Return the reconstructed mapping. 
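+ // (Editor's note) Round trip of the example from the comment in compress(): with nUnique = 4 and
+ // nRows = 9, the codes [2, 0, 2, 3, 5, 1, 5] decode back to [2, 0, 2, 3, 0, 2, 1, 0, 2].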
+ return out; } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index 0f04cfc9c27..f3b1350cdc0 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDCLZW; import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; @@ -38,179 +39,193 @@ public class ColGroupDDCTest { - protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName()); - - @Test - public void testConvertToDeltaDDCBasic() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(11.0, mb.get(1, 0), 0.0); - assertEquals(21.0, mb.get(1, 1), 0.0); - assertEquals(12.0, mb.get(2, 0), 0.0); - assertEquals(22.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCSingleColumn() { - IColIndex colIndexes = ColIndexFactory.create(1); - double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(5, 5); - for(int i = 0; i < 5; i++) - data.set(i, i); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(5, 1, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5); - - assertEquals(1.0, mb.get(0, 0), 0.0); - assertEquals(2.0, mb.get(1, 0), 0.0); - assertEquals(3.0, mb.get(2, 0), 0.0); - assertEquals(4.0, mb.get(3, 0), 0.0); - assertEquals(5.0, mb.get(4, 0), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithRepeatedValues() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {10.0, 20.0, 10.0, 20.0, 10.0, 20.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = 
(ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(10.0, mb.get(1, 0), 0.0); - assertEquals(20.0, mb.get(1, 1), 0.0); - assertEquals(10.0, mb.get(2, 0), 0.0); - assertEquals(20.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithNegativeDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {10.0, 20.0, 8.0, 15.0, 12.0, 25.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(8.0, mb.get(1, 0), 0.0); - assertEquals(15.0, mb.get(1, 1), 0.0); - assertEquals(12.0, mb.get(2, 0), 0.0); - assertEquals(25.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithZeroDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {5.0, 0.0, 5.0, 0.0, 0.0, 5.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(5.0, mb.get(0, 0), 0.0); - assertEquals(0.0, mb.get(0, 1), 0.0); - assertEquals(5.0, mb.get(1, 0), 0.0); - assertEquals(0.0, mb.get(1, 1), 0.0); - assertEquals(0.0, mb.get(2, 0), 0.0); - assertEquals(5.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCMultipleUniqueDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(4, 4); - for(int i = 0; i < 4; i++) - data.set(i, i); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(4, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4); - - assertEquals(1.0, mb.get(0, 0), 0.0); - assertEquals(2.0, mb.get(0, 1), 0.0); - assertEquals(3.0, mb.get(1, 0), 0.0); - assertEquals(4.0, mb.get(1, 1), 0.0); - assertEquals(5.0, mb.get(2, 0), 0.0); - assertEquals(6.0, mb.get(2, 1), 0.0); - assertEquals(7.0, mb.get(3, 0), 0.0); - assertEquals(8.0, mb.get(3, 1), 0.0); - } + protected static final Log LOG = 
LogFactory.getLog(ColGroupDDCTest.class.getName()); + + @Test + public void testLZWRoundTripMapping() throws Exception { + // Build a mapping with repetition to actually exercise LZW + // Example: [2,0,2,3,0,2,1,0,2] + final int nRows = 9; + final int nUnique = 4; + AMapToData data = MapToFactory.create(nRows, nUnique); + int[] src = new int[]{2, 0, 2, 3, 0, 2, 1, 0, 2}; + for (int i = 0; i < nRows; i++) + data.set(i, src[i]); + + // TODO: Write tests for ColGroupDDCLZW. + } + + @Test + public void testConvertToDeltaDDCBasic() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(11.0, mb.get(1, 0), 0.0); + assertEquals(21.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(22.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCSingleColumn() { + IColIndex colIndexes = ColIndexFactory.create(1); + double[] dictValues = new double[]{1.0, 2.0, 3.0, 4.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(5, 5); + for (int i = 0; i < 5; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(5, 1, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(1, 0), 0.0); + assertEquals(3.0, mb.get(2, 0), 0.0); + assertEquals(4.0, mb.get(3, 0), 0.0); + assertEquals(5.0, mb.get(4, 0), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithRepeatedValues() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{10.0, 20.0, 10.0, 20.0, 10.0, 20.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(10.0, mb.get(1, 0), 0.0); + assertEquals(20.0, mb.get(1, 1), 0.0); + assertEquals(10.0, mb.get(2, 0), 0.0); + assertEquals(20.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithNegativeDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new 
double[]{10.0, 20.0, 8.0, 15.0, 12.0, 25.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(8.0, mb.get(1, 0), 0.0); + assertEquals(15.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(25.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithZeroDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{5.0, 0.0, 5.0, 0.0, 0.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(5.0, mb.get(0, 0), 0.0); + assertEquals(0.0, mb.get(0, 1), 0.0); + assertEquals(5.0, mb.get(1, 0), 0.0); + assertEquals(0.0, mb.get(1, 1), 0.0); + assertEquals(0.0, mb.get(2, 0), 0.0); + assertEquals(5.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCMultipleUniqueDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(4, 4); + for (int i = 0; i < 4; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(0, 1), 0.0); + assertEquals(3.0, mb.get(1, 0), 0.0); + assertEquals(4.0, mb.get(1, 1), 0.0); + assertEquals(5.0, mb.get(2, 0), 0.0); + assertEquals(6.0, mb.get(2, 1), 0.0); + assertEquals(7.0, mb.get(3, 0), 0.0); + assertEquals(8.0, mb.get(3, 1), 0.0); + } } From b7911d7492c7ba832c5d66ca1b50d6e4cab5d0ca Mon Sep 17 00:00:00 2001 From: fjobs Date: Mon, 12 Jan 2026 11:37:28 +0100 Subject: [PATCH 10/24] Added convertToDDCLZW() method to ColGroupDDC Class. Added convertToDDC test for ColGroupDDCTest. Improved compress/decompress methods in LZW class. 
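
For intuition, a minimal LZW round trip over the base alphabet {0, 1}
(sketch; codes 0..nUnique-1 are reserved for the base symbols): encoding
the mapping [0, 1, 0, 1, 0] emits the codes [0, 1, 2, 0] while assigning
2 = "01", 3 = "10" and 4 = "010"; the decoder rebuilds the same entries
as (prefixCode, lastSymbol) pairs and reproduces the five input symbols.
This is the property the round-trip test is meant to assert once
implemented.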
--- .../compress/colgroup/ColGroupDDC.java | 2164 ++++++++--------- .../compress/colgroup/ColGroupDDCLZW.java | 260 +- .../compress/colgroup/ColGroupDDCTest.java | 67 +- 3 files changed, 1394 insertions(+), 1097 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index ac4defcabd5..c0d78e11783 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -75,1091 +75,1081 @@ * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC). */ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { - private static final long serialVersionUID = -5769772089913918987L; - - protected final AMapToData _data; - - static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; - - protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - super(colIndexes, dict, cachedCounts); - _data = data; - - if(CompressedMatrixBlock.debug) { - if(getNumValues() == 0) - throw new DMLCompressionException("Invalid construction with empty dictionary"); - if(data.size() == 0) - throw new DMLCompressionException("Invalid length of the data. is zero"); - - if(data.getUnique() != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " - + dict.getNumberOfValues(colIndexes.size())); - int[] c = getCounts(); - if(c.length != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid DDC Construction"); - data.verify(); - } - } - - public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - if(data.getUnique() == 1) - return ColGroupConst.create(colIndexes, dict); - else if(dict == null) - return new ColGroupEmpty(colIndexes); - else - return new ColGroupDDC(colIndexes, dict, data, cachedCounts); - } - - public AColGroup sparsifyFOR() { - return ColGroupDDCFOR.sparsifyFOR(this); - } - - public CompressionType getCompType() { - return CompressionType.DDC; - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock sb) { - for(int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = _data.getIndex(r); - if(sb.isEmpty(vr)) - continue; - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); - } - } - - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values) { - final int idxSize = _colIndexes.size(); - if(db.isContiguous()) { - final int nColOut = db.getDim(1); - if(idxSize == 1 && nColOut == 1) - decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); - else if(idxSize == 1) - decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); - else if(idxSize == nColOut) // offC == 0 implied - decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); - else if(offC == 0 && offR == 0) - decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); - else if(offC == 0) - decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); - else - 
decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } - else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } - - private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - final double[] c = db.values(0); - final int nCols = db.getDim(1); - final int colOff = _colIndexes.get(0) + offC; - for(int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) - c[offT] += values[_data.getIndex(i)]; - - } - - @Override - public AMapToData getMapToData() { - return _data; - } - - private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - final double[] c = db.values(0); - decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data); - } - - private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, - double[] values, AMapToData data) { - data.decompressToRange(c, rl, ru, offR, values); - - } - - private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol) { - final double[] c = db.values(0); - for(int r = rl; r < ru; r++) { - final int start = _data.getIndex(r) * nCol; - final int offStart = (offR + r) * nCol; - LibMatrixMult.vectAdd(values, c, start, offStart, nCol); - } - } - - private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol, int colOut) { - int off = (rl + offR) * colOut; - for(int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { - final double[] c = db.values(offT); - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { - final int nCol = _colIndexes.size(); - final int nColU = db.getDim(1); - final double[] c = db.values(0); - for(int i = rl; i < ru; i++) { - final int off = i * nColU; - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - SparseBlock sb) { - for(int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = _data.getIndex(r); - if(sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for(int j = apos; j < alen; j++) - ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); - } - } - - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); - } - - protected void 
decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final int rowIndex = _data.getIndex(i) * nCol; - for(int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); - } - } - - @Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { - for(int i = rl; i < ru; i++) { - final int vr = _data.getIndex(i); - if(sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for(int j = apos; j < alen; j++) { - final int rowOut = _colIndexes.get(aix[j]); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - c[off + i] += aval[j]; - } - } - } - - @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { - final int nCol = _colIndexes.size(); - for(int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - for(int i = rl; i < ru; i++) { - final double v = dict[_data.getIndex(i) * nCol + j]; - c[off + i] += v; - } - } - } - - @Override - protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { - - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for(int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - for(int i = 0; i < _data.size(); i++) { - int di = _data.getIndex(i); - if(sb.isEmpty(di)) - continue; - - final int apos = sb.pos(di); - final int alen = sb.size(di) + apos; - final int[] aix = sb.indexes(di); - final double[] aval = sb.values(di); - - for(int j = apos; j < alen; j++) { - sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); - } - } - - } - - @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for(int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - final int nCol = _colIndexes.size(); - for(int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - SparseRow r = sbr.get(rowOut); - - for(int i = 0; i < _data.size(); i++) { - final double v = dict[_data.getIndex(i) * nCol + j]; - r = r.append(i, v); - } - sbr.set(rowOut, r, false); - } - } - - @Override - public double getIdx(int r, int colIdx) { - return _dict.getValue(_data.getIndex(r), colIdx, _colIndexes.size()); - } - - @Override - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { - for(int rix = rl; rix < ru; rix++) - c[rix] += preAgg[_data.getIndex(rix)]; - } - - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - for(int i = rl; i < ru; i++) - c[i] = builtin.execute(c[i], preAgg[_data.getIndex(i)]); - } - - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - for(int rix = rl; rix < ru; rix++) - c[rix] *= preAgg[_data.getIndex(rix)]; - } - - @Override - public int[] getCounts(int[] counts) { - return _data.getCounts(counts); - } - - @Override - public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if(_colIndexes.size() == 1) - 
leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); - else - lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final DenseBlock retV = result.getDenseBlock(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. - if(matrix.isEmpty()) - return; - else if(matrix.isInSparseFormat()) { - if(cl != 0 || cu != _data.size()) - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); - } - else if(!matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, - cu); - else - lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru) { - - if(retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru) { - final int colOut = _colIndexes.get(0); - - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - - for(int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru) { - final int colOut = _colIndexes.get(0); - - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for(int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if(retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final 
double[] retV = ret.values(r); - // final int offR = r * nColRet; - for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if(retV.isContiguous()) - lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, - int nColRet, double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for(int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for(int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for(int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for(int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for(int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for(int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - - if(matrix.isInSparseFormat()) - lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - final DenseBlock db = result.getDenseBlock(); - final SparseBlock sb = matrix.getSparseBlock(); - - if(cl != 0 || cu != _data.size()) { - // sub part - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final double[] retV = db.values(r); - final 
int pos = db.pos(r); - lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); - } - } - else { - for(int r = rl; r < ru; r++) - _data.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); - } - } - - private final void lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) - return; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - _dict.multiplyScalar(aval[i], retV, offR, _data.getIndex(aix[i]), _colIndexes); - } - - private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if(matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final double[] retV = result.getDenseBlockValues(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] mV = matrix.getDenseBlockValues(); - for(int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for(int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); - } - } - - private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final double[] retV = result.getDenseBlockValues(); - final int nColRet = result.getNumColumns(); - for(int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = r * nColRet; - for(int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); - } - } - - @Override - public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); - } - - @Override - public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - DenseBlock db = that.getDenseBlock(); - DenseBlock retDB = ret.getDenseBlock(); - for(int i = rl; i < ru; i++) - leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); - } - - @Override - public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { - if(_dict instanceof IdentityDictionary) - identityRightDecompressingMult(right, ret, rl, ru, crl, cru); - else - defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); - } - - private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for(int i = rl; i < ru; i++) { - int k = _data.getIndex(i); - final int offOut = i * jd + crl; - final double aa = 1; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, 
vVec); - } - } - - private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] a = _dict.getValues(); - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int kd = _colIndexes.size(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - - final int blkzI = 32; - final int blkzK = 24; - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for(int bi = rl; bi < ru; bi += blkzI) { - final int bie = Math.min(ru, bi + blkzI); - for(int bk = 0; bk < kd; bk += blkzK) { - final int bke = Math.min(kd, bk + blkzK); - for(int i = bi; i < bie; i++) { - int offi = _data.getIndex(i) * kd; - final int offOut = i * jd + crl; - for(int k = bk; k < bke; k++) { - final double aa = a[offi + k]; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - } - } - } - - final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { - vVec = vVec.broadcast(aa); - final int offj = k * jd; - final int end = endT + offj; - for(int j = offj + crl; j < end; j += vLen, offOut += vLen) { - DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); - DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); - res = vVec.fma(bVec, res); - res.intoArray(c, offOut); - } - for(int j = end; j < cru + offj; j++, offOut++) { - double bb = b[j]; - c[offOut] += bb * aa; - } - } - - @Override - public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - if(cl != 0 || cu != _data.size()) { - throw new NotImplementedException(); - } - _data.preAggregateSparse(sb, preAgg, rl, ru); - } - - @Override - public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - try { - - _data.preAggregateDDC_DDC(that._data, that._dict, ret, that._colIndexes.size()); - } - catch(Exception e) { - throw new CompressionException(that.toString(), e); - } - } - - @Override - public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - _data.preAggregateDDC_SDCZ(that._data, that._dict, that._indexes, ret, that._colIndexes.size()); - } - - @Override - public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - final AOffsetIterator itThat = that._indexes.getOffsetIterator(); - final int nCol = that._colIndexes.size(); - final int finalOff = that._indexes.getOffsetToLast(); - final double[] v = ret.getValues(); - while(true) { - final int to = _data.getIndex(itThat.value()); - that._dict.addToEntry(v, 0, to, nCol); - if(itThat.value() == finalOff) - break; - itThat.next(); - } - } - - @Override - protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - _data.preAggregateDDC_RLE(that._ptr, that._data, that._dict, ret, that._colIndexes.size()); - } - - @Override - public boolean sameIndexStructure(AColGroupCompressed that) { - return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data; - } - - @Override - public ColGroupType getColGroupType() { - return ColGroupType.DDC; - } - - @Override - public long estimateInMemorySize() { - long size = super.estimateInMemorySize(); - size += _data.getInMemorySize(); - return size; - } - - @Override - public AColGroup scalarOperation(ScalarOperator op) { - if((op.fn instanceof Plus || op.fn instanceof 
Minus)) { - final double v0 = op.executeScalar(0); - if(v0 == 0) - return this; - final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); - return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); - } - return create(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); - } - - @Override - public AColGroup unaryOperation(UnaryOperator op) { - return create(_colIndexes, _dict.applyUnaryOp(op), _data, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); - return create(_colIndexes, ret, _data, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - if((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && - ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { - final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); - return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); - } - final IDictionary ret; - if(_colIndexes.size() == 1) - ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); - else - ret = _dict.binOpRight(op, v, _colIndexes); - return create(_colIndexes, ret, _data, getCachedCounts()); - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - _data.write(out); - } - - public static ColGroupDDC read(DataInput in) throws IOException { - IColIndex cols = ColIndexFactory.read(in); - IDictionary dict = DictionaryFactory.read(in); - AMapToData data = MapToFactory.readIn(in); - return new ColGroupDDC(cols, dict, data, null); - } - - @Override - public long getExactSizeOnDisk() { - long ret = super.getExactSizeOnDisk(); - ret += _data.getExactSizeOnDisk(); - return ret; - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - final int nVals = getNumValues(); - final int nCols = getNumCols(); - return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); - } - - @Override - protected int numRowsToMultiply() { - return _data.size(); - } - - @Override - protected double computeMxx(double c, Builtin builtin) { - return _dict.aggregate(c, builtin); - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - _dict.aggregateCols(c, builtin, _colIndexes); - } - - @Override - public boolean containsValue(double pattern) { - return _dict.containsValue(pattern); - } - - @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - if(preAgg != null) - return create(colIndexes, preAgg, _data, getCachedCounts()); - else - return null; - } - - @Override - public AColGroup sliceRows(int rl, int ru) { - try { - return ColGroupDDC.create(_colIndexes, _dict, _data.slice(rl, ru), null); - } - catch(Exception e) { - throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); - } - } - - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - return create(colIndexes, newDictionary, _data, getCachedCounts()); - } - - @Override - public AColGroup append(AColGroup g) { - if(g instanceof ColGroupDDC) { - if(g.getColIndices().equals(_colIndexes)) { - - ColGroupDDC gDDC = (ColGroupDDC) g; - if(gDDC._dict.equals(_dict)) { - AMapToData nd = _data.append(gDDC._data); - return 
create(_colIndexes, _dict, nd, null); - } - else - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - } - else - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); - } - else - LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - @Override - public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { - for(int i = 1; i < g.length; i++) { - if(!_colIndexes.equals(g[i]._colIndexes)) { - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); - return null; - } - - if(!(g[i] instanceof ColGroupDDC)) { - LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - final ColGroupDDC gDDC = (ColGroupDDC) g[i]; - if(!gDDC._dict.equals(_dict)) { - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - return null; - } - } - AMapToData nd = _data.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); - return create(_colIndexes, _dict, nd, null); - } - - @Override - public ICLAScheme getCompressionScheme() { - return DDCScheme.create(this); - } - - @Override - public AColGroup recompress() { - return this; - } - - @Override - public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - try { - - IEncode enc = getEncoding(); - EstimationFactors ef = new EstimationFactors(_data.getUnique(), _data.size(), _data.size(), - _dict.getSparsity()); - return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); - } - catch(Exception e) { - throw new DMLCompressionException(this.toString(), e); - } - } - - @Override - public IEncode getEncoding() { - return EncodingFactory.create(_data); - } - - @Override - protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), _data, getCachedCounts()); - } - - @Override - public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - final SparseBlock sb = selection.getSparseBlock(); - final SparseBlock retB = ret.getSparseBlock(); - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - @Override - protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; - final SparseBlock sb = selection.getSparseBlock(); - final DenseBlock retB = ret.getDenseBlock(); - for(int r = rl; r < ru; r++) { - if(sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, - int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - if(_colIndexes instanceof RangeIndex) - leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); - else { - - pos += cl; // left side matrix position offset. 
- if(a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for(int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if(idx != nVal) - values2[pos2 + _colIndexes.get(idx)] += values[pos]; - } - } - else { - for(int rc = cl; rc < cu; rc++, pos++) - values2[pos2 + _colIndexes.get(_data.getIndex(rc))] += values[pos]; - } - } - } - - private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, - int cl, int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - - final int firstCol = pos2 + _colIndexes.get(0); - pos += cl; // left side matrix position offset. - if(a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for(int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if(idx != nVal) - values2[firstCol + idx] += values[pos]; - } - } - else { - for(int rc = cl; rc < cu; rc++, pos++) - values2[firstCol + _data.getIndex(rc)] += values[pos]; - } - } - - @Override - public AColGroup morph(CompressionType ct, int nRow) { - // return this; - if(ct == getCompType()) - return this; - else if(ct == CompressionType.SDC) { - // return this; - int[] counts = getCounts(); - int maxId = maxIndex(counts); - double[] def = _dict.getRow(maxId, _colIndexes.size()); - - int offsetSize = nRow - counts[maxId]; - int[] offsets = new int[offsetSize]; - AMapToData reducedData = MapToFactory.create(offsetSize, _data.getUnique()); - int o = 0; - for(int i = 0; i < nRow; i++) { - int v = _data.getIndex(i); - if(v != maxId) { - offsets[o] = i; - reducedData.set(o, v); - o++; - } - } - - return ColGroupSDC.create(_colIndexes, _data.size(), _dict, def, OffsetFactory.createOffset(offsets), - reducedData, null); - } - else if(ct == CompressionType.CONST) { - // if(1 < getNumValues()) { - String thisS = this.toString(); - if(thisS.length() > 10000) - thisS = thisS.substring(0, 10000) + "..."; - LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); - return this; - // } - } - else if(ct == CompressionType.DDCFOR) - return this; // it does not make sense to change to FOR. - else - return super.morph(ct, nRow); - } - - private static int maxIndex(int[] counts) { - int id = 0; - for(int i = 1; i < counts.length; i++) { - if(counts[i] > counts[id]) { - id = i; - } - } - return id; - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List right) { - final IDictionary combined = combineDictionaries(nCol, right); - final IColIndex combinedColIndex = combineColIndexes(nCol, right); - return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) { - IDictionary b = ((ColGroupDDC) right).getDictionary(); - IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols()); - IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol)); - return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); - } - - @Override - public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { - AMapToData[] maps = _data.splitReshapeDDC(multiplier); - AColGroup[] res = new AColGroup[multiplier]; - for(int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? 
_colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool) - throws Exception { - AMapToData[] maps = _data.splitReshapeDDCPushDown(multiplier, pool); - AColGroup[] res = new AColGroup[multiplier]; - for(int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(super.toString()); - sb.append(String.format("\n%15s", "Data: ")); - sb.append(_data); - return sb.toString(); - } - - @Override - protected boolean allowShallowIdentityRightMult() { - return true; - } - - public AColGroup convertToDeltaDDC() { - int numCols = _colIndexes.size(); - int numRows = _data.size(); - - DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); - double[] rowDelta = new double[numCols]; - double[] prevRow = new double[numCols]; - DblArray dblArray = new DblArray(rowDelta); - int[] rowToDictId = new int[numRows]; - - double[] dictVals = _dict.getValues(); - - for(int i = 0; i < numRows; i++) { - int dictIdx = _data.getIndex(i); - int off = dictIdx * numCols; - for(int j = 0; j < numCols; j++) { - double val = dictVals[off + j]; - if(i == 0) { - rowDelta[j] = val; - prevRow[j] = val; - } else { - rowDelta[j] = val - prevRow[j]; - prevRow[j] = val; - } - } - - rowToDictId[i] = map.increment(dblArray); - } - - if(map.size() == 0) - return new ColGroupEmpty(_colIndexes); - - ACount[] vals = map.extractValues(); - final int nVals = vals.length; - final double[] dictValues = new double[nVals * numCols]; - final int[] oldIdToNewId = new int[map.size()]; - int idx = 0; - for(int i = 0; i < nVals; i++) { - final ACount dac = vals[i]; - final double[] arrData = dac.key().getData(); - System.arraycopy(arrData, 0, dictValues, idx, numCols); - oldIdToNewId[dac.id] = i; - idx += numCols; - } - - DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); - AMapToData newData = MapToFactory.create(numRows, nVals); - for(int i = 0; i < numRows; i++) { - newData.set(i, oldIdToNewId[rowToDictId[i]]); - } - return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); - } - + private static final long serialVersionUID = -5769772089913918987L; + + protected final AMapToData _data; + + static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; + + protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + super(colIndexes, dict, cachedCounts); + _data = data; + + if (CompressedMatrixBlock.debug) { + if (getNumValues() == 0) + throw new DMLCompressionException("Invalid construction with empty dictionary"); + if (data.size() == 0) + throw new DMLCompressionException("Invalid length of the data. 
is zero"); + + if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " + + dict.getNumberOfValues(colIndexes.size())); + int[] c = getCounts(); + if (c.length != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid DDC Construction"); + data.verify(); + } + } + + public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + if (data.getUnique() == 1) + return ColGroupConst.create(colIndexes, dict); + else if (dict == null) + return new ColGroupEmpty(colIndexes); + else + return new ColGroupDDC(colIndexes, dict, data, cachedCounts); + } + + public AColGroup sparsifyFOR() { + return ColGroupDDCFOR.sparsifyFOR(this); + } + + public CompressionType getCompType() { + return CompressionType.DDC; + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = _data.getIndex(r); + if (sb.isEmpty(vr)) + continue; + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); + } + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { + final int idxSize = _colIndexes.size(); + if (db.isContiguous()) { + final int nColOut = db.getDim(1); + if (idxSize == 1 && nColOut == 1) + decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); + else if (idxSize == 1) + decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); + else if (idxSize == nColOut) // offC == 0 implied + decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); + else if (offC == 0 && offR == 0) + decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); + else if (offC == 0) + decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); + else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } + + private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + final double[] c = db.values(0); + final int nCols = db.getDim(1); + final int colOff = _colIndexes.get(0) + offC; + for (int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) + c[offT] += values[_data.getIndex(i)]; + + } + + @Override + public AMapToData getMapToData() { + return _data; + } + + private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + final double[] c = db.values(0); + decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data); + } + + private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, + double[] values, AMapToData data) { + data.decompressToRange(c, rl, ru, offR, values); + + } + + private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol) { + final double[] c = db.values(0); + for (int r = rl; r < ru; r++) { + final int start = 
_data.getIndex(r) * nCol; + final int offStart = (offR + r) * nCol; + LibMatrixMult.vectAdd(values, c, start, offStart, nCol); + } + } + + private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol, int colOut) { + int off = (rl + offR) * colOut; + for (int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { + final double[] c = db.values(offT); + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { + final int nCol = _colIndexes.size(); + final int nColU = db.getDim(1); + final double[] c = db.values(0); + for (int i = rl; i < ru; i++) { + final int off = i * nColU; + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { + for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = _data.getIndex(r); + if (sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for (int j = apos; j < alen; j++) + ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); + } + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); + } + + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final int rowIndex = _data.getIndex(i) * nCol; + for (int j = 0; j < nCol; j++) + ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); + } + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { + for (int i = rl; i < ru; i++) { + final int vr = _data.getIndex(i); + if (sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for (int j = apos; j < alen; j++) { + final int rowOut = _colIndexes.get(aix[j]); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + c[off + i] += aval[j]; + } + } + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + final int nCol = _colIndexes.size(); + for (int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + for (int i = rl; i < ru; i++) { + final double v = dict[_data.getIndex(i) * nCol + j]; + c[off + i] += v; + } + } + } + + @Override + protected void 
decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { + + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for (int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + for (int i = 0; i < _data.size(); i++) { + int di = _data.getIndex(i); + if (sb.isEmpty(di)) + continue; + + final int apos = sb.pos(di); + final int alen = sb.size(di) + apos; + final int[] aix = sb.indexes(di); + final double[] aval = sb.values(di); + + for (int j = apos; j < alen; j++) { + sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); + } + } + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for (int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + final int nCol = _colIndexes.size(); + for (int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + SparseRow r = sbr.get(rowOut); + + for (int i = 0; i < _data.size(); i++) { + final double v = dict[_data.getIndex(i) * nCol + j]; + r = r.append(i, v); + } + sbr.set(rowOut, r, false); + } + } + + @Override + public double getIdx(int r, int colIdx) { + return _dict.getValue(_data.getIndex(r), colIdx, _colIndexes.size()); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + for (int rix = rl; rix < ru; rix++) + c[rix] += preAgg[_data.getIndex(rix)]; + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + for (int i = rl; i < ru; i++) + c[i] = builtin.execute(c[i], preAgg[_data.getIndex(i)]); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + for (int rix = rl; rix < ru; rix++) + c[rix] *= preAgg[_data.getIndex(rix)]; + } + + @Override + public int[] getCounts(int[] counts) { + return _data.getCounts(counts); + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if (_colIndexes.size() == 1) + leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); + else + lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final DenseBlock retV = result.getDenseBlock(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
+ if (matrix.isEmpty()) + return; + else if (matrix.isInSparseFormat()) { + if (cl != 0 || cu != _data.size()) + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); + } else if (!matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, + cu); + else + lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru) { + + if (retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru) { + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + + for (int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru) { + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for (int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if (retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + // final int offR = r * nColRet; + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int apos = 
sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if (retV.isContiguous()) + lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, + int nColRet, double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + + if (matrix.isInSparseFormat()) + lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + final DenseBlock db = result.getDenseBlock(); + final SparseBlock sb = matrix.getSparseBlock(); + + if (cl != 0 || cu != _data.size()) { + // sub part + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final double[] retV = db.values(r); + final int pos = db.pos(r); + lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); + } + } else { + for (int r = rl; r < ru; r++) + _data.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); + } + } + + private final void lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) + return; + 
final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + _dict.multiplyScalar(aval[i], retV, offR, _data.getIndex(aix[i]), _colIndexes); + } + + private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if (matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final double[] retV = result.getDenseBlockValues(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] mV = matrix.getDenseBlockValues(); + for (int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); + } + } + + private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final double[] retV = result.getDenseBlockValues(); + final int nColRet = result.getNumColumns(); + for (int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = r * nColRet; + for (int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); + } + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); + } + + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + DenseBlock db = that.getDenseBlock(); + DenseBlock retDB = ret.getDenseBlock(); + for (int i = rl; i < ru; i++) + leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); + } + + @Override + public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { + if (_dict instanceof IdentityDictionary) + identityRightDecompressingMult(right, ret, rl, ru, crl, cru); + else + defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); + } + + private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for (int i = rl; i < ru; i++) { + int k = _data.getIndex(i); + final int offOut = i * jd + crl; + final double aa = 1; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + + private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] a = _dict.getValues(); + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int kd = _colIndexes.size(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + + final int blkzI = 32; + 
final int blkzK = 24; + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for (int bi = rl; bi < ru; bi += blkzI) { + final int bie = Math.min(ru, bi + blkzI); + for (int bk = 0; bk < kd; bk += blkzK) { + final int bke = Math.min(kd, bk + blkzK); + for (int i = bi; i < bie; i++) { + int offi = _data.getIndex(i) * kd; + final int offOut = i * jd + crl; + for (int k = bk; k < bke; k++) { + final double aa = a[offi + k]; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + } + } + } + + final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { + vVec = vVec.broadcast(aa); + final int offj = k * jd; + final int end = endT + offj; + for (int j = offj + crl; j < end; j += vLen, offOut += vLen) { + DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); + DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); + res = vVec.fma(bVec, res); + res.intoArray(c, offOut); + } + for (int j = end; j < cru + offj; j++, offOut++) { + double bb = b[j]; + c[offOut] += bb * aa; + } + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + if (cl != 0 || cu != _data.size()) { + throw new NotImplementedException(); + } + _data.preAggregateSparse(sb, preAgg, rl, ru); + } + + @Override + public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + try { + + _data.preAggregateDDC_DDC(that._data, that._dict, ret, that._colIndexes.size()); + } catch (Exception e) { + throw new DMLCompressionException(that.toString(), e); + } + } + + @Override + public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + _data.preAggregateDDC_SDCZ(that._data, that._dict, that._indexes, ret, that._colIndexes.size()); + } + + @Override + public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + final AOffsetIterator itThat = that._indexes.getOffsetIterator(); + final int nCol = that._colIndexes.size(); + final int finalOff = that._indexes.getOffsetToLast(); + final double[] v = ret.getValues(); + while (true) { + final int to = _data.getIndex(itThat.value()); + that._dict.addToEntry(v, 0, to, nCol); + if (itThat.value() == finalOff) + break; + itThat.next(); + } + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + _data.preAggregateDDC_RLE(that._ptr, that._data, that._dict, ret, that._colIndexes.size()); + } + + @Override + public boolean sameIndexStructure(AColGroupCompressed that) { + return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data; + } + + @Override + public ColGroupType getColGroupType() { + return ColGroupType.DDC; + } + + @Override + public long estimateInMemorySize() { + long size = super.estimateInMemorySize(); + size += _data.getInMemorySize(); + return size; + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + if ((op.fn instanceof Plus || op.fn instanceof Minus)) { + final double v0 = op.executeScalar(0); + if (v0 == 0) + return this; + final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); + return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); + } + return create(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + return create(_colIndexes,
_dict.applyUnaryOp(op), _data, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); + return create(_colIndexes, ret, _data, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + if ((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && + ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { + final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); + return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); + } + final IDictionary ret; + if (_colIndexes.size() == 1) + ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); + else + ret = _dict.binOpRight(op, v, _colIndexes); + return create(_colIndexes, ret, _data, getCachedCounts()); + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + _data.write(out); + } + + public static ColGroupDDC read(DataInput in) throws IOException { + IColIndex cols = ColIndexFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); + AMapToData data = MapToFactory.readIn(in); + return new ColGroupDDC(cols, dict, data, null); + } + + @Override + public long getExactSizeOnDisk() { + long ret = super.getExactSizeOnDisk(); + ret += _data.getExactSizeOnDisk(); + return ret; + } + + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + final int nVals = getNumValues(); + final int nCols = getNumCols(); + return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); + } + + @Override + protected int numRowsToMultiply() { + return _data.size(); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + public boolean containsValue(double pattern) { + return _dict.containsValue(pattern); + } + + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + if (preAgg != null) + return create(colIndexes, preAgg, _data, getCachedCounts()); + else + return null; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + try { + return ColGroupDDC.create(_colIndexes, _dict, _data.slice(rl, ru), null); + } catch (Exception e) { + throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); + } + } + + @Override + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + return create(colIndexes, newDictionary, _data, getCachedCounts()); + } + + @Override + public AColGroup append(AColGroup g) { + if (g instanceof ColGroupDDC) { + if (g.getColIndices().equals(_colIndexes)) { + + ColGroupDDC gDDC = (ColGroupDDC) g; + if (gDDC._dict.equals(_dict)) { + AMapToData nd = _data.append(gDDC._data); + return create(_colIndexes, _dict, nd, null); + } else + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + } else + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); + } else + LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + @Override + public AColGroup appendNInternal(AColGroup[] g, int 
blen, int rlen) { + for (int i = 1; i < g.length; i++) { + if (!_colIndexes.equals(g[i]._colIndexes)) { + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); + return null; + } + + if (!(g[i] instanceof ColGroupDDC)) { + LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + final ColGroupDDC gDDC = (ColGroupDDC) g[i]; + if (!gDDC._dict.equals(_dict)) { + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + return null; + } + } + AMapToData nd = _data.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); + return create(_colIndexes, _dict, nd, null); + } + + @Override + public ICLAScheme getCompressionScheme() { + return DDCScheme.create(this); + } + + @Override + public AColGroup recompress() { + return this; + } + + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + try { + + IEncode enc = getEncoding(); + EstimationFactors ef = new EstimationFactors(_data.getUnique(), _data.size(), _data.size(), + _dict.getSparsity()); + return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); + } catch (Exception e) { + throw new DMLCompressionException(this.toString(), e); + } + } + + @Override + public IEncode getEncoding() { + return EncodingFactory.create(_data); + } + + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), _data, getCachedCounts()); + } + + @Override + public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + final SparseBlock sb = selection.getSparseBlock(); + final SparseBlock retB = ret.getSparseBlock(); + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + @Override + protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; + final SparseBlock sb = selection.getSparseBlock(); + final DenseBlock retB = ret.getDenseBlock(); + for (int r = rl; r < ru; r++) { + if (sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, + int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + if (_colIndexes instanceof RangeIndex) + leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); + else { + + pos += cl; // left side matrix position offset. 
+ if (a.withEmpty()) { + final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; + for (int rc = cl; rc < cu; rc++, pos++) { + final int idx = _data.getIndex(rc); + if (idx != nVal) + values2[pos2 + _colIndexes.get(idx)] += values[pos]; + } + } else { + for (int rc = cl; rc < cu; rc++, pos++) + values2[pos2 + _colIndexes.get(_data.getIndex(rc))] += values[pos]; + } + } + } + + private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, + int cl, int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + + final int firstCol = pos2 + _colIndexes.get(0); + pos += cl; // left side matrix position offset. + if (a.withEmpty()) { + final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; + for (int rc = cl; rc < cu; rc++, pos++) { + final int idx = _data.getIndex(rc); + if (idx != nVal) + values2[firstCol + idx] += values[pos]; + } + } else { + for (int rc = cl; rc < cu; rc++, pos++) + values2[firstCol + _data.getIndex(rc)] += values[pos]; + } + } + + @Override + public AColGroup morph(CompressionType ct, int nRow) { + // return this; + if (ct == getCompType()) + return this; + else if (ct == CompressionType.SDC) { + // return this; + int[] counts = getCounts(); + int maxId = maxIndex(counts); + double[] def = _dict.getRow(maxId, _colIndexes.size()); + + int offsetSize = nRow - counts[maxId]; + int[] offsets = new int[offsetSize]; + AMapToData reducedData = MapToFactory.create(offsetSize, _data.getUnique()); + int o = 0; + for (int i = 0; i < nRow; i++) { + int v = _data.getIndex(i); + if (v != maxId) { + offsets[o] = i; + reducedData.set(o, v); + o++; + } + } + + return ColGroupSDC.create(_colIndexes, _data.size(), _dict, def, OffsetFactory.createOffset(offsets), + reducedData, null); + } else if (ct == CompressionType.CONST) { + // if(1 < getNumValues()) { + String thisS = this.toString(); + if (thisS.length() > 10000) + thisS = thisS.substring(0, 10000) + "..."; + LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); + return this; + // } + } else if (ct == CompressionType.DDCFOR) + return this; // it does not make sense to change to FOR. + else + return super.morph(ct, nRow); + } + + private static int maxIndex(int[] counts) { + int id = 0; + for (int i = 1; i < counts.length; i++) { + if (counts[i] > counts[id]) { + id = i; + } + } + return id; + } + + @Override + public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List right) { + final IDictionary combined = combineDictionaries(nCol, right); + final IColIndex combinedColIndex = combineColIndexes(nCol, right); + return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); + } + + @Override + public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) { + IDictionary b = ((ColGroupDDC) right).getDictionary(); + IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols()); + IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol)); + return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); + } + + @Override + public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { + AMapToData[] maps = _data.splitReshapeDDC(multiplier); + AColGroup[] res = new AColGroup[multiplier]; + for (int i = 0; i < multiplier; i++) { + final IColIndex ci = i == 0 ? 
_colIndexes : _colIndexes.shift(i * nColOrg); + res[i] = create(ci, _dict, maps[i], null); + } + return res; + } + + @Override + public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool) + throws Exception { + AMapToData[] maps = _data.splitReshapeDDCPushDown(multiplier, pool); + AColGroup[] res = new AColGroup[multiplier]; + for (int i = 0; i < multiplier; i++) { + final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); + res[i] = create(ci, _dict, maps[i], null); + } + return res; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append(String.format("\n%15s", "Data: ")); + sb.append(_data); + return sb.toString(); + } + + @Override + protected boolean allowShallowIdentityRightMult() { + return true; + } + + public AColGroup convertToDeltaDDC() { + int numCols = _colIndexes.size(); + int numRows = _data.size(); + + DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); + double[] rowDelta = new double[numCols]; + double[] prevRow = new double[numCols]; + DblArray dblArray = new DblArray(rowDelta); + int[] rowToDictId = new int[numRows]; + + double[] dictVals = _dict.getValues(); + + for (int i = 0; i < numRows; i++) { + int dictIdx = _data.getIndex(i); + int off = dictIdx * numCols; + for (int j = 0; j < numCols; j++) { + double val = dictVals[off + j]; + if (i == 0) { + rowDelta[j] = val; + prevRow[j] = val; + } else { + rowDelta[j] = val - prevRow[j]; + prevRow[j] = val; + } + } + + rowToDictId[i] = map.increment(dblArray); + } + + if (map.size() == 0) + return new ColGroupEmpty(_colIndexes); + + ACount[] vals = map.extractValues(); + final int nVals = vals.length; + final double[] dictValues = new double[nVals * numCols]; + final int[] oldIdToNewId = new int[map.size()]; + int idx = 0; + for (int i = 0; i < nVals; i++) { + final ACount dac = vals[i]; + final double[] arrData = dac.key().getData(); + System.arraycopy(arrData, 0, dictValues, idx, numCols); + oldIdToNewId[dac.id] = i; + idx += numCols; + } + + DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); + AMapToData newData = MapToFactory.create(numRows, nVals); + for (int i = 0; i < numRows; i++) { + newData.set(i, oldIdToNewId[rowToDictId[i]]); + } + return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); + } + + public AColGroup convertToDDCLZW() { + return ColGroupDDCLZW.create(_colIndexes, _dict, _data, null); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index 80fc69a7371..d403b6e124d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -88,11 +88,9 @@ public class ColGroupDDCLZW extends APreAgg implements IMapToDataGroup { private static final long serialVersionUID = -5769772089913918987L; private final int[] _dataLZW; // LZW compressed representation of the mapping - private final int _nRows; // Number of rows in the mapping vector private final int _nUnique; // Number of unique values in the mapping vector - // Builds a packed 64-bit key for (prefixCode(w), nextSymbol(k)) pairs used in the LZW dictionary. 
(TODO) private static long packKey(int prefixCode, int nextSymbol) { return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); @@ -273,7 +271,6 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows) { return out; } - // Build Constructor: Used when creating a new DDCLZW instance during compression/build time. (TODO) private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, cachedCounts); @@ -342,6 +339,28 @@ else if (data.getUnique() == 1) * suitable for sequential and which arent. those who arent then we shall materialize and fall back to ddc * */ + public AColGroup convertToDDC() { + final AMapToData map = decompress(_dataLZW, _nUnique, _nRows); + final int[] counts = getCounts(); // may be null depending on your group + return ColGroupDDC.create(_colIndexes, _dict, map, counts); + } + + + // Temporary getters for testing ! Remove before PR! + /*public int[] get_dataLZW() { + return _dataLZW; + } + + public int get_nRows() { + return _nRows; + } + + public int get_nUnique() { + return _nUnique; + }*/ + // Temporary getters for testing ! Remove before PR! + + // Deserialize ColGroupDDCLZW object in binary stream. public static ColGroupDDCLZW read(DataInput in) throws IOException { final IColIndex colIndexes = ColIndexFactory.read(in); @@ -364,6 +383,46 @@ public static ColGroupDDCLZW read(DataInput in) throws IOException { return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null); } + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { + + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + + } + + @Override + protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) { + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { + + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { + + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { + + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { + + } + // Serialize a ColGroupDDC-object into binary stream. 
@Override public void write(DataOutput out) throws IOException { @@ -374,5 +433,200 @@ public void write(DataOutput out) throws IOException { out.writeInt(_dataLZW.length); for (int i : _dataLZW) out.writeInt(i); } + + @Override + public double getIdx(int r, int colIdx) { + return 0; + } + + @Override + public CompressionType getCompType() { + return null; + } + + @Override + protected ColGroupType getColGroupType() { + return null; + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + return null; + } + + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + return null; + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + return null; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + return null; + } + + @Override + public boolean containsValue(double pattern) { + return false; + } + + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + return 0; + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + return null; + } + + @Override + public AColGroup append(AColGroup g) { + return null; + } + + @Override + protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { + return null; + } + + @Override + public ICLAScheme getCompressionScheme() { + return null; + } + + @Override + public AColGroup recompress() { + return null; + } + + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + return null; + } + + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + return null; + } + + @Override + protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + + } + + @Override + protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + + } + + @Override + public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { + return new AColGroup[0]; + } + + @Override + protected boolean allowShallowIdentityRightMult() { + return false; + } + + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + return null; + } + + @Override + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + return null; + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + + } + + @Override + protected void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + + } + + @Override + protected void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + + } + + @Override + protected void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + + } + + @Override + protected int numRowsToMultiply() { + return 0; + } + + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + + } + + @Override + protected int[] getCounts(int[] out) { + return new int[0]; + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return 0; 
+ } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + + } + + @Override + public boolean sameIndexStructure(AColGroupCompressed that) { + return false; + } + + @Override + public AMapToData getMapToData() { + return null; + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index f3b1350cdc0..cd36b31e86d 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -42,17 +42,70 @@ public class ColGroupDDCTest { protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName()); @Test - public void testLZWRoundTripMapping() throws Exception { - // Build a mapping with repetition to actually exercise LZW - // Example: [2,0,2,3,0,2,1,0,2] - final int nRows = 9; - final int nUnique = 4; + public void testConvertToDDCLZWBasic() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; + Dictionary dict = Dictionary.create(dictValues); + + int[] src = new int[]{ + // repeating base pattern + 2, 0, 2, 1, 0, 2, 1, 0, 2, + 2, 0, 2, 1, 0, 2, 1, 0, 2, + + // variation / shifted pattern + 1, 0, 1, 2, 0, 1, 2, 0, 1, + 1, 0, 1, 2, 0, 1, 2, 0, 1, + + // longer runs (good for phrase growth) + 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, + + // mixed noise + 2, 1, 0, 2, 1, 0, 2, 1, 0, + 0, 2, 1, 0, 2, 1, 0, 2, 1,1,1,1,1,1,1, + + // repeating tail (tests dictionary reuse) + 2, 0, 2, 1, 0, 2, 1, 0, 2, + 2, 0, 2, 1, 0, 2, 1, 0, 2,0,0,0,0,0,1 + }; + + final int nRows = src.length; + final int nUnique = 3; AMapToData data = MapToFactory.create(nRows, nUnique); - int[] src = new int[]{2, 0, 2, 3, 0, 2, 1, 0, 2}; for (int i = 0; i < nRows; i++) data.set(i, src[i]); - // TODO: Write tests for ColGroupDDCLZW. 
+ ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDDCLZW(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDDCLZW); + + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) result; + AColGroup ddclzwDecompressed = ddclzw.convertToDDC(); + + assertNotNull(ddclzwDecompressed); + assertTrue(ddclzwDecompressed instanceof ColGroupDDC); + + ColGroupDDC ddc2 = (ColGroupDDC) ddclzwDecompressed; + + AMapToData d1 = ddc.getMapToData(); + AMapToData d2 = ddc2.getMapToData(); + + assertEquals(d1.size(), d2.size()); + assertEquals(d1.getUnique(), d2.getUnique()); + for (int i = 0; i < d1.size(); i++) + assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i)); + + assertEquals(d1.size(), d2.size()); + assertEquals(d1.getUnique(), d2.getUnique()); + + for (int i = 0; i < d1.size(); i++) { + assertEquals(d1.getIndex(i), d2.getIndex(i)); + } + + assertEquals(ddc.getColIndices(), ddc2.getColIndices()); } @Test From 1dfe91ee42afdb1d55e5aed7f52e4d80e9865e01 Mon Sep 17 00:00:00 2001 From: fjobs Date: Mon, 12 Jan 2026 12:33:29 +0100 Subject: [PATCH 11/24] Started working on ColGroupDDCLZW's other methods that need to be implemented from its interface. --- .../compress/colgroup/ColGroupDDCLZW.java | 152 ++++++++---------- 1 file changed, 69 insertions(+), 83 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index d403b6e124d..ced9d315cc9 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -345,22 +345,6 @@ public AColGroup convertToDDC() { return ColGroupDDC.create(_colIndexes, _dict, map, counts); } - - // Temporary getters for testing ! Remove before PR! - /*public int[] get_dataLZW() { - return _dataLZW; - } - - public int get_nRows() { - return _nRows; - } - - public int get_nUnique() { - return _nUnique; - }*/ - // Temporary getters for testing ! Remove before PR! - - // Deserialize ColGroupDDCLZW object in binary stream.
@Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { + public void write(DataOutput out) throws IOException { + _colIndexes.write(out); + _dict.write(out); + out.writeInt(_nRows); + out.writeInt(_nUnique); + out.writeInt(_dataLZW.length); + for (int i : _dataLZW) out.writeInt(i); + } + @Override + public double getIdx(int r, int colIdx) { + return 0; } @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + public CompressionType getCompType() { + return null; + } + @Override + protected ColGroupType getColGroupType() { + return null; } @Override - protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) { + public boolean containsValue(double pattern) { + return _dict.containsValue(pattern); + } + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + final int nVals = getNumValues(); + final int nCols = getNumCols(); + return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); } @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { + public ICLAScheme getCompressionScheme() { + throw new NotImplementedException(); + } + @Override + protected int numRowsToMultiply() { + return _nRows; } @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + return new ColGroupDDCLZW(colIndexes, newDictionary, _dataLZW, _nRows, _nUnique, getCachedCounts()); + } + @Override + public AMapToData getMapToData() { + throw new NotImplementedException(); // or decompress and return data... } @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + public boolean sameIndexStructure(AColGroupCompressed that) { + return that instanceof ColGroupDDCLZW && ((ColGroupDDCLZW) that)._dataLZW == _dataLZW; + } + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); } @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { } @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { } - // Serialize a ColGroupDDC-object into binary stream. 
@Override - public void write(DataOutput out) throws IOException { - _colIndexes.write(out); - _dict.write(out); - out.writeInt(_nRows); - out.writeInt(_nUnique); - out.writeInt(_dataLZW.length); - for (int i : _dataLZW) out.writeInt(i); + protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) { + } @Override - public double getIdx(int r, int colIdx) { - return 0; + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { + } @Override - public CompressionType getCompType() { - return null; + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { + } @Override - protected ColGroupType getColGroupType() { - return null; + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { + + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { + } @Override @@ -474,16 +505,6 @@ public AColGroup sliceRows(int rl, int ru) { return null; } - @Override - public boolean containsValue(double pattern) { - return false; - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - return 0; - } - @Override public AColGroup unaryOperation(UnaryOperator op) { return null; @@ -499,11 +520,6 @@ protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { return null; } - @Override - public ICLAScheme getCompressionScheme() { - return null; - } - @Override public AColGroup recompress() { return null; @@ -544,11 +560,6 @@ protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex col return null; } - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - return null; - } - @Override public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { @@ -579,11 +590,6 @@ protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { } - @Override - protected int numRowsToMultiply() { - return 0; - } - @Override public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { @@ -594,16 +600,6 @@ protected int[] getCounts(int[] out) { return new int[0]; } - @Override - protected double computeMxx(double c, Builtin builtin) { - return 0; - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - - } - @Override protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { @@ -618,15 +614,5 @@ protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { } - - @Override - public boolean sameIndexStructure(AColGroupCompressed that) { - return false; - } - - @Override - public AMapToData getMapToData() { - return null; - } } From 31568637f6f7ea783b66cc924ee18dd7454fba6a Mon Sep 17 00:00:00 2001 From: fjobs Date: Tue, 13 Jan 2026 11:27:24 +0100 Subject: [PATCH 12/24] test commit --- .../test/component/compress/colgroup/ColGroupDDCTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java 
b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index cd36b31e86d..5b6a19848a4 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -63,11 +63,11 @@ public void testConvertToDDCLZWBasic() { // mixed noise 2, 1, 0, 2, 1, 0, 2, 1, 0, - 0, 2, 1, 0, 2, 1, 0, 2, 1,1,1,1,1,1,1, + 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, // repeating tail (tests dictionary reuse) 2, 0, 2, 1, 0, 2, 1, 0, 2, - 2, 0, 2, 1, 0, 2, 1, 0, 2,0,0,0,0,0,1 + 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1 }; final int nRows = src.length; From 10d577690dbaaab00362b729e725d929b3f34d4a Mon Sep 17 00:00:00 2001 From: fjobs Date: Tue, 13 Jan 2026 13:38:30 +0100 Subject: [PATCH 13/24] [SYSTEMDS-3779] Added new Compression and ColGroup Types DDCLZW. --- .../sysds/runtime/compress/colgroup/AColGroup.java | 4 ++-- .../runtime/compress/colgroup/ColGroupDDCLZW.java | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java index 003703f86a4..eb2a5a356b7 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java @@ -65,7 +65,7 @@ public abstract class AColGroup implements Serializable { /** Public super types of compression ColGroups supported */ public static enum CompressionType { - UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, LinearFunctional; + UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCFOR, DDCFOR, DeltaDDC, DDCLZW, LinearFunctional; public boolean isDense() { return this == DDC || this == CONST || this == DDCFOR || this == DDCFOR; @@ -86,7 +86,7 @@ public boolean isSDC() { * Protected such that outside the ColGroup package it should be unknown which specific subtype is used. 
*/ protected static enum ColGroupType { - UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCSingle, SDCSingleZeros, SDCZeros, SDCFOR, DDCFOR, DeltaDDC, + UNCOMPRESSED, RLE, OLE, DDC, CONST, EMPTY, SDC, SDCSingle, SDCSingleZeros, SDCZeros, SDCFOR, DDCFOR, DDCLZW, DeltaDDC, LinearFunctional; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index ced9d315cc9..6a73fcb3cda 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -385,12 +385,12 @@ public double getIdx(int r, int colIdx) { @Override public CompressionType getCompType() { - return null; + return CompressionType.DDCLZW; } @Override protected ColGroupType getColGroupType() { - return null; + return ColGroupType.DDCLZW; } @Override @@ -440,6 +440,11 @@ protected void computeColMxx(double[] c, Builtin builtin) { _dict.aggregateCols(c, builtin, _colIndexes); } + @Override + public AColGroup sliceRows(int rl, int ru) { + return null; + } + @Override protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { @@ -500,11 +505,6 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa return null; } - @Override - public AColGroup sliceRows(int rl, int ru) { - return null; - } - @Override public AColGroup unaryOperation(UnaryOperator op) { return null; From 3c9e2ed9447d5b792b91b8af6eb932dedee27a3f Mon Sep 17 00:00:00 2001 From: fjobs Date: Tue, 13 Jan 2026 13:52:44 +0100 Subject: [PATCH 14/24] [SYSTEMDS-3779] Introduce initial ColGroupDDCLZW with LZW-compressed mapping This commit adds an initial implementation of ColGroupDDCLZW, a new column group that stores the mapping vector in LZW-compressed form instead of materializing MapToData explicitly. The design focuses on enabling sequential access directly on the compressed representation, while complex access patterns are intended to fall back to DDC. No cache or lazy decompression mechanism is introduced at this stage. 
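To make the storage idea concrete, here is a minimal standalone sketch of LZW over a mapping vector (symbols in [0, nUnique)). Only packKey mirrors the class; lzwCompress is a hypothetical helper name for illustration, not the code added by this patch:

	// Pack (prefixCode, nextSymbol) into one long key, as in the class.
	static long packKey(int prefixCode, int nextSymbol) {
		return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL);
	}

	// Hypothetical sketch: compress a mapping vector into LZW codes.
	// Codes 0..nUnique-1 are the base symbols; larger codes are learned phrases.
	static int[] lzwCompress(int[] map, int nUnique) {
		final java.util.Map<Long, Integer> dict = new java.util.HashMap<>();
		final int[] out = new int[map.length]; // upper bound: one code per input symbol
		int n = 0, nextCode = nUnique;
		int w = map[0]; // code identifying the current phrase
		for (int i = 1; i < map.length; i++) {
			final Integer ext = dict.get(packKey(w, map[i]));
			if (ext != null)
				w = ext; // known phrase extension: keep growing
			else {
				out[n++] = w; // emit the current phrase ...
				dict.put(packKey(w, map[i]), nextCode++); // ... and learn its extension
				w = map[i];
			}
		}
		out[n++] = w;
		return java.util.Arrays.copyOf(out, n);
	}

Sequential kernels can walk the emitted code stream front to back, which is why row-sequential operations fit this layout, while random access has to decode a prefix or fall back to DDC.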
--- .../apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index 6a73fcb3cda..b706b7a5e59 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -445,6 +445,7 @@ public AColGroup sliceRows(int rl, int ru) { return null; } + @Override protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { From a8df1fe7fa85cf1e52fd498d22e570f81099563f Mon Sep 17 00:00:00 2001 From: Annika Lehmann Date: Thu, 15 Jan 2026 13:18:51 +0100 Subject: [PATCH 15/24] Decompression to a specific index --- .../compress/colgroup/ColGroupDDCLZW.java | 28 ++++++++++++++----- .../compress/colgroup/ColGroupDDCTest.java | 18 ++++++++---- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index b706b7a5e59..ca68bd7b3ce 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -206,7 +206,7 @@ private static int[] unpack(int code, int nUnique, Map dict) { // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. // TODO: Compatibility with compress() and used data structures. Improve time/space complexity. - private static AMapToData decompress(int[] codes, int nUnique, int nRows) { + private static AMapToData decompress(int[] codes, int nUnique, int nRows, int index) { // Validate input arguments. if (codes == null) throw new IllegalArgumentException("codes is null"); @@ -217,21 +217,26 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows) { if (nRows <= 0) { throw new IllegalArgumentException("Invalid nRows: " + nRows); } + if (index > nRows){ + throw new IllegalArgumentException("Index is larger than Data Length: " + index); + } // Maps: code -> packKey(prefixCode, lastSymbolOfPhrase). // Base symbols (0..nUnique-1) are implicit and not stored here. final Map dict = new HashMap<>(); // Output mapping that will be reconstructed. - AMapToData out = MapToFactory.create(nRows, nUnique); + AMapToData out = MapToFactory.create(index, nUnique); int outPos = 0; // Current write position in the output mapping. // Decode the first code. The first code always expands to a valid phrase without needing // any dictionary entries. int old = codes[0]; int[] oldPhrase = unpack(old, nUnique, dict); - for (int v : oldPhrase) + for (int v : oldPhrase){ + if (outPos == index) break; out.set(outPos++, v); + } // Next free dictionary code. Codes 0..nUnique-1 are reserved for base symbols. int nextCode = nUnique; @@ -252,7 +257,10 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows) { } // Append the reconstructed phrase to the output mapping. - for (int v : next) out.set(outPos++, v); + for (int v : next) { + if (outPos == index) break; + out.set(outPos++, v); + } // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)). int first = next[0]; @@ -264,8 +272,8 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows) { } // Safety check: decoder must produce exactly nRows symbols. 
- if (outPos != nRows) - throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + nRows); + if (outPos != index) + throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + index); // Return the reconstructed mapping. return out; @@ -340,7 +348,13 @@ else if (data.getUnique() == 1) * */ public AColGroup convertToDDC() { - final AMapToData map = decompress(_dataLZW, _nUnique, _nRows); + final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, _nRows); final int[] counts = getCounts(); // may be null depending on your group return ColGroupDDC.create(_colIndexes, _dict, map, counts); } + + public AColGroup convertToDDC(int index) { + final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, index); + final int[] counts = getCounts(); // may be null depending on your group + return ColGroupDDC.create(_colIndexes, _dict, map, counts); + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index 5b6a19848a4..5aab22059fc 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -98,14 +98,20 @@ public void testConvertToDDCLZWBasic() { for (int i = 0; i < d1.size(); i++) assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i)); - assertEquals(d1.size(), d2.size()); - assertEquals(d1.getUnique(), d2.getUnique()); + assertEquals(ddc.getColIndices(), ddc2.getColIndices()); - for (int i = 0; i < d1.size(); i++) { - assertEquals(d1.getIndex(i), d2.getIndex(i)); - } + // Test partial decompression: + // index is the number of symbols to decode (positions 0 to index-1) + int index = 10; + ColGroupDDC ddcIndex = (ColGroupDDC) ddclzw.convertToDDC(index); - assertEquals(ddc.getColIndices(), ddc2.getColIndices()); + AMapToData d3 = ddcIndex.getMapToData(); + assertEquals(index, d3.size()); + assertEquals(ddc.getColIndices(), ddcIndex.getColIndices()); + + for(int i = 0; i < index; i++){ + assertEquals(d1.getIndex(i), d3.getIndex(i)); + } } @Test From 96cb6e9f914a5747e025ca1f9a0ee09e2bd90afc Mon Sep 17 00:00:00 2001 From: Annika Lehmann Date: Fri, 16 Jan 2026 09:04:52 +0100 Subject: [PATCH 16/24] Implement sliceRows for ColGroupDDCLZW --- .../compress/colgroup/ColGroupDDCLZW.java | 15 ++++++++--- .../compress/colgroup/ColGroupDDCTest.java | 25 ++++++++++++++++--- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index ca68bd7b3ce..d2a7dfbce62 100--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -394,7 +394,10 @@ public void write(DataOutput out) throws IOException { @Override public double getIdx(int r, int colIdx) { - return 0; + // TODO: this should be fast; avoid decoding a full prefix per lookup + final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, r + 1); + // TODO: use colIdx to look up the dictionary value + return map.getIndex(r); } @@ -421,6 +424,7 @@ public double getCost(ComputationCostEstimator e, int nRows) { @Override public ICLAScheme getCompressionScheme() { + // TODO: not implemented in ColGroupDDCFOR either - should we create one?
Content: nCols as in DDC throw new NotImplementedException(); } @@ -436,7 +440,7 @@ protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) @Override public AMapToData getMapToData() { - throw new NotImplementedException(); // or decompress and return data... + throw new NotImplementedException(); // or decompress and return data... decompress(_dataLZW, _nUnique, _nRows, _nRows) } @Override @@ -456,7 +460,12 @@ protected void computeColMxx(double[] c, Builtin builtin) { @Override public AColGroup sliceRows(int rl, int ru) { - return null; + try{ + AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru); + return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null); + } catch(Exception e){ + throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e); + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index 5aab22059fc..e7d73edde4f 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -25,10 +25,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.sysds.runtime.compress.colgroup.AColGroup; -import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; -import org.apache.sysds.runtime.compress.colgroup.ColGroupDDCLZW; -import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; +import org.apache.sysds.runtime.compress.colgroup.*; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; @@ -43,6 +40,7 @@ public class ColGroupDDCTest { @Test public void testConvertToDDCLZWBasic() { + // TODO: new method for the comparison IColIndex colIndexes = ColIndexFactory.create(2); double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; Dictionary dict = Dictionary.create(dictValues); @@ -112,6 +110,25 @@ public void testConvertToDDCLZWBasic() { for(int i = 0; i < index; i++){ assertEquals(d1.getIndex(i), d3.getIndex(i)); } + + // Test sliceRows + int low = 3; + int high = 10; + AColGroup slice = ddclzw.sliceRows(low, high); + if(slice instanceof ColGroupDDCLZW ddclzwslice){ + ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwslice.convertToDDC(); + ColGroupDDC ddcSlice2 = (ColGroupDDC) ddc.sliceRows(low, high); + + AMapToData d4 = ddcSlice.getMapToData(); + AMapToData d5 = ddcSlice2.getMapToData(); + + assertEquals(d5.size(), d4.size()); + assertEquals(d5.getUnique(), d4.getUnique()); + + for (int i = 0; i < d4.size(); i++) + assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i)); + } + } @Test From a30cc91150b3698ec63a5915e82868f59c55605a Mon Sep 17 00:00:00 2001 From: fjobs Date: Fri, 16 Jan 2026 11:41:16 +0100 Subject: [PATCH 17/24] [SYSTEMDS-3779] Add immediate stop after a certain index in decompress(). Decompress will now return an empty map if the index is zero.
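Illustratively, the early-stop contract looks like the following sketch (lzwDecodePrefix is a hypothetical helper name under assumed inputs; the patch changes the existing decompress()). It writes exactly index leading symbols and returns as soon as the last one is produced, with index == 0 yielding an empty result:

	// Decode only the first `index` symbols of an LZW code stream.
	// Assumes index <= nRows, i.e. the stream encodes at least `index` symbols.
	static int[] lzwDecodePrefix(int[] codes, int nUnique, int index) {
		final int[] out = new int[index];
		if (index == 0)
			return out; // empty result, mirroring the new index == 0 behavior
		final java.util.List<int[]> phrases = new java.util.ArrayList<>(); // codes >= nUnique
		int[] prev = new int[] {codes[0]}; // the first code is always a base symbol
		int outPos = 0;
		out[outPos++] = prev[0];
		for (int i = 1; i < codes.length && outPos < index; i++) {
			final int[] cur;
			if (codes[i] < nUnique)
				cur = new int[] {codes[i]};
			else if (codes[i] - nUnique < phrases.size())
				cur = phrases.get(codes[i] - nUnique);
			else { // special case: code equals the next free code (KwKwK)
				cur = java.util.Arrays.copyOf(prev, prev.length + 1);
				cur[prev.length] = prev[0];
			}
			for (int v : cur) {
				out[outPos++] = v;
				if (outPos == index)
					return out; // stop immediately once enough symbols are decoded
			}
			final int[] grown = java.util.Arrays.copyOf(prev, prev.length + 1);
			grown[prev.length] = cur[0]; // learn prev + firstSymbol(cur)
			phrases.add(grown);
			prev = cur;
		}
		return out;
	}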
--- .../compress/colgroup/ColGroupDDCLZW.java | 24 ++++++++++++------- .../compress/colgroup/ColGroupDDCTest.java | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index d2a7dfbce62..50c37936943 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -71,7 +71,6 @@ import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList; import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap; - import java.util.Map; import java.util.HashMap; import java.util.Stack; @@ -204,7 +203,7 @@ private static int[] unpack(int code, int nUnique, Map dict) { return outarray; } - // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form. + // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form until index. // TODO: Compatibility with compress() and used data structures. Improve time/space complexity. private static AMapToData decompress(int[] codes, int nUnique, int nRows, int index) { // Validate input arguments. @@ -217,10 +216,14 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows, int in if (nRows <= 0) { throw new IllegalArgumentException("Invalid nRows: " + nRows); } - if (index > nRows){ + if (index > nRows) { throw new IllegalArgumentException("Index is larger than Data Length: " + index); } + // Return empty Map if index is zero. + if (index == 0) + return MapToFactory.create(0, nUnique); + // Maps: code -> packKey(prefixCode, lastSymbolOfPhrase). // Base symbols (0..nUnique-1) are implicit and not stored here. final Map dict = new HashMap<>(); @@ -233,7 +236,8 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows, int in // any dictionary entries. int old = codes[0]; int[] oldPhrase = unpack(old, nUnique, dict); - for (int v : oldPhrase){ + + for (int v : oldPhrase) { if (outPos == index) break; out.set(outPos++, v); } @@ -258,12 +262,14 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows, int in // Append the reconstructed phrase to the output mapping. for (int v : next) { - if (outPos == index) break; out.set(outPos++, v); + if (outPos == index) + // Stop immediately once done. + return out; } // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)). - int first = next[0]; + final int first = next[0]; dict.put(nextCode++, packKey(old, first)); // Advance. @@ -318,7 +324,7 @@ private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, in throw new DMLCompressionException("Invalid construction with empty dictionary"); if (_nRows <= 0) throw new DMLCompressionException("Invalid length of the data. 
is zero"); - if (_nUnique <= dict.getNumberOfValues(colIndexes.size())) + if (_nUnique != dict.getNumberOfValues(colIndexes.size())) throw new DMLCompressionException("Invalid map to dict Map has:" + _nUnique + " while dict has " + dict.getNumberOfValues(colIndexes.size())); int[] c = getCounts(); @@ -460,10 +466,10 @@ protected void computeColMxx(double[] c, Builtin builtin) { @Override public AColGroup sliceRows(int rl, int ru) { - try{ + try { AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru); return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null); - } catch(Exception e){ + } catch (Exception e) { throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e); } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index e7d73edde4f..4f02ce97ae7 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -47,7 +47,7 @@ public void testConvertToDDCLZWBasic() { int[] src = new int[]{ // repeating base pattern - 2, 0, 2, 1, 0, 2, 1, 0, 2, + 0,0,2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, // variation / shifted pattern From d39fad0e324f9622d431d74b50c5b642f45b83bc Mon Sep 17 00:00:00 2001 From: fjobs Date: Fri, 16 Jan 2026 16:26:36 +0100 Subject: [PATCH 18/24] [SYSTEMDS-3779] Reverted formatting of ColGroupDDC,ColGroupDDCLZW,ColGroupDDCTest back to correct formatting. Added LZWMappingIterator to decompress values on the fly without having to allocate full compression map [WIP]. Added Test class ColGroupDDCLZWTest. --- .../compress/colgroup/ColGroupDDC.java | 2174 +++++++++-------- .../compress/colgroup/ColGroupDDCLZW.java | 1131 +++++---- .../compress/colgroup/ColGroupDDCLZWTest.java | 76 + .../compress/colgroup/ColGroupDDCTest.java | 527 ++-- 4 files changed, 2038 insertions(+), 1870 deletions(-) create mode 100644 src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index c0d78e11783..140fde5af16 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -75,1081 +75,1101 @@ * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC). */ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { - private static final long serialVersionUID = -5769772089913918987L; - - protected final AMapToData _data; - - static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; - - protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - super(colIndexes, dict, cachedCounts); - _data = data; - - if (CompressedMatrixBlock.debug) { - if (getNumValues() == 0) - throw new DMLCompressionException("Invalid construction with empty dictionary"); - if (data.size() == 0) - throw new DMLCompressionException("Invalid length of the data. 
is zero"); - - if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " - + dict.getNumberOfValues(colIndexes.size())); - int[] c = getCounts(); - if (c.length != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid DDC Construction"); - data.verify(); - } - } - - public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - if (data.getUnique() == 1) - return ColGroupConst.create(colIndexes, dict); - else if (dict == null) - return new ColGroupEmpty(colIndexes); - else - return new ColGroupDDC(colIndexes, dict, data, cachedCounts); - } - - public AColGroup sparsifyFOR() { - return ColGroupDDCFOR.sparsifyFOR(this); - } - - public CompressionType getCompType() { - return CompressionType.DDC; - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - SparseBlock sb) { - for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = _data.getIndex(r); - if (sb.isEmpty(vr)) - continue; - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); - } - } - - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values) { - final int idxSize = _colIndexes.size(); - if (db.isContiguous()) { - final int nColOut = db.getDim(1); - if (idxSize == 1 && nColOut == 1) - decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); - else if (idxSize == 1) - decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); - else if (idxSize == nColOut) // offC == 0 implied - decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); - else if (offC == 0 && offR == 0) - decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); - else if (offC == 0) - decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); - else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } else - decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); - } - - private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - final double[] c = db.values(0); - final int nCols = db.getDim(1); - final int colOff = _colIndexes.get(0) + offC; - for (int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) - c[offT] += values[_data.getIndex(i)]; - - } - - @Override - public AMapToData getMapToData() { - return _data; - } - - private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, - int offC, double[] values) { - final double[] c = db.values(0); - decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data); - } - - private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, int offR, - double[] values, AMapToData data) { - data.decompressToRange(c, rl, ru, offR, values); - - } - - private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol) { - final double[] c = db.values(0); - for (int r = rl; r < ru; r++) { - final int start = 
_data.getIndex(r) * nCol; - final int offStart = (offR + r) * nCol; - LibMatrixMult.vectAdd(values, c, start, offStart, nCol); - } - } - - private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, - double[] values, int nCol, int colOut) { - int off = (rl + offR) * colOut; - for (int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { - final double[] c = db.values(offT); - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { - final int nCol = _colIndexes.size(); - final int nColU = db.getDim(1); - final double[] c = db.values(0); - for (int i = rl; i < ru; i++) { - final int off = i * nColU; - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; - final int rowIndex = _data.getIndex(i) * nCol; - _colIndexes.decompressVec(nCol, c, off, values, rowIndex); - } - } - - @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - SparseBlock sb) { - for (int r = rl, offT = rl + offR; r < ru; r++, offT++) { - final int vr = _data.getIndex(r); - if (sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for (int j = apos; j < alen; j++) - ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); - } - } - - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); - } - - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values, int nCol) { - for (int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final int rowIndex = _data.getIndex(i) * nCol; - for (int j = 0; j < nCol; j++) - ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); - } - } - - @Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { - for (int i = rl; i < ru; i++) { - final int vr = _data.getIndex(i); - if (sb.isEmpty(vr)) - continue; - final int apos = sb.pos(vr); - final int alen = sb.size(vr) + apos; - final int[] aix = sb.indexes(vr); - final double[] aval = sb.values(vr); - for (int j = apos; j < alen; j++) { - final int rowOut = _colIndexes.get(aix[j]); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - c[off + i] += aval[j]; - } - } - } - - @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { - final int nCol = _colIndexes.size(); - for (int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - final double[] c = db.values(rowOut); - final int off = db.pos(rowOut); - for (int i = rl; i < ru; i++) { - final double v = dict[_data.getIndex(i) * nCol + j]; - c[off + i] += v; - } - } - } - - @Override - protected void 
decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { - - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for (int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - for (int i = 0; i < _data.size(); i++) { - int di = _data.getIndex(i); - if (sb.isEmpty(di)) - continue; - - final int apos = sb.pos(di); - final int alen = sb.size(di) + apos; - final int[] aix = sb.indexes(di); - final double[] aval = sb.values(di); - - for (int j = apos; j < alen; j++) { - sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); - } - } - - } - - @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { - int[] colCounts = _dict.countNNZZeroColumns(getCounts()); - for (int j = 0; j < _colIndexes.size(); j++) - sbr.allocate(_colIndexes.get(j), colCounts[j]); - - final int nCol = _colIndexes.size(); - for (int j = 0; j < nCol; j++) { - final int rowOut = _colIndexes.get(j); - SparseRow r = sbr.get(rowOut); - - for (int i = 0; i < _data.size(); i++) { - final double v = dict[_data.getIndex(i) * nCol + j]; - r = r.append(i, v); - } - sbr.set(rowOut, r, false); - } - } - - @Override - public double getIdx(int r, int colIdx) { - return _dict.getValue(_data.getIndex(r), colIdx, _colIndexes.size()); - } - - @Override - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { - for (int rix = rl; rix < ru; rix++) - c[rix] += preAgg[_data.getIndex(rix)]; - } - - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - for (int i = rl; i < ru; i++) - c[i] = builtin.execute(c[i], preAgg[_data.getIndex(i)]); - } - - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - for (int rix = rl; rix < ru; rix++) - c[rix] *= preAgg[_data.getIndex(rix)]; - } - - @Override - public int[] getCounts(int[] counts) { - return _data.getCounts(counts); - } - - @Override - public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if (_colIndexes.size() == 1) - leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); - else - lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final DenseBlock retV = result.getDenseBlock(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
- if (matrix.isEmpty()) - return; - else if (matrix.isInSparseFormat()) { - if (cl != 0 || cu != _data.size()) - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); - } else if (!matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, - cu); - else - lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru) { - - if (retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru) { - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - - for (int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru) { - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int alen = sb.size(r) + apos; - final int[] aix = sb.indexes(r); - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for (int i = apos; i < alen; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if (retV.isContiguous()) - lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - // final int offR = r * nColRet; - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int apos = 
sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - continue; - final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - final int offR = r * nColRet; - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; - } - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, - int rl, int ru, int cl, int cu) { - if (retV.isContiguous()) - lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, - int nColRet, double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = ret.pos(r); - final double[] retV = ret.values(r); - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, - double[] vals, int rl, int ru, int cl, int cu) { - final int colOut = _colIndexes.get(0); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; - } - } - - private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - - if (matrix.isInSparseFormat()) - lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); - } - - private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - final DenseBlock db = result.getDenseBlock(); - final SparseBlock sb = matrix.getSparseBlock(); - - if (cl != 0 || cu != _data.size()) { - // sub part - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final double[] retV = db.values(r); - final int pos = db.pos(r); - lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); - } - } else { - for (int r = rl; r < ru; r++) - _data.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); - } - } - - private final void lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { - final int apos = sb.pos(r); - final int aposSkip = sb.posFIndexGTE(r, cl); - final int[] aix = sb.indexes(r); - if (aposSkip <= -1 || aix[apos + aposSkip] >= cu) - return; - 
final int alen = sb.size(r) + apos; - final double[] aval = sb.values(r); - for (int i = apos + aposSkip; i < alen && aix[i] < cu; i++) - _dict.multiplyScalar(aval[i], retV, offR, _data.getIndex(aix[i]), _colIndexes); - } - - private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - if (matrix.getDenseBlock().isContiguous()) - lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); - else - lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); - } - - private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final double[] retV = result.getDenseBlockValues(); - final int nColM = matrix.getNumColumns(); - final int nColRet = result.getNumColumns(); - final double[] mV = matrix.getDenseBlockValues(); - for (int r = rl; r < ru; r++) { - final int offL = r * nColM; - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); - } - } - - private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, - int cu) { - final double[] retV = result.getDenseBlockValues(); - final int nColRet = result.getNumColumns(); - for (int r = rl; r < ru; r++) { - final int offL = db.pos(r); - final double[] mV = db.values(r); - final int offR = r * nColRet; - for (int c = cl; c < cu; c++) - _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); - } - } - - @Override - public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); - } - - @Override - public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - DenseBlock db = that.getDenseBlock(); - DenseBlock retDB = ret.getDenseBlock(); - for (int i = rl; i < ru; i++) - leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); - } - - @Override - public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { - if (_dict instanceof IdentityDictionary) - identityRightDecompressingMult(right, ret, rl, ru, crl, cru); - else - defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); - } - - private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for (int i = rl; i < ru; i++) { - int k = _data.getIndex(i); - final int offOut = i * jd + crl; - final double aa = 1; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - - private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { - final double[] a = _dict.getValues(); - final double[] b = right.getDenseBlockValues(); - final double[] c = ret.getDenseBlockValues(); - final int kd = _colIndexes.size(); - final int jd = right.getNumColumns(); - final DoubleVector vVec = DoubleVector.zero(SPECIES); - final int vLen = SPECIES.length(); - - final int blkzI = 32; - 
final int blkzK = 24; - final int lenJ = cru - crl; - final int end = cru - (lenJ % vLen); - for (int bi = rl; bi < ru; bi += blkzI) { - final int bie = Math.min(ru, bi + blkzI); - for (int bk = 0; bk < kd; bk += blkzK) { - final int bke = Math.min(kd, bk + blkzK); - for (int i = bi; i < bie; i++) { - int offi = _data.getIndex(i) * kd; - final int offOut = i * jd + crl; - for (int k = bk; k < bke; k++) { - final double aa = a[offi + k]; - final int k_right = _colIndexes.get(k); - vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); - } - } - } - } - } - - final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) { - vVec = vVec.broadcast(aa); - final int offj = k * jd; - final int end = endT + offj; - for (int j = offj + crl; j < end; j += vLen, offOut += vLen) { - DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); - DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); - res = vVec.fma(bVec, res); - res.intoArray(c, offOut); - } - for (int j = end; j < cru + offj; j++, offOut++) { - double bb = b[j]; - c[offOut] += bb * aa; - } - } - - @Override - public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - if (cl != 0 || cu != _data.size()) { - throw new NotImplementedException(); - } - _data.preAggregateSparse(sb, preAgg, rl, ru); - } - - @Override - public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - try { - - _data.preAggregateDDC_DDC(that._data, that._dict, ret, that._colIndexes.size()); - } catch (Exception e) { - throw new CompressionException(that.toString(), e); - } - } - - @Override - public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - _data.preAggregateDDC_SDCZ(that._data, that._dict, that._indexes, ret, that._colIndexes.size()); - } - - @Override - public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - final AOffsetIterator itThat = that._indexes.getOffsetIterator(); - final int nCol = that._colIndexes.size(); - final int finalOff = that._indexes.getOffsetToLast(); - final double[] v = ret.getValues(); - while (true) { - final int to = _data.getIndex(itThat.value()); - that._dict.addToEntry(v, 0, to, nCol); - if (itThat.value() == finalOff) - break; - itThat.next(); - } - } - - @Override - protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - _data.preAggregateDDC_RLE(that._ptr, that._data, that._dict, ret, that._colIndexes.size()); - } - - @Override - public boolean sameIndexStructure(AColGroupCompressed that) { - return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data; - } - - @Override - public ColGroupType getColGroupType() { - return ColGroupType.DDC; - } - - @Override - public long estimateInMemorySize() { - long size = super.estimateInMemorySize(); - size += _data.getInMemorySize(); - return size; - } - - @Override - public AColGroup scalarOperation(ScalarOperator op) { - if ((op.fn instanceof Plus || op.fn instanceof Minus)) { - final double v0 = op.executeScalar(0); - if (v0 == 0) - return this; - final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); - return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); - } - return create(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); - } - - @Override - public AColGroup unaryOperation(UnaryOperator op) { - return create(_colIndexes, 
_dict.applyUnaryOp(op), _data, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); - return create(_colIndexes, ret, _data, getCachedCounts()); - } - - @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - if ((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && - ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { - final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); - return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); - } - final IDictionary ret; - if (_colIndexes.size() == 1) - ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); - else - ret = _dict.binOpRight(op, v, _colIndexes); - return create(_colIndexes, ret, _data, getCachedCounts()); - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - _data.write(out); - } - - public static ColGroupDDC read(DataInput in) throws IOException { - IColIndex cols = ColIndexFactory.read(in); - IDictionary dict = DictionaryFactory.read(in); - AMapToData data = MapToFactory.readIn(in); - return new ColGroupDDC(cols, dict, data, null); - } - - @Override - public long getExactSizeOnDisk() { - long ret = super.getExactSizeOnDisk(); - ret += _data.getExactSizeOnDisk(); - return ret; - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - final int nVals = getNumValues(); - final int nCols = getNumCols(); - return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); - } - - @Override - protected int numRowsToMultiply() { - return _data.size(); - } - - @Override - protected double computeMxx(double c, Builtin builtin) { - return _dict.aggregate(c, builtin); - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - _dict.aggregateCols(c, builtin, _colIndexes); - } - - @Override - public boolean containsValue(double pattern) { - return _dict.containsValue(pattern); - } - - @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - if (preAgg != null) - return create(colIndexes, preAgg, _data, getCachedCounts()); - else - return null; - } - - @Override - public AColGroup sliceRows(int rl, int ru) { - try { - return ColGroupDDC.create(_colIndexes, _dict, _data.slice(rl, ru), null); - } catch (Exception e) { - throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); - } - } - - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - return create(colIndexes, newDictionary, _data, getCachedCounts()); - } - - @Override - public AColGroup append(AColGroup g) { - if (g instanceof ColGroupDDC) { - if (g.getColIndices().equals(_colIndexes)) { - - ColGroupDDC gDDC = (ColGroupDDC) g; - if (gDDC._dict.equals(_dict)) { - AMapToData nd = _data.append(gDDC._data); - return create(_colIndexes, _dict, nd, null); - } else - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - } else - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); - } else - LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - @Override - public AColGroup appendNInternal(AColGroup[] g, int 
blen, int rlen) { - for (int i = 1; i < g.length; i++) { - if (!_colIndexes.equals(g[i]._colIndexes)) { - LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); - return null; - } - - if (!(g[i] instanceof ColGroupDDC)) { - LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); - return null; - } - - final ColGroupDDC gDDC = (ColGroupDDC) g[i]; - if (!gDDC._dict.equals(_dict)) { - LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); - return null; - } - } - AMapToData nd = _data.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); - return create(_colIndexes, _dict, nd, null); - } - - @Override - public ICLAScheme getCompressionScheme() { - return DDCScheme.create(this); - } - - @Override - public AColGroup recompress() { - return this; - } - - @Override - public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - try { - - IEncode enc = getEncoding(); - EstimationFactors ef = new EstimationFactors(_data.getUnique(), _data.size(), _data.size(), - _dict.getSparsity()); - return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); - } catch (Exception e) { - throw new DMLCompressionException(this.toString(), e); - } - } - - @Override - public IEncode getEncoding() { - return EncodingFactory.create(_data); - } - - @Override - protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), _data, getCachedCounts()); - } - - @Override - public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - final SparseBlock sb = selection.getSparseBlock(); - final SparseBlock retB = ret.getSparseBlock(); - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - @Override - protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; - final SparseBlock sb = selection.getSparseBlock(); - final DenseBlock retB = ret.getDenseBlock(); - for (int r = rl; r < ru; r++) { - if (sb.isEmpty(r)) - continue; - final int sPos = sb.pos(r); - final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 - decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); - } - } - - private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, - int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - if (_colIndexes instanceof RangeIndex) - leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); - else { - - pos += cl; // left side matrix position offset. 
- if (a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for (int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if (idx != nVal) - values2[pos2 + _colIndexes.get(idx)] += values[pos]; - } - } else { - for (int rc = cl; rc < cu; rc++, pos++) - values2[pos2 + _colIndexes.get(_data.getIndex(rc))] += values[pos]; - } - } - } - - private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2, int pos2, - int cl, int cu) { - IdentityDictionary a = (IdentityDictionary) _dict; - - final int firstCol = pos2 + _colIndexes.get(0); - pos += cl; // left side matrix position offset. - if (a.withEmpty()) { - final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1; - for (int rc = cl; rc < cu; rc++, pos++) { - final int idx = _data.getIndex(rc); - if (idx != nVal) - values2[firstCol + idx] += values[pos]; - } - } else { - for (int rc = cl; rc < cu; rc++, pos++) - values2[firstCol + _data.getIndex(rc)] += values[pos]; - } - } - - @Override - public AColGroup morph(CompressionType ct, int nRow) { - // return this; - if (ct == getCompType()) - return this; - else if (ct == CompressionType.SDC) { - // return this; - int[] counts = getCounts(); - int maxId = maxIndex(counts); - double[] def = _dict.getRow(maxId, _colIndexes.size()); - - int offsetSize = nRow - counts[maxId]; - int[] offsets = new int[offsetSize]; - AMapToData reducedData = MapToFactory.create(offsetSize, _data.getUnique()); - int o = 0; - for (int i = 0; i < nRow; i++) { - int v = _data.getIndex(i); - if (v != maxId) { - offsets[o] = i; - reducedData.set(o, v); - o++; - } - } - - return ColGroupSDC.create(_colIndexes, _data.size(), _dict, def, OffsetFactory.createOffset(offsets), - reducedData, null); - } else if (ct == CompressionType.CONST) { - // if(1 < getNumValues()) { - String thisS = this.toString(); - if (thisS.length() > 10000) - thisS = thisS.substring(0, 10000) + "..."; - LOG.warn("Tried to morph to const from DDC but impossible: " + thisS); - return this; - // } - } else if (ct == CompressionType.DDCFOR) - return this; // it does not make sense to change to FOR. - else - return super.morph(ct, nRow); - } - - private static int maxIndex(int[] counts) { - int id = 0; - for (int i = 1; i < counts.length; i++) { - if (counts[i] > counts[id]) { - id = i; - } - } - return id; - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List right) { - final IDictionary combined = combineDictionaries(nCol, right); - final IColIndex combinedColIndex = combineColIndexes(nCol, right); - return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); - } - - @Override - public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) { - IDictionary b = ((ColGroupDDC) right).getDictionary(); - IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols()); - IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol)); - return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts()); - } - - @Override - public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { - AMapToData[] maps = _data.splitReshapeDDC(multiplier); - AColGroup[] res = new AColGroup[multiplier]; - for (int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? 
_colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool) - throws Exception { - AMapToData[] maps = _data.splitReshapeDDCPushDown(multiplier, pool); - AColGroup[] res = new AColGroup[multiplier]; - for (int i = 0; i < multiplier; i++) { - final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg); - res[i] = create(ci, _dict, maps[i], null); - } - return res; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(super.toString()); - sb.append(String.format("\n%15s", "Data: ")); - sb.append(_data); - return sb.toString(); - } - - @Override - protected boolean allowShallowIdentityRightMult() { - return true; - } - - public AColGroup convertToDeltaDDC() { - int numCols = _colIndexes.size(); - int numRows = _data.size(); - - DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); - double[] rowDelta = new double[numCols]; - double[] prevRow = new double[numCols]; - DblArray dblArray = new DblArray(rowDelta); - int[] rowToDictId = new int[numRows]; - - double[] dictVals = _dict.getValues(); - - for (int i = 0; i < numRows; i++) { - int dictIdx = _data.getIndex(i); - int off = dictIdx * numCols; - for (int j = 0; j < numCols; j++) { - double val = dictVals[off + j]; - if (i == 0) { - rowDelta[j] = val; - prevRow[j] = val; - } else { - rowDelta[j] = val - prevRow[j]; - prevRow[j] = val; - } - } - - rowToDictId[i] = map.increment(dblArray); - } - - if (map.size() == 0) - return new ColGroupEmpty(_colIndexes); - - ACount[] vals = map.extractValues(); - final int nVals = vals.length; - final double[] dictValues = new double[nVals * numCols]; - final int[] oldIdToNewId = new int[map.size()]; - int idx = 0; - for (int i = 0; i < nVals; i++) { - final ACount dac = vals[i]; - final double[] arrData = dac.key().getData(); - System.arraycopy(arrData, 0, dictValues, idx, numCols); - oldIdToNewId[dac.id] = i; - idx += numCols; - } - - DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); - AMapToData newData = MapToFactory.create(numRows, nVals); - for (int i = 0; i < numRows; i++) { - newData.set(i, oldIdToNewId[rowToDictId[i]]); - } - return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); - } - - public AColGroup convertToDDCLZW() { - return ColGroupDDCLZW.create(_colIndexes, _dict, _data, null); - } + private static final long serialVersionUID = -5769772089913918987L; + + protected final AMapToData _data; + + static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; + + protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + super(colIndexes, dict, cachedCounts); + _data = data; + + if(CompressedMatrixBlock.debug) { + if(getNumValues() == 0) + throw new DMLCompressionException("Invalid construction with empty dictionary"); + if(data.size() == 0) + throw new DMLCompressionException("Invalid length of the data. 
is zero"); + + if(data.getUnique() != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException( + "Invalid map to dict Map has:" + data.getUnique() + " while dict has " + + dict.getNumberOfValues(colIndexes.size())); + int[] c = getCounts(); + if(c.length != dict.getNumberOfValues(colIndexes.size())) + throw new DMLCompressionException("Invalid DDC Construction"); + data.verify(); + } + } + + public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + if(data.getUnique() == 1) + return ColGroupConst.create(colIndexes, dict); + else if(dict == null) + return new ColGroupEmpty(colIndexes); + else + return new ColGroupDDC(colIndexes, dict, data, cachedCounts); + } + + public AColGroup sparsifyFOR() { + return ColGroupDDCFOR.sparsifyFOR(this); + } + + public CompressionType getCompType() { + return CompressionType.DDC; + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + for(int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = _data.getIndex(r); + if(sb.isEmpty(vr)) + continue; + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + _colIndexes.decompressToDenseFromSparse(sb, vr, off, c); + } + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { + final int idxSize = _colIndexes.size(); + if(db.isContiguous()) { + final int nColOut = db.getDim(1); + if(idxSize == 1 && nColOut == 1) + decompressToDenseBlockDenseDictSingleColOutContiguous(db, rl, ru, offR, offC, values); + else if(idxSize == 1) + decompressToDenseBlockDenseDictSingleColContiguous(db, rl, ru, offR, offC, values); + else if(idxSize == nColOut) // offC == 0 implied + decompressToDenseBlockDenseDictAllColumnsContiguous(db, rl, ru, offR, values, idxSize); + else if(offC == 0 && offR == 0) + decompressToDenseBlockDenseDictNoOff(db, rl, ru, values); + else if(offC == 0) + decompressToDenseBlockDenseDictNoColOffset(db, rl, ru, offR, values, idxSize, nColOut); + else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } + else + decompressToDenseBlockDenseDictGeneric(db, rl, ru, offR, offC, values, idxSize); + } + + private final void decompressToDenseBlockDenseDictSingleColContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + final double[] c = db.values(0); + final int nCols = db.getDim(1); + final int colOff = _colIndexes.get(0) + offC; + for(int i = rl, offT = (rl + offR) * nCols + colOff; i < ru; i++, offT += nCols) + c[offT] += values[_data.getIndex(i)]; + + } + + @Override + public AMapToData getMapToData() { + return _data; + } + + private final void decompressToDenseBlockDenseDictSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, + int offC, double[] values) { + final double[] c = db.values(0); + decompressToDenseBlockDenseDictSingleColOutContiguous(c, rl, ru, offR + _colIndexes.get(0), values, _data); + } + + private final static void decompressToDenseBlockDenseDictSingleColOutContiguous(double[] c, int rl, int ru, + int offR, double[] values, AMapToData data) { + data.decompressToRange(c, rl, ru, offR, values); + + } + + private final void decompressToDenseBlockDenseDictAllColumnsContiguous(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol) { + final double[] c = db.values(0); + for(int r = rl; r < ru; r++) { + final int start = 
_data.getIndex(r) * nCol; + final int offStart = (offR + r) * nCol; + LibMatrixMult.vectAdd(values, c, start, offStart, nCol); + } + } + + private final void decompressToDenseBlockDenseDictNoColOffset(DenseBlock db, int rl, int ru, int offR, + double[] values, int nCol, int colOut) { + int off = (rl + offR) * colOut; + for(int i = rl, offT = rl + offR; i < ru; i++, off += colOut) { + final double[] c = db.values(offT); + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictNoOff(DenseBlock db, int rl, int ru, double[] values) { + final int nCol = _colIndexes.size(); + final int nColU = db.getDim(1); + final double[] c = db.values(0); + for(int i = rl; i < ru; i++) { + final int off = i * nColU; + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + private final void decompressToDenseBlockDenseDictGeneric(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int rowIndex = _data.getIndex(i) * nCol; + _colIndexes.decompressVec(nCol, c, off, values, rowIndex); + } + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { + for(int r = rl, offT = rl + offR; r < ru; r++, offT++) { + final int vr = _data.getIndex(r); + if(sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for(int j = apos; j < alen; j++) + ret.append(offT, offC + _colIndexes.get(aix[j]), aval[j]); + } + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + decompressToSparseBlockDenseDictionary(ret, rl, ru, offR, offC, values, _colIndexes.size()); + } + + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values, int nCol) { + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final int rowIndex = _data.getIndex(i) * nCol; + for(int j = 0; j < nCol; j++) + ret.append(offT, _colIndexes.get(j) + offC, values[rowIndex + j]); + } + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { + for(int i = rl; i < ru; i++) { + final int vr = _data.getIndex(i); + if(sb.isEmpty(vr)) + continue; + final int apos = sb.pos(vr); + final int alen = sb.size(vr) + apos; + final int[] aix = sb.indexes(vr); + final double[] aval = sb.values(vr); + for(int j = apos; j < alen; j++) { + final int rowOut = _colIndexes.get(aix[j]); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + c[off + i] += aval[j]; + } + } + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + final int nCol = _colIndexes.size(); + for(int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + final double[] c = db.values(rowOut); + final int off = db.pos(rowOut); + for(int i = rl; i < ru; i++) { + final double v = dict[_data.getIndex(i) * nCol + j]; + c[off + i] += v; + } + } + } + + @Override + protected void 
decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { + + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for(int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + for(int i = 0; i < _data.size(); i++) { + int di = _data.getIndex(i); + if(sb.isEmpty(di)) + continue; + + final int apos = sb.pos(di); + final int alen = sb.size(di) + apos; + final int[] aix = sb.indexes(di); + final double[] aval = sb.values(di); + + for(int j = apos; j < alen; j++) { + sbr.append(_colIndexes.get(aix[j]), i, aval[apos]); + } + } + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { + int[] colCounts = _dict.countNNZZeroColumns(getCounts()); + for(int j = 0; j < _colIndexes.size(); j++) + sbr.allocate(_colIndexes.get(j), colCounts[j]); + + final int nCol = _colIndexes.size(); + for(int j = 0; j < nCol; j++) { + final int rowOut = _colIndexes.get(j); + SparseRow r = sbr.get(rowOut); + + for(int i = 0; i < _data.size(); i++) { + final double v = dict[_data.getIndex(i) * nCol + j]; + r = r.append(i, v); + } + sbr.set(rowOut, r, false); + } + } + + @Override + public double getIdx(int r, int colIdx) { + return _dict.getValue(_data.getIndex(r), colIdx, _colIndexes.size()); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + for(int rix = rl; rix < ru; rix++) + c[rix] += preAgg[_data.getIndex(rix)]; + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + for(int i = rl; i < ru; i++) + c[i] = builtin.execute(c[i], preAgg[_data.getIndex(i)]); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + for(int rix = rl; rix < ru; rix++) + c[rix] *= preAgg[_data.getIndex(rix)]; + } + + @Override + public int[] getCounts(int[] counts) { + return _data.getCounts(counts); + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if(_colIndexes.size() == 1) + leftMultByMatrixNoPreAggSingleCol(matrix, result, rl, ru, cl, cu); + else + lmMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void leftMultByMatrixNoPreAggSingleCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final DenseBlock retV = result.getDenseBlock(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] dictVals = _dict.getValues(); // guaranteed dense double since we only have one column. 
+ if(matrix.isEmpty()) + return; + else if(matrix.isInSparseFormat()) { + if(cl != 0 || cu != _data.size()) + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru, cl, + cu); + else + lmSparseMatrixNoPreAggSingleCol(matrix.getSparseBlock(), nColM, retV, nColRet, dictVals, rl, ru); + } + else if(!matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggSingleColNonContiguous(matrix.getDenseBlock(), nColM, retV, nColRet, dictVals, rl, ru, + cl, cu); + else + lmDenseMatrixNoPreAggSingleCol(matrix.getDenseBlockValues(), nColM, retV, nColRet, dictVals, rl, ru, cl, + cu); + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru) { + + if(retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru) { + final int colOut = _colIndexes.get(0); + + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + + for(int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru) { + final int colOut = _colIndexes.get(0); + + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int alen = sb.size(r) + apos; + final int[] aix = sb.indexes(r); + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for(int i = apos; i < alen; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleCol(SparseBlock sb, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if(retV.isContiguous()) + lmSparseMatrixNoPreAggSingleColContiguous(sb, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmSparseMatrixNoPreAggSingleColGeneric(sb, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggSingleColGeneric(SparseBlock sb, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + // final int offR = r * nColRet; + for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmSparseMatrixNoPreAggSingleColContiguous(SparseBlock sb, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int apos = sb.pos(r); + 
final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) + continue; + final int alen = sb.size(r) + apos; + final double[] aval = sb.values(r); + final int offR = r * nColRet; + for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + retV[offR + colOut] += aval[i] * vals[_data.getIndex(aix[i])]; + } + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguous(DenseBlock db, int nColM, DenseBlock retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(db, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleCol(double[] mV, int nColM, DenseBlock retV, int nColRet, double[] vals, + int rl, int ru, int cl, int cu) { + if(retV.isContiguous()) + lmDenseMatrixNoPreAggSingleColContiguous(mV, nColM, retV.valuesAt(0), nColRet, vals, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggSingleColGeneric(mV, nColM, retV, nColRet, vals, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggSingleColNonContiguousInGeneric(DenseBlock db, int nColM, DenseBlock ret, + int nColRet, double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for(int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for(int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColGeneric(double[] mV, int nColM, DenseBlock ret, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for(int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = ret.pos(r); + final double[] retV = ret.values(r); + for(int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmDenseMatrixNoPreAggSingleColContiguous(double[] mV, int nColM, double[] retV, int nColRet, + double[] vals, int rl, int ru, int cl, int cu) { + final int colOut = _colIndexes.get(0); + for(int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for(int c = cl; c < cu; c++) + retV[offR + colOut] += mV[offL + c] * vals[_data.getIndex(c)]; + } + } + + private void lmMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + + if(matrix.isInSparseFormat()) + lmSparseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiCol(matrix, result, rl, ru, cl, cu); + } + + private void lmSparseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final DenseBlock db = result.getDenseBlock(); + final SparseBlock sb = matrix.getSparseBlock(); + + if(cl != 0 || cu != _data.size()) { + // sub part + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final double[] retV = db.values(r); + final int pos = db.pos(r); + lmSparseMatrixRowColRange(sb, r, pos, retV, cl, cu); + } + } + else { + for(int r = rl; r < ru; r++) + _data.lmSparseMatrixRow(sb, r, db, _colIndexes, _dict); + } + } + + private final void lmSparseMatrixRowColRange(SparseBlock sb, int r, int offR, double[] retV, int cl, int cu) { + final int apos = sb.pos(r); + final int aposSkip = sb.posFIndexGTE(r, cl); + final int[] aix = sb.indexes(r); + if(aposSkip <= -1 || aix[apos + aposSkip] >= cu) + return; + final int alen = 
sb.size(r) + apos; + final double[] aval = sb.values(r); + for(int i = apos + aposSkip; i < alen && aix[i] < cu; i++) + _dict.multiplyScalar(aval[i], retV, offR, _data.getIndex(aix[i]), _colIndexes); + } + + private void lmDenseMatrixNoPreAggMultiCol(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + if(matrix.getDenseBlock().isContiguous()) + lmDenseMatrixNoPreAggMultiColContiguous(matrix, result, rl, ru, cl, cu); + else + lmDenseMatrixNoPreAggMultiColNonContiguous(matrix.getDenseBlock(), result, rl, ru, cl, cu); + } + + private void lmDenseMatrixNoPreAggMultiColContiguous(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final double[] retV = result.getDenseBlockValues(); + final int nColM = matrix.getNumColumns(); + final int nColRet = result.getNumColumns(); + final double[] mV = matrix.getDenseBlockValues(); + for(int r = rl; r < ru; r++) { + final int offL = r * nColM; + final int offR = r * nColRet; + for(int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); + } + } + + private void lmDenseMatrixNoPreAggMultiColNonContiguous(DenseBlock db, MatrixBlock result, int rl, int ru, int cl, + int cu) { + final double[] retV = result.getDenseBlockValues(); + final int nColRet = result.getNumColumns(); + for(int r = rl; r < ru; r++) { + final int offL = db.pos(r); + final double[] mV = db.values(r); + final int offR = r * nColRet; + for(int c = cl; c < cu; c++) + _dict.multiplyScalar(mV[offL + c], retV, offR, _data.getIndex(c), _colIndexes); + } + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + _data.preAggregateDense(m, preAgg, rl, ru, cl, cu); + } + + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + DenseBlock db = that.getDenseBlock(); + DenseBlock retDB = ret.getDenseBlock(); + for(int i = rl; i < ru; i++) + leftMMIdentityPreAggregateDenseSingleRow(db.values(i), db.pos(i), retDB.values(i), retDB.pos(i), cl, cu); + } + + @Override + public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, + int cru) { + if(_dict instanceof IdentityDictionary) + identityRightDecompressingMult(right, ret, rl, ru, crl, cru); + else + defaultRightDecompressingMult(right, ret, rl, ru, crl, cru); + } + + private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for(int i = rl; i < ru; i++) { + int k = _data.getIndex(i); + final int offOut = i * jd + crl; + final double aa = 1; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + + private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int crl, int cru) { + final double[] a = _dict.getValues(); + final double[] b = right.getDenseBlockValues(); + final double[] c = ret.getDenseBlockValues(); + final int kd = _colIndexes.size(); + final int jd = right.getNumColumns(); + final DoubleVector vVec = DoubleVector.zero(SPECIES); + final int vLen = SPECIES.length(); + + final int blkzI = 32; + final int blkzK = 24; + 
final int lenJ = cru - crl; + final int end = cru - (lenJ % vLen); + for(int bi = rl; bi < ru; bi += blkzI) { + final int bie = Math.min(ru, bi + blkzI); + for(int bk = 0; bk < kd; bk += blkzK) { + final int bke = Math.min(kd, bk + blkzK); + for(int i = bi; i < bie; i++) { + int offi = _data.getIndex(i) * kd; + final int offOut = i * jd + crl; + for(int k = bk; k < bke; k++) { + final double aa = a[offi + k]; + final int k_right = _colIndexes.get(k); + vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec); + } + } + } + } + } + + final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, + int vLen, DoubleVector vVec) { + vVec = vVec.broadcast(aa); + final int offj = k * jd; + final int end = endT + offj; + for(int j = offj + crl; j < end; j += vLen, offOut += vLen) { + DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut); + DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); + res = vVec.fma(bVec, res); + res.intoArray(c, offOut); + } + for(int j = end; j < cru + offj; j++, offOut++) { + double bb = b[j]; + c[offOut] += bb * aa; + } + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + if(cl != 0 || cu != _data.size()) { + throw new NotImplementedException(); + } + _data.preAggregateSparse(sb, preAgg, rl, ru); + } + + @Override + public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + try { + + _data.preAggregateDDC_DDC(that._data, that._dict, ret, that._colIndexes.size()); + } + catch(Exception e) { + throw new CompressionException(that.toString(), e); + } + } + + @Override + public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + _data.preAggregateDDC_SDCZ(that._data, that._dict, that._indexes, ret, that._colIndexes.size()); + } + + @Override + public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + final AOffsetIterator itThat = that._indexes.getOffsetIterator(); + final int nCol = that._colIndexes.size(); + final int finalOff = that._indexes.getOffsetToLast(); + final double[] v = ret.getValues(); + while(true) { + final int to = _data.getIndex(itThat.value()); + that._dict.addToEntry(v, 0, to, nCol); + if(itThat.value() == finalOff) + break; + itThat.next(); + } + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + _data.preAggregateDDC_RLE(that._ptr, that._data, that._dict, ret, that._colIndexes.size()); + } + + @Override + public boolean sameIndexStructure(AColGroupCompressed that) { + return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data; + } + + @Override + public ColGroupType getColGroupType() { + return ColGroupType.DDC; + } + + @Override + public long estimateInMemorySize() { + long size = super.estimateInMemorySize(); + size += _data.getInMemorySize(); + return size; + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + if((op.fn instanceof Plus || op.fn instanceof Minus)) { + final double v0 = op.executeScalar(0); + if(v0 == 0) + return this; + final double[] reference = ColGroupUtils.createReference(_colIndexes.size(), v0); + return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); + } + return create(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + return create(_colIndexes, _dict.applyUnaryOp(op), _data, getCachedCounts()); + } 
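+
+	// Worked example for the Plus/Minus fast path in scalarOperation above (hypothetical values,
+	// one column): with dict = {1.0, 5.0}, data = [0,1,0] and op = (+2), the group morphs into a
+	// DDCFOR group whose reference is [2.0] while the dictionary stays untouched, so cell r
+	// decodes as dict[data[r]] + reference[0], i.e. [1,5,1] + 2 = [3,7,3].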
+ + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + IDictionary ret = _dict.binOpLeft(op, v, _colIndexes); + return create(_colIndexes, ret, _data, getCachedCounts()); + } + + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + if((op.fn instanceof Plus || op.fn instanceof Minus) && _dict instanceof MatrixBlockDictionary && + ((MatrixBlockDictionary) _dict).getMatrixBlock().isInSparseFormat()) { + final double[] reference = ColGroupUtils.binaryDefRowRight(op, v, _colIndexes); + return ColGroupDDCFOR.create(_colIndexes, _dict, _data, getCachedCounts(), reference); + } + final IDictionary ret; + if(_colIndexes.size() == 1) + ret = _dict.applyScalarOp(new RightScalarOperator(op.fn, v[_colIndexes.get(0)])); + else + ret = _dict.binOpRight(op, v, _colIndexes); + return create(_colIndexes, ret, _data, getCachedCounts()); + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + _data.write(out); + } + + public static ColGroupDDC read(DataInput in) throws IOException { + IColIndex cols = ColIndexFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); + AMapToData data = MapToFactory.readIn(in); + return new ColGroupDDC(cols, dict, data, null); + } + + @Override + public long getExactSizeOnDisk() { + long ret = super.getExactSizeOnDisk(); + ret += _data.getExactSizeOnDisk(); + return ret; + } + + @Override + public double getCost(ComputationCostEstimator e, int nRows) { + final int nVals = getNumValues(); + final int nCols = getNumCols(); + return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); + } + + @Override + protected int numRowsToMultiply() { + return _data.size(); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + public boolean containsValue(double pattern) { + return _dict.containsValue(pattern); + } + + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + if(preAgg != null) + return create(colIndexes, preAgg, _data, getCachedCounts()); + else + return null; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + try { + return ColGroupDDC.create(_colIndexes, _dict, _data.slice(rl, ru), null); + } + catch(Exception e) { + throw new DMLRuntimeException("Failed to slice out sub part DDC: " + rl + " " + ru, e); + } + } + + @Override + protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { + return create(colIndexes, newDictionary, _data, getCachedCounts()); + } + + @Override + public AColGroup append(AColGroup g) { + if(g instanceof ColGroupDDC) { + if(g.getColIndices().equals(_colIndexes)) { + + ColGroupDDC gDDC = (ColGroupDDC) g; + if(gDDC._dict.equals(_dict)) { + AMapToData nd = _data.append(gDDC._data); + return create(_colIndexes, _dict, nd, null); + } + else + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + } + else + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g.getColIndices()); + } + else + LOG.warn("Not DDC but " + g.getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + @Override + public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { + for(int i = 1; i < g.length; i++) { + 
if(!_colIndexes.equals(g[i]._colIndexes)) { + LOG.warn("Not same columns therefore not appending DDC\n" + _colIndexes + "\n\n" + g[i]._colIndexes); + return null; + } + + if(!(g[i] instanceof ColGroupDDC)) { + LOG.warn("Not DDC but " + g[i].getClass().getSimpleName() + ", therefore not appending DDC"); + return null; + } + + final ColGroupDDC gDDC = (ColGroupDDC) g[i]; + if(!gDDC._dict.equals(_dict)) { + LOG.warn("Not same Dictionaries therefore not appending DDC\n" + _dict + "\n\n" + gDDC._dict); + return null; + } + } + AMapToData nd = _data.appendN(Arrays.copyOf(g, g.length, IMapToDataGroup[].class)); + return create(_colIndexes, _dict, nd, null); + } + + @Override + public ICLAScheme getCompressionScheme() { + return DDCScheme.create(this); + } + + @Override + public AColGroup recompress() { + return this; + } + + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + try { + + IEncode enc = getEncoding(); + EstimationFactors ef = new EstimationFactors(_data.getUnique(), _data.size(), _data.size(), + _dict.getSparsity()); + return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc); + } + catch(Exception e) { + throw new DMLCompressionException(this.toString(), e); + } + } + + @Override + public IEncode getEncoding() { + return EncodingFactory.create(_data); + } + + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + return ColGroupDDC.create(newColIndex, _dict.reorder(reordering), _data, getCachedCounts()); + } + + @Override + public void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + final SparseBlock sb = selection.getSparseBlock(); + final SparseBlock retB = ret.getSparseBlock(); + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToSparseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + @Override + protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + // morph(CompressionType.UNCOMPRESSED, _data.size()).sparseSelection(selection, ret, rl, ru);; + final SparseBlock sb = selection.getSparseBlock(); + final DenseBlock retB = ret.getDenseBlock(); + for(int r = rl; r < ru; r++) { + if(sb.isEmpty(r)) + continue; + final int sPos = sb.pos(r); + final int rowCompressed = sb.indexes(r)[sPos]; // column index with 1 + decompressToDenseBlock(retB, rowCompressed, rowCompressed + 1, r - rowCompressed, 0); + } + } + + private void leftMMIdentityPreAggregateDenseSingleRow(double[] values, int pos, double[] values2, int pos2, int cl, + int cu) { + IdentityDictionary a = (IdentityDictionary) _dict; + if(_colIndexes instanceof RangeIndex) + leftMMIdentityPreAggregateDenseSingleRowRangeIndex(values, pos, values2, pos2, cl, cu); + else { + + pos += cl; // left side matrix position offset. 
+			if(a.withEmpty()) {
+				final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1;
+				for(int rc = cl; rc < cu; rc++, pos++) {
+					final int idx = _data.getIndex(rc);
+					if(idx != nVal)
+						values2[pos2 + _colIndexes.get(idx)] += values[pos];
+				}
+			}
+			else {
+				for(int rc = cl; rc < cu; rc++, pos++)
+					values2[pos2 + _colIndexes.get(_data.getIndex(rc))] += values[pos];
+			}
+		}
+	}
+
+	private void leftMMIdentityPreAggregateDenseSingleRowRangeIndex(double[] values, int pos, double[] values2,
+		int pos2, int cl, int cu) {
+		IdentityDictionary a = (IdentityDictionary) _dict;
+
+		final int firstCol = pos2 + _colIndexes.get(0);
+		pos += cl; // left side matrix position offset.
+		if(a.withEmpty()) {
+			final int nVal = _dict.getNumberOfValues(_colIndexes.size()) - 1;
+			for(int rc = cl; rc < cu; rc++, pos++) {
+				final int idx = _data.getIndex(rc);
+				if(idx != nVal)
+					values2[firstCol + idx] += values[pos];
+			}
+		}
+		else {
+			for(int rc = cl; rc < cu; rc++, pos++)
+				values2[firstCol + _data.getIndex(rc)] += values[pos];
+		}
+	}
+
+	@Override
+	public AColGroup morph(CompressionType ct, int nRow) {
+		// return this;
+		if(ct == getCompType())
+			return this;
+		else if(ct == CompressionType.SDC) {
+			// return this;
+			int[] counts = getCounts();
+			int maxId = maxIndex(counts);
+			double[] def = _dict.getRow(maxId, _colIndexes.size());
+
+			int offsetSize = nRow - counts[maxId];
+			int[] offsets = new int[offsetSize];
+			AMapToData reducedData = MapToFactory.create(offsetSize, _data.getUnique());
+			int o = 0;
+			for(int i = 0; i < nRow; i++) {
+				int v = _data.getIndex(i);
+				if(v != maxId) {
+					offsets[o] = i;
+					reducedData.set(o, v);
+					o++;
+				}
+			}
+
+			return ColGroupSDC.create(_colIndexes, _data.size(), _dict, def, OffsetFactory.createOffset(offsets),
+				reducedData, null);
+		}
+		else if(ct == CompressionType.CONST) {
+			// if(1 < getNumValues()) {
+			String thisS = this.toString();
+			if(thisS.length() > 10000)
+				thisS = thisS.substring(0, 10000) + "...";
+			LOG.warn("Tried to morph to const from DDC but impossible: " + thisS);
+			return this;
+			// }
+		}
+		else if(ct == CompressionType.DDCFOR)
+			return this; // it does not make sense to change to FOR.
+		else
+			return super.morph(ct, nRow);
+	}
+
+	private static int maxIndex(int[] counts) {
+		int id = 0;
+		for(int i = 1; i < counts.length; i++) {
+			if(counts[i] > counts[id]) {
+				id = i;
+			}
+		}
+		return id;
+	}
+
+	@Override
+	public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, List<AColGroup> right) {
+		final IDictionary combined = combineDictionaries(nCol, right);
+		final IColIndex combinedColIndex = combineColIndexes(nCol, right);
+		return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts());
+	}
+
+	@Override
+	public AColGroupCompressed combineWithSameIndex(int nRow, int nCol, AColGroup right) {
+		IDictionary b = ((ColGroupDDC) right).getDictionary();
+		IDictionary combined = DictionaryFactory.cBindDictionaries(_dict, b, this.getNumCols(), right.getNumCols());
+		IColIndex combinedColIndex = _colIndexes.combine(right.getColIndices().shift(nCol));
+		return new ColGroupDDC(combinedColIndex, combined, _data, getCachedCounts());
+	}
+
+	@Override
+	public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
+		AMapToData[] maps = _data.splitReshapeDDC(multiplier);
+		AColGroup[] res = new AColGroup[multiplier];
+		for(int i = 0; i < multiplier; i++) {
+			final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg);
+			res[i] = create(ci, _dict, maps[i], null);
+		}
+		return res;
+	}
+
+	@Override
+	public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, ExecutorService pool)
+		throws Exception {
+		AMapToData[] maps = _data.splitReshapeDDCPushDown(multiplier, pool);
+		AColGroup[] res = new AColGroup[multiplier];
+		for(int i = 0; i < multiplier; i++) {
+			final IColIndex ci = i == 0 ? _colIndexes : _colIndexes.shift(i * nColOrg);
+			res[i] = create(ci, _dict, maps[i], null);
+		}
+		return res;
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append(super.toString());
+		sb.append(String.format("\n%15s", "Data: "));
+		sb.append(_data);
+		return sb.toString();
+	}
+
+	@Override
+	protected boolean allowShallowIdentityRightMult() {
+		return true;
+	}
+
+	public AColGroup convertToDeltaDDC() {
+		int numCols = _colIndexes.size();
+		int numRows = _data.size();
+
+		DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64));
+		double[] rowDelta = new double[numCols];
+		double[] prevRow = new double[numCols];
+		DblArray dblArray = new DblArray(rowDelta);
+		int[] rowToDictId = new int[numRows];
+
+		double[] dictVals = _dict.getValues();
+
+		for(int i = 0; i < numRows; i++) {
+			int dictIdx = _data.getIndex(i);
+			int off = dictIdx * numCols;
+			for(int j = 0; j < numCols; j++) {
+				double val = dictVals[off + j];
+				if(i == 0) {
+					rowDelta[j] = val;
+					prevRow[j] = val;
+				}
+				else {
+					rowDelta[j] = val - prevRow[j];
+					prevRow[j] = val;
+				}
+			}
+
+			rowToDictId[i] = map.increment(dblArray);
+		}
+
+		if(map.size() == 0)
+			return new ColGroupEmpty(_colIndexes);
+
+		ACount<DblArray>[] vals = map.extractValues();
+		final int nVals = vals.length;
+		final double[] dictValues = new double[nVals * numCols];
+		final int[] oldIdToNewId = new int[map.size()];
+		int idx = 0;
+		for(int i = 0; i < nVals; i++) {
+			final ACount<DblArray> dac = vals[i];
+			final double[] arrData = dac.key().getData();
+			System.arraycopy(arrData, 0, dictValues, idx, numCols);
+			oldIdToNewId[dac.id] = i;
+			idx += numCols;
+		}
+
+		DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols);
+		AMapToData newData = MapToFactory.create(numRows, nVals);
+		for(int i = 0; i < numRows; i++) {
+			newData.set(i, oldIdToNewId[rowToDictId[i]]);
+		}
+		return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null);
+	}
+
+	public AColGroup convertToDDCLZW() {
+		return ColGroupDDCLZW.create(_colIndexes, _dict, _data, null);
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index 50c37936943..a8c279828fb 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -22,9 +22,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
+import java.util.*;
 import java.util.concurrent.ExecutorService;
 
 import jdk.incubator.vector.DoubleVector;
@@ -71,578 +69,659 @@
 import shaded.parquet.it.unimi.dsi.fastutil.ints.IntArrayList;
 import shaded.parquet.it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap;
 
-import java.util.Map;
 import java.util.HashMap;
-import java.util.Stack;
 
 /**
  * Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC) whose
- * mapping vector is
additionally lzw compressed. - * Idea: - * - DDCLZW stores the mapping vector exclusively in compressed form. - * - No persistent MapToData cache is maintained. - * - Sequential operations decode on-the-fly, while operations requiring random access explicitly materialize and fall back to DDC. + * mapping vector is additionally lzw compressed. Idea: - DDCLZW stores the mapping vector exclusively in compressed + * form. - No persistent MapToData cache is maintained. - Sequential operations decode on-the-fly, while operations + * requiring random access explicitly materialize and fall back to DDC. */ public class ColGroupDDCLZW extends APreAgg implements IMapToDataGroup { - private static final long serialVersionUID = -5769772089913918987L; - - private final int[] _dataLZW; // LZW compressed representation of the mapping - private final int _nRows; // Number of rows in the mapping vector - private final int _nUnique; // Number of unique values in the mapping vector - - // Builds a packed 64-bit key for (prefixCode(w), nextSymbol(k)) pairs used in the LZW dictionary. (TODO) - private static long packKey(int prefixCode, int nextSymbol) { - return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); - } - - // Compresses a mapping (AMapToData) into an LZW-compressed byte/integer/? array. (TODO) - private static int[] compress(AMapToData data) { - if (data == null) - throw new IllegalArgumentException("Invalid input: data is null"); - - final int nRows = data.size(); - if (nRows <= 0) { - throw new IllegalArgumentException("Invalid input: data has no rows"); - } - - final int nUnique = data.getUnique(); - if (nUnique <= 0) { - throw new IllegalArgumentException("Invalid input: data has no unique values"); - } - - // Fast-path: single symbol - if (nRows == 1) - return new int[]{data.getIndex(0)}; - - - // LZW dictionary. Maps (prefixCode, nextSymbol) -> newCode (to a new code). - // Using fastutil keeps lookups fast. (TODO improve time/space complexity) - final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); - dict.defaultReturnValue(-1); - - // Output buffer (heuristic capacity; avoids frequent reallocs) - final IntArrayList out = new IntArrayList(Math.max(16, nRows / 2)); - - // Codes {0,...,nUnique - 1} are reserved for the original symbols. - int nextCode = nUnique; - - // Initialize w with the first input symbol. - // AMapToData stores dictionary indices, not actual data values. - // Since indices reference positions in an IDictionary, they are always in the valid index range 0 … nUnique−1; - int w = data.getIndex(0); - - // Process the remaining input symbols. - // Example: _data = [2,0,2,3,0,2,1,0,2]. - for (int i = 1; i < nRows; i++) { - final int k = data.getIndex(i); // next input symbol - - if (k < 0 || k >= nUnique) - throw new IllegalArgumentException("Symbol out of range: " + k + " (nUnique=" + nUnique + ")"); - - final long key = packKey(w, k); // encode (w,k) into long key - - int wk = dict.get(key); // look if wk exists in dict - if (wk != -1) { - w = wk; // wk exists in dict so replace w by wk and continue. - } else { - // wk does not exist in dict. output current phrase, add new phrase, restart at k - out.add(w); - dict.put(key, nextCode++); - w = k; // Start new phrase with k - } - } - - out.add(w); - return out.toIntArray(); - } - - // Unpack upper 32 bits (w) of (w,k) key pair. - private static int unpackfirst(long key) { - return (int) (key >>> 32); - } - - // Unpack lower 32 bits (k) of (w,k) key pair. 
- private static int unpacksecond(long key) { - return (int) (key); - } - - // Append symbol to end of int-array. - private static int[] packint(int[] arr, int last) { - int[] result = Arrays.copyOf(arr, arr.length + 1); - result[arr.length] = last; - return result; - } - - // Reconstruct phrase to lzw-code. - private static int[] unpack(int code, int nUnique, Map dict) { - // Base symbol (implicit alphabet) - if (code < nUnique) - return new int[]{code}; - - Stack stack = new Stack<>(); - int c = code; - - while (c >= nUnique) { - Long key = dict.get(c); - if (key == null) - throw new IllegalStateException("Missing dictionary entry for code: " + c); - - int symbol = unpacksecond(key); - stack.push(symbol); - c = unpackfirst(key); - } - - // Basissymbol - stack.push(c); - int[] outarray = new int[stack.size()]; - int i = 0; - // korrekt ins Output schreiben - while (!stack.isEmpty()) { - outarray[i++] = stack.pop(); - } - return outarray; - } - - // Decompresses an LZW-compressed vector into its pre-compressed AMapToData form until index. - // TODO: Compatibility with compress() and used data structures. Improve time/space complexity. - private static AMapToData decompress(int[] codes, int nUnique, int nRows, int index) { - // Validate input arguments. - if (codes == null) - throw new IllegalArgumentException("codes is null"); - if (codes.length == 0) - throw new IllegalArgumentException("codes is empty"); - if (nUnique <= 0) - throw new IllegalArgumentException("Invalid alphabet size: " + nUnique); - if (nRows <= 0) { - throw new IllegalArgumentException("Invalid nRows: " + nRows); - } - if (index > nRows) { - throw new IllegalArgumentException("Index is larger than Data Length: " + index); - } - - // Return empty Map if index is zero. - if (index == 0) - return MapToFactory.create(0, nUnique); - - // Maps: code -> packKey(prefixCode, lastSymbolOfPhrase). - // Base symbols (0..nUnique-1) are implicit and not stored here. - final Map dict = new HashMap<>(); - - // Output mapping that will be reconstructed. - AMapToData out = MapToFactory.create(index, nUnique); - int outPos = 0; // Current write position in the output mapping. - - // Decode the first code. The first code always expands to a valid phrase without needing - // any dictionary entries. - int old = codes[0]; - int[] oldPhrase = unpack(old, nUnique, dict); - - for (int v : oldPhrase) { - if (outPos == index) break; - out.set(outPos++, v); - } - - // Next free dictionary code. Codes 0..nUnique-1 are reserved for base symbols. - int nextCode = nUnique; - - // Process remaining codes. - for (int i = 1; i < codes.length; i++) { - int key = codes[i]; - - int[] next; - if (key < nUnique || dict.containsKey(key)) { - // Normal case: The code is either a base symbol or already present in the dictionary. - next = unpack(key, nUnique, dict); - } else { - // KwKwK special case: The current code refers to a phrase that is being defined right now. - // next = oldPhrase + first(oldPhrase). - int first = oldPhrase[0]; - next = packint(oldPhrase, first); - } - - // Append the reconstructed phrase to the output mapping. - for (int v : next) { - out.set(outPos++, v); - if (outPos == index) - // Stop immediately once done. - return out; - } - - // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)). - final int first = next[0]; - dict.put(nextCode++, packKey(old, first)); - - // Advance. - old = key; - oldPhrase = next; - } - - // Safety check: decoder must produce exactly nRows symbols. 
- if (outPos != index) - throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + index); - - // Return the reconstructed mapping. - return out; - } - - // Build Constructor: Used when creating a new DDCLZW instance during compression/build time. (TODO) - private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - super(colIndexes, dict, cachedCounts); - - // Derive metadadata - _nRows = data.size(); - _nUnique = dict.getNumberOfValues(colIndexes.size()); - - // Compress mapping to LZW - _dataLZW = compress(data); - - if (CompressedMatrixBlock.debug) { - if (getNumValues() == 0) - throw new DMLCompressionException("Invalid construction with empty dictionary"); - if (_nRows == 0) - throw new DMLCompressionException("Invalid length of the data. is zero"); - if (data.getUnique() != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + data.getUnique() + " while dict has " - + dict.getNumberOfValues(colIndexes.size())); - int[] c = getCounts(); - if (c.length != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid DDC Construction"); - data.verify(); - } - } - - // Read Constructor: Used when creating this group from a serialized form (e.g., reading a compressed matrix from disk/memory stream). (TODO) - private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, int nRows, int nUnique, int[] cachedCounts) { - super(colIndexes, dict, cachedCounts); - - _dataLZW = dataLZW; - _nRows = nRows; - _nUnique = nUnique; - - if (CompressedMatrixBlock.debug) { - if (getNumValues() == 0) - throw new DMLCompressionException("Invalid construction with empty dictionary"); - if (_nRows <= 0) - throw new DMLCompressionException("Invalid length of the data. is zero"); - if (_nUnique != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid map to dict Map has:" + _nUnique + " while dict has " - + dict.getNumberOfValues(colIndexes.size())); - int[] c = getCounts(); - if (c.length != dict.getNumberOfValues(colIndexes.size())) - throw new DMLCompressionException("Invalid DDC Construction"); - - // Optional: validate that decoding works (expensive) - // AMapToData decoded = decode(_dataLZW, _nRows, _nUnique); - // decoded.verify(); - } - } - - // Factory method for creating a column group. (AColGroup g = ColGroupDDCLZW.create(...);) - public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - if (dict == null) - return new ColGroupEmpty(colIndexes); - else if (data.getUnique() == 1) - return ColGroupConst.create(colIndexes, dict); - else - return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts); - } - - /* - * TODO: Operations with complex access patterns shall be uncompressed to ddc format. - * ... return ColGroupDDC.create(...,decompress(_dataLZW),...). We need to decide which methods are - * suitable for sequential and which arent. 
those who arent then we shall materialize and fall back to ddc - * */ - - public AColGroup convertToDDC() { - final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, _nRows); - final int[] counts = getCounts(); // may be null depending on your group - return ColGroupDDC.create(_colIndexes, _dict, map, counts); - } - - public AColGroup convertToDDC(int index) { - final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, index); - final int[] counts = getCounts(); // may be null depending on your group - return ColGroupDDC.create(_colIndexes, _dict, map, counts); - } - - // Deserialize ColGroupDDCLZW object in binary stream. - public static ColGroupDDCLZW read(DataInput in) throws IOException { - final IColIndex colIndexes = ColIndexFactory.read(in); - final IDictionary dict = DictionaryFactory.read(in); - - // Metadata for lzw mapping. - final int nRows = in.readInt(); - final int nUnique = in.readInt(); - - // Read compressed mapping array. - final int len = in.readInt(); - if (len < 0) - throw new IOException("Invalid LZW data length: " + len); - - final int[] dataLZW = new int[len]; - for (int i = 0; i < len; i++) - dataLZW[i] = in.readInt(); - - // cachedCounts currently not serialized (mirror ColGroupDDC.read which passes null) - return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null); - } - - // Serialize a ColGroupDDC-object into binary stream. - @Override - public void write(DataOutput out) throws IOException { - _colIndexes.write(out); - _dict.write(out); - out.writeInt(_nRows); - out.writeInt(_nUnique); - out.writeInt(_dataLZW.length); - for (int i : _dataLZW) out.writeInt(i); - } - - @Override - public double getIdx(int r, int colIdx) { - // TODO: soll schnell sein - final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, r); - // TODO: ColumnIndex - return map.getIndex(r); - } - - @Override - public CompressionType getCompType() { - return CompressionType.DDCLZW; - } - - @Override - protected ColGroupType getColGroupType() { - return ColGroupType.DDCLZW; - } - - @Override - public boolean containsValue(double pattern) { - return _dict.containsValue(pattern); - } - - @Override - public double getCost(ComputationCostEstimator e, int nRows) { - final int nVals = getNumValues(); - final int nCols = getNumCols(); - return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity()); - } - - @Override - public ICLAScheme getCompressionScheme() { - //TODO: in ColGroupDDCFor nicht implementiert - sollen wir das erstellen? Inhalt: ncols wie DDC - throw new NotImplementedException(); - } - - @Override - protected int numRowsToMultiply() { - return _nRows; - } - - @Override - protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) { - return new ColGroupDDCLZW(colIndexes, newDictionary, _dataLZW, _nRows, _nUnique, getCachedCounts()); - } - - @Override - public AMapToData getMapToData() { - throw new NotImplementedException(); // or decompress and return data... 
decompress(_dataLZW, _nUnique, _nRows, _nRows) - } - - @Override - public boolean sameIndexStructure(AColGroupCompressed that) { - return that instanceof ColGroupDDCLZW && ((ColGroupDDCLZW) that)._dataLZW == _dataLZW; - } - - @Override - protected double computeMxx(double c, Builtin builtin) { - return _dict.aggregate(c, builtin); - } - - @Override - protected void computeColMxx(double[] c, Builtin builtin) { - _dict.aggregateCols(c, builtin, _colIndexes); - } - - @Override - public AColGroup sliceRows(int rl, int ru) { - try { - AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru); - return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null); - } catch (Exception e) { - throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e); - } - } - - - @Override - protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { - - } - - @Override - protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { - - } - - @Override - protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) { - - } - - @Override - protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { - - } - - @Override - protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) { - - } - - @Override - protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { + private static final long serialVersionUID = -5769772089913918987L; + + private final int[] _dataLZW; // LZW compressed representation of the mapping + private final int _nRows; // Number of rows in the mapping vector + private final int _nUnique; // Number of unique values in the mapping vector + + // Builds a packed 64-bit key for (prefixCode(w), nextSymbol(k)) pairs used in the LZW dictionary. (TODO) + private static long packKey(int prefixCode, int nextSymbol) { + return (((long) prefixCode) << 32) | (nextSymbol & 0xffffffffL); + } + + // Compresses a mapping (AMapToData) into an LZW-compressed byte/integer/? array. + private static int[] compress(AMapToData data) { + if(data == null) + throw new IllegalArgumentException("Invalid input: data is null"); + + final int nRows = data.size(); + if(nRows <= 0) { + throw new IllegalArgumentException("Invalid input: data has no rows"); + } + + final int nUnique = data.getUnique(); + if(nUnique <= 0) { + throw new IllegalArgumentException("Invalid input: data has no unique values"); + } + + // Fast-path: single symbol + if(nRows == 1) + return new int[] {data.getIndex(0)}; + + // LZW dictionary. Maps (prefixCode, nextSymbol) -> newCode (to a new code). + // Using fastutil keeps lookups fast. (TODO improve time/space complexity) + final Long2IntLinkedOpenHashMap dict = new Long2IntLinkedOpenHashMap(1 << 16); + dict.defaultReturnValue(-1); + + // Output buffer (heuristic capacity; avoids frequent reallocs) + final IntArrayList out = new IntArrayList(Math.max(16, nRows / 2)); + + // Codes {0,...,nUnique - 1} are reserved for the original symbols. + int nextCode = nUnique; + + // Initialize w with the first input symbol. + // AMapToData stores dictionary indices, not actual data values. 
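+		// (For intuition: a column with two distinct values such as {3.1, 7.2} is mapped to the
+		// symbols 0 and 1 per row, so LZW runs over small integer symbols rather than doubles.)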
+		// Since indices reference positions in an IDictionary, they are always in the valid index range
+		// 0 ... nUnique-1.
+		int w = data.getIndex(0);
+
+		// Process the remaining input symbols.
+		// Example: _data = [2,0,2,3,0,2,1,0,2].
+		for(int i = 1; i < nRows; i++) {
+			final int k = data.getIndex(i); // next input symbol
+
+			if(k < 0 || k >= nUnique)
+				throw new IllegalArgumentException("Symbol out of range: " + k + " (nUnique=" + nUnique + ")");
+
+			final long key = packKey(w, k); // encode (w,k) into a long key
+
+			int wk = dict.get(key); // look up whether the phrase (w,k) already has a code
+			if(wk != -1) {
+				w = wk; // (w,k) exists in the dictionary, so replace w by its code and continue.
+			}
+			else {
+				// (w,k) does not exist in the dictionary: output the current phrase, add the new
+				// phrase, and restart at k.
+				out.add(w);
+				dict.put(key, nextCode++);
+				w = k; // Start a new phrase with k.
+			}
+		}
+
+		out.add(w);
+		return out.toIntArray();
+	}
+
+	// Unpack upper 32 bits (w) of a (w,k) key pair.
+	private static int unpackfirst(long key) {
+		return (int) (key >>> 32);
+	}
+
+	// Unpack lower 32 bits (k) of a (w,k) key pair.
+	private static int unpacksecond(long key) {
+		return (int) (key);
+	}
+
+	// Append a symbol to the end of an int array.
+	private static int[] packint(int[] arr, int last) {
+		int[] result = Arrays.copyOf(arr, arr.length + 1);
+		result[arr.length] = last;
+		return result;
+	}
+
+	// Reconstruct the phrase for an lzw-code.
+	private static int[] unpack(int code, int nUnique, Map<Integer, Long> dict) {
+		// Base symbol (implicit alphabet)
+		if(code < nUnique)
+			return new int[] {code};
+
+		Stack<Integer> stack = new Stack<>();
+		int c = code;
+
+		while(c >= nUnique) {
+			Long key = dict.get(c);
+			if(key == null)
+				throw new IllegalStateException("Missing dictionary entry for code: " + c);
+
+			int symbol = unpacksecond(key);
+			stack.push(symbol);
+			c = unpackfirst(key);
+		}
+
+		// Base symbol.
+		stack.push(c);
+		int[] outarray = new int[stack.size()];
+		int i = 0;
+		// Write the symbols to the output in the correct order.
+		while(!stack.isEmpty()) {
+			outarray[i++] = stack.pop();
+		}
+		return outarray;
+	}
+
+	// Decompresses the full LZW-compressed vector into its pre-compressed AMapToData form.
+	private static AMapToData decompressFull(int[] codes, int nUnique, int nRows) {
+		return decompress(codes, nUnique, nRows, nRows);
+	}
+
+	private final class LZWMappingIterator {
+		private final Map<Integer, Long> dict = new HashMap<>(); // LZW-dictionary. Maps code -> (prefixCode, nextSymbol).
+		private int lzwIndex = 0; // Current position in the LZW-compressed mapping (_dataLZW).
+		private int mapIndex = 0; // Number of mapping symbols returned so far.
+		private int nextCode = _nUnique; // Next free LZW code.
+		private int[] currentPhrase = null; // Current phrase being decoded from the LZW-compressed mapping.
+		private int currentPhraseIndex = 0; // Next position in the current phrase to return.
+		private int[] oldPhrase = null; // Previous phrase.
+		private int oldCode = -1; // Previous code.
+
+		LZWMappingIterator() {
+			lzwIndex = 1; // First code consumed during initialization.
+			oldCode = _dataLZW[0]; // Decode the first code into the initial phrase.
+			oldPhrase = unpack(oldCode, _nUnique, dict);
+			currentPhrase = oldPhrase;
+			currentPhraseIndex = 0;
+			mapIndex = 0; // No mapping symbols have been returned yet.
+		}
+
+		// True if there are more mapping symbols to decode.
+		boolean hasNext() {
+			return mapIndex < _nRows;
+		}
+
+		int next() {
+			if(!hasNext())
+				throw new NoSuchElementException();
+
+			// If the current phrase still has symbols, return the next symbol from it.
+			if(currentPhraseIndex < currentPhrase.length) {
+				mapIndex++;
+				return currentPhrase[currentPhraseIndex++];
+			}
+
+			// Otherwise decode the next code into a new phrase.
+			if(lzwIndex >= _dataLZW.length)
+				throw new IllegalStateException("Invalid LZW index: " + lzwIndex);
+
+			final int key = _dataLZW[lzwIndex++];
+
+			final int[] next;
+			if(key < _nUnique || dict.containsKey(key)) {
+				// Normal case: the code is either a base symbol or already present in the dictionary.
+				next = unpack(key, _nUnique, dict);
+			}
+			else {
+				// KwKwK special case: the code refers to the phrase that is being defined right now.
+				next = packint(oldPhrase, oldPhrase[0]);
+			}
+
+			// Add the new phrase to the dictionary: nextCode -> (oldCode, firstSymbol(next)).
+			dict.put(nextCode++, packKey(oldCode, next[0]));
+
+			// Advance decoder state.
+			oldCode = key;
+			oldPhrase = next;
+
+			// Start returning symbols from the newly decoded phrase.
+			currentPhrase = next;
+			currentPhraseIndex = 0;
+
+			mapIndex++;
+			return currentPhrase[currentPhraseIndex++];
+		}
+	}
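+
+	// Illustrative sketch of the intended access pattern (not called anywhere yet): a single
+	// sequential pass over the compressed mapping without materializing it, decoding one LZW
+	// phrase at a time via the iterator above.
+	private int[] decodeSequentiallyExample() {
+		final int[] out = new int[_nRows];
+		final LZWMappingIterator it = new LZWMappingIterator();
+		int i = 0;
+		while(it.hasNext())
+			out[i++] = it.next();
+		return out;
+	}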
+
+	// Decompresses an LZW-compressed vector into its pre-compressed AMapToData form, up to `index` symbols.
+	private static AMapToData decompress(int[] codes, int nUnique, int nRows, int index) {
+		// Validate input arguments.
+		if(codes == null)
+			throw new IllegalArgumentException("codes is null");
+		if(codes.length == 0)
+			throw new IllegalArgumentException("codes is empty");
+		if(nUnique <= 0)
+			throw new IllegalArgumentException("Invalid alphabet size: " + nUnique);
+		if(nRows <= 0) {
+			throw new IllegalArgumentException("Invalid nRows: " + nRows);
+		}
+		if(index > nRows) {
+			throw new IllegalArgumentException("Index is larger than data length: " + index);
+		}
+
+		// Return an empty map if index is zero.
+		if(index == 0)
+			return MapToFactory.create(0, nUnique);
+
+		// Maps: code -> packKey(prefixCode, lastSymbolOfPhrase).
+		// Base symbols (0..nUnique-1) are implicit and not stored here.
+		final Map<Integer, Long> dict = new HashMap<>();
+
+		// Output mapping that will be reconstructed.
+		AMapToData out = MapToFactory.create(index, nUnique);
+		int outPos = 0; // Current write position in the output mapping.
+
+		// Decode the first code. The first code always expands to a valid phrase without needing
+		// any dictionary entries.
+		int old = codes[0];
+		int[] oldPhrase = unpack(old, nUnique, dict);
+
+		for(int v : oldPhrase) {
+			if(outPos == index)
+				break;
+			out.set(outPos++, v);
+		}
+
+		// Stop early if only a prefix no longer than the first phrase was requested; otherwise the
+		// loop below would write past the end of the output mapping.
+		if(outPos == index)
+			return out;
+
+		// Next free dictionary code. Codes 0..nUnique-1 are reserved for base symbols.
+		int nextCode = nUnique;
+
+		// Process remaining codes.
+		for(int i = 1; i < codes.length; i++) {
+			int key = codes[i];
+
+			int[] next;
+			if(key < nUnique || dict.containsKey(key)) {
+				// Normal case: The code is either a base symbol or already present in the dictionary.
+				next = unpack(key, nUnique, dict);
+			}
+			else {
+				// KwKwK special case: The current code refers to a phrase that is being defined right now.
+				// next = oldPhrase + first(oldPhrase).
+				int first = oldPhrase[0];
+				next = packint(oldPhrase, first);
+			}
+
+			// Append the reconstructed phrase to the output mapping.
+			for(int v : next) {
+				out.set(outPos++, v);
+				if(outPos == index)
+					// Stop immediately once done.
+					return out;
+			}
+
+			// Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)).
+			final int first = next[0];
+			dict.put(nextCode++, packKey(old, first));
+
+			// Advance.
+			old = key;
+			oldPhrase = next;
+		}
+
+		// Safety check: the decoder must produce exactly `index` symbols.
+		if(outPos != index)
+			throw new IllegalStateException("Decompression length mismatch: got " + outPos + " expected " + index);
+
+		// Return the reconstructed mapping.
+		return out;
+	}
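+
+	// Illustrative round trip over the two private helpers above (not wired into the runtime),
+	// using the example mapping from the compress() comment: for _data = [2,0,2,3,0,2,1,0,2]
+	// with nUnique = 4, compress emits the codes [2,0,2,3,5,1,5], where code 5 was assigned to
+	// the phrase (0,2) while encoding, and decompress restores the original nine symbols.
+	private static boolean lzwRoundTripExample() {
+		final int nUnique = 4;
+		final int[] symbols = {2, 0, 2, 3, 0, 2, 1, 0, 2};
+		final AMapToData in = MapToFactory.create(symbols.length, nUnique);
+		for(int i = 0; i < symbols.length; i++)
+			in.set(i, symbols[i]);
+		final int[] codes = compress(in);
+		final AMapToData back = decompress(codes, nUnique, symbols.length, symbols.length);
+		for(int i = 0; i < symbols.length; i++)
+			if(back.getIndex(i) != symbols[i])
+				return false;
+		return true;
+	}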
+
+	// Build constructor: used when creating a new DDCLZW instance during compression/build time. (TODO)
+	private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) {
+		super(colIndexes, dict, cachedCounts);
+
+		// Derive metadata
+		_nRows = data.size();
+		_nUnique = dict.getNumberOfValues(colIndexes.size());
+
+		// Compress mapping to LZW
+		_dataLZW = compress(data);
+
+		if(CompressedMatrixBlock.debug) {
+			if(getNumValues() == 0)
+				throw new DMLCompressionException("Invalid construction with empty dictionary");
+			if(_nRows == 0)
+				throw new DMLCompressionException("Invalid length of the data: is zero");
+			if(data.getUnique() != dict.getNumberOfValues(colIndexes.size()))
+				throw new DMLCompressionException(
+					"Invalid map to dict: map has " + data.getUnique() + " while dict has "
+						+ dict.getNumberOfValues(colIndexes.size()));
+			int[] c = getCounts();
+			if(c.length != dict.getNumberOfValues(colIndexes.size()))
+				throw new DMLCompressionException("Invalid DDC Construction");
+			data.verify();
+		}
+	}
+
+	// Read constructor: used when creating this group from a serialized form (e.g., reading a
+	// compressed matrix from a disk/memory stream). (TODO)
+	private ColGroupDDCLZW(IColIndex colIndexes, IDictionary dict, int[] dataLZW, int nRows, int nUnique,
+		int[] cachedCounts) {
+		super(colIndexes, dict, cachedCounts);
+
+		_dataLZW = dataLZW;
+		_nRows = nRows;
+		_nUnique = nUnique;
+
+		if(CompressedMatrixBlock.debug) {
+			if(getNumValues() == 0)
+				throw new DMLCompressionException("Invalid construction with empty dictionary");
+			if(_nRows <= 0)
+				throw new DMLCompressionException("Invalid length of the data: is zero");
+			if(_nUnique != dict.getNumberOfValues(colIndexes.size()))
+				throw new DMLCompressionException("Invalid map to dict: map has " + _nUnique + " while dict has "
+					+ dict.getNumberOfValues(colIndexes.size()));
+			int[] c = getCounts();
+			if(c.length != dict.getNumberOfValues(colIndexes.size()))
+				throw new DMLCompressionException("Invalid DDC Construction");
+		}
+	}
+
+	// Factory method for creating a column group. (AColGroup g = ColGroupDDCLZW.create(...);)
+	public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) {
+		if(dict == null)
+			return new ColGroupEmpty(colIndexes);
+		else if(data.getUnique() == 1)
+			return ColGroupConst.create(colIndexes, dict);
+		else
+			return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts);
+	}
+
+	/*
+	 * TODO: Operations with complex access patterns should be decompressed to the DDC format, i.e.
+	 * ... return ColGroupDDC.create(...,decompress(_dataLZW),...). We still need to decide which
+	 * methods are suitable for sequential access and which are not; the latter should materialize
+	 * the mapping and fall back to DDC.
+	 */
+
+	public AColGroup convertToDDC() {
+		final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, _nRows);
+		final int[] counts = getCounts(); // may be null depending on the group
+		return ColGroupDDC.create(_colIndexes, _dict, map, counts);
+	}
+
+	public AColGroup convertToDDC(int index) {
+		final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, index);
+		final int[] counts = getCounts(); // may be null depending on the group
+		return ColGroupDDC.create(_colIndexes, _dict, map, counts);
+	}
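+
+	/*
+	 * On-disk layout shared by write(...) and read(...) below (a sketch derived from the code,
+	 * for orientation):
+	 *   IColIndex   _colIndexes      (ColIndexFactory format)
+	 *   IDictionary _dict            (DictionaryFactory format)
+	 *   int         _nRows           (rows of the logical mapping)
+	 *   int         _nUnique         (alphabet size of the mapping)
+	 *   int         _dataLZW.length  (number of LZW codes)
+	 *   int[]       _dataLZW         (the LZW codes themselves)
+	 */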
+	// Deserialize a ColGroupDDCLZW object from a binary stream.
+	public static ColGroupDDCLZW read(DataInput in) throws IOException {
+		final IColIndex colIndexes = ColIndexFactory.read(in);
+		final IDictionary dict = DictionaryFactory.read(in);
+
+		// Metadata for the lzw mapping.
+		final int nRows = in.readInt();
+		final int nUnique = in.readInt();
+
+		// Read the compressed mapping array.
+		final int len = in.readInt();
+		if(len < 0)
+			throw new IOException("Invalid LZW data length: " + len);
+
+		final int[] dataLZW = new int[len];
+		for(int i = 0; i < len; i++)
+			dataLZW[i] = in.readInt();
+
+		// cachedCounts currently not serialized (mirror ColGroupDDC.read which passes null)
+		return new ColGroupDDCLZW(colIndexes, dict, dataLZW, nRows, nUnique, null);
+	}
+
+	// Serialize a ColGroupDDCLZW object into a binary stream.
+	@Override
+	public void write(DataOutput out) throws IOException {
+		_colIndexes.write(out);
+		_dict.write(out);
+		out.writeInt(_nRows);
+		out.writeInt(_nUnique);
+		out.writeInt(_dataLZW.length); // TODO: correct?
+		for(int i : _dataLZW)
+			out.writeInt(i);
+	}
+
+	@Override
+	public double getIdx(int r, int colIdx) {
+		// TODO: this should be fast; for now it decodes the mapping prefix up to and including row r.
+		final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, r + 1);
+		// TODO: use colIdx to pick the column within the dictionary row.
+		return map.getIndex(r);
+	}
+
+	@Override
+	public CompressionType getCompType() {
+		return CompressionType.DDCLZW;
+	}
+
+	@Override
+	protected ColGroupType getColGroupType() {
+		return ColGroupType.DDCLZW;
+	}
+
+	@Override
+	public boolean containsValue(double pattern) {
+		return _dict.containsValue(pattern);
+	}
+
+	@Override
+	public double getCost(ComputationCostEstimator e, int nRows) {
+		final int nVals = getNumValues();
+		final int nCols = getNumCols();
+		return e.getCost(nRows, nRows, nCols, nVals, _dict.getSparsity());
+	}
+
+	@Override
+	public ICLAScheme getCompressionScheme() {
+		// TODO: not implemented in ColGroupDDCFOR either - should we add it? It would hold nCols like DDC.
+		throw new NotImplementedException();
+	}
+
+	@Override
+	protected int numRowsToMultiply() {
+		return _nRows;
+	}
+
+	@Override
+	protected AColGroup copyAndSet(IColIndex colIndexes, IDictionary newDictionary) {
+		return new ColGroupDDCLZW(colIndexes, newDictionary, _dataLZW, _nRows, _nUnique, getCachedCounts());
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		long ret = super.getExactSizeOnDisk();
+		ret += 4; // _nRows size
+		ret += 4; // _nUnique size
+		ret += 4; // dataLZW.length
+		ret += (long) _dataLZW.length * 4; // lzw codes
+		return ret;
+	}
+
+	@Override
+	public AMapToData getMapToData() {
+		throw new NotImplementedException(); // or decompress and return data...
decompress(_dataLZW, _nUnique, _nRows, _nRows) + } + + @Override + public boolean sameIndexStructure(AColGroupCompressed that) { + return that instanceof ColGroupDDCLZW && ((ColGroupDDCLZW) that)._dataLZW == _dataLZW; + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + return _dict.aggregate(c, builtin); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + _dict.aggregateCols(c, builtin, _colIndexes); + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + try { + AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru); + return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null); + } + catch(Exception e) { + throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e); + } + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) { + + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + + } + + @Override + protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, + int nColOut) { + + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) { + + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { - } + } - @Override - protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock sb) { + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { - } + } - @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { - } + } - @Override - public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - } + } - @Override - public AColGroup scalarOperation(ScalarOperator op) { - return null; - } + @Override + public AColGroup scalarOperation(ScalarOperator op) { + return null; + } - @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - return null; - } + @Override + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + return null; + } - @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - return null; - } + @Override + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + return null; + } - @Override - public AColGroup unaryOperation(UnaryOperator op) { - return null; - } + @Override + public AColGroup unaryOperation(UnaryOperator op) { + return null; + } - @Override - public AColGroup append(AColGroup g) { - return null; - } + @Override + public AColGroup append(AColGroup g) { + return null; + } - @Override - protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { - return null; 
- } + @Override + protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { + return null; + } - @Override - public AColGroup recompress() { - return null; - } + @Override + public AColGroup recompress() { + return null; + } - @Override - public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - return null; - } + @Override + public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { + return null; + } - @Override - protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - return null; - } + @Override + protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { + return null; + } - @Override - protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + @Override + protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - } + } - @Override - protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { + @Override + protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - } + } - @Override - public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { - return new AColGroup[0]; - } + @Override + public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { + return new AColGroup[0]; + } - @Override - protected boolean allowShallowIdentityRightMult() { - return false; - } + @Override + protected boolean allowShallowIdentityRightMult() { + return false; + } - @Override - protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - return null; - } + @Override + protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { + return null; + } - @Override - public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - } + } - @Override - public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - } + } - @Override - protected void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + @Override + protected void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - } + } - @Override - protected void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + @Override + protected void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - } + } - @Override - protected void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + @Override + protected void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - } + } - @Override - protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - } + } - @Override - public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { + @Override + public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - } + } - @Override - protected int[] getCounts(int[] out) { - return new int[0]; - } + @Override + protected int[] getCounts(int[] out) { + return new int[0]; 
// If this returns an exception the test won't work. + } - @Override - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { - } + } - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - } + } - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - } + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java new file mode 100644 index 00000000000..dfc83673a90 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.sysds.test.component.compress.colgroup; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Collections; +import java.util.EnumSet; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressionSettings; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; +import org.apache.sysds.runtime.compress.colgroup.ColGroupIO; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.estim.ComEstExact; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.Divide; +import org.apache.sysds.runtime.functionobjects.Equals; +import org.apache.sysds.runtime.functionobjects.Multiply; +import org.apache.sysds.runtime.functionobjects.GreaterThan; +import org.apache.sysds.runtime.functionobjects.Minus; +import org.apache.sysds.runtime.functionobjects.Plus; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.operators.RightScalarOperator; +import org.apache.sysds.runtime.matrix.operators.ScalarOperator; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; +import org.apache.sysds.runtime.util.DataConverter; +import org.junit.Test; + +public class ColGroupDDCLZWTest { + protected static final Log LOG = LogFactory.getLog(ColGroupDDCLZWTest.class.getName()); + + // TODO: use csb instead of create. 
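+ // NOTE: the commented-out builder path below assumes that CompressionType.DDCLZW is registered as a valid compression type for the factory; until then, groups are constructed directly via convertToDDCLZW() in the tests.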
+ /*CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DDCLZW)) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0);*/ + +} diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index 4f02ce97ae7..dd06226e093 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -36,272 +36,265 @@ public class ColGroupDDCTest { - protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName()); - - @Test - public void testConvertToDDCLZWBasic() { - // TODO: new method for comparison - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; - Dictionary dict = Dictionary.create(dictValues); - - int[] src = new int[]{ - // repeating base pattern - 0,0,2, 0, 2, 1, 0, 2, 1, 0, 2, - 2, 0, 2, 1, 0, 2, 1, 0, 2, - - // variation / shifted pattern - 1, 0, 1, 2, 0, 1, 2, 0, 1, - 1, 0, 1, 2, 0, 1, 2, 0, 1, - - // longer runs (good for phrase growth) - 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, - - // mixed noise - 2, 1, 0, 2, 1, 0, 2, 1, 0, - 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, - - // repeating tail (tests dictionary reuse) - 2, 0, 2, 1, 0, 2, 1, 0, 2, - 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1 - }; - - final int nRows = src.length; - final int nUnique = 3; - AMapToData data = MapToFactory.create(nRows, nUnique); - for (int i = 0; i < nRows; i++) - data.set(i, src[i]); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDDCLZW(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDDCLZW); - - ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) result; - AColGroup ddclzwDecompressed = ddclzw.convertToDDC(); - - assertNotNull(ddclzwDecompressed); - assertTrue(ddclzwDecompressed instanceof ColGroupDDC); - - ColGroupDDC ddc2 = (ColGroupDDC) ddclzwDecompressed; - - AMapToData d1 = ddc.getMapToData(); - AMapToData d2 = ddc2.getMapToData(); - - assertEquals(d1.size(), d2.size()); - assertEquals(d1.getUnique(), d2.getUnique()); - for (int i = 0; i < d1.size(); i++) - assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i)); - - assertEquals(ddc.getColIndices(), ddc2.getColIndices()); - - // Test partial decompression: - // Index is the number of symbols to decode (0 to index-1) - int index = 10; - ColGroupDDC ddcIndex = (ColGroupDDC) ddclzw.convertToDDC(index); - - AMapToData d3 = ddcIndex.getMapToData(); - assertEquals(index, d3.size()); - assertEquals(ddc.getColIndices(), ddcIndex.getColIndices()); - - for(int i = 0; i < index; i++){ - assertEquals(d1.getIndex(i), d3.getIndex(i)); - } - - // Test sliceRows - int low = 3; - int high = 10; - AColGroup slice = ddclzw.sliceRows(low, high); - if(slice instanceof ColGroupDDCLZW ddclzwslice){ - ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwslice.convertToDDC(); - ColGroupDDC ddcSlice2 = (ColGroupDDC) ddc.sliceRows(low, high); - - AMapToData d4 = 
ddcSlice.getMapToData(); - AMapToData d5 = ddcSlice2.getMapToData(); - - assertEquals(d5.size(), d4.size()); - assertEquals(d5.getUnique(), d4.getUnique()); - - for (int i = 0; i < d4.size(); i++) - assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i)); - } - - } - - @Test - public void testConvertToDeltaDDCBasic() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[]{10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(11.0, mb.get(1, 0), 0.0); - assertEquals(21.0, mb.get(1, 1), 0.0); - assertEquals(12.0, mb.get(2, 0), 0.0); - assertEquals(22.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCSingleColumn() { - IColIndex colIndexes = ColIndexFactory.create(1); - double[] dictValues = new double[]{1.0, 2.0, 3.0, 4.0, 5.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(5, 5); - for (int i = 0; i < 5; i++) - data.set(i, i); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(5, 1, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5); - - assertEquals(1.0, mb.get(0, 0), 0.0); - assertEquals(2.0, mb.get(1, 0), 0.0); - assertEquals(3.0, mb.get(2, 0), 0.0); - assertEquals(4.0, mb.get(3, 0), 0.0); - assertEquals(5.0, mb.get(4, 0), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithRepeatedValues() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[]{10.0, 20.0, 10.0, 20.0, 10.0, 20.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(10.0, mb.get(1, 0), 0.0); - assertEquals(20.0, mb.get(1, 1), 0.0); - assertEquals(10.0, mb.get(2, 0), 0.0); - assertEquals(20.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithNegativeDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[]{10.0, 20.0, 8.0, 15.0, 12.0, 25.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 
2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(10.0, mb.get(0, 0), 0.0); - assertEquals(20.0, mb.get(0, 1), 0.0); - assertEquals(8.0, mb.get(1, 0), 0.0); - assertEquals(15.0, mb.get(1, 1), 0.0); - assertEquals(12.0, mb.get(2, 0), 0.0); - assertEquals(25.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCWithZeroDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[]{5.0, 0.0, 5.0, 0.0, 0.0, 5.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(3, 3); - data.set(0, 0); - data.set(1, 1); - data.set(2, 2); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(3, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); - - assertEquals(5.0, mb.get(0, 0), 0.0); - assertEquals(0.0, mb.get(0, 1), 0.0); - assertEquals(5.0, mb.get(1, 0), 0.0); - assertEquals(0.0, mb.get(1, 1), 0.0); - assertEquals(0.0, mb.get(2, 0), 0.0); - assertEquals(5.0, mb.get(2, 1), 0.0); - } - - @Test - public void testConvertToDeltaDDCMultipleUniqueDeltas() { - IColIndex colIndexes = ColIndexFactory.create(2); - double[] dictValues = new double[]{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; - Dictionary dict = Dictionary.create(dictValues); - AMapToData data = MapToFactory.create(4, 4); - for (int i = 0; i < 4; i++) - data.set(i, i); - - ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); - AColGroup result = ddc.convertToDeltaDDC(); - - assertNotNull(result); - assertTrue(result instanceof ColGroupDeltaDDC); - ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; - - MatrixBlock mb = new MatrixBlock(4, 2, false); - mb.allocateDenseBlock(); - deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4); - - assertEquals(1.0, mb.get(0, 0), 0.0); - assertEquals(2.0, mb.get(0, 1), 0.0); - assertEquals(3.0, mb.get(1, 0), 0.0); - assertEquals(4.0, mb.get(1, 1), 0.0); - assertEquals(5.0, mb.get(2, 0), 0.0); - assertEquals(6.0, mb.get(2, 1), 0.0); - assertEquals(7.0, mb.get(3, 0), 0.0); - assertEquals(8.0, mb.get(3, 1), 0.0); - } + protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName()); + + @Test + public void testConvertToDDCLZWBasic() { + // TODO: new method for comparison + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; + Dictionary dict = Dictionary.create(dictValues); + + int[] src = new int[] { + // repeating base pattern + 0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, + + // variation / shifted pattern + 1, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2, 0, 1, + + // longer runs (good for phrase growth) + 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + + // mixed noise + 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, + + // repeating tail (tests dictionary reuse) + 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 
2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1}; + + final int nRows = src.length; + final int nUnique = 3; + AMapToData data = MapToFactory.create(nRows, nUnique); + for(int i = 0; i < nRows; i++) + data.set(i, src[i]); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDDCLZW(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDDCLZW); + + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) result; + AColGroup ddclzwDecompressed = ddclzw.convertToDDC(); + + assertNotNull(ddclzwDecompressed); + assertTrue(ddclzwDecompressed instanceof ColGroupDDC); + + ColGroupDDC ddc2 = (ColGroupDDC) ddclzwDecompressed; + + AMapToData d1 = ddc.getMapToData(); + AMapToData d2 = ddc2.getMapToData(); + + assertEquals(d1.size(), d2.size()); + assertEquals(d1.getUnique(), d2.getUnique()); + for(int i = 0; i < d1.size(); i++) + assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i)); + + assertEquals(ddc.getColIndices(), ddc2.getColIndices()); + + // Test partial decompression: + // Index is the number of symbols to decode (0 to index-1) + int index = 10; + ColGroupDDC ddcIndex = (ColGroupDDC) ddclzw.convertToDDC(index); + + AMapToData d3 = ddcIndex.getMapToData(); + assertEquals(index, d3.size()); + assertEquals(ddc.getColIndices(), ddcIndex.getColIndices()); + + for(int i = 0; i < index; i++) { + assertEquals(d1.getIndex(i), d3.getIndex(i)); + } + + // Test sliceRows + int low = 3; + int high = 10; + AColGroup slice = ddclzw.sliceRows(low, high); + if(slice instanceof ColGroupDDCLZW ddclzwslice) { + ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwslice.convertToDDC(); + ColGroupDDC ddcSlice2 = (ColGroupDDC) ddc.sliceRows(low, high); + + AMapToData d4 = ddcSlice.getMapToData(); + AMapToData d5 = ddcSlice2.getMapToData(); + + assertEquals(d5.size(), d4.size()); + assertEquals(d5.getUnique(), d4.getUnique()); + + for(int i = 0; i < d4.size(); i++) + assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i)); + } + + } + + @Test + public void testConvertToDeltaDDCBasic() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(11.0, mb.get(1, 0), 0.0); + assertEquals(21.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(22.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCSingleColumn() { + IColIndex colIndexes = ColIndexFactory.create(1); + double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(5, 5); + for(int i = 0; i < 5; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + 
assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(5, 1, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(1, 0), 0.0); + assertEquals(3.0, mb.get(2, 0), 0.0); + assertEquals(4.0, mb.get(3, 0), 0.0); + assertEquals(5.0, mb.get(4, 0), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithRepeatedValues() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 10.0, 20.0, 10.0, 20.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(10.0, mb.get(1, 0), 0.0); + assertEquals(20.0, mb.get(1, 1), 0.0); + assertEquals(10.0, mb.get(2, 0), 0.0); + assertEquals(20.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithNegativeDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 8.0, 15.0, 12.0, 25.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(8.0, mb.get(1, 0), 0.0); + assertEquals(15.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(25.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithZeroDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {5.0, 0.0, 5.0, 0.0, 0.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(5.0, mb.get(0, 0), 0.0); + assertEquals(0.0, mb.get(0, 1), 0.0); + assertEquals(5.0, mb.get(1, 0), 0.0); + assertEquals(0.0, mb.get(1, 1), 0.0); + assertEquals(0.0, mb.get(2, 0), 0.0); + assertEquals(5.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCMultipleUniqueDeltas() { + IColIndex colIndexes = 
ColIndexFactory.create(2); + double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(4, 4); + for(int i = 0; i < 4; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(0, 1), 0.0); + assertEquals(3.0, mb.get(1, 0), 0.0); + assertEquals(4.0, mb.get(1, 1), 0.0); + assertEquals(5.0, mb.get(2, 0), 0.0); + assertEquals(6.0, mb.get(2, 1), 0.0); + assertEquals(7.0, mb.get(3, 0), 0.0); + assertEquals(8.0, mb.get(3, 1), 0.0); + } } From 9e2cf1153c44e3464d977f63c4720af0b76f38db Mon Sep 17 00:00:00 2001 From: Annika Lehmann Date: Sat, 17 Jan 2026 21:11:56 +0100 Subject: [PATCH 19/24] [SYSTEMDS-3779] Intermediate DDCLZW Scheme --- .../compress/colgroup/ColGroupDDCLZW.java | 28 +- .../colgroup/scheme/DDCLZWScheme.java | 25 ++ .../colgroup/scheme/DDCLZWSchemeMC.java | 208 +++++++++++ .../colgroup/scheme/DDCLZWSchemeSC.java | 348 ++++++++++++++++++ .../compress/colgroup/ColGroupDDCTest.java | 4 + 5 files changed, 602 insertions(+), 11 deletions(-) create mode 100644 src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWScheme.java create mode 100644 src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWSchemeMC.java create mode 100644 src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWSchemeSC.java diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index a8c279828fb..75ec28ca30a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -45,6 +45,7 @@ import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator; import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; +import org.apache.sysds.runtime.compress.colgroup.scheme.DDCLZWScheme; import org.apache.sysds.runtime.compress.colgroup.scheme.DDCScheme; import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme; import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator; @@ -467,7 +468,7 @@ public double getIdx(int r, int colIdx) { // TODO: should be fast final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, r); // TODO: ColumnIndex - return map.getIndex(r); + return _dict.getValue(map.getIndex(r), colIdx, _colIndexes.size()); } @Override @@ -495,7 +496,7 @@ public double getCost(ComputationCostEstimator e, int nRows) { @Override public ICLAScheme getCompressionScheme() { //TODO: not implemented in ColGroupDDCFor - should we create it? Content: ncols like DDC - throw new NotImplementedException(); + return DDCLZWScheme.create(this); } @Override @@ -709,19 +710,24 @@ protected int[] getCounts(int[] out) { return new int[0]; // If this returns an exception the test won't work.
} - @Override - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru); + for (int rix = rl; rix < ru; rix++) + c[rix] += preAgg[data.getIndex(rix)]; + } - } + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + } - } + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru); + for (int rix = rl; rix < ru; rix++) + c[rix] *= preAgg[data.getIndex(rix)] - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWScheme.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWScheme.java new file mode 100644 index 00000000000..f4bc4a023fa --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWScheme.java @@ -0,0 +1,25 @@ +package org.apache.sysds.runtime.compress.colgroup.scheme; + +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDCLZW; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + +public abstract class DDCLZWScheme extends DDCScheme { + // TODO: private int nUnique; too data-specific, is it sensible at all? + + protected DDCLZWScheme(IColIndex cols) { + super(cols); + } + + public static DDCLZWScheme create(ColGroupDDCLZW g) { + return g.getNumCols() == 1 ? new DDCLZWSchemeSC(g) : new DDCLZWSchemeMC(g); + } + + public static DDCLZWScheme create(IColIndex cols) { + return cols.size() == 1 ? 
new DDCLZWSchemeSC(cols) : new DDCLZWSchemeMC(cols); + } + +} diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWSchemeMC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWSchemeMC.java new file mode 100644 index 00000000000..4d4f6509d47 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWSchemeMC.java @@ -0,0 +1,208 @@ +package org.apache.sysds.runtime.compress.colgroup.scheme; + +import org.apache.sysds.runtime.compress.DMLCompressionException; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDCLZW; +import org.apache.sysds.runtime.compress.colgroup.ColGroupEmpty; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelection; +import org.apache.sysds.runtime.compress.utils.ACount; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.compress.utils.DblArrayCountHashMap; +import org.apache.sysds.runtime.compress.utils.DoubleCountHashMap; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.data.Pair; + +public class DDCLZWSchemeMC extends DDCLZWScheme { + //private DDCSchemeMC ddcscheme; + private final DblArray emptyRow; + + private final DblArrayCountHashMap map; + + private DDCLZWSchemeMC(IColIndex cols, DblArrayCountHashMap map) { + super(cols); + this.map = map; + this.emptyRow = new DblArray(new double[cols.size()]); + } + + protected DDCLZWSchemeMC(ColGroupDDCLZW g) { + super(g.getColIndices()); + this.lastDict = g.getDictionary(); + final MatrixBlock mbDict = lastDict.getMBDict(this.cols.size()).getMatrixBlock(); + final int dictRows = mbDict.getNumRows(); + final int dictCols = mbDict.getNumColumns(); + + // Read the mapping data and materialize map. 
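+ // Each dictionary tuple is read back below as one DblArray row, so the map is seeded with exactly the distinct tuples of the last dictionary; ids are assigned in insertion order on first increment.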
+ map = new DblArrayCountHashMap(dictRows * 2); + final ReaderColumnSelection r = ReaderColumnSelection.createReader(mbDict, // + ColIndexFactory.create(dictCols), false, 0, dictRows); + + DblArray d = null; + while((d = r.nextRow()) != null) + map.increment(d); + + emptyRow = new DblArray(new double[dictCols]); + } + + protected DDCLZWSchemeMC(IColIndex cols) { + super(cols); + final int nCol = cols.size(); + this.map = new DblArrayCountHashMap(4); + this.emptyRow = new DblArray(new double[nCol]); + } + + @Override + protected AColGroup encodeV(MatrixBlock data, IColIndex columns) { + final int nRow = data.getNumRows(); + final ReaderColumnSelection reader = ReaderColumnSelection.createReader(// + data, columns, false, 0, nRow); + return encode(data, reader, nRow, columns); + } + + @Override + protected AColGroup encodeVT(MatrixBlock data, IColIndex columns) { + final int nRow = data.getNumColumns(); + final ReaderColumnSelection reader = ReaderColumnSelection.createReader(// + data, columns, true, 0, nRow); + return encode(data, reader, nRow, columns); + } + + private AColGroup encode(MatrixBlock data, ReaderColumnSelection reader, int nRow, IColIndex columns) { + final AMapToData d = MapToFactory.create(nRow, map.size()); + DblArray cellVals; + ACount<DblArray> emptyIdx = map.getC(emptyRow); + if(emptyIdx == null) { + + while((cellVals = reader.nextRow()) != null) { + final int row = reader.getCurrentRowIndex(); + + final int id = map.getId(cellVals); + d.set(row, id); + + } + } + else { + int r = 0; + while((cellVals = reader.nextRow()) != null) { + final int row = reader.getCurrentRowIndex(); + if(row != r) { + while(r < row) + d.set(r++, emptyIdx.id); + } + final int id = map.getId(cellVals); + d.set(row, id); + r++; + } + while(r < nRow) + d.set(r++, emptyIdx.id); + } + if(lastDict == null || lastDict.getNumberOfValues(columns.size()) != map.size()) + lastDict = DictionaryFactory.create(map, columns.size(), false, data.getSparsity()); + return ColGroupDDCLZW.create(columns, lastDict, d, null); + + } + + + @Override + protected ICLAScheme updateV(MatrixBlock data, IColIndex columns) { + final int nRow = data.getNumRows(); + final ReaderColumnSelection reader = ReaderColumnSelection.createReader(// + data, columns, false, 0, nRow); + return update(data, reader, nRow, columns); + } + + private ICLAScheme update(MatrixBlock data, ReaderColumnSelection reader, int nRow, IColIndex columns) { + DblArray d = null; + int r = 0; + while((d = reader.nextRow()) != null) { + final int cr = reader.getCurrentRowIndex(); + if(cr != r) { + map.increment(emptyRow, cr - r); + r = cr; + } + map.increment(d); + r++; + } + if(r < nRow) + map.increment(emptyRow, nRow - r - 1); + + return this; + } + + + @Override + protected ICLAScheme updateVT(MatrixBlock data, IColIndex columns) { + final int nRow = data.getNumColumns(); + final ReaderColumnSelection reader = ReaderColumnSelection.createReader(// + data, columns, true, 0, nRow); + return update(data, reader, nRow, columns); + } + + @Override + protected Pair<ICLAScheme, AColGroup> tryUpdateAndEncodeT(MatrixBlock data, IColIndex columns) { + final int nRow = data.getNumColumns(); + final ReaderColumnSelection reader = ReaderColumnSelection.createReader(// + data, columns, true, 0, nRow); + return tryUpdateAndEncode(data, reader, nRow, columns); + } + + private Pair<ICLAScheme, AColGroup> tryUpdateAndEncode(MatrixBlock data, ReaderColumnSelection reader, int nRow, + IColIndex columns) { + final AMapToData d = MapToFactory.create(nRow, map.size()); + int max = d.getUpperBoundValue(); + + DblArray cellVals; + 
ACount<DblArray> emptyIdx = map.getC(emptyRow); + if(emptyIdx == null) { + while((cellVals = reader.nextRow()) != null) { + final int row = reader.getCurrentRowIndex(); + final int id = map.increment(cellVals); + if(id > max) + throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); + d.set(row, id); + } + } + else { + int r = 0; + while((cellVals = reader.nextRow()) != null) { + final int row = reader.getCurrentRowIndex(); + if(row != r) { + map.increment(emptyRow, row - r); + while(r < row) + d.set(r++, emptyIdx.id); + } + final int id = map.increment(cellVals); + if(id > max) + throw new DMLCompressionException( + "Failed update and encode with " + max + " possible values" + map + " " + map.size()); + d.set(row, id); + r++; + } + if(r < nRow) + + map.increment(emptyRow, nRow - r); + while(r < nRow) + d.set(r++, emptyIdx.id); + } + if(lastDict == null || lastDict.getNumberOfValues(columns.size()) != map.size()) + lastDict = DictionaryFactory.create(map, columns.size(), false, data.getSparsity()); + + AColGroup g = ColGroupDDCLZW.create(columns, lastDict, d, null); + ICLAScheme s = this; + return new Pair<>(s, g); + } + @Override + public ACLAScheme clone() { + return new DDCLZWSchemeMC(cols, map.clone()); + } + + @Override + protected final Object getMap() { + return map; + } +} diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWSchemeSC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWSchemeSC.java new file mode 100644 index 00000000000..ea0bf681810 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCLZWSchemeSC.java @@ -0,0 +1,348 @@ +package org.apache.sysds.runtime.compress.colgroup.scheme; + +import org.apache.sysds.runtime.compress.DMLCompressionException; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDCLZW; +import org.apache.sysds.runtime.compress.colgroup.ColGroupEmpty; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.utils.DoubleCountHashMap; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.data.Pair; + +public class DDCLZWSchemeSC extends DDCLZWScheme { + + // TODO: This is a preliminary version - code largely as in DDCSchemeSC + // Check whether an abstract intermediate class is possible, or store the DDCSchemeSC as an attribute + + final private DoubleCountHashMap map; + + private DDCLZWSchemeSC(IColIndex cols, DoubleCountHashMap map) { + super(cols); + this.map = map; + } + + protected DDCLZWSchemeSC(ColGroupDDCLZW g) { + super(g.getColIndices()); + this.lastDict = g.getDictionary(); + int unique = lastDict.getNumberOfValues(1); + map = new DoubleCountHashMap(unique); + for(int i = 0; i < unique; i++) + map.increment(lastDict.getValue(i)); + } + + + protected DDCLZWSchemeSC(IColIndex cols) { + super(cols); + this.map = new DoubleCountHashMap(4); + } + + + + @Override + protected AColGroup encodeV(MatrixBlock data, IColIndex columns) { + if(data.isEmpty()) + return new 
ColGroupEmpty(columns); + final int nRow = data.getNumRows(); + + final AMapToData d = MapToFactory.create(nRow, map.size()); + + encode(data, d, cols.get(0)); + if(lastDict == null || lastDict.getNumberOfValues(columns.size()) != map.size()) + lastDict = DictionaryFactory.create(map); + + return ColGroupDDCLZW.create(columns, lastDict, d, null); + } + private void encodeSparse(MatrixBlock data, AMapToData d, int col) { + final int nRow = data.getNumRows(); + final SparseBlock sb = data.getSparseBlock(); + for(int i = 0; i < nRow; i++) + d.set(i, map.getId(sb.get(i, col))); + + } + private void encode(MatrixBlock data, AMapToData d, int col) { + if(data.isInSparseFormat()) + encodeSparse(data, d, col); + else if(data.getDenseBlock().isContiguous()) + encodeDense(data, d, col); + else + encodeGeneric(data, d, col); + } + + private void encodeDense(final MatrixBlock data, final AMapToData d, final int col) { + final int nRow = data.getNumRows(); + final double[] vals = data.getDenseBlockValues(); + final int nCol = data.getNumColumns(); + final int max = nRow * nCol; // guaranteed lower than intmax. + for(int i = 0, off = col; off < max; i++, off += nCol) + d.set(i, map.getId(vals[off])); + } + + private void encodeGeneric(MatrixBlock data, AMapToData d, int col) { + final int nRow = data.getNumRows(); + final DenseBlock db = data.getDenseBlock(); + for(int i = 0; i < nRow; i++) { + final double[] c = db.values(i); + final int off = db.pos(i) + col; + d.set(i, map.getId(c[off])); + } + } + + @Override + protected AColGroup encodeVT(MatrixBlock data, IColIndex columns) { + if(data.isEmpty()) + return new ColGroupEmpty(columns); + final int nRow = data.getNumColumns(); + + final AMapToData d = MapToFactory.create(nRow, map.size()); + + encodeT(data, d, cols.get(0)); + if(lastDict == null || lastDict.getNumberOfValues(columns.size()) != map.size()) + lastDict = DictionaryFactory.create(map); + + return ColGroupDDCLZW.create(columns, lastDict, d, null); + } + + private void encodeT(MatrixBlock data, AMapToData d, int col) { + if(data.isInSparseFormat()) + encodeSparseT(data, d, col); + else + encodeDenseT(data, d, col); + } + + private void encodeSparseT(MatrixBlock data, AMapToData d, int col) { + final SparseBlock sb = data.getSparseBlock(); + d.fill(map.getId(0.0)); + if(!sb.isEmpty(col)) { + int apos = sb.pos(col); + final int[] aix = sb.indexes(col); + final int alen = sb.size(col) + apos; + final double[] aval = sb.values(col); + while(apos < alen) { + final double v = aval[apos]; + final int idx = aix[apos++]; + d.set(idx, map.getId(v)); + } + } + } + + private void encodeDenseT(MatrixBlock data, AMapToData d, int col) { + final DenseBlock db = data.getDenseBlock(); + final double[] vals = db.values(col); + final int nCol = data.getNumColumns(); + for(int i = 0, off = db.pos(col); i < nCol; i++, off++) + d.set(i, map.getId(vals[off])); + } + + @Override + protected ICLAScheme updateV(MatrixBlock data, IColIndex columns) { + if(data.isEmpty()) + map.increment(0.0, data.getNumRows()); + else if(data.isInSparseFormat()) + updateSparse(data, columns.get(0)); + else if(data.getDenseBlock().isContiguous()) + updateDense(data, columns.get(0)); + else + updateGeneric(data, columns.get(0)); + + return this; + } + + private ICLAScheme updateSparse(MatrixBlock data, int col) { + final int nRow = data.getNumRows(); + final SparseBlock sb = data.getSparseBlock(); + for(int i = 0; i < nRow; i++) + map.increment(sb.get(i, col)); + return this; + } + + private ICLAScheme updateDense(MatrixBlock data, int 
col) { + + final int nRow = data.getNumRows(); + final double[] vals = data.getDenseBlockValues(); + final int nCol = data.getNumColumns(); + final int max = nRow * nCol; // guaranteed lower than intmax. + for(int off = col; off < max; off += nCol) + map.increment(vals[off]); + return this; + } + + private ICLAScheme updateGeneric(MatrixBlock data, int col) { + final int nRow = data.getNumRows(); + final DenseBlock db = data.getDenseBlock(); + for(int i = 0; i < nRow; i++) { + final double[] c = db.values(i); + final int off = db.pos(i) + col; + map.increment(c[off]); + } + return this; + } + + @Override + protected ICLAScheme updateVT(MatrixBlock data, IColIndex columns) { + if(data.isEmpty()) + map.increment(0.0, data.getNumColumns()); + else if(data.isInSparseFormat()) + updateSparseT(data, columns.get(0)); + else // dense and generic can be handled together if transposed + updateDenseT(data, columns.get(0)); + + return this; + } + + private void updateDenseT(MatrixBlock data, int col) { + final DenseBlock db = data.getDenseBlock(); + final double[] vals = db.values(col); + final int nCol = data.getNumColumns(); + for(int i = 0, off = db.pos(col); i < nCol; i++, off++) + map.increment(vals[off]); + } + + private void updateSparseT(MatrixBlock data, int col) { + final SparseBlock sb = data.getSparseBlock(); + + if(!sb.isEmpty(col)) { + int apos = sb.pos(col); + final int alen = sb.size(col) + apos; + final double[] aval = sb.values(col); + map.increment(0.0, alen - apos); + while(apos < alen) + map.increment(aval[apos++]); + } + else + map.increment(0.0, data.getNumColumns()); + + } + + @Override + public DDCLZWSchemeSC clone() { + return new DDCLZWSchemeSC(cols, map.clone()); + } + + @Override + protected final Object getMap() { + return map; + } + + // TODO: strictly required? + @Override + protected Pair<ICLAScheme, AColGroup> tryUpdateAndEncode(MatrixBlock data, IColIndex columns) { + if(data.isEmpty()) { + map.increment(0.0, data.getNumRows()); + return new Pair<>(this, new ColGroupEmpty(columns)); + } + final int nRow = data.getNumRows(); + + final AMapToData d = MapToFactory.create(nRow, map.size()); + + encodeAndUpdate(data, d, cols.get(0)); + if(lastDict == null || lastDict.getNumberOfValues(columns.size()) != map.size()) + lastDict = DictionaryFactory.create(map); + + return new Pair<>(this, ColGroupDDCLZW.create(columns, lastDict, d, null)); + } + + private void encodeAndUpdate(MatrixBlock data, AMapToData d, int col) { + final int max = d.getUpperBoundValue(); + if(data.isInSparseFormat()) + encodeAndUpdateSparse(data, d, col, max); + else if(data.getDenseBlock().isContiguous()) + encodeAndUpdateDense(data, d, col, max); + else + encodeAndUpdateGeneric(data, d, col, max); + } + + private void encodeAndUpdateSparse(MatrixBlock data, AMapToData d, int col, int max) { + final int nRow = data.getNumRows(); + final SparseBlock sb = data.getSparseBlock(); + + for(int i = 0; i < nRow; i++) { + int id = map.increment(sb.get(i, col)); + if(id > max) + throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); + d.set(i, id); + } + + } + + private void encodeAndUpdateDense(final MatrixBlock data, final AMapToData d, final int col, int max) { + final int nRow = data.getNumRows(); + final double[] vals = data.getDenseBlockValues(); + final int nCol = data.getNumColumns(); + final int end = nRow * nCol; // guaranteed lower than intmax.
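+ // Walks column `col` of the row-major dense array with stride nCol, assigning new ids on first encounter.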
+ for(int i = 0, off = col; off < end; i++, off += nCol) { + int id = map.increment(vals[off]); + if(id > max) + throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); + d.set(i, id); + } + } + + private void encodeAndUpdateGeneric(MatrixBlock data, AMapToData d, int col, int max) { + final int nRow = data.getNumRows(); + final DenseBlock db = data.getDenseBlock(); + for(int i = 0; i < nRow; i++) { + final double[] c = db.values(i); + final int off = db.pos(i) + col; + int id = map.increment(c[off]); + if(id > max) + throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); + d.set(i, id); + } + } + + @Override + protected Pair<ICLAScheme, AColGroup> tryUpdateAndEncodeT(MatrixBlock data, IColIndex columns) { + if(data.isEmpty()) + return new Pair<>(this, new ColGroupEmpty(columns)); + final int nRow = data.getNumColumns(); + + final AMapToData d = MapToFactory.create(nRow, map.size()); + + encodeAndUpdateT(data, d, cols.get(0)); + if(lastDict == null || lastDict.getNumberOfValues(columns.size()) != map.size()) + lastDict = DictionaryFactory.create(map); + + return new Pair<>(this, ColGroupDDCLZW.create(columns, lastDict, d, null)); + } + + private void encodeAndUpdateT(MatrixBlock data, AMapToData d, int col) { + if(data.isInSparseFormat()) + encodeAndUpdateSparseT(data, d, col); + else + encodeAndUpdateDenseT(data, d, col); + } + + private void encodeAndUpdateSparseT(MatrixBlock data, AMapToData d, int col) { + final SparseBlock sb = data.getSparseBlock(); + if(!sb.isEmpty(col)) { + int apos = sb.pos(col); + final int[] aix = sb.indexes(col); + final int alen = sb.size(col) + apos; + d.fill(map.increment(0.0, data.getNumColumns() - alen - apos)); + final double[] aval = sb.values(col); + while(apos < alen) { + final double v = aval[apos]; + final int idx = aix[apos++]; + d.set(idx, map.increment(v)); + } + } + else + d.fill(map.increment(0.0, data.getNumColumns())); + } + + private void encodeAndUpdateDenseT(MatrixBlock data, AMapToData d, int col) { + final DenseBlock db = data.getDenseBlock(); + final double[] vals = db.values(col); + final int nCol = data.getNumColumns(); + for(int i = 0, off = db.pos(col); i < nCol; i++, off++) + d.set(i, map.increment(vals[off])); + } + + +} diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index dd06226e093..f6902bd371a 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -122,6 +122,10 @@ public void testConvertToDDCLZWBasic() { assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i)); } + // Test computeRowSums + double [] sumsddc = new double[high-low]; + //ddc.computeColSums(sumsddc, low, high, ); + } @Test From 7de7f1dacb1dcd90ed8cdd7a523ec16b9df32cbd Mon Sep 17 00:00:00 2001 From: fjobs Date: Sun, 18 Jan 2026 12:38:19 +0100 Subject: [PATCH 20/24] [SYSTEMDS-3779] Added getIdx using LZWMappingIterator. Reverted formatting again.
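For reference, the sequential reconstruction that the LZWMappingIterator performs is equivalent to the textbook LZW decode sketched below. This is an illustrative sketch only, assuming the initial dictionary holds the single-symbol phrases 0..nUnique-1; lzwDecode and its signature are assumptions for illustration, not part of the class:

    // Illustrative textbook LZW decode of the mapping vector (not the actual class code).
    static int[] lzwDecode(int[] codes, int nUnique, int nRows) {
        java.util.List<int[]> dict = new java.util.ArrayList<>();
        for(int i = 0; i < nUnique; i++)
            dict.add(new int[] {i}); // initial single-symbol phrases
        int[] out = new int[nRows];
        int pos = 0;
        int[] prev = dict.get(codes[0]);
        for(int v : prev)
            out[pos++] = v;
        for(int c = 1; c < codes.length && pos < nRows; c++) {
            int[] next;
            if(codes[c] < dict.size())
                next = dict.get(codes[c]); // known phrase
            else {
                // KwKwK case: the code refers to the phrase currently being built.
                next = java.util.Arrays.copyOf(prev, prev.length + 1);
                next[prev.length] = prev[0];
            }
            for(int v : next) {
                if(pos == nRows)
                    break;
                out[pos++] = v;
            }
            // Register the new phrase: prev extended by the first symbol of next.
            int[] entry = java.util.Arrays.copyOf(prev, prev.length + 1);
            entry[prev.length] = next[0];
            dict.add(entry);
            prev = next;
        }
        return out;
    }

Walking this decoder symbol by symbol is also what makes the new getIdx cost O(r) per point lookup: reaching row r means decoding every phrase before it, so repeated random access should prefer a full decompress.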
--- .../compress/colgroup/ColGroupDDCLZW.java | 47 +++++++++++-------- .../compress/colgroup/ColGroupDDCTest.java | 6 +-- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index 75ec28ca30a..72cc7ce1a0b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -262,7 +262,7 @@ int next() { // Start returning symbols from the newly decoded phrase. currentPhrase = next; currentPhraseIndex = 0; - + mapIndex++; return currentPhrase[currentPhraseIndex++]; } @@ -465,10 +465,18 @@ public void write(DataOutput out) throws IOException { @Override public double getIdx(int r, int colIdx) { - // TODO: should be fast - final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, r); - // TODO: ColumnIndex - return _dict.getValue(map.getIndex(r), colIdx, _colIndexes.size()); + if(r < 0 || r >= _nRows) + throw new DMLRuntimeException("Row index out of bounds"); + + if(colIdx < 0 || colIdx >= _colIndexes.size()) + throw new DMLRuntimeException("Column index out of bounds"); + + final LZWMappingIterator it = new LZWMappingIterator(); + int dictIdx = -1; + for(int i = 0; i <= r; i++) { + dictIdx = it.next(); + } + return _dict.getValue(dictIdx, colIdx, _colIndexes.size()); } @Override @@ -710,24 +718,23 @@ protected int[] getCounts(int[] out) { return new int[0]; // If this returns an exception the test won't work. } - protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { - AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru); - for (int rix = rl; rix < ru; rix++) - c[rix] += preAgg[data.getIndex(rix)]; - } - - @Override - protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru); + for(int rix = rl; rix < ru; rix++) + c[rix] += preAgg[data.getIndex(rix)]; + } - } + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - @Override - protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { - AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru); - for (int rix = rl; rix < ru; rix++) - c[rix] *= preAgg[data.getIndex(rix)] + } + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru); + for(int rix = rl; rix < ru; rix++) + c[rix] *= preAgg[data.getIndex(rix)]; - } + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index f6902bd371a..ca753e3d50f 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -122,9 +122,9 @@ public void testConvertToDDCLZWBasic() { assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i)); } - // Test computeRowSums - double [] sumsddc = new double[high-low]; - //ddc.computeColSums(sumsddc, low, high, ); + // Test computeRowSums + double[] sumsddc = new double[high - low]; + //ddc.computeColSums(sumsddc, low, high, ); } From 
4f3f4137c871aa936d2b7847c7e5166f576b0771 Mon Sep 17 00:00:00 2001 From: Luka Dekanozishvili Date: Sun, 18 Jan 2026 19:52:51 +0100 Subject: [PATCH 21/24] [SYSTEMDS-3779] Fixed out of bounds logic Signed-off-by: Luka Dekanozishvili --- .../apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index 72cc7ce1a0b..810aeb5709d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -328,10 +328,10 @@ private static AMapToData decompress(int[] codes, int nUnique, int nRows, int in // Append the reconstructed phrase to the output mapping. for(int v : next) { - out.set(outPos++, v); if(outPos == index) // Stop immediately once done. return out; + out.set(outPos++, v); } // Add new phrase to dictionary: nextCode -> (old, firstSymbol(next)). From ca7e6ff4a7970eeab7634c5da885432e2128a01b Mon Sep 17 00:00:00 2001 From: Luka Dekanozishvili Date: Sun, 18 Jan 2026 19:54:31 +0100 Subject: [PATCH 22/24] [SYSTEMDS-3779] Added new tests for ColGroupDDCLZW (draft) Signed-off-by: Luka Dekanozishvili --- .../compress/colgroup/ColGroupDDCLZWTest.java | 558 ++++++++++++++++-- .../compress/colgroup/ColGroupDDCTest.java | 90 --- 2 files changed, 515 insertions(+), 133 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java index dfc83673a90..97695abad07 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java @@ -19,58 +19,530 @@ package org.apache.sysds.test.component.compress.colgroup; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.Collections; -import java.util.EnumSet; - -import org.apache.commons.lang3.NotImplementedException; +import java.util.Arrays; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.DMLRuntimeException; -import org.apache.sysds.runtime.compress.CompressionSettings; -import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; -import org.apache.sysds.runtime.compress.colgroup.AColGroup; -import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; -import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; -import org.apache.sysds.runtime.compress.colgroup.ColGroupIO; +import org.apache.sysds.runtime.compress.colgroup.*; +import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; -import org.apache.sysds.runtime.compress.estim.ComEstExact; -import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; -import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; -import org.apache.sysds.runtime.functionobjects.Builtin; -import 
org.apache.sysds.runtime.functionobjects.Divide; -import org.apache.sysds.runtime.functionobjects.Equals; -import org.apache.sysds.runtime.functionobjects.Multiply; -import org.apache.sysds.runtime.functionobjects.GreaterThan; -import org.apache.sysds.runtime.functionobjects.Minus; -import org.apache.sysds.runtime.functionobjects.Plus; -import org.apache.sysds.runtime.matrix.data.MatrixBlock; -import org.apache.sysds.runtime.matrix.operators.RightScalarOperator; -import org.apache.sysds.runtime.matrix.operators.ScalarOperator; -import org.apache.sysds.runtime.matrix.operators.UnaryOperator; -import org.apache.sysds.runtime.util.DataConverter; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.junit.Test; +import static org.junit.Assert.*; +import static org.junit.Assert.assertNotNull; + public class ColGroupDDCLZWTest { protected static final Log LOG = LogFactory.getLog(ColGroupDDCLZWTest.class.getName()); - // TODO: use csb instead of create. - /*CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) - .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DDCLZW)) - .setTransposeInput("false"); - CompressionSettings cs = csb.create(); + /** + * Creates a sample DDC group for unit tests + */ + private ColGroupDDC createTestDDC(int[] mapping, int nCols, int nUnique) { + IColIndex colIndexes = ColIndexFactory.create(nCols); + + double[] dictValues = new double[nUnique * nCols]; + for (int i = 0; i < nUnique; i++) { + for (int c = 0; c < nCols; c++) { + dictValues[i * nCols + c] = (i + 1) * 10.0 + c; + } + } + Dictionary dict = Dictionary.create(dictValues); + + AMapToData data = MapToFactory.create(mapping.length, nUnique); + for (int i = 0; i < mapping.length; i++) { + data.set(i, mapping[i]); + } + + AColGroup result = ColGroupDDC.create(colIndexes, dict, data, null); + assertTrue("The result is of class '" + result.getClass() + "'", result instanceof ColGroupDDC); + return (ColGroupDDC) result; + } + + /** + * Asserts that two maps are identical + */ + private void assertMapsEqual(AMapToData expected, AMapToData actual) { + assertEquals("Size mismatch", expected.size(), actual.size()); + assertEquals("Unique count mismatch", expected.getUnique(), actual.getUnique()); + + for (int i = 0; i < expected.size(); i++) { + assertEquals("Mapping mismatch at row " + i, expected.getIndex(i), actual.getIndex(i)); + } + } + + /** + * Applies DDCLZW compression/decompression and asserts that it's left unchanged + */ + private void assertLosslessCompression(ColGroupDDC original) { + // Compress + AColGroup compressed = original.convertToDDCLZW(); + assertNotNull("Compression returned null", compressed); + assertTrue(compressed instanceof ColGroupDDCLZW); + + // Decompress + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) compressed; + AColGroup decompressed = ddclzw.convertToDDC(); + assertNotNull("Decompression returned null", decompressed); + assertTrue(decompressed instanceof ColGroupDDC); + + // Assert + ColGroupDDC result = (ColGroupDDC) decompressed; + + AMapToData d1 = original.getMapToData(); + AMapToData d2 = result.getMapToData(); + + assertMapsEqual(d1, d2); + assertEquals("Column indices mismatch", original.getColIndices(), result.getColIndices()); + + assertEquals("Size mismatch", d1.size(), d2.size()); + assertEquals("Unique count mismatch", d1.getUnique(), d2.getUnique()); + + for (int i = 0; i < d1.size(); i++) { + assertEquals("Mapping mismatch at row " 
+        }
+    }
+
+    /**
+     * Asserts that partial decompression of the first `index` rows matches the original mapping.
+     */
+    private void assertPartialDecompression(ColGroupDDCLZW ddclzw, AMapToData original, int index) {
+        ColGroupDDC partial = (ColGroupDDC) ddclzw.convertToDDC(index);
+        AMapToData partialMap = partial.getMapToData();
+
+        assertEquals("Partial size incorrect", index, partialMap.size());
+
+        for (int i = 0; i < index; i++) {
+            assertEquals("Partial map mismatch at " + i, original.getIndex(i), partialMap.getIndex(i));
+        }
+    }
+
+    /**
+     * Asserts that the slice operation matches DDC's slice.
+     */
+    private void assertSlice(ColGroupDDCLZW ddclzw, ColGroupDDC originalDDC, int low, int high) {
+        AColGroup sliced = ddclzw.sliceRows(low, high);
+        assertTrue(sliced instanceof ColGroupDDCLZW);
+
+        ColGroupDDCLZW ddclzwSlice = (ColGroupDDCLZW) sliced;
+        ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwSlice.convertToDDC();
+        ColGroupDDC expectedSlice = (ColGroupDDC) originalDDC.sliceRows(low, high);
+
+        assertMapsEqual(expectedSlice.getMapToData(), ddcSlice.getMapToData());
+    }
+
+    @Test
+    public void testConvertToDDCLZWBasicNew() {
+        int[] src = new int[] {
+            0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2,
+            1, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2, 0, 1,
+            2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+            2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1,
+            2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1
+        };
+
+        // Create DDC with 2 columns, 3 unique values
+        ColGroupDDC ddc = createTestDDC(src, 2, 3);
+
+        assertLosslessCompression(ddc);
+
+        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
+        assertPartialDecompression(ddclzw, ddc.getMapToData(), 101);
+        assertSlice(ddclzw, ddc, 3, 10);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testPartialDecompressionOutOfBounds() {
+        int[] src = new int[] {
+            1, 3, 4, 4, 3, 2, 3, 4, 1, 4, 4, 4, 4, 1, 4, 1, 4, 1, 4, 0,
+            1, 3, 4, 4, 3, 2, 3, 4, 1, 4, 4, 4, 4, 1, 4, 1, 4, 1, 4, 0,
+        };
+
+        ColGroupDDC ddc = createTestDDC(src, 3, 5);
+
+        assertLosslessCompression(ddc);
+
+        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
+        assertPartialDecompression(ddclzw, ddc.getMapToData(), 40);
+        assertPartialDecompression(ddclzw, ddc.getMapToData(), 41); // Should throw out of bounds
+    }
+
+    @Test
+    public void testLengthTwo() {
+        int[] src = new int[] { 0, 1 };
+
+        ColGroupDDC ddc = createTestDDC(src, 1, 2);
+
+        assertLosslessCompression(ddc);
+
+        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
+        assertPartialDecompression(ddclzw, ddc.getMapToData(), 0);
+        assertPartialDecompression(ddclzw, ddc.getMapToData(), 2);
+    }
+
+    @Test
+    public void testConvertToDDCLZWBasic() {
+        // TODO: reuse the new helper methods for comparison
+        IColIndex colIndexes = ColIndexFactory.create(2);
+        double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0};
+        Dictionary dict = Dictionary.create(dictValues);
+
+        int[] src = new int[] {
+            // repeating base pattern
+            0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2,
+            // variation / shifted pattern
+            1, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2, 0, 1,
+            // longer runs (good for phrase growth)
+            2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+            // mixed noise
+            2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1,
+            // repeating tail (tests dictionary reuse)
+            2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1};
+
+        final int nRows = src.length;
+        final int nUnique = 3;
+        AMapToData data = MapToFactory.create(nRows, nUnique);
+        for(int i = 0; i < nRows; i++)
+            data.set(i, src[i]);
+
+        ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
+        AColGroup result = ddc.convertToDDCLZW();
+
+        assertNotNull(result);
+        assertTrue(result instanceof ColGroupDDCLZW);
+
+        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) result;
+        AColGroup ddclzwDecompressed = ddclzw.convertToDDC();
+
+        assertNotNull(ddclzwDecompressed);
+        assertTrue(ddclzwDecompressed instanceof ColGroupDDC);
+
+        ColGroupDDC ddc2 = (ColGroupDDC) ddclzwDecompressed;
+
+        AMapToData d1 = ddc.getMapToData();
+        AMapToData d2 = ddc2.getMapToData();
+
+        assertEquals(d1.size(), d2.size());
+        assertEquals(d1.getUnique(), d2.getUnique());
+        for(int i = 0; i < d1.size(); i++)
+            assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i));
+
+        assertEquals(ddc.getColIndices(), ddc2.getColIndices());
+
+        // Test partial decompression:
+        // `index` is the number of map entries to decode (rows 0 to index-1)
+        int index = 10;
+        ColGroupDDC ddcIndex = (ColGroupDDC) ddclzw.convertToDDC(index);
+
+        AMapToData d3 = ddcIndex.getMapToData();
+        assertEquals(index, d3.size());
+        assertEquals(ddc.getColIndices(), ddcIndex.getColIndices());
+
+        for(int i = 0; i < index; i++) {
+            assertEquals(d1.getIndex(i), d3.getIndex(i));
+        }
+
+        // Test SliceRows
+        int low = 3;
+        int high = 10;
+        AColGroup slice = ddclzw.sliceRows(low, high);
+        if(slice instanceof ColGroupDDCLZW ddclzwslice) {
+            ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwslice.convertToDDC();
+            ColGroupDDC ddcSlice2 = (ColGroupDDC) ddc.sliceRows(low, high);
+
+            AMapToData d4 = ddcSlice.getMapToData();
+            AMapToData d5 = ddcSlice2.getMapToData();
+
+            assertEquals(d5.size(), d4.size());
+            assertEquals(d5.getUnique(), d4.getUnique());
+
+            for(int i = 0; i < d4.size(); i++)
+                assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i));
+        }
+
+        // TODO: compare computeRowSums/computeColSums over the slice once implemented for DDCLZW
+        // double[] sumsddc = new double[high - low];
+        //ddc.computeColSums(sumsddc, low, high, );
+    }
+
+
+    @Test
+    public void testGetIdxFirstElement() {
+        int[] src = new int[] {0, 1, 2, 1, 0};
+        ColGroupDDC ddc = createTestDDC(src, 2, 3);
+        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
+
+        double expected = ddc.getIdx(0, 0);
+        assertEquals(expected, ddclzw.getIdx(0, 0), 0.0001);
+    }
+
+    @Test
+    public void testGetIdxLastElement() {
+        int[] src = new int[] {0, 1, 2, 1, 0};
+        ColGroupDDC ddc = createTestDDC(src, 2, 3);
+        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
+
+        int lastRow = src.length - 1;
+        double expected = ddc.getIdx(lastRow, 1);
+        assertEquals(expected, ddclzw.getIdx(lastRow, 1), 0.0001);
+    }
+
+    @Test
+    public void testGetIdxAllElements() {
+        int[] src = new int[] {0, 1, 2, 1, 0, 2, 1};
+        ColGroupDDC ddc = createTestDDC(src, 3, 3);
+        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
+
+        for (int row = 0; row < src.length; row++) {
+            for (int col = 0; col < 3; col++) {
+                double expected = ddc.getIdx(row, col);
+                double actual = ddclzw.getIdx(row, col);
+                assertEquals("Mismatch at [" + row + "," + col + "]", expected, actual, 0.0001);
+            }
+        }
+    }
+
+    @Test
+    public void testGetIdxWithRepeatingPattern() {
+        int[] src = new int[] {0, 1, 0, 1, 0, 1, 0, 1};
+        ColGroupDDC ddc = createTestDDC(src, 1, 2);
+        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
+
+        double expected = ddc.getIdx(3, 0);
+        assertEquals(expected, ddclzw.getIdx(3, 0), 0.0001);
+    }
+
+    @Test(expected = DMLRuntimeException.class)
+    public void 
testGetIdxRowOutOfBoundsNegative() { + int[] src = new int[] {0, 1, 2}; + ColGroupDDC ddc = createTestDDC(src, 1, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + ddclzw.getIdx(-1, 0); + } + + @Test(expected = DMLRuntimeException.class) + public void testGetIdxRowOutOfBounds() { + int[] src = new int[] {0, 1, 2}; + ColGroupDDC ddc = createTestDDC(src, 1, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + ddclzw.getIdx(10, 0); + } + + @Test(expected = DMLRuntimeException.class) + public void testGetIdxColOutOfBoundsNegative() { + int[] src = new int[] {0, 1, 2}; + ColGroupDDC ddc = createTestDDC(src, 3, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + ddclzw.getIdx(0, -1); + } + + @Test(expected = DMLRuntimeException.class) + public void testGetIdxColOutOfBounds() { + int[] src = new int[] {0, 1, 2}; + ColGroupDDC ddc = createTestDDC(src, 3, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + ddclzw.getIdx(0, 10); + } + + @Test + public void testSliceRowsSingleRow() { + int[] src = new int[] {0, 1, 2, 1, 0, 2, 1}; + ColGroupDDC ddc = createTestDDC(src, 1, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + assertSlice(ddclzw, ddc, 3, 4); + } + + @Test + public void testSliceRowsMiddleRange() { + int[] src = new int[] {0, 1, 2, 0, 1, 2, 0, 1, 2, 0}; + ColGroupDDC ddc = createTestDDC(src, 2, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + assertSlice(ddclzw, ddc, 2, 7); + } + + @Test + public void testSliceRowsEntireRange() { + int[] src = new int[] {0, 1, 0, 1, 2}; + ColGroupDDC ddc = createTestDDC(src, 1, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + assertSlice(ddclzw, ddc, 0, src.length); + } + + @Test + public void testSliceRowsBeginning() { + int[] src = new int[] {0, 1, 2, 1, 0, 2}; + ColGroupDDC ddc = createTestDDC(src, 1, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + assertSlice(ddclzw, ddc, 0, 3); + } + + @Test + public void testSliceRowsEnd() { + int[] src = new int[] {0, 1, 2, 1, 0, 2}; + ColGroupDDC ddc = createTestDDC(src, 2, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + assertSlice(ddclzw, ddc, 3, 6); + } + + @Test + public void testSliceRowsWithLongRuns() { + int[] src = new int[30]; + Arrays.fill(src, 0, 10, 0); + Arrays.fill(src, 10, 20, 1); + Arrays.fill(src, 20, 30, 2); + + ColGroupDDC ddc = createTestDDC(src, 1, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + assertSlice(ddclzw, ddc, 5, 25); + } + + @Test + public void testCreateWithNullDictionary() { + IColIndex colIndexes = ColIndexFactory.create(1); + int[] src = new int[] {0, 1, 2}; + AMapToData data = MapToFactory.create(3, 3); + for (int i = 0; i < 3; i++) { + data.set(i, src[i]); + } + + AColGroup result = ColGroupDDCLZW.create(colIndexes, null, data, null); + assertTrue("Should create ColGroupEmpty", result instanceof ColGroupEmpty); + } + + @Test + public void testCreateWithSingleUnique() { + IColIndex colIndexes = ColIndexFactory.create(1); + double[] dictValues = new double[] {42.0}; + Dictionary dict = Dictionary.create(dictValues); + + int[] src = new int[] {0, 0, 0, 0}; + AMapToData data = MapToFactory.create(4, 1); + for (int i = 0; i < 4; i++) { + data.set(i, 0); + } + + AColGroup result = ColGroupDDCLZW.create(colIndexes, dict, data, null); + assertTrue("Should create ColGroupConst", result instanceof ColGroupConst); + } + + @Test + public void 
testCreateValidDDCLZW() { + int[] src = new int[] {0, 1, 0, 1, 2}; + ColGroupDDC ddc = createTestDDC(src, 1, 3); + + AColGroup result = ddc.convertToDDCLZW(); + assertTrue("Should create ColGroupDDCLZW", result instanceof ColGroupDDCLZW); + } + + @Test + public void testCreateWithMultipleColumns() { + int[] src = new int[] {0, 1, 2, 1, 0}; + ColGroupDDC ddc = createTestDDC(src, 3, 3); + + AColGroup result = ddc.convertToDDCLZW(); + assertTrue("Should create ColGroupDDCLZW", result instanceof ColGroupDDCLZW); + } + + @Test + public void testSameNumber() { + int[] src = new int[20]; + Arrays.fill(src, 2); + + ColGroupDDC ddc = createTestDDC(src, 1, 3); + assertLosslessCompression(ddc); + } + + @Test + public void testAlternatingNumbers() { + int[] src = new int[30]; + for (int i = 0; i < src.length; i++) { + src[i] = i % 2; + } + + ColGroupDDC ddc = createTestDDC(src, 1, 2); + assertLosslessCompression(ddc); + } + + @Test + public void testLongPatterns() { + int[] src = new int[50]; + Arrays.fill(src, 0, 15, 0); + Arrays.fill(src, 15, 30, 1); + Arrays.fill(src, 30, 45, 2); + Arrays.fill(src, 45, 50, 0); + + ColGroupDDC ddc = createTestDDC(src, 1, 3); + assertLosslessCompression(ddc); + } + + @Test + public void testSameIndexStructure() { + int[] src = new int[] {0, 1, 0, 1}; + ColGroupDDC ddc = createTestDDC(src, 1, 2); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + assertTrue("Same object should have same structure", + ddclzw.sameIndexStructure(ddclzw)); + } + + @Test + public void testSameIndexStructureDifferent() { + int[] src = new int[] {0, 1, 0, 1}; + + ColGroupDDC ddc1 = createTestDDC(src, 1, 2); + ColGroupDDC ddc2 = createTestDDC(src, 1, 2); + + ColGroupDDCLZW ddclzw1 = (ColGroupDDCLZW) ddc1.convertToDDCLZW(); + ColGroupDDCLZW ddclzw2 = (ColGroupDDCLZW) ddc2.convertToDDCLZW(); + + // Different objects have different _dataLZW arrays + assertFalse("Different objects should have different structure", + ddclzw1.sameIndexStructure(ddclzw2)); + } + + @Test + public void testSameIndexStructureDdcLzw() { + int[] src = new int[] {0, 1, 2, 1, 0}; + ColGroupDDC ddc = createTestDDC(src, 1, 3); + ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW(); + + assertFalse("Different types should not have same structure", + ddclzw.sameIndexStructure(ddc)); + } + + @Test + public void testRepetitiveData() { + int[] src = new int[] { + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 + }; + + ColGroupDDC ddc = createTestDDC(src, 1, 2); + assertLosslessCompression(ddc); + } + + @Test + public void testNoRepetition() { + int[] src = new int[20]; + for (int i = 0; i < src.length; i++) { + src[i] = i; + } + + ColGroupDDC ddc = createTestDDC(src, 1, 20); + assertLosslessCompression(ddc); + } - final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); - CompressedSizeInfo csi = new CompressedSizeInfo(cgi); - AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0);*/ - } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java index ca753e3d50f..834e453c9e8 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -38,96 +38,6 @@ public class ColGroupDDCTest { protected static final Log LOG = 
LogFactory.getLog(ColGroupDDCTest.class.getName());
-    @Test
-    public void testConvertToDDCLZWBasic() {
-        // TODO: neue Methode zum Vergleich
-        IColIndex colIndexes = ColIndexFactory.create(2);
-        double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0};
-        Dictionary dict = Dictionary.create(dictValues);
-
-        int[] src = new int[] {
-            // repeating base pattern
-            0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2,
-
-            // variation / shifted pattern
-            1, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2, 0, 1,
-
-            // longer runs (good for phrase growth)
-            2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
-
-            // mixed noise
-            2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1,
-
-            // repeating tail (tests dictionary reuse)
-            2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1};
-
-        final int nRows = src.length;
-        final int nUnique = 3;
-        AMapToData data = MapToFactory.create(nRows, nUnique);
-        for(int i = 0; i < nRows; i++)
-            data.set(i, src[i]);
-
-        ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null);
-        AColGroup result = ddc.convertToDDCLZW();
-
-        assertNotNull(result);
-        assertTrue(result instanceof ColGroupDDCLZW);
-
-        ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) result;
-        AColGroup ddclzwDecompressed = ddclzw.convertToDDC();
-
-        assertNotNull(ddclzwDecompressed);
-        assertTrue(ddclzwDecompressed instanceof ColGroupDDC);
-
-        ColGroupDDC ddc2 = (ColGroupDDC) ddclzwDecompressed;
-
-        AMapToData d1 = ddc.getMapToData();
-        AMapToData d2 = ddc2.getMapToData();
-
-        assertEquals(d1.size(), d2.size());
-        assertEquals(d1.getUnique(), d2.getUnique());
-        for(int i = 0; i < d1.size(); i++)
-            assertEquals("mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i));
-
-        assertEquals(ddc.getColIndices(), ddc2.getColIndices());
-
-        // Testen der Teildekompression:
-        // Index entspricht der Anzahl der Zeichen, die dekodiert werden sollen (0 bis Index-1)
-        int index = 10;
-        ColGroupDDC ddcIndex = (ColGroupDDC) ddclzw.convertToDDC(index);
-
-        AMapToData d3 = ddcIndex.getMapToData();
-        assertEquals(index, d3.size());
-        assertEquals(ddc.getColIndices(), ddcIndex.getColIndices());
-
-        for(int i = 0; i < index; i++) {
-            assertEquals(d1.getIndex(i), d3.getIndex(i));
-        }
-
-        // Testen von SliceRows
-        int low = 3;
-        int high = 10;
-        AColGroup slice = ddclzw.sliceRows(low, high);
-        if(slice instanceof ColGroupDDCLZW ddclzwslice) {
-            ColGroupDDC ddcSlice = (ColGroupDDC) ddclzwslice.convertToDDC();
-            ColGroupDDC ddcSlice2 = (ColGroupDDC) ddc.sliceRows(low, high);
-
-            AMapToData d4 = ddcSlice.getMapToData();
-            AMapToData d5 = ddcSlice2.getMapToData();
-
-            assertEquals(d5.size(), d4.size());
-            assertEquals(d5.getUnique(), d4.getUnique());
-
-            for(int i = 0; i < d4.size(); i++)
-                assertEquals("mapping mismatch at row " + i, d4.getIndex(i), d5.getIndex(i));
-        }
-
-        // Testen von compute RowSums
-        double[] sumsddc = new double[high - low];
-        //ddc.computeColSums(sumsddc, low, high, );
-
-    }
-
     @Test
     public void testConvertToDeltaDDCBasic() {
         IColIndex colIndexes = ColIndexFactory.create(2);

From ddd2a8b4bf76a973c6c9b7790ad9300073ec9087 Mon Sep 17 00:00:00 2001
From: fjobs
Date: Mon, 19 Jan 2026 14:20:09 +0100
Subject: [PATCH 23/24] [SYSTEMDS-3779] Increased sliceRows performance by using iterator. Added decompressToDenseBlockDenseDictionary [WIP]; needs to be tested further. Added fallbacks to ddc for various functions. Added scalar and unary ops and various other simple methods from ddc.
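For review context: the LZWMappingIterator used here is a streaming form of textbook LZW decoding over the integer symbol alphabet [0, nUnique). A minimal sketch of that decode loop follows; lzwDecode and this standalone class are illustrative only and not part of the patch:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public final class LzwDecodeSketch {
        // Decodes an LZW code stream back into nRows mapping symbols in [0, nUnique).
        static int[] lzwDecode(int[] codes, int nUnique, int nRows) {
            List<int[]> dict = new ArrayList<>();
            for(int s = 0; s < nUnique; s++)
                dict.add(new int[] {s}); // initial dictionary: all single symbols
            int[] out = new int[nRows];
            int pos = 0;
            int[] prev = dict.get(codes[0]);
            for(int v : prev)
                out[pos++] = v;
            for(int i = 1; i < codes.length; i++) {
                int[] entry;
                if(codes[i] < dict.size())
                    entry = dict.get(codes[i]);
                else { // special case: the code refers to the phrase currently being built
                    entry = Arrays.copyOf(prev, prev.length + 1);
                    entry[prev.length] = prev[0];
                }
                for(int v : entry)
                    out[pos++] = v;
                int[] grown = Arrays.copyOf(prev, prev.length + 1);
                grown[prev.length] = entry[0]; // dictionary grows by prev + first(entry)
                dict.add(grown);
                prev = entry;
            }
            return out;
        }
    }

Decoding is inherently sequential, since every code is interpreted against the dictionary state built from the preceding codes. The iterator therefore still walks past the first rl symbols, but sliceRows(rl, ru) can now stop at ru and avoids materializing the full intermediate AMapToData.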
--- .../compress/colgroup/ColGroupDDCLZW.java | 157 ++++++++++++------ .../compress/colgroup/ColGroupDDCLZWTest.java | 145 +++++++++++----- 2 files changed, 206 insertions(+), 96 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java index 810aeb5709d..caec3ac4a56 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java @@ -33,11 +33,8 @@ import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P; +import org.apache.sysds.runtime.compress.colgroup.dictionary.*; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; -import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; -import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; -import org.apache.sysds.runtime.compress.colgroup.dictionary.IdentityDictionary; -import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.indexes.RangeIndex; @@ -57,9 +54,7 @@ import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.data.SparseBlockMCSR; import org.apache.sysds.runtime.data.SparseRow; -import org.apache.sysds.runtime.functionobjects.Builtin; -import org.apache.sysds.runtime.functionobjects.Minus; -import org.apache.sysds.runtime.functionobjects.Plus; +import org.apache.sysds.runtime.functionobjects.*; import org.apache.sysds.runtime.matrix.data.LibMatrixMult; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.BinaryOperator; @@ -411,12 +406,6 @@ else if(data.getUnique() == 1) return new ColGroupDDCLZW(colIndexes, dict, data, cachedCounts); } - /* - * TODO: Operations with complex access patterns shall be uncompressed to ddc format. - * ... return ColGroupDDC.create(...,decompress(_dataLZW),...). We need to decide which methods are - * suitable for sequential and which arent. those who arent then we shall materialize and fall back to ddc - * */ - public AColGroup convertToDDC() { final AMapToData map = decompress(_dataLZW, _nUnique, _nRows, _nRows); final int[] counts = getCounts(); // may be null depending on your group @@ -529,12 +518,12 @@ public long getExactSizeOnDisk() { @Override public AMapToData getMapToData() { - throw new NotImplementedException(); // or decompress and return data... 
decompress(_dataLZW, _nUnique, _nRows, _nRows)
+        return decompressFull(_dataLZW, _nUnique, _nRows);
     }
 
     @Override
     public boolean sameIndexStructure(AColGroupCompressed that) {
-        return that instanceof ColGroupDDCLZW && ((ColGroupDDCLZW) that)._dataLZW == _dataLZW;
+        return that instanceof ColGroupDDCLZW && ((ColGroupDDCLZW) that)._dataLZW == this._dataLZW;
     }
 
     @Override
@@ -550,172 +539,240 @@ protected void computeColMxx(double[] c, Builtin builtin) {
     @Override
     public AColGroup sliceRows(int rl, int ru) {
         try {
-            AMapToData map = decompress(_dataLZW, _nUnique, _nRows, ru);
-            return ColGroupDDCLZW.create(_colIndexes, _dict, map.slice(rl, ru), null);
+            if(rl < 0 || ru > _nRows)
+                throw new DMLRuntimeException("Invalid slice range: " + rl + " - " + ru);
+
+            final int len = ru - rl;
+            if(len == 0)
+                return new ColGroupEmpty(_colIndexes);
+
+            final int[] slicedMapping = new int[len];
+
+            final LZWMappingIterator it = new LZWMappingIterator();
+
+            for(int i = 0; i < rl; i++)
+                it.next();
+
+            for(int i = rl; i < ru; i++)
+                slicedMapping[i - rl] = it.next();
+
+            AMapToData slicedMappingAMapToData = MapToFactory.create(len, _nUnique);
+            for(int i = 0; i < len; i++) {
+                slicedMappingAMapToData.set(i, slicedMapping[i]);
+            }
+
+            return new ColGroupDDCLZW(_colIndexes, _dict, slicedMappingAMapToData, null);
         }
         catch(Exception e) {
             throw new DMLRuntimeException("Failed to slice out sub part DDCLZW: " + rl + ", " + ru, e);
         }
     }
 
     @Override
     protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock dict) {
-
+        throw new NotImplementedException();
     }
 
     @Override
     protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) {
-
+        throw new NotImplementedException();
     }
 
     @Override
     protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR db, SparseBlock dict, int nColOut) {
-
+        throw new NotImplementedException();
     }
 
     @Override
     protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR db, double[] dict, int nColOut) {
-
+        throw new NotImplementedException();
     }
 
     @Override
     protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, SparseBlock sb) {
-
+        throw new NotImplementedException();
     }
 
     @Override
     protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) {
+        final int nCol = _colIndexes.size();
+        final LZWMappingIterator it = new LZWMappingIterator();
+
+        for(int i = 0; i < rl; i++) {
+            it.next();
+        }
+
+        if(db.isContiguous() && nCol == db.getDim(1) && offC == 0) {
+            final int nColOut = db.getDim(1);
+            final double[] c = db.values(0);
+
+            for(int i = rl; i < ru; i++) {
+                final int dictIdx = it.next();
+                final int rowIndex = dictIdx * nCol;
+                final int rowBaseOff = (i + offR) * nColOut;
+                for(int j = 0; j < nCol; j++)
+                    c[rowBaseOff + j] = values[rowIndex + j];
+            }
+        }
+        else {
+            for(int i = rl, offT = rl + offR; i < ru; i++, offT++) {
+                final double[] c = db.values(offT);
+                final int off = db.pos(offT) + offC;
+                final int dictIdx = it.next();
+                final int rowIndex = dictIdx * nCol;
+
+                for(int j = 0; j < nCol; j++) {
+                    final int colIdx = _colIndexes.get(j);
+                    c[off + colIdx] = values[rowIndex + j];
+                }
+            }
+        }
     }
 
     @Override
     protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, SparseBlock 
sb) { - + throw new NotImplementedException(); } @Override protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, double[] values) { - + throw new NotImplementedException(); } @Override public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { - + convertToDDC().leftMultByMatrixNoPreAgg(matrix, result, rl, ru, cl, cu); // Fallback to DDC. } @Override public AColGroup scalarOperation(ScalarOperator op) { - return null; + if((op.fn instanceof Plus || op.fn instanceof Minus)) { + final double v0 = op.executeScalar(0); + if(v0 == 0) + return this; + } + + return new ColGroupDDCLZW(_colIndexes, _dict.applyScalarOp(op), _dataLZW, _nRows, _nUnique, getCachedCounts()); } @Override - public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { - return null; + public AColGroup unaryOperation(UnaryOperator op) { + return new ColGroupDDCLZW(_colIndexes, _dict.applyUnaryOp(op), _dataLZW, _nRows, _nUnique, getCachedCounts()); } @Override - public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { - return null; + public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { + throw new NotImplementedException(); } @Override - public AColGroup unaryOperation(UnaryOperator op) { - return null; + public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { + throw new NotImplementedException(); } @Override public AColGroup append(AColGroup g) { - return null; + throw new NotImplementedException(); } @Override protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) { - return null; + throw new NotImplementedException(); } @Override public AColGroup recompress() { - return null; + throw new NotImplementedException(); } @Override public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { - return null; + throw new NotImplementedException(); } @Override protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { - return null; + throw new NotImplementedException(); } @Override protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - + throw new NotImplementedException(); } @Override protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) { - + throw new NotImplementedException(); } @Override public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) { - return new AColGroup[0]; + throw new NotImplementedException(); } @Override protected boolean allowShallowIdentityRightMult() { - return false; + throw new NotImplementedException(); } @Override protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - return null; + throw new NotImplementedException(); } @Override public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - + throw new NotImplementedException("Preaggregation not supported for DDCLZW."); } @Override public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - + throw new NotImplementedException(); } @Override protected void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - + throw new NotImplementedException(); } @Override protected void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - + throw new NotImplementedException(); } @Override protected void 
preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - + throw new NotImplementedException(); } @Override protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - + throw new NotImplementedException(); } @Override public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - + throw new NotImplementedException(); } @Override protected int[] getCounts(int[] out) { - return new int[0]; // If returns exeption test wont work. + AMapToData data = decompressFull(_dataLZW, _nUnique, _nRows); + return data.getCounts(); } protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { @@ -726,7 +783,7 @@ protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { @Override protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { - + throw new NotImplementedException(); } @Override diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java index 97695abad07..f300a3d7f36 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCLZWTest.java @@ -20,15 +20,25 @@ package org.apache.sysds.test.component.compress.colgroup; import java.util.Arrays; +import java.util.EnumSet; + +import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressionSettings; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; import org.apache.sysds.runtime.compress.colgroup.*; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.estim.ComEstExact; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.util.DataConverter; import org.junit.Test; import static org.junit.Assert.*; @@ -44,15 +54,15 @@ private ColGroupDDC createTestDDC(int[] mapping, int nCols, int nUnique) { IColIndex colIndexes = ColIndexFactory.create(nCols); double[] dictValues = new double[nUnique * nCols]; - for (int i = 0; i < nUnique; i++) { - for (int c = 0; c < nCols; c++) { + for(int i = 0; i < nUnique; i++) { + for(int c = 0; c < nCols; c++) { dictValues[i * nCols + c] = (i + 1) * 10.0 + c; } } Dictionary dict = Dictionary.create(dictValues); AMapToData data = MapToFactory.create(mapping.length, nUnique); - for (int i = 0; i < mapping.length; i++) { + for(int i = 0; i < mapping.length; i++) { data.set(i, mapping[i]); } @@ -68,7 +78,7 @@ private void assertMapsEqual(AMapToData expected, AMapToData actual) { assertEquals("Size mismatch", expected.size(), actual.size()); assertEquals("Unique count mismatch", expected.getUnique(), actual.getUnique()); - for (int i = 0; i < expected.size(); i++) { + for(int i = 0; i < 
expected.size(); i++) { assertEquals("Mapping mismatch at row " + i, expected.getIndex(i), actual.getIndex(i)); } } @@ -100,7 +110,7 @@ private void assertLosslessCompression(ColGroupDDC original) { assertEquals("Size mismatch", d1.size(), d2.size()); assertEquals("Unique count mismatch", d1.getUnique(), d2.getUnique()); - for (int i = 0; i < d1.size(); i++) { + for(int i = 0; i < d1.size(); i++) { assertEquals("Mapping mismatch at row " + i, d1.getIndex(i), d2.getIndex(i)); } } @@ -114,7 +124,7 @@ private void assertPartialDecompression(ColGroupDDCLZW ddclzw, AMapToData origin assertEquals("Partial size incorrect", index, partialMap.size()); - for (int i = 0; i < index; i++) { + for(int i = 0; i < index; i++) { assertEquals("Partial map mismatch at " + i, original.getIndex(i), partialMap.getIndex(i)); } } @@ -135,13 +145,9 @@ private void assertSlice(ColGroupDDCLZW ddclzw, ColGroupDDC originalDDC, int low @Test public void testConvertToDDCLZWBasicNew() { - int[] src = new int[] { - 0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, - 1, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2, 0, 1, - 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, - 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1 - }; + int[] src = new int[] {0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 1, 2, 0, 1, 2, 0, 1, 1, + 0, 1, 2, 0, 1, 2, 0, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, + 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1}; // Create DDC with 2 columns, 3 unique values ColGroupDDC ddc = createTestDDC(src, 2, 3); @@ -155,10 +161,8 @@ public void testConvertToDDCLZWBasicNew() { @Test(expected = IllegalArgumentException.class) public void testPartialDecompressionOutOfBounds() { - int[] src = new int[] { - 1, 3, 4, 4, 3, 2, 3, 4, 1, 4, 4, 4, 4, 1, 4, 1, 4, 1, 4, 0, - 1, 3, 4, 4, 3, 2, 3, 4, 1, 4, 4, 4, 4, 1, 4, 1, 4, 1, 4, 0, - }; + int[] src = new int[] {1, 3, 4, 4, 3, 2, 3, 4, 1, 4, 4, 4, 4, 1, 4, 1, 4, 1, 4, 0, 1, 3, 4, 4, 3, 2, 3, 4, 1, 4, + 4, 4, 4, 1, 4, 1, 4, 1, 4, 0,}; ColGroupDDC ddc = createTestDDC(src, 3, 5); @@ -171,7 +175,7 @@ public void testPartialDecompressionOutOfBounds() { @Test public void testLengthTwo() { - int[] src = new int[] { 0, 1 }; + int[] src = new int[] {0, 1}; ColGroupDDC ddc = createTestDDC(src, 1, 2); @@ -190,16 +194,16 @@ public void testConvertToDDCLZWBasic() { Dictionary dict = Dictionary.create(dictValues); int[] src = new int[] { - // repeating base pattern - 0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, - // variation / shifted pattern - 1, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2, 0, 1, - // longer runs (good for phrase growth) - 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, - // mixed noise - 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, - // repeating tail (tests dictionary reuse) - 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1}; + // repeating base pattern + 0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, + // variation / shifted pattern + 1, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2, 0, 1, + // longer runs (good for phrase growth) + 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + // mixed noise + 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 1, 1, 1, 1, 1, 1, + // repeating tail (tests dictionary reuse) + 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 
0, 1};
 
         final int nRows = src.length;
         final int nUnique = 3;
@@ -267,7 +271,6 @@ public void testConvertToDDCLZWBasic() {
         //ddc.computeColSums(sumsddc, low, high, );
     }
 
-
     @Test
     public void testGetIdxFirstElement() {
         int[] src = new int[] {0, 1, 2, 1, 0};
@@ -295,8 +298,8 @@ public void testGetIdxAllElements() {
         ColGroupDDC ddc = createTestDDC(src, 3, 3);
         ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
 
-        for (int row = 0; row < src.length; row++) {
-            for (int col = 0; col < 3; col++) {
+        for(int row = 0; row < src.length; row++) {
+            for(int col = 0; col < 3; col++) {
                 double expected = ddc.getIdx(row, col);
                 double actual = ddclzw.getIdx(row, col);
                 assertEquals("Mismatch at [" + row + "," + col + "]", expected, actual, 0.0001);
@@ -413,7 +416,7 @@ public void testCreateWithNullDictionary() {
         IColIndex colIndexes = ColIndexFactory.create(1);
         int[] src = new int[] {0, 1, 2};
         AMapToData data = MapToFactory.create(3, 3);
-        for (int i = 0; i < 3; i++) {
+        for(int i = 0; i < 3; i++) {
             data.set(i, src[i]);
         }
 
@@ -429,7 +432,7 @@ public void testCreateWithSingleUnique() {
         int[] src = new int[] {0, 0, 0, 0};
         AMapToData data = MapToFactory.create(4, 1);
-        for (int i = 0; i < 4; i++) {
+        for(int i = 0; i < 4; i++) {
             data.set(i, 0);
         }
 
@@ -467,7 +470,7 @@ public void testSameNumber() {
     @Test
     public void testAlternatingNumbers() {
         int[] src = new int[30];
-        for (int i = 0; i < src.length; i++) {
+        for(int i = 0; i < src.length; i++) {
             src[i] = i % 2;
         }
 
@@ -493,8 +496,7 @@ public void testSameIndexStructure() {
         ColGroupDDC ddc = createTestDDC(src, 1, 2);
         ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
 
-        assertTrue("Same object should have same structure",
-            ddclzw.sameIndexStructure(ddclzw));
+        assertTrue("Same object should have same structure", ddclzw.sameIndexStructure(ddclzw));
     }
 
     @Test
@@ -508,8 +510,7 @@ public void testSameIndexStructureDifferent() {
         ColGroupDDCLZW ddclzw2 = (ColGroupDDCLZW) ddc2.convertToDDCLZW();
 
         // Different objects have different _dataLZW arrays
-        assertFalse("Different objects should have different structure",
-            ddclzw1.sameIndexStructure(ddclzw2));
+        assertFalse("Different objects should have different structure", ddclzw1.sameIndexStructure(ddclzw2));
     }
 
     @Test
@@ -518,17 +519,13 @@ public void testSameIndexStructureDdcLzw() {
         ColGroupDDC ddc = createTestDDC(src, 1, 3);
         ColGroupDDCLZW ddclzw = (ColGroupDDCLZW) ddc.convertToDDCLZW();
 
-        assertFalse("Different types should not have same structure",
-            ddclzw.sameIndexStructure(ddc));
+        assertFalse("Different types should not have same structure", ddclzw.sameIndexStructure(ddc));
     }
 
     @Test
     public void testRepetitiveData() {
-        int[] src = new int[] {
-            0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
-            0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
-            0, 0, 0, 0, 0, 1, 1, 1, 1, 1
-        };
+        int[] src = new int[] {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+            1};
 
         ColGroupDDC ddc = createTestDDC(src, 1, 2);
         assertLosslessCompression(ddc);
@@ -537,7 +534,7 @@ public void testRepetitiveData() {
     @Test
     public void testNoRepetition() {
         int[] src = new int[20];
-        for (int i = 0; i < src.length; i++) {
+        for(int i = 0; i < src.length; i++) {
             src[i] = i;
         }
 
@@ -545,4 +542,60 @@ public void testNoRepetition() {
         assertLosslessCompression(ddc);
     }
 
+    public void testDecompressToDenseBlock(double[][] data, boolean isTransposed) {
+        if(isTransposed) {
+            throw new NotImplementedException("DDCLZW decompression for transposed matrices not yet implemented");
+        }
+
+        MatrixBlock mbt = DataConverter.convertToMatrixBlock(data);
+
+        final 
int numCols = mbt.getNumColumns(); + final int numRows = mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + try { + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DDCLZW)).setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + MatrixBlock ret = new MatrixBlock(numRows, numCols, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), 0, numRows); + + MatrixBlock expected = DataConverter.convertToMatrixBlock(data); + assertArrayEquals(expected.getDenseBlockValues(), ret.getDenseBlockValues(), 0.01); + + } + catch(NotImplementedException e) { + throw e; + } + catch(Exception e) { + e.printStackTrace(); + throw new DMLRuntimeException("Failed construction : " + this.getClass().getSimpleName(), e); + } + } + + @Test + public void testDecompressToDenseBlockSingleColumn() { + testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}}, false); + } + + @Test(expected = NotImplementedException.class) + public void testDecompressToDenseBlockSingleColumnTransposed() { + testDecompressToDenseBlock(new double[][] {{1}, {2}, {3}, {4}, {5}}, true); + } + + @Test + public void testDecompressToDenseBlockTwoColumns() { + testDecompressToDenseBlock(new double[][] {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}, false); + } + + @Test(expected = NotImplementedException.class) + public void testDecompressToDenseBlockTwoColumnsTransposed() { + testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}, {1, 1, 1, 1, 1}}, true); + } } From a8735e1f167cbe718aa12492586816973c8c3d46 Mon Sep 17 00:00:00 2001 From: fjobs Date: Mon, 19 Jan 2026 19:06:55 +0100 Subject: [PATCH 24/24] Added various fallbacks to ddc for functions with complex access patterns. Added append and appendNInternal, recompress and various other functions that needed to be implemented. No tests yet. 
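Background for the append methods: two independently compressed code streams cannot simply be concatenated, because a code is only meaningful relative to the dictionary state the encoder had built from that stream's own prefix. A sketch of the consequence, using hypothetical lzwEncode/lzwDecode/concat helpers (none of these names exist in the patch):

    int[] a = lzwEncode(new int[] {0, 1, 0, 1, 0, 1}, 2); // phrase table grown from this prefix
    int[] b = lzwEncode(new int[] {1, 1, 0, 0, 1, 1}, 2); // a different phrase table
    // lzwDecode(concat(a, b), ...) would misinterpret b's codes against a's decoder state.
    int[] merged = concat(lzwDecode(a, 2, 6), lzwDecode(b, 2, 6));
    int[] appended = lzwEncode(merged, 2); // what append()/appendNInternal() effectively do

A possible later optimization would be to replay the first stream's encoder dictionary and continue encoding the second mapping from that state, avoiding the full re-encode.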
---
 .../compress/colgroup/ColGroupDDCLZW.java | 184 ++++++++++++++++--
 1 file changed, 165 insertions(+), 19 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
index caec3ac4a56..28c16762b35 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCLZW.java
@@ -684,29 +689,131 @@ public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSa
         throw new NotImplementedException();
     }
 
+    public int[] appendDataLZWMap(int[] dataLZW) {
+        int[] newDataLZW = new int[_dataLZW.length + dataLZW.length];
+        System.arraycopy(_dataLZW, 0, newDataLZW, 0, _dataLZW.length);
+        System.arraycopy(dataLZW, 0, newDataLZW, _dataLZW.length, dataLZW.length);
+        return newDataLZW;
+    }
+
     @Override
     public AColGroup append(AColGroup g) {
-        throw new NotImplementedException();
+        if(g instanceof ColGroupDDCLZW) {
+            if(g.getColIndices().equals(_colIndexes)) {
+                ColGroupDDCLZW gDDCLZW = (ColGroupDDCLZW) g;
+                if(gDDCLZW._dict.equals(_dict)) {
+                    if(_nUnique == gDDCLZW._nUnique) {
+                        int[] mergedMap = new int[this._nRows + gDDCLZW._nRows];
+
+                        LZWMappingIterator it = new LZWMappingIterator();
+                        for(int i = 0; i < this._nRows; i++) {
+                            mergedMap[i] = it.next();
+                        }
+
+                        LZWMappingIterator gLZWit = gDDCLZW.new LZWMappingIterator();
+                        for(int i = this._nRows; i < mergedMap.length; i++) {
+                            mergedMap[i] = gLZWit.next();
+                        }
+
+                        AMapToData mergedDataAMap = MapToFactory.create(mergedMap.length, _nUnique);
+                        int mergedDataAMapPos = 0;
+
+                        for(int j : mergedMap) {
+                            mergedDataAMap.set(mergedDataAMapPos++, j);
+                        }
+
+                        int[] mergedDataAMapCompressed = compress(mergedDataAMap);
+
+                        return new ColGroupDDCLZW(_colIndexes, _dict, mergedDataAMapCompressed, mergedMap.length,
+                            _nUnique, null);
+                    }
+                    else
+                        LOG.warn("Not the same number of unique values, therefore not appending DDCLZW\n" + _nUnique
+                            + "\n\n" + gDDCLZW._nUnique);
+                }
+                else
+                    LOG.warn("Not the same dictionaries, therefore not appending DDCLZW\n" + _dict + "\n\n" + gDDCLZW._dict);
+            }
+            else
+                LOG.warn(
+                    "Not the same columns, therefore not appending DDCLZW\n" + _colIndexes + "\n\n" + g.getColIndices());
+        }
+        else
+            LOG.warn("Not DDCLZW but " + g.getClass().getSimpleName() + ", therefore not appending DDCLZW");
+        return null;
     }
 
+    // TODO: adjust according to contract, "this shall only be appended once".
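+    // Note: the compressed code streams cannot simply be concatenated (appendDataLZWMap
+    // above would only yield a decodable stream if the second part had been encoded
+    // starting from the first stream's final dictionary state). The merged mapping is
+    // therefore decoded through the iterators and re-encoded once via compress(..).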
     @Override
-    protected AColGroup appendNInternal(AColGroup[] groups, int blen, int rlen) {
-        throw new NotImplementedException();
+    protected AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) {
+        int[] mergedMap = new int[rlen];
+        int mergedMapPos = 0;
+
+        for(int i = 1; i < g.length; i++) {
+            if(!_colIndexes.equals(g[i]._colIndexes)) {
+                LOG.warn("Not the same columns, therefore not appending DDCLZW\n" + _colIndexes + "\n\n" + g[i]._colIndexes);
+                return null;
+            }
+
+            if(!(g[i] instanceof ColGroupDDCLZW)) {
+                LOG.warn("Not DDCLZW but " + g[i].getClass().getSimpleName() + ", therefore not appending DDCLZW");
+                return null;
+            }
+
+            final ColGroupDDCLZW gDDCLZW = (ColGroupDDCLZW) g[i];
+            if(!gDDCLZW._dict.equals(_dict)) {
+                LOG.warn("Not the same dictionaries, therefore not appending DDCLZW\n" + _dict + "\n\n" + gDDCLZW._dict);
+                return null;
+            }
+            if(_nUnique != gDDCLZW._nUnique) {
+                LOG.warn(
+                    "Not the same number of unique values, therefore not appending DDCLZW\n" + _nUnique + "\n\n" + gDDCLZW._nUnique);
+                return null;
+            }
+        }
+
+        for(AColGroup group : g) {
+            ColGroupDDCLZW gDDCLZW = (ColGroupDDCLZW) group;
+
+            LZWMappingIterator gLZWit = gDDCLZW.new LZWMappingIterator();
+            for(int j = 0; j < gDDCLZW._nRows; j++)
+                mergedMap[mergedMapPos++] = gLZWit.next();
+        }
+
+        AMapToData mergedDataAMap = MapToFactory.create(rlen, _nUnique);
+
+        for(int k = 0; k < rlen; k++) {
+            mergedDataAMap.set(k, mergedMap[k]);
+        }
+
+        int[] mergedDataAMapCompressed = compress(mergedDataAMap);
+
+        return new ColGroupDDCLZW(_colIndexes, _dict, mergedDataAMapCompressed, rlen, _nUnique, null);
     }
 
     @Override
     public AColGroup recompress() {
-        throw new NotImplementedException();
+        return this; // The contract allows returning a new or the same column group, depending on the optimization goal (see ColGroupDDC).
     }
 
     @Override
     public CompressedSizeInfoColGroup getCompressionInfo(int nRow) {
-        throw new NotImplementedException();
+        try {
+            IEncode enc = getEncoding();
+            EstimationFactors ef = new EstimationFactors(_nUnique, _nRows, _nRows, _dict.getSparsity());
+            return new CompressedSizeInfoColGroup(_colIndexes, ef, estimateInMemorySize(), getCompType(), enc);
+        }
+        catch(Exception e) {
+            throw new DMLCompressionException(this.toString(), e);
+        }
     }
 
     @Override
     protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) {
-        throw new NotImplementedException();
+        return new ColGroupDDCLZW(newColIndex, _dict.reorder(reordering), _dataLZW, _nRows, _nUnique,
+            getCachedCounts());
     }
 
     @Override
@@ -716,14 +823,16 @@ protected void sparseSelection(MatrixBlock selection, P[] points, MatrixBlock re
 
     @Override
     protected void denseSelection(MatrixBlock selection, P[] points, MatrixBlock ret, int rl, int ru) {
-        throw new NotImplementedException();
+        throw new NotImplementedException(); // We need to implement decompToDenseBlock first!
     }
 
     @Override
     public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
-        throw new NotImplementedException();
+        ColGroupDDC g = (ColGroupDDC) convertToDDC();
+        return g.splitReshape(multiplier, nRow, nColOrg); // Fallback to ddc. No splitReshapeDDCLZW implemented.
     }
 
+    // Unclear yet whether a shallow identity right multiplication is safe for DDCLZW; keep unimplemented for now.
@Override protected boolean allowShallowIdentityRightMult() { throw new NotImplementedException(); @@ -731,42 +840,54 @@ protected boolean allowShallowIdentityRightMult() { @Override protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, IDictionary preAgg) { - throw new NotImplementedException(); + if(preAgg == null) + return null; + else + return new ColGroupDDCLZW(colIndexes, preAgg, _dataLZW, _nRows, _nUnique, getCachedCounts()); } @Override public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { - throw new NotImplementedException("Preaggregation not supported for DDCLZW."); + ColGroupDDC g = (ColGroupDDC) convertToDDC(); + g.preAggregateDense(m, preAgg, rl, ru, cl, cu); // Fallback to ddc. } @Override public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { - throw new NotImplementedException(); + ColGroupDDC g = (ColGroupDDC) convertToDDC(); + g.preAggregateSparse(sb, preAgg, rl, ru, cl, cu); // Fallback to ddc. } @Override protected void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { - throw new NotImplementedException(); + ColGroupDDC g = (ColGroupDDC) convertToDDC(); + g.preAggregateThatDDCStructure(that, ret); // Fallback to ddc. } @Override protected void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - throw new NotImplementedException(); + ColGroupDDC g = (ColGroupDDC) convertToDDC(); + g.preAggregateThatSDCZerosStructure(that, ret); // Fallback to ddc. } @Override protected void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { - throw new NotImplementedException(); + ColGroupDDC g = (ColGroupDDC) convertToDDC(); + g.preAggregateThatSDCSingleZerosStructure(that, ret); // Fallback to ddc. + } @Override protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { - throw new NotImplementedException(); + ColGroupDDC g = (ColGroupDDC) convertToDDC(); + g.preAggregateThatRLEStructure(that, ret); // Fallback to ddc. + } @Override public void leftMMIdentityPreAggregateDense(MatrixBlock that, MatrixBlock ret, int rl, int ru, int cl, int cu) { - throw new NotImplementedException(); + ColGroupDDC g = (ColGroupDDC) convertToDDC(); + g.leftMMIdentityPreAggregateDense(that, ret, rl, ru, cl, cu); // Fallback to ddc. 
    }
 
     @Override
@@ -775,23 +896,48 @@ protected int[] getCounts(int[] out) {
         AMapToData data = decompressFull(_dataLZW, _nUnique, _nRows);
         return data.getCounts();
     }
 
+    @Override
     protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) {
-        AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru);
-        for(int rix = rl; rix < ru; rix++)
-            c[rix] += preAgg[data.getIndex(rix)];
+        final LZWMappingIterator it = new LZWMappingIterator();
+        for(int i = 0; i < rl; i++)
+            it.next();
+
+        for(int rix = rl; rix < ru; rix++)
+            c[rix] += preAgg[it.next()];
     }
 
     @Override
     protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) {
-        throw new NotImplementedException();
+        final LZWMappingIterator it = new LZWMappingIterator();
+        for(int i = 0; i < rl; i++)
+            it.next();
+
+        for(int i = rl; i < ru; i++)
+            c[i] = builtin.execute(c[i], preAgg[it.next()]);
     }
 
     @Override
     protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) {
-        AMapToData data = decompress(_dataLZW, _nUnique, _nRows, ru);
-        for(int rix = rl; rix < ru; rix++)
-            c[rix] *= preAgg[data.getIndex(rix)];
+        final LZWMappingIterator it = new LZWMappingIterator();
+        for(int i = 0; i < rl; i++)
+            it.next();
+
+        for(int rix = rl; rix < ru; rix++)
+            c[rix] *= preAgg[it.next()];
     }
 }
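Editorial note on the computeRow* pattern above: preAgg holds one precomputed aggregate per dictionary row, so each output row costs a single lookup through the decoded mapping; with DDCLZW that lookup becomes it.next(), which is why every method first advances the iterator past the rows below rl. A minimal sketch on plain arrays (names are illustrative, not the SystemDS API):

    // dictValues is row-major with nCol values per unique dictionary row.
    static double[] rowSums(double[] dictValues, int nCol, int[] map) {
        int nUnique = dictValues.length / nCol;
        double[] preAgg = new double[nUnique];
        for(int u = 0; u < nUnique; u++)
            for(int c = 0; c < nCol; c++)
                preAgg[u] += dictValues[u * nCol + c]; // aggregate each dictionary row once
        double[] out = new double[map.length];
        for(int r = 0; r < map.length; r++)
            out[r] = preAgg[map[r]]; // per-row result is a single lookup
        return out;
    }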