From c60fdeba37f1e9a93c15aefbda082783819dec49 Mon Sep 17 00:00:00 2001 From: ShangkunLI Date: Tue, 31 Mar 2026 00:30:14 +0800 Subject: [PATCH 1/3] add gep op & cmd --- lib/cmd_type.py | 6 +++++- lib/opt_type.py | 12 +++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/cmd_type.py b/lib/cmd_type.py index cc073a20..a24078d7 100644 --- a/lib/cmd_type.py +++ b/lib/cmd_type.py @@ -14,7 +14,7 @@ # Total number of commands that are supported/recognized by controller. # Needs to be updated once more commands are added/supported. -NUM_CMDS = 43 +NUM_CMDS = 44 CMD_LAUNCH = 0 CMD_PAUSE = 1 @@ -66,6 +66,9 @@ # Loop Controller Status. CMD_LC_ALL_COMPLETE = 42 # LC -> Controller: all outer loops complete +# GEP FU Configuration Commands. +CMD_CONFIG_GEP_STRIDE = 43 # Controller -> GEP FU: Configures stride for 2D GEP + CMD_SYMBOL_DICT = { CMD_LAUNCH: "(LAUNCH_KERNEL)", CMD_PAUSE: "(PAUSE_EXECUTION)", @@ -110,5 +113,6 @@ CMD_LC_CHILD_COMPLETE: "(LC_CHILD_COMPLETE)", CMD_LC_CHILD_RESET: "(LC_CHILD_RESET)", CMD_LC_ALL_COMPLETE: "(LC_ALL_COMPLETE)", + CMD_CONFIG_GEP_STRIDE: "(CONFIG_GEP_STRIDE)", } diff --git a/lib/opt_type.py b/lib/opt_type.py index 70256d0b..2cd37c30 100644 --- a/lib/opt_type.py +++ b/lib/opt_type.py @@ -111,6 +111,11 @@ OPT_LOOP_DELIVERY = OpCodeType( 86 ) OPT_EXTRACT_PREDICATE = OpCodeType( 87 ) +OPT_GEP = OpCodeType( 88 ) +OPT_GEP_CONST = OpCodeType( 89 ) +OPT_GEP_2D = OpCodeType( 90 ) +OPT_GEP_2D_CONST = OpCodeType( 91 ) + OPT_SYMBOL_DICT = { OPT_START : "(start)", OPT_NAH : "(NAH)", @@ -202,5 +207,10 @@ OPT_LOOP_COUNT : "(loop_cnt)", OPT_LOOP_DELIVERY : "(loop_deli)", - OPT_EXTRACT_PREDICATE : "(extract_pred)" + OPT_EXTRACT_PREDICATE : "(extract_pred)", + + OPT_GEP : "(gep)", + OPT_GEP_CONST : "(gep')", + OPT_GEP_2D : "(gep2d)", + OPT_GEP_2D_CONST : "(gep2d')", } From e73bc2f0d420d8e68e8f0be483b50c063a6a0852 Mon Sep 17 00:00:00 2001 From: ShangkunLI Date: Tue, 31 Mar 2026 00:47:13 +0800 Subject: [PATCH 2/3] add Gep FU & test --- 
fu/single/GepRTL.py           | 188 +++++++++++++++++++++++
 fu/single/test/GepRTL_test.py | 274 ++++++++++++++++++++++++++++++++++
 2 files changed, 462 insertions(+)
 create mode 100644 fu/single/GepRTL.py
 create mode 100644 fu/single/test/GepRTL_test.py

diff --git a/fu/single/GepRTL.py b/fu/single/GepRTL.py
new file mode 100644
index 00000000..204dbeb5
--- /dev/null
+++ b/fu/single/GepRTL.py
@@ -0,0 +1,188 @@
+"""
+==========================================================================
+GepRTL.py
+==========================================================================
+GetElementPtr (GEP) functional unit for CGRA tile.
+
+Supports 1D and 2D address generation:
+  - OPT_GEP: result = base(in0) + index(in1)
+  - OPT_GEP_CONST: result = base(const) + index(in0)
+  - OPT_GEP_2D: result = base(in0) + index0(in1) * stride + index1(in2)
+  - OPT_GEP_2D_CONST: result = base(const) + index0(in0) * stride + index1(in1)
+
+For 2D operations, the stride is configured via CMD_CONFIG_GEP_STRIDE
+through the recv_from_ctrl_mem interface, normally before execution
+begins. A stride command accepted in the same cycle as a 2D operation is
+forwarded to that operation, so it never multiplies by the stale stride.
+
+Author : Shangkun Li
+  Date : March 31, 2026
+"""
+
+from pymtl3 import *
+from ..basic.Fu import Fu
+from ...lib.opt_type import *
+from ...lib.cmd_type import *
+
+class GepRTL(Fu):
+  """Address-generation FU: base + index, optionally + index0 * stride.
+
+  Every operation drives send_out[0]. The result is offered as soon as all
+  operands of the selected operation are valid; the operands and the
+  control word are dequeued in the cycle send_out[0] is ready as well.
+  """
+
+  def construct(s, CtrlPktType, num_inports, num_outports, vector_factor_power = 0):
+
+    super(GepRTL, s).construct(CtrlPktType, num_inports, num_outports, 1, vector_factor_power)
+
+    FuInType = mk_bits(clog2(num_inports + 1))
+
+    # Operand selectors decoded from the control word: fu_in[k] == 0 leaves
+    # the selector at 0, any other value selects input port fu_in[k] - 1.
+    s.in0 = Wire(FuInType)
+    s.in1 = Wire(FuInType)
+    s.in2 = Wire(FuInType)
+
+    idx_nbits = clog2(num_inports)
+    s.in0_idx = Wire(idx_nbits)
+    s.in1_idx = Wire(idx_nbits)
+    s.in2_idx = Wire(idx_nbits)
+
+    s.in0_idx //= s.in0[0:idx_nbits]
+    s.in1_idx //= s.in1[0:idx_nbits]
+    s.in2_idx //= s.in2[0:idx_nbits]
+
+    s.recv_all_val = Wire(1)
+
+    # Stride register for 2D GEP, configured via CMD_CONFIG_GEP_STRIDE.
+    s.stride = Wire(s.DataType)
+
+    # Stride seen by the datapath this cycle: the register, or the payload
+    # of a CMD_CONFIG_GEP_STRIDE that is being accepted right now. Without
+    # this bypass a 2D operation sharing a cycle with the command would
+    # still multiply by the previous stride (0 after reset).
+    s.stride_in_use = Wire(s.DataType)
+
+    @update
+    def comb_logic():
+
+      s.recv_all_val @= 0
+      # Defaults: no operand selected, nothing offered, nothing dequeued.
+      s.in0 @= 0
+      s.in1 @= 0
+      s.in2 @= 0
+      for i in range(num_inports):
+        s.recv_in[i].rdy @= b1(0)
+      for i in range(num_outports):
+        s.send_out[i].val @= 0
+        s.send_out[i].msg @= s.DataType()
+
+      s.recv_const.rdy @= 0
+      s.recv_opt.rdy @= 0
+
+      s.send_to_ctrl_mem.val @= 0
+      s.send_to_ctrl_mem.msg @= s.CgraPayloadType(0, 0, 0, 0, 0)
+      s.recv_from_ctrl_mem.rdy @= 0
+
+      s.stride_in_use @= s.stride
+
+      # Handle CMD configuration from ctrl_mem. Every command is accepted
+      # immediately; only CMD_CONFIG_GEP_STRIDE is acted on here.
+      if s.recv_from_ctrl_mem.val:
+        s.recv_from_ctrl_mem.rdy @= b1(1)
+        if s.recv_from_ctrl_mem.msg.cmd == CMD_CONFIG_GEP_STRIDE:
+          s.stride_in_use @= s.recv_from_ctrl_mem.msg.data
+
+      if s.recv_opt.val:
+        if s.recv_opt.msg.fu_in[0] != 0:
+          s.in0 @= zext(s.recv_opt.msg.fu_in[0] - 1, FuInType)
+        if s.recv_opt.msg.fu_in[1] != 0:
+          s.in1 @= zext(s.recv_opt.msg.fu_in[1] - 1, FuInType)
+        if s.recv_opt.msg.fu_in[2] != 0:
+          s.in2 @= zext(s.recv_opt.msg.fu_in[2] - 1, FuInType)
+
+      if s.recv_opt.val:
+
+        # ===== OPT_GEP: 1D GEP with two input operands =====
+        # result = base(in0) + index(in1)
+        if s.recv_opt.msg.operation == OPT_GEP:
+          s.send_out[0].msg.payload @= s.recv_in[s.in0_idx].msg.payload + \
+                                       s.recv_in[s.in1_idx].msg.payload
+          s.send_out[0].msg.predicate @= s.recv_in[s.in0_idx].msg.predicate & \
+                                         s.recv_in[s.in1_idx].msg.predicate & \
+                                         s.reached_vector_factor
+          s.recv_all_val @= s.recv_in[s.in0_idx].val & s.recv_in[s.in1_idx].val
+          s.send_out[0].val @= s.recv_all_val
+          s.recv_in[s.in0_idx].rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_in[s.in1_idx].rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_opt.rdy @= s.recv_all_val & s.send_out[0].rdy
+
+        # ===== OPT_GEP_CONST: 1D GEP with const base =====
+        # result = base(const) + index(in0)
+        elif s.recv_opt.msg.operation == OPT_GEP_CONST:
+          s.send_out[0].msg.payload @= s.recv_const.msg.payload + \
+                                       s.recv_in[s.in0_idx].msg.payload
+          s.send_out[0].msg.predicate @= s.recv_in[s.in0_idx].msg.predicate & \
+                                         s.reached_vector_factor
+          s.recv_all_val @= s.recv_in[s.in0_idx].val & s.recv_const.val
+          s.send_out[0].val @= s.recv_all_val
+          s.recv_in[s.in0_idx].rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_const.rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_opt.rdy @= s.recv_all_val & s.send_out[0].rdy
+
+        # ===== OPT_GEP_2D: 2D GEP with three input operands =====
+        # result = base(in0) + index0(in1) * stride + index1(in2)
+        elif s.recv_opt.msg.operation == OPT_GEP_2D:
+          s.send_out[0].msg.payload @= s.recv_in[s.in0_idx].msg.payload + \
+                                       s.recv_in[s.in1_idx].msg.payload * s.stride_in_use.payload + \
+                                       s.recv_in[s.in2_idx].msg.payload
+          s.send_out[0].msg.predicate @= s.recv_in[s.in0_idx].msg.predicate & \
+                                         s.recv_in[s.in1_idx].msg.predicate & \
+                                         s.recv_in[s.in2_idx].msg.predicate & \
+                                         s.reached_vector_factor
+          s.recv_all_val @= s.recv_in[s.in0_idx].val & \
+                            s.recv_in[s.in1_idx].val & \
+                            s.recv_in[s.in2_idx].val
+          s.send_out[0].val @= s.recv_all_val
+          s.recv_in[s.in0_idx].rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_in[s.in1_idx].rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_in[s.in2_idx].rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_opt.rdy @= s.recv_all_val & s.send_out[0].rdy
+
+        # ===== OPT_GEP_2D_CONST: 2D GEP with const base =====
+        # result = base(const) + index0(in0) * stride + index1(in1)
+        elif s.recv_opt.msg.operation == OPT_GEP_2D_CONST:
+          s.send_out[0].msg.payload @= s.recv_const.msg.payload + \
+                                       s.recv_in[s.in0_idx].msg.payload * s.stride_in_use.payload + \
+                                       s.recv_in[s.in1_idx].msg.payload
+          s.send_out[0].msg.predicate @= s.recv_in[s.in0_idx].msg.predicate & \
+                                         s.recv_in[s.in1_idx].msg.predicate & \
+                                         s.reached_vector_factor
+          s.recv_all_val @= s.recv_in[s.in0_idx].val & \
+                            s.recv_in[s.in1_idx].val & \
+                            s.recv_const.val
+          s.send_out[0].val @= s.recv_all_val
+          s.recv_in[s.in0_idx].rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_in[s.in1_idx].rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_const.rdy @= s.recv_all_val & s.send_out[0].rdy
+          s.recv_opt.rdy @= s.recv_all_val & s.send_out[0].rdy
+
+        else:
+          # Not a GEP operation: offer nothing and dequeue nothing.
+          for j in range(num_outports):
+            s.send_out[j].val @= b1(0)
+          s.recv_opt.rdy @= 0
+          s.recv_in[s.in0_idx].rdy @= 0
+          s.recv_in[s.in1_idx].rdy @= 0
+          s.recv_in[s.in2_idx].rdy @= 0
+
+    # Latches the configured stride on the clock edge; reset clears it.
+    @update_ff
+    def update_stride():
+      if s.reset:
+        s.stride <<= s.DataType(0, 0)
+      else:
+        if s.recv_from_ctrl_mem.val & \
+           (s.recv_from_ctrl_mem.msg.cmd == CMD_CONFIG_GEP_STRIDE):
+          s.stride <<= s.recv_from_ctrl_mem.msg.data
diff --git a/fu/single/test/GepRTL_test.py b/fu/single/test/GepRTL_test.py
new file mode 100644
index 00000000..7b0704c3
--- /dev/null
+++ b/fu/single/test/GepRTL_test.py
@@ -0,0 +1,274 @@
+"""
+==========================================================================
+GepRTL_test.py
+==========================================================================
+Test cases for GEP (GetElementPtr) functional unit.
+
+Author : Shangkun Li
+  Date : March 31, 2026
+"""
+
+import pytest
+from itertools import product
+from pymtl3 import *
+from ..GepRTL import GepRTL
+from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL
+from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL
+from ....lib.opt_type import *
+from ....lib.cmd_type import *
+from ....lib.messages import *
+from ....mem.const.ConstQueueRTL import ConstQueueRTL
+
+#-------------------------------------------------------------------------
+# Test harness
+#-------------------------------------------------------------------------
+
+class TestHarness(Component):
+
+  def construct(s, FunctionUnit, IntraCgraPktType, DataType, ConfigType,
+                CgraPayloadType,
+                num_inports, num_outports, data_mem_size,
+                src0_msgs, src1_msgs, src2_msgs,
+                src_const, ctrl_msgs, src_from_ctrl,
+                sink_msgs):
+
+    s.src_in0 = TestSrcRTL(DataType, src0_msgs)
+    s.src_in1 = TestSrcRTL(DataType, src1_msgs)
+    s.src_in2 = TestSrcRTL(DataType, src2_msgs)
+    s.src_opt = TestSrcRTL(ConfigType, ctrl_msgs)
+    s.src_from_ctrl = TestSrcRTL(CgraPayloadType, src_from_ctrl)
+    s.sink_out = TestSinkRTL(DataType, sink_msgs)
+
+    s.const_queue = ConstQueueRTL(DataType, src_const)
+    s.dut = FunctionUnit(IntraCgraPktType, num_inports, num_outports)
+
+    connect(s.src_in0.send, s.dut.recv_in[0])
+    connect(s.src_in1.send, s.dut.recv_in[1])
+    connect(s.src_in2.send, s.dut.recv_in[2])
+    connect(s.dut.recv_const, s.const_queue.send_const)
+    connect(s.src_opt.send, s.dut.recv_opt)
+    connect(s.src_from_ctrl.send, s.dut.recv_from_ctrl_mem)
+    connect(s.dut.send_out[0], s.sink_out.recv)
+
+  def done(s):
+    return s.src_in0.done() and s.src_in1.done() and s.src_opt.done() and \
+           s.src_from_ctrl.done() and s.sink_out.done() # src_in2 is not awaited
+
+  def line_trace(s):
+    return s.dut.line_trace()
+
+def run_sim(test_harness, max_cycles = 40):
+  test_harness.elaborate()
+  test_harness.apply(DefaultPassGroup())
+  test_harness.sim_reset()
+
+  # Run simulation
+  ncycles = 0
+  print()
+  print("{}:{}".format(ncycles, test_harness.line_trace()))
+  while not test_harness.done() and ncycles < max_cycles:
+    test_harness.sim_tick()
+    ncycles += 1
+    print("{}:{}".format(ncycles, test_harness.line_trace()))
+
+  # Check timeout
+  assert ncycles < max_cycles
+
+  test_harness.sim_tick()
+  test_harness.sim_tick()
+  test_harness.sim_tick()
+
+#-------------------------------------------------------------------------
+# Helper to build common types
+#-------------------------------------------------------------------------
+
+def make_types(data_nbits=32, num_inports=4, num_outports=1,
+               data_mem_size=8, ctrl_mem_size=8):
+  DataType = mk_data(data_nbits, 1)
+  ConfigType = mk_ctrl(num_inports, num_outports)
+  FuInType = mk_bits(clog2(num_inports + 1))
+  DataAddrType = mk_bits(clog2(data_mem_size))
+  CtrlAddrType = mk_bits(clog2(ctrl_mem_size))
+  CgraPayloadType = mk_cgra_payload(DataType, DataAddrType, ConfigType, CtrlAddrType)
+  IntraCgraPktType = mk_intra_cgra_pkt(1, 1, 1, CgraPayloadType)
+  return DataType, ConfigType, FuInType, CgraPayloadType, IntraCgraPktType
+
+#-------------------------------------------------------------------------
+# Test cases: 1D GEP
+#-------------------------------------------------------------------------
+
+@pytest.mark.parametrize(
+  'base, index',
+  product(range(0, 100, 40), range(0, 20, 4))
+)
+def test_gep_1d(base, index):
+  """OPT_GEP: result = base(in0) + index(in1), one op per (base, index) pair."""
+  num_inports = 4
+  num_outports = 1
+  data_mem_size = 8
+  DataType, ConfigType, FuInType, CgraPayloadType, IntraCgraPktType = \
+    make_types(num_inports=num_inports, num_outports=num_outports)
+
+  src_in0 = [DataType(base, 1)]
+  src_in1 = [DataType(index, 1)]
+  src_in2 = [DataType(0, 0)] # port 2 is not selected by this op; the message stays queued
+  src_const = [DataType(0, 1)] # recv_const is not dequeued by OPT_GEP
+  sink_out = [DataType(base + index, 1)]
+  # fu_in is 1-based, 0 = unused: fu_in[0]=1 -> recv_in[0] (base), fu_in[1]=2 -> recv_in[1] (index)
+  src_opt = [ConfigType(OPT_GEP,
+                        [FuInType(1), FuInType(2), FuInType(0), FuInType(0)])]
+  src_from_ctrl = [] # 1D GEP does not read the stride, so no command is sent
+
+  th = TestHarness(GepRTL, IntraCgraPktType, DataType, ConfigType,
+                   CgraPayloadType,
+                   num_inports, num_outports, data_mem_size,
+                   src_in0, src_in1, src_in2, src_const, src_opt,
+                   src_from_ctrl, sink_out)
+  run_sim(th)
+
+
+def test_gep_1d_const():
+  """OPT_GEP_CONST: result = base(const) + index(in0), three ops in a row."""
+  num_inports = 4
+  num_outports = 1
+  data_mem_size = 8
+  DataType, ConfigType, FuInType, CgraPayloadType, IntraCgraPktType = \
+    make_types(num_inports=num_inports, num_outports=num_outports)
+
+  base_addr = 1000
+  index_vals = [4, 8, 12]
+  src_in0 = [DataType(v, 1) for v in index_vals]
+  src_in1 = [] # unused by OPT_GEP_CONST
+  src_in2 = [] # unused by OPT_GEP_CONST
+  src_const = [DataType(base_addr, 1)] * len(index_vals) # recv_const is dequeued on every fire
+  sink_out = [DataType(base_addr + v, 1) for v in index_vals]
+  src_opt = [ConfigType(OPT_GEP_CONST,
+                        [FuInType(1), FuInType(0), FuInType(0), FuInType(0)])] * len(index_vals)
+  src_from_ctrl = [] # 1D GEP does not read the stride, so no command is sent
+
+  th = TestHarness(GepRTL, IntraCgraPktType, DataType, ConfigType,
+                   CgraPayloadType,
+                   num_inports, num_outports, data_mem_size,
+                   src_in0, src_in1, src_in2, src_const, src_opt,
+                   src_from_ctrl, sink_out)
+  run_sim(th)
+
+
+#-------------------------------------------------------------------------
+# Test cases: 2D GEP
+#-------------------------------------------------------------------------
+
+def test_gep_2d():
+  """OPT_GEP_2D: result = base(in0) + index0(in1) * stride + index1(in2)
+  Simulates A[i][j] where A is int[N][10], element_size=4.
+  stride = 10 * 4 = 40.
+  """
+  num_inports = 4
+  num_outports = 1
+  data_mem_size = 8
+  DataType, ConfigType, FuInType, CgraPayloadType, IntraCgraPktType = \
+    make_types(num_inports=num_inports, num_outports=num_outports)
+
+  base_addr = 2000
+  stride = 40 # 10 elements * 4 bytes
+
+  # Test A[0][0], A[1][3], A[2][5]
+  test_cases = [
+    (0, 0), # offset = 0*40 + 0 = 0
+    (1, 12), # offset = 1*40 + 12 = 52
+    (2, 20), # offset = 2*40 + 20 = 100
+  ]
+
+  src_in0 = [DataType(base_addr, 1)] * len(test_cases)
+  src_in1 = [DataType(i, 1) for i, j in test_cases]
+  src_in2 = [DataType(j, 1) for i, j in test_cases]
+  src_const = [DataType(0, 1)] # recv_const is not dequeued by OPT_GEP_2D
+  sink_out = [DataType(base_addr + i * stride + j, 1) for i, j in test_cases]
+  # fu_in is 1-based, 0 = unused: fu_in[0]=1 -> base, fu_in[1]=2 -> index0, fu_in[2]=3 -> index1
+  src_opt = [ConfigType(OPT_GEP_2D,
+                        [FuInType(1), FuInType(2), FuInType(3), FuInType(0)])] * len(test_cases)
+  # NOTE(review): nothing here orders this CMD ahead of the first op; vector 0 has index0 == 0, so it cannot expose a stale stride.
+  src_from_ctrl = [
+    CgraPayloadType(CMD_CONFIG_GEP_STRIDE, DataType(stride, 1), 0, ConfigType(0), 0),
+  ]
+
+  th = TestHarness(GepRTL, IntraCgraPktType, DataType, ConfigType,
+                   CgraPayloadType,
+                   num_inports, num_outports, data_mem_size,
+                   src_in0, src_in1, src_in2, src_const, src_opt,
+                   src_from_ctrl, sink_out)
+  run_sim(th)
+
+
+def test_gep_2d_const():
+  """OPT_GEP_2D_CONST: result = base(const) + index0(in0) * stride + index1(in1)
+  Simulates A[i][j] where A is int[N][8], element_size=4.
+  stride = 8 * 4 = 32, base from const_queue.
+  """
+  num_inports = 4
+  num_outports = 1
+  data_mem_size = 8
+  DataType, ConfigType, FuInType, CgraPayloadType, IntraCgraPktType = \
+    make_types(num_inports=num_inports, num_outports=num_outports)
+
+  base_addr = 4000
+  stride = 32 # 8 elements * 4 bytes
+
+  # Test A[0][0], A[1][2], A[3][4]
+  test_cases = [
+    (0, 0), # offset = 0*32 + 0 = 0
+    (1, 8), # offset = 1*32 + 8 = 40
+    (3, 16), # offset = 3*32 + 16 = 112
+  ]
+
+  src_in0 = [DataType(i, 1) for i, j in test_cases]
+  src_in1 = [DataType(j, 1) for i, j in test_cases]
+  src_in2 = [DataType(0, 0)] * len(test_cases) # port 2 is not selected (fu_in[2] = 0); never dequeued
+  src_const = [DataType(base_addr, 1)] * len(test_cases) # recv_const is dequeued on every fire
+  sink_out = [DataType(base_addr + i * stride + j, 1) for i, j in test_cases]
+  # fu_in is 1-based, 0 = unused: fu_in[0]=1 -> recv_in[0] (index0), fu_in[1]=2 -> recv_in[1] (index1)
+  src_opt = [ConfigType(OPT_GEP_2D_CONST,
+                        [FuInType(1), FuInType(2), FuInType(0), FuInType(0)])] * len(test_cases)
+  # NOTE(review): the CMD is not ordered ahead of the first op; vector 0 has index0 == 0, so a stale stride would go unnoticed.
+  src_from_ctrl = [
+    CgraPayloadType(CMD_CONFIG_GEP_STRIDE, DataType(stride, 1), 0, ConfigType(0), 0),
+  ]
+
+  th = TestHarness(GepRTL, IntraCgraPktType, DataType, ConfigType,
+                   CgraPayloadType,
+                   num_inports, num_outports, data_mem_size,
+                   src_in0, src_in1, src_in2, src_const, src_opt,
+                   src_from_ctrl, sink_out)
+  run_sim(th)
+
+
+#-------------------------------------------------------------------------
+# Test predicate propagation
+#-------------------------------------------------------------------------
+
+def test_gep_predicate():
+  """OPT_GEP must output predicate 0 when either operand carries predicate 0."""
+  num_inports = 4
+  num_outports = 1
+  data_mem_size = 8
+  DataType, ConfigType, FuInType, CgraPayloadType, IntraCgraPktType = \
+    make_types(num_inports=num_inports, num_outports=num_outports)
+
+  # Each op has exactly one operand with predicate 0: the payload is still the sum, the predicate is 0.
+  src_in0 = [DataType(100, 1), DataType(200, 0)]
+  src_in1 = [DataType(10, 0), DataType(20, 1)]
+  src_in2 = [DataType(0, 0), DataType(0, 0)]
+  src_const = [DataType(0, 1)]
+  sink_out = [DataType(110, 0), DataType(220, 0)]
+  src_opt = [
+    ConfigType(OPT_GEP, [FuInType(1), FuInType(2), FuInType(0), FuInType(0)]),
+    ConfigType(OPT_GEP, [FuInType(1), FuInType(2), FuInType(0), FuInType(0)]),
+  ]
+  src_from_ctrl = []
+
+  th = TestHarness(GepRTL, IntraCgraPktType, DataType, ConfigType,
+                   CgraPayloadType,
+                   num_inports, num_outports, data_mem_size,
+                   src_in0, src_in1, src_in2, src_const, src_opt,
+                   src_from_ctrl, sink_out)
+  run_sim(th)
From a17dee8fb24c82f68cf98b7056be018a954fe4af Mon Sep 17 00:00:00 2001
From: ShangkunLI
Date: Tue, 31 Mar 2026 01:02:26 +0800
Subject: [PATCH 3/3] update test for element-based address

---
 fu/single/test/GepRTL_test.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/fu/single/test/GepRTL_test.py b/fu/single/test/GepRTL_test.py
index 7b0704c3..11a9d41b 100644
--- a/fu/single/test/GepRTL_test.py
+++ b/fu/single/test/GepRTL_test.py
@@ -160,8 +160,8 @@ def test_gep_1d_const():
 
 def test_gep_2d():
   """OPT_GEP_2D: result = base(in0) + index0(in1) * stride + index1(in2)
-  Simulates A[i][j] where A is int[N][10], element_size=4.
-  stride = 10 * 4 = 40.
+  Simulates A[i][j] where A has 10 elements per row.
+  Memory is element-addressed for now, so stride = 10 (elements, not bytes).
   """
   num_inports = 4
   num_outports = 1
@@ -170,13 +170,13 @@ def test_gep_2d():
     make_types(num_inports=num_inports, num_outports=num_outports)
 
   base_addr = 2000
-  stride = 40 # 10 elements * 4 bytes
+  stride = 10 # 10 elements per row (element-granular addressing)
 
-  # Test A[0][0], A[1][3], A[2][5]
+  # Test A[0][0], A[1][3], A[2][7]
   test_cases = [
-    (0, 0), # offset = 0*40 + 0 = 0
-    (1, 12), # offset = 1*40 + 12 = 52
-    (2, 20), # offset = 2*40 + 20 = 100
+    (0, 0), # offset = 0*10 + 0 = 0
+    (1, 3), # offset = 1*10 + 3 = 13
+    (2, 7), # offset = 2*10 + 7 = 27
   ]
 
   src_in0 = [DataType(base_addr, 1)] * len(test_cases)
@@ -202,8 +202,9 @@ def test_gep_2d():
 
 def test_gep_2d_const():
   """OPT_GEP_2D_CONST: result = base(const) + index0(in0) * stride + index1(in1)
-  Simulates A[i][j] where A is int[N][8], element_size=4.
-  stride = 8 * 4 = 32, base from const_queue.
+  Simulates A[i][j] where A has 8 elements per row.
+  Memory is element-addressed for now, so stride = 8 (elements, not bytes).
+  Base address comes from const_queue.
   """
   num_inports = 4
   num_outports = 1
@@ -212,13 +213,13 @@ def test_gep_2d_const():
     make_types(num_inports=num_inports, num_outports=num_outports)
 
   base_addr = 4000
-  stride = 32 # 8 elements * 4 bytes
+  stride = 8 # 8 elements per row (element-granular addressing)
 
-  # Test A[0][0], A[1][2], A[3][4]
+  # Test A[0][0], A[1][2], A[3][5]
   test_cases = [
-    (0, 0), # offset = 0*32 + 0 = 0
-    (1, 8), # offset = 1*32 + 8 = 40
-    (3, 16), # offset = 3*32 + 16 = 112
+    (0, 0), # offset = 0*8 + 0 = 0
+    (1, 2), # offset = 1*8 + 2 = 10
+    (3, 5), # offset = 3*8 + 5 = 29
   ]
 
   src_in0 = [DataType(i, 1) for i, j in test_cases]