diff --git a/fu/single/PhiRTL.py b/fu/single/PhiRTL.py index 0371ece8..64bd394b 100644 --- a/fu/single/PhiRTL.py +++ b/fu/single/PhiRTL.py @@ -2,9 +2,7 @@ ========================================================================== PhiRTL.py ========================================================================== -Functional unit Phi for CGRA tile. Note that only one phi_const or -phi_start can be mapped onto the same tile, as we only have one 'first' -bit to record whether it is the first execution. +Functional unit Phi for CGRA tile. Author : Cheng Tan Date : November 30, 2019 @@ -24,7 +22,8 @@ def construct(s, CtrlPktType, num_inports, num_outports, vector_factor_power = 0 num_entries = 2 FuInType = mk_bits(clog2(num_inports + 1)) CountType = mk_bits(clog2(num_entries + 1)) - s.first = Wire(b1) + # Supports multiple PHI_CONST and PHI_START mapped on the same tile. + s.first = [Wire(b1) for _ in range(2 ** s.CtrlAddrType.nbits)] s.in0 = Wire(FuInType) s.in1 = Wire(FuInType) @@ -83,7 +82,7 @@ def comb_logic(): s.recv_opt.rdy @= s.recv_all_val & s.send_out[0].rdy elif s.recv_opt.msg.operation == OPT_PHI_START: - if s.first: + if s.first[s.ctrl_addr_inport]: s.send_out[0].msg.payload @= s.recv_in[s.in0_idx].msg.payload s.send_out[0].msg.predicate @= s.reached_vector_factor elif s.recv_in[s.in0_idx].msg.predicate == Bits1(1): @@ -95,27 +94,27 @@ def comb_logic(): else: # No predecessor is active. 
s.send_out[0].msg.payload @= s.recv_in[s.in0_idx].msg.payload s.send_out[0].msg.predicate @= 0 - s.recv_all_val @= ((s.first & s.recv_in[s.in0_idx].val) | \ - (~s.first & s.recv_in[s.in0_idx].val & s.recv_in[s.in1_idx].val)) + s.recv_all_val @= ((s.first[s.ctrl_addr_inport] & s.recv_in[s.in0_idx].val) | \ + (~s.first[s.ctrl_addr_inport] & s.recv_in[s.in0_idx].val & s.recv_in[s.in1_idx].val)) s.send_out[0].val @= s.recv_all_val s.recv_in[s.in0_idx].rdy @= s.recv_all_val & s.send_out[0].rdy - s.recv_in[s.in1_idx].rdy @= ~s.first & s.recv_all_val & s.send_out[0].rdy + s.recv_in[s.in1_idx].rdy @= ~s.first[s.ctrl_addr_inport] & s.recv_all_val & s.send_out[0].rdy s.recv_opt.rdy @= s.recv_all_val & s.send_out[0].rdy elif s.recv_opt.msg.operation == OPT_PHI_CONST: - if s.first: + if s.first[s.ctrl_addr_inport]: s.send_out[0].msg.payload @= s.recv_const.msg.payload else: s.send_out[0].msg.payload @= s.recv_in[s.in0_idx].msg.payload - s.recv_all_val @= ((s.first & s.recv_const.val) | \ - (~s.first & s.recv_in[s.in0_idx].val)) + s.recv_all_val @= ((s.first[s.ctrl_addr_inport] & s.recv_const.val) | \ + (~s.first[s.ctrl_addr_inport] & s.recv_in[s.in0_idx].val)) s.send_out[0].val @= s.recv_all_val s.recv_in[s.in0_idx].rdy @= s.recv_all_val & s.send_out[0].rdy s.recv_const.rdy @= s.recv_all_val & s.send_out[0].rdy s.recv_opt.rdy @= s.recv_all_val & s.send_out[0].rdy - if s.first: + if s.first[s.ctrl_addr_inport]: s.send_out[0].msg.predicate @= s.recv_const.msg.predicate & \ s.reached_vector_factor else: @@ -129,14 +128,14 @@ def comb_logic(): s.recv_in[s.in0_idx].rdy @= 0 s.recv_in[s.in1_idx].rdy @= 0 - # branch_start could be the entry of a function, which is executed by - # only once. + # PHI_CONST and PHI_START have different behavior when executing for the first time. 
@update_ff - def br_start_once(): + def record_first_execution(): if s.reset | s.clear: - s.first <<= b1(1) + for i in range (2 ** s.CtrlAddrType.nbits): + s.first[i] <<= b1(1) if ((s.recv_opt.msg.operation == OPT_PHI_CONST) | (s.recv_opt.msg.operation == OPT_PHI_START)) & s.reached_vector_factor: - s.first <<= b1(0) + s.first[s.ctrl_addr_inport] <<= b1(0) def line_trace(s): opt_str = " #" @@ -144,5 +143,6 @@ def line_trace(s): opt_str = OPT_SYMBOL_DICT[s.recv_opt.msg.operation] out_str = ",".join([str(x.msg) for x in s.send_out]) recv_str = ",".join([str(x.msg) for x in s.recv_in]) - return f'[recv: {recv_str}] {opt_str} (const_reg: {s.recv_const.msg}) ] = [out: {out_str}] (s.recv_opt.rdy: {s.recv_opt.rdy}, {OPT_SYMBOL_DICT[s.recv_opt.msg.operation]}, send[0].val: {s.send_out[0].val}) reached_vector_factor: {s.reached_vector_factor}; vector_factor_counter: {s.vector_factor_counter}' + first_str = ",".join([str(x) for x in s.first]) + return f'[recv: {recv_str}] {opt_str} (const_reg: {s.recv_const.msg}) (first: {first_str})] = [out: {out_str}] (s.recv_opt.rdy: {s.recv_opt.rdy}, {OPT_SYMBOL_DICT[s.recv_opt.msg.operation]}, send[0].val: {s.send_out[0].val}) reached_vector_factor: {s.reached_vector_factor}; vector_factor_counter: {s.vector_factor_counter}; ctrl_addr_inport: {s.ctrl_addr_inport}' diff --git a/fu/single/test/PhiRTL_test.py b/fu/single/test/PhiRTL_test.py index 04b7f8b0..29797de8 100644 --- a/fu/single/test/PhiRTL_test.py +++ b/fu/single/test/PhiRTL_test.py @@ -23,13 +23,16 @@ class TestHarness(Component): def construct(s, FunctionUnit, IntraCgraPktType, DataType, CtrlType, num_inports, num_outports, data_mem_size, src0_msgs, - src1_msgs, src_const, src_opt, sink_msgs): + src1_msgs, src_const, src_opt, sink_msgs, src_ctrl_addr = []): s.src_in0 = TestSrcRTL(DataType, src0_msgs) s.src_in1 = TestSrcRTL(DataType, src1_msgs) s.src_const = TestSrcRTL(DataType, src_const) s.src_opt = TestSrcRTL(CtrlType, src_opt) s.sink_out = TestSinkRTL(DataType, sink_msgs) + 
s.CtrlAddrType = IntraCgraPktType.get_field_type(kAttrPayload).get_field_type(kAttrCtrlAddr) + if src_ctrl_addr: + s.src_ctrl_addr = TestSrcRTL(s.CtrlAddrType, src_ctrl_addr) s.dut = FunctionUnit(IntraCgraPktType, num_inports, num_outports) @@ -38,6 +41,9 @@ def construct(s, FunctionUnit, IntraCgraPktType, DataType, CtrlType, s.src_const.send //= s.dut.recv_const s.src_opt.send //= s.dut.recv_opt s.dut.send_out[0] //= s.sink_out.recv + if src_ctrl_addr: + s.src_ctrl_addr.send.msg //= s.dut.ctrl_addr_inport + s.src_ctrl_addr.send.rdy //= 1 def done(s): return s.src_opt.done() and s.sink_out.done() @@ -109,17 +115,22 @@ def test_Phi_start(): IntraCgraPktType = mk_intra_cgra_pkt(1, 1, 1, CgraPayloadType) FuInType = mk_bits(clog2(num_inports + 1)) pickRegister = [FuInType(x + 1) for x in range(num_inports)] - src_in0 = [DataType(2, 1), DataType(3, 0), DataType(6, 0)] - src_in1 = [ DataType(5, 1), DataType(2, 1)] - src_const = [DataType(0, 0), DataType(5, 0), DataType(2, 1)] + # Each PHI_START mapped on the same tile has its own s.first entry. + # s.first[0] becomes 0 after receiving DataType(2, 1), but s.first[1] is still 1, which is why + # the output at clock cycle 2 is DataType(3, 1), rather than the pending DataType(8, 1). + src_in0 = [DataType(2, 1), DataType(3, 0), DataType(6, 0), DataType(3, 1)] + src_in1 = [ DataType(8, 1), DataType(5, 0)] + src_const = [] + # Assumes that two PHI_START are mapped on the same tile and each iterates twice. 
src_opt = [CtrlType(OPT_PHI_START, pickRegister), + CtrlType(OPT_PHI_START, pickRegister), CtrlType(OPT_PHI_START, pickRegister), CtrlType(OPT_PHI_START, pickRegister)] - - sink_out = [DataType(2, 1), DataType(5, 1), DataType(2, 1)] + src_ctrl_addr = [CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(0), CtrlAddrType(1)] + sink_out = [DataType(2, 1), DataType(3, 1), DataType(8, 1), DataType(3, 1)] th = TestHarness(FU, IntraCgraPktType, DataType, CtrlType, num_inports, num_outports, data_mem_size, src_in0, src_in1, - src_const, src_opt, sink_out) + src_const, src_opt, sink_out, src_ctrl_addr) run_sim(th) def test_Phi_const(): @@ -137,19 +148,25 @@ def test_Phi_const(): IntraCgraPktType = mk_intra_cgra_pkt(1, 1, 1, CgraPayloadType) FuInType = mk_bits(clog2(num_inports + 1)) pickRegister = [FuInType(x + 1) for x in range(num_inports)] - src_in0 = [DataType(1, 1), DataType(4, 1), DataType(7, 0)] - src_in1 = [DataType(2, 0), DataType(5, 1), DataType(8, 1)] + # Each PHI_CONST mapped on the same tile has its own s.first entry. + # s.first[0] becomes 0 after receiving DataType(1, 1), but s.first[1] is still 1, which is why + # the output at clock cycle 2 is the const DataType(6, 0), rather than DataType(2, 1). + src_in0 = [DataType(1, 1), DataType(2, 1), DataType(4, 1), DataType(7, 0)] + src_in1 = [] # `PHI_CONST` normally is the starting point of a kernel. # The const value is only picked at the first time. - src_const = [DataType(3, 0), DataType(6, 0), DataType(9, 1)] + src_const = [DataType(3, 0), DataType(6, 0)] + # Assumes that two PHI_CONST are mapped on the same tile and each iterates twice. 
src_opt = [CtrlType(OPT_PHI_CONST, pickRegister), CtrlType(OPT_PHI_CONST, pickRegister), - CtrlType(OPT_PHI_CONST, pickRegister) ] - sink_out = [DataType(3, 0), DataType(4, 1), DataType(7, 0)] + CtrlType(OPT_PHI_CONST, pickRegister), + CtrlType(OPT_PHI_CONST, pickRegister)] + src_ctrl_addr = [CtrlAddrType(0), CtrlAddrType(1), CtrlAddrType(0), CtrlAddrType(1)] + sink_out = [DataType(3, 0), DataType(6, 0), DataType(4, 1), DataType(7, 0)] th = TestHarness(FU, IntraCgraPktType, DataType, CtrlType, num_inports, num_outports, data_mem_size, src_in0, src_in1, src_const, src_opt, - sink_out) + sink_out, src_ctrl_addr) run_sim(th) def test_Phi_vector(): diff --git a/lib/opt_type.py b/lib/opt_type.py index 2cd37c30..dec22b11 100644 --- a/lib/opt_type.py +++ b/lib/opt_type.py @@ -106,7 +106,7 @@ OPT_REM_INCLUSIVE_END = OpCodeType( 15 ) OPT_LOOP_CONTROL = OpCodeType( 83 ) -OPT_STREAM_LD = OpCodeType( 84 ) +OPT_STREAM_LD = OpCodeType( 88 ) OPT_LOOP_COUNT = OpCodeType( 85 ) OPT_LOOP_DELIVERY = OpCodeType( 86 ) OPT_EXTRACT_PREDICATE = OpCodeType( 87 )