diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..1f489083 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,9 @@ +# Set all the text files to use LF line endings. +* text=auto eol=lf + +# Set all the binary files to use binary mode(avoid corruption). +*.png binary +*.jpg binary +*.pdf binary +*.zip binary +*.tar.gz binary diff --git a/cgra/CgraRTL.py b/cgra/CgraRTL.py index ec2b0b15..87777384 100644 --- a/cgra/CgraRTL.py +++ b/cgra/CgraRTL.py @@ -1,272 +1,272 @@ -""" -========================================================================= -CgraRTL.py -========================================================================= - -Author : Cheng Tan - Date : Dec 22, 2024 -""" -from ..controller.ControllerRTL import ControllerRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.basic.val_rdy.queues import BypassQueueRTL -from ..lib.opt_type import * -from ..lib.util.common import * -from ..mem.data.DataMemControllerRTL import DataMemControllerRTL -from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos -from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL -from ..tile.TileRTL import TileRTL -from ..lib.util.data_struct_attr import * -from ..lib.messages import * -from ..lib.util.common import * - - -class CgraRTL(Component): - - def construct(s, CgraPayloadType, - multi_cgra_rows, - multi_cgra_columns, - width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_registers_per_reg_bank, num_ctrl, - total_steps, mem_access_is_combinational, - FunctionUnit, FuList, cgra_topology, - controller2addr_map, idTo2d_map, - is_multi_cgra = True, - has_ctrl_ring = True): - - # Derives all types from CgraPayloadType. - DataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = DataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_bitwidth = DataType.get_field_type(kAttrPayload).nbits - - - num_tiles = width * height - num_rd_tiles = height + width - 1 - - CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) - - CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - num_tiles, CgraPayloadType) - - NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - num_tiles, num_rd_tiles, - CgraPayloadType) - - # Other topology can simply modify the tiles connections, or - # leverage the template for modeling. - assert(cgra_topology == MESH or cgra_topology == KING_MESH) - s.num_mesh_ports = 4 - if cgra_topology == MESH: - s.num_mesh_ports = 4 - elif cgra_topology == KING_MESH: - s.num_mesh_ports = 8 - - s.has_ctrl_ring = has_ctrl_ring - s.num_tiles = width * height - # The left and bottom tiles are connected to the data memory. - data_mem_num_rd_tiles = height + width - 1 - data_mem_num_wr_tiles = height + width - 1 - - num_cgras = multi_cgra_rows * multi_cgra_columns - # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. - CtrlRingPos = mk_ring_pos(s.num_tiles + 1) - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - assert(data_mem_size_per_bank * num_banks_per_cgra <= \ - data_mem_size_global) - - # Interfaces - s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) - s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType) - s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) - s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) - - # Interfaces on the boundary of the CGRA. - s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(width )] - s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(width )] - s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(width )] - s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(width )] - - s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(height)] - s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(height)] - s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(height)] - s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] - - # Components - s.tile = [TileRTL(CtrlPktType, - ctrl_mem_size, - data_mem_size_global, num_ctrl, - total_steps, 4, 2, s.num_mesh_ports, - s.num_mesh_ports, num_cgras, s.num_tiles, - num_registers_per_reg_bank, - FuList = FuList) - for i in range(s.num_tiles)] - s.data_mem = DataMemControllerRTL(NocPktType, - data_mem_size_global, - data_mem_size_per_bank, - num_banks_per_cgra, - data_mem_num_rd_tiles, - data_mem_num_wr_tiles, - multi_cgra_rows, - multi_cgra_columns, - s.num_tiles, - mem_access_is_combinational, - idTo2d_map) - s.controller = ControllerRTL(NocPktType, - multi_cgra_rows, multi_cgra_columns, - s.num_tiles, controller2addr_map, idTo2d_map) - # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. - # The last argument of 1 is for the latency per hop. - if has_ctrl_ring: - s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles + 1, 1) - s.cgra_id = InPort(CgraIdType) - - # Address lower and upper bound. - s.address_lower = InPort(DataAddrType) - s.address_upper = InPort(DataAddrType) - - # Connections - # Connects the controller id. - s.controller.cgra_id //= s.cgra_id - s.data_mem.cgra_id //= s.cgra_id - - # Connects the address lower and upper bound. - s.data_mem.address_lower //= s.address_lower - s.data_mem.address_upper //= s.address_upper - - # Connects data memory with controller. - s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request - s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request - s.data_mem.recv_from_noc_load_response_pkt //= s.controller.send_to_tile_load_response - s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_tile_load_request_pkt - s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_tile_load_response_pkt - s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_tile_store_request_pkt - - if is_multi_cgra: - s.recv_from_inter_cgra_noc //= s.controller.recv_from_inter_cgra_noc - s.send_to_inter_cgra_noc //= s.controller.send_to_inter_cgra_noc - else: - s.bypass_queue = BypassQueueRTL(NocPktType, 1) - s.bypass_queue.send //= s.controller.recv_from_inter_cgra_noc - s.bypass_queue.recv //= s.controller.send_to_inter_cgra_noc - - # Connects the ctrl interface between CPU and controller. - s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt - s.send_to_cpu_pkt //= s.controller.send_to_cpu_pkt - - # Assigns tile id. - for i in range(s.num_tiles): - s.tile[i].tile_id //= i - s.tile[i].cgra_id //= s.cgra_id - - if has_ctrl_ring: - # Connects ring with each control memory. - for i in range(s.num_tiles): - s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt - s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt - s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt - s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt - - for i in range(s.num_tiles): - - if i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.tile[i-width].recv_data[PORT_INDEX_NORTH] - - if i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTH] //= s.tile[i+width].recv_data[PORT_INDEX_SOUTH] - - if i % width > 0: - s.tile[i].send_data[PORT_INDEX_WEST] //= s.tile[i-1].recv_data[PORT_INDEX_EAST] - - if i % width < width - 1: - s.tile[i].send_data[PORT_INDEX_EAST] //= s.tile[i+1].recv_data[PORT_INDEX_WEST] - - if cgra_topology == KING_MESH: - if i % width > 0 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST] //= s.tile[i+width-1].recv_data[PORT_INDEX_SOUTHEAST] - s.tile[i+width-1].send_data[PORT_INDEX_SOUTHEAST] //= s.tile[i].recv_data[PORT_INDEX_NORTHWEST] - - if i % width < width - 1 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHEAST] //= s.tile[i+width+1].recv_data[PORT_INDEX_SOUTHWEST] - s.tile[i+width+1].send_data[PORT_INDEX_SOUTHWEST] //= s.tile[i].recv_data[PORT_INDEX_NORTHEAST] - - if i // width == 0: - s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) - s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) - - if i // width == height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) - s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) - - if i % width == 0 and i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) - - if i % width == 0 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) - - if i % width == width - 1 and i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) - - if i % width == width - 1 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) - - - if i // width == 0: - s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.send_data_on_boundary_south[i % width] - s.tile[i].recv_data[PORT_INDEX_SOUTH] //= s.recv_data_on_boundary_south[i % width] - - if i // width == height - 1: - s.tile[i].send_data[PORT_INDEX_NORTH] //= s.send_data_on_boundary_north[i % width] - s.tile[i].recv_data[PORT_INDEX_NORTH] //= s.recv_data_on_boundary_north[i % width] - - if i % width == 0: - s.tile[i].send_data[PORT_INDEX_WEST] //= s.send_data_on_boundary_west[i // width] - s.tile[i].recv_data[PORT_INDEX_WEST] //= s.recv_data_on_boundary_west[i // width] - - if i % width == width - 1: - s.tile[i].send_data[PORT_INDEX_EAST] //= s.send_data_on_boundary_east[i // width] - s.tile[i].recv_data[PORT_INDEX_EAST] //= s.recv_data_on_boundary_east[i // width] - - if i % width == 0 or i // width == 0: - s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[width + i // width - 1 if i >= width else i % width] - s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[width + i // width - 1 if i >= width else i % width] - s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[width + i // width - 1 if i >= width else i % width] - s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[width + i // width - 1 if i >= width else i % width] - else: - s.tile[i].to_mem_raddr.rdy //= 0 - s.tile[i].from_mem_rdata.val //= 0 - s.tile[i].from_mem_rdata.msg //= DataType(0, 0) - s.tile[i].to_mem_waddr.rdy //= 0 - s.tile[i].to_mem_wdata.rdy //= 0 - - # Line trace - def line_trace(s): - res = "||\n".join([(("\n[cgra"+str(s.cgra_id)+"_tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) - for (i,x) in enumerate(s.tile)]) - if s.has_ctrl_ring: - res += "\n :: [" + s.ctrl_ring.line_trace() + "] \n" - res += "\n :: [" + s.data_mem.line_trace() + "] \n" - return res - - - - +""" +========================================================================= +CgraRTL.py +========================================================================= + +Author : Cheng Tan + Date : Dec 22, 2024 +""" +from ..controller.ControllerRTL import ControllerRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.basic.val_rdy.queues import BypassQueueRTL +from ..lib.opt_type import * +from ..lib.util.common import * +from ..mem.data.DataMemControllerRTL import DataMemControllerRTL +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL +from ..tile.TileRTL import TileRTL +from ..lib.util.data_struct_attr import * +from ..lib.messages import * +from ..lib.util.common import * + + +class CgraRTL(Component): + + def construct(s, CgraPayloadType, + multi_cgra_rows, + multi_cgra_columns, + width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, num_ctrl, + total_steps, mem_access_is_combinational, + FunctionUnit, FuList, cgra_topology, + controller2addr_map, idTo2d_map, + is_multi_cgra = True, + has_ctrl_ring = True): + + # Derives all types from CgraPayloadType. + DataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = DataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_bitwidth = DataType.get_field_type(kAttrPayload).nbits + + + num_tiles = width * height + num_rd_tiles = height + width - 1 + + CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) + + CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, + num_tiles, CgraPayloadType) + + NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, + num_tiles, num_rd_tiles, + CgraPayloadType) + + # Other topology can simply modify the tiles connections, or + # leverage the template for modeling. + assert(cgra_topology == MESH or cgra_topology == KING_MESH) + s.num_mesh_ports = 4 + if cgra_topology == MESH: + s.num_mesh_ports = 4 + elif cgra_topology == KING_MESH: + s.num_mesh_ports = 8 + + s.has_ctrl_ring = has_ctrl_ring + s.num_tiles = width * height + # The left and bottom tiles are connected to the data memory. + data_mem_num_rd_tiles = height + width - 1 + data_mem_num_wr_tiles = height + width - 1 + + num_cgras = multi_cgra_rows * multi_cgra_columns + # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. + CtrlRingPos = mk_ring_pos(s.num_tiles + 1) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + assert(data_mem_size_per_bank * num_banks_per_cgra <= \ + data_mem_size_global) + + # Interfaces + s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) + s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType) + s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) + s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) + + # Interfaces on the boundary of the CGRA. + s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(width )] + s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(width )] + s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(width )] + s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(width )] + + s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(height)] + s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] + + # Components + s.tile = [TileRTL(CtrlPktType, + ctrl_mem_size, + data_mem_size_global, num_ctrl, + total_steps, 4, 2, s.num_mesh_ports, + s.num_mesh_ports, num_cgras, s.num_tiles, + num_registers_per_reg_bank, + FuList = FuList) + for i in range(s.num_tiles)] + s.data_mem = DataMemControllerRTL(NocPktType, + data_mem_size_global, + data_mem_size_per_bank, + num_banks_per_cgra, + data_mem_num_rd_tiles, + data_mem_num_wr_tiles, + multi_cgra_rows, + multi_cgra_columns, + s.num_tiles, + mem_access_is_combinational, + idTo2d_map) + s.controller = ControllerRTL(NocPktType, + multi_cgra_rows, multi_cgra_columns, + s.num_tiles, controller2addr_map, idTo2d_map) + # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. + # The last argument of 1 is for the latency per hop. + if has_ctrl_ring: + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles + 1, 1) + s.cgra_id = InPort(CgraIdType) + + # Address lower and upper bound. + s.address_lower = InPort(DataAddrType) + s.address_upper = InPort(DataAddrType) + + # Connections + # Connects the controller id. + s.controller.cgra_id //= s.cgra_id + s.data_mem.cgra_id //= s.cgra_id + + # Connects the address lower and upper bound. + s.data_mem.address_lower //= s.address_lower + s.data_mem.address_upper //= s.address_upper + + # Connects data memory with controller. + s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request + s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request + s.data_mem.recv_from_noc_load_response_pkt //= s.controller.send_to_tile_load_response + s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_tile_load_request_pkt + s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_tile_load_response_pkt + s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_tile_store_request_pkt + + if is_multi_cgra: + s.recv_from_inter_cgra_noc //= s.controller.recv_from_inter_cgra_noc + s.send_to_inter_cgra_noc //= s.controller.send_to_inter_cgra_noc + else: + s.bypass_queue = BypassQueueRTL(NocPktType, 1) + s.bypass_queue.send //= s.controller.recv_from_inter_cgra_noc + s.bypass_queue.recv //= s.controller.send_to_inter_cgra_noc + + # Connects the ctrl interface between CPU and controller. + s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt + s.send_to_cpu_pkt //= s.controller.send_to_cpu_pkt + + # Assigns tile id. + for i in range(s.num_tiles): + s.tile[i].tile_id //= i + s.tile[i].cgra_id //= s.cgra_id + + if has_ctrl_ring: + # Connects ring with each control memory. + for i in range(s.num_tiles): + s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt + s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt + s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt + s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt + + for i in range(s.num_tiles): + + if i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.tile[i-width].recv_data[PORT_INDEX_NORTH] + + if i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTH] //= s.tile[i+width].recv_data[PORT_INDEX_SOUTH] + + if i % width > 0: + s.tile[i].send_data[PORT_INDEX_WEST] //= s.tile[i-1].recv_data[PORT_INDEX_EAST] + + if i % width < width - 1: + s.tile[i].send_data[PORT_INDEX_EAST] //= s.tile[i+1].recv_data[PORT_INDEX_WEST] + + if cgra_topology == KING_MESH: + if i % width > 0 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST] //= s.tile[i+width-1].recv_data[PORT_INDEX_SOUTHEAST] + s.tile[i+width-1].send_data[PORT_INDEX_SOUTHEAST] //= s.tile[i].recv_data[PORT_INDEX_NORTHWEST] + + if i % width < width - 1 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHEAST] //= s.tile[i+width+1].recv_data[PORT_INDEX_SOUTHWEST] + s.tile[i+width+1].send_data[PORT_INDEX_SOUTHWEST] //= s.tile[i].recv_data[PORT_INDEX_NORTHEAST] + + if i // width == 0: + s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) + + if i // width == height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) + + if i % width == 0 and i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) + + if i % width == 0 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) + + if i % width == width - 1 and i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) + + if i % width == width - 1 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) + + + if i // width == 0: + s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.send_data_on_boundary_south[i % width] + s.tile[i].recv_data[PORT_INDEX_SOUTH] //= s.recv_data_on_boundary_south[i % width] + + if i // width == height - 1: + s.tile[i].send_data[PORT_INDEX_NORTH] //= s.send_data_on_boundary_north[i % width] + s.tile[i].recv_data[PORT_INDEX_NORTH] //= s.recv_data_on_boundary_north[i % width] + + if i % width == 0: + s.tile[i].send_data[PORT_INDEX_WEST] //= s.send_data_on_boundary_west[i // width] + s.tile[i].recv_data[PORT_INDEX_WEST] //= s.recv_data_on_boundary_west[i // width] + + if i % width == width - 1: + s.tile[i].send_data[PORT_INDEX_EAST] //= s.send_data_on_boundary_east[i // width] + s.tile[i].recv_data[PORT_INDEX_EAST] //= s.recv_data_on_boundary_east[i // width] + + if i % width == 0 or i // width == 0: + s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[width + i // width - 1 if i >= width else i % width] + s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[width + i // width - 1 if i >= width else i % width] + s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[width + i // width - 1 if i >= width else i % width] + s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[width + i // width - 1 if i >= width else i % width] + else: + s.tile[i].to_mem_raddr.rdy //= 0 + s.tile[i].from_mem_rdata.val //= 0 + s.tile[i].from_mem_rdata.msg //= DataType(0, 0) + s.tile[i].to_mem_waddr.rdy //= 0 + s.tile[i].to_mem_wdata.rdy //= 0 + + # Line trace + def line_trace(s): + res = "||\n".join([(("\n[cgra"+str(s.cgra_id)+"_tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) + for (i,x) in enumerate(s.tile)]) + if s.has_ctrl_ring: + res += "\n :: [" + s.ctrl_ring.line_trace() + "] \n" + res += "\n :: [" + s.data_mem.line_trace() + "] \n" + return res + + + + diff --git a/cgra/CgraTemplateRTL.py b/cgra/CgraTemplateRTL.py index 300d7832..00788487 100644 --- a/cgra/CgraTemplateRTL.py +++ b/cgra/CgraTemplateRTL.py @@ -1,382 +1,382 @@ -""" -========================================================================= -CgraTemplateRTL.py -========================================================================= - -Author : Cheng Tan - Date : Dec 30, 2024 -""" -from ..controller.ControllerRTL import ControllerRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.basic.val_rdy.queues import BypassQueueRTL -from ..lib.opt_type import * -from ..lib.util.common import * -from ..mem.data.DataMemControllerRTL import DataMemControllerRTL -from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos -from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL -from ..tile.TileRTL import TileRTL -from ..lib.util.data_struct_attr import * -from ..lib.messages import * - -from ..fu.single.PhiRTL import PhiRTL -from ..fu.single.AdderRTL import AdderRTL -from ..fu.single.ShifterRTL import ShifterRTL -from ..fu.single.MemUnitRTL import MemUnitRTL -from ..fu.single.SelRTL import SelRTL -from ..fu.single.CompRTL import CompRTL -from ..fu.double.SeqMulAdderRTL import SeqMulAdderRTL -from ..fu.single.RetRTL import RetRTL -from ..fu.single.MulRTL import MulRTL -from ..fu.single.ExclusiveDivRTL import ExclusiveDivRTL -from ..fu.single.LogicRTL import LogicRTL -from ..fu.single.GrantRTL import GrantRTL -from ..fu.single.LoopControlRTL import LoopControlRTL -from ..fu.single.ConstRTL import ConstRTL -from ..fu.float.FpAddRTL import FpAddRTL -from ..fu.float.FpMulRTL import FpMulRTL - -fu_map = { - "add": AdderRTL, - "mul": MulRTL, - "div": ExclusiveDivRTL, - "fadd": FpAddRTL, - "fmul": FpMulRTL, - "fdiv": None, - "logic": LogicRTL, - "cmp": CompRTL, - "sel": SelRTL, - "type_conv": None, - "vfmul": None, - "fadd_fadd": None, - "fmul_fadd": None, - "grant": GrantRTL, - "loop_control": LoopControlRTL, - "phi": PhiRTL, - "constant": ConstRTL, - "mem": MemUnitRTL, - "return": RetRTL, - "mem_indexed": MemUnitRTL, - "alloca": None, - "shift": ShifterRTL, -} - -def map_fu2rtl(fu_type: list[str]): - fuRTL = list({fu_map[fu] for fu in fu_type}) - fuRTL_new = [fu for fu in fuRTL if fu is not None] - return fuRTL_new - - -class CgraTemplateRTL(Component): - - def construct(s, CgraPayloadType, - multi_cgra_rows, - multi_cgra_columns, - per_cgra_rows, per_cgra_columns, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_registers_per_reg_bank, num_ctrl, - total_steps, mem_access_is_combinational, - FunctionUnit, FuList, TileList, LinkList, - dataSPM, controller2addr_map, idTo2d_map, - is_multi_cgra = True, cgra_id = 0, - provided_max_per_cgra_rows = None, - provided_max_per_cgra_cols = None, - provided_max_num_rd_tiles = None, - provided_max_num_wr_tiles = None): - """ - provided_max_per_cgra_rows: the row number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. - provided_max_per_cgra_cols: the column number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. - provided_max_num_rd_tiles: the number of read ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. - provided_max_num_wr_tiles: the number of write ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. - """ - - DataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = DataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_bitwidth = DataType.get_field_type(kAttrPayload).nbits - - CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) - - # Reconstructs packet types. - # In the case of heterogeneous multi-cgra, `max_num_tiles` means the tile number of the largest cgra. - # In the case of single cgra, it is the tile number of the current cgra. - max_per_cgra_rows = provided_max_per_cgra_rows if provided_max_per_cgra_rows is not None else per_cgra_rows - max_per_cgra_cols = provided_max_per_cgra_cols if provided_max_per_cgra_cols is not None else per_cgra_columns - max_num_tiles = max_per_cgra_rows * max_per_cgra_cols - # In the case of heterogeneous multi-cgra, `max_num_rd_tiles` means the number of read ports of the largest cgra. - # In the case of single cgra, it is the number of read ports of the current cgra. - max_num_rd_tiles = provided_max_num_rd_tiles if provided_max_num_rd_tiles is not None else dataSPM.getNumOfValidReadPorts() - max_num_wr_tiles = provided_max_num_wr_tiles if provided_max_num_wr_tiles is not None else dataSPM.getNumOfValidWritePorts() - - - # Use largest CGRA shape(max_num_tiles) to set CtrlPktType/NocPktType for compatibility. - CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - max_num_tiles, CgraPayloadType) - - NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - max_num_tiles, max_num_rd_tiles, - CgraPayloadType) - - s.num_mesh_ports = 8 - # tile number of the current cgra. - s.num_tiles = len(TileList) - num_cgras = multi_cgra_rows * multi_cgra_columns - # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. - CtrlRingPos = mk_ring_pos(max_num_tiles + 1) - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - assert(data_mem_size_per_bank * num_banks_per_cgra <= \ - data_mem_size_global) - - # Interfaces - s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) - s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) - s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType) - s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) - - if is_multi_cgra: - # Use the largest CGRA shape to set the boundary ports for compatibility in the case of heterogeneous multi-cgra. - # Remember to ground the remaining boundary ports of the current CGRA when the current CGRA has fewer rows or columns than the largest CGRA. - # See also: - s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_cols)] - s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(max_per_cgra_cols)] - s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_cols)] - s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(max_per_cgra_cols)] - s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_rows)] - s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(max_per_cgra_rows)] - s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_rows)] - s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(max_per_cgra_rows)] - - # Components - s.tile = [TileRTL(CtrlPktType, - ctrl_mem_size, - data_mem_size_global, num_ctrl, - total_steps, 4, 2, s.num_mesh_ports, - s.num_mesh_ports, num_cgras, s.num_tiles, - num_registers_per_reg_bank, - FuList = map_fu2rtl(TileList[i].getAllValidFuTypes())) - for i in range(s.num_tiles)] - # FIXME: Need to enrish data-SPM-related user-controlled parameters, e.g., number of banks. - s.data_mem = DataMemControllerRTL(NocPktType, - data_mem_size_global, - data_mem_size_per_bank, - num_banks_per_cgra, - max_num_rd_tiles, - max_num_wr_tiles, - multi_cgra_rows, - multi_cgra_columns, - max_num_tiles, - mem_access_is_combinational, - idTo2d_map) - s.cgra_id = InPort(CgraIdType) - s.controller = ControllerRTL(NocPktType, - multi_cgra_rows, multi_cgra_columns, - max_num_tiles, controller2addr_map, idTo2d_map) - # Connects controller id. - s.controller.cgra_id //= s.cgra_id - # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. - # The last argument of 1 is for the latency per hop. - s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, max_num_tiles + 1, 1) - - # Address lower and upper bound. - s.address_lower = InPort(DataAddrType) - s.address_upper = InPort(DataAddrType) - - # Connections data mem cgra ID. - s.data_mem.cgra_id //= s.cgra_id - - # Connects the address lower and upper bound. - s.data_mem.address_lower //= s.address_lower - s.data_mem.address_upper //= s.address_upper - - # Connects data memory with controller. - s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request - s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request - s.data_mem.recv_from_noc_load_response_pkt //= s.controller.send_to_tile_load_response - s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_tile_load_request_pkt - s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_tile_load_response_pkt - s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_tile_store_request_pkt - - if is_multi_cgra: - s.recv_from_inter_cgra_noc //= s.controller.recv_from_inter_cgra_noc - s.send_to_inter_cgra_noc //= s.controller.send_to_inter_cgra_noc - else: - s.bypass_queue = BypassQueueRTL(NocPktType, 1) - s.bypass_queue.send //= s.controller.recv_from_inter_cgra_noc - s.bypass_queue.recv //= s.controller.send_to_inter_cgra_noc - - # Connects the ctrl interface between CPU and controller. - s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt - s.send_to_cpu_pkt //= s.controller.send_to_cpu_pkt - - # Assigns tile id. - for i in range(s.num_tiles): - s.tile[i].cgra_id //= s.cgra_id - s.tile[i].tile_id //= i - - # Connects ring with each control memory. - for i in range(s.num_tiles): - s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt - s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt - - s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt - s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt - - # Grounds the remaining ports of the ring. - for i in range(s.num_tiles + 1, max_num_tiles + 1): - s.ctrl_ring.send[i].rdy //= 0 - s.ctrl_ring.recv[i].val //= 0 - s.ctrl_ring.recv[i].msg //= CtrlPktType() - - # Records the tile indices and ports that have been grounded for from_mem and to_mem, - # to avoid PyMTL3 MultiWriterError. - recv_data_grounded_for_from_mem = set() - send_data_rdy_grounded_for_to_mem = set() - - for link in LinkList: - - if link.isFromMem(): - memPort = link.getMemReadPort() - dstTileIndex = link.dstTile.getIndex(TileList) - if not link.disabled: - s.data_mem.recv_raddr[memPort] //= s.tile[dstTileIndex].to_mem_raddr - s.data_mem.send_rdata[memPort] //= s.tile[dstTileIndex].from_mem_rdata - - # Grounds the generic routing port since it is unused for memory links when in single-CGRA mode. - # NOTE `recv_data` is used to receive data between multiple CGRAs. - if not link.disabled and not is_multi_cgra: - s.tile[dstTileIndex].recv_data[link.dstPort].val //= 0 - s.tile[dstTileIndex].recv_data[link.dstPort].msg //= DataType(0, 0) - # Records the tile indices and ports that have been grounded. - recv_data_grounded_for_from_mem.add((dstTileIndex, link.dstPort)) - - elif link.isToMem(): - memPort = link.getMemWritePort() - srcTileIndex = link.srcTile.getIndex(TileList) - if not link.disabled: - s.tile[srcTileIndex].to_mem_waddr //= s.data_mem.recv_waddr[memPort] - s.tile[srcTileIndex].to_mem_wdata //= s.data_mem.recv_wdata[memPort] - - # Grounds the generic routing port ready signal when in single-CGRA mode. - # NOTE `send_data` is used to send data between multiple CGRAs. - if not link.disabled and not is_multi_cgra: - s.tile[srcTileIndex].send_data[link.srcPort].rdy //= 0 - # Records the tile indices and ports that have been grounded. - send_data_rdy_grounded_for_to_mem.add((srcTileIndex, link.srcPort)) - - else: - srcTileIndex = link.srcTile.getIndex(TileList) - dstTileIndex = link.dstTile.getIndex(TileList) - if not link.disabled: - s.tile[srcTileIndex].send_data[link.srcPort] //= s.tile[dstTileIndex].recv_data[link.dstPort] - - # (cgra_idx_x, cgra_idx_y) is the coordinate of the current cgra in multi-cgra(Cartesian coordinate system). - """ - ^ y - | - | cgra2 cgra3 - | cgra0 cgra1 - +---------------> x - See also https://github.com/tancheng/VectorCGRA/blob/master/doc/figures/multi_cgra_coordinate_and_storage_way.png - - """ - cgra_idx_x = cgra_id % multi_cgra_columns - cgra_idx_y = cgra_id // multi_cgra_columns - - """ - y ^ - | tile12 tile13 tile14 tile15 - | tile8 tile9 tile10 tile11 - | tile4 tile5 tile6 tile7 - | tile0 tile1 tile2 tile3 - |--------------------------> x - - See also https://github.com/tancheng/VectorCGRA/blob/master/doc/figures/multi_cgra_coordinate_and_storage_way.png - """ - if is_multi_cgra: - for tile_idx_y in range(per_cgra_rows): - for tile_idx_x in range(per_cgra_columns): - tile_id = tile_idx_y * per_cgra_columns + tile_idx_x - # Only connects if the port is valid - if tile_idx_y == per_cgra_rows - 1: - if PORT_INDEX_NORTH not in TileList[tile_id].getInvalidOutPorts(): - s.tile[tile_id].send_data[PORT_INDEX_NORTH] //= s.send_data_on_boundary_north[tile_idx_x] - if PORT_INDEX_NORTH not in TileList[tile_id].getInvalidInPorts(): - s.tile[tile_id].recv_data[PORT_INDEX_NORTH] //= s.recv_data_on_boundary_north[tile_idx_x] - - if tile_idx_y == 0: - # Corner case: In multi-cgra, for each row of CGRAs except the bottom row, - # the south port of the bottom row tiles must be connected to the adjacent/south cgra. - if cgra_idx_y > 0: - s.tile[tile_id].send_data[PORT_INDEX_SOUTH] //= s.send_data_on_boundary_south[tile_idx_x] - s.tile[tile_id].recv_data[PORT_INDEX_SOUTH] //= s.recv_data_on_boundary_south[tile_idx_x] - else: #cgra_idx_y == 0 - # In multi-cgra, for the bottom row CGRAs, the south ports of the bottom row tiles should be grounded. - s.tile[tile_id].send_data[PORT_INDEX_SOUTH].rdy //= 0 - s.tile[tile_id].recv_data[PORT_INDEX_SOUTH].val //= 0 - s.tile[tile_id].recv_data[PORT_INDEX_SOUTH].msg //= DataType(0, 0) - - if tile_idx_x == 0: - # Corner case: In multi-cgra, for each column of CGRAs except the first column, - # the west port of the first column tiles must be connected to the adjacent/west cgra. - if cgra_idx_x > 0: - s.tile[tile_id].send_data[PORT_INDEX_WEST] //= s.send_data_on_boundary_west[tile_idx_y] - s.tile[tile_id].recv_data[PORT_INDEX_WEST] //= s.recv_data_on_boundary_west[tile_idx_y] - else: #cgra_idx_x == 0 - # In multi-cgra, for the first column CGRAs, the west ports of the first column tiles should be grounded. - s.tile[tile_id].send_data[PORT_INDEX_WEST].rdy //= 0 - s.tile[tile_id].recv_data[PORT_INDEX_WEST].val //= 0 - s.tile[tile_id].recv_data[PORT_INDEX_WEST].msg //= DataType(0, 0) - - if tile_idx_x == per_cgra_columns - 1: - if PORT_INDEX_EAST not in TileList[tile_id].getInvalidOutPorts(): - s.tile[tile_id].send_data[PORT_INDEX_EAST] //= s.send_data_on_boundary_east[tile_idx_y] - if PORT_INDEX_EAST not in TileList[tile_id].getInvalidInPorts(): - s.tile[tile_id].recv_data[PORT_INDEX_EAST] //= s.recv_data_on_boundary_east[tile_idx_y] - - for tile_idx_y in range(per_cgra_rows): - for tile_idx_x in range(per_cgra_columns): - i = tile_idx_y * per_cgra_columns + tile_idx_x - - for invalidInPort in TileList[i].getInvalidInPorts(): - """ - Corner case 1: - When the links between the dataSPM and the leftmost tiles are disabled, the PORT_INDEX_WEST status becomes invalid. - In this case, if the current CGRA needs to connect to the CGRA on its left, then the recv_data/send_data signals must not be tied to ground. - - Corner case 2: - When the links between the dataSPM and the bottom tiles are disabled, the PORT_INDEX_SOUTH status becomes invalid. - In this case, if the current CGRA needs to connect to the CGRA below it, then the recv_data/send_data signals must not be tied to ground. - """ - skip_multi = (is_multi_cgra and tile_idx_x == 0 and invalidInPort == PORT_INDEX_WEST) or \ - (is_multi_cgra and tile_idx_y == 0 and invalidInPort == PORT_INDEX_SOUTH) - skip_from_mem_dup = (not is_multi_cgra) and ((i, invalidInPort) in recv_data_grounded_for_from_mem) - if not skip_multi and not skip_from_mem_dup: - s.tile[i].recv_data[invalidInPort].val //= 0 - s.tile[i].recv_data[invalidInPort].msg //= DataType(0, 0) - - for invalidOutPort in TileList[i].getInvalidOutPorts(): - skip_multi = (is_multi_cgra and tile_idx_x == 0 and invalidOutPort == PORT_INDEX_WEST) or \ - (is_multi_cgra and tile_idx_y == 0 and invalidOutPort == PORT_INDEX_SOUTH) - skip_to_mem_dup = (not is_multi_cgra) and ((i, invalidOutPort) in send_data_rdy_grounded_for_to_mem) - if not skip_multi and not skip_to_mem_dup: - s.tile[i].send_data[invalidOutPort].rdy //= 0 - - if not TileList[i].hasFromMem(): - s.tile[i].to_mem_raddr.rdy //= 0 - s.tile[i].from_mem_rdata.val //= 0 - s.tile[i].from_mem_rdata.msg //= DataType(0, 0) - - if not TileList[i].hasToMem(): - s.tile[i].to_mem_waddr.rdy //= 0 - s.tile[i].to_mem_wdata.rdy //= 0 - - # Line trace - def line_trace(s): - res = "||\n".join([(("[tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) - for (i,x) in enumerate(s.tile)]) - res += "\n :: [" + s.data_mem.line_trace() + "] \n" - return res - - +""" +========================================================================= +CgraTemplateRTL.py +========================================================================= + +Author : Cheng Tan + Date : Dec 30, 2024 +""" +from ..controller.ControllerRTL import ControllerRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.basic.val_rdy.queues import BypassQueueRTL +from ..lib.opt_type import * +from ..lib.util.common import * +from ..mem.data.DataMemControllerRTL import DataMemControllerRTL +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL +from ..tile.TileRTL import TileRTL +from ..lib.util.data_struct_attr import * +from ..lib.messages import * + +from ..fu.single.PhiRTL import PhiRTL +from ..fu.single.AdderRTL import AdderRTL +from ..fu.single.ShifterRTL import ShifterRTL +from ..fu.single.MemUnitRTL import MemUnitRTL +from ..fu.single.SelRTL import SelRTL +from ..fu.single.CompRTL import CompRTL +from ..fu.double.SeqMulAdderRTL import SeqMulAdderRTL +from ..fu.single.RetRTL import RetRTL +from ..fu.single.MulRTL import MulRTL +from ..fu.single.ExclusiveDivRTL import ExclusiveDivRTL +from ..fu.single.LogicRTL import LogicRTL +from ..fu.single.GrantRTL import GrantRTL +from ..fu.single.LoopControlRTL import LoopControlRTL +from ..fu.single.ConstRTL import ConstRTL +from ..fu.float.FpAddRTL import FpAddRTL +from ..fu.float.FpMulRTL import FpMulRTL + +fu_map = { + "add": AdderRTL, + "mul": MulRTL, + "div": ExclusiveDivRTL, + "fadd": FpAddRTL, + "fmul": FpMulRTL, + "fdiv": None, + "logic": LogicRTL, + "cmp": CompRTL, + "sel": SelRTL, + "type_conv": None, + "vfmul": None, + "fadd_fadd": None, + "fmul_fadd": None, + "grant": GrantRTL, + "loop_control": LoopControlRTL, + "phi": PhiRTL, + "constant": ConstRTL, + "mem": MemUnitRTL, + "return": RetRTL, + "mem_indexed": MemUnitRTL, + "alloca": None, + "shift": ShifterRTL, +} + +def map_fu2rtl(fu_type: list[str]): + fuRTL = list({fu_map[fu] for fu in fu_type}) + fuRTL_new = [fu for fu in fuRTL if fu is not None] + return fuRTL_new + + +class CgraTemplateRTL(Component): + + def construct(s, CgraPayloadType, + multi_cgra_rows, + multi_cgra_columns, + per_cgra_rows, per_cgra_columns, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, num_ctrl, + total_steps, mem_access_is_combinational, + FunctionUnit, FuList, TileList, LinkList, + dataSPM, controller2addr_map, idTo2d_map, + is_multi_cgra = True, cgra_id = 0, + provided_max_per_cgra_rows = None, + provided_max_per_cgra_cols = None, + provided_max_num_rd_tiles = None, + provided_max_num_wr_tiles = None): + """ + provided_max_per_cgra_rows: the row number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. + provided_max_per_cgra_cols: the column number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. + provided_max_num_rd_tiles: the number of read ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. + provided_max_num_wr_tiles: the number of write ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. + """ + + DataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = DataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_bitwidth = DataType.get_field_type(kAttrPayload).nbits + + CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) + + # Reconstructs packet types. + # In the case of heterogeneous multi-cgra, `max_num_tiles` means the tile number of the largest cgra. + # In the case of single cgra, it is the tile number of the current cgra. + max_per_cgra_rows = provided_max_per_cgra_rows if provided_max_per_cgra_rows is not None else per_cgra_rows + max_per_cgra_cols = provided_max_per_cgra_cols if provided_max_per_cgra_cols is not None else per_cgra_columns + max_num_tiles = max_per_cgra_rows * max_per_cgra_cols + # In the case of heterogeneous multi-cgra, `max_num_rd_tiles` means the number of read ports of the largest cgra. + # In the case of single cgra, it is the number of read ports of the current cgra. + max_num_rd_tiles = provided_max_num_rd_tiles if provided_max_num_rd_tiles is not None else dataSPM.getNumOfValidReadPorts() + max_num_wr_tiles = provided_max_num_wr_tiles if provided_max_num_wr_tiles is not None else dataSPM.getNumOfValidWritePorts() + + + # Use largest CGRA shape(max_num_tiles) to set CtrlPktType/NocPktType for compatibility. + CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, + max_num_tiles, CgraPayloadType) + + NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, + max_num_tiles, max_num_rd_tiles, + CgraPayloadType) + + s.num_mesh_ports = 8 + # tile number of the current cgra. + s.num_tiles = len(TileList) + num_cgras = multi_cgra_rows * multi_cgra_columns + # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. + CtrlRingPos = mk_ring_pos(max_num_tiles + 1) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + assert(data_mem_size_per_bank * num_banks_per_cgra <= \ + data_mem_size_global) + + # Interfaces + s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) + s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) + s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType) + s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) + + if is_multi_cgra: + # Use the largest CGRA shape to set the boundary ports for compatibility in the case of heterogeneous multi-cgra. + # Remember to ground the remaining boundary ports of the current CGRA when the current CGRA has fewer rows or columns than the largest CGRA. + # See also: + s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_cols)] + s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(max_per_cgra_cols)] + s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_cols)] + s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(max_per_cgra_cols)] + s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_rows)] + s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(max_per_cgra_rows)] + s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_rows)] + s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(max_per_cgra_rows)] + + # Components + s.tile = [TileRTL(CtrlPktType, + ctrl_mem_size, + data_mem_size_global, num_ctrl, + total_steps, 4, 2, s.num_mesh_ports, + s.num_mesh_ports, num_cgras, s.num_tiles, + num_registers_per_reg_bank, + FuList = map_fu2rtl(TileList[i].getAllValidFuTypes())) + for i in range(s.num_tiles)] + # FIXME: Need to enrish data-SPM-related user-controlled parameters, e.g., number of banks. + s.data_mem = DataMemControllerRTL(NocPktType, + data_mem_size_global, + data_mem_size_per_bank, + num_banks_per_cgra, + max_num_rd_tiles, + max_num_wr_tiles, + multi_cgra_rows, + multi_cgra_columns, + max_num_tiles, + mem_access_is_combinational, + idTo2d_map) + s.cgra_id = InPort(CgraIdType) + s.controller = ControllerRTL(NocPktType, + multi_cgra_rows, multi_cgra_columns, + max_num_tiles, controller2addr_map, idTo2d_map) + # Connects controller id. + s.controller.cgra_id //= s.cgra_id + # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. + # The last argument of 1 is for the latency per hop. + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, max_num_tiles + 1, 1) + + # Address lower and upper bound. + s.address_lower = InPort(DataAddrType) + s.address_upper = InPort(DataAddrType) + + # Connections data mem cgra ID. + s.data_mem.cgra_id //= s.cgra_id + + # Connects the address lower and upper bound. + s.data_mem.address_lower //= s.address_lower + s.data_mem.address_upper //= s.address_upper + + # Connects data memory with controller. + s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request + s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request + s.data_mem.recv_from_noc_load_response_pkt //= s.controller.send_to_tile_load_response + s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_tile_load_request_pkt + s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_tile_load_response_pkt + s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_tile_store_request_pkt + + if is_multi_cgra: + s.recv_from_inter_cgra_noc //= s.controller.recv_from_inter_cgra_noc + s.send_to_inter_cgra_noc //= s.controller.send_to_inter_cgra_noc + else: + s.bypass_queue = BypassQueueRTL(NocPktType, 1) + s.bypass_queue.send //= s.controller.recv_from_inter_cgra_noc + s.bypass_queue.recv //= s.controller.send_to_inter_cgra_noc + + # Connects the ctrl interface between CPU and controller. + s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt + s.send_to_cpu_pkt //= s.controller.send_to_cpu_pkt + + # Assigns tile id. + for i in range(s.num_tiles): + s.tile[i].cgra_id //= s.cgra_id + s.tile[i].tile_id //= i + + # Connects ring with each control memory. + for i in range(s.num_tiles): + s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt + s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt + + s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt + s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt + + # Grounds the remaining ports of the ring. + for i in range(s.num_tiles + 1, max_num_tiles + 1): + s.ctrl_ring.send[i].rdy //= 0 + s.ctrl_ring.recv[i].val //= 0 + s.ctrl_ring.recv[i].msg //= CtrlPktType() + + # Records the tile indices and ports that have been grounded for from_mem and to_mem, + # to avoid PyMTL3 MultiWriterError. + recv_data_grounded_for_from_mem = set() + send_data_rdy_grounded_for_to_mem = set() + + for link in LinkList: + + if link.isFromMem(): + memPort = link.getMemReadPort() + dstTileIndex = link.dstTile.getIndex(TileList) + if not link.disabled: + s.data_mem.recv_raddr[memPort] //= s.tile[dstTileIndex].to_mem_raddr + s.data_mem.send_rdata[memPort] //= s.tile[dstTileIndex].from_mem_rdata + + # Grounds the generic routing port since it is unused for memory links when in single-CGRA mode. + # NOTE `recv_data` is used to receive data between multiple CGRAs. + if not link.disabled and not is_multi_cgra: + s.tile[dstTileIndex].recv_data[link.dstPort].val //= 0 + s.tile[dstTileIndex].recv_data[link.dstPort].msg //= DataType(0, 0) + # Records the tile indices and ports that have been grounded. + recv_data_grounded_for_from_mem.add((dstTileIndex, link.dstPort)) + + elif link.isToMem(): + memPort = link.getMemWritePort() + srcTileIndex = link.srcTile.getIndex(TileList) + if not link.disabled: + s.tile[srcTileIndex].to_mem_waddr //= s.data_mem.recv_waddr[memPort] + s.tile[srcTileIndex].to_mem_wdata //= s.data_mem.recv_wdata[memPort] + + # Grounds the generic routing port ready signal when in single-CGRA mode. + # NOTE `send_data` is used to send data between multiple CGRAs. + if not link.disabled and not is_multi_cgra: + s.tile[srcTileIndex].send_data[link.srcPort].rdy //= 0 + # Records the tile indices and ports that have been grounded. + send_data_rdy_grounded_for_to_mem.add((srcTileIndex, link.srcPort)) + + else: + srcTileIndex = link.srcTile.getIndex(TileList) + dstTileIndex = link.dstTile.getIndex(TileList) + if not link.disabled: + s.tile[srcTileIndex].send_data[link.srcPort] //= s.tile[dstTileIndex].recv_data[link.dstPort] + + # (cgra_idx_x, cgra_idx_y) is the coordinate of the current cgra in multi-cgra(Cartesian coordinate system). + """ + ^ y + | + | cgra2 cgra3 + | cgra0 cgra1 + +---------------> x + See also https://github.com/tancheng/VectorCGRA/blob/master/doc/figures/multi_cgra_coordinate_and_storage_way.png + + """ + cgra_idx_x = cgra_id % multi_cgra_columns + cgra_idx_y = cgra_id // multi_cgra_columns + + """ + y ^ + | tile12 tile13 tile14 tile15 + | tile8 tile9 tile10 tile11 + | tile4 tile5 tile6 tile7 + | tile0 tile1 tile2 tile3 + |--------------------------> x + + See also https://github.com/tancheng/VectorCGRA/blob/master/doc/figures/multi_cgra_coordinate_and_storage_way.png + """ + if is_multi_cgra: + for tile_idx_y in range(per_cgra_rows): + for tile_idx_x in range(per_cgra_columns): + tile_id = tile_idx_y * per_cgra_columns + tile_idx_x + # Only connects if the port is valid + if tile_idx_y == per_cgra_rows - 1: + if PORT_INDEX_NORTH not in TileList[tile_id].getInvalidOutPorts(): + s.tile[tile_id].send_data[PORT_INDEX_NORTH] //= s.send_data_on_boundary_north[tile_idx_x] + if PORT_INDEX_NORTH not in TileList[tile_id].getInvalidInPorts(): + s.tile[tile_id].recv_data[PORT_INDEX_NORTH] //= s.recv_data_on_boundary_north[tile_idx_x] + + if tile_idx_y == 0: + # Corner case: In multi-cgra, for each row of CGRAs except the bottom row, + # the south port of the bottom row tiles must be connected to the adjacent/south cgra. + if cgra_idx_y > 0: + s.tile[tile_id].send_data[PORT_INDEX_SOUTH] //= s.send_data_on_boundary_south[tile_idx_x] + s.tile[tile_id].recv_data[PORT_INDEX_SOUTH] //= s.recv_data_on_boundary_south[tile_idx_x] + else: #cgra_idx_y == 0 + # In multi-cgra, for the bottom row CGRAs, the south ports of the bottom row tiles should be grounded. + s.tile[tile_id].send_data[PORT_INDEX_SOUTH].rdy //= 0 + s.tile[tile_id].recv_data[PORT_INDEX_SOUTH].val //= 0 + s.tile[tile_id].recv_data[PORT_INDEX_SOUTH].msg //= DataType(0, 0) + + if tile_idx_x == 0: + # Corner case: In multi-cgra, for each column of CGRAs except the first column, + # the west port of the first column tiles must be connected to the adjacent/west cgra. + if cgra_idx_x > 0: + s.tile[tile_id].send_data[PORT_INDEX_WEST] //= s.send_data_on_boundary_west[tile_idx_y] + s.tile[tile_id].recv_data[PORT_INDEX_WEST] //= s.recv_data_on_boundary_west[tile_idx_y] + else: #cgra_idx_x == 0 + # In multi-cgra, for the first column CGRAs, the west ports of the first column tiles should be grounded. + s.tile[tile_id].send_data[PORT_INDEX_WEST].rdy //= 0 + s.tile[tile_id].recv_data[PORT_INDEX_WEST].val //= 0 + s.tile[tile_id].recv_data[PORT_INDEX_WEST].msg //= DataType(0, 0) + + if tile_idx_x == per_cgra_columns - 1: + if PORT_INDEX_EAST not in TileList[tile_id].getInvalidOutPorts(): + s.tile[tile_id].send_data[PORT_INDEX_EAST] //= s.send_data_on_boundary_east[tile_idx_y] + if PORT_INDEX_EAST not in TileList[tile_id].getInvalidInPorts(): + s.tile[tile_id].recv_data[PORT_INDEX_EAST] //= s.recv_data_on_boundary_east[tile_idx_y] + + for tile_idx_y in range(per_cgra_rows): + for tile_idx_x in range(per_cgra_columns): + i = tile_idx_y * per_cgra_columns + tile_idx_x + + for invalidInPort in TileList[i].getInvalidInPorts(): + """ + Corner case 1: + When the links between the dataSPM and the leftmost tiles are disabled, the PORT_INDEX_WEST status becomes invalid. + In this case, if the current CGRA needs to connect to the CGRA on its left, then the recv_data/send_data signals must not be tied to ground. + + Corner case 2: + When the links between the dataSPM and the bottom tiles are disabled, the PORT_INDEX_SOUTH status becomes invalid. + In this case, if the current CGRA needs to connect to the CGRA below it, then the recv_data/send_data signals must not be tied to ground. + """ + skip_multi = (is_multi_cgra and tile_idx_x == 0 and invalidInPort == PORT_INDEX_WEST) or \ + (is_multi_cgra and tile_idx_y == 0 and invalidInPort == PORT_INDEX_SOUTH) + skip_from_mem_dup = (not is_multi_cgra) and ((i, invalidInPort) in recv_data_grounded_for_from_mem) + if not skip_multi and not skip_from_mem_dup: + s.tile[i].recv_data[invalidInPort].val //= 0 + s.tile[i].recv_data[invalidInPort].msg //= DataType(0, 0) + + for invalidOutPort in TileList[i].getInvalidOutPorts(): + skip_multi = (is_multi_cgra and tile_idx_x == 0 and invalidOutPort == PORT_INDEX_WEST) or \ + (is_multi_cgra and tile_idx_y == 0 and invalidOutPort == PORT_INDEX_SOUTH) + skip_to_mem_dup = (not is_multi_cgra) and ((i, invalidOutPort) in send_data_rdy_grounded_for_to_mem) + if not skip_multi and not skip_to_mem_dup: + s.tile[i].send_data[invalidOutPort].rdy //= 0 + + if not TileList[i].hasFromMem(): + s.tile[i].to_mem_raddr.rdy //= 0 + s.tile[i].from_mem_rdata.val //= 0 + s.tile[i].from_mem_rdata.msg //= DataType(0, 0) + + if not TileList[i].hasToMem(): + s.tile[i].to_mem_waddr.rdy //= 0 + s.tile[i].to_mem_wdata.rdy //= 0 + + # Line trace + def line_trace(s): + res = "||\n".join([(("[tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) + for (i,x) in enumerate(s.tile)]) + res += "\n :: [" + s.data_mem.line_trace() + "] \n" + return res + + diff --git a/cgra/CgraWithContextSwitchRTL.py b/cgra/CgraWithContextSwitchRTL.py index 29569af2..361c0a9b 100644 --- a/cgra/CgraWithContextSwitchRTL.py +++ b/cgra/CgraWithContextSwitchRTL.py @@ -1,263 +1,263 @@ -""" -========================================================================= -Cgra_Context_Switch_RTL.py -========================================================================= - -Author : Yufei Yang - Date : Oct 17, 2025 -""" -from ..controller.ControllerRTL import ControllerRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.basic.val_rdy.queues import BypassQueueRTL -from ..lib.opt_type import * -from ..lib.util.common import * -from ..mem.data.DataMemControllerRTL import DataMemControllerRTL -from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos -from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL -from ..tile.TileWithContextSwitchRTL import TileWithContextSwitchRTL -from ..lib.util.data_struct_attr import * -from ..lib.messages import * - - -class CgraWithContextSwitchRTL(Component): - - def construct(s, CgraPayloadType, - multi_cgra_rows, - multi_cgra_columns, - width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_registers_per_reg_bank, num_ctrl, - total_steps, mem_access_is_combinational, - FunctionUnit, FuList, cgra_topology, - controller2addr_map, idTo2d_map, - is_multi_cgra = True): - - DataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = DataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_bitwidth = DataType.get_field_type(kAttrPayload).nbits - - CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) - - num_tiles = width * height - num_rd_tiles = height + width - 1 - - CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - num_tiles, CgraPayloadType) - - NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - num_tiles, num_rd_tiles, - CgraPayloadType) - # Other topology can simply modify the tiles connections, or - # leverage the template for modeling. - assert(cgra_topology == MESH or cgra_topology == KING_MESH) - s.num_mesh_ports = 4 - if cgra_topology == MESH: - s.num_mesh_ports = 4 - elif cgra_topology == KING_MESH: - s.num_mesh_ports = 8 - - s.num_tiles = width * height - # The left and bottom tiles are connected to the data memory. - data_mem_num_rd_tiles = height + width - 1 - data_mem_num_wr_tiles = height + width - 1 - - num_cgras = multi_cgra_rows * multi_cgra_columns - # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. - CtrlRingPos = mk_ring_pos(s.num_tiles + 1) - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - assert(data_mem_size_per_bank * num_banks_per_cgra <= \ - data_mem_size_global) - - # Interfaces - s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) - s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType) - s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) - s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) - - # Interfaces on the boundary of the CGRA. - s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(width )] - s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(width )] - s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(width )] - s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(width )] - - s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(height)] - s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(height)] - s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(height)] - s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] - - # Components - s.tile = [TileWithContextSwitchRTL(CtrlPktType, - ctrl_mem_size, - data_mem_size_global, num_ctrl, - total_steps, 4, 2, s.num_mesh_ports, - s.num_mesh_ports, num_cgras, s.num_tiles, - num_registers_per_reg_bank, - FuList = FuList) - for i in range(s.num_tiles)] - s.data_mem = DataMemControllerRTL(NocPktType, - data_mem_size_global, - data_mem_size_per_bank, - num_banks_per_cgra, - data_mem_num_rd_tiles, - data_mem_num_wr_tiles, - multi_cgra_rows, - multi_cgra_columns, - s.num_tiles, - mem_access_is_combinational, - idTo2d_map) - s.controller = ControllerRTL(NocPktType, - multi_cgra_rows, multi_cgra_columns, - s.num_tiles, controller2addr_map, idTo2d_map) - # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. - # The last argument of 1 is for the latency per hop. - s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles + 1, 1) - s.cgra_id = InPort(CgraIdType) - - # Address lower and upper bound. - s.address_lower = InPort(DataAddrType) - s.address_upper = InPort(DataAddrType) - - # Connections - # Connects the controller id. - s.controller.cgra_id //= s.cgra_id - s.data_mem.cgra_id //= s.cgra_id - - # Connects the address lower and upper bound. - s.data_mem.address_lower //= s.address_lower - s.data_mem.address_upper //= s.address_upper - - # Connects data memory with controller. - s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request - s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request - s.data_mem.recv_from_noc_load_response_pkt //= s.controller.send_to_tile_load_response - s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_tile_load_request_pkt - s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_tile_load_response_pkt - s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_tile_store_request_pkt - - if is_multi_cgra: - s.recv_from_inter_cgra_noc //= s.controller.recv_from_inter_cgra_noc - s.send_to_inter_cgra_noc //= s.controller.send_to_inter_cgra_noc - else: - s.bypass_queue = BypassQueueRTL(NocPktType, 1) - s.bypass_queue.send //= s.controller.recv_from_inter_cgra_noc - s.bypass_queue.recv //= s.controller.send_to_inter_cgra_noc - - # Connects the ctrl interface between CPU and controller. - s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt - s.send_to_cpu_pkt //= s.controller.send_to_cpu_pkt - - # Assigns tile id. - for i in range(s.num_tiles): - s.tile[i].tile_id //= i - s.tile[i].cgra_id //= s.cgra_id - - # Connects ring with each control memory. - for i in range(s.num_tiles): - s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt - s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt - - for i in range(s.num_tiles): - s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt - s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt - - for i in range(s.num_tiles): - - if i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.tile[i-width].recv_data[PORT_INDEX_NORTH] - - if i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTH] //= s.tile[i+width].recv_data[PORT_INDEX_SOUTH] - - if i % width > 0: - s.tile[i].send_data[PORT_INDEX_WEST] //= s.tile[i-1].recv_data[PORT_INDEX_EAST] - - if i % width < width - 1: - s.tile[i].send_data[PORT_INDEX_EAST] //= s.tile[i+1].recv_data[PORT_INDEX_WEST] - - if cgra_topology == KING_MESH: - if i % width > 0 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST] //= s.tile[i+width-1].recv_data[PORT_INDEX_SOUTHEAST] - s.tile[i+width-1].send_data[PORT_INDEX_SOUTHEAST] //= s.tile[i].recv_data[PORT_INDEX_NORTHWEST] - - if i % width < width - 1 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHEAST] //= s.tile[i+width+1].recv_data[PORT_INDEX_SOUTHWEST] - s.tile[i+width+1].send_data[PORT_INDEX_SOUTHWEST] //= s.tile[i].recv_data[PORT_INDEX_NORTHEAST] - - if i // width == 0: - s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) - s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) - - if i // width == height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) - s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) - - if i % width == 0 and i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) - - if i % width == 0 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) - - if i % width == width - 1 and i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) - - if i % width == width - 1 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) - - - if i // width == 0: - s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.send_data_on_boundary_south[i % width] - s.tile[i].recv_data[PORT_INDEX_SOUTH] //= s.recv_data_on_boundary_south[i % width] - - if i // width == height - 1: - s.tile[i].send_data[PORT_INDEX_NORTH] //= s.send_data_on_boundary_north[i % width] - s.tile[i].recv_data[PORT_INDEX_NORTH] //= s.recv_data_on_boundary_north[i % width] - - if i % width == 0: - s.tile[i].send_data[PORT_INDEX_WEST] //= s.send_data_on_boundary_west[i // width] - s.tile[i].recv_data[PORT_INDEX_WEST] //= s.recv_data_on_boundary_west[i // width] - - if i % width == width - 1: - s.tile[i].send_data[PORT_INDEX_EAST] //= s.send_data_on_boundary_east[i // width] - s.tile[i].recv_data[PORT_INDEX_EAST] //= s.recv_data_on_boundary_east[i // width] - - if i % width == 0 or i // width == 0: - s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[width + i // width - 1 if i >= width else i % width] - s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[width + i // width - 1 if i >= width else i % width] - s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[width + i // width - 1 if i >= width else i % width] - s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[width + i // width - 1 if i >= width else i % width] - else: - s.tile[i].to_mem_raddr.rdy //= 0 - s.tile[i].from_mem_rdata.val //= 0 - s.tile[i].from_mem_rdata.msg //= DataType(0, 0) - s.tile[i].to_mem_waddr.rdy //= 0 - s.tile[i].to_mem_wdata.rdy //= 0 - - # Line trace - def line_trace(s): - res = "||\n".join([(("\n[cgra"+str(s.cgra_id)+"_tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) - for (i,x) in enumerate(s.tile)]) - res += "\n :: [" + s.ctrl_ring.line_trace() + "] \n" - res += "\n :: [" + s.data_mem.line_trace() + "] \n" - return res - - +""" +========================================================================= +Cgra_Context_Switch_RTL.py +========================================================================= + +Author : Yufei Yang + Date : Oct 17, 2025 +""" +from ..controller.ControllerRTL import ControllerRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.basic.val_rdy.queues import BypassQueueRTL +from ..lib.opt_type import * +from ..lib.util.common import * +from ..mem.data.DataMemControllerRTL import DataMemControllerRTL +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL +from ..tile.TileWithContextSwitchRTL import TileWithContextSwitchRTL +from ..lib.util.data_struct_attr import * +from ..lib.messages import * + + +class CgraWithContextSwitchRTL(Component): + + def construct(s, CgraPayloadType, + multi_cgra_rows, + multi_cgra_columns, + width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, num_ctrl, + total_steps, mem_access_is_combinational, + FunctionUnit, FuList, cgra_topology, + controller2addr_map, idTo2d_map, + is_multi_cgra = True): + + DataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = DataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_bitwidth = DataType.get_field_type(kAttrPayload).nbits + + CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) + + num_tiles = width * height + num_rd_tiles = height + width - 1 + + CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, + num_tiles, CgraPayloadType) + + NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, + num_tiles, num_rd_tiles, + CgraPayloadType) + # Other topology can simply modify the tiles connections, or + # leverage the template for modeling. + assert(cgra_topology == MESH or cgra_topology == KING_MESH) + s.num_mesh_ports = 4 + if cgra_topology == MESH: + s.num_mesh_ports = 4 + elif cgra_topology == KING_MESH: + s.num_mesh_ports = 8 + + s.num_tiles = width * height + # The left and bottom tiles are connected to the data memory. + data_mem_num_rd_tiles = height + width - 1 + data_mem_num_wr_tiles = height + width - 1 + + num_cgras = multi_cgra_rows * multi_cgra_columns + # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. + CtrlRingPos = mk_ring_pos(s.num_tiles + 1) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + assert(data_mem_size_per_bank * num_banks_per_cgra <= \ + data_mem_size_global) + + # Interfaces + s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) + s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType) + s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) + s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) + + # Interfaces on the boundary of the CGRA. + s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(width )] + s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(width )] + s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(width )] + s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(width )] + + s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(height)] + s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] + + # Components + s.tile = [TileWithContextSwitchRTL(CtrlPktType, + ctrl_mem_size, + data_mem_size_global, num_ctrl, + total_steps, 4, 2, s.num_mesh_ports, + s.num_mesh_ports, num_cgras, s.num_tiles, + num_registers_per_reg_bank, + FuList = FuList) + for i in range(s.num_tiles)] + s.data_mem = DataMemControllerRTL(NocPktType, + data_mem_size_global, + data_mem_size_per_bank, + num_banks_per_cgra, + data_mem_num_rd_tiles, + data_mem_num_wr_tiles, + multi_cgra_rows, + multi_cgra_columns, + s.num_tiles, + mem_access_is_combinational, + idTo2d_map) + s.controller = ControllerRTL(NocPktType, + multi_cgra_rows, multi_cgra_columns, + s.num_tiles, controller2addr_map, idTo2d_map) + # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. + # The last argument of 1 is for the latency per hop. + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles + 1, 1) + s.cgra_id = InPort(CgraIdType) + + # Address lower and upper bound. + s.address_lower = InPort(DataAddrType) + s.address_upper = InPort(DataAddrType) + + # Connections + # Connects the controller id. + s.controller.cgra_id //= s.cgra_id + s.data_mem.cgra_id //= s.cgra_id + + # Connects the address lower and upper bound. + s.data_mem.address_lower //= s.address_lower + s.data_mem.address_upper //= s.address_upper + + # Connects data memory with controller. + s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request + s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request + s.data_mem.recv_from_noc_load_response_pkt //= s.controller.send_to_tile_load_response + s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_tile_load_request_pkt + s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_tile_load_response_pkt + s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_tile_store_request_pkt + + if is_multi_cgra: + s.recv_from_inter_cgra_noc //= s.controller.recv_from_inter_cgra_noc + s.send_to_inter_cgra_noc //= s.controller.send_to_inter_cgra_noc + else: + s.bypass_queue = BypassQueueRTL(NocPktType, 1) + s.bypass_queue.send //= s.controller.recv_from_inter_cgra_noc + s.bypass_queue.recv //= s.controller.send_to_inter_cgra_noc + + # Connects the ctrl interface between CPU and controller. + s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt + s.send_to_cpu_pkt //= s.controller.send_to_cpu_pkt + + # Assigns tile id. + for i in range(s.num_tiles): + s.tile[i].tile_id //= i + s.tile[i].cgra_id //= s.cgra_id + + # Connects ring with each control memory. + for i in range(s.num_tiles): + s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt + s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt + + for i in range(s.num_tiles): + s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt + s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt + + for i in range(s.num_tiles): + + if i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.tile[i-width].recv_data[PORT_INDEX_NORTH] + + if i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTH] //= s.tile[i+width].recv_data[PORT_INDEX_SOUTH] + + if i % width > 0: + s.tile[i].send_data[PORT_INDEX_WEST] //= s.tile[i-1].recv_data[PORT_INDEX_EAST] + + if i % width < width - 1: + s.tile[i].send_data[PORT_INDEX_EAST] //= s.tile[i+1].recv_data[PORT_INDEX_WEST] + + if cgra_topology == KING_MESH: + if i % width > 0 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST] //= s.tile[i+width-1].recv_data[PORT_INDEX_SOUTHEAST] + s.tile[i+width-1].send_data[PORT_INDEX_SOUTHEAST] //= s.tile[i].recv_data[PORT_INDEX_NORTHWEST] + + if i % width < width - 1 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHEAST] //= s.tile[i+width+1].recv_data[PORT_INDEX_SOUTHWEST] + s.tile[i+width+1].send_data[PORT_INDEX_SOUTHWEST] //= s.tile[i].recv_data[PORT_INDEX_NORTHEAST] + + if i // width == 0: + s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) + + if i // width == height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) + + if i % width == 0 and i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) + + if i % width == 0 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) + + if i % width == width - 1 and i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) + + if i % width == width - 1 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) + + + if i // width == 0: + s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.send_data_on_boundary_south[i % width] + s.tile[i].recv_data[PORT_INDEX_SOUTH] //= s.recv_data_on_boundary_south[i % width] + + if i // width == height - 1: + s.tile[i].send_data[PORT_INDEX_NORTH] //= s.send_data_on_boundary_north[i % width] + s.tile[i].recv_data[PORT_INDEX_NORTH] //= s.recv_data_on_boundary_north[i % width] + + if i % width == 0: + s.tile[i].send_data[PORT_INDEX_WEST] //= s.send_data_on_boundary_west[i // width] + s.tile[i].recv_data[PORT_INDEX_WEST] //= s.recv_data_on_boundary_west[i // width] + + if i % width == width - 1: + s.tile[i].send_data[PORT_INDEX_EAST] //= s.send_data_on_boundary_east[i // width] + s.tile[i].recv_data[PORT_INDEX_EAST] //= s.recv_data_on_boundary_east[i // width] + + if i % width == 0 or i // width == 0: + s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[width + i // width - 1 if i >= width else i % width] + s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[width + i // width - 1 if i >= width else i % width] + s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[width + i // width - 1 if i >= width else i % width] + s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[width + i // width - 1 if i >= width else i % width] + else: + s.tile[i].to_mem_raddr.rdy //= 0 + s.tile[i].from_mem_rdata.val //= 0 + s.tile[i].from_mem_rdata.msg //= DataType(0, 0) + s.tile[i].to_mem_waddr.rdy //= 0 + s.tile[i].to_mem_wdata.rdy //= 0 + + # Line trace + def line_trace(s): + res = "||\n".join([(("\n[cgra"+str(s.cgra_id)+"_tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) + for (i,x) in enumerate(s.tile)]) + res += "\n :: [" + s.ctrl_ring.line_trace() + "] \n" + res += "\n :: [" + s.data_mem.line_trace() + "] \n" + return res + + diff --git a/cgra/CgraWithStreamingLoadRTL.py b/cgra/CgraWithStreamingLoadRTL.py index c554bfb6..6e7dcbf5 100644 --- a/cgra/CgraWithStreamingLoadRTL.py +++ b/cgra/CgraWithStreamingLoadRTL.py @@ -1,272 +1,272 @@ -""" -========================================================================= -CgraWithStreamingLoadRTL.py -========================================================================= - -Author : Cheng Tan - Date : Dec 22, 2024 -""" -from ..controller.ControllerRTL import ControllerRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.basic.val_rdy.queues import BypassQueueRTL -from ..lib.opt_type import * -from ..lib.util.common import * -from ..mem.data.DataMemControllerRTL import DataMemControllerRTL -from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos -from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL -from ..tile.TileWithStreamingLoadRTL import TileWithStreamingLoadRTL -from ..lib.util.data_struct_attr import * -from ..lib.messages import * -from ..lib.util.common import * - - -class CgraWithStreamingLoadRTL(Component): - - def construct(s, CgraPayloadType, - multi_cgra_rows, - multi_cgra_columns, - width, height, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_registers_per_reg_bank, num_ctrl, - total_steps, mem_access_is_combinational, - FunctionUnit, FuList, cgra_topology, - controller2addr_map, idTo2d_map, - is_multi_cgra = True, - has_ctrl_ring = True): - - # Derives all types from CgraPayloadType. - DataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = DataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_bitwidth = DataType.get_field_type(kAttrPayload).nbits - - - num_tiles = width * height - num_rd_tiles = height + width - 1 - - CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) - - CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - num_tiles, CgraPayloadType) - - NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - num_tiles, num_rd_tiles, - CgraPayloadType) - - # Other topology can simply modify the tiles connections, or - # leverage the template for modeling. - assert(cgra_topology == MESH or cgra_topology == KING_MESH) - s.num_mesh_ports = 4 - if cgra_topology == MESH: - s.num_mesh_ports = 4 - elif cgra_topology == KING_MESH: - s.num_mesh_ports = 8 - - s.has_ctrl_ring = has_ctrl_ring - s.num_tiles = width * height - # The left and bottom tiles are connected to the data memory. - data_mem_num_rd_tiles = height + width - 1 - data_mem_num_wr_tiles = height + width - 1 - - num_cgras = multi_cgra_rows * multi_cgra_columns - # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. - CtrlRingPos = mk_ring_pos(s.num_tiles + 1) - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - assert(data_mem_size_per_bank * num_banks_per_cgra <= \ - data_mem_size_global) - - # Interfaces - s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) - s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType) - s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) - s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) - - # Interfaces on the boundary of the CGRA. - s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(width )] - s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(width )] - s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(width )] - s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(width )] - - s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(height)] - s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(height)] - s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(height)] - s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] - - # Components - s.tile = [TileWithStreamingLoadRTL(CtrlPktType, - ctrl_mem_size, - data_mem_size_global, num_ctrl, - total_steps, 4, 2, s.num_mesh_ports, - s.num_mesh_ports, num_cgras, s.num_tiles, - num_registers_per_reg_bank, - FuList = FuList) - for i in range(s.num_tiles)] - s.data_mem = DataMemControllerRTL(NocPktType, - data_mem_size_global, - data_mem_size_per_bank, - num_banks_per_cgra, - data_mem_num_rd_tiles, - data_mem_num_wr_tiles, - multi_cgra_rows, - multi_cgra_columns, - s.num_tiles, - mem_access_is_combinational, - idTo2d_map) - s.controller = ControllerRTL(NocPktType, - multi_cgra_rows, multi_cgra_columns, - s.num_tiles, controller2addr_map, idTo2d_map) - # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. - # The last argument of 1 is for the latency per hop. - if has_ctrl_ring: - s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles + 1, 1) - s.cgra_id = InPort(CgraIdType) - - # Address lower and upper bound. - s.address_lower = InPort(DataAddrType) - s.address_upper = InPort(DataAddrType) - - # Connections - # Connects the controller id. - s.controller.cgra_id //= s.cgra_id - s.data_mem.cgra_id //= s.cgra_id - - # Connects the address lower and upper bound. - s.data_mem.address_lower //= s.address_lower - s.data_mem.address_upper //= s.address_upper - - # Connects data memory with controller. - s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request - s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request - s.data_mem.recv_from_noc_load_response_pkt //= s.controller.send_to_tile_load_response - s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_tile_load_request_pkt - s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_tile_load_response_pkt - s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_tile_store_request_pkt - - if is_multi_cgra: - s.recv_from_inter_cgra_noc //= s.controller.recv_from_inter_cgra_noc - s.send_to_inter_cgra_noc //= s.controller.send_to_inter_cgra_noc - else: - s.bypass_queue = BypassQueueRTL(NocPktType, 1) - s.bypass_queue.send //= s.controller.recv_from_inter_cgra_noc - s.bypass_queue.recv //= s.controller.send_to_inter_cgra_noc - - # Connects the ctrl interface between CPU and controller. - s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt - s.send_to_cpu_pkt //= s.controller.send_to_cpu_pkt - - # Assigns tile id. - for i in range(s.num_tiles): - s.tile[i].tile_id //= i - s.tile[i].cgra_id //= s.cgra_id - - if has_ctrl_ring: - # Connects ring with each control memory. - for i in range(s.num_tiles): - s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt - s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt - s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt - s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt - - for i in range(s.num_tiles): - - if i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.tile[i-width].recv_data[PORT_INDEX_NORTH] - - if i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTH] //= s.tile[i+width].recv_data[PORT_INDEX_SOUTH] - - if i % width > 0: - s.tile[i].send_data[PORT_INDEX_WEST] //= s.tile[i-1].recv_data[PORT_INDEX_EAST] - - if i % width < width - 1: - s.tile[i].send_data[PORT_INDEX_EAST] //= s.tile[i+1].recv_data[PORT_INDEX_WEST] - - if cgra_topology == KING_MESH: - if i % width > 0 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST] //= s.tile[i+width-1].recv_data[PORT_INDEX_SOUTHEAST] - s.tile[i+width-1].send_data[PORT_INDEX_SOUTHEAST] //= s.tile[i].recv_data[PORT_INDEX_NORTHWEST] - - if i % width < width - 1 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHEAST] //= s.tile[i+width+1].recv_data[PORT_INDEX_SOUTHWEST] - s.tile[i+width+1].send_data[PORT_INDEX_SOUTHWEST] //= s.tile[i].recv_data[PORT_INDEX_NORTHEAST] - - if i // width == 0: - s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) - s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) - - if i // width == height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) - s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) - - if i % width == 0 and i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) - - if i % width == 0 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) - - if i % width == width - 1 and i // width > 0: - s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) - - if i % width == width - 1 and i // width < height - 1: - s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 - s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) - - - if i // width == 0: - s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.send_data_on_boundary_south[i % width] - s.tile[i].recv_data[PORT_INDEX_SOUTH] //= s.recv_data_on_boundary_south[i % width] - - if i // width == height - 1: - s.tile[i].send_data[PORT_INDEX_NORTH] //= s.send_data_on_boundary_north[i % width] - s.tile[i].recv_data[PORT_INDEX_NORTH] //= s.recv_data_on_boundary_north[i % width] - - if i % width == 0: - s.tile[i].send_data[PORT_INDEX_WEST] //= s.send_data_on_boundary_west[i // width] - s.tile[i].recv_data[PORT_INDEX_WEST] //= s.recv_data_on_boundary_west[i // width] - - if i % width == width - 1: - s.tile[i].send_data[PORT_INDEX_EAST] //= s.send_data_on_boundary_east[i // width] - s.tile[i].recv_data[PORT_INDEX_EAST] //= s.recv_data_on_boundary_east[i // width] - - if i % width == 0 or i // width == 0: - s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[width + i // width - 1 if i >= width else i % width] - s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[width + i // width - 1 if i >= width else i % width] - s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[width + i // width - 1 if i >= width else i % width] - s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[width + i // width - 1 if i >= width else i % width] - else: - s.tile[i].to_mem_raddr.rdy //= 0 - s.tile[i].from_mem_rdata.val //= 0 - s.tile[i].from_mem_rdata.msg //= DataType(0, 0) - s.tile[i].to_mem_waddr.rdy //= 0 - s.tile[i].to_mem_wdata.rdy //= 0 - - # Line trace - def line_trace(s): - res = "||\n".join([(("\n[cgra"+str(s.cgra_id)+"_tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) - for (i,x) in enumerate(s.tile)]) - if s.has_ctrl_ring: - res += "\n :: [" + s.ctrl_ring.line_trace() + "] \n" - res += "\n :: [" + s.data_mem.line_trace() + "] \n" - return res - - - - +""" +========================================================================= +CgraWithStreamingLoadRTL.py +========================================================================= + +Author : Cheng Tan + Date : Dec 22, 2024 +""" +from ..controller.ControllerRTL import ControllerRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.basic.val_rdy.queues import BypassQueueRTL +from ..lib.opt_type import * +from ..lib.util.common import * +from ..mem.data.DataMemControllerRTL import DataMemControllerRTL +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL +from ..tile.TileWithStreamingLoadRTL import TileWithStreamingLoadRTL +from ..lib.util.data_struct_attr import * +from ..lib.messages import * +from ..lib.util.common import * + + +class CgraWithStreamingLoadRTL(Component): + + def construct(s, CgraPayloadType, + multi_cgra_rows, + multi_cgra_columns, + width, height, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, num_ctrl, + total_steps, mem_access_is_combinational, + FunctionUnit, FuList, cgra_topology, + controller2addr_map, idTo2d_map, + is_multi_cgra = True, + has_ctrl_ring = True): + + # Derives all types from CgraPayloadType. + DataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = DataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_bitwidth = DataType.get_field_type(kAttrPayload).nbits + + + num_tiles = width * height + num_rd_tiles = height + width - 1 + + CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) + + CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, + num_tiles, CgraPayloadType) + + NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, + num_tiles, num_rd_tiles, + CgraPayloadType) + + # Other topology can simply modify the tiles connections, or + # leverage the template for modeling. + assert(cgra_topology == MESH or cgra_topology == KING_MESH) + s.num_mesh_ports = 4 + if cgra_topology == MESH: + s.num_mesh_ports = 4 + elif cgra_topology == KING_MESH: + s.num_mesh_ports = 8 + + s.has_ctrl_ring = has_ctrl_ring + s.num_tiles = width * height + # The left and bottom tiles are connected to the data memory. + data_mem_num_rd_tiles = height + width - 1 + data_mem_num_wr_tiles = height + width - 1 + + num_cgras = multi_cgra_rows * multi_cgra_columns + # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. + CtrlRingPos = mk_ring_pos(s.num_tiles + 1) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + assert(data_mem_size_per_bank * num_banks_per_cgra <= \ + data_mem_size_global) + + # Interfaces + s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) + s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType) + s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) + s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) + + # Interfaces on the boundary of the CGRA. + s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(width )] + s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(width )] + s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(width )] + s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(width )] + + s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(height)] + s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(height)] + s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] + + # Components + s.tile = [TileWithStreamingLoadRTL(CtrlPktType, + ctrl_mem_size, + data_mem_size_global, num_ctrl, + total_steps, 4, 2, s.num_mesh_ports, + s.num_mesh_ports, num_cgras, s.num_tiles, + num_registers_per_reg_bank, + FuList = FuList) + for i in range(s.num_tiles)] + s.data_mem = DataMemControllerRTL(NocPktType, + data_mem_size_global, + data_mem_size_per_bank, + num_banks_per_cgra, + data_mem_num_rd_tiles, + data_mem_num_wr_tiles, + multi_cgra_rows, + multi_cgra_columns, + s.num_tiles, + mem_access_is_combinational, + idTo2d_map) + s.controller = ControllerRTL(NocPktType, + multi_cgra_rows, multi_cgra_columns, + s.num_tiles, controller2addr_map, idTo2d_map) + # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. + # The last argument of 1 is for the latency per hop. + if has_ctrl_ring: + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles + 1, 1) + s.cgra_id = InPort(CgraIdType) + + # Address lower and upper bound. + s.address_lower = InPort(DataAddrType) + s.address_upper = InPort(DataAddrType) + + # Connections + # Connects the controller id. + s.controller.cgra_id //= s.cgra_id + s.data_mem.cgra_id //= s.cgra_id + + # Connects the address lower and upper bound. + s.data_mem.address_lower //= s.address_lower + s.data_mem.address_upper //= s.address_upper + + # Connects data memory with controller. + s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request + s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request + s.data_mem.recv_from_noc_load_response_pkt //= s.controller.send_to_tile_load_response + s.data_mem.send_to_noc_load_request_pkt //= s.controller.recv_from_tile_load_request_pkt + s.data_mem.send_to_noc_load_response_pkt //= s.controller.recv_from_tile_load_response_pkt + s.data_mem.send_to_noc_store_pkt //= s.controller.recv_from_tile_store_request_pkt + + if is_multi_cgra: + s.recv_from_inter_cgra_noc //= s.controller.recv_from_inter_cgra_noc + s.send_to_inter_cgra_noc //= s.controller.send_to_inter_cgra_noc + else: + s.bypass_queue = BypassQueueRTL(NocPktType, 1) + s.bypass_queue.send //= s.controller.recv_from_inter_cgra_noc + s.bypass_queue.recv //= s.controller.send_to_inter_cgra_noc + + # Connects the ctrl interface between CPU and controller. + s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt + s.send_to_cpu_pkt //= s.controller.send_to_cpu_pkt + + # Assigns tile id. + for i in range(s.num_tiles): + s.tile[i].tile_id //= i + s.tile[i].cgra_id //= s.cgra_id + + if has_ctrl_ring: + # Connects ring with each control memory. + for i in range(s.num_tiles): + s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt + s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt + s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt + s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt + + for i in range(s.num_tiles): + + if i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.tile[i-width].recv_data[PORT_INDEX_NORTH] + + if i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTH] //= s.tile[i+width].recv_data[PORT_INDEX_SOUTH] + + if i % width > 0: + s.tile[i].send_data[PORT_INDEX_WEST] //= s.tile[i-1].recv_data[PORT_INDEX_EAST] + + if i % width < width - 1: + s.tile[i].send_data[PORT_INDEX_EAST] //= s.tile[i+1].recv_data[PORT_INDEX_WEST] + + if cgra_topology == KING_MESH: + if i % width > 0 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST] //= s.tile[i+width-1].recv_data[PORT_INDEX_SOUTHEAST] + s.tile[i+width-1].send_data[PORT_INDEX_SOUTHEAST] //= s.tile[i].recv_data[PORT_INDEX_NORTHWEST] + + if i % width < width - 1 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHEAST] //= s.tile[i+width+1].recv_data[PORT_INDEX_SOUTHWEST] + s.tile[i+width+1].send_data[PORT_INDEX_SOUTHWEST] //= s.tile[i].recv_data[PORT_INDEX_NORTHEAST] + + if i // width == 0: + s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) + + if i // width == height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) + s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) + + if i % width == 0 and i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHWEST].msg //= DataType(0, 0) + + if i % width == 0 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHWEST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHWEST].msg //= DataType(0, 0) + + if i % width == width - 1 and i // width > 0: + s.tile[i].send_data[PORT_INDEX_SOUTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_SOUTHEAST].msg //= DataType(0, 0) + + if i % width == width - 1 and i // width < height - 1: + s.tile[i].send_data[PORT_INDEX_NORTHEAST].rdy //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].val //= 0 + s.tile[i].recv_data[PORT_INDEX_NORTHEAST].msg //= DataType(0, 0) + + + if i // width == 0: + s.tile[i].send_data[PORT_INDEX_SOUTH] //= s.send_data_on_boundary_south[i % width] + s.tile[i].recv_data[PORT_INDEX_SOUTH] //= s.recv_data_on_boundary_south[i % width] + + if i // width == height - 1: + s.tile[i].send_data[PORT_INDEX_NORTH] //= s.send_data_on_boundary_north[i % width] + s.tile[i].recv_data[PORT_INDEX_NORTH] //= s.recv_data_on_boundary_north[i % width] + + if i % width == 0: + s.tile[i].send_data[PORT_INDEX_WEST] //= s.send_data_on_boundary_west[i // width] + s.tile[i].recv_data[PORT_INDEX_WEST] //= s.recv_data_on_boundary_west[i // width] + + if i % width == width - 1: + s.tile[i].send_data[PORT_INDEX_EAST] //= s.send_data_on_boundary_east[i // width] + s.tile[i].recv_data[PORT_INDEX_EAST] //= s.recv_data_on_boundary_east[i // width] + + if i % width == 0 or i // width == 0: + s.tile[i].to_mem_raddr //= s.data_mem.recv_raddr[width + i // width - 1 if i >= width else i % width] + s.tile[i].from_mem_rdata //= s.data_mem.send_rdata[width + i // width - 1 if i >= width else i % width] + s.tile[i].to_mem_waddr //= s.data_mem.recv_waddr[width + i // width - 1 if i >= width else i % width] + s.tile[i].to_mem_wdata //= s.data_mem.recv_wdata[width + i // width - 1 if i >= width else i % width] + else: + s.tile[i].to_mem_raddr.rdy //= 0 + s.tile[i].from_mem_rdata.val //= 0 + s.tile[i].from_mem_rdata.msg //= DataType(0, 0) + s.tile[i].to_mem_waddr.rdy //= 0 + s.tile[i].to_mem_wdata.rdy //= 0 + + # Line trace + def line_trace(s): + res = "||\n".join([(("\n[cgra"+str(s.cgra_id)+"_tile"+str(i)+"]: ") + x.line_trace() + x.ctrl_mem.line_trace()) + for (i,x) in enumerate(s.tile)]) + if s.has_ctrl_ring: + res += "\n :: [" + s.ctrl_ring.line_trace() + "] \n" + res += "\n :: [" + s.data_mem.line_trace() + "] \n" + return res + + + + diff --git a/controller/ControllerRTL.py b/controller/ControllerRTL.py index 2a5b1c05..83b41068 100644 --- a/controller/ControllerRTL.py +++ b/controller/ControllerRTL.py @@ -1,390 +1,390 @@ -""" -========================================================================== -ControllerRTL.py -========================================================================== -Controller for each CGRA. Mutiple controllers are interconnected in a -multi-cgra system. - -Author : Cheng Tan - Date : Dec 2, 2024 -""" - -from ..lib.basic.val_rdy.ifcs import RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import SendIfcRTL -from ..lib.basic.val_rdy.queues import NormalQueueRTL -from ..lib.messages import * -from ..lib.opt_type import * -from ..lib.util.common import * -from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL -from ..noc.PyOCN.pymtl3_net.xbar.XbarRTL import XbarRTL - -from .GlobalReduceUnitRTL import GlobalReduceUnitRTL -from ..lib.util.data_struct_attr import * - -class ControllerRTL(Component): - - def construct(s, - InterCgraPktType, - multi_cgra_rows, - multi_cgra_columns, - num_tiles, - controller2addr_map, - idTo2d_map): - - # Derives types from InterCgraPktType. - CgraPayloadType = InterCgraPktType.get_field_type(kAttrPayload) - DataType = CgraPayloadType.get_field_type(kAttrData) - DataAddrType = CgraPayloadType.get_field_type(kAttrDataAddr) - - # Derives CgraIdType from grid dimensions. - CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) - - # Reconstructs IntraCgraPktType. - IntraCgraPktType = mk_intra_cgra_pkt(multi_cgra_columns, - multi_cgra_rows, - num_tiles, - CgraPayloadType) - - assert(multi_cgra_columns >= multi_cgra_rows) - - # Used for calculating the x/y position. - XType = mk_bits(max(clog2(multi_cgra_columns), 1)) - YType = mk_bits(max(clog2(multi_cgra_rows), 1)) - TileIdType = mk_bits(clog2(num_tiles + 1)) - ControllerXbarPktType = mk_controller_noc_xbar_pkt(InterCgraPktType) - - # Interface - s.cgra_id = InPort(CgraIdType) - - # Request from/to other CGRA via NoC. - s.recv_from_inter_cgra_noc = RecvIfcRTL(InterCgraPktType) - s.send_to_inter_cgra_noc = SendIfcRTL(InterCgraPktType) - - s.recv_from_cpu_pkt = RecvIfcRTL(IntraCgraPktType) - s.send_to_ctrl_ring_pkt = SendIfcRTL(IntraCgraPktType) - - s.recv_from_ctrl_ring_pkt = RecvIfcRTL(IntraCgraPktType) - s.send_to_cpu_pkt = SendIfcRTL(IntraCgraPktType) - - # Request from/to tiles. - s.recv_from_tile_load_request_pkt = RecvIfcRTL(InterCgraPktType) - s.recv_from_tile_load_response_pkt = RecvIfcRTL(InterCgraPktType) - s.recv_from_tile_store_request_pkt = RecvIfcRTL(InterCgraPktType) - - s.send_to_mem_load_request = SendIfcRTL(InterCgraPktType) - s.send_to_tile_load_response = SendIfcRTL(InterCgraPktType) - s.send_to_mem_store_request = SendIfcRTL(InterCgraPktType) - - # Component - s.recv_from_tile_load_request_pkt_queue = ChannelRTL(InterCgraPktType, latency = 1) - s.recv_from_tile_load_response_pkt_queue = ChannelRTL(InterCgraPktType, latency = 1) - s.recv_from_tile_store_request_pkt_queue = ChannelRTL(InterCgraPktType, latency = 1) - - s.send_to_mem_load_request_queue = ChannelRTL(InterCgraPktType, latency = 1) - s.send_to_tile_load_response_queue = ChannelRTL(InterCgraPktType, latency = 1) - s.send_to_mem_store_request_queue = ChannelRTL(InterCgraPktType, latency = 1) - - # Crossbar with 4 inports (load and store requests towards remote - # memory, load response from local memory, ctrl&data packet from cpu, - # and command signal from inter-tile, i.e., intra-cgra, ring) and 1 - # outport (only allow one request be sent out per cycle). - s.crossbar = XbarRTL(ControllerXbarPktType, CONTROLLER_CROSSBAR_INPORTS, 1) - s.recv_from_cpu_pkt_queue = NormalQueueRTL(IntraCgraPktType) - s.send_to_cpu_pkt_queue = NormalQueueRTL(IntraCgraPktType) - - # Global reduce unit. - # TODO: We need multiple GlobalReduceUnitRTL to enable more than 1 reduction - # across the fabric: https://github.com/tancheng/VectorCGRA/issues/184. - s.global_reduce_unit = GlobalReduceUnitRTL(InterCgraPktType) - - # LUT for global data address mapping. - addr_offset_nbits = 0 - s.addr2controller_lut = [Wire(CgraIdType) for _ in range(len(controller2addr_map))] - # Assumes the address range is contiguous within one CGRA's SPMs. - addr2controller_vector = [-1 for _ in range(len(controller2addr_map))] - for src_cgra_id, address_range in controller2addr_map.items(): - begin_addr, end_addr = address_range[0], address_range[1] - address_length = end_addr - begin_addr + 1 - assert (address_length & (address_length - 1)) == 0, f"{address_length} is not a power of 2." - addr_offset_nbits = clog2(address_length) - addr_base = begin_addr >> addr_offset_nbits - assert addr2controller_vector[addr_base] == -1, f"address range [{begin_addr}, {end_addr}] overlaps with others." - addr2controller_vector[addr_base] = CgraIdType(src_cgra_id) - - s.addr2controller_lut[addr_base] //= CgraIdType(src_cgra_id) - - # Constructs the idTo2d lut. - s.idTo2d_x_lut= [Wire(XType) for _ in range(multi_cgra_columns * multi_cgra_rows)] - s.idTo2d_y_lut= [Wire(YType) for _ in range(multi_cgra_columns * multi_cgra_rows)] - for cgra_id in idTo2d_map: - xy = idTo2d_map[cgra_id] - s.idTo2d_x_lut[cgra_id] //= XType(xy[0]) - s.idTo2d_y_lut[cgra_id] //= YType(xy[1]) - - s.addr_dst_id = Wire(CgraIdType) - - # Connections. - # Requests towards others, 1 cycle delay to improve timing. - s.recv_from_tile_load_request_pkt_queue.recv //= s.recv_from_tile_load_request_pkt - s.recv_from_tile_load_response_pkt_queue.recv //= s.recv_from_tile_load_response_pkt - s.recv_from_tile_store_request_pkt_queue.recv //= s.recv_from_tile_store_request_pkt - - # Requests towards local from others, 1 cycle delay to improve timing. - s.send_to_mem_load_request_queue.send //= s.send_to_mem_load_request - s.send_to_tile_load_response_queue.send //= s.send_to_tile_load_response - s.send_to_mem_store_request_queue.send //= s.send_to_mem_store_request - - # For control signals delivery from CPU to tiles. - s.recv_from_cpu_pkt //= s.recv_from_cpu_pkt_queue.recv - s.send_to_cpu_pkt //= s.send_to_cpu_pkt_queue.send - - @update - def update_received_msg(): - kLoadRequestInportIdx = 0 - kLoadResponseInportIdx = 1 - kStoreRequestInportIdx = 2 - kFromCpuCtrlAndDataIdx = 3 - kFromInterTileRingIdx = 4 - kFromReduceUnitIdx = 5 - - s.send_to_cpu_pkt_queue.recv.val @= 0 - s.send_to_cpu_pkt_queue.recv.msg @= IntraCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - s.recv_from_ctrl_ring_pkt.rdy @= 0 - - for i in range(CONTROLLER_CROSSBAR_INPORTS): - s.crossbar.recv[i].val @= 0 - s.crossbar.recv[i].msg @= ControllerXbarPktType(0, 0) - - # For the command signal from inter-tile/intra-cgra control ring. - s.crossbar.recv[kFromInterTileRingIdx].val @= s.recv_from_ctrl_ring_pkt.val - s.recv_from_ctrl_ring_pkt.rdy @= s.crossbar.recv[kFromInterTileRingIdx].rdy - s.crossbar.recv[kFromInterTileRingIdx].msg @= \ - ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) - InterCgraPktType(s.cgra_id, - s.recv_from_ctrl_ring_pkt.msg.dst_cgra_id, - s.idTo2d_x_lut[s.cgra_id], # src_x - s.idTo2d_y_lut[s.cgra_id], # src_y - s.recv_from_ctrl_ring_pkt.msg.dst_cgra_x, # dst_x - s.recv_from_ctrl_ring_pkt.msg.dst_cgra_y, # dst_y - s.recv_from_ctrl_ring_pkt.msg.src, # src_tile_id - s.recv_from_ctrl_ring_pkt.msg.dst, # dst_tile_id - 0, # remote_src_port, only used for inter-cgra remote load request/response. - 0, # opaque - 0, # vc_id. No need to specify vc_id for self produce-consume pkt thanks to the additional VC buffer. - s.recv_from_ctrl_ring_pkt.msg.payload)) - - # For the load request from local tiles. - s.crossbar.recv[kLoadRequestInportIdx].val @= s.recv_from_tile_load_request_pkt_queue.send.val - s.recv_from_tile_load_request_pkt_queue.send.rdy @= s.crossbar.recv[kLoadRequestInportIdx].rdy - s.crossbar.recv[kLoadRequestInportIdx].msg @= \ - ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) - s.recv_from_tile_load_request_pkt_queue.send.msg) - - # For the store request from local tiles. - s.crossbar.recv[kStoreRequestInportIdx].val @= s.recv_from_tile_store_request_pkt_queue.send.val - s.recv_from_tile_store_request_pkt_queue.send.rdy @= s.crossbar.recv[kStoreRequestInportIdx].rdy - s.crossbar.recv[kStoreRequestInportIdx].msg @= \ - ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) - s.recv_from_tile_store_request_pkt_queue.send.msg) - - # For the load response (i.e., the data towards other) from local memory. - s.crossbar.recv[kLoadResponseInportIdx].val @= \ - s.recv_from_tile_load_response_pkt_queue.send.val - s.recv_from_tile_load_response_pkt_queue.send.rdy @= s.crossbar.recv[kLoadResponseInportIdx].rdy - s.crossbar.recv[kLoadResponseInportIdx].msg @= \ - ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) - s.recv_from_tile_load_response_pkt_queue.send.msg) - - # For the load response (i.e., the data towards other) from local memory. - s.crossbar.recv[kFromReduceUnitIdx].val @= \ - s.global_reduce_unit.send.val - s.global_reduce_unit.send.rdy @= s.crossbar.recv[kFromReduceUnitIdx].rdy - s.crossbar.recv[kFromReduceUnitIdx].msg @= s.global_reduce_unit.send.msg - - # For the ctrl and data preloading. - s.crossbar.recv[kFromCpuCtrlAndDataIdx].val @= \ - s.recv_from_cpu_pkt_queue.send.val - s.recv_from_cpu_pkt_queue.send.rdy @= s.crossbar.recv[kFromCpuCtrlAndDataIdx].rdy - s.crossbar.recv[kFromCpuCtrlAndDataIdx].msg @= \ - ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) - InterCgraPktType(s.cgra_id, # src - s.recv_from_cpu_pkt_queue.send.msg.dst_cgra_id, # dst - 0, # src_x - 0, # src_y - s.idTo2d_x_lut[s.recv_from_cpu_pkt_queue.send.msg.dst_cgra_id], # dst_x - s.idTo2d_y_lut[s.recv_from_cpu_pkt_queue.send.msg.dst_cgra_id], # dst_y - num_tiles, # src_tile_id, num_tiles is used to indicate the request is from CPU, so the LOAD response can come back. - s.recv_from_cpu_pkt_queue.send.msg.dst, # dst_tile_id - 0, # remote_src_port, only used for inter-cgra remote load request/response. - 0, # opaque - 0, # vc_id - s.recv_from_cpu_pkt_queue.send.msg.payload)) - - # TODO: For the other cmd types. - - - # @update - # def update_received_msg_from_noc(): - - # Initiates the signals. - s.send_to_mem_load_request_queue.recv.val @= 0 - s.send_to_mem_store_request_queue.recv.val @= 0 - s.send_to_tile_load_response_queue.recv.val @= 0 - - s.send_to_mem_load_request_queue.recv.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - s.send_to_mem_store_request_queue.recv.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - s.send_to_tile_load_response_queue.recv.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - - s.recv_from_inter_cgra_noc.rdy @= 0 - s.send_to_ctrl_ring_pkt.val @= 0 - s.send_to_ctrl_ring_pkt.msg @= IntraCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - s.global_reduce_unit.recv_count.val @= 0 - s.global_reduce_unit.recv_count.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - s.global_reduce_unit.recv_data.val @= 0 - s.global_reduce_unit.recv_data.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - - # For the load request from NoC. - received_pkt = s.recv_from_inter_cgra_noc.msg - if s.recv_from_inter_cgra_noc.val: - if s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_LOAD_REQUEST: - s.send_to_mem_load_request_queue.recv.val @= 1 - - if s.send_to_mem_load_request_queue.recv.rdy: - s.recv_from_inter_cgra_noc.rdy @= 1 - s.send_to_mem_load_request_queue.recv.msg @= received_pkt - - elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_STORE_REQUEST: - s.send_to_mem_store_request_queue.recv.msg @= received_pkt - s.send_to_mem_store_request_queue.recv.val @= 1 - - if s.send_to_mem_store_request_queue.recv.rdy: - s.recv_from_inter_cgra_noc.rdy @= 1 - - elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_LOAD_RESPONSE: - # FIXME: This condition needs to check whether this controller is the - # one connecting to CPU, and with the help from additional field indicating - # whether the packet is originally from CPU. - # https://github.com/tancheng/VectorCGRA/issues/116. - if s.recv_from_inter_cgra_noc.msg.dst_tile_id == num_tiles: - s.recv_from_inter_cgra_noc.rdy @= s.send_to_cpu_pkt_queue.recv.rdy - s.send_to_cpu_pkt_queue.recv.val @= 1 - s.send_to_cpu_pkt_queue.recv.msg @= \ - IntraCgraPktType(s.recv_from_inter_cgra_noc.msg.src_tile_id, # src - s.recv_from_inter_cgra_noc.msg.dst_tile_id, # dst - s.recv_from_inter_cgra_noc.msg.src, # src_cgra_id - s.recv_from_inter_cgra_noc.msg.dst, # src_cgra_id - s.recv_from_inter_cgra_noc.msg.src_x, # src_cgra_x - s.recv_from_inter_cgra_noc.msg.src_y, # src_cgra_y - s.recv_from_inter_cgra_noc.msg.dst_x, # dst_cgra_x - s.recv_from_inter_cgra_noc.msg.dst_y, # dst_cgra_y - 0, # opaque - 0, # vc_id - s.recv_from_inter_cgra_noc.msg.payload) - - else: - s.recv_from_inter_cgra_noc.rdy @= s.send_to_tile_load_response_queue.recv.rdy - s.send_to_tile_load_response_queue.recv.msg @= received_pkt - s.send_to_tile_load_response_queue.recv.val @= 1 - - elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_COMPLETE: - s.recv_from_inter_cgra_noc.rdy @= s.send_to_cpu_pkt_queue.recv.rdy - s.send_to_cpu_pkt_queue.recv.val @= 1 - s.send_to_cpu_pkt_queue.recv.msg @= \ - IntraCgraPktType(s.recv_from_inter_cgra_noc.msg.src_tile_id, # src - s.recv_from_inter_cgra_noc.msg.dst_tile_id, # dst - s.recv_from_inter_cgra_noc.msg.src, # src_cgra_id - s.recv_from_inter_cgra_noc.msg.dst, # src_cgra_id - s.recv_from_inter_cgra_noc.msg.src_x, # src_cgra_x - s.recv_from_inter_cgra_noc.msg.src_y, # src_cgra_y - s.recv_from_inter_cgra_noc.msg.dst_x, # dst_cgra_x - s.recv_from_inter_cgra_noc.msg.dst_y, # dst_cgra_y - 0, # opaque - 0, # vc_id - s.recv_from_inter_cgra_noc.msg.payload) - - # Consume and discard the leaf counter complete signal (loop termination - # notification from LoopCounter FU) to avoid blocking the NoC. - elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_LEAF_COUNTER_COMPLETE: - s.recv_from_inter_cgra_noc.rdy @= 1 - - elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD: - s.recv_from_inter_cgra_noc.rdy @= s.global_reduce_unit.recv_data.rdy - s.global_reduce_unit.recv_data.val @= 1 - s.global_reduce_unit.recv_data.msg @= s.recv_from_inter_cgra_noc.msg - - elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_GLOBAL_REDUCE_COUNT: - s.recv_from_inter_cgra_noc.rdy @= s.global_reduce_unit.recv_count.rdy - s.global_reduce_unit.recv_count.val @= 1 - s.global_reduce_unit.recv_count.msg @= s.recv_from_inter_cgra_noc.msg - - elif (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU_CROSSBAR) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_PROLOGUE_ROUTING_CROSSBAR) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_TOTAL_CTRL_COUNT) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_COUNT_PER_ITER) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_CTRL_LOWER_BOUND) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONST) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD_RESPONSE) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_GLOBAL_REDUCE_MUL_RESPONSE) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_START_ADDR) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_STRIDE) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_END_ADDR) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_PAUSE) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_PRESERVE) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_RESUME) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_RECORD_PHI_ADDR) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_TERMINATE) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_LAUNCH) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_LOOP_LOWER) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_LOOP_UPPER) | \ - (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_LOOP_STEP) : - s.recv_from_inter_cgra_noc.rdy @= s.send_to_ctrl_ring_pkt.rdy - s.send_to_ctrl_ring_pkt.val @= s.recv_from_inter_cgra_noc.val - s.send_to_ctrl_ring_pkt.msg @= \ - IntraCgraPktType(s.recv_from_inter_cgra_noc.msg.src_tile_id, # src - s.recv_from_inter_cgra_noc.msg.dst_tile_id, # dst - s.recv_from_inter_cgra_noc.msg.src, # src_cgra_id - s.recv_from_inter_cgra_noc.msg.dst, # src_cgra_id - s.recv_from_inter_cgra_noc.msg.src_x, # src_cgra_x - s.recv_from_inter_cgra_noc.msg.src_y, # src_cgra_y - s.recv_from_inter_cgra_noc.msg.dst_x, # dst_cgra_x - s.recv_from_inter_cgra_noc.msg.dst_y, # dst_cgra_y - 0, # opaque - 0, # vc_id - s.recv_from_inter_cgra_noc.msg.payload) - - # else: - # # TODO: Handle other cmd types. - # assert(False) - - @update - def update_sending_to_noc_msg(): - s.send_to_inter_cgra_noc.val @= s.crossbar.send[0].val - s.crossbar.send[0].rdy @= s.send_to_inter_cgra_noc.rdy - s.send_to_inter_cgra_noc.msg @= s.crossbar.send[0].msg.inter_cgra_pkt - # addr_dst_id = 0 - if (s.crossbar.send[0].msg.inter_cgra_pkt.payload.cmd == CMD_LOAD_REQUEST) | \ - (s.crossbar.send[0].msg.inter_cgra_pkt.payload.cmd == CMD_STORE_REQUEST): - s.send_to_inter_cgra_noc.msg.dst @= s.addr_dst_id - s.send_to_inter_cgra_noc.msg.dst_x @= s.idTo2d_x_lut[s.addr_dst_id] - s.send_to_inter_cgra_noc.msg.dst_y @= s.idTo2d_y_lut[s.addr_dst_id] - - @update - def capture_addr_dst_id(): - s.addr_dst_id @= s.addr2controller_lut[trunc(s.crossbar.send[0].msg.inter_cgra_pkt.payload.data_addr >> addr_offset_nbits, CgraIdType)] - - def line_trace(s): - recv_from_cpu_pkt_str = "recv_from_cpu_pkt: " + str(s.recv_from_cpu_pkt.msg) - recv_from_cpu_pkt_queue_str = "recv_from_cpu_pkt_queue.send: " + str(s.recv_from_cpu_pkt_queue.send.msg) - crossbar_recv_str = "crossbar_recv.val:" + str(s.crossbar.recv[3].val) + " crossbar_recv.rdy:" + str(s.crossbar.recv[3].rdy) + " crossbar_recv.msg: " + str(s.crossbar.recv[3].msg) - send_to_ctrl_ring_pkt_str = "send_to_ctrl_ring_pkt.val:" + str(s.send_to_ctrl_ring_pkt.val) + " send_to_ctrl_ring_pkt: " + str(s.send_to_ctrl_ring_pkt.msg) + " send_to_ctrl_ring_pkt.rdy:" + str(s.send_to_ctrl_ring_pkt.rdy) - recv_from_tile_load_request_pkt_str = "recv_from_tile_load_request_pkt: " + str(s.recv_from_tile_load_request_pkt.msg) - recv_from_tile_load_response_pkt_str = "recv_from_tile_load_response_pkt: " + str(s.recv_from_tile_load_response_pkt.msg) - recv_from_tile_store_request_pkt_str = "recv_from_tile_store_request_pkt: " + str(s.recv_from_tile_store_request_pkt.msg) - crossbar_str = "crossbar: {" + s.crossbar.line_trace() + "}" - send_to_mem_load_request_str = "send_to_mem_load_request: " + str(s.send_to_mem_load_request.msg) - send_to_mem_store_request_str = "send_to_mem_store_request: " + str(s.send_to_mem_store_request.msg) - recv_from_noc_str ="recv_from_noc_pkt.val: " + str(s.recv_from_inter_cgra_noc.val) + " recv_from_noc_pkt.msg: " + str(s.recv_from_inter_cgra_noc.msg) + " recv_from_noc_pkt.rdy: " + str(s.recv_from_inter_cgra_noc.rdy) - send_to_noc_str = "send_to_noc_pkt: " + str(s.send_to_inter_cgra_noc.msg) + "; rdy: " + str(s.send_to_inter_cgra_noc.rdy) + "; val: " + str(s.send_to_inter_cgra_noc.val) - return f'{recv_from_cpu_pkt_str} || {recv_from_cpu_pkt_queue_str} || {crossbar_recv_str} || {send_to_ctrl_ring_pkt_str} || {recv_from_tile_load_request_pkt_str} || {recv_from_tile_load_response_pkt_str} || {recv_from_tile_store_request_pkt_str} || {crossbar_str} || {send_to_mem_load_request_str} || {send_to_mem_store_request_str} || {recv_from_noc_str} || {send_to_noc_str}\n' +""" +========================================================================== +ControllerRTL.py +========================================================================== +Controller for each CGRA. Mutiple controllers are interconnected in a +multi-cgra system. + +Author : Cheng Tan + Date : Dec 2, 2024 +""" + +from ..lib.basic.val_rdy.ifcs import RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import SendIfcRTL +from ..lib.basic.val_rdy.queues import NormalQueueRTL +from ..lib.messages import * +from ..lib.opt_type import * +from ..lib.util.common import * +from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL +from ..noc.PyOCN.pymtl3_net.xbar.XbarRTL import XbarRTL + +from .GlobalReduceUnitRTL import GlobalReduceUnitRTL +from ..lib.util.data_struct_attr import * + +class ControllerRTL(Component): + + def construct(s, + InterCgraPktType, + multi_cgra_rows, + multi_cgra_columns, + num_tiles, + controller2addr_map, + idTo2d_map): + + # Derives types from InterCgraPktType. + CgraPayloadType = InterCgraPktType.get_field_type(kAttrPayload) + DataType = CgraPayloadType.get_field_type(kAttrData) + DataAddrType = CgraPayloadType.get_field_type(kAttrDataAddr) + + # Derives CgraIdType from grid dimensions. + CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) + + # Reconstructs IntraCgraPktType. + IntraCgraPktType = mk_intra_cgra_pkt(multi_cgra_columns, + multi_cgra_rows, + num_tiles, + CgraPayloadType) + + assert(multi_cgra_columns >= multi_cgra_rows) + + # Used for calculating the x/y position. + XType = mk_bits(max(clog2(multi_cgra_columns), 1)) + YType = mk_bits(max(clog2(multi_cgra_rows), 1)) + TileIdType = mk_bits(clog2(num_tiles + 1)) + ControllerXbarPktType = mk_controller_noc_xbar_pkt(InterCgraPktType) + + # Interface + s.cgra_id = InPort(CgraIdType) + + # Request from/to other CGRA via NoC. + s.recv_from_inter_cgra_noc = RecvIfcRTL(InterCgraPktType) + s.send_to_inter_cgra_noc = SendIfcRTL(InterCgraPktType) + + s.recv_from_cpu_pkt = RecvIfcRTL(IntraCgraPktType) + s.send_to_ctrl_ring_pkt = SendIfcRTL(IntraCgraPktType) + + s.recv_from_ctrl_ring_pkt = RecvIfcRTL(IntraCgraPktType) + s.send_to_cpu_pkt = SendIfcRTL(IntraCgraPktType) + + # Request from/to tiles. + s.recv_from_tile_load_request_pkt = RecvIfcRTL(InterCgraPktType) + s.recv_from_tile_load_response_pkt = RecvIfcRTL(InterCgraPktType) + s.recv_from_tile_store_request_pkt = RecvIfcRTL(InterCgraPktType) + + s.send_to_mem_load_request = SendIfcRTL(InterCgraPktType) + s.send_to_tile_load_response = SendIfcRTL(InterCgraPktType) + s.send_to_mem_store_request = SendIfcRTL(InterCgraPktType) + + # Component + s.recv_from_tile_load_request_pkt_queue = ChannelRTL(InterCgraPktType, latency = 1) + s.recv_from_tile_load_response_pkt_queue = ChannelRTL(InterCgraPktType, latency = 1) + s.recv_from_tile_store_request_pkt_queue = ChannelRTL(InterCgraPktType, latency = 1) + + s.send_to_mem_load_request_queue = ChannelRTL(InterCgraPktType, latency = 1) + s.send_to_tile_load_response_queue = ChannelRTL(InterCgraPktType, latency = 1) + s.send_to_mem_store_request_queue = ChannelRTL(InterCgraPktType, latency = 1) + + # Crossbar with 4 inports (load and store requests towards remote + # memory, load response from local memory, ctrl&data packet from cpu, + # and command signal from inter-tile, i.e., intra-cgra, ring) and 1 + # outport (only allow one request be sent out per cycle). + s.crossbar = XbarRTL(ControllerXbarPktType, CONTROLLER_CROSSBAR_INPORTS, 1) + s.recv_from_cpu_pkt_queue = NormalQueueRTL(IntraCgraPktType) + s.send_to_cpu_pkt_queue = NormalQueueRTL(IntraCgraPktType) + + # Global reduce unit. + # TODO: We need multiple GlobalReduceUnitRTL to enable more than 1 reduction + # across the fabric: https://github.com/tancheng/VectorCGRA/issues/184. + s.global_reduce_unit = GlobalReduceUnitRTL(InterCgraPktType) + + # LUT for global data address mapping. + addr_offset_nbits = 0 + s.addr2controller_lut = [Wire(CgraIdType) for _ in range(len(controller2addr_map))] + # Assumes the address range is contiguous within one CGRA's SPMs. + addr2controller_vector = [-1 for _ in range(len(controller2addr_map))] + for src_cgra_id, address_range in controller2addr_map.items(): + begin_addr, end_addr = address_range[0], address_range[1] + address_length = end_addr - begin_addr + 1 + assert (address_length & (address_length - 1)) == 0, f"{address_length} is not a power of 2." + addr_offset_nbits = clog2(address_length) + addr_base = begin_addr >> addr_offset_nbits + assert addr2controller_vector[addr_base] == -1, f"address range [{begin_addr}, {end_addr}] overlaps with others." + addr2controller_vector[addr_base] = CgraIdType(src_cgra_id) + + s.addr2controller_lut[addr_base] //= CgraIdType(src_cgra_id) + + # Constructs the idTo2d lut. + s.idTo2d_x_lut= [Wire(XType) for _ in range(multi_cgra_columns * multi_cgra_rows)] + s.idTo2d_y_lut= [Wire(YType) for _ in range(multi_cgra_columns * multi_cgra_rows)] + for cgra_id in idTo2d_map: + xy = idTo2d_map[cgra_id] + s.idTo2d_x_lut[cgra_id] //= XType(xy[0]) + s.idTo2d_y_lut[cgra_id] //= YType(xy[1]) + + s.addr_dst_id = Wire(CgraIdType) + + # Connections. + # Requests towards others, 1 cycle delay to improve timing. + s.recv_from_tile_load_request_pkt_queue.recv //= s.recv_from_tile_load_request_pkt + s.recv_from_tile_load_response_pkt_queue.recv //= s.recv_from_tile_load_response_pkt + s.recv_from_tile_store_request_pkt_queue.recv //= s.recv_from_tile_store_request_pkt + + # Requests towards local from others, 1 cycle delay to improve timing. + s.send_to_mem_load_request_queue.send //= s.send_to_mem_load_request + s.send_to_tile_load_response_queue.send //= s.send_to_tile_load_response + s.send_to_mem_store_request_queue.send //= s.send_to_mem_store_request + + # For control signals delivery from CPU to tiles. + s.recv_from_cpu_pkt //= s.recv_from_cpu_pkt_queue.recv + s.send_to_cpu_pkt //= s.send_to_cpu_pkt_queue.send + + @update + def update_received_msg(): + kLoadRequestInportIdx = 0 + kLoadResponseInportIdx = 1 + kStoreRequestInportIdx = 2 + kFromCpuCtrlAndDataIdx = 3 + kFromInterTileRingIdx = 4 + kFromReduceUnitIdx = 5 + + s.send_to_cpu_pkt_queue.recv.val @= 0 + s.send_to_cpu_pkt_queue.recv.msg @= IntraCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.recv_from_ctrl_ring_pkt.rdy @= 0 + + for i in range(CONTROLLER_CROSSBAR_INPORTS): + s.crossbar.recv[i].val @= 0 + s.crossbar.recv[i].msg @= ControllerXbarPktType(0, 0) + + # For the command signal from inter-tile/intra-cgra control ring. + s.crossbar.recv[kFromInterTileRingIdx].val @= s.recv_from_ctrl_ring_pkt.val + s.recv_from_ctrl_ring_pkt.rdy @= s.crossbar.recv[kFromInterTileRingIdx].rdy + s.crossbar.recv[kFromInterTileRingIdx].msg @= \ + ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) + InterCgraPktType(s.cgra_id, + s.recv_from_ctrl_ring_pkt.msg.dst_cgra_id, + s.idTo2d_x_lut[s.cgra_id], # src_x + s.idTo2d_y_lut[s.cgra_id], # src_y + s.recv_from_ctrl_ring_pkt.msg.dst_cgra_x, # dst_x + s.recv_from_ctrl_ring_pkt.msg.dst_cgra_y, # dst_y + s.recv_from_ctrl_ring_pkt.msg.src, # src_tile_id + s.recv_from_ctrl_ring_pkt.msg.dst, # dst_tile_id + 0, # remote_src_port, only used for inter-cgra remote load request/response. + 0, # opaque + 0, # vc_id. No need to specify vc_id for self produce-consume pkt thanks to the additional VC buffer. + s.recv_from_ctrl_ring_pkt.msg.payload)) + + # For the load request from local tiles. + s.crossbar.recv[kLoadRequestInportIdx].val @= s.recv_from_tile_load_request_pkt_queue.send.val + s.recv_from_tile_load_request_pkt_queue.send.rdy @= s.crossbar.recv[kLoadRequestInportIdx].rdy + s.crossbar.recv[kLoadRequestInportIdx].msg @= \ + ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) + s.recv_from_tile_load_request_pkt_queue.send.msg) + + # For the store request from local tiles. + s.crossbar.recv[kStoreRequestInportIdx].val @= s.recv_from_tile_store_request_pkt_queue.send.val + s.recv_from_tile_store_request_pkt_queue.send.rdy @= s.crossbar.recv[kStoreRequestInportIdx].rdy + s.crossbar.recv[kStoreRequestInportIdx].msg @= \ + ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) + s.recv_from_tile_store_request_pkt_queue.send.msg) + + # For the load response (i.e., the data towards other) from local memory. + s.crossbar.recv[kLoadResponseInportIdx].val @= \ + s.recv_from_tile_load_response_pkt_queue.send.val + s.recv_from_tile_load_response_pkt_queue.send.rdy @= s.crossbar.recv[kLoadResponseInportIdx].rdy + s.crossbar.recv[kLoadResponseInportIdx].msg @= \ + ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) + s.recv_from_tile_load_response_pkt_queue.send.msg) + + # For the load response (i.e., the data towards other) from local memory. + s.crossbar.recv[kFromReduceUnitIdx].val @= \ + s.global_reduce_unit.send.val + s.global_reduce_unit.send.rdy @= s.crossbar.recv[kFromReduceUnitIdx].rdy + s.crossbar.recv[kFromReduceUnitIdx].msg @= s.global_reduce_unit.send.msg + + # For the ctrl and data preloading. + s.crossbar.recv[kFromCpuCtrlAndDataIdx].val @= \ + s.recv_from_cpu_pkt_queue.send.val + s.recv_from_cpu_pkt_queue.send.rdy @= s.crossbar.recv[kFromCpuCtrlAndDataIdx].rdy + s.crossbar.recv[kFromCpuCtrlAndDataIdx].msg @= \ + ControllerXbarPktType(0, # dst (always 0 to align with the single outport of the crossbar, i.e., NoC) + InterCgraPktType(s.cgra_id, # src + s.recv_from_cpu_pkt_queue.send.msg.dst_cgra_id, # dst + 0, # src_x + 0, # src_y + s.idTo2d_x_lut[s.recv_from_cpu_pkt_queue.send.msg.dst_cgra_id], # dst_x + s.idTo2d_y_lut[s.recv_from_cpu_pkt_queue.send.msg.dst_cgra_id], # dst_y + num_tiles, # src_tile_id, num_tiles is used to indicate the request is from CPU, so the LOAD response can come back. + s.recv_from_cpu_pkt_queue.send.msg.dst, # dst_tile_id + 0, # remote_src_port, only used for inter-cgra remote load request/response. + 0, # opaque + 0, # vc_id + s.recv_from_cpu_pkt_queue.send.msg.payload)) + + # TODO: For the other cmd types. + + + # @update + # def update_received_msg_from_noc(): + + # Initiates the signals. + s.send_to_mem_load_request_queue.recv.val @= 0 + s.send_to_mem_store_request_queue.recv.val @= 0 + s.send_to_tile_load_response_queue.recv.val @= 0 + + s.send_to_mem_load_request_queue.recv.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.send_to_mem_store_request_queue.recv.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.send_to_tile_load_response_queue.recv.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + + s.recv_from_inter_cgra_noc.rdy @= 0 + s.send_to_ctrl_ring_pkt.val @= 0 + s.send_to_ctrl_ring_pkt.msg @= IntraCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.global_reduce_unit.recv_count.val @= 0 + s.global_reduce_unit.recv_count.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.global_reduce_unit.recv_data.val @= 0 + s.global_reduce_unit.recv_data.msg @= InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + + # For the load request from NoC. + received_pkt = s.recv_from_inter_cgra_noc.msg + if s.recv_from_inter_cgra_noc.val: + if s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_LOAD_REQUEST: + s.send_to_mem_load_request_queue.recv.val @= 1 + + if s.send_to_mem_load_request_queue.recv.rdy: + s.recv_from_inter_cgra_noc.rdy @= 1 + s.send_to_mem_load_request_queue.recv.msg @= received_pkt + + elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_STORE_REQUEST: + s.send_to_mem_store_request_queue.recv.msg @= received_pkt + s.send_to_mem_store_request_queue.recv.val @= 1 + + if s.send_to_mem_store_request_queue.recv.rdy: + s.recv_from_inter_cgra_noc.rdy @= 1 + + elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_LOAD_RESPONSE: + # FIXME: This condition needs to check whether this controller is the + # one connecting to CPU, and with the help from additional field indicating + # whether the packet is originally from CPU. + # https://github.com/tancheng/VectorCGRA/issues/116. + if s.recv_from_inter_cgra_noc.msg.dst_tile_id == num_tiles: + s.recv_from_inter_cgra_noc.rdy @= s.send_to_cpu_pkt_queue.recv.rdy + s.send_to_cpu_pkt_queue.recv.val @= 1 + s.send_to_cpu_pkt_queue.recv.msg @= \ + IntraCgraPktType(s.recv_from_inter_cgra_noc.msg.src_tile_id, # src + s.recv_from_inter_cgra_noc.msg.dst_tile_id, # dst + s.recv_from_inter_cgra_noc.msg.src, # src_cgra_id + s.recv_from_inter_cgra_noc.msg.dst, # src_cgra_id + s.recv_from_inter_cgra_noc.msg.src_x, # src_cgra_x + s.recv_from_inter_cgra_noc.msg.src_y, # src_cgra_y + s.recv_from_inter_cgra_noc.msg.dst_x, # dst_cgra_x + s.recv_from_inter_cgra_noc.msg.dst_y, # dst_cgra_y + 0, # opaque + 0, # vc_id + s.recv_from_inter_cgra_noc.msg.payload) + + else: + s.recv_from_inter_cgra_noc.rdy @= s.send_to_tile_load_response_queue.recv.rdy + s.send_to_tile_load_response_queue.recv.msg @= received_pkt + s.send_to_tile_load_response_queue.recv.val @= 1 + + elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_COMPLETE: + s.recv_from_inter_cgra_noc.rdy @= s.send_to_cpu_pkt_queue.recv.rdy + s.send_to_cpu_pkt_queue.recv.val @= 1 + s.send_to_cpu_pkt_queue.recv.msg @= \ + IntraCgraPktType(s.recv_from_inter_cgra_noc.msg.src_tile_id, # src + s.recv_from_inter_cgra_noc.msg.dst_tile_id, # dst + s.recv_from_inter_cgra_noc.msg.src, # src_cgra_id + s.recv_from_inter_cgra_noc.msg.dst, # src_cgra_id + s.recv_from_inter_cgra_noc.msg.src_x, # src_cgra_x + s.recv_from_inter_cgra_noc.msg.src_y, # src_cgra_y + s.recv_from_inter_cgra_noc.msg.dst_x, # dst_cgra_x + s.recv_from_inter_cgra_noc.msg.dst_y, # dst_cgra_y + 0, # opaque + 0, # vc_id + s.recv_from_inter_cgra_noc.msg.payload) + + # Consume and discard the leaf counter complete signal (loop termination + # notification from LoopCounter FU) to avoid blocking the NoC. + elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_LEAF_COUNTER_COMPLETE: + s.recv_from_inter_cgra_noc.rdy @= 1 + + elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD: + s.recv_from_inter_cgra_noc.rdy @= s.global_reduce_unit.recv_data.rdy + s.global_reduce_unit.recv_data.val @= 1 + s.global_reduce_unit.recv_data.msg @= s.recv_from_inter_cgra_noc.msg + + elif s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_GLOBAL_REDUCE_COUNT: + s.recv_from_inter_cgra_noc.rdy @= s.global_reduce_unit.recv_count.rdy + s.global_reduce_unit.recv_count.val @= 1 + s.global_reduce_unit.recv_count.msg @= s.recv_from_inter_cgra_noc.msg + + elif (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU_CROSSBAR) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_PROLOGUE_ROUTING_CROSSBAR) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_TOTAL_CTRL_COUNT) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_COUNT_PER_ITER) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_CTRL_LOWER_BOUND) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONST) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD_RESPONSE) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_GLOBAL_REDUCE_MUL_RESPONSE) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_START_ADDR) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_STRIDE) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_END_ADDR) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_PAUSE) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_PRESERVE) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_RESUME) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_RECORD_PHI_ADDR) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_TERMINATE) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_LAUNCH) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_LOOP_LOWER) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_LOOP_UPPER) | \ + (s.recv_from_inter_cgra_noc.msg.payload.cmd == CMD_CONFIG_LOOP_STEP) : + s.recv_from_inter_cgra_noc.rdy @= s.send_to_ctrl_ring_pkt.rdy + s.send_to_ctrl_ring_pkt.val @= s.recv_from_inter_cgra_noc.val + s.send_to_ctrl_ring_pkt.msg @= \ + IntraCgraPktType(s.recv_from_inter_cgra_noc.msg.src_tile_id, # src + s.recv_from_inter_cgra_noc.msg.dst_tile_id, # dst + s.recv_from_inter_cgra_noc.msg.src, # src_cgra_id + s.recv_from_inter_cgra_noc.msg.dst, # src_cgra_id + s.recv_from_inter_cgra_noc.msg.src_x, # src_cgra_x + s.recv_from_inter_cgra_noc.msg.src_y, # src_cgra_y + s.recv_from_inter_cgra_noc.msg.dst_x, # dst_cgra_x + s.recv_from_inter_cgra_noc.msg.dst_y, # dst_cgra_y + 0, # opaque + 0, # vc_id + s.recv_from_inter_cgra_noc.msg.payload) + + # else: + # # TODO: Handle other cmd types. + # assert(False) + + @update + def update_sending_to_noc_msg(): + s.send_to_inter_cgra_noc.val @= s.crossbar.send[0].val + s.crossbar.send[0].rdy @= s.send_to_inter_cgra_noc.rdy + s.send_to_inter_cgra_noc.msg @= s.crossbar.send[0].msg.inter_cgra_pkt + # addr_dst_id = 0 + if (s.crossbar.send[0].msg.inter_cgra_pkt.payload.cmd == CMD_LOAD_REQUEST) | \ + (s.crossbar.send[0].msg.inter_cgra_pkt.payload.cmd == CMD_STORE_REQUEST): + s.send_to_inter_cgra_noc.msg.dst @= s.addr_dst_id + s.send_to_inter_cgra_noc.msg.dst_x @= s.idTo2d_x_lut[s.addr_dst_id] + s.send_to_inter_cgra_noc.msg.dst_y @= s.idTo2d_y_lut[s.addr_dst_id] + + @update + def capture_addr_dst_id(): + s.addr_dst_id @= s.addr2controller_lut[trunc(s.crossbar.send[0].msg.inter_cgra_pkt.payload.data_addr >> addr_offset_nbits, CgraIdType)] + + def line_trace(s): + recv_from_cpu_pkt_str = "recv_from_cpu_pkt: " + str(s.recv_from_cpu_pkt.msg) + recv_from_cpu_pkt_queue_str = "recv_from_cpu_pkt_queue.send: " + str(s.recv_from_cpu_pkt_queue.send.msg) + crossbar_recv_str = "crossbar_recv.val:" + str(s.crossbar.recv[3].val) + " crossbar_recv.rdy:" + str(s.crossbar.recv[3].rdy) + " crossbar_recv.msg: " + str(s.crossbar.recv[3].msg) + send_to_ctrl_ring_pkt_str = "send_to_ctrl_ring_pkt.val:" + str(s.send_to_ctrl_ring_pkt.val) + " send_to_ctrl_ring_pkt: " + str(s.send_to_ctrl_ring_pkt.msg) + " send_to_ctrl_ring_pkt.rdy:" + str(s.send_to_ctrl_ring_pkt.rdy) + recv_from_tile_load_request_pkt_str = "recv_from_tile_load_request_pkt: " + str(s.recv_from_tile_load_request_pkt.msg) + recv_from_tile_load_response_pkt_str = "recv_from_tile_load_response_pkt: " + str(s.recv_from_tile_load_response_pkt.msg) + recv_from_tile_store_request_pkt_str = "recv_from_tile_store_request_pkt: " + str(s.recv_from_tile_store_request_pkt.msg) + crossbar_str = "crossbar: {" + s.crossbar.line_trace() + "}" + send_to_mem_load_request_str = "send_to_mem_load_request: " + str(s.send_to_mem_load_request.msg) + send_to_mem_store_request_str = "send_to_mem_store_request: " + str(s.send_to_mem_store_request.msg) + recv_from_noc_str ="recv_from_noc_pkt.val: " + str(s.recv_from_inter_cgra_noc.val) + " recv_from_noc_pkt.msg: " + str(s.recv_from_inter_cgra_noc.msg) + " recv_from_noc_pkt.rdy: " + str(s.recv_from_inter_cgra_noc.rdy) + send_to_noc_str = "send_to_noc_pkt: " + str(s.send_to_inter_cgra_noc.msg) + "; rdy: " + str(s.send_to_inter_cgra_noc.rdy) + "; val: " + str(s.send_to_inter_cgra_noc.val) + return f'{recv_from_cpu_pkt_str} || {recv_from_cpu_pkt_queue_str} || {crossbar_recv_str} || {send_to_ctrl_ring_pkt_str} || {recv_from_tile_load_request_pkt_str} || {recv_from_tile_load_response_pkt_str} || {recv_from_tile_store_request_pkt_str} || {crossbar_str} || {send_to_mem_load_request_str} || {send_to_mem_store_request_str} || {recv_from_noc_str} || {send_to_noc_str}\n' diff --git a/mem/ctrl/RingMultiCtrlMemDynamicRTL.py b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py index d5556396..a407c56b 100644 --- a/mem/ctrl/RingMultiCtrlMemDynamicRTL.py +++ b/mem/ctrl/RingMultiCtrlMemDynamicRTL.py @@ -1,69 +1,69 @@ -""" -========================================================================== -RingMultiCtrlMemDynamicRTL.py -========================================================================== -Ring connecting multiple control memories. - -Author : Cheng Tan - Date : Dec 22, 2024 -""" - -from pymtl3 import * -from pymtl3.stdlib.primitive import RegisterFile -from .CtrlMemDynamicRTL import CtrlMemDynamicRTL -from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ...lib.opt_type import * -from ...noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos -from ...noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL -from ...lib.util.data_struct_attr import * - -class RingMultiCtrlMemDynamicRTL(Component): - def construct(s, CtrlPktType, CgraPayloadType, - width, height, ctrl_mem_size, num_fu_inports, - num_fu_outports, num_tile_inports, num_tile_outports, - ctrl_count_per_iter = 4, total_ctrl_steps = 4): - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - # Constant - num_tiles = width * height - s.num_tiles = width * height - CtrlRingPos = mk_ring_pos(num_tiles + 1) - - # Interface - s.send_ctrl = [SendIfcRTL(CtrlSignalType) for _ in range(s.num_tiles)] - s.recv_pkt_from_controller = RecvIfcRTL(CtrlPktType) - s.send_to_controller_pkt = SendIfcRTL(CtrlPktType) - - # Components - s.ctrl_memories = [ - CtrlMemDynamicRTL(CtrlPktType, - ctrl_mem_size, num_fu_inports, - num_fu_outports, num_tile_inports, - num_tile_outports, 1, num_tiles, ctrl_count_per_iter, - total_ctrl_steps) for terminal_id in range(s.num_tiles)] - s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, num_tiles + 1, 1) - - # Connections - for i in range(s.num_tiles): - s.ctrl_memories[i].cgra_id //= 0 - s.ctrl_memories[i].tile_id //= i - s.ctrl_memories[i].recv_from_element.val //= 1 - s.ctrl_memories[i].recv_from_element.msg //= CgraPayloadType() - - for i in range(s.num_tiles): - s.ctrl_ring.send[i] //= s.ctrl_memories[i].recv_pkt_from_controller - s.ctrl_ring.send[s.num_tiles] //= s.send_to_controller_pkt - - for i in range(s.num_tiles): - s.ctrl_ring.recv[i] //= s.ctrl_memories[i].send_pkt_to_controller - s.ctrl_ring.recv[s.num_tiles] //= s.recv_pkt_from_controller - - for i in range(s.num_tiles): - s.ctrl_memories[i].send_ctrl //= s.send_ctrl[i] - - def line_trace(s): - res = "||\n".join([(("[ctrl_memory["+str(i)+"]: ") + x.line_trace()) - for (i,x) in enumerate(s.ctrl_memories)]) - res += " ## ctrl_ring: " + s.ctrl_ring.line_trace() - return res - +""" +========================================================================== +RingMultiCtrlMemDynamicRTL.py +========================================================================== +Ring connecting multiple control memories. + +Author : Cheng Tan + Date : Dec 22, 2024 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from .CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ...lib.opt_type import * +from ...noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ...noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL +from ...lib.util.data_struct_attr import * + +class RingMultiCtrlMemDynamicRTL(Component): + def construct(s, CtrlPktType, CgraPayloadType, + width, height, ctrl_mem_size, num_fu_inports, + num_fu_outports, num_tile_inports, num_tile_outports, + ctrl_count_per_iter = 4, total_ctrl_steps = 4): + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + # Constant + num_tiles = width * height + s.num_tiles = width * height + CtrlRingPos = mk_ring_pos(num_tiles + 1) + + # Interface + s.send_ctrl = [SendIfcRTL(CtrlSignalType) for _ in range(s.num_tiles)] + s.recv_pkt_from_controller = RecvIfcRTL(CtrlPktType) + s.send_to_controller_pkt = SendIfcRTL(CtrlPktType) + + # Components + s.ctrl_memories = [ + CtrlMemDynamicRTL(CtrlPktType, + ctrl_mem_size, num_fu_inports, + num_fu_outports, num_tile_inports, + num_tile_outports, 1, num_tiles, ctrl_count_per_iter, + total_ctrl_steps) for terminal_id in range(s.num_tiles)] + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, num_tiles + 1, 1) + + # Connections + for i in range(s.num_tiles): + s.ctrl_memories[i].cgra_id //= 0 + s.ctrl_memories[i].tile_id //= i + s.ctrl_memories[i].recv_from_element.val //= 1 + s.ctrl_memories[i].recv_from_element.msg //= CgraPayloadType() + + for i in range(s.num_tiles): + s.ctrl_ring.send[i] //= s.ctrl_memories[i].recv_pkt_from_controller + s.ctrl_ring.send[s.num_tiles] //= s.send_to_controller_pkt + + for i in range(s.num_tiles): + s.ctrl_ring.recv[i] //= s.ctrl_memories[i].send_pkt_to_controller + s.ctrl_ring.recv[s.num_tiles] //= s.recv_pkt_from_controller + + for i in range(s.num_tiles): + s.ctrl_memories[i].send_ctrl //= s.send_ctrl[i] + + def line_trace(s): + res = "||\n".join([(("[ctrl_memory["+str(i)+"]: ") + x.line_trace()) + for (i,x) in enumerate(s.ctrl_memories)]) + res += " ## ctrl_ring: " + s.ctrl_ring.line_trace() + return res + diff --git a/mem/data/DataMemControllerRTL.py b/mem/data/DataMemControllerRTL.py index 2e80af35..356a0ea2 100644 --- a/mem/data/DataMemControllerRTL.py +++ b/mem/data/DataMemControllerRTL.py @@ -1,451 +1,451 @@ -""" -========================================================================== -DataMemControllerRTL.py -========================================================================== -Data memory for CGRA. It has addtional port to connect to controller, -which can be used for multi-CGRA fabric. - - Send/recv data request/response to/from other CGRA controllers. - - Based on whether the target data address is within the local space. - - Coherence is not targeted for now; protyping in static memory space. - - Send/recv cmd request/response to/from other CGRA controllers. - - E.g., dynamic rescheduling. - - The cmd can be originally derived from a runtime scheduler. - -In addition, it contains a crossbar to handle multi-bank conflicts. - - Crossbar contains an arbitor, i.e., stall may happen on certain port. - - Therefore, bypass queue is leveraged on the input port. - - [ ] https://github.com/tancheng/VectorCGRA/issues/26: - Blocking vs. non-blocking should be configured/propagated here. - - Non-blocking: - - Immediate return data though it is not ready: - - Bank conflicted lower priority access. - - Remote accessed data. - - Blocking and non-blocking might be configurabled in a dynamic way. - -Author : Cheng Tan - Date : Aug 28, 2025 -""" - -from .DataMemWrapperRTL import DataMemWrapperRTL -from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ...lib.messages import * -from ...noc.PyOCN.pymtl3_net.xbar.XbarBypassQueueRTL import XbarBypassQueueRTL -from ...lib.util.data_struct_attr import * - -class DataMemControllerRTL(Component): - def construct(s, - NocPktType, - data_mem_size_global, - data_mem_size_per_bank, - num_banks_per_cgra = 4, - num_rd_tiles = 4, - num_wr_tiles = 4, - multi_cgra_rows = 2, - multi_cgra_columns = 2, - num_tiles = 16, - mem_access_is_combinational = True, - idTo2d_map = {0: [0, 0]}): - - CgraPayloadType = NocPktType.get_field_type(kAttrPayload) - DataType = CgraPayloadType.get_field_type(kAttrData) - # Constants. - global_addr_nbits = clog2(data_mem_size_global) - per_bank_addr_nbits = clog2(data_mem_size_per_bank) - assert(2 ** global_addr_nbits == data_mem_size_global) - assert(2 ** per_bank_addr_nbits == data_mem_size_per_bank) - XType = mk_bits(max(clog2(multi_cgra_columns), 1)) - YType = mk_bits(max(clog2(multi_cgra_rows), 1)) - AddrType = mk_bits(global_addr_nbits) - PerBankAddrType = mk_bits(per_bank_addr_nbits) - s.num_banks_per_cgra = num_banks_per_cgra - LocalBankIndexType = mk_bits(clog2(num_banks_per_cgra)) - s.num_rd_tiles = num_rd_tiles - s.num_wr_tiles = num_wr_tiles - RdTileIdType = mk_bits(clog2(num_rd_tiles)) - # The additional port is for the request from inter-cgra NoC via controller. - num_xbar_in_rd_ports = num_rd_tiles + 1 - num_xbar_in_wr_ports = num_wr_tiles + 1 - num_xbar_out_rd_ports = num_banks_per_cgra + 1 - num_xbar_out_wr_ports = num_banks_per_cgra + 1 - num_cgras = multi_cgra_rows * multi_cgra_columns - XbarOutRdType = mk_bits(clog2(num_xbar_out_rd_ports)) - XbarOutWrType = mk_bits(clog2(num_xbar_out_wr_ports)) - MemReadPktType = \ - mk_mem_access_pkt(DataType, - num_xbar_in_rd_ports, - num_xbar_out_rd_ports, - data_mem_size_global, - num_cgras, - num_tiles, - num_rd_tiles) - MemWritePktType = \ - mk_mem_access_pkt(DataType, - num_xbar_in_wr_ports, - num_xbar_out_wr_ports, - data_mem_size_global, - num_cgras, - num_tiles, - num_rd_tiles) - - # Reverses the source and destination for response packet. - MemResponsePktType = \ - mk_mem_access_pkt(DataType, - num_xbar_out_rd_ports, - num_xbar_in_rd_ports, - data_mem_size_global, - num_cgras, - num_tiles, - num_rd_tiles) - - # Interfaces. - # [num_rd_tiles] indicates the request from the NoC. ---> Add separate recv port for NoC. - s.recv_from_noc_load_request = RecvIfcRTL(NocPktType) - s.recv_from_noc_store_request = RecvIfcRTL(NocPktType) - - # [0, ..., num_rd_tiles - 1] indicate the requests from/to the tiles, - s.recv_raddr = [RecvIfcRTL(AddrType) for _ in range(num_rd_tiles)] - s.recv_waddr = [RecvIfcRTL(AddrType) for _ in range(num_wr_tiles)] - s.recv_wdata = [RecvIfcRTL(DataType) for _ in range(num_wr_tiles)] - - - s.send_rdata = [SendIfcRTL(DataType) for _ in range(num_rd_tiles)] - - s.send_to_noc_load_response_pkt = SendIfcRTL(NocPktType) - - # Response that is from a remote SRAM. - s.recv_from_noc_load_response_pkt = RecvIfcRTL(NocPktType) - - # Requests that targets remote SRAMs. - s.send_to_noc_load_request_pkt = SendIfcRTL(NocPktType) - s.send_to_noc_store_pkt = SendIfcRTL(NocPktType) - - # Components. - s.memory_wrapper = [DataMemWrapperRTL(DataType, MemReadPktType, MemWritePktType, MemResponsePktType, - data_mem_size_global, data_mem_size_per_bank, mem_access_is_combinational) - for _ in range(num_banks_per_cgra)] - # The additional 1 on inports indicates the read/write from NoC. - # The additional 1 on outports indicates the request out of bound of - # local memory space that would be forwarded to NoC. - s.read_crossbar = XbarBypassQueueRTL(MemReadPktType, num_xbar_in_rd_ports, - num_xbar_out_rd_ports) - s.write_crossbar = XbarBypassQueueRTL(MemWritePktType, num_xbar_in_wr_ports, - num_xbar_out_wr_ports) - s.response_crossbar = XbarBypassQueueRTL(MemResponsePktType, num_xbar_out_rd_ports, - num_xbar_in_rd_ports) - - s.rd_pkt = [Wire(MemReadPktType) for _ in range(num_xbar_in_rd_ports)] - s.wr_pkt = [Wire(MemWritePktType) for _ in range(num_xbar_in_wr_ports)] - - s.cgra_id = InPort(mk_bits(max(1, clog2(num_cgras)))) - - s.address_lower = InPort(AddrType) - s.address_upper = InPort(AddrType) - - # Constructs the idTo2d lut. - s.idTo2d_x_lut= [Wire(XType) for _ in range(multi_cgra_columns * multi_cgra_rows)] - s.idTo2d_y_lut= [Wire(YType) for _ in range(multi_cgra_columns * multi_cgra_rows)] - for cgra_id in idTo2d_map: - xy = idTo2d_map[cgra_id] - s.idTo2d_x_lut[cgra_id] //= XType(xy[0]) - s.idTo2d_y_lut[cgra_id] //= YType(xy[1]) - - # Connections. - for i in range(num_banks_per_cgra): - s.read_crossbar.send[i] //= s.memory_wrapper[i].recv_rd - s.write_crossbar.send[i] //= s.memory_wrapper[i].recv_wr - s.memory_wrapper[i].send //= s.response_crossbar.recv[i] - - @update - def assemble_xbar_pkt(): - for i in range(num_xbar_in_rd_ports): - s.rd_pkt[i] @= MemReadPktType(i, 0, 0, DataType(0, 0, 0, 0), 0, 0, i) - - for i in range(num_xbar_in_wr_ports): - s.wr_pkt[i] @= MemWritePktType(i, 0, 0, DataType(0, 0, 0, 0), 0, 0, i) - - for i in range(num_rd_tiles): - recv_raddr = s.recv_raddr[i].msg - # Calculates the target bank index for load. - if (recv_raddr >= s.address_lower) & (recv_raddr <= s.address_upper): - bank_index_load_local = trunc((recv_raddr - s.address_lower) >> per_bank_addr_nbits, XbarOutRdType) - else: - bank_index_load_local = XbarOutRdType(num_banks_per_cgra) - # FIXME: change to exact tile id. - s.rd_pkt[i] @= MemReadPktType(i, # src - bank_index_load_local, # dst - recv_raddr, # addr - DataType(0, 0, 0, 0), # data - s.cgra_id, # src_cgra - 0, # src_tile - i) # remote_src_port - - recv_raddr_from_noc = s.recv_from_noc_load_request.msg.payload.data_addr - # Calculates the target bank index. - if (recv_raddr_from_noc >= s.address_lower) & (recv_raddr_from_noc <= s.address_upper): - bank_index_load_from_noc = trunc((recv_raddr_from_noc - s.address_lower) >> per_bank_addr_nbits, XbarOutRdType) - else: - bank_index_load_from_noc = XbarOutRdType(num_banks_per_cgra) - s.rd_pkt[num_rd_tiles] @= MemReadPktType(num_rd_tiles, # src - bank_index_load_from_noc, # dst - recv_raddr_from_noc, # addr - DataType(0, 0, 0, 0), # data - s.recv_from_noc_load_request.msg.src, # src_cgra - s.recv_from_noc_load_request.msg.src_tile_id, # src_tile - s.recv_from_noc_load_request.msg.remote_src_port) # remote_src_port - - for i in range(num_wr_tiles): - recv_waddr = s.recv_waddr[i].msg - # Calculates the target bank index for store. - if (recv_waddr >= s.address_lower) & (recv_waddr <= s.address_upper): - bank_index_store_local = trunc((recv_waddr - s.address_lower) >> per_bank_addr_nbits, XbarOutWrType) - else: - bank_index_store_local = XbarOutWrType(num_banks_per_cgra) - s.wr_pkt[i] @= MemWritePktType(i, # src - bank_index_store_local, # dst - recv_waddr, # addr - s.recv_wdata[i].msg, # data - 0, # src_cgra - 0, # src_tile - i) # remote_src_port - - recv_waddr_from_noc = s.recv_from_noc_store_request.msg.payload.data_addr - recv_wdata_from_noc = s.recv_from_noc_store_request.msg.payload.data - if (recv_waddr_from_noc >= s.address_lower) & (recv_waddr_from_noc <= s.address_upper): - bank_index_store_from_noc = trunc((recv_waddr_from_noc - s.address_lower) >> per_bank_addr_nbits, XbarOutWrType) - else: - bank_index_store_from_noc = XbarOutWrType(num_banks_per_cgra) - s.wr_pkt[num_wr_tiles] @= MemWritePktType(num_wr_tiles, # src - bank_index_store_from_noc, # dst - recv_waddr_from_noc, # addr - recv_wdata_from_noc, # data - 0, # src_cgra - 0, # src_tile - num_wr_tiles) # remote_src_port - - # Connects xbar with the memory wrapper. - @update - def update_all(): - # Initializes the signals. - for i in range(num_rd_tiles): - s.recv_raddr[i].rdy @= 0 - s.recv_from_noc_load_request.rdy @= 0 - - for i in range(num_wr_tiles): - s.recv_waddr[i].rdy @= 0 - # s.recv_wdata_bypass_q[i].send.rdy @= 0 - s.recv_from_noc_store_request.rdy @= 0 - # s.recv_wdata_bypass_q[num_wr_tiles].send.rdy @= 0 - - for i in range(num_rd_tiles): - s.send_rdata[i].val @= 0 - s.send_rdata[i].msg @= DataType() - s.send_to_noc_load_response_pkt.val @= 0 - - s.send_to_noc_load_response_pkt.msg @= \ - NocPktType(0, # src - 0, # dst - 0, # src_x - 0, # src_y - 0, # dst_x - 0, # dst_y - 0, # src_tile_id - 0, # dst_tile_id - 0, # remote_src_port - 0, # opaque - 0, # vc_id - CgraPayloadType(0, 0, 0, 0, 0)) - - - for i in range(num_wr_tiles): - s.recv_wdata[i].rdy @= 0 - - s.send_to_noc_store_pkt.msg @= \ - NocPktType(0, # src - 0, # dst - 0, # src_x - 0, # src_y - 0, # dst_x - 0, # dst_y - 0, # src_tile_id - 0, # dst_tile_id - 0, # remote_src_port - 0, # opaque - 0, # vc_id - CgraPayloadType(0, 0, 0, 0, 0)) - - s.send_to_noc_store_pkt.val @= 0 - - for i in range(num_xbar_in_rd_ports): - s.read_crossbar.recv[i].val @= 0 - s.read_crossbar.recv[i].msg @= MemReadPktType(0, 0, 0, DataType(0, 0, 0, 0), 0, 0, 0) - - s.recv_from_noc_load_response_pkt.rdy @= 0 - - for i in range(num_xbar_in_wr_ports): - s.write_crossbar.recv[i].val @= 0 - s.write_crossbar.recv[i].msg @= MemWritePktType(0, 0, 0, DataType(0, 0, 0, 0), 0, 0, 0) - - s.send_to_noc_load_request_pkt.msg @= \ - NocPktType(0, # src - 0, # dst - 0, # src_x - 0, # src_y - 0, # dst_x - 0, # dst_y - 0, # src_tile_id - 0, # dst_tile_id - 0, # remote_src_port - 0, # opaque - 0, # vc_id - CgraPayloadType(0, 0, 0, 0, 0)) - - s.send_to_noc_load_request_pkt.val @= 0 - - # Connects the load request ports (from tiles and NoC) to the xbar targetting memory and NoC. - for i in range(num_rd_tiles): - s.read_crossbar.recv[i].val @= s.recv_raddr[i].val - s.read_crossbar.recv[i].msg @= s.rd_pkt[i] - s.recv_raddr[i].rdy @= s.read_crossbar.recv[i].rdy - s.read_crossbar.recv[num_rd_tiles].val @= s.recv_from_noc_load_request.val - s.read_crossbar.recv[num_rd_tiles].msg @= s.rd_pkt[num_rd_tiles] - s.recv_from_noc_load_request.rdy @= s.read_crossbar.recv[num_rd_tiles].rdy - - # Connects the store request ports (from tiles and NoC) to the xbar targetting memory and NoC. - for i in range(num_wr_tiles): - s.write_crossbar.recv[i].val @= s.recv_waddr[i].val - s.write_crossbar.recv[i].msg @= s.wr_pkt[i] - s.recv_waddr[i].rdy @= s.write_crossbar.recv[i].rdy - s.recv_wdata[i].rdy @= s.write_crossbar.recv[i].rdy - s.write_crossbar.recv[num_wr_tiles].val @= s.recv_from_noc_store_request.val - s.write_crossbar.recv[num_wr_tiles].msg @= s.wr_pkt[num_wr_tiles] - s.recv_from_noc_store_request.rdy @= s.write_crossbar.recv[num_wr_tiles].rdy - - # Connects the response ports to tiles and NoC from the xbar. - # Number of load responses is expected to be the same as the number of load requests. - for i in range(num_xbar_in_rd_ports): - if i < num_rd_tiles: - s.send_rdata[RdTileIdType(i)].msg @= s.response_crossbar.send[i].msg.data - s.send_rdata[RdTileIdType(i)].val @= s.response_crossbar.send[i].val - s.response_crossbar.send[i].rdy @= s.send_rdata[RdTileIdType(i)].rdy - else: - from_cgra_id = s.response_crossbar.send[i].msg.src_cgra - from_tile_id = s.response_crossbar.send[i].msg.src_tile - s.send_to_noc_load_response_pkt.msg @= \ - NocPktType( - s.cgra_id, # src_cgra_id - from_cgra_id, # dst_cgra_id - s.idTo2d_x_lut[s.cgra_id], # src_cgra_x - s.idTo2d_y_lut[s.cgra_id], # src_cgra_y - s.idTo2d_x_lut[from_cgra_id], # dst_cgra_x - s.idTo2d_y_lut[from_cgra_id], # dst_cgra_y - 0, # src_tile_id set as 0 as it is from memory rather than a specific tile. - from_tile_id, # dst_tile_id - s.response_crossbar.send[i].msg.remote_src_port, # remote_src_port, carries the original source port id towards the src. - 0, # opaque - 0, # vc_id - CgraPayloadType( - CMD_LOAD_RESPONSE, - s.response_crossbar.send[i].msg.data, - s.response_crossbar.send[i].msg.addr, 0, 0)) - - s.send_to_noc_load_response_pkt.val @= s.response_crossbar.send[i].val - s.response_crossbar.send[i].rdy @= s.send_to_noc_load_response_pkt.rdy - - # Handles the request (not response) towards the others via the NoC. The dst would be - # updated in the controller. - s.send_to_noc_load_request_pkt.msg @= \ - NocPktType(s.cgra_id, # src - 0, # dst - s.idTo2d_x_lut[s.cgra_id], # src_x - s.idTo2d_y_lut[s.cgra_id], # src_y - 0, # dst_x - 0, # dst_y - 0, # src_tile_id - 0, # dst_tile_id - s.read_crossbar.send[num_banks_per_cgra].msg.src, # remote_src_port - 0, # opaque - 0, # vc_id - CgraPayloadType( - CMD_LOAD_REQUEST, - 0, - s.read_crossbar.send[num_banks_per_cgra].msg.addr, 0, 0)) - - s.send_to_noc_load_request_pkt.val @= s.read_crossbar.send[num_banks_per_cgra].val - # TODO: https://github.com/tancheng/VectorCGRA/issues/26 -- Modify this part for non-blocking access. - # 'val` indicates the data is arbitrated successfully. - s.recv_from_noc_load_response_pkt.rdy @= s.response_crossbar.recv[num_banks_per_cgra].rdy - s.response_crossbar.recv[num_banks_per_cgra].val @= s.recv_from_noc_load_response_pkt.val - s.response_crossbar.recv[num_banks_per_cgra].msg @= \ - MemResponsePktType(num_banks_per_cgra, - s.recv_from_noc_load_response_pkt.msg.remote_src_port, - s.recv_from_noc_load_response_pkt.msg.payload.data_addr, - s.recv_from_noc_load_response_pkt.msg.payload.data, - s.recv_from_noc_load_response_pkt.msg.src, - s.recv_from_noc_load_response_pkt.msg.src_tile_id, - 0) - - # Allows other load request towards NoC when the previous one is not responded. There - # could be out-of-order load response, i.e., potential consistency issue. - s.read_crossbar.send[num_banks_per_cgra].rdy @= s.send_to_noc_load_request_pkt.rdy - - # Handles the write port towards the NoC. - s.send_to_noc_store_pkt.msg @= \ - NocPktType(s.cgra_id, # src - 0, # dst - s.idTo2d_x_lut[s.cgra_id], # src_x - s.idTo2d_y_lut[s.cgra_id], # src_y - 0, # dst_x - 0, # dst_y - 0, # src_tile_id - 0, # dst_tile_id - s.write_crossbar.send[num_banks_per_cgra].msg.src, # remote_src_port - 0, # opaque - 0, # vc_id - CgraPayloadType( - CMD_STORE_REQUEST, - s.write_crossbar.send[num_banks_per_cgra].msg.data, - s.write_crossbar.send[num_banks_per_cgra].msg.addr, 0, 0)) - - s.send_to_noc_store_pkt.val @= s.write_crossbar.send[num_banks_per_cgra].val - s.write_crossbar.send[num_banks_per_cgra].rdy @= s.send_to_noc_store_pkt.rdy - - def line_trace(s): - recv_raddr_str = "recv_from_tile_read_addr: {" - recv_waddr_str = "recv_from_tile_write_addr: {" - recv_wdata_str = "recv_from_tile_write_data: {" - content_str = "content: {" - send_rdata_str = "send_to_tile_read_data: {" - - send_to_noc_load_request_pkt_str = "send_to_noc_load_request_pkt: {" - send_to_noc_load_response_pkt_str = "send_to_noc_load_response_pkt: {" - recv_from_noc_load_response_pkt_str = "recv_from_noc_load_response_pkt: {" - send_to_noc_store_pkt_str = "send_to_noc_store_pkt: {" - - - for b in range(s.num_banks_per_cgra): - recv_raddr_str += " bank[" + str(b) + "]: " + "|".join([str(data.msg) for data in s.recv_raddr]) + ";" - recv_waddr_str += " bank[" + str(b) + "]: " + "|".join([str(data.msg) for data in s.recv_waddr]) + ";" - recv_wdata_str += " bank[" + str(b) + "]: " + "|".join([str(data.msg) for data in s.recv_wdata]) + ";" - content_str += " bank[" + str(b) + "]: " + "|".join([str(data) for data in s.memory_wrapper[b].memory.regs]) + ";" - send_rdata_str += " bank[" + str(b) + "]: " + "|".join([str(data.msg) for data in s.send_rdata]) + ";" - - send_to_noc_load_request_pkt_str += str(s.send_to_noc_load_request_pkt.msg) + ";" - send_to_noc_load_response_pkt_str += " " + str(s.send_to_noc_load_response_pkt.msg) + " " - recv_from_noc_load_response_pkt_str += str(s.recv_from_noc_load_response_pkt.msg) + ";" - send_to_noc_store_pkt_str += str(s.send_to_noc_store_pkt.msg) + ", val: " + str(s.send_to_noc_store_pkt.val) + ";" - - recv_raddr_str += "}" - send_rdata_str += "}" - recv_waddr_str += "}" - recv_wdata_str += "}" - send_to_noc_load_request_pkt_str += "}" - send_to_noc_load_response_pkt_str += "}" - recv_from_noc_load_response_pkt_str += "}" - send_to_noc_store_pkt_str += "}" - read_crossbar_str = "read_crossbar: " + s.read_crossbar.line_trace() - write_crossbar_str = "write_crossbar: " + s.write_crossbar.line_trace() - content_str += "}" - - return f'{recv_raddr_str} || {recv_waddr_str} || {recv_wdata_str} || {send_rdata_str} || {send_to_noc_load_request_pkt_str} || {send_to_noc_load_response_pkt_str} || {recv_from_noc_load_response_pkt_str} || {send_to_noc_store_pkt_str} || {read_crossbar_str} || {write_crossbar_str} || [{content_str}]' - +""" +========================================================================== +DataMemControllerRTL.py +========================================================================== +Data memory for CGRA. It has addtional port to connect to controller, +which can be used for multi-CGRA fabric. + - Send/recv data request/response to/from other CGRA controllers. + - Based on whether the target data address is within the local space. + - Coherence is not targeted for now; protyping in static memory space. + - Send/recv cmd request/response to/from other CGRA controllers. + - E.g., dynamic rescheduling. + - The cmd can be originally derived from a runtime scheduler. + +In addition, it contains a crossbar to handle multi-bank conflicts. + - Crossbar contains an arbitor, i.e., stall may happen on certain port. + - Therefore, bypass queue is leveraged on the input port. + - [ ] https://github.com/tancheng/VectorCGRA/issues/26: + Blocking vs. non-blocking should be configured/propagated here. + - Non-blocking: + - Immediate return data though it is not ready: + - Bank conflicted lower priority access. + - Remote accessed data. + - Blocking and non-blocking might be configurabled in a dynamic way. + +Author : Cheng Tan + Date : Aug 28, 2025 +""" + +from .DataMemWrapperRTL import DataMemWrapperRTL +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ...lib.messages import * +from ...noc.PyOCN.pymtl3_net.xbar.XbarBypassQueueRTL import XbarBypassQueueRTL +from ...lib.util.data_struct_attr import * + +class DataMemControllerRTL(Component): + def construct(s, + NocPktType, + data_mem_size_global, + data_mem_size_per_bank, + num_banks_per_cgra = 4, + num_rd_tiles = 4, + num_wr_tiles = 4, + multi_cgra_rows = 2, + multi_cgra_columns = 2, + num_tiles = 16, + mem_access_is_combinational = True, + idTo2d_map = {0: [0, 0]}): + + CgraPayloadType = NocPktType.get_field_type(kAttrPayload) + DataType = CgraPayloadType.get_field_type(kAttrData) + # Constants. + global_addr_nbits = clog2(data_mem_size_global) + per_bank_addr_nbits = clog2(data_mem_size_per_bank) + assert(2 ** global_addr_nbits == data_mem_size_global) + assert(2 ** per_bank_addr_nbits == data_mem_size_per_bank) + XType = mk_bits(max(clog2(multi_cgra_columns), 1)) + YType = mk_bits(max(clog2(multi_cgra_rows), 1)) + AddrType = mk_bits(global_addr_nbits) + PerBankAddrType = mk_bits(per_bank_addr_nbits) + s.num_banks_per_cgra = num_banks_per_cgra + LocalBankIndexType = mk_bits(clog2(num_banks_per_cgra)) + s.num_rd_tiles = num_rd_tiles + s.num_wr_tiles = num_wr_tiles + RdTileIdType = mk_bits(clog2(num_rd_tiles)) + # The additional port is for the request from inter-cgra NoC via controller. + num_xbar_in_rd_ports = num_rd_tiles + 1 + num_xbar_in_wr_ports = num_wr_tiles + 1 + num_xbar_out_rd_ports = num_banks_per_cgra + 1 + num_xbar_out_wr_ports = num_banks_per_cgra + 1 + num_cgras = multi_cgra_rows * multi_cgra_columns + XbarOutRdType = mk_bits(clog2(num_xbar_out_rd_ports)) + XbarOutWrType = mk_bits(clog2(num_xbar_out_wr_ports)) + MemReadPktType = \ + mk_mem_access_pkt(DataType, + num_xbar_in_rd_ports, + num_xbar_out_rd_ports, + data_mem_size_global, + num_cgras, + num_tiles, + num_rd_tiles) + MemWritePktType = \ + mk_mem_access_pkt(DataType, + num_xbar_in_wr_ports, + num_xbar_out_wr_ports, + data_mem_size_global, + num_cgras, + num_tiles, + num_rd_tiles) + + # Reverses the source and destination for response packet. + MemResponsePktType = \ + mk_mem_access_pkt(DataType, + num_xbar_out_rd_ports, + num_xbar_in_rd_ports, + data_mem_size_global, + num_cgras, + num_tiles, + num_rd_tiles) + + # Interfaces. + # [num_rd_tiles] indicates the request from the NoC. ---> Add separate recv port for NoC. + s.recv_from_noc_load_request = RecvIfcRTL(NocPktType) + s.recv_from_noc_store_request = RecvIfcRTL(NocPktType) + + # [0, ..., num_rd_tiles - 1] indicate the requests from/to the tiles, + s.recv_raddr = [RecvIfcRTL(AddrType) for _ in range(num_rd_tiles)] + s.recv_waddr = [RecvIfcRTL(AddrType) for _ in range(num_wr_tiles)] + s.recv_wdata = [RecvIfcRTL(DataType) for _ in range(num_wr_tiles)] + + + s.send_rdata = [SendIfcRTL(DataType) for _ in range(num_rd_tiles)] + + s.send_to_noc_load_response_pkt = SendIfcRTL(NocPktType) + + # Response that is from a remote SRAM. + s.recv_from_noc_load_response_pkt = RecvIfcRTL(NocPktType) + + # Requests that targets remote SRAMs. + s.send_to_noc_load_request_pkt = SendIfcRTL(NocPktType) + s.send_to_noc_store_pkt = SendIfcRTL(NocPktType) + + # Components. + s.memory_wrapper = [DataMemWrapperRTL(DataType, MemReadPktType, MemWritePktType, MemResponsePktType, + data_mem_size_global, data_mem_size_per_bank, mem_access_is_combinational) + for _ in range(num_banks_per_cgra)] + # The additional 1 on inports indicates the read/write from NoC. + # The additional 1 on outports indicates the request out of bound of + # local memory space that would be forwarded to NoC. + s.read_crossbar = XbarBypassQueueRTL(MemReadPktType, num_xbar_in_rd_ports, + num_xbar_out_rd_ports) + s.write_crossbar = XbarBypassQueueRTL(MemWritePktType, num_xbar_in_wr_ports, + num_xbar_out_wr_ports) + s.response_crossbar = XbarBypassQueueRTL(MemResponsePktType, num_xbar_out_rd_ports, + num_xbar_in_rd_ports) + + s.rd_pkt = [Wire(MemReadPktType) for _ in range(num_xbar_in_rd_ports)] + s.wr_pkt = [Wire(MemWritePktType) for _ in range(num_xbar_in_wr_ports)] + + s.cgra_id = InPort(mk_bits(max(1, clog2(num_cgras)))) + + s.address_lower = InPort(AddrType) + s.address_upper = InPort(AddrType) + + # Constructs the idTo2d lut. + s.idTo2d_x_lut= [Wire(XType) for _ in range(multi_cgra_columns * multi_cgra_rows)] + s.idTo2d_y_lut= [Wire(YType) for _ in range(multi_cgra_columns * multi_cgra_rows)] + for cgra_id in idTo2d_map: + xy = idTo2d_map[cgra_id] + s.idTo2d_x_lut[cgra_id] //= XType(xy[0]) + s.idTo2d_y_lut[cgra_id] //= YType(xy[1]) + + # Connections. + for i in range(num_banks_per_cgra): + s.read_crossbar.send[i] //= s.memory_wrapper[i].recv_rd + s.write_crossbar.send[i] //= s.memory_wrapper[i].recv_wr + s.memory_wrapper[i].send //= s.response_crossbar.recv[i] + + @update + def assemble_xbar_pkt(): + for i in range(num_xbar_in_rd_ports): + s.rd_pkt[i] @= MemReadPktType(i, 0, 0, DataType(0, 0, 0, 0), 0, 0, i) + + for i in range(num_xbar_in_wr_ports): + s.wr_pkt[i] @= MemWritePktType(i, 0, 0, DataType(0, 0, 0, 0), 0, 0, i) + + for i in range(num_rd_tiles): + recv_raddr = s.recv_raddr[i].msg + # Calculates the target bank index for load. + if (recv_raddr >= s.address_lower) & (recv_raddr <= s.address_upper): + bank_index_load_local = trunc((recv_raddr - s.address_lower) >> per_bank_addr_nbits, XbarOutRdType) + else: + bank_index_load_local = XbarOutRdType(num_banks_per_cgra) + # FIXME: change to exact tile id. + s.rd_pkt[i] @= MemReadPktType(i, # src + bank_index_load_local, # dst + recv_raddr, # addr + DataType(0, 0, 0, 0), # data + s.cgra_id, # src_cgra + 0, # src_tile + i) # remote_src_port + + recv_raddr_from_noc = s.recv_from_noc_load_request.msg.payload.data_addr + # Calculates the target bank index. + if (recv_raddr_from_noc >= s.address_lower) & (recv_raddr_from_noc <= s.address_upper): + bank_index_load_from_noc = trunc((recv_raddr_from_noc - s.address_lower) >> per_bank_addr_nbits, XbarOutRdType) + else: + bank_index_load_from_noc = XbarOutRdType(num_banks_per_cgra) + s.rd_pkt[num_rd_tiles] @= MemReadPktType(num_rd_tiles, # src + bank_index_load_from_noc, # dst + recv_raddr_from_noc, # addr + DataType(0, 0, 0, 0), # data + s.recv_from_noc_load_request.msg.src, # src_cgra + s.recv_from_noc_load_request.msg.src_tile_id, # src_tile + s.recv_from_noc_load_request.msg.remote_src_port) # remote_src_port + + for i in range(num_wr_tiles): + recv_waddr = s.recv_waddr[i].msg + # Calculates the target bank index for store. + if (recv_waddr >= s.address_lower) & (recv_waddr <= s.address_upper): + bank_index_store_local = trunc((recv_waddr - s.address_lower) >> per_bank_addr_nbits, XbarOutWrType) + else: + bank_index_store_local = XbarOutWrType(num_banks_per_cgra) + s.wr_pkt[i] @= MemWritePktType(i, # src + bank_index_store_local, # dst + recv_waddr, # addr + s.recv_wdata[i].msg, # data + 0, # src_cgra + 0, # src_tile + i) # remote_src_port + + recv_waddr_from_noc = s.recv_from_noc_store_request.msg.payload.data_addr + recv_wdata_from_noc = s.recv_from_noc_store_request.msg.payload.data + if (recv_waddr_from_noc >= s.address_lower) & (recv_waddr_from_noc <= s.address_upper): + bank_index_store_from_noc = trunc((recv_waddr_from_noc - s.address_lower) >> per_bank_addr_nbits, XbarOutWrType) + else: + bank_index_store_from_noc = XbarOutWrType(num_banks_per_cgra) + s.wr_pkt[num_wr_tiles] @= MemWritePktType(num_wr_tiles, # src + bank_index_store_from_noc, # dst + recv_waddr_from_noc, # addr + recv_wdata_from_noc, # data + 0, # src_cgra + 0, # src_tile + num_wr_tiles) # remote_src_port + + # Connects xbar with the memory wrapper. + @update + def update_all(): + # Initializes the signals. + for i in range(num_rd_tiles): + s.recv_raddr[i].rdy @= 0 + s.recv_from_noc_load_request.rdy @= 0 + + for i in range(num_wr_tiles): + s.recv_waddr[i].rdy @= 0 + # s.recv_wdata_bypass_q[i].send.rdy @= 0 + s.recv_from_noc_store_request.rdy @= 0 + # s.recv_wdata_bypass_q[num_wr_tiles].send.rdy @= 0 + + for i in range(num_rd_tiles): + s.send_rdata[i].val @= 0 + s.send_rdata[i].msg @= DataType() + s.send_to_noc_load_response_pkt.val @= 0 + + s.send_to_noc_load_response_pkt.msg @= \ + NocPktType(0, # src + 0, # dst + 0, # src_x + 0, # src_y + 0, # dst_x + 0, # dst_y + 0, # src_tile_id + 0, # dst_tile_id + 0, # remote_src_port + 0, # opaque + 0, # vc_id + CgraPayloadType(0, 0, 0, 0, 0)) + + + for i in range(num_wr_tiles): + s.recv_wdata[i].rdy @= 0 + + s.send_to_noc_store_pkt.msg @= \ + NocPktType(0, # src + 0, # dst + 0, # src_x + 0, # src_y + 0, # dst_x + 0, # dst_y + 0, # src_tile_id + 0, # dst_tile_id + 0, # remote_src_port + 0, # opaque + 0, # vc_id + CgraPayloadType(0, 0, 0, 0, 0)) + + s.send_to_noc_store_pkt.val @= 0 + + for i in range(num_xbar_in_rd_ports): + s.read_crossbar.recv[i].val @= 0 + s.read_crossbar.recv[i].msg @= MemReadPktType(0, 0, 0, DataType(0, 0, 0, 0), 0, 0, 0) + + s.recv_from_noc_load_response_pkt.rdy @= 0 + + for i in range(num_xbar_in_wr_ports): + s.write_crossbar.recv[i].val @= 0 + s.write_crossbar.recv[i].msg @= MemWritePktType(0, 0, 0, DataType(0, 0, 0, 0), 0, 0, 0) + + s.send_to_noc_load_request_pkt.msg @= \ + NocPktType(0, # src + 0, # dst + 0, # src_x + 0, # src_y + 0, # dst_x + 0, # dst_y + 0, # src_tile_id + 0, # dst_tile_id + 0, # remote_src_port + 0, # opaque + 0, # vc_id + CgraPayloadType(0, 0, 0, 0, 0)) + + s.send_to_noc_load_request_pkt.val @= 0 + + # Connects the load request ports (from tiles and NoC) to the xbar targetting memory and NoC. + for i in range(num_rd_tiles): + s.read_crossbar.recv[i].val @= s.recv_raddr[i].val + s.read_crossbar.recv[i].msg @= s.rd_pkt[i] + s.recv_raddr[i].rdy @= s.read_crossbar.recv[i].rdy + s.read_crossbar.recv[num_rd_tiles].val @= s.recv_from_noc_load_request.val + s.read_crossbar.recv[num_rd_tiles].msg @= s.rd_pkt[num_rd_tiles] + s.recv_from_noc_load_request.rdy @= s.read_crossbar.recv[num_rd_tiles].rdy + + # Connects the store request ports (from tiles and NoC) to the xbar targetting memory and NoC. + for i in range(num_wr_tiles): + s.write_crossbar.recv[i].val @= s.recv_waddr[i].val + s.write_crossbar.recv[i].msg @= s.wr_pkt[i] + s.recv_waddr[i].rdy @= s.write_crossbar.recv[i].rdy + s.recv_wdata[i].rdy @= s.write_crossbar.recv[i].rdy + s.write_crossbar.recv[num_wr_tiles].val @= s.recv_from_noc_store_request.val + s.write_crossbar.recv[num_wr_tiles].msg @= s.wr_pkt[num_wr_tiles] + s.recv_from_noc_store_request.rdy @= s.write_crossbar.recv[num_wr_tiles].rdy + + # Connects the response ports to tiles and NoC from the xbar. + # Number of load responses is expected to be the same as the number of load requests. + for i in range(num_xbar_in_rd_ports): + if i < num_rd_tiles: + s.send_rdata[RdTileIdType(i)].msg @= s.response_crossbar.send[i].msg.data + s.send_rdata[RdTileIdType(i)].val @= s.response_crossbar.send[i].val + s.response_crossbar.send[i].rdy @= s.send_rdata[RdTileIdType(i)].rdy + else: + from_cgra_id = s.response_crossbar.send[i].msg.src_cgra + from_tile_id = s.response_crossbar.send[i].msg.src_tile + s.send_to_noc_load_response_pkt.msg @= \ + NocPktType( + s.cgra_id, # src_cgra_id + from_cgra_id, # dst_cgra_id + s.idTo2d_x_lut[s.cgra_id], # src_cgra_x + s.idTo2d_y_lut[s.cgra_id], # src_cgra_y + s.idTo2d_x_lut[from_cgra_id], # dst_cgra_x + s.idTo2d_y_lut[from_cgra_id], # dst_cgra_y + 0, # src_tile_id set as 0 as it is from memory rather than a specific tile. + from_tile_id, # dst_tile_id + s.response_crossbar.send[i].msg.remote_src_port, # remote_src_port, carries the original source port id towards the src. + 0, # opaque + 0, # vc_id + CgraPayloadType( + CMD_LOAD_RESPONSE, + s.response_crossbar.send[i].msg.data, + s.response_crossbar.send[i].msg.addr, 0, 0)) + + s.send_to_noc_load_response_pkt.val @= s.response_crossbar.send[i].val + s.response_crossbar.send[i].rdy @= s.send_to_noc_load_response_pkt.rdy + + # Handles the request (not response) towards the others via the NoC. The dst would be + # updated in the controller. + s.send_to_noc_load_request_pkt.msg @= \ + NocPktType(s.cgra_id, # src + 0, # dst + s.idTo2d_x_lut[s.cgra_id], # src_x + s.idTo2d_y_lut[s.cgra_id], # src_y + 0, # dst_x + 0, # dst_y + 0, # src_tile_id + 0, # dst_tile_id + s.read_crossbar.send[num_banks_per_cgra].msg.src, # remote_src_port + 0, # opaque + 0, # vc_id + CgraPayloadType( + CMD_LOAD_REQUEST, + 0, + s.read_crossbar.send[num_banks_per_cgra].msg.addr, 0, 0)) + + s.send_to_noc_load_request_pkt.val @= s.read_crossbar.send[num_banks_per_cgra].val + # TODO: https://github.com/tancheng/VectorCGRA/issues/26 -- Modify this part for non-blocking access. + # 'val` indicates the data is arbitrated successfully. + s.recv_from_noc_load_response_pkt.rdy @= s.response_crossbar.recv[num_banks_per_cgra].rdy + s.response_crossbar.recv[num_banks_per_cgra].val @= s.recv_from_noc_load_response_pkt.val + s.response_crossbar.recv[num_banks_per_cgra].msg @= \ + MemResponsePktType(num_banks_per_cgra, + s.recv_from_noc_load_response_pkt.msg.remote_src_port, + s.recv_from_noc_load_response_pkt.msg.payload.data_addr, + s.recv_from_noc_load_response_pkt.msg.payload.data, + s.recv_from_noc_load_response_pkt.msg.src, + s.recv_from_noc_load_response_pkt.msg.src_tile_id, + 0) + + # Allows other load request towards NoC when the previous one is not responded. There + # could be out-of-order load response, i.e., potential consistency issue. + s.read_crossbar.send[num_banks_per_cgra].rdy @= s.send_to_noc_load_request_pkt.rdy + + # Handles the write port towards the NoC. + s.send_to_noc_store_pkt.msg @= \ + NocPktType(s.cgra_id, # src + 0, # dst + s.idTo2d_x_lut[s.cgra_id], # src_x + s.idTo2d_y_lut[s.cgra_id], # src_y + 0, # dst_x + 0, # dst_y + 0, # src_tile_id + 0, # dst_tile_id + s.write_crossbar.send[num_banks_per_cgra].msg.src, # remote_src_port + 0, # opaque + 0, # vc_id + CgraPayloadType( + CMD_STORE_REQUEST, + s.write_crossbar.send[num_banks_per_cgra].msg.data, + s.write_crossbar.send[num_banks_per_cgra].msg.addr, 0, 0)) + + s.send_to_noc_store_pkt.val @= s.write_crossbar.send[num_banks_per_cgra].val + s.write_crossbar.send[num_banks_per_cgra].rdy @= s.send_to_noc_store_pkt.rdy + + def line_trace(s): + recv_raddr_str = "recv_from_tile_read_addr: {" + recv_waddr_str = "recv_from_tile_write_addr: {" + recv_wdata_str = "recv_from_tile_write_data: {" + content_str = "content: {" + send_rdata_str = "send_to_tile_read_data: {" + + send_to_noc_load_request_pkt_str = "send_to_noc_load_request_pkt: {" + send_to_noc_load_response_pkt_str = "send_to_noc_load_response_pkt: {" + recv_from_noc_load_response_pkt_str = "recv_from_noc_load_response_pkt: {" + send_to_noc_store_pkt_str = "send_to_noc_store_pkt: {" + + + for b in range(s.num_banks_per_cgra): + recv_raddr_str += " bank[" + str(b) + "]: " + "|".join([str(data.msg) for data in s.recv_raddr]) + ";" + recv_waddr_str += " bank[" + str(b) + "]: " + "|".join([str(data.msg) for data in s.recv_waddr]) + ";" + recv_wdata_str += " bank[" + str(b) + "]: " + "|".join([str(data.msg) for data in s.recv_wdata]) + ";" + content_str += " bank[" + str(b) + "]: " + "|".join([str(data) for data in s.memory_wrapper[b].memory.regs]) + ";" + send_rdata_str += " bank[" + str(b) + "]: " + "|".join([str(data.msg) for data in s.send_rdata]) + ";" + + send_to_noc_load_request_pkt_str += str(s.send_to_noc_load_request_pkt.msg) + ";" + send_to_noc_load_response_pkt_str += " " + str(s.send_to_noc_load_response_pkt.msg) + " " + recv_from_noc_load_response_pkt_str += str(s.recv_from_noc_load_response_pkt.msg) + ";" + send_to_noc_store_pkt_str += str(s.send_to_noc_store_pkt.msg) + ", val: " + str(s.send_to_noc_store_pkt.val) + ";" + + recv_raddr_str += "}" + send_rdata_str += "}" + recv_waddr_str += "}" + recv_wdata_str += "}" + send_to_noc_load_request_pkt_str += "}" + send_to_noc_load_response_pkt_str += "}" + recv_from_noc_load_response_pkt_str += "}" + send_to_noc_store_pkt_str += "}" + read_crossbar_str = "read_crossbar: " + s.read_crossbar.line_trace() + write_crossbar_str = "write_crossbar: " + s.write_crossbar.line_trace() + content_str += "}" + + return f'{recv_raddr_str} || {recv_waddr_str} || {recv_wdata_str} || {send_rdata_str} || {send_to_noc_load_request_pkt_str} || {send_to_noc_load_response_pkt_str} || {recv_from_noc_load_response_pkt_str} || {send_to_noc_store_pkt_str} || {read_crossbar_str} || {write_crossbar_str} || [{content_str}]' + diff --git a/mem/data/DataMemRTL.py b/mem/data/DataMemRTL.py index 5a09ef5d..ca048a2c 100644 --- a/mem/data/DataMemRTL.py +++ b/mem/data/DataMemRTL.py @@ -1,105 +1,105 @@ -""" -========================================================================== -DataMemRTL.py -========================================================================== -Data memory for CGRA. - -Author : Cheng Tan - Date : Dec 20, 2019 -""" - -from pymtl3 import * -from pymtl3.stdlib.primitive import RegisterFile -from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ...lib.opt_type import * - -class DataMemRTL(Component): - - def construct(s, DataType, data_mem_size, rd_ports = 1, wr_ports = 1, - preload_data = None): - - # Constant - - AddrType = mk_bits(clog2(data_mem_size)) - - # Interface - - s.recv_raddr = [RecvIfcRTL(AddrType) for _ in range(rd_ports)] - s.send_rdata = [SendIfcRTL(DataType) for _ in range(rd_ports)] - s.recv_waddr = [RecvIfcRTL(AddrType) for _ in range(wr_ports)] - s.recv_wdata = [RecvIfcRTL(DataType) for _ in range(wr_ports)] - - # Component - - s.reg_file = RegisterFile(DataType, data_mem_size, rd_ports, wr_ports + rd_ports) - s.initWrites = [Wire(b1) for _ in range(data_mem_size)] - - if preload_data == None: - @update - def update_read_without_init(): - for i in range(rd_ports): - # s.reg_file.wen[wr_ports + i] @= b1(0) - s.reg_file.raddr[i] @= s.recv_raddr[i].msg - s.send_rdata[i].msg @= s.reg_file.rdata[i] - - for i in range( wr_ports ): - s.reg_file.wen[i] @= b1(0) - s.reg_file.waddr[i] @= s.recv_waddr[i].msg - s.reg_file.wdata[i] @= s.recv_wdata[i].msg - if s.recv_waddr[i].val == b1(1): - s.reg_file.wen[i] @= s.recv_wdata[i].val & s.recv_waddr[i].val - - else: - s.preloadData = [Wire(DataType) for _ in range(data_mem_size)] - for i in range(len(preload_data)): - s.preloadData[i] //= preload_data[i] - - @update - def update_read_with_init(): - - for i in range( rd_ports ): - s.reg_file.wen[wr_ports + i] @= b1(0) - if s.initWrites[s.recv_raddr[i].msg] == b1(0): - s.send_rdata[i].msg @= s.preloadData[s.recv_raddr[i].msg] - s.reg_file.waddr[wr_ports + i] @= s.recv_raddr[i].msg - s.reg_file.wdata[wr_ports + i] @= s.preloadData[s.recv_raddr[i].msg] - s.reg_file.wen[wr_ports + i] @= b1(1) - else: - s.reg_file.raddr[i] @= s.recv_raddr[i].msg - s.send_rdata[i].msg @= s.reg_file.rdata[i] - - for i in range(wr_ports): - if s.recv_waddr[i].val == b1(1): - s.reg_file.waddr[i] @= s.recv_waddr[i].msg - s.reg_file.wdata[i] @= s.recv_wdata[i].msg - s.reg_file.wen[i] @= s.recv_wdata[i].val & s.recv_waddr[i].val - - # Connections - - @update_ff - def update_init(): - for i in range( rd_ports ): - if s.recv_raddr[i].val == b1(1): - s.initWrites[s.recv_raddr[i].msg] <<= s.initWrites[s.recv_raddr[i].msg] | b1(1) - for i in range( wr_ports ): - if s.recv_waddr[i].val == b1(1): - s.initWrites[s.recv_waddr[i].msg] <<= s.initWrites[s.recv_waddr[i].msg] | b1(1) - - @update - def update_signal(): - for i in range(rd_ports): - s.recv_raddr[i].rdy @= s.send_rdata[i].rdy - s.send_rdata[i].val @= s.recv_raddr[i].val - for i in range(wr_ports): - s.recv_waddr[i].rdy @= Bits1(1) - s.recv_wdata[i].rdy @= Bits1(1) - - def line_trace(s): - recv_raddr_str = "recv_read_addr: " + "|".join([str(data.msg) for data in s.recv_raddr]) - recv_waddr_str = "recv_write_addr: " + "|".join([str(data.msg) for data in s.recv_waddr]) - recv_wdata_str = "recv_write_data: " + "|".join([str(data.msg) for data in s.recv_wdata]) - content_str = "content: " + "|".join([str(data) for data in s.reg_file.regs]) - send_rdata_str = "send_read_data: " + "|".join([str(data.msg) for data in s.send_rdata]) - return f'{recv_raddr_str} || {recv_waddr_str} || {recv_wdata_str} || [{content_str}] || {send_rdata_str}' - +""" +========================================================================== +DataMemRTL.py +========================================================================== +Data memory for CGRA. + +Author : Cheng Tan + Date : Dec 20, 2019 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ...lib.opt_type import * + +class DataMemRTL(Component): + + def construct(s, DataType, data_mem_size, rd_ports = 1, wr_ports = 1, + preload_data = None): + + # Constant + + AddrType = mk_bits(clog2(data_mem_size)) + + # Interface + + s.recv_raddr = [RecvIfcRTL(AddrType) for _ in range(rd_ports)] + s.send_rdata = [SendIfcRTL(DataType) for _ in range(rd_ports)] + s.recv_waddr = [RecvIfcRTL(AddrType) for _ in range(wr_ports)] + s.recv_wdata = [RecvIfcRTL(DataType) for _ in range(wr_ports)] + + # Component + + s.reg_file = RegisterFile(DataType, data_mem_size, rd_ports, wr_ports + rd_ports) + s.initWrites = [Wire(b1) for _ in range(data_mem_size)] + + if preload_data == None: + @update + def update_read_without_init(): + for i in range(rd_ports): + # s.reg_file.wen[wr_ports + i] @= b1(0) + s.reg_file.raddr[i] @= s.recv_raddr[i].msg + s.send_rdata[i].msg @= s.reg_file.rdata[i] + + for i in range( wr_ports ): + s.reg_file.wen[i] @= b1(0) + s.reg_file.waddr[i] @= s.recv_waddr[i].msg + s.reg_file.wdata[i] @= s.recv_wdata[i].msg + if s.recv_waddr[i].val == b1(1): + s.reg_file.wen[i] @= s.recv_wdata[i].val & s.recv_waddr[i].val + + else: + s.preloadData = [Wire(DataType) for _ in range(data_mem_size)] + for i in range(len(preload_data)): + s.preloadData[i] //= preload_data[i] + + @update + def update_read_with_init(): + + for i in range( rd_ports ): + s.reg_file.wen[wr_ports + i] @= b1(0) + if s.initWrites[s.recv_raddr[i].msg] == b1(0): + s.send_rdata[i].msg @= s.preloadData[s.recv_raddr[i].msg] + s.reg_file.waddr[wr_ports + i] @= s.recv_raddr[i].msg + s.reg_file.wdata[wr_ports + i] @= s.preloadData[s.recv_raddr[i].msg] + s.reg_file.wen[wr_ports + i] @= b1(1) + else: + s.reg_file.raddr[i] @= s.recv_raddr[i].msg + s.send_rdata[i].msg @= s.reg_file.rdata[i] + + for i in range(wr_ports): + if s.recv_waddr[i].val == b1(1): + s.reg_file.waddr[i] @= s.recv_waddr[i].msg + s.reg_file.wdata[i] @= s.recv_wdata[i].msg + s.reg_file.wen[i] @= s.recv_wdata[i].val & s.recv_waddr[i].val + + # Connections + + @update_ff + def update_init(): + for i in range( rd_ports ): + if s.recv_raddr[i].val == b1(1): + s.initWrites[s.recv_raddr[i].msg] <<= s.initWrites[s.recv_raddr[i].msg] | b1(1) + for i in range( wr_ports ): + if s.recv_waddr[i].val == b1(1): + s.initWrites[s.recv_waddr[i].msg] <<= s.initWrites[s.recv_waddr[i].msg] | b1(1) + + @update + def update_signal(): + for i in range(rd_ports): + s.recv_raddr[i].rdy @= s.send_rdata[i].rdy + s.send_rdata[i].val @= s.recv_raddr[i].val + for i in range(wr_ports): + s.recv_waddr[i].rdy @= Bits1(1) + s.recv_wdata[i].rdy @= Bits1(1) + + def line_trace(s): + recv_raddr_str = "recv_read_addr: " + "|".join([str(data.msg) for data in s.recv_raddr]) + recv_waddr_str = "recv_write_addr: " + "|".join([str(data.msg) for data in s.recv_waddr]) + recv_wdata_str = "recv_write_data: " + "|".join([str(data.msg) for data in s.recv_wdata]) + content_str = "content: " + "|".join([str(data) for data in s.reg_file.regs]) + send_rdata_str = "send_read_data: " + "|".join([str(data.msg) for data in s.send_rdata]) + return f'{recv_raddr_str} || {recv_waddr_str} || {recv_wdata_str} || [{content_str}] || {send_rdata_str}' + diff --git a/mem/data/DataMemScalableRTL.py b/mem/data/DataMemScalableRTL.py index 6dc1594e..28a6acb7 100644 --- a/mem/data/DataMemScalableRTL.py +++ b/mem/data/DataMemScalableRTL.py @@ -1,113 +1,113 @@ -""" -========================================================================== -DataMemScalableRTL.py -========================================================================== -Data memory for CGRA. It has addtional port to connect to controller, -which can be used for multi-CGRA fabric. - -Author : Cheng Tan - Date : Dec 4, 2024 -""" - -from pymtl3 import * -from pymtl3.stdlib.primitive import RegisterFile -from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ...lib.opt_type import * - -class DataMemScalableRTL(Component): - - def construct(s, DataType, data_mem_size, rd_ports = 1, wr_ports = 1, - preload_data = None): - - # Constant - AddrType = mk_bits(clog2(data_mem_size)) - - # Interface - s.recv_raddr = [RecvIfcRTL(AddrType) for _ in range(rd_ports)] - s.send_rdata = [SendIfcRTL(DataType) for _ in range(rd_ports)] - s.recv_waddr = [RecvIfcRTL(AddrType) for _ in range(wr_ports)] - s.recv_wdata = [RecvIfcRTL(DataType) for _ in range(wr_ports)] - - s.recv_from_noc = RecvIfcRTL(DataType) - s.send_to_noc = SendIfcRTL(DataType) - - # Component - - s.reg_file = RegisterFile( DataType, data_mem_size, rd_ports, wr_ports + rd_ports ) - s.initWrites = [ Wire( b1 ) for _ in range( data_mem_size ) ] - - # FIXME: Following signals need to be set via some logic, i.e., - # handling miss accesses. - s.send_to_noc.val //= 0 - s.send_to_noc.msg //= DataType(0, 0) - s.recv_from_noc.rdy //= 0 - - if preload_data == None: - @update - def update_read_without_init(): - for i in range( rd_ports ): - # s.reg_file.wen[wr_ports + i] @= b1(0) - s.reg_file.raddr[i] @= s.recv_raddr[i].msg - s.send_rdata[i].msg @= s.reg_file.rdata[i] - - for i in range( wr_ports ): - s.reg_file.wen[i] @= b1(0) - s.reg_file.waddr[i] @= s.recv_waddr[i].msg - s.reg_file.wdata[i] @= s.recv_wdata[i].msg - if s.recv_waddr[i].val == b1(1): - s.reg_file.wen[i] @= s.recv_wdata[i].val & s.recv_waddr[i].val - - else: - s.preloadData = [Wire(DataType) for _ in range(data_mem_size)] - for i in range(len( preload_data)): - s.preloadData[i] //= preload_data[i] - - @update - def update_read_with_init(): - - for i in range(rd_ports): - s.reg_file.wen[wr_ports + i] @= b1(0) - if s.initWrites[s.recv_raddr[i].msg] == b1(0): - s.send_rdata[i].msg @= s.preloadData[s.recv_raddr[i].msg] - s.reg_file.waddr[wr_ports + i] @= s.recv_raddr[i].msg - s.reg_file.wdata[wr_ports + i] @= s.preloadData[s.recv_raddr[i].msg] - s.reg_file.wen[wr_ports + i] @= b1(1) - else: - s.reg_file.raddr[i] @= s.recv_raddr[i].msg - s.send_rdata[i].msg @= s.reg_file.rdata[i] - - for i in range( wr_ports ): - if s.recv_waddr[i].val == b1(1): - s.reg_file.waddr[i] @= s.recv_waddr[i].msg - s.reg_file.wdata[i] @= s.recv_wdata[i].msg - s.reg_file.wen[i] @= s.recv_wdata[i].val & s.recv_waddr[i].val - - # Connections - - @update_ff - def update_init(): - for i in range( rd_ports ): - if s.recv_raddr[i].val == b1(1): - s.initWrites[s.recv_raddr[i].msg] <<= s.initWrites[s.recv_raddr[i].msg] | b1(1) - for i in range( wr_ports ): - if s.recv_waddr[i].val == b1(1): - s.initWrites[s.recv_waddr[i].msg] <<= s.initWrites[s.recv_waddr[i].msg] | b1(1) - - @update - def update_signal(): - for i in range( rd_ports ): - s.recv_raddr[i].rdy @= s.send_rdata[i].rdy - s.send_rdata[i].val @= s.recv_raddr[i].val - for i in range( wr_ports ): - s.recv_waddr[i].rdy @= Bits1( 1 ) - s.recv_wdata[i].rdy @= Bits1( 1 ) - - def line_trace(s): - recv_raddr_str = "recv_read_addr: " + "|".join([str(data.msg) for data in s.recv_raddr]) - recv_waddr_str = "recv_write_addr: " + "|".join([str(data.msg) for data in s.recv_waddr]) - recv_wdata_str = "recv_write_data: " + "|".join([str(data.msg) for data in s.recv_wdata]) - content_str = "content: " + "|".join([str(data) for data in s.reg_file.regs]) - send_rdata_str = "send_read_data: " + "|".join([str(data.msg) for data in s.send_rdata]) - return f'{recv_raddr_str} || {recv_waddr_str} || {recv_wdata_str} || [{content_str}] || {send_rdata_str}' - +""" +========================================================================== +DataMemScalableRTL.py +========================================================================== +Data memory for CGRA. It has addtional port to connect to controller, +which can be used for multi-CGRA fabric. + +Author : Cheng Tan + Date : Dec 4, 2024 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ...lib.opt_type import * + +class DataMemScalableRTL(Component): + + def construct(s, DataType, data_mem_size, rd_ports = 1, wr_ports = 1, + preload_data = None): + + # Constant + AddrType = mk_bits(clog2(data_mem_size)) + + # Interface + s.recv_raddr = [RecvIfcRTL(AddrType) for _ in range(rd_ports)] + s.send_rdata = [SendIfcRTL(DataType) for _ in range(rd_ports)] + s.recv_waddr = [RecvIfcRTL(AddrType) for _ in range(wr_ports)] + s.recv_wdata = [RecvIfcRTL(DataType) for _ in range(wr_ports)] + + s.recv_from_noc = RecvIfcRTL(DataType) + s.send_to_noc = SendIfcRTL(DataType) + + # Component + + s.reg_file = RegisterFile( DataType, data_mem_size, rd_ports, wr_ports + rd_ports ) + s.initWrites = [ Wire( b1 ) for _ in range( data_mem_size ) ] + + # FIXME: Following signals need to be set via some logic, i.e., + # handling miss accesses. + s.send_to_noc.val //= 0 + s.send_to_noc.msg //= DataType(0, 0) + s.recv_from_noc.rdy //= 0 + + if preload_data == None: + @update + def update_read_without_init(): + for i in range( rd_ports ): + # s.reg_file.wen[wr_ports + i] @= b1(0) + s.reg_file.raddr[i] @= s.recv_raddr[i].msg + s.send_rdata[i].msg @= s.reg_file.rdata[i] + + for i in range( wr_ports ): + s.reg_file.wen[i] @= b1(0) + s.reg_file.waddr[i] @= s.recv_waddr[i].msg + s.reg_file.wdata[i] @= s.recv_wdata[i].msg + if s.recv_waddr[i].val == b1(1): + s.reg_file.wen[i] @= s.recv_wdata[i].val & s.recv_waddr[i].val + + else: + s.preloadData = [Wire(DataType) for _ in range(data_mem_size)] + for i in range(len( preload_data)): + s.preloadData[i] //= preload_data[i] + + @update + def update_read_with_init(): + + for i in range(rd_ports): + s.reg_file.wen[wr_ports + i] @= b1(0) + if s.initWrites[s.recv_raddr[i].msg] == b1(0): + s.send_rdata[i].msg @= s.preloadData[s.recv_raddr[i].msg] + s.reg_file.waddr[wr_ports + i] @= s.recv_raddr[i].msg + s.reg_file.wdata[wr_ports + i] @= s.preloadData[s.recv_raddr[i].msg] + s.reg_file.wen[wr_ports + i] @= b1(1) + else: + s.reg_file.raddr[i] @= s.recv_raddr[i].msg + s.send_rdata[i].msg @= s.reg_file.rdata[i] + + for i in range( wr_ports ): + if s.recv_waddr[i].val == b1(1): + s.reg_file.waddr[i] @= s.recv_waddr[i].msg + s.reg_file.wdata[i] @= s.recv_wdata[i].msg + s.reg_file.wen[i] @= s.recv_wdata[i].val & s.recv_waddr[i].val + + # Connections + + @update_ff + def update_init(): + for i in range( rd_ports ): + if s.recv_raddr[i].val == b1(1): + s.initWrites[s.recv_raddr[i].msg] <<= s.initWrites[s.recv_raddr[i].msg] | b1(1) + for i in range( wr_ports ): + if s.recv_waddr[i].val == b1(1): + s.initWrites[s.recv_waddr[i].msg] <<= s.initWrites[s.recv_waddr[i].msg] | b1(1) + + @update + def update_signal(): + for i in range( rd_ports ): + s.recv_raddr[i].rdy @= s.send_rdata[i].rdy + s.send_rdata[i].val @= s.recv_raddr[i].val + for i in range( wr_ports ): + s.recv_waddr[i].rdy @= Bits1( 1 ) + s.recv_wdata[i].rdy @= Bits1( 1 ) + + def line_trace(s): + recv_raddr_str = "recv_read_addr: " + "|".join([str(data.msg) for data in s.recv_raddr]) + recv_waddr_str = "recv_write_addr: " + "|".join([str(data.msg) for data in s.recv_waddr]) + recv_wdata_str = "recv_write_data: " + "|".join([str(data.msg) for data in s.recv_wdata]) + content_str = "content: " + "|".join([str(data) for data in s.reg_file.regs]) + send_rdata_str = "send_read_data: " + "|".join([str(data.msg) for data in s.send_rdata]) + return f'{recv_raddr_str} || {recv_waddr_str} || {recv_wdata_str} || [{content_str}] || {send_rdata_str}' + diff --git a/mem/data/DataMemWrapperRTL.py b/mem/data/DataMemWrapperRTL.py index 0c89b202..eda33a45 100644 --- a/mem/data/DataMemWrapperRTL.py +++ b/mem/data/DataMemWrapperRTL.py @@ -1,107 +1,107 @@ -""" -========================================================================== -DataMemWrapperRTL.py -========================================================================== -Data memory for CGRA. - -Author : Cheng Tan - Date : Aug 27, 2025 -""" - -from pymtl3 import * -from pymtl3.stdlib.primitive import RegisterFile -from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ...lib.messages import * -from ...lib.opt_type import * -from ...noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL - -class DataMemWrapperRTL(Component): - - def construct(s, - DataType, - MemReadType, - MemWriteType, - MemResponseType, - global_data_mem_size, - per_bank_data_mem_size, - is_combinational = True): - - # Constant. - GlobalAddrType = mk_bits(clog2(global_data_mem_size)) - PerBankAddrType = mk_bits(clog2(per_bank_data_mem_size)) - - # Interface. - s.recv_rd = RecvIfcRTL(MemReadType) - s.recv_wr = RecvIfcRTL(MemWriteType) - s.send = SendIfcRTL(MemResponseType) - - # Component. - # As we include xbar and multi-bank for the memory hierarchy, - # we prefer as few as possible number of ports. - rd_ports_per_bank = 1 - wr_ports_per_bank = 1 - s.memory = RegisterFile(DataType, per_bank_data_mem_size, - rd_ports_per_bank, wr_ports_per_bank) - # TODO: We need to replace channel (normal queue) with bypass - # queue when replacing register file with SRAM. This channel - # here is used to mimic the SRAM 1 cycle latency. Bypass queue - # can still queue up the load requests, facilitating streaming. - latency = 0 if is_combinational else 1 - s.channel_rd = ChannelRTL(MemReadType, latency = latency) - s.channel_wr = ChannelRTL(MemWriteType, latency = latency) - - # Connection. - s.recv_rd //= s.channel_rd.recv - s.recv_wr //= s.channel_wr.recv - - @update - def compose_send_msg(): - s.send.msg @= MemResponseType(0, 0, 0, DataType(0, 0, 0, 0), 0, 0, 0) - # TODO: change to pipe's out's wen. - if s.channel_rd.send.val: - s.send.msg.src @= s.channel_rd.send.msg.dst - s.send.msg.dst @= s.channel_rd.send.msg.src - s.send.msg.addr @= s.channel_rd.send.msg.addr - s.send.msg.data @= s.memory.rdata[0] - s.send.msg.src_cgra @= s.channel_rd.send.msg.src_cgra - s.send.msg.src_tile @= s.channel_rd.send.msg.src_tile - s.send.msg.remote_src_port @= s.channel_rd.send.msg.remote_src_port - - @update - def request_memory(): - # Default values. - s.memory.wen[0] @= 0 - s.memory.raddr[0] @= PerBankAddrType(0) - s.memory.waddr[0] @= PerBankAddrType(0) - s.memory.wdata[0] @= DataType(0, 0, 0, 0) - - if s.channel_rd.send.val: - s.memory.raddr[0] @= \ - trunc(s.channel_rd.send.msg.addr % per_bank_data_mem_size, PerBankAddrType) - if s.channel_wr.send.val: - s.memory.waddr[0] @= \ - trunc(s.channel_wr.send.msg.addr % per_bank_data_mem_size, PerBankAddrType) - s.memory.wdata[0] @= s.channel_wr.send.msg.data - s.memory.wen[0] @= 1 - - @update - def notify_channel_rdy(): - # TODO: change to SRAM's rdy when replacing register file - # with SRAM. - s.channel_rd.send.rdy @= s.send.rdy - s.channel_wr.send.rdy @= 1 - - @update - def notify_send_val(): - # TODO: change to SRAM's valid when replacing register file - # with SRAM. - s.send.val @= s.channel_rd.send.val - - def line_trace(s): - recv_rd_str = "recv_rd_msg: " + str(s.recv_rd.msg) - recv_wr_str = "recv_wr_msg: " + str(s.recv_wr.msg) - content_str = "content: " + "|".join([str(data) for data in s.memory.regs]) - send_str = "send_msg: " + str(s.send.msg) - return f'{recv_rd_str} || {recv_wr_str} || [{content_str}] || {send_str}' - +""" +========================================================================== +DataMemWrapperRTL.py +========================================================================== +Data memory for CGRA. + +Author : Cheng Tan + Date : Aug 27, 2025 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ...lib.messages import * +from ...lib.opt_type import * +from ...noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL + +class DataMemWrapperRTL(Component): + + def construct(s, + DataType, + MemReadType, + MemWriteType, + MemResponseType, + global_data_mem_size, + per_bank_data_mem_size, + is_combinational = True): + + # Constant. + GlobalAddrType = mk_bits(clog2(global_data_mem_size)) + PerBankAddrType = mk_bits(clog2(per_bank_data_mem_size)) + + # Interface. + s.recv_rd = RecvIfcRTL(MemReadType) + s.recv_wr = RecvIfcRTL(MemWriteType) + s.send = SendIfcRTL(MemResponseType) + + # Component. + # As we include xbar and multi-bank for the memory hierarchy, + # we prefer as few as possible number of ports. + rd_ports_per_bank = 1 + wr_ports_per_bank = 1 + s.memory = RegisterFile(DataType, per_bank_data_mem_size, + rd_ports_per_bank, wr_ports_per_bank) + # TODO: We need to replace channel (normal queue) with bypass + # queue when replacing register file with SRAM. This channel + # here is used to mimic the SRAM 1 cycle latency. Bypass queue + # can still queue up the load requests, facilitating streaming. + latency = 0 if is_combinational else 1 + s.channel_rd = ChannelRTL(MemReadType, latency = latency) + s.channel_wr = ChannelRTL(MemWriteType, latency = latency) + + # Connection. + s.recv_rd //= s.channel_rd.recv + s.recv_wr //= s.channel_wr.recv + + @update + def compose_send_msg(): + s.send.msg @= MemResponseType(0, 0, 0, DataType(0, 0, 0, 0), 0, 0, 0) + # TODO: change to pipe's out's wen. + if s.channel_rd.send.val: + s.send.msg.src @= s.channel_rd.send.msg.dst + s.send.msg.dst @= s.channel_rd.send.msg.src + s.send.msg.addr @= s.channel_rd.send.msg.addr + s.send.msg.data @= s.memory.rdata[0] + s.send.msg.src_cgra @= s.channel_rd.send.msg.src_cgra + s.send.msg.src_tile @= s.channel_rd.send.msg.src_tile + s.send.msg.remote_src_port @= s.channel_rd.send.msg.remote_src_port + + @update + def request_memory(): + # Default values. + s.memory.wen[0] @= 0 + s.memory.raddr[0] @= PerBankAddrType(0) + s.memory.waddr[0] @= PerBankAddrType(0) + s.memory.wdata[0] @= DataType(0, 0, 0, 0) + + if s.channel_rd.send.val: + s.memory.raddr[0] @= \ + trunc(s.channel_rd.send.msg.addr % per_bank_data_mem_size, PerBankAddrType) + if s.channel_wr.send.val: + s.memory.waddr[0] @= \ + trunc(s.channel_wr.send.msg.addr % per_bank_data_mem_size, PerBankAddrType) + s.memory.wdata[0] @= s.channel_wr.send.msg.data + s.memory.wen[0] @= 1 + + @update + def notify_channel_rdy(): + # TODO: change to SRAM's rdy when replacing register file + # with SRAM. + s.channel_rd.send.rdy @= s.send.rdy + s.channel_wr.send.rdy @= 1 + + @update + def notify_send_val(): + # TODO: change to SRAM's valid when replacing register file + # with SRAM. + s.send.val @= s.channel_rd.send.val + + def line_trace(s): + recv_rd_str = "recv_rd_msg: " + str(s.recv_rd.msg) + recv_wr_str = "recv_wr_msg: " + str(s.recv_wr.msg) + content_str = "content: " + "|".join([str(data) for data in s.memory.regs]) + send_str = "send_msg: " + str(s.send.msg) + return f'{recv_rd_str} || {recv_wr_str} || [{content_str}] || {send_str}' + diff --git a/mem/data/test/DataMemCL_test.py b/mem/data/test/DataMemCL_test.py index 304d5c8c..203d40a5 100644 --- a/mem/data/test/DataMemCL_test.py +++ b/mem/data/test/DataMemCL_test.py @@ -1,83 +1,83 @@ -""" -========================================================================== -DataMemCL_test.py -========================================================================== -Test cases for DataMemCL. - -Author : Cheng Tan - Date : Nov 26, 2022 -""" - -from pymtl3 import * -from ..DataMemCL import DataMemCL -from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL -from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL -from ....lib.messages import * -from ....lib.opt_type import * - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DataType, AddrType, data_mem_size, read_addr, - read_data, write_addr, write_data, preloadData): - - s.read_addr = TestSrcRTL(AddrType, read_addr) - s.read_data = TestSinkRTL(DataType, read_data) - - s.write_addr = TestSrcRTL(AddrType, write_addr) - s.write_data = TestSrcRTL(DataType, write_data) - - - s.dataMem = DataMemCL(DataType, data_mem_size, - preload_data = preloadData) - - s.dataMem.recv_raddr[0] //= s.read_addr.send - s.dataMem.send_rdata[0] //= s.read_data.recv - s.dataMem.recv_waddr[0] //= s.write_addr.send - s.dataMem.recv_wdata[0] //= s.write_data.send - - def done(s): - return s.read_addr.done() and s.read_data.done() - - def line_trace(s): - return s.dataMem.line_trace() - -def run_sim(test_harness, max_cycles = 10): - test_harness.elaborate() - test_harness.apply(DefaultPassGroup()) - test_harness.sim_reset() - - # Run simulation - ncycles = 0 - print() - print("{}:{}".format(ncycles, test_harness.line_trace())) - while not test_harness.done() and ncycles < max_cycles: - test_harness.sim_tick() - ncycles += 1 - print("{}:{}".format( ncycles, test_harness.line_trace())) - - # Check timeout - assert ncycles < max_cycles - - test_harness.sim_tick() - test_harness.sim_tick() - test_harness.sim_tick() - -def test_const_queue(): - DataType = mk_data(16, 1) - data_mem_size = 100 - AddrType = mk_bits(clog2(data_mem_size)) - preloadData = [DataType(i, 1) for i in range(100)] - - read_addr = [AddrType(2), AddrType(3), AddrType(0), AddrType(12)] - read_data = [DataType(2, 1), DataType(3, 1), DataType(0, 1), DataType(33, 1)] - write_addr = [AddrType(12), AddrType(23)] - write_data = [DataType(33, 1), DataType(44, 1)] - - th = TestHarness(DataType, AddrType, data_mem_size, read_addr, - read_data, write_addr, write_data, preloadData) - run_sim(th) - +""" +========================================================================== +DataMemCL_test.py +========================================================================== +Test cases for DataMemCL. + +Author : Cheng Tan + Date : Nov 26, 2022 +""" + +from pymtl3 import * +from ..DataMemCL import DataMemCL +from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL +from ....lib.messages import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + + def construct(s, DataType, AddrType, data_mem_size, read_addr, + read_data, write_addr, write_data, preloadData): + + s.read_addr = TestSrcRTL(AddrType, read_addr) + s.read_data = TestSinkRTL(DataType, read_data) + + s.write_addr = TestSrcRTL(AddrType, write_addr) + s.write_data = TestSrcRTL(DataType, write_data) + + + s.dataMem = DataMemCL(DataType, data_mem_size, + preload_data = preloadData) + + s.dataMem.recv_raddr[0] //= s.read_addr.send + s.dataMem.send_rdata[0] //= s.read_data.recv + s.dataMem.recv_waddr[0] //= s.write_addr.send + s.dataMem.recv_wdata[0] //= s.write_data.send + + def done(s): + return s.read_addr.done() and s.read_data.done() + + def line_trace(s): + return s.dataMem.line_trace() + +def run_sim(test_harness, max_cycles = 10): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + ncycles = 0 + print() + print("{}:{}".format(ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format( ncycles, test_harness.line_trace())) + + # Check timeout + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_const_queue(): + DataType = mk_data(16, 1) + data_mem_size = 100 + AddrType = mk_bits(clog2(data_mem_size)) + preloadData = [DataType(i, 1) for i in range(100)] + + read_addr = [AddrType(2), AddrType(3), AddrType(0), AddrType(12)] + read_data = [DataType(2, 1), DataType(3, 1), DataType(0, 1), DataType(33, 1)] + write_addr = [AddrType(12), AddrType(23)] + write_data = [DataType(33, 1), DataType(44, 1)] + + th = TestHarness(DataType, AddrType, data_mem_size, read_addr, + read_data, write_addr, write_data, preloadData) + run_sim(th) + diff --git a/mem/data/test/DataMemControllerRTL_test.py b/mem/data/test/DataMemControllerRTL_test.py index 3fa15a82..f13b3e46 100644 --- a/mem/data/test/DataMemControllerRTL_test.py +++ b/mem/data/test/DataMemControllerRTL_test.py @@ -1,237 +1,237 @@ -""" -========================================================================== -DataMemControllerRTL_test.py -========================================================================== -Test cases for DataMemControllerRTL. - -Author : Cheng Tan - Date : Aug 28, 2025 -""" - -from pymtl3.passes.backends.verilog import (VerilogTranslationPass) -from pymtl3.stdlib.test_utils import config_model_with_cmdline_opts - -from ..DataMemControllerRTL import DataMemControllerRTL -from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL -from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL -from ....lib.messages import * -from ....lib.opt_type import * - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, NocPktType, - data_mem_size_global, data_mem_size_per_bank, num_banks, - rd_tiles, wr_tiles, num_cgra_rows, num_cgra_columns, - num_tiles, - read_addr, read_data, write_addr, - write_data, noc_recv_load, - send_to_noc_load_request_pkt, send_to_noc_store_pkt): - - CgraPayloadType = NocPktType.get_field_type(kAttrPayload) - DataType = CgraPayloadType.get_field_type(kAttrData) - DataAddrType = CgraPayloadType.get_field_type(kAttrDataAddr) - s.num_banks = num_banks - s.rd_tiles = rd_tiles - s.wr_tiles = wr_tiles - s.recv_raddr = [TestSrcRTL(DataAddrType, read_addr[i]) - for i in range(rd_tiles)] - s.send_rdata = [TestSinkRTL(DataType, read_data[i]) - for i in range(rd_tiles)] - - s.recv_waddr = [TestSrcRTL(DataAddrType, write_addr[i]) - for i in range(wr_tiles)] - s.recv_wdata = [TestSrcRTL(DataType, write_data[i]) - for i in range(wr_tiles)] - - s.recv_from_noc = TestSrcRTL(NocPktType, noc_recv_load) - - s.send_to_noc_load_request_pkt = TestSinkRTL(NocPktType, send_to_noc_load_request_pkt) - s.send_to_noc_store_pkt = TestSinkRTL(NocPktType, send_to_noc_store_pkt) - - s.mem_controller = DataMemControllerRTL(NocPktType, - data_mem_size_global, - data_mem_size_per_bank, - num_banks, - rd_tiles, - wr_tiles, - num_cgra_rows, - num_cgra_columns, - num_tiles, - mem_access_is_combinational = True) - - for i in range(rd_tiles): - s.mem_controller.recv_raddr[i] //= s.recv_raddr[i].send - s.mem_controller.send_rdata[i] //= s.send_rdata[i].recv - - for i in range(wr_tiles): - s.mem_controller.recv_waddr[i] //= s.recv_waddr[i].send - s.mem_controller.recv_wdata[i] //= s.recv_wdata[i].send - - s.mem_controller.recv_from_noc_load_response_pkt //= s.recv_from_noc.send - s.mem_controller.send_to_noc_load_request_pkt //= s.send_to_noc_load_request_pkt.recv - s.mem_controller.send_to_noc_store_pkt //= s.send_to_noc_store_pkt.recv - - s.mem_controller.address_lower //= 0 - s.mem_controller.address_upper //= 31 - - s.cgra_id = 0 - - def done(s): - for i in range(s.rd_tiles): - if not s.recv_raddr[i].done() or not s.send_rdata[i].done(): - return False - - for i in range(s.wr_tiles): - if not s.recv_waddr[i].done() or not s.recv_wdata[i].done(): - return False - - if not s.send_to_noc_load_request_pkt.done() or \ - not s.send_to_noc_store_pkt.done() or \ - not s.recv_from_noc.done(): - return False - - return True - - def line_trace(s): - return s.mem_controller.line_trace() - -def run_sim(test_harness, max_cycles = 40): - test_harness.apply(DefaultPassGroup()) - test_harness.sim_reset() - - # Run simulation - - ncycles = 0 - print() - print("{}:{}".format(ncycles, test_harness.line_trace())) - while not test_harness.done() and ncycles < max_cycles: - test_harness.sim_tick() - ncycles += 1 - print("{}:{}".format(ncycles, test_harness.line_trace())) - - # Check timeout - assert ncycles < max_cycles - - test_harness.sim_tick() - test_harness.sim_tick() - test_harness.sim_tick() - -def test_mem_controller(cmdline_opts): - data_nbits = 32 - predicate_nbits = 1 - DataType = mk_data(data_nbits, predicate_nbits) - data_mem_size_global = 64 - data_mem_size_per_bank = 16 - num_banks = 2 - - num_registers_per_reg_bank = 16 - num_cgra_columns = 1 - num_cgra_rows = 1 - num_tiles = 4 - rd_tiles = 4 - wr_tiles = 4 - ctrl_mem_size = 6 - num_tile_inports = 4 - num_tile_outports =4 - num_fu_inports = 4 - num_fu_outports = 2 - - DataAddrType = mk_bits(clog2(data_mem_size_global)) - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - - CtrlType = mk_ctrl(num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports, - num_registers_per_reg_bank) - - CgraPayloadType = mk_cgra_payload(DataType, - DataAddrType, - CtrlType, - CtrlAddrType) - - InterCgraPktType = mk_inter_cgra_pkt(num_cgra_columns, - num_cgra_rows, - num_tiles, - rd_tiles, - CgraPayloadType) - - # test_meta_data = [ - # # addr: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - # [0x00, 0x00, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], - # # addr: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - # [0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd7]] - - # Input write requests. - write_addr = [ - [DataAddrType(2), DataAddrType(31), DataAddrType(45)], - [DataAddrType(40), DataAddrType(31)], - [DataAddrType(2), DataAddrType(3)], - [DataAddrType(2)] - ] - write_data = [ - [DataType(0x00a8, 1), DataType(0x00d7, 1), DataType(0xd545, 1)], - [DataType(0xd040, 1), DataType(0x0d70, 1)], - [DataType(0x0a80, 1), DataType(0x00a9, 1)], - [DataType(0xa800, 1)] - ] - # Input read requests. - read_addr = [ - [DataAddrType(42), DataAddrType(2), DataAddrType(31), DataAddrType(3), DataAddrType(3)], - [DataAddrType(30), DataAddrType(17), DataAddrType(31), DataAddrType(2)], - [], - [DataAddrType(2), DataAddrType(2), DataAddrType(2), DataAddrType(25)] - ] - # Expected response. - read_data = [ - [DataType(0xbbbb, 1), DataType(0x00a8, 1), DataType(0x00d7, 1), DataType(0x0000, 0), DataType(0x00a9, 1)], - [DataType(0x0000, 0), DataType(0x0000, 0), DataType(0x0d70, 1), DataType(0xa800, 1)], - [], - [DataType(0x0000, 0), DataType(0x0a80, 1), DataType(0xa800, 1), DataType(0x0000, 0)] - ] - - # Input data. - send_to_noc_load_request_pkt = [ - # src dst src_x src_y dst_x dst_y src_tile dst_tile remote_src_port opq vc - InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, CgraPayloadType(CMD_LOAD_REQUEST, data_addr = 42)), - ] - - noc_recv_load = [ - InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, CgraPayloadType(CMD_LOAD_RESPONSE, DataType(0xbbbb, 1))) - ] - - # Expected. - send_to_noc_store_pkt = [ - # src dst src_x src_y dst_x dst_y src_tile dst_tile remote_src_port opq vc data_addr - InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, CgraPayloadType(CMD_STORE_REQUEST, DataType(0xd040, 1), 40)), - InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, CgraPayloadType(CMD_STORE_REQUEST, DataType(0xd545, 1), 45)), - ] - - th = TestHarness(InterCgraPktType, - data_mem_size_global, - data_mem_size_per_bank, - num_banks, - rd_tiles, - wr_tiles, - num_cgra_rows, - num_cgra_columns, - num_tiles, - read_addr, - read_data, - write_addr, - write_data, - noc_recv_load, - send_to_noc_load_request_pkt, - send_to_noc_store_pkt) - - th.elaborate() - th.mem_controller.set_metadata(VerilogTranslationPass.explicit_module_name, - f'DataMemControllerRTL_translation') - th = config_model_with_cmdline_opts( th, cmdline_opts, duts=['mem_controller'] ) - - run_sim(th) - +""" +========================================================================== +DataMemControllerRTL_test.py +========================================================================== +Test cases for DataMemControllerRTL. + +Author : Cheng Tan + Date : Aug 28, 2025 +""" + +from pymtl3.passes.backends.verilog import (VerilogTranslationPass) +from pymtl3.stdlib.test_utils import config_model_with_cmdline_opts + +from ..DataMemControllerRTL import DataMemControllerRTL +from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL +from ....lib.messages import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + + def construct(s, NocPktType, + data_mem_size_global, data_mem_size_per_bank, num_banks, + rd_tiles, wr_tiles, num_cgra_rows, num_cgra_columns, + num_tiles, + read_addr, read_data, write_addr, + write_data, noc_recv_load, + send_to_noc_load_request_pkt, send_to_noc_store_pkt): + + CgraPayloadType = NocPktType.get_field_type(kAttrPayload) + DataType = CgraPayloadType.get_field_type(kAttrData) + DataAddrType = CgraPayloadType.get_field_type(kAttrDataAddr) + s.num_banks = num_banks + s.rd_tiles = rd_tiles + s.wr_tiles = wr_tiles + s.recv_raddr = [TestSrcRTL(DataAddrType, read_addr[i]) + for i in range(rd_tiles)] + s.send_rdata = [TestSinkRTL(DataType, read_data[i]) + for i in range(rd_tiles)] + + s.recv_waddr = [TestSrcRTL(DataAddrType, write_addr[i]) + for i in range(wr_tiles)] + s.recv_wdata = [TestSrcRTL(DataType, write_data[i]) + for i in range(wr_tiles)] + + s.recv_from_noc = TestSrcRTL(NocPktType, noc_recv_load) + + s.send_to_noc_load_request_pkt = TestSinkRTL(NocPktType, send_to_noc_load_request_pkt) + s.send_to_noc_store_pkt = TestSinkRTL(NocPktType, send_to_noc_store_pkt) + + s.mem_controller = DataMemControllerRTL(NocPktType, + data_mem_size_global, + data_mem_size_per_bank, + num_banks, + rd_tiles, + wr_tiles, + num_cgra_rows, + num_cgra_columns, + num_tiles, + mem_access_is_combinational = True) + + for i in range(rd_tiles): + s.mem_controller.recv_raddr[i] //= s.recv_raddr[i].send + s.mem_controller.send_rdata[i] //= s.send_rdata[i].recv + + for i in range(wr_tiles): + s.mem_controller.recv_waddr[i] //= s.recv_waddr[i].send + s.mem_controller.recv_wdata[i] //= s.recv_wdata[i].send + + s.mem_controller.recv_from_noc_load_response_pkt //= s.recv_from_noc.send + s.mem_controller.send_to_noc_load_request_pkt //= s.send_to_noc_load_request_pkt.recv + s.mem_controller.send_to_noc_store_pkt //= s.send_to_noc_store_pkt.recv + + s.mem_controller.address_lower //= 0 + s.mem_controller.address_upper //= 31 + + s.cgra_id = 0 + + def done(s): + for i in range(s.rd_tiles): + if not s.recv_raddr[i].done() or not s.send_rdata[i].done(): + return False + + for i in range(s.wr_tiles): + if not s.recv_waddr[i].done() or not s.recv_wdata[i].done(): + return False + + if not s.send_to_noc_load_request_pkt.done() or \ + not s.send_to_noc_store_pkt.done() or \ + not s.recv_from_noc.done(): + return False + + return True + + def line_trace(s): + return s.mem_controller.line_trace() + +def run_sim(test_harness, max_cycles = 40): + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + + ncycles = 0 + print() + print("{}:{}".format(ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format(ncycles, test_harness.line_trace())) + + # Check timeout + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_mem_controller(cmdline_opts): + data_nbits = 32 + predicate_nbits = 1 + DataType = mk_data(data_nbits, predicate_nbits) + data_mem_size_global = 64 + data_mem_size_per_bank = 16 + num_banks = 2 + + num_registers_per_reg_bank = 16 + num_cgra_columns = 1 + num_cgra_rows = 1 + num_tiles = 4 + rd_tiles = 4 + wr_tiles = 4 + ctrl_mem_size = 6 + num_tile_inports = 4 + num_tile_outports =4 + num_fu_inports = 4 + num_fu_outports = 2 + + DataAddrType = mk_bits(clog2(data_mem_size_global)) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + + CtrlType = mk_ctrl(num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank) + + CgraPayloadType = mk_cgra_payload(DataType, + DataAddrType, + CtrlType, + CtrlAddrType) + + InterCgraPktType = mk_inter_cgra_pkt(num_cgra_columns, + num_cgra_rows, + num_tiles, + rd_tiles, + CgraPayloadType) + + # test_meta_data = [ + # # addr: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + # [0x00, 0x00, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + # # addr: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + # [0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd7]] + + # Input write requests. + write_addr = [ + [DataAddrType(2), DataAddrType(31), DataAddrType(45)], + [DataAddrType(40), DataAddrType(31)], + [DataAddrType(2), DataAddrType(3)], + [DataAddrType(2)] + ] + write_data = [ + [DataType(0x00a8, 1), DataType(0x00d7, 1), DataType(0xd545, 1)], + [DataType(0xd040, 1), DataType(0x0d70, 1)], + [DataType(0x0a80, 1), DataType(0x00a9, 1)], + [DataType(0xa800, 1)] + ] + # Input read requests. + read_addr = [ + [DataAddrType(42), DataAddrType(2), DataAddrType(31), DataAddrType(3), DataAddrType(3)], + [DataAddrType(30), DataAddrType(17), DataAddrType(31), DataAddrType(2)], + [], + [DataAddrType(2), DataAddrType(2), DataAddrType(2), DataAddrType(25)] + ] + # Expected response. + read_data = [ + [DataType(0xbbbb, 1), DataType(0x00a8, 1), DataType(0x00d7, 1), DataType(0x0000, 0), DataType(0x00a9, 1)], + [DataType(0x0000, 0), DataType(0x0000, 0), DataType(0x0d70, 1), DataType(0xa800, 1)], + [], + [DataType(0x0000, 0), DataType(0x0a80, 1), DataType(0xa800, 1), DataType(0x0000, 0)] + ] + + # Input data. + send_to_noc_load_request_pkt = [ + # src dst src_x src_y dst_x dst_y src_tile dst_tile remote_src_port opq vc + InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, CgraPayloadType(CMD_LOAD_REQUEST, data_addr = 42)), + ] + + noc_recv_load = [ + InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, CgraPayloadType(CMD_LOAD_RESPONSE, DataType(0xbbbb, 1))) + ] + + # Expected. + send_to_noc_store_pkt = [ + # src dst src_x src_y dst_x dst_y src_tile dst_tile remote_src_port opq vc data_addr + InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, CgraPayloadType(CMD_STORE_REQUEST, DataType(0xd040, 1), 40)), + InterCgraPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, CgraPayloadType(CMD_STORE_REQUEST, DataType(0xd545, 1), 45)), + ] + + th = TestHarness(InterCgraPktType, + data_mem_size_global, + data_mem_size_per_bank, + num_banks, + rd_tiles, + wr_tiles, + num_cgra_rows, + num_cgra_columns, + num_tiles, + read_addr, + read_data, + write_addr, + write_data, + noc_recv_load, + send_to_noc_load_request_pkt, + send_to_noc_store_pkt) + + th.elaborate() + th.mem_controller.set_metadata(VerilogTranslationPass.explicit_module_name, + f'DataMemControllerRTL_translation') + th = config_model_with_cmdline_opts( th, cmdline_opts, duts=['mem_controller'] ) + + run_sim(th) + diff --git a/mem/data/test/DataMemRTL_test.py b/mem/data/test/DataMemRTL_test.py index dbef8dad..e8924ee4 100644 --- a/mem/data/test/DataMemRTL_test.py +++ b/mem/data/test/DataMemRTL_test.py @@ -1,83 +1,83 @@ -""" -========================================================================== -DataMemRTL_test.py -========================================================================== -Test cases for DataMemRTL. - -Author : Cheng Tan - Date : Nov 26, 2022 -""" - -from pymtl3 import * -from ..DataMemRTL import DataMemRTL -from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL -from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL -from ....lib.messages import * -from ....lib.opt_type import * - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness( Component ): - - def construct(s, DataType, AddrType, data_mem_size, read_addr, - read_data, write_addr, write_data, preloadData): - - s.read_addr = TestSrcRTL(AddrType, read_addr) - s.read_data = TestSinkRTL(DataType, read_data) - - s.write_addr = TestSrcRTL(AddrType, write_addr) - s.write_data = TestSrcRTL(DataType, write_data) - - - s.dataMem = DataMemRTL(DataType, data_mem_size, - preload_data = preloadData) - - s.dataMem.recv_raddr[0] //= s.read_addr.send - s.dataMem.send_rdata[0] //= s.read_data.recv - s.dataMem.recv_waddr[0] //= s.write_addr.send - s.dataMem.recv_wdata[0] //= s.write_data.send - - def done(s): - return s.read_addr.done() and s.read_data.done() - - def line_trace(s): - return s.dataMem.line_trace() - -def run_sim(test_harness, max_cycles = 10): - test_harness.elaborate() - test_harness.apply(DefaultPassGroup()) - test_harness.sim_reset() - - # Run simulation - ncycles = 0 - print() - print("{}:{}".format( ncycles, test_harness.line_trace())) - while not test_harness.done() and ncycles < max_cycles: - test_harness.sim_tick() - ncycles += 1 - print("{}:{}".format(ncycles, test_harness.line_trace())) - - # Check timeout - assert ncycles < max_cycles - - test_harness.sim_tick() - test_harness.sim_tick() - test_harness.sim_tick() - -def test_const_queue(): - DataType = mk_data(16, 1) - data_mem_size = 20 - AddrType = mk_bits(clog2(data_mem_size)) - preloadData = [DataType(i, 1) for i in range(data_mem_size)] - - read_addr = [AddrType(2), AddrType(3), AddrType(0), AddrType(12)] - read_data = [DataType(2, 1), DataType(3, 1), DataType(0, 1), DataType(13, 1)] - write_addr = [AddrType(12), AddrType(13)] - write_data = [DataType(13, 1), DataType(14, 1)] - - th = TestHarness(DataType, AddrType, data_mem_size, read_addr, - read_data, write_addr, write_data, preloadData) - run_sim(th) - +""" +========================================================================== +DataMemRTL_test.py +========================================================================== +Test cases for DataMemRTL. + +Author : Cheng Tan + Date : Nov 26, 2022 +""" + +from pymtl3 import * +from ..DataMemRTL import DataMemRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL +from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL +from ....lib.messages import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness( Component ): + + def construct(s, DataType, AddrType, data_mem_size, read_addr, + read_data, write_addr, write_data, preloadData): + + s.read_addr = TestSrcRTL(AddrType, read_addr) + s.read_data = TestSinkRTL(DataType, read_data) + + s.write_addr = TestSrcRTL(AddrType, write_addr) + s.write_data = TestSrcRTL(DataType, write_data) + + + s.dataMem = DataMemRTL(DataType, data_mem_size, + preload_data = preloadData) + + s.dataMem.recv_raddr[0] //= s.read_addr.send + s.dataMem.send_rdata[0] //= s.read_data.recv + s.dataMem.recv_waddr[0] //= s.write_addr.send + s.dataMem.recv_wdata[0] //= s.write_data.send + + def done(s): + return s.read_addr.done() and s.read_data.done() + + def line_trace(s): + return s.dataMem.line_trace() + +def run_sim(test_harness, max_cycles = 10): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + ncycles = 0 + print() + print("{}:{}".format( ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format(ncycles, test_harness.line_trace())) + + # Check timeout + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_const_queue(): + DataType = mk_data(16, 1) + data_mem_size = 20 + AddrType = mk_bits(clog2(data_mem_size)) + preloadData = [DataType(i, 1) for i in range(data_mem_size)] + + read_addr = [AddrType(2), AddrType(3), AddrType(0), AddrType(12)] + read_data = [DataType(2, 1), DataType(3, 1), DataType(0, 1), DataType(13, 1)] + write_addr = [AddrType(12), AddrType(13)] + write_data = [DataType(13, 1), DataType(14, 1)] + + th = TestHarness(DataType, AddrType, data_mem_size, read_addr, + read_data, write_addr, write_data, preloadData) + run_sim(th) + diff --git a/mem/data/test/DataMemScalableRTL_test.py b/mem/data/test/DataMemScalableRTL_test.py index 68ae0dd5..b53265a5 100644 --- a/mem/data/test/DataMemScalableRTL_test.py +++ b/mem/data/test/DataMemScalableRTL_test.py @@ -1,85 +1,85 @@ -""" -========================================================================== -DataMemScalableRTL_test.py -========================================================================== -Test cases for DataMemScalableRTL. - -Author : Cheng Tan - Date : Dec 6, 2024 -""" - -from pymtl3 import * -from ..DataMemScalableRTL import DataMemScalableRTL -from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL -from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL -from ....lib.messages import * -from ....lib.opt_type import * - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DataType, AddrType, data_mem_size, read_addr, - read_data, write_addr, write_data, preloadData): - - s.read_addr = TestSrcRTL(AddrType, read_addr) - s.read_data = TestSinkRTL(DataType, read_data) - - s.write_addr = TestSrcRTL(AddrType, write_addr) - s.write_data = TestSrcRTL(DataType, write_data) - - s.dataMem = DataMemScalableRTL(DataType, data_mem_size, - preload_data = preloadData) - - s.dataMem.recv_raddr[0] //= s.read_addr.send - s.dataMem.send_rdata[0] //= s.read_data.recv - s.dataMem.recv_waddr[0] //= s.write_addr.send - s.dataMem.recv_wdata[0] //= s.write_data.send - - def done(s): - return s.read_addr.done() and s.read_data.done() - - def line_trace(s): - return s.dataMem.line_trace() - -def run_sim(test_harness, max_cycles=20): - test_harness.elaborate() - test_harness.apply(DefaultPassGroup()) - test_harness.sim_reset() - - # Run simulation - - ncycles = 0 - print() - print( "{}:{}".format(ncycles, test_harness.line_trace())) - while not test_harness.done() and ncycles < max_cycles: - test_harness.sim_tick() - ncycles += 1 - print("{}:{}".format(ncycles, test_harness.line_trace())) - - # Check timeout - - assert ncycles < max_cycles - - test_harness.sim_tick() - test_harness.sim_tick() - test_harness.sim_tick() - -def test_const_queue(): - DataType = mk_data(16, 1) - data_mem_size = 20 - AddrType = mk_bits(clog2(data_mem_size)) - preloadData = [DataType(i, 1) for i in range(data_mem_size)] - - read_addr = [AddrType(2), AddrType(3), AddrType(0), AddrType(12)] - read_data = [DataType(2, 1), DataType(3, 1), DataType(0, 1), DataType(13, 1)] - # read_data = [DataType(0, 0), DataType(0, 0), DataType(0, 0), DataType(13, 1)] - write_addr = [AddrType(12), AddrType(13)] - write_data = [DataType(13, 1), DataType(14, 1)] - - th = TestHarness(DataType, AddrType, data_mem_size, read_addr, - read_data, write_addr, write_data, preloadData) - run_sim(th) - +""" +========================================================================== +DataMemScalableRTL_test.py +========================================================================== +Test cases for DataMemScalableRTL. + +Author : Cheng Tan + Date : Dec 6, 2024 +""" + +from pymtl3 import * +from ..DataMemScalableRTL import DataMemScalableRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL +from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL +from ....lib.messages import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + + def construct(s, DataType, AddrType, data_mem_size, read_addr, + read_data, write_addr, write_data, preloadData): + + s.read_addr = TestSrcRTL(AddrType, read_addr) + s.read_data = TestSinkRTL(DataType, read_data) + + s.write_addr = TestSrcRTL(AddrType, write_addr) + s.write_data = TestSrcRTL(DataType, write_data) + + s.dataMem = DataMemScalableRTL(DataType, data_mem_size, + preload_data = preloadData) + + s.dataMem.recv_raddr[0] //= s.read_addr.send + s.dataMem.send_rdata[0] //= s.read_data.recv + s.dataMem.recv_waddr[0] //= s.write_addr.send + s.dataMem.recv_wdata[0] //= s.write_data.send + + def done(s): + return s.read_addr.done() and s.read_data.done() + + def line_trace(s): + return s.dataMem.line_trace() + +def run_sim(test_harness, max_cycles=20): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + + ncycles = 0 + print() + print( "{}:{}".format(ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format(ncycles, test_harness.line_trace())) + + # Check timeout + + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_const_queue(): + DataType = mk_data(16, 1) + data_mem_size = 20 + AddrType = mk_bits(clog2(data_mem_size)) + preloadData = [DataType(i, 1) for i in range(data_mem_size)] + + read_addr = [AddrType(2), AddrType(3), AddrType(0), AddrType(12)] + read_data = [DataType(2, 1), DataType(3, 1), DataType(0, 1), DataType(13, 1)] + # read_data = [DataType(0, 0), DataType(0, 0), DataType(0, 0), DataType(13, 1)] + write_addr = [AddrType(12), AddrType(13)] + write_data = [DataType(13, 1), DataType(14, 1)] + + th = TestHarness(DataType, AddrType, data_mem_size, read_addr, + read_data, write_addr, write_data, preloadData) + run_sim(th) + diff --git a/mem/data/test/DataMemWrapperRTL_test.py b/mem/data/test/DataMemWrapperRTL_test.py index 88c0eaf6..d0647316 100644 --- a/mem/data/test/DataMemWrapperRTL_test.py +++ b/mem/data/test/DataMemWrapperRTL_test.py @@ -1,111 +1,111 @@ -""" -========================================================================== -DataMemWrapperRTL_test.py -========================================================================== -Test cases for DataMemWrapperRTL. - -Author : Cheng Tan - Date : Aug 28, 2025 -""" - -from pymtl3 import * -from ..DataMemWrapperRTL import DataMemWrapperRTL -from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL -from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL -from ....lib.messages import * -from ....lib.opt_type import * - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DataType, MemReadType, MemWriteType, MemResponseType, - global_data_mem_size, per_bank_data_mem_size, - mem_rd_request, mem_wr_request, mem_response): - - s.mem_rd_request = TestSrcRTL(MemReadType, mem_rd_request) - s.mem_wr_request = TestSrcRTL(MemWriteType, mem_wr_request) - - s.mem_response = TestSinkRTL(MemResponseType, mem_response) - - s.data_mem_wrapper = DataMemWrapperRTL(DataType, - MemReadType, - MemWriteType, - MemResponseType, - global_data_mem_size, - per_bank_data_mem_size, - False) - - s.data_mem_wrapper.recv_rd //= s.mem_rd_request.send - s.data_mem_wrapper.recv_wr //= s.mem_wr_request.send - s.data_mem_wrapper.send //= s.mem_response.recv - - def done(s): - return s.mem_rd_request.done() and s.mem_wr_request.done() and s.mem_response.done() - - def line_trace(s): - return s.data_mem_wrapper.line_trace() - -def run_sim(test_harness, max_cycles = 20): - test_harness.elaborate() - test_harness.apply(DefaultPassGroup()) - test_harness.sim_reset() - - # Run simulation - ncycles = 0 - print() - print("{}:{}".format( ncycles, test_harness.line_trace())) - while not test_harness.done() and ncycles < max_cycles: - test_harness.sim_tick() - ncycles += 1 - print("{}:{}".format(ncycles, test_harness.line_trace())) - - # Check timeout - assert ncycles < max_cycles - - test_harness.sim_tick() - test_harness.sim_tick() - test_harness.sim_tick() - -def test_const_queue(): - DataType = mk_data(16, 1) - global_data_mem_size = 32 - per_bank_data_mem_size = 8 - rd_tiles = 4 - wr_tiles = 4 - rd_banks = 4 - wr_banks = 4 - num_cgras = 4 - num_tiles = 4 - - MemReadType = mk_mem_access_pkt(DataType, rd_tiles, rd_banks, global_data_mem_size, num_cgras, num_tiles) - MemWriteType = mk_mem_access_pkt(DataType, wr_tiles, wr_banks, global_data_mem_size, num_cgras, num_tiles) - # Reverses the source and destination for response packet. - MemResponseType = mk_mem_access_pkt(DataType, rd_banks, rd_tiles, global_data_mem_size, num_cgras, num_tiles) - - # dst addr data - mem_wr_request = [MemWriteType (0, 0, 2, DataType(0xc, 1), 0, 0, 0), - MemWriteType (0, 0, 4, DataType(0xb, 1), 0, 0, 0), - MemWriteType (0, 0, 6, DataType(0xa, 1), 0, 0, 0) - ] - mem_rd_request = [MemReadType (0, 1, 6, DataType(0x0, 0), 0, 0, 0), - MemReadType (0, 2, 6, DataType(0x0, 0), 0, 0, 0), - MemReadType (0, 3, 6, DataType(0x0, 0), 0, 0, 0), - MemReadType (0, 1, 6, DataType(0x0, 0), 0, 0, 0), - MemReadType (0, 2, 4, DataType(0x0, 0), 0, 0, 0), - MemReadType (0, 3, 2, DataType(0x0, 0), 0, 0, 3) - ] - mem_response = [MemResponseType(1, 0, 6, DataType(0x0, 0), 0, 0, 0), - MemResponseType(2, 0, 6, DataType(0x0, 0), 0, 0, 0), - MemResponseType(3, 0, 6, DataType(0x0, 0), 0, 0, 0), - MemResponseType(1, 0, 6, DataType(0xa, 1), 0, 0, 0), - MemResponseType(2, 0, 4, DataType(0xb, 1), 0, 0, 0), - MemResponseType(3, 0, 2, DataType(0xc, 1), 0, 0, 3) - ] - - th = TestHarness(DataType, MemReadType, MemWriteType, MemResponseType, - global_data_mem_size, per_bank_data_mem_size, - mem_rd_request, mem_wr_request, mem_response) - run_sim(th) +""" +========================================================================== +DataMemWrapperRTL_test.py +========================================================================== +Test cases for DataMemWrapperRTL. + +Author : Cheng Tan + Date : Aug 28, 2025 +""" + +from pymtl3 import * +from ..DataMemWrapperRTL import DataMemWrapperRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL +from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL +from ....lib.messages import * +from ....lib.opt_type import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + + def construct(s, DataType, MemReadType, MemWriteType, MemResponseType, + global_data_mem_size, per_bank_data_mem_size, + mem_rd_request, mem_wr_request, mem_response): + + s.mem_rd_request = TestSrcRTL(MemReadType, mem_rd_request) + s.mem_wr_request = TestSrcRTL(MemWriteType, mem_wr_request) + + s.mem_response = TestSinkRTL(MemResponseType, mem_response) + + s.data_mem_wrapper = DataMemWrapperRTL(DataType, + MemReadType, + MemWriteType, + MemResponseType, + global_data_mem_size, + per_bank_data_mem_size, + False) + + s.data_mem_wrapper.recv_rd //= s.mem_rd_request.send + s.data_mem_wrapper.recv_wr //= s.mem_wr_request.send + s.data_mem_wrapper.send //= s.mem_response.recv + + def done(s): + return s.mem_rd_request.done() and s.mem_wr_request.done() and s.mem_response.done() + + def line_trace(s): + return s.data_mem_wrapper.line_trace() + +def run_sim(test_harness, max_cycles = 20): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + ncycles = 0 + print() + print("{}:{}".format( ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format(ncycles, test_harness.line_trace())) + + # Check timeout + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_const_queue(): + DataType = mk_data(16, 1) + global_data_mem_size = 32 + per_bank_data_mem_size = 8 + rd_tiles = 4 + wr_tiles = 4 + rd_banks = 4 + wr_banks = 4 + num_cgras = 4 + num_tiles = 4 + + MemReadType = mk_mem_access_pkt(DataType, rd_tiles, rd_banks, global_data_mem_size, num_cgras, num_tiles) + MemWriteType = mk_mem_access_pkt(DataType, wr_tiles, wr_banks, global_data_mem_size, num_cgras, num_tiles) + # Reverses the source and destination for response packet. + MemResponseType = mk_mem_access_pkt(DataType, rd_banks, rd_tiles, global_data_mem_size, num_cgras, num_tiles) + + # dst addr data + mem_wr_request = [MemWriteType (0, 0, 2, DataType(0xc, 1), 0, 0, 0), + MemWriteType (0, 0, 4, DataType(0xb, 1), 0, 0, 0), + MemWriteType (0, 0, 6, DataType(0xa, 1), 0, 0, 0) + ] + mem_rd_request = [MemReadType (0, 1, 6, DataType(0x0, 0), 0, 0, 0), + MemReadType (0, 2, 6, DataType(0x0, 0), 0, 0, 0), + MemReadType (0, 3, 6, DataType(0x0, 0), 0, 0, 0), + MemReadType (0, 1, 6, DataType(0x0, 0), 0, 0, 0), + MemReadType (0, 2, 4, DataType(0x0, 0), 0, 0, 0), + MemReadType (0, 3, 2, DataType(0x0, 0), 0, 0, 3) + ] + mem_response = [MemResponseType(1, 0, 6, DataType(0x0, 0), 0, 0, 0), + MemResponseType(2, 0, 6, DataType(0x0, 0), 0, 0, 0), + MemResponseType(3, 0, 6, DataType(0x0, 0), 0, 0, 0), + MemResponseType(1, 0, 6, DataType(0xa, 1), 0, 0, 0), + MemResponseType(2, 0, 4, DataType(0xb, 1), 0, 0, 0), + MemResponseType(3, 0, 2, DataType(0xc, 1), 0, 0, 3) + ] + + th = TestHarness(DataType, MemReadType, MemWriteType, MemResponseType, + global_data_mem_size, per_bank_data_mem_size, + mem_rd_request, mem_wr_request, mem_response) + run_sim(th) diff --git a/mem/register_cluster/RegisterBankRTL.py b/mem/register_cluster/RegisterBankRTL.py index 15ef468e..d550df2f 100644 --- a/mem/register_cluster/RegisterBankRTL.py +++ b/mem/register_cluster/RegisterBankRTL.py @@ -1,85 +1,85 @@ -""" -========================================================================== -RegisterBankRTL.py -========================================================================== -Register bank between routing crossbar and FU in CGRA tile. It can be -initialized/modeled/parameterized as multiple instances. Each one contains -multiple registers that can be indexed/picked for read/write. Each has -one write port (from routing crossbar, fu crossbar, or const) and one read -port (towards FU). - -Author : Cheng Tan - Date : Feb 6, 2025 -""" - -from pymtl3 import * -from pymtl3.stdlib.primitive import RegisterFile -from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ...lib.opt_type import * -from ...lib.util.common import * - -from ...lib.util.common import ( - READ_TOWARDS_NOTHING, - READ_TOWARDS_FU, - READ_TOWARDS_ROUTING_XBAR, - READ_TOWARDS_BOTH, -) - -class RegisterBankRTL(Component): - - def construct(s, DataType, CtrlType, reg_bank_id, num_registers = 4): - - # Constant - AddrType = mk_bits(clog2(num_registers)) - s.reg_bank_id = reg_bank_id - - # Interface - s.inport_opt = InPort(CtrlType) - s.send_data = SendIfcRTL(DataType) - # InPort is enough to expose the data. Recv ifc would complicate - # the design and handshake. - s.inport_wdata = [InPort(DataType) for _ in range(3)] - s.inport_valid = [InPort(mk_bits(1)) for _ in range(3)] - - # Component - s.reg_file = RegisterFile(DataType, num_registers, rd_ports = 1, - wr_ports = 1) - - @update - def access_registers(): - # Initializes signals. - s.reg_file.raddr[0] @= AddrType() - s.send_data.msg @= DataType() - s.reg_file.waddr[0] @= AddrType() - s.reg_file.wdata[0] @= DataType() - s.reg_file.wen[0] @= 0 - - read_towards = s.inport_opt.read_reg_towards[reg_bank_id] - # Reads from register if towards FU (1), routing_xbar (2), or both (3) - if read_towards > 0: - s.reg_file.raddr[0] @= s.inport_opt.read_reg_idx[reg_bank_id] - s.send_data.msg @= s.reg_file.rdata[0] - - write_reg_from = s.inport_opt.write_reg_from[reg_bank_id] - if ~s.reset & (write_reg_from > 0): - if s.inport_valid[write_reg_from - 1]: - s.reg_file.waddr[0] @= s.inport_opt.write_reg_idx[reg_bank_id] - s.reg_file.wdata[0] @= s.inport_wdata[write_reg_from - 1] - s.reg_file.wen[0] @= 1 - - @update - def update_send_val(): - s.send_data.val @= 0 - read_towards = s.inport_opt.read_reg_towards[reg_bank_id] - # Sends if towards FU or both (i.e. read_towards > 0) - if ~s.reset & (read_towards > 0): - s.send_data.val @= 1 - - def line_trace(s): - inport_opt_str = "inport_opt: " + str(s.inport_opt) - inport_wdata_str = "inport_wdata: " + str(s.inport_wdata) - content_str = "content: " + "|".join([str(data) for data in s.reg_file.regs]) - send_data_str = "send_data: " + str(s.send_data.msg) - return f'reg_bank_id: {s.reg_bank_id} || {inport_wdata_str} || {inport_opt_str} || [{content_str}] || {send_data_str}' - +""" +========================================================================== +RegisterBankRTL.py +========================================================================== +Register bank between routing crossbar and FU in CGRA tile. It can be +initialized/modeled/parameterized as multiple instances. Each one contains +multiple registers that can be indexed/picked for read/write. Each has +one write port (from routing crossbar, fu crossbar, or const) and one read +port (towards FU). + +Author : Cheng Tan + Date : Feb 6, 2025 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ...lib.opt_type import * +from ...lib.util.common import * + +from ...lib.util.common import ( + READ_TOWARDS_NOTHING, + READ_TOWARDS_FU, + READ_TOWARDS_ROUTING_XBAR, + READ_TOWARDS_BOTH, +) + +class RegisterBankRTL(Component): + + def construct(s, DataType, CtrlType, reg_bank_id, num_registers = 4): + + # Constant + AddrType = mk_bits(clog2(num_registers)) + s.reg_bank_id = reg_bank_id + + # Interface + s.inport_opt = InPort(CtrlType) + s.send_data = SendIfcRTL(DataType) + # InPort is enough to expose the data. Recv ifc would complicate + # the design and handshake. + s.inport_wdata = [InPort(DataType) for _ in range(3)] + s.inport_valid = [InPort(mk_bits(1)) for _ in range(3)] + + # Component + s.reg_file = RegisterFile(DataType, num_registers, rd_ports = 1, + wr_ports = 1) + + @update + def access_registers(): + # Initializes signals. + s.reg_file.raddr[0] @= AddrType() + s.send_data.msg @= DataType() + s.reg_file.waddr[0] @= AddrType() + s.reg_file.wdata[0] @= DataType() + s.reg_file.wen[0] @= 0 + + read_towards = s.inport_opt.read_reg_towards[reg_bank_id] + # Reads from register if towards FU (1), routing_xbar (2), or both (3) + if read_towards > 0: + s.reg_file.raddr[0] @= s.inport_opt.read_reg_idx[reg_bank_id] + s.send_data.msg @= s.reg_file.rdata[0] + + write_reg_from = s.inport_opt.write_reg_from[reg_bank_id] + if ~s.reset & (write_reg_from > 0): + if s.inport_valid[write_reg_from - 1]: + s.reg_file.waddr[0] @= s.inport_opt.write_reg_idx[reg_bank_id] + s.reg_file.wdata[0] @= s.inport_wdata[write_reg_from - 1] + s.reg_file.wen[0] @= 1 + + @update + def update_send_val(): + s.send_data.val @= 0 + read_towards = s.inport_opt.read_reg_towards[reg_bank_id] + # Sends if towards FU or both (i.e. read_towards > 0) + if ~s.reset & (read_towards > 0): + s.send_data.val @= 1 + + def line_trace(s): + inport_opt_str = "inport_opt: " + str(s.inport_opt) + inport_wdata_str = "inport_wdata: " + str(s.inport_wdata) + content_str = "content: " + "|".join([str(data) for data in s.reg_file.regs]) + send_data_str = "send_data: " + str(s.send_data.msg) + return f'reg_bank_id: {s.reg_bank_id} || {inport_wdata_str} || {inport_opt_str} || [{content_str}] || {send_data_str}' + diff --git a/mem/register_cluster/RegisterClusterRTL.py b/mem/register_cluster/RegisterClusterRTL.py index 128ad085..f6a7ad25 100644 --- a/mem/register_cluster/RegisterClusterRTL.py +++ b/mem/register_cluster/RegisterClusterRTL.py @@ -1,106 +1,106 @@ -""" -========================================================================== -RegisterClusterRTL.py -========================================================================== -Register cluster contains multiple register banks. - -Author : Cheng Tan - Date : Feb 7, 2025 -""" - -from pymtl3 import * -from pymtl3.stdlib.primitive import RegisterFile -from .RegisterBankRTL import RegisterBankRTL -from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ...lib.opt_type import * -from ...lib.util.common import * - -# Canonical definitions live in common.py; keep local aliases to minimize churn. -from ...lib.util.common import ( - READ_TOWARDS_NOTHING, - READ_TOWARDS_FU, - READ_TOWARDS_ROUTING_XBAR, - READ_TOWARDS_BOTH, -) - -kReadTowardsNothing = READ_TOWARDS_NOTHING -kReadTowardsFu = READ_TOWARDS_FU -kReadTowardsRoutingXbar = READ_TOWARDS_ROUTING_XBAR -kReadTowardsBoth = READ_TOWARDS_BOTH - -class RegisterClusterRTL(Component): - - def construct(s, DataType, CtrlType, num_reg_banks, - num_registers_per_reg_bank = 4): - - # Interface - s.inport_opt = InPort(CtrlType) - s.recv_data_from_routing_crossbar = [RecvIfcRTL(DataType) for _ in range(num_reg_banks)] - s.recv_data_from_fu_crossbar = [RecvIfcRTL(DataType) for _ in range(num_reg_banks)] - s.recv_data_from_const = [RecvIfcRTL(DataType) for _ in range(num_reg_banks)] - s.send_data_to_fu = [SendIfcRTL(DataType) for _ in range(num_reg_banks)] - # Direct output from register banks towards routing crossbar (bypasses FU). - s.send_data_to_routing_crossbar = [SendIfcRTL(DataType) for _ in range(num_reg_banks)] - - # Component - s.reg_bank = [RegisterBankRTL(DataType, CtrlType, i, num_registers_per_reg_bank) - for i in range(num_reg_banks)] - - # Connections. - for i in range(num_reg_banks): - s.reg_bank[i].inport_opt //= s.inport_opt - s.reg_bank[i].inport_wdata[PORT_INDEX_ROUTING_CROSSBAR] //= s.recv_data_from_routing_crossbar[i].msg - s.reg_bank[i].inport_wdata[PORT_INDEX_FU_CROSSBAR] //= s.recv_data_from_fu_crossbar[i].msg - s.reg_bank[i].inport_wdata[PORT_INDEX_CONST] //= s.recv_data_from_const[i].msg - s.reg_bank[i].inport_valid[PORT_INDEX_ROUTING_CROSSBAR] //= s.recv_data_from_routing_crossbar[i].val - s.reg_bank[i].inport_valid[PORT_INDEX_FU_CROSSBAR] //= s.recv_data_from_fu_crossbar[i].val - s.reg_bank[i].inport_valid[PORT_INDEX_CONST] //= s.recv_data_from_const[i].val - - @update - def update_msgs_signals(): - # Initializes signals. - for i in range(num_reg_banks): - s.send_data_to_fu[i].msg @= DataType() - s.recv_data_from_routing_crossbar[i].rdy @= 0 - s.recv_data_from_fu_crossbar[i].rdy @= 0 - s.recv_data_from_const[i].rdy @= 0 - s.send_data_to_fu[i].val @= 0 - s.send_data_to_routing_crossbar[i].msg @= DataType() - s.send_data_to_routing_crossbar[i].val @= 0 - - for i in range(num_reg_banks): - read_towards = s.inport_opt.read_reg_towards[i] - # Checks if data should go towards FU (1 or 3) - reg_towards_fu = (read_towards == kReadTowardsFu) | (read_towards == kReadTowardsBoth) - # Checks if data should go towards routing_xbar (2 or 3) - reg_towards_routing_xbar = (read_towards == kReadTowardsRoutingXbar) | (read_towards == kReadTowardsBoth) - - # Data from register bank has priority over routing crossbar data for FU path. - # Note: reg_bank[i].send_data.val is set based on read_reg_towards in RegisterBankRTL. - if s.reg_bank[i].send_data.val & reg_towards_fu: - s.send_data_to_fu[i].msg @= \ - s.reg_bank[i].send_data.msg - elif s.recv_data_from_routing_crossbar[i].val: - s.send_data_to_fu[i].msg @= \ - s.recv_data_from_routing_crossbar[i].msg - - s.send_data_to_fu[i].val @= \ - s.recv_data_from_routing_crossbar[i].val | \ - (s.reg_bank[i].send_data.val & reg_towards_fu) - s.reg_bank[i].send_data.rdy @= s.send_data_to_fu[i].rdy - - s.recv_data_from_routing_crossbar[i].rdy @= ((s.inport_opt.write_reg_from[i] == PORT_ROUTING_CROSSBAR) \ - & (s.inport_opt.operation == OPT_NAH)) | s.send_data_to_fu[i].rdy - s.recv_data_from_fu_crossbar[i].rdy @= 1 - s.recv_data_from_const[i].rdy @= 1 - - # Drive the direct reg -> routing_crossbar path. - if reg_towards_routing_xbar: - s.send_data_to_routing_crossbar[i].msg @= s.reg_bank[i].send_data.msg - s.send_data_to_routing_crossbar[i].val @= 1 - - def line_trace(s): - reg_bank_str = "reg_banks: " + "|".join([reg_bank.line_trace() for reg_bank in s.reg_bank]) - return f'{reg_bank_str}' - +""" +========================================================================== +RegisterClusterRTL.py +========================================================================== +Register cluster contains multiple register banks. + +Author : Cheng Tan + Date : Feb 7, 2025 +""" + +from pymtl3 import * +from pymtl3.stdlib.primitive import RegisterFile +from .RegisterBankRTL import RegisterBankRTL +from ...lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ...lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ...lib.opt_type import * +from ...lib.util.common import * + +# Canonical definitions live in common.py; keep local aliases to minimize churn. +from ...lib.util.common import ( + READ_TOWARDS_NOTHING, + READ_TOWARDS_FU, + READ_TOWARDS_ROUTING_XBAR, + READ_TOWARDS_BOTH, +) + +kReadTowardsNothing = READ_TOWARDS_NOTHING +kReadTowardsFu = READ_TOWARDS_FU +kReadTowardsRoutingXbar = READ_TOWARDS_ROUTING_XBAR +kReadTowardsBoth = READ_TOWARDS_BOTH + +class RegisterClusterRTL(Component): + + def construct(s, DataType, CtrlType, num_reg_banks, + num_registers_per_reg_bank = 4): + + # Interface + s.inport_opt = InPort(CtrlType) + s.recv_data_from_routing_crossbar = [RecvIfcRTL(DataType) for _ in range(num_reg_banks)] + s.recv_data_from_fu_crossbar = [RecvIfcRTL(DataType) for _ in range(num_reg_banks)] + s.recv_data_from_const = [RecvIfcRTL(DataType) for _ in range(num_reg_banks)] + s.send_data_to_fu = [SendIfcRTL(DataType) for _ in range(num_reg_banks)] + # Direct output from register banks towards routing crossbar (bypasses FU). + s.send_data_to_routing_crossbar = [SendIfcRTL(DataType) for _ in range(num_reg_banks)] + + # Component + s.reg_bank = [RegisterBankRTL(DataType, CtrlType, i, num_registers_per_reg_bank) + for i in range(num_reg_banks)] + + # Connections. + for i in range(num_reg_banks): + s.reg_bank[i].inport_opt //= s.inport_opt + s.reg_bank[i].inport_wdata[PORT_INDEX_ROUTING_CROSSBAR] //= s.recv_data_from_routing_crossbar[i].msg + s.reg_bank[i].inport_wdata[PORT_INDEX_FU_CROSSBAR] //= s.recv_data_from_fu_crossbar[i].msg + s.reg_bank[i].inport_wdata[PORT_INDEX_CONST] //= s.recv_data_from_const[i].msg + s.reg_bank[i].inport_valid[PORT_INDEX_ROUTING_CROSSBAR] //= s.recv_data_from_routing_crossbar[i].val + s.reg_bank[i].inport_valid[PORT_INDEX_FU_CROSSBAR] //= s.recv_data_from_fu_crossbar[i].val + s.reg_bank[i].inport_valid[PORT_INDEX_CONST] //= s.recv_data_from_const[i].val + + @update + def update_msgs_signals(): + # Initializes signals. + for i in range(num_reg_banks): + s.send_data_to_fu[i].msg @= DataType() + s.recv_data_from_routing_crossbar[i].rdy @= 0 + s.recv_data_from_fu_crossbar[i].rdy @= 0 + s.recv_data_from_const[i].rdy @= 0 + s.send_data_to_fu[i].val @= 0 + s.send_data_to_routing_crossbar[i].msg @= DataType() + s.send_data_to_routing_crossbar[i].val @= 0 + + for i in range(num_reg_banks): + read_towards = s.inport_opt.read_reg_towards[i] + # Checks if data should go towards FU (1 or 3) + reg_towards_fu = (read_towards == kReadTowardsFu) | (read_towards == kReadTowardsBoth) + # Checks if data should go towards routing_xbar (2 or 3) + reg_towards_routing_xbar = (read_towards == kReadTowardsRoutingXbar) | (read_towards == kReadTowardsBoth) + + # Data from register bank has priority over routing crossbar data for FU path. + # Note: reg_bank[i].send_data.val is set based on read_reg_towards in RegisterBankRTL. + if s.reg_bank[i].send_data.val & reg_towards_fu: + s.send_data_to_fu[i].msg @= \ + s.reg_bank[i].send_data.msg + elif s.recv_data_from_routing_crossbar[i].val: + s.send_data_to_fu[i].msg @= \ + s.recv_data_from_routing_crossbar[i].msg + + s.send_data_to_fu[i].val @= \ + s.recv_data_from_routing_crossbar[i].val | \ + (s.reg_bank[i].send_data.val & reg_towards_fu) + s.reg_bank[i].send_data.rdy @= s.send_data_to_fu[i].rdy + + s.recv_data_from_routing_crossbar[i].rdy @= ((s.inport_opt.write_reg_from[i] == PORT_ROUTING_CROSSBAR) \ + & (s.inport_opt.operation == OPT_NAH)) | s.send_data_to_fu[i].rdy + s.recv_data_from_fu_crossbar[i].rdy @= 1 + s.recv_data_from_const[i].rdy @= 1 + + # Drive the direct reg -> routing_crossbar path. + if reg_towards_routing_xbar: + s.send_data_to_routing_crossbar[i].msg @= s.reg_bank[i].send_data.msg + s.send_data_to_routing_crossbar[i].val @= 1 + + def line_trace(s): + reg_bank_str = "reg_banks: " + "|".join([reg_bank.line_trace() for reg_bank in s.reg_bank]) + return f'{reg_bank_str}' + diff --git a/mem/register_cluster/test/RegisterBankRTL_test.py b/mem/register_cluster/test/RegisterBankRTL_test.py index 65f24516..3f9949c5 100644 --- a/mem/register_cluster/test/RegisterBankRTL_test.py +++ b/mem/register_cluster/test/RegisterBankRTL_test.py @@ -1,106 +1,106 @@ -""" -========================================================================== -RegisterBankRTL_test.py -========================================================================== -Test cases for RegisterBankRTL. - -Author : Cheng Tan - Date : Feb 7, 2025 -""" - -from pymtl3 import * -from ..RegisterBankRTL import RegisterBankRTL -from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL -from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL -from ....lib.messages import * -from ....lib.opt_type import * -from ....lib.util.common import * - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DataType, ConfigType, reg_bank_id, num_registers, - src_opt, src_msgs, sink_msgs): - - s.sink = TestSinkRTL(DataType, sink_msgs) - - s.reg_bank = RegisterBankRTL(DataType, ConfigType, reg_bank_id, - num_registers) - - s.reg_bank.inport_wdata[PORT_INDEX_ROUTING_CROSSBAR] //= src_msgs[PORT_INDEX_ROUTING_CROSSBAR] - s.reg_bank.inport_wdata[PORT_INDEX_FU_CROSSBAR] //= src_msgs[PORT_INDEX_FU_CROSSBAR] - s.reg_bank.inport_wdata[PORT_INDEX_CONST] //= src_msgs[PORT_INDEX_CONST] - s.reg_bank.inport_valid[PORT_INDEX_ROUTING_CROSSBAR] //= 1 - s.reg_bank.inport_valid[PORT_INDEX_FU_CROSSBAR] //= 1 - s.reg_bank.inport_valid[PORT_INDEX_CONST] //= 1 - s.reg_bank.inport_opt //= src_opt - s.reg_bank.send_data //= s.sink.recv - - def done(s): - return s.sink.done() - - def line_trace(s): - return s.reg_bank.line_trace() - -def run_sim(test_harness, max_cycles = 10): - test_harness.elaborate() - test_harness.apply(DefaultPassGroup()) - test_harness.sim_reset() - - # Run simulation - ncycles = 0 - print() - print("{}:{}".format( ncycles, test_harness.line_trace())) - while not test_harness.done() and ncycles < max_cycles: - test_harness.sim_tick() - ncycles += 1 - print("{}:{}".format(ncycles, test_harness.line_trace())) - - # Check timeout - assert ncycles < max_cycles - - test_harness.sim_tick() - test_harness.sim_tick() - test_harness.sim_tick() - -def test_reg_bank(): - DataType = mk_data(16, 1) - data_mem_size = 20 - AddrType = mk_bits(clog2(data_mem_size)) - preloadData = [DataType(i, 1) for i in range(data_mem_size)] - - num_ctrl_operations = 64 - num_fu_inports = 4 - num_fu_outports = 2 - num_tile_inports = 4 - num_tile_outports = 4 - num_registers_per_reg_bank = 16 - reg_bank_id = 1 - - ConfigType = mk_ctrl(num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports, - num_registers_per_reg_bank) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - - src_opt = ConfigType(OPT_ADD_CONST, pickRegister) - src_opt.write_reg_from[reg_bank_id] = b2(2) - # Writes data into reg[15]. - src_opt.write_reg_idx[reg_bank_id] = b4(15) - # read_reg_towards: 0=nothing, 1=FU, 2=routing_xbar, 3=both - src_opt.read_reg_towards[reg_bank_id] = b2(1) - # Reads data from reg[15]. - src_opt.read_reg_idx[reg_bank_id] = b4(15) # read after write - - write_data = [DataType(10, 1), DataType(11, 1), DataType(12, 1)] - expected_read_data = [DataType(0, 0), DataType(11, 1), DataType(11, 1)] - - th = TestHarness(DataType, ConfigType, reg_bank_id, num_registers_per_reg_bank, - src_opt, write_data, expected_read_data) - run_sim(th) - +""" +========================================================================== +RegisterBankRTL_test.py +========================================================================== +Test cases for RegisterBankRTL. + +Author : Cheng Tan + Date : Feb 7, 2025 +""" + +from pymtl3 import * +from ..RegisterBankRTL import RegisterBankRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL +from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL +from ....lib.messages import * +from ....lib.opt_type import * +from ....lib.util.common import * + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + + def construct(s, DataType, ConfigType, reg_bank_id, num_registers, + src_opt, src_msgs, sink_msgs): + + s.sink = TestSinkRTL(DataType, sink_msgs) + + s.reg_bank = RegisterBankRTL(DataType, ConfigType, reg_bank_id, + num_registers) + + s.reg_bank.inport_wdata[PORT_INDEX_ROUTING_CROSSBAR] //= src_msgs[PORT_INDEX_ROUTING_CROSSBAR] + s.reg_bank.inport_wdata[PORT_INDEX_FU_CROSSBAR] //= src_msgs[PORT_INDEX_FU_CROSSBAR] + s.reg_bank.inport_wdata[PORT_INDEX_CONST] //= src_msgs[PORT_INDEX_CONST] + s.reg_bank.inport_valid[PORT_INDEX_ROUTING_CROSSBAR] //= 1 + s.reg_bank.inport_valid[PORT_INDEX_FU_CROSSBAR] //= 1 + s.reg_bank.inport_valid[PORT_INDEX_CONST] //= 1 + s.reg_bank.inport_opt //= src_opt + s.reg_bank.send_data //= s.sink.recv + + def done(s): + return s.sink.done() + + def line_trace(s): + return s.reg_bank.line_trace() + +def run_sim(test_harness, max_cycles = 10): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + ncycles = 0 + print() + print("{}:{}".format( ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format(ncycles, test_harness.line_trace())) + + # Check timeout + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_reg_bank(): + DataType = mk_data(16, 1) + data_mem_size = 20 + AddrType = mk_bits(clog2(data_mem_size)) + preloadData = [DataType(i, 1) for i in range(data_mem_size)] + + num_ctrl_operations = 64 + num_fu_inports = 4 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + num_registers_per_reg_bank = 16 + reg_bank_id = 1 + + ConfigType = mk_ctrl(num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + + src_opt = ConfigType(OPT_ADD_CONST, pickRegister) + src_opt.write_reg_from[reg_bank_id] = b2(2) + # Writes data into reg[15]. + src_opt.write_reg_idx[reg_bank_id] = b4(15) + # read_reg_towards: 0=nothing, 1=FU, 2=routing_xbar, 3=both + src_opt.read_reg_towards[reg_bank_id] = b2(1) + # Reads data from reg[15]. + src_opt.read_reg_idx[reg_bank_id] = b4(15) # read after write + + write_data = [DataType(10, 1), DataType(11, 1), DataType(12, 1)] + expected_read_data = [DataType(0, 0), DataType(11, 1), DataType(11, 1)] + + th = TestHarness(DataType, ConfigType, reg_bank_id, num_registers_per_reg_bank, + src_opt, write_data, expected_read_data) + run_sim(th) + diff --git a/mem/register_cluster/test/RegisterClusterRTL_test.py b/mem/register_cluster/test/RegisterClusterRTL_test.py index 34f7080e..d7fbb4a3 100644 --- a/mem/register_cluster/test/RegisterClusterRTL_test.py +++ b/mem/register_cluster/test/RegisterClusterRTL_test.py @@ -1,376 +1,376 @@ -""" -========================================================================== -RegisterClusterRTL_test.py -========================================================================== -Test cases for RegisterClusterRTL. - -Author : Cheng Tan - Date : Feb 7, 2025 -""" - -from pymtl3 import * -from ..RegisterClusterRTL import RegisterClusterRTL -from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL -from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL -from ....lib.messages import * -from ....lib.opt_type import * -from ....lib.util.common import ( - READ_TOWARDS_NOTHING, - READ_TOWARDS_FU, - READ_TOWARDS_ROUTING_XBAR, - READ_TOWARDS_BOTH, - PORT_INDEX_ROUTING_CROSSBAR, - PORT_INDEX_FU_CROSSBAR, - PORT_INDEX_CONST, - PORT_ROUTING_CROSSBAR, - PORT_FU_CROSSBAR, - PORT_CONST, -) - -# Local bitwidth helpers (avoid relying on optional pymtl3.stdlib modules) -Bits2 = mk_bits(2) -Bits4 = mk_bits(4) - -def b2(v): - return Bits2(v) - -def b4(v): - return Bits4(v) - -#------------------------------------------------------------------------- -# Test harness -#------------------------------------------------------------------------- - -class TestHarness(Component): - - def construct(s, DataType, ConfigType, num_reg_banks, num_registers, - src_opt, src_msgs_routing_xbar, src_msgs_fu_xbar, - src_msgs_const, sink_msgs): - - s.num_reg_banks = num_reg_banks - s.src_opt = Wire(ConfigType) - - s.src_routing_xbar = [TestSrcRTL(DataType, src_msgs_routing_xbar[i]) - for i in range(num_reg_banks)] - s.src_fu_xbar = [TestSrcRTL(DataType, src_msgs_fu_xbar[i]) - for i in range(num_reg_banks)] - s.src_const = [TestSrcRTL(DataType, src_msgs_const[i]) - for i in range(num_reg_banks)] - - s.sink = [TestSinkRTL(DataType, sink_msgs[i]) - for i in range(num_reg_banks)] - - s.reg_cluster = RegisterClusterRTL(DataType, ConfigType, num_reg_banks, - num_registers) - - s.src_opt //= src_opt - for i in range(num_reg_banks): - s.reg_cluster.inport_opt //= s.src_opt - s.reg_cluster.recv_data_from_routing_crossbar[i] //= \ - s.src_routing_xbar[i].send - s.reg_cluster.recv_data_from_fu_crossbar[i] //= \ - s.src_fu_xbar[i].send - s.reg_cluster.recv_data_from_const[i] //= \ - s.src_const[i].send - s.reg_cluster.send_data_to_fu[i] //= \ - s.sink[i].recv - - def done(s): - for i in range(s.num_reg_banks): - if not s.sink[i].done(): - return False - return True - - def line_trace(s): - return s.reg_cluster.line_trace() - -def run_sim(test_harness, max_cycles = 10): - test_harness.elaborate() - test_harness.apply(DefaultPassGroup()) - test_harness.sim_reset() - - # Run simulation - ncycles = 0 - print() - print("{}:{}".format( ncycles, test_harness.line_trace())) - while not test_harness.done() and ncycles < max_cycles: - test_harness.sim_tick() - ncycles += 1 - print("{}:{}".format(ncycles, test_harness.line_trace())) - - # Check timeout - assert ncycles < max_cycles - - test_harness.sim_tick() - test_harness.sim_tick() - test_harness.sim_tick() - -def test_reg_bank(): - DataType = mk_data(16, 1) - data_mem_size = 20 - AddrType = mk_bits(clog2(data_mem_size)) - preloadData = [DataType(i, 1) for i in range(data_mem_size)] - - num_ctrl_operations = 64 - num_fu_inports = 4 - num_fu_outports = 2 - num_tile_inports = 4 - num_tile_outports = 4 - num_registers_per_reg_bank = 16 - num_reg_banks = 4 - reg_bank_id = 1 - - ConfigType = mk_ctrl(num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports, - num_registers_per_reg_bank) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] - - src_opt = ConfigType(OPT_ADD_CONST, pickRegister) - src_opt.write_reg_from[reg_bank_id] = b2(PORT_FU_CROSSBAR) - # Writes data into reg[15]. - src_opt.write_reg_idx[reg_bank_id] = b4(15) - # read_reg_towards: 0=nothing, 1=FU, 2=routing_xbar, 3=both - src_opt.read_reg_towards[reg_bank_id] = b2(READ_TOWARDS_FU) - # Reads data from reg[15]. - src_opt.read_reg_idx[reg_bank_id] = b4(15) # read after write - - src_data_from_routing_xbar = \ - [[DataType(5, 1)], - [DataType(10, 1), DataType(11, 1)], - [], - [DataType(42, 1)] - ] - src_data_from_fu_xbar = \ - [[], - [DataType(12, 1)], - [], - [] - ] - src_data_from_const = \ - [[], - [DataType(13, 1)], - [], - [] - ] - - expected_sink_data = \ - [[DataType(5, 1)], - # Routing of 10 and 11 are overwritten by read_reg. - [DataType(0, 0), DataType(0, 0), DataType(12, 1)], - [], - [DataType(42, 1)] - ] - - th = TestHarness(DataType, ConfigType, num_reg_banks, - num_registers_per_reg_bank, src_opt, - src_data_from_routing_xbar, - src_data_from_fu_xbar, - src_data_from_const, - expected_sink_data) - run_sim(th) - -#------------------------------------------------------------------------- -# Extended test harness that also sinks send_data_to_routing_crossbar -#------------------------------------------------------------------------- - -class TestHarnessWithXbarSink(Component): - - def construct(s, DataType, ConfigType, num_reg_banks, num_registers, - src_opt, src_msgs_routing_xbar, src_msgs_fu_xbar, - src_msgs_const, sink_msgs_fu, sink_msgs_xbar): - - s.num_reg_banks = num_reg_banks - s.src_opt = Wire(ConfigType) - - s.src_routing_xbar = [TestSrcRTL(DataType, src_msgs_routing_xbar[i]) - for i in range(num_reg_banks)] - s.src_fu_xbar = [TestSrcRTL(DataType, src_msgs_fu_xbar[i]) - for i in range(num_reg_banks)] - s.src_const = [TestSrcRTL(DataType, src_msgs_const[i]) - for i in range(num_reg_banks)] - - s.sink_fu = [TestSinkRTL(DataType, sink_msgs_fu[i]) - for i in range(num_reg_banks)] - s.sink_xbar = [TestSinkRTL(DataType, sink_msgs_xbar[i]) - for i in range(num_reg_banks)] - - s.reg_cluster = RegisterClusterRTL(DataType, ConfigType, num_reg_banks, - num_registers) - - s.src_opt //= src_opt - for i in range(num_reg_banks): - s.reg_cluster.inport_opt //= s.src_opt - s.reg_cluster.recv_data_from_routing_crossbar[i] //= \ - s.src_routing_xbar[i].send - s.reg_cluster.recv_data_from_fu_crossbar[i] //= \ - s.src_fu_xbar[i].send - s.reg_cluster.recv_data_from_const[i] //= \ - s.src_const[i].send - s.reg_cluster.send_data_to_fu[i] //= s.sink_fu[i].recv - s.reg_cluster.send_data_to_routing_crossbar[i] //= s.sink_xbar[i].recv - - def done(s): - for i in range(s.num_reg_banks): - if not s.sink_fu[i].done() or not s.sink_xbar[i].done(): - return False - return True - - def line_trace(s): - return s.reg_cluster.line_trace() - -#------------------------------------------------------------------------- -# test: read_reg_towards=2 => data goes to routing_xbar only, not FU -#------------------------------------------------------------------------- - -def test_reg_cluster_read_towards_routing_xbar(): - """ - Writes a value into reg[3] of bank 0 via the FU-crossbar path, then on - the next control word set read_reg_towards[0]=2 (READ_TOWARDS_ROUTING_XBAR). - Expects the value to appear on send_data_to_routing_crossbar[0] and - nothing on send_data_to_fu[0]. - """ - DataType = mk_data(16, 1) - num_fu_inports = 4 - num_fu_outports = 2 - num_tile_inports = 4 - num_tile_outports = 4 - num_registers_per_reg_bank = 16 - num_reg_banks = 4 - - ConfigType = mk_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports, - num_registers_per_reg_bank) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - - # Control word: write bank-0 reg[3] from FU-crossbar (write_reg_from=2), - # then read it towards routing_xbar (read_reg_towards=2). - src_opt = ConfigType(OPT_ADD_CONST, [FuInType(x + 1) for x in range(num_fu_inports)]) - src_opt.write_reg_from[0] = b2(PORT_FU_CROSSBAR) # write from FU-crossbar - src_opt.write_reg_idx[0] = b4(3) - src_opt.read_reg_towards[0] = b2(READ_TOWARDS_ROUTING_XBAR) - src_opt.read_reg_idx[0] = b4(3) - - # Bank 0 receives one value via FU-crossbar; all others empty. - src_data_from_routing_xbar = [[] for _ in range(num_reg_banks)] - src_data_from_fu_xbar = [[DataType(77, 1)], [], [], []] - src_data_from_const = [[] for _ in range(num_reg_banks)] - - # FU sink: bank 0 gets nothing because read_reg_towards=2 means the - # register data is NOT forwarded to FU. - # Routing-xbar sink: TestSrcRTL starts sending in cycle 2 (1 cycle after - # reset), so the write lands at end of cycle 2 and the value is readable - # in cycle 3 — two leading DataType(0,0) before DataType(77,1). - sink_msgs_fu = [[] for _ in range(num_reg_banks)] - sink_msgs_xbar = [[DataType(0, 0), DataType(0, 0), DataType(77, 1)], [], [], []] - - th = TestHarnessWithXbarSink( - DataType, ConfigType, num_reg_banks, num_registers_per_reg_bank, - src_opt, - src_data_from_routing_xbar, - src_data_from_fu_xbar, - src_data_from_const, - sink_msgs_fu, - sink_msgs_xbar) - run_sim(th, max_cycles = 15) - -#------------------------------------------------------------------------- -# test: read_reg_towards=3 => data goes to BOTH FU and routing_xbar -#------------------------------------------------------------------------- - -def test_reg_cluster_read_towards_both(): - """ - Writes a value into reg[7] of bank 2 via the routing-crossbar path, then - set read_reg_towards[2]=3 (READ_TOWARDS_BOTH). - Expects the same value on both send_data_to_fu[2] and - send_data_to_routing_crossbar[2]. - """ - DataType = mk_data(16, 1) - num_fu_inports = 4 - num_fu_outports = 2 - num_tile_inports = 4 - num_tile_outports = 4 - num_registers_per_reg_bank = 16 - num_reg_banks = 4 - - ConfigType = mk_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports, - num_registers_per_reg_bank) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - - src_opt = ConfigType(OPT_ADD_CONST, [FuInType(x + 1) for x in range(num_fu_inports)]) - src_opt.write_reg_from[2] = b2(PORT_ROUTING_CROSSBAR) # write from routing-crossbar - src_opt.write_reg_idx[2] = b4(7) - src_opt.read_reg_towards[2] = b2(READ_TOWARDS_BOTH) - src_opt.read_reg_idx[2] = b4(7) - - src_data_from_routing_xbar = [[], [], [DataType(55, 1)], []] - src_data_from_fu_xbar = [[] for _ in range(num_reg_banks)] - src_data_from_const = [[] for _ in range(num_reg_banks)] - - # Bank 2: reg data (55) goes to both FU and routing_xbar. - # TestSrcRTL starts sending in cycle 2, write lands at end of cycle 2, - # value readable in cycle 3 — two leading DataType(0,0) on both paths. - sink_msgs_fu = [[], [], [DataType(0, 0), DataType(0, 0), DataType(55, 1)], []] - sink_msgs_xbar = [[], [], [DataType(0, 0), DataType(0, 0), DataType(55, 1)], []] - - th = TestHarnessWithXbarSink( - DataType, ConfigType, num_reg_banks, num_registers_per_reg_bank, - src_opt, - src_data_from_routing_xbar, - src_data_from_fu_xbar, - src_data_from_const, - sink_msgs_fu, - sink_msgs_xbar) - run_sim(th, max_cycles = 15) - -#------------------------------------------------------------------------- -# test: read_reg_towards=1 => data goes to FU only, xbar output stays idle -#------------------------------------------------------------------------- - -def test_reg_cluster_read_towards_fu_no_xbar_output(): - """ - Sets read_reg_towards[1]=1 (READ_TOWARDS_FU). - Verifies send_data_to_routing_crossbar[1] never fires (empty sink). - """ - DataType = mk_data(16, 1) - num_fu_inports = 4 - num_fu_outports = 2 - num_tile_inports = 4 - num_tile_outports = 4 - num_registers_per_reg_bank = 16 - num_reg_banks = 4 - - ConfigType = mk_ctrl(num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports, - num_registers_per_reg_bank) - FuInType = mk_bits(clog2(num_fu_inports + 1)) - - src_opt = ConfigType(OPT_ADD_CONST, [FuInType(x + 1) for x in range(num_fu_inports)]) - src_opt.write_reg_from[1] = b2(PORT_FU_CROSSBAR) # write from FU-crossbar - src_opt.write_reg_idx[1] = b4(0) - src_opt.read_reg_towards[1] = b2(READ_TOWARDS_FU) - src_opt.read_reg_idx[1] = b4(0) - - src_data_from_routing_xbar = [[] for _ in range(num_reg_banks)] - src_data_from_fu_xbar = [[], [DataType(33, 1)], [], []] - src_data_from_const = [[] for _ in range(num_reg_banks)] - - # FU sink for bank 1: TestSrcRTL starts sending in cycle 2, write lands - # at end of cycle 2, value readable in cycle 3 — two leading DataType(0,0). - # Xbar sink stays empty because read_reg_towards=1 (FU only). - sink_msgs_fu = [[], [DataType(0, 0), DataType(0, 0), DataType(33, 1)], [], []] - sink_msgs_xbar = [[] for _ in range(num_reg_banks)] - - th = TestHarnessWithXbarSink( - DataType, ConfigType, num_reg_banks, num_registers_per_reg_bank, - src_opt, - src_data_from_routing_xbar, - src_data_from_fu_xbar, - src_data_from_const, - sink_msgs_fu, - sink_msgs_xbar) - run_sim(th, max_cycles = 15) - +""" +========================================================================== +RegisterClusterRTL_test.py +========================================================================== +Test cases for RegisterClusterRTL. + +Author : Cheng Tan + Date : Feb 7, 2025 +""" + +from pymtl3 import * +from ..RegisterClusterRTL import RegisterClusterRTL +from ....lib.basic.val_rdy.SourceRTL import SourceRTL as TestSrcRTL +from ....lib.basic.val_rdy.SinkRTL import SinkRTL as TestSinkRTL +from ....lib.messages import * +from ....lib.opt_type import * +from ....lib.util.common import ( + READ_TOWARDS_NOTHING, + READ_TOWARDS_FU, + READ_TOWARDS_ROUTING_XBAR, + READ_TOWARDS_BOTH, + PORT_INDEX_ROUTING_CROSSBAR, + PORT_INDEX_FU_CROSSBAR, + PORT_INDEX_CONST, + PORT_ROUTING_CROSSBAR, + PORT_FU_CROSSBAR, + PORT_CONST, +) + +# Local bitwidth helpers (avoid relying on optional pymtl3.stdlib modules) +Bits2 = mk_bits(2) +Bits4 = mk_bits(4) + +def b2(v): + return Bits2(v) + +def b4(v): + return Bits4(v) + +#------------------------------------------------------------------------- +# Test harness +#------------------------------------------------------------------------- + +class TestHarness(Component): + + def construct(s, DataType, ConfigType, num_reg_banks, num_registers, + src_opt, src_msgs_routing_xbar, src_msgs_fu_xbar, + src_msgs_const, sink_msgs): + + s.num_reg_banks = num_reg_banks + s.src_opt = Wire(ConfigType) + + s.src_routing_xbar = [TestSrcRTL(DataType, src_msgs_routing_xbar[i]) + for i in range(num_reg_banks)] + s.src_fu_xbar = [TestSrcRTL(DataType, src_msgs_fu_xbar[i]) + for i in range(num_reg_banks)] + s.src_const = [TestSrcRTL(DataType, src_msgs_const[i]) + for i in range(num_reg_banks)] + + s.sink = [TestSinkRTL(DataType, sink_msgs[i]) + for i in range(num_reg_banks)] + + s.reg_cluster = RegisterClusterRTL(DataType, ConfigType, num_reg_banks, + num_registers) + + s.src_opt //= src_opt + for i in range(num_reg_banks): + s.reg_cluster.inport_opt //= s.src_opt + s.reg_cluster.recv_data_from_routing_crossbar[i] //= \ + s.src_routing_xbar[i].send + s.reg_cluster.recv_data_from_fu_crossbar[i] //= \ + s.src_fu_xbar[i].send + s.reg_cluster.recv_data_from_const[i] //= \ + s.src_const[i].send + s.reg_cluster.send_data_to_fu[i] //= \ + s.sink[i].recv + + def done(s): + for i in range(s.num_reg_banks): + if not s.sink[i].done(): + return False + return True + + def line_trace(s): + return s.reg_cluster.line_trace() + +def run_sim(test_harness, max_cycles = 10): + test_harness.elaborate() + test_harness.apply(DefaultPassGroup()) + test_harness.sim_reset() + + # Run simulation + ncycles = 0 + print() + print("{}:{}".format( ncycles, test_harness.line_trace())) + while not test_harness.done() and ncycles < max_cycles: + test_harness.sim_tick() + ncycles += 1 + print("{}:{}".format(ncycles, test_harness.line_trace())) + + # Check timeout + assert ncycles < max_cycles + + test_harness.sim_tick() + test_harness.sim_tick() + test_harness.sim_tick() + +def test_reg_bank(): + DataType = mk_data(16, 1) + data_mem_size = 20 + AddrType = mk_bits(clog2(data_mem_size)) + preloadData = [DataType(i, 1) for i in range(data_mem_size)] + + num_ctrl_operations = 64 + num_fu_inports = 4 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + num_registers_per_reg_bank = 16 + num_reg_banks = 4 + reg_bank_id = 1 + + ConfigType = mk_ctrl(num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] + + src_opt = ConfigType(OPT_ADD_CONST, pickRegister) + src_opt.write_reg_from[reg_bank_id] = b2(PORT_FU_CROSSBAR) + # Writes data into reg[15]. + src_opt.write_reg_idx[reg_bank_id] = b4(15) + # read_reg_towards: 0=nothing, 1=FU, 2=routing_xbar, 3=both + src_opt.read_reg_towards[reg_bank_id] = b2(READ_TOWARDS_FU) + # Reads data from reg[15]. + src_opt.read_reg_idx[reg_bank_id] = b4(15) # read after write + + src_data_from_routing_xbar = \ + [[DataType(5, 1)], + [DataType(10, 1), DataType(11, 1)], + [], + [DataType(42, 1)] + ] + src_data_from_fu_xbar = \ + [[], + [DataType(12, 1)], + [], + [] + ] + src_data_from_const = \ + [[], + [DataType(13, 1)], + [], + [] + ] + + expected_sink_data = \ + [[DataType(5, 1)], + # Routing of 10 and 11 are overwritten by read_reg. + [DataType(0, 0), DataType(0, 0), DataType(12, 1)], + [], + [DataType(42, 1)] + ] + + th = TestHarness(DataType, ConfigType, num_reg_banks, + num_registers_per_reg_bank, src_opt, + src_data_from_routing_xbar, + src_data_from_fu_xbar, + src_data_from_const, + expected_sink_data) + run_sim(th) + +#------------------------------------------------------------------------- +# Extended test harness that also sinks send_data_to_routing_crossbar +#------------------------------------------------------------------------- + +class TestHarnessWithXbarSink(Component): + + def construct(s, DataType, ConfigType, num_reg_banks, num_registers, + src_opt, src_msgs_routing_xbar, src_msgs_fu_xbar, + src_msgs_const, sink_msgs_fu, sink_msgs_xbar): + + s.num_reg_banks = num_reg_banks + s.src_opt = Wire(ConfigType) + + s.src_routing_xbar = [TestSrcRTL(DataType, src_msgs_routing_xbar[i]) + for i in range(num_reg_banks)] + s.src_fu_xbar = [TestSrcRTL(DataType, src_msgs_fu_xbar[i]) + for i in range(num_reg_banks)] + s.src_const = [TestSrcRTL(DataType, src_msgs_const[i]) + for i in range(num_reg_banks)] + + s.sink_fu = [TestSinkRTL(DataType, sink_msgs_fu[i]) + for i in range(num_reg_banks)] + s.sink_xbar = [TestSinkRTL(DataType, sink_msgs_xbar[i]) + for i in range(num_reg_banks)] + + s.reg_cluster = RegisterClusterRTL(DataType, ConfigType, num_reg_banks, + num_registers) + + s.src_opt //= src_opt + for i in range(num_reg_banks): + s.reg_cluster.inport_opt //= s.src_opt + s.reg_cluster.recv_data_from_routing_crossbar[i] //= \ + s.src_routing_xbar[i].send + s.reg_cluster.recv_data_from_fu_crossbar[i] //= \ + s.src_fu_xbar[i].send + s.reg_cluster.recv_data_from_const[i] //= \ + s.src_const[i].send + s.reg_cluster.send_data_to_fu[i] //= s.sink_fu[i].recv + s.reg_cluster.send_data_to_routing_crossbar[i] //= s.sink_xbar[i].recv + + def done(s): + for i in range(s.num_reg_banks): + if not s.sink_fu[i].done() or not s.sink_xbar[i].done(): + return False + return True + + def line_trace(s): + return s.reg_cluster.line_trace() + +#------------------------------------------------------------------------- +# test: read_reg_towards=2 => data goes to routing_xbar only, not FU +#------------------------------------------------------------------------- + +def test_reg_cluster_read_towards_routing_xbar(): + """ + Writes a value into reg[3] of bank 0 via the FU-crossbar path, then on + the next control word set read_reg_towards[0]=2 (READ_TOWARDS_ROUTING_XBAR). + Expects the value to appear on send_data_to_routing_crossbar[0] and + nothing on send_data_to_fu[0]. + """ + DataType = mk_data(16, 1) + num_fu_inports = 4 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + num_registers_per_reg_bank = 16 + num_reg_banks = 4 + + ConfigType = mk_ctrl(num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + num_registers_per_reg_bank) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + + # Control word: write bank-0 reg[3] from FU-crossbar (write_reg_from=2), + # then read it towards routing_xbar (read_reg_towards=2). + src_opt = ConfigType(OPT_ADD_CONST, [FuInType(x + 1) for x in range(num_fu_inports)]) + src_opt.write_reg_from[0] = b2(PORT_FU_CROSSBAR) # write from FU-crossbar + src_opt.write_reg_idx[0] = b4(3) + src_opt.read_reg_towards[0] = b2(READ_TOWARDS_ROUTING_XBAR) + src_opt.read_reg_idx[0] = b4(3) + + # Bank 0 receives one value via FU-crossbar; all others empty. + src_data_from_routing_xbar = [[] for _ in range(num_reg_banks)] + src_data_from_fu_xbar = [[DataType(77, 1)], [], [], []] + src_data_from_const = [[] for _ in range(num_reg_banks)] + + # FU sink: bank 0 gets nothing because read_reg_towards=2 means the + # register data is NOT forwarded to FU. + # Routing-xbar sink: TestSrcRTL starts sending in cycle 2 (1 cycle after + # reset), so the write lands at end of cycle 2 and the value is readable + # in cycle 3 — two leading DataType(0,0) before DataType(77,1). + sink_msgs_fu = [[] for _ in range(num_reg_banks)] + sink_msgs_xbar = [[DataType(0, 0), DataType(0, 0), DataType(77, 1)], [], [], []] + + th = TestHarnessWithXbarSink( + DataType, ConfigType, num_reg_banks, num_registers_per_reg_bank, + src_opt, + src_data_from_routing_xbar, + src_data_from_fu_xbar, + src_data_from_const, + sink_msgs_fu, + sink_msgs_xbar) + run_sim(th, max_cycles = 15) + +#------------------------------------------------------------------------- +# test: read_reg_towards=3 => data goes to BOTH FU and routing_xbar +#------------------------------------------------------------------------- + +def test_reg_cluster_read_towards_both(): + """ + Writes a value into reg[7] of bank 2 via the routing-crossbar path, then + set read_reg_towards[2]=3 (READ_TOWARDS_BOTH). + Expects the same value on both send_data_to_fu[2] and + send_data_to_routing_crossbar[2]. + """ + DataType = mk_data(16, 1) + num_fu_inports = 4 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + num_registers_per_reg_bank = 16 + num_reg_banks = 4 + + ConfigType = mk_ctrl(num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + num_registers_per_reg_bank) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + + src_opt = ConfigType(OPT_ADD_CONST, [FuInType(x + 1) for x in range(num_fu_inports)]) + src_opt.write_reg_from[2] = b2(PORT_ROUTING_CROSSBAR) # write from routing-crossbar + src_opt.write_reg_idx[2] = b4(7) + src_opt.read_reg_towards[2] = b2(READ_TOWARDS_BOTH) + src_opt.read_reg_idx[2] = b4(7) + + src_data_from_routing_xbar = [[], [], [DataType(55, 1)], []] + src_data_from_fu_xbar = [[] for _ in range(num_reg_banks)] + src_data_from_const = [[] for _ in range(num_reg_banks)] + + # Bank 2: reg data (55) goes to both FU and routing_xbar. + # TestSrcRTL starts sending in cycle 2, write lands at end of cycle 2, + # value readable in cycle 3 — two leading DataType(0,0) on both paths. + sink_msgs_fu = [[], [], [DataType(0, 0), DataType(0, 0), DataType(55, 1)], []] + sink_msgs_xbar = [[], [], [DataType(0, 0), DataType(0, 0), DataType(55, 1)], []] + + th = TestHarnessWithXbarSink( + DataType, ConfigType, num_reg_banks, num_registers_per_reg_bank, + src_opt, + src_data_from_routing_xbar, + src_data_from_fu_xbar, + src_data_from_const, + sink_msgs_fu, + sink_msgs_xbar) + run_sim(th, max_cycles = 15) + +#------------------------------------------------------------------------- +# test: read_reg_towards=1 => data goes to FU only, xbar output stays idle +#------------------------------------------------------------------------- + +def test_reg_cluster_read_towards_fu_no_xbar_output(): + """ + Sets read_reg_towards[1]=1 (READ_TOWARDS_FU). + Verifies send_data_to_routing_crossbar[1] never fires (empty sink). + """ + DataType = mk_data(16, 1) + num_fu_inports = 4 + num_fu_outports = 2 + num_tile_inports = 4 + num_tile_outports = 4 + num_registers_per_reg_bank = 16 + num_reg_banks = 4 + + ConfigType = mk_ctrl(num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, + num_registers_per_reg_bank) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + + src_opt = ConfigType(OPT_ADD_CONST, [FuInType(x + 1) for x in range(num_fu_inports)]) + src_opt.write_reg_from[1] = b2(PORT_FU_CROSSBAR) # write from FU-crossbar + src_opt.write_reg_idx[1] = b4(0) + src_opt.read_reg_towards[1] = b2(READ_TOWARDS_FU) + src_opt.read_reg_idx[1] = b4(0) + + src_data_from_routing_xbar = [[] for _ in range(num_reg_banks)] + src_data_from_fu_xbar = [[], [DataType(33, 1)], [], []] + src_data_from_const = [[] for _ in range(num_reg_banks)] + + # FU sink for bank 1: TestSrcRTL starts sending in cycle 2, write lands + # at end of cycle 2, value readable in cycle 3 — two leading DataType(0,0). + # Xbar sink stays empty because read_reg_towards=1 (FU only). + sink_msgs_fu = [[], [DataType(0, 0), DataType(0, 0), DataType(33, 1)], [], []] + sink_msgs_xbar = [[] for _ in range(num_reg_banks)] + + th = TestHarnessWithXbarSink( + DataType, ConfigType, num_reg_banks, num_registers_per_reg_bank, + src_opt, + src_data_from_routing_xbar, + src_data_from_fu_xbar, + src_data_from_const, + sink_msgs_fu, + sink_msgs_xbar) + run_sim(th, max_cycles = 15) + diff --git a/multi_cgra/MeshMultiCgraRTL.py b/multi_cgra/MeshMultiCgraRTL.py index 9eae065c..b9a05f0c 100644 --- a/multi_cgra/MeshMultiCgraRTL.py +++ b/multi_cgra/MeshMultiCgraRTL.py @@ -1,164 +1,164 @@ -""" -========================================================================== -MeshMultiCgraRTL.py -========================================================================== -Mesh connecting multiple CGRAs, each CGRA contains one controller. - -Author : Cheng Tan - Date : Jan 8, 2025 -""" - -from ..cgra.CgraRTL import CgraRTL -from ..cgra.CgraWithContextSwitchRTL import CgraWithContextSwitchRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.opt_type import * -from ..noc.PyOCN.pymtl3_net.meshnet.MeshNetworkRTL import MeshNetworkRTL -from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_mesh_pos -from ..lib.messages import * -from ..lib.util.data_struct_attr import * - -class MeshMultiCgraRTL(Component): - - def construct(s, CgraPayloadType, cgra_rows, cgra_columns, - tile_rows, tile_columns, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_registers_per_reg_bank, - num_ctrl, total_steps, - mem_access_is_combinational, - FunctionUnit, FuList, per_cgra_topology, - controller2addr_map, - support_task_switching = False): - - # Derives all types from CgraPayloadType. - CgraDataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = CgraDataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_nbits = CgraDataType.get_field_type(kAttrPayload).nbits - - # Reconstructs packet types. - num_tiles = tile_rows * tile_columns - num_rd_tiles = tile_rows + tile_columns - 1 - - CtrlPktType = mk_intra_cgra_pkt(cgra_columns, cgra_rows, - num_tiles, CgraPayloadType) - - NocPktType = mk_inter_cgra_pkt(cgra_columns, cgra_rows, - num_tiles, num_rd_tiles, - CgraPayloadType) - # Constant - s.num_cgras = cgra_rows * cgra_columns - idTo2d_map = {} - - # Mesh position takes column as argument first. - MeshPos = mk_mesh_pos(cgra_columns, cgra_rows) - s.num_tiles = tile_rows * tile_columns - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - ControllerIdType = mk_bits(max(1, clog2(s.num_cgras))) - - # Interface - # Request from/to CPU. - s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) - s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) - - # Components - for cgra_row in range(cgra_rows): - for cgra_col in range(cgra_columns): - idTo2d_map[cgra_row * cgra_columns + cgra_col] = (cgra_col, cgra_row) - - if support_task_switching: - s.cgra = [CgraWithContextSwitchRTL(CgraPayloadType, cgra_rows, cgra_columns, - tile_columns, tile_rows, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_registers_per_reg_bank, - num_ctrl, total_steps, - mem_access_is_combinational, - FunctionUnit, FuList, per_cgra_topology, - controller2addr_map, idTo2d_map) - for cgra_id in range(s.num_cgras)] - else: - s.cgra = [CgraRTL(CgraPayloadType, cgra_rows, cgra_columns, - tile_columns, tile_rows, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_registers_per_reg_bank, - num_ctrl, total_steps, - mem_access_is_combinational, - FunctionUnit, FuList, per_cgra_topology, - controller2addr_map, idTo2d_map, - has_ctrl_ring = True) - for cgra_id in range(s.num_cgras)] - - # Latency is 1. - s.mesh = MeshNetworkRTL(NocPktType, MeshPos, cgra_columns, cgra_rows, 1) - - # Connections - for i in range(s.num_cgras): - s.mesh.send[i] //= s.cgra[i].recv_from_inter_cgra_noc - s.mesh.recv[i] //= s.cgra[i].send_to_inter_cgra_noc - - # Connects controller id. - for cgra_id in range(s.num_cgras): - s.cgra[cgra_id].cgra_id //= cgra_id - - # Connects memory address upper and lower bound for each CGRA. - for cgra_id in range(s.num_cgras): - s.cgra[cgra_id].address_lower //= DataAddrType(controller2addr_map[cgra_id][0]) - s.cgra[cgra_id].address_upper //= DataAddrType(controller2addr_map[cgra_id][1]) - - s.recv_from_cpu_pkt //= s.cgra[0].recv_from_cpu_pkt - s.send_to_cpu_pkt //= s.cgra[0].send_to_cpu_pkt - - for i in range(1, s.num_cgras): - s.cgra[i].recv_from_cpu_pkt.val //= 0 - s.cgra[i].recv_from_cpu_pkt.msg //= CtrlPktType() - s.cgra[i].send_to_cpu_pkt.rdy //= 0 - - # Connects the tiles on the boundary of each two adjacent CGRAs. - for cgra_row in range(cgra_rows): - for cgra_col in range(cgra_columns): - if cgra_row != 0: - for tile_col in range(tile_columns): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col] //= \ - s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col] - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col] //= \ - s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col] - else: - for tile_col in range(tile_columns): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col].rdy //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].val //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].msg //= CgraDataType() - - if cgra_row == cgra_rows - 1: - for tile_col in range(tile_columns): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col].rdy //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].val //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].msg //= CgraDataType() - - if cgra_col != 0: - for tile_row in range(tile_rows): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row] //= \ - s.cgra[cgra_row * cgra_columns + cgra_col - 1].recv_data_on_boundary_east[tile_row] - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row] //= \ - s.cgra[cgra_row * cgra_columns + cgra_col - 1].send_data_on_boundary_east[tile_row] - else: - for tile_row in range(tile_rows): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row].rdy //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].val //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].msg //= CgraDataType() - - if cgra_col == cgra_columns - 1: - for tile_row in range(tile_rows): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_east[tile_row].rdy //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].val //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].msg //= CgraDataType() - - def line_trace(s): - res = "||\n".join([(("\n\n[cgra_"+str(i)+": ") + x.line_trace()) - for (i,x) in enumerate(s.cgra)]) - res += " ## mesh: " + s.mesh.line_trace() - return res - +""" +========================================================================== +MeshMultiCgraRTL.py +========================================================================== +Mesh connecting multiple CGRAs, each CGRA contains one controller. + +Author : Cheng Tan + Date : Jan 8, 2025 +""" + +from ..cgra.CgraRTL import CgraRTL +from ..cgra.CgraWithContextSwitchRTL import CgraWithContextSwitchRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.opt_type import * +from ..noc.PyOCN.pymtl3_net.meshnet.MeshNetworkRTL import MeshNetworkRTL +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_mesh_pos +from ..lib.messages import * +from ..lib.util.data_struct_attr import * + +class MeshMultiCgraRTL(Component): + + def construct(s, CgraPayloadType, cgra_rows, cgra_columns, + tile_rows, tile_columns, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, + num_ctrl, total_steps, + mem_access_is_combinational, + FunctionUnit, FuList, per_cgra_topology, + controller2addr_map, + support_task_switching = False): + + # Derives all types from CgraPayloadType. + CgraDataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = CgraDataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_nbits = CgraDataType.get_field_type(kAttrPayload).nbits + + # Reconstructs packet types. + num_tiles = tile_rows * tile_columns + num_rd_tiles = tile_rows + tile_columns - 1 + + CtrlPktType = mk_intra_cgra_pkt(cgra_columns, cgra_rows, + num_tiles, CgraPayloadType) + + NocPktType = mk_inter_cgra_pkt(cgra_columns, cgra_rows, + num_tiles, num_rd_tiles, + CgraPayloadType) + # Constant + s.num_cgras = cgra_rows * cgra_columns + idTo2d_map = {} + + # Mesh position takes column as argument first. + MeshPos = mk_mesh_pos(cgra_columns, cgra_rows) + s.num_tiles = tile_rows * tile_columns + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + ControllerIdType = mk_bits(max(1, clog2(s.num_cgras))) + + # Interface + # Request from/to CPU. + s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) + s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) + + # Components + for cgra_row in range(cgra_rows): + for cgra_col in range(cgra_columns): + idTo2d_map[cgra_row * cgra_columns + cgra_col] = (cgra_col, cgra_row) + + if support_task_switching: + s.cgra = [CgraWithContextSwitchRTL(CgraPayloadType, cgra_rows, cgra_columns, + tile_columns, tile_rows, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, + num_ctrl, total_steps, + mem_access_is_combinational, + FunctionUnit, FuList, per_cgra_topology, + controller2addr_map, idTo2d_map) + for cgra_id in range(s.num_cgras)] + else: + s.cgra = [CgraRTL(CgraPayloadType, cgra_rows, cgra_columns, + tile_columns, tile_rows, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, + num_ctrl, total_steps, + mem_access_is_combinational, + FunctionUnit, FuList, per_cgra_topology, + controller2addr_map, idTo2d_map, + has_ctrl_ring = True) + for cgra_id in range(s.num_cgras)] + + # Latency is 1. + s.mesh = MeshNetworkRTL(NocPktType, MeshPos, cgra_columns, cgra_rows, 1) + + # Connections + for i in range(s.num_cgras): + s.mesh.send[i] //= s.cgra[i].recv_from_inter_cgra_noc + s.mesh.recv[i] //= s.cgra[i].send_to_inter_cgra_noc + + # Connects controller id. + for cgra_id in range(s.num_cgras): + s.cgra[cgra_id].cgra_id //= cgra_id + + # Connects memory address upper and lower bound for each CGRA. + for cgra_id in range(s.num_cgras): + s.cgra[cgra_id].address_lower //= DataAddrType(controller2addr_map[cgra_id][0]) + s.cgra[cgra_id].address_upper //= DataAddrType(controller2addr_map[cgra_id][1]) + + s.recv_from_cpu_pkt //= s.cgra[0].recv_from_cpu_pkt + s.send_to_cpu_pkt //= s.cgra[0].send_to_cpu_pkt + + for i in range(1, s.num_cgras): + s.cgra[i].recv_from_cpu_pkt.val //= 0 + s.cgra[i].recv_from_cpu_pkt.msg //= CtrlPktType() + s.cgra[i].send_to_cpu_pkt.rdy //= 0 + + # Connects the tiles on the boundary of each two adjacent CGRAs. + for cgra_row in range(cgra_rows): + for cgra_col in range(cgra_columns): + if cgra_row != 0: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col] //= \ + s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col] + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col] //= \ + s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col] + else: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].val //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].msg //= CgraDataType() + + if cgra_row == cgra_rows - 1: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].val //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].msg //= CgraDataType() + + if cgra_col != 0: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row] //= \ + s.cgra[cgra_row * cgra_columns + cgra_col - 1].recv_data_on_boundary_east[tile_row] + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row] //= \ + s.cgra[cgra_row * cgra_columns + cgra_col - 1].send_data_on_boundary_east[tile_row] + else: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].val //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].msg //= CgraDataType() + + if cgra_col == cgra_columns - 1: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_east[tile_row].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].val //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].msg //= CgraDataType() + + def line_trace(s): + res = "||\n".join([(("\n\n[cgra_"+str(i)+": ") + x.line_trace()) + for (i,x) in enumerate(s.cgra)]) + res += " ## mesh: " + s.mesh.line_trace() + return res + diff --git a/multi_cgra/RingMultiCgraRTL.py b/multi_cgra/RingMultiCgraRTL.py index 6e173e71..5cfe0626 100644 --- a/multi_cgra/RingMultiCgraRTL.py +++ b/multi_cgra/RingMultiCgraRTL.py @@ -1,148 +1,148 @@ -""" -========================================================================== -RingMultiCgraRTL.py -========================================================================== -Ring connecting multiple CGRAs, each CGRA contains one controller. - -Author : Cheng Tan - Date : Dec 23, 2024 -""" - -from ..cgra.CgraRTL import CgraRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.opt_type import * -from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos -from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL - - -from ..lib.util.data_struct_attr import * -from ..lib.messages import * - -class RingMultiCgraRTL(Component): - def construct(s, CgraPayloadType, cgra_rows, cgra_columns, tile_rows, tile_columns, - ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, - num_banks_per_cgra, num_registers_per_reg_bank, - num_ctrl, total_steps, - mem_access_is_combinational, - FunctionUnit, FuList, - controller2addr_map): - - # Derives all types from CgraPayloadType. - CgraDataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = CgraDataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_nbits = CgraDataType.get_field_type(kAttrPayload).nbits - - # Reconstructs packet types. - num_tiles = tile_rows * tile_columns - num_rd_tiles = tile_rows + tile_columns - 1 - - CtrlPktType = mk_intra_cgra_pkt(cgra_columns, cgra_rows, - num_tiles, CgraPayloadType) - - NocPktType = mk_inter_cgra_pkt(cgra_columns, cgra_rows, - num_tiles, num_rd_tiles, - CgraPayloadType) - # Constant - idTo2d_map = {} - s.num_cgras = cgra_rows * cgra_columns - RingPos = mk_ring_pos(s.num_cgras) - s.num_tiles = tile_rows * tile_columns - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size_global)) - ControllerIdType = mk_bits(max(1, clog2(s.num_cgras))) - - # Interface - # Request from/to CPU. - s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) - s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) - - # Components - # Constructs the topology as 1d. - for cgra_id in range(s.num_cgras): - idTo2d_map[cgra_id] = (cgra_id, 0) - - s.cgra = [CgraRTL(CgraPayloadType, - # Constructs the topology as 1d. - 1, s.num_cgras, - tile_columns, tile_rows, - ctrl_mem_size, data_mem_size_global, - data_mem_size_per_bank, num_banks_per_cgra, - num_registers_per_reg_bank, - num_ctrl, total_steps, - mem_access_is_combinational, - FunctionUnit, FuList, - "Mesh", controller2addr_map, idTo2d_map) - for cgra_id in range(s.num_cgras)] - - # Latency is 1. - s.ring = RingNetworkRTL(NocPktType, RingPos, s.num_cgras, 1) - - # Connections - for i in range(s.num_cgras): - s.ring.send[i] //= s.cgra[i].recv_from_inter_cgra_noc - s.ring.recv[i] //= s.cgra[i].send_to_inter_cgra_noc - - # Connects the controller id. - for cgra_id in range(s.num_cgras): - s.cgra[cgra_id].cgra_id //= cgra_id - - # Connects memory address upper and lower bound for each CGRA. - for cgra_id in range(s.num_cgras): - s.cgra[cgra_id].address_lower //= DataAddrType(controller2addr_map[cgra_id][0]) - s.cgra[cgra_id].address_upper //= DataAddrType(controller2addr_map[cgra_id][1]) - - s.recv_from_cpu_pkt //= s.cgra[0].recv_from_cpu_pkt - s.send_to_cpu_pkt //= s.cgra[0].send_to_cpu_pkt - - for i in range(1, s.num_cgras): - s.cgra[i].recv_from_cpu_pkt.val //= 0 - s.cgra[i].recv_from_cpu_pkt.msg //= CtrlPktType() - s.cgra[i].send_to_cpu_pkt.rdy //= 0 - - # Connects the tiles on the boundary of each two ajacent CGRAs. - for cgra_row in range(cgra_rows): - for cgra_col in range(cgra_columns): - if cgra_row != 0: - for tile_col in range(tile_columns): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col] //= \ - s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col] - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col] //= \ - s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col] - else: - for tile_col in range(tile_columns): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col].rdy //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].val //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].msg //= CgraDataType() - - if cgra_row == cgra_rows - 1: - for tile_col in range(tile_columns): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col].rdy //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].val //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].msg //= CgraDataType() - - if cgra_col != 0: - for tile_row in range(tile_rows): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row] //= \ - s.cgra[cgra_row * cgra_columns + cgra_col - 1].recv_data_on_boundary_east[tile_row] - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row] //= \ - s.cgra[cgra_row * cgra_columns + cgra_col - 1].send_data_on_boundary_east[tile_row] - else: - for tile_row in range(tile_rows): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row].rdy //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].val //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].msg //= CgraDataType() - - if cgra_col == cgra_columns - 1: - for tile_row in range(tile_rows): - s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_east[tile_row].rdy //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].val //= 0 - s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].msg //= CgraDataType() - - def line_trace(s): - res = "||\n".join([(("[cgra["+str(i)+"]: ") + x.line_trace()) - for (i,x) in enumerate(s.cgra)]) - res += " ## ring: " + s.ring.line_trace() - return res - +""" +========================================================================== +RingMultiCgraRTL.py +========================================================================== +Ring connecting multiple CGRAs, each CGRA contains one controller. + +Author : Cheng Tan + Date : Dec 23, 2024 +""" + +from ..cgra.CgraRTL import CgraRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.opt_type import * +from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_ring_pos +from ..noc.PyOCN.pymtl3_net.ringnet.RingNetworkRTL import RingNetworkRTL + + +from ..lib.util.data_struct_attr import * +from ..lib.messages import * + +class RingMultiCgraRTL(Component): + def construct(s, CgraPayloadType, cgra_rows, cgra_columns, tile_rows, tile_columns, + ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, + num_banks_per_cgra, num_registers_per_reg_bank, + num_ctrl, total_steps, + mem_access_is_combinational, + FunctionUnit, FuList, + controller2addr_map): + + # Derives all types from CgraPayloadType. + CgraDataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = CgraDataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_nbits = CgraDataType.get_field_type(kAttrPayload).nbits + + # Reconstructs packet types. + num_tiles = tile_rows * tile_columns + num_rd_tiles = tile_rows + tile_columns - 1 + + CtrlPktType = mk_intra_cgra_pkt(cgra_columns, cgra_rows, + num_tiles, CgraPayloadType) + + NocPktType = mk_inter_cgra_pkt(cgra_columns, cgra_rows, + num_tiles, num_rd_tiles, + CgraPayloadType) + # Constant + idTo2d_map = {} + s.num_cgras = cgra_rows * cgra_columns + RingPos = mk_ring_pos(s.num_cgras) + s.num_tiles = tile_rows * tile_columns + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + ControllerIdType = mk_bits(max(1, clog2(s.num_cgras))) + + # Interface + # Request from/to CPU. + s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) + s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType) + + # Components + # Constructs the topology as 1d. + for cgra_id in range(s.num_cgras): + idTo2d_map[cgra_id] = (cgra_id, 0) + + s.cgra = [CgraRTL(CgraPayloadType, + # Constructs the topology as 1d. + 1, s.num_cgras, + tile_columns, tile_rows, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, + num_ctrl, total_steps, + mem_access_is_combinational, + FunctionUnit, FuList, + "Mesh", controller2addr_map, idTo2d_map) + for cgra_id in range(s.num_cgras)] + + # Latency is 1. + s.ring = RingNetworkRTL(NocPktType, RingPos, s.num_cgras, 1) + + # Connections + for i in range(s.num_cgras): + s.ring.send[i] //= s.cgra[i].recv_from_inter_cgra_noc + s.ring.recv[i] //= s.cgra[i].send_to_inter_cgra_noc + + # Connects the controller id. + for cgra_id in range(s.num_cgras): + s.cgra[cgra_id].cgra_id //= cgra_id + + # Connects memory address upper and lower bound for each CGRA. + for cgra_id in range(s.num_cgras): + s.cgra[cgra_id].address_lower //= DataAddrType(controller2addr_map[cgra_id][0]) + s.cgra[cgra_id].address_upper //= DataAddrType(controller2addr_map[cgra_id][1]) + + s.recv_from_cpu_pkt //= s.cgra[0].recv_from_cpu_pkt + s.send_to_cpu_pkt //= s.cgra[0].send_to_cpu_pkt + + for i in range(1, s.num_cgras): + s.cgra[i].recv_from_cpu_pkt.val //= 0 + s.cgra[i].recv_from_cpu_pkt.msg //= CtrlPktType() + s.cgra[i].send_to_cpu_pkt.rdy //= 0 + + # Connects the tiles on the boundary of each two ajacent CGRAs. + for cgra_row in range(cgra_rows): + for cgra_col in range(cgra_columns): + if cgra_row != 0: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col] //= \ + s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col] + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col] //= \ + s.cgra[(cgra_row - 1) * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col] + else: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_south[tile_col].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].val //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_south[tile_col].msg //= CgraDataType() + + if cgra_row == cgra_rows - 1: + for tile_col in range(tile_columns): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_north[tile_col].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].val //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_north[tile_col].msg //= CgraDataType() + + if cgra_col != 0: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row] //= \ + s.cgra[cgra_row * cgra_columns + cgra_col - 1].recv_data_on_boundary_east[tile_row] + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row] //= \ + s.cgra[cgra_row * cgra_columns + cgra_col - 1].send_data_on_boundary_east[tile_row] + else: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_west[tile_row].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].val //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_west[tile_row].msg //= CgraDataType() + + if cgra_col == cgra_columns - 1: + for tile_row in range(tile_rows): + s.cgra[cgra_row * cgra_columns + cgra_col].send_data_on_boundary_east[tile_row].rdy //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].val //= 0 + s.cgra[cgra_row * cgra_columns + cgra_col].recv_data_on_boundary_east[tile_row].msg //= CgraDataType() + + def line_trace(s): + res = "||\n".join([(("[cgra["+str(i)+"]: ") + x.line_trace()) + for (i,x) in enumerate(s.cgra)]) + res += " ## ring: " + s.ring.line_trace() + return res + diff --git a/tile/TileRTL.py b/tile/TileRTL.py index 0dbe9f74..0facddcf 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -1,335 +1,335 @@ -""" -========================================================================= -TileSeparateCrossbarRTL.py -========================================================================= -The tile contains a list of functional units, a configuration memory, a -set of registers (e.g., channels), and two crossbars. One crossbar is for -routing the data to registers (i.e., the channels before FU and the -channels after the crossbar), and the other one is for passing the to the -next crossbar. - -Detailed in: https://github.com/tancheng/VectorCGRA/issues/13 (Option 2). - -Author : Cheng Tan - Date : Nov 26, 2024 -""" - -from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ..fu.single.AdderRTL import AdderRTL -from ..fu.single.GrantRTL import GrantRTL -from ..fu.single.CompRTL import CompRTL -from ..fu.single.MemUnitRTL import MemUnitRTL -from ..fu.single.MulRTL import MulRTL -from ..fu.single.PhiRTL import PhiRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.cmd_type import * -from ..lib.util.common import * -from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL -from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL -from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL -from ..noc.CrossbarRTL import CrossbarRTL -from ..noc.LinkOrRTL import LinkOrRTL -from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL -from ..rf.RegisterRTL import RegisterRTL -from ..lib.util.data_struct_attr import * - - -class TileRTL(Component): - - def construct(s, IntraCgraPktType, - ctrl_mem_size, data_mem_size, num_ctrl, - total_steps, num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports, num_cgras, num_tiles, - num_registers_per_reg_bank = 16, - Fu = FlexibleFuRTL, - FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, GrantRTL, MemUnitRTL]): - - # Derives types from IntraCgraPktType. - CgraPayloadType = IntraCgraPktType.get_field_type(kAttrPayload) - CtrlPktType = IntraCgraPktType - DataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = DataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_bitwidth = DataType.get_field_type(kAttrPayload).nbits - - # Constants. - num_routing_xbar_inports = num_tile_inports + num_fu_inports - num_routing_xbar_outports = num_fu_inports + num_tile_outports - - num_fu_xbar_inports = num_fu_outports - num_fu_xbar_outports = num_fu_inports + num_tile_outports - - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size)) - - # Interfaces. - s.recv_data = [RecvIfcRTL(DataType) - for _ in range (num_tile_inports)] - s.send_data = [SendIfcRTL(DataType) - for _ in range (num_tile_outports)] - - # Ctrl. - s.recv_from_controller_pkt = RecvIfcRTL(CtrlPktType) - # Sends the ctrl packets to ctrl ring. - s.send_to_controller_pkt = SendIfcRTL(CtrlPktType) - - # Data. - s.to_mem_raddr = SendIfcRTL(DataAddrType) - s.from_mem_rdata = RecvIfcRTL(DataType) - s.to_mem_waddr = SendIfcRTL(DataAddrType) - s.to_mem_wdata = SendIfcRTL(DataType) - - # Components. - s.element = FlexibleFuRTL(CtrlPktType, num_fu_inports, - num_fu_outports, num_tiles, FuList) - s.const_mem = ConstQueueDynamicRTL(DataType, ctrl_mem_size) - s.routing_crossbar = CrossbarRTL(DataType, - CtrlSignalType, - num_routing_xbar_inports, - num_routing_xbar_outports, - num_cgras, - num_tiles, - ctrl_mem_size, - num_tile_outports) - s.fu_crossbar = CrossbarRTL(DataType, - CtrlSignalType, - num_fu_xbar_inports, - num_fu_xbar_outports, - num_cgras, - num_tiles, - ctrl_mem_size, - num_tile_outports) - s.register_cluster = \ - RegisterClusterRTL(DataType, CtrlSignalType, num_fu_inports, - num_registers_per_reg_bank) - s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, - ctrl_mem_size, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports, - num_cgras, - num_tiles, - num_ctrl, - total_steps) - - # The `tile_in_channel` indicates the outport channels that are - # connected to the next tiles. - s.tile_in_channel = [ChannelRTL(DataType, latency = 1) - for _ in range(num_tile_inports)] - - # The `tile_out_or_link` would "or" the outports of the - # `tile_out_channel` and the FUs. - s.tile_out_or_link = [LinkOrRTL(DataType) - for _ in range(num_tile_outports)] - - # Signals indicating whether certain modules already done their jobs. - s.element_done = Wire(1) - s.fu_crossbar_done = Wire(1) - s.routing_crossbar_done = Wire(1) - - s.cgra_id = InPort(mk_bits(max(1, clog2(num_cgras)))) - s.tile_id = InPort(mk_bits(clog2(num_tiles + 1))) - - # Propagates tile id. - s.element.tile_id //= s.tile_id - s.ctrl_mem.cgra_id //= s.cgra_id - s.ctrl_mem.tile_id //= s.tile_id - s.fu_crossbar.cgra_id //= s.cgra_id - s.fu_crossbar.tile_id //= s.tile_id - s.routing_crossbar.cgra_id //= s.cgra_id - s.routing_crossbar.tile_id //= s.tile_id - - # Assigns crossbar id. - s.routing_crossbar.crossbar_id //= PORT_INDEX_ROUTING_CROSSBAR - s.fu_crossbar.crossbar_id //= PORT_INDEX_FU_CROSSBAR - - # Constant queue. - s.element.recv_const //= s.const_mem.send_const - - # Fu data <-> ctrl memory (eventually towards/from CPU via controller). - s.element.send_to_ctrl_mem //= s.ctrl_mem.recv_from_element - s.element.recv_from_ctrl_mem //= s.ctrl_mem.send_to_element - - # Ctrl address port. - s.routing_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport - s.fu_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport - s.element.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport - - # Prologue port. - s.element.prologue_count_inport //= s.ctrl_mem.prologue_count_outport_fu - for addr in range(ctrl_mem_size): - for i in range(num_routing_xbar_inports): - s.routing_crossbar.prologue_count_inport[addr][i] //= \ - s.ctrl_mem.prologue_count_outport_routing_crossbar[addr][i] - for i in range(num_fu_xbar_inports): - s.fu_crossbar.prologue_count_inport[addr][i] //= \ - s.ctrl_mem.prologue_count_outport_fu_crossbar[addr][i] - - for i in range(len(FuList)): - if FuList[i] == MemUnitRTL: - s.to_mem_raddr //= s.element.to_mem_raddr[i] - s.from_mem_rdata //= s.element.from_mem_rdata[i] - s.to_mem_waddr //= s.element.to_mem_waddr[i] - s.to_mem_wdata //= s.element.to_mem_wdata[i] - else: - s.element.to_mem_raddr[i].rdy //= 0 - s.element.from_mem_rdata[i].val //= 0 - s.element.from_mem_rdata[i].msg //= DataType() - s.element.to_mem_waddr[i].rdy //= 0 - s.element.to_mem_wdata[i].rdy //= 0 - - # Connections on the `routing_crossbar`. - # The data from other tiles should be connected to the - # `routing_crossbar`. - for i in range(num_tile_inports): - s.recv_data[i] //= s.tile_in_channel[i].recv - s.tile_in_channel[i].send //= s.routing_crossbar.recv_data[i] - - # Register banks are connected to the routing crossbar as additional - # inports, enabling reg -> outport DATA_MOV without occupying the FU. - for i in range(num_fu_inports): - s.register_cluster.send_data_to_routing_crossbar[i] //= \ - s.routing_crossbar.recv_data[num_tile_inports + i] - - # Connects specific xbar control signals to the corresponding crossbar. - for i in range(num_routing_xbar_outports): - s.routing_crossbar.crossbar_outport[i] //= \ - s.ctrl_mem.send_ctrl.msg.routing_xbar_outport[i] - s.fu_crossbar.crossbar_outport[i] //= \ - s.ctrl_mem.send_ctrl.msg.fu_xbar_outport[i] - - # Connections on the `fu_crossbar`. - for i in range(num_fu_outports): - s.element.send_out[i] //= s.fu_crossbar.recv_data[i] - - # The data going out to the other tiles should be from the - # `routing_crossbar`. Note that there are also data being fed into - # the FUs via the `routing_crossbar`, which are filtered out by - # `num_tile_outports` below. In addition, we "or" the outports of - # the FUs (via `fu_crossbar`) with the outports of the - # `routing_crossbar` through the corresponding channels. - for i in range(num_tile_outports): - s.fu_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_fu - s.routing_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_xbar - s.tile_out_or_link[i].send //= s.send_data[i] - - # Crossbars outputs are integrated with the "register_cluster". - # Whether the required operands for FU are from the "routing_crossbar" - # or from the "register_cluster" depends on the control signals. - for i in range(num_fu_inports): - s.routing_crossbar.send_data[num_tile_outports + i] //= \ - s.register_cluster.recv_data_from_routing_crossbar[i] - s.fu_crossbar.send_data[num_tile_outports + i] //= \ - s.register_cluster.recv_data_from_fu_crossbar[i] - - s.register_cluster.recv_data_from_const[i].msg //= DataType() - s.register_cluster.recv_data_from_const[i].val //= 0 - - s.register_cluster.send_data_to_fu[i] //= \ - s.element.recv_in[i] - s.register_cluster.inport_opt //= s.ctrl_mem.send_ctrl.msg - - # Clear ports are only useful during context switching. - # We connect to 0 to make sure they have drivers. - for i in range(len(FuList)): - s.element.clear[i] //= 0 - s.fu_crossbar.clear //= 0 - s.routing_crossbar.clear //= 0 - - @update - def feed_pkt(): - s.ctrl_mem.recv_pkt_from_controller.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - s.const_mem.recv_const.msg @= DataType(0, 0, 0, 0) - s.ctrl_mem.recv_pkt_from_controller.val @= 0 - s.const_mem.recv_const.val @= 0 - s.recv_from_controller_pkt.rdy @= 0 - - if s.recv_from_controller_pkt.val & \ - ((s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU_CROSSBAR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_ROUTING_CROSSBAR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_TOTAL_CTRL_COUNT) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_COUNT_PER_ITER) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD_RESPONSE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_MUL_RESPONSE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_LAUNCH) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_LOOP_LOWER) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_LOOP_UPPER) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_LOOP_STEP)): - s.ctrl_mem.recv_pkt_from_controller.val @= 1 - s.ctrl_mem.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg - s.recv_from_controller_pkt.rdy @= s.ctrl_mem.recv_pkt_from_controller.rdy - elif s.recv_from_controller_pkt.val & (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONST): - s.const_mem.recv_const.val @= 1 - s.const_mem.recv_const.msg @= s.recv_from_controller_pkt.msg.payload.data - s.recv_from_controller_pkt.rdy @= s.const_mem.recv_const.rdy - - @update - def update_send_out_signal(): - s.send_to_controller_pkt.val @= 0 - s.send_to_controller_pkt.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - if s.ctrl_mem.send_pkt_to_controller.val: - s.send_to_controller_pkt.val @= 1 - s.send_to_controller_pkt.msg @= s.ctrl_mem.send_pkt_to_controller.msg - s.ctrl_mem.send_pkt_to_controller.rdy @= s.send_to_controller_pkt.rdy - - # Updates the configuration memory related signals. - @update - def update_opt(): - s.element.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - s.routing_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - s.fu_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - - # FIXME: Do we still need separate element and routing_xbar? - # FIXME: Do we need to consider reg bank here? - s.element.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.element_done - s.routing_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.routing_crossbar_done - s.fu_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.fu_crossbar_done - - # FIXME: yo96, rename ctrl.rdy to ctrl.proceed or sth similar. - # Allows either the FU-related go out first or routing-xbar go out first. And only - # allows the ctrl signal proceed till all the sub-modules done their own job (once). - s.ctrl_mem.send_ctrl.rdy @= (s.element.recv_opt.rdy | s.element_done) & \ - (s.routing_crossbar.recv_opt.rdy | s.routing_crossbar_done) & \ - (s.fu_crossbar.recv_opt.rdy | s.fu_crossbar_done) - - # TODO: https://github.com/tancheng/VectorCGRA/issues/127 - @update - def notify_const_mem(): - s.const_mem.ctrl_proceed @= s.ctrl_mem.send_ctrl.rdy & s.ctrl_mem.send_ctrl.val - - # Updates the signals indicating whether certain modules already done their jobs. - @update_ff - def already_done(): - if s.reset | s.ctrl_mem.send_ctrl.rdy: - s.element_done <<= 0 - s.fu_crossbar_done <<= 0 - s.routing_crossbar_done <<= 0 - else: - if s.element.recv_opt.rdy: - s.element_done <<= 1 - if s.fu_crossbar.recv_opt.rdy: - s.fu_crossbar_done <<= 1 - if s.routing_crossbar.recv_opt.rdy: - s.routing_crossbar_done <<= 1 - - @update - def notify_crossbars_compute_status(): - s.routing_crossbar.compute_done @= s.element_done - s.fu_crossbar.compute_done @= s.element_done - - # Line trace - def line_trace(s): - recv_str = "|".join(["(" + str(x.msg) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.recv_data]) - send_str = "|".join([str(x.msg) for x in s.send_data]) - tile_in_channel_recv_str = "|".join([str(x.recv.msg) for x in s.tile_in_channel]) - tile_in_channel_send_str = "|".join([str(x.send.msg) for x in s.tile_in_channel]) - tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) - out_str = "|".join(["(" + str(x.msg.payload) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) - ctrl_mem = s.ctrl_mem.line_trace() - const_mem = s.const_mem.line_trace() - return f"send_str: {send_str}, tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " - +""" +========================================================================= +TileSeparateCrossbarRTL.py +========================================================================= +The tile contains a list of functional units, a configuration memory, a +set of registers (e.g., channels), and two crossbars. One crossbar is for +routing the data to registers (i.e., the channels before FU and the +channels after the crossbar), and the other one is for passing the to the +next crossbar. + +Detailed in: https://github.com/tancheng/VectorCGRA/issues/13 (Option 2). + +Author : Cheng Tan + Date : Nov 26, 2024 +""" + +from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ..fu.single.AdderRTL import AdderRTL +from ..fu.single.GrantRTL import GrantRTL +from ..fu.single.CompRTL import CompRTL +from ..fu.single.MemUnitRTL import MemUnitRTL +from ..fu.single.MulRTL import MulRTL +from ..fu.single.PhiRTL import PhiRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.cmd_type import * +from ..lib.util.common import * +from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL +from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL +from ..noc.CrossbarRTL import CrossbarRTL +from ..noc.LinkOrRTL import LinkOrRTL +from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL +from ..rf.RegisterRTL import RegisterRTL +from ..lib.util.data_struct_attr import * + + +class TileRTL(Component): + + def construct(s, IntraCgraPktType, + ctrl_mem_size, data_mem_size, num_ctrl, + total_steps, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, num_cgras, num_tiles, + num_registers_per_reg_bank = 16, + Fu = FlexibleFuRTL, + FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, GrantRTL, MemUnitRTL]): + + # Derives types from IntraCgraPktType. + CgraPayloadType = IntraCgraPktType.get_field_type(kAttrPayload) + CtrlPktType = IntraCgraPktType + DataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = DataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_bitwidth = DataType.get_field_type(kAttrPayload).nbits + + # Constants. + num_routing_xbar_inports = num_tile_inports + num_fu_inports + num_routing_xbar_outports = num_fu_inports + num_tile_outports + + num_fu_xbar_inports = num_fu_outports + num_fu_xbar_outports = num_fu_inports + num_tile_outports + + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size)) + + # Interfaces. + s.recv_data = [RecvIfcRTL(DataType) + for _ in range (num_tile_inports)] + s.send_data = [SendIfcRTL(DataType) + for _ in range (num_tile_outports)] + + # Ctrl. + s.recv_from_controller_pkt = RecvIfcRTL(CtrlPktType) + # Sends the ctrl packets to ctrl ring. + s.send_to_controller_pkt = SendIfcRTL(CtrlPktType) + + # Data. + s.to_mem_raddr = SendIfcRTL(DataAddrType) + s.from_mem_rdata = RecvIfcRTL(DataType) + s.to_mem_waddr = SendIfcRTL(DataAddrType) + s.to_mem_wdata = SendIfcRTL(DataType) + + # Components. + s.element = FlexibleFuRTL(CtrlPktType, num_fu_inports, + num_fu_outports, num_tiles, FuList) + s.const_mem = ConstQueueDynamicRTL(DataType, ctrl_mem_size) + s.routing_crossbar = CrossbarRTL(DataType, + CtrlSignalType, + num_routing_xbar_inports, + num_routing_xbar_outports, + num_cgras, + num_tiles, + ctrl_mem_size, + num_tile_outports) + s.fu_crossbar = CrossbarRTL(DataType, + CtrlSignalType, + num_fu_xbar_inports, + num_fu_xbar_outports, + num_cgras, + num_tiles, + ctrl_mem_size, + num_tile_outports) + s.register_cluster = \ + RegisterClusterRTL(DataType, CtrlSignalType, num_fu_inports, + num_registers_per_reg_bank) + s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, + ctrl_mem_size, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_cgras, + num_tiles, + num_ctrl, + total_steps) + + # The `tile_in_channel` indicates the outport channels that are + # connected to the next tiles. + s.tile_in_channel = [ChannelRTL(DataType, latency = 1) + for _ in range(num_tile_inports)] + + # The `tile_out_or_link` would "or" the outports of the + # `tile_out_channel` and the FUs. + s.tile_out_or_link = [LinkOrRTL(DataType) + for _ in range(num_tile_outports)] + + # Signals indicating whether certain modules already done their jobs. + s.element_done = Wire(1) + s.fu_crossbar_done = Wire(1) + s.routing_crossbar_done = Wire(1) + + s.cgra_id = InPort(mk_bits(max(1, clog2(num_cgras)))) + s.tile_id = InPort(mk_bits(clog2(num_tiles + 1))) + + # Propagates tile id. + s.element.tile_id //= s.tile_id + s.ctrl_mem.cgra_id //= s.cgra_id + s.ctrl_mem.tile_id //= s.tile_id + s.fu_crossbar.cgra_id //= s.cgra_id + s.fu_crossbar.tile_id //= s.tile_id + s.routing_crossbar.cgra_id //= s.cgra_id + s.routing_crossbar.tile_id //= s.tile_id + + # Assigns crossbar id. + s.routing_crossbar.crossbar_id //= PORT_INDEX_ROUTING_CROSSBAR + s.fu_crossbar.crossbar_id //= PORT_INDEX_FU_CROSSBAR + + # Constant queue. + s.element.recv_const //= s.const_mem.send_const + + # Fu data <-> ctrl memory (eventually towards/from CPU via controller). + s.element.send_to_ctrl_mem //= s.ctrl_mem.recv_from_element + s.element.recv_from_ctrl_mem //= s.ctrl_mem.send_to_element + + # Ctrl address port. + s.routing_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport + s.fu_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport + s.element.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport + + # Prologue port. + s.element.prologue_count_inport //= s.ctrl_mem.prologue_count_outport_fu + for addr in range(ctrl_mem_size): + for i in range(num_routing_xbar_inports): + s.routing_crossbar.prologue_count_inport[addr][i] //= \ + s.ctrl_mem.prologue_count_outport_routing_crossbar[addr][i] + for i in range(num_fu_xbar_inports): + s.fu_crossbar.prologue_count_inport[addr][i] //= \ + s.ctrl_mem.prologue_count_outport_fu_crossbar[addr][i] + + for i in range(len(FuList)): + if FuList[i] == MemUnitRTL: + s.to_mem_raddr //= s.element.to_mem_raddr[i] + s.from_mem_rdata //= s.element.from_mem_rdata[i] + s.to_mem_waddr //= s.element.to_mem_waddr[i] + s.to_mem_wdata //= s.element.to_mem_wdata[i] + else: + s.element.to_mem_raddr[i].rdy //= 0 + s.element.from_mem_rdata[i].val //= 0 + s.element.from_mem_rdata[i].msg //= DataType() + s.element.to_mem_waddr[i].rdy //= 0 + s.element.to_mem_wdata[i].rdy //= 0 + + # Connections on the `routing_crossbar`. + # The data from other tiles should be connected to the + # `routing_crossbar`. + for i in range(num_tile_inports): + s.recv_data[i] //= s.tile_in_channel[i].recv + s.tile_in_channel[i].send //= s.routing_crossbar.recv_data[i] + + # Register banks are connected to the routing crossbar as additional + # inports, enabling reg -> outport DATA_MOV without occupying the FU. + for i in range(num_fu_inports): + s.register_cluster.send_data_to_routing_crossbar[i] //= \ + s.routing_crossbar.recv_data[num_tile_inports + i] + + # Connects specific xbar control signals to the corresponding crossbar. + for i in range(num_routing_xbar_outports): + s.routing_crossbar.crossbar_outport[i] //= \ + s.ctrl_mem.send_ctrl.msg.routing_xbar_outport[i] + s.fu_crossbar.crossbar_outport[i] //= \ + s.ctrl_mem.send_ctrl.msg.fu_xbar_outport[i] + + # Connections on the `fu_crossbar`. + for i in range(num_fu_outports): + s.element.send_out[i] //= s.fu_crossbar.recv_data[i] + + # The data going out to the other tiles should be from the + # `routing_crossbar`. Note that there are also data being fed into + # the FUs via the `routing_crossbar`, which are filtered out by + # `num_tile_outports` below. In addition, we "or" the outports of + # the FUs (via `fu_crossbar`) with the outports of the + # `routing_crossbar` through the corresponding channels. + for i in range(num_tile_outports): + s.fu_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_fu + s.routing_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_xbar + s.tile_out_or_link[i].send //= s.send_data[i] + + # Crossbars outputs are integrated with the "register_cluster". + # Whether the required operands for FU are from the "routing_crossbar" + # or from the "register_cluster" depends on the control signals. + for i in range(num_fu_inports): + s.routing_crossbar.send_data[num_tile_outports + i] //= \ + s.register_cluster.recv_data_from_routing_crossbar[i] + s.fu_crossbar.send_data[num_tile_outports + i] //= \ + s.register_cluster.recv_data_from_fu_crossbar[i] + + s.register_cluster.recv_data_from_const[i].msg //= DataType() + s.register_cluster.recv_data_from_const[i].val //= 0 + + s.register_cluster.send_data_to_fu[i] //= \ + s.element.recv_in[i] + s.register_cluster.inport_opt //= s.ctrl_mem.send_ctrl.msg + + # Clear ports are only useful during context switching. + # We connect to 0 to make sure they have drivers. + for i in range(len(FuList)): + s.element.clear[i] //= 0 + s.fu_crossbar.clear //= 0 + s.routing_crossbar.clear //= 0 + + @update + def feed_pkt(): + s.ctrl_mem.recv_pkt_from_controller.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.const_mem.recv_const.msg @= DataType(0, 0, 0, 0) + s.ctrl_mem.recv_pkt_from_controller.val @= 0 + s.const_mem.recv_const.val @= 0 + s.recv_from_controller_pkt.rdy @= 0 + + if s.recv_from_controller_pkt.val & \ + ((s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU_CROSSBAR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_ROUTING_CROSSBAR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_TOTAL_CTRL_COUNT) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_COUNT_PER_ITER) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD_RESPONSE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_MUL_RESPONSE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_LAUNCH) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_LOOP_LOWER) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_LOOP_UPPER) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_LOOP_STEP)): + s.ctrl_mem.recv_pkt_from_controller.val @= 1 + s.ctrl_mem.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg + s.recv_from_controller_pkt.rdy @= s.ctrl_mem.recv_pkt_from_controller.rdy + elif s.recv_from_controller_pkt.val & (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONST): + s.const_mem.recv_const.val @= 1 + s.const_mem.recv_const.msg @= s.recv_from_controller_pkt.msg.payload.data + s.recv_from_controller_pkt.rdy @= s.const_mem.recv_const.rdy + + @update + def update_send_out_signal(): + s.send_to_controller_pkt.val @= 0 + s.send_to_controller_pkt.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + if s.ctrl_mem.send_pkt_to_controller.val: + s.send_to_controller_pkt.val @= 1 + s.send_to_controller_pkt.msg @= s.ctrl_mem.send_pkt_to_controller.msg + s.ctrl_mem.send_pkt_to_controller.rdy @= s.send_to_controller_pkt.rdy + + # Updates the configuration memory related signals. + @update + def update_opt(): + s.element.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + s.routing_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + s.fu_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + + # FIXME: Do we still need separate element and routing_xbar? + # FIXME: Do we need to consider reg bank here? + s.element.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.element_done + s.routing_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.routing_crossbar_done + s.fu_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.fu_crossbar_done + + # FIXME: yo96, rename ctrl.rdy to ctrl.proceed or sth similar. + # Allows either the FU-related go out first or routing-xbar go out first. And only + # allows the ctrl signal proceed till all the sub-modules done their own job (once). + s.ctrl_mem.send_ctrl.rdy @= (s.element.recv_opt.rdy | s.element_done) & \ + (s.routing_crossbar.recv_opt.rdy | s.routing_crossbar_done) & \ + (s.fu_crossbar.recv_opt.rdy | s.fu_crossbar_done) + + # TODO: https://github.com/tancheng/VectorCGRA/issues/127 + @update + def notify_const_mem(): + s.const_mem.ctrl_proceed @= s.ctrl_mem.send_ctrl.rdy & s.ctrl_mem.send_ctrl.val + + # Updates the signals indicating whether certain modules already done their jobs. + @update_ff + def already_done(): + if s.reset | s.ctrl_mem.send_ctrl.rdy: + s.element_done <<= 0 + s.fu_crossbar_done <<= 0 + s.routing_crossbar_done <<= 0 + else: + if s.element.recv_opt.rdy: + s.element_done <<= 1 + if s.fu_crossbar.recv_opt.rdy: + s.fu_crossbar_done <<= 1 + if s.routing_crossbar.recv_opt.rdy: + s.routing_crossbar_done <<= 1 + + @update + def notify_crossbars_compute_status(): + s.routing_crossbar.compute_done @= s.element_done + s.fu_crossbar.compute_done @= s.element_done + + # Line trace + def line_trace(s): + recv_str = "|".join(["(" + str(x.msg) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.recv_data]) + send_str = "|".join([str(x.msg) for x in s.send_data]) + tile_in_channel_recv_str = "|".join([str(x.recv.msg) for x in s.tile_in_channel]) + tile_in_channel_send_str = "|".join([str(x.send.msg) for x in s.tile_in_channel]) + tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) + out_str = "|".join(["(" + str(x.msg.payload) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) + ctrl_mem = s.ctrl_mem.line_trace() + const_mem = s.const_mem.line_trace() + return f"send_str: {send_str}, tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " + diff --git a/tile/TileWithContextSwitchRTL.py b/tile/TileWithContextSwitchRTL.py index ad46171d..284fe62f 100644 --- a/tile/TileWithContextSwitchRTL.py +++ b/tile/TileWithContextSwitchRTL.py @@ -1,380 +1,380 @@ -""" -========================================================================= -TileWithContextSwitchRTL.py -========================================================================= -Integrates tile with the context switch module and clearable channels - -Author : Yufei Yang - Date : Sep 24, 2025 -""" - -from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ..fu.single.AdderRTL import AdderRTL -from ..fu.single.GrantRTL import GrantRTL -from ..fu.single.CompRTL import CompRTL -from ..fu.single.MemUnitRTL import MemUnitRTL -from ..fu.single.MulRTL import MulRTL -from ..fu.single.PhiRTL import PhiRTL -from ..fu.single.RetRTL import RetRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.cmd_type import * -from ..lib.util.common import * -from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL -from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL -from ..mem.ctrl.ContextSwitchRTL import ContextSwitchRTL -from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL -from ..noc.CrossbarRTL import CrossbarRTL -from ..noc.LinkOrRTL import LinkOrRTL -from ..noc.ChannelWithClearRTL import ChannelWithClearRTL -from ..rf.RegisterRTL import RegisterRTL -from ..lib.util.data_struct_attr import * - - -class TileWithContextSwitchRTL(Component): - - def construct(s, IntraCgraPktType, - ctrl_mem_size, data_mem_size, num_ctrl, - total_steps, num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports, num_cgras, num_tiles, - num_registers_per_reg_bank = 16, - Fu = FlexibleFuRTL, - FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, GrantRTL, MemUnitRTL]): - - # Derives types from CgraPayloadType. - CgraPayloadType = IntraCgraPktType.get_field_type(kAttrPayload) - CtrlPktType = IntraCgraPktType - DataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = DataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_bitwidth = DataType.get_field_type(kAttrPayload).nbits - - # Constants. - num_routing_xbar_inports = num_tile_inports + num_fu_inports - num_routing_xbar_outports = num_fu_inports + num_tile_outports - - num_fu_xbar_inports = num_fu_outports - num_fu_xbar_outports = num_fu_inports + num_tile_outports - - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size)) - - # Interfaces. - s.recv_data = [RecvIfcRTL(DataType) - for _ in range (num_tile_inports)] - s.send_data = [SendIfcRTL(DataType) - for _ in range (num_tile_outports)] - - # Ctrl. - s.recv_from_controller_pkt = RecvIfcRTL(CtrlPktType) - # Sends the ctrl packets to ctrl ring. - s.send_to_controller_pkt = SendIfcRTL(CtrlPktType) - - # Data. - s.to_mem_raddr = SendIfcRTL(DataAddrType) - s.from_mem_rdata = RecvIfcRTL(DataType) - s.to_mem_waddr = SendIfcRTL(DataAddrType) - s.to_mem_wdata = SendIfcRTL(DataType) - - # Components. - s.element = FlexibleFuRTL(CtrlPktType, num_fu_inports, num_fu_outports, - num_tiles, FuList) - # We use many CMD_CONST to simulate runtime commands in TileWithContextSwitchRTL_test, - # so here we increase the size of const_mem to avoid deadlock. - s.const_mem = ConstQueueDynamicRTL(DataType, ctrl_mem_size+10) - s.routing_crossbar = CrossbarRTL(DataType, - CtrlSignalType, - num_routing_xbar_inports, - num_routing_xbar_outports, - num_cgras, - num_tiles, - ctrl_mem_size, - num_tile_outports) - s.fu_crossbar = CrossbarRTL(DataType, - CtrlSignalType, - num_fu_xbar_inports, - num_fu_xbar_outports, - num_cgras, - num_tiles, - ctrl_mem_size, - num_tile_outports) - s.register_cluster = \ - RegisterClusterRTL(DataType, CtrlSignalType, num_fu_inports, - num_registers_per_reg_bank) - s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, - ctrl_mem_size, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports, - num_cgras, - num_tiles, - num_ctrl, - total_steps) - s.context_switch = ContextSwitchRTL(data_bitwidth, clog2(ctrl_mem_size)) - - # The `tile_in_channel` indicates the outport channels that are - # connected to the next tiles. - s.tile_in_channel = [ChannelWithClearRTL(DataType, latency = 1) - for _ in range(num_tile_inports)] - - # The `tile_out_or_link` would "or" the outports of the - # `tile_out_channel` and the FUs. - s.tile_out_or_link = [LinkOrRTL(DataType) - for _ in range(num_tile_outports)] - - # Signals indicating whether certain modules already done their jobs. - s.element_done = Wire(1) - s.fu_crossbar_done = Wire(1) - s.routing_crossbar_done = Wire(1) - - # Used for: - # Clearing the 'first' signal in PhiRTL to correctly resume the progress. - # Clearing the 'prologue_counter' signal in CrossbarRTL to correctly resume the progress. - s.clear = Wire(1) - - s.cgra_id = InPort(mk_bits(max(1, clog2(num_cgras)))) - s.tile_id = InPort(mk_bits(clog2(num_tiles + 1))) - - # Propagates tile id. - s.element.tile_id //= s.tile_id - s.ctrl_mem.cgra_id //= s.cgra_id - s.ctrl_mem.tile_id //= s.tile_id - s.fu_crossbar.cgra_id //= s.cgra_id - s.fu_crossbar.tile_id //= s.tile_id - s.routing_crossbar.cgra_id //= s.cgra_id - s.routing_crossbar.tile_id //= s.tile_id - - # Assigns crossbar id. - s.routing_crossbar.crossbar_id //= PORT_INDEX_ROUTING_CROSSBAR - s.fu_crossbar.crossbar_id //= PORT_INDEX_FU_CROSSBAR - - # Constant queue. - s.element.recv_const //= s.const_mem.send_const - - # Fu data <-> ctrl memory (eventually towards/from CPU via controller). - s.element.send_to_ctrl_mem //= s.ctrl_mem.recv_from_element - s.element.recv_from_ctrl_mem //= s.ctrl_mem.send_to_element - - # Ctrl address port. - s.routing_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport - s.fu_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport - - # Connects context switch module - s.context_switch.recv_cmd //= s.recv_from_controller_pkt.msg.payload.cmd - s.context_switch.recv_cmd_vld //= s.recv_from_controller_pkt.val - s.context_switch.recv_opt //= s.ctrl_mem.send_ctrl.msg.operation - s.context_switch.progress_in //= s.element.send_out[0].msg - s.context_switch.progress_in_val //= s.element.send_out[0].val - s.context_switch.phi_addr //= s.recv_from_controller_pkt.msg.payload.ctrl_addr - s.context_switch.ctrl_mem_rd_addr //= s.ctrl_mem.ctrl_addr_outport - - # Prologue port. - s.element.prologue_count_inport //= s.ctrl_mem.prologue_count_outport_fu - for addr in range(ctrl_mem_size): - for i in range(num_routing_xbar_inports): - s.routing_crossbar.prologue_count_inport[addr][i] //= \ - s.ctrl_mem.prologue_count_outport_routing_crossbar[addr][i] - for i in range(num_fu_xbar_inports): - s.fu_crossbar.prologue_count_inport[addr][i] //= \ - s.ctrl_mem.prologue_count_outport_fu_crossbar[addr][i] - - for i in range(len(FuList)): - if FuList[i] == MemUnitRTL: - s.to_mem_raddr //= s.element.to_mem_raddr[i] - s.from_mem_rdata //= s.element.from_mem_rdata[i] - s.to_mem_waddr //= s.element.to_mem_waddr[i] - s.to_mem_wdata //= s.element.to_mem_wdata[i] - else: - s.element.to_mem_raddr[i].rdy //= 0 - s.element.from_mem_rdata[i].val //= 0 - s.element.from_mem_rdata[i].msg //= DataType() - s.element.to_mem_waddr[i].rdy //= 0 - s.element.to_mem_wdata[i].rdy //= 0 - - # Feed clear signal to PhiRTL and CrossbarRTL to correctly resume the progress. - for i in range(len(FuList)): - if (FuList[i] == PhiRTL) | (FuList[i] == RetRTL): - s.element.clear[i] //= s.clear - else: - s.element.clear[i] //= 0 - s.fu_crossbar.clear //= s.clear - s.routing_crossbar.clear //= s.clear - s.const_mem.clear //= s.clear - - # Connections on the `routing_crossbar`. - # The data from other tiles should be connected to the - # `routing_crossbar`. - for i in range(num_tile_inports): - s.recv_data[i] //= s.tile_in_channel[i].recv - s.tile_in_channel[i].send //= s.routing_crossbar.recv_data[i] - - # Register banks are connected to the routing crossbar as additional - # inports (num_tile_inports .. num_tile_inports+num_fu_inports-1), - # enabling reg -> outport DATA_MOV without occupying the FU. - for i in range(num_fu_inports): - s.register_cluster.send_data_to_routing_crossbar[i] //= \ - s.routing_crossbar.recv_data[num_tile_inports + i] - - # Connects specific xbar control signals to the corresponding crossbar. - for i in range(num_routing_xbar_outports): - s.routing_crossbar.crossbar_outport[i] //= \ - s.ctrl_mem.send_ctrl.msg.routing_xbar_outport[i] - s.fu_crossbar.crossbar_outport[i] //= \ - s.ctrl_mem.send_ctrl.msg.fu_xbar_outport[i] - - # Connections on the `fu_crossbar`. - # Leaves the recv_data[0] to resume the progress. - for i in range(1, num_fu_outports): - s.element.send_out[i] //= s.fu_crossbar.recv_data[i] - - # The data going out to the other tiles should be from the - # `routing_crossbar`. Note that there are also data being fed into - # the FUs via the `routing_crossbar`, which are filtered out by - # `num_tile_outports` below. In addition, we "or" the outports of - # the FUs (via `fu_crossbar`) with the outports of the - # `routing_crossbar` through the corresponding channels. - for i in range(num_tile_outports): - s.fu_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_fu - s.routing_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_xbar - s.tile_out_or_link[i].send //= s.send_data[i] - - # Crossbars outputs are integrated with the "register_cluster". - # Whether the required operands for FU are from the "routing_crossbar" - # or from the "register_cluster" depends on the control signals. - for i in range(num_fu_inports): - s.routing_crossbar.send_data[num_tile_outports + i] //= \ - s.register_cluster.recv_data_from_routing_crossbar[i] - s.fu_crossbar.send_data[num_tile_outports + i] //= \ - s.register_cluster.recv_data_from_fu_crossbar[i] - - s.register_cluster.recv_data_from_const[i].msg //= DataType() - s.register_cluster.recv_data_from_const[i].val //= 0 - - s.register_cluster.send_data_to_fu[i] //= \ - s.element.recv_in[i] - s.register_cluster.inport_opt //= s.ctrl_mem.send_ctrl.msg - - @update - def feed_pkt(): - s.ctrl_mem.recv_pkt_from_controller.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - s.const_mem.recv_const.msg @= DataType(0, 0, 0, 0) - s.ctrl_mem.recv_pkt_from_controller.val @= 0 - s.const_mem.recv_const.val @= 0 - s.recv_from_controller_pkt.rdy @= 0 - - if s.recv_from_controller_pkt.val & \ - ((s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU_CROSSBAR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_ROUTING_CROSSBAR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_TOTAL_CTRL_COUNT) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_COUNT_PER_ITER) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_CTRL_LOWER_BOUND) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD_RESPONSE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_MUL_RESPONSE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_START_ADDR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_STRIDE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_END_ADDR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_RECORD_PHI_ADDR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_LAUNCH) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_PAUSE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_PRESERVE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_RESUME)): - s.ctrl_mem.recv_pkt_from_controller.val @= 1 - s.ctrl_mem.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg - s.recv_from_controller_pkt.rdy @= s.ctrl_mem.recv_pkt_from_controller.rdy - elif s.recv_from_controller_pkt.val & (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONST): - s.const_mem.recv_const.val @= 1 - s.const_mem.recv_const.msg @= s.recv_from_controller_pkt.msg.payload.data - s.recv_from_controller_pkt.rdy @= s.const_mem.recv_const.rdy - - if s.recv_from_controller_pkt.val & (s.recv_from_controller_pkt.msg.payload.cmd == CMD_TERMINATE): - s.ctrl_mem.recv_pkt_from_controller.val @= 1 - s.ctrl_mem.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg - s.recv_from_controller_pkt.rdy @= s.ctrl_mem.recv_pkt_from_controller.rdy - s.clear @= 1 - for i in range(num_tile_inports): - s.tile_in_channel[i].clear @= 1 - else: - s.clear @= 0 - for i in range(num_tile_inports): - s.tile_in_channel[i].clear @= 0 - - @update - def update_send_out_signal(): - s.send_to_controller_pkt.val @= 0 - s.send_to_controller_pkt.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - if s.ctrl_mem.send_pkt_to_controller.val: - s.send_to_controller_pkt.val @= 1 - s.send_to_controller_pkt.msg @= s.ctrl_mem.send_pkt_to_controller.msg - s.ctrl_mem.send_pkt_to_controller.rdy @= s.send_to_controller_pkt.rdy - - # Updates the configuration memory related signals. - @update - def update_opt(): - s.element.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - s.routing_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - s.fu_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - - # FIXME: Do we still need separate element and routing_xbar? - # FIXME: Do we need to consider reg bank here? - s.element.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.element_done - s.routing_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.routing_crossbar_done - s.fu_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.fu_crossbar_done - - # FIXME: yo96, rename ctrl.rdy to ctrl.proceed or sth similar. - # Allows either the FU-related go out first or routing-xbar go out first. And only - # allows the ctrl signal proceed till all the sub-modules done their own job (once). - s.ctrl_mem.send_ctrl.rdy @= (s.element.recv_opt.rdy | s.element_done) & \ - (s.routing_crossbar.recv_opt.rdy | s.routing_crossbar_done) & \ - (s.fu_crossbar.recv_opt.rdy | s.fu_crossbar_done) - - # TODO: https://github.com/tancheng/VectorCGRA/issues/127 - @update - def notify_const_mem(): - s.const_mem.ctrl_proceed @= s.ctrl_mem.send_ctrl.rdy & s.ctrl_mem.send_ctrl.val - - @update - def overwrite_fu_outport(): - s.element.send_out[0].rdy @= s.fu_crossbar.recv_data[0].rdy - if s.context_switch.overwrite_fu_outport.val == 1: - s.fu_crossbar.recv_data[0].val @= 1 - s.fu_crossbar.recv_data[0].msg @= s.context_switch.overwrite_fu_outport.msg - else: - s.fu_crossbar.recv_data[0].val @= s.element.send_out[0].val - s.fu_crossbar.recv_data[0].msg @= s.element.send_out[0].msg - - # Updates the signals indicating whether certain modules already done their jobs. - @update_ff - def already_done(): - if s.reset | s.ctrl_mem.send_ctrl.rdy | s.clear: - s.element_done <<= 0 - s.fu_crossbar_done <<= 0 - s.routing_crossbar_done <<= 0 - else: - if s.element.recv_opt.rdy: - s.element_done <<= 1 - if s.fu_crossbar.recv_opt.rdy: - s.fu_crossbar_done <<= 1 - if s.routing_crossbar.recv_opt.rdy: - s.routing_crossbar_done <<= 1 - - @update - def notify_crossbars_compute_status(): - s.routing_crossbar.compute_done @= s.element_done - s.fu_crossbar.compute_done @= s.element_done - - # Line trace - def line_trace(s): - recv_str = "|".join(["(" + str(x.msg) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.recv_data]) - send_str = "|".join([str(x.msg) for x in s.send_data]) - tile_in_channel_recv_str = "|".join([str(x.recv.msg) for x in s.tile_in_channel]) - tile_in_channel_send_str = "|".join([str(x.send.msg) for x in s.tile_in_channel]) - tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) - out_str = "|".join(["(" + str(x.msg.payload) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) - ctrl_mem = s.ctrl_mem.line_trace() - const_mem = s.const_mem.line_trace() - context_switch = s.context_switch.line_trace() - return f"send_str: {send_str}, tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} || context_switch: {context_switch}## " - +""" +========================================================================= +TileWithContextSwitchRTL.py +========================================================================= +Integrates tile with the context switch module and clearable channels + +Author : Yufei Yang + Date : Sep 24, 2025 +""" + +from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ..fu.single.AdderRTL import AdderRTL +from ..fu.single.GrantRTL import GrantRTL +from ..fu.single.CompRTL import CompRTL +from ..fu.single.MemUnitRTL import MemUnitRTL +from ..fu.single.MulRTL import MulRTL +from ..fu.single.PhiRTL import PhiRTL +from ..fu.single.RetRTL import RetRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.cmd_type import * +from ..lib.util.common import * +from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL +from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ..mem.ctrl.ContextSwitchRTL import ContextSwitchRTL +from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL +from ..noc.CrossbarRTL import CrossbarRTL +from ..noc.LinkOrRTL import LinkOrRTL +from ..noc.ChannelWithClearRTL import ChannelWithClearRTL +from ..rf.RegisterRTL import RegisterRTL +from ..lib.util.data_struct_attr import * + + +class TileWithContextSwitchRTL(Component): + + def construct(s, IntraCgraPktType, + ctrl_mem_size, data_mem_size, num_ctrl, + total_steps, num_fu_inports, num_fu_outports, num_tile_inports, + num_tile_outports, num_cgras, num_tiles, + num_registers_per_reg_bank = 16, + Fu = FlexibleFuRTL, + FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, GrantRTL, MemUnitRTL]): + + # Derives types from CgraPayloadType. + CgraPayloadType = IntraCgraPktType.get_field_type(kAttrPayload) + CtrlPktType = IntraCgraPktType + DataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = DataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_bitwidth = DataType.get_field_type(kAttrPayload).nbits + + # Constants. + num_routing_xbar_inports = num_tile_inports + num_fu_inports + num_routing_xbar_outports = num_fu_inports + num_tile_outports + + num_fu_xbar_inports = num_fu_outports + num_fu_xbar_outports = num_fu_inports + num_tile_outports + + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size)) + + # Interfaces. + s.recv_data = [RecvIfcRTL(DataType) + for _ in range (num_tile_inports)] + s.send_data = [SendIfcRTL(DataType) + for _ in range (num_tile_outports)] + + # Ctrl. + s.recv_from_controller_pkt = RecvIfcRTL(CtrlPktType) + # Sends the ctrl packets to ctrl ring. + s.send_to_controller_pkt = SendIfcRTL(CtrlPktType) + + # Data. + s.to_mem_raddr = SendIfcRTL(DataAddrType) + s.from_mem_rdata = RecvIfcRTL(DataType) + s.to_mem_waddr = SendIfcRTL(DataAddrType) + s.to_mem_wdata = SendIfcRTL(DataType) + + # Components. + s.element = FlexibleFuRTL(CtrlPktType, num_fu_inports, num_fu_outports, + num_tiles, FuList) + # We use many CMD_CONST to simulate runtime commands in TileWithContextSwitchRTL_test, + # so here we increase the size of const_mem to avoid deadlock. + s.const_mem = ConstQueueDynamicRTL(DataType, ctrl_mem_size+10) + s.routing_crossbar = CrossbarRTL(DataType, + CtrlSignalType, + num_routing_xbar_inports, + num_routing_xbar_outports, + num_cgras, + num_tiles, + ctrl_mem_size, + num_tile_outports) + s.fu_crossbar = CrossbarRTL(DataType, + CtrlSignalType, + num_fu_xbar_inports, + num_fu_xbar_outports, + num_cgras, + num_tiles, + ctrl_mem_size, + num_tile_outports) + s.register_cluster = \ + RegisterClusterRTL(DataType, CtrlSignalType, num_fu_inports, + num_registers_per_reg_bank) + s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, + ctrl_mem_size, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_cgras, + num_tiles, + num_ctrl, + total_steps) + s.context_switch = ContextSwitchRTL(data_bitwidth, clog2(ctrl_mem_size)) + + # The `tile_in_channel` indicates the outport channels that are + # connected to the next tiles. + s.tile_in_channel = [ChannelWithClearRTL(DataType, latency = 1) + for _ in range(num_tile_inports)] + + # The `tile_out_or_link` would "or" the outports of the + # `tile_out_channel` and the FUs. + s.tile_out_or_link = [LinkOrRTL(DataType) + for _ in range(num_tile_outports)] + + # Signals indicating whether certain modules already done their jobs. + s.element_done = Wire(1) + s.fu_crossbar_done = Wire(1) + s.routing_crossbar_done = Wire(1) + + # Used for: + # Clearing the 'first' signal in PhiRTL to correctly resume the progress. + # Clearing the 'prologue_counter' signal in CrossbarRTL to correctly resume the progress. + s.clear = Wire(1) + + s.cgra_id = InPort(mk_bits(max(1, clog2(num_cgras)))) + s.tile_id = InPort(mk_bits(clog2(num_tiles + 1))) + + # Propagates tile id. + s.element.tile_id //= s.tile_id + s.ctrl_mem.cgra_id //= s.cgra_id + s.ctrl_mem.tile_id //= s.tile_id + s.fu_crossbar.cgra_id //= s.cgra_id + s.fu_crossbar.tile_id //= s.tile_id + s.routing_crossbar.cgra_id //= s.cgra_id + s.routing_crossbar.tile_id //= s.tile_id + + # Assigns crossbar id. + s.routing_crossbar.crossbar_id //= PORT_INDEX_ROUTING_CROSSBAR + s.fu_crossbar.crossbar_id //= PORT_INDEX_FU_CROSSBAR + + # Constant queue. + s.element.recv_const //= s.const_mem.send_const + + # Fu data <-> ctrl memory (eventually towards/from CPU via controller). + s.element.send_to_ctrl_mem //= s.ctrl_mem.recv_from_element + s.element.recv_from_ctrl_mem //= s.ctrl_mem.send_to_element + + # Ctrl address port. + s.routing_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport + s.fu_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport + + # Connects context switch module + s.context_switch.recv_cmd //= s.recv_from_controller_pkt.msg.payload.cmd + s.context_switch.recv_cmd_vld //= s.recv_from_controller_pkt.val + s.context_switch.recv_opt //= s.ctrl_mem.send_ctrl.msg.operation + s.context_switch.progress_in //= s.element.send_out[0].msg + s.context_switch.progress_in_val //= s.element.send_out[0].val + s.context_switch.phi_addr //= s.recv_from_controller_pkt.msg.payload.ctrl_addr + s.context_switch.ctrl_mem_rd_addr //= s.ctrl_mem.ctrl_addr_outport + + # Prologue port. + s.element.prologue_count_inport //= s.ctrl_mem.prologue_count_outport_fu + for addr in range(ctrl_mem_size): + for i in range(num_routing_xbar_inports): + s.routing_crossbar.prologue_count_inport[addr][i] //= \ + s.ctrl_mem.prologue_count_outport_routing_crossbar[addr][i] + for i in range(num_fu_xbar_inports): + s.fu_crossbar.prologue_count_inport[addr][i] //= \ + s.ctrl_mem.prologue_count_outport_fu_crossbar[addr][i] + + for i in range(len(FuList)): + if FuList[i] == MemUnitRTL: + s.to_mem_raddr //= s.element.to_mem_raddr[i] + s.from_mem_rdata //= s.element.from_mem_rdata[i] + s.to_mem_waddr //= s.element.to_mem_waddr[i] + s.to_mem_wdata //= s.element.to_mem_wdata[i] + else: + s.element.to_mem_raddr[i].rdy //= 0 + s.element.from_mem_rdata[i].val //= 0 + s.element.from_mem_rdata[i].msg //= DataType() + s.element.to_mem_waddr[i].rdy //= 0 + s.element.to_mem_wdata[i].rdy //= 0 + + # Feed clear signal to PhiRTL and CrossbarRTL to correctly resume the progress. + for i in range(len(FuList)): + if (FuList[i] == PhiRTL) | (FuList[i] == RetRTL): + s.element.clear[i] //= s.clear + else: + s.element.clear[i] //= 0 + s.fu_crossbar.clear //= s.clear + s.routing_crossbar.clear //= s.clear + s.const_mem.clear //= s.clear + + # Connections on the `routing_crossbar`. + # The data from other tiles should be connected to the + # `routing_crossbar`. + for i in range(num_tile_inports): + s.recv_data[i] //= s.tile_in_channel[i].recv + s.tile_in_channel[i].send //= s.routing_crossbar.recv_data[i] + + # Register banks are connected to the routing crossbar as additional + # inports (num_tile_inports .. num_tile_inports+num_fu_inports-1), + # enabling reg -> outport DATA_MOV without occupying the FU. + for i in range(num_fu_inports): + s.register_cluster.send_data_to_routing_crossbar[i] //= \ + s.routing_crossbar.recv_data[num_tile_inports + i] + + # Connects specific xbar control signals to the corresponding crossbar. + for i in range(num_routing_xbar_outports): + s.routing_crossbar.crossbar_outport[i] //= \ + s.ctrl_mem.send_ctrl.msg.routing_xbar_outport[i] + s.fu_crossbar.crossbar_outport[i] //= \ + s.ctrl_mem.send_ctrl.msg.fu_xbar_outport[i] + + # Connections on the `fu_crossbar`. + # Leaves the recv_data[0] to resume the progress. + for i in range(1, num_fu_outports): + s.element.send_out[i] //= s.fu_crossbar.recv_data[i] + + # The data going out to the other tiles should be from the + # `routing_crossbar`. Note that there are also data being fed into + # the FUs via the `routing_crossbar`, which are filtered out by + # `num_tile_outports` below. In addition, we "or" the outports of + # the FUs (via `fu_crossbar`) with the outports of the + # `routing_crossbar` through the corresponding channels. + for i in range(num_tile_outports): + s.fu_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_fu + s.routing_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_xbar + s.tile_out_or_link[i].send //= s.send_data[i] + + # Crossbars outputs are integrated with the "register_cluster". + # Whether the required operands for FU are from the "routing_crossbar" + # or from the "register_cluster" depends on the control signals. + for i in range(num_fu_inports): + s.routing_crossbar.send_data[num_tile_outports + i] //= \ + s.register_cluster.recv_data_from_routing_crossbar[i] + s.fu_crossbar.send_data[num_tile_outports + i] //= \ + s.register_cluster.recv_data_from_fu_crossbar[i] + + s.register_cluster.recv_data_from_const[i].msg //= DataType() + s.register_cluster.recv_data_from_const[i].val //= 0 + + s.register_cluster.send_data_to_fu[i] //= \ + s.element.recv_in[i] + s.register_cluster.inport_opt //= s.ctrl_mem.send_ctrl.msg + + @update + def feed_pkt(): + s.ctrl_mem.recv_pkt_from_controller.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.const_mem.recv_const.msg @= DataType(0, 0, 0, 0) + s.ctrl_mem.recv_pkt_from_controller.val @= 0 + s.const_mem.recv_const.val @= 0 + s.recv_from_controller_pkt.rdy @= 0 + + if s.recv_from_controller_pkt.val & \ + ((s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU_CROSSBAR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_ROUTING_CROSSBAR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_TOTAL_CTRL_COUNT) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_COUNT_PER_ITER) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_CTRL_LOWER_BOUND) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD_RESPONSE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_MUL_RESPONSE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_START_ADDR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_STRIDE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_END_ADDR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_RECORD_PHI_ADDR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_LAUNCH) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_PAUSE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_PRESERVE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_RESUME)): + s.ctrl_mem.recv_pkt_from_controller.val @= 1 + s.ctrl_mem.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg + s.recv_from_controller_pkt.rdy @= s.ctrl_mem.recv_pkt_from_controller.rdy + elif s.recv_from_controller_pkt.val & (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONST): + s.const_mem.recv_const.val @= 1 + s.const_mem.recv_const.msg @= s.recv_from_controller_pkt.msg.payload.data + s.recv_from_controller_pkt.rdy @= s.const_mem.recv_const.rdy + + if s.recv_from_controller_pkt.val & (s.recv_from_controller_pkt.msg.payload.cmd == CMD_TERMINATE): + s.ctrl_mem.recv_pkt_from_controller.val @= 1 + s.ctrl_mem.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg + s.recv_from_controller_pkt.rdy @= s.ctrl_mem.recv_pkt_from_controller.rdy + s.clear @= 1 + for i in range(num_tile_inports): + s.tile_in_channel[i].clear @= 1 + else: + s.clear @= 0 + for i in range(num_tile_inports): + s.tile_in_channel[i].clear @= 0 + + @update + def update_send_out_signal(): + s.send_to_controller_pkt.val @= 0 + s.send_to_controller_pkt.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + if s.ctrl_mem.send_pkt_to_controller.val: + s.send_to_controller_pkt.val @= 1 + s.send_to_controller_pkt.msg @= s.ctrl_mem.send_pkt_to_controller.msg + s.ctrl_mem.send_pkt_to_controller.rdy @= s.send_to_controller_pkt.rdy + + # Updates the configuration memory related signals. + @update + def update_opt(): + s.element.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + s.routing_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + s.fu_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + + # FIXME: Do we still need separate element and routing_xbar? + # FIXME: Do we need to consider reg bank here? + s.element.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.element_done + s.routing_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.routing_crossbar_done + s.fu_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.fu_crossbar_done + + # FIXME: yo96, rename ctrl.rdy to ctrl.proceed or sth similar. + # Allows either the FU-related go out first or routing-xbar go out first. And only + # allows the ctrl signal proceed till all the sub-modules done their own job (once). + s.ctrl_mem.send_ctrl.rdy @= (s.element.recv_opt.rdy | s.element_done) & \ + (s.routing_crossbar.recv_opt.rdy | s.routing_crossbar_done) & \ + (s.fu_crossbar.recv_opt.rdy | s.fu_crossbar_done) + + # TODO: https://github.com/tancheng/VectorCGRA/issues/127 + @update + def notify_const_mem(): + s.const_mem.ctrl_proceed @= s.ctrl_mem.send_ctrl.rdy & s.ctrl_mem.send_ctrl.val + + @update + def overwrite_fu_outport(): + s.element.send_out[0].rdy @= s.fu_crossbar.recv_data[0].rdy + if s.context_switch.overwrite_fu_outport.val == 1: + s.fu_crossbar.recv_data[0].val @= 1 + s.fu_crossbar.recv_data[0].msg @= s.context_switch.overwrite_fu_outport.msg + else: + s.fu_crossbar.recv_data[0].val @= s.element.send_out[0].val + s.fu_crossbar.recv_data[0].msg @= s.element.send_out[0].msg + + # Updates the signals indicating whether certain modules already done their jobs. + @update_ff + def already_done(): + if s.reset | s.ctrl_mem.send_ctrl.rdy | s.clear: + s.element_done <<= 0 + s.fu_crossbar_done <<= 0 + s.routing_crossbar_done <<= 0 + else: + if s.element.recv_opt.rdy: + s.element_done <<= 1 + if s.fu_crossbar.recv_opt.rdy: + s.fu_crossbar_done <<= 1 + if s.routing_crossbar.recv_opt.rdy: + s.routing_crossbar_done <<= 1 + + @update + def notify_crossbars_compute_status(): + s.routing_crossbar.compute_done @= s.element_done + s.fu_crossbar.compute_done @= s.element_done + + # Line trace + def line_trace(s): + recv_str = "|".join(["(" + str(x.msg) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.recv_data]) + send_str = "|".join([str(x.msg) for x in s.send_data]) + tile_in_channel_recv_str = "|".join([str(x.recv.msg) for x in s.tile_in_channel]) + tile_in_channel_send_str = "|".join([str(x.send.msg) for x in s.tile_in_channel]) + tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) + out_str = "|".join(["(" + str(x.msg.payload) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) + ctrl_mem = s.ctrl_mem.line_trace() + const_mem = s.const_mem.line_trace() + context_switch = s.context_switch.line_trace() + return f"send_str: {send_str}, tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} || context_switch: {context_switch}## " + diff --git a/tile/TileWithStreamingLoadRTL.py b/tile/TileWithStreamingLoadRTL.py index 9f9ebc5a..aeb3f97a 100644 --- a/tile/TileWithStreamingLoadRTL.py +++ b/tile/TileWithStreamingLoadRTL.py @@ -1,328 +1,328 @@ -""" -========================================================================= -TileWithStreamingLoadRTL.py -========================================================================= -Integrates tile with StreamimgMemUnit for streaming LD. - -Author : Yufei Yang - Date : Jan 21, 2026 -""" - -from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ..fu.single.AdderRTL import AdderRTL -from ..fu.single.GrantRTL import GrantRTL -from ..fu.single.CompRTL import CompRTL -from ..fu.single.StreamingMemUnitRTL import StreamingMemUnitRTL -from ..fu.single.MulRTL import MulRTL -from ..fu.single.PhiRTL import PhiRTL -from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL -from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..lib.cmd_type import * -from ..lib.util.common import * -from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL -from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL -from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL -from ..noc.CrossbarRTL import CrossbarRTL -from ..noc.LinkOrRTL import LinkOrRTL -from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL -from ..rf.RegisterRTL import RegisterRTL -from ..lib.util.data_struct_attr import * - -class TileWithStreamingLoadRTL(Component): - - def construct(s, IntraCgraPktType, - ctrl_mem_size, data_mem_size, num_ctrl, - total_steps, num_fu_inports, num_fu_outports, - num_tile_inports, num_tile_outports, num_cgras, num_tiles, - num_registers_per_reg_bank = 16, - Fu = FlexibleFuRTL, - FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, GrantRTL, StreamingMemUnitRTL]): - - # Derives types from IntraCgraPktType. - CgraPayloadType = IntraCgraPktType.get_field_type(kAttrPayload) - CtrlPktType = IntraCgraPktType - DataType = CgraPayloadType.get_field_type(kAttrData) - PredicateType = DataType.get_field_type(kAttrPredicate) - CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) - data_bitwidth = DataType.get_field_type(kAttrPayload).nbits - - # Constants. - num_routing_xbar_inports = num_tile_inports - num_routing_xbar_outports = num_fu_inports + num_tile_outports - - num_fu_xbar_inports = num_fu_outports - num_fu_xbar_outports = num_fu_inports + num_tile_outports - - CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - DataAddrType = mk_bits(clog2(data_mem_size)) - - # Interfaces. - s.recv_data = [RecvIfcRTL(DataType) - for _ in range (num_tile_inports)] - s.send_data = [SendIfcRTL(DataType) - for _ in range (num_tile_outports)] - - # Ctrl. - s.recv_from_controller_pkt = RecvIfcRTL(CtrlPktType) - # Sends the ctrl packets to ctrl ring. - s.send_to_controller_pkt = SendIfcRTL(CtrlPktType) - - # Data. - s.to_mem_raddr = SendIfcRTL(DataAddrType) - s.from_mem_rdata = RecvIfcRTL(DataType) - s.to_mem_waddr = SendIfcRTL(DataAddrType) - s.to_mem_wdata = SendIfcRTL(DataType) - - # Components. - s.element = FlexibleFuRTL(CtrlPktType, num_fu_inports, num_fu_outports, - num_tiles, FuList) - s.const_mem = ConstQueueDynamicRTL(DataType, ctrl_mem_size) - s.routing_crossbar = CrossbarRTL(DataType, - CtrlSignalType, - num_routing_xbar_inports, - num_routing_xbar_outports, - num_cgras, - num_tiles, - ctrl_mem_size, - num_tile_outports) - s.fu_crossbar = CrossbarRTL(DataType, - CtrlSignalType, - num_fu_xbar_inports, - num_fu_xbar_outports, - num_cgras, - num_tiles, - ctrl_mem_size, - num_tile_outports) - s.register_cluster = \ - RegisterClusterRTL(DataType, CtrlSignalType, num_fu_inports, - num_registers_per_reg_bank) - s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, - ctrl_mem_size, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports, - num_cgras, - num_tiles, - num_ctrl, - total_steps) - - # The `tile_in_channel` indicates the outport channels that are - # connected to the next tiles. - s.tile_in_channel = [ChannelRTL(DataType, latency = 1) - for _ in range(num_tile_inports)] - - # The `tile_out_or_link` would "or" the outports of the - # `tile_out_channel` and the FUs. - s.tile_out_or_link = [LinkOrRTL(DataType) - for _ in range(num_tile_outports)] - - # Signals indicating whether certain modules already done their jobs. - s.element_done = Wire(1) - s.fu_crossbar_done = Wire(1) - s.routing_crossbar_done = Wire(1) - - s.cgra_id = InPort(mk_bits(max(1, clog2(num_cgras)))) - s.tile_id = InPort(mk_bits(clog2(num_tiles + 1))) - - # Propagates tile id. - s.element.tile_id //= s.tile_id - s.ctrl_mem.cgra_id //= s.cgra_id - s.ctrl_mem.tile_id //= s.tile_id - s.fu_crossbar.cgra_id //= s.cgra_id - s.fu_crossbar.tile_id //= s.tile_id - s.routing_crossbar.cgra_id //= s.cgra_id - s.routing_crossbar.tile_id //= s.tile_id - - # Assigns crossbar id. - s.routing_crossbar.crossbar_id //= PORT_INDEX_ROUTING_CROSSBAR - s.fu_crossbar.crossbar_id //= PORT_INDEX_FU_CROSSBAR - - # Constant queue. - s.element.recv_const //= s.const_mem.send_const - - # Fu data <-> ctrl memory (eventually towards/from CPU via controller). - s.element.send_to_ctrl_mem //= s.ctrl_mem.recv_from_element - s.element.recv_from_ctrl_mem //= s.ctrl_mem.send_to_element - - # Ctrl address port. - s.routing_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport - s.fu_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport - s.element.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport - - # Prologue port. - s.element.prologue_count_inport //= s.ctrl_mem.prologue_count_outport_fu - for addr in range(ctrl_mem_size): - for i in range(num_routing_xbar_inports): - s.routing_crossbar.prologue_count_inport[addr][i] //= \ - s.ctrl_mem.prologue_count_outport_routing_crossbar[addr][i] - for i in range(num_fu_xbar_inports): - s.fu_crossbar.prologue_count_inport[addr][i] //= \ - s.ctrl_mem.prologue_count_outport_fu_crossbar[addr][i] - - for i in range(len(FuList)): - if FuList[i] == StreamingMemUnitRTL: - s.to_mem_raddr //= s.element.to_mem_raddr[i] - s.from_mem_rdata //= s.element.from_mem_rdata[i] - s.to_mem_waddr //= s.element.to_mem_waddr[i] - s.to_mem_wdata //= s.element.to_mem_wdata[i] - else: - s.element.to_mem_raddr[i].rdy //= 0 - s.element.from_mem_rdata[i].val //= 0 - s.element.from_mem_rdata[i].msg //= DataType() - s.element.to_mem_waddr[i].rdy //= 0 - s.element.to_mem_wdata[i].rdy //= 0 - - # Connections on the `routing_crossbar`. - # The data from other tiles should be connected to the - # `routing_crossbar`. - for i in range(num_tile_inports): - s.recv_data[i] //= s.tile_in_channel[i].recv - s.tile_in_channel[i].send //= s.routing_crossbar.recv_data[i] - - # Connects specific xbar control signals to the corresponding crossbar. - for i in range(num_routing_xbar_outports): - s.routing_crossbar.crossbar_outport[i] //= \ - s.ctrl_mem.send_ctrl.msg.routing_xbar_outport[i] - s.fu_crossbar.crossbar_outport[i] //= \ - s.ctrl_mem.send_ctrl.msg.fu_xbar_outport[i] - - # Connections on the `fu_crossbar`. - for i in range(num_fu_outports): - s.element.send_out[i] //= s.fu_crossbar.recv_data[i] - - # The data going out to the other tiles should be from the - # `routing_crossbar`. Note that there are also data being fed into - # the FUs via the `routing_crossbar`, which are filtered out by - # `num_tile_outports` below. In addition, we "or" the outports of - # the FUs (via `fu_crossbar`) with the outports of the - # `routing_crossbar` through the corresponding channels. - for i in range(num_tile_outports): - s.fu_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_fu - s.routing_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_xbar - s.tile_out_or_link[i].send //= s.send_data[i] - - # Crossbars outputs are integrated with the "register_cluster". - # Whether the required operands for FU are from the "routing_crossbar" - # or from the "register_cluster" depends on the control signals. - for i in range(num_fu_inports): - s.routing_crossbar.send_data[num_tile_outports + i] //= \ - s.register_cluster.recv_data_from_routing_crossbar[i] - s.fu_crossbar.send_data[num_tile_outports + i] //= \ - s.register_cluster.recv_data_from_fu_crossbar[i] - - s.register_cluster.recv_data_from_const[i].msg //= DataType() - s.register_cluster.recv_data_from_const[i].val //= 0 - - s.register_cluster.send_data_to_fu[i] //= \ - s.element.recv_in[i] - s.register_cluster.inport_opt //= s.ctrl_mem.send_ctrl.msg - - # Clear ports are only useful during context switching. - # We connect to 0 to make sure they have drivers. - for i in range(len(FuList)): - s.element.clear[i] //= 0 - s.fu_crossbar.clear //= 0 - s.routing_crossbar.clear //= 0 - - @update - def feed_pkt(): - s.ctrl_mem.recv_pkt_from_controller.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - s.const_mem.recv_const.msg @= DataType(0, 0, 0, 0) - s.ctrl_mem.recv_pkt_from_controller.val @= 0 - s.const_mem.recv_const.val @= 0 - s.recv_from_controller_pkt.rdy @= 0 - - if s.recv_from_controller_pkt.val & \ - ((s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU_CROSSBAR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_ROUTING_CROSSBAR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_TOTAL_CTRL_COUNT) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_COUNT_PER_ITER) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD_RESPONSE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_MUL_RESPONSE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_START_ADDR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_STRIDE) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_END_ADDR) | \ - (s.recv_from_controller_pkt.msg.payload.cmd == CMD_LAUNCH)): - s.ctrl_mem.recv_pkt_from_controller.val @= 1 - s.ctrl_mem.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg - s.element.recv_pkt_from_controller.val @= 1 - s.element.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg - s.recv_from_controller_pkt.rdy @= s.ctrl_mem.recv_pkt_from_controller.rdy | \ - s.element.recv_pkt_from_controller.rdy - elif s.recv_from_controller_pkt.val & (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONST): - s.const_mem.recv_const.val @= 1 - s.const_mem.recv_const.msg @= s.recv_from_controller_pkt.msg.payload.data - s.recv_from_controller_pkt.rdy @= s.const_mem.recv_const.rdy - - @update - def update_send_out_signal(): - s.send_to_controller_pkt.val @= 0 - s.send_to_controller_pkt.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - if s.ctrl_mem.send_pkt_to_controller.val: - s.send_to_controller_pkt.val @= 1 - s.send_to_controller_pkt.msg @= s.ctrl_mem.send_pkt_to_controller.msg - s.ctrl_mem.send_pkt_to_controller.rdy @= s.send_to_controller_pkt.rdy - - # Updates the configuration memory related signals. - @update - def update_opt(): - s.element.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - s.routing_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - s.fu_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg - - # FIXME: Do we still need separate element and routing_xbar? - # FIXME: Do we need to consider reg bank here? - s.element.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.element_done - s.routing_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.routing_crossbar_done - s.fu_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.fu_crossbar_done - - # FIXME: yo96, rename ctrl.rdy to ctrl.proceed or sth similar. - # Allows either the FU-related go out first or routing-xbar go out first. And only - # allows the ctrl signal proceed till all the sub-modules done their own job (once). - s.ctrl_mem.send_ctrl.rdy @= (s.element.recv_opt.rdy | s.element_done) & \ - (s.routing_crossbar.recv_opt.rdy | s.routing_crossbar_done) & \ - (s.fu_crossbar.recv_opt.rdy | s.fu_crossbar_done) - - # TODO: https://github.com/tancheng/VectorCGRA/issues/127 - @update - def notify_const_mem(): - s.const_mem.ctrl_proceed @= s.ctrl_mem.send_ctrl.rdy & s.ctrl_mem.send_ctrl.val - - # Updates the signals indicating whether certain modules already done their jobs. - @update_ff - def already_done(): - if s.reset | s.ctrl_mem.send_ctrl.rdy: - s.element_done <<= 0 - s.fu_crossbar_done <<= 0 - s.routing_crossbar_done <<= 0 - else: - # s.element_done keeps 0 during streaming LD. - if s.element.recv_opt.rdy: - s.element_done <<= 1 - if s.fu_crossbar.recv_opt.rdy: - # s.fu_crossbar_done should also be 0 during streaming LD. - s.fu_crossbar_done <<= (1 & s.element_done) - if s.routing_crossbar.recv_opt.rdy: - # s.routing_crossbar_done should also be 0 during streaming LD. - s.routing_crossbar_done <<= (1 & s.element_done) - - @update - def notify_crossbars_compute_status(): - s.routing_crossbar.compute_done @= s.element_done - s.fu_crossbar.compute_done @= s.element_done - - # Line trace - def line_trace(s): - recv_str = "|".join(["(" + str(x.msg) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.recv_data]) - send_str = "|".join([str(x.msg) for x in s.send_data]) - tile_in_channel_recv_str = "|".join([str(x.recv.msg) for x in s.tile_in_channel]) - tile_in_channel_send_str = "|".join([str(x.send.msg) for x in s.tile_in_channel]) - tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) - out_str = "|".join(["(" + str(x.msg.payload) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) - ctrl_mem = s.ctrl_mem.line_trace() - const_mem = s.const_mem.line_trace() - return f"send_str: {send_str}, tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " - +""" +========================================================================= +TileWithStreamingLoadRTL.py +========================================================================= +Integrates tile with StreamimgMemUnit for streaming LD. + +Author : Yufei Yang + Date : Jan 21, 2026 +""" + +from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ..fu.single.AdderRTL import AdderRTL +from ..fu.single.GrantRTL import GrantRTL +from ..fu.single.CompRTL import CompRTL +from ..fu.single.StreamingMemUnitRTL import StreamingMemUnitRTL +from ..fu.single.MulRTL import MulRTL +from ..fu.single.PhiRTL import PhiRTL +from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL +from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL +from ..lib.cmd_type import * +from ..lib.util.common import * +from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL +from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL +from ..noc.CrossbarRTL import CrossbarRTL +from ..noc.LinkOrRTL import LinkOrRTL +from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL +from ..rf.RegisterRTL import RegisterRTL +from ..lib.util.data_struct_attr import * + +class TileWithStreamingLoadRTL(Component): + + def construct(s, IntraCgraPktType, + ctrl_mem_size, data_mem_size, num_ctrl, + total_steps, num_fu_inports, num_fu_outports, + num_tile_inports, num_tile_outports, num_cgras, num_tiles, + num_registers_per_reg_bank = 16, + Fu = FlexibleFuRTL, + FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, GrantRTL, StreamingMemUnitRTL]): + + # Derives types from IntraCgraPktType. + CgraPayloadType = IntraCgraPktType.get_field_type(kAttrPayload) + CtrlPktType = IntraCgraPktType + DataType = CgraPayloadType.get_field_type(kAttrData) + PredicateType = DataType.get_field_type(kAttrPredicate) + CtrlSignalType = CgraPayloadType.get_field_type(kAttrCtrl) + data_bitwidth = DataType.get_field_type(kAttrPayload).nbits + + # Constants. + num_routing_xbar_inports = num_tile_inports + num_routing_xbar_outports = num_fu_inports + num_tile_outports + + num_fu_xbar_inports = num_fu_outports + num_fu_xbar_outports = num_fu_inports + num_tile_outports + + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + DataAddrType = mk_bits(clog2(data_mem_size)) + + # Interfaces. + s.recv_data = [RecvIfcRTL(DataType) + for _ in range (num_tile_inports)] + s.send_data = [SendIfcRTL(DataType) + for _ in range (num_tile_outports)] + + # Ctrl. + s.recv_from_controller_pkt = RecvIfcRTL(CtrlPktType) + # Sends the ctrl packets to ctrl ring. + s.send_to_controller_pkt = SendIfcRTL(CtrlPktType) + + # Data. + s.to_mem_raddr = SendIfcRTL(DataAddrType) + s.from_mem_rdata = RecvIfcRTL(DataType) + s.to_mem_waddr = SendIfcRTL(DataAddrType) + s.to_mem_wdata = SendIfcRTL(DataType) + + # Components. + s.element = FlexibleFuRTL(CtrlPktType, num_fu_inports, num_fu_outports, + num_tiles, FuList) + s.const_mem = ConstQueueDynamicRTL(DataType, ctrl_mem_size) + s.routing_crossbar = CrossbarRTL(DataType, + CtrlSignalType, + num_routing_xbar_inports, + num_routing_xbar_outports, + num_cgras, + num_tiles, + ctrl_mem_size, + num_tile_outports) + s.fu_crossbar = CrossbarRTL(DataType, + CtrlSignalType, + num_fu_xbar_inports, + num_fu_xbar_outports, + num_cgras, + num_tiles, + ctrl_mem_size, + num_tile_outports) + s.register_cluster = \ + RegisterClusterRTL(DataType, CtrlSignalType, num_fu_inports, + num_registers_per_reg_bank) + s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, + ctrl_mem_size, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_cgras, + num_tiles, + num_ctrl, + total_steps) + + # The `tile_in_channel` indicates the outport channels that are + # connected to the next tiles. + s.tile_in_channel = [ChannelRTL(DataType, latency = 1) + for _ in range(num_tile_inports)] + + # The `tile_out_or_link` would "or" the outports of the + # `tile_out_channel` and the FUs. + s.tile_out_or_link = [LinkOrRTL(DataType) + for _ in range(num_tile_outports)] + + # Signals indicating whether certain modules already done their jobs. + s.element_done = Wire(1) + s.fu_crossbar_done = Wire(1) + s.routing_crossbar_done = Wire(1) + + s.cgra_id = InPort(mk_bits(max(1, clog2(num_cgras)))) + s.tile_id = InPort(mk_bits(clog2(num_tiles + 1))) + + # Propagates tile id. + s.element.tile_id //= s.tile_id + s.ctrl_mem.cgra_id //= s.cgra_id + s.ctrl_mem.tile_id //= s.tile_id + s.fu_crossbar.cgra_id //= s.cgra_id + s.fu_crossbar.tile_id //= s.tile_id + s.routing_crossbar.cgra_id //= s.cgra_id + s.routing_crossbar.tile_id //= s.tile_id + + # Assigns crossbar id. + s.routing_crossbar.crossbar_id //= PORT_INDEX_ROUTING_CROSSBAR + s.fu_crossbar.crossbar_id //= PORT_INDEX_FU_CROSSBAR + + # Constant queue. + s.element.recv_const //= s.const_mem.send_const + + # Fu data <-> ctrl memory (eventually towards/from CPU via controller). + s.element.send_to_ctrl_mem //= s.ctrl_mem.recv_from_element + s.element.recv_from_ctrl_mem //= s.ctrl_mem.send_to_element + + # Ctrl address port. + s.routing_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport + s.fu_crossbar.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport + s.element.ctrl_addr_inport //= s.ctrl_mem.ctrl_addr_outport + + # Prologue port. + s.element.prologue_count_inport //= s.ctrl_mem.prologue_count_outport_fu + for addr in range(ctrl_mem_size): + for i in range(num_routing_xbar_inports): + s.routing_crossbar.prologue_count_inport[addr][i] //= \ + s.ctrl_mem.prologue_count_outport_routing_crossbar[addr][i] + for i in range(num_fu_xbar_inports): + s.fu_crossbar.prologue_count_inport[addr][i] //= \ + s.ctrl_mem.prologue_count_outport_fu_crossbar[addr][i] + + for i in range(len(FuList)): + if FuList[i] == StreamingMemUnitRTL: + s.to_mem_raddr //= s.element.to_mem_raddr[i] + s.from_mem_rdata //= s.element.from_mem_rdata[i] + s.to_mem_waddr //= s.element.to_mem_waddr[i] + s.to_mem_wdata //= s.element.to_mem_wdata[i] + else: + s.element.to_mem_raddr[i].rdy //= 0 + s.element.from_mem_rdata[i].val //= 0 + s.element.from_mem_rdata[i].msg //= DataType() + s.element.to_mem_waddr[i].rdy //= 0 + s.element.to_mem_wdata[i].rdy //= 0 + + # Connections on the `routing_crossbar`. + # The data from other tiles should be connected to the + # `routing_crossbar`. + for i in range(num_tile_inports): + s.recv_data[i] //= s.tile_in_channel[i].recv + s.tile_in_channel[i].send //= s.routing_crossbar.recv_data[i] + + # Connects specific xbar control signals to the corresponding crossbar. + for i in range(num_routing_xbar_outports): + s.routing_crossbar.crossbar_outport[i] //= \ + s.ctrl_mem.send_ctrl.msg.routing_xbar_outport[i] + s.fu_crossbar.crossbar_outport[i] //= \ + s.ctrl_mem.send_ctrl.msg.fu_xbar_outport[i] + + # Connections on the `fu_crossbar`. + for i in range(num_fu_outports): + s.element.send_out[i] //= s.fu_crossbar.recv_data[i] + + # The data going out to the other tiles should be from the + # `routing_crossbar`. Note that there are also data being fed into + # the FUs via the `routing_crossbar`, which are filtered out by + # `num_tile_outports` below. In addition, we "or" the outports of + # the FUs (via `fu_crossbar`) with the outports of the + # `routing_crossbar` through the corresponding channels. + for i in range(num_tile_outports): + s.fu_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_fu + s.routing_crossbar.send_data[i] //= s.tile_out_or_link[i].recv_xbar + s.tile_out_or_link[i].send //= s.send_data[i] + + # Crossbars outputs are integrated with the "register_cluster". + # Whether the required operands for FU are from the "routing_crossbar" + # or from the "register_cluster" depends on the control signals. + for i in range(num_fu_inports): + s.routing_crossbar.send_data[num_tile_outports + i] //= \ + s.register_cluster.recv_data_from_routing_crossbar[i] + s.fu_crossbar.send_data[num_tile_outports + i] //= \ + s.register_cluster.recv_data_from_fu_crossbar[i] + + s.register_cluster.recv_data_from_const[i].msg //= DataType() + s.register_cluster.recv_data_from_const[i].val //= 0 + + s.register_cluster.send_data_to_fu[i] //= \ + s.element.recv_in[i] + s.register_cluster.inport_opt //= s.ctrl_mem.send_ctrl.msg + + # Clear ports are only useful during context switching. + # We connect to 0 to make sure they have drivers. + for i in range(len(FuList)): + s.element.clear[i] //= 0 + s.fu_crossbar.clear //= 0 + s.routing_crossbar.clear //= 0 + + @update + def feed_pkt(): + s.ctrl_mem.recv_pkt_from_controller.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.const_mem.recv_const.msg @= DataType(0, 0, 0, 0) + s.ctrl_mem.recv_pkt_from_controller.val @= 0 + s.const_mem.recv_const.val @= 0 + s.recv_from_controller_pkt.rdy @= 0 + + if s.recv_from_controller_pkt.val & \ + ((s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_FU_CROSSBAR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_PROLOGUE_ROUTING_CROSSBAR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_TOTAL_CTRL_COUNT) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_COUNT_PER_ITER) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_ADD_RESPONSE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_GLOBAL_REDUCE_MUL_RESPONSE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_START_ADDR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_STRIDE) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONFIG_STREAMING_LD_END_ADDR) | \ + (s.recv_from_controller_pkt.msg.payload.cmd == CMD_LAUNCH)): + s.ctrl_mem.recv_pkt_from_controller.val @= 1 + s.ctrl_mem.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg + s.element.recv_pkt_from_controller.val @= 1 + s.element.recv_pkt_from_controller.msg @= s.recv_from_controller_pkt.msg + s.recv_from_controller_pkt.rdy @= s.ctrl_mem.recv_pkt_from_controller.rdy | \ + s.element.recv_pkt_from_controller.rdy + elif s.recv_from_controller_pkt.val & (s.recv_from_controller_pkt.msg.payload.cmd == CMD_CONST): + s.const_mem.recv_const.val @= 1 + s.const_mem.recv_const.msg @= s.recv_from_controller_pkt.msg.payload.data + s.recv_from_controller_pkt.rdy @= s.const_mem.recv_const.rdy + + @update + def update_send_out_signal(): + s.send_to_controller_pkt.val @= 0 + s.send_to_controller_pkt.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + if s.ctrl_mem.send_pkt_to_controller.val: + s.send_to_controller_pkt.val @= 1 + s.send_to_controller_pkt.msg @= s.ctrl_mem.send_pkt_to_controller.msg + s.ctrl_mem.send_pkt_to_controller.rdy @= s.send_to_controller_pkt.rdy + + # Updates the configuration memory related signals. + @update + def update_opt(): + s.element.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + s.routing_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + s.fu_crossbar.recv_opt.msg @= s.ctrl_mem.send_ctrl.msg + + # FIXME: Do we still need separate element and routing_xbar? + # FIXME: Do we need to consider reg bank here? + s.element.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.element_done + s.routing_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.routing_crossbar_done + s.fu_crossbar.recv_opt.val @= s.ctrl_mem.send_ctrl.val & ~s.fu_crossbar_done + + # FIXME: yo96, rename ctrl.rdy to ctrl.proceed or sth similar. + # Allows either the FU-related go out first or routing-xbar go out first. And only + # allows the ctrl signal proceed till all the sub-modules done their own job (once). + s.ctrl_mem.send_ctrl.rdy @= (s.element.recv_opt.rdy | s.element_done) & \ + (s.routing_crossbar.recv_opt.rdy | s.routing_crossbar_done) & \ + (s.fu_crossbar.recv_opt.rdy | s.fu_crossbar_done) + + # TODO: https://github.com/tancheng/VectorCGRA/issues/127 + @update + def notify_const_mem(): + s.const_mem.ctrl_proceed @= s.ctrl_mem.send_ctrl.rdy & s.ctrl_mem.send_ctrl.val + + # Updates the signals indicating whether certain modules already done their jobs. + @update_ff + def already_done(): + if s.reset | s.ctrl_mem.send_ctrl.rdy: + s.element_done <<= 0 + s.fu_crossbar_done <<= 0 + s.routing_crossbar_done <<= 0 + else: + # s.element_done keeps 0 during streaming LD. + if s.element.recv_opt.rdy: + s.element_done <<= 1 + if s.fu_crossbar.recv_opt.rdy: + # s.fu_crossbar_done should also be 0 during streaming LD. + s.fu_crossbar_done <<= (1 & s.element_done) + if s.routing_crossbar.recv_opt.rdy: + # s.routing_crossbar_done should also be 0 during streaming LD. + s.routing_crossbar_done <<= (1 & s.element_done) + + @update + def notify_crossbars_compute_status(): + s.routing_crossbar.compute_done @= s.element_done + s.fu_crossbar.compute_done @= s.element_done + + # Line trace + def line_trace(s): + recv_str = "|".join(["(" + str(x.msg) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.recv_data]) + send_str = "|".join([str(x.msg) for x in s.send_data]) + tile_in_channel_recv_str = "|".join([str(x.recv.msg) for x in s.tile_in_channel]) + tile_in_channel_send_str = "|".join([str(x.send.msg) for x in s.tile_in_channel]) + tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) + out_str = "|".join(["(" + str(x.msg.payload) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) + ctrl_mem = s.ctrl_mem.line_trace() + const_mem = s.const_mem.line_trace() + return f"send_str: {send_str}, tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " +