From 62a301754dcdacedaf1c1f03fa3d644541b18e1d Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Sun, 3 May 2026 16:38:06 +0800 Subject: [PATCH 01/14] Format ArchParser by black. --- multi_cgra/arch_parser/ArchParser.py | 105 ++++++++++++++++++--------- 1 file changed, 71 insertions(+), 34 deletions(-) diff --git a/multi_cgra/arch_parser/ArchParser.py b/multi_cgra/arch_parser/ArchParser.py index 47e19eb6..da3afcc3 100644 --- a/multi_cgra/arch_parser/ArchParser.py +++ b/multi_cgra/arch_parser/ArchParser.py @@ -9,19 +9,19 @@ class ArchParser: def __init__(self, yaml_file: str): - with open(yaml_file, 'r') as f: + with open(yaml_file, "r") as f: self.yaml_data = yaml.safe_load(f) - self.cgra_rows = self.yaml_data['multi_cgra_defaults']['rows'] - self.cgra_columns = self.yaml_data['multi_cgra_defaults']['columns'] - self.per_cgra_rows = self.yaml_data['cgra_defaults']['rows'] - self.per_cgra_columns = self.yaml_data['cgra_defaults']['columns'] - self.num_registers = self.yaml_data['tile_defaults']['num_registers'] - self.fu_types = self.yaml_data['tile_defaults']['fu_types'] + self.cgra_rows = self.yaml_data["multi_cgra_defaults"]["rows"] + self.cgra_columns = self.yaml_data["multi_cgra_defaults"]["columns"] + self.per_cgra_rows = self.yaml_data["cgra_defaults"]["rows"] + self.per_cgra_columns = self.yaml_data["cgra_defaults"]["columns"] + self.num_registers = self.yaml_data["tile_defaults"]["num_registers"] + self.fu_types = self.yaml_data["tile_defaults"]["fu_types"] def parse_dataSPM(self): - data_mem_num_rd_tiles = self.per_cgra_rows + self.per_cgra_columns - 1 - data_mem_num_wr_tiles = self.per_cgra_rows + self.per_cgra_columns - 1 + data_mem_num_rd_tiles = self.per_cgra_rows + self.per_cgra_columns - 1 + data_mem_num_wr_tiles = self.per_cgra_rows + self.per_cgra_columns - 1 return DataSPM(data_mem_num_rd_tiles, data_mem_num_wr_tiles) def parse_tiles(self): @@ -46,14 +46,14 @@ def parse_tiles(self): def parse_cgras(self): # Restricted by ControllerRTL. 
- assert self.cgra_rows <= self.cgra_columns, "multi_cgra_rows must be less than or equal to multi_cgra_columns." + assert ( + self.cgra_rows <= self.cgra_columns + ), "multi_cgra_rows must be less than or equal to multi_cgra_columns." num_cgras = self.cgra_rows * self.cgra_columns # Restricted by data_mem_size_global(the power of 2). - assert (num_cgras & (num_cgras - 1) - ) == 0, "num_cgras must be the power of 2." + assert (num_cgras & (num_cgras - 1)) == 0, "num_cgras must be the power of 2." tiles = self.parse_tiles() - # cgra id to valid links. id2validLinks = {} # cgra id to valid tiles. @@ -70,12 +70,18 @@ def parse_cgras(self): # Iterates id2validTiles to enable boundary ports for cgra_id, tiles_flat in id2validTiles.items(): configure_boundary_ports( - cgra_id, tiles_flat, self.cgra_rows, self.cgra_columns, self.per_cgra_rows, self.per_cgra_columns) + cgra_id, + tiles_flat, + self.cgra_rows, + self.cgra_columns, + self.per_cgra_rows, + self.per_cgra_columns, + ) dataSPM = self.parse_dataSPM() id2dataSPM = {} id2ctrlMemSize_map = {} - ctrlMemSize = self.yaml_data['cgra_defaults']['configMemSize'] + ctrlMemSize = self.yaml_data["cgra_defaults"]["configMemSize"] for id in range(num_cgras): id2dataSPM[id] = dataSPM @@ -86,26 +92,44 @@ def parse_cgras(self): cgras.append([]) for cgra_col in range(self.cgra_columns): id = cgra_row * self.cgra_columns + cgra_col - cgras[cgra_row].append(ParamCGRA( - self.per_cgra_rows, self.per_cgra_columns, id2validTiles[id], id2validLinks[id], id2dataSPM[id], id2ctrlMemSize_map[id])) + cgras[cgra_row].append( + ParamCGRA( + self.per_cgra_rows, + self.per_cgra_columns, + id2validTiles[id], + id2validLinks[id], + id2dataSPM[id], + id2ctrlMemSize_map[id], + ) + ) # Overrides the tiles. 
- if 'tile_overrides' in self.yaml_data: - data = self.yaml_data['tile_overrides'] + if "tile_overrides" in self.yaml_data: + data = self.yaml_data["tile_overrides"] for override in data: - fu_types = [] if not override['existence'] else override['fu_types'] - cgras[override['cgra_x']][override['cgra_y']].overrideTiles(override['tile_x'], override['tile_y'], fu_types, override['existence']) + fu_types = [] if not override["existence"] else override["fu_types"] + cgras[override["cgra_x"]][override["cgra_y"]].overrideTiles( + override["tile_x"], + override["tile_y"], + fu_types, + override["existence"], + ) # Overrides the links. - if 'link_overrides' in self.yaml_data: - data = self.yaml_data['link_overrides'] + if "link_overrides" in self.yaml_data: + data = self.yaml_data["link_overrides"] for override in data: - if override['src_cgra_x'] == override['dst_cgra_x'] and override['src_cgra_y'] == override['dst_cgra_y']: - cgras[override['src_cgra_x']][override['src_cgra_y']].overrideLinks( - override['src_tile_x'], override['src_tile_y'], - override['dst_tile_x'], override['dst_tile_y'], - override['existence'] - ) + if ( + override["src_cgra_x"] == override["dst_cgra_x"] + and override["src_cgra_y"] == override["dst_cgra_y"] + ): + cgras[override["src_cgra_x"]][override["src_cgra_y"]].overrideLinks( + override["src_tile_x"], + override["src_tile_y"], + override["dst_tile_x"], + override["dst_tile_y"], + override["existence"], + ) return cgras def parse_multi_cgra_param(self): @@ -117,7 +141,7 @@ def get_simplest_cgra_param(self) -> ParamCGRA: cgras = self.parse_cgras() # set of (cgra_id, cgra) cgras_item = ( - ( i * self.cgra_columns + j , cgras[i][j] ) + (i * self.cgra_columns + j, cgras[i][j]) for i in range(self.cgra_rows) for j in range(self.cgra_columns) ) @@ -126,8 +150,21 @@ def get_simplest_cgra_param(self) -> ParamCGRA: tiles = simplest_cgra.tiles # Disables the boundary ports of a single cgra. 
- configure_boundary_ports(cgra_id, tiles, self.cgra_rows, self.cgra_columns, - self.per_cgra_rows, self.per_cgra_columns, False) + configure_boundary_ports( + cgra_id, + tiles, + self.cgra_rows, + self.cgra_columns, + self.per_cgra_rows, + self.per_cgra_columns, + False, + ) - return ParamCGRA(simplest_cgra.rows, simplest_cgra.columns, tiles, simplest_cgra.links, - simplest_cgra.dataSPM, simplest_cgra.configMemSize) + return ParamCGRA( + simplest_cgra.rows, + simplest_cgra.columns, + tiles, + simplest_cgra.links, + simplest_cgra.dataSPM, + simplest_cgra.configMemSize, + ) From 69106e68084824f70bacb53a6eab5b04e06e3790 Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Sun, 3 May 2026 16:40:40 +0800 Subject: [PATCH 02/14] Add heterogeneous support in ArchParser --- multi_cgra/arch_parser/ArchParser.py | 56 +++++++++++++------ .../test/MeshMultiCgraTemplateRTL_test.py | 2 +- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/multi_cgra/arch_parser/ArchParser.py b/multi_cgra/arch_parser/ArchParser.py index da3afcc3..870c69e9 100644 --- a/multi_cgra/arch_parser/ArchParser.py +++ b/multi_cgra/arch_parser/ArchParser.py @@ -27,22 +27,42 @@ def parse_dataSPM(self): def parse_tiles(self): """ Parse the tiles in one CGRA. + We should consider the case of heterogeneous CGRA. """ - tiles = [] - for r in range(self.per_cgra_rows): - tiles.append([]) - for c in range(self.per_cgra_columns): - """ - Mapping way of tiles in a single CGRA (Cartesian coordinate system): - ^ - | y (row) increases upward: 0 at the bottom, up to `per_cgra_rows-1` at the top - | - | (row, col): (y, x) - +------------------------> - 0 x (column) increases to the right: 0 at the left, up to `per_cgra_columns-1` at the right - """ - tiles[r].append(Tile(c, r, self.num_registers, self.fu_types)) - return tiles + + # cgra_id to tiles map. + id2tiles_map = {i: [] for i in range(self.cgra_rows * self.cgra_columns)} + # default tiles. 
+ for i in range(self.cgra_rows * self.cgra_columns): + for r in range(self.per_cgra_rows): + id2tiles_map[i].append([]) + for c in range(self.per_cgra_columns): + id2tiles_map[i][r].append( + Tile(c, r, self.num_registers, self.fu_types) + ) + + if "cgra_overrides" in self.yaml_data: + for override in self.yaml_data["cgra_overrides"]: + cgra_id = override["cgra_x"] * self.cgra_columns + override["cgra_y"] + override_tiles = [] + for r in range(override["rows"]): + override_tiles.append([]) + for c in range(override["columns"]): + """ + Mapping way of tiles in a single CGRA (Cartesian coordinate system): + ^ + | y (row) increases upward: 0 at the bottom, up to `override["rows"]-1` at the top + | + | (row, col): (y, x) + +------------------------> + 0 x (column) increases to the right: 0 at the left, up to `override["columns"]-1` at the right + """ + override_tiles[r].append( + Tile(c, r, self.num_registers, self.fu_types) + ) + id2tiles_map[cgra_id] = override_tiles + + return id2tiles_map def parse_cgras(self): # Restricted by ControllerRTL. @@ -52,15 +72,15 @@ def parse_cgras(self): num_cgras = self.cgra_rows * self.cgra_columns # Restricted by data_mem_size_global(the power of 2). assert (num_cgras & (num_cgras - 1)) == 0, "num_cgras must be the power of 2." - tiles = self.parse_tiles() - + # cgra id to tiles map. + id2tiles_map = self.parse_tiles() # cgra id to valid links. id2validLinks = {} # cgra id to valid tiles. 
id2validTiles = {} for id in range(num_cgras): - tiles0 = copy.deepcopy(tiles) + tiles0 = copy.deepcopy(id2tiles_map[id]) links0 = get_links(tiles0) tiles0_flat = [t for row in tiles0 for t in row] diff --git a/multi_cgra/test/MeshMultiCgraTemplateRTL_test.py b/multi_cgra/test/MeshMultiCgraTemplateRTL_test.py index e8d9730e..c03106fb 100644 --- a/multi_cgra/test/MeshMultiCgraTemplateRTL_test.py +++ b/multi_cgra/test/MeshMultiCgraTemplateRTL_test.py @@ -252,7 +252,7 @@ def test_mesh_multi_cgra_universal(cmdline_opts, arch_yaml_path = "arch.yaml"): ''' Creates test performing load -> inc -> store on cgra 2. Specifically, cgra 2 tile 0 performs `load` on memory address 34, and stores the result (0xfe) in register 7. - cgra 2 tile 0 read data from register 7 and performs `inc` (0xfe -> 0xff), and sends result to tile 2. + cgra 2 tile 0 read data from register 7 and performs `inc` (0xfe -> 0xff), and sends result to tile `per_cgra_columns`. cgra 2 tile `per_cgra_columns` waits for the data from tile 0, and performs stores (0xff) to memory address 3. Note that address 34 is in cgra 1's sram bank 0, while address 3 is in cgra 0's sram bank 0, therefore, all the memory addresses from cgra 2 are remote. From d1a9a38c615696f05c0138fc88f5f9213e8e2090 Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Sun, 3 May 2026 21:18:32 +0800 Subject: [PATCH 03/14] Enhance MultiCgraParam representation to include detailed sizes of individual CGRAs and add method to count valid tiles in ParamCGRA. 
--- multi_cgra/arch_parser/MultiCgraParam.py | 9 +++++---- multi_cgra/arch_parser/ParamCGRA.py | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/multi_cgra/arch_parser/MultiCgraParam.py b/multi_cgra/arch_parser/MultiCgraParam.py index 01ce4dee..81851b60 100644 --- a/multi_cgra/arch_parser/MultiCgraParam.py +++ b/multi_cgra/arch_parser/MultiCgraParam.py @@ -87,7 +87,8 @@ def from_params( return cls(num_cgra_rows, num_cgra_cols, cgras) def __repr__(self): - return ( - f"\nSize of MultiCGRAs: {self.rows}x{self.cols}\n" - + f"Size of CGRA(Tiles): {self.cgras[0][0].rows}x{self.cgras[0][0].columns}" - ) + cgra_size_str = "" + for y in range(self.rows): + for x in range(self.cols): + cgra_size_str += f"CGRA {y * self.cols + x}: {self.cgras[x][y].rows}x{self.cgras[x][y].columns}\n" + return f"\nSize of MultiCGRAs: {self.rows}x{self.cols}\n" + cgra_size_str diff --git a/multi_cgra/arch_parser/ParamCGRA.py b/multi_cgra/arch_parser/ParamCGRA.py index 688ed4b8..9bbdb21b 100644 --- a/multi_cgra/arch_parser/ParamCGRA.py +++ b/multi_cgra/arch_parser/ParamCGRA.py @@ -52,5 +52,9 @@ def getFuNum(self): """Returns the total number of valid functional units in the CGRA.""" return sum(tile.getFuNum() for tile in self.tiles if not tile.disabled) + def getTileNum(self): + """Returns the total number of valid tiles in the CGRA.""" + return sum(1 for tile in self.tiles if not tile.disabled) + def __repr__(self) -> str: return f"ParamCGRA(rows={self.rows}, columns={self.columns})" From e4bd15a4a5544b7903b8a48d28ea79a375dc6fbd Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Sun, 3 May 2026 21:19:26 +0800 Subject: [PATCH 04/14] Enhance ArchParser to support for heterogeneous multi cgra. 
--- lib/util/cgra/cgra_helper.py | 80 ++++++++++++-------- multi_cgra/arch_parser/ArchParser.py | 105 +++++++++++++++++---------- 2 files changed, 116 insertions(+), 69 deletions(-) diff --git a/lib/util/cgra/cgra_helper.py b/lib/util/cgra/cgra_helper.py index b1baeb51..b5f62a82 100644 --- a/lib/util/cgra/cgra_helper.py +++ b/lib/util/cgra/cgra_helper.py @@ -131,8 +131,8 @@ def set_port_validity(tile, port, is_valid = True): def configure_boundary_ports(cgra_id, tiles_flat, - num_cgra_rows, num_cgra_columns, - per_cgra_rows, per_cgra_columns, + num_cgra_rows, num_cgra_cols, + id2shape_map, is_valid = True): """ Enable boundary ports for tiles on adjacent CGRAs. @@ -141,49 +141,69 @@ def configure_boundary_ports(cgra_id, tiles_flat, - cgra_id: ID of the current CGRA (0-indexed, bottom-left to top-right) - tiles_flat: Flat list of tiles for this CGRA (reshaped from 2D) - num_cgra_rows: Number of CGRA rows in the mesh - - num_cgra_columns: Number of CGRA columns in the mesh - - per_cgra_rows: Number of tile rows in each CGRA - - per_cgra_columns: Number of tile columns in each CGRA + - num_cgra_cols: Number of CGRA columns in the mesh + - id2shape_map: Map of each CGRA id to its shape: (num_tile_rows x num_tile_columns) tiles - is_valid: If true, enable ports, otherwise disable ports - - CGRA ID mapping (example for 2x2): - CGRA 2: [row=0, col=0] CGRA 3: [row=0, col=1] (top row, row=0) - CGRA 0: [row=1, col=0] CGRA 1: [row=1, col=1] (bottom row, row=1) """ # Converts CGRA ID to 2D coordinates - cgra_row = (num_cgra_rows - 1) - (cgra_id // num_cgra_columns) - cgra_col = cgra_id % num_cgra_columns + cgra_x = cgra_id % num_cgra_cols + cgra_y = cgra_id // num_cgra_cols + # The number of tile rows and columns in the current CGRA. 
+ num_tile_rows, num_tile_cols = id2shape_map[cgra_id] - # Helper to get tile from flat list using row/col indices - def get_tile(row, col): - return tiles_flat[row * per_cgra_columns + col] + # Helper to get tile from flat list using x/y indices + def get_tile(x, y): + return tiles_flat[y * num_tile_cols + x] # Enables NORTH ports if there's a neighbor to the north - if cgra_row > 0: + if cgra_y < num_cgra_rows - 1: # This CGRA has a neighbor above + # Gets the tile shape of the neighbor CGRA. + neighbor_cgra_id = cgra_id + num_cgra_cols + num_neighbor_tile_rows, num_neighbor_tile_cols = id2shape_map[neighbor_cgra_id] + # Top row of tiles in this CGRA should have NORTH ports enabled - top_row_idx = per_cgra_rows - 1 - for tile_col in range(per_cgra_columns): - set_port_validity(get_tile(top_row_idx, tile_col), PORT_INDEX_NORTH, is_valid) + # y axis of the top row tiles of the current CGRA. + top_row_y = num_tile_rows - 1 + valid_port_num = min(num_tile_cols, num_neighbor_tile_cols) + for tile_x in range(valid_port_num): + set_port_validity(get_tile(tile_x, top_row_y), PORT_INDEX_NORTH, is_valid) # Enables SOUTH ports if there's a neighbor to the south - if cgra_row < num_cgra_rows - 1: + if cgra_y > 0: # This CGRA has a neighbor below + # Gets the tile shape of the neighbor CGRA. 
+ neighbor_cgra_id = cgra_id - num_cgra_cols + num_neighbor_tile_rows, num_neighbor_tile_cols = id2shape_map[neighbor_cgra_id] + # Bottom row of tiles in this CGRA should have SOUTH ports enabled - bottom_row_idx = 0 - for tile_col in range(per_cgra_columns): - set_port_validity(get_tile(bottom_row_idx, tile_col), PORT_INDEX_SOUTH, is_valid) + bottom_row_y = 0 + valid_port_num = min(num_tile_cols, num_neighbor_tile_cols) + for tile_x in range(valid_port_num): + set_port_validity(get_tile(tile_x, bottom_row_y), PORT_INDEX_SOUTH, is_valid) # Enables EAST ports if there's a neighbor to the east - if cgra_col < num_cgra_columns - 1: + if cgra_x < num_cgra_cols - 1: + # This CGRA has a neighbor to the right. + # Gets the tile shape of the neighbor CGRA. + neighbor_cgra_id = cgra_id + 1 + num_neighbor_tile_rows, num_neighbor_tile_cols = id2shape_map[neighbor_cgra_id] + # Rightmost column of tiles in this CGRA should have EAST ports enabled - east_col_idx = per_cgra_columns - 1 - for tile_row in range(per_cgra_rows): - set_port_validity(get_tile(tile_row, east_col_idx), PORT_INDEX_EAST, is_valid) + east_col_x = num_cgra_cols - 1 + valid_port_num = min(num_tile_rows, num_neighbor_tile_rows) + for tile_y in range(valid_port_num): + set_port_validity(get_tile(east_col_x, tile_y), PORT_INDEX_EAST, is_valid) # Enables WEST ports if there's a neighbor to the west - if cgra_col > 0: + if cgra_x > 0: + # This CGRA has a neighbor to the left. + # Gets the tile shape of the neighbor CGRA. 
+ neighbor_cgra_id = cgra_id - 1 + num_neighbor_tile_rows, num_neighbor_tile_cols = id2shape_map[neighbor_cgra_id] + # Leftmost column of tiles in this CGRA should have WEST ports enabled - west_col_idx = 0 - for tile_row in range(per_cgra_rows): - set_port_validity(get_tile(tile_row, west_col_idx), PORT_INDEX_WEST, is_valid) + west_col_x = 0 + valid_port_num = min(num_tile_rows, num_neighbor_tile_rows) + for tile_y in range(valid_port_num): + set_port_validity(get_tile(west_col_x, tile_y), PORT_INDEX_WEST, is_valid) diff --git a/multi_cgra/arch_parser/ArchParser.py b/multi_cgra/arch_parser/ArchParser.py index 870c69e9..f670bf5d 100644 --- a/multi_cgra/arch_parser/ArchParser.py +++ b/multi_cgra/arch_parser/ArchParser.py @@ -18,6 +18,7 @@ def __init__(self, yaml_file: str): self.per_cgra_columns = self.yaml_data["cgra_defaults"]["columns"] self.num_registers = self.yaml_data["tile_defaults"]["num_registers"] self.fu_types = self.yaml_data["tile_defaults"]["fu_types"] + self.num_cgras = self.cgra_rows * self.cgra_columns def parse_dataSPM(self): data_mem_num_rd_tiles = self.per_cgra_rows + self.per_cgra_columns - 1 @@ -29,36 +30,54 @@ def parse_tiles(self): Parse the tiles in one CGRA. We should consider the case of heterogeneous CGRA. """ - - # cgra_id to tiles map. - id2tiles_map = {i: [] for i in range(self.cgra_rows * self.cgra_columns)} + # map of cgra_id to tiles. + id2tiles_map = {i: [] for i in range(self.num_cgras)} # default tiles. 
- for i in range(self.cgra_rows * self.cgra_columns): - for r in range(self.per_cgra_rows): + for i in range(self.num_cgras): + """ + Mapping way of tiles in a single CGRA (Cartesian coordinate system): + ^ + | y increases upward: 0 at the bottom, up to `tile_row_num-1` at the top + | + | (x,y) + +------------------------> + 0 x increases to the right: 0 at the left, up to `tile_column_num-1` at the right + + CGRA i + ^ + | tile6 tile7 tile8 + | tile3 tile4 tile5 + | tile0 tile1 tile2 + +------------------------> + id2tiles_map[i] = [[tile0, tile1, tile2], [tile3, tile4, tile5], [tile6, tile7, tile8]] + """ + for y in range(self.per_cgra_rows): id2tiles_map[i].append([]) - for c in range(self.per_cgra_columns): - id2tiles_map[i][r].append( - Tile(c, r, self.num_registers, self.fu_types) + for x in range(self.per_cgra_columns): + id2tiles_map[i][y].append( + Tile(x, y, self.num_registers, self.fu_types) ) if "cgra_overrides" in self.yaml_data: for override in self.yaml_data["cgra_overrides"]: - cgra_id = override["cgra_x"] * self.cgra_columns + override["cgra_y"] + cgra_id = ( + override["cgra_y"] * self.cgra_columns + override["cgra_x"] + ) override_tiles = [] - for r in range(override["rows"]): + for y in range(override["rows"]): override_tiles.append([]) - for c in range(override["columns"]): + for x in range(override["columns"]): """ Mapping way of tiles in a single CGRA (Cartesian coordinate system): ^ - | y (row) increases upward: 0 at the bottom, up to `override["rows"]-1` at the top + | y increases upward: 0 at the bottom, up to `override["rows"]-1` at the top | - | (row, col): (y, x) + | (x,y) +------------------------> - 0 x (column) increases to the right: 0 at the left, up to `override["columns"]-1` at the right + 0 x increases to the right: 0 at the left, up to `override["columns"]-1` at the right """ - override_tiles[r].append( - Tile(c, r, self.num_registers, self.fu_types) + override_tiles[y].append( + Tile(x, y, self.num_registers, self.fu_types) ) 
id2tiles_map[cgra_id] = override_tiles @@ -68,20 +87,28 @@ def parse_cgras(self): # Restricted by ControllerRTL. assert ( self.cgra_rows <= self.cgra_columns - ), "multi_cgra_rows must be less than or equal to multi_cgra_columns." - num_cgras = self.cgra_rows * self.cgra_columns + ), "cgra_rows must be less than or equal to cgra_columns." # Restricted by data_mem_size_global(the power of 2). - assert (num_cgras & (num_cgras - 1)) == 0, "num_cgras must be the power of 2." + assert (self.num_cgras & (self.num_cgras - 1)) == 0, "num_cgras must be the power of 2." # cgra id to tiles map. id2tiles_map = self.parse_tiles() + # Map of each CGRA id to its shape: (num_cgra_rows, num_cgra_columns) + id2shape_map = { + cgra_id: (len(id2tiles_map[cgra_id]), len(id2tiles_map[cgra_id][0])) + for cgra_id in range(self.num_cgras) + } + self.id2shape_map = id2shape_map + # cgra id to valid links. id2validLinks = {} # cgra id to valid tiles. id2validTiles = {} - for id in range(num_cgras): + for id in range(self.num_cgras): tiles0 = copy.deepcopy(id2tiles_map[id]) links0 = get_links(tiles0) + # Flattens the tiles to a 1D list from left to right. 
+ # e.g., [[tile0, tile1], [tile2, tile3]] -> [tile0, tile1, tile2, tile3] tiles0_flat = [t for row in tiles0 for t in row] id2validLinks[id] = links0 @@ -94,8 +121,7 @@ def parse_cgras(self): tiles_flat, self.cgra_rows, self.cgra_columns, - self.per_cgra_rows, - self.per_cgra_columns, + id2shape_map, ) dataSPM = self.parse_dataSPM() @@ -103,19 +129,19 @@ def parse_cgras(self): id2ctrlMemSize_map = {} ctrlMemSize = self.yaml_data["cgra_defaults"]["configMemSize"] - for id in range(num_cgras): + for id in range(self.num_cgras): id2dataSPM[id] = dataSPM id2ctrlMemSize_map[id] = ctrlMemSize cgras = [] - for cgra_row in range(self.cgra_rows): + for y in range(self.cgra_rows): cgras.append([]) - for cgra_col in range(self.cgra_columns): - id = cgra_row * self.cgra_columns + cgra_col - cgras[cgra_row].append( + for x in range(self.cgra_columns): + id = y * self.cgra_columns + x + cgras[y].append( ParamCGRA( - self.per_cgra_rows, - self.per_cgra_columns, + id2shape_map[id][0], + id2shape_map[id][1], id2validTiles[id], id2validLinks[id], id2dataSPM[id], @@ -128,7 +154,7 @@ def parse_cgras(self): data = self.yaml_data["tile_overrides"] for override in data: fu_types = [] if not override["existence"] else override["fu_types"] - cgras[override["cgra_x"]][override["cgra_y"]].overrideTiles( + cgras[override["cgra_y"]][override["cgra_x"]].overrideTiles( override["tile_x"], override["tile_y"], fu_types, @@ -143,7 +169,7 @@ def parse_cgras(self): override["src_cgra_x"] == override["dst_cgra_x"] and override["src_cgra_y"] == override["dst_cgra_y"] ): - cgras[override["src_cgra_x"]][override["src_cgra_y"]].overrideLinks( + cgras[override["src_cgra_y"]][override["src_cgra_x"]].overrideLinks( override["src_tile_x"], override["src_tile_y"], override["dst_tile_x"], @@ -157,16 +183,18 @@ def parse_multi_cgra_param(self): return MultiCgraParam(self.cgra_rows, self.cgra_columns, cgras) def get_simplest_cgra_param(self) -> ParamCGRA: - """Returns the simplest(has the least number of 
functional units) CGRA parameter.""" + """ + Returns the simplest(has the least number of tiles) CGRA parameter. + """ cgras = self.parse_cgras() # set of (cgra_id, cgra) cgras_item = ( - (i * self.cgra_columns + j, cgras[i][j]) - for i in range(self.cgra_rows) - for j in range(self.cgra_columns) + (y * self.cgra_columns + x, cgras[y][x]) + for y in range(self.cgra_rows) + for x in range(self.cgra_columns) ) - # Finds the cgra which has the least number of FUs. - cgra_id, simplest_cgra = min(cgras_item, key=lambda item: item[1].getFuNum()) + # Finds the cgra which has the least number of tiles. + cgra_id, simplest_cgra = min(cgras_item, key=lambda item: item[1].getTileNum()) tiles = simplest_cgra.tiles # Disables the boundary ports of a single cgra. @@ -175,8 +203,7 @@ def get_simplest_cgra_param(self) -> ParamCGRA: tiles, self.cgra_rows, self.cgra_columns, - self.per_cgra_rows, - self.per_cgra_columns, + self.id2shape_map, False, ) From 52c5263579a76ee0f91346195467fd4d74a0e916 Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Mon, 4 May 2026 15:26:42 +0800 Subject: [PATCH 05/14] [Fix] Remove the disable of link from tile2(cgra2) to dataSPM since tile2 needs to write the memory. 
--- .../arch_parser/test/arch_multi_cgra_overrides.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/multi_cgra/arch_parser/test/arch_multi_cgra_overrides.yaml b/multi_cgra/arch_parser/test/arch_multi_cgra_overrides.yaml index 883908fd..547007db 100644 --- a/multi_cgra/arch_parser/test/arch_multi_cgra_overrides.yaml +++ b/multi_cgra/arch_parser/test/arch_multi_cgra_overrides.yaml @@ -46,17 +46,6 @@ link_overrides: dst_tile_y: 0 existence: false -# disables the link from tile(0,1) to dataSPM in cgra(0,1) -- src_cgra_x: 0 - src_cgra_y: 1 - dst_cgra_x: 0 - dst_cgra_y: 1 - - src_tile_x: 0 - src_tile_y: 1 - dst_tile_x: -1 - dst_tile_y: -1 - existence: false # disables the link from tile(1,0) to dataSPM in cgra(0,1) - src_cgra_x: 0 From 69c7cf3ca7da003781921a8e9fb68d4eea2ba087 Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Mon, 4 May 2026 18:29:58 +0800 Subject: [PATCH 06/14] Enhance parse_dataSPM to support for hetero multi cgra --- multi_cgra/arch_parser/ArchParser.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/multi_cgra/arch_parser/ArchParser.py b/multi_cgra/arch_parser/ArchParser.py index f670bf5d..2bf10443 100644 --- a/multi_cgra/arch_parser/ArchParser.py +++ b/multi_cgra/arch_parser/ArchParser.py @@ -19,11 +19,19 @@ def __init__(self, yaml_file: str): self.num_registers = self.yaml_data["tile_defaults"]["num_registers"] self.fu_types = self.yaml_data["tile_defaults"]["fu_types"] self.num_cgras = self.cgra_rows * self.cgra_columns + # map of cgra_id to its shape: (per_cgra_rows, per_cgra_columns) + self.id2shape_map = None - def parse_dataSPM(self): - data_mem_num_rd_tiles = self.per_cgra_rows + self.per_cgra_columns - 1 - data_mem_num_wr_tiles = self.per_cgra_rows + self.per_cgra_columns - 1 - return DataSPM(data_mem_num_rd_tiles, data_mem_num_wr_tiles) + def parse_dataSPM(self) -> dict[int, DataSPM]: + if self.id2shape_map is None: + raise ValueError("id2shape_map is not parsed yet.") + 
id2dataSPM = {} + for id in range(self.num_cgras): + per_cgra_rows, per_cgra_columns = self.id2shape_map[id] + data_mem_num_rd_tiles = per_cgra_rows + per_cgra_columns - 1 + data_mem_num_wr_tiles = per_cgra_rows + per_cgra_columns - 1 + id2dataSPM[id] = DataSPM(data_mem_num_rd_tiles, data_mem_num_wr_tiles) + return id2dataSPM def parse_tiles(self): """ @@ -92,7 +100,7 @@ def parse_cgras(self): assert (self.num_cgras & (self.num_cgras - 1)) == 0, "num_cgras must be the power of 2." # cgra id to tiles map. id2tiles_map = self.parse_tiles() - # Map of each CGRA id to its shape: (num_cgra_rows, num_cgra_columns) + # Map of each CGRA id to its shape: (per_cgra_rows, per_cgra_columns) id2shape_map = { cgra_id: (len(id2tiles_map[cgra_id]), len(id2tiles_map[cgra_id][0])) for cgra_id in range(self.num_cgras) @@ -124,14 +132,9 @@ def parse_cgras(self): id2shape_map, ) - dataSPM = self.parse_dataSPM() - id2dataSPM = {} - id2ctrlMemSize_map = {} + id2dataSPM = self.parse_dataSPM() ctrlMemSize = self.yaml_data["cgra_defaults"]["configMemSize"] - - for id in range(self.num_cgras): - id2dataSPM[id] = dataSPM - id2ctrlMemSize_map[id] = ctrlMemSize + id2ctrlMemSize_map = {id: ctrlMemSize for id in range(self.num_cgras)} cgras = [] for y in range(self.cgra_rows): From bf3ff9973929f6795e66871ad285f40c4098ef91 Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Mon, 4 May 2026 22:57:29 +0800 Subject: [PATCH 07/14] [Fix] Fix the error of coordinate computation. 
--- lib/util/cgra/cgra_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/util/cgra/cgra_helper.py b/lib/util/cgra/cgra_helper.py index b5f62a82..7a80947e 100644 --- a/lib/util/cgra/cgra_helper.py +++ b/lib/util/cgra/cgra_helper.py @@ -190,7 +190,7 @@ def get_tile(x, y): num_neighbor_tile_rows, num_neighbor_tile_cols = id2shape_map[neighbor_cgra_id] # Rightmost column of tiles in this CGRA should have EAST ports enabled - east_col_x = num_cgra_cols - 1 + east_col_x = num_tile_cols - 1 valid_port_num = min(num_tile_rows, num_neighbor_tile_rows) for tile_y in range(valid_port_num): set_port_validity(get_tile(east_col_x, tile_y), PORT_INDEX_EAST, is_valid) From c8553a1b7c81e2de641b32f9c3a3bf0b98a0064f Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Tue, 5 May 2026 15:18:19 +0800 Subject: [PATCH 08/14] [Doc] Add the comment of parse_cgras. --- multi_cgra/arch_parser/ArchParser.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/multi_cgra/arch_parser/ArchParser.py b/multi_cgra/arch_parser/ArchParser.py index 2bf10443..fa013dda 100644 --- a/multi_cgra/arch_parser/ArchParser.py +++ b/multi_cgra/arch_parser/ArchParser.py @@ -92,6 +92,11 @@ def parse_tiles(self): return id2tiles_map def parse_cgras(self): + """ + Parse the CGRAs in the architecture file. + Returns a list of ParamCGRA objects. + Return list: e.g., 3x3 multi-cgra, cgras = [[cgra0, cgra1, cgra2], [cgra3, cgra4, cgra5], [cgra6, cgra7, cgra8]] + """ # Restricted by ControllerRTL. 
assert ( self.cgra_rows <= self.cgra_columns From d9661877501d4130bbefea959a7baa6e6b56bb34 Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Tue, 5 May 2026 15:44:29 +0800 Subject: [PATCH 09/14] Grounds the unused ports in multi-hetero-cgra --- multi_cgra/MeshMultiCgraTemplateRTL.py | 110 +++++++++++++++++++++---- 1 file changed, 92 insertions(+), 18 deletions(-) diff --git a/multi_cgra/MeshMultiCgraTemplateRTL.py b/multi_cgra/MeshMultiCgraTemplateRTL.py index de64c3ec..cd06a561 100644 --- a/multi_cgra/MeshMultiCgraTemplateRTL.py +++ b/multi_cgra/MeshMultiCgraTemplateRTL.py @@ -7,12 +7,12 @@ from ..lib.util.data_struct_attr import * from ..noc.PyOCN.pymtl3_net.meshnet.MeshNetworkRTL import MeshNetworkRTL from ..noc.PyOCN.pymtl3_net.ocnlib.ifcs.positions import mk_mesh_pos +from typing import List class MeshMultiCgraTemplateRTL(Component): def construct(s, CgraPayloadType, cgra_rows, cgra_columns, - # per_cgra_rows, per_cgra_columns, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, @@ -26,13 +26,19 @@ def construct(s, CgraPayloadType, CgraDataType = CgraPayloadType.get_field_type(kAttrData) # Reconstructs packet types. - num_tiles = id2cgraSize_map[0][0] * id2cgraSize_map[0][1] - num_rd_tiles = id2cgraSize_map[0][0] + id2cgraSize_map[0][1] - 1 + # In heterogeneous multi-CGRA architectures, CtrlPktType and NocPktType + # must accommodate the largest CGRA shape to ensure uniform packet width + # and correct inter-CGRA communication. + cgra_size:List[List[int, int]] = [id2cgraSize_map[id] for id in range(cgra_rows * cgra_columns)] + max_rows, max_cols = max(cgra_size, key=lambda x: x[0] * x[1]) + # The tile number of the largest cgra. 
+ max_num_tiles = max_rows * max_cols + max_num_rd_tiles = max_rows + max_cols - 1 CtrlPktType = mk_intra_cgra_pkt(cgra_columns, cgra_rows, - num_tiles, CgraPayloadType) + max_num_tiles, CgraPayloadType) NocPktType = mk_inter_cgra_pkt(cgra_columns, cgra_rows, - num_tiles, num_rd_tiles, + max_num_tiles, max_num_rd_tiles, CgraPayloadType) # Constant @@ -71,7 +77,7 @@ def construct(s, CgraPayloadType, FunctionUnit, FuList, id2validTiles[cgra_id], id2validLinks[cgra_id], id2dataSPM[cgra_id], controller2addr_map, idTo2d_map, - is_multi_cgra, cgra_id) + is_multi_cgra, cgra_id, max_num_tiles, max_num_rd_tiles) for cgra_id in range(s.num_cgras)] # Latency is 1. s.mesh = MeshNetworkRTL(NocPktType, MeshPos, cgra_columns, cgra_rows, 1) @@ -90,6 +96,7 @@ def construct(s, CgraPayloadType, s.cgra[cgra_id].address_lower //= DataAddrType(controller2addr_map[cgra_id][0]) s.cgra[cgra_id].address_upper //= DataAddrType(controller2addr_map[cgra_id][1]) + # Only the CGRA0 connects to the CPU. s.recv_from_cpu_pkt //= s.cgra[0].recv_from_cpu_pkt s.send_to_cpu_pkt //= s.cgra[0].send_to_cpu_pkt @@ -106,17 +113,51 @@ def construct(s, CgraPayloadType, for cgra_row in range(cgra_rows): for cgra_col in range(cgra_columns): idx = cgra_row * cgra_columns + cgra_col + # The number of tile rows and columns of the current CGRA. per_cgra_rows = id2cgraSize_map[idx][0] per_cgra_columns = id2cgraSize_map[idx][1] # Connects North-South boundaries if cgra_row > 0: - neighbor_idx = (cgra_row - 1) * cgra_columns + cgra_col - for tile_col in range(per_cgra_columns): - s.cgra[idx].send_data_on_boundary_south[tile_col] //= \ - s.cgra[neighbor_idx].recv_data_on_boundary_north[tile_col] - s.cgra[idx].recv_data_on_boundary_south[tile_col] //= \ - s.cgra[neighbor_idx].send_data_on_boundary_north[tile_col] + # The south neighbor CGRA. + neighbor_idx = idx - cgra_columns + # The number of columns of the south neighbor CGRA. 
+ per_neighbor_cgra_columns = id2cgraSize_map[neighbor_idx][1] + + # In heterogeneous multi-cgra, if the current CGRA has the same columns with the south neighbor CGRA, + if per_cgra_columns == per_neighbor_cgra_columns: + # Connects the south boundary of the current CGRA to the north boundary of the south neighbor CGRA. + for tile_col in range(per_cgra_columns): + s.cgra[idx].send_data_on_boundary_south[tile_col] //= \ + s.cgra[neighbor_idx].recv_data_on_boundary_north[tile_col] + s.cgra[idx].recv_data_on_boundary_south[tile_col] //= \ + s.cgra[neighbor_idx].send_data_on_boundary_north[tile_col] + # In heterogeneous multi-cgra, if the current CGRA has more columns than the south neighbor CGRA, + elif per_cgra_columns > per_neighbor_cgra_columns: + # Connects the south boundary of the current CGRA to the north boundary of the south neighbor CGRA with the same number of columns. + for tile_col in range(per_neighbor_cgra_columns): + s.cgra[idx].send_data_on_boundary_south[tile_col] //= \ + s.cgra[neighbor_idx].recv_data_on_boundary_north[tile_col] + s.cgra[idx].recv_data_on_boundary_south[tile_col] //= \ + s.cgra[neighbor_idx].send_data_on_boundary_north[tile_col] + # Grounds the remaining south boundary of the current CGRA. + for tile_col in range(per_neighbor_cgra_columns, per_cgra_columns): + s.cgra[idx].send_data_on_boundary_south[tile_col].rdy //= 0 + s.cgra[idx].recv_data_on_boundary_south[tile_col].val //= 0 + s.cgra[idx].recv_data_on_boundary_south[tile_col].msg //= CgraDataType() + # In heterogeneous multi-cgra, if the current CGRA has fewer columns than the south neighbor CGRA, + else: + # Connects the south boundary of the current CGRA to the north boundary of the south neighbor CGRA with the same number of columns. 
+ for tile_col in range(per_cgra_columns): + s.cgra[idx].send_data_on_boundary_south[tile_col] //= \ + s.cgra[neighbor_idx].recv_data_on_boundary_north[tile_col] + s.cgra[idx].recv_data_on_boundary_south[tile_col] //= \ + s.cgra[neighbor_idx].send_data_on_boundary_north[tile_col] + # Grounds the remaining north boundary of the south neighbor CGRA. + for tile_col in range(per_cgra_columns, per_neighbor_cgra_columns): + s.cgra[neighbor_idx].send_data_on_boundary_north[tile_col].rdy //= 0 + s.cgra[neighbor_idx].recv_data_on_boundary_north[tile_col].val //= 0 + s.cgra[neighbor_idx].recv_data_on_boundary_north[tile_col].msg //= CgraDataType() else: # Bottom edge: connects south boundary to 0 for tile_col in range(per_cgra_columns): @@ -133,12 +174,45 @@ def construct(s, CgraPayloadType, # Connect East-West boundaries if cgra_col > 0: - neighbor_idx = cgra_row * cgra_columns + cgra_col - 1 - for tile_row in range(per_cgra_rows): - s.cgra[idx].send_data_on_boundary_west[tile_row] //= \ - s.cgra[neighbor_idx].recv_data_on_boundary_east[tile_row] - s.cgra[idx].recv_data_on_boundary_west[tile_row] //= \ - s.cgra[neighbor_idx].send_data_on_boundary_east[tile_row] + # The west neighbor CGRA. + neighbor_idx = idx - 1 + # The number of rows of the west neighbor CGRA. + per_neighbor_cgra_rows = id2cgraSize_map[neighbor_idx][0] + + # In heterogeneous multi-cgra, if the current CGRA has the same rows with the west neighbor CGRA, + if per_cgra_rows == per_neighbor_cgra_rows: + # Connects the west boundary of the current CGRA to the east boundary of the west neighbor CGRA. 
+ for tile_row in range(per_cgra_rows): + s.cgra[idx].send_data_on_boundary_west[tile_row] //= \ + s.cgra[neighbor_idx].recv_data_on_boundary_east[tile_row] + s.cgra[idx].recv_data_on_boundary_west[tile_row] //= \ + s.cgra[neighbor_idx].send_data_on_boundary_east[tile_row] + # In heterogeneous multi-cgra, if the current CGRA has more rows than the west neighbor CGRA, + elif per_cgra_rows > per_neighbor_cgra_rows: + # Connects the west boundary of the current CGRA to the east boundary of the west neighbor CGRA with the same number of rows. + for tile_row in range(per_neighbor_cgra_rows): + s.cgra[idx].send_data_on_boundary_west[tile_row] //= \ + s.cgra[neighbor_idx].recv_data_on_boundary_east[tile_row] + s.cgra[idx].recv_data_on_boundary_west[tile_row] //= \ + s.cgra[neighbor_idx].send_data_on_boundary_east[tile_row] + # Grounds the remaining west boundary of the current CGRA. + for tile_row in range(per_neighbor_cgra_rows, per_cgra_rows): + s.cgra[idx].send_data_on_boundary_west[tile_row].rdy //= 0 + s.cgra[idx].recv_data_on_boundary_west[tile_row].val //= 0 + s.cgra[idx].recv_data_on_boundary_west[tile_row].msg //= CgraDataType() + # In heterogeneous multi-cgra, if the current CGRA has fewer rows than the west neighbor CGRA, + else: + # Connects the west boundary of the current CGRA to the east boundary of the west neighbor CGRA with the same number of rows. + for tile_row in range(per_cgra_rows): + s.cgra[idx].send_data_on_boundary_west[tile_row] //= \ + s.cgra[neighbor_idx].recv_data_on_boundary_east[tile_row] + s.cgra[idx].recv_data_on_boundary_west[tile_row] //= \ + s.cgra[neighbor_idx].send_data_on_boundary_east[tile_row] + # Grounds the remaining east boundary of the west neighbor CGRA. 
+ for tile_row in range(per_cgra_rows, per_neighbor_cgra_rows): + s.cgra[neighbor_idx].send_data_on_boundary_east[tile_row].rdy //= 0 + s.cgra[neighbor_idx].recv_data_on_boundary_east[tile_row].val //= 0 + s.cgra[neighbor_idx].recv_data_on_boundary_east[tile_row].msg //= CgraDataType() else: # Left edge: connects west boundary to 0 for tile_row in range(per_cgra_rows): From 8784586e942c4aad22ef119792ac2f570aa4f02d Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Tue, 5 May 2026 21:41:11 +0800 Subject: [PATCH 10/14] Enhance CgraTemplateRTL to support maximum tile and port configurations for heterogeneous multi-CGRA architectures. Update CtrlMemDynamicRTL to correctly handle tile IDs in packet types. --- cgra/CgraTemplateRTL.py | 56 +++++++++++++++++++++++++---------- mem/ctrl/CtrlMemDynamicRTL.py | 5 ++-- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/cgra/CgraTemplateRTL.py b/cgra/CgraTemplateRTL.py index 33f37387..be3b804d 100644 --- a/cgra/CgraTemplateRTL.py +++ b/cgra/CgraTemplateRTL.py @@ -79,7 +79,12 @@ def construct(s, CgraPayloadType, total_steps, mem_access_is_combinational, FunctionUnit, FuList, TileList, LinkList, dataSPM, controller2addr_map, idTo2d_map, - is_multi_cgra = True, cgra_id = 0): + is_multi_cgra = True, cgra_id = 0, max_num_tiles_ = None, max_num_rd_tiles_ = None, max_num_wr_tiles_ = None): + """ + max_num_tiles_: the tile number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. + max_num_rd_tiles_: the number of read ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. + max_num_wr_tiles_: the number of write ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. 
+ """ DataType = CgraPayloadType.get_field_type(kAttrData) PredicateType = DataType.get_field_type(kAttrPredicate) @@ -89,22 +94,29 @@ def construct(s, CgraPayloadType, CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows) # Reconstructs packet types. - num_tiles = len(TileList) - # Calculates num_rd_tiles from TileList (number of tiles with read ports). - num_rd_tiles = dataSPM.getNumOfValidReadPorts() - + # In the case of heterogeneous multi-cgra, `max_num_tiles` means the tile number of the largest cgra. + # In the case of single cgra, it is the tile number of the current cgra. + max_num_tiles = max_num_tiles_ if max_num_tiles_ is not None else len(TileList) + # In the case of heterogeneous multi-cgra, `max_num_rd_tiles` means the number of read ports of the largest cgra. + # In the case of single cgra, it is the number of read ports of the current cgra. + max_num_rd_tiles = max_num_rd_tiles_ if max_num_rd_tiles_ is not None else dataSPM.getNumOfValidReadPorts() + max_num_wr_tiles = max_num_wr_tiles_ if max_num_wr_tiles_ is not None else dataSPM.getNumOfValidWritePorts() + + + # Use largest CGRA shape(max_num_tiles) to set CtrlPktType/NocPktType for compatibility. CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - num_tiles, CgraPayloadType) + max_num_tiles, CgraPayloadType) NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows, - num_tiles, num_rd_tiles, + max_num_tiles, max_num_rd_tiles, CgraPayloadType) s.num_mesh_ports = 8 + # tile number of the current cgra. s.num_tiles = len(TileList) num_cgras = multi_cgra_rows * multi_cgra_columns # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. 
- CtrlRingPos = mk_ring_pos(s.num_tiles + 1) + CtrlRingPos = mk_ring_pos(max_num_tiles + 1) CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) DataAddrType = mk_bits(clog2(data_mem_size_global)) assert(data_mem_size_per_bank * num_banks_per_cgra <= \ @@ -140,22 +152,22 @@ def construct(s, CgraPayloadType, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, - dataSPM.getNumOfValidReadPorts(), - dataSPM.getNumOfValidWritePorts(), + max_num_rd_tiles, + max_num_wr_tiles, multi_cgra_rows, multi_cgra_columns, - s.num_tiles, + max_num_tiles, mem_access_is_combinational, idTo2d_map) s.cgra_id = InPort(CgraIdType) s.controller = ControllerRTL(NocPktType, multi_cgra_rows, multi_cgra_columns, - s.num_tiles, controller2addr_map, idTo2d_map) + max_num_tiles, controller2addr_map, idTo2d_map) # Connects controller id. s.controller.cgra_id //= s.cgra_id # An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU. # The last argument of 1 is for the latency per hop. - s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, s.num_tiles + 1, 1) + s.ctrl_ring = RingNetworkRTL(CtrlPktType, CtrlRingPos, max_num_tiles + 1, 1) # Address lower and upper bound. s.address_lower = InPort(DataAddrType) @@ -196,11 +208,17 @@ def construct(s, CgraPayloadType, # Connects ring with each control memory. for i in range(s.num_tiles): s.ctrl_ring.send[i] //= s.tile[i].recv_from_controller_pkt - for i in range(s.num_tiles): s.ctrl_ring.recv[i] //= s.tile[i].send_to_controller_pkt + s.ctrl_ring.recv[s.num_tiles] //= s.controller.send_to_ctrl_ring_pkt s.ctrl_ring.send[s.num_tiles] //= s.controller.recv_from_ctrl_ring_pkt + # Grounds the remaining ports of the ring. 
+ for i in range(s.num_tiles + 1, max_num_tiles + 1): + s.ctrl_ring.send[i].rdy //= 0 + s.ctrl_ring.recv[i].val //= 0 + s.ctrl_ring.recv[i].msg //= CtrlPktType() + for link in LinkList: if link.isFromMem(): @@ -233,8 +251,16 @@ def construct(s, CgraPayloadType, s.tile[srcTileIndex].send_data[link.srcPort] //= s.tile[dstTileIndex].recv_data[link.dstPort] # (cgra_idx_x, cgra_idx_y) is the coordinate of the current cgra in multi-cgra(Cartesian coordinate system). + """ + ^ y + | + | cgra2 cgra3 + | cgra0 cgra1 + +---------------> x + + """ cgra_idx_x = cgra_id % multi_cgra_columns - cgra_idx_y = cgra_id // multi_cgra_rows + cgra_idx_y = cgra_id // multi_cgra_columns """ row ^ diff --git a/mem/ctrl/CtrlMemDynamicRTL.py b/mem/ctrl/CtrlMemDynamicRTL.py index 37f48a63..cbb330d8 100644 --- a/mem/ctrl/CtrlMemDynamicRTL.py +++ b/mem/ctrl/CtrlMemDynamicRTL.py @@ -30,6 +30,7 @@ def construct(s, IntraCgraPktType, CgraPayloadType = IntraCgraPktType.get_field_type(kAttrPayload) CtrlType = CgraPayloadType.get_field_type(kAttrCtrl) + IntraPktTileIdType = IntraCgraPktType.get_field_type(kAttrSrc) # The total_ctrl_steps indicates the number of steps the ctrl # signals should proceed. For example, if the number of ctrl # signals is 4 and they need to repeat 5 times, then the total @@ -179,7 +180,7 @@ def update_send_pkt_to_controller(): if s.start_iterate_ctrl == b1(1): if s.recv_from_element_queue.send.val & (~s.sent_complete): s.send_pkt_to_controller.msg @= \ - IntraCgraPktType(s.tile_id, num_tiles, 0, 0, 0, 0, 0, 0, 0, 0, + IntraCgraPktType(zext(s.tile_id, IntraPktTileIdType), num_tiles, 0, 0, 0, 0, 0, 0, 0, 0, s.recv_from_element_queue.send.msg) s.send_pkt_to_controller.val @= 1 s.recv_from_element_queue.send.rdy @= s.send_pkt_to_controller.rdy @@ -188,7 +189,7 @@ def update_send_pkt_to_controller(): # Sends COMPLETE signal to Controller when the last ctrl signal is done. 
if ~s.sent_complete & (s.total_ctrl_steps_val > 0) & (s.times == s.total_ctrl_steps_val) & s.start_iterate_ctrl: s.send_pkt_to_controller.msg @= \ - IntraCgraPktType(s.tile_id, num_tiles, 0, 0, 0, 0, 0, 0, 0, 0, CgraPayloadType(CMD_COMPLETE, 0, 0, 0, 0)) + IntraCgraPktType(zext(s.tile_id, IntraPktTileIdType), num_tiles, 0, 0, 0, 0, 0, 0, 0, 0, CgraPayloadType(CMD_COMPLETE, 0, 0, 0, 0)) s.send_pkt_to_controller.val @= 1 @update From 5d88f9701b962a522a65929a8db1f5b4332e5ea4 Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Tue, 5 May 2026 21:42:07 +0800 Subject: [PATCH 11/14] Update MeshMultiCgraTemplateRTL to calculate maximum valid read and write ports for tiles, enhancing support for heterogeneous multi-CGRA configurations. --- multi_cgra/MeshMultiCgraTemplateRTL.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/multi_cgra/MeshMultiCgraTemplateRTL.py b/multi_cgra/MeshMultiCgraTemplateRTL.py index cd06a561..f5067293 100644 --- a/multi_cgra/MeshMultiCgraTemplateRTL.py +++ b/multi_cgra/MeshMultiCgraTemplateRTL.py @@ -33,7 +33,8 @@ def construct(s, CgraPayloadType, max_rows, max_cols = max(cgra_size, key=lambda x: x[0] * x[1]) # The tile number of the largest cgra. max_num_tiles = max_rows * max_cols - max_num_rd_tiles = max_rows + max_cols - 1 + max_num_rd_tiles = max(id2dataSPM[id].getNumOfValidReadPorts() for id in range(cgra_rows * cgra_columns)) + max_num_wr_tiles = max(id2dataSPM[id].getNumOfValidWritePorts() for id in range(cgra_rows * cgra_columns)) CtrlPktType = mk_intra_cgra_pkt(cgra_columns, cgra_rows, max_num_tiles, CgraPayloadType) @@ -77,7 +78,7 @@ def construct(s, CgraPayloadType, FunctionUnit, FuList, id2validTiles[cgra_id], id2validLinks[cgra_id], id2dataSPM[cgra_id], controller2addr_map, idTo2d_map, - is_multi_cgra, cgra_id, max_num_tiles, max_num_rd_tiles) + is_multi_cgra, cgra_id, max_num_tiles, max_num_rd_tiles, max_num_wr_tiles) for cgra_id in range(s.num_cgras)] # Latency is 1. 
s.mesh = MeshNetworkRTL(NocPktType, MeshPos, cgra_columns, cgra_rows, 1) From 55fb83c8b3e423d19d13d60b331308bf9b3b298c Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Tue, 5 May 2026 21:42:58 +0800 Subject: [PATCH 12/14] Add the arch file and the test for multi heterogeneous CGRAs. --- .../test/MeshMultiCgraTemplateRTL_test.py | 223 +++++++++++++++++- .../test/arch_multi_hetero_cgra_override.yaml | 30 +++ 2 files changed, 247 insertions(+), 6 deletions(-) create mode 100644 multi_cgra/test/arch_multi_hetero_cgra_override.yaml diff --git a/multi_cgra/test/MeshMultiCgraTemplateRTL_test.py b/multi_cgra/test/MeshMultiCgraTemplateRTL_test.py index c03106fb..24075a87 100644 --- a/multi_cgra/test/MeshMultiCgraTemplateRTL_test.py +++ b/multi_cgra/test/MeshMultiCgraTemplateRTL_test.py @@ -50,7 +50,7 @@ class TestHarness(Component): def construct(s, DUT, FunctionUnit, FuList, IntraCgraPktType, - cgra_rows, cgra_columns, per_cgra_rows, per_cgra_columns, ctrl_mem_size, + cgra_rows, cgra_columns, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, src_ctrl_pkt, src_query_pkt, @@ -63,8 +63,6 @@ def construct(s, DUT, FunctionUnit, FuList, cmp_func): CgraPayloadType = IntraCgraPktType.get_field_type(kAttrPayload) - s.num_terminals = cgra_rows * cgra_columns - s.num_tiles = per_cgra_columns * per_cgra_rows s.src_ctrl_pkt = TestSrcRTL(IntraCgraPktType, src_ctrl_pkt) s.src_query_pkt = TestSrcRTL(IntraCgraPktType, src_query_pkt) @@ -72,8 +70,7 @@ def construct(s, DUT, FunctionUnit, FuList, s.expected_sink_out = TestSinkRTL(IntraCgraPktType, expected_sink_out_pkt, cmp_fn = cmp_func) s.dut = DUT(CgraPayloadType, - cgra_rows, cgra_columns, - # per_cgra_rows, per_cgra_columns, + cgra_rows, cgra_columns, ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, @@ -151,6 +148,10 @@ def run_sim(test_harness, max_cycles = 200): def test_mesh_multi_cgra_universal(cmdline_opts, 
arch_yaml_path = "arch.yaml"): + """ + Test the multi-CGRA CGRA configurations. + NOTE This test only considers CGRAs with the same shape, meaning all CGRAs have the same number of tile rows and columns. + """ arch_file = os.path.join(os.path.dirname(__file__), arch_yaml_path) print(f"Use the architecture file: {arch_file}") arch_parser = ArchParser(arch_file) @@ -359,7 +360,7 @@ def test_mesh_multi_cgra_universal(cmdline_opts, arch_yaml_path = "arch.yaml"): th = TestHarness(DUT, FunctionUnit, FuList, IntraCgraPktType, num_cgra_rows, num_cgra_columns, - per_cgra_rows, per_cgra_columns, ctrl_mem_size, data_mem_size_global, + ctrl_mem_size, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, src_ctrl_pkt, src_query_pkt, ctrl_steps_per_iter, ctrl_steps_total, @@ -381,3 +382,213 @@ def test_simplified_multi_cgra(cmdline_opts, arch_yaml_path = "arch_override.yam arch_parser = ArchParser(arch_file) cgra_param = arch_parser.get_simplest_cgra_param() CgraTemplateRTL_test.test_cgra_universal(cmdline_opts, arch_file, cgra_param) + +def test_mesh_multi_hetero_cgra(cmdline_opts, arch_yaml_path = "arch_multi_hetero_cgra_override.yaml"): + """ + Test the multi-CGRA with heterogeneous(different tile rows and columns) CGRA configurations. 
+ """ + arch_file = os.path.join(os.path.dirname(__file__), arch_yaml_path) + print(f"Use the architecture file: {arch_file}") + arch_parser = ArchParser(arch_file) + multiCgraParam = arch_parser.parse_multi_cgra_param() + + print(f"multiCgraParam: {multiCgraParam}") + # singleCgraParam = multiCgraParam.cgras[0][0] + num_cgra_rows = multiCgraParam.rows + num_cgra_columns = multiCgraParam.cols + # per_cgra_rows = singleCgraParam.rows + # per_cgra_columns = singleCgraParam.columns + num_banks_per_cgra = 2 + data_mem_size_per_bank = 32 + mem_access_is_combinational = True + + num_tile_inports = 8 + num_tile_outports = 8 + num_fu_inports = 4 + num_fu_outports = 2 + ctrl_mem_size = 16 + num_cgras = num_cgra_rows * num_cgra_columns + data_mem_size_global = data_mem_size_per_bank * num_banks_per_cgra * num_cgras + + cgras = multiCgraParam.cgras + cgras_flat = [cgra for row in cgras for cgra in row] # [cgra0, cgra1, cgra2.....] + # Finds the largest cgra. + largest_cgra = max(cgras_flat, key=lambda _cgra: _cgra.rows * _cgra.columns) + # Uses the largest cgra's shape to determine the bitwidth of IntraCgraPktType. 
+ max_cgra_rows = largest_cgra.rows + max_cgra_columns = largest_cgra.columns + num_tiles = max_cgra_columns * max_cgra_rows + + TileInType = mk_bits(clog2(num_tile_inports + num_fu_inports + 1)) + FuInType = mk_bits(clog2(num_fu_inports + 1)) + FuOutType = mk_bits(clog2(num_fu_outports + 1)) + data_nbits = 32 + DataType = mk_data(data_nbits, 1) + DataAddrType = mk_bits(clog2(data_mem_size_global)) + DUT = MeshMultiCgraTemplateRTL + + FunctionUnit = FlexibleFuRTL + FuList = [AdderRTL, + MulRTL, + LogicRTL, + ShifterRTL, + PhiRTL, + CompRTL, + GrantRTL, + MemUnitRTL, + SelRTL, + RetRTL, + SeqMulAdderRTL, + VectorMulComboRTL, + VectorAdderComboRTL] + num_registers_per_reg_bank = 16 + per_cgra_data_size = int(data_mem_size_global / num_cgras) + controller2addr_map = {} + for i in range(num_cgras): + controller2addr_map[i] = [i * per_cgra_data_size, + (i + 1) * per_cgra_data_size - 1] + print("[LOG] controller2addr_map: ", controller2addr_map) + + RegIdxType = mk_bits(clog2(num_registers_per_reg_bank)) + + CtrlType = mk_ctrl(num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank) + + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + + CgraPayloadType = mk_cgra_payload(DataType, + DataAddrType, + CtrlType, + CtrlAddrType) + + IntraCgraPktType = mk_intra_cgra_pkt(num_cgra_columns, + num_cgra_rows, + num_tiles, + CgraPayloadType) + + src_ctrl_pkt = [] + expected_sink_out_pkt = [] + src_query_pkt = [] + ctrl_steps_per_iter = 0 + + cmp_func = lambda a, b : a.payload.data == b.payload.data and a.payload.cmd == b.payload.cmd + + ''' + Creates test performing load -> inc -> store on cgra 2. Assume that the size of cgra2 is `cgra2_rows x cgra2_columns`. + Specifically, + cgra 2 tile 0 performs `load` on memory address 34, and stores the result (0xfe) in register 7. + cgra 2 tile 0 read data from register 7 and performs `inc` (0xfe -> 0xff), and sends result to tile `cgra2_columns`. 
+ cgra 2 tile `cgra2_columns` waits for the data from tile 0, and performs stores (0xff) to memory address 3. + Note that address 34 is in cgra 1's sram bank 0, while address 3 is in cgra 0's sram bank 0, + therefore, all the memory addresses from cgra 2 are remote. + ''' + # tile `cgra2_columns` can access the memory, as it is on the first column. + # Get the column number of cgra2. + cgra2_column = cgras_flat[2].columns + target_tile = cgra2_column + src_ctrl_pkt = \ + [ + # Preloads data. address 34 belongs to cgra 1 (not cgra 0) + IntraCgraPktType(0, 0, payload = CgraPayloadType(CMD_STORE_REQUEST, data = DataType(254, 1), data_addr = 34)), + # Tile 0. + # Indicates the load address of 2. dst_cgra_y + IntraCgraPktType(0, 0, 0, 2, 0, 0, 0, 1, payload = CgraPayloadType(CMD_CONST, data = DataType(34, 1))), + # src dst src_cgra dst_cgra + IntraCgraPktType(0, 0, 0, 2, 0, 0, 0, 1, + payload = CgraPayloadType(CMD_CONFIG, ctrl_addr = 0, + ctrl = CtrlType(OPT_LD_CONST, + [FuInType(0), FuInType(0), FuInType(0), FuInType(0)], + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + # Note that we still need to set FU xbar. + FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(0)], + # 2 indicates the FU xbar port (instead of const queue or routing xbar port). 
+ write_reg_from = [b2(2), b2(0), b2(0), b2(0)], + write_reg_idx = [RegIdxType(7), RegIdxType(0), RegIdxType(0), RegIdxType(0)]))), + IntraCgraPktType(0, 0, 0, 2, 0, 0, 0, 1, + payload = CgraPayloadType(CMD_CONFIG, ctrl_addr = 1, + ctrl = CtrlType(OPT_INC, + [FuInType(1), FuInType(0), FuInType(0), FuInType(0)], + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + read_reg_towards = [b2(1), b2(0), b2(0), b2(0)], + read_reg_idx = [RegIdxType(7), RegIdxType(0), RegIdxType(0), RegIdxType(0)]))), + + # Tile `target_tile`. Note that tile 0 and tile `target_tile` can access the memory, as they are on + # the first column. + # Indicates the store address of 3. + IntraCgraPktType(0, target_tile, 0, 2, 0, 0, 0, 1, payload = CgraPayloadType(CMD_CONST, data = DataType(3, 1))), + # src dst src_cgra dst_cgra + IntraCgraPktType(0, target_tile, 0, 2, 0, 0, 0, 1, + payload = CgraPayloadType(CMD_CONFIG, ctrl_addr = 0, + ctrl = CtrlType(OPT_STR_CONST, + [FuInType(1), FuInType(0), FuInType(0), FuInType(0)], + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(2), TileInType(0), TileInType(0), TileInType(0)], + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]))), + # Pre-configure per-tile total config count. + # Only execute one operation (i.e., store) is enough for this tile. + # If this is set more than 1, no `COMPLETE` signal would be set back + # to CPU/test_harness. 
+ IntraCgraPktType(0, target_tile, 0, 2, 0, 0, 0, 1, payload = CgraPayloadType(CMD_CONFIG_TOTAL_CTRL_COUNT, data = DataType(1))), + + # For launching the two tiles. + IntraCgraPktType(0, 0, 0, 2, 0, 0, 0, 1, payload = CgraPayloadType(CMD_LAUNCH)), + IntraCgraPktType(0, target_tile, 0, 2, 0, 0, 0, 1, payload = CgraPayloadType(CMD_LAUNCH)), + ] + + src_query_pkt = \ + [ + IntraCgraPktType(payload = CgraPayloadType(CMD_LOAD_REQUEST, data_addr = 3)), + IntraCgraPktType(payload = CgraPayloadType(CMD_LOAD_REQUEST, data_addr = 34)), + ] + + expected_sink_out_pkt = \ + [ + # src dst src/dst cgra x/y + IntraCgraPktType(0, num_tiles, 2, 0, 0, 1, 0, 0, payload = CgraPayloadType(CMD_COMPLETE)), + IntraCgraPktType(target_tile, num_tiles, 2, 0, 0, 1, 0, 0, payload = CgraPayloadType(CMD_COMPLETE)), + # Expected updated value. + IntraCgraPktType(0, num_tiles, 0, 0, 0, 0, 0, 0, payload = CgraPayloadType(CMD_LOAD_RESPONSE, data = DataType(0xff, 1), data_addr = 3)), + IntraCgraPktType(0, num_tiles, 1, 0, 1, 0, 0, 0, payload = CgraPayloadType(CMD_LOAD_RESPONSE, data = DataType(0xfe, 1), data_addr = 34)), + ] + + ctrl_steps_per_iter = 2 + ctrl_steps_total = 2 + + id2validTiles = {id: cgra.getValidTiles() for id, cgra in enumerate(cgras_flat)} + id2validLinks = {id: cgra.getValidLinks() for id, cgra in enumerate(cgras_flat)} + id2dataSPM = {id: cgra.dataSPM for id, cgra in enumerate(cgras_flat)} + id2ctrlMemSize_map = {id: cgra.configMemSize for id, cgra in enumerate(cgras_flat)} + id2cgraSize_map = {id: [cgra.rows, cgra.columns] for id, cgra in enumerate(cgras_flat)} + + th = TestHarness(DUT, FunctionUnit, FuList, IntraCgraPktType, + num_cgra_rows, num_cgra_columns, + ctrl_mem_size, data_mem_size_global, + data_mem_size_per_bank, num_banks_per_cgra, + num_registers_per_reg_bank, src_ctrl_pkt, src_query_pkt, + ctrl_steps_per_iter, ctrl_steps_total, + id2ctrlMemSize_map, id2cgraSize_map, + id2validTiles, id2validLinks, id2dataSPM, + mem_access_is_combinational, + controller2addr_map, 
expected_sink_out_pkt, cmp_func) + + th.elaborate() + th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, + ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', + 'ALWCOMBORDER']) + th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) + run_sim(th) diff --git a/multi_cgra/test/arch_multi_hetero_cgra_override.yaml b/multi_cgra/test/arch_multi_hetero_cgra_override.yaml new file mode 100644 index 00000000..3919db9e --- /dev/null +++ b/multi_cgra/test/arch_multi_hetero_cgra_override.yaml @@ -0,0 +1,30 @@ +# This is an example of overriding the CGRAs with heterogeneous shapes. +# CGRA0: 2x2; CGRA1: 3x3; CGRA2: 4x4; CGRA3: 4x4. +multi_cgra_defaults: + rows: 2 + columns: 2 + +cgra_defaults: + rows: 4 + columns: 4 + configMemSize: 16 + +cgra_defaults: + rows: 2 + columns: 2 + configMemSize: 16 + +tile_defaults: + num_registers: 16 + fu_types: ["add", "mul", "div", "fadd", "fmul", "fdiv", "logic", "cmp", "sel", "type_conv", "vfmul", "fadd_fadd", "fmul_fadd", "grant", "loop_control", "phi", "constant", "mem", "return", "mem_indexed", "alloca", "shift"] + +cgra_overrides: +- cgra_x: 0 + cgra_y: 0 + rows: 2 + columns: 2 + +- cgra_x: 1 + cgra_y: 0 + rows: 3 + columns: 3 \ No newline at end of file From 26199d0e829c56bdfb109d858639df54915ad0fa Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Wed, 6 May 2026 21:40:46 +0800 Subject: [PATCH 13/14] [Fix] Use max_per_cgra_rows and max_per_cgra_col to construct tile ports to avoid bitwidth mismatch. 
--- cgra/CgraTemplateRTL.py | 25 +++++++++++-------- multi_cgra/MeshMultiCgraTemplateRTL.py | 18 ++++++++++--- multi_cgra/arch_parser/MultiCgraParam.py | 2 +- .../test/arch_multi_hetero_cgra_override.yaml | 5 ---- 4 files changed, 31 insertions(+), 19 deletions(-) diff --git a/cgra/CgraTemplateRTL.py b/cgra/CgraTemplateRTL.py index be3b804d..340257b5 100644 --- a/cgra/CgraTemplateRTL.py +++ b/cgra/CgraTemplateRTL.py @@ -79,7 +79,7 @@ def construct(s, CgraPayloadType, total_steps, mem_access_is_combinational, FunctionUnit, FuList, TileList, LinkList, dataSPM, controller2addr_map, idTo2d_map, - is_multi_cgra = True, cgra_id = 0, max_num_tiles_ = None, max_num_rd_tiles_ = None, max_num_wr_tiles_ = None): + is_multi_cgra = True, cgra_id = 0, max_per_cgra_rows_ = None, max_per_cgra_cols_ = None, max_num_rd_tiles_ = None, max_num_wr_tiles_ = None): """ max_num_tiles_: the tile number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. max_num_rd_tiles_: the number of read ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. @@ -96,7 +96,9 @@ def construct(s, CgraPayloadType, # Reconstructs packet types. # In the case of heterogeneous multi-cgra, `max_num_tiles` means the tile number of the largest cgra. # In the case of single cgra, it is the tile number of the current cgra. - max_num_tiles = max_num_tiles_ if max_num_tiles_ is not None else len(TileList) + max_per_cgra_rows = max_per_cgra_rows_ if max_per_cgra_rows_ is not None else per_cgra_rows + max_per_cgra_cols = max_per_cgra_cols_ if max_per_cgra_cols_ is not None else per_cgra_columns + max_num_tiles = max_per_cgra_rows * max_per_cgra_cols # In the case of heterogeneous multi-cgra, `max_num_rd_tiles` means the number of read ports of the largest cgra. # In the case of single cgra, it is the number of read ports of the current cgra. 
max_num_rd_tiles = max_num_rd_tiles_ if max_num_rd_tiles_ is not None else dataSPM.getNumOfValidReadPorts() @@ -129,14 +131,17 @@ def construct(s, CgraPayloadType, s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType) if is_multi_cgra: - s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(per_cgra_columns)] - s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(per_cgra_columns)] - s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(per_cgra_columns)] - s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(per_cgra_columns)] - s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(per_cgra_rows)] - s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(per_cgra_rows)] - s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(per_cgra_rows)] - s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(per_cgra_rows)] + # Use the largest CGRA shape to set the boundary ports for compatibility in the case of heterogeneous multi-cgra. + # Remember to ground the remaining boundary ports of the current CGRA when the current CGRA has fewer rows or columns than the largest CGRA. 
+ # See also: + s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_cols)] + s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(max_per_cgra_cols)] + s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_cols)] + s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(max_per_cgra_cols)] + s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_rows)] + s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(max_per_cgra_rows)] + s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_rows)] + s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(max_per_cgra_rows)] # Components s.tile = [TileRTL(CtrlPktType, diff --git a/multi_cgra/MeshMultiCgraTemplateRTL.py b/multi_cgra/MeshMultiCgraTemplateRTL.py index f5067293..88b8d731 100644 --- a/multi_cgra/MeshMultiCgraTemplateRTL.py +++ b/multi_cgra/MeshMultiCgraTemplateRTL.py @@ -30,9 +30,9 @@ def construct(s, CgraPayloadType, # must accommodate the largest CGRA shape to ensure uniform packet width # and correct inter-CGRA communication. cgra_size:List[List[int, int]] = [id2cgraSize_map[id] for id in range(cgra_rows * cgra_columns)] - max_rows, max_cols = max(cgra_size, key=lambda x: x[0] * x[1]) + max_per_cgra_rows, max_per_cgra_cols = max(cgra_size, key=lambda x: x[0] * x[1]) # The tile number of the largest cgra. 
- max_num_tiles = max_rows * max_cols + max_num_tiles = max_per_cgra_rows * max_per_cgra_cols max_num_rd_tiles = max(id2dataSPM[id].getNumOfValidReadPorts() for id in range(cgra_rows * cgra_columns)) max_num_wr_tiles = max(id2dataSPM[id].getNumOfValidWritePorts() for id in range(cgra_rows * cgra_columns)) @@ -78,7 +78,7 @@ def construct(s, CgraPayloadType, FunctionUnit, FuList, id2validTiles[cgra_id], id2validLinks[cgra_id], id2dataSPM[cgra_id], controller2addr_map, idTo2d_map, - is_multi_cgra, cgra_id, max_num_tiles, max_num_rd_tiles, max_num_wr_tiles) + is_multi_cgra, cgra_id, max_per_cgra_rows, max_per_cgra_cols, max_num_rd_tiles, max_num_wr_tiles) for cgra_id in range(s.num_cgras)] # Latency is 1. s.mesh = MeshNetworkRTL(NocPktType, MeshPos, cgra_columns, cgra_rows, 1) @@ -228,6 +228,18 @@ def construct(s, CgraPayloadType, s.cgra[idx].recv_data_on_boundary_east[tile_row].msg //= CgraDataType() s.cgra[idx].send_data_on_boundary_east[tile_row].rdy //= 0 + # Grounds the remaining boundary ports of the current CGRA. 
+ if per_cgra_rows < max_per_cgra_rows: + for tile_row in range(per_cgra_rows, max_per_cgra_rows): + s.cgra[idx].send_data_on_boundary_west[tile_row].rdy //= 0 + s.cgra[idx].recv_data_on_boundary_west[tile_row].val //= 0 + s.cgra[idx].recv_data_on_boundary_west[tile_row].msg //= CgraDataType() + if per_cgra_columns < max_per_cgra_cols: + for tile_col in range(per_cgra_columns, max_per_cgra_cols): + s.cgra[idx].send_data_on_boundary_south[tile_col].rdy //= 0 + s.cgra[idx].recv_data_on_boundary_south[tile_col].val //= 0 + s.cgra[idx].recv_data_on_boundary_south[tile_col].msg //= CgraDataType() + def line_trace(s): res = "||\n".join([(("\n\n[cgra_"+str(i)+": ") + x.line_trace()) for (i,x) in enumerate(s.cgra)]) diff --git a/multi_cgra/arch_parser/MultiCgraParam.py b/multi_cgra/arch_parser/MultiCgraParam.py index 81851b60..aefba8eb 100644 --- a/multi_cgra/arch_parser/MultiCgraParam.py +++ b/multi_cgra/arch_parser/MultiCgraParam.py @@ -90,5 +90,5 @@ def __repr__(self): cgra_size_str = "" for y in range(self.rows): for x in range(self.cols): - cgra_size_str += f"CGRA {y * self.cols + x}: {self.cgras[x][y].rows}x{self.cgras[x][y].columns}\n" + cgra_size_str += f"CGRA {y * self.cols + x}: {self.cgras[y][x].rows}x{self.cgras[y][x].columns}\n" return f"\nSize of MultiCGRAs: {self.rows}x{self.cols}\n" + cgra_size_str diff --git a/multi_cgra/test/arch_multi_hetero_cgra_override.yaml b/multi_cgra/test/arch_multi_hetero_cgra_override.yaml index 3919db9e..7fa0dc5e 100644 --- a/multi_cgra/test/arch_multi_hetero_cgra_override.yaml +++ b/multi_cgra/test/arch_multi_hetero_cgra_override.yaml @@ -4,11 +4,6 @@ multi_cgra_defaults: rows: 2 columns: 2 -cgra_defaults: - rows: 4 - columns: 4 - configMemSize: 16 - cgra_defaults: rows: 2 columns: 2 From 0402021e659ff5362576e9de42542dccb2ba8004 Mon Sep 17 00:00:00 2001 From: BenkangPeng Date: Thu, 7 May 2026 10:59:54 +0800 Subject: [PATCH 14/14] Rename variables --- cgra/CgraTemplateRTL.py | 21 +++++++++++++-------- 1 file changed, 13 
insertions(+), 8 deletions(-) diff --git a/cgra/CgraTemplateRTL.py b/cgra/CgraTemplateRTL.py index 340257b5..a2d0d573 100644 --- a/cgra/CgraTemplateRTL.py +++ b/cgra/CgraTemplateRTL.py @@ -79,11 +79,16 @@ def construct(s, CgraPayloadType, total_steps, mem_access_is_combinational, FunctionUnit, FuList, TileList, LinkList, dataSPM, controller2addr_map, idTo2d_map, - is_multi_cgra = True, cgra_id = 0, max_per_cgra_rows_ = None, max_per_cgra_cols_ = None, max_num_rd_tiles_ = None, max_num_wr_tiles_ = None): + is_multi_cgra = True, cgra_id = 0, + provided_max_per_cgra_rows = None, + provided_max_per_cgra_cols = None, + provided_max_num_rd_tiles = None, + provided_max_num_wr_tiles = None): """ - max_num_tiles_: the tile number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. - max_num_rd_tiles_: the number of read ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. - max_num_wr_tiles_: the number of write ports of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch. + provided_max_per_cgra_rows: the number of rows of the largest cgra in the heterogeneous multi-cgra architecture. None for single cgra arch or homogeneous multi-cgra arch. + provided_max_per_cgra_cols: the number of columns of the largest cgra in the heterogeneous multi-cgra architecture. None for single cgra arch or homogeneous multi-cgra arch. + provided_max_num_rd_tiles: the number of read ports of the largest cgra in the heterogeneous multi-cgra architecture. None for single cgra arch or homogeneous multi-cgra arch. + provided_max_num_wr_tiles: the number of write ports of the largest cgra in the heterogeneous multi-cgra architecture. None for single cgra arch or homogeneous multi-cgra arch. 
""" DataType = CgraPayloadType.get_field_type(kAttrData) @@ -96,13 +101,13 @@ def construct(s, CgraPayloadType, # Reconstructs packet types. # In the case of heterogeneous multi-cgra, `max_num_tiles` means the tile number of the largest cgra. # In the case of single cgra, it is the tile number of the current cgra. - max_per_cgra_rows = max_per_cgra_rows_ if max_per_cgra_rows_ is not None else per_cgra_rows - max_per_cgra_cols = max_per_cgra_cols_ if max_per_cgra_cols_ is not None else per_cgra_columns + max_per_cgra_rows = provided_max_per_cgra_rows if provided_max_per_cgra_rows is not None else per_cgra_rows + max_per_cgra_cols = provided_max_per_cgra_cols if provided_max_per_cgra_cols is not None else per_cgra_columns max_num_tiles = max_per_cgra_rows * max_per_cgra_cols # In the case of heterogeneous multi-cgra, `max_num_rd_tiles` means the number of read ports of the largest cgra. # In the case of single cgra, it is the number of read ports of the current cgra. - max_num_rd_tiles = max_num_rd_tiles_ if max_num_rd_tiles_ is not None else dataSPM.getNumOfValidReadPorts() - max_num_wr_tiles = max_num_wr_tiles_ if max_num_wr_tiles_ is not None else dataSPM.getNumOfValidWritePorts() + max_num_rd_tiles = provided_max_num_rd_tiles if provided_max_num_rd_tiles is not None else dataSPM.getNumOfValidReadPorts() + max_num_wr_tiles = provided_max_num_wr_tiles if provided_max_num_wr_tiles is not None else dataSPM.getNumOfValidWritePorts() # Use largest CGRA shape(max_num_tiles) to set CtrlPktType/NocPktType for compatibility.