From 302f266929033d8e94ed2973439285cb129f1de2 Mon Sep 17 00:00:00 2001 From: sebasthechill Date: Thu, 19 Mar 2026 19:52:11 -0400 Subject: [PATCH] MMU: Add MMU RTL and IP briefs --- docs/ip-briefs/ptw One Pager (v1.0).md | 307 +++++++++ docs/ip-briefs/sv32_mmu One Pager (v1.0).md | 314 +++++++++ docs/ip-briefs/tlb One Pager (v1.0).md | 172 +++++ rtl/cpu/mmu/ptw.sv | 287 ++++++++ rtl/cpu/mmu/ptw_tb.sv | 682 ++++++++++++++++++++ rtl/cpu/mmu/sv32_mmu.sv | 380 ++++++++--- rtl/cpu/mmu/sv32_mmu_tb.sv | 248 +++++++ rtl/cpu/mmu/tlb.sv | 252 ++++++++ 8 files changed, 2555 insertions(+), 87 deletions(-) create mode 100644 docs/ip-briefs/ptw One Pager (v1.0).md create mode 100644 docs/ip-briefs/sv32_mmu One Pager (v1.0).md create mode 100644 docs/ip-briefs/tlb One Pager (v1.0).md create mode 100644 rtl/cpu/mmu/ptw_tb.sv create mode 100644 rtl/cpu/mmu/sv32_mmu_tb.sv diff --git a/docs/ip-briefs/ptw One Pager (v1.0).md b/docs/ip-briefs/ptw One Pager (v1.0).md new file mode 100644 index 0000000..3005431 --- /dev/null +++ b/docs/ip-briefs/ptw One Pager (v1.0).md @@ -0,0 +1,307 @@ +ptw — Module Brief (v1.0) RTL: rtl/cpu/mmu/ptw.sv + + + +#### **Purpose \& Role** + +The Page Table Walker (PTW) performs a hardware page-table traversal for the SV32 Virtual memory. When the TLB and sv32\_mmu detects a miss, the PTW takes the page table entries (PTEs) from memory using AXI4-Lite read requests. It implements the 2 level SV32 walk (VPN\[1]->VPN\[0]) and returns either a valid PTE or a page fault condition. + + + +The PTW is a lone external module. It does not manage TLBs directly. + +sv32\_mmu requests page walks -> ptw gets PTEs -> sv32\_mmu interprets results and completes permission checks -> sv32\_mmu handles TLB insertion after walk completes. + + + +**Parameters +Name Default Description** +--- + +  TIMEOUT\_CYCLES 256 Max cycles allowed mem response before timeout + +  ADDR\_WIDTH 32 Width of virtual \& PTE address generated. + +  DATA\_WIDTH 32 Width of PTE fetch data. 
+ +  PPN\_WIDTH 22 Physical page # width for next level base @. + + + +#### **Interfaces (Ports)** + +##### **Signal Dir Width Description** + +  clk\_i In 1 Clock input. + +  rst\_ni In 1 Active-low reset. + + flush\_i In 1 Flush input from MMU / SFENCE.VMA; clears in-flight walk state. + +  walk\_req\_valid\_i In 1 Request start page walk (from sv32). +  walk\_req\_ready\_o Out 1 PTW ready to accept new walk request. + + walk\_req\_addr\_i In 32 Root page-table base address for the level-1 walk. + +  walk\_req\_vpn\_i In 20 VPN\[19:0] split into VPN\[1]/\[0]. + +  walk\_rsp\_valid\_o Out 1 PTW response valid. + +  walk\_rsp\_pte\_o Out 32 Returned PTE data (valid or faulty). + +  walk\_rsp\_error\_o Out 1 PTW error indication (timeout / invalid walk / access fault). + +  axi\_ar\_valid\_o Out 1 AXI4-Lite read address valid. + +  axi\_ar\_addr\_o Out 32 AXI4-Lite read address for PTE fetch. + +  axi\_ar\_ready\_i In 1 AXI4-Lite address channel ready. + +  axi\_r\_valid\_i In 1 AXI4-Lite read data valid. + +  axi\_r\_data\_i In 32 PTE data returned from memory. + +  axi\_r\_resp\_i In 2 AXI read response code for access-fault detection. + + + + + +#### **Protocols** + +* Walk interface: single outstanding request. Valid/ready managed between sv32\_mmu and PTW. +* AXI4-Lite: address request uses axi\_ar\_valid\_o / axi\_ar\_ready\_i, read data returns through axi\_r\_valid\_i / axi\_r\_data\_i / axi\_r\_resp\_i. +* MMU sequencing: PTW only begins a walk when a valid request is accepted, and no interleaved walks are supported. + +  + +#### **Behavior \& Timing** + +* Performs a two-level Sv32 lookup, fetches the level-1 PTE using the root page-table base plus VPN\[1]. +* If the entry is a pointer, fetches the level-2 PTE using the next-level base plus VPN\[0]. +* Detects and reports invalid PTEs, misaligned PTE conditions, access faults, and timeout conditions. 
+* Walk latency depends on memory response timing, with a typical latency of 8–20 cycles; each PTE read-data wait is bounded by TIMEOUT\_CYCLES (cycles spent waiting for the AXI address handshake are not counted). +* One translation may be in flight at a time. +* PTW does not modify A/D bits (sv32\_mmu handles that if required). +* flush\_i clears PTW state and cancels any in-flight walk. + + + +#### **Programming Model** + +The PTW is not software-visible. It is indirectly controlled through sv32\_mmu and CSRs: + +* SATP.PPN provides root page-table pointer. +* SFENCE.VMA causes PTW state flush through the MMU. +* No memory mapped registers. +* No CSR interface inside PTW. + + + +#### **Errors \& IRQs** + +#####   **Condition Description Handling** + +  Timeout Memory read response exceeds walk\_rsp\_error\_o asserted. + +  TIMEOUT\_CYCLES. + +  Invalid PTE PTE has illegal or reserved Error returned to sv32\_mmu. + +  values. + +  Misaligned PTE Invalid alignment from PTE. Treated as page fault. + +  Access fault AXI bus returns error. Error raised to sv32\_mmu. + + + +PTW does not generate standalone interrupts; all exceptions are handled by sv32\_mmu and core trap logic. + + + +#### **Performance Targets** + +#####   **Metric Target Notes** + +  Walk latency <= 40 cycles typical 2 level walk under average memory timing. + +  Throughput 1 walk at a time Back pressure via walk\_req\_ready\_o. + +  Frequency 500 MHz Same as MMU domain. + + + +#### **Dependencies** + +* sv32\_mmu (requests + responses), AXI4-Lite interconnect (memory PTE fetches). +* Clocks: clk\_i / rst\_ni shared with MMU. +* Inputs: SATP root pointer (via mmu). +* Must be coordinated with TLB insertions performed by sv32\_mmu. + + + +#### **Verification Links** + +Unit tests: verification/mmu/test\_ptw.py + +Integration: verification/core/system\_paging.sv + +Coverage: cov/ptw\_cov.html + +Known limitations: + +* Superpage support behavior depends on final Sv32 alignment policy implemented in RTL. +* No multi-walk concurrency. 
+* Timeout behavior not cycle-accurate with all DRAM models. + + + +#### **Definitions \& Acronyms** + + + +AXI4-Lite: + +Advanced eXtensible Interface, lightweight subset of the ARM AXI4 protocol used for memory-mapped control and status register accesses. + + + +A/D bits: + +Accessed and Dirty bits within a page-table entry (PTE). The MMU sets these when a page is read or written for the first time. + + + +ASID: + +Address Space Identifier; field in the SATP register distinguishing virtual-memory contexts. + + + +CDC: + +Clock-Domain Crossing; logic used to safely transfer signals between different clock domains. + + + +CPU: + +Central Processing Unit. + + + +CSR: + +Control and Status Register; RISC-V architectural registers that configure privilege behavior, MMU mode, and interrupts. + + + +MMU: + +Memory Management Unit; hardware responsible for translating virtual addresses to physical addresses and enforcing protection. + + + +OS: + +Operating System. + + + +PA: + +PADDR\_WIDTH — Physical Address; bit-width of the physical address output from the MMU. + + + +PTE: + +Page Table Entry; 32-bit descriptor in memory describing one virtual-to-physical mapping and its permissions. + + + +PTW: + +Page Table Walker; sub-module that fetches PTEs from memory on a TLB miss. + + + +RAM: + +Random-Access Memory; main system memory where program data and page tables reside. + + + +R/W/X: + +Read, Write, and Execute permission bits inside a PTE. + + + +RV32 / RV32I: 32-bit RISC-V base integer instruction set architecture. + + + +SATP: + +Supervisor Address Translation and Protection register; enables paging and provides root page-table pointer and ASID. + + + +SFENCE.VMA: + +Supervisor Fence for Virtual-Memory Area; RISC-V instruction that invalidates TLB entries. + + + +S-mode / U-mode / M-mode: Supervisor, User, and Machine privilege levels defined by the RISC-V privilege specification. 
+ + + +SoC: + +System-on-Chip, integrated design including CPU, MMU, caches, interconnect, and peripherals. + + + +SV32: + +RISC-V 32-bit virtual-memory scheme using two-level page tables with 4 KB pages. + + + +TLB: + +Translation Lookaside Buffer; cache storing recently used PTEs to accelerate address translation. + + + +VPN: + +Virtual Page Number; upper bits of a virtual address that index the page table. + + + +CSR\_FILE: + +Hardware block managing RISC-V control/status registers used by the CPU and MMU. + + + +AXI Crossbar: + +On-chip interconnect fabric (rtl/bus/axi/axi\_crossbar.sv) that routes AXI transactions between masters (CPU, PTW) and slaves (memory, peripherals). + + + +BootROM: + +Read-only memory code executed on reset to initialize hardware and enable the MMU/OS. + + + +IRQ: + +Interrupt Request; hardware signal used to notify the processor of asynchronous events. + diff --git a/docs/ip-briefs/sv32_mmu One Pager (v1.0).md b/docs/ip-briefs/sv32_mmu One Pager (v1.0).md new file mode 100644 index 0000000..55d848d --- /dev/null +++ b/docs/ip-briefs/sv32_mmu One Pager (v1.0).md @@ -0,0 +1,314 @@ +sv32\_mmu — Module Brief (v1.0) RTL: rtl/cpu/mmu/sv32\_mmu.sv + + + +#### **Purpose \& Role** + +Sv32 virtual memory translation unit. Handles TLB management, page-table walks (delegated to an external Page Table Walker module), and access-permission enforcement for S-mode and U-mode memory accesses. Sits between the CPU memory stage and the memory subsystem, translating virtual addresses into physical addresses. Uses a Translation Lookaside Buffer (TLB) for cached translations and issues page-table walk requests to an external PTW module on TLB miss. Enforces R/W/X and U/S permissions according to the Sv32 specification, propagating page-faults to core trap logic. Ensures isolation between privilege levels and maintains correct virtual memory operation across the CPU pipeline. 
+ + + +**Parameters +Name Default Description** +--- + +  TLB\_ENTRIES 16 Number of cached page table entries. + +  PAGE\_SIZE 4 KB Base page size per SV32 specification. + +  PTW\_TIMEOUT\_CYCLES 256 Maximum cycles to wait for external PTW. + +  ADDR\_WIDTH 32 Virtual address width (fixed for RV32). + +  PADDR\_WIDTH 34 Physical address width to memory subsystem. + + + +#### **Interfaces (Ports)** + +##### **Signal Dir Width Description** + +  clk\_i In 1 Clock input. + +  rst\_ni In 1 Active-low reset. + +  va\_i In 32 Virtual address input. + +  pa\_o Out 34 Physical address output. + +  valid\_i In 1 Request valid. + +  ready\_o Out 1 Ready for next request. + + ptw\_req\_valid\_o Out 1 Request external PTW initiation. + + ptw\_req\_ready\_i In 1 External PTW ready to accept walk request. + + ptw\_req\_root\_addr\_o Out 32 Root page-table base address derived from SATP for PTW walk requests. + + ptw\_req\_vpn\_o Out 20 VPN\[19:0] extracted from virtual address. + + ptw\_rsp\_valid\_i In 1 External PTW response valid. + + ptw\_rsp\_pte\_i In 32 Returned PTE data from external PTW. + + ptw\_rsp\_error\_i In 1 External PTW error indication.  +satp\_i In 32 SATP register value. + +  priv\_i In 2 Current privilege level (U/S/M). +access\_i In 2 Access type (load/store/fetch). + + fault\_o Out 1 Page-fault indication output. + + fault\_cause\_o Out 2 Fault cause (instruction/load/store page fault). + + fault\_timeout\_o Out 1 PTW timeout indication output. + + fault\_va\_o Out 32 Faulting virtual address. + + sum\_i In 1 SUM control from status CSR. + + mxr\_i In 1 MXR control from status CSR. + + uxn\_i In 1 UXN / execute restriction control from status CSR. + + sfence\_vma\_i In 1 Flush / invalidate request for TLB and in-flight walk. + + + +#### **Protocols** + +* CPU side: uses in-order valid/ready. 
+* PTW side: sv32\_mmu issues a single outstanding walk request using valid/ready, + +provides the root page-table base address and VPN, and the external PTW returns either PTE data or an error on response valid. + +  + +#### **Behavior \& Timing** + +* TLB hit ---> 1-cycle translation. +* TLB miss ---> sv32\_mmu issues a PTW request to the external PTW, providing the root page-table base address from SATP and VPN\[19:0] from the virtual address. +* PTW performs the actual AXI/DRAM access and returns either PTE data or an error through ptw\_rsp\_\* signals. +* sv32\_mmu performs permission checks for R/W/X and U/S enforcement after a valid PTW response. +* In-order request handling. One translation in flight. +* SV32 two level walk: Root PPN from SATP -> VPN\[1] -> VPN\[0]. +* Single clock domain (clk\_i). No CDC. One pipeline stage for TLB lookup. Stalls on misses. +* sv32\_mmu uses access\_i together with SUM/MXR/UXN and privilege mode to determine permission validity and drives explicit fault outputs on translation or permission failure. +* sfence\_vma\_i flushes the TLB and cancels any in-flight walk state. + + + +#### **Programming Model** + +Controlled by CSRs: SATP (enable/ASID/root PPN), SFENCE.VMA (invalidate), and SUM/MXR/UXN bits from mstatus/sstatus. Refer to csr\_spec.yaml. No memory mapped registers. + + + +#### **Errors \& IRQs** + +#####   **Condition Description Handling** + +  Page fault PTW returned error or CPU exception + +  permission violation. (load/store/instr). + +  PTW timeout No response from external Sets error flag, + +  PTW in timeout window. Retry or exception. + +  Misaligned PTE Invalid alignment from PTW Treated as page fault. +response. + + + +There are no standalone IRQ outputs. Exceptions propagate to core trap logic. + +Page faults are surfaced through fault\_o, fault\_cause\_o, fault\_timeout\_o, and fault\_va\_o for core trap handling. 
+ + + +#### **Performance Targets** + +#####   **Metric Target Notes** + +  TLB hit latency 1 cycle No stall translation. + +  TLB miss latency less than or equal to 40 cycles Two level walk average. + +  Throughput 1 translation/cycle When not PTW stalled. + +  Clock frequency 500 MHz CPU domain minimal default. + + + +#### **Dependencies** + +* Modules: tlb (lookup + insertion), ptw (external page table walker module). +* Clocks/Resets: clk\_i, rst\_ni (shared with CPU). +* Software: SATP must be configured before enable; SFENCE.VMA after context switch. +* PTW performs AXI4-Lite/AXI memory reads; MMU provides the root page-table base address and VPN, and receives returned PTE data or error. +* MMU receives privilege and CSR configuration from csr\_file. + + + +#### **Verification Links** + +Unit tests: verification/mmu/test\_sv32\_mmu.py + +Integration: verification/core/system\_paging.sv + +Coverage: cov/mmu\_cov.html + +Known limitations: No superpage support (>4 MiB). PTW timeout error path unverified. + + + +#### **Definitions \& Acronyms** + + + +AXI4-Lite: + +Advanced eXtensible Interface, lightweight subset of the ARM AXI4 protocol used for memory-mapped control and status register accesses. + + + +A/D bits: + +Accessed and Dirty bits within a page-table entry (PTE). The MMU sets these when a page is read or written for the first time. + + + +ASID: + +Address Space Identifier; field in the SATP register distinguishing virtual-memory contexts. + + + +CDC: + +Clock-Domain Crossing; logic used to safely transfer signals between different clock domains. + + + +CPU: + +Central Processing Unit. + + + +CSR: + +Control and Status Register; RISC-V architectural registers that configure privilege behavior, MMU mode, and interrupts. + + + +MMU: + +Memory Management Unit; hardware responsible for translating virtual addresses to physical addresses and enforcing protection. + + + +OS: + +Operating System. 
+ + + +PA: + +PADDR\_WIDTH — Physical Address; bit-width of the physical address output from the MMU. + + + +PTE: + +Page Table Entry; 32-bit descriptor in memory describing one virtual-to-physical mapping and its permissions. + + + +PTW: + +Page Table Walker; external module that fetches PTEs from memory on a TLB miss. + + + +RAM: + +Random-Access Memory; main system memory where program data and page tables reside. + + + +R/W/X: + +Read, Write, and Execute permission bits inside a PTE. + + + +RV32 / RV32I: 32-bit RISC-V base integer instruction set architecture. + + + +SATP: + +Supervisor Address Translation and Protection register; enables paging and provides root page-table pointer and ASID. + + + +SFENCE.VMA: + +Supervisor Fence for Virtual-Memory Area; RISC-V instruction that invalidates TLB entries. + + + +S-mode / U-mode / M-mode: Supervisor, User, and Machine privilege levels defined by the RISC-V privilege specification. + + + +SoC: + +System-on-Chip, integrated design including CPU, MMU, caches, interconnect, and peripherals. + + + +SV32: + +RISC-V 32-bit virtual-memory scheme using two-level page tables with 4 KB pages. + + + +TLB: + +Translation Lookaside Buffer; cache storing recently used PTEs to accelerate address translation. + + + +VPN: + +Virtual Page Number; upper bits of a virtual address that index the page table. + + + +CSR\_FILE: + +Hardware block managing RISC-V control/status registers used by the CPU and MMU. + + + +AXI Crossbar: + +On-chip interconnect fabric (rtl/bus/axi/axi\_crossbar.sv) that routes AXI transactions between masters (CPU, PTW) and slaves (memory, peripherals). + + + +BootROM: + +Read-only memory code executed on reset to initialize hardware and enable the MMU/OS. + + + +IRQ: + +Interrupt Request; hardware signal used to notify the processor of asynchronous events. 
+ diff --git a/docs/ip-briefs/tlb One Pager (v1.0).md b/docs/ip-briefs/tlb One Pager (v1.0).md new file mode 100644 index 0000000..b9e6d8a --- /dev/null +++ b/docs/ip-briefs/tlb One Pager (v1.0).md @@ -0,0 +1,172 @@ +tlb — Module Brief (v1.0) RTL: rtl/cpu/mmu/tlb.sv + + + +#### **Purpose \& Role** + +The TLB, Translation Lookaside Buffer, provides cached virtual to physical mappings for the Sv32 MMU. It takes recently used page table entries and stores them to accelerate address translation and minimize page table walks. It supports separate instruction (I) and data (D) lookup paths with shared shootdown domain. Implemented as an associative cache that uses a content addressable memory (CAM) structure with the least recently used (LRU) or pseudo least recently used replacement policy. During misses, the requests are sent to the MMU, which forwards them to an external Page Table Walker (PTW) for page-table fetch. When address spaces change, global invalidations through SFENCE.VMA and SATP writes ensure these outdated translations are removed. + + + +**Parameters +Name Default Description** +--- + +  ENTRIES 16 Number of cached TLB entries per instance. + +  PAGE\_SIZE 4 KB Base page size per Sv32 specification. + +  ASSOCIATIVE FULL Fully associative lookup organization. + +  REPL\_POLICY LRU Replacement policy: LRU or pseudo-LRU select. + +  ADDR\_WIDTH 32 Virtual address width for tag comparison. + +  PADDR\_WIDTH 34 Physical address width for stored entries. + + + +#### **Interfaces (Ports)** + +##### **Signal Dir Width Description** + +  clk\_i In 1 Clock input. + +  rst\_ni In 1 Active-low reset. + +  lookup\_va\_i In 32 Virtual address to translate. + +  lookup\_hit\_o Out 1 Indicates translation hit. + +  lookup\_pa\_o Out 34 Physical address result if hit. + +  lookup\_valid\_i In 1 Lookup request valid. + +  lookup\_ready\_o Out 1 Ready for next lookup. + +  insert\_valid\_i In 1 Request insert entry (from sv32\_mmu). 
+ +  insert\_vpn\_i In 20 Virtual page number to cache. + +  insert\_ppn\_i In 22 Physical page number to cache. + +  insert\_perm\_i In 8 Permission bits (R/W/X/U/S/A/D). + +  flush\_i In 1 Global flush signal (SFENCE.VMA/SATP). + +  miss\_o Out 1 When lookup misses existing entries. + + + +#### **Protocols** + +* Lookup interface uses sequential valid/ready handshake with MMU pipeline. +* Insert interface triggered by sv32\_mmu after PTW completion. +* Flush is synchronous and clears all entries within one cycle after assertion. + +  + +#### **Behavior \& Timing** + +* CAM based associative lookup performs tag comparison in one cycle. +* On hit ---> return cached physical address. No pipeline stall. +* On miss --> raise miss\_o prompting MMU to initiate PTW fetch. +* LRU or pseudo LRU replacement selects victim entry for new insertions. +* Supports optional shared shootdown across I/D TLBs when enabled. +* Single clock domain (clk\_i). No clock domain crossings. + + + +#### **Programming Model** + +Indirectly controlled through MMU CSRs and instructions: + +* SFENCE.VMA: Flush all or ASID specific entries. +* SATP writes: trigger global flush and context switch. +* Privilege level (w/ CSR) determines permission bits cached with each entry. +* Refer to csr\_spec.yaml for CSR definitions. + + + + + +#### **Errors \& IRQs** + +#####   **Condition Description Handling** + +  Parity error CAM parity or ECC error. Entry invalidated, + +  Reloaded on next access. + +  Invalid insert Insertion without valid PTE. Ignored, triggers MMU retry. + + + +There are no stand alone IRQ outputs. Exceptions spread to core trap logic. + + + +#### **Performance Targets** + +#####   **Metric Target Notes** + +  Lookup latency 1 cycle No stall on hit. + +  Insert latency 1 cycle Tag and data write. + +  Flush latency 1-2 cycles Depends on entry count. + +  Throughput 1 lookup/cycle Continuous pipeline operation. 
+ + + +#### **Dependencies** + +* Connected to: sv32\_mmu for lookups, misses, and entry insertions. +* Page-table misses resolved indirectly through external PTW (handled by sv32\_mmu). +* Receives: SFENCE.VMA and SATP write flush controls through CSR subsystem. +* Clocks/Resets: clk\_i, rst\_ni (is shared with MMU). +* Integration: Shares shootdown domain across instruction/data TLB instances. + + + +#### **Verification Links** + +Unit tests: verification/mmu/test\_tlb.py + +Integration: verification/mmu/test\_sv32\_mmu.sv + +Coverage: cov/tlb\_cov.html + +Known limitations: No superpage support (>4 MiB) entries. pseudo LRU accuracy is not verified under concurrent insertions. + + + +#### **Definitions \& Acronyms** + + + +TLB: Translation Lookaside Buffer. Cache storing recently used page-table entries. + +CAM: Content Addressable Memory. Memory allowing associative lookup based on tag comparison. + +LRU: Least Recently Used. Replacement policy that evicts the entry unused for the longest time. + +PTW: Page Table Walker. External MMU logic that fetches page-table entries on TLB misses. + +MMU: Memory Management Unit. Performs address translation and permission checks. + +PTE: 32-bit page table entry. Descriptor defining a mapping between virtual and physical pages. + +SFENCE.VMA: Supervisor Fence Virtual Memory Area. Instruction used to flush TLB entries. + +SATP: Supervisor Address Translation and Protection register. Defines root page table and ASID. + +ASID: Address Space Identifier. Distinguishes virtual-memory address spaces. + +CSR: Control and Status Register. Holds configuration and privilege control data. + +AXI4-Lite: Simplified version of ARM AXI4 bus protocol used for memory access. + +SoC: System-on-Chip. Integrated CPU, MMU, cache, and peripheral components. 
+
diff --git a/rtl/cpu/mmu/ptw.sv b/rtl/cpu/mmu/ptw.sv
index e69de29..f2806ef 100644
--- a/rtl/cpu/mmu/ptw.sv
+++ b/rtl/cpu/mmu/ptw.sv
@@ -0,0 +1,316 @@
+// -----------------------------------------------------------------------------
+// ptw: Sv32 page-table walker.
+//
+// Accepts one walk request at a time (level-1 table base + 20-bit VPN), issues
+// up to two PTE reads on the AXI-Lite read port (level 1, then level 2 when the
+// level-1 entry is a pointer) and answers with a one-cycle walk_rsp_valid_o
+// pulse carrying either the leaf PTE or walk_rsp_error_o.
+//
+// An error is reported for: a non-zero read response code, an invalid PTE (V==0,
+// or R==0 with W==1), a leaf with A==0, a level-1 leaf whose PPN[9:0] is non-zero,
+// a non-leaf entry at level 2, a misaligned PTE address, or a read-data wait
+// that reaches TIMEOUT_CYCLES. The timeout counter runs only in WAIT_L1/WAIT_L2,
+// so cycles spent waiting for axi_ar_ready_i are not bounded by it.
+//
+// NOTE(review): flush_i returns the FSM to IDLE without tracking a read that
+// was already issued. If its axi_r_valid_i arrives while a later walk sits in
+// WAIT_L1/WAIT_L2 it will be taken as that walk's PTE. Confirm the integration
+// rules this out (or add a drain state).
+// -----------------------------------------------------------------------------
+module ptw #(
+    parameter int unsigned TIMEOUT_CYCLES = 256, // read-data wait limit per PTE fetch, in cycles
+    parameter int unsigned ADDR_WIDTH     = 32,
+    parameter int unsigned DATA_WIDTH     = 32,  // Sv32 PTE width
+    parameter int unsigned PPN_WIDTH      = 22   // Sv32 PTE stores 22-bit PPN in bits [31:10] (unused below)
+) (
+    input  logic                  clk_i,
+    input  logic                  rst_ni,
+
+    // flush from sfence.vma to clear any in-flight walk
+    input  logic                  flush_i,
+
+    // walk request from mmu
+    input  logic                  walk_req_valid_i,
+    output logic                  walk_req_ready_o,
+    input  logic [ADDR_WIDTH-1:0] walk_req_addr_i,  // L1 table base address
+    input  logic [19:0]           walk_req_vpn_i,   // full VPN {vpn1,vpn0}
+
+    // walk response back to mmu
+    output logic                  walk_rsp_valid_o,
+    output logic [DATA_WIDTH-1:0] walk_rsp_pte_o,   // 32-bit leaf PTE
+    output logic                  walk_rsp_error_o, // fault/timeout
+
+    // axi-lite read interface for pte fetches
+    output logic                  axi_ar_valid_o,
+    output logic [ADDR_WIDTH-1:0] axi_ar_addr_o,
+    input  logic                  axi_ar_ready_i,
+    input  logic                  axi_r_valid_i,
+    input  logic [DATA_WIDTH-1:0] axi_r_data_i,
+    input  logic [1:0]            axi_r_resp_i
+);
+
+    typedef enum logic [2:0] {
+        IDLE,
+        SEND_L1,
+        WAIT_L1,
+        SEND_L2,
+        WAIT_L2,
+        DONE,
+        ERROR
+    } ptw_state_e;
+
+    ptw_state_e state_q, state_d;
+
+    // latched request info
+    logic [ADDR_WIDTH-1:0] base_addr_q;
+    logic [19:0]           vpn_q;
+
+    // split vpn for l1 / l2 table indexing
+    logic [9:0] vpn_l1;
+    logic [9:0] vpn_l2;
+
+    // pte registers
+    logic [DATA_WIDTH-1:0] pte_l1_q;
+    logic [DATA_WIDTH-1:0] pte_l2_q;
+
+    // track whether final leaf came from L2
+    logic used_l2_q, used_l2_d;
+
+    // timeout handling
+    logic [31:0] timeout_cnt_q, timeout_cnt_d;
+    logic        timeout_expired;
+
+    // computed pte addresses
+    logic [ADDR_WIDTH-1:0] l1_addr;
+    logic [ADDR_WIDTH-1:0] l2_base_addr;
+    logic [ADDR_WIDTH-1:0] l2_addr;
+
+    // alignment helpers based on pte size
+    localparam int unsigned PTE_SIZE_BYTES = (DATA_WIDTH / 8); // 4 for Sv32
+    localparam int unsigned PTE_ALIGN_BITS = (PTE_SIZE_BYTES > 1) ? $clog2(PTE_SIZE_BYTES) : 1;
+
+    logic l1_addr_misaligned;
+    logic l2_addr_misaligned;
+
+    // combinational PTE / AXI decode signals
+    logic axi_pte_invalid;
+    logic axi_pte_is_pointer;
+    logic axi_pte_is_leaf;
+    logic axi_pte_has_ad_fault;
+    logic axi_pte_superpage_misaligned;
+    logic axi_resp_access_fault;
+
+    // vpn splits
+    assign vpn_l1 = vpn_q[19:10];
+    assign vpn_l2 = vpn_q[9:0];
+
+    assign timeout_expired = (timeout_cnt_q >= TIMEOUT_CYCLES);
+
+    // invalid if V == 0, or if R == 0 and W == 1
+    assign axi_pte_invalid = !axi_r_data_i[0] || (!axi_r_data_i[1] && axi_r_data_i[2]);
+
+    // pointer if valid and both R/X are clear
+    assign axi_pte_is_pointer = axi_r_data_i[0] && !axi_r_data_i[1] && !axi_r_data_i[3];
+
+    // leaf if valid and either R or X is set
+    assign axi_pte_is_leaf = axi_r_data_i[0] && (axi_r_data_i[1] || axi_r_data_i[3]);
+
+    // PTW does not set A/D; treat A==0 as fault
+    assign axi_pte_has_ad_fault = !axi_r_data_i[6];
+
+    // For an L1 superpage leaf, lower 10 bits of the PPN must be zero.
+    // Full Sv32 PPN field is bits [31:10], so lower 10 PPN bits are PTE[19:10].
+    assign axi_pte_superpage_misaligned = |axi_r_data_i[19:10];
+
+    // AXI read access fault
+    assign axi_resp_access_fault = (axi_r_resp_i != 2'b00);
+
+    // l1 pte address: base_addr_q + (vpn_l1 * PTE_SIZE_BYTES)
+    assign l1_addr = base_addr_q + (ADDR_WIDTH'(vpn_l1) << PTE_ALIGN_BITS);
+
+    // l2 table base address from full 22-bit Sv32 PPN field.
+    // This produces a 34-bit physical base internally, but PTW address port is ADDR_WIDTH wide.
+    // Truncate explicitly to avoid implicit-width warnings.
+    assign l2_base_addr = ADDR_WIDTH'({pte_l1_q[31:10], 12'b0});
+
+    // l2 pte address
+    assign l2_addr = l2_base_addr + (ADDR_WIDTH'(vpn_l2) << PTE_ALIGN_BITS);
+
+    assign l1_addr_misaligned = |l1_addr[PTE_ALIGN_BITS-1:0];
+    assign l2_addr_misaligned = |l2_addr[PTE_ALIGN_BITS-1:0];
+
+    // FSM + outputs
+    always_comb begin
+        state_d = state_q;
+
+        // Never advertise ready while a flush is being applied: the sequential
+        // block gives flush_i priority and would drop a request accepted in the
+        // same cycle, leaving the requester waiting for a response that never comes.
+        walk_req_ready_o = (state_q == IDLE) && !flush_i;
+
+        walk_rsp_valid_o = 1'b0;
+        walk_rsp_pte_o   = '0;
+        walk_rsp_error_o = 1'b0;
+
+        axi_ar_valid_o = 1'b0;
+        axi_ar_addr_o  = '0;
+
+        timeout_cnt_d = timeout_cnt_q;
+        used_l2_d     = used_l2_q;
+
+        // count only while waiting for read data; saturate at the limit
+        if (state_q == WAIT_L1 || state_q == WAIT_L2) begin
+            if (!timeout_expired)
+                timeout_cnt_d = timeout_cnt_q + 1;
+        end else begin
+            timeout_cnt_d = '0;
+        end
+
+        unique case (state_q)
+            IDLE: begin
+                if (walk_req_valid_i && walk_req_ready_o) begin
+                    used_l2_d = 1'b0;
+                    state_d   = SEND_L1;
+                end
+            end
+
+            SEND_L1: begin
+                if (l1_addr_misaligned) begin
+                    state_d = ERROR;
+                end else begin
+                    // hold the address request until the slave accepts it
+                    axi_ar_valid_o = 1'b1;
+                    axi_ar_addr_o  = l1_addr;
+                    if (axi_ar_ready_i)
+                        state_d = WAIT_L1;
+                end
+            end
+
+            WAIT_L1: begin
+                if (axi_r_valid_i) begin
+                    if (axi_resp_access_fault)
+                        state_d = ERROR;
+                    else if (axi_pte_invalid)
+                        state_d = ERROR;
+                    else if (axi_pte_is_leaf) begin
+                        // level-1 leaf is a superpage mapping
+                        if (axi_pte_superpage_misaligned || axi_pte_has_ad_fault)
+                            state_d = ERROR;
+                        else begin
+                            used_l2_d = 1'b0;
+                            state_d   = DONE;
+                        end
+                    end else if (axi_pte_is_pointer) begin
+                        state_d = SEND_L2;
+                    end else begin
+                        state_d = ERROR;
+                    end
+                end else if (timeout_expired) begin
+                    state_d = ERROR;
+                end
+            end
+
+            SEND_L2: begin
+                if (l2_addr_misaligned) begin
+                    state_d = ERROR;
+                end else begin
+                    axi_ar_valid_o = 1'b1;
+                    axi_ar_addr_o  = l2_addr;
+                    if (axi_ar_ready_i)
+                        state_d = WAIT_L2;
+                end
+            end
+
+            WAIT_L2: begin
+                if (axi_r_valid_i) begin
+                    if (axi_resp_access_fault)
+                        state_d = ERROR;
+                    else if (axi_pte_invalid)
+                        state_d = ERROR;
+                    else if (axi_pte_is_leaf) begin
+                        if (axi_pte_has_ad_fault)
+                            state_d = ERROR;
+                        else begin
+                            used_l2_d = 1'b1;
+                            state_d   = DONE;
+                        end
+                    end else begin
+                        // a pointer at the last level cannot be followed
+                        state_d = ERROR;
+                    end
+                end else if (timeout_expired) begin
+                    state_d = ERROR;
+                end
+            end
+
+            DONE: begin
+                walk_rsp_valid_o = 1'b1;
+                walk_rsp_pte_o   = used_l2_q ? pte_l2_q : pte_l1_q;
+                walk_rsp_error_o = 1'b0;
+                state_d          = IDLE;
+            end
+
+            ERROR: begin
+                walk_rsp_valid_o = 1'b1;
+                walk_rsp_pte_o   = '0;
+                walk_rsp_error_o = 1'b1;
+                state_d          = IDLE;
+            end
+
+            default: begin
+                state_d = IDLE;
+            end
+        endcase
+
+        // flush overrides everything above: no response, no bus request
+        if (flush_i) begin
+            state_d          = IDLE;
+            walk_rsp_valid_o = 1'b0;
+            walk_rsp_error_o = 1'b0;
+            axi_ar_valid_o   = 1'b0;
+            used_l2_d        = 1'b0;
+            timeout_cnt_d    = '0;
+        end
+    end
+
+    // sequential state
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+            state_q       <= IDLE;
+            base_addr_q   <= '0;
+            vpn_q         <= '0;
+            pte_l1_q      <= '0;
+            pte_l2_q      <= '0;
+            used_l2_q     <= 1'b0;
+            timeout_cnt_q <= '0;
+        end else if (flush_i) begin
+            state_q       <= IDLE;
+            base_addr_q   <= '0;
+            vpn_q         <= '0;
+            pte_l1_q      <= '0;
+            pte_l2_q      <= '0;
+            used_l2_q     <= 1'b0;
+            timeout_cnt_q <= '0;
+        end else begin
+            state_q       <= state_d;
+            timeout_cnt_q <= timeout_cnt_d;
+            used_l2_q     <= used_l2_d;
+
+            // latch the request on the accept handshake
+            if (state_q == IDLE && walk_req_valid_i && walk_req_ready_o) begin
+                base_addr_q <= walk_req_addr_i;
+                vpn_q       <= walk_req_vpn_i;
+                pte_l1_q    <= '0;
+                pte_l2_q    <= '0;
+            end
+
+            // capture fetched PTEs unless the read response signalled an error
+            if (state_q == WAIT_L1 && axi_r_valid_i && !axi_resp_access_fault)
+                pte_l1_q <= axi_r_data_i;
+
+            if (state_q == WAIT_L2 && axi_r_valid_i && !axi_resp_access_fault)
+                pte_l2_q <= axi_r_data_i;
+        end
+    end
+
+endmodule
\ No newline at end of file
diff --git a/rtl/cpu/mmu/ptw_tb.sv b/rtl/cpu/mmu/ptw_tb.sv
new file mode 100644
index 0000000..f7ea7e9
--- /dev/null
+++ b/rtl/cpu/mmu/ptw_tb.sv
@@ -0,0 +1,682 @@
+`timescale 1ns/1ps
+
+module testbench;
+
+    localparam int TIMEOUT_CYCLES = 8;
+    localparam int ADDR_WIDTH = 32;
+    localparam int DATA_WIDTH = 32;
+    localparam int PPN_WIDTH = (ADDR_WIDTH - 12);
+
+    logic clk_i;
+    logic rst_ni;
+    logic flush_i;
+    logic walk_req_valid_i;
+    logic
walk_req_ready_o; + logic [ADDR_WIDTH-1:0] walk_req_addr_i; + logic [19:0] walk_req_vpn_i; + logic walk_rsp_valid_o; + logic [DATA_WIDTH-1:0] walk_rsp_pte_o; + logic walk_rsp_error_o; + logic axi_ar_valid_o; + logic [ADDR_WIDTH-1:0] axi_ar_addr_o; + logic axi_ar_ready_i; + logic axi_r_valid_i; + logic [DATA_WIDTH-1:0] axi_r_data_i; + logic [1:0] axi_r_resp_i; + + int tests_run; + int tests_passed; + + ptw #( + .TIMEOUT_CYCLES(TIMEOUT_CYCLES), + .ADDR_WIDTH (ADDR_WIDTH), + .DATA_WIDTH (DATA_WIDTH), + .PPN_WIDTH (PPN_WIDTH) + ) dut ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .walk_req_valid_i(walk_req_valid_i), + .walk_req_ready_o(walk_req_ready_o), + .walk_req_addr_i (walk_req_addr_i), + .walk_req_vpn_i (walk_req_vpn_i), + .walk_rsp_valid_o(walk_rsp_valid_o), + .walk_rsp_pte_o (walk_rsp_pte_o), + .walk_rsp_error_o(walk_rsp_error_o), + .axi_ar_valid_o (axi_ar_valid_o), + .axi_ar_addr_o (axi_ar_addr_o), + .axi_ar_ready_i (axi_ar_ready_i), + .axi_r_valid_i (axi_r_valid_i), + .axi_r_data_i (axi_r_data_i), + .axi_r_resp_i (axi_r_resp_i) + ); + + always #5 clk_i = ~clk_i; + + task automatic clear_inputs; + begin + flush_i = 1'b0; + walk_req_valid_i = 1'b0; + walk_req_addr_i = '0; + walk_req_vpn_i = '0; + axi_ar_ready_i = 1'b0; + axi_r_valid_i = 1'b0; + axi_r_data_i = '0; + axi_r_resp_i = 2'b00; + end + endtask + + task automatic tick; + begin + @(posedge clk_i); + #1; + end + endtask + + task automatic reset_dut; + begin + clear_inputs(); + rst_ni = 1'b0; + repeat (3) tick(); + rst_ni = 1'b1; + repeat (2) tick(); + end + endtask + + task automatic check_bit; + input string name; + input logic actual; + input logic expected; + begin + if (actual !== expected) begin + $display("FAIL: %s expected=%0b actual=%0b at t=%0t", name, expected, actual, $time); + $fatal(1); + end + end + endtask + + task automatic check_word; + input string name; + input logic [31:0] actual; + input logic [31:0] expected; + begin + if (actual !== expected) begin + $display("FAIL: 
%s expected=0x%08x actual=0x%08x at t=%0t", name, expected, actual, $time); + $fatal(1); + end + end + endtask + + task automatic start_walk; + input logic [31:0] base_addr; + input logic [19:0] vpn; + begin + check_bit("walk_req_ready before request", walk_req_ready_o, 1'b1); + walk_req_addr_i = base_addr; + walk_req_vpn_i = vpn; + walk_req_valid_i = 1'b1; + tick(); + walk_req_valid_i = 1'b0; + walk_req_addr_i = '0; + walk_req_vpn_i = '0; + end + endtask + + task automatic expect_ar_handshake; + input logic [31:0] expected_addr; + input int stall_cycles; + begin + repeat (stall_cycles) begin + check_bit("axi_ar_valid during stall", axi_ar_valid_o, 1'b1); + check_word("axi_ar_addr during stall", axi_ar_addr_o, expected_addr); + axi_ar_ready_i = 1'b0; + tick(); + end + + check_bit("axi_ar_valid before handshake", axi_ar_valid_o, 1'b1); + check_word("axi_ar_addr before handshake", axi_ar_addr_o, expected_addr); + axi_ar_ready_i = 1'b1; + tick(); + axi_ar_ready_i = 1'b0; + end + endtask + + task automatic drive_read; + input logic [31:0] data; + input logic [1:0] resp; + input int wait_cycles; + begin + repeat (wait_cycles) tick(); + axi_r_data_i = data; + axi_r_resp_i = resp; + axi_r_valid_i = 1'b1; + tick(); + axi_r_valid_i = 1'b0; + axi_r_data_i = '0; + axi_r_resp_i = 2'b00; + end + endtask + + task automatic expect_response; + input logic expected_error; + input logic [31:0] expected_pte; + begin + check_bit("walk_rsp_valid", walk_rsp_valid_o, 1'b1); + check_bit("walk_rsp_error", walk_rsp_error_o, expected_error); + check_word("walk_rsp_pte", walk_rsp_pte_o, expected_pte); + tick(); + check_bit("walk_req_ready after response", walk_req_ready_o, 1'b1); + check_bit("walk_rsp_valid cleared", walk_rsp_valid_o, 1'b0); + end + endtask + + task automatic expect_no_response_for; + input int cycles; + begin + repeat (cycles) begin + check_bit("walk_rsp_valid should stay low", walk_rsp_valid_o, 1'b0); + tick(); + end + end + endtask + + function automatic [31:0] 
mk_pointer_pte; + input logic [21:0] next_ppn; + begin + // valid pointer: V=1, R=0, W=0, X=0 + mk_pointer_pte = {next_ppn, 10'b0000000001}; + end + endfunction + + function automatic [31:0] mk_leaf_pte; + input logic [21:0] ppn; + input logic r; + input logic w; + input logic x; + input logic a; + input logic d; + begin + // [31:10]=PPN, low bits include D A G U X W R V at [7:0] + mk_leaf_pte = {ppn, 2'b00, d, a, 1'b0, 1'b0, x, w, r, 1'b1}; + end + endfunction + + task automatic run_test_two_level_success; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [9:0] vpn0; + logic [31:0] l1_addr; + logic [31:0] l2_addr; + logic [31:0] l1_pte; + logic [31:0] l2_pte; + begin + $display("TEST: two_level_success"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0000_4000; + vpn = 20'h155AA; + vpn1 = vpn[19:10]; + vpn0 = vpn[9:0]; + l1_addr = base_addr + (32'(vpn1) << 2); + l1_pte = mk_pointer_pte(22'h00080); + l2_addr = {l1_pte[29:10], 12'b0} + (32'(vpn0) << 2); + l2_pte = mk_leaf_pte(22'h2ABCD, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1); + + start_walk(base_addr, vpn); + check_bit("busy after request", walk_req_ready_o, 1'b0); + expect_ar_handshake(l1_addr, 2); + drive_read(l1_pte, 2'b00, 2); + expect_ar_handshake(l2_addr, 1); + drive_read(l2_pte, 2'b00, 1); + expect_response(1'b0, l2_pte); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_l1_superpage_success; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [31:0] l1_addr; + logic [31:0] l1_pte; + begin + $display("TEST: l1_superpage_success"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0000_8000; + vpn = 20'h0C321; + vpn1 = vpn[19:10]; + l1_addr = base_addr + (32'(vpn1) << 2); + l1_pte = mk_leaf_pte(22'h154000, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(l1_pte, 2'b00, 0); + expect_response(1'b0, l1_pte); + tests_passed = tests_passed + 1; + end + 
endtask + + task automatic run_test_invalid_v_on_l1; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [31:0] l1_addr; + logic [31:0] bad_pte; + begin + $display("TEST: invalid_v_on_l1"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0000_A000; + vpn = 20'h11111; + vpn1 = vpn[19:10]; + l1_addr = base_addr + (32'(vpn1) << 2); + bad_pte = 32'h0000_0000; + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(bad_pte, 2'b00, 0); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_invalid_r0w1_on_l1; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [31:0] l1_addr; + logic [31:0] bad_pte; + begin + $display("TEST: invalid_r0w1_on_l1"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0000_B000; + vpn = 20'h22222; + vpn1 = vpn[19:10]; + l1_addr = base_addr + (32'(vpn1) << 2); + bad_pte = 32'h0000_0005; // V=1, R=0, W=1 + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(bad_pte, 2'b00, 0); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_axi_fault_on_l1; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [31:0] l1_addr; + begin + $display("TEST: axi_fault_on_l1"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0000_C000; + vpn = 20'h33333; + vpn1 = vpn[19:10]; + l1_addr = base_addr + (32'(vpn1) << 2); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 1); + drive_read(32'hDEAD_BEEF, 2'b10, 0); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_ad_fault_on_l1_leaf; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [31:0] l1_addr; + logic [31:0] l1_pte; + begin + $display("TEST: ad_fault_on_l1_leaf"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 
32'h0000_D000; + vpn = 20'h44444; + vpn1 = vpn[19:10]; + l1_addr = base_addr + (32'(vpn1) << 2); + l1_pte = mk_leaf_pte(22'h088000, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(l1_pte, 2'b00, 0); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_superpage_misaligned_error; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [31:0] l1_addr; + logic [31:0] l1_pte; + begin + $display("TEST: superpage_misaligned_error"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0000_E000; + vpn = 20'h55555; + vpn1 = vpn[19:10]; + l1_addr = base_addr + (32'(vpn1) << 2); + l1_pte = mk_leaf_pte(22'h000123, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(l1_pte, 2'b00, 0); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_l2_ad_fault; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [9:0] vpn0; + logic [31:0] l1_addr; + logic [31:0] l2_addr; + logic [31:0] l1_pte; + logic [31:0] l2_pte; + begin + $display("TEST: l2_ad_fault"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0001_0000; + vpn = 20'h66666; + vpn1 = vpn[19:10]; + vpn0 = vpn[9:0]; + l1_addr = base_addr + (32'(vpn1) << 2); + l1_pte = mk_pointer_pte(22'h00090); + l2_addr = {l1_pte[29:10], 12'b0} + (32'(vpn0) << 2); + l2_pte = mk_leaf_pte(22'h012345, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(l1_pte, 2'b00, 0); + expect_ar_handshake(l2_addr, 0); + drive_read(l2_pte, 2'b00, 0); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_invalid_l2_nonleaf; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [9:0] vpn0; + logic [31:0] 
l1_addr; + logic [31:0] l2_addr; + logic [31:0] l1_pte; + logic [31:0] l2_pte; + begin + $display("TEST: invalid_l2_nonleaf"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0001_1000; + vpn = 20'h77777; + vpn1 = vpn[19:10]; + vpn0 = vpn[9:0]; + l1_addr = base_addr + (32'(vpn1) << 2); + l1_pte = mk_pointer_pte(22'h000A0); + l2_addr = {l1_pte[29:10], 12'b0} + (32'(vpn0) << 2); + l2_pte = mk_pointer_pte(22'h000B0); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(l1_pte, 2'b00, 1); + expect_ar_handshake(l2_addr, 0); + drive_read(l2_pte, 2'b00, 0); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_axi_fault_on_l2; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [9:0] vpn0; + logic [31:0] l1_addr; + logic [31:0] l2_addr; + logic [31:0] l1_pte; + begin + $display("TEST: axi_fault_on_l2"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0001_2000; + vpn = 20'h88888; + vpn1 = vpn[19:10]; + vpn0 = vpn[9:0]; + l1_addr = base_addr + (32'(vpn1) << 2); + l1_pte = mk_pointer_pte(22'h000C0); + l2_addr = {l1_pte[29:10], 12'b0} + (32'(vpn0) << 2); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(l1_pte, 2'b00, 0); + expect_ar_handshake(l2_addr, 2); + drive_read(32'hCAFE_BABE, 2'b11, 0); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_timeout_on_l1; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [31:0] l1_addr; + int i; + begin + $display("TEST: timeout_on_l1"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0001_3000; + vpn = 20'h99999; + vpn1 = vpn[19:10]; + l1_addr = base_addr + (32'(vpn1) << 2); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + for (i = 0; i < TIMEOUT_CYCLES + 1; i = i + 1) + expect_no_response_for(1); + expect_response(1'b1, 32'h0000_0000); + 
tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_timeout_on_l2; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [9:0] vpn0; + logic [31:0] l1_addr; + logic [31:0] l2_addr; + logic [31:0] l1_pte; + int i; + begin + $display("TEST: timeout_on_l2"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0001_4000; + vpn = 20'hAAAAA; + vpn1 = vpn[19:10]; + vpn0 = vpn[9:0]; + l1_addr = base_addr + (32'(vpn1) << 2); + l1_pte = mk_pointer_pte(22'h000D0); + l2_addr = {l1_pte[29:10], 12'b0} + (32'(vpn0) << 2); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(l1_pte, 2'b00, 0); + expect_ar_handshake(l2_addr, 0); + for (i = 0; i < TIMEOUT_CYCLES + 1; i = i + 1) + expect_no_response_for(1); + expect_response(1'b1, 32'h0000_0000); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_flush_while_waiting_l1; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [31:0] l1_addr; + begin + $display("TEST: flush_while_waiting_l1"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0001_5000; + vpn = 20'hBBBBB; + vpn1 = vpn[19:10]; + l1_addr = base_addr + (32'(vpn1) << 2); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + check_bit("no response before flush", walk_rsp_valid_o, 1'b0); + flush_i = 1'b1; + tick(); + flush_i = 1'b0; + check_bit("ready after flush l1", walk_req_ready_o, 1'b1); + check_bit("no response generated by flush l1", walk_rsp_valid_o, 1'b0); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_flush_while_waiting_l2; + logic [31:0] base_addr; + logic [19:0] vpn; + logic [9:0] vpn1; + logic [9:0] vpn0; + logic [31:0] l1_addr; + logic [31:0] l2_addr; + logic [31:0] l1_pte; + begin + $display("TEST: flush_while_waiting_l2"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr = 32'h0001_6000; + vpn = 20'hCCCCC; + vpn1 = vpn[19:10]; + vpn0 = vpn[9:0]; + l1_addr = 
base_addr + (32'(vpn1) << 2); + l1_pte = mk_pointer_pte(22'h000E0); + l2_addr = {l1_pte[29:10], 12'b0} + (32'(vpn0) << 2); + + start_walk(base_addr, vpn); + expect_ar_handshake(l1_addr, 0); + drive_read(l1_pte, 2'b00, 0); + expect_ar_handshake(l2_addr, 0); + flush_i = 1'b1; + tick(); + flush_i = 1'b0; + check_bit("ready after flush l2", walk_req_ready_o, 1'b1); + check_bit("no response generated by flush l2", walk_rsp_valid_o, 1'b0); + tests_passed = tests_passed + 1; + end + endtask + + task automatic run_test_single_outstanding_request; + logic [31:0] base_addr_1; + logic [31:0] base_addr_2; + logic [19:0] vpn_1; + logic [19:0] vpn_2; + logic [9:0] vpn1_1; + logic [9:0] vpn0_1; + logic [31:0] l1_addr_1; + logic [31:0] l2_addr_1; + logic [31:0] l1_pte_1; + logic [31:0] l2_pte_1; + begin + $display("TEST: single_outstanding_request"); + tests_run = tests_run + 1; + reset_dut(); + + base_addr_1 = 32'h0001_7000; + base_addr_2 = 32'h0002_7000; + vpn_1 = 20'h12345; + vpn_2 = 20'h54321; + vpn1_1 = vpn_1[19:10]; + vpn0_1 = vpn_1[9:0]; + l1_addr_1 = base_addr_1 + (32'(vpn1_1) << 2); + l1_pte_1 = mk_pointer_pte(22'h000F0); + l2_addr_1 = {l1_pte_1[29:10], 12'b0} + (32'(vpn0_1) << 2); + l2_pte_1 = mk_leaf_pte(22'h01ABCDE, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1); + + start_walk(base_addr_1, vpn_1); + expect_ar_handshake(l1_addr_1, 0); + + // Try to present a second request while busy. DUT should not accept it. + walk_req_addr_i = base_addr_2; + walk_req_vpn_i = vpn_2; + walk_req_valid_i = 1'b1; + check_bit("not ready while busy", walk_req_ready_o, 1'b0); + tick(); + walk_req_valid_i = 1'b0; + walk_req_addr_i = '0; + walk_req_vpn_i = '0; + + drive_read(l1_pte_1, 2'b00, 0); + expect_ar_handshake(l2_addr_1, 0); + drive_read(l2_pte_1, 2'b00, 0); + expect_response(1'b0, l2_pte_1); + + // Confirm DUT can accept a new request only after finishing. 
+ check_bit("ready for next request", walk_req_ready_o, 1'b1); + tests_passed = tests_passed + 1; + end + endtask + + initial begin + clk_i = 1'b0; + rst_ni = 1'b0; + tests_run = 0; + tests_passed = 0; + clear_inputs(); + + run_test_two_level_success(); + run_test_l1_superpage_success(); + run_test_invalid_v_on_l1(); + run_test_invalid_r0w1_on_l1(); + run_test_axi_fault_on_l1(); + run_test_ad_fault_on_l1_leaf(); + run_test_superpage_misaligned_error(); + run_test_l2_ad_fault(); + run_test_invalid_l2_nonleaf(); + run_test_axi_fault_on_l2(); + run_test_timeout_on_l1(); + run_test_timeout_on_l2(); + run_test_flush_while_waiting_l1(); + run_test_flush_while_waiting_l2(); + run_test_single_outstanding_request(); + + $display("----------------------------------------"); + $display("PTW extensive testbench complete: %0d/%0d tests passed", tests_passed, tests_run); + $display("----------------------------------------"); + $finish; + end + +endmodule \ No newline at end of file diff --git a/rtl/cpu/mmu/sv32_mmu.sv b/rtl/cpu/mmu/sv32_mmu.sv index caf696e..69acee5 100644 --- a/rtl/cpu/mmu/sv32_mmu.sv +++ b/rtl/cpu/mmu/sv32_mmu.sv @@ -1,140 +1,346 @@ -// ----------------------------------------------------------------------------- -// sv32_mmu.sv -// Sv32 Memory Management Unit (MMU) -// One-pager implementation skeleton -// ----------------------------------------------------------------------------- -// -// Module performs Sv32 virtual to physical translation using an external -// PTW (Page Table Walker) and a TLB. This file is an early state template -// based on the one pager specification. 
-// -// ----------------------------------------------------------------------------- - module sv32_mmu #( - parameter int TLB_ENTRIES = 16, - parameter int PAGE_SIZE = 4096, + parameter int TLB_ENTRIES = 16, + parameter int PAGE_SIZE = 4096, parameter int PTW_TIMEOUT_CYCLES = 256, - parameter int ADDR_WIDTH = 32, - parameter int PADDR_WIDTH = 34 + parameter int ADDR_WIDTH = 32, + parameter int PADDR_WIDTH = 34 )( - input logic clk_i, - input logic rst_ni, + input logic clk_i, + input logic rst_ni, + + input logic [ADDR_WIDTH-1:0] va_i, + input logic valid_i, + output logic ready_o, - // Translation request from CPU - input logic [ADDR_WIDTH-1:0] va_i, - input logic valid_i, - output logic ready_o, + input logic [1:0] access_i, - // Translated physical address to CPU output logic [PADDR_WIDTH-1:0] pa_o, - // PTW interface (external module) - output logic ptw_req_valid_o, - output logic [ADDR_WIDTH-1:0] ptw_req_addr_o, - input logic ptw_rsp_valid_i, - input logic [63:0] ptw_rsp_data_i, + output logic fault_o, + output logic [1:0] fault_cause_o, + output logic fault_timeout_o, + output logic [ADDR_WIDTH-1:0] fault_va_o, + + // Clean PTW interface + output logic ptw_req_valid_o, + input logic ptw_req_ready_i, + output logic [ADDR_WIDTH-1:0] ptw_req_root_addr_o, + output logic [19:0] ptw_req_vpn_o, + + input logic ptw_rsp_valid_i, + input logic [31:0] ptw_rsp_pte_i, + input logic ptw_rsp_error_i, - // CSR / privilege inputs - input logic [31:0] satp_i, - input logic [1:0] priv_i + input logic [31:0] satp_i, + input logic [1:0] priv_i, + + input logic sum_i, + input logic mxr_i, + input logic uxn_i, + + input logic sfence_vma_i ); - // Internal Types & Signals - // ------------------------- + localparam int OFFSET_BITS = 12; + localparam int VPN_BITS = ADDR_WIDTH - OFFSET_BITS; // 20 for Sv32 + localparam int PPN_BITS = PADDR_WIDTH - OFFSET_BITS; // 22 for 34-bit PA + + localparam logic [1:0] PRIV_U = 2'b00; + localparam logic [1:0] PRIV_S = 2'b01; + localparam logic 
[1:0] PRIV_M = 2'b11; + + localparam logic [1:0] ACC_LOAD = 2'b00; + localparam logic [1:0] ACC_STORE = 2'b01; + localparam logic [1:0] ACC_FETCH = 2'b10; + + wire satp_mode_sv32 = satp_i[31]; + + localparam int TO_W = $clog2(PTW_TIMEOUT_CYCLES + 1); + localparam logic [TO_W-1:0] TIMEOUT_MAX = TO_W'(PTW_TIMEOUT_CYCLES); typedef enum logic [1:0] { IDLE, TLB_LOOKUP, - PTW_WAIT, - OUTPUT_RESULT + PTW_REQ, + PTW_WAIT } mmu_state_e; - mmu_state_e state_d, state_q; + mmu_state_e state_q, state_d; + + logic [ADDR_WIDTH-1:0] va_q; + logic [1:0] access_q; + + logic [PADDR_WIDTH-1:0] pa_q; + logic fault_q; + logic [1:0] cause_q; + logic timeout_q; - logic tlb_hit; - logic [PADDR_WIDTH-1:0] tlb_pa; + logic [TO_W-1:0] to_cnt_q, to_cnt_d; - logic miss_detected; + // TLB signals + logic tlb_lookup_ready_o_w; + logic tlb_lookup_hit_o_w; + logic [PADDR_WIDTH-1:0] tlb_lookup_pa_o_w; + logic tlb_miss_o_w; - // TLB Instance (placeholder) - // -------------------------- + logic tlb_insert_valid_i_w; + logic [VPN_BITS-1:0] tlb_insert_vpn_i_w; + logic [PPN_BITS-1:0] tlb_insert_ppn_i_w; + logic [7:0] tlb_insert_perm_i_w; - // NOTE: - // Replace this with the actual TLB module - // from rtl/cpu/mmu/tlb.sv and hook up ports accordingly. 
+ // PTW response decode signals (PTE copy, leaf-permission result, A/D-fault flag) + logic [31:0] pte_rsp_w; + logic ptw_leaf_ok_w; + logic ptw_needs_ad_w; - // tlb #(.ENTRIES(TLB_ENTRIES)) u_tlb ( - // .clk_i(clk_i), - // .rst_ni(rst_ni), - // .lookup_va_i(va_i), - // .lookup_valid_i(valid_i), - // .lookup_ready_o(), - // .lookup_hit_o(tlb_hit), - // .lookup_pa_o(tlb_pa), - // .miss_o(miss_detected), - // .insert_valid_i(), - // .insert_vpn_i(), - // .insert_ppn_i(), - // .insert_perm_i(), - // .flush_i(1'b0) - // ); + // Fields of the latched request address va_q: virtual page number and page offset + wire [VPN_BITS-1:0] vpn_q = va_q[ADDR_WIDTH-1:OFFSET_BITS]; + wire [OFFSET_BITS-1:0] off_q = va_q[OFFSET_BITS-1:0]; + + // SATP root page table base address + // Sv32: root PPN is SATP[21:0]; only PPN[19:0] is used here, giving the page-aligned byte address {PPN[19:0], 12'b0} + wire [ADDR_WIDTH-1:0] satp_root_addr_w = {satp_i[19:0], 12'b0}; + + tlb #( + .ENTRIES (TLB_ENTRIES), + .PAGE_SIZE (PAGE_SIZE), + .ADDR_WIDTH (ADDR_WIDTH), + .PADDR_WIDTH (PADDR_WIDTH) + ) tlb_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .lookup_va_i (va_q), + .lookup_valid_i (state_q == TLB_LOOKUP), + .lookup_ready_o (tlb_lookup_ready_o_w), + .lookup_hit_o (tlb_lookup_hit_o_w), + .lookup_pa_o (tlb_lookup_pa_o_w), + .miss_o (tlb_miss_o_w), + .insert_valid_i (tlb_insert_valid_i_w), + .insert_vpn_i (tlb_insert_vpn_i_w), + .insert_ppn_i (tlb_insert_ppn_i_w), + .insert_perm_i (tlb_insert_perm_i_w), + .flush_i (sfence_vma_i) + ); + + function automatic logic [PADDR_WIDTH-1:0] make_pa( + input logic [PPN_BITS-1:0] ppn, + input logic [OFFSET_BITS-1:0] off_i + ); + logic [PADDR_WIDTH-1:0] tmp; + begin + tmp = '0; + tmp[PADDR_WIDTH-1:OFFSET_BITS] = ppn; + tmp[OFFSET_BITS-1:0] = off_i; + make_pa = tmp; + end + endfunction - // PTW Interface Logic (template) - // ------------------------------ + function automatic logic [1:0] pf_cause(input logic [1:0] acc); + begin + case (acc) + ACC_FETCH: pf_cause = 2'b00; + ACC_LOAD: pf_cause = 2'b01; + default: pf_cause = 2'b10; // ACC_STORE and the unassigned 2'b11 encoding + endcase + end + endfunction + + function automatic logic sv32_leaf_ok( + input logic [7:0] flags, + input logic [1:0] 
priv, + input logic [1:0] acc, + input logic sum, + input logic mxr, + input logic uxn, + output logic needs_ad_fault + ); + logic V, R, W, X, U, G, A, D; + logic ok; + begin + {D, A, G, U, X, W, R, V} = flags[7:0]; // flags[7:0] = D A G U X W R V + ok = 1'b1; + needs_ad_fault = 1'b0; + + if (!V) ok = 1'b0; + if (W && !R) ok = 1'b0; // writable-but-not-readable is rejected + + if (priv == PRIV_U) begin + if (!U) ok = 1'b0; + end + else if (priv == PRIV_S) begin + if (U && (acc != ACC_FETCH) && !sum) ok = 1'b0; + if (U && (acc == ACC_FETCH) && uxn) ok = 1'b0; // NOTE(review): the RISC-V privileged spec forbids S-mode execution of U pages regardless of SUM; with uxn=0 it is allowed here - confirm intent + end + + if (acc == ACC_FETCH) begin + if (!X) ok = 1'b0; + end + else if (acc == ACC_STORE) begin + if (!W) ok = 1'b0; + end + else begin + if (!(R || (mxr && X))) ok = 1'b0; // loads: readable, or executable when mxr is set + end + + if (!A) begin + ok = 1'b0; + needs_ad_fault = 1'b1; + end + if ((acc == ACC_STORE) && !D) begin + ok = 1'b0; + needs_ad_fault = 1'b1; + end + + sv32_leaf_ok = ok; + end + endfunction - // For assignment submission: - // MMU will assert ptw_req_valid_o on a miss and wait for ptw_rsp_valid_i. - // - // Real logic will be added later by your PTW / MMU teammates. 
+ // PTW request outputs - assign ptw_req_valid_o = (state_q == PTW_WAIT); - assign ptw_req_addr_o = va_i; // placeholder: real implementation extracts VPN + assign ptw_req_valid_o = (state_q == PTW_REQ); + assign ptw_req_root_addr_o = satp_root_addr_w; + assign ptw_req_vpn_o = vpn_q; - // State Machine - // ------------- + // PTW response decode. The leaf check runs in always_comb because a function with an output argument may not be called from a continuous assignment (IEEE 1800 13.4). + + assign pte_rsp_w = ptw_rsp_pte_i; + always_comb ptw_leaf_ok_w = sv32_leaf_ok(pte_rsp_w[7:0], priv_i, access_q, sum_i, mxr_i, uxn_i, ptw_needs_ad_w); + assign tlb_insert_valid_i_w = (state_q == PTW_WAIT) && ptw_rsp_valid_i && !ptw_rsp_error_i && ptw_leaf_ok_w; + assign tlb_insert_vpn_i_w = vpn_q; + assign tlb_insert_ppn_i_w = pte_rsp_w[31:10]; + assign tlb_insert_perm_i_w = pte_rsp_w[7:0]; + + // Sequential state / result registers always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin - state_q <= IDLE; - end else begin - state_q <= state_d; + state_q <= IDLE; + va_q <= '0; + access_q <= '0; + pa_q <= '0; + fault_q <= 1'b0; + cause_q <= 2'b00; + timeout_q <= 1'b0; + to_cnt_q <= '0; + end + else begin + state_q <= state_d; + to_cnt_q <= to_cnt_d; + + // Latch a new request + if (state_q == IDLE && valid_i) begin + va_q <= va_i; + access_q <= access_i; + fault_q <= 1'b0; + timeout_q <= 1'b0; + cause_q <= pf_cause(access_i); + end + + // TLB hit or bypass + if (state_q == TLB_LOOKUP) begin + if (!satp_mode_sv32 || (priv_i == PRIV_M)) begin + pa_q <= {{(PADDR_WIDTH-ADDR_WIDTH){1'b0}}, va_q}; + fault_q <= 1'b0; + end + else if (tlb_lookup_hit_o_w) begin + // Assumption: a TLB hit implies an already validated translation. NOTE(review): permissions are not re-checked for this access type / privilege on a hit. 
+ pa_q <= tlb_lookup_pa_o_w; + fault_q <= 1'b0; + end + end + + // PTW response handling + if (state_q == PTW_WAIT && ptw_rsp_valid_i) begin + logic [31:0] pte; + logic ok, needs_ad; + + pte = ptw_rsp_pte_i; + ok = sv32_leaf_ok(pte[7:0], priv_i, access_q, sum_i, mxr_i, uxn_i, needs_ad); + + if (ptw_rsp_error_i) begin + pa_q <= '0; + fault_q <= 1'b1; + timeout_q <= 1'b0; + end + else if (ok) begin + pa_q <= make_pa(pte[31:10], off_q); // NOTE(review): for a level-1 superpage leaf PPN[0] should come from VPN[0]; the PTW response carries no level - confirm + fault_q <= 1'b0; + timeout_q <= 1'b0; + end + else begin + pa_q <= '0; + fault_q <= 1'b1; + timeout_q <= 1'b0; + end + end + + // PTW timeout; a response arriving in the same cycle wins so the reported result matches the TLB insert + if (state_q == PTW_WAIT && !ptw_rsp_valid_i && (to_cnt_q == TIMEOUT_MAX)) begin + pa_q <= '0; + fault_q <= 1'b1; + timeout_q <= 1'b1; + end end end + // Timeout counter: held at zero outside PTW_WAIT, counts up and saturates at TIMEOUT_MAX inside it + always_comb begin - state_d = state_q; - ready_o = 1'b0; - pa_o = '0; + to_cnt_d = to_cnt_q; + + if (state_q != PTW_WAIT) begin + to_cnt_d = '0; + end + else begin + if (to_cnt_q != TIMEOUT_MAX) + to_cnt_d = to_cnt_q + 1'b1; + end + end - case (state_q) + // Next-state / outputs. Result registers are written on the edge that ends the ready_o cycle, so pa_o/fault_o update one cycle after ready_o. + always_comb begin + state_d = state_q; + ready_o = 1'b0; + + pa_o = pa_q; + fault_o = fault_q; + fault_cause_o = cause_q; + fault_timeout_o = timeout_q; + fault_va_o = va_q; + + unique case (state_q) IDLE: begin if (valid_i) state_d = TLB_LOOKUP; end TLB_LOOKUP: begin - if (tlb_hit) begin - pa_o = tlb_pa; + if (!satp_mode_sv32 || (priv_i == PRIV_M) || tlb_lookup_hit_o_w) begin ready_o = 1'b1; - state_d = OUTPUT_RESULT; - end else begin - state_d = PTW_WAIT; // TLB miss -> request PTW + state_d = IDLE; + end + else begin + state_d = PTW_REQ; end end + PTW_REQ: begin + if (ptw_req_ready_i) + state_d = PTW_WAIT; + end + PTW_WAIT: begin - if (ptw_rsp_valid_i) begin - // Placeholder: real code inserts into TLB + checks permissions + if (ptw_rsp_valid_i || (to_cnt_q == TIMEOUT_MAX)) begin ready_o = 1'b1; - state_d = OUTPUT_RESULT; + state_d = IDLE; end end - OUTPUT_RESULT: begin - // End of translation, ready for next request + default: begin state_d = IDLE; end endcase end 
-endmodule + +endmodule \ No newline at end of file diff --git a/rtl/cpu/mmu/sv32_mmu_tb.sv b/rtl/cpu/mmu/sv32_mmu_tb.sv new file mode 100644 index 0000000..518d30b --- /dev/null +++ b/rtl/cpu/mmu/sv32_mmu_tb.sv @@ -0,0 +1,248 @@ +`timescale 1ns/1ps + +module tb; + + logic clk = 0; + logic rst_n = 0; + + always #5 clk = ~clk; + + // MMU-side stimulus + + logic [31:0] va; + logic valid; + logic ready; + logic [1:0] access; + logic [33:0] pa; + + logic fault; + logic [1:0] cause; + logic timeout; + logic [31:0] fault_va; + + logic [31:0] satp; + logic [1:0] priv; + logic sum, mxr, uxn; + logic sfence; + + // MMU <-> PTW + + logic ptw_req_valid; + logic ptw_req_ready; + logic [31:0] ptw_req_root_addr; + logic [19:0] ptw_req_vpn; + + logic ptw_rsp_valid; + logic [31:0] ptw_rsp_pte; + logic ptw_rsp_error; + + // PTW <-> fake AXI memory + + logic axi_ar_valid; + logic [31:0] axi_ar_addr; + logic axi_ar_ready; + logic axi_r_valid; + logic [31:0] axi_r_data; + logic [1:0] axi_r_resp; + + // real MMU + + sv32_mmu dut ( + .clk_i(clk), + .rst_ni(rst_n), + + .va_i(va), + .valid_i(valid), + .ready_o(ready), + .access_i(access), + + .pa_o(pa), + + .fault_o(fault), + .fault_cause_o(cause), + .fault_timeout_o(timeout), + .fault_va_o(fault_va), + + .ptw_req_valid_o(ptw_req_valid), + .ptw_req_ready_i(ptw_req_ready), + .ptw_req_root_addr_o(ptw_req_root_addr), + .ptw_req_vpn_o(ptw_req_vpn), + + .ptw_rsp_valid_i(ptw_rsp_valid), + .ptw_rsp_pte_i(ptw_rsp_pte), + .ptw_rsp_error_i(ptw_rsp_error), + + .satp_i(satp), + .priv_i(priv), + .sum_i(sum), + .mxr_i(mxr), + .uxn_i(uxn), + + .sfence_vma_i(sfence) + ); + + + // Real PTW + + ptw ptw_i ( + .clk_i(clk), + .rst_ni(rst_n), + .flush_i(sfence), + + .walk_req_valid_i(ptw_req_valid), + .walk_req_ready_o(ptw_req_ready), + .walk_req_addr_i(ptw_req_root_addr), + .walk_req_vpn_i(ptw_req_vpn), + + .walk_rsp_valid_o(ptw_rsp_valid), + .walk_rsp_pte_o(ptw_rsp_pte), + .walk_rsp_error_o(ptw_rsp_error), + + .axi_ar_valid_o(axi_ar_valid), + 
.axi_ar_addr_o(axi_ar_addr), + .axi_ar_ready_i(axi_ar_ready), + .axi_r_valid_i(axi_r_valid), + .axi_r_data_i(axi_r_data), + .axi_r_resp_i(axi_r_resp) + ); + + // Fake memory for PTW AXI reads: every read is answered after a fixed delay; root-page addresses return the L1 pointer, all others a leaf + + localparam logic [31:0] ROOT_BASE = 32'h0000_1000; + localparam logic [31:0] L2_BASE = 32'h0000_2000; + + logic mem_pending; + logic [31:0] mem_addr_q; + logic [1:0] mem_delay_q; + + assign axi_ar_ready = 1'b1; + + function automatic [31:0] make_l1_pointer(); + logic [21:0] ppn; + logic [7:0] flags; + begin + // pointer to level-2 page table at 0x2000: the PTE PPN field holds the table address >> 12 + ppn = {2'b00, L2_BASE[31:12]}; + flags = 8'b00000001; // V=1, non-leaf pointer + make_l1_pointer = {ppn, 2'b00, flags}; + end + endfunction + + function automatic [31:0] make_l2_leaf(input [31:0] req_addr); + logic [21:0] ppn; + logic [7:0] flags; + begin + // identity-like mapping based on PTW read address page + ppn = req_addr[31:10]; + + // V,R,W,X,U,G,A,D = [0]..[7] packed into low 8 bits + // valid R/W/X leaf with U=0, G=0 and A/D set + flags = 8'b11001111; + + make_l2_leaf = {ppn, 2'b00, flags}; + end + endfunction + + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + mem_pending <= 1'b0; + mem_addr_q <= '0; + mem_delay_q <= '0; + axi_r_valid <= 1'b0; + axi_r_data <= '0; + axi_r_resp <= 2'b00; + end else begin + axi_r_valid <= 1'b0; + + if (!mem_pending && axi_ar_valid && axi_ar_ready) begin + mem_pending <= 1'b1; + mem_addr_q <= axi_ar_addr; + mem_delay_q <= 2; + end else if (mem_pending) begin + if (mem_delay_q != 0) begin + mem_delay_q <= mem_delay_q - 1'b1; + end else begin + axi_r_valid <= 1'b1; + axi_r_resp <= 2'b00; + + if ((mem_addr_q >= ROOT_BASE) && (mem_addr_q < ROOT_BASE + 32'h1000)) + axi_r_data <= make_l1_pointer(); + else + axi_r_data <= make_l2_leaf(mem_addr_q); + + mem_pending <= 1'b0; + end + end + end + end + + // Request helper: one-cycle valid pulse, then ready is polled on falling edges so nothing is sampled or driven on the DUT's active edge + + task automatic send_req(input [31:0] addr, input [1:0] acc); + int cycles; + begin + @(negedge clk); // MMU is idle between calls; drive the request mid-cycle + va = addr; + access = acc; + valid = 1'b1; + cycles = 0; + 
while (!ready && cycles < 300) begin + @(negedge clk); + valid = 1'b0; // request was latched by the rising edge in between; do not re-issue it + cycles++; + end + if (!ready) begin + $display("FAIL: timeout waiting for MMU ready on VA=%h", addr); + $finish; + end + + @(negedge clk); // result registers are written by the rising edge that ends the ready cycle + $display("VA=%h PA=%h fault=%0d cause=%0d timeout=%0d", + addr, pa, fault, cause, timeout); + + valid = 1'b0; + @(negedge clk); // return on a falling edge with the MMU idle + end + endtask + + // Test sequence + + initial begin + va = 32'h0; + valid = 1'b0; + access = 2'b00; + + satp = 32'h8000_0001; // MODE=1, root PPN=1 => root base 0x1000 + priv = 2'b01; // S-mode + sum = 1'b1; + mxr = 1'b0; + uxn = 1'b0; + sfence = 1'b0; + + repeat (4) @(posedge clk); + rst_n = 1'b1; + repeat (2) @(posedge clk); + + // first access: TLB miss -> PTW walk + send_req(32'h0000_0123, 2'b10); + + // second access same page: should hit TLB + send_req(32'h0000_0128, 2'b10); + + // different page + send_req(32'h0000_2456, 2'b00); + + // flush and force another PTW walk (sfence is held across exactly one rising edge) + sfence = 1'b1; + @(negedge clk); + sfence = 1'b0; + @(negedge clk); + + send_req(32'h0000_0123, 2'b10); + + $display("TB DONE"); + #20; + $finish; + end + +endmodule \ No newline at end of file diff --git a/rtl/cpu/mmu/tlb.sv b/rtl/cpu/mmu/tlb.sv index e69de29..c3a5849 100644 --- a/rtl/cpu/mmu/tlb.sv +++ b/rtl/cpu/mmu/tlb.sv @@ -0,0 +1,252 @@ +`timescale 1ns / 1ps + + + +// tlb.sv Translation Lookaside Buffer +// RTL path: rtl/cpu/mmu/tlb.sv +// +// Purpose: +// - Cache virtual to physical translations for Sv32 MMU to avoid PTW walks. +// - Compare VPN tags against all entries. +// - On hit: returns physical address (PPN + page offset) in 1 cycle. +// - On miss: asserts miss_o so sv32_mmu can trigger external PTW. +// - Supports insert path (from sv32_mmu after PTW completes). +// - Supports global flush (SFENCE.VMA / SATP write). +// +// Notes: +// - Fully associative CAM-style lookup. +// - Uses true LRU with age counters. +// - Single clock domain. +// - Vivado-friendly SystemVerilog. 
// tlb: fully associative translation cache for the Sv32 MMU.
//
// Lookup (combinational):
//   - lookup_va_i is split into VPN (upper bits) and a 12-bit page offset.
//   - The VPN is compared against every valid entry's tag.
//   - Hit : lookup_hit_o = 1, lookup_pa_o = {PPN, offset}.
//   - Miss: miss_o = 1. Both outputs are only asserted while
//           lookup_valid_i && lookup_ready_o.
//   - lookup_ready_o is low while flush_i is high.
//
// Insert (registered on clk_i):
//   - If a valid entry already holds insert_vpn_i it is overwritten in place,
//     so the CAM never contains two valid entries with the same tag.
//   - Otherwise the first invalid slot is used, else the LRU slot.
//
// Replacement:
//   - Rank-based LRU. Among valid entries age_q is a permutation of
//     0..(number of valid entries - 1); 0 = most recently used.
//   - Touching an entry (insert or lookup hit) sets its age to 0 and ages by
//     one only the entries that were younger than it, so the ordering is
//     exact and no counter can overflow or saturate.
//   - When an insert and a lookup hit coincide, the insert wins and the hit
//     does not update the ages in that cycle.
//
// Flush:
//   - flush_i has priority over insert and clears every entry.
//
// Note: perm_q is stored on insert but is not driven to any port in this
//       version of the module.
module tlb #(

    // Parameters (from one-pager)
    parameter int ENTRIES     = 16,    // # of cached entries
    parameter int PAGE_SIZE   = 4096,  // base page size of 4 KB (offset width is fixed to 12 below)
    parameter int ADDR_WIDTH  = 32,    // VA width
    parameter int PADDR_WIDTH = 34     // PA width
    // ASSOCIATIVE = FULL (implied by CAM approach in this specific design)
    // REPL_POLICY = LRU in this implementation
) (
    // Clock / Reset
    input  logic                   clk_i,
    input  logic                   rst_ni,

    // Lookup interface, our MMU pipeline
    input  logic [ADDR_WIDTH-1:0]  lookup_va_i,
    input  logic                   lookup_valid_i,
    output logic                   lookup_ready_o,

    output logic                   lookup_hit_o,
    output logic [PADDR_WIDTH-1:0] lookup_pa_o,

    // Miss indication to MMU
    output logic                   miss_o,

    // Insert interface from sv32_mmu after PTW completion
    input  logic                   insert_valid_i,
    input  logic [19:0]            insert_vpn_i,   // Sv32 VPN[19:0]
    input  logic [21:0]            insert_ppn_i,   // PPN width (22)
    input  logic [7:0]             insert_perm_i,  // R/W/X/U/S/A/D etc.

    // Flush, SFENCE.VMA / SATP write
    input  logic                   flush_i
);

    // Constants / Derived fields
    localparam int PAGE_OFFSET_BITS = 12;  // 4 KB page -> 12-bit offset
    localparam int VPN_BITS         = 20;  // Sv32 VPN width
    localparam int PPN_BITS         = 22;  // Sv32 PPN width
    localparam int INDEX_BITS       = (ENTRIES <= 1) ? 1 : $clog2(ENTRIES);
    // Ages range over 0..ENTRIES-1, which always fits in $clog2(ENTRIES) bits.
    localparam int AGE_BITS         = (ENTRIES <= 2) ? 1 : $clog2(ENTRIES);

    // Storage arrays, one entry per slot
    logic [ENTRIES-1:0]  valid_q;                     // valid bit
    logic [VPN_BITS-1:0] tag_vpn_q  [0:ENTRIES-1];    // tag: VPN[19:0]
    logic [PPN_BITS-1:0] data_ppn_q [0:ENTRIES-1];    // data: PPN
    logic [7:0]          perm_q     [0:ENTRIES-1];    // permissions (stored only)

    // LRU rank per slot (0 = most recently used)
    logic [AGE_BITS-1:0] age_q [0:ENTRIES-1];

    // Lookup combinational signals
    logic [VPN_BITS-1:0]         lookup_vpn;
    logic [PAGE_OFFSET_BITS-1:0] lookup_offset;

    logic [ENTRIES-1:0]    hit_vec;
    logic                  any_hit;
    logic [INDEX_BITS-1:0] hit_index;
    logic                  found_hit;

    // Victim selection signals
    logic                  found_invalid;
    logic [INDEX_BITS-1:0] invalid_index;
    logic [INDEX_BITS-1:0] lru_index;
    logic [INDEX_BITS-1:0] victim_index;
    logic [AGE_BITS-1:0]   max_age;

    // Insert target selection (duplicate tag beats victim selection)
    logic                  insert_dup;
    logic [INDEX_BITS-1:0] insert_dup_index;
    logic [INDEX_BITS-1:0] insert_index;

    // LRU update control
    logic                  touch_en;     // an entry becomes MRU this cycle
    logic [INDEX_BITS-1:0] touch_index;  // which entry
    logic                  touch_all;    // target slot was invalid: age every valid entry
    logic [AGE_BITS-1:0]   touch_age;    // previous rank of the touched entry

    // Split incoming VA into VPN + offset
    always_comb begin
        lookup_offset = lookup_va_i[PAGE_OFFSET_BITS-1:0];
        lookup_vpn    = lookup_va_i[ADDR_WIDTH-1:PAGE_OFFSET_BITS];
    end

    // Hit detection, CAM compare
    always_comb begin
        hit_vec = '0;
        for (int i = 0; i < ENTRIES; i++) begin
            hit_vec[i] = valid_q[i] && (tag_vpn_q[i] == lookup_vpn);
        end
    end

    // Reduce OR to get any_hit; priority-encode the lowest hitting index
    always_comb begin
        any_hit   = |hit_vec;
        hit_index = '0;
        found_hit = 1'b0;

        for (int i = 0; i < ENTRIES; i++) begin
            if (!found_hit && hit_vec[i]) begin
                hit_index = INDEX_BITS'(i);
                found_hit = 1'b1;
            end
        end
    end

    // Lookup protocol behavior: always ready unless flush is active
    always_comb begin
        lookup_ready_o = ~flush_i;
    end

    // Output formation:
    //  - On hit : PA = {PPN, offset}
    //  - On miss: lookup_hit_o=0, miss_o=1, when lookup_valid_i & ready
    always_comb begin
        lookup_hit_o = 1'b0;
        lookup_pa_o  = '0;
        miss_o       = 1'b0;

        if (lookup_valid_i && lookup_ready_o) begin
            if (any_hit) begin
                lookup_hit_o = 1'b1;
                // Explicitly sized so a non-default PADDR_WIDTH does not rely
                // on implicit truncation/extension of the concatenation.
                lookup_pa_o  = PADDR_WIDTH'({data_ppn_q[hit_index], lookup_offset});
            end else begin
                miss_o = 1'b1;
            end
        end
    end

    // Victim selection, step 1: first invalid slot (lowest index)
    always_comb begin
        found_invalid = 1'b0;
        invalid_index = '0;

        for (int i = 0; i < ENTRIES; i++) begin
            if (!found_invalid && !valid_q[i]) begin
                found_invalid = 1'b1;
                invalid_index = INDEX_BITS'(i);
            end
        end
    end

    // Victim selection, step 2: slot with the largest age.
    // Only consumed when every slot is valid, where the ages are a
    // permutation and the maximum is therefore unique.
    always_comb begin
        lru_index = '0;
        max_age   = age_q[0];

        for (int i = 1; i < ENTRIES; i++) begin
            if (age_q[i] > max_age) begin
                max_age   = age_q[i];
                lru_index = INDEX_BITS'(i);
            end
        end
    end

    // Prefer an invalid entry first, else the LRU entry
    always_comb begin
        if (found_invalid) begin
            victim_index = invalid_index;
        end else begin
            victim_index = lru_index;
        end
    end

    // Duplicate detection on the insert tag: if the VPN is already cached,
    // the insert must refresh that entry instead of allocating a second one.
    always_comb begin
        insert_dup       = 1'b0;
        insert_dup_index = '0;

        for (int i = 0; i < ENTRIES; i++) begin
            if (!insert_dup && valid_q[i] && (tag_vpn_q[i] == insert_vpn_i)) begin
                insert_dup       = 1'b1;
                insert_dup_index = INDEX_BITS'(i);
            end
        end
    end

    always_comb begin
        insert_index = insert_dup ? insert_dup_index : victim_index;
    end

    // LRU update control: insert has priority over a lookup hit
    always_comb begin
        touch_en    = 1'b0;
        touch_index = '0;
        touch_all   = 1'b0;

        if (insert_valid_i) begin
            touch_en    = 1'b1;
            touch_index = insert_index;
            // A previously invalid slot has no rank yet: every valid entry
            // is older than "not present", so all of them age by one.
            touch_all   = !valid_q[insert_index];
        end else if (lookup_valid_i && lookup_ready_o && any_hit) begin
            touch_en    = 1'b1;
            touch_index = hit_index;
        end

        touch_age = age_q[touch_index];
    end

    // Sequential state updates: reset / flush / insert / replacement state
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            valid_q <= '0;

            // Arrays are cleared as well for simulation clarity
            for (int i = 0; i < ENTRIES; i++) begin
                tag_vpn_q[i]  <= '0;
                data_ppn_q[i] <= '0;
                perm_q[i]     <= '0;
                age_q[i]      <= '0;
            end

        end else if (flush_i) begin
            // Flush has highest priority
            valid_q <= '0;

            for (int i = 0; i < ENTRIES; i++) begin
                tag_vpn_q[i]  <= '0;
                data_ppn_q[i] <= '0;
                perm_q[i]     <= '0;
                age_q[i]      <= '0;
            end

        end else begin
            // Insert handling: write tag/data/permissions and set valid
            if (insert_valid_i) begin
                tag_vpn_q[insert_index]  <= insert_vpn_i;
                data_ppn_q[insert_index] <= insert_ppn_i;
                perm_q[insert_index]     <= insert_perm_i;
                valid_q[insert_index]    <= 1'b1;
            end

            // Rank-based LRU update (shared by insert and lookup hit)
            if (touch_en) begin
                for (int i = 0; i < ENTRIES; i++) begin
                    if (INDEX_BITS'(i) == touch_index) begin
                        // Touched entry becomes most recently used
                        age_q[i] <= '0;
                    end else if (valid_q[i]) begin
                        // Only entries that were younger than the touched one
                        // move back by one rank; older entries keep theirs.
                        if (touch_all || (age_q[i] < touch_age)) begin
                            age_q[i] <= age_q[i] + AGE_BITS'(1);
                        end
                    end else begin
                        age_q[i] <= '0;
                    end
                end
            end

            // Optional parity/ECC error detection + invalidate entry
            // not implemented in this version
        end
    end

endmodule