Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 63 additions & 8 deletions common/driver/data_dev_top.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@
#include <gpu_async.h>
#endif

// PCI_IRQ_LEGACY was renamed to PCI_IRQ_INTX in Linux 6.11; the old name was
// kept as a deprecated alias for a few releases, then dropped. Provide the new
// spelling on pre-6.11 kernels so the single pci_alloc_irq_vectors() call site
// builds across the CI matrix (Ubuntu 22.04 / kernel 5.15 lacks PCI_IRQ_INTX).
#ifndef PCI_IRQ_INTX
#define PCI_IRQ_INTX PCI_IRQ_LEGACY
#endif

// Init Configuration values
static int cfgTxCount = 1024;
static int cfgRxCount = 1024;
Expand Down Expand Up @@ -234,6 +242,23 @@ int DataDev_Probe(struct pci_dev *pcidev, const struct pci_device_id *dev_id) {
goto err_post_en;
}

// The driver's register map tops out at a fixed 16 MB window (2 * USER_SIZE:
// DMA engine (AGEN2_OFF), PCIe PHY (PHY_OFF), AxiVersion (AVER_OFF), and the
// user region: rwBase = base + PHY_OFF, rwSize = 2*USER_SIZE - PHY_OFF). A
// BAR0 *larger* than this means the firmware was built with a different
// address map, so reject it with a clear message instead of letting the
// mismatch surface later. A smaller BAR0 is tolerated: the emulator shrinks
// its BAR when it cannot reserve 16 MB of contiguous memory, and only the
// lower register regions are exercised in that configuration.
if ( dev->baseSize > (2 * USER_SIZE) ) {
dev_err(&pcidev->dev,
"%s: Probe: BAR0 size 0x%x exceeds expected 0x%x (16 MB); "
"firmware register-map mismatch.\n",
MOD_NAME, dev->baseSize, (unsigned int)(2 * USER_SIZE));
probeReturn = -EINVAL;
goto err_post_en;
}
Comment on lines +245 to +260
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what the context for this check is. afaik our firmware images all have fixed BAR sizes of 16MB right now.

BAR size difference doesn't necessarily indicate a register map mismatch (please correct me if I'm wrong @ruck314). If BAR is bumped to 32MB, AxiVersion/AxiPcieCore/etc. will still have the same offsets, unless explicitly changed in fw. In fact, we can have register map differences even if the BAR size is the same.

I also really dislike exceptions added for the sake of automated tests, especially considering this actually misses a case where BAR0 was 1MB back in 2017 with a different register map. Though, I'm pretty sure the driver has handling for these old firmware versions. Just skimming the git history here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JJL772 This relates to an odd dynamic in the FPGA firmware's IP core: selecting the MSI-X IRQ silently changes the BAR0 size from 16MB to 64MB without any notification. Both the existing datadev driver and firmware assume BAR0 is 16MB.

I would have preferred a != check instead of >, but the emulator allocates less than 16MB and I couldn't find a good way to make it allocate exactly 16MB.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the clarification. we can keep this check then

Copy link
Copy Markdown
Contributor Author

@ruck314 ruck314 Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a side note: When I run the C1100 with MSI interrupt the frame rate frequency for the PRBS testing into the CPU is incredibly stable compared to the legacy interrupt (comparing the min/max values) and able to increase FW->CPU bandwidth from 111Gb/s to 113Gb/s

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice to see performance improvements like that!!


// Set basic device attributes
dev->pcidev = pcidev; // PCI device structure
dev->device = &(pcidev->dev); // Device structure
Expand Down Expand Up @@ -263,16 +288,41 @@ int DataDev_Probe(struct pci_dev *pcidev, const struct pci_device_id *dev_id) {
dev->cfgTimeout = cfgTimeout;
dev->debug = cfgDebug;

// Allocate IRQ vectors: try MSI-X first, then MSI, then legacy INTx.
// pci_alloc_irq_vectors() walks the requested types in priority order
// and returns the count of vectors negotiated; pci_irq_vector(pdev, 0)
// returns the right Linux IRQ number regardless of which path won. A
// bitstream that advertises more than one type is tolerated for
// backwards compatibility with legacy PCIe IP cores that report
// INTx + MSI/MSI-X simultaneously: the cascade picks the highest
// priority kind that's actually advertised. Probe fails only if all
// three are unavailable.
ret = pci_alloc_irq_vectors(pcidev, 1, 1,
PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_INTX);
if (ret < 0) {
dev_err(&pcidev->dev,
"%s: Probe: pci_alloc_irq_vectors() = %i\n", MOD_NAME, ret);
probeReturn = ret;
goto err_unmap;
}

// Assign the IRQ number from the pci_dev structure
dev->irq = pcidev->irq;

// Check that we actually have an IRQ
if (dev->irq == 0) {
dev_err(dev->device, "%s: No IRQ associated with PCI device\n", MOD_NAME);
probeReturn = -EINVAL;
// pci_irq_vector() returns a signed errno on failure; capture it in an
// int and reject < 0 before storing into the uint32_t dev->irq, otherwise
// a negative value would wrap to a bogus IRQ and reach request_irq().
ret = pci_irq_vector(pcidev, 0);
if (ret < 0) {
dev_err(&pcidev->dev,
"%s: Probe: pci_irq_vector() = %i\n", MOD_NAME, ret);
probeReturn = ret;
goto err_unmap;
}
dev->irq = ret;

dev_info(dev->device,
"Init: Probe: using %s interrupts, irq=%u\n",
pcidev->msix_enabled ? "MSI-X" :
pcidev->msi_enabled ? "MSI" : "legacy INTx",
dev->irq);

// Set basic device context

Expand Down Expand Up @@ -347,6 +397,8 @@ int DataDev_Probe(struct pci_dev *pcidev, const struct pci_device_id *dev_id) {
dev->utilData = NULL;
#endif
Dma_UnmapReg(dev); // Idempotent: safe even if Dma_MapReg never ran
pci_free_irq_vectors(pcidev); // Releases MSI/MSI-X/INTx allocation; no-op if
// pci_alloc_irq_vectors failed or never ran
err_post_en:
pci_disable_device(pcidev); // Disable PCI device on failure
err_pre_en:
Expand Down Expand Up @@ -407,9 +459,12 @@ void DataDev_Remove(struct pci_dev *pcidev) {
}
#endif

// Call common DMA clean function
// Call common DMA clean function (calls free_irq() internally)
Dma_Clean(dev);

// Release MSI/MSI-X/INTx vectors (must follow free_irq, precede pci_disable_device)
pci_free_irq_vectors(pcidev);

// Disable device
pci_disable_device(pcidev);

Expand Down
8 changes: 6 additions & 2 deletions docs/explanation/ci-pipeline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,12 @@ The combined Phase 2 + Phase 3 coverage across all distributions:
- 3
- Multi-destination (0, 7, 8), cross-contamination check
* - IRQ modes
- 3
- cfgIrqHold=1, cfgIrqHold=100000, polled (cfgIrqDis=1)
- 12
- ``emu_irq_mode`` {intx, msi, msix} sweep (CPU phase). Each mode
runs cfgIrqHold=1, cfgIrqHold=100000, and polled (cfgIrqDis=1),
plus a probe-cascade assertion that ``datadev`` selected the
matching INTx / MSI / MSI-X path. GPU phase runs the single-mode
legacy pass
* - Module lifecycle
- 4
- 3 rapid reload cycles, rmmod-under-load, cfgMode=1/2 transitions
Expand Down
7 changes: 7 additions & 0 deletions docs/explanation/kernel-compatibility.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ the source files listed.
``class_create()`` dropped its first argument (``THIS_MODULE``) in 6.4. Red Hat backported
this change in RHEL 9.4.

``< KERNEL_VERSION(6, 11, 0)`` — ``data_dev_top.c``
``PCI_IRQ_LEGACY`` was renamed to ``PCI_IRQ_INTX`` in 6.11 (the old name was kept as a
deprecated alias for a few releases, then dropped). A ``#ifndef PCI_IRQ_INTX`` guard
defines the new spelling as ``PCI_IRQ_LEGACY`` so the single ``pci_alloc_irq_vectors()``
call site builds on pre-6.11 kernels (e.g. Ubuntu 22.04 / kernel 5.15, which lacks
``PCI_IRQ_INTX``).

RHEL Backport Detection
------------------------

Expand Down
9 changes: 7 additions & 2 deletions docs/reference/test-applications.rst
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,13 @@ by ``scripts/ci/test-cpu.sh`` or ``scripts/ci/test-gpu.sh`` during CI.
* - ``test_backpressure.sh``
- Saturates TX buffers then verifies recovery and correct dmesg state
* - ``test_irq_modes.sh``
- Sweeps ``cfgIrqHold`` (1, 100000) and polled mode; verifies
loopback works under each IRQ configuration
- Sweeps the emulator's ``emu_irq_mode`` (``intx`` / ``msi`` /
``msix``). For each mode it reloads ``datadev`` across
``cfgIrqHold`` (1, 100000) and polled mode, verifies loopback +
PRBS integrity, then asserts that ``datadev``'s probe-time
``pci_alloc_irq_vectors`` cascade selected the matching interrupt
type. The ``emu_irq_mode`` sweep is CPU-phase only; the GPU phase
and emulator-less hosts run the legacy single-mode pass
* - ``test_concurrent_open.sh``
- Two ``dmaLoopTest`` instances on different destinations simultaneously
* - ``test_idx_loopback.sh``
Expand Down
2 changes: 1 addition & 1 deletion emulator/driver/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ else
endif

# Object files that make up the module
$(NAME)-objs := src/bar0_regs.o src/virt_irq.o src/virt_pci_host.o src/dma_engine.o src/prbs.o src/gpu_engine.o src/emu_main.o
$(NAME)-objs := src/bar0_regs.o src/virt_irq.o src/virt_msi.o src/virt_pci_host.o src/dma_engine.o src/prbs.o src/gpu_engine.o src/emu_main.o

# Target module
obj-m := $(NAME).o
Expand Down
35 changes: 33 additions & 2 deletions emulator/driver/src/dma_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,31 @@ static int emu_process_tx(struct emu_dma_engine *eng)
return tx_processed;
}

/* ----------------------------------------------------------------
* IRQ delivery dispatch
* ---------------------------------------------------------------- */

/*
* emu_dma_fire_irq - Trigger the IRQ that datadev currently has registered.
*
* In INTx mode datadev called request_irq() on emu_irq.virq, so we fire that.
* In MSI / MSI-X mode, the kernel allocated a child virq through our PCI-MSI
* domain when datadev called pci_alloc_irq_vectors(); the parent .alloc op
* stashed it in emu_msi.alloc_virq. Fire that one instead -- emu_irq.virq is
* stale (datadev never request_irq'd it).
*
* The MSI path may briefly have alloc_virq == 0 between emu_dma_start (which
* runs before the host bridge is created) and the kernel's MSI alloc during
* datadev probe; emu_msi_fire_safe() short-circuits in that window.
*/
static inline void emu_dma_fire_irq(struct emu_dma_engine *eng)
{
if (eng->msi != NULL && eng->msi->alloc_virq != 0)
emu_msi_fire_safe(eng->msi);
else
emu_irq_fire_safe(eng->irq);
}

/* ----------------------------------------------------------------
* Polling kthread
* ---------------------------------------------------------------- */
Expand Down Expand Up @@ -530,7 +555,7 @@ static int emu_poll_thread_fn(void *data)
need_irq = true;

if (need_irq) {
emu_irq_fire_safe(eng->irq);
emu_dma_fire_irq(eng);
eng->irq_count++;
}
}
Expand Down Expand Up @@ -592,7 +617,7 @@ static int emu_poll_thread_fn(void *data)
eng->wr_ring_idx = (eng->wr_ring_idx + 1) % eng->addr_count;
emu_reg_write(eng, EMU_REG_HWWRINDEX, eng->wr_ring_idx);

emu_irq_fire_safe(eng->irq);
emu_dma_fire_irq(eng);
eng->irq_count++;
eng->seed_pending = true;
eng->seed_idx++;
Expand All @@ -613,11 +638,17 @@ static int emu_poll_thread_fn(void *data)
* Engine lifecycle
* ---------------------------------------------------------------- */

void emu_dma_set_msi(struct emu_dma_engine *eng, struct emu_msi *msi)
{
eng->msi = msi;
}

int emu_dma_init(struct emu_dma_engine *eng, struct emu_bar0 *bar,
struct emu_irq *irq)
{
eng->bar = bar;
eng->irq = irq;
eng->msi = NULL;
eng->reg_base = bar->uc_virt + EMU_AGEN2_OFF;

eng->rd_ring = NULL;
Expand Down
5 changes: 4 additions & 1 deletion emulator/driver/src/dma_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <linux/spinlock.h>
#include "bar0_regs.h"
#include "virt_irq.h"
#include "virt_msi.h"

/* Maximum RX buffers in free pool (matches driver's default rxBuffers.count) */
#define EMU_FREE_POOL_MAX 4096
Expand Down Expand Up @@ -81,7 +82,8 @@ struct emu_free_buf {
struct emu_dma_engine {
/* Back-references */
struct emu_bar0 *bar;
struct emu_irq *irq;
struct emu_irq *irq; /* legacy INTx delivery (always populated) */
struct emu_msi *msi; /* MSI/MSI-X delivery (NULL in INTx mode) */

/* Register base (bar->virt + EMU_AGEN2_OFF) */
void *reg_base;
Expand Down Expand Up @@ -138,6 +140,7 @@ struct emu_dma_engine {

int emu_dma_init(struct emu_dma_engine *eng, struct emu_bar0 *bar,
struct emu_irq *irq);
void emu_dma_set_msi(struct emu_dma_engine *eng, struct emu_msi *msi);
void emu_dma_start(struct emu_dma_engine *eng);
void emu_dma_stop(struct emu_dma_engine *eng);
void emu_dma_destroy(struct emu_dma_engine *eng);
Expand Down
Loading