Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions .github/workflows/go-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Go Checks
on:
pull_request:
push:
branches: ["master"]
branches: ["master", "dev"]
workflow_dispatch:

permissions:
Expand All @@ -14,5 +14,38 @@ concurrency:
cancel-in-progress: true

jobs:
go-check:
uses: pl-strflt/uci/.github/workflows/go-check.yml@v0.0
check:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: "1.23"

- name: Check formatting
run: |
if [ -n "$(gofmt -l .)" ]; then
echo "Code is not formatted. Run 'gofmt -w .'"
gofmt -d .
exit 1
fi

- name: Run go vet
run: go vet ./...

- name: Run staticcheck
uses: dominikh/staticcheck-action@v1
with:
version: "latest"

- name: Check go mod tidy
run: |
go mod tidy
if [ -n "$(git status --porcelain go.mod go.sum)" ]; then
echo "go.mod or go.sum is not tidy. Run 'go mod tidy'"
git diff go.mod go.sum
exit 1
fi
58 changes: 55 additions & 3 deletions .github/workflows/go-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Go Test
on:
pull_request:
push:
branches: ["master"]
branches: ["master", "dev"]
workflow_dispatch:

permissions:
Expand All @@ -14,5 +14,57 @@ concurrency:
cancel-in-progress: true

jobs:
go-test:
uses: pl-strflt/uci/.github/workflows/go-test.yml@v0.0
# Full test suite on Ubuntu where mDNS multicast works
test:
strategy:
matrix:
go: ["1.22", "1.23"]
fail-fast: false
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go }}

- name: Download dependencies
run: go mod download

- name: Run tests
run: go test -v -race -coverprofile=coverage.txt -covermode=atomic ./...

- name: Upload coverage
if: matrix.go == '1.23'
uses: codecov/codecov-action@v4
with:
files: ./coverage.txt
fail_ci_if_error: false

# Build verification on other platforms (mDNS multicast doesn't work in CI)
build:
strategy:
matrix:
os: [macos-latest, windows-latest]
go: ["1.22", "1.23"]
fail-fast: false
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go }}

- name: Download dependencies
run: go mod download

- name: Build
run: go build ./...

- name: Run unit tests (no network)
run: go test -v -race -run "TestQuickShutdown|TestCPUSpinOnConnectionClose" ./...
16 changes: 14 additions & 2 deletions server.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,13 @@ func (s *Server) recv4(c *ipv4.PacketConn) {
var ifIndex int
n, cm, from, err := c.ReadFrom(buf)
if err != nil {
continue
// Backoff to prevent CPU spin on persistent errors
select {
case <-s.shouldShutdown:
return
case <-time.After(50 * time.Millisecond):
continue
}
}
if cm != nil {
ifIndex = cm.IfIndex
Expand All @@ -299,7 +305,13 @@ func (s *Server) recv6(c *ipv6.PacketConn) {
var ifIndex int
n, cm, from, err := c.ReadFrom(buf)
if err != nil {
continue
// Backoff to prevent CPU spin on persistent errors
select {
case <-s.shouldShutdown:
return
case <-time.After(50 * time.Millisecond):
continue
}
}
if cm != nil {
ifIndex = cm.IfIndex
Expand Down
73 changes: 72 additions & 1 deletion service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package zeroconf
import (
"context"
"log"
"runtime"
"testing"
"time"
)
Expand Down Expand Up @@ -181,7 +182,8 @@ func TestSubtype(t *testing.T) {
initialQueryInterval = 100 * time.Millisecond
cleanupFreq = 100 * time.Millisecond

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
// Use longer timeout for CI environments where timing can be inconsistent
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
startMDNS(t, mdnsPort, mdnsName, mdnsSubtype, mdnsDomain)

Expand All @@ -202,3 +204,72 @@ func TestSubtype(t *testing.T) {
}
})
}

// TestCPUSpinOnConnectionClose demonstrates the CPU spinning bug that occurs
// when connections are closed externally (simulating network interface changes).
// This reproduces the issue seen on Windows after hours of running.
func TestCPUSpinOnConnectionClose(t *testing.T) {
server, err := Register(mdnsName, mdnsService, mdnsDomain, mdnsPort,
[]string{"txtv=0", "lo=1", "la=2"}, nil)
if err != nil {
t.Fatal(err)
}

// Let the server start and stabilize
time.Sleep(100 * time.Millisecond)

// Measure baseline goroutine count
baselineGoroutines := runtime.NumGoroutine()
t.Logf("Baseline goroutines: %d", baselineGoroutines)

// Get baseline CPU stats
var baselineStats runtime.MemStats
runtime.ReadMemStats(&baselineStats)
baselineMallocs := baselineStats.Mallocs

// Close connections WITHOUT triggering shutdown
// This simulates what happens when network interfaces change on Windows
if server.ipv4conn != nil {
server.ipv4conn.Close()
}
if server.ipv6conn != nil {
server.ipv6conn.Close()
}

t.Log("Connections closed - recv loops should now be spinning on errors")

// Wait and measure - if spinning, we'll see high allocation rate
// because the tight loop keeps running
time.Sleep(500 * time.Millisecond)

var afterStats runtime.MemStats
runtime.ReadMemStats(&afterStats)
allocsDuring := afterStats.Mallocs - baselineMallocs

t.Logf("Allocations during 500ms after connection close: %d", allocsDuring)
t.Logf("Current goroutines: %d", runtime.NumGoroutine())

// A spinning loop will have many more allocations than a properly blocked one
// This threshold is somewhat arbitrary but a blocked recv should have near-zero
// while a spinning one will have thousands
if allocsDuring > 10000 {
t.Errorf("DETECTED CPU SPIN: %d allocations in 500ms indicates tight loop", allocsDuring)
t.Log("This confirms the bug: recv loops spin when ReadFrom returns errors")
} else {
t.Logf("Allocation count (%d) suggests recv loops may be handling errors correctly", allocsDuring)
}

// Clean up - this should work even with closed connections
done := make(chan struct{})
go func() {
defer close(done)
server.Shutdown()
}()

select {
case <-done:
t.Log("Shutdown completed successfully")
case <-time.After(2 * time.Second):
t.Error("Shutdown timed out - recv loops may be stuck")
}
}