Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f6a612b
chore: add backend client and inventory skeletons
jingxiang-z Apr 15, 2026
de7635f
fix: address initial review feedback
jingxiang-z Apr 15, 2026
31682f6
fix: address lint and toolchain drift
jingxiang-z Apr 15, 2026
31c9df0
refactor: model attestationloop as a workflow
jingxiang-z Apr 15, 2026
fb81647
feat: add inventory backend sync flow
jingxiang-z Apr 15, 2026
4def98c
refactor: use backend base URL for agent workflows
jingxiang-z Apr 16, 2026
be13d03
feat: implement backend attestation workflow
jingxiang-z Apr 16, 2026
89b7bc2
refactor: move agent config to inventory payload
jingxiang-z Apr 16, 2026
960dc02
refactor: simplify inventory and attestation workflows
jingxiang-z Apr 16, 2026
b60d845
refactor: align inventory payload and runtime flow
jingxiang-z Apr 20, 2026
8d55dd9
fix: review comments
jingxiang-z Apr 21, 2026
44938f0
chore: add license header to unenroll test
jingxiang-z Apr 21, 2026
47c3759
fix: harden backend state and inventory workflows
jingxiang-z Apr 21, 2026
466c374
refactor: rename agent state node uuid accessors
jingxiang-z Apr 21, 2026
8e7d86e
refactor: rename node id to node uuid
jingxiang-z Apr 21, 2026
142a839
fix: normalize backend enroll endpoint
jingxiang-z Apr 21, 2026
5ce40b0
fix: delay inventory loop after initial collect
jingxiang-z Apr 21, 2026
31a8997
fix: tolerate invalid exporter backend metadata
jingxiang-z Apr 21, 2026
58f42fe
fix: send LogicalCores and TotalBytes as strings in inventory payload
jingxiang-z Apr 22, 2026
b895ee4
refactor: drop InventoryHash from node upsert wire type
jingxiang-z Apr 22, 2026
26cf57e
refactor: drop NetPublicIP from inventory wire type
jingxiang-z Apr 22, 2026
f9db3b9
feat: add X-Agent-Mode header on OTLP requests
jingxiang-z Apr 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
branches: [ main, 'release/**' ]

env:
GO_VERSION: '1.26.1'
GO_VERSION: '1.26.2'
GOFLAGS: '-trimpath'

jobs:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
- 'v*' # Triggers on version tags like v1.0.0, v2.1.3, etc.

env:
GO_VERSION: '1.26.1'
GO_VERSION: '1.26.2'

permissions:
contents: write # Needed for creating GitHub releases
Expand Down
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
version: "2"
run:
go: 1.26.1
go: 1.26.2
linters:
default: none
enable:
Expand Down
2 changes: 1 addition & 1 deletion SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ This repository contains the `fleetint` host agent. The notes below are intended
- Exporter token refresh and endpoint reload: [`internal/exporter/exporter.go`](internal/exporter/exporter.go)
- Local HTTP server routes: [`internal/server/server.go`](internal/server/server.go)
- Optional fault injection handler: [`internal/server/handlers_inject_fault.go`](internal/server/handlers_inject_fault.go)
- Remote attestation and `nvattest` invocation: [`internal/attestation/attestation.go`](internal/attestation/attestation.go)
- Remote attestation and `nvattest` invocation: [`internal/attestation/manager.go`](internal/attestation/manager.go), [`internal/attestation/collector.go`](internal/attestation/collector.go)

### Threat Model

Expand Down
94 changes: 3 additions & 91 deletions cmd/fleetint/enroll.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,12 @@ import (
"os"
"strings"

pkgmetadata "github.com/NVIDIA/fleet-intelligence-sdk/pkg/metadata"
"github.com/NVIDIA/fleet-intelligence-sdk/pkg/sqlite"
"github.com/urfave/cli"

"github.com/NVIDIA/fleet-intelligence-agent/internal/config"
"github.com/NVIDIA/fleet-intelligence-agent/internal/endpoint"
"github.com/NVIDIA/fleet-intelligence-agent/internal/enrollment"
)

var (
performEnrollment = func(enrollEndpoint, sakToken string) (string, error) {
return enrollment.PerformEnrollment(context.Background(), enrollEndpoint, sakToken)
}
storeEnrollmentConfig = storeConfigInMetadata
)
// performEnrollWorkflow is the enrollment entry point invoked by enrollCommand.
// It is held in a package-level variable so tests can substitute a stub and
// restore the original afterwards.
var performEnrollWorkflow = enrollment.Enroll

// resolveToken returns the SAK token from --token, --token-file, or stdin.
func resolveToken(cliContext *cli.Context) (string, error) {
Expand All @@ -48,7 +39,7 @@ func resolveToken(cliContext *cli.Context) (string, error) {
}

if tokenFile != "" {
const maxTokenSize = 1 << 20 // 1 MiB -- SAK tokens are small; anything larger is a mistake
const maxTokenSize = 1 << 20
var raw []byte
var err error
if tokenFile == "-" {
Expand Down Expand Up @@ -105,84 +96,5 @@ func enrollCommand(cliContext *cli.Context) error {
fmt.Fprintln(writerFromContext(cliContext), "Proceeding with enrollment because --force was provided")
}

baseURL, err := endpoint.ValidateBackendEndpoint(baseEndpoint)
if err != nil {
return fmt.Errorf("invalid enrollment endpoint: %w", err)
}

// Construct enroll endpoint
enrollEndpoint, err := endpoint.JoinPath(baseURL, "api", "v1", "health", "enroll")
if err != nil {
return fmt.Errorf("failed to construct enroll endpoint: %w", err)
}

// Make enrollment request to get JWT token
jwtToken, err := performEnrollment(enrollEndpoint, sakToken)
if err != nil {
// Error already printed to stderr by PerformEnrollment
return err
}

// Construct other endpoints using url.JoinPath for proper URL handling
metricsEndpoint, err := endpoint.JoinPath(baseURL, "api", "v1", "health", "metrics")
if err != nil {
return fmt.Errorf("failed to construct metrics endpoint: %w", err)
}
logsEndpoint, err := endpoint.JoinPath(baseURL, "api", "v1", "health", "logs")
if err != nil {
return fmt.Errorf("failed to construct logs endpoint: %w", err)
}
nonceEndpoint, err := endpoint.JoinPath(baseURL, "api", "v1", "health", "nonce")
if err != nil {
return fmt.Errorf("failed to construct nonce endpoint: %w", err)
}

// Store endpoints and JWT token in metadata table
if err := storeEnrollmentConfig(enrollEndpoint, metricsEndpoint, logsEndpoint, nonceEndpoint, jwtToken, sakToken); err != nil {
return fmt.Errorf("failed to store configuration: %w", err)
}

return nil
}

func storeConfigInMetadata(enrollEndpoint, metricsEndpoint, logsEndpoint, nonceEndpoint, jwtToken, sakToken string) error {
stateFile, err := config.DefaultStateFile()
if err != nil {
return fmt.Errorf("failed to get state file path: %w", err)
}

dbRW, err := sqlite.Open(stateFile)
if err != nil {
return fmt.Errorf("failed to open state database: %w", err)
}
defer dbRW.Close()

if err := pkgmetadata.CreateTableMetadata(context.Background(), dbRW); err != nil {
return fmt.Errorf("failed to create metadata table: %w", err)
}

// Store SAK token (for JWT refresh), JWT token (for API calls), and all endpoints
if err := pkgmetadata.SetMetadata(context.Background(), dbRW, "sak_token", sakToken); err != nil {
return fmt.Errorf("failed to set SAK token: %w", err)
}
if err := pkgmetadata.SetMetadata(context.Background(), dbRW, pkgmetadata.MetadataKeyToken, jwtToken); err != nil {
return fmt.Errorf("failed to set JWT token: %w", err)
}
if err := pkgmetadata.SetMetadata(context.Background(), dbRW, "enroll_endpoint", enrollEndpoint); err != nil {
return fmt.Errorf("failed to set enroll endpoint: %w", err)
}
if err := pkgmetadata.SetMetadata(context.Background(), dbRW, "metrics_endpoint", metricsEndpoint); err != nil {
return fmt.Errorf("failed to set metrics endpoint: %w", err)
}
if err := pkgmetadata.SetMetadata(context.Background(), dbRW, "logs_endpoint", logsEndpoint); err != nil {
return fmt.Errorf("failed to set logs endpoint: %w", err)
}
if err := pkgmetadata.SetMetadata(context.Background(), dbRW, "nonce_endpoint", nonceEndpoint); err != nil {
return fmt.Errorf("failed to set nonce endpoint: %w", err)
}
if err := config.SecureStateFilePermissions(stateFile); err != nil {
return fmt.Errorf("failed to secure state database permissions: %w", err)
}

return nil
return performEnrollWorkflow(context.Background(), baseEndpoint, sakToken)
}
57 changes: 7 additions & 50 deletions cmd/fleetint/enroll_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@ package main

import (
"bytes"
"context"
"fmt"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -50,12 +49,10 @@ func TestEnrollCommandPrecheckError(t *testing.T) {

func TestEnrollCommandBlocksOnFailedPrecheck(t *testing.T) {
originalRunPrecheck := runPrecheck
originalPerformEnrollment := performEnrollment
originalStoreConfig := storeEnrollmentConfig
originalEnrollWorkflow := performEnrollWorkflow
t.Cleanup(func() {
runPrecheck = originalRunPrecheck
performEnrollment = originalPerformEnrollment
storeEnrollmentConfig = originalStoreConfig
performEnrollWorkflow = originalEnrollWorkflow
})

enrollmentCalled := false
Expand All @@ -66,11 +63,8 @@ func TestEnrollCommandBlocksOnFailedPrecheck(t *testing.T) {
},
}, nil
}
performEnrollment = func(enrollEndpoint, sakToken string) (string, error) {
performEnrollWorkflow = func(ctx context.Context, baseEndpoint, sakToken string) error {
enrollmentCalled = true
return "jwt-token", nil
}
storeEnrollmentConfig = func(enrollEndpoint, metricsEndpoint, logsEndpoint, nonceEndpoint, jwtToken, sakToken string) error {
return nil
}

Expand All @@ -88,12 +82,10 @@ func TestEnrollCommandBlocksOnFailedPrecheck(t *testing.T) {

func TestEnrollCommandForceBypassesFailedPrecheck(t *testing.T) {
originalRunPrecheck := runPrecheck
originalPerformEnrollment := performEnrollment
originalStoreConfig := storeEnrollmentConfig
originalEnrollWorkflow := performEnrollWorkflow
t.Cleanup(func() {
runPrecheck = originalRunPrecheck
performEnrollment = originalPerformEnrollment
storeEnrollmentConfig = originalStoreConfig
performEnrollWorkflow = originalEnrollWorkflow
})

enrollmentCalled := false
Expand All @@ -104,11 +96,8 @@ func TestEnrollCommandForceBypassesFailedPrecheck(t *testing.T) {
},
}, nil
}
performEnrollment = func(enrollEndpoint, sakToken string) (string, error) {
performEnrollWorkflow = func(ctx context.Context, baseEndpoint, sakToken string) error {
enrollmentCalled = true
return "jwt-token", nil
}
storeEnrollmentConfig = func(enrollEndpoint, metricsEndpoint, logsEndpoint, nonceEndpoint, jwtToken, sakToken string) error {
return nil
}

Expand All @@ -120,35 +109,3 @@ func TestEnrollCommandForceBypassesFailedPrecheck(t *testing.T) {
require.NoError(t, err)
assert.True(t, enrollmentCalled)
}

func TestStoreConfigInMetadataSecuresFreshStateFile(t *testing.T) {
if os.Geteuid() == 0 {
t.Skip("test expects non-root default state path resolution")
}

tmpHome := t.TempDir()
t.Setenv("HOME", tmpHome)

err := storeConfigInMetadata(
"https://example.com/api/v1/health/enroll",
"https://example.com/api/v1/health/metrics",
"https://example.com/api/v1/health/logs",
"https://example.com/api/v1/health/nonce",
"jwt-token",
"sak-token",
)
require.NoError(t, err)

stateFile := filepath.Join(tmpHome, ".fleetint", "fleetint.state")
for _, candidate := range []string{stateFile, stateFile + "-wal", stateFile + "-shm"} {
info, err := os.Stat(candidate)
if os.IsNotExist(err) {
if candidate == stateFile {
require.NoError(t, err)
}
continue
}
require.NoError(t, err)
assert.Equal(t, os.FileMode(0o600), info.Mode().Perm())
}
}
35 changes: 26 additions & 9 deletions cmd/fleetint/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,15 +202,6 @@ func configureHealthExporterFromEnv(cfg *config.Config) error {
return err
}

// FLEETINT_ATTESTATION_JITTER_ENABLED - Enable/disable attestation jitter
if err := setBoolFromEnv("FLEETINT_ATTESTATION_JITTER_ENABLED", &he.Attestation.JitterEnabled, "set attestation jitter enabled from env", "attestation_jitter_enabled"); err != nil {
return err
}

if err := setDurationFromEnv("FLEETINT_ATTESTATION_INTERVAL", &he.Attestation.Interval, "set attestation interval from env", "attestation_interval", 0, 0); err != nil {
return err
}

// Lookbacks
if err := setDurationFromEnv("FLEETINT_METRICS_LOOKBACK", &he.MetricsLookback, "set health exporter metrics lookback from env", "metrics_lookback", 0, 0); err != nil {
return err
Expand All @@ -231,6 +222,29 @@ func configureHealthExporterFromEnv(cfg *config.Config) error {
return nil
}

// configureLoopConfigFromEnv applies environment-variable overrides to the
// inventory and attestation loop sections of cfg.
//
// A section whose pointer is nil is left untouched. Variables are handled in a
// fixed order, and the first error returned by setBoolFromEnv or
// setDurationFromEnv is returned unchanged, so later variables are not
// processed after a failure.
//
// NOTE(review): the trailing (time.Minute, 0) arguments to setDurationFromEnv
// look like a lower bound of one minute and no upper bound — confirm against
// the helper's definition.
func configureLoopConfigFromEnv(cfg *config.Config) error {
	if inv := cfg.Inventory; inv != nil {
		err := setBoolFromEnv("FLEETINT_INVENTORY_ENABLED", &inv.Enabled, "set inventory enabled from env", "inventory_enabled")
		if err != nil {
			return err
		}
		err = setDurationFromEnv("FLEETINT_INVENTORY_INTERVAL", &inv.Interval, "set inventory interval from env", "inventory_interval", time.Minute, 0)
		if err != nil {
			return err
		}
	}

	if att := cfg.Attestation; att != nil {
		err := setBoolFromEnv("FLEETINT_ATTESTATION_ENABLED", &att.Enabled, "set attestation enabled from env", "attestation_enabled")
		if err != nil {
			return err
		}
		err = setDurationFromEnv("FLEETINT_ATTESTATION_INITIAL_INTERVAL", &att.InitialInterval, "set attestation initial interval from env", "attestation_initial_interval", time.Minute, 0)
		if err != nil {
			return err
		}
		err = setDurationFromEnv("FLEETINT_ATTESTATION_INTERVAL", &att.Interval, "set attestation interval from env", "attestation_interval", time.Minute, 0)
		if err != nil {
			return err
		}
	}

	return nil
}

func runCommand(cliContext *cli.Context) error {
logLevel := cliContext.String("log-level")
logFile := cliContext.String("log-file")
Expand Down Expand Up @@ -310,6 +324,9 @@ func runCommand(cliContext *cli.Context) error {
if err := configureHealthExporterFromEnv(cfg); err != nil {
return fmt.Errorf("failed to configure health exporter from environment variables: %w", err)
}
if err := configureLoopConfigFromEnv(cfg); err != nil {
return fmt.Errorf("failed to configure loop settings from environment variables: %w", err)
}
log.Logger.Infow("health exporter configuration", "cfg", cfg.HealthExporter)

if listenAddress != "" {
Expand Down
Loading
Loading