diff --git a/cmd/dashboard/main.go b/cmd/dashboard/main.go
index 25d6e59..3e26622 100644
--- a/cmd/dashboard/main.go
+++ b/cmd/dashboard/main.go
@@ -56,6 +56,9 @@ func main() {
// Initialize FinOps Engine
finopsEngine := finops.NewEngine(vmClient, st.PricingCatalog())
+ // Share Pricing Catalog with VM Client
+ vmClient.SetPricingCatalog(st.PricingCatalog())
+
auth.SetSecret(cfg.JWTSecret)
srv := &http.Server{
diff --git a/docs/TECHDEBT.md b/docs/TECHDEBT.md
index a2fbc10..3a80bf6 100644
--- a/docs/TECHDEBT.md
+++ b/docs/TECHDEBT.md
@@ -15,3 +15,7 @@
## Future Considerations
- [ ] **Retention Policies**: Configure distinct retention periods for high-precision metrics (15s interval) vs. aggregated historical data.
- [ ] **Refactor Store Locking**: Evaluate moving from heavy `RWMutex` usage in `store.go` to a more concurrent pattern if contention increases with 100+ agents.
+- [ ] **Dynamic Pricing & Savings Plans Support**:
+ - Problem: Agents no longer send costs; backend relies on static On-Demand rates.
+ - Solution: Implement a **Dynamic Pricing Engine** with DB overrides for Savings Plans, Reserved Instances, and Spot Pricing.
+ - Design: See `dynamic_pricing_design.md` artifact.
diff --git a/internal/api/handlers_health_test.go b/internal/api/handlers_health_test.go
index f5cf9b9..6dd6ba0 100644
--- a/internal/api/handlers_health_test.go
+++ b/internal/api/handlers_health_test.go
@@ -47,6 +47,12 @@ func (f *fakeMetricsProvider) ClusterMetadata(context.Context) (store.ClusterMet
func (f *fakeMetricsProvider) NetworkTopology(context.Context, store.NetworkTopologyOptions) ([]store.NetworkEdge, error) {
return nil, vm.ErrNoData
}
+// GetNodeStats is the test double for the MetricsProvider method of the same
+// name; it ignores its arguments and always reports vm.ErrNoData.
+func (f *fakeMetricsProvider) GetNodeStats(context.Context, string, string, time.Duration) (store.NodeStats, error) {
+ return store.NodeStats{}, vm.ErrNoData
+}
+// GetNodePods is the test double for the MetricsProvider method of the same
+// name; it ignores its arguments and always reports vm.ErrNoData.
+func (f *fakeMetricsProvider) GetNodePods(context.Context, string, string, time.Duration) ([]store.PodMetrics, error) {
+ return nil, vm.ErrNoData
+}
func newTestHandler(meta store.ClusterMetadata, status store.AgentStatusPayload) *Handler {
return &Handler{vm: &fakeMetricsProvider{meta: meta, status: status}}
diff --git a/internal/api/handlers_nodes.go b/internal/api/handlers_nodes.go
index fcb30ef..b6a3884 100644
--- a/internal/api/handlers_nodes.go
+++ b/internal/api/handlers_nodes.go
@@ -2,6 +2,7 @@ package api
import (
"net/http"
+ "time"
"github.com/go-chi/chi/v5"
@@ -22,6 +23,7 @@ func (h *Handler) Nodes(w http.ResponseWriter, r *http.Request) {
Search: q.Get("search"),
Limit: parseLimit(q.Get("limit"), defaultNodeLimit, maxNodeLimit),
Offset: parseOffset(q.Get("offset")),
+ Window: q.Get("window"), // "24h", "7d", "30d"
}
resp, err := h.vm.NodeList(ctx, filter)
@@ -58,3 +60,49 @@ func (h *Handler) NodeDetail(w http.ResponseWriter, r *http.Request) {
writeJSON(w, http.StatusOK, node)
}
+
+// NodeStats returns historical usage and cost stats for a node.
+//
+// The optional "window" query parameter selects the lookback period; see
+// parseStatsWindow for the accepted forms. Provider errors are reported as
+// HTTP 500 with the error text as the message.
+func (h *Handler) NodeStats(w http.ResponseWriter, r *http.Request) {
+	name := chi.URLParam(r, "name")
+	if name == "" {
+		writeError(w, http.StatusBadRequest, "node name is required")
+		return
+	}
+	window := parseStatsWindow(r.URL.Query().Get("window"))
+
+	// Hand the cluster ID to the provider explicitly as well as through the
+	// context: the provider's clusterID argument is what gets turned into a
+	// label matcher, and an empty string leaves the query unscoped.
+	clusterID := clusterIDFromRequest(r)
+	ctx := vm.WithClusterID(r.Context(), clusterID)
+	stats, err := h.vm.GetNodeStats(ctx, clusterID, name, window)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+
+	writeJSON(w, http.StatusOK, stats)
+}
+
+// NodePods returns the list of pods for a node with P95 metrics (Pod Audit).
+//
+// The optional "window" query parameter selects the lookback period; see
+// parseStatsWindow for the accepted forms. Provider errors are reported as
+// HTTP 500 with the error text as the message.
+func (h *Handler) NodePods(w http.ResponseWriter, r *http.Request) {
+	name := chi.URLParam(r, "name")
+	if name == "" {
+		writeError(w, http.StatusBadRequest, "node name is required")
+		return
+	}
+	window := parseStatsWindow(r.URL.Query().Get("window"))
+
+	// See NodeStats: an empty clusterID argument leaves the query unscoped.
+	clusterID := clusterIDFromRequest(r)
+	ctx := vm.WithClusterID(r.Context(), clusterID)
+	pods, err := h.vm.GetNodePods(ctx, clusterID, name, window)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+
+	writeJSON(w, http.StatusOK, pods)
+}
+
+// parseStatsWindow converts the "window" query parameter into a duration.
+//
+// It accepts Go duration syntax ("24h", "90m") and, in addition, a plain
+// number followed by "d" ("7d", "30d", "1.5d"), which time.ParseDuration
+// rejects because it has no day unit. Empty, malformed, or non-positive
+// input yields the 24h default.
+func parseStatsWindow(raw string) time.Duration {
+	const fallback = 24 * time.Hour
+	// Largest hour-based value that can be multiplied by 24 without
+	// overflowing time.Duration.
+	const maxDayHours = time.Duration(1<<63-1) / 24
+
+	window, err := time.ParseDuration(raw)
+	if err != nil && len(raw) > 1 && raw[len(raw)-1] == 'd' {
+		count := raw[:len(raw)-1]
+		// Only a bare number may precede the day suffix; mixed forms such as
+		// "1h2d" stay invalid instead of being misread.
+		numeric := true
+		for i := 0; i < len(count); i++ {
+			if c := count[i]; (c < '0' || c > '9') && c != '.' {
+				numeric = false
+				break
+			}
+		}
+		if numeric {
+			// Re-read "<n>d" as "<n>h" and scale by 24.
+			if hours, dayErr := time.ParseDuration(count + "h"); dayErr == nil && hours <= maxDayHours {
+				window = 24 * hours
+			}
+		}
+	}
+	if window <= 0 {
+		return fallback
+	}
+	return window
+}
diff --git a/internal/api/router.go b/internal/api/router.go
index 28d403a..778f45d 100644
--- a/internal/api/router.go
+++ b/internal/api/router.go
@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"net/http"
+ "time"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
@@ -28,6 +29,8 @@ type MetricsProvider interface {
Agents(ctx context.Context) ([]store.AgentInfo, error)
ClusterMetadata(ctx context.Context) (store.ClusterMetadata, error)
NetworkTopology(ctx context.Context, opts store.NetworkTopologyOptions) ([]store.NetworkEdge, error)
+ GetNodeStats(ctx context.Context, clusterID, nodeName string, window time.Duration) (store.NodeStats, error)
+ GetNodePods(ctx context.Context, clusterID, nodeName string, window time.Duration) ([]store.PodMetrics, error)
}
// Handler wires HTTP requests to the VictoriaMetrics client.
@@ -73,6 +76,8 @@ func NewRouter(vmClient MetricsProvider, db *db.Store, st *store.Store, finopsEn
cost.Get("/namespaces/{name}", h.NamespaceDetail)
cost.Get("/nodes", h.Nodes)
cost.Get("/nodes/{name}", h.NodeDetail)
+ cost.Get("/nodes/{name}/stats", h.NodeStats)
+ cost.Get("/nodes/{name}/pods", h.NodePods)
cost.Get("/resources", h.Resources)
})
protected.Get("/agent", h.AgentStatus)
diff --git a/internal/store/pricing.go b/internal/store/pricing.go
index 3d4e060..6ddaaef 100644
--- a/internal/store/pricing.go
+++ b/internal/store/pricing.go
@@ -1,6 +1,11 @@
package store
-import "context"
+import (
+ "context"
+ "fmt"
+
+ "github.com/clustercost/clustercost-dashboard/internal/pricing"
+)
// Pricing constants
const (
@@ -10,51 +15,33 @@ const (
CostEgressInternal = 0.00 // Free
)
-// PricingProvider defines the interface for fetching node pricing.
-type PricingProvider interface {
- GetNodePrice(ctx context.Context, region, instanceType string) (float64, error)
-}
-
// PricingCatalog allows looking up node prices.
type PricingCatalog struct {
- // Map instance type to hourly price
- InstancePrices map[string]float64
- Provider PricingProvider
+ // Intentionally empty: lookups read pricing.InstancePrices from internal/pricing, so no provider or cache is kept here.
}
-// NewPricingCatalog returns a catalog with some default mocked pricing.
-func NewPricingCatalog(provider PricingProvider) *PricingCatalog {
- return &PricingCatalog{
- InstancePrices: map[string]float64{
- "t3.medium": 0.0416,
- "t3.large": 0.0832,
- "m5.large": 0.096,
- "m5.xlarge": 0.192,
- "c5.large": 0.085,
- "r5.large": 0.126,
- "default": 0.05, // Fallback
- },
- Provider: provider,
- }
+// NewPricingCatalog returns an empty catalog; it takes no provider because lookups read the price table in internal/pricing.
+func NewPricingCatalog() *PricingCatalog {
+ return &PricingCatalog{}
}
// GetTotalNodePrice returns the total hourly cost of a node.
func (pc *PricingCatalog) GetTotalNodePrice(ctx context.Context, region, instanceType string) float64 {
- // Try Provider first
- if pc.Provider != nil && instanceType != "" && region != "" {
- price, err := pc.Provider.GetNodePrice(ctx, region, instanceType)
- if err == nil && price > 0 {
- pc.InstancePrices[instanceType] = price // Update cache
- return price
- }
+ // 1. Exact match on the "region|instanceType" key in the shared price table.
+ key := fmt.Sprintf("%s|%s", region, instanceType)
+ if price, ok := pricing.InstancePrices[key]; ok {
+ return price
}
- // Fallback to local cache
- price, ok := pc.InstancePrices[instanceType]
- if !ok {
- price = pc.InstancePrices["default"]
+ // 2. No entry for this region: reuse the us-east-1 price of the same
+ // instance type as an approximation.
+ // NOTE(review): ctx is no longer read by this method now that the provider lookup is gone.
+ fallbackKey := fmt.Sprintf("us-east-1|%s", instanceType)
+ if price, ok := pricing.InstancePrices[fallbackKey]; ok {
+ return price
}
- return price
+
+ return 0.05 // Ultimate fallback when neither key is in the table
}
// GetNodeResourcePrices calculates the cost per vCPU and per GB of RAM based on the instance type.
diff --git a/internal/store/pricing_test.go b/internal/store/pricing_test.go
index 7ab6916..630cdd7 100644
--- a/internal/store/pricing_test.go
+++ b/internal/store/pricing_test.go
@@ -6,7 +6,7 @@ import (
)
func TestPricingCatalog_GetNodeResourcePrices(t *testing.T) {
- pc := NewPricingCatalog(nil)
+ pc := NewPricingCatalog()
// Test case 1: m5.large (2 vCPU, 8GB RAM)
// Price: $0.096/hr
diff --git a/internal/store/store.go b/internal/store/store.go
index 9df7b58..cda9041 100644
--- a/internal/store/store.go
+++ b/internal/store/store.go
@@ -11,7 +11,6 @@ import (
"math"
"github.com/clustercost/clustercost-dashboard/internal/config"
- "github.com/clustercost/clustercost-dashboard/internal/pricing"
agentv1 "github.com/clustercost/clustercost-dashboard/internal/proto/agent/v1"
)
@@ -122,12 +121,21 @@ type NodeSummary struct {
InstanceType string `json:"instanceType,omitempty"`
Labels map[string]string `json:"labels"`
Taints []string `json:"taints"`
+ // Resource Requests (Allocated)
+ CPURequestedMilli int64 `json:"cpuRequestedMilli"`
+ CPULimitMilli int64 `json:"cpuLimitMilli"`
+ MemoryRequestedBytes int64 `json:"memoryRequestedBytes"`
+ MemoryLimitBytes int64 `json:"memoryLimitBytes"`
// Network (Host Level)
NetTxBytes int64 `json:"netTxBytes"`
NetRxBytes int64 `json:"netRxBytes"`
EgressPublicBytes int64 `json:"egressPublicBytes"`
EgressCrossAZBytes int64 `json:"egressCrossAZBytes"`
EgressInternalBytes int64 `json:"egressInternalBytes"`
+ // Historical / Window Data
+ ActiveHours float64 `json:"activeHours"` // Hours active in the selected window
+ ActiveRatio float64 `json:"activeRatio"` // 0.0 - 1.0
+ WindowCost float64 `json:"windowCost"` // Actual cost incurred in the window
}
// NodeListResponse wraps paginated node results.
@@ -271,6 +279,7 @@ type NodeFilter struct {
Search string
Limit int
Offset int
+ Window string // "24h", "7d", "30d"
}
// PodContext wraps a PodMetric with its location metadata.
@@ -282,6 +291,27 @@ type PodContext struct {
InstanceType string
}
+// NodeStats contains historical usage and cost analysis for a node.
+// The field notes below describe how vm.Client.GetNodeStats fills the struct.
+type NodeStats struct {
+ NodeName string `json:"nodeName"` // node the stats were requested for
+ P95CPUUsagePercent float64 `json:"p95CpuUsagePercent"` // 0.95 quantile of clustercost_node_cpu_usage_percent over the window; divided by 100 when costing
+ P95MemoryUsagePercent float64 `json:"p95MemoryUsagePercent"` // 0.95 quantile of clustercost_node_memory_usage_percent over the window; divided by 100 when costing
+ TotalMonthlyCost float64 `json:"totalMonthlyCost"` // average hourly cost over the window times hoursPerMonth
+ RealUsageMonthlyCost float64 `json:"realUsageMonthlyCost"` // TotalMonthlyCost weighted half by P95 CPU usage and half by P95 memory usage
+ Window string `json:"window"` // lookback window as formatted for the query
+}
+
+// PodMetrics contains resource usage analysis for a single pod.
+// The field notes below describe how vm.Client.GetNodePods fills the struct.
+type PodMetrics struct {
+ PodName string `json:"podName"` // value of the "pod" label on the samples
+ Namespace string `json:"namespace"` // value of the "namespace" label on the samples
+ QoSClass string `json:"qosClass"` // "Guaranteed", "Burstable", or "BestEffort", derived from requests and the CPU limit
+ CPURequestMilli int64 `json:"cpuRequestMilli"` // max of clustercost_pod_cpu_request_millicores over the window, truncated to int64
+ CPUP95Milli float64 `json:"cpuP95Milli"` // 0.95 quantile of clustercost_pod_cpu_usage_milli over the window
+ MemoryRequestBytes int64 `json:"memoryRequestBytes"` // max of clustercost_pod_memory_request_bytes over the window, truncated to int64
+ MemoryP95Bytes float64 `json:"memoryP95Bytes"` // 0.95 quantile of clustercost_pod_memory_rss_bytes over the window
+}
+
// New creates a store seeded with agent configurations.
func New(cfgs []config.AgentConfig, recommendedAgentVersion string) *Store {
agentConfigs := make(map[string]config.AgentConfig, len(cfgs))
@@ -290,14 +320,13 @@ func New(cfgs []config.AgentConfig, recommendedAgentVersion string) *Store {
}
// Initialize Static Pricing Provider
- // Context is just placeholder for interface, static client doesn't need it
- pricingClient, _ := pricing.NewAWSClient(context.Background())
+ // We use the static map from internal/pricing/data.go, so no dynamic client needed.
return &Store{
agentConfigs: agentConfigs,
snapshots: make(map[string]*AgentSnapshot, len(cfgs)),
recommendedAgentVersion: recommendedAgentVersion,
- pricing: NewPricingCatalog(pricingClient),
+ pricing: NewPricingCatalog(),
}
}
@@ -972,6 +1001,28 @@ func (s *Store) aggregateNodesLocked() (map[string]*NodeSummary, error) {
}
haveData = true
+ // Pre-calculate Pod Limits/Requests per Node
+ nodeLimits := make(map[string]struct {
+ cpuReq, cpuLim, memReq, memLim int64
+ })
+
+ for _, p := range snap.Report.Pods {
+ nodeName := snap.Report.NodeName
+ if nodeName == "" {
+ continue
+ }
+ stats := nodeLimits[nodeName]
+ if p.Cpu != nil {
+ stats.cpuReq += safeInt64(p.Cpu.RequestMillicores)
+ stats.cpuLim += safeInt64(p.Cpu.LimitMillicores)
+ }
+ if p.Memory != nil {
+ stats.memReq += safeInt64(p.Memory.RequestBytes)
+ stats.memLim += safeInt64(p.Memory.LimitBytes)
+ }
+ nodeLimits[nodeName] = stats
+ }
+
// Iterate over all nodes reported by this agent
for _, n := range snap.Report.Nodes {
if n == nil || n.NodeName == "" {
@@ -979,6 +1030,11 @@ func (s *Store) aggregateNodesLocked() (map[string]*NodeSummary, error) {
}
name := n.NodeName
+ // Use aggregated values from pods if available, fallback to node metric if generic
+ // The Agent V2 NodeMetric.Requested... is arguably the same, but doesn't have Limits.
+ // We prioritize our calculated limits.
+ podStats := nodeLimits[name]
+
entry, ok := nodes[name]
if !ok {
entry = &NodeSummary{
@@ -988,7 +1044,20 @@ func (s *Store) aggregateNodesLocked() (map[string]*NodeSummary, error) {
InstanceType: "default", // placeholder
CPUAllocatableMilli: safeInt64(n.AllocatableCpuMillicores),
MemoryAllocatableBytes: safeInt64(n.AllocatableMemoryBytes),
+ CPURequestedMilli: safeInt64(n.RequestedCpuMillicores), // Fallback to agent metric
+ CPULimitMilli: podStats.cpuLim,
+ MemoryRequestedBytes: safeInt64(n.RequestedMemoryBytes), // Fallback to agent metric
+ MemoryLimitBytes: podStats.memLim,
+ IsUnderPressure: n.ThrottlingNs > 1_000_000,
+ }
+ // If agent metric is 0 (older agent?) use our aggregation for Requests too
+ if entry.CPURequestedMilli == 0 {
+ entry.CPURequestedMilli = podStats.cpuReq
}
+ if entry.MemoryRequestedBytes == 0 {
+ entry.MemoryRequestedBytes = podStats.memReq
+ }
+
nodes[name] = entry
}
diff --git a/internal/store/store_test.go b/internal/store/store_test.go
index 7702b7e..7911687 100644
--- a/internal/store/store_test.go
+++ b/internal/store/store_test.go
@@ -13,7 +13,7 @@ func newTestStore() *Store {
}
s := New(cfgs, "v1.0.0")
// Inject Mock Pricing
- s.pricing = NewPricingCatalog(&MockPricing{})
+ s.pricing = NewPricingCatalog()
return s
}
diff --git a/internal/vm/client.go b/internal/vm/client.go
index 91a7b8a..ecd5b7e 100644
--- a/internal/vm/client.go
+++ b/internal/vm/client.go
@@ -16,6 +16,7 @@ import (
"time"
"github.com/clustercost/clustercost-dashboard/internal/config"
+ "github.com/clustercost/clustercost-dashboard/internal/store"
)
// ErrNoData indicates that VictoriaMetrics returned no usable data.
@@ -42,6 +43,12 @@ type Client struct {
cacheTTL time.Duration
cacheMu sync.Mutex
cache map[string]cachedQuery
+ pricing *store.PricingCatalog
+}
+
+// SetPricingCatalog injects the pricing catalog used for node cost lookups.
+// When none has been set, the cost code in this package falls back to a fresh
+// store.NewPricingCatalog(). The field is assigned without any locking.
+// NOTE(review): presumably meant to be called once during start-up, before
+// the client serves requests — confirm against callers.
+func (c *Client) SetPricingCatalog(p *store.PricingCatalog) {
+ c.pricing = p
+}
type cachedQuery struct {
@@ -376,6 +383,77 @@ func (c *Client) GetPodP95Usage(ctx context.Context, clusterID, namespace, podNa
return cpuCores, memoryBytes, nil
}
+// GetNodeStats returns the P95 CPU/memory usage and the projected monthly
+// cost of a node over the given window.
+//
+// A non-positive window defaults to 24h. When clusterID is empty it is
+// resolved from ctx, the same way nodeMetrics does, so callers that carry the
+// cluster only in the context still get cluster-scoped queries.
+//
+// The hourly rate is taken from the clustercost_node_hourly_cost series when
+// it has data. That series is no longer written by current agents, so when it
+// yields nothing the rate falls back to the catalog-backed HourlyCost that
+// nodeMetrics computes for the node.
+//
+// It returns an error when nodeName is empty or when any query fails.
+func (c *Client) GetNodeStats(ctx context.Context, clusterID, nodeName string, window time.Duration) (store.NodeStats, error) {
+	if nodeName == "" {
+		return store.NodeStats{}, fmt.Errorf("node name is required")
+	}
+	if window <= 0 {
+		window = 24 * time.Hour
+	}
+	windowStr := formatDuration(window)
+
+	if clusterID == "" {
+		clusterID = c.resolveClusterID(ctx)
+	}
+	labels := map[string]string{"node": nodeName}
+	if clusterID != "" {
+		labels["cluster_id"] = clusterID
+	}
+	selector := formatLabels(labels)
+
+	// firstValue runs expr and returns the value of its first sample, or 0
+	// when the query succeeds but yields no samples.
+	firstValue := func(what, expr string) (float64, error) {
+		samples, err := c.query(ctx, expr)
+		if err != nil {
+			return 0, fmt.Errorf("query %s stats: %w", what, err)
+		}
+		if len(samples) == 0 {
+			return 0, nil
+		}
+		return samples[0].value, nil
+	}
+
+	// 1. P95 usage percentages over the window.
+	p95Cpu, err := firstValue("cpu", fmt.Sprintf("quantile_over_time(0.95, clustercost_node_cpu_usage_percent%s[%s])", selector, windowStr))
+	if err != nil {
+		return store.NodeStats{}, err
+	}
+	p95Mem, err := firstValue("mem", fmt.Sprintf("quantile_over_time(0.95, clustercost_node_memory_usage_percent%s[%s])", selector, windowStr))
+	if err != nil {
+		return store.NodeStats{}, err
+	}
+
+	// 2. Average hourly rate over the window from the stored series, if any.
+	avgHourlyCost, err := firstValue("cost", fmt.Sprintf("avg_over_time(clustercost_node_hourly_cost%s[%s])", selector, windowStr))
+	if err != nil {
+		return store.NodeStats{}, err
+	}
+	if avgHourlyCost == 0 {
+		// No stored cost for this node: use the rate nodeMetrics derives from
+		// the pricing catalog so the monthly figures are not reported as zero.
+		nodes, _, err := c.nodeMetrics(WithClusterID(ctx, clusterID), nodeName, "")
+		if err != nil {
+			return store.NodeStats{}, fmt.Errorf("price node: %w", err)
+		}
+		if n := nodes[nodeName]; n != nil {
+			avgHourlyCost = n.HourlyCost
+		}
+	}
+
+	// 3. Monthly projection. "Real usage" splits the monthly cost evenly
+	// between CPU and memory and scales each half by its P95 usage; the
+	// percentages are divided by 100, i.e. treated as 0-100 values.
+	totalMonthly := avgHourlyCost * hoursPerMonth
+	realUsageMonthly := (totalMonthly * 0.5 * (p95Cpu / 100.0)) + (totalMonthly * 0.5 * (p95Mem / 100.0))
+
+	return store.NodeStats{
+		NodeName:              nodeName,
+		P95CPUUsagePercent:    p95Cpu,
+		P95MemoryUsagePercent: p95Mem,
+		TotalMonthlyCost:      totalMonthly,
+		RealUsageMonthlyCost:  realUsageMonthly,
+		Window:                windowStr,
+	}, nil
+}
+
func formatLabels(labels map[string]string) string {
if len(labels) == 0 {
return ""
@@ -400,3 +478,133 @@ func formatLabels(labels map[string]string) string {
b.WriteByte('}')
return b.String()
}
+
+// GetNodePods returns request and P95 usage metrics for every pod on a node
+// over the given window.
+//
+// A non-positive window defaults to 24h. When clusterID is empty it is
+// resolved from ctx, the same way nodeMetrics does, so callers that carry the
+// cluster only in the context still get cluster-scoped queries.
+//
+// Five queries (CPU request, CPU limit, memory request, CPU P95, memory P95)
+// run concurrently and are merged per "namespace|pod". If any query fails, an
+// error wrapping one of the failures is returned. Results are sorted by
+// namespace and then pod name.
+func (c *Client) GetNodePods(ctx context.Context, clusterID, nodeName string, window time.Duration) ([]store.PodMetrics, error) {
+	if nodeName == "" {
+		return nil, fmt.Errorf("node name is required")
+	}
+	if window <= 0 {
+		window = 24 * time.Hour
+	}
+	windowStr := formatDuration(window)
+
+	if clusterID == "" {
+		clusterID = c.resolveClusterID(ctx)
+	}
+	labels := map[string]string{"node": nodeName}
+	if clusterID != "" {
+		labels["cluster_id"] = clusterID
+	}
+	labelStr := formatLabels(labels)
+
+	// podData accumulates the values the separate queries return for one pod.
+	type podData struct {
+		Namespace string
+		PodName   string
+		CPUReq    float64
+		CPULim    float64 // only used to derive the QoS class
+		MemReq    float64
+		CPUP95    float64
+		MemP95    float64
+	}
+
+	// Each query feeds exactly one podData field.
+	queries := []struct {
+		expr   string
+		assign func(p *podData, v float64)
+	}{
+		{
+			fmt.Sprintf("max_over_time(clustercost_pod_cpu_request_millicores%s[%s])", labelStr, windowStr),
+			func(p *podData, v float64) { p.CPUReq = v },
+		},
+		{
+			fmt.Sprintf("max_over_time(clustercost_pod_cpu_limit_millicores%s[%s])", labelStr, windowStr),
+			func(p *podData, v float64) { p.CPULim = v },
+		},
+		{
+			fmt.Sprintf("max_over_time(clustercost_pod_memory_request_bytes%s[%s])", labelStr, windowStr),
+			func(p *podData, v float64) { p.MemReq = v },
+		},
+		{
+			fmt.Sprintf("quantile_over_time(0.95, clustercost_pod_cpu_usage_milli%s[%s])", labelStr, windowStr),
+			func(p *podData, v float64) { p.CPUP95 = v },
+		},
+		{
+			fmt.Sprintf("quantile_over_time(0.95, clustercost_pod_memory_rss_bytes%s[%s])", labelStr, windowStr),
+			func(p *podData, v float64) { p.MemP95 = v },
+		},
+	}
+
+	podMap := make(map[string]*podData)
+	var (
+		wg       sync.WaitGroup
+		mu       sync.Mutex // guards podMap and firstErr
+		firstErr error
+	)
+	for _, q := range queries {
+		wg.Add(1)
+		go func(expr string, assign func(*podData, float64)) {
+			defer wg.Done()
+			samples, err := c.query(ctx, expr)
+
+			mu.Lock()
+			defer mu.Unlock()
+			if err != nil {
+				if firstErr == nil {
+					firstErr = err
+				}
+				return
+			}
+			for _, s := range samples {
+				ns, pod := s.labels["namespace"], s.labels["pod"]
+				if ns == "" || pod == "" {
+					continue
+				}
+				id := ns + "|" + pod
+				p, ok := podMap[id]
+				if !ok {
+					p = &podData{Namespace: ns, PodName: pod}
+					podMap[id] = p
+				}
+				assign(p, s.value)
+			}
+		}(q.expr, q.assign)
+	}
+	wg.Wait()
+
+	if firstErr != nil {
+		return nil, fmt.Errorf("failed to query pod metrics: %w", firstErr)
+	}
+
+	results := make([]store.PodMetrics, 0, len(podMap))
+	for _, p := range podMap {
+		// Simplified QoS: no requests at all means BestEffort; a CPU request
+		// equal to a non-zero CPU limit means Guaranteed (memory limits are
+		// not consulted); everything else is Burstable.
+		qos := "Burstable"
+		switch {
+		case p.CPUReq == 0 && p.MemReq == 0:
+			qos = "BestEffort"
+		case p.CPUReq == p.CPULim && p.CPULim > 0:
+			qos = "Guaranteed"
+		}
+
+		results = append(results, store.PodMetrics{
+			PodName:            p.PodName,
+			Namespace:          p.Namespace,
+			QoSClass:           qos,
+			CPURequestMilli:    int64(p.CPUReq),
+			CPUP95Milli:        p.CPUP95,
+			MemoryRequestBytes: int64(p.MemReq),
+			MemoryP95Bytes:     p.MemP95,
+		})
+	}
+
+	// Order by namespace, then pod name, so the response is stable; any
+	// waste-based ordering is left to the consumer.
+	sort.Slice(results, func(i, j int) bool {
+		if results[i].Namespace != results[j].Namespace {
+			return results[i].Namespace < results[j].Namespace
+		}
+		return results[i].PodName < results[j].PodName
+	})
+
+	return results, nil
+}
diff --git a/internal/vm/dashboard.go b/internal/vm/dashboard.go
index 34cc685..31996e0 100644
--- a/internal/vm/dashboard.go
+++ b/internal/vm/dashboard.go
@@ -131,7 +131,7 @@ func (c *Client) NamespaceDetail(ctx context.Context, name string) (store.Namesp
}
func (c *Client) NodeList(ctx context.Context, filter store.NodeFilter) (store.NodeListResponse, error) {
- nodes, ts, err := c.nodeMetrics(ctx, "")
+ nodes, ts, err := c.nodeMetrics(ctx, "", filter.Window)
if err != nil {
return store.NodeListResponse{}, err
}
@@ -169,7 +169,7 @@ func (c *Client) NodeList(ctx context.Context, filter store.NodeFilter) (store.N
}
func (c *Client) NodeDetail(ctx context.Context, name string) (store.NodeSummary, error) {
- nodes, _, err := c.nodeMetrics(ctx, name)
+ nodes, _, err := c.nodeMetrics(ctx, name, "")
if err != nil {
return store.NodeSummary{}, err
}
@@ -198,10 +198,8 @@ func (c *Client) Resources(ctx context.Context) (store.ResourcesPayload, error)
if err != nil && err != ErrNoData {
return store.ResourcesPayload{}, err
}
- nodeHourlyCost, _, err := c.scalarMetric(ctx, "clustercost_cluster_total_node_hourly_cost")
- if err != nil && err != ErrNoData {
- return store.ResourcesPayload{}, err
- }
+ // Node Hourly Cost is now fully calculated, no stored metric
+ nodeHourlyCost := 0.0
// Fetch Network Metrics
netTx, _, _ := c.scalarMetric(ctx, "clustercost_cluster_network_tx_bytes_total")
@@ -432,7 +430,7 @@ func (c *Client) AgentStatus(ctx context.Context) (store.AgentStatusPayload, err
}
nsTS := c.seriesTimestampSafe(ctx, "clustercost_namespace_hourly_cost")
- nodeTS := c.seriesTimestampSafe(ctx, "clustercost_node_hourly_cost")
+ nodeTS := c.seriesTimestampSafe(ctx, "clustercost_node_cpu_allocatable_milli")
resTS := c.seriesTimestampSafe(ctx, "clustercost_cluster_cpu_usage_milli_total")
datasets := store.AgentDatasetHealth{
@@ -678,36 +676,6 @@ func (c *Client) namespaceMetrics(ctx context.Context, environment, namespace st
}
}
- queryScalar := func(expr string) (float64, error) {
- samples, err := c.query(ctx, expr)
- if err != nil {
- return 0, err
- }
- if len(samples) == 0 {
- return 0, ErrNoData
- }
- return samples[0].value, nil
- }
-
- nodeCostExpr := fmt.Sprintf("sum(max by (node) (%s))", c.lookbackExpr("clustercost_node_hourly_cost", nil, clusterID))
- cpuAllocExpr := fmt.Sprintf("sum(max by (node) (%s))", c.lookbackExpr("clustercost_node_cpu_allocatable_milli", nil, clusterID))
- memAllocExpr := fmt.Sprintf("sum(max by (node) (%s))", c.lookbackExpr("clustercost_node_memory_allocatable_bytes", nil, clusterID))
-
- nodeCost, err := queryScalar(nodeCostExpr)
- if err == nil && nodeCost > 0 {
- cpuAllocMilli, errCPU := queryScalar(cpuAllocExpr)
- memAllocBytes, errMem := queryScalar(memAllocExpr)
- if errCPU == nil && errMem == nil && cpuAllocMilli > 0 && memAllocBytes > 0 {
- cpuPrice := (nodeCost * 0.5) / (cpuAllocMilli / 1000.0)
- memPrice := (nodeCost * 0.5) / (memAllocBytes / (1024.0 * 1024.0 * 1024.0))
- for _, entry := range out {
- cpuUsageCores := float64(entry.CPUUsageMilli) / 1000.0
- memUsageGB := float64(entry.MemoryUsageBytes) / (1024.0 * 1024.0 * 1024.0)
- entry.HourlyCost = (cpuUsageCores * cpuPrice) + (memUsageGB * memPrice)
- }
- }
- }
-
latest = c.seriesTimestampSafe(ctx, "clustercost_namespace_memory_rss_bytes_total")
type nodeAlloc struct {
@@ -755,7 +723,12 @@ func (c *Client) namespaceMetrics(ctx context.Context, environment, namespace st
})
}
- pricing := store.NewPricingCatalog(nil)
+ var pricing *store.PricingCatalog
+ if c.pricing != nil {
+ pricing = c.pricing
+ } else {
+ pricing = store.NewPricingCatalog()
+ }
totalNodeCost := 0.0
totalCpuCores := 0.0
totalMemGB := 0.0
@@ -785,7 +758,27 @@ func (c *Client) namespaceMetrics(ctx context.Context, environment, namespace st
return out, latest, nil
}
-func (c *Client) nodeMetrics(ctx context.Context, nodeName string) (map[string]*store.NodeSummary, time.Time, error) {
+// Nodes returns every node found by nodeMetrics for the given window as a
+// flat slice ordered by WindowCost, highest first. Nodes with the same cost
+// are ordered by name so repeated calls return the same sequence. Labels are
+// cleared on the returned copies to keep the payload small.
+func (c *Client) Nodes(ctx context.Context, window string) ([]store.NodeSummary, error) {
+	byName, _, err := c.nodeMetrics(ctx, "", window)
+	if err != nil {
+		return nil, err
+	}
+
+	out := make([]store.NodeSummary, 0, len(byName))
+	for _, n := range byName {
+		summary := *n // copy first so the map entry keeps its labels
+		summary.Labels = nil
+		out = append(out, summary)
+	}
+
+	// Map iteration order is random and sort.Slice is not stable, so break
+	// cost ties on the node name to keep the output deterministic.
+	sort.Slice(out, func(i, j int) bool {
+		if out[i].WindowCost != out[j].WindowCost {
+			return out[i].WindowCost > out[j].WindowCost
+		}
+		return out[i].NodeName < out[j].NodeName
+	})
+
+	return out, nil
+}
+
+func (c *Client) nodeMetrics(ctx context.Context, nodeName, window string) (map[string]*store.NodeSummary, time.Time, error) {
clusterID := c.resolveClusterID(ctx)
ctx = WithClusterID(ctx, clusterID)
labels := map[string]string{}
@@ -793,53 +786,169 @@ func (c *Client) nodeMetrics(ctx context.Context, nodeName string) (map[string]*
labels["node"] = nodeName
}
+ // Parse Window
+ var windowDur time.Duration
+ var lookbackFunc string = "max_over_time" // Default for "current" view (snapshot-ish)
+ var windowStr string = c.lookback.String() // Default internal lookback
+
+ if window != "" {
+ d, err := time.ParseDuration(window)
+ if err == nil {
+ windowDur = d
+ windowStr = window
+ lookbackFunc = "avg_over_time"
+ }
+ } else {
+ // Assuming standard "current" view implies "1h" or just last scrape?
+ // For consistency with existing logic, we keep standard lookback but use max/last.
+ }
+
+ // If Windowed View: Primary source is agent_up to find ALL nodes active in window
+ // If Snapshot View: Primary source is usually node_info or just scraping metrics.
+ // We'll use the same multi-metric approach but adjust the aggregation.
+
+ out := make(map[string]*store.NodeSummary)
+
+ // Helper to safely assign to out map
+ getOrCreate := func(node string) *store.NodeSummary {
+ if node == "" {
+ return nil
+ }
+ if _, ok := out[node]; !ok {
+ out[node] = &store.NodeSummary{
+ NodeName: node,
+ Labels: map[string]string{},
+ Taints: []string{},
+ }
+ }
+ return out[node]
+ }
+
+ // 1. Availability / Active Time
+ // Query: avg_over_time(clustercost_agent_up[window])
+ // Value: 0.0 - 1.0 (fraction of time active)
+ availExpr := fmt.Sprintf("avg_over_time(clustercost_agent_up%s[%s])", formatLabels(c.scopedLabels(labels, clusterID)), windowStr)
+ availSamples, err := c.query(ctx, availExpr)
+ if err == nil {
+ for _, s := range availSamples {
+ node := s.labels["node"]
+ entry := getOrCreate(node)
+ if entry != nil {
+ entry.ActiveRatio = s.value
+ if windowDur > 0 {
+ entry.ActiveHours = s.value * windowDur.Hours()
+ } else {
+ // Default assumption if no window: 100% active (snapshot)
+ entry.ActiveRatio = 1.0
+ entry.ActiveHours = 24 * 30 // Monthly projection basis
+ }
+
+ // Extract Metadata from Agent Up
+ if entry.InstanceType == "" {
+ entry.InstanceType = valueOrDefault(s.labels["instance_type"],
+ valueOrDefault(s.labels["node_label_node_kubernetes_io_instance_type"],
+ s.labels["node_label_beta_kubernetes_io_instance_type"]))
+ }
+ if entry.Labels["topology_kubernetes_io_region"] == "" {
+ entry.Labels["topology_kubernetes_io_region"] = s.labels["cluster_region"]
+ }
+ }
+ }
+ }
+
+ // Helper to extract metadata from labels
+ updateMeta := func(entry *store.NodeSummary, labels map[string]string) {
+ if entry.InstanceType == "" {
+ entry.InstanceType = valueOrDefault(labels["instance_type"],
+ valueOrDefault(labels["node_label_node_kubernetes_io_instance_type"],
+ labels["node_label_beta_kubernetes_io_instance_type"]))
+ }
+ if entry.Labels["topology_kubernetes_io_region"] == "" {
+ entry.Labels["topology_kubernetes_io_region"] = labels["cluster_region"]
+ }
+ }
+
+ // 2. Metrics List
metrics := []struct {
- name string
- assign func(entry *store.NodeSummary, value float64, labels map[string]string)
+ name string
+ validLookback bool // if false, use standard lookback (e.g. for info that doesn't vary)
+ assign func(entry *store.NodeSummary, value float64, labels map[string]string)
}{
- {"clustercost_node_hourly_cost", func(e *store.NodeSummary, v float64, l map[string]string) {
- e.HourlyCost = v
- if e.InstanceType == "" {
- e.InstanceType = l["instance_type"]
- }
+ // hourly_cost metric removed as it's deprecated. Cost is calculated in post-processing.
+ {"clustercost_node_cpu_usage_percent", true, func(e *store.NodeSummary, v float64, _ map[string]string) { e.CPUUsagePercent = v }},
+ {"clustercost_node_memory_usage_percent", true, func(e *store.NodeSummary, v float64, _ map[string]string) { e.MemoryUsagePercent = v }},
+ {"clustercost_node_cpu_allocatable_milli", true, func(e *store.NodeSummary, v float64, l map[string]string) {
+ e.CPUAllocatableMilli = int64(v)
+ updateMeta(e, l)
}},
- {"clustercost_node_cpu_usage_percent", func(e *store.NodeSummary, v float64, _ map[string]string) { e.CPUUsagePercent = v }},
- {"clustercost_node_memory_usage_percent", func(e *store.NodeSummary, v float64, _ map[string]string) { e.MemoryUsagePercent = v }},
- {"clustercost_node_cpu_allocatable_milli", func(e *store.NodeSummary, v float64, _ map[string]string) { e.CPUAllocatableMilli = int64(v) }},
- {"clustercost_node_memory_allocatable_bytes", func(e *store.NodeSummary, v float64, _ map[string]string) { e.MemoryAllocatableBytes = int64(v) }},
- {"clustercost_node_pod_count", func(e *store.NodeSummary, v float64, _ map[string]string) { e.PodCount = int(v) }},
- {"clustercost_node_under_pressure", func(e *store.NodeSummary, v float64, _ map[string]string) { e.IsUnderPressure = v > 0.5 }},
+ {"clustercost_node_memory_allocatable_bytes", true, func(e *store.NodeSummary, v float64, l map[string]string) {
+ e.MemoryAllocatableBytes = int64(v)
+ updateMeta(e, l)
+ }},
+ {"clustercost_node_cpu_requested_milli", true, func(e *store.NodeSummary, v float64, l map[string]string) {
+ e.CPURequestedMilli = int64(v)
+ updateMeta(e, l)
+ }},
+ {"clustercost_node_memory_requested_bytes", true, func(e *store.NodeSummary, v float64, l map[string]string) {
+ e.MemoryRequestedBytes = int64(v)
+ updateMeta(e, l)
+ }},
+ {"clustercost_node_cpu_limit_milli", true, func(e *store.NodeSummary, v float64, _ map[string]string) { e.CPULimitMilli = int64(v) }},
+ {"clustercost_node_memory_limit_bytes", true, func(e *store.NodeSummary, v float64, _ map[string]string) { e.MemoryLimitBytes = int64(v) }},
}
- out := make(map[string]*store.NodeSummary)
for _, metric := range metrics {
by := "node"
- if metric.name == "clustercost_node_hourly_cost" {
- by = "node,instance_type"
+ // Preserve metadata labels in aggregation
+ if strings.Contains(metric.name, "requested") ||
+ strings.Contains(metric.name, "allocatable") {
+ by = "node,instance_type,node_label_node_kubernetes_io_instance_type,node_label_beta_kubernetes_io_instance_type,cluster_region,topology_kubernetes_io_region"
}
- expr := fmt.Sprintf("max by (%s) (%s)", by, c.lookbackExpr(metric.name, labels, clusterID))
- samples, err := c.query(ctx, expr)
+
+ // determine function
+ fn := lookbackFunc
+ // For cost, average over time gives the average hourly rate during that window.
+ // For usage %, average makes sense.
+ // For Requests/Limits/Allocatable, they might vary if node resized (rare) or replaced. Average is decent.
+
+ expr := fmt.Sprintf("%s(%s%s[%s])", fn, metric.name, formatLabels(c.scopedLabels(labels, clusterID)), windowStr)
+ // Need aggregation to preserve labels and unique by node
+ // max by (...) for snapshots, but avg by (...) for windows?
+ // Actually "avg by" works for all if we want the average stat.
+ aggOp := "avg"
+ if !strings.Contains(metric.name, "percent") && !strings.Contains(metric.name, "cost") {
+ // For allocatable/requests, max is often safer to see peak reservation?
+ // But for "Ghost Cost", average request is better?
+ // Let's stick to Average for Historical Analysis.
+ aggOp = "avg"
+ }
+
+ fullExpr := fmt.Sprintf("%s by (%s) (%s)", aggOp, by, expr)
+
+ samples, err := c.query(ctx, fullExpr)
if err != nil {
- return nil, time.Time{}, err
+ continue // Skip failing metrics rather than crash whole request
}
+
for _, sample := range samples {
node := sample.labels["node"]
- if node == "" {
- continue
- }
- entry := out[node]
- if entry == nil {
- entry = &store.NodeSummary{
- NodeName: node,
- Labels: map[string]string{},
- Taints: []string{},
- }
- out[node] = entry
+ entry := getOrCreate(node)
+ if entry != nil {
+ metric.assign(entry, sample.value, sample.labels)
}
- metric.assign(entry, sample.value, sample.labels)
}
}
+ // 3. Post-Processing & Cost Backfill
+ var pricing *store.PricingCatalog
+ if c.pricing != nil {
+ pricing = c.pricing
+ } else {
+ // NewPricingCatalog now takes 0 args (static)
+ pricing = store.NewPricingCatalog()
+ }
+
+ // Fetch node status
statusSamples, err := c.seriesTimestamp(ctx, "clustercost_node_status", labels)
if err != nil && err != ErrNoData {
return nil, time.Time{}, err
@@ -851,7 +960,56 @@ func (c *Client) nodeMetrics(ctx context.Context, nodeName string) (map[string]*
}
}
- latest := c.seriesTimestampSafe(ctx, "clustercost_node_hourly_cost")
+ for _, node := range out {
+ // Fallback: when the region label is missing, extract it from the node name (the pattern only matches us-* regions).
+ if node.Labels["topology_kubernetes_io_region"] == "" {
+ re := regexp.MustCompile(`\.(us-[a-z]+-\d+)\.`)
+ matches := re.FindStringSubmatch(node.NodeName)
+ if len(matches) > 1 {
+ node.Labels["topology_kubernetes_io_region"] = matches[1]
+ }
+ }
+
+ region := node.Labels["topology_kubernetes_io_region"]
+ if region == "" {
+ // Fallback: infer the region from known substrings of the node name, defaulting to us-east-1.
+ if strings.Contains(node.NodeName, "us-east-1") {
+ region = "us-east-1"
+ } else if strings.Contains(node.NodeName, "us-west-2") {
+ region = "us-west-2"
+ } else if strings.Contains(node.NodeName, "eu-west-1") {
+ region = "eu-west-1"
+ } else {
+ region = "us-east-1" // ultimate fallback
+ }
+ }
+
+ // Update region in labels so it persists
+ if node.Labels == nil {
+ node.Labels = map[string]string{}
+ }
+ node.Labels["topology_kubernetes_io_region"] = region
+
+ instanceType := node.InstanceType
+ if instanceType == "" {
+ instanceType = "m5.large" // Default fallback to avoid 0 cost
+ }
+
+ if node.HourlyCost == 0 {
+ node.HourlyCost = pricing.GetTotalNodePrice(context.Background(), region, instanceType)
+ }
+
+ // CALCULATE WINDOW COST / TOTAL COST
+ if windowDur > 0 {
+ // Real Cost = HourlyRate * ActiveHours
+ node.WindowCost = node.HourlyCost * node.ActiveHours
+ } else {
+ // Snapshot (no window): project a monthly cost, using 730 hours per month.
+ node.WindowCost = node.HourlyCost * 730
+ }
+ }
+
+ latest := c.seriesTimestampSafe(ctx, "clustercost_node_cpu_allocatable_milli")
return out, latest, nil
}
@@ -1026,7 +1184,7 @@ func pickLatestStatus(samples []sample) map[string]string {
func (c *Client) nodeNames(ctx context.Context) []string {
clusterID := c.resolveClusterID(ctx)
- expr := fmt.Sprintf("max by (node) (%s)", c.lookbackExpr("clustercost_node_hourly_cost", nil, clusterID))
+ expr := fmt.Sprintf("max by (node) (%s)", c.lookbackExpr("clustercost_node_cpu_allocatable_milli", nil, clusterID))
samples, err := c.query(ctx, expr)
if err != nil {
return nil
diff --git a/internal/vm/ingestor.go b/internal/vm/ingestor.go
index 5ba8fee..d2aab8f 100644
--- a/internal/vm/ingestor.go
+++ b/internal/vm/ingestor.go
@@ -325,7 +325,7 @@ func (i *Ingestor) appendMetricsReport(buf, labelBuf *bytes.Buffer, scratch []by
}
// map[namespace]*nsAgg
nsMap := make(map[string]*nsAgg)
- pricing := store.NewPricingCatalog(nil)
+ pricing := store.NewPricingCatalog()
region := req.Region
if region == "" {
region = req.AvailabilityZone
@@ -534,6 +534,14 @@ func (i *Ingestor) appendMetricsReport(buf, labelBuf *bytes.Buffer, scratch []by
writeFloatSample(buf, scratch, "clustercost_node_memory_usage_percent", nodeLabelsBlob, memPct, tsMillis)
}
+ // Calculate Node Hourly Cost
+ // We use Capacity because you pay for the whole node, not just allocatable.
+ nodeCpuCores := float64(node.CapacityCpuMillicores) / 1000.0
+ nodeMemGB := float64(node.CapacityMemoryBytes) / (1024 * 1024 * 1024)
+ nodeHourlyCost := (nodeCpuCores * cpuPrice) + (nodeMemGB * memPrice)
+
+ writeFloatSample(buf, scratch, "clustercost_node_hourly_cost", nodeLabelsBlob, nodeHourlyCost, tsMillis)
+
// Node Network Metrics (Host Traffic)
if node.Network != nil {
nodeTx := safeInt64(node.Network.BytesSent)
diff --git a/web/package-lock.json b/web/package-lock.json
index 227b0d7..39dceb8 100644
--- a/web/package-lock.json
+++ b/web/package-lock.json
@@ -14,6 +14,7 @@
"@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-tabs": "^1.1.13",
+ "@radix-ui/react-tooltip": "^1.2.8",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"lucide-react": "^0.298.0",
@@ -35,6 +36,7 @@
"@types/react-dom": "^18.2.7",
"@vitejs/plugin-react": "^4.2.1",
"autoprefixer": "^10.4.16",
+ "baseline-browser-mapping": "^2.9.15",
"jsdom": "^24.0.0",
"postcss": "^8.4.31",
"tailwindcss": "^3.4.14",
@@ -1709,6 +1711,58 @@
}
}
},
+ "node_modules/@radix-ui/react-tooltip": {
+ "version": "1.2.8",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz",
+ "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==",
+ "license": "MIT",
+ "dependencies": {
+ "@radix-ui/primitive": "1.1.3",
+ "@radix-ui/react-compose-refs": "1.1.2",
+ "@radix-ui/react-context": "1.1.2",
+ "@radix-ui/react-dismissable-layer": "1.1.11",
+ "@radix-ui/react-id": "1.1.1",
+ "@radix-ui/react-popper": "1.2.8",
+ "@radix-ui/react-portal": "1.1.9",
+ "@radix-ui/react-presence": "1.1.5",
+ "@radix-ui/react-primitive": "2.1.3",
+ "@radix-ui/react-slot": "1.2.3",
+ "@radix-ui/react-use-controllable-state": "1.2.2",
+ "@radix-ui/react-visually-hidden": "1.2.3"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "@types/react-dom": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+ "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ },
+ "@types/react-dom": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-slot": {
+ "version": "1.2.3",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+ "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+ "license": "MIT",
+ "dependencies": {
+ "@radix-ui/react-compose-refs": "1.1.2"
+ },
+ "peerDependencies": {
+ "@types/react": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ }
+ }
+ },
"node_modules/@radix-ui/react-use-callback-ref": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz",
@@ -3047,9 +3101,9 @@
"license": "MIT"
},
"node_modules/baseline-browser-mapping": {
- "version": "2.8.28",
- "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.28.tgz",
- "integrity": "sha512-gYjt7OIqdM0PcttNYP2aVrr2G0bMALkBaoehD4BuRGjAOtipg0b6wHg1yNL+s5zSnLZZrGHOw4IrND8CD+3oIQ==",
+ "version": "2.9.15",
+ "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.15.tgz",
+ "integrity": "sha512-kX8h7K2srmDyYnXRIppo4AH/wYgzWVCs+eKr3RusRSQ5PvRYoEFmR/I0PbdTjKFAoKqp5+kbxnNTFO9jOfSVJg==",
"dev": true,
"license": "Apache-2.0",
"bin": {
diff --git a/web/package.json b/web/package.json
index 5daf043..9ee6cae 100644
--- a/web/package.json
+++ b/web/package.json
@@ -17,13 +17,14 @@
"@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-tabs": "^1.1.13",
+ "@radix-ui/react-tooltip": "^1.2.8",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"lucide-react": "^0.298.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
- "reactflow": "^11.11.4",
"react-router-dom": "^6.20.0",
+ "reactflow": "^11.11.4",
"recharts": "^2.8.0",
"shadcn-ui": "^0.9.5",
"tailwind-merge": "^1.14.0",
@@ -38,6 +39,7 @@
"@types/react-dom": "^18.2.7",
"@vitejs/plugin-react": "^4.2.1",
"autoprefixer": "^10.4.16",
+ "baseline-browser-mapping": "^2.9.15",
"jsdom": "^24.0.0",
"postcss": "^8.4.31",
"tailwindcss": "^3.4.14",
diff --git a/web/src/components/common/MetricCard.tsx b/web/src/components/common/MetricCard.tsx
new file mode 100644
index 0000000..83c7e7c
--- /dev/null
+++ b/web/src/components/common/MetricCard.tsx
@@ -0,0 +1,39 @@
+import { Card, CardContent, CardHeader, CardTitle } from "../ui/card";
+import { cn } from "../../lib/utils";
+
+interface MetricCardProps {
+ title: string;
+ value: string;
+ subtext?: string;
+ trend?: string;
+ trendUp?: boolean;
+ className?: string;
+ valueClassName?: string;
+}
+
+export function MetricCard({ title, value, subtext, trend, trendUp, className, valueClassName }: MetricCardProps) {
+ return (
+
+ {isHighRisk ? "Stability Risk: Bursting" : isOptimized ? "Perfectly Rightsized" : "Efficiency Gap Detected"} +
++ {isHighRisk + ? "Operating above guaranteed limits." + : isOptimized + ? "Balanced resource utilization." + : "Resources reserved but unused."} +
+Monthly cost
-{formatCurrency(node.monthlyCost)}
-{formatCurrency(node.hourlyCost, { maximumFractionDigits: 2 })}/hr
-Monthly Cost
+{formatCurrency(node.monthlyCost)}
Potential Savings
++ {stats ? formatCurrency(stats.totalMonthlyCost - stats.realUsageMonthlyCost) : "..."} +
We couldn’t find any nodes. Once data arrives it will show up here.
- -See how much each node costs and how full it is.
+Real-time analysis based on actual uptime.
{formatCurrency(summary.totalMonthly)}
-Monthly (hourly x 30 days)
-{formatPercentage(summary.avgCpu, { fractionDigits: 0 })}
- ++ Actual cost based on {getWindowLabel(timeWindow)} uptime +
{formatPercentage(summary.avgMem, { fractionDigits: 0 })}
- ++ Money burned on unused capacity +
- {summary.ready} Ready · {summary.issues} With issues -
-Issues = NotReady or under pressure
-Sorted by monthly cost
-+ Conservative achievable reduction +
+Looks good, no obvious wasted nodes right now.
- ) : ( - optimizationCandidates.map((node) => ( -