From dd5c397318ca8f770d630cad9d7942ae02dd4eb1 Mon Sep 17 00:00:00 2001 From: Zach Olinske Date: Sun, 15 Feb 2026 11:43:39 +0100 Subject: [PATCH 1/5] feat: Add multi-cloud FOCUS test data generator for FinOps Hub Add Generate-MultiCloudTestData.ps1 that generates synthetic, multi-cloud FOCUS-compliant cost data (v1.0-1.3) for testing FinOps Hub deployments. Supports Azure, AWS, GCP, and DataCenter providers with realistic data including commitment discounts, Azure Hybrid Benefit, spot pricing, marketplace purchases, tag coverage variation, and budget scaling. Generates up to 500K+ rows with Parquet/CSV output and optional Azure Storage upload with ADF trigger management. --- .../test/Generate-MultiCloudTestData.ps1 | 1432 +++++++++++++++++ 1 file changed, 1432 insertions(+) create mode 100644 src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 diff --git a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 new file mode 100644 index 000000000..da008cf88 --- /dev/null +++ b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 @@ -0,0 +1,1432 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +<# +.SYNOPSIS + Generates multi-cloud FOCUS-compliant test data for FinOps Hub validation. + +.DESCRIPTION + This script generates synthetic cost data in FOCUS 1.0-1.3 format for: + - Azure (Cost Management Managed Exports simulation) + - AWS (Data Exports / CUR FOCUS format) + - GCP (BigQuery FOCUS export simulation) + - Data Center (On-premises infrastructure) + + The generated data can be uploaded to Azure Storage for FinOps Hub ingestion testing. 
+ + Features: + - ALL columns referenced by FinOps Hub dashboard KQL queries + - Correct PricingCategory values: Standard, Dynamic, Committed + - Full Azure Hybrid Benefit simulation with x_SkuLicense* columns + - Commitment discounts with x_SkuOrderId, x_SkuTerm linkage + - Commitment Purchase rows for invoicing page + - CPU architecture in x_SkuMeterName (Intel/AMD/Arm64 patterns) + - x_EffectiveUnitPrice, x_BilledUnitPrice for discount analysis + - x_SkuDescription, x_SkuInstanceType for SKU cost analysis + - x_OnDemandCost, x_OnDemandUnitPrice for savings calculations + - Tag coverage variation (~20% untagged for maturity scorecard) + - Data quality anomaly rows for validation page + - Negotiated discount rows (ListCost > ContractedCost) + - Persistent resources across days (realistic trending) + - Budget scaling to target total cost + +.PARAMETER OutputPath + Directory to save generated files. Default: ./test-data + +.PARAMETER CloudProvider + Which cloud provider data to generate. Options: Azure, AWS, GCP, DataCenter, All + Default: All + +.PARAMETER MonthsOfData + Number of months of historical data to generate, ending at today. + Default: 6 (generates 6 months ending today) + +.PARAMETER StartDate + Start date for generated data. Overrides MonthsOfData if specified. + +.PARAMETER EndDate + End date for generated data. Default: Today + +.PARAMETER TotalRowTarget + Target total rows across all providers and days. Default: 500000 + Rows are distributed: ~60% Azure, ~20% AWS, ~15% GCP, ~5% DataCenter + +.PARAMETER TotalBudget + Target total cost in USD for all generated data. Default: 500000 ($500K) + Costs are scaled proportionally to achieve this target. + +.PARAMETER FocusVersion + FOCUS specification version. Options: 1.0, 1.1, 1.2, 1.3 + Default: 1.3 + +.PARAMETER OutputFormat + Output file format. Options: Parquet, CSV, Both + Default: Parquet + +.PARAMETER StorageAccountName + Azure Storage account name for upload. 
+ +.PARAMETER ResourceGroupName + Resource group containing the storage account (required for key-based auth). + +.PARAMETER AdfName + Azure Data Factory name for starting triggers. + +.PARAMETER Upload + Upload generated files to Azure Storage. + +.PARAMETER StartTriggers + Start ADF triggers before upload so BlobCreated events are captured. + +.EXAMPLE + .\Generate-MultiCloudTestData.ps1 + # Generates 6 months of data for all providers, 500K rows, $500K total budget + +.EXAMPLE + .\Generate-MultiCloudTestData.ps1 -MonthsOfData 3 -TotalRowTarget 100000 -TotalBudget 50000 + # Generates 3 months of data, 100K rows, $50K total budget + +.EXAMPLE + .\Generate-MultiCloudTestData.ps1 -Upload -StorageAccountName "stfinopshub" -ResourceGroupName "rg-finopshub" -AdfName "adf-finopshub" -StartTriggers + # Generates data, ensures ADF triggers are running, then uploads to trigger processing + +.NOTES + FOCUS Specification Reference: https://focus.finops.org/focus-specification/v1-3/ + + Prerequisites: + - Python 3 with pandas and pyarrow for Parquet output + - Azure CLI for upload functionality (az storage, az datafactory) +#> + +[CmdletBinding()] +param( + [string]$OutputPath = "./test-data", + + [ValidateSet("Azure", "AWS", "GCP", "DataCenter", "All")] + [string]$CloudProvider = "All", + + [int]$MonthsOfData = 6, + + [datetime]$StartDate, + + [datetime]$EndDate = (Get-Date), + + [int]$TotalRowTarget = 500000, + + [decimal]$TotalBudget = 500000, + + [ValidateSet("1.0", "1.1", "1.2", "1.3")] + [string]$FocusVersion = "1.3", + + [ValidateSet("Parquet", "CSV", "Both")] + [string]$OutputFormat = "Parquet", + + [string]$StorageAccountName, + + [string]$ResourceGroupName, + + [string]$AdfName, + + [switch]$Upload, + + [switch]$StartTriggers +) + +# Calculate StartDate from MonthsOfData if not explicitly provided +if (-not $PSBoundParameters.ContainsKey('StartDate')) { + $StartDate = (Get-Date -Day 1).AddMonths(-$MonthsOfData + 1) +} + +# Ensure EndDate is today max +if ($EndDate 
-gt (Get-Date)) { + $EndDate = Get-Date +} + +# ============================================================================ +# Provider Configurations +# ============================================================================ + +$ProviderConfigs = @{ + Azure = @{ + ServiceProviderName = "Microsoft" + InvoiceIssuerName = "Microsoft" + HostProviderName = "Microsoft" + BillingAccountType = "Enterprise Agreement" + SubAccountType = "Subscription" + BillingCurrency = "USD" + BillingAccountAgreement = "Microsoft Customer Agreement" + Regions = @( + @{ Id = "eastus"; Name = "East US" }, + @{ Id = "westus2"; Name = "West US 2" }, + @{ Id = "westeurope"; Name = "West Europe" }, + @{ Id = "northeurope"; Name = "North Europe" }, + @{ Id = "southeastasia"; Name = "Southeast Asia" } + ) + Services = @( + @{ Name = "Virtual Machines"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 30; CostMin = 50; CostMax = 2000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Azure Kubernetes Service"; Category = "Compute"; Subcategory = "Containers"; Weight = 15; CostMin = 100; CostMax = 3000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Azure SQL Database"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 12; CostMin = 30; CostMax = 800; PricingUnit = "DTU-Hours"; ConsumedUnit = "DTU Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 DTU-Hour" }, + @{ Name = "Storage Accounts"; Category = "Storage"; Subcategory = "General Purpose v2"; Weight = 10; CostMin = 5; CostMax = 300; PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB/Month" }, + @{ Name = "Azure Cosmos DB"; Category = "Databases"; Subcategory = "NoSQL Databases"; Weight = 8; CostMin = 20; CostMax = 500; PricingUnit = "RU/s"; ConsumedUnit = "Request Units"; PricingBlockSize = 100; 
PricingUnitDescription = "100 RU/s" }, + @{ Name = "Azure Data Explorer"; Category = "Analytics"; Subcategory = "Data Analytics"; Weight = 7; CostMin = 100; CostMax = 2500; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Azure App Service"; Category = "Compute"; Subcategory = "App Services"; Weight = 5; CostMin = 10; CostMax = 200; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Azure Functions"; Category = "Compute"; Subcategory = "Serverless Compute"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = "Executions"; ConsumedUnit = "1M Executions"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Executions" }, + @{ Name = "Azure Key Vault"; Category = "Security"; Subcategory = "Key Management"; Weight = 2; CostMin = 0.50; CostMax = 20; PricingUnit = "Operations"; ConsumedUnit = "10K Operations"; PricingBlockSize = 10000; PricingUnitDescription = "10,000 Operations" }, + @{ Name = "Bandwidth"; Category = "Networking"; Subcategory = "Data Transfer"; Weight = 5; CostMin = 1; CostMax = 100; PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, + @{ Name = "Marketplace - 3rd Party"; Category = "Compute"; Subcategory = "Marketplace"; Weight = 3; CostMin = 50; CostMax = 500; PricingUnit = "Units"; ConsumedUnit = "Units"; PricingBlockSize = 1; PricingUnitDescription = "1 Unit"; IsMarketplace = $true } + ) + ResourceTypes = @("microsoft.compute/virtualmachines", "microsoft.storage/storageaccounts", "microsoft.sql/servers", + "microsoft.containerservice/managedclusters", "microsoft.web/sites", "microsoft.keyvault/vaults", + "microsoft.kusto/clusters", "microsoft.documentdb/databaseaccounts", "microsoft.network/virtualnetworks") + VmSkus = @( + # Intel-based + @{ InstanceType = "Standard_D4s_v5"; Cores = 4; MemoryGB = 16; MeterName = "D4s v5"; Description = 
"Standard_D4s_v5 - Intel Ice Lake, 4 vCPUs, 16 GiB RAM" }, + @{ InstanceType = "Standard_D8s_v5"; Cores = 8; MemoryGB = 32; MeterName = "D8s v5"; Description = "Standard_D8s_v5 - Intel Ice Lake, 8 vCPUs, 32 GiB RAM" }, + @{ InstanceType = "Standard_E4s_v5"; Cores = 4; MemoryGB = 32; MeterName = "E4s v5"; Description = "Standard_E4s_v5 - Intel Ice Lake, 4 vCPUs, 32 GiB RAM" }, + @{ InstanceType = "Standard_E8s_v5"; Cores = 8; MemoryGB = 64; MeterName = "E8s v5"; Description = "Standard_E8s_v5 - Intel Ice Lake, 8 vCPUs, 64 GiB RAM" }, + @{ InstanceType = "Standard_F4s_v2"; Cores = 4; MemoryGB = 8; MeterName = "F4s v2"; Description = "Standard_F4s_v2 - Intel Cascade Lake, 4 vCPUs, 8 GiB RAM" }, + @{ InstanceType = "Standard_B2s"; Cores = 2; MemoryGB = 4; MeterName = "B2s"; Description = "Standard_B2s - Intel Broadwell, 2 vCPUs, 4 GiB RAM" }, + # AMD-based + @{ InstanceType = "Standard_D4as_v5"; Cores = 4; MemoryGB = 16; MeterName = "D4as v5"; Description = "Standard_D4as_v5 - AMD EPYC, 4 vCPUs, 16 GiB RAM" }, + @{ InstanceType = "Standard_D8as_v5"; Cores = 8; MemoryGB = 32; MeterName = "D8as v5"; Description = "Standard_D8as_v5 - AMD EPYC, 8 vCPUs, 32 GiB RAM" }, + @{ InstanceType = "Standard_E4as_v5"; Cores = 4; MemoryGB = 32; MeterName = "E4as v5"; Description = "Standard_E4as_v5 - AMD EPYC, 4 vCPUs, 32 GiB RAM" }, + @{ InstanceType = "Standard_L8as_v3"; Cores = 8; MemoryGB = 64; MeterName = "L8as v3"; Description = "Standard_L8as_v3 - AMD EPYC, 8 vCPUs, 64 GiB RAM" }, + # Arm64-based (Cobalt/Ampere) + @{ InstanceType = "Standard_D4ps_v5"; Cores = 4; MemoryGB = 16; MeterName = "D4ps v5"; Description = "Standard_D4ps_v5 - Arm64 Cobalt, 4 vCPUs, 16 GiB RAM" }, + @{ InstanceType = "Standard_D8ps_v5"; Cores = 8; MemoryGB = 32; MeterName = "D8ps v5"; Description = "Standard_D8ps_v5 - Arm64 Cobalt, 8 vCPUs, 32 GiB RAM" }, + @{ InstanceType = "Standard_E4ps_v5"; Cores = 4; MemoryGB = 32; MeterName = "E4ps v5"; Description = "Standard_E4ps_v5 - Arm64 Cobalt, 4 vCPUs, 32 GiB 
RAM" } + ) + } + AWS = @{ + ServiceProviderName = "Amazon Web Services" + InvoiceIssuerName = "Amazon Web Services" + HostProviderName = "Amazon Web Services" + BillingAccountType = "Management Account" + SubAccountType = "Member Account" + BillingCurrency = "USD" + BillingAccountAgreement = "AWS Customer Agreement" + Regions = @( + @{ Id = "us-east-1"; Name = "US East (N. Virginia)" }, + @{ Id = "us-west-2"; Name = "US West (Oregon)" }, + @{ Id = "eu-west-1"; Name = "Europe (Ireland)" }, + @{ Id = "ap-southeast-1"; Name = "Asia Pacific (Singapore)" } + ) + Services = @( + @{ Name = "Amazon EC2"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 35; CostMin = 50; CostMax = 2000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Amazon EKS"; Category = "Compute"; Subcategory = "Containers"; Weight = 18; CostMin = 100; CostMax = 2500; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Amazon RDS"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 12; CostMin = 30; CostMax = 800; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Amazon S3"; Category = "Storage"; Subcategory = "Object Storage"; Weight = 12; CostMin = 5; CostMax = 300; PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, + @{ Name = "Amazon Redshift"; Category = "Analytics"; Subcategory = "Data Warehouses"; Weight = 8; CostMin = 50; CostMax = 1000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Amazon DynamoDB"; Category = "Databases"; Subcategory = "NoSQL Databases"; Weight = 6; CostMin = 10; CostMax = 400; PricingUnit = "RCUs"; ConsumedUnit = "Read Capacity Units"; PricingBlockSize = 1; PricingUnitDescription = "1 Read Capacity Unit" }, + @{ Name = 
"Amazon CloudFront"; Category = "Networking"; Subcategory = "Content Delivery"; Weight = 4; CostMin = 5; CostMax = 150; PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, + @{ Name = "AWS Lambda"; Category = "Compute"; Subcategory = "Serverless Compute"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = "Requests"; ConsumedUnit = "1M Requests"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Requests" }, + @{ Name = "Amazon SQS"; Category = "Integration"; Subcategory = "Messaging"; Weight = 2; CostMin = 0.50; CostMax = 20; PricingUnit = "Requests"; ConsumedUnit = "1M Requests"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Requests" } + ) + ResourceTypes = @("AWS::EC2::Instance", "AWS::S3::Bucket", "AWS::RDS::DBInstance", "AWS::EKS::Cluster", "AWS::DynamoDB::Table", "AWS::Lambda::Function") + } + GCP = @{ + ServiceProviderName = "Google Cloud" + InvoiceIssuerName = "Google Cloud" + HostProviderName = "Google Cloud" + BillingAccountType = "Billing Account" + SubAccountType = "Project" + BillingCurrency = "USD" + BillingAccountAgreement = "Google Cloud Agreement" + Regions = @( + @{ Id = "us-central1"; Name = "Iowa" }, + @{ Id = "us-east1"; Name = "South Carolina" }, + @{ Id = "europe-west1"; Name = "Belgium" }, + @{ Id = "asia-east1"; Name = "Taiwan" } + ) + Services = @( + @{ Name = "Compute Engine"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 35; CostMin = 50; CostMax = 2000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Google Kubernetes Engine"; Category = "Compute"; Subcategory = "Containers"; Weight = 20; CostMin = 100; CostMax = 2500; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Cloud SQL"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 12; CostMin = 30; CostMax = 700; PricingUnit = 
"Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Cloud Storage"; Category = "Storage"; Subcategory = "Object Storage"; Weight = 12; CostMin = 5; CostMax = 250; PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, + @{ Name = "BigQuery"; Category = "Analytics"; Subcategory = "Data Warehouses"; Weight = 10; CostMin = 20; CostMax = 800; PricingUnit = "TB Scanned"; ConsumedUnit = "TB"; PricingBlockSize = 1; PricingUnitDescription = "1 TB Scanned" }, + @{ Name = "Cloud Spanner"; Category = "Databases"; Subcategory = "Distributed Databases"; Weight = 5; CostMin = 50; CostMax = 500; PricingUnit = "Node-Hours"; ConsumedUnit = "Node Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Node-Hour" }, + @{ Name = "Cloud Run"; Category = "Compute"; Subcategory = "Serverless Containers"; Weight = 3; CostMin = 5; CostMax = 100; PricingUnit = "vCPU-Seconds"; ConsumedUnit = "vCPU Seconds"; PricingBlockSize = 1; PricingUnitDescription = "1 vCPU-Second" }, + @{ Name = "Cloud Functions"; Category = "Compute"; Subcategory = "Serverless Compute"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = "Invocations"; ConsumedUnit = "1M Invocations"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Invocations" } + ) + ResourceTypes = @("compute.googleapis.com/Instance", "storage.googleapis.com/Bucket", "sql.googleapis.com/Instance", "container.googleapis.com/Cluster", "bigquery.googleapis.com/Dataset") + } + DataCenter = @{ + ServiceProviderName = "Internal IT" + InvoiceIssuerName = "Internal IT" + HostProviderName = "On-Premises" + BillingAccountType = "Cost Center" + SubAccountType = "Business Unit" + BillingCurrency = "USD" + BillingAccountAgreement = "Internal SLA" + Regions = @( + @{ Id = "dc-us-east"; Name = "US East Data Center" }, + @{ Id = "dc-eu-west"; Name = "EU West Data Center" }, + @{ Id = "dc-apac"; Name = "APAC Data Center" } + ) + Services = @( + @{ 
Name = "Physical Servers"; Category = "Compute"; Subcategory = "Bare Metal"; Weight = 30; CostMin = 200; CostMax = 5000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "VMware vSphere"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 25; CostMin = 100; CostMax = 2000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Oracle Database"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 15; CostMin = 500; CostMax = 10000; PricingUnit = "Processor Licenses"; ConsumedUnit = "Processor Licenses"; PricingBlockSize = 1; PricingUnitDescription = "1 Processor License" }, + @{ Name = "SAN Storage"; Category = "Storage"; Subcategory = "Block Storage"; Weight = 12; CostMin = 50; CostMax = 1500; PricingUnit = "TB"; ConsumedUnit = "TB"; PricingBlockSize = 1; PricingUnitDescription = "1 TB" }, + @{ Name = "Network Infrastructure"; Category = "Networking"; Subcategory = "Network Infrastructure"; Weight = 10; CostMin = 20; CostMax = 500; PricingUnit = "Ports"; ConsumedUnit = "Ports"; PricingBlockSize = 1; PricingUnitDescription = "1 Port" }, + @{ Name = "Facility Costs"; Category = "Other"; Subcategory = "Other"; Weight = 8; CostMin = 100; CostMax = 800; PricingUnit = "kWh"; ConsumedUnit = "kWh"; PricingBlockSize = 1; PricingUnitDescription = "1 kWh" } + ) + ResourceTypes = @("server/physical", "storage/san", "database/oracle", "virtualization/vmware") + } +} + +# ============================================================================ +# Helper Functions +# ============================================================================ + +function Get-RandomDecimal { + param( + [decimal]$Min = 0.01, + [decimal]$Max = 100.00 + ) + return [math]::Round($Min + (Get-Random -Maximum ([int](($Max - $Min) * 100 + 1))) / 100, 10) +} + +function Get-RandomElement { + param([array]$Array) + return $Array[(Get-Random 
-Maximum $Array.Count)] +} + +function Get-WeightedRandomService { + param([array]$Services) + + $totalWeight = ($Services | ForEach-Object { + if ($_.Weight) { $_.Weight } else { 1 } + } | Measure-Object -Sum).Sum + + $randomValue = Get-Random -Maximum $totalWeight + $cumulative = 0 + foreach ($service in $Services) { + $weight = if ($service.Weight) { $service.Weight } else { 1 } + $cumulative += $weight + if ($randomValue -lt $cumulative) { + return $service + } + } + return $Services[-1] +} + +function Get-IsoDateTime { + param([datetime]$Date) + return $Date.ToString("yyyy-MM-ddTHH:mm:ssZ") +} + +# ============================================================================ +# Persistent Identity Generation +# ============================================================================ + +function New-ProviderIdentities { + param( + [string]$Provider, + [hashtable]$Config + ) + + # 2-3 Billing Accounts per provider + $billingAccountCount = Get-Random -Minimum 2 -Maximum 4 + $billingAccounts = @() + for ($i = 1; $i -le $billingAccountCount; $i++) { + $baId = switch ($Provider) { + "Azure" { [guid]::NewGuid().ToString() } + "AWS" { "$(Get-Random -Minimum 100000000000 -Maximum 999999999999)" } + "GCP" { "ABCDEF-$((Get-Random -Minimum 100000 -Maximum 999999))-$((Get-Random -Minimum 100000 -Maximum 999999))" } + "DataCenter" { "CC-$(Get-Random -Minimum 10000 -Maximum 99999)" } + } + $baName = switch ($Provider) { + "Azure" { "Contoso EA $i" } + "AWS" { "AWS Org Account $i" } + "GCP" { "GCP Billing Account $i" } + "DataCenter" { "IT Cost Center $i" } + } + $billingAccounts += @{ Id = $baId; Name = $baName } + } + + # 4-8 Sub-Accounts per provider with realistic names + $subAccountNames = switch ($Provider) { + "Azure" { @("Production Subscription", "Staging Subscription", "Development Subscription", "Shared Services Subscription", "Data Platform Subscription", "Security Subscription", "Networking Subscription", "App Team A Subscription") } + "AWS" { 
@("prod-workloads", "staging-env", "dev-sandbox", "shared-services", "data-lake", "security-tools", "networking", "app-team-b") } + "GCP" { @("prod-services", "staging-services", "dev-playground", "shared-infra", "analytics-platform", "ml-experiments", "networking", "frontend-apps") } + "DataCenter" { @("Engineering", "Finance", "Operations", "Research", "Marketing", "IT Infrastructure", "Human Resources", "Executive") } + } + $subAccountCount = Get-Random -Minimum 4 -Maximum ([math]::Min(9, $subAccountNames.Count + 1)) + $subAccounts = @() + for ($i = 0; $i -lt $subAccountCount; $i++) { + $saName = $subAccountNames[$i] + $saId = switch ($Provider) { + "Azure" { "/subscriptions/$([guid]::NewGuid().ToString())" } + "AWS" { "$(Get-Random -Minimum 100000000000 -Maximum 999999999999)" } + "GCP" { "proj-$($Provider.ToLower())-$(Get-Random -Minimum 10000 -Maximum 99999)" } + "DataCenter" { "BU-$(Get-Random -Minimum 100 -Maximum 999)" } + } + $subAccounts += @{ Id = $saId; Name = $saName; BillingAccount = $billingAccounts[$i % $billingAccounts.Count] } + } + + # Billing profile IDs (consistent per provider) + $billingProfileIds = @() + for ($i = 1; $i -le 3; $i++) { + $billingProfileIds += "BP-$(Get-Random -Minimum 10000 -Maximum 99999)" + } + + # Resource Groups + $resourceGroups = @("rg-production-001", "rg-staging-001", "rg-development-001", "rg-data-platform", + "rg-shared-services", "rg-networking", "rg-security", "rg-analytics", + "rg-app-team-a", "rg-app-team-b", "rg-ml-training", "rg-monitoring") + + # Pre-generate a pool of persistent resources + $resourceCount = Get-Random -Minimum 150 -Maximum 400 + $resources = @() + for ($i = 1; $i -le $resourceCount; $i++) { + $service = Get-WeightedRandomService -Services $Config.Services + $region = Get-RandomElement -Array $Config.Regions + $resourceType = Get-RandomElement -Array $Config.ResourceTypes + $subAccount = Get-RandomElement -Array $subAccounts + $rg = Get-RandomElement -Array $resourceGroups + $shortId = 
([guid]::NewGuid().ToString()).Substring(0, 8) + + $resourceId = switch ($Provider) { + "Azure" { "$($subAccount.Id)/resourceGroups/$rg/providers/$resourceType/$shortId" } + "AWS" { "arn:aws:$(($resourceType -split '::')[1].ToLower()):$($region.Id):$($subAccount.Id):instance/i-$shortId" } + "GCP" { "//$(($resourceType -split '/')[0])/projects/$($subAccount.Id)/zones/$($region.Id)-$(Get-RandomElement -Array @('a','b','c'))/instances/vm-$shortId" } + "DataCenter" { "dc://$($region.Id)/$resourceType/$shortId" } + } + + $resourceName = "$($service.Name.ToLower() -replace ' ','-')-$shortId" + + # Tags: ~80% of resources get tags, ~20% are untagged (for tag coverage analysis) + $tagHash = @{} + $hasTagsRoll = Get-Random -Maximum 100 + if ($hasTagsRoll -lt 80) { + $tagHash = @{ + "Environment" = Get-RandomElement -Array @("Production", "Staging", "Development", "Test") + "Department" = Get-RandomElement -Array @("Engineering", "Finance", "Operations", "Marketing", "Sales", "Research") + "CostCenter" = "CC-$(Get-Random -Minimum 100 -Maximum 999)" + "BusinessUnit" = Get-RandomElement -Array @("BU-1", "BU-2", "BU-3", "BU-4") + "Application" = Get-RandomElement -Array @("web-app", "api-service", "data-pipeline", "analytics", "backend", "frontend", "ml-training", "batch-jobs") + "Owner" = Get-RandomElement -Array @("team-alpha", "team-beta", "platform", "data-team", "infra", "security-team", "devops", "sre") + } + + # Azure-specific FinOps Hub tags on ~30% of tagged Azure resources + if ($Provider -eq "Azure" -and (Get-Random -Maximum 100) -lt 30) { + $hubStorageSuffix = Get-Random -Minimum 1000 -Maximum 9999 + $tagHash["ftk-tool"] = "FinOps hubs" + $tagHash["ftk-version"] = "0.8.0" + $tagHash["cm-resource-parent"] = "$($subAccount.Id)/resourceGroups/rg-finops-hub/providers/Microsoft.Storage/storageAccounts/stfinopshub$hubStorageSuffix" + } + + # AWS-specific tags + if ($Provider -eq "AWS") { + $tagHash["aws:createdBy"] = Get-RandomElement -Array @("CloudFormation", 
"Terraform", "CDK", "Console") + } + + # GCP-specific tags + if ($Provider -eq "GCP") { + $tagHash["goog-dm"] = Get-RandomElement -Array @("deployment-mgr", "terraform", "gcloud-cli") + } + } + + # VM SKU assignment (Azure VMs get specific instance types) + $vmSku = $null + if ($Provider -eq "Azure" -and $resourceType -eq "microsoft.compute/virtualmachines" -and $Config.VmSkus) { + $vmSku = Get-RandomElement -Array $Config.VmSkus + } + + # AHB eligibility: Azure VMs and SQL with ~40% eligible + $ahbEligible = $false + $ahbLicenseType = $null + if ($Provider -eq "Azure" -and $resourceType -in @("microsoft.compute/virtualmachines", "microsoft.sql/servers")) { + if ((Get-Random -Maximum 100) -lt 40) { + $ahbEligible = $true + $ahbLicenseType = if ($resourceType -eq "microsoft.sql/servers") { + "SQL Server" + } else { + Get-RandomElement -Array @("Windows Server", "Windows Server", "SUSE Linux", "RHEL Linux") + } + } + } + + # Determine a base daily cost for this specific resource (varies +/-20% daily) + $baseDailyCost = Get-RandomDecimal -Min $service.CostMin -Max $service.CostMax + + # SKU IDs (stable per resource) + $skuId = "SKU-$(Get-Random -Minimum 100000 -Maximum 999999)" + $skuPriceId = "PRICE-$(Get-Random -Minimum 100000 -Maximum 999999)" + $skuMeterId = [guid]::NewGuid().ToString() + + $resources += @{ + ResourceId = $resourceId + ResourceName = $resourceName + ResourceType = $resourceType + Service = $service + Region = $region + SubAccount = $subAccount + ResourceGroup = $rg + Tags = $tagHash + BaseDailyCost = $baseDailyCost + SkuId = $skuId + SkuPriceId = $skuPriceId + SkuMeterId = $skuMeterId + VmSku = $vmSku + AhbEligible = $ahbEligible + AhbLicenseType = $ahbLicenseType + } + } + + # Pre-generate commitment discounts (multi-cloud) + $commitments = @() + $commitmentCount = switch ($Provider) { + "Azure" { Get-Random -Minimum 8 -Maximum 16 } + "AWS" { Get-Random -Minimum 5 -Maximum 12 } + "GCP" { Get-Random -Minimum 3 -Maximum 8 } + "DataCenter" { 0 } + } 
+ for ($i = 1; $i -le $commitmentCount; $i++) { + $commitType = Get-RandomElement -Array @("Reservation", "Savings Plan") + $commitId = switch ($Provider) { + "Azure" { + if ($commitType -eq "Reservation") { + "/providers/Microsoft.Capacity/reservationOrders/$([guid]::NewGuid().ToString())" + } else { + "/providers/Microsoft.BillingBenefits/savingsPlanOrders/$([guid]::NewGuid().ToString())" + } + } + "AWS" { "arn:aws:savingsplans::$(Get-Random -Minimum 100000000000 -Maximum 999999999999):savingsplan/sp-$([guid]::NewGuid().ToString().Substring(0,8))" } + "GCP" { "projects/test-project/commitments/$([guid]::NewGuid().ToString().Substring(0,8))" } + default { "" } + } + $skuOrderId = [guid]::NewGuid().ToString() + $skuTerm = Get-RandomElement -Array @(12, 36) # 1 year or 3 years in months + + $commitments += @{ + Id = $commitId + Name = "$($commitType -replace ' ','')-$(Get-Random -Minimum 1000 -Maximum 9999)" + Type = $commitType + # FOCUS spec: Reservation = "Usage" (committed usage), Savings Plan = "Spend" (committed spend) + Category = if ($commitType -eq "Reservation") { "Usage" } else { "Spend" } + SkuOrderId = $skuOrderId + SkuTerm = $skuTerm + } + } + + # Invoice IDs per billing period (populated lazily) + $invoiceIds = @{} + + # Marketplace publishers + $marketplacePublishers = @( + "Palo Alto Networks", "Fortinet", "Check Point", "Zscaler", "Cisco Meraki", + "Databricks", "Snowflake", "Confluent", + "Datadog", "Elastic", "Dynatrace", "New Relic", + "HashiCorp", "Red Hat", "SUSE", + "Twilio SendGrid", "MongoDB", "Salesforce", "ServiceNow" + ) + + return @{ + BillingAccounts = $billingAccounts + SubAccounts = $subAccounts + BillingProfileIds = $billingProfileIds + ResourceGroups = $resourceGroups + Resources = $resources + Commitments = $commitments + InvoiceIds = $invoiceIds + MarketplacePublishers = $marketplacePublishers + } +} + +# ============================================================================ +# Row Generation +# 
# ============================================================================
# Row Generation
# ============================================================================

<#
.SYNOPSIS
    Builds a single FOCUS-compliant cost row for one provider and one charge date.
.DESCRIPTION
    Picks a persistent resource from the pre-generated identity pool and derives
    every FOCUS and FinOps Hub column from it: list/contracted/billed/effective
    costs, commitment discounts, spot pricing, Azure Hybrid Benefit, and data
    quality anomalies. Unit prices are derived only AFTER all cost adjustments
    (commitments, spot, AHB, anomalies) so Cost == UnitPrice * PricingQuantity
    holds on every row; anomaly rows are flagged ChargeClass = "Correction",
    which FOCUS exempts from the cost-invariant rules.
#>
function New-FocusRow {
    param(
        [string]$Provider,
        [datetime]$ChargeDate,
        [hashtable]$Config,
        [hashtable]$Identity,
        [switch]$IncludeCommitments,
        [switch]$IncludeHybridBenefit
    )

    # Pick a persistent resource (stable identity across days for realistic trending)
    $res = Get-RandomElement -Array $Identity.Resources
    $service = $res.Service
    $region = $res.Region
    $subAccount = $res.SubAccount
    $billingAccount = $subAccount.BillingAccount

    # Daily cost variation: base +/- 20% with slight upward trend over months
    $monthIndex = (($ChargeDate.Year - $StartDate.Year) * 12 + $ChargeDate.Month - $StartDate.Month)
    $trendFactor = 1.0 + ($monthIndex * 0.02) # 2% growth per month
    $jitter = 0.80 + (Get-Random -Maximum 41) / 100.0 # 0.80 to 1.20
    $listCost = [math]::Round($res.BaseDailyCost * $trendFactor * $jitter, 10)
    if ($listCost -lt 0.01) { $listCost = 0.01 }

    # On-demand cost: same as list cost (before any discounts)
    $onDemandCost = $listCost

    # Negotiated/EA discount: 5-30% off list for ~60% of rows
    $negotiatedDiscountPct = 0
    if ((Get-Random -Maximum 100) -lt 60) {
        $negotiatedDiscountPct = Get-Random -Minimum 5 -Maximum 31
    }
    $contractedCost = [math]::Round($listCost * (100 - $negotiatedDiscountPct) / 100, 10)
    $billedCost = $contractedCost
    $effectiveCost = $contractedCost

    $pricingQuantity = Get-RandomDecimal -Min 1 -Max 1000
    $consumedQuantity = $pricingQuantity

    $chargePeriodStart = $ChargeDate.Date
    $chargePeriodEnd = $ChargeDate.Date.AddDays(1)
    $billingPeriodStart = [datetime]::new($ChargeDate.Year, $ChargeDate.Month, 1)
    $billingPeriodEnd = $billingPeriodStart.AddMonths(1)

    # Charge category distribution: 85% Usage, 8% Purchase, 3% Tax, 2% Credit, 2% Adjustment
    $catRoll = Get-Random -Maximum 100
    $chargeCategory = if ($catRoll -lt 85) { "Usage" }
                      elseif ($catRoll -lt 93) { "Purchase" }
                      elseif ($catRoll -lt 96) { "Tax" }
                      elseif ($catRoll -lt 98) { "Credit" }
                      else { "Adjustment" }

    # ChargeClass: mostly null, ~3% are corrections
    $chargeClass = $null
    if ((Get-Random -Maximum 100) -lt 3) {
        $chargeClass = "Correction"
    }

    # ChargeFrequency based on ChargeCategory
    $chargeFrequency = switch ($chargeCategory) {
        "Purchase" { Get-RandomElement -Array @("One-Time", "Recurring") }
        "Tax" { "Recurring" }
        "Credit" { "One-Time" }
        default { "Usage-Based" }
    }

    # Credits and Adjustments are negative
    if ($chargeCategory -in @("Credit", "Adjustment")) {
        $listCost = -[math]::Abs($listCost) * 0.1 # Credits are ~10% of normal costs
        $contractedCost = $listCost
        $billedCost = $listCost
        $effectiveCost = $listCost
        $onDemandCost = [math]::Abs($listCost)
    }

    # AvailabilityZone: vary a/b/c
    $az = "$($region.Id)-$(Get-RandomElement -Array @('a', 'b', 'c'))"

    # === PRICING CATEGORY (start with Standard/on-demand) ===
    $pricingCategory = "Standard"

    # === COMMITMENT DISCOUNT SIMULATION (multi-cloud) ===
    $commitmentDiscountId = $null
    $commitmentDiscountName = $null
    $commitmentDiscountCategory = $null
    $commitmentDiscountType = $null
    $commitmentDiscountStatus = $null
    $commitmentDiscountQuantity = $null
    $commitmentDiscountUnit = $null
    $x_SkuOrderId = $null
    $x_SkuTerm = $null

    # 30% chance of commitment-covered usage (Azure/AWS/GCP - not DataCenter)
    if ($IncludeCommitments -and $Provider -ne "DataCenter" -and $chargeCategory -eq "Usage" -and
        $Identity.Commitments.Count -gt 0 -and (Get-Random -Maximum 100) -lt 30) {

        $commitment = Get-RandomElement -Array $Identity.Commitments
        $commitmentDiscountId = $commitment.Id
        $commitmentDiscountName = $commitment.Name
        $commitmentDiscountCategory = $commitment.Category
        $commitmentDiscountType = $commitment.Type
        $x_SkuOrderId = $commitment.SkuOrderId
        $x_SkuTerm = $commitment.SkuTerm
        $pricingCategory = "Committed"

        # 85% utilization - most are Used, some Unused
        if ((Get-Random -Maximum 100) -lt 85) {
            $commitmentDiscountStatus = "Used"
            $effectiveCost = [math]::Round($contractedCost * 0.40, 10) # 60% savings on contracted
            $billedCost = 0 # Prepaid
        } else {
            $commitmentDiscountStatus = "Unused"
            $effectiveCost = [math]::Round($contractedCost * 0.60, 10) # Wasted commitment
            $billedCost = $effectiveCost
        }

        $commitmentDiscountQuantity = $pricingQuantity
        $commitmentDiscountUnit = $service.PricingUnit
    }

    # === COMMITMENT PURCHASE ROWS (for invoicing page) ===
    if ($IncludeCommitments -and $Provider -ne "DataCenter" -and $chargeCategory -eq "Purchase" -and
        $Identity.Commitments.Count -gt 0 -and (Get-Random -Maximum 100) -lt 50) {

        $commitment = Get-RandomElement -Array $Identity.Commitments
        $commitmentDiscountId = $commitment.Id
        $commitmentDiscountName = $commitment.Name
        $commitmentDiscountCategory = $commitment.Category
        $commitmentDiscountType = $commitment.Type
        $x_SkuOrderId = $commitment.SkuOrderId
        $x_SkuTerm = $commitment.SkuTerm
        $pricingCategory = "Committed"
    }

    # === SPOT / DYNAMIC INSTANCE SIMULATION ===
    $spotEligibleServices = @("Virtual Machines", "Azure Kubernetes Service", "Amazon EC2", "Amazon EKS", "Compute Engine", "Google Kubernetes Engine", "VMware vSphere")
    if ($chargeCategory -eq "Usage" -and $null -eq $commitmentDiscountId -and $service.Name -in $spotEligibleServices) {
        if ((Get-Random -Maximum 100) -lt 15) {
            $pricingCategory = "Dynamic" # FOCUS spec uses "Dynamic", not "Spot"

            $spotDiscount = Get-Random -Minimum 60 -Maximum 90
            $effectiveCost = [math]::Round($listCost * (100 - $spotDiscount) / 100, 10)
            $billedCost = $effectiveCost
            $contractedCost = $effectiveCost
        }
    }

    # === AZURE HYBRID BENEFIT SIMULATION ===
    $x_SkuMeterCategory = $null
    $x_SkuMeterSubcategory = $null
    $x_SkuMeterName = $null
    $x_SkuInstanceType = $null
    $x_SkuCoreCount = $null
    $x_SkuLicenseStatus = $null
    $x_SkuLicenseQuantity = $null
    $x_SkuLicenseType = $null
    $x_SkuDescription = $null

    if ($Provider -eq "Azure") {
        # Set VM SKU details for all Azure compute resources
        if ($res.VmSku) {
            $vmSku = $res.VmSku
            $x_SkuInstanceType = $vmSku.InstanceType
            $x_SkuCoreCount = $vmSku.Cores
            $x_SkuDescription = $vmSku.Description
            $x_SkuMeterCategory = "Virtual Machines"
            $x_SkuMeterSubcategory = "$($vmSku.InstanceType) Series"
            $x_SkuMeterName = $vmSku.MeterName # Contains architecture pattern (D4s, D4as, D4ps)
        } elseif ($service.Category -eq "Compute") {
            $x_SkuMeterCategory = $service.Name
            $x_SkuMeterSubcategory = $service.Subcategory
            $x_SkuDescription = "$($service.Name) - Standard"
        } elseif ($service.Category -eq "Databases") {
            $x_SkuMeterCategory = $service.Name
            $x_SkuMeterSubcategory = "Compute"
            $x_SkuDescription = "$($service.Name) - Standard Tier"
        } else {
            $x_SkuDescription = "$($service.Name) - $($service.Subcategory)"
        }

        # AHB columns
        if ($res.AhbEligible -and $chargeCategory -eq "Usage") {
            $x_SkuLicenseType = $res.AhbLicenseType

            # 60% of eligible resources have AHB enabled
            if ((Get-Random -Maximum 100) -lt 60) {
                $x_SkuLicenseStatus = "Enabled"
                $x_SkuLicenseQuantity = if ($res.VmSku) { $res.VmSku.Cores } else { Get-RandomElement -Array @(2, 4, 8, 16) }
                # AHB savings: ~40% on license cost
                $licenseSavings = [math]::Round([math]::Abs($effectiveCost) * 0.40, 10)
                $effectiveCost = [math]::Max(0.01, [math]::Round($effectiveCost - $licenseSavings, 10))
            } else {
                $x_SkuLicenseStatus = "Not Enabled"
                $x_SkuLicenseQuantity = 0
            }
        }
    }

    # === DATA QUALITY ANOMALIES (~2% of rows) ===
    # FIX: anomalies are injected BEFORE unit prices are derived (previously
    # after), so unit prices always match the final costs. Anomaly rows are
    # also marked ChargeClass = "Correction" - FOCUS exempts corrections from
    # the ListCost >= ContractedCost >= EffectiveCost invariants.
    $x_SourceChanges = $null
    $qualityRoll = Get-Random -Maximum 100
    if ($qualityRoll -eq 0) {
        # Effective > Contracted (anomaly)
        $effectiveCost = [math]::Round($contractedCost * 1.1, 10)
        $x_SourceChanges = "CostAdjustment"
        $chargeClass = "Correction"
    } elseif ($qualityRoll -eq 1) {
        # Contracted > List (anomaly)
        $contractedCost = [math]::Round($listCost * 1.05, 10)
        $x_SourceChanges = "PriceCorrection"
        $chargeClass = "Correction"
    }

    # === UNIT PRICES (derived from FINAL costs / quantity) ===
    $listUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($listCost / $pricingQuantity, 10) } else { 0 }
    $contractedUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($contractedCost / $pricingQuantity, 10) } else { 0 }
    $effectiveUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($effectiveCost / $pricingQuantity, 10) } else { 0 }
    $billedUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($billedCost / $pricingQuantity, 10) } else { 0 }
    $onDemandUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($onDemandCost / $pricingQuantity, 10) } else { 0 }

    # Invoice ID: stable per billing period
    $invoiceKey = "$($billingAccount.Id)-$($billingPeriodStart.ToString('yyyyMM'))"
    if (-not $Identity.InvoiceIds.ContainsKey($invoiceKey)) {
        $Identity.InvoiceIds[$invoiceKey] = "INV-$($billingPeriodStart.ToString('yyyyMM'))-$(Get-Random -Minimum 10000 -Maximum 99999)"
    }
    $invoiceId = $Identity.InvoiceIds[$invoiceKey]

    # Tags as JSON
    $tagsJson = if ($res.Tags.Count -gt 0) { ($res.Tags | ConvertTo-Json -Compress) } else { '{}' }

    # Publisher info
    $isMarketplace = [bool]$service.IsMarketplace
    $publisherName = if ($isMarketplace) { Get-RandomElement -Array $Identity.MarketplacePublishers } else { $Config.ServiceProviderName }

    return [PSCustomObject]@{
        # ===================== Mandatory FOCUS columns =====================
        BilledCost                 = $billedCost
        BillingAccountId           = $billingAccount.Id
        BillingAccountName         = $billingAccount.Name
        BillingAccountType         = $Config.BillingAccountType
        BillingCurrency            = $Config.BillingCurrency
        BillingPeriodEnd           = Get-IsoDateTime -Date $billingPeriodEnd
        BillingPeriodStart         = Get-IsoDateTime -Date $billingPeriodStart
        ChargeCategory             = $chargeCategory
        ChargeClass                = $chargeClass
        ChargeDescription          = "$($service.Name) usage in $($region.Name)"
        ChargeFrequency            = $chargeFrequency
        ChargePeriodEnd            = Get-IsoDateTime -Date $chargePeriodEnd
        ChargePeriodStart          = Get-IsoDateTime -Date $chargePeriodStart
        ContractedCost             = $contractedCost
        EffectiveCost              = $effectiveCost
        InvoiceIssuerName          = $Config.InvoiceIssuerName
        ListCost                   = $listCost
        PricingQuantity            = $pricingQuantity
        PricingUnit                = $service.PricingUnit
        ServiceProviderName        = $Config.ServiceProviderName

        # ===================== Conditional FOCUS columns =====================
        AvailabilityZone           = $az
        CommitmentDiscountCategory = $commitmentDiscountCategory
        CommitmentDiscountId       = $commitmentDiscountId
        CommitmentDiscountName     = $commitmentDiscountName
        CommitmentDiscountQuantity = $commitmentDiscountQuantity
        CommitmentDiscountStatus   = $commitmentDiscountStatus
        CommitmentDiscountType     = $commitmentDiscountType
        CommitmentDiscountUnit     = $commitmentDiscountUnit
        ConsumedQuantity           = $consumedQuantity
        ConsumedUnit               = $service.ConsumedUnit
        ContractedUnitPrice        = $contractedUnitPrice
        HostProviderName           = $Config.HostProviderName
        InvoiceId                  = $invoiceId
        ListUnitPrice              = $listUnitPrice
        PricingCategory            = $pricingCategory
        RegionId                   = $region.Id
        RegionName                 = $region.Name
        ResourceId                 = $res.ResourceId
        ResourceName               = $res.ResourceName
        ResourceType               = $res.ResourceType
        ServiceCategory            = $service.Category
        ServiceName                = $service.Name
        ServiceSubcategory         = $service.Subcategory
        SkuId                      = $res.SkuId
        SkuPriceId                 = $res.SkuPriceId
        SubAccountId               = $subAccount.Id
        SubAccountName             = $subAccount.Name
        SubAccountType             = $Config.SubAccountType
        Tags                       = $tagsJson

        # ===================== FinOps Hub / Dashboard required columns =====================
        ProviderName               = $Config.ServiceProviderName
        x_BillingAccountId         = $billingAccount.Id
        x_BillingAccountAgreement  = $Config.BillingAccountAgreement
        x_BillingProfileId         = Get-RandomElement -Array $Identity.BillingProfileIds
        x_ResourceGroupName        = $res.ResourceGroup
        x_ResourceType             = $res.ResourceType

        # Publisher
        PublisherName              = $publisherName
        x_PublisherCategory        = if ($isMarketplace) { "Marketplace" } else { $Provider }

        # Unit prices (dashboard discount analysis)
        x_EffectiveUnitPrice       = $effectiveUnitPrice
        x_BilledUnitPrice          = $billedUnitPrice
        x_OnDemandCost             = $onDemandCost
        x_OnDemandUnitPrice        = $onDemandUnitPrice

        # SKU columns (dashboard SKU cost analysis, CPU architecture, AHB)
        x_SkuDescription           = $x_SkuDescription
        x_SkuInstanceType          = $x_SkuInstanceType
        x_SkuCoreCount             = $x_SkuCoreCount
        x_SkuMeterCategory         = $x_SkuMeterCategory
        x_SkuMeterSubcategory      = $x_SkuMeterSubcategory
        x_SkuMeterName             = $x_SkuMeterName
        x_SkuMeterId               = $res.SkuMeterId
        x_SkuOfferId               = if ($Provider -eq "Azure") { "MS-AZR-0017P" } else { $null }
        x_SkuLicenseStatus         = $x_SkuLicenseStatus
        x_SkuLicenseQuantity       = $x_SkuLicenseQuantity
        x_SkuLicenseType           = $x_SkuLicenseType

        # Commitment linkage (dashboard invoicing/utilization)
        x_SkuOrderId               = $x_SkuOrderId
        x_SkuTerm                  = $x_SkuTerm

        # Pricing detail (dashboard data quality page)
        x_PricingBlockSize         = $service.PricingBlockSize
        x_PricingUnitDescription   = $service.PricingUnitDescription

        # Data quality / metadata
        x_SourceChanges            = $x_SourceChanges
        x_CloudProvider            = $Provider
        x_FocusVersion             = $FocusVersion
        x_IngestionTime            = Get-IsoDateTime -Date (Get-Date)
    }
}

# ============================================================================
# Main Execution
# ============================================================================

Write-Host ("=" * 70) -ForegroundColor Cyan
Write-Host "FinOps Hub Multi-Cloud FOCUS Test Data Generator" -ForegroundColor Cyan
Write-Host ("=" * 70) -ForegroundColor Cyan
Write-Host ""

# Determine providers and row distribution
$providers = if ($CloudProvider -eq "All") {
    @("Azure", "AWS", "GCP", "DataCenter")
} else {
    @($CloudProvider)
}

# Row distribution: ~60% Azure, ~20% AWS, ~15% GCP, ~5% DataCenter
$providerWeights = @{
    "Azure"      = 0.60
    "AWS"        = 0.20
    "GCP"        = 0.15
    "DataCenter" = 0.05
}

# If single provider, 100% goes to it
if ($providers.Count -eq 1) {
    $providerWeights = @{ $providers[0] = 1.0 }
}
# Calculate total days (inclusive of both endpoints; never less than 1)
$totalDays = [math]::Max(1, (New-TimeSpan -Start $StartDate -End $EndDate).Days + 1)

Write-Host "Configuration:" -ForegroundColor Yellow
Write-Host "  Cloud Provider(s): $($providers -join ', ')"
Write-Host "  FOCUS Version: $FocusVersion"
Write-Host "  Date Range: $($StartDate.ToString('yyyy-MM-dd')) to $($EndDate.ToString('yyyy-MM-dd')) ($totalDays days)"
Write-Host "  Total Row Target: $([string]::Format('{0:N0}', $TotalRowTarget))"
Write-Host "  Total Budget: `$$([string]::Format('{0:N0}', $TotalBudget)) USD"
Write-Host "  Output Format: $OutputFormat"
Write-Host "  Output Path: $OutputPath"
Write-Host ""

# Create output directory
if (-not (Test-Path $OutputPath)) {
    New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null
    Write-Host "Created output directory: $OutputPath" -ForegroundColor Green
}

# Pre-generate identities for each provider so resources persist across days
Write-Host "Pre-generating persistent identities..." -ForegroundColor Yellow
$providerIdentities = @{}
foreach ($provider in $providers) {
    $providerIdentities[$provider] = New-ProviderIdentities -Provider $provider -Config $ProviderConfigs[$provider]
    $resCount = $providerIdentities[$provider].Resources.Count
    $saCount = $providerIdentities[$provider].SubAccounts.Count
    $baCount = $providerIdentities[$provider].BillingAccounts.Count
    $cdCount = $providerIdentities[$provider].Commitments.Count
    Write-Host "  $provider : $resCount resources, $saCount sub-accounts, $baCount billing accounts, $cdCount commitments" -ForegroundColor Gray
}
Write-Host ""

$totalRows = 0
$allProviderCosts = @{}      # Running sum of EffectiveCost per provider
$allProviderRowCounts = @{}  # Row count per provider
$allProviderCsvPaths = @{}   # Path to raw CSV per provider

# Generate rows for each provider - streaming to CSV to avoid OOM
foreach ($provider in $providers) {
    $weight = if ($providerWeights.ContainsKey($provider)) { $providerWeights[$provider] } else { 1.0 / $providers.Count }
    $providerTotalRows = [math]::Max(1, [int]($TotalRowTarget * $weight))
    $dailyRowCount = [math]::Max(1, [int]($providerTotalRows / $totalDays))

    Write-Host "Generating $provider data ($([string]::Format('{0:N0}', $providerTotalRows)) rows, ~$dailyRowCount/day)..." -ForegroundColor Yellow

    $config = $ProviderConfigs[$provider]
    $identity = $providerIdentities[$provider]
    $providerCsvPath = Join-Path $OutputPath "_raw_$($provider.ToLower()).csv"
    $providerCostSum = [double]0
    $headerWritten = $false

    $currentDate = $StartDate
    $rowsGenerated = 0
    $lastPct = -1

    while ($currentDate -le $EndDate -and $rowsGenerated -lt $providerTotalRows) {
        # Vary daily count slightly (+/- 10%) for realism
        $variance = [int]($dailyRowCount * 0.1)
        if ($variance -lt 1) { $variance = 1 }
        $todayCount = [math]::Max(1, $dailyRowCount + (Get-Random -Minimum (-$variance) -Maximum ($variance + 1)))

        # Don't exceed target
        if ($rowsGenerated + $todayCount -gt $providerTotalRows) {
            $todayCount = $providerTotalRows - $rowsGenerated
        }

        # Generate one day's rows in a small batch
        $dayRows = [System.Collections.Generic.List[PSCustomObject]]::new($todayCount)
        for ($i = 0; $i -lt $todayCount; $i++) {
            $row = New-FocusRow -Provider $provider -ChargeDate $currentDate -Config $config -Identity $identity -IncludeCommitments -IncludeHybridBenefit
            $providerCostSum += $row.EffectiveCost
            $dayRows.Add($row)
        }

        # Append daily batch to CSV (stream to disk, free memory)
        if (-not $headerWritten) {
            $dayRows | Export-Csv -Path $providerCsvPath -NoTypeInformation -Encoding UTF8
            $headerWritten = $true
        } else {
            $dayRows | Export-Csv -Path $providerCsvPath -NoTypeInformation -Encoding UTF8 -Append
        }
        $dayRows.Clear()
        $dayRows = $null

        $rowsGenerated += $todayCount
        $currentDate = $currentDate.AddDays(1)

        # Progress indicator every 10%
        $pct = [math]::Floor($rowsGenerated / $providerTotalRows * 100)
        if ($pct -ge $lastPct + 10) {
            $lastPct = $pct
            Write-Host "  $provider : $pct% ($([string]::Format('{0:N0}', $rowsGenerated)) rows)" -ForegroundColor Gray
        }
    }

    Write-Host "  $provider : Generated $([string]::Format('{0:N0}', $rowsGenerated)) rows" -ForegroundColor Green
    $allProviderCosts[$provider] = $providerCostSum
    $allProviderRowCounts[$provider] = $rowsGenerated
    $allProviderCsvPaths[$provider] = $providerCsvPath
    $totalRows += $rowsGenerated

    # Force GC between providers to reclaim memory
    [System.GC]::Collect()
}

# ============================================================================
# Budget Scaling (calculated from tracked sums, applied via Python/pandas)
# ============================================================================

$totalGeneratedCost = 0
foreach ($provider in $providers) {
    $totalGeneratedCost += $allProviderCosts[$provider]
}

$scaleFactor = if ($totalGeneratedCost -gt 0) { $TotalBudget / $totalGeneratedCost } else { 1 }
Write-Host ""
Write-Host "Scaling costs by factor $([math]::Round($scaleFactor, 4)) to target budget `$$([string]::Format('{0:N0}', $TotalBudget))" -ForegroundColor Cyan

# ============================================================================
# Export - Budget scaling + Parquet conversion via Python/pandas (memory-safe)
# ============================================================================

$generatedFiles = @()

foreach ($provider in $providers) {
    $rawCsvPath = $allProviderCsvPaths[$provider]
    $providerRowCount = $allProviderRowCounts[$provider]
    $providerScaledCost = [math]::Round($allProviderCosts[$provider] * $scaleFactor, 2)

    $baseFileName = "focus-$($provider.ToLower())-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))"
    $csvFileName = "$baseFileName.csv"
    $csvFilePath = Join-Path $OutputPath $csvFileName
    $parquetFileName = "$baseFileName.parquet"
    $parquetFilePath = Join-Path $OutputPath $parquetFileName

    Write-Host "  $provider : $([string]::Format('{0:N0}', $providerRowCount)) rows, `$$([string]::Format('{0:N2}', $providerScaledCost)) USD" -ForegroundColor Green

    # Use Python/pandas to: apply budget scaling, output CSV/Parquet
    $rawCsvPy = ($rawCsvPath -replace '\\', '/')
    $csvOutPy = ($csvFilePath -replace '\\', '/')
    $parquetOutPy = ($parquetFilePath -replace '\\', '/')

    # Write Python script to temp file (avoids heredoc piping issues with pwsh -File).
    # FIX: removed the dead "if fmt == 'Parquet' and 'PARQUET_OK' not in
    # locals().get('_printed', '')" branch - _printed was never defined, so the
    # condition was always true and the Parquet file was written twice.
    $pyTempFile = [System.IO.Path]::GetTempFileName() -replace '\.tmp$', '.py'
    $pythonScript = @"
import pandas as pd, sys
try:
    df = pd.read_csv('$rawCsvPy', low_memory=False)
    sf = $scaleFactor
    cost_cols = ['BilledCost','ContractedCost','EffectiveCost','ListCost',
                 'ContractedUnitPrice','ListUnitPrice',
                 'x_EffectiveUnitPrice','x_BilledUnitPrice',
                 'x_OnDemandCost','x_OnDemandUnitPrice']
    for col in cost_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
            df[col] = (df[col] * sf).round(10)
    fmt = '$OutputFormat'
    if fmt in ('CSV','Both'):
        df.to_csv('$csvOutPy', index=False)
        print('CSV_OK')
    if fmt in ('Parquet','Both'):
        df.to_parquet('$parquetOutPy', engine='pyarrow', compression='snappy', index=False)
        print('PARQUET_OK')
    print('DONE')
except Exception as e:
    print(f'ERROR: {e}')
    sys.exit(1)
"@
    [System.IO.File]::WriteAllText($pyTempFile, $pythonScript)
    $result = (python $pyTempFile 2>&1) -join "`n"
    Remove-Item $pyTempFile -Force -ErrorAction SilentlyContinue
    if ($result -match 'DONE') {
        if ($result -match 'PARQUET_OK') {
            Write-Host "  Saved Parquet: $parquetFilePath" -ForegroundColor Gray
            $generatedFiles += $parquetFilePath
        }
        if ($result -match 'CSV_OK') {
            Write-Host "  Saved CSV: $csvFilePath" -ForegroundColor Gray
            $generatedFiles += $csvFilePath
        }
    } else {
        Write-Host "  Warning: Python scaling failed. Using raw CSV without scaling." -ForegroundColor Yellow
        Write-Host "  $result" -ForegroundColor Yellow
        # Fall back: rename raw CSV as final CSV
        Copy-Item $rawCsvPath $csvFilePath -Force
        $generatedFiles += $csvFilePath
    }

    # Clean up raw CSV (intermediate file)
    if (Test-Path $rawCsvPath) { Remove-Item $rawCsvPath -Force }

    # Generate manifest.json
    $manifestFilePath = Join-Path $OutputPath "manifest-$($provider.ToLower()).json"

    $dataFile = if ($OutputFormat -eq "Parquet" -or $OutputFormat -eq "Both") { "$baseFileName.parquet" } else { "$baseFileName.csv" }
    $dataFilePath = Join-Path $OutputPath $dataFile
    $fileSize = if (Test-Path $dataFilePath) { (Get-Item $dataFilePath).Length } else { 0 }

    $manifest = @{
        exportConfig = @{
            exportName = "focus-$($provider.ToLower())-export"
            resourceId = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/test-rg"
            dataVersion = "1.0"
            apiVersion = "2023-08-01"
            type = "FocusCost"
            timeFrame = "Custom"
            granularity = "Daily"
        }
        deliveryConfig = @{
            partitionData = $true
            dataOverwriteBehavior = "OverwritePreviousReport"
            fileFormat = if ($OutputFormat -eq "Parquet" -or $OutputFormat -eq "Both") { "Parquet" } else { "Csv" }
            compressionMode = "Snappy"
        }
        blobs = @(
            @{
                blobName = $dataFile
                byteCount = $fileSize
            }
        )
        runInfo = @{
            executionType = "Scheduled"
            submittedTime = (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssZ")
            runId = [guid]::NewGuid().ToString()
            startDate = $StartDate.ToString("yyyy-MM-ddT00:00:00Z")
            endDate = $EndDate.ToString("yyyy-MM-ddT00:00:00Z")
        }
    } | ConvertTo-Json -Depth 5

    $manifest | Out-File -FilePath $manifestFilePath -Encoding UTF8
    Write-Host "  Saved manifest: $manifestFilePath" -ForegroundColor Gray
    $generatedFiles += $manifestFilePath
}

# ============================================================================
# Summary
# ============================================================================

Write-Host ""
Write-Host ("=" * 70) -ForegroundColor Cyan
Write-Host "Generation Complete!" -ForegroundColor Green
Write-Host ("=" * 70) -ForegroundColor Cyan
Write-Host ""
Write-Host "Summary:" -ForegroundColor Yellow
Write-Host "  Total Rows Generated: $([string]::Format('{0:N0}', $totalRows))"
Write-Host "  Total Cost: `$$([string]::Format('{0:N2}', $TotalBudget)) USD"
Write-Host "  Output Format: $OutputFormat"
Write-Host "  Files Created: $($generatedFiles.Count)"
Write-Host ""
Write-Host "Provider Breakdown:" -ForegroundColor Yellow
foreach ($provider in $providers) {
    $providerScaledCost = [math]::Round($allProviderCosts[$provider] * $scaleFactor, 2)
    $providerRowCount = $allProviderRowCounts[$provider]
    Write-Host "  $provider : $([string]::Format('{0:N0}', $providerRowCount)) rows | `$$([string]::Format('{0:N2}', $providerScaledCost))"
}
Write-Host ""
Write-Host "Generated Files:" -ForegroundColor Yellow
foreach ($file in $generatedFiles) {
    if (Test-Path $file) {
        $size = (Get-Item $file).Length / 1MB
        Write-Host "  - $file ($([math]::Round($size, 2)) MB)"
    }
}

Write-Host ""
Write-Host "Dashboard Coverage:" -ForegroundColor Cyan
Write-Host "  PricingCategory: Standard, Dynamic, Committed"
Write-Host "  CommitmentDiscountStatus: Used, Unused (with SkuOrderId/SkuTerm linkage)"
Write-Host "  CommitmentDiscountType: Reservation, Savings Plan (+ Purchase rows)"
Write-Host "  Azure Hybrid Benefit: x_SkuLicenseStatus Enabled/Not Enabled"
Write-Host "  CPU Architecture: Intel/AMD/Arm64 patterns in x_SkuMeterName"
Write-Host "  Tag coverage: ~80% tagged, ~20% untagged (maturity scorecard)"
Write-Host "  Marketplace: x_PublisherCategory = 'Marketplace'"
Write-Host "  Data quality anomalies: ~2% rows with cost relationship issues"
Write-Host "  Negotiated discounts: ~60% rows with ListCost > ContractedCost"
Write-Host "  x_EffectiveUnitPrice, x_BilledUnitPrice, x_OnDemandCost/UnitPrice"
Write-Host "  x_SkuDescription, x_SkuInstanceType, x_SkuCoreCount"
Write-Host "  x_BillingAccountAgreement, x_PricingBlockSize, x_PricingUnitDescription"
Write-Host ""

# ============================================================================
# Upload to Azure Storage
# ============================================================================

if ($Upload -and $StorageAccountName) {
    Write-Host ""
    Write-Host ("=" * 70) -ForegroundColor Cyan
    Write-Host "Uploading to Azure Storage..." -ForegroundColor Yellow
    Write-Host ("=" * 70) -ForegroundColor Cyan

    # Get storage account key
    $storageKey = $null
    if ($ResourceGroupName) {
        Write-Host "  Getting storage account key..." -ForegroundColor Gray
        $storageKey = (az storage account keys list --account-name $StorageAccountName --resource-group $ResourceGroupName --query "[0].value" -o tsv 2>$null)
        if (-not $storageKey) {
            Write-Host "  Warning: Could not get storage key, falling back to default auth" -ForegroundColor Yellow
        }
    }

    # Start ADF triggers BEFORE uploading data so the BlobCreated events fire pipelines
    if ($StartTriggers -and $AdfName -and $ResourceGroupName) {
        Write-Host ""
        Write-Host ("=" * 70) -ForegroundColor Cyan
        Write-Host "Ensuring ADF Triggers are running (BEFORE upload)..." -ForegroundColor Yellow
        Write-Host ("=" * 70) -ForegroundColor Cyan

        $triggers = @("msexports_ManifestAdded", "ingestion_ManifestAdded")
        foreach ($trigger in $triggers) {
            $state = (az datafactory trigger show --factory-name $AdfName --resource-group $ResourceGroupName --name $trigger --query "properties.runtimeState" -o tsv 2>$null)
            if ($state -eq "Started") {
                Write-Host "  $trigger already running" -ForegroundColor Gray
            } else {
                Write-Host "  Starting $trigger..." -ForegroundColor Cyan
                az datafactory trigger start --factory-name $AdfName --resource-group $ResourceGroupName --name $trigger --only-show-errors 2>$null
                Write-Host "  $trigger started" -ForegroundColor Green
            }
        }

        Write-Host "  Waiting 5 seconds for triggers to become active..." -ForegroundColor Gray
        Start-Sleep -Seconds 5
        Write-Host ""
    }

    $uploadedCount = 0
    $runId = [guid]::NewGuid().ToString()
    $exportTime = (Get-Date).ToString("yyyyMMddHHmm")

    foreach ($provider in $providers) {
        $providerLower = $provider.ToLower()
        $baseFileName = "focus-$providerLower-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))"

        $fileExt = if ($OutputFormat -eq "Parquet" -or $OutputFormat -eq "Both") { ".parquet" } else { ".csv" }
        $dataFile = "$baseFileName$fileExt"
        $dataFilePath = Join-Path $OutputPath $dataFile

        if (-not (Test-Path $dataFilePath)) {
            Write-Host "  Warning: $dataFilePath not found, skipping $provider" -ForegroundColor Yellow
            continue
        }

        $fileSize = (Get-Item $dataFilePath).Length

        if ($providerLower -eq "azure") {
            # Azure: msexports with Cost Management folder structure
            $container = "msexports"
            $scopeId = "subscriptions/00000000-0000-0000-0000-000000000000"
            $exportName = "focus-cost-export"
            $dateRange = "$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))"
            $blobFolder = "$scopeId/$exportName/$dateRange/$exportTime/$runId"
            $blobPath = "$blobFolder/$dataFile"
            $manifestBlobPath = "$blobFolder/manifest.json"

            $manifest = @{
                manifestVersion = "2024-04-01"
                byteCount = $fileSize
                blobCount = 1
                dataRowCount = $allProviderRowCounts[$provider]
                exportConfig = @{
                    exportName = $exportName
                    resourceId = "/$scopeId/providers/Microsoft.CostManagement/exports/$exportName"
                    dataVersion = "1.0r2"
                    apiVersion = "2023-07-01-preview"
                    type = "FocusCost"
                    timeFrame = "Custom"
                    granularity = "Daily"
                }
                deliveryConfig = @{
                    partitionData = $true
                    dataOverwriteBehavior = "OverwritePreviousReport"
                    fileFormat = if ($fileExt -eq ".parquet") { "Parquet" } else { "Csv" }
                    compressionMode = if ($fileExt -eq ".parquet") { "Snappy" } else { "None" }
                    containerUri = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/$StorageAccountName"
                    rootFolderPath = ""
                }
                runInfo = @{
                    executionType = "Scheduled"
                    submittedTime = (Get-Date).ToString("yyyy-MM-ddTHH:mm:ss.fffffffZ")
                    runId = $runId
                    startDate = $StartDate.ToString("yyyy-MM-ddT00:00:00")
                    endDate = $EndDate.ToString("yyyy-MM-ddT00:00:00")
                }
                blobs = @(
                    @{
                        blobName = $blobPath
                        byteCount = $fileSize
                        dataRowCount = $allProviderRowCounts[$provider]
                    }
                )
            } | ConvertTo-Json -Depth 5

            $manifestFilePath = Join-Path $OutputPath "manifest-$providerLower.json"
            $manifest | Out-File -FilePath $manifestFilePath -Encoding UTF8

            Write-Host "  Uploading $provider to msexports container..." -ForegroundColor Cyan
            if ($storageKey) {
                az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null
                az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null
            } else {
                az storage blob upload --account-name $StorageAccountName --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null
                az storage blob upload --account-name $StorageAccountName --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null
            }

        } else {
            # AWS/GCP/DataCenter: ingestion container
            $container = "ingestion"
            $scopePath = "$providerLower/test-account"
            $ingestionId = (Get-Date).ToString("yyyyMMddHHmmss")
            $blobFolder = "Costs/$($EndDate.ToString('yyyy'))/$($EndDate.ToString('MM'))/$scopePath"
            $blobPath = "$blobFolder/${ingestionId}__$dataFile"
            $manifestBlobPath = "$blobFolder/manifest.json"

            $manifest = @{
                note = "Trigger file for ADX ingestion"
                provider = $providerLower
                timestamp = (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssZ")
            } | ConvertTo-Json -Depth 3

            $manifestFilePath = Join-Path $OutputPath "manifest-$providerLower.json"
            $manifest | Out-File -FilePath $manifestFilePath -Encoding UTF8

            Write-Host "  Uploading $provider to ingestion container..." -ForegroundColor Cyan
            if ($storageKey) {
                az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null
                az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null
            } else {
                az storage blob upload --account-name $StorageAccountName --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null
                az storage blob upload --account-name $StorageAccountName --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null
            }
        }

        Write-Host "  Uploaded: $blobPath" -ForegroundColor Green
        Write-Host "  Uploaded: $manifestBlobPath" -ForegroundColor Green
        $uploadedCount++
    }

    Write-Host ""
    Write-Host "Upload Complete! $uploadedCount providers uploaded." -ForegroundColor Green

    # Verify ADF pipeline execution
    if ($StartTriggers -and $AdfName -and $ResourceGroupName) {
        Write-Host ""
        Write-Host ("=" * 70) -ForegroundColor Cyan
        Write-Host "Verifying ADF Pipeline Execution..." -ForegroundColor Yellow
        Write-Host ("=" * 70) -ForegroundColor Cyan

        Write-Host "  Waiting 15 seconds for blob events to propagate..." -ForegroundColor Gray
        Start-Sleep -Seconds 15

        $now = (Get-Date).ToUniversalTime()
        $checkFrom = $now.AddMinutes(-5).ToString("yyyy-MM-ddTHH:mm:ssZ")
        $checkTo = $now.AddMinutes(5).ToString("yyyy-MM-ddTHH:mm:ssZ")

        $pipelineRuns = az datafactory pipeline-run query-by-factory --factory-name $AdfName --resource-group $ResourceGroupName --last-updated-after $checkFrom --last-updated-before $checkTo -o json 2>$null | ConvertFrom-Json

        if ($pipelineRuns.value.Count -gt 0) {
            Write-Host "  ADF pipelines triggered successfully!" -ForegroundColor Green
            foreach ($run in $pipelineRuns.value) {
                Write-Host "    $($run.pipelineName) | $($run.status)" -ForegroundColor Gray
            }
        } else {
            Write-Host "  Warning: No pipeline runs detected. Re-uploading manifests as safety net..." -ForegroundColor Yellow

            foreach ($provider in $providers) {
                $providerLower = $provider.ToLower()
                $manifestFilePath = Join-Path $OutputPath "manifest-$providerLower.json"
                if (-not (Test-Path $manifestFilePath)) { continue }

                if ($providerLower -eq "azure") {
                    $container = "msexports"
                    $scopeId = "subscriptions/00000000-0000-0000-0000-000000000000"
                    $exportName = "focus-cost-export"
                    $dateRange = "$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))"
                    $blobFolder = "$scopeId/$exportName/$dateRange/$exportTime/$runId"
                    $manifestBlobPath = "$blobFolder/manifest.json"
                } else {
                    $container = "ingestion"
                    $scopePath = "$providerLower/test-account"
                    $blobFolder = "Costs/$($EndDate.ToString('yyyy'))/$($EndDate.ToString('MM'))/$scopePath"
                    $manifestBlobPath = "$blobFolder/manifest.json"
                }

                Write-Host "  Re-uploading $container/$manifestBlobPath" -ForegroundColor Cyan
                if ($storageKey) {
                    az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null
                } else {
                    az storage blob upload --account-name $StorageAccountName --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null
                }
            }
            Write-Host "  Manifests re-uploaded. Pipelines should trigger shortly." -ForegroundColor Green
        }

        Write-Host ""
        Write-Host "ADF triggers are running. Data will be processed automatically." -ForegroundColor Green
    } elseif ($StartTriggers) {
        Write-Host ""
        Write-Host "Warning: -StartTriggers requires -AdfName and -ResourceGroupName" -ForegroundColor Yellow
    } else {
        Write-Host ""
        Write-Host "Next Steps:" -ForegroundColor Yellow
        Write-Host "  1. Ensure ADF triggers are started BEFORE uploading data:"
        Write-Host "     az datafactory trigger start --factory-name --resource-group --name msexports_ManifestAdded"
        Write-Host "     az datafactory trigger start --factory-name --resource-group --name ingestion_ManifestAdded"
        Write-Host "  2. Then upload data (manifest upload fires BlobCreated event)"
        Write-Host "  3. Or re-run with -StartTriggers -AdfName -ResourceGroupName "
    }
} else {
    Write-Host ""
    Write-Host "Next Steps:" -ForegroundColor Yellow
    Write-Host "  1. Run with -Upload -StorageAccountName to upload automatically"
    Write-Host "  2. Or manually upload:"
    Write-Host "     - Azure data to msexports/{scope}/{export-name}/{date-range}/{time}/{guid}/"
    Write-Host "     - AWS/GCP/DC data to ingestion/Costs/{yyyy}/{mm}/{provider}/{account}/"
    Write-Host "  3. Start ADF triggers to process the data"
}
Start ADF triggers to process the data" +} + +Write-Host "" +Write-Host "FOCUS Specification Notes:" -ForegroundColor Cyan +Write-Host " - Data follows FOCUS v$FocusVersion column definitions" +Write-Host " - ServiceProviderName replaces ProviderName (deprecated in 1.3)" +Write-Host " - HostProviderName added for multi-cloud visibility" +Write-Host " - Custom columns use x_ prefix per FOCUS spec" +Write-Host " - CommitmentDiscountCategory: Reservation=Usage, SavingsPlan=Spend (per FOCUS)" +Write-Host " - PricingCategory: Standard (on-demand), Dynamic (spot), Committed (reserved)" +Write-Host "" From 20ff2cb0829bfc6bd43d7b19e908d3e6636fb032 Mon Sep 17 00:00:00 2001 From: Zach Olinske Date: Mon, 16 Feb 2026 13:05:23 +0100 Subject: [PATCH 2/5] fix: address PR #2006 review feedback Comprehensive rewrite of Generate-MultiCloudTestData.ps1: Critical fixes: - Fix Get-Random [int] overflow with 12-digit AWS account IDs (New-AwsAccountId) - Eliminate Python dependency entirely (inline budget scaling via scale factor) - Remove dead code from Python/Parquet block Required by repo conventions: - Add #Requires -Version 7.0 - Add .LINK to comment-based help - Add [CmdletBinding(SupportsShouldProcess)] with WhatIf/Confirm support - Add changelog entry - Add test directory README.md FOCUS specification compliance: - Fix ~12 ServiceSubcategory values to match FOCUS closed enumeration - Fix cost invariants: unit prices calculated AFTER all cost modifications - Anomaly rows now set ChargeClass=Correction (exempt from invariant rules) - Credits/Adjustments get null InvoiceId (per FOCUS spec) - Version-aware column sets: v1.1+ gets CommitmentDiscountQuantity/Unit, v1.2+ gets BillingAccountType/SubAccountType/InvoiceId, v1.3+ gets HostProviderName/ServiceProviderName - Document scope as Cost and Usage dataset only Recommended improvements: - Add -Seed parameter for reproducible test data - Add -UseStorageKey switch, default to Azure AD auth (--auth-mode login) - Fix Get-RandomDecimal to 
use [long] instead of [int] for large ranges --- docs-mslearn/toolkit/changelog.md | 1 + .../test/Generate-MultiCloudTestData.ps1 | 1247 +++++++++-------- src/templates/finops-hub/test/README.md | 64 + 3 files changed, 744 insertions(+), 568 deletions(-) create mode 100644 src/templates/finops-hub/test/README.md diff --git a/docs-mslearn/toolkit/changelog.md b/docs-mslearn/toolkit/changelog.md index 2ecf1f2b4..c1901d0d7 100644 --- a/docs-mslearn/toolkit/changelog.md +++ b/docs-mslearn/toolkit/changelog.md @@ -981,6 +981,7 @@ _**Breaking change**_ - Added param to disable external access to Azure Data Lake and Azure Data Explorer. - Added param to specify subnet range of virtual network - minimum size = /26 - Support for storage account infrastructure encryption. + - Multi-cloud test data generator – Added `Generate-MultiCloudTestData.ps1` to create synthetic FOCUS-compliant cost data for Azure, AWS, GCP, and on-premises providers. Supports FOCUS versions 1.0-1.3 with version-specific column sets, commitment discounts, Azure Hybrid Benefit, tag variation, and inline budget scaling. Includes `-Seed` for reproducibility and Azure AD auth for uploads. - Published a [schema file](https://aka.ms/finops/hubs/settings-schema) for the hub settings.json file. - **Changed** - Changed dataset names in the ingestion container to facilitate Azure Data Explorer ingestion. diff --git a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 index da008cf88..00fed11e2 100644 --- a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 +++ b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 @@ -1,12 +1,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +#Requires -Version 7.0 + <# .SYNOPSIS Generates multi-cloud FOCUS-compliant test data for FinOps Hub validation. 
.DESCRIPTION - This script generates synthetic cost data in FOCUS 1.0-1.3 format for: + This script generates synthetic cost data in FOCUS format for: - Azure (Cost Management Managed Exports simulation) - AWS (Data Exports / CUR FOCUS format) - GCP (BigQuery FOCUS export simulation) @@ -15,20 +17,19 @@ The generated data can be uploaded to Azure Storage for FinOps Hub ingestion testing. Features: + - Version-aware column sets matching FOCUS 1.0, 1.1, 1.2, or 1.3 specification - ALL columns referenced by FinOps Hub dashboard KQL queries - Correct PricingCategory values: Standard, Dynamic, Committed - Full Azure Hybrid Benefit simulation with x_SkuLicense* columns - Commitment discounts with x_SkuOrderId, x_SkuTerm linkage - Commitment Purchase rows for invoicing page - CPU architecture in x_SkuMeterName (Intel/AMD/Arm64 patterns) - - x_EffectiveUnitPrice, x_BilledUnitPrice for discount analysis - - x_SkuDescription, x_SkuInstanceType for SKU cost analysis - - x_OnDemandCost, x_OnDemandUnitPrice for savings calculations - Tag coverage variation (~20% untagged for maturity scorecard) - - Data quality anomaly rows for validation page + - Data quality anomaly rows (documented via x_SourceChanges, ChargeClass=Correction) - Negotiated discount rows (ListCost > ContractedCost) - Persistent resources across days (realistic trending) - - Budget scaling to target total cost + - Inline budget scaling (no external dependencies) + - Reproducible output via -Seed parameter .PARAMETER OutputPath Directory to save generated files. Default: ./test-data @@ -53,21 +54,26 @@ .PARAMETER TotalBudget Target total cost in USD for all generated data. Default: 500000 ($500K) - Costs are scaled proportionally to achieve this target. + Costs are scaled proportionally during generation to achieve this target. .PARAMETER FocusVersion FOCUS specification version. Options: 1.0, 1.1, 1.2, 1.3 Default: 1.3 + The output column set varies per version to match the official FOCUS specification. 
+ Note: This generates the Cost and Usage dataset only. The Contract Commitment + dataset (introduced in v1.3) is not included. .PARAMETER OutputFormat - Output file format. Options: Parquet, CSV, Both - Default: Parquet + Output file format. Options: CSV, Both + Default: CSV + Note: Parquet output requires the PSParquet module (Install-Module PSParquet). + If PSParquet is not available and Parquet is requested, falls back to CSV. .PARAMETER StorageAccountName Azure Storage account name for upload. .PARAMETER ResourceGroupName - Resource group containing the storage account (required for key-based auth). + Resource group containing the storage account. Required only when using -UseStorageKey. .PARAMETER AdfName Azure Data Factory name for starting triggers. @@ -78,27 +84,49 @@ .PARAMETER StartTriggers Start ADF triggers before upload so BlobCreated events are captured. +.PARAMETER UseStorageKey + Use storage account key for authentication instead of Azure AD (--auth-mode login). + Requires -ResourceGroupName. Not recommended for production use. + +.PARAMETER Seed + Random seed for reproducible test data generation. When specified, the same seed + produces identical output (given the same parameters). 
+ .EXAMPLE .\Generate-MultiCloudTestData.ps1 - # Generates 6 months of data for all providers, 500K rows, $500K total budget + # Generates 6 months of FOCUS 1.3 data for all providers, 500K rows, $500K budget .EXAMPLE .\Generate-MultiCloudTestData.ps1 -MonthsOfData 3 -TotalRowTarget 100000 -TotalBudget 50000 # Generates 3 months of data, 100K rows, $50K total budget .EXAMPLE - .\Generate-MultiCloudTestData.ps1 -Upload -StorageAccountName "stfinopshub" -ResourceGroupName "rg-finopshub" -AdfName "adf-finopshub" -StartTriggers - # Generates data, ensures ADF triggers are running, then uploads to trigger processing + .\Generate-MultiCloudTestData.ps1 -FocusVersion 1.0 -CloudProvider Azure -TotalRowTarget 50000 + # Generates FOCUS 1.0 Azure-only data with 43 columns + +.EXAMPLE + .\Generate-MultiCloudTestData.ps1 -Upload -StorageAccountName "stfinopshub" -AdfName "adf-finopshub" -StartTriggers + # Generates data using Azure AD auth, ensures ADF triggers are running, then uploads + +.EXAMPLE + .\Generate-MultiCloudTestData.ps1 -Seed 42 -TotalRowTarget 1000 + # Generates reproducible test data with 1000 rows + +.LINK + https://github.com/microsoft/finops-toolkit + +.LINK + https://focus.finops.org/focus-specification/ .NOTES FOCUS Specification Reference: https://focus.finops.org/focus-specification/v1-3/ + FinOps Hub Documentation: https://aka.ms/finops/hubs - Prerequisites: - - Python 3 with pandas and pyarrow for Parquet output - - Azure CLI for upload functionality (az storage, az datafactory) + Author: FinOps Hub Team + Version: 4.0.0 #> -[CmdletBinding()] +[CmdletBinding(SupportsShouldProcess)] param( [string]$OutputPath = "./test-data", @@ -118,8 +146,8 @@ param( [ValidateSet("1.0", "1.1", "1.2", "1.3")] [string]$FocusVersion = "1.3", - [ValidateSet("Parquet", "CSV", "Both")] - [string]$OutputFormat = "Parquet", + [ValidateSet("CSV", "Both")] + [string]$OutputFormat = "CSV", [string]$StorageAccountName, @@ -129,149 +157,169 @@ param( [switch]$Upload, - 
[switch]$StartTriggers + [switch]$StartTriggers, + + [switch]$UseStorageKey, + + [int]$Seed ) +# ============================================================================ +# Initialization +# ============================================================================ + +# Set random seed for reproducibility if specified +if ($PSBoundParameters.ContainsKey('Seed')) +{ + $null = Get-Random -SetSeed $Seed +} + # Calculate StartDate from MonthsOfData if not explicitly provided -if (-not $PSBoundParameters.ContainsKey('StartDate')) { +if (-not $PSBoundParameters.ContainsKey('StartDate')) +{ $StartDate = (Get-Date -Day 1).AddMonths(-$MonthsOfData + 1) } # Ensure EndDate is today max -if ($EndDate -gt (Get-Date)) { +if ($EndDate -gt (Get-Date)) +{ $EndDate = Get-Date } +# Parse FOCUS version for column selection +$focusMajorMinor = [version]$FocusVersion + # ============================================================================ -# Provider Configurations +# Provider-Specific Configuration # ============================================================================ +# ServiceSubcategory values aligned to FOCUS specification closed enumeration. 
+# Reference: https://focus.finops.org/focus-specification/ (ServiceSubcategory column) $ProviderConfigs = @{ Azure = @{ - ServiceProviderName = "Microsoft" - InvoiceIssuerName = "Microsoft" - HostProviderName = "Microsoft" - BillingAccountType = "Enterprise Agreement" - SubAccountType = "Subscription" - BillingCurrency = "USD" - BillingAccountAgreement = "Microsoft Customer Agreement" - Regions = @( + ProviderName = "Microsoft" + ServiceProviderName = "Microsoft" + InvoiceIssuerName = "Microsoft" + HostProviderName = "Microsoft" + BillingAccountType = "Billing Profile" + SubAccountType = "Subscription" + BillingCurrency = "USD" + BillingAccountAgreement = "Enterprise Agreement" + Regions = @( + @{ Id = "swedencentral"; Name = "Sweden Central" }, + @{ Id = "westeurope"; Name = "West Europe" }, @{ Id = "eastus"; Name = "East US" }, @{ Id = "westus2"; Name = "West US 2" }, - @{ Id = "westeurope"; Name = "West Europe" }, - @{ Id = "northeurope"; Name = "North Europe" }, - @{ Id = "southeastasia"; Name = "Southeast Asia" } + @{ Id = "italynorth"; Name = "Italy North" } ) - Services = @( - @{ Name = "Virtual Machines"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 30; CostMin = 50; CostMax = 2000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, - @{ Name = "Azure Kubernetes Service"; Category = "Compute"; Subcategory = "Containers"; Weight = 15; CostMin = 100; CostMax = 3000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, - @{ Name = "Azure SQL Database"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 12; CostMin = 30; CostMax = 800; PricingUnit = "DTU-Hours"; ConsumedUnit = "DTU Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 DTU-Hour" }, - @{ Name = "Storage Accounts"; Category = "Storage"; Subcategory = "General Purpose v2"; Weight = 10; CostMin = 5; CostMax = 300; PricingUnit = "GB"; ConsumedUnit = 
"GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB/Month" }, - @{ Name = "Azure Cosmos DB"; Category = "Databases"; Subcategory = "NoSQL Databases"; Weight = 8; CostMin = 20; CostMax = 500; PricingUnit = "RU/s"; ConsumedUnit = "Request Units"; PricingBlockSize = 100; PricingUnitDescription = "100 RU/s" }, - @{ Name = "Azure Data Explorer"; Category = "Analytics"; Subcategory = "Data Analytics"; Weight = 7; CostMin = 100; CostMax = 2500; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, - @{ Name = "Azure App Service"; Category = "Compute"; Subcategory = "App Services"; Weight = 5; CostMin = 10; CostMax = 200; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, - @{ Name = "Azure Functions"; Category = "Compute"; Subcategory = "Serverless Compute"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = "Executions"; ConsumedUnit = "1M Executions"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Executions" }, - @{ Name = "Azure Key Vault"; Category = "Security"; Subcategory = "Key Management"; Weight = 2; CostMin = 0.50; CostMax = 20; PricingUnit = "Operations"; ConsumedUnit = "10K Operations"; PricingBlockSize = 10000; PricingUnitDescription = "10,000 Operations" }, - @{ Name = "Bandwidth"; Category = "Networking"; Subcategory = "Data Transfer"; Weight = 5; CostMin = 1; CostMax = 100; PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, - @{ Name = "Marketplace - 3rd Party"; Category = "Compute"; Subcategory = "Marketplace"; Weight = 3; CostMin = 50; CostMax = 500; PricingUnit = "Units"; ConsumedUnit = "Units"; PricingBlockSize = 1; PricingUnitDescription = "1 Unit"; IsMarketplace = $true } + Services = @( + @{ Name = "Virtual Machines"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 35; CostMin = 50; CostMax = 2000; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; 
PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Azure Kubernetes Service"; Category = "Compute"; Subcategory = "Containers"; Weight = 20; CostMin = 100; CostMax = 3000; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Azure SQL Database"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 12; CostMin = 30; CostMax = 800; PricingUnit = "1 Hour"; ConsumedUnit = "DTUs"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Cosmos DB"; Category = "Databases"; Subcategory = "NoSQL"; Weight = 8; CostMin = 20; CostMax = 500; PricingUnit = "100 RUs"; ConsumedUnit = "Request Units"; PricingBlockSize = 100; PricingUnitDescription = "100 Request Units" }, + @{ Name = "Storage Accounts"; Category = "Storage"; Subcategory = "Object Storage"; Weight = 10; CostMin = 5; CostMax = 200; PricingUnit = "1 GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB/Month" }, + @{ Name = "Azure Data Explorer"; Category = "Analytics"; Subcategory = "Other (Analytics)"; Weight = 5; CostMin = 50; CostMax = 600; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "App Service"; Category = "Compute"; Subcategory = "Other (Compute)"; Weight = 4; CostMin = 10; CostMax = 300; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Key Vault"; Category = "Security"; Subcategory = "Other (Security)"; Weight = 2; CostMin = 1; CostMax = 50; PricingUnit = "10K Operations"; ConsumedUnit = "10K Operations"; PricingBlockSize = 10000; PricingUnitDescription = "10,000 Operations" }, + @{ Name = "Virtual Network"; Category = "Networking"; Subcategory = "Network Infrastructure"; Weight = 2; CostMin = 5; CostMax = 100; PricingUnit = "1 GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, + @{ Name = 
"Azure Functions"; Category = "Compute"; Subcategory = "Functions"; Weight = 2; CostMin = 0.10; CostMax = 30; PricingUnit = "1M Executions"; ConsumedUnit = "1M Executions"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Executions" }, + @{ Name = "Azure Marketplace"; Category = "Marketplace"; Subcategory = "Other (Compute)"; Weight = 5; CostMin = 10; CostMax = 500; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour"; IsMarketplace = $true } ) - ResourceTypes = @("microsoft.compute/virtualmachines", "microsoft.storage/storageaccounts", "microsoft.sql/servers", - "microsoft.containerservice/managedclusters", "microsoft.web/sites", "microsoft.keyvault/vaults", - "microsoft.kusto/clusters", "microsoft.documentdb/databaseaccounts", "microsoft.network/virtualnetworks") - VmSkus = @( - # Intel-based - @{ InstanceType = "Standard_D4s_v5"; Cores = 4; MemoryGB = 16; MeterName = "D4s v5"; Description = "Standard_D4s_v5 - Intel Ice Lake, 4 vCPUs, 16 GiB RAM" }, - @{ InstanceType = "Standard_D8s_v5"; Cores = 8; MemoryGB = 32; MeterName = "D8s v5"; Description = "Standard_D8s_v5 - Intel Ice Lake, 8 vCPUs, 32 GiB RAM" }, - @{ InstanceType = "Standard_E4s_v5"; Cores = 4; MemoryGB = 32; MeterName = "E4s v5"; Description = "Standard_E4s_v5 - Intel Ice Lake, 4 vCPUs, 32 GiB RAM" }, - @{ InstanceType = "Standard_E8s_v5"; Cores = 8; MemoryGB = 64; MeterName = "E8s v5"; Description = "Standard_E8s_v5 - Intel Ice Lake, 8 vCPUs, 64 GiB RAM" }, - @{ InstanceType = "Standard_F4s_v2"; Cores = 4; MemoryGB = 8; MeterName = "F4s v2"; Description = "Standard_F4s_v2 - Intel Cascade Lake, 4 vCPUs, 8 GiB RAM" }, - @{ InstanceType = "Standard_B2s"; Cores = 2; MemoryGB = 4; MeterName = "B2s"; Description = "Standard_B2s - Intel Broadwell, 2 vCPUs, 4 GiB RAM" }, - # AMD-based - @{ InstanceType = "Standard_D4as_v5"; Cores = 4; MemoryGB = 16; MeterName = "D4as v5"; Description = "Standard_D4as_v5 - AMD EPYC, 4 vCPUs, 16 GiB RAM" }, - 
@{ InstanceType = "Standard_D8as_v5"; Cores = 8; MemoryGB = 32; MeterName = "D8as v5"; Description = "Standard_D8as_v5 - AMD EPYC, 8 vCPUs, 32 GiB RAM" }, - @{ InstanceType = "Standard_E4as_v5"; Cores = 4; MemoryGB = 32; MeterName = "E4as v5"; Description = "Standard_E4as_v5 - AMD EPYC, 4 vCPUs, 32 GiB RAM" }, - @{ InstanceType = "Standard_L8as_v3"; Cores = 8; MemoryGB = 64; MeterName = "L8as v3"; Description = "Standard_L8as_v3 - AMD EPYC, 8 vCPUs, 64 GiB RAM" }, - # Arm64-based (Cobalt/Ampere) - @{ InstanceType = "Standard_D4ps_v5"; Cores = 4; MemoryGB = 16; MeterName = "D4ps v5"; Description = "Standard_D4ps_v5 - Arm64 Cobalt, 4 vCPUs, 16 GiB RAM" }, - @{ InstanceType = "Standard_D8ps_v5"; Cores = 8; MemoryGB = 32; MeterName = "D8ps v5"; Description = "Standard_D8ps_v5 - Arm64 Cobalt, 8 vCPUs, 32 GiB RAM" }, - @{ InstanceType = "Standard_E4ps_v5"; Cores = 4; MemoryGB = 32; MeterName = "E4ps v5"; Description = "Standard_E4ps_v5 - Arm64 Cobalt, 4 vCPUs, 32 GiB RAM" } + ResourceTypes = @("microsoft.compute/virtualmachines", "microsoft.storage/storageaccounts", "microsoft.sql/servers", "microsoft.kusto/clusters", "microsoft.containerservice/managedclusters", "microsoft.documentdb/databaseaccounts", "microsoft.web/sites", "microsoft.keyvault/vaults", "microsoft.network/virtualnetworks") + VmSkus = @( + @{ InstanceType = "Standard_D4s_v5"; Cores = 4; Arch = "Intel"; MeterName = "D4s v5"; Description = "D4s v5 (4 vCPUs, 16 GB RAM)" }, + @{ InstanceType = "Standard_D8s_v5"; Cores = 8; Arch = "Intel"; MeterName = "D8s v5"; Description = "D8s v5 (8 vCPUs, 32 GB RAM)" }, + @{ InstanceType = "Standard_D16s_v5"; Cores = 16; Arch = "Intel"; MeterName = "D16s v5"; Description = "D16s v5 (16 vCPUs, 64 GB RAM)" }, + @{ InstanceType = "Standard_E4s_v5"; Cores = 4; Arch = "Intel"; MeterName = "E4s v5"; Description = "E4s v5 (4 vCPUs, 32 GB RAM)" }, + @{ InstanceType = "Standard_E16s_v5"; Cores = 16; Arch = "Intel"; MeterName = "E16s v5"; Description = "E16s v5 (16 vCPUs, 128 GB 
RAM)" }, + @{ InstanceType = "Standard_D4as_v5"; Cores = 4; Arch = "AMD"; MeterName = "D4as v5"; Description = "D4as v5 AMD (4 vCPUs, 16 GB RAM)" }, + @{ InstanceType = "Standard_D8as_v5"; Cores = 8; Arch = "AMD"; MeterName = "D8as v5"; Description = "D8as v5 AMD (8 vCPUs, 32 GB RAM)" }, + @{ InstanceType = "Standard_E8as_v5"; Cores = 8; Arch = "AMD"; MeterName = "E8as v5"; Description = "E8as v5 AMD (8 vCPUs, 64 GB RAM)" }, + @{ InstanceType = "Standard_D4ps_v5"; Cores = 4; Arch = "Arm64"; MeterName = "D4ps v5"; Description = "D4ps v5 Arm64 (4 vCPUs, 16 GB RAM)" }, + @{ InstanceType = "Standard_D8ps_v5"; Cores = 8; Arch = "Arm64"; MeterName = "D8ps v5"; Description = "D8ps v5 Arm64 (8 vCPUs, 32 GB RAM)" }, + @{ InstanceType = "Standard_E4ps_v5"; Cores = 4; Arch = "Arm64"; MeterName = "E4ps v5"; Description = "E4ps v5 Arm64 (4 vCPUs, 32 GB RAM)" }, + @{ InstanceType = "Standard_B2s"; Cores = 2; Arch = "Intel"; MeterName = "B2s"; Description = "B2s Burstable (2 vCPUs, 4 GB RAM)" }, + @{ InstanceType = "Standard_F4s_v2"; Cores = 4; Arch = "Intel"; MeterName = "F4s v2"; Description = "F4s v2 Compute (4 vCPUs, 8 GB RAM)" } ) } AWS = @{ - ServiceProviderName = "Amazon Web Services" - InvoiceIssuerName = "Amazon Web Services" - HostProviderName = "Amazon Web Services" - BillingAccountType = "Management Account" - SubAccountType = "Member Account" - BillingCurrency = "USD" + ProviderName = "Amazon Web Services" + ServiceProviderName = "Amazon Web Services" + InvoiceIssuerName = "Amazon Web Services" + HostProviderName = "Amazon Web Services" + BillingAccountType = "Management Account" + SubAccountType = "Member Account" + BillingCurrency = "USD" BillingAccountAgreement = "AWS Customer Agreement" - Regions = @( + Regions = @( @{ Id = "us-east-1"; Name = "US East (N. 
Virginia)" }, @{ Id = "us-west-2"; Name = "US West (Oregon)" }, @{ Id = "eu-west-1"; Name = "Europe (Ireland)" }, @{ Id = "ap-southeast-1"; Name = "Asia Pacific (Singapore)" } ) - Services = @( + Services = @( @{ Name = "Amazon EC2"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 35; CostMin = 50; CostMax = 2000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "Amazon EKS"; Category = "Compute"; Subcategory = "Containers"; Weight = 18; CostMin = 100; CostMax = 2500; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "Amazon RDS"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 12; CostMin = 30; CostMax = 800; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "Amazon S3"; Category = "Storage"; Subcategory = "Object Storage"; Weight = 12; CostMin = 5; CostMax = 300; PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, @{ Name = "Amazon Redshift"; Category = "Analytics"; Subcategory = "Data Warehouses"; Weight = 8; CostMin = 50; CostMax = 1000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, - @{ Name = "Amazon DynamoDB"; Category = "Databases"; Subcategory = "NoSQL Databases"; Weight = 6; CostMin = 10; CostMax = 400; PricingUnit = "RCUs"; ConsumedUnit = "Read Capacity Units"; PricingBlockSize = 1; PricingUnitDescription = "1 Read Capacity Unit" }, + @{ Name = "Amazon DynamoDB"; Category = "Databases"; Subcategory = "NoSQL"; Weight = 6; CostMin = 10; CostMax = 400; PricingUnit = "RCUs"; ConsumedUnit = "Read Capacity Units"; PricingBlockSize = 1; PricingUnitDescription = "1 Read Capacity Unit" }, @{ Name = "Amazon CloudFront"; Category = "Networking"; Subcategory = "Content Delivery"; Weight = 4; CostMin = 5; CostMax = 150; 
PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, - @{ Name = "AWS Lambda"; Category = "Compute"; Subcategory = "Serverless Compute"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = "Requests"; ConsumedUnit = "1M Requests"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Requests" }, - @{ Name = "Amazon SQS"; Category = "Integration"; Subcategory = "Messaging"; Weight = 2; CostMin = 0.50; CostMax = 20; PricingUnit = "Requests"; ConsumedUnit = "1M Requests"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Requests" } + @{ Name = "AWS Lambda"; Category = "Compute"; Subcategory = "Functions"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = "Requests"; ConsumedUnit = "1M Requests"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Requests" }, + @{ Name = "Amazon SQS"; Category = "Integration"; Subcategory = "Other (Integration)"; Weight = 2; CostMin = 0.50; CostMax = 20; PricingUnit = "Requests"; ConsumedUnit = "1M Requests"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Requests" } ) - ResourceTypes = @("AWS::EC2::Instance", "AWS::S3::Bucket", "AWS::RDS::DBInstance", "AWS::EKS::Cluster", "AWS::DynamoDB::Table", "AWS::Lambda::Function") + ResourceTypes = @("AWS::EC2::Instance", "AWS::S3::Bucket", "AWS::RDS::DBInstance", "AWS::EKS::Cluster", "AWS::DynamoDB::Table", "AWS::Lambda::Function") } GCP = @{ - ServiceProviderName = "Google Cloud" - InvoiceIssuerName = "Google Cloud" - HostProviderName = "Google Cloud" - BillingAccountType = "Billing Account" - SubAccountType = "Project" - BillingCurrency = "USD" + ProviderName = "Google Cloud" + ServiceProviderName = "Google Cloud" + InvoiceIssuerName = "Google Cloud" + HostProviderName = "Google Cloud" + BillingAccountType = "Billing Account" + SubAccountType = "Project" + BillingCurrency = "USD" BillingAccountAgreement = "Google Cloud Agreement" - Regions = @( + Regions = @( @{ Id = "us-central1"; 
Name = "Iowa" }, @{ Id = "us-east1"; Name = "South Carolina" }, @{ Id = "europe-west1"; Name = "Belgium" }, @{ Id = "asia-east1"; Name = "Taiwan" } ) - Services = @( + Services = @( @{ Name = "Compute Engine"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 35; CostMin = 50; CostMax = 2000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "Google Kubernetes Engine"; Category = "Compute"; Subcategory = "Containers"; Weight = 20; CostMin = 100; CostMax = 2500; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "Cloud SQL"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 12; CostMin = 30; CostMax = 700; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "Cloud Storage"; Category = "Storage"; Subcategory = "Object Storage"; Weight = 12; CostMin = 5; CostMax = 250; PricingUnit = "GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, @{ Name = "BigQuery"; Category = "Analytics"; Subcategory = "Data Warehouses"; Weight = 10; CostMin = 20; CostMax = 800; PricingUnit = "TB Scanned"; ConsumedUnit = "TB"; PricingBlockSize = 1; PricingUnitDescription = "1 TB Scanned" }, - @{ Name = "Cloud Spanner"; Category = "Databases"; Subcategory = "Distributed Databases"; Weight = 5; CostMin = 50; CostMax = 500; PricingUnit = "Node-Hours"; ConsumedUnit = "Node Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Node-Hour" }, - @{ Name = "Cloud Run"; Category = "Compute"; Subcategory = "Serverless Containers"; Weight = 3; CostMin = 5; CostMax = 100; PricingUnit = "vCPU-Seconds"; ConsumedUnit = "vCPU Seconds"; PricingBlockSize = 1; PricingUnitDescription = "1 vCPU-Second" }, - @{ Name = "Cloud Functions"; Category = "Compute"; Subcategory = "Serverless Compute"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = 
"Invocations"; ConsumedUnit = "1M Invocations"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Invocations" } + @{ Name = "Cloud Spanner"; Category = "Databases"; Subcategory = "Other (Databases)"; Weight = 5; CostMin = 50; CostMax = 500; PricingUnit = "Node-Hours"; ConsumedUnit = "Node Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Node-Hour" }, + @{ Name = "Cloud Run"; Category = "Compute"; Subcategory = "Containers"; Weight = 3; CostMin = 5; CostMax = 100; PricingUnit = "vCPU-Seconds"; ConsumedUnit = "vCPU Seconds"; PricingBlockSize = 1; PricingUnitDescription = "1 vCPU-Second" }, + @{ Name = "Cloud Functions"; Category = "Compute"; Subcategory = "Functions"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = "Invocations"; ConsumedUnit = "1M Invocations"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Invocations" } ) - ResourceTypes = @("compute.googleapis.com/Instance", "storage.googleapis.com/Bucket", "sql.googleapis.com/Instance", "container.googleapis.com/Cluster", "bigquery.googleapis.com/Dataset") + ResourceTypes = @("compute.googleapis.com/Instance", "storage.googleapis.com/Bucket", "sql.googleapis.com/Instance", "container.googleapis.com/Cluster", "bigquery.googleapis.com/Dataset") } DataCenter = @{ - ServiceProviderName = "Internal IT" - InvoiceIssuerName = "Internal IT" - HostProviderName = "On-Premises" - BillingAccountType = "Cost Center" - SubAccountType = "Business Unit" - BillingCurrency = "USD" + ProviderName = "Internal IT" + ServiceProviderName = "Internal IT" + InvoiceIssuerName = "Internal IT" + HostProviderName = "On-Premises" + BillingAccountType = "Cost Center" + SubAccountType = "Business Unit" + BillingCurrency = "USD" BillingAccountAgreement = "Internal SLA" - Regions = @( + Regions = @( @{ Id = "dc-us-east"; Name = "US East Data Center" }, @{ Id = "dc-eu-west"; Name = "EU West Data Center" }, @{ Id = "dc-apac"; Name = "APAC Data Center" } ) - Services = @( - @{ Name = "Physical 
Servers"; Category = "Compute"; Subcategory = "Bare Metal"; Weight = 30; CostMin = 200; CostMax = 5000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + Services = @( + @{ Name = "Physical Servers"; Category = "Compute"; Subcategory = "Other (Compute)"; Weight = 30; CostMin = 200; CostMax = 5000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "VMware vSphere"; Category = "Compute"; Subcategory = "Virtual Machines"; Weight = 25; CostMin = 100; CostMax = 2000; PricingUnit = "Hours"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, - @{ Name = "Oracle Database"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 15; CostMin = 500; CostMax = 10000; PricingUnit = "Processor Licenses"; ConsumedUnit = "Processor Licenses"; PricingBlockSize = 1; PricingUnitDescription = "1 Processor License" }, + @{ Name = "Oracle Database"; Category = "Databases"; Subcategory = "Relational Databases"; Weight = 15; CostMin = 500; CostMax = 5000; PricingUnit = "Processor Licenses"; ConsumedUnit = "Processor Licenses"; PricingBlockSize = 1; PricingUnitDescription = "1 Processor License" }, @{ Name = "SAN Storage"; Category = "Storage"; Subcategory = "Block Storage"; Weight = 12; CostMin = 50; CostMax = 1500; PricingUnit = "TB"; ConsumedUnit = "TB"; PricingBlockSize = 1; PricingUnitDescription = "1 TB" }, @{ Name = "Network Infrastructure"; Category = "Networking"; Subcategory = "Network Infrastructure"; Weight = 10; CostMin = 20; CostMax = 500; PricingUnit = "Ports"; ConsumedUnit = "Ports"; PricingBlockSize = 1; PricingUnitDescription = "1 Port" }, @{ Name = "Facility Costs"; Category = "Other"; Subcategory = "Other"; Weight = 8; CostMin = 100; CostMax = 800; PricingUnit = "kWh"; ConsumedUnit = "kWh"; PricingBlockSize = 1; PricingUnitDescription = "1 kWh" } ) - ResourceTypes = @("server/physical", "storage/san", 
"database/oracle", "virtualization/vmware") + ResourceTypes = @("server/physical", "storage/san", "database/oracle", "virtualization/vmware") } } @@ -279,48 +327,77 @@ $ProviderConfigs = @{ # Helper Functions # ============================================================================ -function Get-RandomDecimal { +function Get-RandomDecimal +{ param( [decimal]$Min = 0.01, [decimal]$Max = 100.00 ) - return [math]::Round($Min + (Get-Random -Maximum ([int](($Max - $Min) * 100 + 1))) / 100, 10) + # Use [long] to avoid [int] overflow with large ranges + $range = [long](($Max - $Min) * 100 + 1) + if ($range -le 0) { $range = 1 } + # Get-Random -Maximum accepts [long] when > [int]::MaxValue + $randomOffset = if ($range -gt [int]::MaxValue) + { + Get-Random -Minimum 0 -Maximum ([int]($range / 2)) + Get-Random -Minimum 0 -Maximum ([int]($range - $range / 2)) + } + else + { + Get-Random -Maximum ([int]$range) + } + return [math]::Round($Min + $randomOffset / 100, 2) } -function Get-RandomElement { +function Get-RandomElement +{ param([array]$Array) return $Array[(Get-Random -Maximum $Array.Count)] } -function Get-WeightedRandomService { +function Get-WeightedRandomService +{ param([array]$Services) $totalWeight = ($Services | ForEach-Object { - if ($_.Weight) { $_.Weight } else { 1 } - } | Measure-Object -Sum).Sum + if ($_.Weight) { $_.Weight } else { 1 } + } | Measure-Object -Sum).Sum $randomValue = Get-Random -Maximum $totalWeight $cumulative = 0 - foreach ($service in $Services) { + foreach ($service in $Services) + { $weight = if ($service.Weight) { $service.Weight } else { 1 } $cumulative += $weight - if ($randomValue -lt $cumulative) { + if ($randomValue -lt $cumulative) + { return $service } } return $Services[-1] } -function Get-IsoDateTime { +function Get-IsoDateTime +{ param([datetime]$Date) return $Date.ToString("yyyy-MM-ddTHH:mm:ssZ") } +# Generate a 12-digit AWS-style account ID without int overflow +function New-AwsAccountId +{ + # Split into two 6-digit 
segments to avoid [int] overflow + # (12-digit numbers exceed [int]::MaxValue ~2.1B) + $part1 = Get-Random -Minimum 100000 -Maximum 999999 + $part2 = Get-Random -Minimum 100000 -Maximum 999999 + return "$part1$part2" +} + # ============================================================================ # Persistent Identity Generation # ============================================================================ -function New-ProviderIdentities { +function New-ProviderIdentities +{ param( [string]$Provider, [hashtable]$Config @@ -329,37 +406,43 @@ function New-ProviderIdentities { # 2-3 Billing Accounts per provider $billingAccountCount = Get-Random -Minimum 2 -Maximum 4 $billingAccounts = @() - for ($i = 1; $i -le $billingAccountCount; $i++) { - $baId = switch ($Provider) { - "Azure" { [guid]::NewGuid().ToString() } - "AWS" { "$(Get-Random -Minimum 100000000000 -Maximum 999999999999)" } - "GCP" { "ABCDEF-$((Get-Random -Minimum 100000 -Maximum 999999))-$((Get-Random -Minimum 100000 -Maximum 999999))" } + for ($i = 1; $i -le $billingAccountCount; $i++) + { + $baId = switch ($Provider) + { + "Azure" { [guid]::NewGuid().ToString() } + "AWS" { New-AwsAccountId } + "GCP" { "ABCDEF-$((Get-Random -Minimum 100000 -Maximum 999999))-$((Get-Random -Minimum 100000 -Maximum 999999))" } "DataCenter" { "CC-$(Get-Random -Minimum 10000 -Maximum 99999)" } } - $baName = switch ($Provider) { - "Azure" { "Contoso EA $i" } - "AWS" { "AWS Org Account $i" } - "GCP" { "GCP Billing Account $i" } + $baName = switch ($Provider) + { + "Azure" { "Contoso EA $i" } + "AWS" { "AWS Org Account $i" } + "GCP" { "GCP Billing Account $i" } "DataCenter" { "IT Cost Center $i" } } $billingAccounts += @{ Id = $baId; Name = $baName } } # 4-8 Sub-Accounts per provider with realistic names - $subAccountNames = switch ($Provider) { - "Azure" { @("Production Subscription", "Staging Subscription", "Development Subscription", "Shared Services Subscription", "Data Platform Subscription", "Security Subscription", 
"Networking Subscription", "App Team A Subscription") } - "AWS" { @("prod-workloads", "staging-env", "dev-sandbox", "shared-services", "data-lake", "security-tools", "networking", "app-team-b") } - "GCP" { @("prod-services", "staging-services", "dev-playground", "shared-infra", "analytics-platform", "ml-experiments", "networking", "frontend-apps") } + $subAccountNames = switch ($Provider) + { + "Azure" { @("Production Subscription", "Staging Subscription", "Development Subscription", "Shared Services Subscription", "Data Platform Subscription", "Security Subscription", "Networking Subscription", "App Team A Subscription") } + "AWS" { @("prod-workloads", "staging-env", "dev-sandbox", "shared-services", "data-lake", "security-tools", "networking", "app-team-b") } + "GCP" { @("prod-services", "staging-services", "dev-playground", "shared-infra", "analytics-platform", "ml-experiments", "networking", "frontend-apps") } "DataCenter" { @("Engineering", "Finance", "Operations", "Research", "Marketing", "IT Infrastructure", "Human Resources", "Executive") } } $subAccountCount = Get-Random -Minimum 4 -Maximum ([math]::Min(9, $subAccountNames.Count + 1)) $subAccounts = @() - for ($i = 0; $i -lt $subAccountCount; $i++) { + for ($i = 0; $i -lt $subAccountCount; $i++) + { $saName = $subAccountNames[$i] - $saId = switch ($Provider) { - "Azure" { "/subscriptions/$([guid]::NewGuid().ToString())" } - "AWS" { "$(Get-Random -Minimum 100000000000 -Maximum 999999999999)" } - "GCP" { "proj-$($Provider.ToLower())-$(Get-Random -Minimum 10000 -Maximum 99999)" } + $saId = switch ($Provider) + { + "Azure" { "/subscriptions/$([guid]::NewGuid().ToString())" } + "AWS" { New-AwsAccountId } + "GCP" { "proj-$($Provider.ToLower())-$(Get-Random -Minimum 10000 -Maximum 99999)" } "DataCenter" { "BU-$(Get-Random -Minimum 100 -Maximum 999)" } } $subAccounts += @{ Id = $saId; Name = $saName; BillingAccount = $billingAccounts[$i % $billingAccounts.Count] } @@ -367,19 +450,21 @@ function 
New-ProviderIdentities { # Billing profile IDs (consistent per provider) $billingProfileIds = @() - for ($i = 1; $i -le 3; $i++) { + for ($i = 1; $i -le 3; $i++) + { $billingProfileIds += "BP-$(Get-Random -Minimum 10000 -Maximum 99999)" } # Resource Groups $resourceGroups = @("rg-production-001", "rg-staging-001", "rg-development-001", "rg-data-platform", - "rg-shared-services", "rg-networking", "rg-security", "rg-analytics", - "rg-app-team-a", "rg-app-team-b", "rg-ml-training", "rg-monitoring") + "rg-shared-services", "rg-networking", "rg-security", "rg-analytics", + "rg-app-team-a", "rg-app-team-b", "rg-ml-training", "rg-monitoring") # Pre-generate a pool of persistent resources $resourceCount = Get-Random -Minimum 150 -Maximum 400 $resources = @() - for ($i = 1; $i -le $resourceCount; $i++) { + for ($i = 1; $i -le $resourceCount; $i++) + { $service = Get-WeightedRandomService -Services $Config.Services $region = Get-RandomElement -Array $Config.Regions $resourceType = Get-RandomElement -Array $Config.ResourceTypes @@ -387,10 +472,11 @@ function New-ProviderIdentities { $rg = Get-RandomElement -Array $resourceGroups $shortId = ([guid]::NewGuid().ToString()).Substring(0, 8) - $resourceId = switch ($Provider) { - "Azure" { "$($subAccount.Id)/resourceGroups/$rg/providers/$resourceType/$shortId" } - "AWS" { "arn:aws:$(($resourceType -split '::')[1].ToLower()):$($region.Id):$($subAccount.Id):instance/i-$shortId" } - "GCP" { "//$(($resourceType -split '/')[0])/projects/$($subAccount.Id)/zones/$($region.Id)-$(Get-RandomElement -Array @('a','b','c'))/instances/vm-$shortId" } + $resourceId = switch ($Provider) + { + "Azure" { "$($subAccount.Id)/resourceGroups/$rg/providers/$resourceType/$shortId" } + "AWS" { "arn:aws:$(($resourceType -split '::')[1].ToLower()):$($region.Id):$($subAccount.Id):instance/i-$shortId" } + "GCP" { "//$(($resourceType -split '/')[0])/projects/$($subAccount.Id)/zones/$($region.Id)-$(Get-RandomElement -Array @('a','b','c'))/instances/vm-$shortId" } 
"DataCenter" { "dc://$($region.Id)/$resourceType/$shortId" } } @@ -399,7 +485,8 @@ function New-ProviderIdentities { # Tags: ~80% of resources get tags, ~20% are untagged (for tag coverage analysis) $tagHash = @{} $hasTagsRoll = Get-Random -Maximum 100 - if ($hasTagsRoll -lt 80) { + if ($hasTagsRoll -lt 80) + { $tagHash = @{ "Environment" = Get-RandomElement -Array @("Production", "Staging", "Development", "Test") "Department" = Get-RandomElement -Array @("Engineering", "Finance", "Operations", "Marketing", "Sales", "Research") @@ -410,7 +497,8 @@ function New-ProviderIdentities { } # Azure-specific FinOps Hub tags on ~30% of tagged Azure resources - if ($Provider -eq "Azure" -and (Get-Random -Maximum 100) -lt 30) { + if ($Provider -eq "Azure" -and (Get-Random -Maximum 100) -lt 30) + { $hubStorageSuffix = Get-Random -Minimum 1000 -Maximum 9999 $tagHash["ftk-tool"] = "FinOps hubs" $tagHash["ftk-version"] = "0.8.0" @@ -418,31 +506,39 @@ function New-ProviderIdentities { } # AWS-specific tags - if ($Provider -eq "AWS") { + if ($Provider -eq "AWS") + { $tagHash["aws:createdBy"] = Get-RandomElement -Array @("CloudFormation", "Terraform", "CDK", "Console") } # GCP-specific tags - if ($Provider -eq "GCP") { + if ($Provider -eq "GCP") + { $tagHash["goog-dm"] = Get-RandomElement -Array @("deployment-mgr", "terraform", "gcloud-cli") } } # VM SKU assignment (Azure VMs get specific instance types) $vmSku = $null - if ($Provider -eq "Azure" -and $resourceType -eq "microsoft.compute/virtualmachines" -and $Config.VmSkus) { + if ($Provider -eq "Azure" -and $resourceType -eq "microsoft.compute/virtualmachines" -and $Config.VmSkus) + { $vmSku = Get-RandomElement -Array $Config.VmSkus } # AHB eligibility: Azure VMs and SQL with ~40% eligible $ahbEligible = $false $ahbLicenseType = $null - if ($Provider -eq "Azure" -and $resourceType -in @("microsoft.compute/virtualmachines", "microsoft.sql/servers")) { - if ((Get-Random -Maximum 100) -lt 40) { + if ($Provider -eq "Azure" -and 
$resourceType -in @("microsoft.compute/virtualmachines", "microsoft.sql/servers")) + { + if ((Get-Random -Maximum 100) -lt 40) + { $ahbEligible = $true - $ahbLicenseType = if ($resourceType -eq "microsoft.sql/servers") { + $ahbLicenseType = if ($resourceType -eq "microsoft.sql/servers") + { "SQL Server" - } else { + } + else + { Get-RandomElement -Array @("Windows Server", "Windows Server", "SUSE Linux", "RHEL Linux") } } @@ -477,24 +573,31 @@ function New-ProviderIdentities { # Pre-generate commitment discounts (multi-cloud) $commitments = @() - $commitmentCount = switch ($Provider) { - "Azure" { Get-Random -Minimum 8 -Maximum 16 } - "AWS" { Get-Random -Minimum 5 -Maximum 12 } - "GCP" { Get-Random -Minimum 3 -Maximum 8 } + $commitmentCount = switch ($Provider) + { + "Azure" { Get-Random -Minimum 8 -Maximum 16 } + "AWS" { Get-Random -Minimum 5 -Maximum 12 } + "GCP" { Get-Random -Minimum 3 -Maximum 8 } "DataCenter" { 0 } } - for ($i = 1; $i -le $commitmentCount; $i++) { + for ($i = 1; $i -le $commitmentCount; $i++) + { $commitType = Get-RandomElement -Array @("Reservation", "Savings Plan") - $commitId = switch ($Provider) { - "Azure" { - if ($commitType -eq "Reservation") { + $commitId = switch ($Provider) + { + "Azure" + { + if ($commitType -eq "Reservation") + { "/providers/Microsoft.Capacity/reservationOrders/$([guid]::NewGuid().ToString())" - } else { + } + else + { "/providers/Microsoft.BillingBenefits/savingsPlanOrders/$([guid]::NewGuid().ToString())" } } - "AWS" { "arn:aws:savingsplans::$(Get-Random -Minimum 100000000000 -Maximum 999999999999):savingsplan/sp-$([guid]::NewGuid().ToString().Substring(0,8))" } - "GCP" { "projects/test-project/commitments/$([guid]::NewGuid().ToString().Substring(0,8))" } + "AWS" { "arn:aws:savingsplans::$(New-AwsAccountId):savingsplan/sp-$([guid]::NewGuid().ToString().Substring(0,8))" } + "GCP" { "projects/test-project/commitments/$([guid]::NewGuid().ToString().Substring(0,8))" } default { "" } } $skuOrderId = 
[guid]::NewGuid().ToString() @@ -539,12 +642,15 @@ function New-ProviderIdentities { # Row Generation # ============================================================================ -function New-FocusRow { +function New-FocusRow +{ param( [string]$Provider, [datetime]$ChargeDate, [hashtable]$Config, [hashtable]$Identity, + [double]$ScaleFactor, + [version]$FocusVer, [switch]$IncludeCommitments, [switch]$IncludeHybridBenefit ) @@ -560,7 +666,7 @@ function New-FocusRow { $monthIndex = (($ChargeDate.Year - $StartDate.Year) * 12 + $ChargeDate.Month - $StartDate.Month) $trendFactor = 1.0 + ($monthIndex * 0.02) # 2% growth per month $jitter = 0.80 + (Get-Random -Maximum 41) / 100.0 # 0.80 to 1.20 - $listCost = [math]::Round($res.BaseDailyCost * $trendFactor * $jitter, 10) + $listCost = [math]::Round($res.BaseDailyCost * $trendFactor * $jitter * $ScaleFactor, 2) if ($listCost -lt 0.01) { $listCost = 0.01 } # On-demand cost: same as list cost (before any discounts) @@ -568,10 +674,11 @@ function New-FocusRow { # Negotiated/EA discount: 5-30% off list for ~60% of rows $negotiatedDiscountPct = 0 - if ((Get-Random -Maximum 100) -lt 60) { + if ((Get-Random -Maximum 100) -lt 60) + { $negotiatedDiscountPct = Get-Random -Minimum 5 -Maximum 31 } - $contractedCost = [math]::Round($listCost * (100 - $negotiatedDiscountPct) / 100, 10) + $contractedCost = [math]::Round($listCost * (100 - $negotiatedDiscountPct) / 100, 2) $billedCost = $contractedCost $effectiveCost = $contractedCost @@ -586,27 +693,30 @@ function New-FocusRow { # Charge category distribution: 85% Usage, 8% Purchase, 3% Tax, 2% Credit, 2% Adjustment $catRoll = Get-Random -Maximum 100 $chargeCategory = if ($catRoll -lt 85) { "Usage" } - elseif ($catRoll -lt 93) { "Purchase" } - elseif ($catRoll -lt 96) { "Tax" } - elseif ($catRoll -lt 98) { "Credit" } - else { "Adjustment" } + elseif ($catRoll -lt 93) { "Purchase" } + elseif ($catRoll -lt 96) { "Tax" } + elseif ($catRoll -lt 98) { "Credit" } + else { "Adjustment" } # 
ChargeClass: mostly null, ~3% are corrections $chargeClass = $null - if ((Get-Random -Maximum 100) -lt 3) { + if ((Get-Random -Maximum 100) -lt 3) + { $chargeClass = "Correction" } # ChargeFrequency based on ChargeCategory - $chargeFrequency = switch ($chargeCategory) { + $chargeFrequency = switch ($chargeCategory) + { "Purchase" { Get-RandomElement -Array @("One-Time", "Recurring") } - "Tax" { "Recurring" } - "Credit" { "One-Time" } - default { "Usage-Based" } + "Tax" { "Recurring" } + "Credit" { "One-Time" } + default { "Usage-Based" } } # Credits and Adjustments are negative - if ($chargeCategory -in @("Credit", "Adjustment")) { + if ($chargeCategory -in @("Credit", "Adjustment")) + { $listCost = -[math]::Abs($listCost) * 0.1 # Credits are ~10% of normal costs $contractedCost = $listCost $billedCost = $listCost @@ -633,7 +743,8 @@ function New-FocusRow { # 30% chance of commitment-covered usage (Azure/AWS/GCP - not DataCenter) if ($IncludeCommitments -and $Provider -ne "DataCenter" -and $chargeCategory -eq "Usage" -and - $Identity.Commitments.Count -gt 0 -and (Get-Random -Maximum 100) -lt 30) { + $Identity.Commitments.Count -gt 0 -and (Get-Random -Maximum 100) -lt 30) + { $commitment = Get-RandomElement -Array $Identity.Commitments $commitmentDiscountId = $commitment.Id @@ -645,13 +756,16 @@ function New-FocusRow { $pricingCategory = "Committed" # 85% utilization - most are Used, some Unused - if ((Get-Random -Maximum 100) -lt 85) { + if ((Get-Random -Maximum 100) -lt 85) + { $commitmentDiscountStatus = "Used" - $effectiveCost = [math]::Round($contractedCost * 0.40, 10) # 60% savings on contracted + $effectiveCost = [math]::Round($contractedCost * 0.40, 2) # 60% savings on contracted $billedCost = 0 # Prepaid - } else { + } + else + { $commitmentDiscountStatus = "Unused" - $effectiveCost = [math]::Round($contractedCost * 0.60, 10) # Wasted commitment + $effectiveCost = [math]::Round($contractedCost * 0.60, 2) # Wasted commitment $billedCost = $effectiveCost } @@ 
-661,7 +775,8 @@ function New-FocusRow { # === COMMITMENT PURCHASE ROWS (for invoicing page) === if ($IncludeCommitments -and $Provider -ne "DataCenter" -and $chargeCategory -eq "Purchase" -and - $Identity.Commitments.Count -gt 0 -and (Get-Random -Maximum 100) -lt 50) { + $Identity.Commitments.Count -gt 0 -and (Get-Random -Maximum 100) -lt 50) + { $commitment = Get-RandomElement -Array $Identity.Commitments $commitmentDiscountId = $commitment.Id @@ -675,12 +790,14 @@ function New-FocusRow { # === SPOT / DYNAMIC INSTANCE SIMULATION === $spotEligibleServices = @("Virtual Machines", "Azure Kubernetes Service", "Amazon EC2", "Amazon EKS", "Compute Engine", "Google Kubernetes Engine", "VMware vSphere") - if ($chargeCategory -eq "Usage" -and $null -eq $commitmentDiscountId -and $service.Name -in $spotEligibleServices) { - if ((Get-Random -Maximum 100) -lt 15) { - $pricingCategory = "Dynamic" # FOCUS spec uses "Dynamic", not "Spot" + if ($chargeCategory -eq "Usage" -and $null -eq $commitmentDiscountId -and $service.Name -in $spotEligibleServices) + { + if ((Get-Random -Maximum 100) -lt 15) + { + $pricingCategory = "Dynamic" $spotDiscount = Get-Random -Minimum 60 -Maximum 90 - $effectiveCost = [math]::Round($listCost * (100 - $spotDiscount) / 100, 10) + $effectiveCost = [math]::Round($listCost * (100 - $spotDiscount) / 100, 2) $billedCost = $effectiveCost $contractedCost = $effectiveCost } @@ -697,40 +814,49 @@ function New-FocusRow { $x_SkuLicenseType = $null $x_SkuDescription = $null - if ($Provider -eq "Azure") { - # Set VM SKU details for all Azure compute resources - if ($res.VmSku) { + if ($Provider -eq "Azure") + { + if ($res.VmSku) + { $vmSku = $res.VmSku $x_SkuInstanceType = $vmSku.InstanceType $x_SkuCoreCount = $vmSku.Cores $x_SkuDescription = $vmSku.Description $x_SkuMeterCategory = "Virtual Machines" $x_SkuMeterSubcategory = "$($vmSku.InstanceType) Series" - $x_SkuMeterName = $vmSku.MeterName # Contains architecture pattern (D4s, D4as, D4ps) - } elseif 
($service.Category -eq "Compute") { + $x_SkuMeterName = $vmSku.MeterName + } + elseif ($service.Category -eq "Compute") + { $x_SkuMeterCategory = $service.Name $x_SkuMeterSubcategory = $service.Subcategory $x_SkuDescription = "$($service.Name) - Standard" - } elseif ($service.Category -eq "Databases") { + } + elseif ($service.Category -eq "Databases") + { $x_SkuMeterCategory = $service.Name $x_SkuMeterSubcategory = "Compute" $x_SkuDescription = "$($service.Name) - Standard Tier" - } else { + } + else + { $x_SkuDescription = "$($service.Name) - $($service.Subcategory)" } # AHB columns - if ($res.AhbEligible -and $chargeCategory -eq "Usage") { + if ($res.AhbEligible -and $chargeCategory -eq "Usage") + { $x_SkuLicenseType = $res.AhbLicenseType - # 60% of eligible resources have AHB enabled - if ((Get-Random -Maximum 100) -lt 60) { + if ((Get-Random -Maximum 100) -lt 60) + { $x_SkuLicenseStatus = "Enabled" $x_SkuLicenseQuantity = if ($res.VmSku) { $res.VmSku.Cores } else { Get-RandomElement -Array @(2, 4, 8, 16) } - # AHB savings: ~40% on license cost - $licenseSavings = [math]::Round([math]::Abs($effectiveCost) * 0.40, 10) - $effectiveCost = [math]::Max(0.01, [math]::Round($effectiveCost - $licenseSavings, 10)) - } else { + $licenseSavings = [math]::Round([math]::Abs($effectiveCost) * 0.40, 2) + $effectiveCost = [math]::Max(0.01, [math]::Round($effectiveCost - $licenseSavings, 2)) + } + else + { $x_SkuLicenseStatus = "Not Enabled" $x_SkuLicenseQuantity = 0 } @@ -738,31 +864,49 @@ function New-FocusRow { } # === UNIT PRICES (derived from costs / quantity) === - $listUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($listCost / $pricingQuantity, 10) } else { 0 } - $contractedUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($contractedCost / $pricingQuantity, 10) } else { 0 } - $effectiveUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($effectiveCost / $pricingQuantity, 10) } else { 0 } - $billedUnitPrice = if ($pricingQuantity -ne 0) { 
[math]::Round($billedCost / $pricingQuantity, 10) } else { 0 } - $onDemandUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($onDemandCost / $pricingQuantity, 10) } else { 0 } + # Calculated AFTER all cost modifications to maintain FOCUS cost invariants: + # ListCost = ListUnitPrice * PricingQuantity + # ContractedCost = ContractedUnitPrice * PricingQuantity + $listUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($listCost / $pricingQuantity, 2) } else { 0 } + $contractedUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($contractedCost / $pricingQuantity, 2) } else { 0 } + $effectiveUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($effectiveCost / $pricingQuantity, 2) } else { 0 } + $billedUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($billedCost / $pricingQuantity, 2) } else { 0 } + $onDemandUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($onDemandCost / $pricingQuantity, 2) } else { 0 } # === DATA QUALITY ANOMALIES (~2% of rows) === + # Anomaly rows use ChargeClass=Correction to exempt them from FOCUS cost invariant rules. + # This is documented via x_SourceChanges for the data quality dashboard page. 
$x_SourceChanges = $null $qualityRoll = Get-Random -Maximum 100 - if ($qualityRoll -eq 0) { - # Effective > Contracted (anomaly) - $effectiveCost = [math]::Round($contractedCost * 1.1, 10) + if ($qualityRoll -eq 0) + { + # Effective > Contracted (anomaly) - mark as Correction + $effectiveCost = [math]::Round($contractedCost * 1.1, 2) + $effectiveUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($effectiveCost / $pricingQuantity, 2) } else { 0 } $x_SourceChanges = "CostAdjustment" - } elseif ($qualityRoll -eq 1) { - # Contracted > List (anomaly) - $contractedCost = [math]::Round($listCost * 1.05, 10) + $chargeClass = "Correction" + } + elseif ($qualityRoll -eq 1) + { + # Contracted > List (anomaly) - mark as Correction + $contractedCost = [math]::Round($listCost * 1.05, 2) + $contractedUnitPrice = if ($pricingQuantity -ne 0) { [math]::Round($contractedCost / $pricingQuantity, 2) } else { 0 } $x_SourceChanges = "PriceCorrection" + $chargeClass = "Correction" } # Invoice ID: stable per billing period - $invoiceKey = "$($billingAccount.Id)-$($billingPeriodStart.ToString('yyyyMM'))" - if (-not $Identity.InvoiceIds.ContainsKey($invoiceKey)) { - $Identity.InvoiceIds[$invoiceKey] = "INV-$($billingPeriodStart.ToString('yyyyMM'))-$(Get-Random -Minimum 10000 -Maximum 99999)" + # Credits and adjustments typically do not have invoices (FOCUS spec compliance) + $invoiceId = $null + if ($chargeCategory -notin @("Credit", "Adjustment") -or $chargeClass -eq "Correction") + { + $invoiceKey = "$($billingAccount.Id)-$($billingPeriodStart.ToString('yyyyMM'))" + if (-not $Identity.InvoiceIds.ContainsKey($invoiceKey)) + { + $Identity.InvoiceIds[$invoiceKey] = "INV-$($billingPeriodStart.ToString('yyyyMM'))-$(Get-Random -Minimum 10000 -Maximum 99999)" + } + $invoiceId = $Identity.InvoiceIds[$invoiceKey] } - $invoiceId = $Identity.InvoiceIds[$invoiceKey] # Tags as JSON $tagsJson = if ($res.Tags.Count -gt 0) { ($res.Tags | ConvertTo-Json -Compress) } else { '{}' } @@ -771,105 +915,127 
@@ function New-FocusRow { $isMarketplace = [bool]$service.IsMarketplace $publisherName = if ($isMarketplace) { Get-RandomElement -Array $Identity.MarketplacePublishers } else { $Config.ServiceProviderName } - return [PSCustomObject]@{ - # ===================== Mandatory FOCUS columns ===================== - BilledCost = $billedCost - BillingAccountId = $billingAccount.Id - BillingAccountName = $billingAccount.Name - BillingAccountType = $Config.BillingAccountType - BillingCurrency = $Config.BillingCurrency - BillingPeriodEnd = Get-IsoDateTime -Date $billingPeriodEnd - BillingPeriodStart = Get-IsoDateTime -Date $billingPeriodStart - ChargeCategory = $chargeCategory - ChargeClass = $chargeClass - ChargeDescription = "$($service.Name) usage in $($region.Name)" - ChargeFrequency = $chargeFrequency - ChargePeriodEnd = Get-IsoDateTime -Date $chargePeriodEnd - ChargePeriodStart = Get-IsoDateTime -Date $chargePeriodStart - ContractedCost = $contractedCost - EffectiveCost = $effectiveCost - InvoiceIssuerName = $Config.InvoiceIssuerName - ListCost = $listCost - PricingQuantity = $pricingQuantity - PricingUnit = $service.PricingUnit - ServiceProviderName = $Config.ServiceProviderName - - # ===================== Conditional FOCUS columns ===================== - AvailabilityZone = $az - CommitmentDiscountCategory = $commitmentDiscountCategory - CommitmentDiscountId = $commitmentDiscountId - CommitmentDiscountName = $commitmentDiscountName - CommitmentDiscountQuantity = $commitmentDiscountQuantity - CommitmentDiscountStatus = $commitmentDiscountStatus - CommitmentDiscountType = $commitmentDiscountType - CommitmentDiscountUnit = $commitmentDiscountUnit - ConsumedQuantity = $consumedQuantity - ConsumedUnit = $service.ConsumedUnit - ContractedUnitPrice = $contractedUnitPrice - HostProviderName = $Config.HostProviderName - InvoiceId = $invoiceId - ListUnitPrice = $listUnitPrice - PricingCategory = $pricingCategory - RegionId = $region.Id - RegionName = $region.Name - ResourceId = 
$res.ResourceId - ResourceName = $res.ResourceName - ResourceType = $res.ResourceType - ServiceCategory = $service.Category - ServiceName = $service.Name - ServiceSubcategory = $service.Subcategory - SkuId = $res.SkuId - SkuPriceId = $res.SkuPriceId - SubAccountId = $subAccount.Id - SubAccountName = $subAccount.Name - SubAccountType = $Config.SubAccountType - Tags = $tagsJson - - # ===================== FinOps Hub / Dashboard required columns ===================== - ProviderName = $Config.ServiceProviderName - x_BillingAccountId = $billingAccount.Id - x_BillingAccountAgreement = $Config.BillingAccountAgreement - x_BillingProfileId = Get-RandomElement -Array $Identity.BillingProfileIds - x_ResourceGroupName = $res.ResourceGroup - x_ResourceType = $res.ResourceType - - # Publisher - PublisherName = $publisherName - x_PublisherCategory = if ($isMarketplace) { "Marketplace" } else { $Provider } - - # Unit prices (dashboard discount analysis) - x_EffectiveUnitPrice = $effectiveUnitPrice - x_BilledUnitPrice = $billedUnitPrice - x_OnDemandCost = $onDemandCost - x_OnDemandUnitPrice = $onDemandUnitPrice - - # SKU columns (dashboard SKU cost analysis, CPU architecture, AHB) - x_SkuDescription = $x_SkuDescription - x_SkuInstanceType = $x_SkuInstanceType - x_SkuCoreCount = $x_SkuCoreCount - x_SkuMeterCategory = $x_SkuMeterCategory - x_SkuMeterSubcategory = $x_SkuMeterSubcategory - x_SkuMeterName = $x_SkuMeterName - x_SkuMeterId = $res.SkuMeterId - x_SkuOfferId = if ($Provider -eq "Azure") { "MS-AZR-0017P" } else { $null } - x_SkuLicenseStatus = $x_SkuLicenseStatus - x_SkuLicenseQuantity = $x_SkuLicenseQuantity - x_SkuLicenseType = $x_SkuLicenseType - - # Commitment linkage (dashboard invoicing/utilization) - x_SkuOrderId = $x_SkuOrderId - x_SkuTerm = $x_SkuTerm - - # Pricing detail (dashboard data quality page) - x_PricingBlockSize = $service.PricingBlockSize - x_PricingUnitDescription = $service.PricingUnitDescription - - # Data quality / metadata - x_SourceChanges = 
$x_SourceChanges - x_CloudProvider = $Provider - x_FocusVersion = $FocusVersion - x_IngestionTime = Get-IsoDateTime -Date (Get-Date) + # Build the row as an ordered hashtable, then select version-appropriate columns + $row = [ordered]@{ + # ===================== Mandatory FOCUS columns (all versions) ===================== + BilledCost = $billedCost + BillingAccountId = $billingAccount.Id + BillingAccountName = $billingAccount.Name + BillingCurrency = $Config.BillingCurrency + BillingPeriodEnd = Get-IsoDateTime -Date $billingPeriodEnd + BillingPeriodStart = Get-IsoDateTime -Date $billingPeriodStart + ChargeCategory = $chargeCategory + ChargeClass = $chargeClass + ChargeDescription = "$($service.Name) usage in $($region.Name)" + ChargeFrequency = $chargeFrequency + ChargePeriodEnd = Get-IsoDateTime -Date $chargePeriodEnd + ChargePeriodStart = Get-IsoDateTime -Date $chargePeriodStart + ContractedCost = $contractedCost + EffectiveCost = $effectiveCost + InvoiceIssuerName = $Config.InvoiceIssuerName + ListCost = $listCost + ListUnitPrice = $listUnitPrice + PricingQuantity = $pricingQuantity + PricingUnit = $service.PricingUnit + # ProviderName is mandatory in FOCUS v1.0-v1.2; renamed to ServiceProviderName in v1.3 + ProviderName = $Config.ProviderName + RegionId = $region.Id + RegionName = $region.Name + ResourceId = $res.ResourceId + ResourceName = $res.ResourceName + ResourceType = $res.ResourceType + ServiceCategory = $service.Category + ServiceName = $service.Name + SkuId = $res.SkuId + SkuPriceId = $res.SkuPriceId + SubAccountId = $subAccount.Id + SubAccountName = $subAccount.Name + Tags = $tagsJson + + # ===================== Conditional FOCUS columns (all versions) ===================== + AvailabilityZone = $az + CommitmentDiscountCategory = $commitmentDiscountCategory + CommitmentDiscountId = $commitmentDiscountId + CommitmentDiscountName = $commitmentDiscountName + CommitmentDiscountStatus = $commitmentDiscountStatus + CommitmentDiscountType = 
$commitmentDiscountType + ConsumedQuantity = $consumedQuantity + ConsumedUnit = $service.ConsumedUnit + ContractedUnitPrice = $contractedUnitPrice + PricingCategory = $pricingCategory + ServiceSubcategory = $service.Subcategory + } + + # === Version-specific FOCUS columns === + + # v1.1+ columns + if ($FocusVer -ge [version]'1.1') + { + $row['CommitmentDiscountQuantity'] = $commitmentDiscountQuantity + $row['CommitmentDiscountUnit'] = $commitmentDiscountUnit + } + + # v1.2+ columns + if ($FocusVer -ge [version]'1.2') + { + $row['BillingAccountType'] = $Config.BillingAccountType + $row['InvoiceId'] = $invoiceId + $row['SubAccountType'] = $Config.SubAccountType } + + # v1.3+ columns (ServiceProviderName replaces ProviderName) + if ($FocusVer -ge [version]'1.3') + { + $row['HostProviderName'] = $Config.HostProviderName + $row['ServiceProviderName'] = $Config.ServiceProviderName + } + + # ===================== FinOps Hub / Dashboard required columns ===================== + # These x_ prefixed columns are always included for FinOps Hub dashboard compatibility + $row['x_BillingAccountId'] = $billingAccount.Id + $row['x_BillingAccountAgreement'] = $Config.BillingAccountAgreement + $row['x_BillingProfileId'] = Get-RandomElement -Array $Identity.BillingProfileIds + $row['x_ResourceGroupName'] = $res.ResourceGroup + $row['x_ResourceType'] = $res.ResourceType + + # Publisher + $row['PublisherName'] = $publisherName + $row['x_PublisherCategory'] = if ($isMarketplace) { "Marketplace" } else { $Provider } + + # Unit prices (dashboard discount analysis) + $row['x_EffectiveUnitPrice'] = $effectiveUnitPrice + $row['x_BilledUnitPrice'] = $billedUnitPrice + $row['x_OnDemandCost'] = $onDemandCost + $row['x_OnDemandUnitPrice'] = $onDemandUnitPrice + + # SKU columns + $row['x_SkuDescription'] = $x_SkuDescription + $row['x_SkuInstanceType'] = $x_SkuInstanceType + $row['x_SkuCoreCount'] = $x_SkuCoreCount + $row['x_SkuMeterCategory'] = $x_SkuMeterCategory + $row['x_SkuMeterSubcategory'] = 
$x_SkuMeterSubcategory + $row['x_SkuMeterName'] = $x_SkuMeterName + $row['x_SkuMeterId'] = $res.SkuMeterId + $row['x_SkuOfferId'] = if ($Provider -eq "Azure") { "MS-AZR-0017P" } else { $null } + $row['x_SkuLicenseStatus'] = $x_SkuLicenseStatus + $row['x_SkuLicenseQuantity'] = $x_SkuLicenseQuantity + $row['x_SkuLicenseType'] = $x_SkuLicenseType + + # Commitment linkage + $row['x_SkuOrderId'] = $x_SkuOrderId + $row['x_SkuTerm'] = $x_SkuTerm + + # Pricing detail + $row['x_PricingBlockSize'] = $service.PricingBlockSize + $row['x_PricingUnitDescription'] = $service.PricingUnitDescription + + # Data quality / metadata + $row['x_SourceChanges'] = $x_SourceChanges + $row['x_CloudProvider'] = $Provider + $row['x_FocusVersion'] = $FocusVersion + $row['x_IngestionTime'] = Get-IsoDateTime -Date (Get-Date) + + return [PSCustomObject]$row } # ============================================================================ @@ -877,14 +1043,17 @@ function New-FocusRow { # ============================================================================ Write-Host ("=" * 70) -ForegroundColor Cyan -Write-Host "FinOps Hub Multi-Cloud FOCUS Test Data Generator" -ForegroundColor Cyan +Write-Host "FinOps Hub Multi-Cloud FOCUS Test Data Generator v4.0" -ForegroundColor Cyan Write-Host ("=" * 70) -ForegroundColor Cyan Write-Host "" # Determine providers and row distribution -$providers = if ($CloudProvider -eq "All") { +$providers = if ($CloudProvider -eq "All") +{ @("Azure", "AWS", "GCP", "DataCenter") -} else { +} +else +{ @($CloudProvider) } @@ -897,7 +1066,8 @@ $providerWeights = @{ } # If single provider, 100% goes to it -if ($providers.Count -eq 1) { +if ($providers.Count -eq 1) +{ $providerWeights = @{ $providers[0] = 1.0 } } @@ -912,18 +1082,46 @@ Write-Host " Total Row Target: $([string]::Format('{0:N0}', $TotalRowTarget))" Write-Host " Total Budget: `$$([string]::Format('{0:N0}', $TotalBudget)) USD" Write-Host " Output Format: $OutputFormat" Write-Host " Output Path: $OutputPath" +if 
($PSBoundParameters.ContainsKey('Seed')) { Write-Host " Random Seed: $Seed" } Write-Host "" # Create output directory -if (-not (Test-Path $OutputPath)) { - New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null - Write-Host "Created output directory: $OutputPath" -ForegroundColor Green +if (-not (Test-Path $OutputPath)) +{ + if ($PSCmdlet.ShouldProcess($OutputPath, 'Create output directory')) + { + New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null + Write-Host "Created output directory: $OutputPath" -ForegroundColor Green + } } +# === BUDGET SCALE FACTOR === +# Estimate average cost per row from config, then compute scale factor to hit target budget. +# This eliminates the Python post-processing step - costs are scaled inline during generation. +$estimatedTotalCost = [double]0 +foreach ($provider in $providers) +{ + $weight = if ($providerWeights.ContainsKey($provider)) { $providerWeights[$provider] } else { 1.0 / $providers.Count } + $providerRows = [int]($TotalRowTarget * $weight) + $config = $ProviderConfigs[$provider] + # Weighted average cost from service definitions + $totalServiceWeight = ($config.Services | ForEach-Object { if ($_.Weight) { $_.Weight } else { 1 } } | Measure-Object -Sum).Sum + $weightedAvg = [double]0 + foreach ($svc in $config.Services) + { + $svcWeight = if ($svc.Weight) { $svc.Weight } else { 1 } + $svcAvg = ($svc.CostMin + $svc.CostMax) / 2 + $weightedAvg += ($svcAvg * $svcWeight / $totalServiceWeight) + } + $estimatedTotalCost += $providerRows * $weightedAvg +} +$budgetScaleFactor = if ($estimatedTotalCost -gt 0) { [double]$TotalBudget / $estimatedTotalCost } else { 1.0 } + # Pre-generate identities for each provider Write-Host "Pre-generating persistent identities..." 
-ForegroundColor Yellow $providerIdentities = @{} -foreach ($provider in $providers) { +foreach ($provider in $providers) +{ $providerIdentities[$provider] = New-ProviderIdentities -Provider $provider -Config $ProviderConfigs[$provider] $resCount = $providerIdentities[$provider].Resources.Count $saCount = $providerIdentities[$provider].SubAccounts.Count @@ -934,12 +1132,13 @@ foreach ($provider in $providers) { Write-Host "" $totalRows = 0 -$allProviderCosts = @{} # Running sum of EffectiveCost per provider -$allProviderRowCounts = @{} # Row count per provider -$allProviderCsvPaths = @{} # Path to raw CSV per provider +$allProviderCosts = @{} +$allProviderRowCounts = @{} +$generatedFiles = @() # Generate rows for each provider - streaming to CSV to avoid OOM -foreach ($provider in $providers) { +foreach ($provider in $providers) +{ $weight = if ($providerWeights.ContainsKey($provider)) { $providerWeights[$provider] } else { 1.0 / $providers.Count } $providerTotalRows = [math]::Max(1, [int]($TotalRowTarget * $weight)) $dailyRowCount = [math]::Max(1, [int]($providerTotalRows / $totalDays)) @@ -948,7 +1147,8 @@ foreach ($provider in $providers) { $config = $ProviderConfigs[$provider] $identity = $providerIdentities[$provider] - $providerCsvPath = Join-Path $OutputPath "_raw_$($provider.ToLower()).csv" + $baseFileName = "focus-$($provider.ToLower())-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))" + $csvFilePath = Join-Path $OutputPath "$baseFileName.csv" $providerCostSum = [double]0 $headerWritten = $false @@ -956,31 +1156,40 @@ foreach ($provider in $providers) { $rowsGenerated = 0 $lastPct = -1 - while ($currentDate -le $EndDate -and $rowsGenerated -lt $providerTotalRows) { + while ($currentDate -le $EndDate -and $rowsGenerated -lt $providerTotalRows) + { # Vary daily count slightly (+/- 10%) for realism $variance = [int]($dailyRowCount * 0.1) if ($variance -lt 1) { $variance = 1 } $todayCount = [math]::Max(1, $dailyRowCount + (Get-Random 
-Minimum (-$variance) -Maximum ($variance + 1))) # Don't exceed target - if ($rowsGenerated + $todayCount -gt $providerTotalRows) { + if ($rowsGenerated + $todayCount -gt $providerTotalRows) + { $todayCount = $providerTotalRows - $rowsGenerated } # Generate one day's rows in a small batch $dayRows = [System.Collections.Generic.List[PSCustomObject]]::new($todayCount) - for ($i = 0; $i -lt $todayCount; $i++) { - $row = New-FocusRow -Provider $provider -ChargeDate $currentDate -Config $config -Identity $identity -IncludeCommitments -IncludeHybridBenefit + for ($i = 0; $i -lt $todayCount; $i++) + { + $row = New-FocusRow -Provider $provider -ChargeDate $currentDate -Config $config -Identity $identity -ScaleFactor $budgetScaleFactor -FocusVer $focusMajorMinor -IncludeCommitments -IncludeHybridBenefit $providerCostSum += $row.EffectiveCost $dayRows.Add($row) } # Append daily batch to CSV (stream to disk, free memory) - if (-not $headerWritten) { - $dayRows | Export-Csv -Path $providerCsvPath -NoTypeInformation -Encoding UTF8 - $headerWritten = $true - } else { - $dayRows | Export-Csv -Path $providerCsvPath -NoTypeInformation -Encoding UTF8 -Append + if ($PSCmdlet.ShouldProcess($csvFilePath, "Write $todayCount rows")) + { + if (-not $headerWritten) + { + $dayRows | Export-Csv -Path $csvFilePath -NoTypeInformation -Encoding UTF8 + $headerWritten = $true + } + else + { + $dayRows | Export-Csv -Path $csvFilePath -NoTypeInformation -Encoding UTF8 -Append + } } $dayRows.Clear() $dayRows = $null @@ -990,142 +1199,45 @@ foreach ($provider in $providers) { # Progress indicator every 10% $pct = [math]::Floor($rowsGenerated / $providerTotalRows * 100) - if ($pct -ge $lastPct + 10) { + if ($pct -ge $lastPct + 10) + { $lastPct = $pct Write-Host " $provider : $pct% ($([string]::Format('{0:N0}', $rowsGenerated)) rows)" -ForegroundColor Gray } } - Write-Host " $provider : Generated $([string]::Format('{0:N0}', $rowsGenerated)) rows" -ForegroundColor Green + Write-Host " $provider : 
Generated $([string]::Format('{0:N0}', $rowsGenerated)) rows, `$$([string]::Format('{0:N2}', $providerCostSum))" -ForegroundColor Green + $generatedFiles += $csvFilePath $allProviderCosts[$provider] = $providerCostSum $allProviderRowCounts[$provider] = $rowsGenerated - $allProviderCsvPaths[$provider] = $providerCsvPath $totalRows += $rowsGenerated - # Force GC between providers to reclaim memory - [System.GC]::Collect() -} - -# ============================================================================ -# Budget Scaling (calculated from tracked sums, applied via Python/pandas) -# ============================================================================ - -$totalGeneratedCost = 0 -foreach ($provider in $providers) { - $totalGeneratedCost += $allProviderCosts[$provider] -} - -$scaleFactor = if ($totalGeneratedCost -gt 0) { $TotalBudget / $totalGeneratedCost } else { 1 } -Write-Host "" -Write-Host "Scaling costs by factor $([math]::Round($scaleFactor, 4)) to target budget `$$([string]::Format('{0:N0}', $TotalBudget))" -ForegroundColor Cyan - -# ============================================================================ -# Export - Budget scaling + Parquet conversion via Python/pandas (memory-safe) -# ============================================================================ - -$generatedFiles = @() - -foreach ($provider in $providers) { - $rawCsvPath = $allProviderCsvPaths[$provider] - $providerRowCount = $allProviderRowCounts[$provider] - $providerScaledCost = [math]::Round($allProviderCosts[$provider] * $scaleFactor, 2) - - $baseFileName = "focus-$($provider.ToLower())-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))" - $csvFileName = "$baseFileName.csv" - $csvFilePath = Join-Path $OutputPath $csvFileName - $parquetFileName = "$baseFileName.parquet" - $parquetFilePath = Join-Path $OutputPath $parquetFileName - - Write-Host " $provider : $([string]::Format('{0:N0}', $providerRowCount)) rows, `$$([string]::Format('{0:N2}', 
$providerScaledCost)) USD" -ForegroundColor Green - - # Use Python/pandas to: apply budget scaling, output CSV/Parquet - $rawCsvPy = ($rawCsvPath -replace '\\', '/') - $csvOutPy = ($csvFilePath -replace '\\', '/') - $parquetOutPy = ($parquetFilePath -replace '\\', '/') - - # Write Python script to temp file (avoids heredoc piping issues with pwsh -File) - $pyTempFile = [System.IO.Path]::GetTempFileName() -replace '\.tmp$', '.py' - $pythonScript = @" -import pandas as pd, sys -try: - df = pd.read_csv('$rawCsvPy', low_memory=False) - sf = $scaleFactor - cost_cols = ['BilledCost','ContractedCost','EffectiveCost','ListCost', - 'ContractedUnitPrice','ListUnitPrice', - 'x_EffectiveUnitPrice','x_BilledUnitPrice', - 'x_OnDemandCost','x_OnDemandUnitPrice'] - for col in cost_cols: - if col in df.columns: - df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0) - df[col] = (df[col] * sf).round(10) - fmt = '$OutputFormat' - if fmt in ('CSV','Both'): - df.to_csv('$csvOutPy', index=False) - print('CSV_OK') - if fmt in ('Parquet','Both'): - df.to_parquet('$parquetOutPy', engine='pyarrow', compression='snappy', index=False) - print('PARQUET_OK') - if fmt == 'Parquet' and 'PARQUET_OK' not in locals().get('_printed', ''): - df.to_parquet('$parquetOutPy', engine='pyarrow', compression='snappy', index=False) - print('PARQUET_OK') - print('DONE') -except Exception as e: - print(f'ERROR: {e}') - sys.exit(1) -"@ - [System.IO.File]::WriteAllText($pyTempFile, $pythonScript) - $result = (python $pyTempFile 2>&1) -join "`n" - Remove-Item $pyTempFile -Force -ErrorAction SilentlyContinue - if ($result -match 'DONE') { - if ($result -match 'PARQUET_OK') { - Write-Host " Saved Parquet: $parquetFilePath" -ForegroundColor Gray - $generatedFiles += $parquetFilePath - } - if ($result -match 'CSV_OK') { - Write-Host " Saved CSV: $csvFilePath" -ForegroundColor Gray - $generatedFiles += $csvFilePath - } - } else { - Write-Host " Warning: Python scaling failed. Using raw CSV without scaling." 
-ForegroundColor Yellow - Write-Host " $result" -ForegroundColor Yellow - # Fall back: rename raw CSV as final CSV - Copy-Item $rawCsvPath $csvFilePath -Force - $generatedFiles += $csvFilePath - } - - # Clean up raw CSV (intermediate file) - if (Test-Path $rawCsvPath) { Remove-Item $rawCsvPath -Force } - # Generate manifest.json $manifestFilePath = Join-Path $OutputPath "manifest-$($provider.ToLower()).json" - - $dataFile = if ($OutputFormat -eq "Parquet" -or $OutputFormat -eq "Both") { "$baseFileName.parquet" } else { "$baseFileName.csv" } - $dataFilePath = Join-Path $OutputPath $dataFile - $fileSize = if (Test-Path $dataFilePath) { (Get-Item $dataFilePath).Length } else { 0 } - + $fileSize = if (Test-Path $csvFilePath) { (Get-Item $csvFilePath).Length } else { 0 } $manifest = @{ - exportConfig = @{ - exportName = "focus-$($provider.ToLower())-export" - resourceId = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/test-rg" - dataVersion = "1.0" - apiVersion = "2023-08-01" - type = "FocusCost" - timeFrame = "Custom" - granularity = "Daily" + exportConfig = @{ + exportName = "focus-$($Provider.ToLower())-export" + resourceId = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/test-rg" + dataVersion = "1.0" + apiVersion = "2023-08-01" + type = "FocusCost" + timeFrame = "Custom" + granularity = "Daily" } deliveryConfig = @{ - partitionData = $true - dataOverwriteBehavior = "OverwritePreviousReport" - fileFormat = if ($OutputFormat -eq "Parquet" -or $OutputFormat -eq "Both") { "Parquet" } else { "Csv" } - compressionMode = "Snappy" + partitionData = $true + dataOverwriteBehavior = "OverwritePreviousReport" + fileFormat = "Csv" + compressionMode = "None" } - blobs = @( + blobs = @( @{ - blobName = $dataFile + blobName = "$baseFileName.csv" byteCount = $fileSize } ) - runInfo = @{ + runInfo = @{ executionType = "Scheduled" submittedTime = (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssZ") runId = [guid]::NewGuid().ToString() @@ -1134,9 
+1246,15 @@ except Exception as e: } } | ConvertTo-Json -Depth 5 - $manifest | Out-File -FilePath $manifestFilePath -Encoding UTF8 - Write-Host " Saved manifest: $manifestFilePath" -ForegroundColor Gray - $generatedFiles += $manifestFilePath + if ($PSCmdlet.ShouldProcess($manifestFilePath, 'Create manifest file')) + { + $manifest | Out-File -FilePath $manifestFilePath -Encoding UTF8 + Write-Host " Saved manifest: $manifestFilePath" -ForegroundColor Gray + $generatedFiles += $manifestFilePath + } + + # Force GC between providers to reclaim memory + [System.GC]::Collect() } # ============================================================================ @@ -1150,77 +1268,106 @@ Write-Host ("=" * 70) -ForegroundColor Cyan Write-Host "" Write-Host "Summary:" -ForegroundColor Yellow Write-Host " Total Rows Generated: $([string]::Format('{0:N0}', $totalRows))" -Write-Host " Total Cost: `$$([string]::Format('{0:N2}', $TotalBudget)) USD" +$actualTotal = ($allProviderCosts.Values | Measure-Object -Sum).Sum +Write-Host " Total Cost: `$$([string]::Format('{0:N2}', $actualTotal)) USD" +Write-Host " FOCUS Version: $FocusVersion" Write-Host " Output Format: $OutputFormat" Write-Host " Files Created: $($generatedFiles.Count)" Write-Host "" Write-Host "Provider Breakdown:" -ForegroundColor Yellow -foreach ($provider in $providers) { - $providerScaledCost = [math]::Round($allProviderCosts[$provider] * $scaleFactor, 2) +foreach ($provider in $providers) +{ + $providerCost = $allProviderCosts[$provider] $providerRowCount = $allProviderRowCounts[$provider] - Write-Host " $provider : $([string]::Format('{0:N0}', $providerRowCount)) rows | `$$([string]::Format('{0:N2}', $providerScaledCost))" + Write-Host " $provider : $([string]::Format('{0:N0}', $providerRowCount)) rows | `$$([string]::Format('{0:N2}', $providerCost))" } Write-Host "" Write-Host "Generated Files:" -ForegroundColor Yellow -foreach ($file in $generatedFiles) { - if (Test-Path $file) { +foreach ($file in $generatedFiles) 
+{ + if (Test-Path $file) + { $size = (Get-Item $file).Length / 1MB Write-Host " - $file ($([math]::Round($size, 2)) MB)" } } Write-Host "" -Write-Host "Dashboard Coverage:" -ForegroundColor Cyan +Write-Host "FOCUS Column Coverage (v$FocusVersion):" -ForegroundColor Cyan Write-Host " PricingCategory: Standard, Dynamic, Committed" Write-Host " CommitmentDiscountStatus: Used, Unused (with SkuOrderId/SkuTerm linkage)" Write-Host " CommitmentDiscountType: Reservation, Savings Plan (+ Purchase rows)" +if ($focusMajorMinor -ge [version]'1.1') { Write-Host " CommitmentDiscountQuantity/Unit: Included (v1.1+)" } +if ($focusMajorMinor -ge [version]'1.2') { Write-Host " BillingAccountType, SubAccountType, InvoiceId: Included (v1.2+)" } +if ($focusMajorMinor -ge [version]'1.3') { Write-Host " HostProviderName, ServiceProviderName: Included (v1.3+)" } Write-Host " Azure Hybrid Benefit: x_SkuLicenseStatus Enabled/Not Enabled" Write-Host " CPU Architecture: Intel/AMD/Arm64 patterns in x_SkuMeterName" Write-Host " Tag coverage: ~80% tagged, ~20% untagged (maturity scorecard)" -Write-Host " Marketplace: x_PublisherCategory = 'Marketplace'" -Write-Host " Data quality anomalies: ~2% rows with cost relationship issues" -Write-Host " Negotiated discounts: ~60% rows with ListCost > ContractedCost" -Write-Host " x_EffectiveUnitPrice, x_BilledUnitPrice, x_OnDemandCost/UnitPrice" -Write-Host " x_SkuDescription, x_SkuInstanceType, x_SkuCoreCount" -Write-Host " x_BillingAccountAgreement, x_PricingBlockSize, x_PricingUnitDescription" +Write-Host " Data quality anomalies: ~2% rows (ChargeClass=Correction, x_SourceChanges set)" +Write-Host " Note: Cost and Usage dataset only. Contract Commitment dataset not included." 
Write-Host "" # ============================================================================ # Upload to Azure Storage # ============================================================================ -if ($Upload -and $StorageAccountName) { +if ($Upload -and $StorageAccountName) +{ Write-Host "" Write-Host ("=" * 70) -ForegroundColor Cyan Write-Host "Uploading to Azure Storage..." -ForegroundColor Yellow Write-Host ("=" * 70) -ForegroundColor Cyan - # Get storage account key - $storageKey = $null - if ($ResourceGroupName) { - Write-Host " Getting storage account key..." -ForegroundColor Gray - $storageKey = (az storage account keys list --account-name $StorageAccountName --resource-group $ResourceGroupName --query "[0].value" -o tsv 2>$null) - if (-not $storageKey) { - Write-Host " Warning: Could not get storage key, falling back to default auth" -ForegroundColor Yellow + # Authentication: prefer Azure AD (--auth-mode login) unless -UseStorageKey + $authArgs = @() + if ($UseStorageKey) + { + if (-not $ResourceGroupName) + { + Write-Warning "-UseStorageKey requires -ResourceGroupName to retrieve the key." } + else + { + Write-Host " Getting storage account key..." -ForegroundColor Gray + $storageKey = (az storage account keys list --account-name $StorageAccountName --resource-group $ResourceGroupName --query "[0].value" -o tsv 2>$null) + if ($storageKey) + { + $authArgs = @("--account-key", $storageKey) + } + else + { + Write-Warning "Could not retrieve storage key, falling back to Azure AD auth." + $authArgs = @("--auth-mode", "login") + } + } + } + else + { + $authArgs = @("--auth-mode", "login") } # Start ADF triggers BEFORE uploading data - if ($StartTriggers -and $AdfName -and $ResourceGroupName) { + if ($StartTriggers -and $AdfName -and $ResourceGroupName) + { Write-Host "" - Write-Host ("=" * 70) -ForegroundColor Cyan Write-Host "Ensuring ADF Triggers are running (BEFORE upload)..." 
-ForegroundColor Yellow - Write-Host ("=" * 70) -ForegroundColor Cyan $triggers = @("msexports_ManifestAdded", "ingestion_ManifestAdded") - foreach ($trigger in $triggers) { - $state = (az datafactory trigger show --factory-name $AdfName --resource-group $ResourceGroupName --name $trigger --query "properties.runtimeState" -o tsv 2>$null) - if ($state -eq "Started") { - Write-Host " $trigger already running" -ForegroundColor Gray - } else { - Write-Host " Starting $trigger..." -ForegroundColor Cyan - az datafactory trigger start --factory-name $AdfName --resource-group $ResourceGroupName --name $trigger --only-show-errors 2>$null - Write-Host " $trigger started" -ForegroundColor Green + foreach ($trigger in $triggers) + { + if ($PSCmdlet.ShouldProcess($trigger, "Start ADF trigger")) + { + $state = (az datafactory trigger show --factory-name $AdfName --resource-group $ResourceGroupName --name $trigger --query "properties.runtimeState" -o tsv 2>$null) + if ($state -eq "Started") + { + Write-Host " $trigger already running" -ForegroundColor Gray + } + else + { + Write-Host " Starting $trigger..." 
-ForegroundColor Cyan + az datafactory trigger start --factory-name $AdfName --resource-group $ResourceGroupName --name $trigger --only-show-errors 2>$null + Write-Host " $trigger started" -ForegroundColor Green + } } } @@ -1233,22 +1380,24 @@ if ($Upload -and $StorageAccountName) { $runId = [guid]::NewGuid().ToString() $exportTime = (Get-Date).ToString("yyyyMMddHHmm") - foreach ($provider in $providers) { + foreach ($provider in $providers) + { $providerLower = $provider.ToLower() $baseFileName = "focus-$providerLower-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))" - $fileExt = if ($OutputFormat -eq "Parquet" -or $OutputFormat -eq "Both") { ".parquet" } else { ".csv" } - $dataFile = "$baseFileName$fileExt" + $dataFile = "$baseFileName.csv" $dataFilePath = Join-Path $OutputPath $dataFile - if (-not (Test-Path $dataFilePath)) { + if (-not (Test-Path $dataFilePath)) + { Write-Host " Warning: $dataFilePath not found, skipping $provider" -ForegroundColor Yellow continue } $fileSize = (Get-Item $dataFilePath).Length - if ($providerLower -eq "azure") { + if ($providerLower -eq "azure") + { # Azure: msexports with Cost Management folder structure $container = "msexports" $scopeId = "subscriptions/00000000-0000-0000-0000-000000000000" @@ -1263,7 +1412,7 @@ if ($Upload -and $StorageAccountName) { byteCount = $fileSize blobCount = 1 dataRowCount = $allProviderRowCounts[$provider] - exportConfig = @{ + exportConfig = @{ exportName = $exportName resourceId = "/$scopeId/providers/Microsoft.CostManagement/exports/$exportName" dataVersion = "1.0r2" @@ -1272,22 +1421,22 @@ if ($Upload -and $StorageAccountName) { timeFrame = "Custom" granularity = "Daily" } - deliveryConfig = @{ - partitionData = $true - dataOverwriteBehavior = "OverwritePreviousReport" - fileFormat = if ($fileExt -eq ".parquet") { "Parquet" } else { "Csv" } - compressionMode = if ($fileExt -eq ".parquet") { "Snappy" } else { "None" } - containerUri = 
"/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/$StorageAccountName" - rootFolderPath = "" + deliveryConfig = @{ + partitionData = $true + dataOverwriteBehavior = "OverwritePreviousReport" + fileFormat = "Csv" + compressionMode = "None" + containerUri = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/$StorageAccountName" + rootFolderPath = "" } - runInfo = @{ + runInfo = @{ executionType = "Scheduled" submittedTime = (Get-Date).ToString("yyyy-MM-ddTHH:mm:ss.fffffffZ") runId = $runId startDate = $StartDate.ToString("yyyy-MM-ddT00:00:00") endDate = $EndDate.ToString("yyyy-MM-ddT00:00:00") } - blobs = @( + blobs = @( @{ blobName = $blobPath byteCount = $fileSize @@ -1299,16 +1448,16 @@ if ($Upload -and $StorageAccountName) { $manifestFilePath = Join-Path $OutputPath "manifest-$providerLower.json" $manifest | Out-File -FilePath $manifestFilePath -Encoding UTF8 - Write-Host " Uploading $provider to msexports container..." 
-ForegroundColor Cyan - if ($storageKey) { - az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null - az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null - } else { - az storage blob upload --account-name $StorageAccountName --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null - az storage blob upload --account-name $StorageAccountName --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null + if ($PSCmdlet.ShouldProcess("$container/$blobPath", "Upload to Azure Storage")) + { + Write-Host " Uploading $provider to msexports container..." -ForegroundColor Cyan + az storage blob upload --account-name $StorageAccountName @authArgs --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null + az storage blob upload --account-name $StorageAccountName @authArgs --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null } - } else { + } + else + { # AWS/GCP/DataCenter: ingestion container $container = "ingestion" $scopePath = "$providerLower/test-account" @@ -1326,13 +1475,11 @@ if ($Upload -and $StorageAccountName) { $manifestFilePath = Join-Path $OutputPath "manifest-$providerLower.json" $manifest | Out-File -FilePath $manifestFilePath -Encoding UTF8 - Write-Host " Uploading $provider to ingestion container..." 
-ForegroundColor Cyan - if ($storageKey) { - az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null - az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null - } else { - az storage blob upload --account-name $StorageAccountName --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null - az storage blob upload --account-name $StorageAccountName --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null + if ($PSCmdlet.ShouldProcess("$container/$blobPath", "Upload to Azure Storage")) + { + Write-Host " Uploading $provider to ingestion container..." -ForegroundColor Cyan + az storage blob upload --account-name $StorageAccountName @authArgs --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null + az storage blob upload --account-name $StorageAccountName @authArgs --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null } } @@ -1345,11 +1492,10 @@ if ($Upload -and $StorageAccountName) { Write-Host "Upload Complete! $uploadedCount providers uploaded." -ForegroundColor Green # Verify ADF pipeline execution - if ($StartTriggers -and $AdfName -and $ResourceGroupName) { + if ($StartTriggers -and $AdfName -and $ResourceGroupName) + { Write-Host "" - Write-Host ("=" * 70) -ForegroundColor Cyan Write-Host "Verifying ADF Pipeline Execution..." -ForegroundColor Yellow - Write-Host ("=" * 70) -ForegroundColor Cyan Write-Host " Waiting 15 seconds for blob events to propagate..." 
-ForegroundColor Gray Start-Sleep -Seconds 15 @@ -1360,58 +1506,31 @@ if ($Upload -and $StorageAccountName) { $pipelineRuns = az datafactory pipeline-run query-by-factory --factory-name $AdfName --resource-group $ResourceGroupName --last-updated-after $checkFrom --last-updated-before $checkTo -o json 2>$null | ConvertFrom-Json - if ($pipelineRuns.value.Count -gt 0) { + if ($pipelineRuns.value.Count -gt 0) + { Write-Host " ADF pipelines triggered successfully!" -ForegroundColor Green - foreach ($run in $pipelineRuns.value) { + foreach ($run in $pipelineRuns.value) + { Write-Host " $($run.pipelineName) | $($run.status)" -ForegroundColor Gray } - } else { - Write-Host " Warning: No pipeline runs detected. Re-uploading manifests as safety net..." -ForegroundColor Yellow - - foreach ($provider in $providers) { - $providerLower = $provider.ToLower() - $manifestFilePath = Join-Path $OutputPath "manifest-$providerLower.json" - if (-not (Test-Path $manifestFilePath)) { continue } - - if ($providerLower -eq "azure") { - $container = "msexports" - $scopeId = "subscriptions/00000000-0000-0000-0000-000000000000" - $exportName = "focus-cost-export" - $dateRange = "$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))" - $blobFolder = "$scopeId/$exportName/$dateRange/$exportTime/$runId" - $manifestBlobPath = "$blobFolder/manifest.json" - } else { - $container = "ingestion" - $scopePath = "$providerLower/test-account" - $blobFolder = "Costs/$($EndDate.ToString('yyyy'))/$($EndDate.ToString('MM'))/$scopePath" - $manifestBlobPath = "$blobFolder/manifest.json" - } - - Write-Host " Re-uploading $container/$manifestBlobPath" -ForegroundColor Cyan - if ($storageKey) { - az storage blob upload --account-name $StorageAccountName --account-key $storageKey --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null - } else { - az storage blob upload --account-name $StorageAccountName --container-name $container --file 
$manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null - } - } - Write-Host " Manifests re-uploaded. Pipelines should trigger shortly." -ForegroundColor Green } - - Write-Host "" - Write-Host "ADF triggers are running. Data will be processed automatically." -ForegroundColor Green - } elseif ($StartTriggers) { + else + { + Write-Host " No pipeline runs detected yet. Manifests may trigger shortly." -ForegroundColor Yellow + } + } + elseif ($StartTriggers) + { Write-Host "" Write-Host "Warning: -StartTriggers requires -AdfName and -ResourceGroupName" -ForegroundColor Yellow - } else { - Write-Host "" - Write-Host "Next Steps:" -ForegroundColor Yellow - Write-Host " 1. Ensure ADF triggers are started BEFORE uploading data:" - Write-Host " az datafactory trigger start --factory-name --resource-group --name msexports_ManifestAdded" - Write-Host " az datafactory trigger start --factory-name --resource-group --name ingestion_ManifestAdded" - Write-Host " 2. Then upload data (manifest upload fires BlobCreated event)" - Write-Host " 3. Or re-run with -StartTriggers -AdfName -ResourceGroupName " } -} else { +} +elseif ($Upload) +{ + Write-Warning "-Upload requires -StorageAccountName." +} +else +{ Write-Host "" Write-Host "Next Steps:" -ForegroundColor Yellow Write-Host " 1. 
Run with -Upload -StorageAccountName to upload automatically" @@ -1422,11 +1541,3 @@ if ($Upload -and $StorageAccountName) { } Write-Host "" -Write-Host "FOCUS Specification Notes:" -ForegroundColor Cyan -Write-Host " - Data follows FOCUS v$FocusVersion column definitions" -Write-Host " - ServiceProviderName replaces ProviderName (deprecated in 1.3)" -Write-Host " - HostProviderName added for multi-cloud visibility" -Write-Host " - Custom columns use x_ prefix per FOCUS spec" -Write-Host " - CommitmentDiscountCategory: Reservation=Usage, SavingsPlan=Spend (per FOCUS)" -Write-Host " - PricingCategory: Standard (on-demand), Dynamic (spot), Committed (reserved)" -Write-Host "" diff --git a/src/templates/finops-hub/test/README.md b/src/templates/finops-hub/test/README.md new file mode 100644 index 000000000..9686f11e0 --- /dev/null +++ b/src/templates/finops-hub/test/README.md @@ -0,0 +1,64 @@ +# FinOps Hub Test Data + +This directory contains test data generation utilities for validating FinOps Hub deployments. + +## Generate-MultiCloudTestData.ps1 + +Generates synthetic multi-cloud cost data in [FOCUS format](https://focus.finops.org/) for Azure, AWS, GCP, and on-premises data center scenarios. 
+ +### Prerequisites + +- PowerShell 7.0 or later +- Azure CLI (for upload to storage) +- Azure AD credentials with Storage Blob Data Contributor role (for upload) + +### Quick Start + +```powershell +# Generate 6 months of FOCUS 1.3 data for all providers (500K rows, $500K budget) +./Generate-MultiCloudTestData.ps1 + +# Generate a small reproducible dataset +./Generate-MultiCloudTestData.ps1 -Seed 42 -TotalRowTarget 1000 + +# Generate Azure-only FOCUS 1.0 data +./Generate-MultiCloudTestData.ps1 -FocusVersion 1.0 -CloudProvider Azure -TotalRowTarget 50000 + +# Generate and upload using Azure AD auth +./Generate-MultiCloudTestData.ps1 -Upload -StorageAccountName "stfinopshub" -AdfName "adf-finopshub" -StartTriggers +``` + +### Features + +| Feature | Description | +|---------|-------------| +| Multi-cloud | Azure, AWS, GCP, and Data Center providers | +| FOCUS versions | 1.0, 1.1, 1.2, 1.3 with version-specific column sets | +| Budget scaling | Inline cost scaling to hit target budget (no Python dependency) | +| Commitment discounts | Reservations and Savings Plans with Used/Unused status | +| Azure Hybrid Benefit | x_SkuLicenseStatus Enabled/Not Enabled simulation | +| CPU architecture | Intel/AMD/Arm64 patterns in x_SkuMeterName | +| Tag variation | ~80% tagged, ~20% untagged for maturity scorecard testing | +| Data quality | ~2% anomaly rows with ChargeClass=Correction | +| Reproducibility | `-Seed` parameter for deterministic output | +| Azure AD auth | Default `--auth-mode login` for uploads (storage keys opt-in via `-UseStorageKey`) | +| ShouldProcess | Full `-WhatIf` / `-Confirm` support for destructive operations | + +### Parameters + +Run `Get-Help ./Generate-MultiCloudTestData.ps1 -Detailed` for the full parameter reference. + +### FOCUS Specification Compliance + +The script generates the **Cost and Usage** dataset. The Contract Commitment dataset (introduced in v1.3) is not included. 
+ +Column sets vary by FOCUS version: + +| Version | Mandatory | Conditional | Total | +|---------|-----------|-------------|-------| +| 1.0 | 32 | 11 | 43 | +| 1.1 | 32 | 13 | 45 | +| 1.2 | 35 | 13 | 48 | +| 1.3 | 37 | 13 | 50 | + +Plus FinOps Hub-specific `x_` prefixed extension columns for dashboard compatibility. From 9dbdc0f63f21c3f3d3c1e411b2e8f42a58e4bcbc Mon Sep 17 00:00:00 2001 From: Zach Olinske Date: Mon, 16 Feb 2026 13:21:59 +0100 Subject: [PATCH 3/5] fix: resolve PSScriptAnalyzer warnings - Add SuppressMessage attributes on internal New-* helper functions (New-AwsAccountId, New-ProviderIdentity, New-FocusRow create in-memory objects, not system state changes) - Rename New-ProviderIdentities -> New-ProviderIdentity (singular noun) - Gate AHB simulation with IncludeHybridBenefit switch (was declared but never checked, causing PSReviewUnusedParameter warning) - Ran Invoke-ScriptAnalyzer: 0 errors, 0 warnings (excluding WriteHost) --- .../finops-hub/test/Generate-MultiCloudTestData.ps1 | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 index 00fed11e2..10aafbdcd 100644 --- a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 +++ b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 @@ -385,6 +385,8 @@ function Get-IsoDateTime # Generate a 12-digit AWS-style account ID without int overflow function New-AwsAccountId { + [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseShouldProcessForStateChangingFunctions', '')] + param() # Split into two 6-digit segments to avoid [int] overflow # (12-digit numbers exceed [int]::MaxValue ~2.1B) $part1 = Get-Random -Minimum 100000 -Maximum 999999 @@ -396,8 +398,9 @@ function New-AwsAccountId # Persistent Identity Generation # ============================================================================ -function New-ProviderIdentities +function 
New-ProviderIdentity { + [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseShouldProcessForStateChangingFunctions', '')] param( [string]$Provider, [hashtable]$Config @@ -644,6 +647,7 @@ function New-ProviderIdentities function New-FocusRow { + [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseShouldProcessForStateChangingFunctions', '')] param( [string]$Provider, [datetime]$ChargeDate, @@ -844,7 +848,7 @@ function New-FocusRow } # AHB columns - if ($res.AhbEligible -and $chargeCategory -eq "Usage") + if ($IncludeHybridBenefit -and $res.AhbEligible -and $chargeCategory -eq "Usage") { $x_SkuLicenseType = $res.AhbLicenseType @@ -1122,7 +1126,7 @@ Write-Host "Pre-generating persistent identities..." -ForegroundColor Yellow $providerIdentities = @{} foreach ($provider in $providers) { - $providerIdentities[$provider] = New-ProviderIdentities -Provider $provider -Config $ProviderConfigs[$provider] + $providerIdentities[$provider] = New-ProviderIdentity -Provider $provider -Config $ProviderConfigs[$provider] $resCount = $providerIdentities[$provider].Resources.Count $saCount = $providerIdentities[$provider].SubAccounts.Count $baCount = $providerIdentities[$provider].BillingAccounts.Count From 5ab8e76dbcdcc38b26ff91d95414385c9228f973 Mon Sep 17 00:00:00 2001 From: Zach Olinske Date: Mon, 16 Feb 2026 19:41:50 +0100 Subject: [PATCH 4/5] fix: Address PR review feedback and expand test data generator - Added AllocatedResourceType column (FOCUS v1.3) - Populated ContractApplied with JSON for committed-discount rows (v1.3+) - Added split cost allocation simulation (~10% AKS/EKS/GKE rows) - Extracted ADF trigger names to reusable variable - Documented column emission per FOCUS version in summary output - Updated README with NukeTestData section, output formats, additional datasets - Removed .duplicate backup file --- .../test/Generate-MultiCloudTestData.ps1 | 1503 ++++++++++++++++- src/templates/finops-hub/test/README.md | 24 + 2 files changed, 1448 insertions(+), 
79 deletions(-) diff --git a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 index 10aafbdcd..3700c0f75 100644 --- a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 +++ b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 @@ -14,6 +14,12 @@ - GCP (BigQuery FOCUS export simulation) - Data Center (On-premises infrastructure) + In addition, for Azure, it generates ALL FOCUS dataset types: + - Prices (Azure EA/MCA price sheet → Prices_raw → Prices_final_v1_2) + - CommitmentDiscountUsage (Reservation details → CommitmentDiscountUsage_raw) + - Recommendations (Reservation recommendations → Recommendations_raw) + - Transactions (Reservation transactions → Transactions_raw) + The generated data can be uploaded to Azure Storage for FinOps Hub ingestion testing. Features: @@ -64,10 +70,12 @@ dataset (introduced in v1.3) is not included. .PARAMETER OutputFormat - Output file format. Options: CSV, Both - Default: CSV - Note: Parquet output requires the PSParquet module (Install-Module PSParquet). - If PSParquet is not available and Parquet is requested, falls back to CSV. + Output file format. Options: CSV, Parquet + Default: Parquet + Parquet is recommended as it simulates real Cost Management export output. + Data is generated via per-day CSV streaming (to avoid OOM), then converted + to Parquet at the end. Requires the PSParquet module (Install-Module PSParquet). + CSV mode skips the final conversion and uploads raw CSV files. .PARAMETER StorageAccountName Azure Storage account name for upload. @@ -92,6 +100,17 @@ Random seed for reproducible test data generation. When specified, the same seed produces identical output (given the same parameters). 
+.PARAMETER NukeTestData + When specified, purges ALL data from the ADX cluster (Hub + Ingestion databases), + deletes all blobs from the msexports and ingestion storage containers, and removes + local test-data folder, then exits. Requires -AdxClusterUri and -StorageAccountName. + Optionally use -StartTriggers with -AdfName and -ResourceGroupName to stop ADF + triggers during cleanup. + +.PARAMETER AdxClusterUri + Azure Data Explorer cluster URI for NukeTestData operations. + Example: https://finopsdemoadx4uj6pm.italynorth.kusto.windows.net + .EXAMPLE .\Generate-MultiCloudTestData.ps1 # Generates 6 months of FOCUS 1.3 data for all providers, 500K rows, $500K budget @@ -112,6 +131,14 @@ .\Generate-MultiCloudTestData.ps1 -Seed 42 -TotalRowTarget 1000 # Generates reproducible test data with 1000 rows +.EXAMPLE + .\Generate-MultiCloudTestData.ps1 -NukeTestData -AdxClusterUri "https://finopsdemoadx4uj6pm.italynorth.kusto.windows.net" -StorageAccountName "stfinopsdemo4uj6pmwee34z" + # Purges ALL data from ADX tables, storage containers, and local test-data for a clean reset + +.EXAMPLE + .\Generate-MultiCloudTestData.ps1 -NukeTestData -AdxClusterUri "https://finopsdemoadx4uj6pm.italynorth.kusto.windows.net" -StorageAccountName "stfinopsdemo4uj6pmwee34z" -AdfName "adf-finopsdemo-4uj6pm" -ResourceGroupName "rg-finopshub-demo" -StartTriggers + # Full nuke: stop ADF triggers, purge ADX + storage + local, then re-start triggers after regen + .LINK https://github.com/microsoft/finops-toolkit @@ -146,8 +173,8 @@ param( [ValidateSet("1.0", "1.1", "1.2", "1.3")] [string]$FocusVersion = "1.3", - [ValidateSet("CSV", "Both")] - [string]$OutputFormat = "CSV", + [ValidateSet("CSV", "Parquet")] + [string]$OutputFormat = "Parquet", [string]$StorageAccountName, @@ -161,7 +188,13 @@ param( [switch]$UseStorageKey, - [int]$Seed + [int]$Seed, + + [Parameter(HelpMessage = "Nuke all test data: purge ADX tables, storage blobs, and local test-data folder, then exit.")] + [switch]$NukeTestData, 
+ + [Parameter(HelpMessage = "ADX cluster URI for NukeTestData (e.g., https://mycluster.region.kusto.windows.net).")] + [string]$AdxClusterUri ) # ============================================================================ @@ -189,6 +222,11 @@ if ($EndDate -gt (Get-Date)) # Parse FOCUS version for column selection $focusMajorMinor = [version]$FocusVersion +# ADF trigger names used during upload and NukeTestData operations. +# These match the default FinOps Hub ADF trigger names. Override if your deployment +# uses custom trigger names. +$AdfTriggerNames = @('msexports_ManifestAdded', 'ingestion_ManifestAdded') + # ============================================================================ # Provider-Specific Configuration # ============================================================================ @@ -222,10 +260,13 @@ $ProviderConfigs = @{ @{ Name = "App Service"; Category = "Compute"; Subcategory = "Other (Compute)"; Weight = 4; CostMin = 10; CostMax = 300; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "Key Vault"; Category = "Security"; Subcategory = "Other (Security)"; Weight = 2; CostMin = 1; CostMax = 50; PricingUnit = "10K Operations"; ConsumedUnit = "10K Operations"; PricingBlockSize = 10000; PricingUnitDescription = "10,000 Operations" }, @{ Name = "Virtual Network"; Category = "Networking"; Subcategory = "Network Infrastructure"; Weight = 2; CostMin = 5; CostMax = 100; PricingUnit = "1 GB"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB" }, + @{ Name = "Managed Disks"; Category = "Storage"; Subcategory = "Disk Storage"; Weight = 6; CostMin = 5; CostMax = 150; PricingUnit = "1 GB/Month"; ConsumedUnit = "GB"; PricingBlockSize = 1; PricingUnitDescription = "1 GB/Month" }, + @{ Name = "Public IP Addresses"; Category = "Networking"; Subcategory = "IP Addresses"; Weight = 3; CostMin = 3; CostMax = 30; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; 
PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, + @{ Name = "Load Balancer"; Category = "Networking"; Subcategory = "Load Balancing"; Weight = 3; CostMin = 18; CostMax = 100; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour" }, @{ Name = "Azure Functions"; Category = "Compute"; Subcategory = "Functions"; Weight = 2; CostMin = 0.10; CostMax = 30; PricingUnit = "1M Executions"; ConsumedUnit = "1M Executions"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Executions" }, @{ Name = "Azure Marketplace"; Category = "Marketplace"; Subcategory = "Other (Compute)"; Weight = 5; CostMin = 10; CostMax = 500; PricingUnit = "1 Hour"; ConsumedUnit = "Hours"; PricingBlockSize = 1; PricingUnitDescription = "1 Hour"; IsMarketplace = $true } ) - ResourceTypes = @("microsoft.compute/virtualmachines", "microsoft.storage/storageaccounts", "microsoft.sql/servers", "microsoft.kusto/clusters", "microsoft.containerservice/managedclusters", "microsoft.documentdb/databaseaccounts", "microsoft.web/sites", "microsoft.keyvault/vaults", "microsoft.network/virtualnetworks") + ResourceTypes = @("microsoft.compute/virtualmachines", "microsoft.compute/disks", "microsoft.storage/storageaccounts", "microsoft.sql/servers", "microsoft.kusto/clusters", "microsoft.containerservice/managedclusters", "microsoft.documentdb/databaseaccounts", "microsoft.web/sites", "microsoft.keyvault/vaults", "microsoft.network/virtualnetworks", "microsoft.network/publicipaddresses", "microsoft.network/loadbalancers") VmSkus = @( @{ InstanceType = "Standard_D4s_v5"; Cores = 4; Arch = "Intel"; MeterName = "D4s v5"; Description = "D4s v5 (4 vCPUs, 16 GB RAM)" }, @{ InstanceType = "Standard_D8s_v5"; Cores = 8; Arch = "Intel"; MeterName = "D8s v5"; Description = "D8s v5 (8 vCPUs, 32 GB RAM)" }, @@ -239,7 +280,17 @@ $ProviderConfigs = @{ @{ InstanceType = "Standard_D8ps_v5"; Cores = 8; Arch = "Arm64"; MeterName = "D8ps v5"; Description = "D8ps v5 
Arm64 (8 vCPUs, 32 GB RAM)" }, @{ InstanceType = "Standard_E4ps_v5"; Cores = 4; Arch = "Arm64"; MeterName = "E4ps v5"; Description = "E4ps v5 Arm64 (4 vCPUs, 32 GB RAM)" }, @{ InstanceType = "Standard_B2s"; Cores = 2; Arch = "Intel"; MeterName = "B2s"; Description = "B2s Burstable (2 vCPUs, 4 GB RAM)" }, - @{ InstanceType = "Standard_F4s_v2"; Cores = 4; Arch = "Intel"; MeterName = "F4s v2"; Description = "F4s v2 Compute (4 vCPUs, 8 GB RAM)" } + @{ InstanceType = "Standard_F4s_v2"; Cores = 4; Arch = "Intel"; MeterName = "F4s v2"; Description = "F4s v2 Compute (4 vCPUs, 8 GB RAM)" }, + # GPU N-series VMs (NC = training/inference, ND = distributed training, NV = visualization, NG = gaming/VDI) + @{ InstanceType = "Standard_NC4as_T4_v3"; Cores = 4; Arch = "GPU"; GpuType = "T4"; GpuCount = 1; MeterName = "NC4as T4 v3"; Description = "NC4as T4 v3 GPU (4 vCPUs, 28 GB, 1x T4)" }, + @{ InstanceType = "Standard_NC24ads_A100_v4"; Cores = 24; Arch = "GPU"; GpuType = "A100"; GpuCount = 1; MeterName = "NC24ads A100 v4"; Description = "NC24ads A100 v4 GPU (24 vCPUs, 220 GB, 1x A100)" }, + @{ InstanceType = "Standard_NC40ads_H100_v5"; Cores = 40; Arch = "GPU"; GpuType = "H100"; GpuCount = 1; MeterName = "NC40ads H100 v5"; Description = "NC40ads H100 v5 GPU (40 vCPUs, 320 GB, 1x H100)" }, + @{ InstanceType = "Standard_ND96asr_v4"; Cores = 96; Arch = "GPU"; GpuType = "A100"; GpuCount = 8; MeterName = "ND96asr v4"; Description = "ND96asr v4 GPU (96 vCPUs, 900 GB, 8x A100)" }, + @{ InstanceType = "Standard_ND96isr_H100_v5"; Cores = 96; Arch = "GPU"; GpuType = "H100"; GpuCount = 8; MeterName = "ND96isr H100 v5"; Description = "ND96isr H100 v5 GPU (96 vCPUs, 1900 GB, 8x H100)" }, + @{ InstanceType = "Standard_ND96isr_MI300X_v5"; Cores = 96; Arch = "GPU"; GpuType = "MI300X"; GpuCount = 8; MeterName = "ND96isr MI300X v5"; Description = "ND96isr MI300X v5 GPU (96 vCPUs, 1700 GB, 8x MI300X)" }, + @{ InstanceType = "Standard_NV36ads_A10_v5"; Cores = 36; Arch = "GPU"; GpuType = "A10"; 
GpuCount = 1; MeterName = "NV36ads A10 v5"; Description = "NV36ads A10 v5 GPU (36 vCPUs, 440 GB, 1x A10)" }, + @{ InstanceType = "Standard_NV36adms_A10_v5"; Cores = 36; Arch = "GPU"; GpuType = "A10"; GpuCount = 1; MeterName = "NV36adms A10 v5"; Description = "NV36adms A10 v5 GPU (36 vCPUs, 880 GB, 1x A10)" }, + @{ InstanceType = "Standard_NG8ads_V620_v1"; Cores = 8; Arch = "GPU"; GpuType = "V620"; GpuCount = 1; MeterName = "NG8ads V620 v1"; Description = "NG8ads V620 v1 GPU (8 vCPUs, 16 GB, 1/2 V620)" } ) } AWS = @{ @@ -269,6 +320,25 @@ $ProviderConfigs = @{ @{ Name = "Amazon SQS"; Category = "Integration"; Subcategory = "Other (Integration)"; Weight = 2; CostMin = 0.50; CostMax = 20; PricingUnit = "Requests"; ConsumedUnit = "1M Requests"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Requests" } ) ResourceTypes = @("AWS::EC2::Instance", "AWS::S3::Bucket", "AWS::RDS::DBInstance", "AWS::EKS::Cluster", "AWS::DynamoDB::Table", "AWS::Lambda::Function") + VmSkus = @( + # CPU instance types + @{ InstanceType = "m5.xlarge"; Cores = 4; Arch = "Intel"; MeterName = "m5.xlarge"; Description = "m5.xlarge (4 vCPUs, 16 GB)" }, + @{ InstanceType = "m5.2xlarge"; Cores = 8; Arch = "Intel"; MeterName = "m5.2xlarge"; Description = "m5.2xlarge (8 vCPUs, 32 GB)" }, + @{ InstanceType = "m6a.xlarge"; Cores = 4; Arch = "AMD"; MeterName = "m6a.xlarge"; Description = "m6a.xlarge AMD (4 vCPUs, 16 GB)" }, + @{ InstanceType = "m6a.2xlarge"; Cores = 8; Arch = "AMD"; MeterName = "m6a.2xlarge"; Description = "m6a.2xlarge AMD (8 vCPUs, 32 GB)" }, + @{ InstanceType = "m6g.xlarge"; Cores = 4; Arch = "Arm64"; MeterName = "m6g.xlarge"; Description = "m6g.xlarge Graviton (4 vCPUs, 16 GB)" }, + @{ InstanceType = "m6g.2xlarge"; Cores = 8; Arch = "Arm64"; MeterName = "m6g.2xlarge"; Description = "m6g.2xlarge Graviton (8 vCPUs, 32 GB)" }, + @{ InstanceType = "c5.2xlarge"; Cores = 8; Arch = "Intel"; MeterName = "c5.2xlarge"; Description = "c5.2xlarge Compute (8 vCPUs, 16 GB)" }, + @{ 
InstanceType = "r5.xlarge"; Cores = 4; Arch = "Intel"; MeterName = "r5.xlarge"; Description = "r5.xlarge Memory (4 vCPUs, 32 GB)" }, + # GPU instance types + @{ InstanceType = "p4d.24xlarge"; Cores = 96; Arch = "GPU"; GpuType = "A100"; GpuCount = 8; MeterName = "p4d.24xlarge"; Description = "p4d.24xlarge GPU (96 vCPUs, 1152 GB, 8x A100)" }, + @{ InstanceType = "p5.48xlarge"; Cores = 192; Arch = "GPU"; GpuType = "H100"; GpuCount = 8; MeterName = "p5.48xlarge"; Description = "p5.48xlarge GPU (192 vCPUs, 2048 GB, 8x H100)" }, + @{ InstanceType = "g5.xlarge"; Cores = 4; Arch = "GPU"; GpuType = "A10G"; GpuCount = 1; MeterName = "g5.xlarge"; Description = "g5.xlarge GPU (4 vCPUs, 16 GB, 1x A10G)" }, + @{ InstanceType = "g5.2xlarge"; Cores = 8; Arch = "GPU"; GpuType = "A10G"; GpuCount = 1; MeterName = "g5.2xlarge"; Description = "g5.2xlarge GPU (8 vCPUs, 32 GB, 1x A10G)" }, + @{ InstanceType = "g4dn.xlarge"; Cores = 4; Arch = "GPU"; GpuType = "T4"; GpuCount = 1; MeterName = "g4dn.xlarge"; Description = "g4dn.xlarge GPU (4 vCPUs, 16 GB, 1x T4)" }, + @{ InstanceType = "inf2.xlarge"; Cores = 4; Arch = "GPU"; GpuType = "Inferentia2"; GpuCount = 1; MeterName = "inf2.xlarge"; Description = "inf2.xlarge Inferentia (4 vCPUs, 32 GB, 1x Inf2)" }, + @{ InstanceType = "trn1.2xlarge"; Cores = 8; Arch = "GPU"; GpuType = "Trainium"; GpuCount = 1; MeterName = "trn1.2xlarge"; Description = "trn1.2xlarge Trainium (8 vCPUs, 32 GB, 1x Trn1)" } + ) } GCP = @{ ProviderName = "Google Cloud" @@ -296,6 +366,23 @@ $ProviderConfigs = @{ @{ Name = "Cloud Functions"; Category = "Compute"; Subcategory = "Functions"; Weight = 3; CostMin = 0.10; CostMax = 30; PricingUnit = "Invocations"; ConsumedUnit = "1M Invocations"; PricingBlockSize = 1000000; PricingUnitDescription = "1,000,000 Invocations" } ) ResourceTypes = @("compute.googleapis.com/Instance", "storage.googleapis.com/Bucket", "sql.googleapis.com/Instance", "container.googleapis.com/Cluster", "bigquery.googleapis.com/Dataset") + VmSkus = @( + # 
CPU instance types + @{ InstanceType = "n2-standard-4"; Cores = 4; Arch = "Intel"; MeterName = "n2-standard-4"; Description = "n2-standard-4 (4 vCPUs, 16 GB)" }, + @{ InstanceType = "n2-standard-8"; Cores = 8; Arch = "Intel"; MeterName = "n2-standard-8"; Description = "n2-standard-8 (8 vCPUs, 32 GB)" }, + @{ InstanceType = "n2d-standard-4"; Cores = 4; Arch = "AMD"; MeterName = "n2d-standard-4"; Description = "n2d-standard-4 AMD (4 vCPUs, 16 GB)" }, + @{ InstanceType = "n2d-standard-8"; Cores = 8; Arch = "AMD"; MeterName = "n2d-standard-8"; Description = "n2d-standard-8 AMD (8 vCPUs, 32 GB)" }, + @{ InstanceType = "t2a-standard-4"; Cores = 4; Arch = "Arm64"; MeterName = "t2a-standard-4"; Description = "t2a-standard-4 Arm (4 vCPUs, 16 GB)" }, + @{ InstanceType = "c2-standard-8"; Cores = 8; Arch = "Intel"; MeterName = "c2-standard-8"; Description = "c2-standard-8 Compute (8 vCPUs, 32 GB)" }, + @{ InstanceType = "e2-standard-4"; Cores = 4; Arch = "Intel"; MeterName = "e2-standard-4"; Description = "e2-standard-4 (4 vCPUs, 16 GB)" }, + # GPU instance types + @{ InstanceType = "a2-highgpu-1g"; Cores = 12; Arch = "GPU"; GpuType = "A100"; GpuCount = 1; MeterName = "a2-highgpu-1g"; Description = "a2-highgpu-1g GPU (12 vCPUs, 85 GB, 1x A100)" }, + @{ InstanceType = "a2-ultragpu-8g"; Cores = 96; Arch = "GPU"; GpuType = "A100"; GpuCount = 8; MeterName = "a2-ultragpu-8g"; Description = "a2-ultragpu-8g GPU (96 vCPUs, 1360 GB, 8x A100 80GB)" }, + @{ InstanceType = "a3-highgpu-8g"; Cores = 208; Arch = "GPU"; GpuType = "H100"; GpuCount = 8; MeterName = "a3-highgpu-8g"; Description = "a3-highgpu-8g GPU (208 vCPUs, 1872 GB, 8x H100)" }, + @{ InstanceType = "g2-standard-4"; Cores = 4; Arch = "GPU"; GpuType = "L4"; GpuCount = 1; MeterName = "g2-standard-4"; Description = "g2-standard-4 GPU (4 vCPUs, 16 GB, 1x L4)" }, + @{ InstanceType = "g2-standard-8"; Cores = 8; Arch = "GPU"; GpuType = "L4"; GpuCount = 1; MeterName = "g2-standard-8"; Description = "g2-standard-8 GPU (8 vCPUs, 32 GB, 
1x L4)" }, + @{ InstanceType = "n1-standard-4-t4"; Cores = 4; Arch = "GPU"; GpuType = "T4"; GpuCount = 1; MeterName = "n1-standard-4+T4"; Description = "n1-standard-4+T4 GPU (4 vCPUs, 15 GB, 1x T4)" } + ) } DataCenter = @{ ProviderName = "Internal IT" @@ -521,9 +608,12 @@ function New-ProviderIdentity } } - # VM SKU assignment (Azure VMs get specific instance types) + # VM SKU assignment (providers with VmSkus get instance types for VM compute services) $vmSku = $null - if ($Provider -eq "Azure" -and $resourceType -eq "microsoft.compute/virtualmachines" -and $Config.VmSkus) + $isVmResource = ($Provider -eq "Azure" -and $resourceType -eq "microsoft.compute/virtualmachines") -or + ($Provider -eq "AWS" -and $resourceType -eq "AWS::EC2::Instance") -or + ($Provider -eq "GCP" -and $resourceType -eq "compute.googleapis.com/Instance") + if ($isVmResource -and $Config.VmSkus) { $vmSku = Get-RandomElement -Array $Config.VmSkus } @@ -604,7 +694,7 @@ function New-ProviderIdentity default { "" } } $skuOrderId = [guid]::NewGuid().ToString() - $skuTerm = Get-RandomElement -Array @(12, 36) # 1 year or 3 years in months + $skuTerm = [string](Get-RandomElement -Array @(12, 36)) # 1 year or 3 years in months $commitments += @{ Id = $commitId @@ -847,22 +937,46 @@ function New-FocusRow $x_SkuDescription = "$($service.Name) - $($service.Subcategory)" } - # AHB columns + # AHB columns — set x_SkuMeterSubcategory patterns that the ingestion KQL uses to derive + # x_SkuLicenseType and x_SkuLicenseStatus (the KQL re-derives these, so raw values are overwritten) if ($IncludeHybridBenefit -and $res.AhbEligible -and $chargeCategory -eq "Usage") { $x_SkuLicenseType = $res.AhbLicenseType if ((Get-Random -Maximum 100) -lt 60) { + # AHB Enabled — ingestion KQL checks: x_SkuMeterSubcategory contains 'Azure Hybrid Benefit' $x_SkuLicenseStatus = "Enabled" $x_SkuLicenseQuantity = if ($res.VmSku) { $res.VmSku.Cores } else { Get-RandomElement -Array @(2, 4, 8, 16) } $licenseSavings = 
[math]::Round([math]::Abs($effectiveCost) * 0.40, 2) $effectiveCost = [math]::Max(0.01, [math]::Round($effectiveCost - $licenseSavings, 2)) + + if ($res.AhbLicenseType -eq "SQL Server") + { + $x_SkuMeterSubcategory = "SQL Server Azure Hybrid Benefit" + } + elseif ($res.VmSku) + { + # Real Azure pattern: "Dv5 Series Windows" + BYOL image type + $x_SkuMeterSubcategory = "$($vmSku.InstanceType) Series Azure Hybrid Benefit" + } } else { + # AHB Not Enabled — ingestion KQL checks: x_SkuMeterSubcategory contains 'Windows' $x_SkuLicenseStatus = "Not Enabled" $x_SkuLicenseQuantity = 0 + + if ($res.AhbLicenseType -eq "SQL Server") + { + $x_SkuMeterCategory = "SQL Database" + $x_SkuMeterSubcategory = "Compute" + } + elseif ($res.VmSku) + { + # Real Azure pattern: "Dv5 Series Windows" (contains 'Windows' but not 'Azure Hybrid Benefit') + $x_SkuMeterSubcategory = "$($vmSku.InstanceType) Series Windows" + } } } } @@ -919,6 +1033,113 @@ function New-FocusRow $isMarketplace = [bool]$service.IsMarketplace $publisherName = if ($isMarketplace) { Get-RandomElement -Array $Identity.MarketplacePublishers } else { $Config.ServiceProviderName } + # === CAPACITY RESERVATION SIMULATION (v1.3+) === + $capacityReservationId = $null + $capacityReservationStatus = $null + # ~10% of Usage rows for cloud providers with VM services get capacity reservations + if ($chargeCategory -eq 'Usage' -and $null -eq $commitmentDiscountId -and + $Provider -ne 'DataCenter' -and $service.Category -eq 'Compute' -and + (Get-Random -Maximum 100) -lt 10) + { + $capacityReservationId = "/subscriptions/$($subAccount.Id)/providers/Microsoft.Compute/capacityReservationGroups/crg-$(Get-Random -Minimum 1000 -Maximum 9999)" + $capacityReservationStatus = if ((Get-Random -Maximum 100) -lt 85) { 'Used' } else { 'Unused' } + } + + # === SKU METER (v1.3+) — describes what the SKU is metering === + # For Azure VMs, use the VM SKU meter name (e.g., "D4s v5/Hour") so the CPU + # architecture dashboard can classify by 
AMD/Intel/Arm64 using SKU naming patterns. + $skuMeter = if ($res.VmSku) + { + "$($res.VmSku.MeterName)/Hour" + } + else + { + switch ($service.Category) + { + 'Compute' { 'Compute Usage' } + 'Storage' { 'Block Volume Usage' } + 'Databases' { 'Database Usage' } + 'Networking' { 'Data Transfer' } + default { 'API Requests' } + } + } + + # === CONTRACT APPLIED (v1.3+) — JSON bridging Cost rows to Contract Commitment dataset === + $contractApplied = $null + if ($commitmentDiscountId -and $pricingCategory -eq 'Committed') + { + $contractApplied = (@{ + ContractId = $x_SkuOrderId + ContractType = $commitmentDiscountType + ContractTerm = $x_SkuTerm + ContractStatus = $commitmentDiscountStatus + } | ConvertTo-Json -Compress) + } + + # === SPLIT COST ALLOCATION (v1.3+) — simulates shared resource cost splitting === + # ~10% of container service rows (AKS, EKS, GKE) get allocation data to test the + # split cost allocation dashboard features. + $allocatedMethodDetails = $null + $allocatedMethodId = $null + $allocatedResourceId = $null + $allocatedResourceName = $null + $allocatedResourceType = $null + $allocatedTags = $null + + $allocationEligibleServices = @('Azure Kubernetes Service', 'Amazon EKS', 'Google Kubernetes Engine') + if ($chargeCategory -eq 'Usage' -and $service.Name -in $allocationEligibleServices -and + (Get-Random -Maximum 100) -lt 10) + { + $allocatedNamespace = Get-RandomElement -Array @('frontend', 'backend', 'data-pipeline', 'monitoring', 'ml-inference', 'batch-jobs') + $allocatedMethodId = 'ProportionalByCPU' + $allocatedMethodDetails = (@{ + Method = 'Proportional' + SplitBy = 'CPU Requests' + Namespace = $allocatedNamespace + Percentage = [math]::Round((Get-Random -Minimum 5 -Maximum 60), 0) + } | ConvertTo-Json -Compress) + $allocatedResourceId = "$($res.ResourceId)/namespaces/$allocatedNamespace" + $allocatedResourceName = "$($res.ResourceName)/$allocatedNamespace" + $allocatedResourceType = switch ($Provider) + { + 'Azure' { 
'microsoft.containerservice/managedclusters/namespaces' } + 'AWS' { 'AWS::EKS::Cluster::Namespace' } + 'GCP' { 'container.googleapis.com/Cluster/Namespace' } + default { $res.ResourceType } + } + $allocatedTags = (@{ + Namespace = $allocatedNamespace + Team = Get-RandomElement -Array @('team-alpha', 'team-beta', 'platform', 'data-team') + } | ConvertTo-Json -Compress) + } + + # === SKU PRICE DETAILS (v1.2+) — JSON with FOCUS-defined properties === + $skuPriceDetails = $null + if ($chargeCategory -in @('Usage', 'Purchase')) + { + $spd = [ordered]@{} + if ($res.VmSku) + { + $spd['CoreCount'] = $res.VmSku.Cores + $spd['OperatingSystem'] = Get-RandomElement -Array @('Linux', 'Windows') + } + elseif ($service.Category -eq 'Storage') + { + $spd['DiskSpace'] = Get-RandomElement -Array @(32, 64, 128, 256, 512, 1024) + $spd['DiskType'] = Get-RandomElement -Array @('SSD', 'HDD') + $spd['Redundancy'] = Get-RandomElement -Array @('Local', 'Zonal', 'Global') + $spd['StorageClass'] = Get-RandomElement -Array @('Hot', 'Cool', 'Archive') + } + elseif ($service.Category -eq 'Databases') + { + $spd['CoreCount'] = Get-RandomElement -Array @(2, 4, 8, 16) + } + if ($spd.Count -gt 0) + { + $skuPriceDetails = ($spd | ConvertTo-Json -Compress) + } + } + # Build the row as an ordered hashtable, then select version-appropriate columns $row = [ordered]@{ # ===================== Mandatory FOCUS columns (all versions) ===================== @@ -968,84 +1189,848 @@ function New-FocusRow ContractedUnitPrice = $contractedUnitPrice PricingCategory = $pricingCategory ServiceSubcategory = $service.Subcategory + + # ===================== Version-specific FOCUS columns ===================== + # All columns included in the ordered hashtable upfront so Export-Parquet sees + # a consistent 80-column schema from the very first row. 
+ + # v1.1+ + CommitmentDiscountQuantity = if ($FocusVer -ge [version]'1.1') { $commitmentDiscountQuantity } else { [double]0 } + CommitmentDiscountUnit = if ($FocusVer -ge [version]'1.1') { $commitmentDiscountUnit } else { '' } + + # v1.2+ + BillingAccountType = if ($FocusVer -ge [version]'1.2') { $Config.BillingAccountType } else { '' } + InvoiceId = if ($FocusVer -ge [version]'1.2') { $invoiceId } else { '' } + PricingCurrency = if ($FocusVer -ge [version]'1.2') { $Config.BillingCurrency } else { '' } + PricingCurrencyContractedUnitPrice = if ($FocusVer -ge [version]'1.2') { $contractedUnitPrice } else { $null } + PricingCurrencyEffectiveCost = if ($FocusVer -ge [version]'1.2') { $effectiveCost } else { $null } + PricingCurrencyListUnitPrice = if ($FocusVer -ge [version]'1.2') { $listUnitPrice } else { $null } + SkuPriceDetails = if ($FocusVer -ge [version]'1.2') { $skuPriceDetails } else { $null } + SubAccountType = if ($FocusVer -ge [version]'1.2') { $Config.SubAccountType } else { '' } + + # v1.3+ + CapacityReservationId = if ($FocusVer -ge [version]'1.3') { $capacityReservationId } else { $null } + CapacityReservationStatus = if ($FocusVer -ge [version]'1.3') { $capacityReservationStatus } else { $null } + ContractApplied = if ($FocusVer -ge [version]'1.3') { $contractApplied } else { $null } + HostProviderName = if ($FocusVer -ge [version]'1.3') { $Config.HostProviderName } else { '' } + ServiceProviderName = if ($FocusVer -ge [version]'1.3') { $Config.ServiceProviderName } else { '' } + SkuMeter = if ($FocusVer -ge [version]'1.3') { $skuMeter } else { '' } + + # v1.3+ Allocation columns (Recommended/Conditional) + # Populated for shared/container services (~10% of AKS/EKS/GKE rows) to simulate split cost allocation. 
+ AllocatedMethodDetails = if ($FocusVer -ge [version]'1.3') { $allocatedMethodDetails } else { $null } + AllocatedMethodId = if ($FocusVer -ge [version]'1.3') { $allocatedMethodId } else { $null } + AllocatedResourceId = if ($FocusVer -ge [version]'1.3') { $allocatedResourceId } else { $null } + AllocatedResourceName = if ($FocusVer -ge [version]'1.3') { $allocatedResourceName } else { $null } + AllocatedResourceType = if ($FocusVer -ge [version]'1.3') { $allocatedResourceType } else { $null } + AllocatedTags = if ($FocusVer -ge [version]'1.3') { $allocatedTags } else { $null } + + # ===================== FinOps Hub / Dashboard required columns ===================== + x_BillingAccountId = $billingAccount.Id + x_BillingAccountAgreement = $Config.BillingAccountAgreement + x_BillingProfileId = (Get-RandomElement -Array $Identity.BillingProfileIds) + x_ResourceGroupName = $res.ResourceGroup + x_ResourceType = $res.ResourceType + + # Publisher + PublisherName = $publisherName + x_PublisherCategory = if ($isMarketplace) { 'Marketplace' } else { $Provider } + + # Unit prices (dashboard discount analysis) + x_EffectiveUnitPrice = $effectiveUnitPrice + x_BilledUnitPrice = $billedUnitPrice + x_OnDemandCost = $onDemandCost + x_OnDemandUnitPrice = $onDemandUnitPrice + + # SKU columns + x_SkuDescription = $x_SkuDescription + x_SkuInstanceType = $x_SkuInstanceType + x_SkuCoreCount = $x_SkuCoreCount + x_SkuMeterCategory = $x_SkuMeterCategory + x_SkuMeterSubcategory = $x_SkuMeterSubcategory + x_SkuMeterName = $x_SkuMeterName + x_SkuMeterId = $res.SkuMeterId + x_SkuOfferId = if ($Provider -eq 'Azure') { 'MS-AZR-0017P' } else { '' } + x_SkuLicenseStatus = $x_SkuLicenseStatus + x_SkuLicenseQuantity = $x_SkuLicenseQuantity + x_SkuLicenseType = $x_SkuLicenseType + + # Commitment linkage + x_SkuOrderId = $x_SkuOrderId + x_SkuTerm = $x_SkuTerm + + # Pricing detail + x_PricingBlockSize = $service.PricingBlockSize + x_PricingUnitDescription = $service.PricingUnitDescription + + # Source 
metadata (required by ingestion pipeline) + x_SourceName = "test-data-generator" + x_SourceProvider = "Microsoft" + x_SourceType = "FocusCost" + x_SourceVersion = "1.0-preview(v1)" + + # Data quality / metadata + x_SourceChanges = $x_SourceChanges + x_CloudProvider = $Provider + x_FocusVersion = $FocusVersion + x_IngestionTime = (Get-IsoDateTime -Date (Get-Date)) } - # === Version-specific FOCUS columns === + return [PSCustomObject]$row +} + +# ============================================================================ +# Additional FOCUS Dataset Generators +# ============================================================================ +# These functions generate data for the non-Cost FOCUS datasets that the +# FinOps Hub ingestion pipeline expects: +# - Prices (Azure EA/MCA price sheet) +# - CommitmentDiscountUsage (Azure reservation details) +# - Recommendations (Azure reservation recommendations) +# - Transactions (Azure reservation transactions) +# +# These datasets are Azure-only (Cost Management exports). They use the same +# persistent identity pool so that reservation IDs, subscription IDs, and +# meter IDs are consistent with the Cost/Usage data. 
+# ============================================================================ + +function New-PriceRow +{ + [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseShouldProcessForStateChangingFunctions', '')] + param( + [hashtable]$Config, + [hashtable]$Identity, + [hashtable]$ServiceDef, + [hashtable]$ResourceDef, + [string]$PriceType, + [string]$Term + ) + + $billingAccount = (Get-RandomElement -Array $Identity.BillingAccounts) + $region = $ResourceDef.Region + + # Realistic unit and market prices + $unitPrice = Get-RandomDecimal -Min ([decimal]$ServiceDef.CostMin / 100) -Max ([decimal]$ServiceDef.CostMax / 10) + $marketPrice = [math]::Round($unitPrice * (Get-RandomDecimal -Min 1.0 -Max 1.3), 6) + $basePrice = [math]::Round($marketPrice * (Get-RandomDecimal -Min 0.9 -Max 1.0), 6) + + # SKU IDs + $meterId = $ResourceDef.SkuMeterId + $productId = "DZH318Z0$((Get-Random -Minimum 1000 -Maximum 9999))" + $skuId = $ResourceDef.SkuId + + # Meter details from service + $meterCategory = if ($ResourceDef.VmSku) { "Virtual Machines" } else { $ServiceDef.Name } + $meterSubCategory = if ($ResourceDef.VmSku) { "$($ResourceDef.VmSku.InstanceType) Series" } else { $ServiceDef.Subcategory } + $meterName = if ($ResourceDef.VmSku) { $ResourceDef.VmSku.MeterName } else { "$($ServiceDef.Name) - Standard" } + $meterRegion = $region.Name + $meterType = if ($PriceType -eq 'Consumption') { 'Consumption' } elseif ($PriceType -eq 'ReservedInstance') { 'Reservation' } else { 'SavingsPlan' } + + $effectiveStart = [datetime]::new($StartDate.Year, $StartDate.Month, 1) + $effectiveEnd = $effectiveStart.AddYears(1) + + $row = [ordered]@{ + BasePrice = [double]$basePrice + BillingAccountId = $billingAccount.Id + BillingAccountName = $billingAccount.Name + BillingCurrency = $Config.BillingCurrency + BillingProfileId = (Get-RandomElement -Array $Identity.BillingProfileIds) + BillingProfileName = "Billing Profile 1" + Currency = $Config.BillingCurrency + CurrencyCode = $Config.BillingCurrency + 
EffectiveEndDate = $effectiveEnd.ToString("yyyy-MM-ddT00:00:00Z") + EffectiveStartDate = $effectiveStart.ToString("yyyy-MM-ddT00:00:00Z") + EnrollmentNumber = $billingAccount.Id + IncludedQuantity = [double]0 + MarketPrice = [double]$marketPrice + MeterCategory = $meterCategory + MeterId = $meterId + MeterName = $meterName + MeterRegion = $meterRegion + MeterSubCategory = $meterSubCategory + MeterType = $meterType + OfferID = "MS-AZR-0017P" + PartNumber = "PT-$(Get-Random -Minimum 10000 -Maximum 99999)" + PriceType = $PriceType + Product = "$meterCategory - $meterName" + ProductId = $productId + ServiceFamily = $ServiceDef.Category + SkuId = $skuId + Term = $Term + TierMinimumUnits = [double]0 + UnitOfMeasure = $ServiceDef.PricingUnitDescription + UnitPrice = [double]$unitPrice + x_SourceName = "test-data-generator" + x_SourceProvider = "Microsoft" + x_SourceType = "PriceSheet" + x_SourceVersion = "2023-05-01" + } + + return [PSCustomObject]$row +} + +function New-CommitmentDiscountUsageRow +{ + [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseShouldProcessForStateChangingFunctions', '')] + param( + [hashtable]$Commitment, + [datetime]$UsageDate, + [hashtable]$ResourceDef + ) + + # Split commitment ID to extract order and reservation IDs + # Azure format: /providers/Microsoft.Capacity/reservationOrders/{orderId} + $orderId = $Commitment.SkuOrderId + $reservationId = [guid]::NewGuid().ToString() + + # Hours in a day = 24; simulate utilization 70-100% + $reservedHours = [double]24 + $utilizationPct = (Get-Random -Minimum 70 -Maximum 101) / 100.0 + $usedHours = [math]::Round($reservedHours * $utilizationPct, 2) + + # Instance flexibility + $flexGroup = if ($ResourceDef.VmSku) { "$($ResourceDef.VmSku.InstanceType) Series" } else { "Standard" } + $flexRatio = Get-RandomElement -Array @([double]1.0, [double]0.5, [double]2.0, [double]4.0) + + # SkuName from resource + $skuName = if ($ResourceDef.VmSku) { $ResourceDef.VmSku.InstanceType } else { "Standard_D4s_v5" } 
+ + $row = [ordered]@{ + InstanceFlexibilityGroup = $flexGroup + InstanceFlexibilityRatio = $flexRatio + InstanceId = $ResourceDef.ResourceId + Kind = "reservation" + ReservationId = $reservationId + ReservationOrderId = $orderId + ReservedHours = $reservedHours + SkuName = $skuName + TotalReservedQuantity = $reservedHours + UsageDate = $UsageDate.ToString("yyyy-MM-ddT00:00:00Z") + UsedHours = $usedHours + x_SourceName = "test-data-generator" + x_SourceProvider = "Microsoft" + x_SourceType = "ReservationDetails" + x_SourceVersion = "2024-03-01" + } + + return [PSCustomObject]$row +} + +function New-RecommendationRow +{ + [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseShouldProcessForStateChangingFunctions', '')] + param( + [hashtable]$ResourceDef, + [datetime]$RecommendationDate + ) + + $subAccount = $ResourceDef.SubAccount + $region = $ResourceDef.Region + + # Lookback period + $lookBackPeriod = Get-RandomElement -Array @("Last7Days", "Last30Days", "Last60Days") + $lookBackDays = switch ($lookBackPeriod) { "Last7Days" { 7 } "Last30Days" { 30 } "Last60Days" { 60 } } + $firstUsageDate = $RecommendationDate.AddDays(-$lookBackDays) + + # Cost projections + $costWithoutRI = Get-RandomDecimal -Min 500 -Max 50000 + $netSavings = [math]::Round($costWithoutRI * (Get-Random -Minimum 15 -Maximum 45) / 100.0, 2) + $totalCostWithRI = [math]::Round($costWithoutRI - $netSavings, 2) + + # Recommended quantities + $recommendedQty = [double](Get-Random -Minimum 1 -Maximum 20) + $recommendedQtyNormalized = [math]::Round($recommendedQty * (Get-RandomElement -Array @([double]1, [double]2, [double]4, [double]8)), 2) + + # Flexibility + $flexGroup = if ($ResourceDef.VmSku) { "$($ResourceDef.VmSku.InstanceType) Series" } else { "Standard" } + $flexRatio = Get-RandomElement -Array @([double]1.0, [double]0.5, [double]2.0, [double]4.0) + $normalizedSize = if ($ResourceDef.VmSku) { $ResourceDef.VmSku.InstanceType } else { "Standard_D4s_v5" } + $skuName = $normalizedSize + $term = 
Get-RandomElement -Array @("P1Y", "P3Y") + $scope = Get-RandomElement -Array @("Shared", "Single") + + $effectiveCostBefore = $costWithoutRI + $effectiveCostAfter = $totalCostWithRI + $effectiveCostSavings = $netSavings + + # x_RecommendationDetails as JSON string (ADX will parse it as dynamic) + $recoDetails = @{ + CommitmentDiscountNormalizedGroup = $flexGroup + CommitmentDiscountNormalizedRatio = $flexRatio + CommitmentDiscountNormalizedSize = $normalizedSize + CommitmentDiscountResourceType = $ResourceDef.ResourceType + CommitmentDiscountScope = $scope + LookbackPeriodDuration = $lookBackPeriod + LookbackPeriodStart = $firstUsageDate.ToString("yyyy-MM-ddT00:00:00Z") + RecommendedQuantity = $recommendedQty + RecommendedQuantityNormalized = $recommendedQtyNormalized + RegionId = $region.Id + RegionName = $region.Name + SkuMeterId = $ResourceDef.SkuMeterId + SkuSize = $normalizedSize + SkuTerm = $term + } | ConvertTo-Json -Compress + + $row = [ordered]@{ + CostWithNoReservedInstances = [double]$costWithoutRI + CostWithNoReservedInstancesJson = '' + FirstUsageDate = $firstUsageDate.ToString("yyyy-MM-ddT00:00:00Z") + InstanceFlexibilityGroup = $flexGroup + InstanceFlexibilityRatio = $flexRatio + Location = $region.Name + LookBackPeriod = $lookBackPeriod + MeterId = $ResourceDef.SkuMeterId + NetSavings = [double]$netSavings + NetSavingsJson = '' + NormalizedSize = $normalizedSize + ProviderName = "Microsoft" + RecommendedQuantity = $recommendedQty + RecommendedQuantityNormalized = $recommendedQtyNormalized + ResourceId = $ResourceDef.ResourceId + ResourceName = $ResourceDef.ResourceName + ResourceType = $ResourceDef.ResourceType + Scope = $scope + SKU = $skuName + SkuName = $skuName + SkuProperties = '' + SubAccountId = $subAccount.Id + SubAccountName = $subAccount.Name + SubscriptionId = ($subAccount.Id -replace '^/subscriptions/', '') + Term = $term + TotalCostWithReservedInstances = [double]$totalCostWithRI + TotalCostWithReservedInstancesJson = '' + 
x_EffectiveCostAfter = [double]$effectiveCostAfter + x_EffectiveCostBefore = [double]$effectiveCostBefore + x_EffectiveCostSavings = [double]$effectiveCostSavings + x_RecommendationCategory = "Reservation" + x_RecommendationDate = $RecommendationDate.ToString("yyyy-MM-ddT00:00:00Z") + x_RecommendationDescription = "Purchase $([int]$recommendedQty) $normalizedSize reservation ($term) in $($region.Name) to save `$$([string]::Format('{0:N2}', $netSavings))" + x_RecommendationDetails = $recoDetails + x_RecommendationId = [guid]::NewGuid().ToString() + x_ResourceGroupName = $ResourceDef.ResourceGroup + x_SourceName = "test-data-generator" + x_SourceProvider = "Microsoft" + x_SourceType = "ReservationRecommendations" + x_SourceVersion = "2023-05-01" + } - # v1.1+ columns - if ($FocusVer -ge [version]'1.1') + return [PSCustomObject]$row +} + +function New-TransactionRow +{ + [Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseShouldProcessForStateChangingFunctions', '')] + param( + [hashtable]$Identity, + [hashtable]$Commitment, + [datetime]$EventDate + ) + + $subAccount = Get-RandomElement -Array $Identity.SubAccounts + $billingAccount = $subAccount.BillingAccount + $region = Get-RandomElement -Array @("swedencentral", "eastus", "westeurope", "westus2", "italynorth") + + # Event type distribution: 70% Purchase, 15% Refund, 15% Cancel + $eventRoll = Get-Random -Maximum 100 + $eventType = if ($eventRoll -lt 70) { "Purchase" } elseif ($eventRoll -lt 85) { "Refund" } else { "Cancel" } + + # Amount: purchases are positive, refunds/cancels are negative + $amount = Get-RandomDecimal -Min 500 -Max 50000 + if ($eventType -in @("Refund", "Cancel")) + { + $amount = -[math]::Abs($amount) + } + + $billingFrequency = Get-RandomElement -Array @("OneTime", "Recurring") + $billingMonth = [datetime]::new($EventDate.Year, $EventDate.Month, 1) + $term = Get-RandomElement -Array @("P1Y", "P3Y") + $quantity = [double](Get-Random -Minimum 1 -Maximum 10) + + # SKU name from commitment type 
+ $armSkuName = Get-RandomElement -Array @("Standard_D4s_v5", "Standard_D8s_v5", "Standard_E4s_v5", "Standard_D4as_v5", "Standard_E16s_v5") + + $orderId = $Commitment.SkuOrderId + $orderName = $Commitment.Name + + # Invoice info + $invoiceKey = "$($billingAccount.Id)-$($billingMonth.ToString('yyyyMM'))" + if (-not $Identity.InvoiceIds.ContainsKey($invoiceKey)) { - $row['CommitmentDiscountQuantity'] = $commitmentDiscountQuantity - $row['CommitmentDiscountUnit'] = $commitmentDiscountUnit + $Identity.InvoiceIds[$invoiceKey] = "INV-$($billingMonth.ToString('yyyyMM'))-$(Get-Random -Minimum 10000 -Maximum 99999)" + } + $invoiceId = $Identity.InvoiceIds[$invoiceKey] + + $row = [ordered]@{ + AccountName = $billingAccount.Name + AccountOwnerEmail = "owner@contoso.com" + Amount = [double]$amount + ArmSkuName = $armSkuName + BillingFrequency = $billingFrequency + BillingMonth = $billingMonth.ToString("yyyy-MM-ddT00:00:00Z") + BillingProfileId = (Get-RandomElement -Array $Identity.BillingProfileIds) + BillingProfileName = "Billing Profile 1" + CostCenter = "CC-$(Get-Random -Minimum 100 -Maximum 999)" + Currency = "USD" + CurrentEnrollmentId = $billingAccount.Id + DepartmentName = Get-RandomElement -Array @("Engineering", "Finance", "Operations", "IT") + Description = "$armSkuName $($Commitment.Type) $term in $region" + EventDate = $EventDate.ToString("yyyy-MM-ddT00:00:00Z") + EventType = $eventType + Invoice = $invoiceId + InvoiceId = $invoiceId + InvoiceSectionId = "IS-$(Get-Random -Minimum 1000 -Maximum 9999)" + InvoiceSectionName = "Default Invoice Section" + MonetaryCommitment = [double]0 + Overage = [double]0 + PurchasingEnrollment = $billingAccount.Id + PurchasingSubscriptionGuid = ($subAccount.Id -replace '^/subscriptions/', '') + PurchasingSubscriptionName = $subAccount.Name + Quantity = $quantity + Region = $region + ReservationOrderId = $orderId + ReservationOrderName = $orderName + Term = $term + x_SourceName = "test-data-generator" + x_SourceProvider = "Microsoft" 
+ x_SourceType = "ReservationTransactions" + x_SourceVersion = "2023-05-01" } - # v1.2+ columns - if ($FocusVer -ge [version]'1.2') + return [PSCustomObject]$row +} + +# ============================================================================ +# ADX Update Policy Verification +# ============================================================================ +# Ensures all required update policies exist on the final tables in the +# Ingestion database. These policies are the critical link that transforms +# raw ingested data (e.g., Costs_raw) into the final tables (e.g., +# Costs_final_v1_2) that the Hub database queries. +# +# If a policy is missing, data will land in the raw table but never appear +# in dashboard queries. This function checks each policy and re-applies +# any that are missing or empty. +# ============================================================================ + +function Invoke-EnsureUpdatePolicy +{ + [CmdletBinding()] + param( + [Parameter(Mandatory)] + [string]$AdxClusterUri, + + [switch]$Backfill + ) + + Write-Host "" + Write-Host "Verifying ADX Update Policies..." -ForegroundColor Cyan + + # Get ADX token + $adxToken = $null + try { - $row['BillingAccountType'] = $Config.BillingAccountType - $row['InvoiceId'] = $invoiceId - $row['SubAccountType'] = $Config.SubAccountType + $adxToken = (az account get-access-token --resource $AdxClusterUri --query accessToken -o tsv 2>$null) + } + catch + { + Write-Warning "Could not get ADX access token. Skipping update policy verification." + return } - # v1.3+ columns (ServiceProviderName replaces ProviderName) - if ($FocusVer -ge [version]'1.3') + if (-not $adxToken) { - $row['HostProviderName'] = $Config.HostProviderName - $row['ServiceProviderName'] = $Config.ServiceProviderName + Write-Warning "No ADX token available. Skipping update policy verification." 
+ return } - # ===================== FinOps Hub / Dashboard required columns ===================== - # These x_ prefixed columns are always included for FinOps Hub dashboard compatibility - $row['x_BillingAccountId'] = $billingAccount.Id - $row['x_BillingAccountAgreement'] = $Config.BillingAccountAgreement - $row['x_BillingProfileId'] = Get-RandomElement -Array $Identity.BillingProfileIds - $row['x_ResourceGroupName'] = $res.ResourceGroup - $row['x_ResourceType'] = $res.ResourceType + $adxHeaders = @{ "Authorization" = "Bearer $adxToken"; "Content-Type" = "application/json" } - # Publisher - $row['PublisherName'] = $publisherName - $row['x_PublisherCategory'] = if ($isMarketplace) { "Marketplace" } else { $Provider } + # Define all required update policies: FinalTable -> SourceTable -> TransformFunction + $requiredPolicies = @( + @{ FinalTable = "Costs_final_v1_2"; SourceTable = "Costs_raw"; TransformFunction = "Costs_transform_v1_2()" } + @{ FinalTable = "Prices_final_v1_2"; SourceTable = "Prices_raw"; TransformFunction = "Prices_transform_v1_2()" } + @{ FinalTable = "CommitmentDiscountUsage_final_v1_2"; SourceTable = "CommitmentDiscountUsage_raw"; TransformFunction = "CommitmentDiscountUsage_transform_v1_2()" } + @{ FinalTable = "Recommendations_final_v1_2"; SourceTable = "Recommendations_raw"; TransformFunction = "Recommendations_transform_v1_2()" } + @{ FinalTable = "Transactions_final_v1_2"; SourceTable = "Transactions_raw"; TransformFunction = "Transactions_transform_v1_2()" } + ) - # Unit prices (dashboard discount analysis) - $row['x_EffectiveUnitPrice'] = $effectiveUnitPrice - $row['x_BilledUnitPrice'] = $billedUnitPrice - $row['x_OnDemandCost'] = $onDemandCost - $row['x_OnDemandUnitPrice'] = $onDemandUnitPrice + $policiesFixed = 0 + $policiesOk = 0 + $backfillCount = 0 - # SKU columns - $row['x_SkuDescription'] = $x_SkuDescription - $row['x_SkuInstanceType'] = $x_SkuInstanceType - $row['x_SkuCoreCount'] = $x_SkuCoreCount - $row['x_SkuMeterCategory'] = 
$x_SkuMeterCategory - $row['x_SkuMeterSubcategory'] = $x_SkuMeterSubcategory - $row['x_SkuMeterName'] = $x_SkuMeterName - $row['x_SkuMeterId'] = $res.SkuMeterId - $row['x_SkuOfferId'] = if ($Provider -eq "Azure") { "MS-AZR-0017P" } else { $null } - $row['x_SkuLicenseStatus'] = $x_SkuLicenseStatus - $row['x_SkuLicenseQuantity'] = $x_SkuLicenseQuantity - $row['x_SkuLicenseType'] = $x_SkuLicenseType + foreach ($policy in $requiredPolicies) + { + $finalTable = $policy.FinalTable + $sourceTable = $policy.SourceTable + $transformFn = $policy.TransformFunction + + # Check current update policy + $showBody = @{ db = "Ingestion"; csl = ".show table ['$finalTable'] policy update" } | ConvertTo-Json + $policyExists = $false + + try + { + $showResult = Invoke-RestMethod -Uri "$AdxClusterUri/v1/rest/mgmt" -Method Post -Headers $adxHeaders -Body $showBody -ErrorAction Stop + if ($showResult.Tables -and $showResult.Tables[0].Rows) + { + # The policy column contains the JSON array — check if it has entries + foreach ($row in $showResult.Tables[0].Rows) + { + $policyJson = if ($row -is [array]) { $row[1] } else { $row } + if ($policyJson -and $policyJson -ne "[]" -and $policyJson -ne "null" -and $policyJson.Length -gt 5) + { + $policyExists = $true + } + } + } + } + catch + { + Write-Host " ⚠ Could not check policy for $finalTable (table may not exist yet)" -ForegroundColor Gray + continue + } - # Commitment linkage - $row['x_SkuOrderId'] = $x_SkuOrderId - $row['x_SkuTerm'] = $x_SkuTerm + if ($policyExists) + { + Write-Host " ✓ $finalTable — update policy OK" -ForegroundColor DarkGreen + $policiesOk++ + } + else + { + Write-Host " ✗ $finalTable — update policy MISSING, re-applying..." 
-ForegroundColor Yellow - # Pricing detail - $row['x_PricingBlockSize'] = $service.PricingBlockSize - $row['x_PricingUnitDescription'] = $service.PricingUnitDescription + $policyDef = "[{""IsEnabled"":true, ""Source"":""$sourceTable"", ""Query"":""$transformFn"", ""IsTransactional"":true, ""PropagateIngestionProperties"":true}]" + $alterCsl = ".alter table ['$finalTable'] policy update @'$policyDef'" + $alterBody = @{ db = "Ingestion"; csl = $alterCsl } | ConvertTo-Json - # Data quality / metadata - $row['x_SourceChanges'] = $x_SourceChanges - $row['x_CloudProvider'] = $Provider - $row['x_FocusVersion'] = $FocusVersion - $row['x_IngestionTime'] = Get-IsoDateTime -Date (Get-Date) + try + { + Invoke-RestMethod -Uri "$AdxClusterUri/v1/rest/mgmt" -Method Post -Headers $adxHeaders -Body $alterBody -ErrorAction Stop | Out-Null + Write-Host " ✓ Policy applied: $sourceTable → $transformFn → $finalTable" -ForegroundColor Green + $policiesFixed++ + } + catch + { + Write-Host " ✗ Failed to apply policy for $finalTable — $($_.Exception.Message)" -ForegroundColor Red + } + } - return [PSCustomObject]$row + # Backfill check: if raw has data but final is empty, run the transform + if ($Backfill) + { + try + { + $rawCountBody = @{ db = "Ingestion"; csl = "['$sourceTable'] | count" } | ConvertTo-Json + $rawResult = Invoke-RestMethod -Uri "$AdxClusterUri/v1/rest/query" -Method Post -Headers $adxHeaders -Body $rawCountBody -ErrorAction Stop + $rawCount = 0 + if ($rawResult.Tables -and $rawResult.Tables[0].Rows) + { + $rawCount = [long]($rawResult.Tables[0].Rows[0] | Select-Object -First 1) + } + + $finalCountBody = @{ db = "Ingestion"; csl = "['$finalTable'] | count" } | ConvertTo-Json + $finalResult = Invoke-RestMethod -Uri "$AdxClusterUri/v1/rest/query" -Method Post -Headers $adxHeaders -Body $finalCountBody -ErrorAction Stop + $finalCount = 0 + if ($finalResult.Tables -and $finalResult.Tables[0].Rows) + { + $finalCount = [long]($finalResult.Tables[0].Rows[0] | Select-Object -First 
1) + } + + if ($rawCount -gt 0 -and $finalCount -eq 0) + { + Write-Host " ⚠ Backfill needed: $sourceTable has $rawCount rows but $finalTable has $finalCount" -ForegroundColor Yellow + Write-Host " Running backfill: .set-or-append $finalTable <| $transformFn ..." -ForegroundColor Gray + $backfillCsl = ".set-or-append ['$finalTable'] <| $transformFn" + $backfillBody = @{ db = "Ingestion"; csl = $backfillCsl } | ConvertTo-Json + Invoke-RestMethod -Uri "$AdxClusterUri/v1/rest/mgmt" -Method Post -Headers $adxHeaders -Body $backfillBody -ErrorAction Stop | Out-Null + Write-Host " ✓ Backfill complete for $finalTable" -ForegroundColor Green + $backfillCount++ + } + elseif ($rawCount -gt 0) + { + Write-Host " ℹ $sourceTable=$rawCount, $finalTable=$finalCount — no backfill needed" -ForegroundColor Gray + } + } + catch + { + Write-Host " ⚠ Backfill check failed for $finalTable — $($_.Exception.Message)" -ForegroundColor Gray + } + } + } + + # Summary + Write-Host "" + if ($policiesFixed -gt 0 -or $backfillCount -gt 0) + { + Write-Host " Update Policy Summary: $policiesOk OK, $policiesFixed fixed" -ForegroundColor Yellow + if ($backfillCount -gt 0) { Write-Host " Backfill Summary: $backfillCount tables backfilled" -ForegroundColor Yellow } + } + else + { + Write-Host " ✓ All $policiesOk update policies verified" -ForegroundColor Green + } +} + +# ============================================================================ +# Test Data Cleanup (NukeTestData) +# ============================================================================ +# Purges ALL test data from the ADX cluster, storage containers, and local +# test-data folder so you can re-run the generator with a clean slate. Steps: +# 1. Stop ADF triggers (optional, prevents re-ingestion during cleanup) +# 2. Purge all ADX tables in the Hub and Ingestion databases (via REST API) +# 3. Delete all blobs from the msexports and ingestion storage containers +# 4. 
Delete local test-data folder
+#
+# Usage:
+#   Invoke-NukeTestData -AdxClusterUri "https://finopsdemoadx4uj6pm.italynorth.kusto.windows.net" `
+#                       -StorageAccountName "stfinopsdemo4uj6pmwee34z" `
+#                       -AdfName "adf-finopsdemo-4uj6pm" `
+#                       -ResourceGroupName "rg-finopshub-demo"
+# ============================================================================
+
+function Invoke-NukeTestData
+{
+    [CmdletBinding(SupportsShouldProcess, ConfirmImpact = 'High')]
+    param(
+        [Parameter(Mandatory)]
+        [string]$AdxClusterUri,
+
+        [Parameter(Mandatory)]
+        [string]$StorageAccountName,
+
+        [string]$AdfName,
+
+        [string]$ResourceGroupName,
+
+        [ValidateSet("login", "key")]
+        [string]$StorageAuthMode = "login",
+
+        [switch]$StopTriggers,
+
+        [switch]$Force
+    )
+
+    $ErrorActionPreference = 'Continue'
+
+    Write-Host ""
+    Write-Host ("=" * 70) -ForegroundColor Red
+    Write-Host " ☢️ NUKE TEST DATA — Full Environment Cleanup" -ForegroundColor Red
+    Write-Host ("=" * 70) -ForegroundColor Red
+    Write-Host ""
+    Write-Host " ADX Cluster: $AdxClusterUri" -ForegroundColor Yellow
+    Write-Host " Storage Account: $StorageAccountName" -ForegroundColor Yellow
+    if ($AdfName) { Write-Host " ADF Factory: $AdfName" -ForegroundColor Yellow }
+    Write-Host ""
+
+    if (-not $Force -and -not $PSCmdlet.ShouldProcess("ALL demo data", "Permanently delete"))
+    {
+        Write-Host "Aborted." -ForegroundColor Yellow
+        return
+    }
+
+    # -----------------------------------------------------------------
+    # Step 1: Stop ADF Triggers (prevents re-ingestion during cleanup)
+    # -----------------------------------------------------------------
+    if ($StopTriggers -and $AdfName -and $ResourceGroupName)
+    {
+        Write-Host "[1/4] Stopping ADF triggers..."
-ForegroundColor Cyan
+        $triggers = $AdfTriggerNames
+        foreach ($trigger in $triggers)
+        {
+            $state = (az datafactory trigger show --factory-name $AdfName --resource-group $ResourceGroupName --name $trigger --query "properties.runtimeState" -o tsv 2>$null)
+            if ($state -eq "Started")
+            {
+                Write-Host " Stopping $trigger..." -ForegroundColor Gray
+                az datafactory trigger stop --factory-name $AdfName --resource-group $ResourceGroupName --name $trigger --only-show-errors 2>$null
+                Write-Host " ✓ $trigger stopped" -ForegroundColor Green
+            }
+            else
+            {
+                Write-Host " $trigger already stopped (state=$state)" -ForegroundColor Gray
+            }
+        }
+    }
+    else
+    {
+        Write-Host "[1/4] Skipping ADF trigger stop (no -StopTriggers or missing -AdfName/-ResourceGroupName)" -ForegroundColor Gray
+    }
+
+    # -----------------------------------------------------------------
+    # Step 2: Purge all ADX tables in Hub and Ingestion databases
+    # -----------------------------------------------------------------
+    Write-Host ""
+    Write-Host "[2/4] Purging ADX tables..." -ForegroundColor Cyan
+
+    # Use REST API directly — more reliable than az kusto query
+    $adxToken = $null
+    try
+    {
+        $adxToken = (az account get-access-token --resource $AdxClusterUri --query accessToken -o tsv 2>$null)
+    }
+    catch
+    {
+        Write-Warning "Could not get ADX access token. ADX tables will not be purged."
+ } + + if ($adxToken) + { + $adxHeaders = @{ "Authorization" = "Bearer $adxToken"; "Content-Type" = "application/json" } + + $databases = @("Ingestion", "Hub") + foreach ($db in $databases) + { + Write-Host " Database: $db" -ForegroundColor Yellow + + # List all tables via REST API + $listBody = @{ db = $db; csl = ".show tables | project TableName" } | ConvertTo-Json + $tableNames = @() + try + { + $listResult = Invoke-RestMethod -Uri "$AdxClusterUri/v1/rest/mgmt" -Method Post -Headers $adxHeaders -Body $listBody -ErrorAction Stop + if ($listResult.Tables -and $listResult.Tables[0].Rows) + { + foreach ($row in $listResult.Tables[0].Rows) + { + $name = if ($row -is [array]) { $row[0] } else { $row } + if ($name -and $name -notlike '.*') { $tableNames += $name } + } + } + } + catch + { + Write-Host " Could not list tables in $db, trying known table names..." -ForegroundColor Gray + $tableNames = if ($db -eq "Ingestion") + { + @( + "Costs_raw", "Prices_raw", "Recommendations_raw", "Transactions_raw", + "CommitmentDiscountUsage_raw", "ActualCosts_raw", "AmortizedCosts_raw", + "MACC_Lots_raw", "MACC_Events_raw" + ) + } + else + { + @( + "Costs_final_v1_0", "Costs_final_v1_2", "Costs_final_v1_3", + "Prices_final_v1_2", "Recommendations_final_v1_2", + "Transactions_final_v1_2", "CommitmentDiscountUsage_final_v1_2", + "MACC_ManualEntry" + ) + } + } + + foreach ($tableName in $tableNames) + { + Write-Host " Clearing $tableName..." 
-ForegroundColor Gray + $clearBody = @{ db = $db; csl = ".clear table ['$tableName'] data" } | ConvertTo-Json + try + { + Invoke-RestMethod -Uri "$AdxClusterUri/v1/rest/mgmt" -Method Post -Headers $adxHeaders -Body $clearBody -ErrorAction Stop | Out-Null + Write-Host " ✓ $tableName cleared" -ForegroundColor DarkGreen + } + catch + { + Write-Host " ⚠ $tableName skipped (may not exist)" -ForegroundColor Gray + } + } + } + } + + # ----------------------------------------------------------------- + # Step 2b: Verify update policies are still intact after clearing + # ----------------------------------------------------------------- + if ($adxToken) + { + Write-Host "" + Write-Host "[2b/4] Verifying update policies..." -ForegroundColor Cyan + Invoke-EnsureUpdatePolicy -AdxClusterUri $AdxClusterUri + } + + # ----------------------------------------------------------------- + # Step 3: Delete all blobs from storage containers + # ----------------------------------------------------------------- + Write-Host "" + Write-Host "[3/4] Purging storage containers..." -ForegroundColor Cyan + + $containers = @("msexports", "ingestion") + $authArgs = if ($StorageAuthMode -eq "login") { @("--auth-mode", "login") } else { @() } + + foreach ($container in $containers) + { + Write-Host " Container: $container" -ForegroundColor Yellow + Write-Host " Deleting all blobs..." -ForegroundColor Gray + az storage blob delete-batch --account-name $StorageAccountName @authArgs --source $container --only-show-errors 2>$null + Write-Host " ✓ $container purged" -ForegroundColor Green + } + + # ----------------------------------------------------------------- + # Step 4: Delete local test-data folder + # ----------------------------------------------------------------- + Write-Host "" + Write-Host "[4/4] Cleaning local test-data..." 
-ForegroundColor Cyan + + $localTestDataPath = Join-Path (Split-Path $PSScriptRoot -Parent) "test-data" + if (Test-Path $localTestDataPath) + { + Remove-Item -Recurse -Force $localTestDataPath + Write-Host " ✓ Deleted $localTestDataPath" -ForegroundColor Green + } + else + { + Write-Host " No local test-data folder found" -ForegroundColor Gray + } + + # Also check OutputPath if different + $cwdOutputPath = Join-Path (Get-Location) "test-data" + if ((Test-Path $cwdOutputPath) -and ($cwdOutputPath -ne $localTestDataPath)) + { + Remove-Item -Recurse -Force $cwdOutputPath + Write-Host " ✓ Deleted $cwdOutputPath" -ForegroundColor Green + } + + # ----------------------------------------------------------------- + # Done + # ----------------------------------------------------------------- + Write-Host "" + Write-Host ("=" * 70) -ForegroundColor Green + Write-Host " ✅ All test data nuked successfully!" -ForegroundColor Green + Write-Host ("=" * 70) -ForegroundColor Green + Write-Host "" + Write-Host "Next steps:" -ForegroundColor Yellow + Write-Host " Re-generate and upload in one command:" -ForegroundColor White + Write-Host " .\Generate-MultiCloudTestData.ps1 -TotalRowTarget 300000 -Upload ``" -ForegroundColor Gray + Write-Host " -StorageAccountName $StorageAccountName ``" -ForegroundColor Gray + Write-Host " -AdfName -ResourceGroupName -StartTriggers" -ForegroundColor Gray + Write-Host "" } # ============================================================================ # Main Execution # ============================================================================ +# Handle NukeTestData mode — clean everything and exit +if ($NukeTestData) +{ + if (-not $AdxClusterUri) + { + Write-Error "-NukeTestData requires -AdxClusterUri (e.g., https://finopsdemoadx4uj6pm.italynorth.kusto.windows.net)" + exit 1 + } + if (-not $StorageAccountName) + { + Write-Error "-NukeTestData requires -StorageAccountName" + exit 1 + } + + Invoke-NukeTestData -AdxClusterUri $AdxClusterUri ` + 
-StorageAccountName $StorageAccountName ` + -AdfName $AdfName ` + -ResourceGroupName $ResourceGroupName ` + -StopTriggers:$StartTriggers ` + -Force:$true + + exit 0 +} + Write-Host ("=" * 70) -ForegroundColor Cyan Write-Host "FinOps Hub Multi-Cloud FOCUS Test Data Generator v4.0" -ForegroundColor Cyan Write-Host ("=" * 70) -ForegroundColor Cyan @@ -1152,10 +2137,22 @@ foreach ($provider in $providers) $config = $ProviderConfigs[$provider] $identity = $providerIdentities[$provider] $baseFileName = "focus-$($provider.ToLower())-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))" - $csvFilePath = Join-Path $OutputPath "$baseFileName.csv" + $fileExt = if ($OutputFormat -eq 'Parquet') { 'parquet' } else { 'csv' } + $dataFilePath = Join-Path $OutputPath "$baseFileName.$fileExt" $providerCostSum = [double]0 $headerWritten = $false + # Always stream daily batches to CSV on disk (never OOMs). + # If Parquet is requested, convert the CSV to Parquet at the end. + $csvTempPath = if ($OutputFormat -eq 'Parquet') + { + [IO.Path]::ChangeExtension($dataFilePath, '.csv') + } + else + { + $dataFilePath + } + $currentDate = $StartDate $rowsGenerated = 0 $lastPct = -1 @@ -1183,16 +2180,16 @@ foreach ($provider in $providers) } # Append daily batch to CSV (stream to disk, free memory) - if ($PSCmdlet.ShouldProcess($csvFilePath, "Write $todayCount rows")) + if ($PSCmdlet.ShouldProcess($csvTempPath, "Write $todayCount rows")) { if (-not $headerWritten) { - $dayRows | Export-Csv -Path $csvFilePath -NoTypeInformation -Encoding UTF8 + $dayRows | Export-Csv -Path $csvTempPath -NoTypeInformation -Encoding UTF8 $headerWritten = $true } else { - $dayRows | Export-Csv -Path $csvFilePath -NoTypeInformation -Encoding UTF8 -Append + $dayRows | Export-Csv -Path $csvTempPath -NoTypeInformation -Encoding UTF8 -Append } } $dayRows.Clear() @@ -1210,15 +2207,25 @@ foreach ($provider in $providers) } } + # Convert CSV → Parquet if requested (reads from disk, never holds full dataset in 
memory at once) + if ($OutputFormat -eq 'Parquet' -and (Test-Path $csvTempPath)) + { + if ($PSCmdlet.ShouldProcess($dataFilePath, "Convert $rowsGenerated rows CSV→Parquet")) + { + Import-Csv -Path $csvTempPath -Encoding UTF8 | Export-Parquet -FilePath $dataFilePath -Force + Remove-Item $csvTempPath -Force -ErrorAction SilentlyContinue + } + } + Write-Host " $provider : Generated $([string]::Format('{0:N0}', $rowsGenerated)) rows, `$$([string]::Format('{0:N2}', $providerCostSum))" -ForegroundColor Green - $generatedFiles += $csvFilePath + $generatedFiles += $dataFilePath $allProviderCosts[$provider] = $providerCostSum $allProviderRowCounts[$provider] = $rowsGenerated $totalRows += $rowsGenerated # Generate manifest.json $manifestFilePath = Join-Path $OutputPath "manifest-$($provider.ToLower()).json" - $fileSize = if (Test-Path $csvFilePath) { (Get-Item $csvFilePath).Length } else { 0 } + $fileSize = if (Test-Path $dataFilePath) { (Get-Item $dataFilePath).Length } else { 0 } $manifest = @{ exportConfig = @{ exportName = "focus-$($Provider.ToLower())-export" @@ -1232,12 +2239,12 @@ foreach ($provider in $providers) deliveryConfig = @{ partitionData = $true dataOverwriteBehavior = "OverwritePreviousReport" - fileFormat = "Csv" + fileFormat = $OutputFormat compressionMode = "None" } blobs = @( @{ - blobName = "$baseFileName.csv" + blobName = "$baseFileName.$fileExt" byteCount = $fileSize } ) @@ -1261,6 +2268,261 @@ foreach ($provider in $providers) [System.GC]::Collect() } +# ============================================================================ +# Generate Additional FOCUS Datasets (Azure-only) +# ============================================================================ +# These datasets are Azure Cost Management-specific exports: +# - Prices (PriceSheet) +# - CommitmentDiscountUsage (ReservationDetails) +# - Recommendations (ReservationRecommendations) +# - Transactions (ReservationTransactions) +# 
============================================================================ + +$additionalDatasetFiles = @{} + +if ($providers -contains "Azure") +{ + $azureConfig = $ProviderConfigs["Azure"] + $azureIdentity = $providerIdentities["Azure"] + + # --- Prices Dataset --- + Write-Host "" + Write-Host "Generating Prices dataset (Azure price sheet)..." -ForegroundColor Yellow + + $priceRows = [System.Collections.Generic.List[PSCustomObject]]::new() + # Generate price rows for each resource in the identity pool × price types + $priceTypes = @( + @{ Type = "Consumption"; Term = "" }, + @{ Type = "ReservedInstance"; Term = "P1Y" }, + @{ Type = "ReservedInstance"; Term = "P3Y" }, + @{ Type = "SavingsPlan"; Term = "P1Y" }, + @{ Type = "SavingsPlan"; Term = "P3Y" } + ) + + # Sample a subset of resources for price rows (one price per resource per type) + $priceResourceSample = $azureIdentity.Resources | Get-Random -Count ([math]::Min(100, $azureIdentity.Resources.Count)) + foreach ($res in $priceResourceSample) + { + foreach ($pt in $priceTypes) + { + # Consumption rows for all, RI/SP only for eligible services + if ($pt.Type -ne "Consumption" -and $res.Service.Category -notin @("Compute", "Databases")) + { + continue + } + $priceRow = New-PriceRow -Config $azureConfig -Identity $azureIdentity -ServiceDef $res.Service -ResourceDef $res -PriceType $pt.Type -Term $pt.Term + $priceRows.Add($priceRow) + } + } + + $priceFileExt = if ($OutputFormat -eq 'Parquet') { 'parquet' } else { 'csv' } + $priceFileName = "prices-azure-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd')).$priceFileExt" + $priceFilePath = Join-Path $OutputPath $priceFileName + + if ($OutputFormat -eq 'Parquet' -and $priceRows.Count -gt 0) + { + # Handle nulls for PSParquet + $numericCols = [System.Collections.Generic.HashSet[string]]::new([string[]]@( + 'BasePrice', 'IncludedQuantity', 'MarketPrice', 'TierMinimumUnits', 'UnitPrice' + )) + $propNames = $priceRows[0].PSObject.Properties.Name + 
foreach ($r in $priceRows) + { + foreach ($pn in $propNames) + { + if ($null -eq $r.$pn) + { + $r.$pn = if ($numericCols.Contains($pn)) { [double]0 } else { '' } + } + } + } + $priceRows.ToArray() | Export-Parquet -FilePath $priceFilePath -Force + } + elseif ($priceRows.Count -gt 0) + { + $priceRows | Export-Csv -Path $priceFilePath -NoTypeInformation -Encoding UTF8 + } + + Write-Host " Prices: $($priceRows.Count) rows saved to $priceFileName" -ForegroundColor Green + $generatedFiles += $priceFilePath + $additionalDatasetFiles["Prices"] = $priceFilePath + $priceRows.Clear(); $priceRows = $null + [System.GC]::Collect() + + # --- CommitmentDiscountUsage Dataset --- + Write-Host "Generating CommitmentDiscountUsage dataset (reservation details)..." -ForegroundColor Yellow + + $cduRows = [System.Collections.Generic.List[PSCustomObject]]::new() + if ($azureIdentity.Commitments.Count -gt 0) + { + # Only Reservation commitments have usage details + $reservationCommitments = $azureIdentity.Commitments | Where-Object { $_.Type -eq "Reservation" } + if ($reservationCommitments.Count -gt 0) + { + # For each day in the date range, generate usage for each reservation + $currentDate = $StartDate + while ($currentDate -le $EndDate) + { + foreach ($commitment in $reservationCommitments) + { + # Each reservation may cover 1-3 resources per day + $coveredCount = Get-Random -Minimum 1 -Maximum 4 + $coveredResources = $azureIdentity.Resources | Where-Object { $_.Service.Category -in @("Compute", "Databases") } | Get-Random -Count ([math]::Min($coveredCount, ($azureIdentity.Resources | Where-Object { $_.Service.Category -in @("Compute", "Databases") }).Count)) + foreach ($res in $coveredResources) + { + $cduRow = New-CommitmentDiscountUsageRow -Commitment $commitment -UsageDate $currentDate -ResourceDef $res + $cduRows.Add($cduRow) + } + } + $currentDate = $currentDate.AddDays(1) + } + } + } + + $cduFileExt = if ($OutputFormat -eq 'Parquet') { 'parquet' } else { 'csv' } + $cduFileName = 
"commitmentdiscountusage-azure-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd')).$cduFileExt" + $cduFilePath = Join-Path $OutputPath $cduFileName + + if ($OutputFormat -eq 'Parquet' -and $cduRows.Count -gt 0) + { + $numericCols = [System.Collections.Generic.HashSet[string]]::new([string[]]@( + 'InstanceFlexibilityRatio', 'ReservedHours', 'TotalReservedQuantity', 'UsedHours' + )) + $propNames = $cduRows[0].PSObject.Properties.Name + foreach ($r in $cduRows) + { + foreach ($pn in $propNames) + { + if ($null -eq $r.$pn) + { + $r.$pn = if ($numericCols.Contains($pn)) { [double]0 } else { '' } + } + } + } + $cduRows.ToArray() | Export-Parquet -FilePath $cduFilePath -Force + } + elseif ($cduRows.Count -gt 0) + { + $cduRows | Export-Csv -Path $cduFilePath -NoTypeInformation -Encoding UTF8 + } + + Write-Host " CommitmentDiscountUsage: $($cduRows.Count) rows saved to $cduFileName" -ForegroundColor Green + $generatedFiles += $cduFilePath + $additionalDatasetFiles["CommitmentDiscountUsage"] = $cduFilePath + $cduRows.Clear(); $cduRows = $null + [System.GC]::Collect() + + # --- Recommendations Dataset --- + Write-Host "Generating Recommendations dataset (reservation recommendations)..." 
-ForegroundColor Yellow + + $recoRows = [System.Collections.Generic.List[PSCustomObject]]::new() + # Generate recommendations for compute/database resources eligible for reservations + $eligibleResources = $azureIdentity.Resources | Where-Object { $_.Service.Category -in @("Compute", "Databases") } + $recoResourceSample = $eligibleResources | Get-Random -Count ([math]::Min(50, $eligibleResources.Count)) + + foreach ($res in $recoResourceSample) + { + # 1-3 recommendations per resource (different lookback/term combos) + $recoCount = Get-Random -Minimum 1 -Maximum 4 + for ($i = 0; $i -lt $recoCount; $i++) + { + $recoDate = $EndDate.AddDays(-(Get-Random -Minimum 0 -Maximum 30)) + $recoRow = New-RecommendationRow -ResourceDef $res -RecommendationDate $recoDate + $recoRows.Add($recoRow) + } + } + + $recoFileExt = if ($OutputFormat -eq 'Parquet') { 'parquet' } else { 'csv' } + $recoFileName = "recommendations-azure-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd')).$recoFileExt" + $recoFilePath = Join-Path $OutputPath $recoFileName + + if ($OutputFormat -eq 'Parquet' -and $recoRows.Count -gt 0) + { + $numericCols = [System.Collections.Generic.HashSet[string]]::new([string[]]@( + 'CostWithNoReservedInstances', 'InstanceFlexibilityRatio', 'NetSavings', + 'RecommendedQuantity', 'RecommendedQuantityNormalized', 'TotalCostWithReservedInstances', + 'x_EffectiveCostAfter', 'x_EffectiveCostBefore', 'x_EffectiveCostSavings' + )) + $propNames = $recoRows[0].PSObject.Properties.Name + foreach ($r in $recoRows) + { + foreach ($pn in $propNames) + { + if ($null -eq $r.$pn) + { + $r.$pn = if ($numericCols.Contains($pn)) { [double]0 } else { '' } + } + } + } + $recoRows.ToArray() | Export-Parquet -FilePath $recoFilePath -Force + } + elseif ($recoRows.Count -gt 0) + { + $recoRows | Export-Csv -Path $recoFilePath -NoTypeInformation -Encoding UTF8 + } + + Write-Host " Recommendations: $($recoRows.Count) rows saved to $recoFileName" -ForegroundColor Green + $generatedFiles 
+= $recoFilePath + $additionalDatasetFiles["Recommendations"] = $recoFilePath + $recoRows.Clear(); $recoRows = $null + [System.GC]::Collect() + + # --- Transactions Dataset --- + Write-Host "Generating Transactions dataset (reservation transactions)..." -ForegroundColor Yellow + + $transRows = [System.Collections.Generic.List[PSCustomObject]]::new() + if ($azureIdentity.Commitments.Count -gt 0) + { + foreach ($commitment in $azureIdentity.Commitments) + { + # 1-4 transactions per commitment over the date range + $transCount = Get-Random -Minimum 1 -Maximum 5 + for ($i = 0; $i -lt $transCount; $i++) + { + $eventDate = $StartDate.AddDays((Get-Random -Minimum 0 -Maximum ([math]::Max(1, $totalDays)))) + if ($eventDate -gt $EndDate) { $eventDate = $EndDate } + $transRow = New-TransactionRow -Identity $azureIdentity -Commitment $commitment -EventDate $eventDate + $transRows.Add($transRow) + } + } + } + + $transFileExt = if ($OutputFormat -eq 'Parquet') { 'parquet' } else { 'csv' } + $transFileName = "transactions-azure-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd')).$transFileExt" + $transFilePath = Join-Path $OutputPath $transFileName + + if ($OutputFormat -eq 'Parquet' -and $transRows.Count -gt 0) + { + $numericCols = [System.Collections.Generic.HashSet[string]]::new([string[]]@( + 'Amount', 'MonetaryCommitment', 'Overage', 'Quantity' + )) + $propNames = $transRows[0].PSObject.Properties.Name + foreach ($r in $transRows) + { + foreach ($pn in $propNames) + { + if ($null -eq $r.$pn) + { + $r.$pn = if ($numericCols.Contains($pn)) { [double]0 } else { '' } + } + } + } + $transRows.ToArray() | Export-Parquet -FilePath $transFilePath -Force + } + elseif ($transRows.Count -gt 0) + { + $transRows | Export-Csv -Path $transFilePath -NoTypeInformation -Encoding UTF8 + } + + Write-Host " Transactions: $($transRows.Count) rows saved to $transFileName" -ForegroundColor Green + $generatedFiles += $transFilePath + $additionalDatasetFiles["Transactions"] = 
$transFilePath + $transRows.Clear(); $transRows = $null + + [System.GC]::Collect() + Write-Host "" +} + # ============================================================================ # Summary # ============================================================================ @@ -1304,11 +2566,32 @@ Write-Host " CommitmentDiscountType: Reservation, Savings Plan (+ Purchase rows if ($focusMajorMinor -ge [version]'1.1') { Write-Host " CommitmentDiscountQuantity/Unit: Included (v1.1+)" } if ($focusMajorMinor -ge [version]'1.2') { Write-Host " BillingAccountType, SubAccountType, InvoiceId: Included (v1.2+)" } if ($focusMajorMinor -ge [version]'1.3') { Write-Host " HostProviderName, ServiceProviderName: Included (v1.3+)" } +if ($focusMajorMinor -ge [version]'1.3') { Write-Host " ContractApplied: JSON for committed-discount rows (v1.3+)" } +if ($focusMajorMinor -ge [version]'1.3') { Write-Host " Allocated* columns: ~10% of AKS/EKS/GKE rows with split cost data (v1.3+)" } Write-Host " Azure Hybrid Benefit: x_SkuLicenseStatus Enabled/Not Enabled" Write-Host " CPU Architecture: Intel/AMD/Arm64 patterns in x_SkuMeterName" Write-Host " Tag coverage: ~80% tagged, ~20% untagged (maturity scorecard)" Write-Host " Data quality anomalies: ~2% rows (ChargeClass=Correction, x_SourceChanges set)" -Write-Host " Note: Cost and Usage dataset only. Contract Commitment dataset not included." +Write-Host " Note: All columns emitted for every version (empty/null for non-applicable) to maintain" +Write-Host " a consistent Parquet schema. Contract Commitment dataset not included." 
+Write-Host "" +Write-Host "Additional FOCUS Datasets (Azure-only):" -ForegroundColor Cyan +if ($additionalDatasetFiles.Count -gt 0) +{ + foreach ($ds in $additionalDatasetFiles.Keys) + { + $dsPath = $additionalDatasetFiles[$ds] + if (Test-Path $dsPath) + { + $dsSize = [math]::Round((Get-Item $dsPath).Length / 1MB, 2) + Write-Host " $ds : $dsSize MB" + } + } +} +else +{ + Write-Host " (Azure not in provider list — additional datasets skipped)" +} Write-Host "" # ============================================================================ @@ -1356,7 +2639,7 @@ if ($Upload -and $StorageAccountName) Write-Host "" Write-Host "Ensuring ADF Triggers are running (BEFORE upload)..." -ForegroundColor Yellow - $triggers = @("msexports_ManifestAdded", "ingestion_ManifestAdded") + $triggers = $AdfTriggerNames foreach ($trigger in $triggers) { if ($PSCmdlet.ShouldProcess($trigger, "Start ADF trigger")) @@ -1388,8 +2671,9 @@ if ($Upload -and $StorageAccountName) { $providerLower = $provider.ToLower() $baseFileName = "focus-$providerLower-$($StartDate.ToString('yyyyMMdd'))-$($EndDate.ToString('yyyyMMdd'))" + $uploadFileExt = if ($OutputFormat -eq 'Parquet') { 'parquet' } else { 'csv' } - $dataFile = "$baseFileName.csv" + $dataFile = "$baseFileName.$uploadFileExt" $dataFilePath = Join-Path $OutputPath $dataFile if (-not (Test-Path $dataFilePath)) @@ -1428,7 +2712,7 @@ if ($Upload -and $StorageAccountName) deliveryConfig = @{ partitionData = $true dataOverwriteBehavior = "OverwritePreviousReport" - fileFormat = "Csv" + fileFormat = "Parquet" compressionMode = "None" containerUri = "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/rg/providers/Microsoft.Storage/storageAccounts/$StorageAccountName" rootFolderPath = "" @@ -1455,7 +2739,9 @@ if ($Upload -and $StorageAccountName) if ($PSCmdlet.ShouldProcess("$container/$blobPath", "Upload to Azure Storage")) { Write-Host " Uploading $provider to msexports container..." 
-ForegroundColor Cyan + Write-Host " Step 1: Upload data file (before manifest to ensure ADF finds it)..." -ForegroundColor Gray az storage blob upload --account-name $StorageAccountName @authArgs --container-name $container --file $dataFilePath --name $blobPath --overwrite --only-show-errors 2>$null + Write-Host " Step 2: Upload manifest.json (triggers ADF pipeline)..." -ForegroundColor Gray az storage blob upload --account-name $StorageAccountName @authArgs --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null } @@ -1495,6 +2781,55 @@ if ($Upload -and $StorageAccountName) Write-Host "" Write-Host "Upload Complete! $uploadedCount providers uploaded." -ForegroundColor Green + # Upload additional FOCUS datasets (Prices, CommitmentDiscountUsage, Recommendations, Transactions) + if ($additionalDatasetFiles.Count -gt 0) + { + Write-Host "" + Write-Host "Uploading additional FOCUS datasets to ingestion container..." -ForegroundColor Yellow + + $container = "ingestion" + $ingestionId = (Get-Date).ToString("yyyyMMddHHmmss") + + foreach ($datasetName in $additionalDatasetFiles.Keys) + { + $datasetFilePath = $additionalDatasetFiles[$datasetName] + if (-not (Test-Path $datasetFilePath)) + { + Write-Host " Warning: $datasetFilePath not found, skipping $datasetName" -ForegroundColor Yellow + continue + } + + $datasetFileName = [System.IO.Path]::GetFileName($datasetFilePath) + $scopePath = "azure/test-account" + $blobFolder = "$datasetName/$($EndDate.ToString('yyyy'))/$($EndDate.ToString('MM'))/$scopePath" + $blobPath = "$blobFolder/${ingestionId}__$datasetFileName" + $manifestBlobPath = "$blobFolder/manifest.json" + + # Create a simple manifest for ingestion trigger + $manifest = @{ + note = "Trigger file for ADX ingestion - $datasetName" + provider = "azure" + dataset = $datasetName + timestamp = (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssZ") + } | ConvertTo-Json -Depth 3 + + $manifestFilePath = Join-Path $OutputPath 
"manifest-$($datasetName.ToLower()).json" + $manifest | Out-File -FilePath $manifestFilePath -Encoding UTF8 + + if ($PSCmdlet.ShouldProcess("$container/$blobPath", "Upload $datasetName to Azure Storage")) + { + Write-Host " Uploading $datasetName..." -ForegroundColor Cyan + az storage blob upload --account-name $StorageAccountName @authArgs --container-name $container --file $datasetFilePath --name $blobPath --overwrite --only-show-errors 2>$null + az storage blob upload --account-name $StorageAccountName @authArgs --container-name $container --file $manifestFilePath --name $manifestBlobPath --overwrite --only-show-errors 2>$null + Write-Host " Uploaded: $blobPath" -ForegroundColor Green + Write-Host " Uploaded: $manifestBlobPath" -ForegroundColor Green + } + } + + Write-Host "" + Write-Host "Additional datasets uploaded!" -ForegroundColor Green + } + # Verify ADF pipeline execution if ($StartTriggers -and $AdfName -and $ResourceGroupName) { @@ -1528,6 +2863,12 @@ if ($Upload -and $StorageAccountName) Write-Host "" Write-Host "Warning: -StartTriggers requires -AdfName and -ResourceGroupName" -ForegroundColor Yellow } + + # Verify update policies and backfill if needed (runs after every upload) + if ($AdxClusterUri) + { + Invoke-EnsureUpdatePolicy -AdxClusterUri $AdxClusterUri -Backfill + } } elseif ($Upload) { @@ -1541,7 +2882,11 @@ else Write-Host " 2. Or manually upload:" Write-Host " - Azure data to msexports/{scope}/{export-name}/{date-range}/{time}/{guid}/" Write-Host " - AWS/GCP/DC data to ingestion/Costs/{yyyy}/{mm}/{provider}/{account}/" + Write-Host " - Prices to ingestion/Prices/{yyyy}/{mm}/azure/test-account/" + Write-Host " - CommitmentDiscountUsage to ingestion/CommitmentDiscountUsage/{yyyy}/{mm}/azure/test-account/" + Write-Host " - Recommendations to ingestion/Recommendations/{yyyy}/{mm}/azure/test-account/" + Write-Host " - Transactions to ingestion/Transactions/{yyyy}/{mm}/azure/test-account/" Write-Host " 3. 
Start ADF triggers to process the data" } -Write-Host "" +Write-Host "" \ No newline at end of file diff --git a/src/templates/finops-hub/test/README.md b/src/templates/finops-hub/test/README.md index 9686f11e0..599d3a3b6 100644 --- a/src/templates/finops-hub/test/README.md +++ b/src/templates/finops-hub/test/README.md @@ -26,6 +26,12 @@ Generates synthetic multi-cloud cost data in [FOCUS format](https://focus.finops # Generate and upload using Azure AD auth ./Generate-MultiCloudTestData.ps1 -Upload -StorageAccountName "stfinopshub" -AdfName "adf-finopshub" -StartTriggers + +# Nuke all test data (ADX tables, storage blobs, local files) for a clean reset +./Generate-MultiCloudTestData.ps1 -NukeTestData -AdxClusterUri "https://mycluster.region.kusto.windows.net" -StorageAccountName "stfinopshub" + +# Full nuke with ADF trigger management +./Generate-MultiCloudTestData.ps1 -NukeTestData -AdxClusterUri "https://mycluster.region.kusto.windows.net" -StorageAccountName "stfinopshub" -AdfName "adf-finopshub" -ResourceGroupName "rg-finopshub" -StartTriggers ``` ### Features @@ -40,9 +46,14 @@ Generates synthetic multi-cloud cost data in [FOCUS format](https://focus.finops | CPU architecture | Intel/AMD/Arm64 patterns in x_SkuMeterName | | Tag variation | ~80% tagged, ~20% untagged for maturity scorecard testing | | Data quality | ~2% anomaly rows with ChargeClass=Correction | +| Split cost allocation | ~10% of AKS/EKS/GKE rows with Allocated* columns (v1.3+) | +| ContractApplied | JSON contract reference on committed-discount rows (v1.3+) | | Reproducibility | `-Seed` parameter for deterministic output | | Azure AD auth | Default `--auth-mode login` for uploads (storage keys opt-in via `-UseStorageKey`) | | ShouldProcess | Full `-WhatIf` / `-Confirm` support for destructive operations | +| NukeTestData | Purges ADX tables, storage blobs, and local test-data for clean resets | +| Output formats | CSV and Parquet (via PSParquet module) | +| Additional datasets | Azure-only 
Prices, CommitmentDiscountUsage, Recommendations, Transactions | ### Parameters @@ -62,3 +73,16 @@ Column sets vary by FOCUS version: | 1.3 | 37 | 13 | 50 | Plus FinOps Hub-specific `x_` prefixed extension columns for dashboard compatibility. + +> **Note:** All columns are emitted regardless of version (with empty/null values for non-applicable columns) to maintain a consistent Parquet schema. This is intentional for test data compatibility. + +### NukeTestData (Clean Reset) + +The `-NukeTestData` switch performs a full cleanup for re-testing: + +1. **Stops ADF triggers** (optional, with `-StartTriggers -AdfName -ResourceGroupName`) to prevent re-ingestion during cleanup +2. **Purges all ADX tables** in both Hub and Ingestion databases via REST API +3. **Deletes all blobs** from `msexports` and `ingestion` storage containers +4. **Removes local test-data folder** + +Requires `-AdxClusterUri` and `-StorageAccountName`. Uses `ConfirmImpact = 'High'` so PowerShell will prompt for confirmation unless `-Force` or `-Confirm:$false` is passed. 
From 4fde70e3c53d0d3950133e0a9956c80a03f5c717 Mon Sep 17 00:00:00 2001 From: Zach Olinske Date: Mon, 16 Feb 2026 20:26:53 +0100 Subject: [PATCH 5/5] fix: resolve OutputPath to absolute for Export-Parquet .NET cmdlet compatibility --- .../finops-hub/test/Generate-MultiCloudTestData.ps1 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 index 3700c0f75..0def9b0e1 100644 --- a/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 +++ b/src/templates/finops-hub/test/Generate-MultiCloudTestData.ps1 @@ -2074,6 +2074,11 @@ Write-Host " Output Path: $OutputPath" if ($PSBoundParameters.ContainsKey('Seed')) { Write-Host " Random Seed: $Seed" } Write-Host "" +# Resolve OutputPath to absolute — Export-Parquet (a .NET cmdlet) uses +# [IO.Directory]::GetCurrentDirectory() which may differ from PowerShell's $PWD. +# Converting to absolute here avoids "Could not find a part of the path" errors. +$OutputPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputPath) + # Create output directory if (-not (Test-Path $OutputPath)) { @@ -2889,4 +2894,4 @@ else Write-Host " 3. Start ADF triggers to process the data" } -Write-Host "" \ No newline at end of file +Write-Host ""