diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore new file mode 100644 index 0000000..72f00e2 --- /dev/null +++ b/benchmarks/.gitignore @@ -0,0 +1,20 @@ +# Gradle +.gradle/ +build/ +!gradle/wrapper/gradle-wrapper.jar + +# IDE +.idea/ +*.iml +.vscode/ +*.swp +*.swo +*~ + +# Gatling +results/ + +# Logs +*.log + + diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle new file mode 100644 index 0000000..94f590d --- /dev/null +++ b/benchmarks/build.gradle @@ -0,0 +1,57 @@ +plugins { + id 'scala' + id 'io.gatling.gradle' version '3.11.5.2' +} + +group = 'com.altinity.ice.benchmarks' +version = '0.0.0-SNAPSHOT' + +repositories { + mavenCentral() +} + +ext { + gatlingVersion = '3.11.5' + scalaVersion = '2.13.14' +} + +dependencies { + // Gatling dependencies + gatling "io.gatling.highcharts:gatling-charts-highcharts:${gatlingVersion}" + gatling "io.gatling:gatling-app:${gatlingVersion}" + gatling "io.gatling:gatling-recorder:${gatlingVersion}" + + // Scala + implementation "org.scala-lang:scala-library:${scalaVersion}" + + // JSON processing + implementation 'com.fasterxml.jackson.core:jackson-databind:2.17.0' + implementation 'com.fasterxml.jackson.module:jackson-module-scala_2.13:2.17.0' + + // Configuration + implementation 'com.typesafe:config:1.4.3' + + // Logging + implementation 'ch.qos.logback:logback-classic:1.5.6' +} + +gatling { + // Gatling configuration + simulations = { + include "**/*Simulation.scala" + } + + // JVM arguments for Gatling + jvmArgs = [ + '-Xms2G', + '-Xmx4G', + '-XX:+UseG1GC', + '-XX:MaxGCPauseMillis=30' + ] +} + +tasks.withType(ScalaCompile) { + scalaCompileOptions.additionalParameters = ['-deprecation', '-feature'] +} + + diff --git a/benchmarks/gradle/wrapper/gradle-wrapper.jar b/benchmarks/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..d64cd49 Binary files /dev/null and b/benchmarks/gradle/wrapper/gradle-wrapper.jar differ diff --git a/benchmarks/gradle/wrapper/gradle-wrapper.properties b/benchmarks/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..93d17e8 --- /dev/null +++ b/benchmarks/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,8 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.12-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists + diff --git a/benchmarks/gradlew b/benchmarks/gradlew new file mode 100755 index 0000000..25181ba --- /dev/null +++ b/benchmarks/gradlew @@ -0,0 +1,248 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! 
"$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsenv variables from the environment, and +# * the '-classpath' argument +# and finally the main class name. +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. 
+#
+
+eval "set -- $(
+        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+        xargs -n1 |
+        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+        tr '\n' ' '
+    )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/benchmarks/pom.xml b/benchmarks/pom.xml
new file mode 100644
index 0000000..080158e
--- /dev/null
+++ b/benchmarks/pom.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>com.altinity.ice.benchmarks</groupId>
+  <artifactId>ice-benchmarks</artifactId>
+  <version>0.0.0-SNAPSHOT</version>
+
+  <properties>
+    <maven.compiler.source>17</maven.compiler.source>
+    <maven.compiler.target>17</maven.compiler.target>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <gatling.version>3.11.5</gatling.version>
+    <gatling-maven-plugin.version>4.9.6</gatling-maven-plugin.version>
+    <scala.version>2.13.14</scala.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>io.gatling.highcharts</groupId>
+      <artifactId>gatling-charts-highcharts</artifactId>
+      <version>${gatling.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-library</artifactId>
+      <version>${scala.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.typesafe</groupId>
+      <artifactId>config</artifactId>
+      <version>1.4.3</version>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <testSourceDirectory>src/test/scala</testSourceDirectory>
+    <plugins>
+      <plugin>
+        <groupId>net.alchim31.maven</groupId>
+        <artifactId>scala-maven-plugin</artifactId>
+        <version>4.9.2</version>
+        <executions>
+          <execution>
+            <goals>
+              <goal>testCompile</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>io.gatling</groupId>
+        <artifactId>gatling-maven-plugin</artifactId>
+        <version>${gatling-maven-plugin.version}</version>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/benchmarks/settings.gradle b/benchmarks/settings.gradle
new file mode 100644
index 0000000..f175deb
--- /dev/null
+++ b/benchmarks/settings.gradle
@@ -0,0 +1,3 @@
+rootProject.name = 'ice-benchmarks'
+
+
diff --git a/benchmarks/src/test/resources/application.conf b/benchmarks/src/test/resources/application.conf
new file mode 100644
index 0000000..3b32659
--- /dev/null
+++ b/benchmarks/src/test/resources/application.conf
@@ -0,0 +1,101 @@
+# ICE REST Catalog Benchmark Configuration
+#
+# This configuration file defines the parameters for load testing the ICE REST
+# catalog. It follows the same structure as the Polaris benchmarking framework.
+
+# HTTP connection settings
+http {
+  # Base URL of the ICE REST catalog
+  base-url = "http://localhost:5000"
+  base-url = ${?ICE_CATALOG_URL}
+
+  # Connection timeout in milliseconds
+  connect-timeout = 30000
+
+  # Request timeout in milliseconds
+  request-timeout = 60000
+}
+
+# Authentication settings
+auth {
+  # Bearer token for authentication. "foo" is a placeholder; override it via
+  # the environment (export ICE_BEARER_TOKEN="your-token") or edit this file.
+  bearer-token = "foo"
+  bearer-token = ${?ICE_BEARER_TOKEN}
+}
+
+# Test dataset configuration
+dataset {
+  # Catalog configuration
+  catalog-name = "test-catalog"
+
+  # Tree dataset structure
+  tree {
+    # Number of catalogs to create (for multi-catalog tests)
+    num-catalogs = 1
+
+    # Namespace hierarchy configuration
+    # Width: number of child namespaces per parent
+    namespace-width = 2
+    # Depth: levels of namespace nesting
+    namespace-depth = 3
+
+    # Tables and views per namespace
+    tables-per-namespace = 5
+    views-per-namespace = 2
+
+    # Number of metadata properties per entity
+    namespace-properties = 10
+    table-properties = 20
+    view-properties = 15
+
+    # Schema configuration
+    columns-per-table = 50
+    columns-per-view = 30
+
+    # Default base location for tables
+    default-base-location = "s3://test-warehouse/"
+    default-base-location = ${?ICE_BASE_LOCATION}
+  }
+}
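+
+# Dataset size follows directly from the tree parameters: with the defaults
+# above (namespace-width = 2, namespace-depth = 3) the tree contains
+# 2 + 4 + 8 = 14 namespaces, 14 * 5 = 70 tables, and 14 * 2 = 28 views,
+# i.e. 112 entities per catalog.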
+
+# Workload configurations
+workload {
+  # Pure write workload - creates namespaces, tables, and views
+  create-tree-dataset {
+    # Concurrency for namespace creation
+    namespace-concurrency = 20
+    # Concurrency for table creation
+    table-concurrency = 40
+    # Concurrency for view creation
+    view-concurrency = 40
+  }
+
+  # Mixed read/write workload - 90% reads, 10% writes
+  read-update-tree-dataset {
+    # Ratio of read operations (0.0 to 1.0)
+    read-write-ratio = 0.9
+    # Target throughput (operations per second)
+    throughput = 150
+    # Duration of the benchmark in minutes
+    duration-in-minutes = 10
+  }
+
+  # Read-heavy workload - 100% reads
+  read-only-workload {
+    # Target throughput (operations per second)
+    throughput = 500
+    # Duration of the benchmark in minutes
+    duration-in-minutes = 5
+  }
+
+  # Write-heavy workload - table updates
+  write-heavy-workload {
+    # Concurrency for table updates
+    update-concurrency = 50
+    # Number of updates per table
+    updates-per-table = 10
+  }
+}
diff --git a/benchmarks/src/test/resources/logback.xml b/benchmarks/src/test/resources/logback.xml
new file mode 100644
index 0000000..c54715c
--- /dev/null
+++ b/benchmarks/src/test/resources/logback.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+
+  <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
+    <encoder>
+      <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <appender name="FILE" class="ch.qos.logback.core.FileAppender">
+    <file>build/reports/gatling/benchmark.log</file>
+    <encoder>
+      <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <!-- Uncomment to trace all HTTP requests/responses issued by Gatling -->
+  <!-- <logger name="io.gatling.http.engine.response" level="TRACE"/> -->
+
+  <root level="WARN">
+    <appender-ref ref="CONSOLE"/>
+    <appender-ref ref="FILE"/>
+  </root>
+
+</configuration>
diff --git a/benchmarks/src/test/scala/com/altinity/ice/benchmarks/DatasetGenerator.scala b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/DatasetGenerator.scala
new file mode 100644
index 0000000..3d20c25
--- /dev/null
+++ b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/DatasetGenerator.scala
@@ -0,0 +1,155 @@
+package com.altinity.ice.benchmarks
+
+import com.typesafe.config.{Config, ConfigFactory}
+
+import scala.util.Random
+
+/**
+ * Procedural dataset generator for ICE REST Catalog benchmarks.
+ * Given the same configuration it generates the same dataset, which keeps
+ * benchmark runs reproducible.
+ */
+object DatasetGenerator {
+
+  private val config: Config = ConfigFactory.load().getConfig("dataset.tree")
+
+  val numCatalogs: Int = config.getInt("num-catalogs")
+  val namespaceWidth: Int = config.getInt("namespace-width")
+  val namespaceDepth: Int = config.getInt("namespace-depth")
+  val tablesPerNamespace: Int = config.getInt("tables-per-namespace")
+  val viewsPerNamespace: Int = config.getInt("views-per-namespace")
+  val namespaceProperties: Int = config.getInt("namespace-properties")
+  val tableProperties: Int = config.getInt("table-properties")
+  val viewProperties: Int = config.getInt("view-properties")
+  val columnsPerTable: Int = config.getInt("columns-per-table")
+  val columnsPerView: Int = config.getInt("columns-per-view")
+  val defaultBaseLocation: String = config.getString("default-base-location")
+  val catalogName: String = ConfigFactory.load().getString("dataset.catalog-name")
+
+  /**
+   * Generates all namespaces in a tree structure.
+   * Returns a list of namespace paths (e.g., ["ns1", "ns1.ns2", "ns1.ns2.ns3"]).
+   */
+  def generateNamespaces(): Seq[String] = {
+    def buildTree(prefix: String, depth: Int): Seq[String] = {
+      if (depth == 0) Seq.empty
+      else {
+        (1 to namespaceWidth).flatMap { i =>
+          val name = if (prefix.isEmpty) s"ns_${depth}_$i" else s"$prefix.ns_${depth}_$i"
+          name +: buildTree(name, depth - 1)
+        }
+      }
+    }
+    buildTree("", namespaceDepth)
+  }
+
+  /**
+   * Generates table identifiers for all namespaces.
+   * Returns a list of (namespace, tableName) tuples.
+   */
+  def generateTables(): Seq[(String, String)] = {
+    generateNamespaces().flatMap { ns =>
+      (1 to tablesPerNamespace).map { i =>
+        (ns, s"table_$i")
+      }
+    }
+  }
+
+  /**
+   * Generates view identifiers for all namespaces.
+   * Returns a list of (namespace, viewName) tuples.
+   */
+  def generateViews(): Seq[(String, String)] = {
+    generateNamespaces().flatMap { ns =>
+      (1 to viewsPerNamespace).map { i =>
+        (ns, s"view_$i")
+      }
+    }
+  }
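+
+  // Sample of the deterministic output for namespace-width = 2 and
+  // namespace-depth = 3 (depth-first, parents before children):
+  //
+  //   DatasetGenerator.generateNamespaces().take(4)
+  //   // -> Seq("ns_3_1", "ns_3_1.ns_2_1", "ns_3_1.ns_2_1.ns_1_1",
+  //   //        "ns_3_1.ns_2_1.ns_1_2")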
+
+  /**
+   * Generates properties for a namespace.
+   */
+  def generateNamespaceProperties(namespace: String): Map[String, String] = {
+    val seed = namespace.hashCode
+    val rnd = new Random(seed)
+    (1 to namespaceProperties).map { i =>
+      s"prop_$i" -> s"value_${rnd.nextInt(1000)}"
+    }.toMap
+  }
+
+  /**
+   * Generates a table schema with the configured number of columns.
+   */
+  def generateTableSchema(tableName: String): String = {
+    val seed = tableName.hashCode
+    val rnd = new Random(seed)
+    // Iceberg primitive type names ("int", not "integer"); an unknown type
+    // name is rejected by the catalog when the table is created.
+    val types = Seq("int", "long", "double", "string", "boolean", "date", "timestamp")
+
+    val fields = (1 to columnsPerTable).map { i =>
+      val fieldType = types(rnd.nextInt(types.length))
+      s"""{"id": $i, "name": "col_$i", "required": false, "type": "$fieldType"}"""
+    }.mkString(",\n    ")
+
+    s"""{"type": "struct", "fields": [$fields]}"""
+  }
+
+  /**
+   * Generates properties for a table.
+   */
+  def generateTableProperties(tableName: String): Map[String, String] = {
+    val seed = tableName.hashCode
+    val rnd = new Random(seed)
+    (1 to tableProperties).map { i =>
+      s"prop_$i" -> s"value_${rnd.nextInt(1000)}"
+    }.toMap + ("write.format.default" -> "parquet")
+  }
+
+  /**
+   * Generates a view schema with the configured number of columns.
+   */
+  def generateViewSchema(viewName: String): String = {
+    val seed = viewName.hashCode
+    val rnd = new Random(seed)
+    val types = Seq("int", "long", "double", "string", "boolean", "date", "timestamp")
+
+    val fields = (1 to columnsPerView).map { i =>
+      val fieldType = types(rnd.nextInt(types.length))
+      s"""{"id": $i, "name": "col_$i", "required": false, "type": "$fieldType"}"""
+    }.mkString(",\n    ")
+
+    s"""{"type": "struct", "fields": [$fields]}"""
+  }
+
+  /**
+   * Generates properties for a view.
+   */
+  def generateViewProperties(viewName: String): Map[String, String] = {
+    val seed = viewName.hashCode
+    val rnd = new Random(seed)
+    (1 to viewProperties).map { i =>
+      s"prop_$i" -> s"value_${rnd.nextInt(1000)}"
+    }.toMap
+  }
+
+  /**
+   * Converts a properties map to a JSON object string.
+   */
+  def propertiesToJson(properties: Map[String, String]): String = {
+    properties.map { case (k, v) => s""""$k": "$v"""" }.mkString("{", ", ", "}")
+  }
+
+  /**
+   * Returns the total number of (namespaces, tables, views) that will be created.
+   */
+  def getTotalEntityCount: (Int, Int, Int) = {
+    val namespaces = generateNamespaces()
+    val tables = generateTables()
+    val views = generateViews()
+    (namespaces.length, tables.length, views.length)
+  }
+}
diff --git a/benchmarks/src/test/scala/com/altinity/ice/benchmarks/IceRestProtocol.scala b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/IceRestProtocol.scala
new file mode 100644
index 0000000..e9cff43
--- /dev/null
+++ b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/IceRestProtocol.scala
@@ -0,0 +1,49 @@
+package com.altinity.ice.benchmarks
+
+import com.typesafe.config.{Config, ConfigFactory}
+import io.gatling.core.Predef._
+import io.gatling.http.Predef._
+import io.gatling.http.protocol.HttpProtocolBuilder
+
+import scala.concurrent.duration._
+
+/**
+ * Protocol configuration for ICE REST Catalog benchmarks.
+ * Handles bearer token authentication and HTTP settings.
+ */
+object IceRestProtocol {
+
+  private val config: Config = ConfigFactory.load()
+
+  val baseUrl: String = config.getString("http.base-url")
+  val bearerToken: String = config.getString("auth.bearer-token")
+  val connectTimeout: Duration = config.getInt("http.connect-timeout").milliseconds
+  val requestTimeout: Duration = config.getInt("http.request-timeout").milliseconds
+
+  /**
+   * Sets the bearer token in the session.
+   * Must be called at the start of every scenario, since the protocol's
+   * Authorization header resolves the `accessToken` session attribute.
+   */
+  val authenticate = exec(session => session.set("accessToken", bearerToken))
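+
+  // Typical wiring (a sketch): seed the session attribute first, then issue
+  // requests; httpProtocol below injects "Bearer <token>" on every call.
+  //
+  //   scenario("Example")
+  //     .exec(IceRestProtocol.authenticate)
+  //     .exec(http("List Namespaces").get("/v1/namespaces").check(status.is(200)))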
+
+  /**
+   * Returns the HTTP protocol configuration for the ICE REST catalog.
+   */
+  def httpProtocol: HttpProtocolBuilder = {
+    val protocol = http
+      .baseUrl(baseUrl)
+      .acceptHeader("application/json")
+      .contentTypeHeader("application/json")
+      .shareConnections
+
+    // Only add the Authorization header if a bearer token is configured
+    if (bearerToken.nonEmpty) {
+      protocol.header("Authorization", "Bearer ${accessToken}")
+    } else {
+      protocol
+    }
+  }
+}
diff --git a/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/CreateTreeDatasetSimulation.scala b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/CreateTreeDatasetSimulation.scala
new file mode 100644
index 0000000..4e0e06e
--- /dev/null
+++ b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/CreateTreeDatasetSimulation.scala
@@ -0,0 +1,151 @@
+package com.altinity.ice.benchmarks.simulations
+
+import com.altinity.ice.benchmarks.{DatasetGenerator, IceRestProtocol}
+import com.typesafe.config.ConfigFactory
+import io.gatling.core.Predef._
+import io.gatling.http.Predef._
+
+import scala.concurrent.duration._
+
+/**
+ * 100% write workload: creates a tree of namespaces, tables, and views.
+ *
+ * This simulation creates a hierarchical dataset structure:
+ *   - an N-ary tree of namespaces (width and depth are configurable)
+ *   - tables and views in each namespace
+ *   - properties and schemas for each entity
+ *
+ * Similar to the Polaris CreateTreeDataset benchmark.
+ */
+class CreateTreeDatasetSimulation extends Simulation {
+
+  private val config = ConfigFactory.load()
+  private val workloadConfig = config.getConfig("workload.create-tree-dataset")
+
+  private val namespaceConcurrency = workloadConfig.getInt("namespace-concurrency")
+  private val tableConcurrency = workloadConfig.getInt("table-concurrency")
+  private val viewConcurrency = workloadConfig.getInt("view-concurrency")
+
+  private val catalogName = config.getString("dataset.catalog-name")
+
+  // Generate dataset
+  private val namespaces = DatasetGenerator.generateNamespaces()
+  private val tables = DatasetGenerator.generateTables()
+  private val views = DatasetGenerator.generateViews()
+
+  println(s"Dataset configuration:")
+  println(s"  Namespaces: ${namespaces.length}")
+  println(s"  Tables: ${tables.length}")
+  println(s"  Views: ${views.length}")
+  println(s"  Total entities: ${namespaces.length + tables.length + views.length}")
+
+  // Renders a dotted namespace path as a JSON array of levels for request
+  // bodies, e.g. "a.b" -> ["a", "b"]. A single string containing raw 0x1F
+  // separators would be invalid JSON; the Iceberg REST spec only uses the
+  // unit separator in URL paths.
+  private def namespaceLevelsJson(ns: String): String =
+    ns.split('.').map(level => s""""$level"""").mkString("[", ", ", "]")
+
+  // Namespace feeder: unit-separator form for URL paths, array form for bodies
+  private val namespaceFeeder = namespaces.map { ns =>
+    Map(
+      "namespace" -> ns.replace(".", "\u001F"),
+      "namespaceLevels" -> namespaceLevelsJson(ns),
+      "properties" -> DatasetGenerator.propertiesToJson(
+        DatasetGenerator.generateNamespaceProperties(ns)
+      )
+    )
+  }.iterator
+
+  // Table feeder
+  private val tableFeeder = tables.map { case (ns, tableName) =>
+    Map(
+      "namespace" -> ns.replace(".", "\u001F"),
+      "tableName" -> tableName,
+      "schema" -> DatasetGenerator.generateTableSchema(tableName),
+      "properties" -> DatasetGenerator.propertiesToJson(
+        DatasetGenerator.generateTableProperties(tableName)
+      ),
+      "location" -> s"${DatasetGenerator.defaultBaseLocation}$catalogName/$ns/$tableName"
+    )
+  }.iterator
+
+  // View feeder
+  private val viewFeeder = views.map { case (ns, viewName) =>
+    Map(
+      "namespace" -> ns.replace(".", "\u001F"),
+      "namespaceLevels" -> namespaceLevelsJson(ns),
+      "viewName" -> viewName,
+      "schema" -> DatasetGenerator.generateViewSchema(viewName),
+      "properties" -> DatasetGenerator.propertiesToJson(
+        DatasetGenerator.generateViewProperties(viewName)
+      ),
+      "sqlQuery" -> s"SELECT * FROM $ns.table_1"
+    )
+  }.iterator
+
+  // Scenario: create namespaces
+  private val createNamespaces = scenario("Create Namespaces")
+    .exec(IceRestProtocol.authenticate)
+    .feed(namespaceFeeder)
+    .exec(
+      http("Create Namespace")
+        .post("/v1/namespaces")
+        .body(StringBody(
+          """{"namespace": ${namespaceLevels}, "properties": ${properties}}"""
+        )).asJson
+        .check(status.in(200, 409)) // 409 = already exists
+    )
+
+  // Scenario: create tables (authenticate, since the protocol's Authorization
+  // header resolves the accessToken session attribute)
+  private val createTables = scenario("Create Tables")
+    .exec(IceRestProtocol.authenticate)
+    .feed(tableFeeder)
+    .exec(
+      http("Create Table")
+        .post("/v1/namespaces/${namespace}/tables")
+        .body(StringBody(
+          """{
+            "name": "${tableName}",
+            "schema": ${schema},
+            "location": "${location}",
+            "properties": ${properties}
+          }"""
+        )).asJson
+        .check(status.in(200, 409))
+    )
+
+  // Scenario: create views. #{currentTimeMillis()} is Gatling's built-in EL
+  // function (the original ${__time()} is JMeter syntax and would not resolve).
+  private val createViews = scenario("Create Views")
+    .exec(IceRestProtocol.authenticate)
+    .feed(viewFeeder)
+    .exec(
+      http("Create View")
+        .post("/v1/namespaces/${namespace}/views")
+        .body(StringBody(
+          """{
+            "name": "${viewName}",
+            "schema": ${schema},
+            "view-version": {
+              "version-id": 1,
+              "schema-id": 0,
+              "timestamp-ms": #{currentTimeMillis()},
+              "summary": {"operation": "create"},
+              "representations": [{
+                "type": "sql",
+                "sql": "${sqlQuery}",
+                "dialect": "spark"
+              }],
+              "default-namespace": ${namespaceLevels}
+            },
+            "properties": ${properties}
+          }"""
+        )).asJson
+        .check(status.in(200, 409))
+    )
+
+  // Stagger the phases so namespaces exist before their tables, and tables
+  // before the views that reference them
+  setUp(
+    createNamespaces.inject(
+      rampUsers(namespaces.length).during(30.seconds)
+    ),
+    createTables.inject(
+      nothingFor(35.seconds),
+      rampUsers(tables.length).during(60.seconds)
+    ),
+    createViews.inject(
+      nothingFor(100.seconds),
+      rampUsers(views.length).during(30.seconds)
+    )
+  ).protocols(IceRestProtocol.httpProtocol)
+}
diff --git a/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/ReadOnlyWorkloadSimulation.scala b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/ReadOnlyWorkloadSimulation.scala
new file mode 100644
index 0000000..4e5edcc
--- /dev/null
+++ b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/ReadOnlyWorkloadSimulation.scala
@@ -0,0 +1,95 @@
+package com.altinity.ice.benchmarks.simulations
+
+import com.altinity.ice.benchmarks.{DatasetGenerator, IceRestProtocol}
+import com.typesafe.config.ConfigFactory
+import io.gatling.core.Predef._
+import io.gatling.http.Predef._
+
+import scala.concurrent.duration._
+import scala.util.Random
+
+/**
+ * 100% read workload: exercises read-only operations on the catalog.
+ *
+ * This simulation tests read performance with various operations:
+ *   - list namespaces/tables/views
+ *   - load table/view metadata
+ *   - check entity existence
+ *
+ * Useful for testing catalog read scalability and caching effectiveness.
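+ *
+ * Example invocation (a sketch, assuming the Maven layout in this PR; run
+ * CreateTreeDatasetSimulation first so the entities being read exist):
+ * {{{
+ * mvn gatling:test -Dgatling.simulationClass=com.altinity.ice.benchmarks.simulations.ReadOnlyWorkloadSimulation
+ * }}}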
+ */
+class ReadOnlyWorkloadSimulation extends Simulation {
+
+  private val config = ConfigFactory.load()
+  private val workloadConfig = config.getConfig("workload.read-only-workload")
+
+  private val throughput = workloadConfig.getInt("throughput")
+  private val durationMinutes = workloadConfig.getInt("duration-in-minutes")
+
+  // Generate dataset (must match the one created by CreateTreeDatasetSimulation)
+  private val namespaces = DatasetGenerator.generateNamespaces()
+  private val tables = DatasetGenerator.generateTables()
+  private val views = DatasetGenerator.generateViews()
+
+  private val rnd = new Random(42) // Fixed seed for reproducibility
+
+  println(s"Read-only workload configuration:")
+  println(s"  Target throughput: $throughput ops/s")
+  println(s"  Duration: $durationMinutes minutes")
+
+  // Feeders pick random pre-existing entities
+  private val namespaceFeeder = Iterator.continually {
+    val ns = namespaces(rnd.nextInt(namespaces.length))
+    Map("namespace" -> ns.replace(".", "\u001F"))
+  }
+
+  private val tableFeeder = Iterator.continually {
+    val (ns, tableName) = tables(rnd.nextInt(tables.length))
+    Map(
+      "namespace" -> ns.replace(".", "\u001F"),
+      "tableName" -> tableName
+    )
+  }
+
+  // Read-only scenario: each virtual user performs a single randomly chosen
+  // read, so constantUsersPerSec(throughput) approximates the target ops/s.
+  // (Looping each user with during(...) on top of that injection profile
+  // would multiply the load far past the configured target.)
+  private val readOnlyWorkload = scenario("Read-Only Workload")
+    .exec(IceRestProtocol.authenticate)
+    .feed(namespaceFeeder)
+    .feed(tableFeeder)
+    .randomSwitch(
+      20.0 -> exec(
+        http("List Namespaces")
+          .get("/v1/namespaces")
+          .check(status.is(200))
+      ),
+      20.0 -> exec(
+        http("List Tables")
+          .get("/v1/namespaces/${namespace}/tables")
+          .check(status.in(200, 404))
+      ),
+      20.0 -> exec(
+        http("Check Table Exists")
+          .head("/v1/namespaces/${namespace}/tables/${tableName}")
+          .check(status.in(200, 204, 404))
+      ),
+      30.0 -> exec(
+        http("Load Table")
+          .get("/v1/namespaces/${namespace}/tables/${tableName}")
+          .check(status.in(200, 404))
+      ),
+      10.0 -> exec(
+        http("Load Namespace")
+          .get("/v1/namespaces/${namespace}")
+          .check(status.in(200, 404))
+      )
+    )
+
+  setUp(
+    readOnlyWorkload.inject(
+      constantUsersPerSec(throughput).during(durationMinutes.minutes)
+    )
+  ).protocols(IceRestProtocol.httpProtocol)
+}
diff --git a/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/ReadUpdateTreeDatasetSimulation.scala b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/ReadUpdateTreeDatasetSimulation.scala
new file mode 100644
index 0000000..fc6cd3f
--- /dev/null
+++ b/benchmarks/src/test/scala/com/altinity/ice/benchmarks/simulations/ReadUpdateTreeDatasetSimulation.scala
@@ -0,0 +1,159 @@
+package com.altinity.ice.benchmarks.simulations
+
+import com.altinity.ice.benchmarks.{DatasetGenerator, IceRestProtocol}
+import com.typesafe.config.ConfigFactory
+import io.gatling.core.Predef._
+import io.gatling.http.Predef._
+
+import scala.concurrent.duration._
+import scala.util.Random
+
+/**
+ * Mixed read/write workload: 90% reads, 10% writes (configurable).
+ *
+ * This simulation exercises various read and write operations:
+ *   - read: namespace exists, table exists, load table, list tables, etc.
+ *   - write: update namespace properties, update table metadata
+ *
+ * Similar to the Polaris ReadUpdateTreeDataset benchmark.
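+ *
+ * With the defaults in application.conf (ratio 0.9, 150 ops/s, 10 minutes)
+ * this issues about 150 * 600 = 90,000 operations, of which roughly 81,000
+ * are reads and 9,000 are writes.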
+ */
+class ReadUpdateTreeDatasetSimulation extends Simulation {
+
+  private val config = ConfigFactory.load()
+  private val workloadConfig = config.getConfig("workload.read-update-tree-dataset")
+
+  private val readWriteRatio = workloadConfig.getDouble("read-write-ratio")
+  private val throughput = workloadConfig.getInt("throughput")
+  private val durationMinutes = workloadConfig.getInt("duration-in-minutes")
+
+  // Generate dataset (must match the one created by CreateTreeDatasetSimulation)
+  private val namespaces = DatasetGenerator.generateNamespaces()
+  private val tables = DatasetGenerator.generateTables()
+  private val views = DatasetGenerator.generateViews()
+
+  private val rnd = new Random(42) // Fixed seed for reproducibility
+
+  println(s"Mixed workload configuration:")
+  println(s"  Read/Write ratio: ${readWriteRatio * 100}% / ${(1 - readWriteRatio) * 100}%")
+  println(s"  Target throughput: $throughput ops/s")
+  println(s"  Duration: $durationMinutes minutes")
+  println(s"  Total operations: ${throughput * durationMinutes * 60}")
+
+  // Feeders pick random entities
+  private val namespaceFeeder = Iterator.continually {
+    val ns = namespaces(rnd.nextInt(namespaces.length))
+    Map("namespace" -> ns.replace(".", "\u001F"))
+  }
+
+  private val tableFeeder = Iterator.continually {
+    val (ns, tableName) = tables(rnd.nextInt(tables.length))
+    Map(
+      "namespace" -> ns.replace(".", "\u001F"),
+      "tableName" -> tableName
+    )
+  }
+
+  private val viewFeeder = Iterator.continually {
+    val (ns, viewName) = views(rnd.nextInt(views.length))
+    Map(
+      "namespace" -> ns.replace(".", "\u001F"),
+      "viewName" -> viewName
+    )
+  }
+
+  // Read operations
+  private val readOps = Seq(
+    exec(
+      http("Read / Check Namespace Exists")
+        .head("/v1/namespaces/${namespace}")
+        .check(status.in(200, 204, 404))
+    ),
+    exec(
+      http("Read / Fetch Namespace")
+        .get("/v1/namespaces/${namespace}")
+        .check(status.in(200, 404))
+    ),
+    exec(
+      http("Read / List Tables")
+        .get("/v1/namespaces/${namespace}/tables")
+        .check(status.in(200, 404))
+    ),
+    exec(
+      http("Read / Check Table Exists")
+        .head("/v1/namespaces/${namespace}/tables/${tableName}")
+        .check(status.in(200, 204, 404))
+    ),
+    exec(
+      http("Read / Fetch Table")
+        .get("/v1/namespaces/${namespace}/tables/${tableName}")
+        .check(status.in(200, 404))
+    ),
+    exec(
+      http("Read / List Views")
+        .get("/v1/namespaces/${namespace}/views")
+        .check(status.in(200, 404))
+    ),
+    exec(
+      http("Read / Check View Exists")
+        .head("/v1/namespaces/${namespace}/views/${viewName}")
+        .check(status.in(200, 204, 404))
+    ),
+    exec(
+      http("Read / Fetch View")
+        .get("/v1/namespaces/${namespace}/views/${viewName}")
+        .check(status.in(200, 404))
+    )
+  )
+
+  // Write operations. #{currentTimeMillis()} is Gatling's built-in EL function
+  // (the original ${__time()} is JMeter syntax and would not resolve).
+  private val writeOps = Seq(
+    exec(
+      http("Write / Update Namespace Properties")
+        .post("/v1/namespaces/${namespace}/properties")
+        .body(StringBody(
+          """{"updates": {"updated_at": "#{currentTimeMillis()}"}}"""
+        )).asJson
+        .check(status.in(200, 404))
+    ),
+    exec(
+      http("Write / Update Table Metadata")
+        .post("/v1/namespaces/${namespace}/tables/${tableName}")
+        .body(StringBody(
+          """{
+            "requirements": [],
+            "updates": [{
+              "action": "set-properties",
+              "updates": {"updated_at": "#{currentTimeMillis()}"}
+            }]
+          }"""
+        )).asJson
+        .check(status.in(200, 404))
+    )
+  )
+
+  // Mixed scenario. randomSwitch takes percentages (0-100), not ratios, so the
+  // configured ratio is scaled by 100 (passing 0.9/0.1 directly would leave
+  // ~99% of iterations doing nothing). uniformRandomSwitch picks a different
+  // operation on every iteration, whereas indexing the Seq with rnd would pin
+  // a single operation at scenario build time.
+  private val mixedWorkload = scenario("Mixed Read/Write Workload")
+    .exec(IceRestProtocol.authenticate)
+    .randomSwitch(
+      readWriteRatio * 100 -> group("Read")(
+        feed(namespaceFeeder)
+          .feed(tableFeeder)
+          .feed(viewFeeder)
+          .uniformRandomSwitch(readOps: _*)
+      ),
+      (1 - readWriteRatio) * 100 -> group("Write")(
+        feed(namespaceFeeder)
+          .feed(tableFeeder)
+          .uniformRandomSwitch(writeOps: _*)
+      )
+    )
+
+  // Each virtual user performs one operation, so constantUsersPerSec matches
+  // the configured target throughput (the previous during(...) loop on top of
+  // this injection profile would have multiplied the load far past it)
+  setUp(
+    mixedWorkload.inject(
+      constantUsersPerSec(throughput).during(durationMinutes.minutes)
+    )
+  ).protocols(IceRestProtocol.httpProtocol)
+}
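+
+// Smoke-test tip (a sketch): before a full run, shrink the workload in
+// application.conf, e.g.
+//
+//   workload.read-update-tree-dataset {
+//     throughput = 10
+//     duration-in-minutes = 1
+//   }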