From 1d305bf7143275db753e42b5c7d6947a336e4968 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 2 Mar 2026 11:42:47 +0100 Subject: [PATCH 1/4] save --- docs/parameters.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/parameters.md b/docs/parameters.md index f70ec3dbdd..12c375a630 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -101,9 +101,9 @@ Shared configuration options for the pull, and pull & start mode. In the presenc When pulling models outside of OpenVINO organization the optimum-cli api is used inside ovms. You can set two additional parameters for this mode. | Option | Value format | Description | |------------------------------|--------------|---------------------------------------------------------------------------------------------------------------| -| `--extra_quantization_params`| ` ` | Add advanced quantization parameters. Check [optimum-intel](https://github.com/huggingface/optimum-intel) documentation. Example: `--sym --group-size -1 --ratio 1.0 --awq --scale-estimation --dataset wikitext2` | -| `--weight-format` | `string` | Model precision used in optimum-cli export with conversion. Default `int8`. | - +| `--extra_quantization_params`| `string` | Add advanced quantization parameters. Check [optimum-intel](https://github.com/huggingface/optimum-intel) documentation. Example: `--sym --group-size -1 --ratio 1.0 --awq --scale-estimation --dataset wikitext2` | +| `--weight-format` | `string` | Model precision used in optimum-cli export with conversion. Default `int8`. | +| `--vocoder` | `string` | The vocoder model to use for text2speech. For example `microsoft/speecht5_hifigan`. | There are also additional environment variables that may change the behavior of pulling: @@ -161,7 +161,7 @@ Task specific parameters for different tasks (text generation/image generation/e | `--num_streams` | `integer` | The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems. Default: 1. | | `--normalize` | `bool` | Normalize the embeddings. Default: true. | | `--truncate` | `bool` | Truncate input when it exceeds model context length. Default: false | -| `--mean_pooling` | `bool` | Mean pooling option. Default: false. | +| `--pooling` | `string` | Pooling option. One of: CLS, LAST, MEAN. Default: CLS. | ### Rerank | option | Value format | Description | From 8bda8ee71e0ff97b459910b073e07f578adfb665 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 2 Mar 2026 11:44:13 +0100 Subject: [PATCH 2/4] remove unused param --- src/cli_parser.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cli_parser.cpp b/src/cli_parser.cpp index 1ca3a57e3f..d0c0d89634 100644 --- a/src/cli_parser.cpp +++ b/src/cli_parser.cpp @@ -279,10 +279,6 @@ std::variant> CLIParser::parse(int argc, char* "Resets model precision.", cxxopts::value(), "PRECISION") - ("resize", - "Resets model resize dimensions.", - cxxopts::value(), - "resize") ("model_version_policy", "Model version policy", cxxopts::value(), From 116fabdff7278352b1aa93f78c26413210cb5ae7 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 2 Mar 2026 11:46:13 +0100 Subject: [PATCH 3/4] save --- src/graph_export/embeddings_graph_cli_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph_export/embeddings_graph_cli_parser.cpp b/src/graph_export/embeddings_graph_cli_parser.cpp index 8bdcffe7bf..c1cdf218fc 100644 --- a/src/graph_export/embeddings_graph_cli_parser.cpp +++ b/src/graph_export/embeddings_graph_cli_parser.cpp @@ -53,7 +53,7 @@ void EmbeddingsGraphCLIParser::createOptions() { cxxopts::value()->default_value("false"), "truncate") ("pooling", - "Mean pooling option.", + "Pooling option. One of: CLS, LAST, MEAN.", cxxopts::value()->default_value("CLS"), "POOLING"); } From 531b805314ae4d2da6bed349ac39adda92436f19 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 2 Mar 2026 14:00:32 +0100 Subject: [PATCH 4/4] fix --- docs/parameters.md | 2 +- src/graph_export/embeddings_graph_cli_parser.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/parameters.md b/docs/parameters.md index 12c375a630..d83e5497c6 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -98,7 +98,7 @@ Shared configuration options for the pull, and pull & start mode. In the presenc ## Pull Mode Options for optimum-cli mode -When pulling models outside of OpenVINO organization the optimum-cli api is used inside ovms. You can set two additional parameters for this mode. +When pulling models outside of OpenVINO organization the optimum-cli api is used inside ovms. You can set additional parameters for this mode. | Option | Value format | Description | |------------------------------|--------------|---------------------------------------------------------------------------------------------------------------| | `--extra_quantization_params`| `string` | Add advanced quantization parameters. Check [optimum-intel](https://github.com/huggingface/optimum-intel) documentation. Example: `--sym --group-size -1 --ratio 1.0 --awq --scale-estimation --dataset wikitext2` | diff --git a/src/graph_export/embeddings_graph_cli_parser.cpp b/src/graph_export/embeddings_graph_cli_parser.cpp index c1cdf218fc..192dd6c748 100644 --- a/src/graph_export/embeddings_graph_cli_parser.cpp +++ b/src/graph_export/embeddings_graph_cli_parser.cpp @@ -98,7 +98,7 @@ void EmbeddingsGraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsImpl embeddingsGraphSettings.truncate = result->operator[]("truncate").as(); embeddingsGraphSettings.pooling = result->operator[]("pooling").as(); } - if (!(embeddingsGraphSettings.pooling == "CLS" || embeddingsGraphSettings.pooling == "LAST")){ + if (!(embeddingsGraphSettings.pooling == "CLS" || embeddingsGraphSettings.pooling == "LAST" || embeddingsGraphSettings.pooling == "MEAN")){ throw std::invalid_argument("Only CLS and LAST pooling modes are supported"); } hfSettings.graphSettings = std::move(embeddingsGraphSettings);