From e4d732514d1d4e0fc36665a9b943d30bc41ad092 Mon Sep 17 00:00:00 2001 From: morningman Date: Sat, 28 Feb 2026 13:25:28 +0800 Subject: [PATCH] docs(lakehouse): add Apache Ozone storage docs and catalog examples (4.0.4) --- docs/lakehouse/catalogs/hive-catalog.mdx | 16 +++++ docs/lakehouse/catalogs/iceberg-catalog.mdx | 17 +++++ docs/lakehouse/catalogs/paimon-catalog.mdx | 19 ++++++ docs/lakehouse/storages/ozone.md | 68 +++++++++++++++++++ .../lakehouse/catalogs/hive-catalog.mdx | 18 ++++- .../lakehouse/catalogs/iceberg-catalog.mdx | 18 +++++ .../lakehouse/catalogs/paimon-catalog.mdx | 19 ++++++ .../current/lakehouse/storages/ozone.md | 68 +++++++++++++++++++ sidebars.ts | 1 + 9 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 docs/lakehouse/storages/ozone.md create mode 100644 i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/storages/ozone.md diff --git a/docs/lakehouse/catalogs/hive-catalog.mdx b/docs/lakehouse/catalogs/hive-catalog.mdx index 190dd3bce7c7d..7f6ecf352acf3 100644 --- a/docs/lakehouse/catalogs/hive-catalog.mdx +++ b/docs/lakehouse/catalogs/hive-catalog.mdx @@ -104,6 +104,7 @@ Hive transactional tables are supported from version 3.x onwards. For details, r * [AWS S3](../storages/s3.md) * [Google Cloud Storage](../storages/gcs.md) * [Azure Blob](../storages/azure-blob.md) +* [Apache Ozone](../storages/ozone.md) (supported since 4.0.4) * [Alibaba Cloud OSS](../storages/aliyun-oss.md) * [Tencent Cloud COS](../storages/tencent-cos.md) * [Huawei Cloud OBS](../storages/huawei-obs.md) @@ -359,6 +360,21 @@ Hive transactional tables are supported from version 3.x onwards. 
For details, r ); ``` + + Supported since 4.0.4 + ```sql + CREATE CATALOG `hive_hms_on_ozone_new_catalog` PROPERTIES ( + 'type' = 'hms', + 'hive.metastore.uris' = 'thrift://127.0.0.1:9383', + 'fs.ozone.support' = 'true', + 'ozone.endpoint' = 'http://ozone-s3g:9878', + 'ozone.access_key' = '', + 'ozone.secret_key' = '', + 'ozone.region' = 'us-east-1', + 'ozone.use_path_style' = 'true' + ); + ``` + ```sql CREATE CATALOG test_hive_on_hms_minio_catalog PROPERTIES ( diff --git a/docs/lakehouse/catalogs/iceberg-catalog.mdx b/docs/lakehouse/catalogs/iceberg-catalog.mdx index f8c2b78d05181..debc3d5494701 100644 --- a/docs/lakehouse/catalogs/iceberg-catalog.mdx +++ b/docs/lakehouse/catalogs/iceberg-catalog.mdx @@ -127,6 +127,7 @@ CREATE CATALOG [IF NOT EXISTS] catalog_name PROPERTIES ( * [AWS S3](../storages/s3.md) * [Google Cloud Storage](../storages/gcs.md) * [Azure Blob](../storages/azure-blob.md) +* [Apache Ozone](../storages/ozone.md) (supported since 4.0.4) * [Aliyun OSS](../storages/aliyun-oss.md) * [Tencent COS](../storages/tencent-cos.md) * [Huawei OBS](../storages/huawei-obs.md) @@ -409,6 +410,22 @@ Support for Nested Namespace needs to be explicitly enabled. For details, please ); ``` + + Supported since 4.0.4 + ```sql + CREATE CATALOG iceberg_fs_on_ozone_catalog PROPERTIES ( + 'type' = 'iceberg', + 'iceberg.catalog.type' = 'hadoop', + 'warehouse' = 's3a://bucket/iceberg_warehouse', + 'fs.ozone.support' = 'true', + 'ozone.endpoint' = 'http://ozone-s3g:9878', + 'ozone.access_key' = '', + 'ozone.secret_key' = '', + 'ozone.region' = 'us-east-1', + 'ozone.use_path_style' = 'true' + ); + ``` + ```sql CREATE CATALOG test_iceberg_on_hms_minio_catalog PROPERTIES ( diff --git a/docs/lakehouse/catalogs/paimon-catalog.mdx b/docs/lakehouse/catalogs/paimon-catalog.mdx index 9a07d2ed16268..3029d2f2e1784 100644 --- a/docs/lakehouse/catalogs/paimon-catalog.mdx +++ b/docs/lakehouse/catalogs/paimon-catalog.mdx @@ -114,6 +114,8 @@ The currently dependent Paimon version is 1.0.0. 
* [Google Cloud Storage](../storages/gcs.md) +* [Apache Ozone](../storages/ozone.md) (supported since 4.0.4) + * [Alibaba Cloud OSS](../storages/aliyun-oss.md) * [Tencent Cloud COS](../storages/tencent-cos.md) @@ -261,6 +263,23 @@ Supported since version 4.0.3, `timestamp_with_local_time_zone` can be mapped to ); ``` + + Supported since 4.0.4 + ```sql + CREATE CATALOG paimon_hms_on_ozone_catalog PROPERTIES ( + 'type' = 'paimon', + 'paimon.catalog.type' = 'hms', + 'warehouse' = 's3a://test-bucket/paimon-warehouse', + 'hive.metastore.uris' = 'thrift://127.0.0.1:9383', + 'fs.ozone.support' = 'true', + 'ozone.endpoint' = 'http://ozone-s3g:9878', + 'ozone.access_key' = '', + 'ozone.secret_key' = '', + 'ozone.region' = 'us-east-1', + 'ozone.use_path_style' = 'true' + ); + ``` + ```sql CREATE CATALOG paimon_hms_on_minio_catalog PROPERTIES ( diff --git a/docs/lakehouse/storages/ozone.md b/docs/lakehouse/storages/ozone.md new file mode 100644 index 0000000000000..b6005c5fe6e2e --- /dev/null +++ b/docs/lakehouse/storages/ozone.md @@ -0,0 +1,68 @@ +--- +{ + "title": "Apache Ozone | Storages", + "language": "en", + "description": "Starting from version 4.0.4, Doris supports accessing Apache Ozone through the S3 Gateway.", + "sidebar_label": "Apache Ozone" +} +--- + +# Apache Ozone + +Starting from version 4.0.4, Doris supports accessing Apache Ozone through the S3 Gateway. + +This document describes the parameters required to access Apache Ozone. 
These parameters apply to: + +- Catalog properties +- Table Valued Function properties +- Broker Load properties +- Export properties +- Outfile properties + +**To use Ozone as a dedicated storage type, configure `"fs.ozone.support" = "true"` explicitly.** + +## Parameter Overview + +| Property Name | Legacy Name | Description | Default Value | Required | +| --- | --- | --- | --- | --- | +| ozone.endpoint | s3.endpoint | Ozone S3 Gateway endpoint, for example `http://ozone-s3g:9878` | None | Yes | +| ozone.region | s3.region | Region of Ozone S3 Gateway | `us-east-1` | No | +| ozone.access_key | s3.access_key, s3.access-key-id | Access key used for authentication | None | No* | +| ozone.secret_key | s3.secret_key, s3.secret-access-key | Secret key used for authentication | None | No* | +| ozone.session_token | s3.session_token, s3.session-token | Session token | None | No | +| ozone.connection.maximum | s3.connection.maximum | Maximum number of connections | `100` | No | +| ozone.connection.request.timeout | s3.connection.request.timeout | Request timeout in milliseconds | `10000` | No | +| ozone.connection.timeout | s3.connection.timeout | Connection timeout in milliseconds | `10000` | No | +| ozone.use_path_style | use_path_style, s3.path-style-access | Whether to use path-style access | `true` | No | +| ozone.force_parsing_by_standard_uri | force_parsing_by_standard_uri | Whether to force standard URI parsing | `false` | No | +| fs.ozone.support | | Whether to enable Ozone as storage type | `false` | Yes | + +Notes: + +- `ozone.access_key` and `ozone.secret_key` (marked `No*` in the table) must be configured together: set both or neither. +- `fs.s3a.*` keys are not parsed directly by Ozone properties. Use `ozone.*` or compatible `s3.*` keys. +- Ozone supports the `s3://`, `s3a://`, and `s3n://` URI schemes. 
+ +## Example Configuration + +Using `ozone.*` keys: + +```properties +"fs.ozone.support" = "true", +"ozone.endpoint" = "http://ozone-s3g:9878", +"ozone.access_key" = "hadoop", +"ozone.secret_key" = "hadoop", +"ozone.region" = "us-east-1", +"ozone.use_path_style" = "true" +``` + +Using compatible `s3.*` aliases: + +```properties +"fs.ozone.support" = "true", +"s3.endpoint" = "http://ozone-s3g:9878", +"s3.access_key" = "hadoop", +"s3.secret_key" = "hadoop", +"s3.region" = "us-east-1", +"s3.path-style-access" = "true" +``` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/hive-catalog.mdx b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/hive-catalog.mdx index 34ec561730e8d..119a293d68c82 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/hive-catalog.mdx +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/hive-catalog.mdx @@ -112,6 +112,8 @@ CREATE CATALOG [IF NOT EXISTS] catalog_name PROPERTIES ( * [Azure Blob](../storages/azure-blob.md) +* [Apache Ozone](../storages/ozone.md)(自 4.0.4 起支持) + * [阿里云 OSS](../storages/aliyun-oss.md) * [腾讯云 COS](../storages/tencent-cos.md) @@ -370,6 +372,21 @@ CREATE CATALOG [IF NOT EXISTS] catalog_name PROPERTIES ( ); ``` + + 自 4.0.4 起支持 + ```sql + CREATE CATALOG `hive_hms_on_ozone_new_catalog` PROPERTIES ( + 'type' = 'hms', + 'hive.metastore.uris' = 'thrift://127.0.0.1:9383', + 'fs.ozone.support' = 'true', + 'ozone.endpoint' = 'http://ozone-s3g:9878', + 'ozone.access_key' = '', + 'ozone.secret_key' = '', + 'ozone.region' = 'us-east-1', + 'ozone.use_path_style' = 'true' + ); + ``` + ```sql CREATE CATALOG test_hive_on_hms_minio_catalog PROPERTIES ( @@ -1101,4 +1118,3 @@ DROP DATABASE [IF EXISTS] hive_ctl.hive_db; | -------- | ------------------------------------ | | 2.1.6 | 支持 Hive 表数据写回 | | 3.0.4 | 支持 JsonSerDe 格式的 Hive 表。支持 Hive4 的事务表。 | - diff --git 
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/iceberg-catalog.mdx b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/iceberg-catalog.mdx index b41d97363498f..e51807eb2b9bb 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/iceberg-catalog.mdx +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/iceberg-catalog.mdx @@ -140,6 +140,8 @@ CREATE CATALOG [IF NOT EXISTS] catalog_name PROPERTIES ( * [Azure Blob](../storages/azure-blob.md) +* [Apache Ozone](../storages/ozone.md)(自 4.0.4 起支持) + * [阿里云 OSS](../storages/aliyun-oss.md) * [腾讯云 COS](../storages/tencent-cos.md) @@ -1125,6 +1127,22 @@ Iceberg 的元数层级关系是 Catalog -> Namespace -> Table。其中 Namespac ); ``` + + 自 4.0.4 起支持 + ```sql + CREATE CATALOG iceberg_fs_on_ozone_catalog PROPERTIES ( + 'type' = 'iceberg', + 'iceberg.catalog.type' = 'hadoop', + 'warehouse' = 's3a://bucket/iceberg_warehouse', + 'fs.ozone.support' = 'true', + 'ozone.endpoint' = 'http://ozone-s3g:9878', + 'ozone.access_key' = '', + 'ozone.secret_key' = '', + 'ozone.region' = 'us-east-1', + 'ozone.use_path_style' = 'true' + ); + ``` + ```sql CREATE CATALOG test_iceberg_fs_on_minio PROPERTIES ( diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/paimon-catalog.mdx b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/paimon-catalog.mdx index 3f209206b15f6..80177e17bcb50 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/paimon-catalog.mdx +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/catalogs/paimon-catalog.mdx @@ -114,6 +114,8 @@ CREATE CATALOG [IF NOT EXISTS] catalog_name PROPERTIES ( * [Google Cloud Storage](../storages/gcs.md) +* [Apache Ozone](../storages/ozone.md)(自 4.0.4 起支持) + * [阿里云 OSS](../storages/aliyun-oss.md) * [腾讯云 COS](../storages/tencent-cos.md) @@ -263,6 +265,23 @@ CREATE CATALOG [IF NOT EXISTS] catalog_name PROPERTIES ( ); ``` + + 自 4.0.4 起支持 + 
```sql + CREATE CATALOG paimon_hms_on_ozone_catalog PROPERTIES ( + 'type' = 'paimon', + 'paimon.catalog.type' = 'hms', + 'warehouse' = 's3a://test-bucket/paimon-warehouse', + 'hive.metastore.uris' = 'thrift://127.0.0.1:9383', + 'fs.ozone.support' = 'true', + 'ozone.endpoint' = 'http://ozone-s3g:9878', + 'ozone.access_key' = '', + 'ozone.secret_key' = '', + 'ozone.region' = 'us-east-1', + 'ozone.use_path_style' = 'true' + ); + ``` + ```sql CREATE CATALOG paimon_hms_on_minio_catalog PROPERTIES ( diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/storages/ozone.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/storages/ozone.md new file mode 100644 index 0000000000000..1df0d173d59d8 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/lakehouse/storages/ozone.md @@ -0,0 +1,68 @@ +--- +{ + "title": "Apache Ozone | Storages", + "language": "zh-CN", + "description": "自 4.0.4 版本起,Doris 支持通过 S3 Gateway 访问 Apache Ozone。", + "sidebar_label": "Apache Ozone" +} +--- + +# Apache Ozone + +自 4.0.4 版本起,Doris 支持通过 S3 Gateway 访问 Apache Ozone。 + +本文档介绍访问 Apache Ozone 所需的参数,这些参数适用于以下场景: + +- Catalog 属性 +- Table Valued Function 属性 +- Broker Load 属性 +- Export 属性 +- Outfile 属性 + +**如果要将 Ozone 作为独立存储类型使用,需要显式配置 `"fs.ozone.support" = "true"`。** + +## 参数总览 + +| 属性名称 | 曾用名 | 描述 | 默认值 | 是否必须 | +| --- | --- | --- | --- | --- | +| ozone.endpoint | s3.endpoint | Ozone S3 Gateway 访问端点,例如 `http://ozone-s3g:9878` | 无 | 是 | +| ozone.region | s3.region | Ozone S3 Gateway 区域 | `us-east-1` | 否 | +| ozone.access_key | s3.access_key, s3.access-key-id | 用于认证的 Access Key | 无 | 否* | +| ozone.secret_key | s3.secret_key, s3.secret-access-key | 用于认证的 Secret Key | 无 | 否* | +| ozone.session_token | s3.session_token, s3.session-token | Session Token | 无 | 否 | +| ozone.connection.maximum | s3.connection.maximum | 最大连接数 | `100` | 否 | +| ozone.connection.request.timeout | s3.connection.request.timeout | 请求超时时间(毫秒) | `10000` | 否 | +| ozone.connection.timeout | 
s3.connection.timeout | 连接超时时间(毫秒) | `10000` | 否 | +| ozone.use_path_style | use_path_style, s3.path-style-access | 是否使用 path-style 访问 | `true` | 否 | +| ozone.force_parsing_by_standard_uri | force_parsing_by_standard_uri | 是否强制使用标准 URI 解析 | `false` | 否 | +| fs.ozone.support | | 是否启用 Ozone 存储类型 | `false` | 是 | + +说明: + +- `ozone.access_key` 和 `ozone.secret_key` 需要成对配置。 +- `fs.s3a.*` 参数不会被 Ozone 属性直接解析,请使用 `ozone.*` 或兼容的 `s3.*` 参数。 +- Ozone 支持 `s3://`、`s3a://`、`s3n://` 三种 URI 协议。 + +## 配置示例 + +使用 `ozone.*` 参数: + +```properties +"fs.ozone.support" = "true", +"ozone.endpoint" = "http://ozone-s3g:9878", +"ozone.access_key" = "hadoop", +"ozone.secret_key" = "hadoop", +"ozone.region" = "us-east-1", +"ozone.use_path_style" = "true" +``` + +使用兼容的 `s3.*` 参数: + +```properties +"fs.ozone.support" = "true", +"s3.endpoint" = "http://ozone-s3g:9878", +"s3.access_key" = "hadoop", +"s3.secret_key" = "hadoop", +"s3.region" = "us-east-1", +"s3.path-style-access" = "true" +``` diff --git a/sidebars.ts b/sidebars.ts index 31f7044a02d8c..b64b7b668fbcb 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -528,6 +528,7 @@ const sidebars: SidebarsConfig = { items: [ 'lakehouse/storages/hdfs', 'lakehouse/storages/s3', + 'lakehouse/storages/ozone', 'lakehouse/storages/azure-blob', 'lakehouse/storages/gcs', 'lakehouse/storages/aliyun-oss',