From 85f3806e2dfb9f2afee16a25b89fd423e4031e2e Mon Sep 17 00:00:00 2001 From: Tamal Saha Date: Wed, 1 Jul 2026 03:07:27 +0600 Subject: [PATCH] Add MariaDB distributed disaster recovery (DC-DR) guide Signed-off-by: Tamal Saha --- .../distributed/disaster-recovery/_index.md | 10 + .../disaster-recovery/overview/index.md | 289 +++++++++++++++++ .../disaster-recovery/setup/index.md | 304 ++++++++++++++++++ .../setup/yamls/mariadb.yaml | 28 ++ .../setup/yamls/placement-policy.yaml | 49 +++ 5 files changed, 680 insertions(+) create mode 100644 docs/guides/mariadb/distributed/disaster-recovery/_index.md create mode 100644 docs/guides/mariadb/distributed/disaster-recovery/overview/index.md create mode 100644 docs/guides/mariadb/distributed/disaster-recovery/setup/index.md create mode 100644 docs/guides/mariadb/distributed/disaster-recovery/setup/yamls/mariadb.yaml create mode 100644 docs/guides/mariadb/distributed/disaster-recovery/setup/yamls/placement-policy.yaml diff --git a/docs/guides/mariadb/distributed/disaster-recovery/_index.md b/docs/guides/mariadb/distributed/disaster-recovery/_index.md new file mode 100644 index 000000000..67faf8347 --- /dev/null +++ b/docs/guides/mariadb/distributed/disaster-recovery/_index.md @@ -0,0 +1,10 @@ +--- +title: Disaster Recovery (DC-DR) +menu: + docs_{{ .version }}: + identifier: guides-mariadb-distributed-disaster-recovery + name: Disaster Recovery (DC-DR) + parent: guides-mariadb-distributed + weight: 40 +menu_name: docs_{{ .version }} +--- diff --git a/docs/guides/mariadb/distributed/disaster-recovery/overview/index.md b/docs/guides/mariadb/distributed/disaster-recovery/overview/index.md new file mode 100644 index 000000000..6c3dc1794 --- /dev/null +++ b/docs/guides/mariadb/distributed/disaster-recovery/overview/index.md @@ -0,0 +1,289 @@ +--- +title: MariaDB Cross Data Center Disaster Recovery Overview +menu: + docs_{{ .version }}: + identifier: guides-mariadb-distributed-disaster-recovery-overview + name: Overview + parent: 
guides-mariadb-distributed-disaster-recovery + weight: 10 +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + +# MariaDB Cross Data Center Disaster Recovery (DC-DR) Overview + +> **New to KubeDB?** Please start [here](/docs/README.md). + +## Introduction + +The [Distributed MariaDB](/docs/guides/mariadb/distributed/overview/index.md) guide deploys a single Galera +cluster whose pod ordinals are stretched across multiple Kubernetes clusters +over KubeSlice. Every node is a synchronous wsrep writer and peers resolve each +other over `*.slice.local` ServiceExports. That layout maximizes write +availability inside a single failure domain, but a synchronous Galera primary +component that spans data centers (DCs) is fragile: inter-DC network latency +slows down every commit, and an inter-DC partition can stall the cluster or +split the primary component. + +**Cross Data Center Disaster Recovery (DC-DR)** changes the shape of the +deployment to survive a full data center loss. Instead of one Galera cluster +stretched across DCs, each Member DC runs its own self contained Galera cluster, +and the DCs are linked by asynchronous, leader to leader replication. A single +cross-DC failover authority decides which DC is writable at any instant, so +there is exactly one active (writable) DC and one or more read-only standby DCs. + +This guide explains the DC-DR architecture and concepts. For a hands on setup, +see [Setup DC-DR](/docs/guides/mariadb/distributed/disaster-recovery/setup/index.md). + +## Why not a stretched Galera cluster + +Galera certification is synchronous: a commit is acknowledged only after the +write set has been ordered across the whole primary component. When that primary +component spans DCs: + +- Every write pays the inter-DC round trip latency. +- A network partition between DCs can drop the cluster below quorum, stalling + writes in both DCs. 
+- A flapping inter-DC link repeatedly evicts and rejoins nodes, triggering + expensive state transfers (SST). + +DC-DR removes the cross-DC link from the synchronous write path. Galera quorum +becomes strictly intra-DC, and the only thing that crosses the DC boundary is an +asynchronous replication stream plus a single failover decision. + +## Core architecture rule: Galera quorum is strictly intra-DC + +- **Each Member DC runs its own self contained Galera cluster.** It has its own + wsrep primary component, its own SST and IST, and (only when its local node + count is even) its own local **garbd** (Galera Arbitrator) for intra-DC + even-node quorum. The wsrep certification quorum never crosses the DC boundary, + so inter-DC latency cannot stall commits and an inter-DC partition cannot split + the primary component. +- **One MariaDB CR, expanded by the operator.** You still manage a single + distributed `MariaDB`. The operator partitions `spec.replicas` by the + PlacementPolicy `distributionRules` and materializes one Galera cluster per + Member DC, each with its own governing ServiceExport and its own gcomm peer set + scoped to that DC, plus the cross-DC asynchronous link. The single CR's + `status.disasterRecovery` carries the per-DC view. + +## The cross-DC failover authority + +The cross-DC decision is made by the `dr-controlplane`, a three site etcd quorum +running behind an OCM control plane. It publishes one +`coordination.k8s.io` Lease named `primary-dc` for the global failover scope. The +Lease holder is the active DC. This is the single cross-DC failover authority, +and exactly one DC is writable at a time. + +Everything keys off one string, the **OCM spoke cluster name**, which is the DC +name. It is the same value used as the Lease `holderIdentity`, the marker +`activeDC`, the pod label `open-cluster-management.io/cluster-name`, and the +PlacementPolicy `distributionRule.clusterName`. Keep them identical. 
+ +The per-DC `dr-controlplane` agent projects the Lease holder onto its spoke as a +marker ConfigMap so the data plane never has to reach across DCs to decide +writability: + +``` +ConfigMap primary-dc (namespace dc-failover, on each spoke) + data.activeDC = the DC the quorum currently trusts + data.renewTime = RFC3339, the observed primary-dc Lease renewTime + TTL 30s, fail closed: absent, stale, unparseable, or another DC => not active +``` + +## Cross-DC replication: leader to leader, asynchronous + +MariaDB has no asynchronous replication between two Galera clusters out of the +box, so the cross-DC link is net new. Under DC-DR the standby DC's node 0 (the first node of that DC's local Galera cluster) is a +GTID asynchronous replica of the active DC's writer endpoint: + +```sql +CHANGE MASTER TO + MASTER_HOST = '<db>.<namespace>.svc.slice.local', -- the active DC primary ServiceExport + MASTER_USE_GTID = slave_pos; +``` + +The replica uses GTID auto-positioning against the active DC's primary Service +(the load balanced active endpoint), not a fixed node, so an intra-active-DC +writer change is transparent and any active-DC node can serve the stream. Inside +the standby DC, Galera then certifies those applied writes synchronously to the +rest of that DC's nodes. So a DC is internally a normal KubeDB Galera cluster, +and externally either the writable head or a single asynchronous follower, +decided by the Lease. With more than two data DCs, each standby DC runs its own +asynchronous link from the active DC, and an unplanned failover promotes one +survivor while every other standby re-points its `CHANGE MASTER` at the new +active. + +### Galera to Galera GTID needs explicit configuration + +Linking two Galera clusters by GTID requires settings that the stretched layout +never needed: + +- `wsrep_gtid_mode = ON`. +- A `gtid_domain_id` for the asynchronous stream that is distinct from the wsrep + domain, so the two GTID sources do not collide. 
+- `log_slave_updates = ON` on every active-DC node, so any of them can serve the + binlog from a given GTID. +- A dedicated replication user for the cross-DC stream. +- Bounded binlog retention on the source (`expire_logs_days` / + `binlog_expire_logs_seconds`), so a slow or dead DR DC cannot make the active DC + retain binlog until its disk fills. + +## Fail-closed fence and split-brain safety + +Writability is gated by the Lease and fenced locally, and the fence fails closed. + +- A non-active DC's Galera cluster is held `read_only = ON` and + `super_read_only = ON`. These block client writes but not the replication SQL + thread or the wsrep applier, so the standby DC's asynchronous replica still + applies the incoming stream and Galera still certifies it to the rest of that + DC's nodes. Only client writes are refused. +- The in-DC fence reads the projected `primary-dc` marker ConfigMap. If the DC + cannot confirm it holds the Lease (marker absent, stale past the 30s TTL, + unparseable, or naming another DC), it forces `super_read_only`. +- This local fence plus the etcd majority is the split-brain guarantee. A + partitioned old-active DC that still sees clients cannot accept writes, because + it can no longer confirm it holds the Lease. + +## Role labeling and the primary Service + +In plain distributed mode the `md-coordinator` labels every Galera node +`kubedb.com/role: Primary` (multi-writer), so the `<db>` primary Service load +balances writes across all nodes. Under DC-DR: + +- Only the active DC's nodes carry `kubedb.com/role: Primary`. Within the active + DC, Galera remains multi-writer, so all active-DC nodes are `Primary`. +- A standby DC's nodes are labeled `standby`, even though Galera considers them + part of a (separate) primary component. +- As a result the single primary Service and the AppBinding resolve only to the + active DC. The fence sets this label from the Lease. 
+ +The Galera health check is DC-aware: it requires `Primary` only for active-DC +nodes, expects standby-DC nodes to be `standby` with `super_read_only = ON` and a +healthy asynchronous replica state, and scopes the +`wsrep_cluster_state_uuid` / `wsrep_cluster_conf_id` split-brain comparison per +DC, since the two DCs are now separate Galera clusters with different UUIDs. + +## Arbiter DC and per-DC garbd + +- The **Arbiter DC** (`role: Arbiter`, empty `replicaIndices`) holds only the + `dr-controlplane` etcd member and no MariaDB data. It contributes the third + vote to the cross-DC etcd quorum so a two data center deployment can still reach + a majority when one data DC is lost. +- A Member DC whose local node count is even gets its own intra-DC **garbd** + (Galera Arbitrator) so the local Galera cluster keeps odd quorum. Parity is + evaluated per DC group, not on the global `spec.replicas`. Prefer odd local + group sizes to avoid needing a per-DC garbd. + +## Failover, switchover, and failback + +### Planned switchover (zero RPO) + +Quiesce writes on the active DC (set `read_only = ON` on its nodes), wait until +the standby DC's asynchronous replica's GTID reaches the active DC's binlog GTID, +then move the Lease and swap source and follower. Because writes are quiesced and +the standby is fully caught up before the handoff, no rows are lost. + +Trigger a planned switchover with the CR annotation: + +``` +dr.kubedb.com/switchover-to: <target-dc> +``` + +This is hub driven. There is no `Switchover` OpsRequest type, because the +engine-aware quiesce and catch-up must run in the hub, not in the +engine-agnostic `dr-controlplane`. + +### Unplanned failover (DC loss) + +If the active DC is lost, the survivor stops its slave thread and becomes +writable without the catch-up wait. The bounded loss is the GTID tail that the +active DC committed but had not yet shipped to the standby. Every other standby +re-points its `CHANGE MASTER` at the new active DC. 
+ +### Failback via SST re-seed + +When a failed DC returns, it re-attaches as the asynchronous follower of the new +active DC. Because Galera cannot rewind a multi-writer node, the safe and simple +failback is a full SST re-seed of the returned cluster from the new active DC +(dropping its forked tail), then a GTID asynchronous catch-up. Only when the GTID +histories are provably non-divergent can it skip the re-seed. After catch-up, a +coordinated zero RPO Lease handoff returns the active DC. + +## Cross-DC lag guard + +Plain MariaDB health is the binary wsrep `Synced` signal. DC-DR adds a cross-DC +lag metric, measured on the standby DC's asynchronous replica as +`Seconds_Behind_Master` and the GTID gap (`@@gtid_slave_pos` versus the source's +`@@gtid_binlog_pos`). The lag budget is checked before a planned switchover so a +switchover never moves the Lease to a lagging standby. + +## Status: `status.disasterRecovery` + +The single distributed `MariaDB` CR exposes the cross-DC view in +`status.disasterRecovery`: + +| Field | Meaning | +| --- | --- | +| `activeDC` | The DC that currently holds the `primary-dc` Lease (the writable DC). | +| `phase` | `Steady`, `FailingOver`, `FailingBack`, or `Degraded`. | +| `dataCenters[]` | Per-DC view: `clusterName`, `role`, `leader`, `writable`, `lagBytes`, `healthy`. | +| `lastTransitionTime` | When the DR phase last changed. | + +## Architecture at a glance + +The example below uses two Member DCs (`dc-a`, `dc-b`) plus one Arbiter DC +(`dc-c`), with `dc-a` holding the Lease. 
+ +``` + dr-controlplane (3 site etcd quorum) + publishes Lease primary-dc => holder: dc-a + | + +----------------------+----------------------+ + | | | + project marker project marker etcd member only + v v v + +------------------+ +------------------+ +------------------+ + | dc-a | | dc-b | | dc-c | + | (active DC) | | (standby DC) | | (Arbiter DC) | + | | | | | | + | Galera cluster | | Galera cluster | | no MariaDB data | + | nodes 0,1,2 | | nodes 3,4,5 | | etcd member only | + | role=Primary | | role=standby | | | + | read_only=OFF | | super_read_only | | | + | | | = ON | | | + | serves binlog | | node 3 is the | | | + | via primary Svc | | GTID async | | | + | | | replica of dc-a | | | + +--------+---------+ +---------+--------+ +------------------+ + | ^ + | async GTID stream | + +-----------------------+ + <db>.<namespace>.svc.slice.local (active DC primary ServiceExport) +``` + +- Clients reach the single `<db>` primary Service, which resolves only to the + active DC's nodes (the `Primary` labeled nodes), so writes always land on the + Lease holder. +- The standby DC stays read only and catches up asynchronously. +- The Arbiter DC carries no MariaDB data and only contributes its etcd vote. + +## Enabling DC-DR + +DC-DR is currently enabled with an interim annotation on the MariaDB CR: + +``` +dr.kubedb.com/enabled: "true" +``` + +This is transitioning to the PlacementPolicy `clusterSpreadConstraint.failoverPolicy` +as the single source of truth. The PlacementPolicy already carries the +`failoverPolicy` and the per-DC `role` (`Member` / `Arbiter`) on its +`distributionRules`. + +## Next Steps + +- Follow [Setup DC-DR](/docs/guides/mariadb/distributed/disaster-recovery/setup/index.md) + to deploy a two Member DC plus Arbiter DC MariaDB and verify exactly one + writable DC. +- Review the [Distributed MariaDB Overview](/docs/guides/mariadb/distributed/overview/index.md) + for the OCM, KubeSlice, and PlacementPolicy substrate that DC-DR builds on. 
diff --git a/docs/guides/mariadb/distributed/disaster-recovery/setup/index.md b/docs/guides/mariadb/distributed/disaster-recovery/setup/index.md new file mode 100644 index 000000000..733a08e2c --- /dev/null +++ b/docs/guides/mariadb/distributed/disaster-recovery/setup/index.md @@ -0,0 +1,304 @@ +--- +title: Setup MariaDB DC-DR +menu: + docs_{{ .version }}: + identifier: guides-mariadb-distributed-disaster-recovery-setup + name: Setup + parent: guides-mariadb-distributed-disaster-recovery + weight: 20 +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + +# Setup MariaDB Cross Data Center Disaster Recovery (DC-DR) + +> **New to KubeDB?** Please start [here](/docs/README.md). + +This guide walks through deploying a DC-DR enabled distributed MariaDB across two +Member data centers (DCs) plus one Arbiter DC, and verifying that exactly one DC +is writable. Read the +[DC-DR Overview](/docs/guides/mariadb/distributed/disaster-recovery/overview/index.md) +first for the architecture and the concepts referenced below (the `primary-dc` +Lease, the marker fence, role labeling, and the cross-DC asynchronous link). + +## Before you begin + +DC-DR builds directly on the distributed MariaDB substrate. Complete the +following from the +[Distributed MariaDB Overview](/docs/guides/mariadb/distributed/overview/index.md) +before you start here: + +- An **OCM** hub with the three participating spoke clusters joined and accepted. + In this guide they are `dc-a`, `dc-b`, and `dc-c`. The OCM spoke cluster name is + the DC name and must match the PlacementPolicy `clusterName` exactly. +- The OCM **WorkConfiguration** patch (`RawFeedbackJsonString`) applied on every + spoke. +- **KubeSlice** installed, a project and `SliceConfig` covering all three + clusters, and CoreDNS forwarding `*.slice.local` on every cluster. +- The **KubeDB operator** installed on the hub with + `--set petset.features.ocm.enabled=true`. 
+ +In addition, DC-DR requires the cross-DC failover authority: + +- The **`dr-controlplane`** three site etcd quorum running behind the OCM control + plane, with one etcd member in each of `dc-a`, `dc-b`, and `dc-c` (the Arbiter + DC contributes its vote here). +- The per-DC `dr-controlplane` agent running on each spoke, projecting the + `primary-dc` marker ConfigMap into the `dc-failover` namespace. +- The KubeDB operator started with the DC-DR flags so its hub orchestrator watches + the Lease: `--dc-dr-enabled`, `--dc-dr-coord-kubeconfig`, and + `--dc-dr-local-dc`. + +> **Note:** The `dr-controlplane` agent needs write access to ConfigMaps in each +> spoke's `dc-failover` namespace, and the MariaDB coordinator needs read access +> to that ConfigMap from the database namespace. These RBAC rules ship with the +> DC-DR Helm values. + +## Step 1: Define the DC-DR PlacementPolicy + +The PlacementPolicy is what turns a plain distributed MariaDB into a DC-DR +cluster. Two things matter here: + +- `clusterSpreadConstraint.failoverPolicy` with `mode: TwoDC` and + `trigger.scope: Global`. This declares the two Member DC plus Arbiter DC layout + and that a single `primary-dc` Lease decides the writable DC for the whole + cluster. +- A `role` on each `distributionRule`. The two data centers are `role: Member` + (each becomes a self contained Galera cluster), and the third is `role: Arbiter` + with an empty `replicaIndices` (no MariaDB data, only the `dr-controlplane` etcd + vote). 
+ +Create `placement-policy.yaml`: + +```yaml +apiVersion: apps.k8s.appscode.com/v1 +kind: PlacementPolicy +metadata: + labels: + app.kubernetes.io/managed-by: Helm + name: distributed-mariadb-dcdr +spec: + clusterSpreadConstraint: + slice: + projectNamespace: kubeslice-demo-distributed-mariadb + sliceName: demo-slice + failoverPolicy: + mode: TwoDC + trigger: + scope: Global + distributionRules: + - clusterName: dc-a + role: Member + storageClassName: local-path # optional; omit to use the cluster default + replicaIndices: + - 0 + - 1 + - 2 + - clusterName: dc-b + role: Member + storageClassName: local-path # optional; omit to use the cluster default + replicaIndices: + - 3 + - 4 + - 5 + - clusterName: dc-c + role: Arbiter + replicaIndices: [] + nodeSpreadConstraint: + maxSkew: 1 + whenUnsatisfiable: ScheduleAnyway + zoneSpreadConstraint: + maxSkew: 1 + whenUnsatisfiable: ScheduleAnyway +``` + +> **Note:** Each Member DC's `replicaIndices` set becomes one independent Galera +> cluster with its own gcomm peer set and its own quorum. Use an odd count per +> Member DC (3 here) so each local Galera cluster keeps odd quorum without a +> per-DC garbd. A Member DC with an even local node count gets its own intra-DC +> garbd automatically. + +Apply the policy on the hub: + +```bash +$ kubectl apply -f placement-policy.yaml --context dc-a --kubeconfig $HOME/.kube/config +``` + +## Step 2: Create the DC-DR MariaDB + +Create the `demo` namespace if it does not exist: + +```bash +$ kubectl create namespace demo +``` + +Define the distributed MariaDB and reference the PlacementPolicy. The interim +annotation `dr.kubedb.com/enabled: "true"` enables the DC-DR behavior (this is +transitioning to the PlacementPolicy `failoverPolicy` as the single source of +truth). 
Create `mariadb.yaml`: + +```yaml +apiVersion: kubedb.com/v1 +kind: MariaDB +metadata: + name: mariadb-dcdr + namespace: demo + annotations: + dr.kubedb.com/enabled: "true" +spec: + distributed: true + deletionPolicy: WipeOut + replicas: 6 + storage: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Mi + storageType: Durable + version: 12.1.2 + podTemplate: + spec: + podPlacementPolicy: + name: distributed-mariadb-dcdr +``` + +`spec.replicas: 6` is partitioned across the Member DCs by the PlacementPolicy: +3 nodes in `dc-a` and 3 in `dc-b`. The Arbiter DC (`dc-c`) carries no MariaDB +data. + +Apply the resource on the hub: + +```bash +$ kubectl apply -f mariadb.yaml --context dc-a --kubeconfig $HOME/.kube/config +``` + +The operator expands this one CR into one Galera cluster per Member DC, each with +its own governing ServiceExport, and configures the standby DC's node 0 (the first node of that DC's local cluster, `mariadb-dcdr-3` when `dc-b` is the standby) as a GTID +asynchronous replica of the active DC's primary ServiceExport. The DC that first +acquires the `primary-dc` Lease bootstraps writable; the other Member DC seeds +from it and follows. + +## Step 3: Verify exactly one writable DC + +### 1. Check which DC holds the Lease + +The active DC is whichever spoke holds the `primary-dc` Lease. Inspect the +projected marker ConfigMap on each spoke: + +```bash +$ kubectl get configmap primary-dc -n dc-failover -o yaml --context dc-a +$ kubectl get configmap primary-dc -n dc-failover -o yaml --context dc-b +``` + +The `data.activeDC` value names the active DC and is the same on every spoke. In +this example assume it is `dc-a`. + +### 2. 
Confirm the DR status on the CR + +```bash +$ kubectl get mariadb mariadb-dcdr -n demo -o jsonpath='{.status.disasterRecovery}' --context dc-a | jq +``` + +**Output (abridged):** + +```json +{ + "activeDC": "dc-a", + "phase": "Steady", + "dataCenters": [ + { "clusterName": "dc-a", "role": "Member", "writable": true, "healthy": true }, + { "clusterName": "dc-b", "role": "Member", "writable": false, "healthy": true, "lagBytes": 0 }, + { "clusterName": "dc-c", "role": "Arbiter", "healthy": true } + ] +} +``` + +Exactly one `dataCenters[]` entry has `writable: true`. + +### 3. Confirm role labels resolve only to the active DC + +Only the active DC's nodes carry `kubedb.com/role: Primary`; the standby DC's +nodes are `standby`: + +```bash +# Active DC nodes are Primary +$ kubectl get pods -n demo -l 'kubedb.com/role=Primary' --context dc-a + +# Standby DC nodes are standby +$ kubectl get pods -n demo -l 'kubedb.com/role=standby' --context dc-b +``` + +Because the single `mariadb-dcdr` primary Service resolves only to the `Primary` labeled +nodes, every client write lands on the active DC. + +### 4. Confirm the standby DC is read only and following + +Connect to the standby DC's node 0 (pod `mariadb-dcdr-3`, the first of `dc-b`'s replica indices) and confirm the fence and the asynchronous +replica: + +```bash +$ kubectl exec -it -n demo pod/mariadb-dcdr-3 --context dc-b -- bash +mariadb -uroot -p$MYSQL_ROOT_PASSWORD +``` + +```sql +SHOW VARIABLES LIKE 'super_read_only'; +SHOW SLAVE STATUS\G +``` + +`super_read_only` is `ON`, and `SHOW SLAVE STATUS` shows the GTID asynchronous +replica streaming from the active DC's primary endpoint +(`mariadb-dcdr.demo.svc.slice.local`) with both threads running and a small +`Seconds_Behind_Master`. + +### 5. 
Confirm writes are refused on the standby DC + +A direct write attempt against the standby DC is rejected by the fence: + +```sql +CREATE DATABASE should_fail; +-- ERROR 1290 (HY000): The MariaDB server is running with the --super-read-only option +``` + +This confirms the fail-closed guarantee: only the Lease holder accepts writes. + +## Triggering a planned switchover + +To move the active DC on purpose (for example to drain a DC for maintenance) with +zero data loss, set the switchover annotation on the CR. The hub quiesces writes +on the current active DC, waits for the target's GTID to catch up, then moves the +Lease: + +```bash +$ kubectl annotate mariadb mariadb-dcdr -n demo \ + dr.kubedb.com/switchover-to=dc-b --overwrite --context dc-a +``` + +Watch the DR status transition through `FailingOver` back to `Steady` with +`activeDC: dc-b`: + +```bash +$ kubectl get mariadb mariadb-dcdr -n demo \ + -o jsonpath='{.status.disasterRecovery.phase} {.status.disasterRecovery.activeDC}{"\n"}' \ + --context dc-a --watch +``` + +## Cleanup + +```bash +$ kubectl delete mariadb mariadb-dcdr -n demo --context dc-a +$ kubectl delete placementpolicy distributed-mariadb-dcdr --context dc-a +``` + +> **Note:** Per-DC PlacementPolicies and ServiceExports created by the operator +> are cleaned up with the MariaDB. The Arbiter DC's `dr-controlplane` etcd member +> is part of the control plane, not the database, and is not removed by deleting +> the MariaDB. + +## Next Steps + +- Review the [DC-DR Overview](/docs/guides/mariadb/distributed/disaster-recovery/overview/index.md) + for failover, failback, and the lag guard semantics. +- See the [Distributed MariaDB Overview](/docs/guides/mariadb/distributed/overview/index.md) + for the OCM, KubeSlice, and operator install that DC-DR depends on. 
diff --git a/docs/guides/mariadb/distributed/disaster-recovery/setup/yamls/mariadb.yaml b/docs/guides/mariadb/distributed/disaster-recovery/setup/yamls/mariadb.yaml new file mode 100644 index 000000000..794b333ee --- /dev/null +++ b/docs/guides/mariadb/distributed/disaster-recovery/setup/yamls/mariadb.yaml @@ -0,0 +1,28 @@ +apiVersion: kubedb.com/v1 +kind: MariaDB +metadata: + name: mariadb-dcdr + namespace: demo + annotations: + # Interim DC-DR trigger. This is transitioning to the PlacementPolicy + # clusterSpreadConstraint.failoverPolicy as the single source of truth. + dr.kubedb.com/enabled: "true" +spec: + distributed: true + deletionPolicy: WipeOut + # spec.replicas is partitioned across the Member DCs by the PlacementPolicy + # distributionRules: 3 nodes in dc-a and 3 in dc-b. The Arbiter DC carries no + # MariaDB data. + replicas: 6 + storage: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Mi + storageType: Durable + version: 12.1.2 + podTemplate: + spec: + podPlacementPolicy: + name: distributed-mariadb-dcdr diff --git a/docs/guides/mariadb/distributed/disaster-recovery/setup/yamls/placement-policy.yaml b/docs/guides/mariadb/distributed/disaster-recovery/setup/yamls/placement-policy.yaml new file mode 100644 index 000000000..e33a18d2f --- /dev/null +++ b/docs/guides/mariadb/distributed/disaster-recovery/setup/yamls/placement-policy.yaml @@ -0,0 +1,49 @@ +apiVersion: apps.k8s.appscode.com/v1 +kind: PlacementPolicy +metadata: + labels: + app.kubernetes.io/managed-by: Helm + name: distributed-mariadb-dcdr +spec: + clusterSpreadConstraint: + slice: + projectNamespace: kubeslice-demo-distributed-mariadb + sliceName: demo-slice + # The failoverPolicy turns this distributed cluster into a DC-DR cluster: + # each Member DC runs its own self contained Galera cluster, and exactly one + # DC is writable at a time. 
mode TwoDC is the two Member DC plus Arbiter + # layout; trigger scope Global means one primary-dc Lease decides the + # writable DC for the whole cluster. + failoverPolicy: + mode: TwoDC + trigger: + scope: Global + distributionRules: + # dc-a: a self contained Galera cluster with its own gcomm and quorum. + - clusterName: dc-a + role: Member + storageClassName: local-path # optional; omit to use the cluster default + replicaIndices: + - 0 + - 1 + - 2 + # dc-b: a self contained Galera cluster; while standby it is read-only and + # its first node (index 3) async-follows the active DC via a GTID stream. + - clusterName: dc-b + role: Member + storageClassName: local-path # optional; omit to use the cluster default + replicaIndices: + - 3 + - 4 + - 5 + # dc-c: Arbiter DC. No MariaDB data; holds only the dr-controlplane etcd + # member, which contributes the third vote to the cross-DC quorum. + - clusterName: dc-c + role: Arbiter + replicaIndices: [] + nodeSpreadConstraint: + maxSkew: 1 + whenUnsatisfiable: ScheduleAnyway + zoneSpreadConstraint: + maxSkew: 1 + whenUnsatisfiable: ScheduleAnyway