From e4a4df9fc8b60d4ad56df71b5507bbd65cd4b418 Mon Sep 17 00:00:00 2001 From: "Diego Nieto (lesandie)" Date: Tue, 9 Dec 2025 13:55:58 +0100 Subject: [PATCH 1/2] RO replicas: a script that automates the whole recovery procedure per replica --- ...altinity-kb-check-replication-ddl-queue.md | 319 +++++++++++++++--- 1 file changed, 272 insertions(+), 47 deletions(-) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md index 4730321494..d4a86215b3 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md @@ -113,69 +113,294 @@ SELECT name FROM system.detached_parts WHERE table = 'table_name'; -- check for Starting from version 23, it's possible to use syntax [SYSTEM DROP REPLICA \'replica_name\' FROM TABLE db.table](https://clickhouse.com/docs/en/sql-reference/statements/system#drop-replica) instead of the `ZKPATH` variant, but you need to execute the above command from a different replica than the one you want to drop, which is not convenient sometimes. We recommend using the above method because it works with any version and is more reliable. -## Procedure for many replicas generating DDL +## Procedure to restore multiple tables in Read-Only mode per replica -```sql -SELECT DISTINCT 'DETACH TABLE ' || database || '.' || table || ' ON CLUSTER \'data\';' FROM clusterAllReplicas('data',system.replicas) WHERE active_replicas < total_replicas FORMAT TSVRaw; - -SELECT DISTINCT 'SYSTEM DROP REPLICA \'' || replica_name || '\' FROM ZKPATH \'' || zookeeper_path || '\';' FROM clusterAllReplicas('data',system.replicas) WHERE active_replicas < total_replicas FORMAT TSVRaw; - -SELECT DISTINCT 'ATTACH TABLE ' || database || '.' 
|| table || ' ON CLUSTER \'data\';' FROM clusterAllReplicas('data',system.replicas) WHERE active_replicas < total_replicas FORMAT TSVRaw; - -SELECT DISTINCT 'SYSTEM RESTORE REPLICA ' || database || '.' || table || ' ON CLUSTER \'data\';' FROM clusterAllReplicas('data',system.replicas) WHERE active_replicas < total_replicas FORMAT TSVRaw; - --- check detached parts afterwards -SELECT * FROM clusterAllReplicas('data',system.detached_parts) +It is better to run the recovery replica by replica, because restoring a replica using ON CLUSTER could lead to race conditions that would cause errors and put significant stress on ZooKeeper/Keeper. --- make clickhouse 'forget' about the table (data persisted on disk) -DETACH TABLE db.table ON CLUSTER '...'; --- remove the zookeeper data about that table in zookeeper -SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk'; -- run the commands generated before. - --- register table in clickhouse again - it will be in readonly mode. -ATTACH TABLE db.table ON CLUSTER '...'; +```sql +SELECT + '-- Table ' || toString(row_num) || '\n' || + 'DETACH TABLE `' || database || '`.`' || table || '`;\n' || + 'SYSTEM DROP REPLICA ''' || replica_name || ''' FROM ZKPATH ''' || zookeeper_path || ''';\n' || + 'ATTACH TABLE `' || database || '`.`' || table || '`;\n' || + 'SYSTEM RESTORE REPLICA `' || database || '`.`' || table || '`;\n' +FROM ( + SELECT + *, + rowNumberInAllBlocks() + 1 as row_num + FROM ( + SELECT + database, + table, + any(replica_name) as replica_name, + any(zookeeper_path) as zookeeper_path + FROM system.replicas + WHERE is_readonly + GROUP BY database, table + ORDER BY database, table + ) + ORDER BY database, table +) +FORMAT TSVRaw; +``` --- recreate the zookeeper data from the -SYSTEM RESTORE REPLICA db.name ON CLUSTER '...'; +This will generate the DDL statements to be executed per replica, producing output that can be saved as an SQL file. 
It is important to execute the commands per replica in the sequence generated by the above DDL: ---- do restart replica +- DETACH the table +- DROP REPLICA +- ATTACH the table +- RESTORE REPLICA -SELECT DISTINCT 'clickhouse-client --host=' || left(hostName(),-2) || ' --query=\'SYSTEM RESTART REPLICA '||database || '.' || table|| '\'' FROM clusterAllReplicas('all-sharded', system.replication_queue) WHERE last_exception != '' and create_time > now() -130 FORMAT TSVRaw; -``` +If we do this in parallel, a table could still be attaching while another query is dropping/restoring the replica in zookeeper, causing errors. -Here a bash script that will do the same as above but tailored to a single replica, you can call it like `bash restore_replica.sh chi-clickhouse-cluster-main-cluster-1-3`: +The following bash script will read the generated SQL file and execute the commands sequentially, asking for user input in case of errors. Simply save the generated SQL to a file (e.g. `recovery_commands.sql`) and run the script below (which you can name `clickhouse_replica_recovery.sh`): ```bash -#!/usr/bin/env bash +$ clickhouse_replica_recovery.sh recovery_commands.sql +``` -#Call like bash restore_replica.sh chi-clickhouse-cluster-main-cluster-1-3 -set -o errexit # exit on fail -set -o pipefail # catch errors in pipelines -set -o nounset # exit on undeclared variable -set -o xtrace # trace execution +Here is the script: -restore_replica() { - local chi_name=$1 - # assumes `chi-...-cluster--` naming ou can change this patter to your needs - local shard=$(echo $chi_name |grep -oP '(?<=cluster-)\d+(?=-\d+$)') +```bash +#!/bin/bash + +# ClickHouse Replica Recovery Script +# This script executes DETACH, DROP REPLICA, ATTACH, and RESTORE REPLICA commands sequentially + +# Configuration +CLICKHOUSE_HOST="${CLICKHOUSE_HOST:-localhost}" +CLICKHOUSE_PORT="${CLICKHOUSE_PORT:-9000}" +CLICKHOUSE_USER="${CLICKHOUSE_USER:-clickhouse_operator}" 
+CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-xxxxxxxxx}" +COMMANDS_FILE="${1:-recovery_commands.sql}" +LOG_FILE="recovery_$(date +%Y%m%d_%H%M%S).log" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +MAGENTA='\033[0;35m' +NC='\033[0m' # No Color + +# Function to log messages +log() { + echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE" +} +# Function to execute a SQL statement with retry logic +execute_sql() { + local sql="$1" + local table_num="$2" + local step_name="$3" + while true; do - - clickhouse-client --host=${chi_name} --user=admin --password=the_admin_password --query="select concat(database, '.\`', table, '\`', ' ', database, '/', table) FROM system.replicas WHERE is_readonly = 1 ORDER BY database, table" | - while read -r db_table zk_path; do - clickhouse-client --host=${chi_name} --user=admin --password=the_admin_password --query="DETACH TABLE ${db_table}" - clickhouse-client --host=${chi_name} --user=admin --password=the_admin_password --query="SYSTEM DROP REPLICA '"${chi_name}"' FROM ZKPATH '/clickhouse/tables/${shard}/${zk_path}'" || true - clickhouse-client --host=${chi_name} --user=admin --password=the_admin_password --query="ATTACH TABLE ${db_table}" - clickhouse-client --host=${chi_name} --user=admin --password=the_admin_password --query="SYSTEM RESTORE REPLICA ${db_table}" - done || true - - sleep 5 - + log "${YELLOW}Executing command for Table $table_num - $step_name:${NC}" + log "$sql" + + # Build clickhouse-client command + local ch_cmd="clickhouse-client --host=$CLICKHOUSE_HOST --port=$CLICKHOUSE_PORT --user=$CLICKHOUSE_USER" + + if [ -n "$CLICKHOUSE_PASSWORD" ]; then + ch_cmd="$ch_cmd --password=$CLICKHOUSE_PASSWORD" + fi + + # Execute the command and capture output and exit code + local output + local exit_code + output=$(echo "$sql" | $ch_cmd 2>&1) + exit_code=$? 
+ + # Log the output + echo "$output" | tee -a "$LOG_FILE" + + if [ $exit_code -eq 0 ]; then + log "${GREEN}✓ Successfully executed${NC}" + return 0 + else + log "${RED}✗ Failed to execute (Exit code: $exit_code)${NC}" + log "${RED}Error output: $output${NC}" + + # Ask user what to do + while true; do + echo "" + log "${MAGENTA}========================================${NC}" + log "${MAGENTA}Error occurred! Choose an option:${NC}" + log "${MAGENTA}========================================${NC}" + echo -e "${YELLOW}[R]${NC} - Retry this command" + echo -e "${YELLOW}[I]${NC} - Ignore this error and continue to next command in this table" + echo -e "${YELLOW}[S]${NC} - Skip this entire table and move to next table" + echo -e "${YELLOW}[A]${NC} - Abort script execution" + echo "" + echo -n "Enter your choice (R/I/S/A): " + + # Read from /dev/tty to get user input from terminal + read -r response < /dev/tty + + case "${response^^}" in + R|RETRY) + log "${BLUE}Retrying command...${NC}" + break # Break inner loop to retry + ;; + I|IGNORE) + log "${YELLOW}Ignoring error and continuing to next command...${NC}" + return 1 # Return error but continue + ;; + S|SKIP) + log "${YELLOW}Skipping entire table $table_num...${NC}" + return 2 # Return special code to skip table + ;; + A|ABORT) + log "${RED}Aborting script execution...${NC}" + exit 1 + ;; + *) + echo -e "${RED}Invalid option '$response'. Please enter R, I, S, or A.${NC}" + ;; + esac + done + fi done } -restore_replica "$@" +# Main execution function +main() { + log "${BLUE}========================================${NC}" + log "${BLUE}ClickHouse Replica Recovery Script${NC}" + log "${BLUE}========================================${NC}" + log "Host: $CLICKHOUSE_HOST:$CLICKHOUSE_PORT" + log "User: $CLICKHOUSE_USER" + log "Commands file: $COMMANDS_FILE" + log "Log file: $LOG_FILE" + echo "" + + # Check if commands file exists + if [ ! 
-f "$COMMANDS_FILE" ]; then
+        log "${RED}Error: Commands file '$COMMANDS_FILE' not found!${NC}"
+        echo ""
+        echo "Usage: $0 [commands_file]"
+        echo " commands_file: Path to SQL commands file (default: recovery_commands.sql)"
+        echo ""
+        echo "Example: $0 my_commands.sql"
+        exit 1
+    fi
+
+    # Process SQL commands from file
+    # current_sql accumulates the lines of one statement until a line ends with ';'
+    local current_sql=""
+    local table_counter=0
+    local step_in_table=0
+    local failed_count=0
+    local success_count=0
+    local ignored_count=0
+    local skipped_tables=()
+    local skip_current_table=false
+
+    # `|| [ -n "$line" ]` also processes a last line that has no trailing newline
+    while IFS= read -r line || [ -n "$line" ]; do
+        # Skip empty lines
+        if [[ -z "$line" ]] || [[ "$line" =~ ^[[:space:]]*$ ]]; then
+            continue
+        fi
+
+        # Check if this is a comment line indicating a new table
+        if [[ "$line" =~ ^[[:space:]]*--[[:space:]]*Table[[:space:]]+([0-9]+) ]]; then
+            table_counter="${BASH_REMATCH[1]}"
+            step_in_table=0
+            skip_current_table=false
+            log ""
+            log "${BLUE}========================================${NC}"
+            log "${BLUE}Processing Table $table_counter${NC}"
+            log "${BLUE}========================================${NC}"
+            continue
+        elif [[ "$line" =~ ^[[:space:]]*-- ]]; then
+            # Skip other comment lines
+            continue
+        fi
+
+        # Skip if we're skipping this table
+        if [ "$skip_current_table" = true ]; then
+            # Check if line ends with semicolon to count statements
+            if [[ "$line" =~ \;[[:space:]]*$ ]]; then
+                step_in_table=$((step_in_table + 1))
+            fi
+            continue
+        fi
+
+        # Accumulate the SQL statement
+        current_sql+="$line "
+
+        # Check if we have a complete statement (ends with semicolon)
+        if [[ "$line" =~ \;[[:space:]]*$ ]]; then
+            step_in_table=$((step_in_table + 1))
+
+            # Determine the step name
+            local step_name=""
+            if [[ "$current_sql" =~ ^[[:space:]]*DETACH ]]; then
+                step_name="DETACH"
+            elif [[ "$current_sql" =~ ^[[:space:]]*SYSTEM[[:space:]]+DROP[[:space:]]+REPLICA ]]; then
+                step_name="DROP REPLICA"
+            elif [[ "$current_sql" =~ ^[[:space:]]*ATTACH ]]; then
+                step_name="ATTACH"
+            elif [[ "$current_sql" =~ ^[[:space:]]*SYSTEM[[:space:]]+RESTORE[[:space:]]+REPLICA ]]; then
+                step_name="RESTORE REPLICA"
+            fi
+
+            log ""
+            log "Step $step_in_table/4: $step_name"
+
+            # Execute the statement
+            # execute_sql returns 0 = success, 1 = failure ignored, 2 = skip rest of table
+            local result
+            execute_sql "$current_sql" "$table_counter" "$step_name"
+            result=$?
+
+            if [ $result -eq 0 ]; then
+                success_count=$((success_count + 1))
+                sleep 1 # Small delay between commands
+            elif [ $result -eq 1 ]; then
+                # User chose to ignore this error
+                failed_count=$((failed_count + 1))
+                ignored_count=$((ignored_count + 1))
+                sleep 1
+            elif [ $result -eq 2 ]; then
+                # User chose to skip this table.
+                # The statement that triggered the skip did fail, so count it:
+                # otherwise a run with skipped tables would be reported as fully
+                # successful and exit with status 0.
+                failed_count=$((failed_count + 1))
+                skip_current_table=true
+                skipped_tables+=("$table_counter")
+                log "${YELLOW}Skipping remaining commands for Table $table_counter${NC}"
+            fi
+
+            # Reset current_sql for next statement
+            current_sql=""
+        fi
+    done < "$COMMANDS_FILE"
+
+    # A trailing statement without a terminating semicolon is never executed
+    # by the loop above; report it instead of dropping it silently.
+    if [[ "$current_sql" =~ [^[:space:]] ]]; then
+        log "${RED}Unterminated statement at end of file was NOT executed: $current_sql${NC}"
+        failed_count=$((failed_count + 1))
+    fi
+
+    # Summary
+    log ""
+    log "${BLUE}========================================${NC}"
+    log "${BLUE}Execution Summary${NC}"
+    log "${BLUE}========================================${NC}"
+    log "Total successful commands: ${GREEN}$success_count${NC}"
+    log "Total failed commands: ${RED}$failed_count${NC}"
+    log "Total ignored errors: ${YELLOW}$ignored_count${NC}"
+    log "Total tables processed: $table_counter"
+
+    if [ ${#skipped_tables[@]} -gt 0 ]; then
+        log "Skipped tables: ${YELLOW}${skipped_tables[*]}${NC}"
+    fi
+
+    log "Log file: $LOG_FILE"
+
+    # Exit status: 0 only when no statement failed, was skipped, or was left unexecuted
+    if [ $failed_count -eq 0 ]; then
+        log "${GREEN}All commands executed successfully!${NC}"
+        exit 0
+    else
+        log "${YELLOW}Some commands failed or were ignored. 
Please check the log file.${NC}" + exit 1 + fi +} + +# Run the main function +main + ``` From 0eb694e3bb7f65f4f04296303715659d11349b2b Mon Sep 17 00:00:00 2001 From: "Diego Nieto (lesandie)" Date: Tue, 9 Dec 2025 14:01:32 +0100 Subject: [PATCH 2/2] min fixes --- .../altinity-kb-check-replication-ddl-queue.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md index d4a86215b3..4c505878ef 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md @@ -98,19 +98,20 @@ FORMAT TSVRaw; Sometimes, due to crashes, zookeeper unavailability, slowness, or other reasons, some of the tables can be in Read-Only mode. This allows SELECTS but not INSERTS. So we need to do DROP / RESTORE replica procedure. Just to be clear, this procedure **will not delete any data**, it will just re-create the metadata in zookeeper with the current state of the [ClickHouse replica](/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica/). + +How it works: ```sql ALTER TABLE table_name DROP DETACHED PARTITION ALL -- clean detached folder before operation. PARTITION ALL works only for the fresh clickhouse versions DETACH TABLE table_name; -- Required for DROP REPLICA --- Use the zookeeper_path and replica_name from the above query. +-- Use the zookeeper_path and replica_name from system.replicas. 
SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/table_path_in_zk'; -- It will remove everything from the /table_path_in_zk/replicas/replica_name ATTACH TABLE table_name; -- Table will be in readonly mode, because there is no metadata in ZK and after that execute SYSTEM RESTORE REPLICA table_name; -- It will detach all partitions, re-create metadata in ZK (like it's new empty table), and then attach all partitions back -SYSTEM SYNC REPLICA table_name; -- Wait for replicas to synchronize parts. Also it's recommended to check `system.detached_parts` on all replicas after recovery is finished. -SELECT name FROM system.detached_parts WHERE table = 'table_name'; -- check for leftovers. See the potential problem here - https://gist.github.com/den-crane/702e4c8a1162dae7c2edf48a7c2dd00d +SYSTEM SYNC REPLICA table_name; -- Not mandatory. It will wait for replicas to synchronize parts. Also it's recommended to check `system.detached_parts` on all replicas after recovery is finished. +SELECT name FROM system.detached_parts WHERE table = 'table_name'; -- check for leftovers. See the potential problems here https://altinity.com/blog/understanding-detached-parts-in-clickhouse ``` - Starting from version 23, it's possible to use syntax [SYSTEM DROP REPLICA \'replica_name\' FROM TABLE db.table](https://clickhouse.com/docs/en/sql-reference/statements/system#drop-replica) instead of the `ZKPATH` variant, but you need to execute the above command from a different replica than the one you want to drop, which is not convenient sometimes. We recommend using the above method because it works with any version and is more reliable. ## Procedure to restore multiple tables in Read-Only mode per replica