From 2977866d28a1e802a1280e882035c5d522be8535 Mon Sep 17 00:00:00 2001 From: Herklos Date: Fri, 20 Mar 2026 19:01:51 +0100 Subject: [PATCH] [Sync] Fix ansible deployment Signed-off-by: Herklos --- infra/sync/README.md | 42 ++++ .../development/group_vars/all/vault.yml | 70 +++--- .../ansible/roles/garage_init/tasks/main.yml | 208 +++++++++++------- .../ansible/roles/garage_init/vars/main.yml | 5 + .../ansible/roles/stack/tasks/healthcheck.yml | 8 +- .../stack/templates/docker-compose.yml.j2 | 15 +- packages/sync/octobot_sync/app.py | 4 + 7 files changed, 229 insertions(+), 123 deletions(-) create mode 100644 infra/sync/ansible/roles/garage_init/vars/main.yml diff --git a/infra/sync/README.md b/infra/sync/README.md index 5a0e72a50..f24cbd718 100644 --- a/infra/sync/README.md +++ b/infra/sync/README.md @@ -202,6 +202,48 @@ See `vault.yml.example` for the full list: | `vault_encryption_secret` | User data encryption key | | `vault_platform_encryption_secret` | Platform data encryption key | +### Regenerating S3 keys + +The S3 secret key is only returned by Garage at creation time. If it was lost, delete the key and recreate it: + +```bash +# 1. SSH into any cluster node +ssh deploy@<node-ip> + +# 2. Read the admin token from the deployed Garage config +GARAGE_TOKEN=$(grep admin_token /opt/octobot-sync/garage.toml | awk -F'"' '{print $2}') + +# 3. Get the key ID +KEY_ID=$(curl -s -H "Authorization: Bearer $GARAGE_TOKEN" \ + 'http://127.0.0.1:3903/v2/GetKeyInfo?search=octobot-sync-key' \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['accessKeyId'])") + +echo "Key ID: $KEY_ID" + +# 4. 
Delete the key +curl -s -X POST -H "Authorization: Bearer $GARAGE_TOKEN" \ + "http://127.0.0.1:3903/v2/DeleteKey?id=$KEY_ID" +``` + +Then re-run `setup-garage.yml` — it will create a new key and display the credentials: + +```bash +ansible-playbook playbooks/setup-garage.yml -i inventories/<env> +``` + +Save the new credentials into vault.yml: + +```bash +ansible-vault edit inventories/<env>/group_vars/all/vault.yml +# Update vault_s3_access_key and vault_s3_secret_key +``` + +Re-deploy to apply the new credentials: + +```bash +ansible-playbook playbooks/site.yml -i inventories/<env> +``` + ## Adding a new node 1. Edit the environment's `hosts.yml` — add a new entry under `sync_nodes` diff --git a/infra/sync/ansible/inventories/development/group_vars/all/vault.yml b/infra/sync/ansible/inventories/development/group_vars/all/vault.yml index 268b584e7..ea24ce804 100644 --- a/infra/sync/ansible/inventories/development/group_vars/all/vault.yml +++ b/infra/sync/ansible/inventories/development/group_vars/all/vault.yml @@ -1,34 +1,38 @@ $ANSIBLE_VAULT;1.1;AES256 -38396633616531373263656566303465356634396366336261383766316563333132326235633138 -6436313432343632343331633432623937386531386162660a653432346263326537373436376537 -66336561663461643638663062633866376131353762393034316331383232393634303930303833 -3234653330316338620a623061393639333238613837373064363532306639356239393863333138 -39306133643464383530653237613934396334316163636335333166616333356437613131643738 -30373765306363626165623632303233383863666535383762303266333636656136346536396639 -62653262386566373562643034623135626637376330303635366564383465343365393739383466 -38653865316233306361366463353262343534303833303130316665626237636131393930663464 -36636364306234346461323234636634626136396631333831373532666535363731623037386533 -38336630316636616230393066653730366366393063636637353131376438636637636365316265 -35306339343963636138326139653431343138626539316564336132316531393730303635356462 
-63363564356637373764373938623538366534656135343163376235316131666535633062313736 -35633038376662303533626234323166663231313565616361306665633538306635376236656632 -35663935336333396264306464386531623537343165323736333533656566636262306132306531 -38383237623139353661666232626437323366323864386264303731623339343330616132313365 -36623634353334653835643234373536666134366535656339623235626131643161343232313561 -62303731646663343335313531313736353734346362343461616432326238343665666436376363 -38396431643034313531396662663639343863373663353234656437633834633030653263306339 -63376339356665623664616237303238303065356161353465376533626337336365346266656137 -66363864333161373936356666336235306565353833636563363565373638623932383238626439 -66643964636566613562336138323436306530303332626139343137356339303265643864323561 -32343431363538656464643434623165366632616263386363623236393632663866623537376232 -66393337376139323437363963303032313035393664313435346163306230386235386431386235 -62626339656262386237633462383466313063303637643362313262633562393161373738653861 -32393038363835643234653836353730366333363432663839393164396135626533393263616532 -63323063323364383737356235623365303637323037653333623235623065386535316431383233 -33626163343434333062343030376430366238353836646663393465363038633039646561313534 -62333432663864393062323461303230636261643663626432383838613835363639653836323265 -62663336653865633839346635303833303061306437663831353662306332373230373234303735 -38363538363436313530353435326532333263323431343438373737613366656330656330383630 -36666463313330356331373366303861666332373130356532646564316537346235626236636462 -62656430623131643163623937316636663962626163613466656332316132366462363765313438 -38376166326565316139656530306135633339636533333264333830343934636566 +32326365386431663361393638653262666330623463663732633330373538373334633939653033 +3264333466313437383933323731343230346637323835370a366432656632613830383537353665 
+65333866363931643538373735326665626638613661643835653865613231613461333566346265 +6133363536653037310a303261326230383139626662363035366136386563383663643030323839 +38653865363465633339663763313362393764633933613065323561643830623438616164303462 +61396532643838356136363832353631366566643732616362376138653730616462363936656135 +35613537323435616633396261623732356133393435383134643166383164653466346337376338 +63343665313233353465363732373535623938663834333133383365376138383634653136383063 +31306638303933373732316132616430636462396265393061346330323163396630303933653736 +63303365653835656230666233363933366364373937663764653933306566393862393338663835 +37653864646663386266363364643562613235643035653036613663336232623361653162656265 +32323732303962383232343166313361366264626333666164383735623332653865386365373130 +38343438613738666530336638633666653162343338333636393761613131336431313532333935 +34643232666637356232663437383034633363636435373062356133343631376434363933316430 +39333461343330353037636537326265383332363266346138333633306434626631306661653763 +62633563383262343630643764353833623735303663303962393863333262653531656233626238 +30646331376566346263616162396638623035616138313634616636613132653236643634626166 +65323837396163376235343861353965313733623339326164613336373963656135666231353433 +31613339343066303438363639656530353831656537646535663534336261373464383537396661 +35656162623164313361613838303762306630393933396535336666613637623737623061363232 +37613132646463613438316263663761356236643339623134633864383536323939363234616466 +36613736353330376239636331653737333038656138356134346262373333613963333938663562 +61323730646335613034666131343336633535306234616534306332373963353430326539633537 +62366138656435303435353539333734383263653836613931343866613231303438633235333833 +34373337623834323038663563313461363962663432393063313638343461303165303163383262 +30343166383863646363633264386165393933616436343431663337626165346433383438376536 
+62353532656239393263333033333765326339393938316631333765643836623433363437303334 +65363638313031303434346465356635306331633163353133656238393435666435653962343262 +65656263393635353132323238316532353763346135373163646639393134616663383032316133 +35313231386665386339376530663038633461663036386166646638323538383333633262393738 +36393263323437336530393839363661303139353436363739393335376230343039376232633061 +66613930386361353130386534376263633435376339663838616239323933653732313736666165 +32353035336364346533636434356532396366613632396565646630633264343666356538643666 +39356134643833613461373133383161663366326237353930346561383537666430326436306135 +35623163326539393636616437303162653261626533616639316166653335616338643430396235 +66353836623033393835366538396264653562313334393662643831316431366133613464316562 +35313333386537363933376632323631323831376336366664366564396436646235 diff --git a/infra/sync/ansible/roles/garage_init/tasks/main.yml b/infra/sync/ansible/roles/garage_init/tasks/main.yml index 483e2bd65..eb57abcbb 100644 --- a/infra/sync/ansible/roles/garage_init/tasks/main.yml +++ b/infra/sync/ansible/roles/garage_init/tasks/main.yml @@ -1,27 +1,32 @@ --- # One-time cluster bootstrap — run via setup-garage.yml # Idempotent: skips bucket/key creation if they already exist (replicated metadata) +# Uses Garage v2 admin HTTP API instead of CLI (avoids Docker networking issues) -- name: Ensure Garage is running - ansible.builtin.command: - cmd: docker compose exec -T garage wget -qO- http://127.0.0.1:3903/health - chdir: "{{ stack_deploy_dir }}" - register: garage_health - retries: 10 - delay: 5 - until: garage_health.rc == 0 - changed_when: false +- name: Wait for Garage admin API + ansible.builtin.wait_for: + host: 127.0.0.1 + port: 3903 + timeout: 60 -- name: Discover Garage node ID - ansible.builtin.command: - cmd: docker compose exec -T garage /garage node id -q - chdir: "{{ stack_deploy_dir }}" - register: node_id_result - changed_when: false +# --- 
Discover node IDs (runs on every node) --- -- name: Set node ID fact +- name: Get cluster status + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/GetClusterStatus" + method: GET + headers: "{{ garage_admin_headers }}" + return_content: true + register: cluster_status + +- name: Find local node in cluster status ansible.builtin.set_fact: - discovered_garage_node_id: "{{ node_id_result.stdout | trim }}" + local_node: "{{ cluster_status.json.nodes | selectattr('addr', 'equalto', garage_rpc_public_addr) | first }}" + +- name: Set node ID facts + ansible.builtin.set_fact: + discovered_garage_node_hex: "{{ local_node.id }}" + discovered_garage_node_id: "{{ local_node.id }}@{{ garage_rpc_public_addr }}" - name: Display node ID ansible.builtin.debug: @@ -30,98 +35,149 @@ # --- Cluster setup (delegated to first node) --- - name: Connect to all peers - ansible.builtin.command: - cmd: >- - docker compose exec -T garage /garage node connect - {{ hostvars[item]['discovered_garage_node_id'] }}@{{ hostvars[item]['garage_rpc_public_addr'] }} - chdir: "{{ stack_deploy_dir }}" + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/ConnectClusterNodes" + method: POST + headers: "{{ garage_admin_headers }}" + body_format: json + body: + - "{{ hostvars[item]['discovered_garage_node_hex'] }}@{{ hostvars[item]['garage_rpc_public_addr'] }}" + status_code: [200] + return_content: true loop: "{{ groups['sync_nodes'] }}" when: - inventory_hostname == groups['sync_nodes'][0] - item != inventory_hostname changed_when: false -- name: Assign layout for each node - ansible.builtin.command: - cmd: >- - docker compose exec -T garage /garage layout assign - -z {{ hostvars[item]['garage_zone'] }} - -c {{ hostvars[item]['garage_capacity'] }}G - -t {{ item }} - {{ hostvars[item]['discovered_garage_node_id'] }} - chdir: "{{ stack_deploy_dir }}" +- name: Build layout assignment + ansible.builtin.set_fact: + garage_layout_roles: "{{ garage_layout_roles | default([]) + [{'id': 
hostvars[item]['discovered_garage_node_hex'], 'zone': hostvars[item]['garage_zone'], 'capacity': hostvars[item]['garage_capacity'] | int * 1073741824, 'tags': [item]}] }}" loop: "{{ groups['sync_nodes'] }}" when: inventory_hostname == groups['sync_nodes'][0] - changed_when: true + +- name: Assign layout for all nodes + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/UpdateClusterLayout" + method: POST + headers: "{{ garage_admin_headers }}" + body_format: json + body: + roles: "{{ garage_layout_roles }}" + status_code: [200, 204, 500] + return_content: true + register: layout_assign + when: inventory_hostname == groups['sync_nodes'][0] + changed_when: layout_assign.status in [200, 204] - name: Get current layout version - ansible.builtin.shell: - cmd: >- - docker compose exec -T garage /garage layout show 2>&1 - | grep -oP 'Current layout version: \K\d+' || echo 0 - chdir: "{{ stack_deploy_dir }}" - register: layout_version - changed_when: false + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/GetClusterLayout" + method: GET + headers: "{{ garage_admin_headers }}" + return_content: true + register: cluster_layout when: inventory_hostname == groups['sync_nodes'][0] - name: Apply layout - ansible.builtin.command: - cmd: >- - docker compose exec -T garage /garage layout apply - --version {{ (layout_version.stdout | int) + 1 }} - chdir: "{{ stack_deploy_dir }}" - when: inventory_hostname == groups['sync_nodes'][0] + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/ApplyClusterLayout" + method: POST + headers: "{{ garage_admin_headers }}" + body_format: json + body: + version: "{{ (cluster_layout.json.version | default(0) | int) + 1 }}" + status_code: [200, 204] + return_content: true + when: + - inventory_hostname == groups['sync_nodes'][0] + - cluster_layout.json.stagedRoleChanges | default([]) | length > 0 changed_when: true # --- Bucket (idempotent — skipped if already exists via replication) --- - name: Check if bucket exists - 
ansible.builtin.command: - cmd: docker compose exec -T garage /garage bucket info {{ s3_bucket }} - chdir: "{{ stack_deploy_dir }}" + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/GetBucketInfo?globalAlias={{ s3_bucket }}" + method: GET + headers: "{{ garage_admin_headers }}" + return_content: true + status_code: [200, 404] register: bucket_check - failed_when: false - changed_when: false when: inventory_hostname == groups['sync_nodes'][0] - name: Create S3 bucket - ansible.builtin.command: - cmd: docker compose exec -T garage /garage bucket create {{ s3_bucket }} - chdir: "{{ stack_deploy_dir }}" + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/CreateBucket" + method: POST + headers: "{{ garage_admin_headers }}" + body_format: json + body: + globalAlias: "{{ s3_bucket }}" + status_code: [200] + return_content: true + register: bucket_create when: - inventory_hostname == groups['sync_nodes'][0] - - bucket_check.rc != 0 + - bucket_check.status == 404 + +- name: Set bucket ID fact + ansible.builtin.set_fact: + garage_bucket_id: "{{ bucket_create.json.id if bucket_check.status == 404 else bucket_check.json.id }}" + when: inventory_hostname == groups['sync_nodes'][0] # --- API key (idempotent — skipped if already exists via replication) --- - name: Check if API key exists - ansible.builtin.command: - cmd: docker compose exec -T garage /garage key info octobot-sync-key - chdir: "{{ stack_deploy_dir }}" + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/GetKeyInfo?search={{ garage_s3_key_name }}" + method: GET + headers: "{{ garage_admin_headers }}" + return_content: true + status_code: [200, 400, 404] register: key_check - failed_when: false - changed_when: false when: inventory_hostname == groups['sync_nodes'][0] - name: Create S3 API key - ansible.builtin.command: - cmd: docker compose exec -T garage /garage key create octobot-sync-key - chdir: "{{ stack_deploy_dir }}" + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/CreateKey" + method: 
POST + headers: "{{ garage_admin_headers }}" + body_format: json + body: + name: "{{ garage_s3_key_name }}" + status_code: [200] + return_content: true + register: key_create when: - inventory_hostname == groups['sync_nodes'][0] - - key_check.rc != 0 - register: key_output + - key_check.status != 200 + no_log: true + +- name: Set API key ID fact + ansible.builtin.set_fact: + garage_access_key_id: "{{ key_create.json.accessKeyId if key_check.status != 200 else key_check.json.accessKeyId }}" + when: inventory_hostname == groups['sync_nodes'][0] no_log: true - name: Grant key access to bucket - ansible.builtin.command: - cmd: >- - docker compose exec -T garage /garage bucket allow - --read --write --owner {{ s3_bucket }} --key octobot-sync-key - chdir: "{{ stack_deploy_dir }}" - when: - - inventory_hostname == groups['sync_nodes'][0] - - key_check.rc != 0 + ansible.builtin.uri: + url: "{{ garage_admin_url }}/v2/AllowBucketKey" + method: POST + headers: "{{ garage_admin_headers }}" + body_format: json + body: + bucketId: "{{ garage_bucket_id }}" + accessKeyId: "{{ garage_access_key_id }}" + permissions: + read: true + write: true + owner: true + status_code: [200] + return_content: true + when: inventory_hostname == groups['sync_nodes'][0] + +# --- Display credentials --- - name: Display new credentials ansible.builtin.debug: @@ -130,11 +186,15 @@ cp vault.yml.example inventories//group_vars/all/vault.yml ansible-vault edit inventories//group_vars/all/vault.yml - {{ key_output.stdout | default('(key already existed)') }} + {% if key_check.status != 200 %} + S3 Access Key ID: {{ key_create.json.accessKeyId }} + S3 Secret Access Key: {{ key_create.json.secretAccessKey }} + {% else %} + (key already existed — secret not available via API, check vault) + {% endif %} Node IDs for host_vars: {% for host in groups['sync_nodes'] %} {{ host }}: {{ hostvars[host]['discovered_garage_node_id'] }} {% endfor %} when: inventory_hostname == groups['sync_nodes'][0] - no_log: "{{ 
key_output is changed }}" diff --git a/infra/sync/ansible/roles/garage_init/vars/main.yml b/infra/sync/ansible/roles/garage_init/vars/main.yml new file mode 100644 index 000000000..253a68a46 --- /dev/null +++ b/infra/sync/ansible/roles/garage_init/vars/main.yml @@ -0,0 +1,5 @@ +--- +garage_admin_url: "http://127.0.0.1:3903" +garage_admin_headers: + Authorization: "Bearer {{ garage_admin_token }}" +garage_s3_key_name: "octobot-sync-key" diff --git a/infra/sync/ansible/roles/stack/tasks/healthcheck.yml b/infra/sync/ansible/roles/stack/tasks/healthcheck.yml index af6475e83..606907db0 100644 --- a/infra/sync/ansible/roles/stack/tasks/healthcheck.yml +++ b/infra/sync/ansible/roles/stack/tasks/healthcheck.yml @@ -15,16 +15,10 @@ return_content: true status_code: [200] register: octobot_health - until: octobot_health.status == 200 + until: octobot_health.status == 200 and (octobot_health.json.ok | default(false)) retries: 30 delay: 5 -- name: Verify OctoBot sync responds ok - ansible.builtin.assert: - that: - - octobot_health.json.ok == true - fail_msg: "OctoBot sync health check failed on {{ inventory_hostname }}" - - name: Wait for Nginx health (proxied) ansible.builtin.uri: url: "http://127.0.0.1:{{ nginx_port }}/health" diff --git a/infra/sync/ansible/roles/stack/templates/docker-compose.yml.j2 b/infra/sync/ansible/roles/stack/templates/docker-compose.yml.j2 index 2281597b1..56f119d8b 100644 --- a/infra/sync/ansible/roles/stack/templates/docker-compose.yml.j2 +++ b/infra/sync/ansible/roles/stack/templates/docker-compose.yml.j2 @@ -14,26 +14,23 @@ services: - garage_meta:/var/lib/garage/meta - garage_data:/var/lib/garage/data - ./garage.toml:/etc/garage.toml:ro - healthcheck: - test: ["CMD", "wget", "-qO-", "http://127.0.0.1:3903/health"] - interval: 10s - retries: 5 networks: - backend octobot-sync: image: {{ octobot_sync_image }}:{{ octobot_image_tag }} - command: ["OctoBot", "sync", "--host", "0.0.0.0", "--port", "{{ octobot_sync_port }}"] + command: ["sync", 
"--host", "0.0.0.0", "--port", "{{ octobot_sync_port }}"] env_file: .env - expose: - - "{{ octobot_sync_port }}" + volumes: + - ./collections.json:/octobot/user/collections.json:ro + ports: + - "127.0.0.1:{{ octobot_sync_port }}:{{ octobot_sync_port }}" restart: unless-stopped mem_limit: 1g security_opt: - no-new-privileges:true depends_on: - garage: - condition: service_healthy + - garage networks: - frontend - backend diff --git a/packages/sync/octobot_sync/app.py b/packages/sync/octobot_sync/app.py index d5df2d45a..3de2031f6 100644 --- a/packages/sync/octobot_sync/app.py +++ b/packages/sync/octobot_sync/app.py @@ -85,6 +85,10 @@ def create_app( ) app.include_router(sync_router, prefix="/v1") + @app.get("/health") + async def health(): + return {"ok": True} + if replica_manager: @app.on_event("startup") async def _start_replica():