-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfly.toml.example
More file actions
54 lines (47 loc) · 1.73 KB
/
fly.toml.example
File metadata and controls
54 lines (47 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Fly.io deployment template for mnemon.
#
# Setup:
#   cp fly.toml.example fly.toml
#   # Edit the three REPLACE_ME placeholders below, then:
#   fly launch --copy-config --no-deploy   # creates the app without deploying yet
#   # Create the volume named in [mounts] (size is in GB), in the same region as primary_region:
#   fly volumes create mnemon_data --size 1 --region <your-region>
#   fly secrets set MNEMON_LOCAL_TOKEN=... MNEMON_AS_ENABLED=true MNEMON_AS_PASSPHRASE=...
#   fly deploy
#
# See the "Self-host on Fly.io" section of README.md for the full runbook.
# App name: must be globally unique on Fly. The same placeholder appears in
# the two [env] hostnames below; replace all three with the same name.
app = "REPLACE_ME_fly_app_name"
# Region code; pick one close to you (list them with `fly platform regions`).
primary_region = "sjc"
# Empty table: this template sets no build options.
# NOTE(review): presumably flyctl then falls back to its default build
# detection (e.g. a Dockerfile in the repository) — confirm.
[build]
[env]
# Public hostname of this deployment. Both values embed the app name, so
# replace the placeholder with the same name used for the top-level `app` key.
MNEMON_ALLOWED_HOSTS = "REPLACE_ME_fly_app_name.fly.dev"
MNEMON_PUBLIC_URL = "https://REPLACE_ME_fly_app_name.fly.dev"

# Same path as the [mounts] destination; keep the two in sync.
MNEMON_VAULT_DIR = "/data"

# Same value as http_service.internal_port; presumably the port mnemon
# binds to. Keep the two in sync.
PORT = "8080"
[http_service]
# Matches PORT in [env].
internal_port = 8080
force_https = true
# Machines are stopped when idle and started again on demand; with
# min_machines_running = 0 the app may scale down to zero machines.
auto_start_machines = true
auto_stop_machines = "stop"
min_machines_running = 0

# The first request after a cold start loads the FastEmbed
# bge-small-en-v1.5 ONNX model (~15–25s). Keep grace_period longer than
# that load time; otherwise rolling deploys and `fly secrets set` restarts
# produce false-positive "app not listening on 0.0.0.0:8080" warnings.
[[http_service.checks]]
method = "GET"
path = "/health"
interval = "30s"
timeout = "5s"
grace_period = "40s"
# Attaches the Fly volume created during setup (see the steps at the top of
# this file). `source` is the volume name; `destination` is the mount path
# and is the same path as MNEMON_VAULT_DIR in [env].
[mounts]
source = "mnemon_data"
destination = "/data"
# Keep memory at 1GB or more. The first memory_search call loads the
# FastEmbed bge-small-en-v1.5 ONNX model (~130MB resident) into RAM, on top
# of Python, uvicorn, FastMCP, and the vector store. With the Fly default
# size (shared-cpu-1x:256mb) the mnemon process is OOM-killed on the first
# search.
[[vm]]
cpu_kind = "shared"
cpus = 1
memory = "1gb"