# Review notes for this patch to src/controllers/root.cr (Crystal). The patch text below is unchanged.
#
# Purpose: the GET "/" route under base "/api/source/v1/" stops raising "health check failed"
# and instead renders a JSON body {healthy, services} with status 200 when every service is
# healthy and 503 otherwise.
#
# What the hunks change:
#   * Root#index calls self.class.healthcheck, logs the names/errors of the services whose
#     status is "unhealthy", then renders the result as JSON.
#   * self.healthcheck? (Bool) is replaced by self.healthcheck, which runs the redis, postgres
#     and influx checks through Promise.all and folds the three results into
#     {healthy: Bool, services: Hash(String, {status, error})}.
#   * check_resource? is replaced by check_resource, which returns
#     {service, success, error}; success is false only when the block raises.
#   * influx_healthcheck loses its begin/rescue wrapper and now ends with
#     client.get("/health").success? (previously response.status_code == 200).
#
# NOTE(review): the old check_resource? returned `!!yield`; the new check_resource calls
#   `yield` and ignores its value. influx_healthcheck still reports failure by RETURNING false
#   (`return false if influx_host.nil?`, and a non-success /health response), so with this
#   patch those two cases are recorded as {status: "healthy"}. Only raised exceptions (which
#   now propagate out of influx_healthcheck because its rescue was removed) mark a service
#   unhealthy. check_resource should treat a falsy block result as a failure, or
#   influx_healthcheck should raise instead of returning false.
# NOTE(review): pg_healthcheck's body is outside this diff; if it also signals failure by
#   return value rather than by raising, it has the same problem - confirm.
# NOTE(review): on failure check_resource emits two Log.error entries for the same service and
#   index emits a third ("HEALTH CHECK FAILED - ..."); consider collapsing these.
# NOTE(review): index declares a NamedTuple return type but its last expression is
#   `render status: ..., json: result`; confirm that this type-checks with the controller
#   framework's render helper.
# NOTE(review): self.healthcheck? is removed rather than kept as an alias; any caller outside
#   this diff needs updating - verify.
# NOTE(review): in this copy the hunk headers and +/- lines share physical lines, so the
#   original line structure (including blank context lines) must be restored before the patch
#   can be applied.
diff --git a/src/controllers/root.cr b/src/controllers/root.cr index 13f7a36..4b37867 100644 --- a/src/controllers/root.cr +++ b/src/controllers/root.cr @@ -7,10 +7,18 @@ module PlaceOS::Source::Api class Root < Application base "/api/source/v1/" - # healthcheck, returns OK if all connections are good + # healthcheck, returns JSON with status of all services @[AC::Route::GET("/")] - def index : Nil - raise "health check failed" unless self.class.healthcheck? + def index : NamedTuple(healthy: Bool, services: Hash(String, NamedTuple(status: String, error: String?))) + result = self.class.healthcheck + unless result[:healthy] + failed_services = result[:services].select { |_, service_info| service_info[:status] == "unhealthy" } + error_details = failed_services.map { |service, info| "#{service}: #{info[:error]}" }.join(", ") + Log.error { "HEALTH CHECK FAILED - #{error_details}" } + end + + # Return 200 if all healthy, 503 (Service Unavailable) if any service is unhealthy + render status: (result[:healthy] ? 200 : 503), json: result end @[AC::Route::GET("/version")] @@ -23,26 +31,47 @@ module PlaceOS::Source::Api ) end - def self.healthcheck? 
: Bool - Promise.all( + def self.healthcheck : NamedTuple(healthy: Bool, services: Hash(String, NamedTuple(status: String, error: String?))) + results = Promise.all( Promise.defer { - check_resource?("redis") { redis.ping } + check_resource("redis") { redis.ping } }, Promise.defer { - check_resource?("postgres") { pg_healthcheck } + check_resource("postgres") { pg_healthcheck } }, Promise.defer { - check_resource?("influx") { influx_healthcheck } + check_resource("influx") { influx_healthcheck } }, - ).then(&.all?).get + ).get + + services = Hash(String, NamedTuple(status: String, error: String?)).new + overall_healthy = true + + results.each do |result| + if result[:success] + services[result[:service]] = {status: "healthy", error: nil} + else + services[result[:service]] = {status: "unhealthy", error: result[:error]} + overall_healthy = false + end + end + + { + healthy: overall_healthy, + services: services, + } end - private def self.check_resource?(resource, &) - Log.trace { "healthchecking #{resource}" } - !!yield + private def self.check_resource(service_name : String, &) : NamedTuple(service: String, success: Bool, error: String?) + Log.trace { "healthchecking #{service_name}" } + yield + {service: service_name, success: true, error: nil} rescue exception - Log.error(exception: exception) { {"connection check to #{resource} failed"} } - false + error_msg = exception.message || exception.class.name + Log.error(exception: exception) { {"connection check to #{service_name} failed"} } + # Also log to console for Docker/K8s visibility + Log.error { "Health check failed for #{service_name}: #{error_msg}" } + {service: service_name, success: false, error: error_msg} end private def self.pg_healthcheck @@ -74,15 +103,10 @@ module PlaceOS::Source::Api influx_host = INFLUX_HOST return false if influx_host.nil? 
- begin - HTTP::Client.new(URI.parse(influx_host)) do |client| - client.connect_timeout = 5.seconds - client.read_timeout = 5.seconds - response = client.get("/health") - response.status_code == 200 - end - rescue - false + HTTP::Client.new(URI.parse(influx_host)) do |client| + client.connect_timeout = 5.seconds + client.read_timeout = 5.seconds + client.get("/health").success? end end