From bf5721539f3039fccaed83dd304d237d0817931f Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Sun, 24 May 2026 22:47:00 +0200 Subject: [PATCH 1/2] add support for self-hosted LLMs on Kubernetes --- helm/templates/configmap.yaml | 3 + helm/templates/llm.yaml | 184 ++++++++++++++++++++++++ helm/values.yaml | 37 ++++- kubernetes/krawl-all-in-one-deploy.yaml | 147 ++++++++++++++++++- 4 files changed, 368 insertions(+), 3 deletions(-) create mode 100644 helm/templates/llm.yaml diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 378ec15..a3028d6 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -54,6 +54,9 @@ data: ai: enabled: {{ .Values.config.ai.enabled }} provider: {{ .Values.config.ai.provider | quote }} + {{- if .Values.config.ai.openai_base_url }} + openai_base_url: {{ .Values.config.ai.openai_base_url | quote }} + {{- end }} {{- if .Values.config.ai.model }} model: {{ .Values.config.ai.model | quote }} {{- end }} diff --git a/helm/templates/llm.yaml b/helm/templates/llm.yaml new file mode 100644 index 0000000..7efc418 --- /dev/null +++ b/helm/templates/llm.yaml @@ -0,0 +1,184 @@ +{{- if .Values.llm.ollama.enabled }} +{{- $name := printf "%s-ollama" (include "krawl.fullname" .) }} +{{- $port := .Values.llm.ollama.service.port }} +{{- if .Values.llm.ollama.persistence.enabled }} +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ $name }} + labels: + {{- include "krawl.labels" . | nindent 4 }} + app.kubernetes.io/component: ollama +spec: + accessModes: + - {{ .Values.llm.ollama.persistence.accessMode }} + resources: + requests: + storage: {{ .Values.llm.ollama.persistence.size }} +{{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $name }} + labels: + {{- include "krawl.labels" .
| nindent 4 }} + app.kubernetes.io/component: ollama +spec: + type: ClusterIP + ports: + - name: http + port: {{ $port }} + targetPort: http + protocol: TCP + selector: + {{- include "krawl.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: ollama +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $name }} + labels: + {{- include "krawl.labels" . | nindent 4 }} + app.kubernetes.io/component: ollama +spec: + replicas: 1 + selector: + matchLabels: + {{- include "krawl.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ollama + template: + metadata: + labels: + {{- include "krawl.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: ollama + spec: + containers: + - name: ollama + image: "{{ .Values.llm.ollama.image.repository }}:{{ .Values.llm.ollama.image.tag }}" + imagePullPolicy: {{ .Values.llm.ollama.image.pullPolicy }} + ports: + - name: http + containerPort: {{ $port }} + protocol: TCP + env: + - name: OLLAMA_HOST + value: "0.0.0.0:{{ $port }}" + - name: OLLAMA_MODELS + value: /models + command: + - /bin/sh + - -c + args: + - | + /bin/ollama serve & + until /bin/ollama list >/dev/null 2>&1; do + sleep 2 + done + {{- if .Values.llm.ollama.pullModel }} + /bin/ollama pull {{ .Values.llm.ollama.model | squote }} + {{- end }} + wait + volumeMounts: + - name: models + mountPath: /models + volumes: + - name: models + {{- if .Values.llm.ollama.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ $name }} + {{- else }} + emptyDir: {} + {{- end }} +{{- end }} + +{{- if .Values.llm.llamaCpp.enabled }} +{{- $name := printf "%s-llamacpp" (include "krawl.fullname" .) }} +{{- $port := .Values.llm.llamaCpp.service.port }} +{{- if .Values.llm.llamaCpp.persistence.enabled }} +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ $name }} + labels: + {{- include "krawl.labels" . 
| nindent 4 }} + app.kubernetes.io/component: llama-cpp +spec: + accessModes: + - {{ .Values.llm.llamaCpp.persistence.accessMode }} + resources: + requests: + storage: {{ .Values.llm.llamaCpp.persistence.size }} +{{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $name }} + labels: + {{- include "krawl.labels" . | nindent 4 }} + app.kubernetes.io/component: llama-cpp +spec: + type: ClusterIP + ports: + - name: http + port: {{ $port }} + targetPort: http + protocol: TCP + selector: + {{- include "krawl.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: llama-cpp +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $name }} + labels: + {{- include "krawl.labels" . | nindent 4 }} + app.kubernetes.io/component: llama-cpp +spec: + replicas: 1 + selector: + matchLabels: + {{- include "krawl.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: llama-cpp + template: + metadata: + labels: + {{- include "krawl.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: llama-cpp + spec: + containers: + - name: llama-cpp + image: "{{ .Values.llm.llamaCpp.image.repository }}:{{ .Values.llm.llamaCpp.image.tag }}" + imagePullPolicy: {{ .Values.llm.llamaCpp.image.pullPolicy }} + ports: + - name: http + containerPort: {{ $port }} + protocol: TCP + args: + - --hf-repo + - {{ .Values.llm.llamaCpp.hfRepo | quote }} + - --hf-file + - {{ .Values.llm.llamaCpp.hfFile | quote }} + - --port + - {{ $port | quote }} + - --host + - 0.0.0.0 + - -n + - "-1" + volumeMounts: + - name: models + mountPath: /root/.cache + volumes: + - name: models + {{- if .Values.llm.llamaCpp.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ $name }} + {{- else }} + emptyDir: {} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/values.yaml b/helm/values.yaml index 42b4a29..95c6abd 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -115,8 +115,8 @@ config: ban_duration_seconds: 600 ai: enabled: false - provider: 
"openrouter" # "openrouter" or "openai" -# openai_base_url: "https://api.openai.com/v1" is only needed if provider is set to "openai" and you want to use a custom endpoint to reach different models + provider: "openrouter" # "openrouter" or "openai". Use "openai" for OpenAI-compatible endpoints such as Ollama or llama.cpp. + openai_base_url: "https://api.openai.com/v1" # Set to your OpenAI-compatible endpoint, e.g. http://krawl-ollama:8080/v1 or http://krawl-llamacpp:8080/v1 api_key: null # set your OpenAI or OpenRouter API key here model: null # for example nvidia/nemotron-3-super-120b-a12b:free or gpt-5.1-mini timeout: 60 # Request timeout in seconds for API calls @@ -230,6 +230,39 @@ redis: cpu: 50m memory: 64Mi +# Optional bundled local LLM services (Ollama and/or llama.cpp) +# When enabled, point config.ai.openai_base_url to http://{fullname}-ollama:8080/v1 +# or http://{fullname}-llamacpp:8080/v1, where {fullname} is the rendered "krawl.fullname" of the release +llm: + ollama: + enabled: false + image: + repository: ollama/ollama + tag: "latest" + pullPolicy: IfNotPresent + service: + port: 8080 + persistence: + enabled: true + size: 5Gi + accessMode: ReadWriteOnce + model: "qwen:1.8b" + pullModel: true + llamaCpp: + enabled: false + image: + repository: ghcr.io/ggml-org/llama.cpp + tag: "server" + pullPolicy: IfNotPresent + service: + port: 8080 + persistence: + enabled: true + size: 5Gi + accessMode: ReadWriteOnce + hfRepo: "Qwen/Qwen1.5-1.8B-Chat-GGUF" + hfFile: "qwen1_5-1_8b-chat-q4_k_m.gguf" + # SQLite -> PostgreSQL migration job settings migration: enabled: false diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml index ac06df2..14b5275 100644 --- a/kubernetes/krawl-all-in-one-deploy.yaml +++ b/kubernetes/krawl-all-in-one-deploy.yaml @@ -39,6 +39,150 @@ spec: - namespaceSelector: {} - ipBlock: cidr: 0.0.0.0/0 + + # Optional local Ollama deployment + # Uncomment (adding a `---` separator line before the first resource) and 
adjust the AI config to use: + # provider: "openai" + # openai_base_url: "http://krawl-ollama:8080/v1" + # model: "qwen:1.8b" + # 
apiVersion: v1 + # kind: PersistentVolumeClaim + # metadata: + # name: krawl-ollama + # namespace: krawl-system + # labels: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/version: "2.1.0" + # app.kubernetes.io/component: ollama + # spec: + # accessModes: + # - ReadWriteOnce + # resources: + # requests: + # storage: 5Gi + # --- + # apiVersion: v1 + # kind: Service + # metadata: + # name: krawl-ollama + # namespace: krawl-system + # labels: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/version: "2.1.0" + # app.kubernetes.io/component: ollama + # spec: + # type: ClusterIP + # ports: + # - name: http + # port: 8080 + # targetPort: http + # protocol: TCP + # selector: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/component: ollama + # --- + # apiVersion: apps/v1 + # kind: Deployment + # metadata: + # name: krawl-ollama + # namespace: krawl-system + # labels: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/version: "2.1.0" + # app.kubernetes.io/component: ollama + # spec: + # replicas: 1 + # selector: + # matchLabels: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/component: ollama + # template: + # metadata: + # labels: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/component: ollama + # spec: + # containers: + # - name: ollama + # image: "ollama/ollama:latest" + # imagePullPolicy: IfNotPresent + # ports: + # - name: http + # containerPort: 8080 + # protocol: TCP + # env: + # - name: OLLAMA_HOST + # value: "0.0.0.0:8080" + # - name: OLLAMA_MODELS + # value: /models + # command: + # - /bin/sh + # - -c + # args: + # - | + # /bin/ollama serve & + # until /bin/ollama list >/dev/null 2>&1; do + # sleep 2 + # done + # /bin/ollama pull qwen:1.8b + # wait + # volumeMounts: + # - name: models + 
# mountPath: /models + # 
volumes: + # - name: models + # persistentVolumeClaim: + # claimName: krawl-ollama + # --- + # Optional local llama.cpp deployment (Deployment only: also create a krawl-llamacpp Service on port 8080 selecting these pods) + # Uncomment and adjust the AI config to use: + # provider: "openai" + # openai_base_url: "http://krawl-llamacpp:8080/v1" + # model: "qwen1.5-1.8b-chat" + # apiVersion: apps/v1 + # kind: Deployment + # metadata: + # name: krawl-llamacpp + # namespace: krawl-system + # labels: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/version: "2.1.0" + # app.kubernetes.io/component: llama-cpp + # spec: + # replicas: 1 + # selector: + # matchLabels: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/component: llama-cpp + # template: + # metadata: + # labels: + # app.kubernetes.io/name: krawl + # app.kubernetes.io/instance: krawl + # app.kubernetes.io/component: llama-cpp + # spec: + # containers: + # - name: llama-cpp + # image: "ghcr.io/ggml-org/llama.cpp:server" + # imagePullPolicy: IfNotPresent + # ports: + # - name: http + # containerPort: 8080 + # protocol: TCP + # command: + # - /bin/sh + # - -c + # args: + # - | + # /app/llama-server --hf-repo Qwen/Qwen1.5-1.8B-Chat-GGUF --hf-file qwen1_5-1_8b-chat-q4_k_m.gguf --port 8080 --host 0.0.0.0 -n -1 --- # Source: krawl-chart/templates/configmap.yaml apiVersion: v1 @@ -94,7 +238,8 @@ data: ban_duration_seconds: 600 ai: enabled: false - provider: "openrouter" # "openrouter" or "openai" + provider: "openrouter" # "openrouter" or "openai". Use "openai" for OpenAI-compatible endpoints such as Ollama or llama.cpp. + openai_base_url: "https://api.openai.com/v1" # Set to your OpenAI-compatible endpoint, e.g. 
http://krawl-ollama:8080/v1 or http://krawl-llamacpp:8080/v1 model: null api_key: null timeout: 60 From 9d34ad935c0a2b952f75bafe6063bf4c77a678f0 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Sun, 24 May 2026 22:50:41 +0200 Subject: [PATCH 2/2] bumped version --- helm/Chart.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 776edbf..c96fac2 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: krawl-chart description: A Helm chart for Krawl honeypot server type: application -version: 2.1.3 -appVersion: 2.1.3 +version: 2.1.4 +appVersion: 2.1.4 keywords: - honeypot - security