---
# Compose stack: a chat bot plus the llama.cpp server it sends requests to.
services:
  # Bot image is built from the build context in this directory.
  bot:
    build: .
    environment:
      # Secrets are read from the shell environment or a .env file beside this
      # file; each falls back to the empty string when the variable is unset.
      BOT_TOKEN: "${BOT_TOKEN:-}"
      CHAT_ID: "${CHAT_ID:-}"
      # Inside this container "localhost" is the bot itself, so the model
      # server must be addressed by its service name on the Compose network.
      OPENAI_BASE_URL: "http://llama-server:8080/v1"
      SYSTEM_PROMPT_PATH: /etc/sysprompt.txt
    volumes:
      # Mounted read-only at the path SYSTEM_PROMPT_PATH points to.
      - ./sysprompt.txt:/etc/sysprompt.txt:ro
    # Start the model server first. The short form only orders container
    # start-up; it does not wait for the server to be ready to serve.
    depends_on:
      - llama-server
    restart: unless-stopped

  llama-server:
    image: ghcr.io/ggml-org/llama.cpp:server
    container_name: llama-server
    ports:
      # Publishes the API on the host as well; quoted so the mapping is
      # always read as a string.
      - "8080:8080"
    volumes:
      - ~/models:/models
    deploy:
      resources:
        limits:
          memory: 2g
    # Service-level attribute (not a key of deploy.resources.limits).
    # NOTE(review): per the Compose reference this is the combined
    # memory + swap ceiling, i.e. 4g of swap on top of the 2g above - confirm.
    memswap_limit: 6g
    # Folded into a single line; ">-" drops the trailing newline.
    command: >-
      -m /models/qwen25_15B.gguf
      --port 8080
      --host 0.0.0.0
      -n 512