---
# Telegram bot + local llama.cpp inference server.
services:
  bot:
    build: .
    environment:
      # Secrets — left empty here; inject real values via an env file or the
      # shell environment rather than committing them to VCS.
      BOT_TOKEN: ""
      CHAT_ID: ""
      # Quoted: compose env values are strings; an unquoted value would be
      # parsed as a (negative) YAML integer first.
      CHANNEL_ID: "-1003290014225"
      # FIX: the bot runs in its own network namespace, so "localhost" would
      # point at the bot container itself, never at llama-server. Use the
      # compose service name, which is DNS-resolvable on the default network.
      OPENAI_BASE_URL: http://llama-server:8080/v1
      SYSTEM_PROMPT_PATH: /etc/sysprompt.txt
      MAX_CONCURRENT_REQUESTS: "2"
    volumes:
      - ./sysprompt.txt:/etc/sysprompt.txt:ro
    # Start the inference server before the bot that talks to it.
    depends_on:
      - llama-server
    restart: unless-stopped

  llama-server:
    image: ghcr.io/ggml-org/llama.cpp:server
    container_name: llama-server
    ports:
      # Quoted so the mapping is a string, not a YAML 1.1 sexagesimal int.
      # Host publishing is only needed for access from outside compose; the
      # bot reaches the server over the internal network regardless.
      - "8080:8080"
    volumes:
      - ~/models:/models
    deploy:
      resources:
        limits:
          memory: 2g
    # NOTE(review): memswap_limit is honored by plain `docker compose` runs,
    # while the deploy: section is the swarm-mode mechanism — confirm which
    # deployment target is intended; only one of the two will apply.
    memswap_limit: 6g
    # Consistent with the bot service's restart policy.
    restart: unless-stopped
    command: >-
      -m /models/qwen25_15B.gguf
      --port 8080
      --host 0.0.0.0
      -n 512