Files
grok/docker-compose.yml
2026-02-01 18:03:16 +03:00

31 lines
682 B
YAML

---
# Telegram bot + local llama.cpp inference server.
services:
  bot:
    build: .
    environment:
      # Secrets left blank on purpose — inject at deploy time, don't commit.
      BOT_TOKEN: ""
      CHAT_ID: ""
      # Quote number-looking env values so they stay strings for the consumer.
      CHANNEL_ID: "-1003290014225"
      # FIX: `localhost` inside the bot container is the bot itself.
      # Use the compose service name so the bot reaches the llama-server
      # container over the default compose network.
      OPENAI_BASE_URL: http://llama-server:8080/v1
      SYSTEM_PROMPT_PATH: /etc/sysprompt.txt
      MAX_CONCURRENT_REQUESTS: "2"
    volumes:
      - ./sysprompt.txt:/etc/sysprompt.txt:ro
    # Start the inference server before the bot that talks to it.
    depends_on:
      - llama-server
    restart: unless-stopped

  llama-server:
    image: ghcr.io/ggml-org/llama.cpp:server
    container_name: llama-server
    ports:
      # Quoted port mapping avoids YAML sexagesimal parsing surprises.
      - "8080:8080"
    volumes:
      # NOTE(review): `~` resolves on the docker host — confirm models live
      # in the deploying user's home directory.
      - ~/models:/models
    # FIX: `memswap_limit` is a service-level key, not a
    # `deploy.resources.limits` entry (which only accepts cpus/memory/pids).
    # Paired with `mem_limit` at service level so both are honored by
    # non-swarm `docker compose up`.
    mem_limit: 2g
    memswap_limit: 6g
    # Consistent with the bot service: survive daemon restarts and crashes.
    restart: unless-stopped
    # `>-` folds to one line and strips the trailing newline.
    command: >-
      -m /models/qwen25_15B.gguf
      --port 8080
      --host 0.0.0.0
      -n 512