---
# Compose stack: speech-to-text (whisper.cpp) + text-to-speech (Kokoro-FastAPI).
services:
  # Whisper.cpp STT service
  whisper:
    image: ghcr.io/ggml-org/whisper.cpp:main-cuda
    container_name: whisper-stt
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of host:container pairs.
      - "8081:8081"
    volumes:
      # NOTE(review): these are host bind mounts, not the named volumes
      # declared at the bottom of this file — confirm which was intended.
      - ./models:/app/models
      - ./audio:/app/audio
    working_dir: /app
    # Override the image's default entrypoint so `command` runs directly.
    entrypoint: ""
    command:
      - ./build/bin/whisper-server
      - -m
      - /app/models/ggml-large-v3-turbo-q5_0
      - -t
      - "4"
      - -p
      - "1"
      - --port
      - "8081"
      - --host
      - 0.0.0.0
    environment:
      - WHISPER_LOG_LEVEL=3
    # GPU reservation — required because the image is the CUDA build
    # (matches the kokoro-tts service below).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Restart policy in case the service fails
    restart: unless-stopped

  # Kokoro-FastAPI TTS service
  kokoro-tts:
    # CPU-only alternative:
    # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    image: ghcr.io/remsky/kokoro-fastapi-gpu:latest
    container_name: kokoro-tts
    ports:
      - "8880:8880"
    environment:
      - API_LOG_LEVEL=INFO
    # GPU reservation for the -gpu image (requires the NVIDIA container
    # toolkit on the host).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

# NOTE(review): these named volumes are currently unused — both services
# mount host directories (./models, ./audio) instead. Kept for backward
# compatibility; remove or wire into the services as appropriate.
volumes:
  models:
    driver: local
  audio:
    driver: local