---
# Compose stack: speech-to-text (whisper.cpp) + text-to-speech (Kokoro-FastAPI).
services:
  # Whisper.cpp STT service
  whisper:
    image: ghcr.io/ggml-org/whisper.cpp:main-cuda
    container_name: whisper-stt
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of host:container pairs.
      - "8081:8081"
    volumes:
      # NOTE(review): these are host bind mounts, not the named volumes
      # declared at the bottom of this file — confirm which was intended.
      - ./models:/app/models
      - ./audio:/app/audio
    working_dir: /app
    # Override the image's default entrypoint so `command` runs directly.
    entrypoint: ""
    command:
      - ./build/bin/whisper-server
      - -m
      - /app/models/ggml-large-v3-turbo-q5_0
      - -t
      - "4"
      - -p
      - "1"
      - --port
      - "8081"
      - --host
      - 0.0.0.0
    environment:
      - WHISPER_LOG_LEVEL=3
    # GPU reservation — required because the image is the CUDA build
    # (matches the kokoro-tts service below).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Restart policy in case the service fails
    restart: unless-stopped

  # Kokoro-FastAPI TTS service
  kokoro-tts:
    # CPU-only alternative:
    # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    image: ghcr.io/remsky/kokoro-fastapi-gpu:latest
    container_name: kokoro-tts
    ports:
      - "8880:8880"
    environment:
      - API_LOG_LEVEL=INFO
    # GPU reservation for the -gpu image (requires the NVIDIA container
    # toolkit on the host).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

# NOTE(review): these named volumes are currently unused — both services
# mount host directories (./models, ./audio) instead. Kept for backward
# compatibility; remove or wire into the services as appropriate.
volumes:
  models:
    driver: local
  audio:
    driver: local