services:
  # Whisper.cpp STT service
  whisper:
    image: ghcr.io/ggml-org/whisper.cpp:main-cuda
    container_name: whisper-stt
    ports:
      - "8081:8081"
    volumes:
      - whisper_models:/app/models
    working_dir: /app
    entrypoint: ""
    # Download the model on first start, then launch the server.
    # NOTE: exec form with a literal block (|) is used so the shell sees
    # real newlines; a folded scalar (>) would collapse the if/then/fi
    # onto one line and produce a shell syntax error.
    command:
      - sh
      - -c
      - |
        if [ ! -f /app/models/ggml-large-v3-turbo.bin ]; then
          echo 'Downloading ggml-large-v3-turbo model...'
          ./download-ggml-model.sh large-v3-turbo /app/models
        fi &&
        ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
    environment:
      - WHISPER_LOG_LEVEL=3
    # GPU reservation — required for the main-cuda image to reach the GPU.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Restart policy in case the service fails
    restart: unless-stopped

  # Kokoro-FastAPI TTS service
  kokoro-tts:
    # CPU-only alternative image:
    # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    image: ghcr.io/remsky/kokoro-fastapi-gpu:latest
    container_name: kokoro-tts
    ports:
      - "8880:8880"
    environment:
      - API_LOG_LEVEL=INFO
    # GPU reservation — required by the -gpu image above.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

volumes:
  # NOTE(review): `models` and `audio` are not referenced by any service in
  # this view — presumably used by an override file; confirm before removing.
  models:
    driver: local
  audio:
    driver: local
  whisper_models:
    driver: local