diff options
| author | Grail Finder <wohilas@gmail.com> | 2025-12-07 11:53:10 +0300 |
|---|---|---|
| committer | Grail Finder <wohilas@gmail.com> | 2025-12-07 13:22:07 +0300 |
| commit | 5bbb134aca390e415b93d8440f22207e44b9e4f8 (patch) | |
| tree | 8a91f69fda5b562fde9cbfbdb6321f0de7b26111 | |
| parent | 5582739e3c677ae7d397f4b4222cacb1c08a0df8 (diff) | |
Enha: docker-compose to not depend on locally downloaded models
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | README.md | 9 | ||||
| -rw-r--r-- | batteries/docker-compose.yml | 13 |
3 files changed, 22 insertions, 2 deletions
~~~diff
@@ -36,6 +36,7 @@ download-whisper-model: ## Download Whisper model for STT in batteries directory
 # Docker targets for STT/TTS services (in batteries directory)
 docker-up: ## Start Docker Compose services for STT and TTS from batteries directory
 	@echo "Starting Docker services for STT (whisper) and TTS (kokoro)..."
+	@echo "Note: The Whisper model will be downloaded automatically inside the container on first run"
 	docker-compose -f batteries/docker-compose.yml up -d
 	@echo "Docker services started. STT available at http://localhost:8081, TTS available at http://localhost:8880"
 
@@ -51,3 +52,4 @@ docker-logs: ## View logs from Docker services in batteries directory
 # Convenience target to setup everything
 setup-complete: setup-whisper docker-up
 	@echo "Complete setup finished! STT and TTS services are running."
+	@echo "Note: Docker services will download the Whisper model automatically if not present."
@@ -64,3 +64,12 @@ Ctrl+x: cycle through mentioned chars in chat, to pick persona to send next msg
 cp config.example.toml config.toml
 ```
 set values as you need them to be.
+
+#### setting up STT/TTS services
+For speech-to-text (STT) and text-to-speech (TTS) functionality:
+1. The project uses Whisper.cpp for STT and Kokoro for TTS
+2. Docker Compose automatically downloads the required Whisper model on first run
+3. To start the services: `make docker-up`
+4. To stop the services: `make docker-down`
+5. The STT service runs on http://localhost:8081
+6. The TTS service runs on http://localhost:8880
diff --git a/batteries/docker-compose.yml b/batteries/docker-compose.yml
index 50f207f..7cf401b 100644
--- a/batteries/docker-compose.yml
+++ b/batteries/docker-compose.yml
@@ -6,10 +6,17 @@ services:
     ports:
       - "8081:8081"
     volumes:
-      - ./whisper.cpp/models:/app/models
+      - whisper_models:/app/models
     working_dir: /app
     entrypoint: ""
-    command: ["./build/bin/whisper-server", "-m", "/app/models/ggml-large-v3-turbo.bin", "-t", "4", "-p", "1", "--port", "8081", "--host", "0.0.0.0"]
+    command: >
+      sh -c "
+        if [ ! -f /app/models/ggml-large-v3-turbo.bin ]; then
+          echo 'Downloading ggml-large-v3-turbo model...'
+          ./download-ggml-model.sh large-v3-turbo /app/models
+        fi &&
+        ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
+      "
     environment:
       - WHISPER_LOG_LEVEL=3
     # Restart policy in case the service fails
@@ -40,3 +47,5 @@ volumes:
     driver: local
   audio:
     driver: local
+  whisper_models:
+    driver: local
~~~
