From 5bbb134aca390e415b93d8440f22207e44b9e4f8 Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Sun, 7 Dec 2025 11:53:10 +0300
Subject: Enha: docker-compose to not depend on locally downloaded models

---
Reviewer notes (free text in this position is not part of the commit message):

What the patch does
  * batteries/docker-compose.yml: replaces the ./whisper.cpp/models bind mount
    with the named volume whisper_models, and replaces the exec-form command
    with an `sh -c` script that downloads ggml-large-v3-turbo.bin into
    /app/models when the file is missing and then starts whisper-server with
    the same flags as before.
  * Makefile / README.md: add user-facing notes describing that behaviour.

Things to confirm before applying
  * Line structure was restored from a whitespace-collapsed copy. Hunk line
    counts match the @@ headers and the diffstat, but leading whitespace inside
    the hunks (tabs in Makefile recipes, YAML nesting) is a best-effort
    reconstruction -- verify against the upstream commit.
  * `command: >` is a folded scalar: the script lines must stay indented deeper
    than the `sh -c "` line, otherwise the newlines between the shell
    statements are folded into spaces.
  * NOTE(review): the script is invoked as ./download-ggml-model.sh relative to
    working_dir /app. Confirm it exists at that path in the image; upstream
    whisper.cpp presumably ships it under models/ -- TODO confirm.
  * NOTE(review): the `[ ! -f ... ]` guard only checks that the file exists; an
    interrupted download would presumably leave a partial file that passes the
    check on the next start -- TODO confirm how the download script writes.

 Makefile                     |  2 ++
 README.md                    |  9 +++++++++
 batteries/docker-compose.yml | 13 +++++++++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 36686c2..5ea51a2 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ download-whisper-model: ## Download Whisper model for STT in batteries directory
 # Docker targets for STT/TTS services (in batteries directory)
 docker-up: ## Start Docker Compose services for STT and TTS from batteries directory
 	@echo "Starting Docker services for STT (whisper) and TTS (kokoro)..."
+	@echo "Note: The Whisper model will be downloaded automatically inside the container on first run"
 	docker-compose -f batteries/docker-compose.yml up -d
 	@echo "Docker services started. STT available at http://localhost:8081, TTS available at http://localhost:8880"
 
@@ -51,3 +52,4 @@ docker-logs: ## View logs from Docker services in batteries directory
 # Convenience target to setup everything
 setup-complete: setup-whisper docker-up
 	@echo "Complete setup finished! STT and TTS services are running."
+	@echo "Note: Docker services will download the Whisper model automatically if not present."
diff --git a/README.md b/README.md
index 0a6f629..9891e20 100644
--- a/README.md
+++ b/README.md
@@ -64,3 +64,12 @@ Ctrl+x: cycle through mentioned chars in chat, to pick persona to send next msg
 cp config.example.toml config.toml
 ```
 set values as you need them to be.
+
+#### setting up STT/TTS services
+For speech-to-text (STT) and text-to-speech (TTS) functionality:
+1. The project uses Whisper.cpp for STT and Kokoro for TTS
+2. Docker Compose automatically downloads the required Whisper model on first run
+3. To start the services: `make docker-up`
+4. To stop the services: `make docker-down`
+5. The STT service runs on http://localhost:8081
+6. The TTS service runs on http://localhost:8880
diff --git a/batteries/docker-compose.yml b/batteries/docker-compose.yml
index 50f207f..7cf401b 100644
--- a/batteries/docker-compose.yml
+++ b/batteries/docker-compose.yml
@@ -6,10 +6,17 @@ services:
     ports:
       - "8081:8081"
     volumes:
-      - ./whisper.cpp/models:/app/models
+      - whisper_models:/app/models
     working_dir: /app
     entrypoint: ""
-    command: ["./build/bin/whisper-server", "-m", "/app/models/ggml-large-v3-turbo.bin", "-t", "4", "-p", "1", "--port", "8081", "--host", "0.0.0.0"]
+    command: >
+      sh -c "
+        if [ ! -f /app/models/ggml-large-v3-turbo.bin ]; then
+          echo 'Downloading ggml-large-v3-turbo model...'
+          ./download-ggml-model.sh large-v3-turbo /app/models
+        fi &&
+        ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
+      "
     environment:
       - WHISPER_LOG_LEVEL=3
     # Restart policy in case the service fails
@@ -40,3 +47,5 @@ volumes:
     driver: local
   audio:
     driver: local
+  whisper_models:
+    driver: local
-- 
cgit v1.2.3