Enha: make docker-compose independent of locally downloaded models

author: Grail Finder <wohilas@gmail.com> 2025-12-07 11:53:10 +0300
committer: Grail Finder <wohilas@gmail.com> 2025-12-07 13:22:07 +0300
commit: 5bbb134aca390e415b93d8440f22207e44b9e4f8 (patch)
tree: 8a91f69fda5b562fde9cbfbdb6321f0de7b26111
parent: 5582739e3c677ae7d397f4b4222cacb1c08a0df8 (diff)
3 files changed, 22 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 36686c2..5ea51a2 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ download-whisper-model: ## Download Whisper model for STT in batteries directory
 # Docker targets for STT/TTS services (in batteries directory)
 docker-up: ## Start Docker Compose services for STT and TTS from batteries directory
 	@echo "Starting Docker services for STT (whisper) and TTS (kokoro)..."
+	@echo "Note: The Whisper model will be downloaded automatically inside the container on first run"
 	docker-compose -f batteries/docker-compose.yml up -d
 	@echo "Docker services started. STT available at http://localhost:8081, TTS available at http://localhost:8880"
 
@@ -51,3 +52,4 @@ docker-logs: ## View logs from Docker services in batteries directory
 # Convenience target to setup everything
 setup-complete: setup-whisper docker-up
 	@echo "Complete setup finished! STT and TTS services are running."
+	@echo "Note: Docker services will download the Whisper model automatically if not present."
diff --git a/README.md b/README.md
index 0a6f629..9891e20 100644
--- a/README.md
+++ b/README.md
@@ -64,3 +64,12 @@ Ctrl+x: cycle through mentioned chars in chat, to pick persona to send next msg
 cp config.example.toml config.toml
 ```
 set values as you need them to be.
+
+#### setting up STT/TTS services
+For speech-to-text (STT) and text-to-speech (TTS) functionality:
+1. The project uses Whisper.cpp for STT and Kokoro for TTS
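+2. On first run, the Whisper container automatically downloads the required model (`ggml-large-v3-turbo`) into the `whisper_models` Docker volume, so no local model download is needed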
+3. To start the services: `make docker-up`
+4. To stop the services: `make docker-down`
+5. The STT service runs on http://localhost:8081
+6. The TTS service runs on http://localhost:8880
diff --git a/batteries/docker-compose.yml b/batteries/docker-compose.yml
index 50f207f..7cf401b 100644
--- a/batteries/docker-compose.yml
+++ b/batteries/docker-compose.yml
@@ -6,10 +6,17 @@ services:
     ports:
       - "8081:8081"
     volumes:
-      - ./whisper.cpp/models:/app/models
+      - whisper_models:/app/models
     working_dir: /app
     entrypoint: ""
-    command: ["./build/bin/whisper-server", "-m", "/app/models/ggml-large-v3-turbo.bin", "-t", "4", "-p", "1", "--port", "8081", "--host", "0.0.0.0"]
+    command: >
+      sh -c "
+      if [ ! -f /app/models/ggml-large-v3-turbo.bin ]; then
+        echo 'Downloading ggml-large-v3-turbo model...'
+        ./download-ggml-model.sh large-v3-turbo /app/models
+      fi &&
+      ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
+      "
     environment:
       - WHISPER_LOG_LEVEL=3
     # Restart policy in case the service fails
@@ -40,3 +47,5 @@ volumes:
     driver: local
   audio:
     driver: local
+  whisper_models:
+    driver: local
author	Grail Finder <wohilas@gmail.com>	2025-12-07 11:53:10 +0300
committer	Grail Finder <wohilas@gmail.com>	2025-12-07 13:22:07 +0300
commit	5bbb134aca390e415b93d8440f22207e44b9e4f8 (patch)
tree	8a91f69fda5b562fde9cbfbdb6321f0de7b26111
parent	5582739e3c677ae7d397f4b4222cacb1c08a0df8 (diff)