Feat: docker whisper-cuda

author: Grail Finder <wohilas@gmail.com> 2025-12-07 11:34:31 +0300
committer: Grail Finder <wohilas@gmail.com> 2025-12-07 13:22:07 +0300
commit: 5582739e3c677ae7d397f4b4222cacb1c08a0df8 (patch)
tree: 3e06cefee2c3cbb5398126f841b67d8f50a2121c
parent: 58c4a4ad8cf199c4f59a225d96fafd8da49a12ed (diff)
4 files changed, 6 insertions, 10 deletions
diff --git a/Makefile b/Makefile
index 8265dda..36686c2 100644
--- a/Makefile
+++ b/Makefile
@@ -22,8 +22,6 @@ build-whisper: ## Build whisper.cpp from source in batteries directory
 		git clone https://github.com/ggml-org/whisper.cpp.git batteries/whisper.cpp; \
 	fi
 	cd batteries/whisper.cpp && make build
-	@echo "Creating symlink to whisper-cli binary..."
-	@ln -sf batteries/whisper.cpp/build/bin/whisper-cli ./whisper-cli
 	@echo "Whisper binary built successfully!"
 
 download-whisper-model: ## Download Whisper model for STT in batteries directory
@@ -32,9 +30,7 @@ download-whisper-model: ## Download Whisper model for STT in batteries directory
 		echo "Please run 'make setup-whisper' first to clone the repository."; \
 		exit 1; \
 	fi
-	@cd batteries/whisper.cpp && make tiny.en
-	@echo "Creating symlink to Whisper model..."
-	@ln -sf batteries/whisper.cpp/models/ggml-tiny.en.bin ./ggml-model.bin
+	@cd batteries/whisper.cpp && make large-v3-turbo
 	@echo "Whisper model downloaded successfully!"
 
 # Docker targets for STT/TTS services (in batteries directory)
diff --git a/batteries/docker-compose.yml b/batteries/docker-compose.yml
index d29cbf4..50f207f 100644
--- a/batteries/docker-compose.yml
+++ b/batteries/docker-compose.yml
@@ -6,16 +6,16 @@ services:
     ports:
       - "8081:8081"
     volumes:
-      - ./models:/app/models
-      - ./audio:/app/audio
+      - ./whisper.cpp/models:/app/models
     working_dir: /app
     entrypoint: ""
-    command: ["./build/bin/whisper-server", "-m", "/app/models/ggml-large-v3-turbo-q5_0", "-t", "4", "-p", "1", "--port", "8081", "--host", "0.0.0.0"]
+    command: ["./build/bin/whisper-server", "-m", "/app/models/ggml-large-v3-turbo.bin", "-t", "4", "-p", "1", "--port", "8081", "--host", "0.0.0.0"]
     environment:
       - WHISPER_LOG_LEVEL=3
     # Restart policy in case the service fails
     restart: unless-stopped
 
+
   # Kokoro-FastAPI TTS service
   kokoro-tts:
     # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
diff --git a/batteries/whisper.cpp b/batteries/whisper.cpp
new file mode 160000
+Subproject a88b93f85f08fc6045e5d8a8c3f94b7be0ac8bc
diff --git a/config.example.toml b/config.example.toml
index c09b3f1..70dc5e3 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -31,8 +31,8 @@ TTS_SPEED = 1.0
 STT_ENABLED = false
 STT_TYPE = "WHISPER_SERVER" # WHISPER_SERVER or WHISPER_BINARY
 STT_URL = "http://localhost:8081/inference"
-WhisperBinaryPath = "./whisper-cli"  # Path to whisper binary (for WHISPER_BINARY mode)
-WhisperModelPath = "./ggml-model.bin"  # Path to whisper model file (for WHISPER_BINARY mode)
+WhisperBinaryPath = "./batteries/whisper.cpp/build/bin/whisper-cli"  # Path to whisper binary produced by `make build-whisper` (for WHISPER_BINARY mode)
+WhisperModelPath = "./batteries/whisper.cpp/models/ggml-large-v3-turbo.bin"  # Path to whisper model fetched by `make download-whisper-model` (for WHISPER_BINARY mode)
 STT_LANG = "en"  # Language for speech recognition (for WHISPER_BINARY mode)
 STT_SR = 16000  # Sample rate for audio recording
 DBPATH = "gflt.db"
author	Grail Finder <wohilas@gmail.com>	2025-12-07 11:34:31 +0300
committer	Grail Finder <wohilas@gmail.com>	2025-12-07 13:22:07 +0300
commit	5582739e3c677ae7d397f4b4222cacb1c08a0df8 (patch)
tree	3e06cefee2c3cbb5398126f841b67d8f50a2121c
parent	58c4a4ad8cf199c4f59a225d96fafd8da49a12ed (diff)