From 5582739e3c677ae7d397f4b4222cacb1c08a0df8 Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Sun, 7 Dec 2025 11:34:31 +0300
Subject: Feat: docker whisper-cuda

---
 Makefile                     | 6 +-----
 batteries/docker-compose.yml | 6 +++---
 batteries/whisper.cpp        | 1 +
 config.example.toml          | 4 ++--
 4 files changed, 7 insertions(+), 10 deletions(-)
 create mode 160000 batteries/whisper.cpp

diff --git a/Makefile b/Makefile
index 8265dda..36686c2 100644
--- a/Makefile
+++ b/Makefile
@@ -22,8 +22,6 @@ build-whisper: ## Build whisper.cpp from source in batteries directory
 		git clone https://github.com/ggml-org/whisper.cpp.git batteries/whisper.cpp; \
 	fi
 	cd batteries/whisper.cpp && make build
-	@echo "Creating symlink to whisper-cli binary..."
-	@ln -sf batteries/whisper.cpp/build/bin/whisper-cli ./whisper-cli
 	@echo "Whisper binary built successfully!"
 
 download-whisper-model: ## Download Whisper model for STT in batteries directory
@@ -32,9 +30,7 @@ download-whisper-model: ## Download Whisper model for STT in batteries directory
 		echo "Please run 'make setup-whisper' first to clone the repository."; \
 		exit 1; \
 	fi
-	@cd batteries/whisper.cpp && make tiny.en
-	@echo "Creating symlink to Whisper model..."
-	@ln -sf batteries/whisper.cpp/models/ggml-tiny.en.bin ./ggml-model.bin
+	@cd batteries/whisper.cpp && make large-v3-turbo
 	@echo "Whisper model downloaded successfully!"
 
 # Docker targets for STT/TTS services (in batteries directory)
diff --git a/batteries/docker-compose.yml b/batteries/docker-compose.yml
index d29cbf4..50f207f 100644
--- a/batteries/docker-compose.yml
+++ b/batteries/docker-compose.yml
@@ -6,16 +6,16 @@ services:
     ports:
       - "8081:8081"
     volumes:
-      - ./models:/app/models
-      - ./audio:/app/audio
+      - ./whisper.cpp/models:/app/models
     working_dir: /app
     entrypoint: ""
-    command: ["./build/bin/whisper-server", "-m", "/app/models/ggml-large-v3-turbo-q5_0", "-t", "4", "-p", "1", "--port", "8081", "--host", "0.0.0.0"]
+    command: ["./build/bin/whisper-server", "-m", "/app/models/ggml-large-v3-turbo.bin", "-t", "4", "-p", "1", "--port", "8081", "--host", "0.0.0.0"]
     environment:
       - WHISPER_LOG_LEVEL=3
     # Restart policy in case the service fails
     restart: unless-stopped
+
   # Kokoro-FastAPI TTS service
   kokoro-tts:
     # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
diff --git a/batteries/whisper.cpp b/batteries/whisper.cpp
new file mode 160000
index 0000000..a88b93f
--- /dev/null
+++ b/batteries/whisper.cpp
@@ -0,0 +1 @@
+Subproject commit a88b93f85f08fc6045e5d8a8c3f94b7be0ac8bce
diff --git a/config.example.toml b/config.example.toml
index c09b3f1..70dc5e3 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -31,8 +31,8 @@ TTS_SPEED = 1.0
 STT_ENABLED = false
 STT_TYPE = "WHISPER_SERVER" # WHISPER_SERVER or WHISPER_BINARY
 STT_URL = "http://localhost:8081/inference"
-WhisperBinaryPath = "./whisper-cli" # Path to whisper binary (for WHISPER_BINARY mode)
-WhisperModelPath = "./ggml-model.bin" # Path to whisper model file (for WHISPER_BINARY mode)
+WhisperBinaryPath = "./batteries/whisper.cpp/whisper-cli" # Path to whisper binary (for WHISPER_BINARY mode)
+WhisperModelPath = "./batteries/whisper.cpp/ggml-model.bin" # Path to whisper model file (for WHISPER_BINARY mode)
 STT_LANG = "en" # Language for speech recognition (for WHISPER_BINARY mode)
 STT_SR = 16000 # Sample rate for audio recording
 DBPATH = "gflt.db"
--
cgit v1.2.3
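
Usage note (not part of the patch): a minimal sketch of exercising the whisper-server setup this commit wires up, assuming the STT container is named whisper-stt (the diff context does not show the service name) and that sample.wav stands in for any 16 kHz WAV recording. It uses whisper.cpp's standard multipart /inference endpoint, which config.example.toml points STT_URL at.

    # Bring up the whisper.cpp STT service defined in batteries/docker-compose.yml
    # (service name "whisper-stt" is an assumption)
    docker compose -f batteries/docker-compose.yml up -d whisper-stt

    # POST a recording to the endpoint configured as STT_URL
    curl http://localhost:8081/inference \
      -F file="@sample.wav" \
      -F response_format="json"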