summary | refs | log | tree | commit | diff
path: root/extra
diff options
context:
space:
mode:
author Grail Finder <wohilas@gmail.com> 2026-03-07 18:13:11 +0300
committer Grail Finder <wohilas@gmail.com> 2026-03-07 18:13:11 +0300
commit c8f00198d6f0ad66269753252f56485ee346d413 (patch)
tree fd6cbfe88718d19dad1d4765397329da473822fb /extra
parent c5a24b2a3f30fe60888702b09e409647616c18d0 (diff)
Dep (stt): use ffmpeg instead of portaudio
Diffstat (limited to 'extra')
-rw-r--r-- extra/stt.go 132
-rw-r--r-- extra/whisper_binary.go 382
-rw-r--r-- extra/whisper_server.go 156
3 files changed, 272 insertions, 398 deletions
diff --git a/extra/stt.go b/extra/stt.go
index 86fcf9c..7bbf2fd 100644
--- a/extra/stt.go
+++ b/extra/stt.go
@@ -6,18 +6,10 @@ package extra
import (
"bytes"
"encoding/binary"
- "errors"
- "fmt"
"gf-lt/config"
"io"
"log/slog"
- "mime/multipart"
- "net/http"
"regexp"
- "strings"
- "syscall"
-
- "github.com/gordonklaus/portaudio"
)
var specialRE = regexp.MustCompile(`\[.*?\]`)
@@ -44,14 +36,6 @@ func NewSTT(logger *slog.Logger, cfg *config.Config) STT {
return NewWhisperServer(logger, cfg)
}
-type WhisperServer struct {
- logger *slog.Logger
- ServerURL string
- SampleRate int
- AudioBuffer *bytes.Buffer
- recording bool
-}
-
func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
return &WhisperServer{
logger: logger,
@@ -61,69 +45,6 @@ func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
}
}
-func (stt *WhisperServer) StartRecording() error {
- if err := stt.microphoneStream(stt.SampleRate); err != nil {
- return fmt.Errorf("failed to init microphone: %w", err)
- }
- stt.recording = true
- return nil
-}
-
-func (stt *WhisperServer) StopRecording() (string, error) {
- stt.recording = false
- // wait loop to finish?
- if stt.AudioBuffer == nil {
- err := errors.New("unexpected nil AudioBuffer")
- stt.logger.Error(err.Error())
- return "", err
- }
- // Create WAV header first
- body := &bytes.Buffer{}
- writer := multipart.NewWriter(body)
- // Add audio file part
- part, err := writer.CreateFormFile("file", "recording.wav")
- if err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- // Stream directly to multipart writer: header + raw data
- dataSize := stt.AudioBuffer.Len()
- stt.writeWavHeader(part, dataSize)
- if _, err := io.Copy(part, stt.AudioBuffer); err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- // Reset buffer for next recording
- stt.AudioBuffer.Reset()
- // Add response format field
- err = writer.WriteField("response_format", "text")
- if err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- if writer.Close() != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- // Send request
- resp, err := http.Post(stt.ServerURL, writer.FormDataContentType(), body) //nolint:noctx
- if err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- defer resp.Body.Close()
- // Read and print response
- responseTextBytes, err := io.ReadAll(resp.Body)
- if err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- resptext := strings.TrimRight(string(responseTextBytes), "\n")
- // in case there are special tokens like [_BEG_]
- resptext = specialRE.ReplaceAllString(resptext, "")
- return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
-}
-
func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
header := make([]byte, 44)
copy(header[0:4], "RIFF")
@@ -147,56 +68,3 @@ func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
func (stt *WhisperServer) IsRecording() bool { // IsRecording reports the current value of the recording flag.
return stt.recording // NOTE(review): read without holding stt.mu, while the WhisperServer struct added in whisper_server.go marks recording as "protected by mu" — confirm.
}
-
-func (stt *WhisperServer) microphoneStream(sampleRate int) error {
- // Temporarily redirect stderr to suppress ALSA warnings during PortAudio init
- origStderr, errDup := syscall.Dup(syscall.Stderr)
- if errDup != nil {
- return fmt.Errorf("failed to dup stderr: %w", errDup)
- }
- nullFD, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
- if err != nil {
- _ = syscall.Close(origStderr) // Close the dup'd fd if open fails
- return fmt.Errorf("failed to open /dev/null: %w", err)
- }
- // redirect stderr
- _ = syscall.Dup2(nullFD, syscall.Stderr)
- // Initialize PortAudio (this is where ALSA warnings occur)
- defer func() {
- // Restore stderr
- _ = syscall.Dup2(origStderr, syscall.Stderr)
- _ = syscall.Close(origStderr)
- _ = syscall.Close(nullFD)
- }()
- if err := portaudio.Initialize(); err != nil {
- return fmt.Errorf("portaudio init failed: %w", err)
- }
- in := make([]int16, 64)
- stream, err := portaudio.OpenDefaultStream(1, 0, float64(sampleRate), len(in), in)
- if err != nil {
- if paErr := portaudio.Terminate(); paErr != nil {
- return fmt.Errorf("failed to open microphone: %w; terminate error: %w", err, paErr)
- }
- return fmt.Errorf("failed to open microphone: %w", err)
- }
- go func(stream *portaudio.Stream) {
- if err := stream.Start(); err != nil {
- stt.logger.Error("microphoneStream", "error", err)
- return
- }
- for {
- if !stt.IsRecording() {
- return
- }
- if err := stream.Read(); err != nil {
- stt.logger.Error("reading stream", "error", err)
- return
- }
- if err := binary.Write(stt.AudioBuffer, binary.LittleEndian, in); err != nil {
- stt.logger.Error("writing to buffer", "error", err)
- return
- }
- }
- }(stream)
- return nil
-}
diff --git a/extra/whisper_binary.go b/extra/whisper_binary.go
index 6b7ddc8..1c35952 100644
--- a/extra/whisper_binary.go
+++ b/extra/whisper_binary.go
@@ -9,15 +9,13 @@ import (
"errors"
"fmt"
"gf-lt/config"
- "io"
"log/slog"
"os"
"os/exec"
"strings"
"sync"
"syscall"
-
- "github.com/gordonklaus/portaudio"
+ "time"
)
type WhisperBinary struct {
@@ -25,24 +23,14 @@ type WhisperBinary struct {
whisperPath string
modelPath string
lang string
- ctx context.Context
- cancel context.CancelFunc
- mu sync.Mutex
- recording bool
- audioBuffer []int16
-}
-
-func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
- ctx, cancel := context.WithCancel(context.Background())
- // Set ALSA error handler first
- return &WhisperBinary{
- logger: logger,
- whisperPath: cfg.WhisperBinaryPath,
- modelPath: cfg.WhisperModelPath,
- lang: cfg.STT_LANG,
- ctx: ctx,
- cancel: cancel,
- }
+ // Per-recording fields (protected by mu)
+ mu sync.Mutex
+ recording bool
+ tempFile string
+ ctx context.Context
+ cancel context.CancelFunc
+ cmd *exec.Cmd
+ cmdMu sync.Mutex
}
func (w *WhisperBinary) StartRecording() error {
@@ -51,276 +39,138 @@ func (w *WhisperBinary) StartRecording() error {
if w.recording {
return errors.New("recording is already in progress")
}
- // If context is cancelled, create a new one for the next recording session
- if w.ctx.Err() != nil {
- w.logger.Debug("Context cancelled, creating new context")
- w.ctx, w.cancel = context.WithCancel(context.Background())
- }
- // Temporarily redirect stderr to suppress ALSA warnings during PortAudio init
- origStderr, errDup := syscall.Dup(syscall.Stderr)
- if errDup != nil {
- return fmt.Errorf("failed to dup stderr: %w", errDup)
- }
- nullFD, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
+ // Fresh context for this recording
+ ctx, cancel := context.WithCancel(context.Background())
+ w.ctx = ctx
+ w.cancel = cancel
+ // Create temporary file
+ tempFile, err := os.CreateTemp("", "recording_*.wav")
if err != nil {
- _ = syscall.Close(origStderr) // Close the dup'd fd if open fails
- return fmt.Errorf("failed to open /dev/null: %w", err)
- }
- // redirect stderr
- _ = syscall.Dup2(nullFD, syscall.Stderr)
- // Initialize PortAudio (this is where ALSA warnings occur)
- portaudioErr := portaudio.Initialize()
- defer func() {
- // Restore stderr
- _ = syscall.Dup2(origStderr, syscall.Stderr)
- _ = syscall.Close(origStderr)
- _ = syscall.Close(nullFD)
- }()
- if portaudioErr != nil {
- return fmt.Errorf("portaudio init failed: %w", portaudioErr)
- }
- // Initialize audio buffer
- w.audioBuffer = make([]int16, 0)
- in := make([]int16, 1024) // buffer size
- stream, err := portaudio.OpenDefaultStream(1, 0, 16000.0, len(in), in)
+ cancel()
+ return fmt.Errorf("failed to create temp file: %w", err)
+ }
+ tempFile.Close()
+ w.tempFile = tempFile.Name()
+ // ffmpeg command: capture from default microphone, write WAV
+ args := []string{
+ "-f", "alsa", // or "pulse" if preferred
+ "-i", "default",
+ "-acodec", "pcm_s16le",
+ "-ar", "16000",
+ "-ac", "1",
+ "-y", // overwrite output file
+ w.tempFile,
+ }
+ cmd := exec.CommandContext(w.ctx, "ffmpeg", args...)
+ // Capture stderr for debugging (optional, but useful for diagnosing)
+ stderr, err := cmd.StderrPipe()
if err != nil {
- if paErr := portaudio.Terminate(); paErr != nil {
- return fmt.Errorf("failed to open microphone: %w; terminate error: %w", err, paErr)
- }
- return fmt.Errorf("failed to open microphone: %w", err)
- }
- go w.recordAudio(stream, in)
- w.recording = true
- w.logger.Debug("Recording started")
- return nil
-}
-
-func (w *WhisperBinary) recordAudio(stream *portaudio.Stream, in []int16) {
- defer func() {
- w.logger.Debug("recordAudio defer function called")
- _ = stream.Stop() // Stop the stream
- _ = portaudio.Terminate() // ignoring error as we're shutting down
- w.logger.Debug("recordAudio terminated")
- }()
- w.logger.Debug("Starting audio stream")
- if err := stream.Start(); err != nil {
- w.logger.Error("Failed to start audio stream", "error", err)
- return
- }
- w.logger.Debug("Audio stream started, entering recording loop")
- for {
- select {
- case <-w.ctx.Done():
- w.logger.Debug("Context done, exiting recording loop")
- return
- default:
- // Check recording status with minimal lock time
- w.mu.Lock()
- recording := w.recording
- w.mu.Unlock()
-
- if !recording {
- w.logger.Debug("Recording flag is false, exiting recording loop")
- return
+ cancel()
+ os.Remove(w.tempFile)
+ return fmt.Errorf("failed to create stderr pipe: %w", err)
+ }
+ go func() {
+ buf := make([]byte, 1024)
+ for {
+ n, err := stderr.Read(buf)
+ if n > 0 {
+ w.logger.Debug("ffmpeg stderr", "output", string(buf[:n]))
}
- if err := stream.Read(); err != nil {
- w.logger.Error("Error reading from stream", "error", err)
- return
+ if err != nil {
+ break
}
- // Append samples to buffer - only acquire lock when necessary
- w.mu.Lock()
- if w.audioBuffer == nil {
- w.audioBuffer = make([]int16, 0)
- }
- // Make a copy of the input buffer to avoid overwriting
- tempBuffer := make([]int16, len(in))
- copy(tempBuffer, in)
- w.audioBuffer = append(w.audioBuffer, tempBuffer...)
- w.mu.Unlock()
}
+ }()
+ w.cmdMu.Lock()
+ w.cmd = cmd
+ w.cmdMu.Unlock()
+ if err := cmd.Start(); err != nil {
+ cancel()
+ os.Remove(w.tempFile)
+ return fmt.Errorf("failed to start ffmpeg: %w", err)
}
+ w.recording = true
+ w.logger.Debug("Recording started", "file", w.tempFile)
+ return nil
}
func (w *WhisperBinary) StopRecording() (string, error) { // StopRecording stops the ffmpeg capture, checks the temp WAV file, runs the whisper binary on it and returns the cleaned-up transcript.
- w.logger.Debug("StopRecording called")
w.mu.Lock()
+ defer w.mu.Unlock() // NOTE(review): mu stays held until return, i.e. through ffmpeg shutdown and the whisper run; IsRecording also locks mu and waits that long.
if !w.recording {
- w.mu.Unlock()
return "", errors.New("not currently recording")
}
- w.logger.Debug("Setting recording to false and cancelling context")
w.recording = false
- w.cancel() // This will stop the recording goroutine
- w.mu.Unlock()
- // // Small delay to allow the recording goroutine to react to context cancellation
- // time.Sleep(20 * time.Millisecond)
- // Save the recorded audio to a temporary file
- tempFile, err := w.saveAudioToTempFile()
- if err != nil {
- w.logger.Error("Error saving audio to temp file", "error", err)
- return "", fmt.Errorf("failed to save audio to temp file: %w", err)
- }
- w.logger.Debug("Saved audio to temp file", "file", tempFile)
- // Run the whisper binary with a separate context to avoid cancellation during transcription
- cmd := exec.Command(w.whisperPath, "-m", w.modelPath, "-l", w.lang, tempFile, "2>/dev/null")
- var outBuf bytes.Buffer
- cmd.Stdout = &outBuf
- // Redirect stderr to suppress ALSA warnings and other stderr output
- cmd.Stderr = io.Discard // Suppress stderr output from whisper binary
- w.logger.Debug("Running whisper binary command")
- if err := cmd.Run(); err != nil {
- // Clean up audio buffer
- w.mu.Lock()
- w.audioBuffer = nil
- w.mu.Unlock()
- // Since we're suppressing stderr, we'll just log that the command failed
- w.logger.Error("Error running whisper binary", "error", err)
- return "", fmt.Errorf("whisper binary failed: %w", err)
+ // Gracefully stop ffmpeg
+ w.cmdMu.Lock()
+ if w.cmd != nil && w.cmd.Process != nil {
+ w.logger.Debug("Sending SIGTERM to ffmpeg")
+ w.cmd.Process.Signal(syscall.SIGTERM) // NOTE(review): Signal's error return is discarded.
+ // Wait for process to exit (up to 2 seconds)
+ done := make(chan error, 1)
+ go func() {
+ done <- w.cmd.Wait()
+ }()
+ select {
+ case <-done:
+ w.logger.Debug("ffmpeg exited after SIGTERM")
+ case <-time.After(2 * time.Second):
+ w.logger.Warn("ffmpeg did not exit, sending SIGKILL")
+ w.cmd.Process.Kill()
+ <-done // block until Wait returns after the kill
+ }
}
- result := outBuf.String()
- w.logger.Debug("Whisper binary completed", "result", result)
- // Clean up audio buffer
- w.mu.Lock()
- w.audioBuffer = nil
- w.mu.Unlock()
- // Clean up the temporary file after transcription
- w.logger.Debug("StopRecording completed")
- os.Remove(tempFile)
- result = strings.TrimRight(result, "\n")
- // in case there are special tokens like [_BEG_]
- result = specialRE.ReplaceAllString(result, "")
- return strings.TrimSpace(strings.ReplaceAll(result, "\n ", "\n")), nil
-}
-
-// saveAudioToTempFile saves the recorded audio data to a temporary WAV file
-func (w *WhisperBinary) saveAudioToTempFile() (string, error) {
- w.logger.Debug("saveAudioToTempFile called")
- // Create temporary WAV file
- tempFile, err := os.CreateTemp("", "recording_*.wav")
- if err != nil {
- w.logger.Error("Failed to create temp file", "error", err)
- return "", fmt.Errorf("failed to create temp file: %w", err)
+ w.cmdMu.Unlock()
+ // Cancel the per-recording context created in StartRecording; no earlier cancel() call is visible in this function.
+ if w.cancel != nil {
+ w.cancel()
}
- w.logger.Debug("Created temp file", "file", tempFile.Name())
- defer tempFile.Close()
-
- // Write WAV header and data
- w.logger.Debug("About to write WAV file", "file", tempFile.Name())
- err = w.writeWAVFile(tempFile.Name())
- if err != nil {
- w.logger.Error("Error writing WAV file", "error", err)
- return "", fmt.Errorf("failed to write WAV file: %w", err)
+ // Validate temp file
+ if w.tempFile == "" {
+ return "", errors.New("no recording file")
}
- w.logger.Debug("WAV file written successfully", "file", tempFile.Name())
-
- return tempFile.Name(), nil
-}
-
-// writeWAVFile creates a WAV file from the recorded audio data
-func (w *WhisperBinary) writeWAVFile(filename string) error {
- w.logger.Debug("writeWAVFile called", "filename", filename)
- // Open file for writing
- file, err := os.Create(filename)
+ defer os.Remove(w.tempFile) // removes the file on every return path below; w.tempFile itself is not cleared in this function
+ info, err := os.Stat(w.tempFile)
if err != nil {
- w.logger.Error("Error creating file", "error", err)
- return err
+ return "", fmt.Errorf("failed to stat temp file: %w", err)
}
- defer file.Close()
-
- w.logger.Debug("About to acquire mutex in writeWAVFile")
- w.mu.Lock()
- w.logger.Debug("Locked mutex, copying audio buffer")
- audioData := make([]int16, len(w.audioBuffer))
- copy(audioData, w.audioBuffer)
- w.mu.Unlock()
- w.logger.Debug("Unlocked mutex", "audio_data_length", len(audioData))
-
- if len(audioData) == 0 {
- w.logger.Warn("No audio data to write")
- return errors.New("no audio data to write")
+ if info.Size() < 44 { // WAV header is 44 bytes; NOTE(review): a header-only file of exactly 44 bytes passes this check
+ // Log ffmpeg stderr? Already captured in debug logs.
+ return "", fmt.Errorf("recording file too small (%d bytes), possibly no audio captured", info.Size())
}
-
- // Calculate data size (number of samples * size of int16)
- dataSize := len(audioData) * 2 // 2 bytes per int16 sample
- w.logger.Debug("Calculated data size", "size", dataSize)
-
- // Write WAV header with the correct data size
- header := w.createWAVHeader(16000, 1, 16, dataSize)
- _, err = file.Write(header)
- if err != nil {
- w.logger.Error("Error writing WAV header", "error", err)
- return err
- }
- w.logger.Debug("WAV header written successfully")
-
- // Write audio data
- w.logger.Debug("About to write audio data samples")
- for i, sample := range audioData {
- // Write little-endian 16-bit sample
- _, err := file.Write([]byte{byte(sample), byte(sample >> 8)})
- if err != nil {
- w.logger.Error("Error writing sample", "index", i, "error", err)
- return err
- }
- // Log progress every 10000 samples to avoid too much output
- if i%10000 == 0 {
- w.logger.Debug("Written samples", "count", i)
- }
+ // Run whisper.cpp binary
+ cmd := exec.Command(w.whisperPath, "-m", w.modelPath, "-l", w.lang, w.tempFile)
+ var outBuf, errBuf bytes.Buffer
+ cmd.Stdout = &outBuf
+ cmd.Stderr = &errBuf
+ if err := cmd.Run(); err != nil {
+ w.logger.Error("whisper binary failed",
+ "error", err,
+ "stderr", errBuf.String(),
+ "file_size", info.Size())
+ return "", fmt.Errorf("whisper binary failed: %w (stderr: %s)", err, errBuf.String())
}
- w.logger.Debug("All audio data written successfully")
-
- return nil
-}
-
-// createWAVHeader creates a WAV file header
-func (w *WhisperBinary) createWAVHeader(sampleRate, channels, bitsPerSample int, dataSize int) []byte {
- header := make([]byte, 44)
- copy(header[0:4], "RIFF")
- // Total file size will be updated later
- copy(header[8:12], "WAVE")
- copy(header[12:16], "fmt ")
- // fmt chunk size (16 for PCM)
- header[16] = 16
- header[17] = 0
- header[18] = 0
- header[19] = 0
- // Audio format (1 = PCM)
- header[20] = 1
- header[21] = 0
- // Number of channels
- header[22] = byte(channels)
- header[23] = 0
- // Sample rate
- header[24] = byte(sampleRate)
- header[25] = byte(sampleRate >> 8)
- header[26] = byte(sampleRate >> 16)
- header[27] = byte(sampleRate >> 24)
- // Byte rate
- byteRate := sampleRate * channels * bitsPerSample / 8
- header[28] = byte(byteRate)
- header[29] = byte(byteRate >> 8)
- header[30] = byte(byteRate >> 16)
- header[31] = byte(byteRate >> 24)
- // Block align
- blockAlign := channels * bitsPerSample / 8
- header[32] = byte(blockAlign)
- header[33] = 0
- // Bits per sample
- header[34] = byte(bitsPerSample)
- header[35] = 0
- // "data" subchunk
- copy(header[36:40], "data")
- // Data size
- header[40] = byte(dataSize)
- header[41] = byte(dataSize >> 8)
- header[42] = byte(dataSize >> 16)
- header[43] = byte(dataSize >> 24)
-
- return header
+ result := strings.TrimRight(outBuf.String(), "\n") // drop trailing newlines from whisper's stdout
+ result = specialRE.ReplaceAllString(result, "") // strip bracketed tokens matched by specialRE (declared in stt.go)
+ return strings.TrimSpace(strings.ReplaceAll(result, "\n ", "\n")), nil
}
+// IsRecording reports whether a recording is in progress, reading the flag under w.mu.
func (w *WhisperBinary) IsRecording() bool {
w.mu.Lock() // StopRecording keeps w.mu locked until it returns, so this call waits while a stop/transcription is running
defer w.mu.Unlock()
return w.recording
}
+
+func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary { // NewWhisperBinary builds a WhisperBinary from the binary path, model path and language found in cfg.
+ ctx, cancel := context.WithCancel(context.Background())
+ // NOTE(review): an earlier comment here mentioned setting an ALSA error handler, but nothing in this function does that; StartRecording assigns a fresh ctx/cancel per recording, so this initial pair looks unused — confirm.
+ return &WhisperBinary{
+ logger: logger,
+ whisperPath: cfg.WhisperBinaryPath,
+ modelPath: cfg.WhisperModelPath,
+ lang: cfg.STT_LANG,
+ ctx: ctx,
+ cancel: cancel,
+ }
+}
diff --git a/extra/whisper_server.go b/extra/whisper_server.go
new file mode 100644
index 0000000..7532f4a
--- /dev/null
+++ b/extra/whisper_server.go
@@ -0,0 +1,156 @@
+//go:build extra
+// +build extra
+
+package extra
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "log/slog"
+ "mime/multipart"
+ "net/http"
+ "os/exec"
+ "strings"
+ "sync"
+)
+
+type WhisperServer struct { // WhisperServer captures microphone audio through an ffmpeg child process and posts it to a whisper HTTP endpoint.
+ logger *slog.Logger
+ ServerURL string // URL that StopRecording posts the multipart form to
+ SampleRate int // passed to ffmpeg as the -ar value
+ AudioBuffer *bytes.Buffer // raw PCM bytes read from ffmpeg's stdout
+ recording bool // protected by mu
+ mu sync.Mutex // protects recording & AudioBuffer
+ cmd *exec.Cmd // ffmpeg process of the current recording; protected by cmdMu
+ stopCh chan struct{} // closed by StopRecording to end the reader goroutine; protected by cmdMu
+ cmdMu sync.Mutex // protects cmd and stopCh
+}
+
+func (stt *WhisperServer) StartRecording() error { // StartRecording spawns ffmpeg on the "default" ALSA input and starts buffering its stdout into AudioBuffer; returns an error if the pipe or process cannot be set up.
+ stt.mu.Lock()
+ defer stt.mu.Unlock()
+ if stt.recording {
+ return nil // already recording: treated as a no-op, not an error
+ }
+ // Build ffmpeg command for microphone capture
+ args := []string{
+ "-f", "alsa",
+ "-i", "default",
+ "-acodec", "pcm_s16le",
+ "-ar", fmt.Sprint(stt.SampleRate),
+ "-ac", "1",
+ "-f", "s16le",
+ "-",
+ }
+ cmd := exec.Command("ffmpeg", args...)
+ stdout, err := cmd.StdoutPipe()
+ if err != nil {
+ return fmt.Errorf("failed to get stdout pipe: %w", err)
+ }
+ stt.cmdMu.Lock()
+ stt.cmd = cmd // NOTE(review): cmd and stopCh are stored before Start and stay set if Start fails below
+ stt.stopCh = make(chan struct{})
+ stt.cmdMu.Unlock()
+ if err := cmd.Start(); err != nil {
+ return fmt.Errorf("failed to start ffmpeg: %w", err)
+ }
+ stt.recording = true
+ stt.AudioBuffer.Reset() // NOTE(review): no nil check here, unlike StopRecording — confirm the constructor always sets AudioBuffer
+ // Read PCM data in goroutine
+ go func() {
+ buf := make([]byte, 4096)
+ for {
+ select {
+ case <-stt.stopCh: // NOTE(review): the stopCh field is read without cmdMu, and is only checked between reads because the Read below blocks
+ return
+ default:
+ n, err := stdout.Read(buf)
+ if n > 0 {
+ stt.mu.Lock() // NOTE(review): StopRecording holds stt.mu until it returns, so a chunk read just before the stop is appended only after the upload and buffer Reset
+ stt.AudioBuffer.Write(buf[:n])
+ stt.mu.Unlock()
+ }
+ if err != nil {
+ if err != io.EOF {
+ stt.logger.Error("recording read error", "error", err)
+ }
+ return
+ }
+ }
+ }
+ }()
+ return nil
+}
+
+func (stt *WhisperServer) StopRecording() (string, error) { // StopRecording kills ffmpeg, uploads the buffered audio as a WAV file to ServerURL and returns the cleaned-up response text.
+ stt.mu.Lock()
+ defer stt.mu.Unlock()
+ if !stt.recording {
+ return "", errors.New("not recording")
+ }
+ stt.recording = false
+ // Stop ffmpeg
+ stt.cmdMu.Lock()
+ if stt.cmd != nil && stt.cmd.Process != nil {
+ stt.cmd.Process.Kill() // NOTE(review): error return discarded
+ stt.cmd.Wait() // NOTE(review): error discarded; os/exec says Wait closes the stdout pipe, so output the reader goroutine has not consumed yet may be lost — TODO confirm
+ }
+ close(stt.stopCh)
+ stt.cmdMu.Unlock()
+ // Build the upload: WAV header plus buffered PCM as a multipart form, then POST it.
+ // NOTE(review): the assignment below repeats the one made right after the recording check.
+ stt.recording = false
+ // TODO: wait for the reader goroutine to finish before using the buffer?
+ if stt.AudioBuffer == nil {
+ err := errors.New("unexpected nil AudioBuffer")
+ stt.logger.Error(err.Error())
+ return "", err
+ }
+ // Multipart body that will carry the WAV file and the form fields
+ body := &bytes.Buffer{}
+ writer := multipart.NewWriter(body)
+ // Add audio file part
+ part, err := writer.CreateFormFile("file", "recording.wav")
+ if err != nil {
+ stt.logger.Error("fn: StopRecording", "error", err)
+ return "", err
+ }
+ // Stream directly to multipart writer: header + raw data
+ dataSize := stt.AudioBuffer.Len()
+ stt.writeWavHeader(part, dataSize)
+ if _, err := io.Copy(part, stt.AudioBuffer); err != nil {
+ stt.logger.Error("fn: StopRecording", "error", err)
+ return "", err
+ }
+ // Reset buffer for next recording
+ stt.AudioBuffer.Reset()
+ // Add response format field
+ err = writer.WriteField("response_format", "text")
+ if err != nil {
+ stt.logger.Error("fn: StopRecording", "error", err)
+ return "", err
+ }
+ if writer.Close() != nil { // NOTE(review): Close's error is discarded; err is nil at this point, so this branch logs a nil error and returns ("", nil)
+ stt.logger.Error("fn: StopRecording", "error", err)
+ return "", err
+ }
+ // Send request
+ resp, err := http.Post(stt.ServerURL, writer.FormDataContentType(), body) //nolint:noctx
+ if err != nil {
+ stt.logger.Error("fn: StopRecording", "error", err)
+ return "", err
+ }
+ defer resp.Body.Close() // NOTE(review): resp.StatusCode is never checked, so an error response body would be returned as the transcript
+ // Read the whole response body
+ responseTextBytes, err := io.ReadAll(resp.Body)
+ if err != nil {
+ stt.logger.Error("fn: StopRecording", "error", err)
+ return "", err
+ }
+ resptext := strings.TrimRight(string(responseTextBytes), "\n")
+ // in case there are special tokens like [_BEG_]
+ resptext = specialRE.ReplaceAllString(resptext, "")
+ return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
+}