summary refs log tree commit diff
path: root/extra/whisper_binary.go
diff options
context:
space:
mode:
author Grail Finder <wohilas@gmail.com> 2025-11-10 13:06:05 +0300
committer Grail Finder <wohilas@gmail.com> 2025-11-10 13:06:05 +0300
commit a84930621cf0a3e35b4ee1d7facdfbfa3e987cff (patch)
tree 2f67183384cef896561475fc4ae4babf6e9ef61a /extra/whisper_binary.go
parent 48f32ba36a889d2514888589bc6c5da710a19feb (diff)
Fix: whisper binary
Diffstat (limited to 'extra/whisper_binary.go')
-rw-r--r-- extra/whisper_binary.go 152
1 files changed, 120 insertions, 32 deletions
diff --git a/extra/whisper_binary.go b/extra/whisper_binary.go
index 17345f2..2779cb5 100644
--- a/extra/whisper_binary.go
+++ b/extra/whisper_binary.go
@@ -6,31 +6,32 @@ import (
"errors"
"fmt"
"gf-lt/config"
+ "io"
"log/slog"
"os"
"os/exec"
"sync"
+ "time"
"github.com/gordonklaus/portaudio"
)
-
-
type WhisperBinary struct {
+ logger *slog.Logger
whisperPath string
modelPath string
lang string
ctx context.Context
cancel context.CancelFunc
mu sync.Mutex
- running bool
- cmd *exec.Cmd
+ recording bool
audioBuffer []int16
}
func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
ctx, cancel := context.WithCancel(context.Background())
return &WhisperBinary{
+ logger: logger,
whisperPath: cfg.WhisperBinaryPath,
modelPath: cfg.WhisperModelPath,
lang: cfg.STT_LANG,
@@ -42,18 +43,59 @@ func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
func (w *WhisperBinary) StartRecording() error {
w.mu.Lock()
defer w.mu.Unlock()
-
- if w.running {
+ if w.recording {
return errors.New("recording is already in progress")
}
+ // Suppress ALSA warnings by setting environment variables
+ origCard := os.Getenv("ALSA_PCM_CARD")
+ origDevice := os.Getenv("ALSA_PCM_DEVICE")
+ origSubdevice := os.Getenv("ALSA_PCM_SUBDEVICE")
+
+ // Set specific ALSA device to prevent "Unknown PCM card.pcm.rear" warnings
+ os.Setenv("ALSA_PCM_CARD", "0")
+ os.Setenv("ALSA_PCM_DEVICE", "0")
+ os.Setenv("ALSA_PCM_SUBDEVICE", "0")
+
if err := portaudio.Initialize(); err != nil {
+ // Restore original environment variables on error
+ if origCard != "" {
+ os.Setenv("ALSA_PCM_CARD", origCard)
+ } else {
+ os.Unsetenv("ALSA_PCM_CARD")
+ }
+ if origDevice != "" {
+ os.Setenv("ALSA_PCM_DEVICE", origDevice)
+ } else {
+ os.Unsetenv("ALSA_PCM_DEVICE")
+ }
+ if origSubdevice != "" {
+ os.Setenv("ALSA_PCM_SUBDEVICE", origSubdevice)
+ } else {
+ os.Unsetenv("ALSA_PCM_SUBDEVICE")
+ }
return fmt.Errorf("portaudio init failed: %w", err)
}
+ // Restore original environment variables after initialization
+ if origCard != "" {
+ os.Setenv("ALSA_PCM_CARD", origCard)
+ } else {
+ os.Unsetenv("ALSA_PCM_CARD")
+ }
+ if origDevice != "" {
+ os.Setenv("ALSA_PCM_DEVICE", origDevice)
+ } else {
+ os.Unsetenv("ALSA_PCM_DEVICE")
+ }
+ if origSubdevice != "" {
+ os.Setenv("ALSA_PCM_SUBDEVICE", origSubdevice)
+ } else {
+ os.Unsetenv("ALSA_PCM_SUBDEVICE")
+ }
+
// Initialize audio buffer
w.audioBuffer = make([]int16, 0)
-
in := make([]int16, 1024) // buffer size
stream, err := portaudio.OpenDefaultStream(1, 0, 16000.0, len(in), in)
if err != nil {
@@ -63,37 +105,45 @@ func (w *WhisperBinary) StartRecording() error {
return fmt.Errorf("failed to open microphone: %w", err)
}
- // Create a dummy command just for context management
- w.cmd = exec.CommandContext(w.ctx, "sh", "-c", "echo 'dummy command'")
-
go w.recordAudio(stream, in)
- w.running = true
-
+ w.recording = true
+ w.logger.Debug("Recording started")
return nil
}
func (w *WhisperBinary) recordAudio(stream *portaudio.Stream, in []int16) {
defer func() {
+ w.logger.Debug("recordAudio defer function called")
+ _ = stream.Stop() // Stop the stream
_ = portaudio.Terminate() // ignoring error as we're shutting down
+ w.logger.Debug("recordAudio terminated")
}()
-
+ w.logger.Debug("Starting audio stream")
if err := stream.Start(); err != nil {
+ w.logger.Error("Failed to start audio stream", "error", err)
return
}
-
+ w.logger.Debug("Audio stream started, entering recording loop")
for {
select {
case <-w.ctx.Done():
+ w.logger.Debug("Context done, exiting recording loop")
return
default:
- if !w.running {
+ // Check recording status with minimal lock time
+ w.mu.Lock()
+ recording := w.recording
+ w.mu.Unlock()
+
+ if !recording {
+ w.logger.Debug("Recording flag is false, exiting recording loop")
return
}
if err := stream.Read(); err != nil {
+ w.logger.Error("Error reading from stream", "error", err)
return
}
-
- // Append samples to buffer
+ // Append samples to buffer - only acquire lock when necessary
w.mu.Lock()
if w.audioBuffer == nil {
w.audioBuffer = make([]int16, 0)
@@ -108,98 +158,136 @@ func (w *WhisperBinary) recordAudio(stream *portaudio.Stream, in []int16) {
}
func (w *WhisperBinary) StopRecording() (string, error) {
+ w.logger.Debug("StopRecording called")
w.mu.Lock()
- if !w.running {
+ if !w.recording {
w.mu.Unlock()
return "", errors.New("not currently recording")
}
-
- w.running = false
+ w.logger.Debug("Setting recording to false and cancelling context")
+ w.recording = false
w.cancel() // This will stop the recording goroutine
w.mu.Unlock()
+ // Small delay to allow the recording goroutine to react to context cancellation
+ time.Sleep(100 * time.Millisecond)
+
// Save the recorded audio to a temporary file
tempFile, err := w.saveAudioToTempFile()
if err != nil {
+ w.logger.Error("Error saving audio to temp file", "error", err)
return "", fmt.Errorf("failed to save audio to temp file: %w", err)
}
- defer os.Remove(tempFile) // Clean up the temp file
+ w.logger.Debug("Saved audio to temp file", "file", tempFile)
- // Run the whisper binary
- cmd := exec.CommandContext(w.ctx, w.whisperPath, "-m", w.modelPath, "-l", w.lang, tempFile)
-
- var outBuf, errBuf bytes.Buffer
+ // Run the whisper binary without w.ctx (cancelled above) so that stopping the recording does not abort transcription
+ cmd := exec.Command(w.whisperPath, "-m", w.modelPath, "-l", w.lang, tempFile, "2>/dev/null")
+ var outBuf bytes.Buffer
cmd.Stdout = &outBuf
- cmd.Stderr = &errBuf
+ // Redirect stderr to suppress ALSA warnings and other stderr output
+ cmd.Stderr = io.Discard // Suppress stderr output from whisper binary
+ w.logger.Debug("Running whisper binary command")
if err := cmd.Run(); err != nil {
- return "", fmt.Errorf("whisper binary failed: %w, stderr: %s", err, errBuf.String())
+ // Clean up audio buffer
+ w.mu.Lock()
+ w.audioBuffer = nil
+ w.mu.Unlock()
+ // Since we're suppressing stderr, we'll just log that the command failed
+ w.logger.Error("Error running whisper binary", "error", err)
+ return "", fmt.Errorf("whisper binary failed: %w", err)
}
-
result := outBuf.String()
-
+ w.logger.Debug("Whisper binary completed", "result", result)
+
// Clean up audio buffer
w.mu.Lock()
w.audioBuffer = nil
w.mu.Unlock()
-
+
+ // Clean up the temporary file after transcription
+ w.logger.Debug("StopRecording completed")
+ os.Remove(tempFile)
+
return result, nil
}
// saveAudioToTempFile saves the recorded audio data to a temporary WAV file
func (w *WhisperBinary) saveAudioToTempFile() (string, error) {
+ w.logger.Debug("saveAudioToTempFile called")
// Create temporary WAV file
tempFile, err := os.CreateTemp("", "recording_*.wav")
if err != nil {
+ w.logger.Error("Failed to create temp file", "error", err)
return "", fmt.Errorf("failed to create temp file: %w", err)
}
+ w.logger.Debug("Created temp file", "file", tempFile.Name())
defer tempFile.Close()
// Write WAV header and data
+ w.logger.Debug("About to write WAV file", "file", tempFile.Name())
err = w.writeWAVFile(tempFile.Name())
if err != nil {
+ w.logger.Error("Error writing WAV file", "error", err)
return "", fmt.Errorf("failed to write WAV file: %w", err)
}
+ w.logger.Debug("WAV file written successfully", "file", tempFile.Name())
return tempFile.Name(), nil
}
// writeWAVFile creates a WAV file from the recorded audio data
func (w *WhisperBinary) writeWAVFile(filename string) error {
+ w.logger.Debug("writeWAVFile called", "filename", filename)
// Open file for writing
file, err := os.Create(filename)
if err != nil {
+ w.logger.Error("Error creating file", "error", err)
return err
}
defer file.Close()
+ w.logger.Debug("About to acquire mutex in writeWAVFile")
w.mu.Lock()
+ w.logger.Debug("Locked mutex, copying audio buffer")
audioData := make([]int16, len(w.audioBuffer))
copy(audioData, w.audioBuffer)
w.mu.Unlock()
+ w.logger.Debug("Unlocked mutex", "audio_data_length", len(audioData))
if len(audioData) == 0 {
+ w.logger.Warn("No audio data to write")
return errors.New("no audio data to write")
}
// Calculate data size (number of samples * size of int16)
dataSize := len(audioData) * 2 // 2 bytes per int16 sample
+ w.logger.Debug("Calculated data size", "size", dataSize)
// Write WAV header with the correct data size
header := w.createWAVHeader(16000, 1, 16, dataSize)
_, err = file.Write(header)
if err != nil {
+ w.logger.Error("Error writing WAV header", "error", err)
return err
}
+ w.logger.Debug("WAV header written successfully")
// Write audio data
- for _, sample := range audioData {
+ w.logger.Debug("About to write audio data samples")
+ for i, sample := range audioData {
// Write little-endian 16-bit sample
_, err := file.Write([]byte{byte(sample), byte(sample >> 8)})
if err != nil {
+ w.logger.Error("Error writing sample", "index", i, "error", err)
return err
}
+ // Log progress every 10000 samples to avoid too much output
+ if i%10000 == 0 {
+ w.logger.Debug("Written samples", "count", i)
+ }
}
+ w.logger.Debug("All audio data written successfully")
return nil
}
@@ -254,5 +342,5 @@ func (w *WhisperBinary) createWAVHeader(sampleRate, channels, bitsPerSample int,
func (w *WhisperBinary) IsRecording() bool {
w.mu.Lock()
defer w.mu.Unlock()
- return w.running
+ return w.recording
}