summaryrefslogtreecommitdiff
path: root/extra/whisper_binary.go
diff options
context:
space:
mode:
Diffstat (limited to 'extra/whisper_binary.go')
-rw-r--r--extra/whisper_binary.go227
1 files changed, 227 insertions, 0 deletions
diff --git a/extra/whisper_binary.go b/extra/whisper_binary.go
index 1002a97..17345f2 100644
--- a/extra/whisper_binary.go
+++ b/extra/whisper_binary.go
@@ -1,13 +1,21 @@
package extra
import (
+ "bytes"
"context"
+ "errors"
+ "fmt"
"gf-lt/config"
"log/slog"
+ "os"
"os/exec"
"sync"
+
+ "github.com/gordonklaus/portaudio"
)
+
+
type WhisperBinary struct {
whisperPath string
modelPath string
@@ -17,6 +25,7 @@ type WhisperBinary struct {
mu sync.Mutex
running bool
cmd *exec.Cmd
+ audioBuffer []int16
}
func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
@@ -29,3 +38,221 @@ func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
cancel: cancel,
}
}
+
+func (w *WhisperBinary) StartRecording() error {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+
+ if w.running {
+ return errors.New("recording is already in progress")
+ }
+
+ if err := portaudio.Initialize(); err != nil {
+ return fmt.Errorf("portaudio init failed: %w", err)
+ }
+
+ // Initialize audio buffer
+ w.audioBuffer = make([]int16, 0)
+
+ in := make([]int16, 1024) // buffer size
+ stream, err := portaudio.OpenDefaultStream(1, 0, 16000.0, len(in), in)
+ if err != nil {
+ if paErr := portaudio.Terminate(); paErr != nil {
+ return fmt.Errorf("failed to open microphone: %w; terminate error: %w", err, paErr)
+ }
+ return fmt.Errorf("failed to open microphone: %w", err)
+ }
+
+ // Create a dummy command just for context management
+ w.cmd = exec.CommandContext(w.ctx, "sh", "-c", "echo 'dummy command'")
+
+ go w.recordAudio(stream, in)
+ w.running = true
+
+ return nil
+}
+
+func (w *WhisperBinary) recordAudio(stream *portaudio.Stream, in []int16) {
+ defer func() {
+ _ = portaudio.Terminate() // ignoring error as we're shutting down
+ }()
+
+ if err := stream.Start(); err != nil {
+ return
+ }
+
+ for {
+ select {
+ case <-w.ctx.Done():
+ return
+ default:
+ if !w.running {
+ return
+ }
+ if err := stream.Read(); err != nil {
+ return
+ }
+
+ // Append samples to buffer
+ w.mu.Lock()
+ if w.audioBuffer == nil {
+ w.audioBuffer = make([]int16, 0)
+ }
+ // Make a copy of the input buffer to avoid overwriting
+ tempBuffer := make([]int16, len(in))
+ copy(tempBuffer, in)
+ w.audioBuffer = append(w.audioBuffer, tempBuffer...)
+ w.mu.Unlock()
+ }
+ }
+}
+
+func (w *WhisperBinary) StopRecording() (string, error) {
+ w.mu.Lock()
+ if !w.running {
+ w.mu.Unlock()
+ return "", errors.New("not currently recording")
+ }
+
+ w.running = false
+ w.cancel() // This will stop the recording goroutine
+ w.mu.Unlock()
+
+ // Save the recorded audio to a temporary file
+ tempFile, err := w.saveAudioToTempFile()
+ if err != nil {
+ return "", fmt.Errorf("failed to save audio to temp file: %w", err)
+ }
+ defer os.Remove(tempFile) // Clean up the temp file
+
+ // Run the whisper binary
+ cmd := exec.CommandContext(w.ctx, w.whisperPath, "-m", w.modelPath, "-l", w.lang, tempFile)
+
+ var outBuf, errBuf bytes.Buffer
+ cmd.Stdout = &outBuf
+ cmd.Stderr = &errBuf
+
+ if err := cmd.Run(); err != nil {
+ return "", fmt.Errorf("whisper binary failed: %w, stderr: %s", err, errBuf.String())
+ }
+
+ result := outBuf.String()
+
+ // Clean up audio buffer
+ w.mu.Lock()
+ w.audioBuffer = nil
+ w.mu.Unlock()
+
+ return result, nil
+}
+
+// saveAudioToTempFile saves the recorded audio data to a temporary WAV file
+func (w *WhisperBinary) saveAudioToTempFile() (string, error) {
+ // Create temporary WAV file
+ tempFile, err := os.CreateTemp("", "recording_*.wav")
+ if err != nil {
+ return "", fmt.Errorf("failed to create temp file: %w", err)
+ }
+ defer tempFile.Close()
+
+ // Write WAV header and data
+ err = w.writeWAVFile(tempFile.Name())
+ if err != nil {
+ return "", fmt.Errorf("failed to write WAV file: %w", err)
+ }
+
+ return tempFile.Name(), nil
+}
+
+// writeWAVFile creates a WAV file from the recorded audio data
+func (w *WhisperBinary) writeWAVFile(filename string) error {
+ // Open file for writing
+ file, err := os.Create(filename)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ w.mu.Lock()
+ audioData := make([]int16, len(w.audioBuffer))
+ copy(audioData, w.audioBuffer)
+ w.mu.Unlock()
+
+ if len(audioData) == 0 {
+ return errors.New("no audio data to write")
+ }
+
+ // Calculate data size (number of samples * size of int16)
+ dataSize := len(audioData) * 2 // 2 bytes per int16 sample
+
+ // Write WAV header with the correct data size
+ header := w.createWAVHeader(16000, 1, 16, dataSize)
+ _, err = file.Write(header)
+ if err != nil {
+ return err
+ }
+
+ // Write audio data
+ for _, sample := range audioData {
+ // Write little-endian 16-bit sample
+ _, err := file.Write([]byte{byte(sample), byte(sample >> 8)})
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// createWAVHeader creates a WAV file header
+func (w *WhisperBinary) createWAVHeader(sampleRate, channels, bitsPerSample int, dataSize int) []byte {
+ header := make([]byte, 44)
+ copy(header[0:4], "RIFF")
+ // Total file size will be updated later
+ copy(header[8:12], "WAVE")
+ copy(header[12:16], "fmt ")
+ // fmt chunk size (16 for PCM)
+ header[16] = 16
+ header[17] = 0
+ header[18] = 0
+ header[19] = 0
+ // Audio format (1 = PCM)
+ header[20] = 1
+ header[21] = 0
+ // Number of channels
+ header[22] = byte(channels)
+ header[23] = 0
+ // Sample rate
+ header[24] = byte(sampleRate)
+ header[25] = byte(sampleRate >> 8)
+ header[26] = byte(sampleRate >> 16)
+ header[27] = byte(sampleRate >> 24)
+ // Byte rate
+ byteRate := sampleRate * channels * bitsPerSample / 8
+ header[28] = byte(byteRate)
+ header[29] = byte(byteRate >> 8)
+ header[30] = byte(byteRate >> 16)
+ header[31] = byte(byteRate >> 24)
+ // Block align
+ blockAlign := channels * bitsPerSample / 8
+ header[32] = byte(blockAlign)
+ header[33] = 0
+ // Bits per sample
+ header[34] = byte(bitsPerSample)
+ header[35] = 0
+ // "data" subchunk
+ copy(header[36:40], "data")
+ // Data size
+ header[40] = byte(dataSize)
+ header[41] = byte(dataSize >> 8)
+ header[42] = byte(dataSize >> 16)
+ header[43] = byte(dataSize >> 24)
+
+ return header
+}
+
+func (w *WhisperBinary) IsRecording() bool {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ return w.running
+}