summaryrefslogtreecommitdiff
path: root/extra/stt.go
diff options
context:
space:
mode:
Diffstat (limited to 'extra/stt.go')
-rw-r--r--extra/stt.go132
1 files changed, 0 insertions, 132 deletions
diff --git a/extra/stt.go b/extra/stt.go
index 86fcf9c..7bbf2fd 100644
--- a/extra/stt.go
+++ b/extra/stt.go
@@ -6,18 +6,10 @@ package extra
import (
"bytes"
"encoding/binary"
- "errors"
- "fmt"
"gf-lt/config"
"io"
"log/slog"
- "mime/multipart"
- "net/http"
"regexp"
- "strings"
- "syscall"
-
- "github.com/gordonklaus/portaudio"
)
var specialRE = regexp.MustCompile(`\[.*?\]`)
@@ -44,14 +36,6 @@ func NewSTT(logger *slog.Logger, cfg *config.Config) STT {
return NewWhisperServer(logger, cfg)
}
-type WhisperServer struct {
- logger *slog.Logger
- ServerURL string
- SampleRate int
- AudioBuffer *bytes.Buffer
- recording bool
-}
-
func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
return &WhisperServer{
logger: logger,
@@ -61,69 +45,6 @@ func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
}
}
-func (stt *WhisperServer) StartRecording() error {
- if err := stt.microphoneStream(stt.SampleRate); err != nil {
- return fmt.Errorf("failed to init microphone: %w", err)
- }
- stt.recording = true
- return nil
-}
-
-func (stt *WhisperServer) StopRecording() (string, error) {
- stt.recording = false
- // wait loop to finish?
- if stt.AudioBuffer == nil {
- err := errors.New("unexpected nil AudioBuffer")
- stt.logger.Error(err.Error())
- return "", err
- }
- // Create WAV header first
- body := &bytes.Buffer{}
- writer := multipart.NewWriter(body)
- // Add audio file part
- part, err := writer.CreateFormFile("file", "recording.wav")
- if err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- // Stream directly to multipart writer: header + raw data
- dataSize := stt.AudioBuffer.Len()
- stt.writeWavHeader(part, dataSize)
- if _, err := io.Copy(part, stt.AudioBuffer); err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- // Reset buffer for next recording
- stt.AudioBuffer.Reset()
- // Add response format field
- err = writer.WriteField("response_format", "text")
- if err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- if writer.Close() != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- // Send request
- resp, err := http.Post(stt.ServerURL, writer.FormDataContentType(), body) //nolint:noctx
- if err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- defer resp.Body.Close()
- // Read and print response
- responseTextBytes, err := io.ReadAll(resp.Body)
- if err != nil {
- stt.logger.Error("fn: StopRecording", "error", err)
- return "", err
- }
- resptext := strings.TrimRight(string(responseTextBytes), "\n")
- // in case there are special tokens like [_BEG_]
- resptext = specialRE.ReplaceAllString(resptext, "")
- return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
-}
-
func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
header := make([]byte, 44)
copy(header[0:4], "RIFF")
@@ -147,56 +68,3 @@ func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) {
func (stt *WhisperServer) IsRecording() bool {
return stt.recording
}
-
-func (stt *WhisperServer) microphoneStream(sampleRate int) error {
- // Temporarily redirect stderr to suppress ALSA warnings during PortAudio init
- origStderr, errDup := syscall.Dup(syscall.Stderr)
- if errDup != nil {
- return fmt.Errorf("failed to dup stderr: %w", errDup)
- }
- nullFD, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
- if err != nil {
- _ = syscall.Close(origStderr) // Close the dup'd fd if open fails
- return fmt.Errorf("failed to open /dev/null: %w", err)
- }
- // redirect stderr
- _ = syscall.Dup2(nullFD, syscall.Stderr)
- // Initialize PortAudio (this is where ALSA warnings occur)
- defer func() {
- // Restore stderr
- _ = syscall.Dup2(origStderr, syscall.Stderr)
- _ = syscall.Close(origStderr)
- _ = syscall.Close(nullFD)
- }()
- if err := portaudio.Initialize(); err != nil {
- return fmt.Errorf("portaudio init failed: %w", err)
- }
- in := make([]int16, 64)
- stream, err := portaudio.OpenDefaultStream(1, 0, float64(sampleRate), len(in), in)
- if err != nil {
- if paErr := portaudio.Terminate(); paErr != nil {
- return fmt.Errorf("failed to open microphone: %w; terminate error: %w", err, paErr)
- }
- return fmt.Errorf("failed to open microphone: %w", err)
- }
- go func(stream *portaudio.Stream) {
- if err := stream.Start(); err != nil {
- stt.logger.Error("microphoneStream", "error", err)
- return
- }
- for {
- if !stt.IsRecording() {
- return
- }
- if err := stream.Read(); err != nil {
- stt.logger.Error("reading stream", "error", err)
- return
- }
- if err := binary.Write(stt.AudioBuffer, binary.LittleEndian, in); err != nil {
- stt.logger.Error("writing to buffer", "error", err)
- return
- }
- }
- }(stream)
- return nil
-}