summaryrefslogtreecommitdiff
path: root/extra/audio.go
diff options
context:
space:
mode:
Diffstat (limited to 'extra/audio.go')
-rw-r--r--extra/audio.go159
1 files changed, 82 insertions, 77 deletions
diff --git a/extra/audio.go b/extra/audio.go
index b130bf8..8b0d8f9 100644
--- a/extra/audio.go
+++ b/extra/audio.go
@@ -2,6 +2,7 @@ package extra
import (
"bytes"
+ "elefant/config"
"elefant/models"
"encoding/json"
"fmt"
@@ -18,24 +19,45 @@ import (
)
var (
- TTSTextChan = make(chan string, 1000)
+ TTSTextChan = make(chan string, 10000)
TTSFlushChan = make(chan bool, 1)
TTSDoneChan = make(chan bool, 1)
)
type Orator interface {
Speak(text string) error
+ Stop()
+ // pause and resume?
+ GetSBuilder() strings.Builder
GetLogger() *slog.Logger
}
// impl https://github.com/remsky/Kokoro-FastAPI
type KokoroOrator struct {
- logger *slog.Logger
- URL string
- Format models.AudioFormat
- Stream bool
- Speed int8
- Language string
+ logger *slog.Logger
+ URL string
+ Format models.AudioFormat
+ Stream bool
+ Speed float32
+ Language string
+ Voice string
+ currentStream *beep.Ctrl // Added for playback control
+ textBuffer strings.Builder
+}
+
+func stoproutine(orator Orator) {
+ select {
+ case <-TTSDoneChan:
+ orator.GetLogger().Info("orator got done signal")
+ orator.Stop()
+ // close(TTSTextChan)
+ // TTSTextChan = make(chan string, 10000)
+ // drain the channel
+ for len(TTSTextChan) > 0 {
+ <-TTSTextChan
+ }
+ return
+ }
}
func readroutine(orator Orator) {
@@ -70,98 +92,58 @@ func readroutine(orator Orator) {
break
}
}
+ // INFO: if there is a lot of text it will take some time to make with tts at once
+ // to avoid this pause, it might be better to keep splitting on sentences
+ // but keepinig in mind that remainder could be ommited by tokenizer
// Flush remaining text
remaining := remainder.String()
- orator.GetLogger().Info("flushing", "rem", remaining)
- if remaining != "" { // but nothing is here?
- orator.GetLogger().Info("flushing", "remaining", remaining)
- if err := orator.Speak(remaining); err != nil {
- orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err)
- }
- }
- case <-TTSDoneChan:
- // Flush remaining text
- if remaining := sentenceBuf.String(); remaining != "" {
+ remainder.Reset()
+ if remaining != "" {
+ // orator.GetLogger().Info("flushing", "remaining", remaining)
if err := orator.Speak(remaining); err != nil {
orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err)
}
}
- return
+ // case <-TTSDoneChan:
+ // orator.GetLogger().Info("orator got done signal")
+ // orator.Stop()
+ // // it that the best way to empty channel?
+ // close(TTSTextChan)
+ // TTSTextChan = make(chan string, 10000)
+ // return
}
}
}
-func InitOrator(log *slog.Logger, URL string) Orator {
+func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
orator := &KokoroOrator{
logger: log,
- URL: URL,
+ URL: cfg.TTS_URL,
Format: models.AFMP3,
Stream: false,
- Speed: 1,
+ Speed: cfg.TTS_SPEED,
Language: "a",
+ Voice: "af_bella(1)+af_sky(1)",
}
go readroutine(orator)
+ go stoproutine(orator)
return orator
}
-// type AudioStream struct {
-// TextChan chan string // Send text chunks here
-// DoneChan chan bool // Close when streaming ends
-// }
-
-// func RunOrator(orator Orator) *AudioStream {
-// stream := &AudioStream{
-// TextChan: make(chan string, 1000),
-// DoneChan: make(chan bool),
-// }
-// go func() {
-// tokenizer, _ := english.NewSentenceTokenizer(nil)
-// var sentenceBuf bytes.Buffer
-// for {
-// select {
-// case chunk := <-stream.TextChan:
-// sentenceBuf.WriteString(chunk)
-// text := sentenceBuf.String()
-// sentences := tokenizer.Tokenize(text)
-// for i, sentence := range sentences {
-// if i == len(sentences)-1 {
-// sentenceBuf.Reset()
-// sentenceBuf.WriteString(sentence.Text)
-// continue
-// }
-// // Send complete sentence to TTS
-// if err := orator.Speak(sentence.Text); err != nil {
-// orator.GetLogger().Error("tts failed", "sentence", sentence.Text, "error", err)
-// }
-// }
-// case <-stream.DoneChan:
-// // Flush remaining text
-// if remaining := sentenceBuf.String(); remaining != "" {
-// if err := orator.Speak(remaining); err != nil {
-// orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err)
-// }
-// }
-// return
-// }
-// }
-// }()
-// return stream
-// }
-
func (o *KokoroOrator) GetLogger() *slog.Logger {
return o.logger
}
func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) {
payload := map[string]interface{}{
- "input": text,
- "voice": "af_bella(1)+af_sky(1)",
- "response_format": "mp3",
- "download_format": "mp3",
- "stream": o.Stream,
- "speed": o.Speed,
- "return_download_link": true,
- "lang_code": o.Language,
+ "input": text,
+ "voice": o.Voice,
+ "response_format": o.Format,
+ "download_format": o.Format,
+ "stream": o.Stream,
+ "speed": o.Speed,
+ // "return_download_link": true,
+ "lang_code": o.Language,
}
payloadBytes, err := json.Marshal(payload)
if err != nil {
@@ -185,6 +167,7 @@ func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) {
}
func (o *KokoroOrator) Speak(text string) error {
+ o.logger.Info("fn: Speak is called", "text-len", len(text))
body, err := o.requestSound(text)
if err != nil {
o.logger.Error("request failed", "error", err)
@@ -198,11 +181,33 @@ func (o *KokoroOrator) Speak(text string) error {
return fmt.Errorf("mp3 decode failed: %w", err)
}
defer streamer.Close()
- speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10))
+ // here it spams with errors that speaker cannot be initialized more than once, but how would we deal with many audio records then?
+ if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil {
+ o.logger.Debug("failed to init speaker", "error", err)
+ }
done := make(chan bool)
- speaker.Play(beep.Seq(streamer, beep.Callback(func() {
+ // Create controllable stream and store reference
+ o.currentStream = &beep.Ctrl{Streamer: beep.Seq(streamer, beep.Callback(func() {
close(done)
- })))
- <-done
+ o.currentStream = nil
+ })), Paused: false}
+ speaker.Play(o.currentStream)
+ <-done // we hang in this routine;
return nil
}
+
+// TODO: stop works; but new stream does not start afterwards
+func (o *KokoroOrator) Stop() {
+ // speaker.Clear()
+ o.logger.Info("attempted to stop orator", "orator", o)
+ speaker.Lock()
+ defer speaker.Unlock()
+ if o.currentStream != nil {
+ o.currentStream.Paused = true
+ o.currentStream.Streamer = nil
+ }
+}
+
+func (o *KokoroOrator) GetSBuilder() strings.Builder {
+ return o.textBuffer
+}