diff options
author | Grail Finder <wohilas@gmail.com> | 2025-05-21 21:16:42 +0300 |
---|---|---|
committer | Grail Finder <wohilas@gmail.com> | 2025-05-21 21:16:42 +0300 |
commit | 4a17dd745cbd7354a24b046b76838f7670af51c6 (patch) | |
tree | 7365b5d5d3b3ad24252d1a5ea6b1a3c0de0b2557 /extra | |
parent | fe4f759173ecd3cff2a6db38878cfd2727b8b49c (diff) |
Enha: rename to tts.go; use one buffer;
Diffstat (limited to 'extra')
-rw-r--r-- | extra/tts.go (renamed from extra/audio.go) | 100 | ||||
-rw-r--r-- | extra/vad.go | 1 |
2 files changed, 54 insertions, 47 deletions
diff --git a/extra/audio.go b/extra/tts.go index 11021f9..b8dd0d0 100644 --- a/extra/audio.go +++ b/extra/tts.go @@ -2,13 +2,14 @@ package extra import ( "bytes" - "gf-lt/config" - "gf-lt/models" "encoding/json" "fmt" + "gf-lt/config" + "gf-lt/models" "io" "log/slog" "net/http" + "regexp" "strings" "time" @@ -19,16 +20,16 @@ import ( ) var ( - TTSTextChan = make(chan string, 10000) - TTSFlushChan = make(chan bool, 1) - TTSDoneChan = make(chan bool, 1) + TTSTextChan = make(chan string, 10000) + TTSFlushChan = make(chan bool, 1) + TTSDoneChan = make(chan bool, 1) + endsWithPunctuation = regexp.MustCompile(`[;.!?]$`) ) type Orator interface { Speak(text string) error Stop() // pause and resume? - GetSBuilder() strings.Builder GetLogger() *slog.Logger } @@ -43,69 +44,79 @@ type KokoroOrator struct { Voice string currentStream *beep.Ctrl // Added for playback control textBuffer strings.Builder + // textBuffer bytes.Buffer } -func stoproutine(orator Orator) { +func (o *KokoroOrator) stoproutine() { <-TTSDoneChan - orator.GetLogger().Info("orator got done signal") - orator.Stop() + o.logger.Info("orator got done signal") + o.Stop() // drain the channel for len(TTSTextChan) > 0 { <-TTSTextChan } } -func readroutine(orator Orator) { +func (o *KokoroOrator) readroutine() { tokenizer, _ := english.NewSentenceTokenizer(nil) - var sentenceBuf bytes.Buffer - var remainder strings.Builder + // var sentenceBuf bytes.Buffer + // var remainder strings.Builder for { select { case chunk := <-TTSTextChan: - sentenceBuf.WriteString(chunk) - text := sentenceBuf.String() + // sentenceBuf.WriteString(chunk) + // text := sentenceBuf.String() + _, err := o.textBuffer.WriteString(chunk) + if err != nil { + o.logger.Warn("failed to write to stringbuilder", "error", err) + continue + } + text := o.textBuffer.String() sentences := tokenizer.Tokenize(text) + o.logger.Info("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences)) for i, sentence := range sentences { if i == len(sentences)-1 { - sentenceBuf.Reset() - sentenceBuf.WriteString(sentence.Text) - continue + o.textBuffer.Reset() + _, err := o.textBuffer.WriteString(sentence.Text) + if err != nil { + o.logger.Warn("failed to write to stringbuilder", "error", err) + continue + } + continue // if only one (often incomplete) sentence; wait for next chunk } - // Send complete sentence to TTS - if err := orator.Speak(sentence.Text); err != nil { - orator.GetLogger().Error("tts failed", "sentence", sentence.Text, "error", err) + o.logger.Info("calling Speak with sentence", "sent", sentence.Text) + if err := o.Speak(sentence.Text); err != nil { + o.logger.Error("tts failed", "sentence", sentence.Text, "error", err) } } case <-TTSFlushChan: + o.logger.Info("got flushchan signal start") // lln is done get the whole message out - // FIXME: loses one token - for chunk := range TTSTextChan { - // orator.GetLogger().Info("flushing", "chunk", chunk) - // sentenceBuf.WriteString(chunk) - remainder.WriteString(chunk) // I get text here - if len(TTSTextChan) == 0 { - break + if len(TTSTextChan) > 0 { // otherwise might get stuck + for chunk := range TTSTextChan { + _, err := o.textBuffer.WriteString(chunk) + if err != nil { + o.logger.Warn("failed to write to stringbuilder", "error", err) + continue + } + if len(TTSTextChan) == 0 { + break + } } } // INFO: if there is a lot of text it will take some time to make with tts at once // to avoid this pause, it might be better to keep splitting on sentences // but keepinig in mind that remainder could be ommited by tokenizer // Flush remaining text - remaining := remainder.String() - remainder.Reset() + remaining := o.textBuffer.String() + o.logger.Info("got flushchan signal", "rem", remaining) + defer o.textBuffer.Reset() if remaining != "" { - // orator.GetLogger().Info("flushing", "remaining", remaining) - if err := orator.Speak(remaining); err != nil { - orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err) + o.logger.Info("calling Speak with remainder", "rem", remaining) + if err := o.Speak(remaining); err != nil { + o.logger.Error("tts failed", "sentence", remaining, "error", err) } } - // case <-TTSDoneChan: - // orator.GetLogger().Info("orator got done signal") - // orator.Stop() - // // it that the best way to empty channel? - // close(TTSTextChan) - // TTSTextChan = make(chan string, 10000) - // return } } } @@ -120,8 +131,8 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator { Language: "a", Voice: "af_bella(1)+af_sky(1)", } - go readroutine(orator) - go stoproutine(orator) + go orator.readroutine() + go orator.stoproutine() return orator } @@ -191,18 +202,13 @@ func (o *KokoroOrator) Speak(text string) error { return nil } -// TODO: stop works; but new stream does not start afterwards func (o *KokoroOrator) Stop() { // speaker.Clear() o.logger.Info("attempted to stop orator", "orator", o) speaker.Lock() defer speaker.Unlock() if o.currentStream != nil { - o.currentStream.Paused = true + // o.currentStream.Paused = true o.currentStream.Streamer = nil } } - -func (o *KokoroOrator) GetSBuilder() strings.Builder { - return o.textBuffer -} diff --git a/extra/vad.go b/extra/vad.go new file mode 100644 index 0000000..2a9e238 --- /dev/null +++ b/extra/vad.go @@ -0,0 +1 @@ +package extra |