summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bot.go23
-rw-r--r--config.example.toml3
-rw-r--r--config/config.go10
-rw-r--r--extra/audio.go185
-rw-r--r--go.mod5
-rw-r--r--go.sum20
-rw-r--r--models/extra.go8
7 files changed, 250 insertions, 4 deletions
diff --git a/bot.go b/bot.go
index f6770e7..11dcbee 100644
--- a/bot.go
+++ b/bot.go
@@ -67,6 +67,7 @@ var (
interruptResp = false
ragger *rag.RAG
chunkParser ChunkParser
+ orator extra.Orator
defaultLCPProps = map[string]float32{
"temperature": 0.8,
"dry_multiplier": 0.0,
@@ -277,8 +278,13 @@ func checkGame(role string, tv *tview.TextView) {
// should go before form msg, since formmsg takes chatBody and makes ioreader out of it
// role is almost always user, unless it's regen or resume
// cannot get in this block, since cluedoState is nil;
- // check if cfg.EnableCluedo is true and init the cluedo state; ai!
- if cfg.EnableCluedo && cluedoState != nil {
+ if cfg.EnableCluedo {
+ // Initialize Cluedo game if needed
+ if cluedoState == nil {
+ playerOrder = []string{cfg.UserRole, cfg.AssistantRole, cfg.CluedoRole2}
+ cluedoState = extra.CluedoPrepCards(playerOrder)
+ }
+
notifyUser("got in cluedo", "yay")
currentPlayer := playerOrder[0]
playerOrder = append(playerOrder[1:], currentPlayer) // Rotate turns
@@ -328,6 +334,12 @@ func chatRound(userMsg, role string, tv *tview.TextView, regen, resume bool) {
}
}
respText := strings.Builder{}
+ // if tts is enabled
+ // var audioStream *extra.AudioStream
+ // if cfg.TTS_ENABLED {
+ // audioStream = extra.RunOrator(orator)
+ // // defer close(audioStream.DoneChan)
+ // }
out:
for {
select {
@@ -335,6 +347,11 @@ out:
fmt.Fprint(tv, chunk)
respText.WriteString(chunk)
tv.ScrollToEnd()
+ // Send chunk to audio stream handler
+ if cfg.TTS_ENABLED {
+ // audioStream.TextChan <- chunk
+ extra.TTSTextChan <- chunk
+ }
case <-streamDone:
botRespMode = false
break out
@@ -508,6 +525,8 @@ func init() {
}
choseChunkParser()
httpClient = createClient(time.Second * 15)
+ // TODO: check config for orator
+ orator = extra.InitOrator(logger, "http://localhost:8880/v1/audio/speech")
// go runModelNameTicker(time.Second * 120)
// tempLoad()
}
diff --git a/config.example.toml b/config.example.toml
index ded1475..846dbba 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -12,3 +12,6 @@ ChunkLimit = 100000
RAGBatchSize = 100
RAGWordLimit = 80
RAGWorkers = 5
+# extra tts
+TTS_ENABLED = false
+TTS_URL = "http://localhost:8880/v1/audio/speech"
diff --git a/config/config.go b/config/config.go
index b48d657..5e00dba 100644
--- a/config/config.go
+++ b/config/config.go
@@ -7,8 +7,8 @@ import (
)
type Config struct {
- EnableCluedo bool `toml:"EnableCluedo"` // Cluedo game mode toggle
- CluedoRole2 string `toml:"CluedoRole2"` // Secondary AI role name
+ EnableCluedo bool `toml:"EnableCluedo"` // Cluedo game mode toggle
+ CluedoRole2 string `toml:"CluedoRole2"` // Secondary AI role name
ChatAPI string `toml:"ChatAPI"`
CompletionAPI string `toml:"CompletionAPI"`
CurrentAPI string
@@ -39,6 +39,9 @@ type Config struct {
DeepSeekToken string `toml:"DeepSeekToken"`
DeepSeekModel string `toml:"DeepSeekModel"`
ApiLinks []string
+ // TTS
+ TTS_URL string `toml:"TTS_URL"`
+ TTS_ENABLED bool `toml:"TTS_ENABLED"`
}
func LoadConfigOrDefault(fn string) *Config {
@@ -66,6 +69,9 @@ func LoadConfigOrDefault(fn string) *Config {
config.RAGBatchSize = 100
config.RAGWordLimit = 80
config.RAGWorkers = 5
+ // tts
+ config.TTS_ENABLED = false
+ config.TTS_URL = "http://localhost:8880/v1/audio/speech"
}
config.CurrentAPI = config.ChatAPI
config.APIMap = map[string]string{
diff --git a/extra/audio.go b/extra/audio.go
new file mode 100644
index 0000000..ae3300c
--- /dev/null
+++ b/extra/audio.go
@@ -0,0 +1,185 @@
+package extra
+
+import (
+	"bytes"
+	"elefant/models"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"sync"
+	"time"
+
+	"github.com/gopxl/beep"
+	"github.com/gopxl/beep/mp3"
+	"github.com/gopxl/beep/speaker"
+	"github.com/neurosnap/sentences/english"
+)
+
+var (
+ TTSTextChan = make(chan string, 1000) // text chunks queued for speech (buffered, 1000 entries); drained by readroutine
+ TTSDoneChan = make(chan bool, 1) // any value makes readroutine speak its leftover buffer and exit
+)
+
+type Orator interface { // Orator is the text-to-speech backend that readroutine drives
+ Speak(text string) error // speaks one piece of text; readroutine calls it once per sentence and logs any error
+ GetLogger() *slog.Logger // logger readroutine uses to report Speak failures
+}
+
+// KokoroOrator implements Orator for https://github.com/remsky/Kokoro-FastAPI
+type KokoroOrator struct {
+ logger *slog.Logger // returned by GetLogger; also used by Speak for its own error logs
+ URL string // endpoint that requestSound POSTs the speech request to
+ Format models.AudioFormat // set to models.AFMP3 by InitOrator; NOTE(review): requestSound hard-codes "mp3" and never reads this field
+ Stream bool // sent as the "stream" field of the request payload
+ Speed int8 // sent as the "speed" field; NOTE(review): int8 allows whole numbers only - confirm the API does not expect fractional speeds
+ Language string // sent as the "lang_code" field of the request payload
+}
+
+// readroutine is the background TTS worker. It drains TTSTextChan, groups the
+// incoming chunks into sentences and passes every finished sentence to
+// orator.Speak.
+//
+// The last sentence reported by the tokenizer is treated as still being
+// written and stays in the buffer until more text arrives. A value on
+// TTSDoneChan speaks whatever is still buffered and ends the goroutine.
+// Speak failures are logged through the orator's logger and never stop the
+// loop.
+func readroutine(orator Orator) {
+	tokenizer, tokErr := english.NewSentenceTokenizer(nil)
+	if tokErr != nil {
+		// This error used to be discarded, leaving a nil tokenizer to be
+		// dereferenced on the first chunk. Keep draining the channel so
+		// senders never block; the accumulated text is still spoken on flush.
+		orator.GetLogger().Error("failed to create sentence tokenizer", "error", tokErr)
+	}
+	var sentenceBuf bytes.Buffer
+	for {
+		select {
+		case chunk := <-TTSTextChan:
+			sentenceBuf.WriteString(chunk)
+			if tokErr != nil {
+				continue
+			}
+			sentences := tokenizer.Tokenize(sentenceBuf.String())
+			if len(sentences) == 0 {
+				continue
+			}
+			last := len(sentences) - 1
+			for _, sentence := range sentences[:last] {
+				speakOrLog(orator, sentence.Text)
+			}
+			// The trailing sentence may be incomplete: carry it over.
+			pending := sentences[last].Text
+			sentenceBuf.Reset()
+			sentenceBuf.WriteString(pending)
+		case <-TTSDoneChan:
+			// Flush remaining text.
+			if remaining := sentenceBuf.String(); remaining != "" {
+				speakOrLog(orator, remaining)
+			}
+			return
+		}
+	}
+}
+
+// speakOrLog sends text to the orator and logs, rather than returns, a failure.
+func speakOrLog(orator Orator, text string) {
+	if err := orator.Speak(text); err != nil {
+		orator.GetLogger().Error("tts failed", "sentence", text, "error", err)
+	}
+}
+
+// InitOrator builds a KokoroOrator that posts to URL and logs through log,
+// starts the readroutine goroutine that feeds it from TTSTextChan, and
+// returns it as an Orator. The orator is set up for non-streamed mp3 output
+// at speed 1 with language code "a".
+func InitOrator(log *slog.Logger, URL string) Orator {
+	kokoro := new(KokoroOrator)
+	kokoro.logger = log
+	kokoro.URL = URL
+	kokoro.Format = models.AFMP3
+	kokoro.Stream = false
+	kokoro.Speed = 1
+	kokoro.Language = "a"
+
+	go readroutine(kokoro)
+	return kokoro
+}
+
+// type AudioStream struct {
+// TextChan chan string // Send text chunks here
+// DoneChan chan bool // Close when streaming ends
+// }
+
+// func RunOrator(orator Orator) *AudioStream {
+// stream := &AudioStream{
+// TextChan: make(chan string, 1000),
+// DoneChan: make(chan bool),
+// }
+// go func() {
+// tokenizer, _ := english.NewSentenceTokenizer(nil)
+// var sentenceBuf bytes.Buffer
+// for {
+// select {
+// case chunk := <-stream.TextChan:
+// sentenceBuf.WriteString(chunk)
+// text := sentenceBuf.String()
+// sentences := tokenizer.Tokenize(text)
+// for i, sentence := range sentences {
+// if i == len(sentences)-1 {
+// sentenceBuf.Reset()
+// sentenceBuf.WriteString(sentence.Text)
+// continue
+// }
+// // Send complete sentence to TTS
+// if err := orator.Speak(sentence.Text); err != nil {
+// orator.GetLogger().Error("tts failed", "sentence", sentence.Text, "error", err)
+// }
+// }
+// case <-stream.DoneChan:
+// // Flush remaining text
+// if remaining := sentenceBuf.String(); remaining != "" {
+// if err := orator.Speak(remaining); err != nil {
+// orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err)
+// }
+// }
+// return
+// }
+// }
+// }()
+// return stream
+// }
+
+// GetLogger returns the logger this orator was constructed with.
+func (o *KokoroOrator) GetLogger() *slog.Logger { return o.logger }
+
+// requestSound POSTs a JSON speech request for text to o.URL and returns the
+// response body; the caller is responsible for closing it. Any status other
+// than 200 closes the body and is reported as an error.
+func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) {
+	encoded, err := json.Marshal(map[string]any{
+		"input":                text,
+		"voice":                "af_bella(1)+af_sky(1)",
+		"response_format":      "mp3",
+		"download_format":      "mp3",
+		"stream":               o.Stream,
+		"speed":                o.Speed,
+		"return_download_link": true,
+		"lang_code":            o.Language,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal payload: %w", err)
+	}
+
+	req, err := http.NewRequest(http.MethodPost, o.URL, bytes.NewReader(encoded))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+	req.Header.Set("accept", "application/json")
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("request failed: %w", err)
+	}
+	if resp.StatusCode == http.StatusOK {
+		return resp.Body, nil
+	}
+	// Nothing useful can be played from a failed response; release it here.
+	resp.Body.Close()
+	return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+}
+
+func (o *KokoroOrator) Speak(text string) error {
+ body, err := o.requestSound(text)
+ if err != nil {
+ o.logger.Error("request failed", "error", err)
+ return fmt.Errorf("request failed: %w", err)
+ }
+ defer body.Close()
+ // Decode the mp3 audio from response body
+ streamer, format, err := mp3.Decode(body)
+ if err != nil {
+ o.logger.Error("mp3 decode failed", "error", err)
+ return fmt.Errorf("mp3 decode failed: %w", err)
+ }
+ defer streamer.Close()
+ speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10))
+ done := make(chan bool)
+ speaker.Play(beep.Seq(streamer, beep.Callback(func() {
+ close(done)
+ })))
+ <-done
+ return nil
+}
diff --git a/go.mod b/go.mod
index eeaa6f6..aa00007 100644
--- a/go.mod
+++ b/go.mod
@@ -7,6 +7,7 @@ require (
github.com/asg017/sqlite-vec-go-bindings v0.1.6
github.com/gdamore/tcell/v2 v2.7.4
github.com/glebarez/go-sqlite v1.22.0
+ github.com/gopxl/beep v1.4.1
github.com/jmoiron/sqlx v1.4.0
github.com/ncruces/go-sqlite3 v0.21.3
github.com/neurosnap/sentences v1.1.2
@@ -15,12 +16,16 @@ require (
require (
github.com/dustin/go-humanize v1.0.1 // indirect
+ github.com/ebitengine/oto/v3 v3.1.0 // indirect
+ github.com/ebitengine/purego v0.7.1 // indirect
github.com/gdamore/encoding v1.0.0 // indirect
github.com/google/uuid v1.6.0 // indirect
+ github.com/hajimehoshi/go-mp3 v0.3.4 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/ncruces/julianday v1.0.0 // indirect
+ github.com/pkg/errors v0.9.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/tetratelabs/wazero v1.8.2 // indirect
diff --git a/go.sum b/go.sum
index fe84d96..ccac93c 100644
--- a/go.sum
+++ b/go.sum
@@ -4,8 +4,14 @@ github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0
github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/asg017/sqlite-vec-go-bindings v0.1.6 h1:Nx0jAzyS38XpkKznJ9xQjFXz2X9tI7KqjwVxV8RNoww=
github.com/asg017/sqlite-vec-go-bindings v0.1.6/go.mod h1:A8+cTt/nKFsYCQF6OgzSNpKZrzNo5gQsXBTfsXHXY0Q=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
+github.com/ebitengine/oto/v3 v3.1.0 h1:9tChG6rizyeR2w3vsygTTTVVJ9QMMyu00m2yBOCch6U=
+github.com/ebitengine/oto/v3 v3.1.0/go.mod h1:IK1QTnlfZK2GIB6ziyECm433hAdTaPpOsGMLhEyEGTg=
+github.com/ebitengine/purego v0.7.1 h1:6/55d26lG3o9VCZX8lping+bZcmShseiqlh2bnUDiPA=
+github.com/ebitengine/purego v0.7.1/go.mod h1:ah1In8AOtksoNK6yk5z1HTJeUkC1Ez4Wk2idgGslMwQ=
github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko=
github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg=
github.com/gdamore/tcell/v2 v2.7.4 h1:sg6/UnTM9jGpZU+oFYAsDahfchWAFW8Xx2yFinNSAYU=
@@ -18,6 +24,11 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu
github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/gopxl/beep v1.4.1 h1:WqNs9RsDAhG9M3khMyc1FaVY50dTdxG/6S6a3qsUHqE=
+github.com/gopxl/beep v1.4.1/go.mod h1:A1dmiUkuY8kxsvcNJNUBIEcchmiP6eUyCHSxpXl0YO0=
+github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68=
+github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo=
+github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo=
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
@@ -36,6 +47,10 @@ github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt
github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g=
github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7ZoUw=
github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/tview v0.0.0-20241103174730-c76f7879f592 h1:YIJ+B1hePP6AgynC5TcqpO0H9k3SSoZa2BGyL6vDUzM=
@@ -44,6 +59,8 @@ github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJ
github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tetratelabs/wazero v1.8.2 h1:yIgLR/b2bN31bjxwXHD8a3d+BogigR952csSDdLYEv4=
github.com/tetratelabs/wazero v1.8.2/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
@@ -62,6 +79,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -85,6 +103,8 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
modernc.org/libc v1.37.6 h1:orZH3c5wmhIQFTXF+Nt+eeauyd+ZIt2BX6ARe+kD+aw=
modernc.org/libc v1.37.6/go.mod h1:YAXkAZ8ktnkCKaN9sw/UDeUVkGYJ/YquGO4FTi5nmHE=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
diff --git a/models/extra.go b/models/extra.go
new file mode 100644
index 0000000..4e3a0bf
--- /dev/null
+++ b/models/extra.go
@@ -0,0 +1,8 @@
+package models
+
+type AudioFormat string // AudioFormat names an audio encoding by its string identifier
+
+const (
+ AFOPUS AudioFormat = "opus" // not referenced anywhere in the visible code
+ AFMP3 AudioFormat = "mp3" // assigned to KokoroOrator.Format by extra.InitOrator
+)