diff options
-rw-r--r-- | bot.go | 23 | ||||
-rw-r--r-- | config.example.toml | 3 | ||||
-rw-r--r-- | config/config.go | 10 | ||||
-rw-r--r-- | extra/audio.go | 185 | ||||
-rw-r--r-- | go.mod | 5 | ||||
-rw-r--r-- | go.sum | 20 | ||||
-rw-r--r-- | models/extra.go | 8 |
7 files changed, 250 insertions, 4 deletions
@@ -67,6 +67,7 @@ var ( interruptResp = false ragger *rag.RAG chunkParser ChunkParser + orator extra.Orator defaultLCPProps = map[string]float32{ "temperature": 0.8, "dry_multiplier": 0.0, @@ -277,8 +278,13 @@ func checkGame(role string, tv *tview.TextView) { // should go before form msg, since formmsg takes chatBody and makes ioreader out of it // role is almost always user, unless it's regen or resume // cannot get in this block, since cluedoState is nil; - // check if cfg.EnableCluedo is true and init the cluedo state; ai! - if cfg.EnableCluedo && cluedoState != nil { + if cfg.EnableCluedo { + // Initialize Cluedo game if needed + if cluedoState == nil { + playerOrder = []string{cfg.UserRole, cfg.AssistantRole, cfg.CluedoRole2} + cluedoState = extra.CluedoPrepCards(playerOrder) + } + notifyUser("got in cluedo", "yay") currentPlayer := playerOrder[0] playerOrder = append(playerOrder[1:], currentPlayer) // Rotate turns @@ -328,6 +334,12 @@ func chatRound(userMsg, role string, tv *tview.TextView, regen, resume bool) { } } respText := strings.Builder{} + // if tts is enabled + // var audioStream *extra.AudioStream + // if cfg.TTS_ENABLED { + // audioStream = extra.RunOrator(orator) + // // defer close(audioStream.DoneChan) + // } out: for { select { @@ -335,6 +347,11 @@ out: fmt.Fprint(tv, chunk) respText.WriteString(chunk) tv.ScrollToEnd() + // Send chunk to audio stream handler + if cfg.TTS_ENABLED { + // audioStream.TextChan <- chunk + extra.TTSTextChan <- chunk + } case <-streamDone: botRespMode = false break out @@ -508,6 +525,8 @@ func init() { } choseChunkParser() httpClient = createClient(time.Second * 15) + // TODO: check config for orator + orator = extra.InitOrator(logger, "http://localhost:8880/v1/audio/speech") // go runModelNameTicker(time.Second * 120) // tempLoad() } diff --git a/config.example.toml b/config.example.toml index ded1475..846dbba 100644 --- a/config.example.toml +++ b/config.example.toml @@ -12,3 +12,6 @@ ChunkLimit = 100000 RAGBatchSize = 100 RAGWordLimit = 80 RAGWorkers = 5 +# extra tts +TTS_ENABLED = false +TTS_URL = "http://localhost:8880/v1/audio/speech" diff --git a/config/config.go b/config/config.go index b48d657..5e00dba 100644 --- a/config/config.go +++ b/config/config.go @@ -7,8 +7,8 @@ import ( ) type Config struct { - EnableCluedo bool `toml:"EnableCluedo"` // Cluedo game mode toggle - CluedoRole2 string `toml:"CluedoRole2"` // Secondary AI role name + EnableCluedo bool `toml:"EnableCluedo"` // Cluedo game mode toggle + CluedoRole2 string `toml:"CluedoRole2"` // Secondary AI role name ChatAPI string `toml:"ChatAPI"` CompletionAPI string `toml:"CompletionAPI"` CurrentAPI string @@ -39,6 +39,9 @@ type Config struct { DeepSeekToken string `toml:"DeepSeekToken"` DeepSeekModel string `toml:"DeepSeekModel"` ApiLinks []string + // TTS + TTS_URL string `toml:"TTS_URL"` + TTS_ENABLED bool `toml:"TTS_ENABLED"` } func LoadConfigOrDefault(fn string) *Config { @@ -66,6 +69,9 @@ func LoadConfigOrDefault(fn string) *Config { config.RAGBatchSize = 100 config.RAGWordLimit = 80 config.RAGWorkers = 5 + // tts + config.TTS_ENABLED = false + config.TTS_URL = "http://localhost:8880/v1/audio/speech" } config.CurrentAPI = config.ChatAPI config.APIMap = map[string]string{ diff --git a/extra/audio.go b/extra/audio.go new file mode 100644 index 0000000..ae3300c --- /dev/null +++ b/extra/audio.go @@ -0,0 +1,185 @@ +package extra + +import ( + "bytes" + "elefant/models" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "time" + + "github.com/gopxl/beep" + "github.com/gopxl/beep/mp3" + "github.com/gopxl/beep/speaker" + "github.com/neurosnap/sentences/english" +) + +var ( + TTSTextChan = make(chan string, 1000) + TTSDoneChan = make(chan bool, 1) +) + +type Orator interface { + Speak(text string) error + GetLogger() *slog.Logger +} + +// impl https://github.com/remsky/Kokoro-FastAPI +type KokoroOrator struct { + logger *slog.Logger + URL string + Format models.AudioFormat + Stream bool + Speed int8 + Language string +} + +func readroutine(orator Orator) { + tokenizer, _ := english.NewSentenceTokenizer(nil) + var sentenceBuf bytes.Buffer + for { + select { + case chunk := <-TTSTextChan: + sentenceBuf.WriteString(chunk) + text := sentenceBuf.String() + sentences := tokenizer.Tokenize(text) + for i, sentence := range sentences { + if i == len(sentences)-1 { + sentenceBuf.Reset() + sentenceBuf.WriteString(sentence.Text) + continue + } + // Send complete sentence to TTS + if err := orator.Speak(sentence.Text); err != nil { + orator.GetLogger().Error("tts failed", "sentence", sentence.Text, "error", err) + } + } + case <-TTSDoneChan: + // Flush remaining text + if remaining := sentenceBuf.String(); remaining != "" { + if err := orator.Speak(remaining); err != nil { + orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err) + } + } + return + } + } +} + +func InitOrator(log *slog.Logger, URL string) Orator { + orator := &KokoroOrator{ + logger: log, + URL: URL, + Format: models.AFMP3, + Stream: false, + Speed: 1, + Language: "a", + } + go readroutine(orator) + return orator +} + +// type AudioStream struct { +// TextChan chan string // Send text chunks here +// DoneChan chan bool // Close when streaming ends +// } + +// func RunOrator(orator Orator) *AudioStream { +// stream := &AudioStream{ +// TextChan: make(chan string, 1000), +// DoneChan: make(chan bool), +// } +// go func() { +// tokenizer, _ := english.NewSentenceTokenizer(nil) +// var sentenceBuf bytes.Buffer +// for { +// select { +// case chunk := <-stream.TextChan: +// sentenceBuf.WriteString(chunk) +// text := sentenceBuf.String() +// sentences := tokenizer.Tokenize(text) +// for i, sentence := range sentences { +// if i == len(sentences)-1 { +// sentenceBuf.Reset() +// sentenceBuf.WriteString(sentence.Text) +// continue +// } +// // Send complete sentence to TTS +// if err := orator.Speak(sentence.Text); err != nil { +// orator.GetLogger().Error("tts failed", "sentence", sentence.Text, "error", err) +// } +// } +// case <-stream.DoneChan: +// // Flush remaining text +// if remaining := sentenceBuf.String(); remaining != "" { +// if err := orator.Speak(remaining); err != nil { +// orator.GetLogger().Error("tts failed", "sentence", remaining, "error", err) +// } +// } +// return +// } +// } +// }() +// return stream +// } + +func (o *KokoroOrator) GetLogger() *slog.Logger { + return o.logger +} + +func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) { + payload := map[string]interface{}{ + "input": text, + "voice": "af_bella(1)+af_sky(1)", + "response_format": "mp3", + "download_format": "mp3", + "stream": o.Stream, + "speed": o.Speed, + "return_download_link": true, + "lang_code": o.Language, + } + payloadBytes, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("failed to marshal payload: %w", err) + } + req, err := http.NewRequest("POST", o.URL, bytes.NewBuffer(payloadBytes)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("accept", "application/json") + req.Header.Set("Content-Type", "application/json") + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + if resp.StatusCode != http.StatusOK { + defer resp.Body.Close() + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + return resp.Body, nil +} + +func (o *KokoroOrator) Speak(text string) error { + body, err := o.requestSound(text) + if err != nil { + o.logger.Error("request failed", "error", err) + return fmt.Errorf("request failed: %w", err) + } + defer body.Close() + // Decode the mp3 audio from response body + streamer, format, err := mp3.Decode(body) + if err != nil { + o.logger.Error("mp3 decode failed", "error", err) + return fmt.Errorf("mp3 decode failed: %w", err) + } + defer streamer.Close() + speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)) + done := make(chan bool) + speaker.Play(beep.Seq(streamer, beep.Callback(func() { + close(done) + }))) + <-done + return nil +} @@ -7,6 +7,7 @@ require ( github.com/asg017/sqlite-vec-go-bindings v0.1.6 github.com/gdamore/tcell/v2 v2.7.4 github.com/glebarez/go-sqlite v1.22.0 + github.com/gopxl/beep v1.4.1 github.com/jmoiron/sqlx v1.4.0 github.com/ncruces/go-sqlite3 v0.21.3 github.com/neurosnap/sentences v1.1.2 @@ -15,12 +16,16 @@ require ( require ( github.com/dustin/go-humanize v1.0.1 // indirect + github.com/ebitengine/oto/v3 v3.1.0 // indirect + github.com/ebitengine/purego v0.7.1 // indirect github.com/gdamore/encoding v1.0.0 // indirect github.com/google/uuid v1.6.0 // indirect + github.com/hajimehoshi/go-mp3 v0.3.4 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect github.com/ncruces/julianday v1.0.0 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/tetratelabs/wazero v1.8.2 // indirect @@ -4,8 +4,14 @@ github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0 github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/asg017/sqlite-vec-go-bindings v0.1.6 h1:Nx0jAzyS38XpkKznJ9xQjFXz2X9tI7KqjwVxV8RNoww= github.com/asg017/sqlite-vec-go-bindings v0.1.6/go.mod h1:A8+cTt/nKFsYCQF6OgzSNpKZrzNo5gQsXBTfsXHXY0Q= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/ebitengine/oto/v3 v3.1.0 h1:9tChG6rizyeR2w3vsygTTTVVJ9QMMyu00m2yBOCch6U= +github.com/ebitengine/oto/v3 v3.1.0/go.mod h1:IK1QTnlfZK2GIB6ziyECm433hAdTaPpOsGMLhEyEGTg= +github.com/ebitengine/purego v0.7.1 h1:6/55d26lG3o9VCZX8lping+bZcmShseiqlh2bnUDiPA= +github.com/ebitengine/purego v0.7.1/go.mod h1:ah1In8AOtksoNK6yk5z1HTJeUkC1Ez4Wk2idgGslMwQ= github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko= github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg= github.com/gdamore/tcell/v2 v2.7.4 h1:sg6/UnTM9jGpZU+oFYAsDahfchWAFW8Xx2yFinNSAYU= @@ -18,6 +24,11 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gopxl/beep v1.4.1 h1:WqNs9RsDAhG9M3khMyc1FaVY50dTdxG/6S6a3qsUHqE= +github.com/gopxl/beep v1.4.1/go.mod h1:A1dmiUkuY8kxsvcNJNUBIEcchmiP6eUyCHSxpXl0YO0= +github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68= +github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo= +github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo= github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= @@ -36,6 +47,10 @@ github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g= github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7ZoUw= github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/tview v0.0.0-20241103174730-c76f7879f592 h1:YIJ+B1hePP6AgynC5TcqpO0H9k3SSoZa2BGyL6vDUzM= @@ -44,6 +59,8 @@ github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJ github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/tetratelabs/wazero v1.8.2 h1:yIgLR/b2bN31bjxwXHD8a3d+BogigR952csSDdLYEv4= github.com/tetratelabs/wazero v1.8.2/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -62,6 +79,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -85,6 +103,8 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= modernc.org/libc v1.37.6 h1:orZH3c5wmhIQFTXF+Nt+eeauyd+ZIt2BX6ARe+kD+aw= modernc.org/libc v1.37.6/go.mod h1:YAXkAZ8ktnkCKaN9sw/UDeUVkGYJ/YquGO4FTi5nmHE= modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4= diff --git a/models/extra.go b/models/extra.go new file mode 100644 index 0000000..4e3a0bf --- /dev/null +++ b/models/extra.go @@ -0,0 +1,8 @@ +package models + +type AudioFormat string + +const ( + AFOPUS AudioFormat = "opus" + AFMP3 AudioFormat = "mp3" +) |