diff options
-rw-r--r-- | bot.go | 1 | ||||
-rw-r--r-- | config.example.toml | 4 | ||||
-rw-r--r-- | config/config.go | 8 | ||||
-rw-r--r-- | llm.go | 2 | ||||
-rw-r--r-- | rag/main.go | 20 | ||||
-rw-r--r-- | tables.go | 12 | ||||
-rw-r--r-- | tui.go | 2 |
7 files changed, 27 insertions, 22 deletions
@@ -193,7 +193,6 @@ func chatRound(userMsg, role string, tv *tview.TextView, regen, resume bool) { } go sendMsgToLLM(reader) logger.Debug("looking at vars in chatRound", "msg", userMsg, "regen", regen, "resume", resume) - // TODO: consider case where user msg is regened (not assistant one) if !resume { fmt.Fprintf(tv, "[-:-:b](%d) ", len(chatBody.Messages)) fmt.Fprint(tv, roleToIcon(cfg.AssistantRole)) diff --git a/config.example.toml b/config.example.toml index 80e3640..ded1475 100644 --- a/config.example.toml +++ b/config.example.toml @@ -8,3 +8,7 @@ ToolRole = "tool" AssistantRole = "assistant" SysDir = "sysprompts" ChunkLimit = 100000 +# rag settings +RAGBatchSize = 100 +RAGWordLimit = 80 +RAGWorkers = 5 diff --git a/config/config.go b/config/config.go index f26a82e..63495b5 100644 --- a/config/config.go +++ b/config/config.go @@ -26,6 +26,10 @@ type Config struct { EmbedURL string `toml:"EmbedURL"` HFToken string `toml:"HFToken"` RAGDir string `toml:"RAGDir"` + // rag settings + RAGWorkers uint32 `toml:"RAGWorkers"` + RAGBatchSize int `toml:"RAGBatchSize"` + RAGWordLimit uint32 `toml:"RAGWordLimit"` } func LoadConfigOrDefault(fn string) *Config { @@ -47,6 +51,10 @@ func LoadConfigOrDefault(fn string) *Config { config.AssistantRole = "assistant" config.SysDir = "sysprompts" config.ChunkLimit = 8192 + // + config.RAGBatchSize = 100 + config.RAGWordLimit = 80 + config.RAGWorkers = 5 } config.CurrentAPI = config.ChatAPI config.APIMap = map[string]string{ @@ -53,12 +53,10 @@ func (lcp LlamaCPPeer) FormMsg(msg, role string, resume bool) (io.Reader, error) } prompt := strings.Join(messages, "\n") // strings builder? - // if cfg.ToolUse && msg != "" && !resume { if !resume { botMsgStart := "\n" + cfg.AssistantRole + ":\n" prompt += botMsgStart } - // if cfg.ThinkUse && msg != "" && !cfg.ToolUse { if cfg.ThinkUse && !cfg.ToolUse { prompt += "<think>" } diff --git a/rag/main.go b/rag/main.go index 5f2aa00..5e53839 100644 --- a/rag/main.go +++ b/rag/main.go @@ -61,16 +61,10 @@ func (r *RAG) LoadRAG(fpath string) error { for i, s := range sentences { sents[i] = s.Text } - // TODO: maybe better to decide batch size based on sentences len var ( - // TODO: to config - workers = 5 - batchSize = 100 maxChSize = 1000 - // - wordLimit = 80 left = 0 - right = batchSize + right = r.cfg.RAGBatchSize batchCh = make(chan map[int][]string, maxChSize) vectorCh = make(chan []models.VectorRow, maxChSize) errCh = make(chan error, 1) @@ -85,29 +79,29 @@ func (r *RAG) LoadRAG(fpath string) error { par := strings.Builder{} for i := 0; i < len(sents); i++ { par.WriteString(sents[i]) - if wordCounter(par.String()) > wordLimit { + if wordCounter(par.String()) > int(r.cfg.RAGWordLimit) { paragraphs = append(paragraphs, par.String()) par.Reset() } } - if len(paragraphs) < batchSize { - batchSize = len(paragraphs) + if len(paragraphs) < int(r.cfg.RAGBatchSize) { + r.cfg.RAGBatchSize = len(paragraphs) } // fill input channel ctn := 0 for { - if right > len(paragraphs) { + if int(right) > len(paragraphs) { batchCh <- map[int][]string{left: paragraphs[left:]} break } batchCh <- map[int][]string{left: paragraphs[left:right]} - left, right = right, right+batchSize + left, right = right, right+r.cfg.RAGBatchSize ctn++ } finishedBatchesMsg := fmt.Sprintf("finished batching batches#: %d; paragraphs: %d; sentences: %d\n", len(batchCh), len(paragraphs), len(sents)) r.logger.Debug(finishedBatchesMsg) LongJobStatusCh <- finishedBatchesMsg - for w := 0; w < workers; w++ { + for w := 0; w < int(r.cfg.RAGWorkers); w++ { go r.batchToVectorHFAsync(lock, w, batchCh, vectorCh, errCh, doneCh, path.Base(fpath)) } // wait for emb to be done @@ -114,12 +114,12 @@ func makeChatTable(chatMap map[string]models.Chat) *tview.Table { } return } - if chatBody.Messages[0].Role != "system" || chatBody.Messages[1].Role != agentName { - if err := notifyUser("error", "unexpected chat structure; card: "+agentName); err != nil { - logger.Warn("failed ot notify", "error", err) - } - return - } + // if chatBody.Messages[0].Role != "system" || chatBody.Messages[1].Role != agentName { + // if err := notifyUser("error", "unexpected chat structure; card: "+agentName); err != nil { + // logger.Warn("failed ot notify", "error", err) + // } + // return + // } // change sys_prompt + first msg cc.SysPrompt = chatBody.Messages[0].Content cc.FirstMsg = chatBody.Messages[1].Content @@ -450,6 +450,8 @@ func init() { if event.Key() == tcell.KeyF2 { // regen last msg chatBody.Messages = chatBody.Messages[:len(chatBody.Messages)-1] + // there is no case where user msg is regenerated + // lastRole := chatBody.Messages[len(chatBody.Messages)-1].Role textView.SetText(chatToText(cfg.ShowSys)) go chatRound("", cfg.UserRole, textView, true, false) return nil |