Feat: switch between completion and chat api

author: Grail Finder <wohilas@gmail.com> 2025-02-01 16:32:36 +0300
committer: Grail Finder <wohilas@gmail.com> 2025-02-01 16:32:36 +0300
commit: 84c94ecea34753f246bdfd51f6ff989281e873e3 (patch)
tree: 3e4330ed00811cd46299968af69e6f3671316aa6
parent: 336451340b86ba1f713b47d44225df61058f5a8f (diff)
8 files changed, 81 insertions, 24 deletions
diff --git a/README.md b/README.md
index 8e9db0f..164b1c8 100644
--- a/README.md
+++ b/README.md
@@ -27,22 +27,21 @@
 - RAG support|implementation; +
 - delete chat option; +
 - RAG file loading status/progress; +
-- fullscreen textarea option (bothersome to implement);
-- separate messages that are stored and chat and send to the bot, i.e. option to omit tool calls (there might be a point where they are no longer needed in ctx);
-- char card is the sys message, but how about giving tools to char that does not have it?
-- it is a bit clumsy to mix chats in db and chars from the external files, maybe load external files in db on startup?
-- lets say we have two (or more) agents with the same name across multiple chats. These agents go and ask db for topics they memorised. Now they can access topics that aren't meant for them. (so memory should have an option: shareable; that indicates if that memory can be shared across chats);
-- server mode: no tui but api calls with the func calling, rag, other middleware;
-- boolean flag to use/not use tools. I see it as a msg from a tool to an llm "Hey, it might be good idea to use me!";
-- connection to a model status;
-- ===== /llamacpp specific (it has a different body -> interface instead of global var)
+- in chat management table add preview of the last message; +
+===== /llamacpp specific (it has a different body -> interface instead of global var)
 - edit syscards; +
 - consider adding use /completion of llamacpp, since openai endpoint clearly has template|format issues; +
 - change temp, min-p and other params from tui; +
 - DRY; +
-- keybind to switch between openai and llamacpp endpoints (chat vs completion);
+- keybind to switch between openai and llamacpp endpoints (chat vs completion); +
+=======
+- char card is the sys message, but how about giving tools to char that does not have it?
+- let's say we have two (or more) agents with the same name across multiple chats. These agents go and ask db for topics they memorised. Now they can access topics that aren't meant for them. (so memory should have an option: shareable; that indicates if that memory can be shared across chats);
+- server mode: no tui but api calls with the func calling, rag, other middleware;
+- boolean flag to use/not use tools. I see it as a msg from a tool to an llm "Hey, it might be a good idea to use me!";
+- connection to a model status; (needs to be tied to some event, perhaps even its own shortcut)
+- separate the messages that are stored in chat from those sent to the bot, i.e. option to omit tool calls and thinking (there might be a point where they are no longer needed in ctx);
 - option to remove <thinking> from chat history;
-- in chat management table add preview of the last message; +
 
 ### FIX:
 - bot responding (or hanging) blocks everything; +
@@ -61,7 +60,6 @@
 - all page names should be vars; +
 - normal case regen omits assistant icon; +
 - user icon (and role?) from config is not used; +
-- message editing broke ( runtime error: index out of range [-1]); +
 - RAG: encode multiple sentences (~5-10) to embeddings a piece. +
 - number of sentences in a batch should depend on number of words there. +
 - F1 can load any chat, by loading chat of other agent it does not switch agents, if that chat is continued, it will rewrite agent in db; (either allow only chats from current agent OR switch agent on chat loading); +
@@ -69,3 +67,5 @@
 - name split for llamacpp completion. user msg should end with 'bot_name:'; +
 - add retry on failed call (and EOF);
 - model info shold be an event and show disconnect status when fails;
+- message editing is broken (runtime error: index out of range [-1]);
+- remove icons for agents/user; use only <role>:
diff --git a/bot.go b/bot.go
index c256f58..e60c39f 100644
--- a/bot.go
+++ b/bot.go
@@ -91,7 +91,7 @@ func fetchModelName() {
 // func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) {
 func sendMsgToLLM(body io.Reader) {
 	// nolint
-	resp, err := httpClient.Post(cfg.APIURL, "application/json", body)
+	resp, err := httpClient.Post(cfg.CurrentAPI, "application/json", body)
 	if err != nil {
 		logger.Error("llamacpp api", "error", err)
 		streamDone <- true
@@ -128,7 +128,7 @@ func sendMsgToLLM(body io.Reader) {
 		line = line[6:]
 		content, stop, err := chunkParser.ParseChunk(line)
 		if err != nil {
-			logger.Error("error parsing response body", "error", err, "line", string(line), "url", cfg.APIURL)
+			logger.Error("error parsing response body", "error", err, "line", string(line), "url", cfg.CurrentAPI)
 			streamDone <- true
 			break
 		}
diff --git a/config.example.toml b/config.example.toml
index d0a9841..6512a37 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -1,4 +1,5 @@
-APIURL = "http://localhost:8080/v1/chat/completions"
+ChatAPI = "http://localhost:8080/v1/chat/completions"
+CompletionAPI = "http://localhost:8080/completion"
 EmbedURL = "http://localhost:8080/v1/embeddings"
 ShowSys = true
 LogFile = "log.txt"
diff --git a/config/config.go b/config/config.go
index eb5f3f1..2e48f34 100644
--- a/config/config.go
+++ b/config/config.go
@@ -7,7 +7,11 @@ import (
 )
 
 type Config struct {
-	APIURL        string `toml:"APIURL"`
+	ChatAPI       string `toml:"ChatAPI"`
+	CompletionAPI string `toml:"CompletionAPI"`
+	CurrentAPI    string
+	APIMap        map[string]string
+	//
 	ShowSys       bool   `toml:"ShowSys"`
 	LogFile       string `toml:"LogFile"`
 	UserRole      string `toml:"UserRole"`
@@ -34,7 +38,8 @@ func LoadConfigOrDefault(fn string) *Config {
 	_, err := toml.DecodeFile(fn, &config)
 	if err != nil {
 		fmt.Println("failed to read config from file, loading default")
-		config.APIURL = "http://localhost:8080/v1/chat/completions"
+		config.ChatAPI = "http://localhost:8080/v1/chat/completions"
+		config.CompletionAPI = "http://localhost:8080/completion"
 		config.RAGEnabled = false
 		config.EmbedURL = "http://localhost:8080/v1/embiddings"
 		config.ShowSys = true
@@ -48,6 +53,16 @@ func LoadConfigOrDefault(fn string) *Config {
 		config.SysDir = "sysprompts"
 		config.ChunkLimit = 8192
 	}
+	config.CurrentAPI = config.ChatAPI
+	config.APIMap = map[string]string{
+		config.ChatAPI: config.CompletionAPI,
+	}
+	if config.CompletionAPI != "" {
+		config.CurrentAPI = config.CompletionAPI
+		config.APIMap = map[string]string{
+			config.CompletionAPI: config.ChatAPI,
+		}
+	}
 	// if any value is empty fill with default
 	return config
 }
diff --git a/llm.go b/llm.go
index d847da1..be7f418 100644
--- a/llm.go
+++ b/llm.go
@@ -15,7 +15,7 @@ type ChunkParser interface {
 
 func initChunkParser() {
 	chunkParser = LlamaCPPeer{}
-	if strings.Contains(cfg.APIURL, "v1") {
+	if strings.Contains(cfg.CurrentAPI, "v1") {
 		logger.Info("chosen openai parser")
 		chunkParser = OpenAIer{}
 		return
diff --git a/main.go b/main.go
index f3105e5..64f11d3 100644
--- a/main.go
+++ b/main.go
@@ -10,7 +10,7 @@ var (
 	botRespMode   = false
 	editMode      = false
 	selectedIndex = int(-1)
-	indexLine     = "F12 to show keys help; bot resp mode: %v; char: %s; chat: %s; RAGEnabled: %v; toolUseAdviced: %v; model: %s"
+	indexLine     = "F12 to show keys help; bot resp mode: %v; char: %s; chat: %s; RAGEnabled: %v; toolUseAdviced: %v; model: %s\nAPI_URL: %s"
 	focusSwitcher = map[tview.Primitive]tview.Primitive{}
 )
 
diff --git a/server.go b/server.go
new file mode 100644
index 0000000..2e25559
--- /dev/null
+++ b/server.go
@@ -0,0 +1,27 @@
+package main
+
+import (
+	"fmt"
+	"net/http"
+)
+
+// completion is a draft HTTP handler for a /completion endpoint: it hands the
+// request body to sendMsgToLLM and prints each received chunk to stdout until
+// streamDone fires. Nothing is written back to w yet.
+func completion(w http.ResponseWriter, req *http.Request) {
+	// the request body is forwarded untouched (expected to be a POST payload)
+	body := req.Body
+	// req.Body is passed directly as the io.Reader that sendMsgToLLM takes
+	// run the LLM call in the background; presumably it feeds chunkChan — TODO confirm
+	go sendMsgToLLM(body)
+out:
+	for {
+		select {
+		case chunk := <-chunkChan:
+			fmt.Println(chunk)
+		case <-streamDone:
+			break out
+		}
+	}
+	return
+}
diff --git a/tui.go b/tui.go
index 53d1c10..4bb3f08 100644
--- a/tui.go
+++ b/tui.go
@@ -3,7 +3,6 @@ package main
 import (
 	"elefant/models"
 	"elefant/pngmeta"
-	"elefant/rag"
 	"fmt"
 	"os"
 	"strconv"
@@ -41,7 +40,7 @@ var (
 	// help text
 	helpText = `
 [yellow]Esc[white]: send msg
-[yellow]PgUp/Down[white]: switch focus
+[yellow]PgUp/Down[white]: switch focus between input and chat widgets
 [yellow]F1[white]: manage chats
 [yellow]F2[white]: regen last
 [yellow]F3[white]: delete last msg
@@ -50,13 +49,16 @@ var (
 [yellow]F6[white]: interrupt bot resp
 [yellow]F7[white]: copy last msg to clipboard (linux xclip)
 [yellow]F8[white]: copy n msg to clipboard (linux xclip)
-[yellow]F10[white]: manage loaded rag files
+[yellow]F10[white]: manage loaded rag files (that already in vector db)
 [yellow]F11[white]: switch RAGEnabled boolean
 [yellow]F12[white]: show this help page
 [yellow]Ctrl+s[white]: load new char/agent
 [yellow]Ctrl+e[white]: export chat to json file
 [yellow]Ctrl+n[white]: start a new chat
 [yellow]Ctrl+c[white]: close programm
+[yellow]Ctrl+p[white]: props edit form (min-p, dry, etc.)
+[yellow]Ctrl+v[white]: switch between /completion and /chat api (if provided in config)
+[yellow]Ctrl+r[white]: menu of files that can be loaded in vector db (RAG)
 
 Press Enter to go back
 `
@@ -87,7 +89,7 @@ func colorText() {
 }
 
 func updateStatusLine() {
-	position.SetText(fmt.Sprintf(indexLine, botRespMode, cfg.AssistantRole, activeChatName, cfg.RAGEnabled, cfg.ToolUse, currentModel))
+	position.SetText(fmt.Sprintf(indexLine, botRespMode, cfg.AssistantRole, activeChatName, cfg.RAGEnabled, cfg.ToolUse, currentModel, cfg.CurrentAPI))
 }
 
 func initSysCards() ([]string, error) {
@@ -473,6 +475,19 @@ func init() {
 			startNewChat()
 			return nil
 		}
+		if event.Key() == tcell.KeyCtrlV {
+			// switch between /chat and /completion api
+			prevAPI := cfg.CurrentAPI
+			newAPI := cfg.APIMap[cfg.CurrentAPI]
+			if newAPI == "" {
+				// do not switch
+				return nil
+			}
+			cfg.APIMap[newAPI] = prevAPI
+			cfg.CurrentAPI = newAPI
+			updateStatusLine()
+			return nil
+		}
 		if event.Key() == tcell.KeyCtrlS {
 			// switch sys prompt
 			labels, err := initSysCards()
@@ -505,7 +520,6 @@ func init() {
 				}
 				fileList = append(fileList, f.Name())
 			}
-			rag.LongJobStatusCh <- "first msg"
 			chatRAGTable := makeRAGTable(fileList)
 			pages.AddPage(RAGPage, chatRAGTable, true, true)
 			return nil
author	Grail Finder <wohilas@gmail.com>	2025-02-01 16:32:36 +0300
committer	Grail Finder <wohilas@gmail.com>	2025-02-01 16:32:36 +0300
commit	84c94ecea34753f246bdfd51f6ff989281e873e3 (patch)
tree	3e4330ed00811cd46299968af69e6f3671316aa6
parent	336451340b86ba1f713b47d44225df61058f5a8f (diff)