summaryrefslogtreecommitdiff
path: root/bot.go
diff options
context:
space:
mode:
Diffstat (limited to 'bot.go')
-rw-r--r--bot.go1765
1 files changed, 1621 insertions, 144 deletions
diff --git a/bot.go b/bot.go
index 66303a2..384563e 100644
--- a/bot.go
+++ b/bot.go
@@ -3,250 +3,1727 @@ package main
import (
"bufio"
"bytes"
- "elefant/models"
- "elefant/storage"
+ "compress/gzip"
+ "context"
"encoding/json"
"fmt"
+ "gf-lt/config"
+ "gf-lt/models"
+ "gf-lt/rag"
+ "gf-lt/storage"
+ "gf-lt/tools"
+ "html"
"io"
"log/slog"
+ "net"
"net/http"
"os"
+ "regexp"
+ "slices"
+ "strconv"
"strings"
+ "sync"
+ "sync/atomic"
"time"
"github.com/rivo/tview"
)
-var httpClient = http.Client{
- Timeout: time.Second * 20,
+var (
+ httpClient = &http.Client{}
+ cfg *config.Config
+ logger *slog.Logger
+ logLevel = new(slog.LevelVar)
+ ctx, cancel = context.WithCancel(context.Background())
+ activeChatName string
+ chatRoundChan = make(chan *models.ChatRoundReq, 1)
+ chunkChan = make(chan string, 10)
+ openAIToolChan = make(chan string, 10)
+ streamDone = make(chan bool, 1)
+ chatBody *models.ChatBody
+ store storage.FullRepo
+ defaultStarter = []models.RoleMsg{}
+ interruptResp atomic.Bool
+ ragger *rag.RAG
+ chunkParser ChunkParser
+ lastToolCall *models.FuncCall
+ lastRespStats *models.ResponseStats
+ outputHandler OutputHandler
+ cliPrevOutput string
+ cliRespDone chan bool
+)
+
+type OutputHandler interface {
+ Write(p string)
+ Writef(format string, args ...interface{})
+ ScrollToEnd()
+}
+
+type TUIOutputHandler struct {
+ tv *tview.TextView
+}
+
+func (h *TUIOutputHandler) Write(p string) {
+ fmt.Fprint(h.tv, p)
+}
+
+func (h *TUIOutputHandler) Writef(format string, args ...interface{}) {
+ fmt.Fprintf(h.tv, format, args...)
+}
+
+func (h *TUIOutputHandler) ScrollToEnd() {
+ h.tv.ScrollToEnd()
+}
+
+type CLIOutputHandler struct{}
+
+func (h *CLIOutputHandler) Write(p string) {
+ fmt.Print(p)
+}
+
+func (h *CLIOutputHandler) Writef(format string, args ...interface{}) {
+ fmt.Printf(format, args...)
+}
+
+func (h *CLIOutputHandler) ScrollToEnd() {
}
var (
- logger *slog.Logger
- APIURL = "http://localhost:8080/v1/chat/completions"
- DB = map[string]map[string]any{}
- userRole = "user"
- assistantRole = "assistant"
- toolRole = "tool"
- assistantIcon = "<🤖>: "
- userIcon = "<user>: "
- historyDir = "./history/"
- // TODO: pass as an cli arg
- showSystemMsgs bool
- activeChatName string
- chunkChan = make(chan string, 10)
- streamDone = make(chan bool, 1)
- chatBody *models.ChatBody
- store storage.ChatHistory
- defaultFirstMsg = "Hello! What can I do for you?"
- defaultStarter = []models.MessagesStory{
- {Role: "system", Content: systemMsg},
- {Role: assistantRole, Content: defaultFirstMsg},
- }
- interruptResp = false
+ basicCard = &models.CharCard{
+ ID: models.ComputeCardID("assistant", "basic_sys"),
+ SysPrompt: models.BasicSysMsg,
+ FirstMsg: models.DefaultFirstMsg,
+ Role: "assistant",
+ FilePath: "basic_sys",
+ }
+ sysMap = map[string]*models.CharCard{}
+ roleToID = map[string]string{}
+ modelHasVision bool
+ windowToolsAvailable bool
+ // tooler *tools.Tools
+ //
+ orator Orator
+ asr STT
+ localModelsMu sync.RWMutex
+ defaultLCPProps = map[string]float32{
+ "temperature": 0.8,
+ "dry_multiplier": 0.0,
+ "min_p": 0.05,
+ "n_predict": -1.0,
+ }
+ ORFreeModels = []string{
+ "google/gemini-2.0-flash-exp:free",
+ "deepseek/deepseek-chat-v3-0324:free",
+ "mistralai/mistral-small-3.2-24b-instruct:free",
+ "qwen/qwen3-14b:free",
+ "google/gemma-3-27b-it:free",
+ "meta-llama/llama-3.3-70b-instruct:free",
+ }
+ LocalModels = []string{}
+ localModelsData *models.LCPModels
+ orModelsData *models.ORModels
)
-// ====
+var thinkBlockRE = regexp.MustCompile(`(?s)<think>.*?</think>`)
-func getUserInput(userPrompt string) string {
- fmt.Printf(userPrompt)
- reader := bufio.NewReader(os.Stdin)
- line, err := reader.ReadString('\n')
+// parseKnownToTag extracts known_to list from content using configured tag.
+// Returns cleaned content and list of character names.
+func parseKnownToTag(content string) []string {
+ if cfg == nil || !cfg.CharSpecificContextEnabled {
+ return nil
+ }
+ tag := cfg.CharSpecificContextTag
+ if tag == "" {
+ tag = "@"
+ }
+ // Pattern: tag + list + "@"
+ pattern := regexp.QuoteMeta(tag) + `(.*?)@`
+ re := regexp.MustCompile(pattern)
+ matches := re.FindAllStringSubmatch(content, -1)
+ if len(matches) == 0 {
+ return nil
+ }
+ // There may be multiple tags; we combine all.
+ var knownTo []string
+ for _, match := range matches {
+ if len(match) < 2 {
+ continue
+ }
+ // Remove the entire matched tag from content
+ list := strings.TrimSpace(match[1])
+ if list == "" {
+ continue
+ }
+ strings.SplitSeq(list, ",")
+ // parts := strings.Split(list, ",")
+ // for _, p := range parts {
+ for p := range strings.SplitSeq(list, ",") {
+ p = strings.TrimSpace(p)
+ if p != "" {
+ knownTo = append(knownTo, p)
+ }
+ }
+ }
+ // Also remove any leftover trailing "__" that might be orphaned? Not needed.
+ return knownTo
+}
+
+// processMessageTag processes a message for known_to tag and sets KnownTo field.
+// It also ensures the sender's role is included in KnownTo.
+// If KnownTo already set (e.g., from DB), preserves it unless new tag found.
+func processMessageTag(msg *models.RoleMsg) *models.RoleMsg {
+ if cfg == nil || !cfg.CharSpecificContextEnabled {
+ return msg
+ }
+ // If KnownTo already set, assume tag already processed (content cleaned).
+ // However, we still check for new tags (maybe added later).
+ knownTo := parseKnownToTag(msg.GetText())
+ // If tag found, replace KnownTo with new list (merge with existing?)
+ // For simplicity, if knownTo is not nil, replace.
+ if knownTo == nil {
+ return msg
+ }
+ msg.KnownTo = knownTo
+ if msg.Role == "" {
+ return msg
+ }
+ if !slices.Contains(msg.KnownTo, msg.Role) {
+ msg.KnownTo = append(msg.KnownTo, msg.Role)
+ }
+ return msg
+}
+
+// filterMessagesForCharacter returns messages visible to the specified character.
+// If CharSpecificContextEnabled is false, returns all messages.
+func filterMessagesForCharacter(messages []models.RoleMsg, character string) []models.RoleMsg {
+ if strings.Contains(cfg.CurrentAPI, "chat") {
+ return messages
+ }
+ if cfg == nil || !cfg.CharSpecificContextEnabled || character == "" {
+ return messages
+ }
+ if character == "system" { // system sees every message
+ return messages
+ }
+ filtered := make([]models.RoleMsg, 0, len(messages))
+ for i := range messages {
+ // If KnownTo is nil or empty, message is visible to all
+ // system msg cannot be filtered
+ if len(messages[i].KnownTo) == 0 || messages[i].Role == "system" {
+ filtered = append(filtered, messages[i])
+ continue
+ }
+ if slices.Contains(messages[i].KnownTo, character) {
+			// Check if character is in KnownTo list
+ filtered = append(filtered, messages[i])
+ }
+ }
+ return filtered
+}
+
+func consolidateAssistantMessages(messages []models.RoleMsg) []models.RoleMsg {
+ if len(messages) == 0 {
+ return messages
+ }
+ result := make([]models.RoleMsg, 0, len(messages))
+ for i := range messages {
+ // Non-assistant messages are appended as-is
+ if messages[i].Role != cfg.AssistantRole {
+ result = append(result, messages[i])
+ continue
+ }
+ // Assistant message: start a new block or merge with the last one
+ if len(result) == 0 || result[len(result)-1].Role != cfg.AssistantRole {
+ // First assistant in a block: append a copy (avoid mutating input)
+ result = append(result, messages[i].Copy())
+ continue
+ }
+ // Merge with the last assistant message
+ last := &result[len(result)-1]
+ // If either message has structured content, unify to ContentParts
+ if last.IsContentParts() || messages[i].IsContentParts() {
+ // Convert last to ContentParts if needed, preserving ToolCallID
+ if !last.IsContentParts() {
+ toolCallID := last.ToolCallID
+ *last = models.NewMultimodalMsg(last.Role, []interface{}{
+ models.TextContentPart{Type: "text", Text: last.Content},
+ })
+ last.ToolCallID = toolCallID
+ }
+ // Add current message's content to last
+ if messages[i].IsContentParts() {
+ last.ContentParts = append(last.ContentParts, messages[i].GetContentParts()...)
+ } else if messages[i].Content != "" {
+ last.AddTextPart(messages[i].Content)
+ }
+ } else {
+ // Both simple strings: concatenate with newline
+ if last.Content != "" && messages[i].Content != "" {
+ last.Content += "\n" + messages[i].Content
+ } else if messages[i].Content != "" {
+ last.Content = messages[i].Content
+ }
+ // ToolCallID is already preserved in last
+ }
+ }
+ return result
+}
+
+// GetLogLevel returns the current log level as a string
+func GetLogLevel() string {
+ level := logLevel.Level()
+ switch level {
+ case slog.LevelDebug:
+ return "Debug"
+ case slog.LevelInfo:
+ return "Info"
+ case slog.LevelWarn:
+ return "Warn"
+ default:
+ // For any other values, return "Info" as default
+ return "Info"
+ }
+}
+
+func createClient(connectTimeout time.Duration) *http.Client {
+ // Custom transport with connection timeout
+ transport := &http.Transport{
+ DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
+ // Create a dialer with connection timeout
+ dialer := &net.Dialer{
+ Timeout: connectTimeout,
+ KeepAlive: 30 * time.Second, // Optional
+ }
+ return dialer.DialContext(ctx, network, addr)
+ },
+ // Other transport settings (optional)
+ TLSHandshakeTimeout: connectTimeout,
+ ResponseHeaderTimeout: connectTimeout,
+ }
+ // Client with no overall timeout (or set to streaming-safe duration)
+ return &http.Client{
+ Transport: transport,
+ Timeout: 0, // No overall timeout (for streaming)
+ }
+}
+
+func warmUpModel() {
+ if !isLocalLlamacpp() {
+ return
+ }
+ // Check if model is already loaded
+ loaded, err := isModelLoaded(chatBody.Model)
if err != nil {
- panic(err) // think about it
+ logger.Debug("failed to check model status", "model", chatBody.Model, "error", err)
+ // Continue with warmup attempt anyway
+ }
+ if loaded {
+ showToast("model already loaded", "Model "+chatBody.Model+" is already loaded.")
+ return
}
- return line
+ go func() {
+ var data []byte
+ var err error
+ switch {
+ case strings.HasSuffix(cfg.CurrentAPI, "/completion"):
+ // Old completion endpoint
+ req := models.NewLCPReq(".", chatBody.Model, nil, map[string]float32{
+ "temperature": 0.8,
+ "dry_multiplier": 0.0,
+ "min_p": 0.05,
+ "n_predict": 0,
+ }, []string{})
+ req.Stream = false
+ data, err = json.Marshal(req)
+ case strings.Contains(cfg.CurrentAPI, "/v1/chat/completions"):
+ // OpenAI-compatible chat endpoint
+ req := models.OpenAIReq{
+ ChatBody: &models.ChatBody{
+ Model: chatBody.Model,
+ Messages: []models.RoleMsg{
+ {Role: "system", Content: "."},
+ },
+ Stream: false,
+ },
+ Tools: nil,
+ }
+ data, err = json.Marshal(req)
+ default:
+ // Unknown local endpoint, skip
+ return
+ }
+ if err != nil {
+ logger.Debug("failed to marshal warmup request", "error", err)
+ return
+ }
+ resp, err := httpClient.Post(cfg.CurrentAPI, "application/json", bytes.NewReader(data))
+ if err != nil {
+ logger.Debug("warmup request failed", "error", err)
+ return
+ }
+ resp.Body.Close()
+ // Start monitoring for model load completion
+ monitorModelLoad(chatBody.Model)
+ }()
}
-func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader {
- if newMsg != "" { // otherwise let the bot continue
- newMsg := models.MessagesStory{Role: role, Content: newMsg}
- chatBody.Messages = append(chatBody.Messages, newMsg)
+// nolint
+func fetchDSBalance() *models.DSBalance {
+ url := "https://api.deepseek.com/user/balance"
+ method := "GET"
+ // nolint
+ req, err := http.NewRequest(method, url, nil)
+ if err != nil {
+ logger.Warn("failed to create request", "error", err)
+ return nil
}
- data, err := json.Marshal(chatBody)
+ req.Header.Add("Accept", "application/json")
+ req.Header.Add("Authorization", "Bearer "+cfg.DeepSeekToken)
+ res, err := httpClient.Do(req)
if err != nil {
- panic(err)
+ logger.Warn("failed to make request", "error", err)
+ return nil
+ }
+ defer res.Body.Close()
+ resp := models.DSBalance{}
+ if err := json.NewDecoder(res.Body).Decode(&resp); err != nil {
+ return nil
}
- return bytes.NewReader(data)
+ return &resp
}
-// func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) {
-func sendMsgToLLM(body io.Reader) (any, error) {
- resp, err := httpClient.Post(APIURL, "application/json", body)
+func fetchORModels(free bool) ([]string, error) {
+ resp, err := http.Get("https://openrouter.ai/api/v1/models")
if err != nil {
- logger.Error("llamacpp api", "error", err)
return nil, err
}
defer resp.Body.Close()
- llmResp := []models.LLMRespChunk{}
- // chunkChan <- assistantIcon
+ if resp.StatusCode != 200 {
+ err := fmt.Errorf("failed to fetch or models; status: %s", resp.Status)
+ return nil, err
+ }
+ data := &models.ORModels{}
+ if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
+ return nil, err
+ }
+ orModelsData = data
+ freeModels := data.ListModels(free)
+ return freeModels, nil
+}
+
+func fetchLCPModels() ([]string, error) {
+ resp, err := http.Get(cfg.FetchModelNameAPI)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != 200 {
+ err := fmt.Errorf("failed to fetch or models; status: %s", resp.Status)
+ return nil, err
+ }
+ data := &models.LCPModels{}
+ if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
+ return nil, err
+ }
+ localModels := data.ListModels()
+ return localModels, nil
+}
+
+// fetchLCPModelsWithLoadStatus returns models with "(loaded)" indicator for loaded models
+func fetchLCPModelsWithLoadStatus() ([]string, error) {
+ modelList, err := fetchLCPModelsWithStatus()
+ if err != nil {
+ return nil, err
+ }
+ result := make([]string, 0, len(modelList.Data))
+ li := 0 // loaded index
+ for i, m := range modelList.Data {
+ modelName := m.ID
+ if m.Status.Value == "loaded" {
+ modelName = models.LoadedMark + modelName
+ li = i
+ }
+ result = append(result, modelName)
+ }
+ if li == 0 {
+ return result, nil // no loaded modelList
+ }
+ loadedModel := result[li]
+ result = append(result[:li], result[li+1:]...)
+ return slices.Concat([]string{loadedModel}, result), nil
+}
+
+// fetchLCPModelsWithStatus returns the full LCPModels struct including status information.
+func fetchLCPModelsWithStatus() (*models.LCPModels, error) {
+ resp, err := http.Get(cfg.FetchModelNameAPI)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != 200 {
+ err := fmt.Errorf("failed to fetch llama.cpp models; status: %s", resp.Status)
+ return nil, err
+ }
+ data := &models.LCPModels{}
+ if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
+ return nil, err
+ }
+ localModelsData = data
+ return data, nil
+}
+
+// isModelLoaded checks if the given model ID is currently loaded in llama.cpp server.
+func isModelLoaded(modelID string) (bool, error) {
+ models, err := fetchLCPModelsWithStatus()
+ if err != nil {
+ return false, err
+ }
+ for _, m := range models.Data {
+ if m.ID == modelID {
+ return m.Status.Value == "loaded", nil
+ }
+ }
+ return false, nil
+}
+
+func ModelHasVision(api, modelID string) bool {
+ switch {
+ case strings.Contains(api, "deepseek"):
+ return false
+ case strings.Contains(api, "openrouter"):
+ resp, err := http.Get("https://openrouter.ai/api/v1/models")
+ if err != nil {
+ logger.Warn("failed to fetch OR models for vision check", "error", err)
+ return false
+ }
+ defer resp.Body.Close()
+ orm := &models.ORModels{}
+ if err := json.NewDecoder(resp.Body).Decode(orm); err != nil {
+ logger.Warn("failed to decode OR models for vision check", "error", err)
+ return false
+ }
+ return orm.HasVision(modelID)
+ default:
+ models, err := fetchLCPModelsWithStatus()
+ if err != nil {
+ logger.Warn("failed to fetch LCP models for vision check", "error", err)
+ return false
+ }
+ return models.HasVision(modelID)
+ }
+}
+
+func UpdateToolCapabilities() {
+ if !cfg.ToolUse {
+ return
+ }
+ modelHasVision = false
+ if cfg == nil || cfg.CurrentAPI == "" {
+ logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
+ // tooler.RegisterWindowTools(modelHasVision)
+ return
+ }
+ prevHasVision := modelHasVision
+ modelHasVision = ModelHasVision(cfg.CurrentAPI, cfg.CurrentModel)
+ if modelHasVision {
+ logger.Info("model has vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
+ } else {
+ logger.Info("model does not have vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
+ if windowToolsAvailable && !prevHasVision && !modelHasVision {
+ showToast("window tools", "Window capture-and-view unavailable: model lacks vision support")
+ }
+ }
+ // tooler.RegisterWindowTools(modelHasVision)
+}
+
+// monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded.
+func monitorModelLoad(modelID string) {
+ go func() {
+ timeout := time.After(2 * time.Minute) // max wait 2 minutes
+ ticker := time.NewTicker(2 * time.Second)
+ defer ticker.Stop()
+ for {
+ select {
+ case <-timeout:
+ logger.Debug("model load monitoring timeout", "model", modelID)
+ return
+ case <-ticker.C:
+ loaded, err := isModelLoaded(modelID)
+ if err != nil {
+ logger.Debug("failed to check model status", "model", modelID, "error", err)
+ continue
+ }
+ if loaded {
+ showToast("model loaded", "Model "+modelID+" is now loaded and ready.")
+ refreshChatDisplay()
+ return
+ }
+ }
+ }
+ }()
+}
+
+// extractDetailedErrorFromBytes extracts detailed error information from response body bytes
+func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
+ // Try to decompress gzip if the response is compressed
+ if len(body) >= 2 && body[0] == 0x1f && body[1] == 0x8b {
+ reader, err := gzip.NewReader(bytes.NewReader(body))
+ if err == nil {
+ decompressed, err := io.ReadAll(reader)
+ reader.Close()
+ if err == nil {
+ body = decompressed
+ }
+ }
+ }
+ // Try to parse as JSON to extract detailed error information
+ var errorResponse map[string]any
+ if err := json.Unmarshal(body, &errorResponse); err == nil {
+ // Check if it's an error response with detailed information
+ if errorData, ok := errorResponse["error"]; ok {
+ if errorMap, ok := errorData.(map[string]any); ok {
+ var errorMsg string
+ if msg, ok := errorMap["message"]; ok {
+ errorMsg = fmt.Sprintf("%v", msg)
+ }
+ var details []string
+ if code, ok := errorMap["code"]; ok {
+ details = append(details, fmt.Sprintf("Code: %v", code))
+ }
+ if metadata, ok := errorMap["metadata"]; ok {
+ // Handle metadata which might contain raw error details
+ if metadataMap, ok := metadata.(map[string]any); ok {
+ if raw, ok := metadataMap["raw"]; ok {
+ // Parse the raw error string if it's JSON
+ var rawError map[string]any
+ if rawStr, ok := raw.(string); ok && json.Unmarshal([]byte(rawStr), &rawError) == nil {
+ if rawErrorData, ok := rawError["error"]; ok {
+ if rawErrorMap, ok := rawErrorData.(map[string]any); ok {
+ if rawMsg, ok := rawErrorMap["message"]; ok {
+ return fmt.Sprintf("API Error: %s", rawMsg)
+ }
+ }
+ }
+ }
+ }
+ }
+ details = append(details, fmt.Sprintf("Metadata: %v", metadata))
+ }
+ if len(details) > 0 {
+ return fmt.Sprintf("API Error: %s (%s)", errorMsg, strings.Join(details, ", "))
+ }
+ return "API Error: " + errorMsg
+ }
+ }
+ }
+ // If not a structured error response, return the raw body with status
+ return fmt.Sprintf("HTTP Status: %d, Response Body: %s", statusCode, string(body))
+}
+
+func finalizeRespStats(tokenCount int, startTime time.Time) {
+ duration := time.Since(startTime).Seconds()
+ var tps float64
+ if duration > 0 {
+ tps = float64(tokenCount) / duration
+ }
+ lastRespStats = &models.ResponseStats{
+ Tokens: tokenCount,
+ Duration: duration,
+ TokensPerSec: tps,
+ }
+}
+
+// sendMsgToLLM expects streaming resp
+func sendMsgToLLM(body io.Reader) {
+ choseChunkParser()
+ // openrouter does not respect stop strings, so we have to cut the message ourselves
+ stopStrings := chatBody.MakeStopSliceExcluding("", listChatRoles())
+ req, err := http.NewRequest("POST", cfg.CurrentAPI, body)
+ if err != nil {
+ logger.Error("newreq error", "error", err)
+ showToast("error", "apicall failed:"+err.Error())
+ streamDone <- true
+ return
+ }
+ req.Header.Add("Accept", "application/json")
+ req.Header.Add("Content-Type", "application/json")
+ req.Header.Add("Authorization", "Bearer "+chunkParser.GetToken())
+ req.Header.Set("Accept-Encoding", "gzip")
+ // nolint
+ resp, err := httpClient.Do(req)
+ if err != nil {
+ logger.Error("llamacpp api", "error", err)
+ showToast("error", "apicall failed:"+err.Error())
+ streamDone <- true
+ return
+ }
+ // Check if the initial response is an error before starting to stream
+ if resp.StatusCode >= 400 {
+ // Read the response body to get detailed error information
+ bodyBytes, err := io.ReadAll(resp.Body)
+ if err != nil {
+ logger.Error("failed to read error response body", "error", err, "status_code", resp.StatusCode)
+ detailedError := fmt.Sprintf("HTTP Status: %d, Failed to read response body: %v", resp.StatusCode, err)
+ showToast("API Error", detailedError)
+ resp.Body.Close()
+ streamDone <- true
+ return
+ }
+ // Parse the error response for detailed information
+ detailedError := extractDetailedErrorFromBytes(bodyBytes, resp.StatusCode)
+ logger.Error("API returned error status", "status_code", resp.StatusCode, "detailed_error", detailedError)
+ showToast("API Error", detailedError)
+ resp.Body.Close()
+ streamDone <- true
+ return
+ }
+ //
+ defer resp.Body.Close()
reader := bufio.NewReader(resp.Body)
- counter := 0
+ counter := uint32(0)
+ tokenCount := 0
+ startTime := time.Now()
+ hasReasoning := false
+ reasoningSent := false
+ defer func() {
+ finalizeRespStats(tokenCount, startTime)
+ }()
for {
- if interruptResp {
- interruptResp = false
- logger.Info("interrupted bot response")
- break
- }
- llmchunk := models.LLMRespChunk{}
- if counter > 2000 {
+ var (
+ answerText string
+ chunk *models.TextChunk
+ )
+ counter++
+		// to stop spiraling into an infinite read of bad bytes that happens with a poor connection
+ if cfg.ChunkLimit > 0 && counter > cfg.ChunkLimit {
+ logger.Warn("response hit chunk limit", "limit", cfg.ChunkLimit)
streamDone <- true
break
}
line, err := reader.ReadBytes('\n')
if err != nil {
+ // Check if this is an EOF error and if the response contains detailed error information
+ if err == io.EOF {
+ // For streaming responses, we may have already consumed the error body
+ // So we'll use the original status code to provide context
+ detailedError := fmt.Sprintf("Streaming connection closed unexpectedly (Status: %d). This may indicate an API error. Check your API provider and model settings.", resp.StatusCode)
+ logger.Error("error reading response body", "error", err, "detailed_error", detailedError,
+ "status_code", resp.StatusCode, "user_role", cfg.UserRole, "parser", chunkParser, "link", cfg.CurrentAPI)
+ showToast("API Error", detailedError)
+ } else {
+ logger.Error("error reading response body", "error", err, "line", string(line),
+ "user_role", cfg.UserRole, "parser", chunkParser, "link", cfg.CurrentAPI)
+ // if err.Error() != "EOF" {
+ showToast("API error", err.Error())
+ }
streamDone <- true
- panic(err)
+ break
+ // }
+ // continue
}
- // logger.Info("linecheck", "line", string(line), "len", len(line), "counter", counter)
if len(line) <= 1 {
+ if interruptResp.Load() {
+ goto interrupt // get unstuck from bad connection
+ }
continue // skip \n
}
// starts with -> data:
line = line[6:]
- if err := json.Unmarshal(line, &llmchunk); err != nil {
- logger.Error("failed to decode", "error", err, "line", string(line))
+ logger.Debug("debugging resp", "line", string(line))
+ if bytes.Equal(line, []byte("[DONE]\n")) {
streamDone <- true
- return nil, err
+ break
+ }
+ if bytes.Equal(line, []byte("ROUTER PROCESSING\n")) {
+ continue
}
- llmResp = append(llmResp, llmchunk)
- // logger.Info("streamview", "chunk", llmchunk)
- // if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason != "chat.completion.chunk" {
- if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
+ chunk, err = chunkParser.ParseChunk(line)
+ if err != nil {
+ logger.Error("error parsing response body", "error", err,
+ "line", string(line), "url", cfg.CurrentAPI)
+ showToast("LLM Response Error", "Failed to parse LLM response: "+err.Error())
streamDone <- true
- // last chunk
break
}
- counter++
+ // // problem: this catches any mention of the word 'error'
+ // Handle error messages in response content
+ // example needed, since llm could use the word error in the normal msg
+ // if string(line) != "" && strings.Contains(strings.ToLower(string(line)), "error") {
+ // logger.Error("API error response detected", "line", line, "url", cfg.CurrentAPI)
+ // streamDone <- true
+ // break
+ // }
+ if chunk.Finished {
+ // Close the thinking block if we were streaming reasoning and haven't closed it yet
+ if hasReasoning && !reasoningSent {
+ chunkChan <- "</think>"
+ tokenCount++
+ }
+ if chunk.Chunk != "" {
+ logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
+ answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
+ chunkChan <- answerText
+ tokenCount++
+ }
+ streamDone <- true
+ break
+ }
+ if counter == 0 {
+ chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
+ }
+ // Handle reasoning chunks - stream them immediately as they arrive
+ if chunk.Reasoning != "" && !reasoningSent {
+ if !hasReasoning {
+ // First reasoning chunk - send opening tag
+ chunkChan <- "<think>"
+ tokenCount++
+ hasReasoning = true
+ }
+ // Stream reasoning content immediately
+ answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n")
+ if answerText != "" {
+ chunkChan <- answerText
+ tokenCount++
+ }
+ }
+ // When we get content and have been streaming reasoning, close the thinking block
+ if chunk.Chunk != "" && hasReasoning && !reasoningSent {
+ // Close the thinking block before sending actual content
+ chunkChan <- "</think>"
+ tokenCount++
+ reasoningSent = true
+ }
// bot sends way too many \n
- answerText := strings.ReplaceAll(llmchunk.Choices[0].Delta.Content, "\n\n", "\n")
- chunkChan <- answerText
+ answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
+ // Accumulate text to check for stop strings that might span across chunks
+ // check if chunk is in stopstrings => stop
+ // this check is needed only for openrouter /v1/completion, since it does not respect stop slice
+ if chunkParser.GetAPIType() == models.APITypeCompletion &&
+ slices.Contains(stopStrings, answerText) {
+ logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
+ streamDone <- true
+ break
+ }
+ if answerText != "" {
+ chunkChan <- answerText
+ tokenCount++
+ }
+ openAIToolChan <- chunk.ToolChunk
+ if chunk.FuncName != "" {
+ lastToolCall.Name = chunk.FuncName
+ // Store the tool call ID for the response
+ lastToolCall.ID = chunk.ToolID
+ }
+ interrupt:
+ if interruptResp.Load() { // read bytes, so it would not get into beginning of the next req
+ logger.Info("interrupted bot response", "chunk_counter", counter)
+ streamDone <- true
+ break
+ }
+ }
+}
+
+func roleToIcon(role string) string {
+ return "<" + role + ">: "
+}
+
+func chatWatcher(ctx context.Context) {
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case chatRoundReq := <-chatRoundChan:
+ if err := chatRound(chatRoundReq); err != nil {
+ logger.Error("failed to chatRound", "err", err)
+ }
+ }
+ }
+}
+
+// inspired by https://github.com/rivo/tview/issues/225
+func showSpinner() {
+ if cfg.CLIMode {
+ showSpinnerCLI()
+ return
+ }
+ spinners := []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}
+ var i int
+ botPersona := cfg.AssistantRole
+ if cfg.WriteNextMsgAsCompletionAgent != "" {
+ botPersona = cfg.WriteNextMsgAsCompletionAgent
+ }
+ for botRespMode.Load() || toolRunningMode.Load() {
+ time.Sleep(400 * time.Millisecond)
+ spin := i % len(spinners)
+ app.QueueUpdateDraw(func() {
+ switch {
+ case toolRunningMode.Load():
+ textArea.SetTitle(spinners[spin] + " tool")
+ case botRespMode.Load():
+ textArea.SetTitle(spinners[spin] + " " + botPersona + " (F6 to interrupt)")
+ default:
+ textArea.SetTitle(spinners[spin] + " input")
+ }
+ })
+ i++
}
- return llmResp, nil
+ app.QueueUpdateDraw(func() {
+ textArea.SetTitle("input")
+ })
}
-func chatRound(userMsg, role string, tv *tview.TextView) {
- botRespMode = true
- reader := formMsg(chatBody, userMsg, role)
+func showSpinnerCLI() {
+ for botRespMode.Load() || toolRunningMode.Load() {
+ time.Sleep(400 * time.Millisecond)
+ }
+}
+
+func chatRound(r *models.ChatRoundReq) error {
+ interruptResp.Store(false)
+ botRespMode.Store(true)
+ go showSpinner()
+ updateStatusLine()
+ botPersona := cfg.AssistantRole
+ if cfg.WriteNextMsgAsCompletionAgent != "" {
+ botPersona = cfg.WriteNextMsgAsCompletionAgent
+ }
+ defer func() {
+ botRespMode.Store(false)
+ ClearImageAttachment()
+ }()
+ // check that there is a model set to use if is not local
+ choseChunkParser()
+ reader, err := chunkParser.FormMsg(r.UserMsg, r.Role, r.Resume)
+ if reader == nil || err != nil {
+ logger.Error("empty reader from msgs", "role", r.Role, "error", err)
+ return err
+ }
+ if cfg.SkipLLMResp {
+ return nil
+ }
go sendMsgToLLM(reader)
- fmt.Fprintf(tv, fmt.Sprintf("(%d) ", len(chatBody.Messages)))
- fmt.Fprintf(tv, assistantIcon)
+ logger.Debug("looking at vars in chatRound", "msg", r.UserMsg, "regen", r.Regen, "resume", r.Resume)
+ msgIdx := len(chatBody.Messages)
+ if !r.Resume {
+ // Add empty message to chatBody immediately so it persists during Alt+T toggle
+ chatBody.Messages = append(chatBody.Messages, models.RoleMsg{
+ Role: botPersona, Content: "",
+ })
+ nl := "\n\n"
+ prevText := cliPrevOutput
+ if cfg.CLIMode {
+ if strings.HasSuffix(prevText, nl) {
+ nl = ""
+ } else if strings.HasSuffix(prevText, "\n") {
+ nl = "\n"
+ }
+ } else {
+ prevText = textView.GetText(true)
+ if strings.HasSuffix(prevText, nl) {
+ nl = ""
+ } else if strings.HasSuffix(prevText, "\n") {
+ nl = "\n"
+ }
+ }
+ outputHandler.Writef("%s[-:-:b](%d) %s[-:-:-]\n", nl, msgIdx, roleToIcon(botPersona))
+ } else {
+ msgIdx = len(chatBody.Messages) - 1
+ }
respText := strings.Builder{}
+ toolResp := strings.Builder{}
+ // Variables for handling thinking blocks during streaming
+ inThinkingBlock := false
+ thinkingBuffer := strings.Builder{}
+ justExitedThinkingCollapsed := false
out:
for {
select {
case chunk := <-chunkChan:
- // fmt.Printf(chunk)
- fmt.Fprintf(tv, chunk)
+ // Handle thinking blocks during streaming
+ if strings.HasPrefix(chunk, "<think>") && !inThinkingBlock {
+ // Start of thinking block
+ inThinkingBlock = true
+ thinkingBuffer.Reset()
+ thinkingBuffer.WriteString(chunk)
+ if thinkingCollapsed {
+ // Show placeholder immediately when thinking starts in collapsed mode
+ outputHandler.Write("[yellow::i][thinking... (press Alt+T to expand)][-:-:-]")
+ if cfg.AutoScrollEnabled {
+ outputHandler.ScrollToEnd()
+ }
+ respText.WriteString(chunk)
+ continue
+ }
+ } else if inThinkingBlock {
+ thinkingBuffer.WriteString(chunk)
+ if strings.Contains(chunk, "</think>") {
+ // End of thinking block
+ inThinkingBlock = false
+ if thinkingCollapsed {
+ // Thinking already displayed as placeholder, just update respText
+ respText.WriteString(chunk)
+ justExitedThinkingCollapsed = true
+ if cfg.AutoScrollEnabled {
+ outputHandler.ScrollToEnd()
+ }
+ continue
+ }
+ // If not collapsed, fall through to normal display
+ } else if thinkingCollapsed {
+ // Still in thinking block and collapsed - just buffer, don't display
+ respText.WriteString(chunk)
+ continue
+ }
+ // If not collapsed, fall through to normal display
+ }
+ // Add spacing after collapsed thinking block before real response
+ if justExitedThinkingCollapsed {
+ chunk = "\n\n" + chunk
+ justExitedThinkingCollapsed = false
+ }
+ outputHandler.Write(chunk)
respText.WriteString(chunk)
- tv.ScrollToEnd()
+ // Update the message in chatBody.Messages so it persists during Alt+T
+ if !r.Resume {
+ chatBody.Messages[msgIdx].Content += respText.String()
+ }
+ if cfg.AutoScrollEnabled {
+ outputHandler.ScrollToEnd()
+ }
+ // Send chunk to audio stream handler
+ if cfg.TTS_ENABLED {
+ TTSTextChan <- chunk
+ }
+ case toolChunk := <-openAIToolChan:
+ outputHandler.Write(toolChunk)
+ toolResp.WriteString(toolChunk)
+ if cfg.AutoScrollEnabled {
+ outputHandler.ScrollToEnd()
+ }
case <-streamDone:
+ for len(chunkChan) > 0 {
+ chunk := <-chunkChan
+ outputHandler.Write(chunk)
+ respText.WriteString(chunk)
+ if cfg.AutoScrollEnabled {
+ outputHandler.ScrollToEnd()
+ }
+ if cfg.TTS_ENABLED {
+ TTSTextChan <- chunk
+ }
+ }
+ if cfg.TTS_ENABLED {
+ TTSFlushChan <- true
+ }
break out
}
}
- botRespMode = false
- chatBody.Messages = append(chatBody.Messages, models.MessagesStory{
- Role: assistantRole, Content: respText.String(),
- })
- // bot msg is done;
- // now check it for func call
+ var msgStats *models.ResponseStats
+ if lastRespStats != nil {
+ msgStats = &models.ResponseStats{
+ Tokens: lastRespStats.Tokens,
+ Duration: lastRespStats.Duration,
+ TokensPerSec: lastRespStats.TokensPerSec,
+ }
+ lastRespStats = nil
+ }
+ botRespMode.Store(false)
+ if r.Resume {
+ chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String()
+ updatedMsg := chatBody.Messages[len(chatBody.Messages)-1]
+ processedMsg := processMessageTag(&updatedMsg)
+ chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg
+ if msgStats != nil && chatBody.Messages[len(chatBody.Messages)-1].Role != cfg.ToolRole {
+ chatBody.Messages[len(chatBody.Messages)-1].Stats = msgStats
+ }
+ } else {
+ chatBody.Messages[msgIdx].Content = respText.String()
+ processedMsg := processMessageTag(&chatBody.Messages[msgIdx])
+ chatBody.Messages[msgIdx] = *processedMsg
+ if msgStats != nil && chatBody.Messages[msgIdx].Role != cfg.ToolRole {
+ chatBody.Messages[msgIdx].Stats = msgStats
+ }
+ stopTTSIfNotForUser(&chatBody.Messages[msgIdx])
+ }
+ cleanChatBody()
+ refreshChatDisplay()
+ updateStatusLine()
+ // bot msg is done; now check it for func call
// logChat(activeChatName, chatBody.Messages)
- err := updateStorageChat(activeChatName, chatBody.Messages)
- if err != nil {
+ if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
logger.Warn("failed to update storage", "error", err, "name", activeChatName)
}
- findCall(respText.String(), tv)
+ // Strip think blocks before parsing for tool calls
+ respTextNoThink := thinkBlockRE.ReplaceAllString(respText.String(), "")
+ if interruptResp.Load() {
+ return nil
+ }
+ if findCall(respTextNoThink, toolResp.String()) {
+ // Tool was found and executed, subsequent chatRound will signal cliRespDone when complete
+ return nil
+ }
+ // No tool call - signal completion now
+ if cfg.CLIMode && cliRespDone != nil {
+ select {
+ case cliRespDone <- true:
+ default:
+ }
+ }
+ // Check if this message was sent privately to specific characters
+ // If so, trigger those characters to respond if that char is not controlled by user
+ // perhaps we should have narrator role to determine which char is next to act
+ if cfg.AutoTurn {
+ lastMsg := chatBody.Messages[len(chatBody.Messages)-1]
+ if len(lastMsg.KnownTo) > 0 {
+ triggerPrivateMessageResponses(&lastMsg)
+ }
+ }
+ return nil
}
-func findCall(msg string, tv *tview.TextView) {
- prefix := "__tool_call__\n"
- suffix := "\n__tool_call__"
- fc := models.FuncCall{}
- if !strings.HasPrefix(msg, prefix) ||
- !strings.HasSuffix(msg, suffix) {
+// cleanChatBody removes messages with null or empty content to prevent API issues
+func cleanChatBody() {
+ if chatBody == nil || chatBody.Messages == nil {
return
}
- jsStr := strings.TrimSuffix(strings.TrimPrefix(msg, prefix), suffix)
- if err := json.Unmarshal([]byte(jsStr), &fc); err != nil {
- logger.Error("failed to unmarshal tool call", "error", err)
- return
- // panic(err)
+ // Tool request cleaning is now configurable via AutoCleanToolCallsFromCtx (default false)
+ // /completion msg where part meant for user and other part tool call
+ // chatBody.Messages = cleanToolCalls(chatBody.Messages)
+ chatBody.Messages = consolidateAssistantMessages(chatBody.Messages)
+}
+
+// convertJSONToMapStringString unmarshals JSON into map[string]interface{} and converts all values to strings.
+func convertJSONToMapStringString(jsonStr string) (map[string]string, error) {
+ // Extract JSON object from string - models may output extra text after JSON
+ jsonStr = extractJSON(jsonStr)
+ var raw map[string]interface{}
+ if err := json.Unmarshal([]byte(jsonStr), &raw); err != nil {
+ return nil, err
+ }
+ result := make(map[string]string, len(raw))
+ for k, v := range raw {
+ switch val := v.(type) {
+ case string:
+ result[k] = val
+ case float64:
+ result[k] = strconv.FormatFloat(val, 'f', -1, 64)
+ case int, int64, int32:
+ // json.Unmarshal converts numbers to float64, but handle other integer types if they appear
+ result[k] = fmt.Sprintf("%v", val)
+ case bool:
+ result[k] = strconv.FormatBool(val)
+ case nil:
+ result[k] = ""
+ default:
+ result[k] = fmt.Sprintf("%v", val)
+ }
+ }
+ return result, nil
+}
+
+// extractJSON finds the first { and last } to extract only the JSON object
+// This handles cases where models output extra text after JSON
+func extractJSON(s string) string {
+ // Try direct parse first - if it works, return as-is
+ var dummy map[string]interface{}
+ if err := json.Unmarshal([]byte(s), &dummy); err == nil {
+ return s
+ }
+ // Otherwise find JSON boundaries
+ start := strings.Index(s, "{")
+ end := strings.LastIndex(s, "}")
+ if start >= 0 && end > start {
+ return s[start : end+1]
+ }
+ return s
+}
+
+// unmarshalFuncCall unmarshals a JSON tool call, converting numeric arguments to strings.
+func unmarshalFuncCall(jsonStr string) (*models.FuncCall, error) {
+ type tempFuncCall struct {
+ ID string `json:"id,omitempty"`
+ Name string `json:"name"`
+ Args map[string]interface{} `json:"args"`
+ }
+ var temp tempFuncCall
+ if err := json.Unmarshal([]byte(jsonStr), &temp); err != nil {
+ return nil, err
+ }
+ fc := &models.FuncCall{
+ ID: temp.ID,
+ Name: temp.Name,
+ Args: make(map[string]string, len(temp.Args)),
+ }
+ for k, v := range temp.Args {
+ switch val := v.(type) {
+ case string:
+ fc.Args[k] = val
+ case float64:
+ fc.Args[k] = strconv.FormatFloat(val, 'f', -1, 64)
+ case int, int64, int32:
+ fc.Args[k] = fmt.Sprintf("%v", val)
+ case bool:
+ fc.Args[k] = strconv.FormatBool(val)
+ case nil:
+ fc.Args[k] = ""
+ default:
+ fc.Args[k] = fmt.Sprintf("%v", val)
+ }
+ }
+ return fc, nil
+}
+
+// findCall: adds chatRoundReq into the chatRoundChan and returns true if does
+func findCall(msg, toolCall string) bool {
+ var fc *models.FuncCall
+ if toolCall != "" {
+ // HTML-decode the tool call string to handle encoded characters like &lt; -> <=
+ decodedToolCall := html.UnescapeString(toolCall)
+ openAIToolMap, err := convertJSONToMapStringString(decodedToolCall)
+ if err != nil {
+ logger.Error("failed to unmarshal openai tool call", "call", decodedToolCall, "error", err)
+ // Ensure lastToolCall.ID is set for the error response (already set from chunk)
+ // Send error response to LLM so it can retry or handle the error
+ toolResponseMsg := models.RoleMsg{
+ Role: cfg.ToolRole,
+ Content: fmt.Sprintf("Error processing tool call: %v. Please check the JSON format and try again.", err),
+ ToolCallID: lastToolCall.ID, // Use the stored tool call ID
+ }
+ chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
+ // Clear the stored tool call ID after using it (no longer needed)
+ // Trigger the assistant to continue processing with the error message
+ crr := &models.ChatRoundReq{
+ Role: cfg.AssistantRole,
+ }
+ // provoke next llm msg after failed tool call
+ chatRoundChan <- crr
+ // chatRound("", cfg.AssistantRole, tv, false, false)
+ return true
+ }
+ lastToolCall.Args = openAIToolMap
+ fc = lastToolCall
+ // NOTE: We do NOT override lastToolCall.ID from arguments.
+ // The ID should come from the streaming response (chunk.ToolID) set earlier.
+ // Some tools like todo_create have "id" in their arguments which is NOT the tool call ID.
+ } else {
+ jsStr := models.ToolCallRE.FindString(msg)
+ if jsStr == "" { // no tool call case
+ return false
+ }
+ // Remove prefix/suffix with flexible whitespace handling
+ jsStr = strings.TrimSpace(jsStr)
+ jsStr = strings.TrimPrefix(jsStr, "__tool_call__")
+ jsStr = strings.TrimSuffix(jsStr, "__tool_call__")
+ jsStr = strings.TrimSpace(jsStr)
+ // HTML-decode the JSON string to handle encoded characters like &lt; -> <=
+ decodedJsStr := html.UnescapeString(jsStr)
+ // Try to find valid JSON bounds (first { to last })
+ start := strings.Index(decodedJsStr, "{")
+ end := strings.LastIndex(decodedJsStr, "}")
+ if start == -1 || end == -1 || end <= start {
+ logger.Error("failed to find valid JSON in tool call", "json_string", decodedJsStr)
+ toolResponseMsg := models.RoleMsg{
+ Role: cfg.ToolRole,
+ Content: "Error processing tool call: no valid JSON found. Please check the JSON format.",
+ }
+ chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
+ crr := &models.ChatRoundReq{
+ Role: cfg.AssistantRole,
+ }
+ chatRoundChan <- crr
+ return true
+ }
+ decodedJsStr = decodedJsStr[start : end+1]
+ var err error
+ fc, err = unmarshalFuncCall(decodedJsStr)
+ if err != nil {
+ logger.Error("failed to unmarshal tool call", "error", err, "json_string", decodedJsStr)
+ // Send error response to LLM so it can retry or handle the error
+ toolResponseMsg := models.RoleMsg{
+ Role: cfg.ToolRole,
+ Content: fmt.Sprintf("Error processing tool call: %v. Please check the JSON format and try again.", err),
+ }
+ chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
+ logger.Debug("findCall: added tool error response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "message_count_after_add", len(chatBody.Messages))
+ // Trigger the assistant to continue processing with the error message
+ // chatRound("", cfg.AssistantRole, tv, false, false)
+ crr := &models.ChatRoundReq{
+ Role: cfg.AssistantRole,
+ }
+ // provoke next llm msg after failed tool call
+ chatRoundChan <- crr
+ return true
+ }
+ // Update lastToolCall with parsed function call
+ lastToolCall.ID = fc.ID
+ lastToolCall.Name = fc.Name
+ lastToolCall.Args = fc.Args
+ }
+ // we got here => last msg recognized as a tool call (correct or not)
+ // Use the tool call ID from streaming response (lastToolCall.ID)
+ // Don't generate random ID - the ID should match between assistant message and tool response
+ lastMsgIdx := len(chatBody.Messages) - 1
+ if lastToolCall.ID != "" {
+ chatBody.Messages[lastMsgIdx].ToolCallID = lastToolCall.ID
+ }
+ // Store tool call info in the assistant message
+ // Convert Args map to JSON string for storage
+ chatBody.Messages[lastMsgIdx].ToolCall = &models.ToolCall{
+ ID: lastToolCall.ID,
+ Name: lastToolCall.Name,
+ Args: mapToString(lastToolCall.Args),
}
// call a func
- f, ok := fnMap[fc.Name]
- if !ok {
- m := fmt.Sprintf("%s is not implemented", fc.Name)
- chatRound(m, toolRole, tv)
- return
+ // _, ok := tools.FnMap[fc.Name]
+ // if !ok {
+ // m := fc.Name + " is not implemented"
+ // // Create tool response message with the proper tool_call_id
+ // toolResponseMsg := models.RoleMsg{
+ // Role: cfg.ToolRole,
+ // Content: m,
+ // ToolCallID: lastToolCall.ID, // Use the stored tool call ID
+ // }
+ // chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
+ // logger.Debug("findCall: added tool not implemented response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
+ // // Clear the stored tool call ID after using it
+ // lastToolCall.ID = ""
+ // // Trigger the assistant to continue processing with the new tool response
+ // // by calling chatRound with empty content to continue the assistant's response
+ // crr := &models.ChatRoundReq{
+ // Role: cfg.AssistantRole,
+ // }
+ // // failed to find tool
+ // chatRoundChan <- crr
+ // return true
+ // }
+ // Show tool call progress indicator before execution
+ outputHandler.Writef("\n[yellow::i][tool: %s...][-:-:-]", fc.Name)
+ toolRunningMode.Store(true)
+ resp, okT := tools.CallToolWithAgent(fc.Name, fc.Args)
+ if !okT {
+ // Create tool response message with the proper tool_call_id
+ toolResponseMsg := models.RoleMsg{
+ Role: cfg.ToolRole,
+ Content: string(resp),
+ ToolCallID: lastToolCall.ID, // Use the stored tool call ID
+ }
+ chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
+ logger.Debug("findCall: added tool not implemented response", "role", toolResponseMsg.Role,
+ "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID)
+ // Clear the stored tool call ID after using it
+ lastToolCall.ID = ""
+ // Trigger the assistant to continue processing with the new tool response
+ // by calling chatRound with empty content to continue the assistant's response
+ crr := &models.ChatRoundReq{
+ Role: cfg.AssistantRole,
+ }
+ // failed to find tool
+ chatRoundChan <- crr
+ return true
}
- resp := f(fc.Args)
- toolMsg := fmt.Sprintf("tool response: %+v", resp)
- // reader := formMsg(chatBody, toolMsg, toolRole)
- // sendMsgToLLM()
- chatRound(toolMsg, toolRole, tv)
- // return func result to the llm
+ toolRunningMode.Store(false)
+ toolMsg := string(resp)
+ logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
+ // Create tool response message with the proper tool_call_id
+ // Mark shell commands as always visible
+ isShellCommand := fc.Name == "execute_command"
+ // Check if response is multimodal content (image)
+ var toolResponseMsg models.RoleMsg
+ if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
+ // Parse multimodal content response
+ multimodalResp := models.MultimodalToolResp{}
+ if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
+ // Create RoleMsg with ContentParts
+ var contentParts []any
+ for _, part := range multimodalResp.Parts {
+ partType := part["type"]
+ switch partType {
+ case "text":
+ contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
+ case "image_url":
+ contentParts = append(contentParts, models.ImageContentPart{
+ Type: "image_url",
+ ImageURL: struct {
+ URL string `json:"url"`
+ }{URL: part["url"]},
+ })
+ default:
+ continue
+ }
+ }
+ toolResponseMsg = models.RoleMsg{
+ Role: cfg.ToolRole,
+ ContentParts: contentParts,
+ HasContentParts: true,
+ ToolCallID: lastToolCall.ID,
+ IsShellCommand: isShellCommand,
+ }
+ } else {
+ // Fallback to regular content
+ toolResponseMsg = models.RoleMsg{
+ Role: cfg.ToolRole,
+ Content: toolMsg,
+ ToolCallID: lastToolCall.ID,
+ IsShellCommand: isShellCommand,
+ }
+ }
+ } else {
+ toolResponseMsg = models.RoleMsg{
+ Role: cfg.ToolRole,
+ Content: toolMsg,
+ ToolCallID: lastToolCall.ID,
+ IsShellCommand: isShellCommand,
+ }
+ }
+ outputHandler.Writef("%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
+ "\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
+ chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
+ // Clear the stored tool call ID after using it
+ lastToolCall.ID = ""
+ // Trigger the assistant to continue processing with the new tool response
+ // by calling chatRound with empty content to continue the assistant's response
+ crr := &models.ChatRoundReq{
+ Role: cfg.AssistantRole,
+ }
+ chatRoundChan <- crr
+ return true
}
-func chatToTextSlice(showSys bool) []string {
- resp := make([]string, len(chatBody.Messages))
- for i, msg := range chatBody.Messages {
- if !showSys && (msg.Role != assistantRole && msg.Role != userRole) {
+func chatToTextSlice(messages []models.RoleMsg, showSys bool) []string {
+ resp := make([]string, len(messages))
+ for i := range messages {
+ icon := fmt.Sprintf("[-:-:b](%d) <%s>:[-:-:-]", i, messages[i].Role)
+ // Handle tool call indicators (assistant messages with tool call but empty content)
+ if messages[i].Role == cfg.AssistantRole && messages[i].ToolCall != nil && messages[i].ToolCall.ID != "" {
+ // This is a tool call indicator - show collapsed
+ if toolCollapsed {
+ toolName := messages[i].ToolCall.Name
+ resp[i] = strings.ReplaceAll(
+ fmt.Sprintf(
+ "%s\n%s\n[yellow::i][tool call: %s (press Ctrl+T to expand)][-:-:-]\n",
+ icon, messages[i].GetText(), toolName),
+ "\n\n", "\n")
+ } else {
+ // Show full tool call info
+ toolName := messages[i].ToolCall.Name
+ resp[i] = strings.ReplaceAll(
+ fmt.Sprintf(
+ "%s\n%s\n[yellow::i][tool call: %s][-:-:-]\nargs: %s\nid: %s\n",
+ icon, messages[i].GetText(), toolName, messages[i].ToolCall.Args, messages[i].ToolCall.ID),
+ "\n\n", "\n")
+ }
+ continue
+ }
+ // Handle tool responses
+ if messages[i].Role == cfg.ToolRole || messages[i].Role == "tool" {
+ // Always show shell commands
+ if messages[i].IsShellCommand {
+ resp[i] = MsgToText(i, &messages[i])
+ continue
+ }
+ // Hide non-shell tool responses when collapsed
+ if toolCollapsed {
+ resp[i] = icon + "\n[yellow::i][tool resp (press Ctrl+T to expand)][-:-:-]\n"
+ continue
+ }
+ // When expanded, show tool responses
+ resp[i] = MsgToText(i, &messages[i])
continue
}
- resp[i] = msg.ToText(i)
+ // INFO: skips system msg when showSys is false
+ if !showSys && messages[i].Role == "system" {
+ continue
+ }
+ resp[i] = MsgToText(i, &messages[i])
}
return resp
}
-func chatToText(showSys bool) string {
- s := chatToTextSlice(showSys)
- return strings.Join(s, "")
+func chatToText(messages []models.RoleMsg, showSys bool) string {
+ s := chatToTextSlice(messages, showSys)
+ text := strings.Join(s, "\n")
+ // Collapse thinking blocks if enabled
+ if thinkingCollapsed {
+ text = models.ThinkRE.ReplaceAllStringFunc(text, func(match string) string {
+ // Extract content between <think> and </think>
+ start := len("<think>")
+ end := len(match) - len("</think>")
+ if start < end && start < len(match) {
+ content := match[start:end]
+ return fmt.Sprintf("[yellow::i][thinking... (%d chars) (press Alt+T to expand)][-:-:-]", len(content))
+ }
+ return "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]"
+ })
+ // Handle incomplete thinking blocks (during streaming when </think> hasn't arrived yet)
+ if strings.Contains(text, "<think>") && !strings.Contains(text, "</think>") {
+ // Find the incomplete thinking block and replace it
+ startIdx := strings.Index(text, "<think>")
+ if startIdx != -1 {
+ content := text[startIdx+len("<think>"):]
+ placeholder := fmt.Sprintf(
+ "[yellow::i][thinking... (%d chars) (press Alt+T to expand)][-:-:-]",
+ len(content))
+ text = text[:startIdx] + placeholder
+ }
+ }
+ }
+ return text
}
-func textToMsg(rawMsg string) models.MessagesStory {
- msg := models.MessagesStory{}
- // system and tool?
- if strings.HasPrefix(rawMsg, assistantIcon) {
- msg.Role = assistantRole
- msg.Content = strings.TrimPrefix(rawMsg, assistantIcon)
- return msg
+func addNewChat(chatName string) {
+ id, err := store.ChatGetMaxID()
+ if err != nil {
+ logger.Error("failed to get max chat id from db;", "id:", id)
+ // INFO: will rewrite first chat
}
- if strings.HasPrefix(rawMsg, userIcon) {
- msg.Role = userRole
- msg.Content = strings.TrimPrefix(rawMsg, userIcon)
- return msg
+ chat := &models.Chat{
+ ID: id + 1,
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ Agent: cfg.AssistantRole,
}
- return msg
+ if chatName == "" {
+ chatName = fmt.Sprintf("%d_%s", chat.ID, cfg.AssistantRole)
+ }
+ chat.Name = chatName
+ chatMap[chat.Name] = chat
+ activeChatName = chat.Name
+}
+
+func applyCharCard(cc *models.CharCard, loadHistory bool) {
+ cfg.AssistantRole = cc.Role
+ history, err := loadAgentsLastChat(cfg.AssistantRole)
+ if err != nil || !loadHistory {
+ // too much action for err != nil; loadAgentsLastChat needs to be split up
+ history = []models.RoleMsg{
+ {Role: "system", Content: cc.SysPrompt},
+ {Role: cfg.AssistantRole, Content: cc.FirstMsg},
+ }
+ logger.Warn("failed to load last agent chat;", "agent", cc.Role, "err", err, "new_history", history)
+ addNewChat("")
+ }
+ chatBody.Messages = history
}
-func textSliceToChat(chat []string) []models.MessagesStory {
- resp := make([]models.MessagesStory, len(chat))
- for i, rawMsg := range chat {
- msg := textToMsg(rawMsg)
- resp[i] = msg
+func charToStart(agentName string, keepSysP bool) bool {
+ cc := GetCardByRole(agentName)
+ if cc == nil {
+ return false
+ }
+ applyCharCard(cc, keepSysP)
+ return true
+}
+
+func updateModelLists() {
+ var err error
+ if cfg.OpenRouterToken != "" {
+ ORFreeModels, err = fetchORModels(true)
+ if err != nil {
+ logger.Warn("failed to fetch or models", "error", err)
+ }
+ }
+ // if llama.cpp started after gf-lt?
+ ml, err := fetchLCPModelsWithLoadStatus()
+ if err != nil {
+ logger.Warn("failed to fetch llama.cpp models", "error", err)
+ }
+ localModelsMu.Lock()
+ LocalModels = ml
+ localModelsMu.Unlock()
+ // set already loaded model in llama.cpp
+ if !isLocalLlamacpp() {
+ return
+ }
+ localModelsMu.Lock()
+ defer localModelsMu.Unlock()
+ for i := range LocalModels {
+ if strings.Contains(LocalModels[i], models.LoadedMark) {
+ m := strings.TrimPrefix(LocalModels[i], models.LoadedMark)
+ cfg.CurrentModel = m
+ chatBody.Model = m
+ cachedModelColor.Store("green")
+ updateStatusLine()
+ UpdateToolCapabilities()
+ app.Draw()
+ return
+ }
}
- return resp
+}
+
+func refreshLocalModelsIfEmpty() {
+ localModelsMu.RLock()
+ if len(LocalModels) > 0 {
+ localModelsMu.RUnlock()
+ return
+ }
+ localModelsMu.RUnlock()
+ // try to fetch
+ models, err := fetchLCPModels()
+ if err != nil {
+ logger.Warn("failed to fetch llama.cpp models", "error", err)
+ return
+ }
+ localModelsMu.Lock()
+ LocalModels = models
+ localModelsMu.Unlock()
+}
+
+func startNewCLIChat() []models.RoleMsg {
+ id, err := store.ChatGetMaxID()
+ if err != nil {
+ logger.Error("failed to get chat id", "error", err)
+ }
+ id++
+ charToStart(cfg.AssistantRole, false)
+ newChat := &models.Chat{
+ ID: id,
+ Name: fmt.Sprintf("%d_%s", id, cfg.AssistantRole),
+ CreatedAt: time.Now(),
+ UpdatedAt: time.Now(),
+ Msgs: "",
+ Agent: cfg.AssistantRole,
+ }
+ activeChatName = newChat.Name
+ chatMap[newChat.Name] = newChat
+ cliPrevOutput = ""
+ return chatBody.Messages
+}
+
+func startNewCLIErrors() []models.RoleMsg {
+ return startNewCLIChat()
+}
+
+func summarizeAndStartNewChat() {
+ if len(chatBody.Messages) == 0 {
+ showToast("info", "No chat history to summarize")
+ return
+ }
+ showToast("info", "Summarizing chat history...")
+ arg := map[string]string{
+ "chat": chatToText(chatBody.Messages, false),
+ }
+ // Call the summarize_chat tool via agent
+ summaryBytes, _ := tools.CallToolWithAgent("summarize_chat", arg)
+ summary := string(summaryBytes)
+ if summary == "" {
+ showToast("error", "Failed to generate summary")
+ return
+ }
+ // Start a new chat
+ startNewChat(true)
+ // Inject summary as a tool call response
+ toolMsg := models.RoleMsg{
+ Role: cfg.ToolRole,
+ Content: summary,
+ ToolCallID: "",
+ }
+ chatBody.Messages = append(chatBody.Messages, toolMsg)
+ // Update UI
+ if !cfg.CLIMode {
+ textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys))
+ colorText()
+ }
+ // Update storage
+ if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
+ logger.Warn("failed to update storage after injecting summary", "error", err)
+ }
+ showToast("info", "Chat summarized and new chat started with summary as tool response")
}
func init() {
- file, err := os.OpenFile("log.txt", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+ // ctx, cancel := context.WithCancel(context.Background())
+ var err error
+ cfg, err = config.LoadConfig("config.toml")
+ if err != nil {
+ fmt.Println("failed to load config.toml", err)
+ cancel()
+ os.Exit(1)
+ return
+ }
+ defaultStarter = []models.RoleMsg{
+ {Role: "system", Content: models.BasicSysMsg},
+ {Role: cfg.AssistantRole, Content: models.DefaultFirstMsg},
+ }
+ logfile, err := os.OpenFile(cfg.LogFile,
+ os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+ if err != nil {
+ slog.Error("failed to open log file", "error", err, "filename", cfg.LogFile)
+ cancel()
+ os.Exit(1)
+ return
+ }
+ // load cards
+ sysMap[basicCard.ID] = basicCard
+ roleToID["assistant"] = basicCard.ID
+ basicCard.Role = cfg.AssistantRole
+ logLevel.Set(slog.LevelInfo)
+ logger = slog.New(slog.NewTextHandler(logfile, &slog.HandlerOptions{Level: logLevel}))
+ store = storage.NewProviderSQL(cfg.DBPATH, logger)
+ if store == nil {
+ cancel()
+ os.Exit(1)
+ return
+ }
+ ragger, err = rag.New(logger, store, cfg)
if err != nil {
- panic(err)
+ logger.Error("failed to create RAG", "error", err)
}
- // create dir if does not exist
- if err := os.MkdirAll(historyDir, os.ModePerm); err != nil {
- panic(err)
+ if ragger != nil && ragger.FallbackMessage() != "" && app != nil {
+ showToast("RAG", "ONNX unavailable, using API: "+ragger.FallbackMessage())
}
- logger = slog.New(slog.NewTextHandler(file, nil))
- store = storage.NewProviderSQL("test.db", logger)
// https://github.com/coreydaley/ggerganov-llama.cpp/blob/master/examples/server/README.md
// load all chats in memory
- loadHistoryChats()
- lastChat := loadOldChatOrGetNew()
- logger.Info("loaded history", "chat", lastChat)
+ if _, err := loadHistoryChats(); err != nil {
+ logger.Error("failed to load chat", "error", err)
+ cancel()
+ os.Exit(1)
+ return
+ }
+ lastToolCall = &models.FuncCall{}
+ var lastChat []models.RoleMsg
+ if cfg.CLIMode {
+ lastChat = startNewCLIErrors()
+ } else {
+ lastChat = loadOldChatOrGetNew()
+ }
chatBody = &models.ChatBody{
- Model: "modl_name",
+ Model: "modelname",
Stream: true,
Messages: lastChat,
}
+ choseChunkParser()
+ httpClient = createClient(time.Second * 90)
+ if cfg.TTS_ENABLED {
+ orator = NewOrator(logger, cfg)
+ }
+ if cfg.STT_ENABLED {
+ asr = NewSTT(logger, cfg)
+ }
+ if cfg.PlaywrightEnabled {
+ go func() {
+ if err := tools.CheckPlaywright(); err != nil {
+ if err := tools.InstallPW(); err != nil {
+ logger.Error("failed to install playwright", "error", err)
+ cancel()
+ os.Exit(1)
+ return
+ }
+ if err := tools.CheckPlaywright(); err != nil {
+ logger.Error("failed to run playwright", "error", err)
+ cancel()
+ os.Exit(1)
+ return
+ }
+ }
+ }()
+ }
+ // atomic default values
+ cachedModelColor.Store("orange")
+ go chatWatcher(ctx)
+ if !cfg.CLIMode {
+ initTUI()
+ }
+ tools.InitTools(cfg, logger, store)
+ // tooler = tools.InitTools(cfg, logger, store)
+ // tooler.RegisterWindowTools(modelHasVision)
}