From 4b6769e531ab844db2ed98445c13df9e2c781776 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 08:43:50 +0300 Subject: Fix (notification): non-blocking way to notify --- helpfuncs.go | 2 +- tui.go | 78 ++++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/helpfuncs.go b/helpfuncs.go index 038e275..b94e672 100644 --- a/helpfuncs.go +++ b/helpfuncs.go @@ -521,7 +521,7 @@ func updateFlexLayout() { if shellMode { flex.AddItem(shellInput, 0, 10, false) } else { - flex.AddItem(textArea, 0, 10, false) + flex.AddItem(bottomFlex, 0, 10, true) } if positionVisible { flex.AddItem(statusLineWidget, 0, 2, false) diff --git a/tui.go b/tui.go index b23c3ff..9cf32de 100644 --- a/tui.go +++ b/tui.go @@ -29,6 +29,8 @@ var ( statusLineWidget *tview.TextView helpView *tview.TextView flex *tview.Flex + bottomFlex *tview.Flex + notificationWidget *tview.TextView imgView *tview.Image defaultImage = "sysprompts/llama.png" indexPickWindow *tview.InputField @@ -137,8 +139,8 @@ func setShellMode(enabled bool) { }() } -// showToast displays a temporary message in the top‑right corner. -// It auto‑hides after 3 seconds and disappears when clicked. +// showToast displays a temporary notification in the bottom-right corner. +// It auto-hides after 3 seconds. func showToast(title, message string) { sanitize := func(s string, maxLen int) string { sanitized := strings.Map(func(r rune) rune { @@ -154,33 +156,34 @@ func showToast(title, message string) { } title = sanitize(title, 50) message = sanitize(message, 197) - notification := tview.NewTextView(). - SetTextAlign(tview.AlignCenter). - SetDynamicColors(true). - SetRegions(true). - SetText(fmt.Sprintf("[yellow]%s[-]\n", message)). - SetChangedFunc(func() { - app.Draw() + + notificationWidget.SetTitle(title) + notificationWidget.SetText(fmt.Sprintf("[yellow]%s[-]", message)) + + go func() { + app.QueueUpdateDraw(func() { + flex.RemoveItem(bottomFlex) + flex.RemoveItem(statusLineWidget) + bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn). + AddItem(textArea, 0, 1, true). + AddItem(notificationWidget, 40, 1, false) + flex.AddItem(bottomFlex, 0, 10, true) + if positionVisible { + flex.AddItem(statusLineWidget, 0, 2, false) + } }) - notification.SetTitleAlign(tview.AlignLeft). - SetBorder(true). - SetTitle(title) - // Wrap it in a full‑screen Flex to position it in the top‑right corner. - // Outer Flex (row) pushes content to the top; inner Flex (column) pushes to the right. - background := tview.NewFlex().SetDirection(tview.FlexRow). - AddItem(nil, 0, 1, false). // top spacer - AddItem(tview.NewFlex().SetDirection(tview.FlexColumn). - AddItem(nil, 0, 1, false). // left spacer - AddItem(notification, 40, 1, true), // notification width 40 - 5, 1, false) // notification height 5 - // Generate a unique page name (e.g., using timestamp) to allow multiple toasts. - pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano()) - pages.AddPage(pageName, background, true, true) - // Auto‑dismiss after 3 seconds. + }() + time.AfterFunc(3*time.Second, func() { app.QueueUpdateDraw(func() { - if pages.HasPage(pageName) { - pages.RemovePage(pageName) + flex.RemoveItem(bottomFlex) + flex.RemoveItem(statusLineWidget) + bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn). + AddItem(textArea, 0, 1, true). 
+ AddItem(notificationWidget, 0, 0, false) + flex.AddItem(bottomFlex, 0, 10, true) + if positionVisible { + flex.AddItem(statusLineWidget, 0, 2, false) } }) }) @@ -286,12 +289,25 @@ func init() { SetDynamicColors(true). SetRegions(true). SetChangedFunc(func() { - app.Draw() + // https://github.com/rivo/tview/wiki/Concurrency#event-handlers + // app.Draw() // already called by default per tview specs }) + notificationWidget = tview.NewTextView(). + SetTextAlign(tview.AlignCenter). + SetDynamicColors(true). + SetRegions(true). + SetChangedFunc(func() { + }) + notificationWidget.SetBorder(true).SetTitle("notification") + + bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn). + AddItem(textArea, 0, 1, true). + AddItem(notificationWidget, 0, 0, false) + // flex = tview.NewFlex().SetDirection(tview.FlexRow). AddItem(textView, 0, 40, false). - AddItem(textArea, 0, 10, true) // Restore original height + AddItem(bottomFlex, 0, 10, true) if positionVisible { flex.AddItem(statusLineWidget, 0, 2, false) } @@ -360,10 +376,14 @@ func init() { // y += h / 2 // return x, y, w, h // }) + notificationWidget.SetDrawFunc(func(screen tcell.Screen, x, y, w, h int) (int, int, int, int) { + y += h / 2 + return x, y, w, h + }) // Initially set up flex without search bar flex = tview.NewFlex().SetDirection(tview.FlexRow). AddItem(textView, 0, 40, false). - AddItem(textArea, 0, 10, true) // Restore original height + AddItem(bottomFlex, 0, 10, true) if positionVisible { flex.AddItem(statusLineWidget, 0, 2, false) } -- cgit v1.2.3 From 57088565bd7a3edbf55d63780573096124a1fc1b Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 08:51:04 +0300 Subject: Fix (notification): being closed by prev notification early --- tui.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tui.go b/tui.go index 9cf32de..b9bf35f 100644 --- a/tui.go +++ b/tui.go @@ -38,6 +38,7 @@ var ( roleEditWindow *tview.InputField shellInput *tview.InputField confirmModal *tview.Modal + toastTimer *time.Timer confirmPageName = "confirm" fullscreenMode bool positionVisible bool = true @@ -156,10 +157,11 @@ func showToast(title, message string) { } title = sanitize(title, 50) message = sanitize(message, 197) - + if toastTimer != nil { + toastTimer.Stop() + } notificationWidget.SetTitle(title) notificationWidget.SetText(fmt.Sprintf("[yellow]%s[-]", message)) - go func() { app.QueueUpdateDraw(func() { flex.RemoveItem(bottomFlex) @@ -173,8 +175,7 @@ func showToast(title, message string) { } }) }() - - time.AfterFunc(3*time.Second, func() { + toastTimer = time.AfterFunc(3*time.Second, func() { app.QueueUpdateDraw(func() { flex.RemoveItem(bottomFlex) flex.RemoveItem(statusLineWidget) @@ -299,11 +300,9 @@ func init() { SetChangedFunc(func() { }) notificationWidget.SetBorder(true).SetTitle("notification") - bottomFlex = tview.NewFlex().SetDirection(tview.FlexColumn). AddItem(textArea, 0, 1, true). AddItem(notificationWidget, 0, 0, false) - // flex = tview.NewFlex().SetDirection(tview.FlexRow). AddItem(textView, 0, 40, false). 
-- cgit v1.2.3 From 645b7351a80713a40e2c823479a3605baeb231b8 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 09:09:13 +0300 Subject: Fix: add different kind of notification for fullscreen mode --- tui.go | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tui.go b/tui.go index b9bf35f..36a34bb 100644 --- a/tui.go +++ b/tui.go @@ -160,6 +160,40 @@ func showToast(title, message string) { if toastTimer != nil { toastTimer.Stop() } + // show blocking notification to not mess up flex + if fullscreenMode { + notification := tview.NewTextView(). + SetTextAlign(tview.AlignCenter). + SetDynamicColors(true). + SetRegions(true). + SetText(fmt.Sprintf("[yellow]%s[-]\n", message)). + SetChangedFunc(func() { + app.Draw() + }) + notification.SetTitleAlign(tview.AlignLeft). + SetBorder(true). + SetTitle(title) + // Wrap it in a full‑screen Flex to position it in the top‑right corner. + // Outer Flex (row) pushes content to the top; inner Flex (column) pushes to the right. + background := tview.NewFlex().SetDirection(tview.FlexRow). + AddItem(nil, 0, 1, false). // top spacer + AddItem(tview.NewFlex().SetDirection(tview.FlexColumn). + AddItem(nil, 0, 1, false). // left spacer + AddItem(notification, 40, 1, true), // notification width 40 + 5, 1, false) // notification height 5 + // Generate a unique page name (e.g., using timestamp) to allow multiple toasts. + pageName := fmt.Sprintf("toast-%d", time.Now().UnixNano()) + pages.AddPage(pageName, background, true, true) + // Auto‑dismiss after 2 seconds, since blocking is more annoying + time.AfterFunc(2*time.Second, func() { + app.QueueUpdateDraw(func() { + if pages.HasPage(pageName) { + pages.RemovePage(pageName) + } + }) + }) + return + } notificationWidget.SetTitle(title) notificationWidget.SetText(fmt.Sprintf("[yellow]%s[-]", message)) go func() { -- cgit v1.2.3 From 6e9c453ee0f4a0212ef3f3200156c62a5c30b1ad Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 10:35:17 +0300 Subject: Enha: explicit app.Draw per textView update for smooth streaming --- tui.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tui.go b/tui.go index 36a34bb..f744825 100644 --- a/tui.go +++ b/tui.go @@ -324,8 +324,11 @@ func init() { SetDynamicColors(true). SetRegions(true). SetChangedFunc(func() { + // INFO: // https://github.com/rivo/tview/wiki/Concurrency#event-handlers - // app.Draw() // already called by default per tview specs + // although already called by default per tview specs + // calling it explicitly makes text streaming look smoother + app.Draw() }) notificationWidget = tview.NewTextView(). SetTextAlign(tview.AlignCenter). -- cgit v1.2.3 From 04f1fd464bf9bfcb80ea1e0cf0f1ca88091a5713 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 11:17:01 +0300 Subject: Chore: remove cluedo sysprompt --- sysprompts/cluedo.json | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 sysprompts/cluedo.json diff --git a/sysprompts/cluedo.json b/sysprompts/cluedo.json deleted file mode 100644 index 0c90cb5..0000000 --- a/sysprompts/cluedo.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "sys_prompt": "A game of cluedo. Players are {{user}}, {{char}}, {{char2}};\n\nrooms: hall, lounge, dinning room kitchen, ballroom, conservatory, billiard room, library, study;\nweapons: candlestick, dagger, lead pipe, revolver, rope, spanner;\npeople: miss Scarlett, colonel Mustard, mrs. White, reverend Green, mrs. Peacock, professor Plum;\n\nA murder happened in a mansion with 9 rooms.
Victim is dr. Black.\nPlayers goal is to find out who commited a murder, in what room and with what weapon.\nWeapons, people and rooms not involved in murder are distributed between players (as cards) by tool agent.\nThe objective of the game is to deduce the details of the murder. There are six characters, six murder weapons, and nine rooms, leaving the players with 324 possibilities. As soon as a player enters a room, they may make a suggestion as to the details, naming a suspect, the room they are in, and the weapon. For example: \"I suspect Professor Plum, in the Dining Room, with the candlestick\".\nOnce a player makes a suggestion, the others are called upon to disprove it.\nBefore the player's move, tool agent will remind that players their cards. There are two types of moves: making a suggestion (suggestion_move) and disproving other player suggestion (evidence_move);\nIn this version player wins when the correct details are named in the suggestion_move.\n\n\n{{user}}:\nlet's start a game of cluedo!\ntool: cards of {{char}} are 'LEAD PIPE', 'BALLROOM', 'CONSERVATORY', 'STUDY', 'Mrs. White'; suggestion_move;\n{{char}}:\n(putting miss Scarlet into the Hall with the Revolver) \"I suspect miss Scarlett, in the Hall, with the revolver.\"\ntool: cards of {{char2}} are 'SPANNER', 'DAGGER', 'Professor Plum', 'LIBRARY', 'Mrs. Peacock'; evidence_move;\n{{char2}}:\n\"No objections.\" (no cards matching the suspicion of {{char}})\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; evidence_move;\n{{user}}:\n\"I object. Miss Scarlett is innocent.\" (shows card with 'Miss Scarlett')\ntool: cards of {{char2}} are 'SPANNER', 'DAGGER', 'Professor Plum', 'LIBRARY', 'Mrs. Peacock'; suggestion_move;\n{{char2}}:\n*So it was not Miss Scarlett, good to know.*\n(moves Mrs. White to the Billiard Room) \"It might have been Mrs. White, in the Billiard Room, with the Revolver.\"\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; evidence_move;\n{{user}}:\n(no matching cards for the assumption of {{char2}}) \"Sounds possible to me.\"\ntool: cards of {{char}} are 'LEAD PIPE', 'BALLROOM', 'CONSERVATORY', 'STUDY', 'Mrs. White'; evidence_move;\n{{char}}:\n(shows Mrs. White card) \"No. Was not Mrs. White\"\ntool: cards of {{user}} are 'Colonel Mustard', 'Miss Scarlett', 'DINNING ROOM', 'CANDLESTICK', 'HALL'; suggestion_move;\n{{user}}:\n*So not Mrs. White...* (moves Reverend Green into the Billiard Room) \"I suspect Reverend Green, in the Billiard Room, with the Revolver.\"\ntool: Correct. It was Reverend Green in the Billiard Room, with the revolver. {{user}} wins.\n", - "role": "CluedoPlayer", - "role2": "CluedoEnjoyer", - "filepath": "sysprompts/cluedo.json", - "first_msg": "Hey guys! Want to play cluedo?" 
-} -- cgit v1.2.3 From c65c11bcfbc563611743d02039420533bcfe9d05 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 11:36:35 +0300 Subject: Fix: shellmode tab completion --- tui.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tui.go b/tui.go index f744825..c6ab392 100644 --- a/tui.go +++ b/tui.go @@ -273,7 +273,7 @@ func init() { shellHistoryPos = -1 } // Handle Tab key for @ file completion - if event.Key() == tcell.KeyTab { + if event.Key() == tcell.KeyTab && shellMode { currentText := shellInput.GetText() atIndex := strings.LastIndex(currentText, "@") if atIndex >= 0 { @@ -1151,7 +1151,7 @@ func init() { chatRoundChan <- &models.ChatRoundReq{Role: persona, UserMsg: msgText} return nil } - if event.Key() == tcell.KeyTab { + if event.Key() == tcell.KeyTab && !shellMode { currentF := app.GetFocus() if currentF == textArea { currentText := textArea.GetText() -- cgit v1.2.3 From fbc955ca37836553ef4b7c365b84e3dfa859c501 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 14:13:58 +0300 Subject: Enha: local onnx --- go.mod | 4 ++- go.sum | 4 +++ rag/embedder.go | 65 ++++++++++++++++++++++++++++++++--- rag/rag.go | 2 +- rag_issues.md | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 172 insertions(+), 7 deletions(-) create mode 100644 rag_issues.md diff --git a/go.mod b/go.mod index 0ad3405..d94cfbf 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,6 @@ require ( github.com/GrailFinder/google-translate-tts v0.1.3 github.com/GrailFinder/searchagent v0.2.0 github.com/PuerkitoBio/goquery v1.11.0 - github.com/deckarep/golang-set/v2 v2.8.0 github.com/gdamore/tcell/v2 v2.13.2 github.com/glebarez/go-sqlite v1.22.0 github.com/gopxl/beep/v2 v2.1.1 @@ -17,11 +16,14 @@ require ( github.com/neurosnap/sentences v1.1.2 github.com/playwright-community/playwright-go v0.5700.1 github.com/rivo/tview v0.42.0 + github.com/takara-ai/go-tokenizers v1.0.0 + github.com/yalue/onnxruntime_go v1.27.0 github.com/yuin/goldmark v1.4.13 ) require ( github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/deckarep/golang-set/v2 v2.8.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/ebitengine/oto/v3 v3.4.0 // indirect github.com/ebitengine/purego v0.9.1 // indirect diff --git a/go.sum b/go.sum index 24ca3bd..f95017b 100644 --- a/go.sum +++ b/go.sum @@ -81,6 +81,10 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/takara-ai/go-tokenizers v1.0.0 h1:C+UQl3fPFw08YQdwthzPZbqykh6yumzjPrSs+3OSe7o= +github.com/takara-ai/go-tokenizers v1.0.0/go.mod h1:2A7hN3gMtAARJ2V3sYyIzTDm+GNTudBX+CwUOyIVH2A= +github.com/yalue/onnxruntime_go v1.27.0 h1:c1YSgDNtpf0WGtxj3YeRIb8VC5LmM1J+Ve3uHdteC1U= +github.com/yalue/onnxruntime_go v1.27.0/go.mod h1:b4X26A8pekNb1ACJ58wAXgNKeUCGEAQ9dmACut9Sm/4= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= diff --git a/rag/embedder.go b/rag/embedder.go index 1d29877..386d508 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -9,6 +9,10 @@ import ( "gf-lt/models" "log/slog" 
"net/http" + + "github.com/takara-ai/go-tokenizers/tokenizers" + + "github.com/yalue/onnxruntime_go" ) // Embedder defines the interface for embedding text @@ -134,11 +138,62 @@ func (a *APIEmbedder) EmbedSlice(lines []string) ([][]float32, error) { return embeddings, nil } -// TODO: ONNXEmbedder implementation would go here -// This would require: // 1. Loading ONNX models locally // 2. Using a Go ONNX runtime (like gorgonia/onnx or similar) // 3. Converting text to embeddings without external API calls -// -// For now, we'll focus on the API implementation which is already working in the current system, -// and can be extended later when we have ONNX runtime integration + +type ONNXEmbedder struct { + session *onnxruntime_go.DynamicAdvancedSession + tokenizer *tokenizers.Tokenizer + dims int // 768, 512, 256, or 128 for Matryoshka +} + +func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) { + // Batch processing + inputs := e.prepareBatch(texts) + outputs := make([][]float32, len(texts)) + + // Run batch inference (much faster) + err := e.session.Run(inputs, outputs) + return outputs, err +} + +func NewONNXEmbedder(modelPath string) (*ONNXEmbedder, error) { + // Load ONNX model + session, err := onnxruntime_go.NewDynamicAdvancedSession( + modelPath, // onnx/embedgemma/model_q4.onnx + []string{"input_ids", "attention_mask"}, + []string{"sentence_embedding"}, + nil, + ) + if err != nil { + return nil, err + } + // Load tokenizer (from Hugging Face) + tokenizer, err := tokenizers.FromFile("./tokenizer.json") + return &ONNXEmbedder{ + session: session, + tokenizer: tokenizer, + }, nil +} + +func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { + // Tokenize + tokens := e.tokenizer.Encode(text, true) + // Prepare inputs + inputIDs := []int64{tokens.GetIds()} + attentionMask := []int64{tokens.GetAttentionMask()} + // Run inference + output := onnxruntime_go.NewEmptyTensor[float32]( + onnxruntime_go.NewShape(1, 768), + ) + err := e.session.Run( + map[string]any{ + "input_ids": inputIDs, + "attention_mask": attentionMask, + }, + []string{"sentence_embedding"}, + []any{&output}, + ) + return output.GetData(), nil +} diff --git a/rag/rag.go b/rag/rag.go index b63cb08..3d0f38f 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -246,7 +246,7 @@ func (r *RAG) extractImportantPhrases(query string) string { break } } - if isImportant || len(word) > 3 { + if isImportant || len(word) >= 3 { important = append(important, word) } } diff --git a/rag_issues.md b/rag_issues.md new file mode 100644 index 0000000..d9578e4 --- /dev/null +++ b/rag_issues.md @@ -0,0 +1,104 @@ +# RAG Implementation Issues and Proposed Solutions + +## Overview +The current RAG system fails to retrieve relevant information for specific queries, as demonstrated by the inability to find the "two she bears" reference in the KJV Bible (2 Kings 2:23-24). While the system retrieves documents containing the word "bear", it misses the actual verse, indicating fundamental flaws in chunking, query processing, retrieval, and answer synthesis. Below we dissect each problem and propose concrete solutions. + +--- + +### Problem 1: Chunking Destroys Semantic Coherence +- **Problem description** + The current chunking splits text into sentences and groups them by a simple word count threshold (`RAGWordLimit`). This ignores document structure (chapters, headings) and can cut through narrative units, scattering related content across multiple chunks. 
For the Bible query, the story of Elisha and the bears likely spans multiple verses; splitting it prevents any single chunk from containing the full context, diluting the embedding signal and making retrieval difficult. + +- **Proposed solution** + - **Structure-aware chunking**: Use the EPUB’s internal structure (chapters, sections) to create chunks that align with logical content units (e.g., by chapter or story). + - **Overlap between chunks**: Add a configurable overlap (e.g., 10–20% of chunk size) to preserve continuity, ensuring key phrases like "two she bears" are not split across boundaries. + - **Rich metadata**: Store book name, chapter, and verse numbers with each chunk to enable filtering and source attribution. + - **Fallback to recursive splitting**: For documents without clear structure, use a recursive character text splitter with overlap (similar to LangChain’s `RecursiveCharacterTextSplitter`) to maintain semantic boundaries (paragraphs, sentences). + +--- + +### Problem 2: Query Refinement Strips Critical Context +- **Problem description** + `RefineQuery` removes stop words and applies keyword-based filtering that discards semantically important modifiers. For "two she bears", the word "she" (a gender modifier) may be treated as a stop word, leaving "two bears". This loses the specificity of the query and causes the embedding to drift toward generic "bear" contexts. The rule-based approach cannot understand that "she bears" is a key phrase in the biblical story. + +- **Proposed solution** + - **Entity-aware query preservation**: Use a lightweight NLP model (e.g., spaCy or a BERT-based NER tagger) to identify and retain key entities (quantities, animals, names) while only removing truly irrelevant stop words. + - **Intelligent query rewriting**: Employ a small LLM (or a set of transformation rules) to generate query variations that reflect likely biblical phrasing, e.g., "two bears came out of the wood" or "Elisha and the bears". + - **Contextual stop word removal**: Instead of a static list, use a POS tagger to keep adjectives, nouns, and verbs while removing only function words that don't carry meaning. + - **Disable refinement for short queries**: If the query is already concise (like "two she bears"), skip aggressive filtering. + +--- + +### Problem 3: Embedding Similarity Fails for Rare or Specific Phrases +- **Problem description** + Dense embeddings excel at capturing semantic similarity but can fail when the query contains rare phrases or when the relevant passage is embedded in a noisy chunk. The verse "there came forth two she bears out of the wood" shares only the word "bears" with the query, and its embedding may be pulled toward the average of surrounding verses. Consequently, the similarity score may be lower than that of other chunks containing the word "bear" in generic contexts. + +- **Proposed solution** + - **Hybrid retrieval**: Combine dense embeddings with BM25 (keyword) search. BM25 excels at exact phrase matching and would likely retrieve the verse based on "two bears" even if the embedding is weak. + - Use a library like [blevesearch](https://github.com/blevesearch/bleve) to index text alongside vectors. + - Fuse results using Reciprocal Rank Fusion (RRF) or a weighted combination. + - **Query expansion**: Add relevant terms to the query (e.g., "Elisha", "2 Kings") to improve embedding alignment. 
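+
+  As a rough Go sketch of the RRF fusion mentioned above (merging a dense ranking with a BM25 ranking), assuming both retrievers return chunk IDs best-first; the `rrfFuse` helper is illustrative, not existing code:
+
+  ```go
+  package rag
+
+  import "sort"
+
+  // rrfFuse merges two ranked candidate lists (best first) with Reciprocal
+  // Rank Fusion: score(id) = sum over lists of 1/(k + rank); k = 60 is a
+  // common default.
+  func rrfFuse(dense, sparse []int64, k float64) []int64 {
+      scores := make(map[int64]float64)
+      for _, list := range [][]int64{dense, sparse} {
+          for rank, id := range list {
+              scores[id] += 1.0 / (k + float64(rank+1))
+          }
+      }
+      fused := make([]int64, 0, len(scores))
+      for id := range scores {
+          fused = append(fused, id)
+      }
+      // Highest fused score first.
+      sort.Slice(fused, func(i, j int) bool { return scores[fused[i]] > scores[fused[j]] })
+      return fused
+  }
+  ```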
+ - **Fine-tuned embeddings**: Consider using an embedding model fine-tuned on domain-specific data (e.g., biblical texts) if this is a recurring use case. + +--- + +### Problem 4: Reranking Heuristics Are Insufficient +- **Problem description** + `RerankResults` boosts results based on simple keyword matching and file name heuristics. This coarse approach cannot reliably promote the correct verse over false positives. The adjustment `distance - score/100` is arbitrary and may not reflect true relevance. + +- **Proposed solution** + - **Cross-encoder reranking**: After retrieving top candidates (e.g., top 20) with hybrid search, rerank them using a cross-encoder model that directly computes the relevance score between the query and each chunk. + - Models like `cross-encoder/ms-marco-MiniLM-L-6-v2` are lightweight and can be run locally or via a microservice. + - **Score normalization**: Use the cross-encoder scores to reorder results, discarding low-scoring ones. + - **Contextual boosting**: If metadata (e.g., chapter/verse) is available, boost results that match the query’s expected location (if inferable). + +--- + +### Problem 5: Answer Synthesis Is Not Generative +- **Problem description** + `SynthesizeAnswer` embeds a prompt and attempts to retrieve a pre-stored answer, falling back to concatenating truncated chunks. This is fundamentally flawed: RAG requires an LLM to generate a coherent answer from retrieved context. In the Bible example, even if the correct verse were retrieved, the system would only output a snippet, not an answer explaining the reference. + +- **Proposed solution** + - **Integrate an LLM for generation**: Use a local model (via Ollama, Llama.cpp) or a cloud API (OpenAI, etc.) to synthesize answers. + - Construct a prompt that includes the retrieved chunks (with metadata) and the user query. + - Instruct the model to answer based solely on the provided context and cite sources (e.g., "According to 2 Kings 2:24..."). + - **Implement a fallback**: If no relevant chunks are retrieved, return a message like "I couldn't find that information in your documents." + - **Streaming support**: For better UX, stream the answer token-by-token. + +--- + +### Problem 6: Concurrency and Error Handling +- **Problem description** + The code uses a mutex only in `LoadRAG`, leaving other methods vulnerable to race conditions. The global status channel `LongJobStatusCh` may drop messages due to `select/default`, and errors are sometimes logged but not propagated. Ingestion is synchronous and slow. + +- **Proposed solution** + - **Add context support**: Pass `context.Context` to all methods to allow cancellation and timeouts. + - **Worker pools for embedding**: Parallelize batch embedding with a controlled number of workers to respect API rate limits and speed up ingestion. + - **Retry logic**: Implement exponential backoff for transient API errors. + - **Replace global channel**: Use a callback or an injectable status reporter to avoid dropping messages. + - **Fine-grained locking**: Protect shared state (e.g., `storage`) with appropriate synchronization. + +--- + +### Problem 7: Lack of Monitoring and Evaluation +- **Problem description** + There are no metrics to track retrieval quality, latency, or user satisfaction. The failure case was discovered manually; without systematic evaluation, regressions will go unnoticed. + +- **Proposed solution** + - **Log key metrics**: Record query, retrieved chunk IDs, scores, and latency for each search. 
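+
+  A minimal sketch of such a log record using `log/slog` (already the project's logger); the `logRetrieval` name and its field names are illustrative:
+
+  ```go
+  package rag
+
+  import (
+      "log/slog"
+      "time"
+  )
+
+  // logRetrieval records one search event so recall and latency regressions
+  // show up when replaying logged queries against a test set.
+  func logRetrieval(l *slog.Logger, query string, chunkIDs []int64, scores []float32, dur time.Duration) {
+      l.Info("rag_search",
+          "query", query,
+          "chunk_ids", chunkIDs,
+          "scores", scores,
+          "latency_ms", dur.Milliseconds())
+  }
+  ```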
+ - **User feedback**: Add a mechanism for users to rate answers (thumbs up/down) and use this data to improve retrieval. + - **Offline evaluation**: Create a test set of queries and expected relevant chunks (e.g., the Bible example) to measure recall@k, MRR, etc., and run it after each change. + +--- + +## Summary +Fixing the RAG pipeline requires a multi-pronged approach: +1. **Structure-aware chunking** with metadata. +2. **Hybrid retrieval** (dense + sparse). +3. **Query understanding** via entity preservation and intelligent rewriting. +4. **Cross-encoder reranking** for precision. +5. **LLM-based answer generation**. +6. **Robust concurrency and error handling**. +7. **Monitoring and evaluation** to track improvements. + +Implementing these changes will transform the system from a brittle keyword matcher into a reliable knowledge assistant capable of handling nuanced queries like the "two she bears" reference. -- cgit v1.2.3 From 7c56e27dbe904b3c08b3eee375542011458e297c Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 14:27:19 +0300 Subject: Dep: trying sugarme tokenizer --- go.mod | 7 ++- go.sum | 15 ++++- rag/embedder.go | 181 +++++++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 164 insertions(+), 39 deletions(-) diff --git a/go.mod b/go.mod index d94cfbf..531609a 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/neurosnap/sentences v1.1.2 github.com/playwright-community/playwright-go v0.5700.1 github.com/rivo/tview v0.42.0 - github.com/takara-ai/go-tokenizers v1.0.0 + github.com/sugarme/tokenizer v0.3.0 github.com/yalue/onnxruntime_go v1.27.0 github.com/yuin/goldmark v1.4.13 ) @@ -27,6 +27,7 @@ require ( github.com/dustin/go-humanize v1.0.1 // indirect github.com/ebitengine/oto/v3 v3.4.0 // indirect github.com/ebitengine/purego v0.9.1 // indirect + github.com/emirpasic/gods v1.18.1 // indirect github.com/gdamore/encoding v1.0.1 // indirect github.com/go-jose/go-jose/v3 v3.0.4 // indirect github.com/go-stack/stack v1.8.1 // indirect @@ -35,10 +36,14 @@ require ( github.com/hajimehoshi/oto/v2 v2.3.1 // indirect github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/patrickmn/go-cache v2.1.0+incompatible // indirect github.com/pkg/errors v0.9.1 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect + github.com/schollz/progressbar/v2 v2.15.0 // indirect + github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c // indirect golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect golang.org/x/net v0.48.0 // indirect golang.org/x/sys v0.39.0 // indirect diff --git a/go.sum b/go.sum index f95017b..73d273b 100644 --- a/go.sum +++ b/go.sum @@ -21,6 +21,8 @@ github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/ github.com/ebitengine/oto/v3 v3.4.0/go.mod h1:IOleLVD0m+CMak3mRVwsYY8vTctQgOM0iiL6S7Ar7eI= github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A= github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= +github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= +github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/gdamore/encoding v1.0.1 h1:YzKZckdBL6jVt2Gc+5p82qhrGiqMdG/eNs6Wy0u3Uhw= 
github.com/gdamore/encoding v1.0.1/go.mod h1:0Z0cMFinngz9kS1QfMjCP8TY7em3bZYeeklsSDPivEo= github.com/gdamore/tcell/v2 v2.13.2 h1:5j4srfF8ow3HICOv/61/sOhQtA25qxEB2XR3Q/Bhx2g= @@ -61,10 +63,14 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7ZoUw= github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ= +github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= +github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U= @@ -77,12 +83,17 @@ github.com/rivo/tview v0.42.0 h1:b/ftp+RxtDsHSaynXTbJb+/n/BxDEi+W3UfF5jILK6c= github.com/rivo/tview v0.42.0/go.mod h1:cSfIYfhpSGCjp3r/ECJb+GKS7cGJnqV8vfjQPwoXyfY= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/schollz/progressbar/v2 v2.15.0 h1:dVzHQ8fHRmtPjD3K10jT3Qgn/+H+92jhPrhmxIJfDz8= +github.com/schollz/progressbar/v2 v2.15.0/go.mod h1:UdPq3prGkfQ7MOzZKlDRpYKcFqEMczbD7YmbPgpzKMI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/takara-ai/go-tokenizers v1.0.0 h1:C+UQl3fPFw08YQdwthzPZbqykh6yumzjPrSs+3OSe7o= -github.com/takara-ai/go-tokenizers v1.0.0/go.mod h1:2A7hN3gMtAARJ2V3sYyIzTDm+GNTudBX+CwUOyIVH2A= +github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c h1:pwb4kNSHb4K89ymCaN+5lPH/MwnfSVg4rzGDh4d+iy4= +github.com/sugarme/regexpset v0.0.0-20200920021344-4d4ec8eaf93c/go.mod h1:2gwkXLWbDGUQWeL3RtpCmcY4mzCtU13kb9UsAg9xMaw= +github.com/sugarme/tokenizer v0.3.0 h1:FE8DYbNSz/kSbgEo9l/RjgYHkIJYEdskumitFQBE9FE= +github.com/sugarme/tokenizer v0.3.0/go.mod h1:VJ+DLK5ZEZwzvODOWwY0cw+B1dabTd3nCB5HuFCItCc= github.com/yalue/onnxruntime_go v1.27.0 h1:c1YSgDNtpf0WGtxj3YeRIb8VC5LmM1J+Ve3uHdteC1U= github.com/yalue/onnxruntime_go v1.27.0/go.mod h1:b4X26A8pekNb1ACJ58wAXgNKeUCGEAQ9dmACut9Sm/4= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= diff --git a/rag/embedder.go b/rag/embedder.go index 386d508..396f04b 100644 --- a/rag/embedder.go +++ 
b/rag/embedder.go @@ -10,8 +10,8 @@ import ( "log/slog" "net/http" - "github.com/takara-ai/go-tokenizers/tokenizers" - + "github.com/sugarme/tokenizer" + "github.com/sugarme/tokenizer/pretrained" "github.com/yalue/onnxruntime_go" ) @@ -141,59 +141,168 @@ func (a *APIEmbedder) EmbedSlice(lines []string) ([][]float32, error) { // 1. Loading ONNX models locally // 2. Using a Go ONNX runtime (like gorgonia/onnx or similar) // 3. Converting text to embeddings without external API calls - type ONNXEmbedder struct { session *onnxruntime_go.DynamicAdvancedSession - tokenizer *tokenizers.Tokenizer - dims int // 768, 512, 256, or 128 for Matryoshka + tokenizer *tokenizer.Tokenizer + dims int // embedding dimension (e.g., 768) + logger *slog.Logger } -func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) { - // Batch processing - inputs := e.prepareBatch(texts) - outputs := make([][]float32, len(texts)) - - // Run batch inference (much faster) - err := e.session.Run(inputs, outputs) - return outputs, err -} - -func NewONNXEmbedder(modelPath string) (*ONNXEmbedder, error) { - // Load ONNX model +func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Logger) (*ONNXEmbedder, error) { + // Load tokenizer using sugarme/tokenizer + tok, err := pretrained.FromFile(tokenizerPath) + if err != nil { + return nil, fmt.Errorf("failed to load tokenizer: %w", err) + } + // Create ONNX session session, err := onnxruntime_go.NewDynamicAdvancedSession( modelPath, // onnx/embedgemma/model_q4.onnx []string{"input_ids", "attention_mask"}, []string{"sentence_embedding"}, - nil, + nil, // optional options ) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to create ONNX session: %w", err) } - // Load tokenizer (from Hugging Face) - tokenizer, err := tokenizers.FromFile("./tokenizer.json") return &ONNXEmbedder{ session: session, - tokenizer: tokenizer, + tokenizer: tok, + dims: dims, + logger: logger, }, nil } func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { - // Tokenize - tokens := e.tokenizer.Encode(text, true) - // Prepare inputs - inputIDs := []int64{tokens.GetIds()} - attentionMask := []int64{tokens.GetAttentionMask()} - // Run inference - output := onnxruntime_go.NewEmptyTensor[float32]( - onnxruntime_go.NewShape(1, 768), + // 1. Tokenize + encoding, err := e.tokenizer.Encode(text, true) // true = add special tokens + if err != nil { + return nil, fmt.Errorf("tokenization failed: %w", err) + } + // Convert []int32 to []int64 for ONNX + inputIDs := make([]int64, len(encoding.GetIDs())) + for i, id := range encoding.GetIDs() { + inputIDs[i] = int64(id) + } + attentionMask := make([]int64, len(encoding.GetAttentionMask())) + for i, m := range encoding.GetAttentionMask() { + attentionMask[i] = int64(m) + } + // 2. Create input tensors (shape: [1, seq_len]) + seqLen := int64(len(inputIDs)) + inputIDsTensor, err := onnxruntime_go.NewTensor(onnxruntime_go.NewShape(1, seqLen), inputIDs) + if err != nil { + return nil, fmt.Errorf("failed to create input_ids tensor: %w", err) + } + defer inputIDsTensor.Destroy() + maskTensor, err := onnxruntime_go.NewTensor(onnxruntime_go.NewShape(1, seqLen), attentionMask) + if err != nil { + return nil, fmt.Errorf("failed to create attention_mask tensor: %w", err) + } + defer maskTensor.Destroy() + // 3. 
Create output tensor (shape: [1, dims]) + outputTensor, err := onnxruntime_go.NewEmptyTensor[float32](onnxruntime_go.NewShape(1, int64(e.dims))) + if err != nil { + return nil, fmt.Errorf("failed to create output tensor: %w", err) + } + defer outputTensor.Destroy() + // 4. Run inference + err = e.session.Run( + map[string]*onnxruntime_go.Tensor{ + "input_ids": inputIDsTensor, + "attention_mask": maskTensor, + }, + []string{"sentence_embedding"}, + []*onnxruntime_go.Tensor{outputTensor}, ) - err := e.session.Run( - map[string]any{ - "input_ids": inputIDs, - "attention_mask": attentionMask, + if err != nil { + return nil, fmt.Errorf("inference failed: %w", err) + } + // 5. Extract data + outputData := outputTensor.GetData() + // outputTensor is owned by us, but GetData returns a slice that remains valid until Destroy. + // We need to copy if we want to keep it after Destroy (we defer Destroy, so copy now). + embedding := make([]float32, len(outputData)) + copy(embedding, outputData) + return embedding, nil +} + +// EmbedSlice (batch) – to be implemented properly +func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) { + if len(texts) == 0 { + return nil, nil + } + // 1. Tokenize all texts and find max length for padding + encodings := make([]*tokenizer.Encoding, len(texts)) + maxLen := 0 + for i, txt := range texts { + enc, err := e.tokenizer.Encode(txt, true) + if err != nil { + return nil, fmt.Errorf("tokenization failed at index %d: %w", i, err) + } + encodings[i] = enc + if l := len(enc.GetIDs()); l > maxLen { + maxLen = l + } + } + // 2. Build padded input_ids and attention_mask (shape: [batch, maxLen]) + batchSize := len(texts) + inputIDs := make([]int64, batchSize*maxLen) + attentionMask := make([]int64, batchSize*maxLen) + for i, enc := range encodings { + ids := enc.GetIDs() + mask := enc.GetAttentionMask() + offset := i * maxLen + // copy actual tokens + for j := 0; j < len(ids); j++ { + inputIDs[offset+j] = int64(ids[j]) + attentionMask[offset+j] = int64(mask[j]) + } + // remaining positions (padding) are already zero-initialized + } + // 3. Create tensors + inputIDsTensor, err := onnxruntime_go.NewTensor( + onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)), + inputIDs, + ) + if err != nil { + return nil, err + } + defer inputIDsTensor.Destroy() + maskTensor, err := onnxruntime_go.NewTensor( + onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)), + attentionMask, + ) + if err != nil { + return nil, err + } + defer maskTensor.Destroy() + outputTensor, err := onnxruntime_go.NewEmptyTensor[float32]( + onnxruntime_go.NewShape(int64(batchSize), int64(e.dims)), + ) + if err != nil { + return nil, err + } + defer outputTensor.Destroy() + // 4. Run + err = e.session.Run( + map[string]*onnxruntime_go.Tensor{ + "input_ids": inputIDsTensor, + "attention_mask": maskTensor, }, []string{"sentence_embedding"}, - []any{&output}, + []*onnxruntime_go.Tensor{outputTensor}, ) - return output.GetData(), nil + if err != nil { + return nil, err + } + // 5. 
Extract batch results + outputData := outputTensor.GetData() + embeddings := make([][]float32, batchSize) + for i := 0; i < batchSize; i++ { + start := i * e.dims + emb := make([]float32, e.dims) + copy(emb, outputData[start:start+e.dims]) + embeddings[i] = emb + } + return embeddings, nil } -- cgit v1.2.3 From 4bd6883966824cff81b86e8bf79e278165d7d24a Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 14:38:26 +0300 Subject: WIP --- rag/embedder.go | 105 +++++++++++++++++++++++--------------------------------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/rag/embedder.go b/rag/embedder.go index 396f04b..988d91e 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -174,134 +174,115 @@ func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Log func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { // 1. Tokenize - encoding, err := e.tokenizer.Encode(text, true) // true = add special tokens + encoding, err := e.tokenizer.EncodeSingle(text) if err != nil { return nil, fmt.Errorf("tokenization failed: %w", err) } - // Convert []int32 to []int64 for ONNX - inputIDs := make([]int64, len(encoding.GetIDs())) - for i, id := range encoding.GetIDs() { + // 2. Convert to int64 and create attention mask + ids := encoding.Ids + inputIDs := make([]int64, len(ids)) + attentionMask := make([]int64, len(ids)) + for i, id := range ids { inputIDs[i] = int64(id) + attentionMask[i] = 1 } - attentionMask := make([]int64, len(encoding.GetAttentionMask())) - for i, m := range encoding.GetAttentionMask() { - attentionMask[i] = int64(m) - } - // 2. Create input tensors (shape: [1, seq_len]) + // 3. Create input tensors (shape: [1, seq_len]) seqLen := int64(len(inputIDs)) - inputIDsTensor, err := onnxruntime_go.NewTensor(onnxruntime_go.NewShape(1, seqLen), inputIDs) + inputIDsTensor, err := onnxruntime_go.NewTensor[int64]( + onnxruntime_go.NewShape(1, seqLen), + inputIDs, + ) if err != nil { return nil, fmt.Errorf("failed to create input_ids tensor: %w", err) } defer inputIDsTensor.Destroy() - maskTensor, err := onnxruntime_go.NewTensor(onnxruntime_go.NewShape(1, seqLen), attentionMask) + maskTensor, err := onnxruntime_go.NewTensor[int64]( + onnxruntime_go.NewShape(1, seqLen), + attentionMask, + ) if err != nil { return nil, fmt.Errorf("failed to create attention_mask tensor: %w", err) } defer maskTensor.Destroy() - // 3. Create output tensor (shape: [1, dims]) - outputTensor, err := onnxruntime_go.NewEmptyTensor[float32](onnxruntime_go.NewShape(1, int64(e.dims))) + // 4. Create output tensor + outputTensor, err := onnxruntime_go.NewEmptyTensor[float32]( + onnxruntime_go.NewShape(1, int64(e.dims)), + ) if err != nil { return nil, fmt.Errorf("failed to create output tensor: %w", err) } defer outputTensor.Destroy() - // 4. Run inference + // 5. Run inference err = e.session.Run( - map[string]*onnxruntime_go.Tensor{ - "input_ids": inputIDsTensor, - "attention_mask": maskTensor, - }, + []onnxruntime_go.Value{inputIDsTensor, maskTensor}, []string{"sentence_embedding"}, - []*onnxruntime_go.Tensor{outputTensor}, + []onnxruntime_go.Value{outputTensor}, ) if err != nil { return nil, fmt.Errorf("inference failed: %w", err) } - // 5. Extract data + // 6. Copy output data outputData := outputTensor.GetData() - // outputTensor is owned by us, but GetData returns a slice that remains valid until Destroy. - // We need to copy if we want to keep it after Destroy (we defer Destroy, so copy now). 
embedding := make([]float32, len(outputData)) copy(embedding, outputData) return embedding, nil } -// EmbedSlice (batch) – to be implemented properly func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) { - if len(texts) == 0 { - return nil, nil - } - // 1. Tokenize all texts and find max length for padding encodings := make([]*tokenizer.Encoding, len(texts)) maxLen := 0 for i, txt := range texts { - enc, err := e.tokenizer.Encode(txt, true) + enc, err := e.tokenizer.EncodeSingle(txt) if err != nil { - return nil, fmt.Errorf("tokenization failed at index %d: %w", i, err) + return nil, err } encodings[i] = enc - if l := len(enc.GetIDs()); l > maxLen { + if l := len(enc.Ids); l > maxLen { maxLen = l } } - // 2. Build padded input_ids and attention_mask (shape: [batch, maxLen]) batchSize := len(texts) inputIDs := make([]int64, batchSize*maxLen) attentionMask := make([]int64, batchSize*maxLen) for i, enc := range encodings { - ids := enc.GetIDs() - mask := enc.GetAttentionMask() + ids := enc.Ids offset := i * maxLen - // copy actual tokens - for j := 0; j < len(ids); j++ { - inputIDs[offset+j] = int64(ids[j]) - attentionMask[offset+j] = int64(mask[j]) + for j, id := range ids { + inputIDs[offset+j] = int64(id) + attentionMask[offset+j] = 1 } - // remaining positions (padding) are already zero-initialized + // Remaining positions are already zero (padding) } - // 3. Create tensors - inputIDsTensor, err := onnxruntime_go.NewTensor( + // Create tensors with shape [batchSize, maxLen] + inputTensor, _ := onnxruntime_go.NewTensor[int64]( onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)), inputIDs, ) - if err != nil { - return nil, err - } - defer inputIDsTensor.Destroy() - maskTensor, err := onnxruntime_go.NewTensor( + defer inputTensor.Destroy() + maskTensor, _ := onnxruntime_go.NewTensor[int64]( onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)), attentionMask, ) - if err != nil { - return nil, err - } defer maskTensor.Destroy() - outputTensor, err := onnxruntime_go.NewEmptyTensor[float32]( + outputTensor, _ := onnxruntime_go.NewEmptyTensor[float32]( onnxruntime_go.NewShape(int64(batchSize), int64(e.dims)), ) - if err != nil { - return nil, err - } defer outputTensor.Destroy() - // 4. Run - err = e.session.Run( - map[string]*onnxruntime_go.Tensor{ - "input_ids": inputIDsTensor, - "attention_mask": maskTensor, - }, + err := e.session.Run( + []onnxruntime_go.Value{inputTensor, maskTensor}, []string{"sentence_embedding"}, - []*onnxruntime_go.Tensor{outputTensor}, + []onnxruntime_go.Value{outputTensor}, ) if err != nil { return nil, err } - // 5. Extract batch results - outputData := outputTensor.GetData() + // Extract embeddings per batch item + data := outputTensor.GetData() embeddings := make([][]float32, batchSize) for i := 0; i < batchSize; i++ { start := i * e.dims emb := make([]float32, e.dims) - copy(emb, outputData[start:start+e.dims]) + copy(emb, data[start:start+e.dims]) embeddings[i] = emb } return embeddings, nil -- cgit v1.2.3 From c2757653a3429ab3f9e76081328a3877bc11ed4d Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 14:49:59 +0300 Subject: Fix: buildable --- rag/embedder.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/rag/embedder.go b/rag/embedder.go index 988d91e..6903a5d 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -215,7 +215,6 @@ func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { // 5. 
Run inference err = e.session.Run( []onnxruntime_go.Value{inputIDsTensor, maskTensor}, - []string{"sentence_embedding"}, []onnxruntime_go.Value{outputTensor}, ) if err != nil { @@ -270,7 +269,6 @@ func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) { defer outputTensor.Destroy() err := e.session.Run( []onnxruntime_go.Value{inputTensor, maskTensor}, - []string{"sentence_embedding"}, []onnxruntime_go.Value{outputTensor}, ) if err != nil { -- cgit v1.2.3 From c2c107c78688dedb8429ef13bd0aa102eeb32fdb Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 16:05:03 +0300 Subject: Dep: make-fetch onnx embed gemma --- .gitignore | 1 + Makefile | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index fa70754..15b83b4 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ gflt chat_exports/*.json ragimport .env +onnx/ diff --git a/Makefile b/Makefile index 9113919..4314d99 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,9 @@ lint: ## Run linters. Use make install-linters first. lintall: lint noblanks ./... +fetch-onnx: + mkdir -p onnx/embedgemma && curl -o onnx/embedgemma/config.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/config.json && curl -o onnx/embedgemma/tokenizer.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/tokenizer.json && curl -o onnx/embedgemma/model_q4.onnx -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx && curl -o onnx/embedgemma/model_q4.onnx_data -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx_data?download=true + # Whisper STT Setup (in batteries directory) setup-whisper: build-whisper download-whisper-model -- cgit v1.2.3 From ac8c8bb0558a00cf0d025ab8522aaa57b8cba7de Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 19:20:21 +0300 Subject: Enha: onnx config vars --- bot.go | 7 ++++--- config.example.toml | 3 +++ config/config.go | 7 +++++-- rag/embedder.go | 23 +++++++++++++++++------ rag/rag.go | 16 ++++++++++++++-- 5 files changed, 43 insertions(+), 13 deletions(-) diff --git a/bot.go b/bot.go index 13ee074..5463800 100644 --- a/bot.go +++ b/bot.go @@ -1393,12 +1393,13 @@ func updateModelLists() { } } // if llama.cpp started after gf-lt? 
- localModelsMu.Lock() - LocalModels, err = fetchLCPModelsWithLoadStatus() - localModelsMu.Unlock() + ml, err := fetchLCPModelsWithLoadStatus() if err != nil { logger.Warn("failed to fetch llama.cpp models", "error", err) } + localModelsMu.Lock() + LocalModels = ml + localModelsMu.Unlock() // set already loaded model in llama.cpp if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") { localModelsMu.Lock() diff --git a/config.example.toml b/config.example.toml index 39a730b..f5820da 100644 --- a/config.example.toml +++ b/config.example.toml @@ -13,6 +13,9 @@ OpenRouterChatAPI = "https://openrouter.ai/api/v1/chat/completions" # embeddings EmbedURL = "http://localhost:8082/v1/embeddings" HFToken = "" +EmbedModelPath = "onnx/embedgemma/model_q4.onnx" +EmbedTokenizerPath = "onnx/embedgemma/tokenizer.json" +EmbedDims = 768 # ShowSys = true LogFile = "log.txt" diff --git a/config/config.go b/config/config.go index 412eaaa..84ec480 100644 --- a/config/config.go +++ b/config/config.go @@ -34,8 +34,11 @@ type Config struct { ImagePreview bool `toml:"ImagePreview"` EnableMouse bool `toml:"EnableMouse"` // embeddings - EmbedURL string `toml:"EmbedURL"` - HFToken string `toml:"HFToken"` + EmbedURL string `toml:"EmbedURL"` + HFToken string `toml:"HFToken"` + EmbedModelPath string `toml:"EmbedModelPath"` + EmbedTokenizerPath string `toml:"EmbedTokenizerPath"` + EmbedDims int `toml:"EmbedDims"` // rag settings RAGEnabled bool `toml:"RAGEnabled"` RAGDir string `toml:"RAGDir"` diff --git a/rag/embedder.go b/rag/embedder.go index 6903a5d..b0a3226 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -9,6 +9,7 @@ import ( "gf-lt/models" "log/slog" "net/http" + "sync" "github.com/sugarme/tokenizer" "github.com/sugarme/tokenizer/pretrained" @@ -148,7 +149,17 @@ type ONNXEmbedder struct { logger *slog.Logger } +var onnxInitOnce sync.Once + func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Logger) (*ONNXEmbedder, error) { + // Initialize ONNX runtime environment once + onnxInitOnce.Do(func() { + onnxruntime_go.SetSharedLibraryPath("/usr/local/lib/libonnxruntime.so") + err := onnxruntime_go.InitializeEnvironment() + if err != nil { + logger.Error("failed to initialize ONNX runtime", "error", err) + } + }) // Load tokenizer using sugarme/tokenizer tok, err := pretrained.FromFile(tokenizerPath) if err != nil { @@ -195,7 +206,7 @@ func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { if err != nil { return nil, fmt.Errorf("failed to create input_ids tensor: %w", err) } - defer inputIDsTensor.Destroy() + defer func() { _ = inputIDsTensor.Destroy() }() maskTensor, err := onnxruntime_go.NewTensor[int64]( onnxruntime_go.NewShape(1, seqLen), attentionMask, @@ -203,7 +214,7 @@ func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { if err != nil { return nil, fmt.Errorf("failed to create attention_mask tensor: %w", err) } - defer maskTensor.Destroy() + defer func() { _ = maskTensor.Destroy() }() // 4. Create output tensor outputTensor, err := onnxruntime_go.NewEmptyTensor[float32]( onnxruntime_go.NewShape(1, int64(e.dims)), @@ -211,7 +222,7 @@ func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { if err != nil { return nil, fmt.Errorf("failed to create output tensor: %w", err) } - defer outputTensor.Destroy() + defer func() { _ = outputTensor.Destroy() }() // 5. 
Run inference err = e.session.Run( []onnxruntime_go.Value{inputIDsTensor, maskTensor}, @@ -257,16 +268,16 @@ func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) { onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)), inputIDs, ) - defer inputTensor.Destroy() + defer func() { _ = inputTensor.Destroy() }() maskTensor, _ := onnxruntime_go.NewTensor[int64]( onnxruntime_go.NewShape(int64(batchSize), int64(maxLen)), attentionMask, ) - defer maskTensor.Destroy() + defer func() { _ = maskTensor.Destroy() }() outputTensor, _ := onnxruntime_go.NewEmptyTensor[float32]( onnxruntime_go.NewShape(int64(batchSize), int64(e.dims)), ) - defer outputTensor.Destroy() + defer func() { _ = outputTensor.Destroy() }() err := e.session.Run( []onnxruntime_go.Value{inputTensor, maskTensor}, []onnxruntime_go.Value{outputTensor}, diff --git a/rag/rag.go b/rag/rag.go index 3d0f38f..654afde 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -34,8 +34,20 @@ type RAG struct { } func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) *RAG { - // Initialize with API embedder by default, could be configurable later - embedder := NewAPIEmbedder(l, cfg) + var embedder Embedder + if cfg.EmbedModelPath != "" && cfg.EmbedTokenizerPath != "" { + emb, err := NewONNXEmbedder(cfg.EmbedModelPath, cfg.EmbedTokenizerPath, cfg.EmbedDims, l) + if err != nil { + l.Error("failed to create ONNX embedder, falling back to API", "error", err) + embedder = NewAPIEmbedder(l, cfg) + } else { + embedder = emb + l.Info("using ONNX embedder", "model", cfg.EmbedModelPath, "dims", cfg.EmbedDims) + } + } else { + embedder = NewAPIEmbedder(l, cfg) + l.Info("using API embedder", "url", cfg.EmbedURL) + } rag := &RAG{ logger: l, store: s, -- cgit v1.2.3 From efc92d884c36498220e2b8d5ad9e02f84e42d953 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 5 Mar 2026 20:02:46 +0300 Subject: Chore: onnx library lookup --- bot.go | 8 +++- rag/embedder.go | 113 ++++++++++++++++++++++++++++++++++++++++++++------------ rag/rag.go | 39 +++++++++++-------- 3 files changed, 120 insertions(+), 40 deletions(-) diff --git a/bot.go b/bot.go index 5463800..20ffeb2 100644 --- a/bot.go +++ b/bot.go @@ -1501,7 +1501,13 @@ func init() { os.Exit(1) return } - ragger = rag.New(logger, store, cfg) + ragger, err = rag.New(logger, store, cfg) + if err != nil { + logger.Error("failed to create RAG", "error", err) + } + if ragger != nil && ragger.FallbackMessage() != "" && app != nil { + showToast("RAG", "ONNX unavailable, using API: "+ragger.FallbackMessage()) + } // https://github.com/coreydaley/ggerganov-llama.cpp/blob/master/examples/server/README.md // load all chats in memory if _, err := loadHistoryChats(); err != nil { diff --git a/rag/embedder.go b/rag/embedder.go index b0a3226..59dbfd2 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -9,6 +9,7 @@ import ( "gf-lt/models" "log/slog" "net/http" + "os" "sync" "github.com/sugarme/tokenizer" @@ -143,47 +144,111 @@ func (a *APIEmbedder) EmbedSlice(lines []string) ([][]float32, error) { // 2. Using a Go ONNX runtime (like gorgonia/onnx or similar) // 3. 
Converting text to embeddings without external API calls type ONNXEmbedder struct { - session *onnxruntime_go.DynamicAdvancedSession - tokenizer *tokenizer.Tokenizer - dims int // embedding dimension (e.g., 768) - logger *slog.Logger + session *onnxruntime_go.DynamicAdvancedSession + tokenizer *tokenizer.Tokenizer + tokenizerPath string + dims int + logger *slog.Logger + mu sync.Mutex + modelPath string } var onnxInitOnce sync.Once +var onnxReady bool +var onnxLibPath string + +var onnxLibPaths = []string{ + "/usr/lib/libonnxruntime.so", + "/usr/local/lib/libonnxruntime.so", + "/usr/lib/x86_64-linux-gnu/libonnxruntime.so", + "/opt/onnxruntime/lib/libonnxruntime.so", +} + +func findONNXLibrary() string { + for _, path := range onnxLibPaths { + if _, err := os.Stat(path); err == nil { + return path + } + } + return "" +} func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Logger) (*ONNXEmbedder, error) { - // Initialize ONNX runtime environment once - onnxInitOnce.Do(func() { - onnxruntime_go.SetSharedLibraryPath("/usr/local/lib/libonnxruntime.so") - err := onnxruntime_go.InitializeEnvironment() + // Check if model and tokenizer files exist + if _, err := os.Stat(modelPath); err != nil { + return nil, fmt.Errorf("ONNX model not found: %w", err) + } + if _, err := os.Stat(tokenizerPath); err != nil { + return nil, fmt.Errorf("tokenizer not found: %w", err) + } + + // Find ONNX library + onnxLibPath = findONNXLibrary() + if onnxLibPath == "" { + return nil, errors.New("ONNX runtime library not found in standard locations") + } + + emb := &ONNXEmbedder{ + tokenizerPath: tokenizerPath, + dims: dims, + logger: logger, + modelPath: modelPath, + } + return emb, nil +} + +func (e *ONNXEmbedder) ensureInitialized() error { + if e.session != nil { + return nil + } + e.mu.Lock() + defer e.mu.Unlock() + if e.session != nil { + return nil + } + + // Load tokenizer lazily + if e.tokenizer == nil { + tok, err := pretrained.FromFile(e.tokenizerPath) if err != nil { - logger.Error("failed to initialize ONNX runtime", "error", err) + return fmt.Errorf("failed to load tokenizer: %w", err) + } + e.tokenizer = tok + } + + onnxInitOnce.Do(func() { + onnxruntime_go.SetSharedLibraryPath(onnxLibPath) + if err := onnxruntime_go.InitializeEnvironment(); err != nil { + e.logger.Error("failed to initialize ONNX runtime", "error", err) + onnxReady = false + return } + onnxReady = true }) - // Load tokenizer using sugarme/tokenizer - tok, err := pretrained.FromFile(tokenizerPath) - if err != nil { - return nil, fmt.Errorf("failed to load tokenizer: %w", err) + if !onnxReady { + return errors.New("ONNX runtime not ready") } - // Create ONNX session session, err := onnxruntime_go.NewDynamicAdvancedSession( - modelPath, // onnx/embedgemma/model_q4.onnx + e.getModelPath(), []string{"input_ids", "attention_mask"}, []string{"sentence_embedding"}, - nil, // optional options + nil, ) if err != nil { - return nil, fmt.Errorf("failed to create ONNX session: %w", err) - } - return &ONNXEmbedder{ - session: session, - tokenizer: tok, - dims: dims, - logger: logger, - }, nil + return fmt.Errorf("failed to create ONNX session: %w", err) + } + e.session = session + return nil +} + +func (e *ONNXEmbedder) getModelPath() string { + return e.modelPath } func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { + if err := e.ensureInitialized(); err != nil { + return nil, err + } // 1. 
Tokenize encoding, err := e.tokenizer.EncodeSingle(text) if err != nil { diff --git a/rag/rag.go b/rag/rag.go index 654afde..fa30303 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -25,20 +25,23 @@ var ( ) type RAG struct { - logger *slog.Logger - store storage.FullRepo - cfg *config.Config - embedder Embedder - storage *VectorStorage - mu sync.Mutex + logger *slog.Logger + store storage.FullRepo + cfg *config.Config + embedder Embedder + storage *VectorStorage + mu sync.Mutex + fallbackMsg string } -func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) *RAG { +func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) (*RAG, error) { var embedder Embedder + var fallbackMsg string if cfg.EmbedModelPath != "" && cfg.EmbedTokenizerPath != "" { emb, err := NewONNXEmbedder(cfg.EmbedModelPath, cfg.EmbedTokenizerPath, cfg.EmbedDims, l) if err != nil { l.Error("failed to create ONNX embedder, falling back to API", "error", err) + fallbackMsg = err.Error() embedder = NewAPIEmbedder(l, cfg) } else { embedder = emb @@ -49,16 +52,17 @@ func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) *RAG { l.Info("using API embedder", "url", cfg.EmbedURL) } rag := &RAG{ - logger: l, - store: s, - cfg: cfg, - embedder: embedder, - storage: NewVectorStorage(l, s), + logger: l, + store: s, + cfg: cfg, + embedder: embedder, + storage: NewVectorStorage(l, s), + fallbackMsg: fallbackMsg, } // Note: Vector tables are created via database migrations, not at runtime - return rag + return rag, nil } func wordCounter(sentence string) int { @@ -449,14 +453,19 @@ var ( ragOnce sync.Once ) +func (r *RAG) FallbackMessage() string { + return r.fallbackMsg +} + func Init(c *config.Config, l *slog.Logger, s storage.FullRepo) error { + var err error ragOnce.Do(func() { if c == nil || l == nil || s == nil { return } - ragInstance = New(l, s, c) + ragInstance, err = New(l, s, c) }) - return nil + return err } func GetInstance() *RAG { -- cgit v1.2.3 From e1f2a8cd7be487a3b4284ca70cc5a2a64b50f5d1 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 07:46:15 +0300 Subject: Chore: remove unused RagEnabled var --- config.example.toml | 1 - config/config.go | 1 - docs/config.md | 3 --- props_table.go | 3 --- 4 files changed, 8 deletions(-) diff --git a/config.example.toml b/config.example.toml index f5820da..15c8ca6 100644 --- a/config.example.toml +++ b/config.example.toml @@ -27,7 +27,6 @@ ChunkLimit = 100000 AutoScrollEnabled = true AutoCleanToolCallsFromCtx = false # rag settings -RAGEnabled = false RAGBatchSize = 1 RAGWordLimit = 80 RAGDir = "ragimport" diff --git a/config/config.go b/config/config.go index 84ec480..29d5744 100644 --- a/config/config.go +++ b/config/config.go @@ -40,7 +40,6 @@ type Config struct { EmbedTokenizerPath string `toml:"EmbedTokenizerPath"` EmbedDims int `toml:"EmbedDims"` // rag settings - RAGEnabled bool `toml:"RAGEnabled"` RAGDir string `toml:"RAGDir"` RAGBatchSize int `toml:"RAGBatchSize"` RAGWordLimit uint32 `toml:"RAGWordLimit"` diff --git a/docs/config.md b/docs/config.md index d8b42d6..6f11d73 100644 --- a/docs/config.md +++ b/docs/config.md @@ -71,9 +71,6 @@ This document explains how to set up and configure the application using the `co #### EmbedURL (`"http://localhost:8082/v1/embeddings"`) - The endpoint for embedding API, used for RAG (Retrieval Augmented Generation) functionality. -#### RAGEnabled (`false`) -- Enable or disable RAG functionality for enhanced context retrieval. - #### RAGBatchSize (`1`) - Number of documents to process in each RAG batch. 
diff --git a/props_table.go b/props_table.go index ec66812..5c3d8d7 100644 --- a/props_table.go +++ b/props_table.go @@ -115,9 +115,6 @@ func makePropsTable(props map[string]float32) *tview.Table { row++ } // Add checkboxes - addCheckboxRow("RAG use", cfg.RAGEnabled, func(checked bool) { - cfg.RAGEnabled = checked - }) addCheckboxRow("Inject role", injectRole, func(checked bool) { injectRole = checked }) -- cgit v1.2.3 From d2caebdb4fd3ad148aad20866503b7d46d546404 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 09:11:25 +0300 Subject: Enha (onnx): use gpu --- Makefile | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- rag/embedder.go | 68 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 164 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4314d99..78db940 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve +.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve fetch-onnx install-onnx-deps run: setconfig go build -tags extra -o gf-lt && ./gf-lt @@ -33,6 +33,102 @@ lintall: lint fetch-onnx: mkdir -p onnx/embedgemma && curl -o onnx/embedgemma/config.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/config.json && curl -o onnx/embedgemma/tokenizer.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/tokenizer.json && curl -o onnx/embedgemma/model_q4.onnx -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx && curl -o onnx/embedgemma/model_q4.onnx_data -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx_data?download=true +install-onnx-deps: ## Install ONNX Runtime with CUDA support (or CPU fallback) + @echo "=== ONNX Runtime Installer ===" && \ + echo "" && \ + echo "Checking for existing ONNX Runtime..." && \ + if ldconfig -p 2>/dev/null | grep -q libonnxruntime.so.1; then \ + echo "ONNX Runtime is already installed:" && \ + ldconfig -p 2>/dev/null | grep libonnxruntime && \ + echo "" && \ + echo "Skipping installation. To reinstall, remove existing libs first:" && \ + echo " sudo rm -f /usr/local/lib/libonnxruntime*.so*" && \ + exit 0; \ + fi && \ + echo "No ONNX Runtime found. Proceeding with installation..." && \ + echo "" && \ + echo "Detecting CUDA version..." && \ + HAS_CUDA=0 && \ + if command -v nvidia-smi >/dev/null 2>&1; then \ + CUDA_INFO=$$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1) && \ + if [ -n "$$CUDA_INFO" ]; then \ + echo "Found NVIDIA GPU with driver: $$CUDA_INFO" && \ + HAS_CUDA=1; \ + else \ + echo "NVIDIA driver found but could not detect CUDA version"; \ + fi; \ + else \ + echo "No NVIDIA GPU detected (nvidia-smi not found)"; \ + fi && \ + echo "" && \ + echo "Determining ONNX Runtime version..." 
&& \ + ARCH=$$(uname -m) && \ + if [ "$$ARCH" = "x86_64" ]; then \ + ONNX_ARCH="x64"; \ + elif [ "$$ARCH" = "aarch64" ] || [ "$$ARCH" = "arm64" ]; then \ + ONNX_ARCH="aarch64"; \ + else \ + echo "Unsupported architecture: $$ARCH" && \ + exit 1; \ + fi && \ + echo "Detected architecture: $$ARCH (ONNX runtime: $$ONNX_ARCH)" && \ + if [ "$$HAS_CUDA" = "1" ]; then \ + echo "Installing ONNX Runtime with CUDA support..."; \ + ONNX_VERSION="1.24.2"; \ + else \ + echo "Installing ONNX Runtime (CPU version)..."; \ + ONNX_VERSION="1.24.2"; \ + fi && \ + FILENAME="onnxruntime-linux-$${ONNX_ARCH}-$${ONNX_VERSION}.tgz" && \ + URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \ + echo "Downloading $${URL}..." && \ + mkdir -p /tmp/onnx-install && \ + curl -L -o /tmp/onnx-install/$${FILENAME} "$${URL}" || { \ + echo "Failed to download ONNX Runtime v$${ONNX_VERSION}. Trying v1.18.0..." && \ + ONNX_VERSION="1.18.0" && \ + FILENAME="onnxruntime-linux-$${ONNX_ARCH}-$${ONNX_VERSION}.tgz" && \ + URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \ + curl -L -o /tmp/onnx-install/$${FILENAME} "$${URL}" || { \ + echo "ERROR: Failed to download ONNX Runtime from GitHub" && \ + echo "" && \ + echo "Please install manually:" && \ + echo " 1. Go to https://github.com/microsoft/onnxruntime/releases" && \ + echo " 2. Download onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \ + echo " 3. Extract and copy to /usr/local/lib:" && \ + echo " tar -xzf onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \ + echo " sudo cp -r onnxruntime-linux-$${ONNX_ARCH}-VERSION/lib/* /usr/local/lib/" && \ + echo " sudo ldconfig" && \ + exit 1; \ + }; \ + } && \ + echo "Extracting..." && \ + cd /tmp/onnx-install && tar -xzf $${FILENAME} && \ + echo "Installing to /usr/local/lib..." && \ + ONNX_DIR=$$(find /tmp/onnx-install -maxdepth 1 -type d -name "onnxruntime-linux-*") && \ + if [ -d "$${ONNX_DIR}/lib" ]; then \ + cp -r $${ONNX_DIR}/lib/* /usr/local/lib/ 2>/dev/null || sudo cp -r $${ONNX_DIR}/lib/* /usr/local/lib/; \ + else \ + echo "ERROR: Could not find lib directory in extracted archive" && \ + exit 1; \ + fi && \ + echo "Updating library cache..." && \ + sudo ldconfig 2>/dev/null || ldconfig && \ + echo "" && \ + echo "=== Installation complete!
===" && \ + echo "" && \ + echo "Installed libraries:" && \ + ldconfig -p | grep libonnxruntime || echo "(libraries may require logout/relogin to appear)" && \ + echo "" && \ + if [ "$$HAS_CUDA" = "1" ]; then \ + echo "NOTE: CUDA-enabled ONNX Runtime installed."; \ + echo "Ensure you also have CUDA libraries installed:"; \ + echo " - libcudnn, libcublas, libcurand"; \ + else \ + echo "NOTE: CPU-only ONNX Runtime installed."; \ + echo "For GPU support, install CUDA and re-run this script."; \ + fi && \ + rm -rf /tmp/onnx-install + # Whisper STT Setup (in batteries directory) setup-whisper: build-whisper download-whisper-model diff --git a/rag/embedder.go b/rag/embedder.go index 59dbfd2..13f6a6e 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -156,14 +156,22 @@ type ONNXEmbedder struct { var onnxInitOnce sync.Once var onnxReady bool var onnxLibPath string +var cudaLibPath string var onnxLibPaths = []string{ "/usr/lib/libonnxruntime.so", + "/usr/lib/libonnxruntime.so.1.24.2", "/usr/local/lib/libonnxruntime.so", "/usr/lib/x86_64-linux-gnu/libonnxruntime.so", "/opt/onnxruntime/lib/libonnxruntime.so", } +var cudaLibPaths = []string{ + "/usr/lib/libonnxruntime_providers_cuda.so", + "/usr/local/lib/libonnxruntime_providers_cuda.so", + "/opt/onnxruntime/lib/libonnxruntime_providers_cuda.so", +} + func findONNXLibrary() string { for _, path := range onnxLibPaths { if _, err := os.Stat(path); err == nil { @@ -173,6 +181,15 @@ func findONNXLibrary() string { return "" } +func findCUDALibrary() string { + for _, path := range cudaLibPaths { + if _, err := os.Stat(path); err == nil { + return path + } + } + return "" +} + func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Logger) (*ONNXEmbedder, error) { // Check if model and tokenizer files exist if _, err := os.Stat(modelPath); err != nil { @@ -188,6 +205,12 @@ func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Log return nil, errors.New("ONNX runtime library not found in standard locations") } + // Find CUDA provider library (optional) + cudaLibPath = findCUDALibrary() + if cudaLibPath == "" { + fmt.Println("WARNING: CUDA provider library not found, will use CPU") + } + emb := &ONNXEmbedder{ tokenizerPath: tokenizerPath, dims: dims, @@ -223,16 +246,56 @@ func (e *ONNXEmbedder) ensureInitialized() error { onnxReady = false return } + // Register CUDA provider if available + if cudaLibPath != "" { + if err := onnxruntime_go.RegisterExecutionProviderLibrary("CUDA", cudaLibPath); err != nil { + e.logger.Warn("failed to register CUDA provider", "error", err) + } + } onnxReady = true }) if !onnxReady { return errors.New("ONNX runtime not ready") } + + // Create session options + opts, err := onnxruntime_go.NewSessionOptions() + if err != nil { + return fmt.Errorf("failed to create session options: %w", err) + } + defer opts.Destroy() + + // Try to add CUDA provider + useCUDA := cudaLibPath != "" + if useCUDA { + cudaOpts, err := onnxruntime_go.NewCUDAProviderOptions() + if err != nil { + e.logger.Warn("failed to create CUDA provider options, falling back to CPU", "error", err) + useCUDA = false + } else { + defer cudaOpts.Destroy() + if err := cudaOpts.Update(map[string]string{"device_id": "0"}); err != nil { + e.logger.Warn("failed to update CUDA options, falling back to CPU", "error", err) + useCUDA = false + } else if err := opts.AppendExecutionProviderCUDA(cudaOpts); err != nil { + e.logger.Warn("failed to append CUDA provider, falling back to CPU", "error", err) + useCUDA = false + } + } + } + 
+ if useCUDA { + e.logger.Info("Using CUDA for ONNX inference") + } else { + e.logger.Info("Using CPU for ONNX inference") + } + + // Create session with options session, err := onnxruntime_go.NewDynamicAdvancedSession( e.getModelPath(), []string{"input_ids", "attention_mask"}, []string{"sentence_embedding"}, - nil, + opts, ) if err != nil { return fmt.Errorf("failed to create ONNX session: %w", err) @@ -304,6 +367,9 @@ func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { } func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) { + if err := e.ensureInitialized(); err != nil { + return nil, err + } encodings := make([]*tokenizer.Encoding, len(texts)) maxLen := 0 for i, txt := range texts { -- cgit v1.2.3 From 4ef0a215119924347c2219f4677f11a96358307f Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 09:32:45 +0300 Subject: Enha (onnx): unload model if noop for 30s --- rag/embedder.go | 13 +++++++++++++ rag/rag.go | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/rag/embedder.go b/rag/embedder.go index 13f6a6e..39f4b5c 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -308,6 +308,19 @@ func (e *ONNXEmbedder) getModelPath() string { return e.modelPath } +func (e *ONNXEmbedder) Destroy() error { + e.mu.Lock() + defer e.mu.Unlock() + if e.session != nil { + if err := e.session.Destroy(); err != nil { + return fmt.Errorf("failed to destroy ONNX session: %w", err) + } + e.session = nil + e.logger.Info("ONNX session destroyed, VRAM freed") + } + return nil +} + func (e *ONNXEmbedder) Embed(text string) ([]float32, error) { if err := e.ensureInitialized(); err != nil { return nil, err diff --git a/rag/rag.go b/rag/rag.go index fa30303..d64a3e1 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -12,6 +12,7 @@ import ( "sort" "strings" "sync" + "time" "github.com/neurosnap/sentences/english" ) @@ -32,6 +33,8 @@ type RAG struct { storage *VectorStorage mu sync.Mutex fallbackMsg string + idleTimer *time.Timer + idleTimeout time.Duration } func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) (*RAG, error) { @@ -58,6 +61,7 @@ func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) (*RAG, error) { embedder: embedder, storage: NewVectorStorage(l, s), fallbackMsg: fallbackMsg, + idleTimeout: 30 * time.Second, } // Note: Vector tables are created via database migrations, not at runtime @@ -187,6 +191,7 @@ func (r *RAG) LoadRAG(fpath string) error { } } r.logger.Debug("finished writing vectors", "batches", batchCount) + r.resetIdleTimer() select { case LongJobStatusCh <- FinishedRAGStatus: default: @@ -196,10 +201,12 @@ func (r *RAG) LoadRAG(fpath string) error { } func (r *RAG) LineToVector(line string) ([]float32, error) { + r.resetIdleTimer() return r.embedder.Embed(line) } func (r *RAG) SearchEmb(emb *models.EmbeddingResp) ([]models.VectorRow, error) { + r.resetIdleTimer() return r.storage.SearchClosest(emb.Embedding) } @@ -208,6 +215,7 @@ func (r *RAG) ListLoaded() ([]string, error) { } func (r *RAG) RemoveFile(filename string) error { + r.resetIdleTimer() return r.storage.RemoveEmbByFileName(filename) } @@ -471,3 +479,38 @@ func Init(c *config.Config, l *slog.Logger, s storage.FullRepo) error { func GetInstance() *RAG { return ragInstance } + +func (r *RAG) resetIdleTimer() { + if r.idleTimer != nil { + r.idleTimer.Stop() + } + r.idleTimer = time.AfterFunc(r.idleTimeout, func() { + r.freeONNXMemory() + }) +} + +func (r *RAG) freeONNXMemory() { + r.mu.Lock() + defer r.mu.Unlock() + if onnx, ok := 
r.embedder.(*ONNXEmbedder); ok { + if err := onnx.Destroy(); err != nil { + r.logger.Error("failed to free ONNX memory", "error", err) + } else { + r.logger.Info("freed ONNX VRAM after idle timeout") + } + } +} + +func (r *RAG) Destroy() { + r.mu.Lock() + defer r.mu.Unlock() + if r.idleTimer != nil { + r.idleTimer.Stop() + r.idleTimer = nil + } + if onnx, ok := r.embedder.(*ONNXEmbedder); ok { + if err := onnx.Destroy(); err != nil { + r.logger.Error("failed to destroy ONNX embedder", "error", err) + } + } +} -- cgit v1.2.3 From 822cc48834f5f1908f619b5441ae40946aceb86d Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 10:37:08 +0300 Subject: Fix: avoid panic if statuslinewidget not loaded yet --- bot.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bot.go b/bot.go index 20ffeb2..ad52059 100644 --- a/bot.go +++ b/bot.go @@ -1400,6 +1400,9 @@ func updateModelLists() { localModelsMu.Lock() LocalModels = ml localModelsMu.Unlock() + for statusLineWidget == nil { + time.Sleep(time.Millisecond * 100) + } // set already loaded model in llama.cpp if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") { localModelsMu.Lock() -- cgit v1.2.3 From f9866bcf5a7369e28246d51b951e81b5b2a8489f Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 11:20:50 +0300 Subject: Feat (rag): hybrid search attempt --- config.example.toml | 1 + config/config.go | 7 +- rag/rag.go | 176 +++++++++++++++++++++------ rag/storage.go | 119 +++++++++++++++--- storage/migrations/003_add_fts.down.sql | 2 + storage/migrations/003_add_fts.up.sql | 15 +++ storage/migrations/004_populate_fts.down.sql | 2 + storage/migrations/004_populate_fts.up.sql | 26 ++++ storage/vector.go | 38 +++--- 9 files changed, 305 insertions(+), 81 deletions(-) create mode 100644 storage/migrations/003_add_fts.down.sql create mode 100644 storage/migrations/003_add_fts.up.sql create mode 100644 storage/migrations/004_populate_fts.down.sql create mode 100644 storage/migrations/004_populate_fts.up.sql diff --git a/config.example.toml b/config.example.toml index 15c8ca6..1698189 100644 --- a/config.example.toml +++ b/config.example.toml @@ -29,6 +29,7 @@ AutoCleanToolCallsFromCtx = false # rag settings RAGBatchSize = 1 RAGWordLimit = 80 +RAGOverlapWords = 16 RAGDir = "ragimport" # extra tts TTS_ENABLED = false diff --git a/config/config.go b/config/config.go index 29d5744..fab3237 100644 --- a/config/config.go +++ b/config/config.go @@ -40,9 +40,10 @@ type Config struct { EmbedTokenizerPath string `toml:"EmbedTokenizerPath"` EmbedDims int `toml:"EmbedDims"` // rag settings - RAGDir string `toml:"RAGDir"` - RAGBatchSize int `toml:"RAGBatchSize"` - RAGWordLimit uint32 `toml:"RAGWordLimit"` + RAGDir string `toml:"RAGDir"` + RAGBatchSize int `toml:"RAGBatchSize"` + RAGWordLimit uint32 `toml:"RAGWordLimit"` + RAGOverlapWords uint32 `toml:"RAGOverlapWords"` // deepseek DeepSeekChatAPI string `toml:"DeepSeekChatAPI"` DeepSeekCompletionAPI string `toml:"DeepSeekCompletionAPI"` diff --git a/rag/rag.go b/rag/rag.go index d64a3e1..4e11a0d 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -73,6 +73,74 @@ func wordCounter(sentence string) int { return len(strings.Split(strings.TrimSpace(sentence), " ")) } +func createChunks(sentences []string, wordLimit, overlapWords uint32) []string { + if len(sentences) == 0 { + return nil + } + if overlapWords >= wordLimit { + overlapWords = wordLimit / 2 + } + var chunks []string + i := 0 + for i < len(sentences) { + var chunkWords []string + wordCount := 0 + j := i 
+ for j < len(sentences) && wordCount <= int(wordLimit) { + sentence := sentences[j] + words := strings.Fields(sentence) + chunkWords = append(chunkWords, sentence) + wordCount += len(words) + j++ + // If this sentence alone exceeds limit, still include it and stop + if wordCount > int(wordLimit) { + break + } + } + if len(chunkWords) == 0 { + break + } + chunk := strings.Join(chunkWords, " ") + chunks = append(chunks, chunk) + if j >= len(sentences) { + break + } + // Move i forward by skipping overlap + if overlapWords == 0 { + i = j + continue + } + // Calculate how many sentences to skip to achieve overlapWords + overlapRemaining := int(overlapWords) + newI := i + for newI < j && overlapRemaining > 0 { + words := len(strings.Fields(sentences[newI])) + overlapRemaining -= words + if overlapRemaining >= 0 { + newI++ + } + } + if newI == i { + newI = j + } + i = newI + } + return chunks +} + +func sanitizeFTSQuery(query string) string { + // Remove double quotes and other problematic characters for FTS5 + query = strings.ReplaceAll(query, "\"", " ") + query = strings.ReplaceAll(query, "'", " ") + query = strings.ReplaceAll(query, ";", " ") + query = strings.ReplaceAll(query, "\\", " ") + query = strings.TrimSpace(query) + if query == "" { + return "*" // match all + } + return query +} + func (r *RAG) LoadRAG(fpath string) error { r.mu.Lock() defer r.mu.Unlock() @@ -95,31 +163,8 @@ func (r *RAG) LoadRAG(fpath string) error { for i, s := range sentences { sents[i] = s.Text } - // Group sentences into paragraphs based on word limit - paragraphs := []string{} - par := strings.Builder{} - for i := 0; i < len(sents); i++ { - if strings.TrimSpace(sents[i]) != "" { - if par.Len() > 0 { - par.WriteString(" ") - } - par.WriteString(sents[i]) - } - if wordCounter(par.String()) > int(r.cfg.RAGWordLimit) { - paragraph := strings.TrimSpace(par.String()) - if paragraph != "" { - paragraphs = append(paragraphs, paragraph) - } - par.Reset() - } - } - // Handle any remaining content in the paragraph buffer - if par.Len() > 0 { - paragraph := strings.TrimSpace(par.String()) - if paragraph != "" { - paragraphs = append(paragraphs, paragraph) - } - } + // Create chunks with overlap + paragraphs := createChunks(sents, r.cfg.RAGWordLimit, r.cfg.RAGOverlapWords) // Adjust batch size if needed if len(paragraphs) < r.cfg.RAGBatchSize && len(paragraphs) > 0 { r.cfg.RAGBatchSize = len(paragraphs) @@ -205,9 +250,15 @@ func (r *RAG) LineToVector(line string) ([]float32, error) { return r.embedder.Embed(line) } -func (r *RAG) SearchEmb(emb *models.EmbeddingResp) ([]models.VectorRow, error) { +func (r *RAG) SearchEmb(emb *models.EmbeddingResp, limit int) ([]models.VectorRow, error) { r.resetIdleTimer() - return r.storage.SearchClosest(emb.Embedding) + return r.storage.SearchClosest(emb.Embedding, limit) +} + +func (r *RAG) SearchKeyword(query string, limit int) ([]models.VectorRow, error) { + r.resetIdleTimer() + sanitized := sanitizeFTSQuery(query) + return r.storage.SearchKeyword(sanitized, limit) } func (r *RAG) ListLoaded() ([]string, error) { @@ -393,7 +444,7 @@ func (r *RAG) SynthesizeAnswer(results []models.VectorRow, query string) (string Embedding: emb, Index: 0, } - topResults, err := r.SearchEmb(embResp) + topResults, err := r.SearchEmb(embResp, 1) if err != nil { r.logger.Error("failed to search for synthesis context", "error", err) return "", err @@ -422,7 +473,9 @@ func truncateString(s string, maxLen int) string { func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) { refined := 
r.RefineQuery(query) variations := r.GenerateQueryVariations(refined) - allResults := make([]models.VectorRow, 0) + + // Collect embedding search results from all variations + var embResults []models.VectorRow seen := make(map[string]bool) for _, q := range variations { emb, err := r.LineToVector(q) @@ -430,29 +483,78 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) { r.logger.Error("failed to embed query variation", "error", err, "query", q) continue } - embResp := &models.EmbeddingResp{ Embedding: emb, Index: 0, } - - results, err := r.SearchEmb(embResp) + results, err := r.SearchEmb(embResp, limit*2) // Get more candidates if err != nil { r.logger.Error("failed to search embeddings", "error", err, "query", q) continue } - for _, row := range results { if !seen[row.Slug] { seen[row.Slug] = true - allResults = append(allResults, row) + embResults = append(embResults, row) } } } - reranked := r.RerankResults(allResults, query) - if len(reranked) > limit { - reranked = reranked[:limit] + // Sort embedding results by distance (lower is better) + sort.Slice(embResults, func(i, j int) bool { + return embResults[i].Distance < embResults[j].Distance + }) + + // Perform keyword search + kwResults, err := r.SearchKeyword(refined, limit*2) + if err != nil { + r.logger.Warn("keyword search failed, using only embeddings", "error", err) + kwResults = nil + } + // Sort keyword results by distance (already sorted by BM25 score) + // kwResults already sorted by distance (lower is better) + + // Combine using Reciprocal Rank Fusion (RRF) + const rrfK = 60 + type scoredRow struct { + row models.VectorRow + score float64 + } + scoreMap := make(map[string]float64) + // Add embedding results + for rank, row := range embResults { + score := 1.0 / (float64(rank) + rrfK) + scoreMap[row.Slug] += score + } + // Add keyword results + for rank, row := range kwResults { + score := 1.0 / (float64(rank) + rrfK) + scoreMap[row.Slug] += score + // Ensure row exists in combined results + if _, exists := seen[row.Slug]; !exists { + embResults = append(embResults, row) + } + } + // Create slice of scored rows + scoredRows := make([]scoredRow, 0, len(embResults)) + for _, row := range embResults { + score := scoreMap[row.Slug] + scoredRows = append(scoredRows, scoredRow{row: row, score: score}) + } + // Sort by descending RRF score + sort.Slice(scoredRows, func(i, j int) bool { + return scoredRows[i].score > scoredRows[j].score + }) + // Take top limit + if len(scoredRows) > limit { + scoredRows = scoredRows[:limit] + } + // Convert back to VectorRow + finalResults := make([]models.VectorRow, len(scoredRows)) + for i, sr := range scoredRows { + finalResults[i] = sr.row } + // Apply reranking heuristics + reranked := r.RerankResults(finalResults, query) return reranked, nil } diff --git a/rag/storage.go b/rag/storage.go index 52f6859..08e9d2a 100644 --- a/rag/storage.go +++ b/rag/storage.go @@ -62,6 +62,18 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error { if err != nil { return err } + embeddingSize := len(row.Embeddings) + + // Start transaction + tx, err := vs.sqlxDB.Beginx() + if err != nil { + return err + } + defer func() { + if err != nil { + tx.Rollback() + } + }() // Serialize the embeddings to binary serializedEmbeddings := SerializeVector(row.Embeddings) @@ -69,10 +81,23 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error { "INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES (?, ?, ?, ?)", tableName, ) - if _, err := vs.sqlxDB.Exec(query, 
serializedEmbeddings, row.Slug, row.RawText, row.FileName); err != nil { + if _, err = tx.Exec(query, serializedEmbeddings, row.Slug, row.RawText, row.FileName); err != nil { vs.logger.Error("failed to write vector", "error", err, "slug", row.Slug) return err } + + // Insert into FTS table + ftsQuery := `INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES (?, ?, ?, ?)` + if _, err = tx.Exec(ftsQuery, row.Slug, row.RawText, row.FileName, embeddingSize); err != nil { + vs.logger.Error("failed to write to FTS table", "error", err, "slug", row.Slug) + return err + } + + err = tx.Commit() + if err != nil { + vs.logger.Error("failed to commit transaction", "error", err) + return err + } return nil } @@ -98,16 +123,15 @@ func (vs *VectorStorage) getTableName(emb []float32) (string, error) { } // SearchClosest finds vectors closest to the query vector using efficient cosine similarity calculation -func (vs *VectorStorage) SearchClosest(query []float32) ([]models.VectorRow, error) { +func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.VectorRow, error) { + if limit <= 0 { + limit = 10 + } tableName, err := vs.getTableName(query) if err != nil { return nil, err } - // For better performance, instead of loading all vectors at once, - // we'll implement batching and potentially add L2 distance-based pre-filtering - // since cosine similarity is related to L2 distance for normalized vectors - querySQL := "SELECT embeddings, slug, raw_text, filename FROM " + tableName rows, err := vs.sqlxDB.Query(querySQL) if err != nil { @@ -115,13 +139,11 @@ } defer rows.Close() - // Use a min-heap or simple slice to keep track of top 3 closest vectors type SearchResult struct { vector models.VectorRow distance float32 } var topResults []SearchResult - // Process vectors one by one to avoid loading everything into memory for rows.Next() { var ( embeddingsBlob []byte @@ -134,10 +156,8 @@ } storedEmbeddings := DeserializeVector(embeddingsBlob) - - // Calculate cosine similarity (returns value between -1 and 1, where 1 is most similar) similarity := cosineSimilarity(query, storedEmbeddings) - distance := 1 - similarity // Convert to distance where 0 is most similar + distance := 1 - similarity result := SearchResult{ vector: models.VectorRow{ @@ -149,20 +169,15 @@ distance: distance, } - // Add to top results and maintain only top 3 topResults = append(topResults, result) - - // Sort and keep only top 3 sort.Slice(topResults, func(i, j int) bool { return topResults[i].distance < topResults[j].distance }) - - if len(topResults) > 3 { - topResults = topResults[:3] // Keep only closest 3 + if len(topResults) > limit { + topResults = topResults[:limit] } } - // Convert back to VectorRow slice results := make([]models.VectorRow, 0, len(topResults)) for _, result := range topResults { result.vector.Distance = result.distance @@ -171,6 +186,70 @@ return results, nil } +// GetVectorBySlug retrieves a vector row by its slug +func (vs *VectorStorage) GetVectorBySlug(slug string) (*models.VectorRow, error) { + embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120} + for _, size := range embeddingSizes { + table := fmt.Sprintf("embeddings_%d", size)
+ query := fmt.Sprintf("SELECT embeddings, slug, raw_text, filename FROM %s WHERE slug = ?", table) + row := vs.sqlxDB.QueryRow(query, slug) + var ( + embeddingsBlob []byte + retrievedSlug, rawText, fileName string + ) + if err := row.Scan(&embeddingsBlob, &retrievedSlug, &rawText, &fileName); err != nil { + // No row in this table, continue to next size + continue + } + storedEmbeddings := DeserializeVector(embeddingsBlob) + return &models.VectorRow{ + Embeddings: storedEmbeddings, + Slug: retrievedSlug, + RawText: rawText, + FileName: fileName, + }, nil + } + return nil, fmt.Errorf("vector with slug %s not found", slug) +} + +// SearchKeyword performs full-text search using FTS5 +func (vs *VectorStorage) SearchKeyword(query string, limit int) ([]models.VectorRow, error) { + // Use FTS5 bm25 ranking. bm25 returns negative values where more negative is better. + // We'll order by bm25 (ascending) and limit. + ftsQuery := `SELECT slug, raw_text, filename, bm25(fts_embeddings) as score + FROM fts_embeddings + WHERE fts_embeddings MATCH ? + ORDER BY score + LIMIT ?` + rows, err := vs.sqlxDB.Query(ftsQuery, query, limit) + if err != nil { + return nil, fmt.Errorf("FTS search failed: %w", err) + } + defer rows.Close() + var results []models.VectorRow + for rows.Next() { + var slug, rawText, fileName string + var score float64 + if err := rows.Scan(&slug, &rawText, &fileName, &score); err != nil { + vs.logger.Error("failed to scan FTS row", "error", err) + continue + } + // Convert BM25 score to distance-like metric (lower is better) + // BM25 is negative, more negative is better. We'll normalize to positive distance. + distance := float32(-score) // Make positive (since score is negative) + if distance < 0 { + distance = 0 + } + results = append(results, models.VectorRow{ + Slug: slug, + RawText: rawText, + FileName: fileName, + Distance: distance, + }) + } + return results, nil +} + // ListFiles returns a list of all loaded files func (vs *VectorStorage) ListFiles() ([]string, error) { fileLists := make([][]string, 0) @@ -215,6 +294,10 @@ func (vs *VectorStorage) ListFiles() ([]string, error) { // RemoveEmbByFileName removes all embeddings associated with a specific filename func (vs *VectorStorage) RemoveEmbByFileName(filename string) error { var errors []string + // Delete from FTS table first + if _, err := vs.sqlxDB.Exec("DELETE FROM fts_embeddings WHERE filename = ?", filename); err != nil { + errors = append(errors, err.Error()) + } embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120} for _, size := range embeddingSizes { table := fmt.Sprintf("embeddings_%d", size) diff --git a/storage/migrations/003_add_fts.down.sql b/storage/migrations/003_add_fts.down.sql new file mode 100644 index 0000000..e565fd5 --- /dev/null +++ b/storage/migrations/003_add_fts.down.sql @@ -0,0 +1,2 @@ +-- Drop FTS5 virtual table +DROP TABLE IF EXISTS fts_embeddings; \ No newline at end of file diff --git a/storage/migrations/003_add_fts.up.sql b/storage/migrations/003_add_fts.up.sql new file mode 100644 index 0000000..114586a --- /dev/null +++ b/storage/migrations/003_add_fts.up.sql @@ -0,0 +1,15 @@ +-- Create FTS5 virtual table for full-text search +CREATE VIRTUAL TABLE IF NOT EXISTS fts_embeddings USING fts5( + slug UNINDEXED, + raw_text, + filename UNINDEXED, + embedding_size UNINDEXED, + tokenize='porter unicode61' -- Use porter stemmer and unicode61 tokenizer +); + +-- Create triggers to maintain FTS table when embeddings are inserted/deleted +-- Note: We'll handle inserts/deletes 
programmatically for simplicity +-- but triggers could be added here if needed. + +-- Indexes for performance (FTS5 manages its own indexes) +-- No additional indexes needed for FTS5 virtual table. \ No newline at end of file diff --git a/storage/migrations/004_populate_fts.down.sql b/storage/migrations/004_populate_fts.down.sql new file mode 100644 index 0000000..2b5c756 --- /dev/null +++ b/storage/migrations/004_populate_fts.down.sql @@ -0,0 +1,2 @@ +-- Clear FTS table (optional) +DELETE FROM fts_embeddings; \ No newline at end of file diff --git a/storage/migrations/004_populate_fts.up.sql b/storage/migrations/004_populate_fts.up.sql new file mode 100644 index 0000000..1d1b16a --- /dev/null +++ b/storage/migrations/004_populate_fts.up.sql @@ -0,0 +1,26 @@ +-- Populate FTS table with existing embeddings +DELETE FROM fts_embeddings; + +INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) +SELECT slug, raw_text, filename, 384 FROM embeddings_384; + +INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) +SELECT slug, raw_text, filename, 768 FROM embeddings_768; + +INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) +SELECT slug, raw_text, filename, 1024 FROM embeddings_1024; + +INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) +SELECT slug, raw_text, filename, 1536 FROM embeddings_1536; + +INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) +SELECT slug, raw_text, filename, 2048 FROM embeddings_2048; + +INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) +SELECT slug, raw_text, filename, 3072 FROM embeddings_3072; + +INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) +SELECT slug, raw_text, filename, 4096 FROM embeddings_4096; + +INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) +SELECT slug, raw_text, filename, 5120 FROM embeddings_5120; \ No newline at end of file diff --git a/storage/vector.go b/storage/vector.go index 75f5c9a..e3bbb89 100644 --- a/storage/vector.go +++ b/storage/vector.go @@ -4,6 +4,7 @@ import ( "encoding/binary" "fmt" "gf-lt/models" + "sort" "unsafe" "github.com/jmoiron/sqlx" @@ -11,7 +12,7 @@ import ( type VectorRepo interface { WriteVector(*models.VectorRow) error - SearchClosest(q []float32) ([]models.VectorRow, error) + SearchClosest(q []float32, limit int) ([]models.VectorRow, error) ListFiles() ([]string, error) RemoveEmbByFileName(filename string) error DB() *sqlx.DB @@ -79,7 +80,7 @@ func (p ProviderSQL) WriteVector(row *models.VectorRow) error { return err } -func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) { +func (p ProviderSQL) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) { tableName, err := fetchTableName(q) if err != nil { return nil, err @@ -94,7 +95,7 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) { vector models.VectorRow distance float32 } - var topResults []SearchResult + var allResults []SearchResult for rows.Next() { var ( embeddingsBlob []byte @@ -119,28 +120,19 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) { }, distance: distance, } - - // Add to top results and maintain only top results - topResults = append(topResults, result) - - // Sort and keep only top results - // We'll keep the top 3 closest vectors - if len(topResults) > 3 { - // Simple sort and truncate to maintain only 3 best matches - for i := 0; i < len(topResults); i++ { - for j := i + 1; j < len(topResults); j++ { - if 
topResults[i].distance > topResults[j].distance { - topResults[i], topResults[j] = topResults[j], topResults[i] - } - } - } - topResults = topResults[:3] - } + allResults = append(allResults, result) + } + // Sort by distance + sort.Slice(allResults, func(i, j int) bool { + return allResults[i].distance < allResults[j].distance + }) + // Truncate to limit + if len(allResults) > limit { + allResults = allResults[:limit] } - // Convert back to VectorRow slice - results := make([]models.VectorRow, len(topResults)) - for i, result := range topResults { + results := make([]models.VectorRow, len(allResults)) + for i, result := range allResults { result.vector.Distance = result.distance results[i] = result.vector } -- cgit v1.2.3 From 62ec55505ca07701ee6a976895d910b051e725b9 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 13:17:49 +0300 Subject: Enha (rag): query each doc --- rag/rag.go | 46 ++++++++++++++++++++++++++++++++++++++++++---- rag/storage.go | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 5 deletions(-) diff --git a/rag/rag.go b/rag/rag.go index 4e11a0d..9271b60 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -286,10 +286,13 @@ func (r *RAG) RefineQuery(query string) string { return original } query = strings.ToLower(query) - for _, stopWord := range stopWords { - wordPattern := `\b` + stopWord + `\b` - re := regexp.MustCompile(wordPattern) - query = re.ReplaceAllString(query, "") + words := strings.Fields(query) + if len(words) >= 3 { + for _, stopWord := range stopWords { + wordPattern := `\b` + stopWord + `\b` + re := regexp.MustCompile(wordPattern) + query = re.ReplaceAllString(query, "") + } } query = strings.TrimSpace(query) if len(query) < 5 { @@ -340,6 +343,36 @@ func (r *RAG) GenerateQueryVariations(query string) []string { if len(parts) == 0 { return variations } + // Get loaded filenames to filter out filename terms + filenames, err := r.storage.ListFiles() + if err == nil && len(filenames) > 0 { + // Convert to lowercase for case-insensitive matching + lowerFilenames := make([]string, len(filenames)) + for i, f := range filenames { + lowerFilenames[i] = strings.ToLower(f) + } + filteredParts := make([]string, 0, len(parts)) + for _, part := range parts { + partLower := strings.ToLower(part) + skip := false + for _, fn := range lowerFilenames { + if strings.Contains(fn, partLower) || strings.Contains(partLower, fn) { + skip = true + break + } + } + if !skip { + filteredParts = append(filteredParts, part) + } + } + // If filteredParts not empty and different from original, add filtered query + if len(filteredParts) > 0 && len(filteredParts) != len(parts) { + filteredQuery := strings.Join(filteredParts, " ") + if len(filteredQuery) >= 5 { + variations = append(variations, filteredQuery) + } + } + } if len(parts) >= 2 { trimmed := strings.Join(parts[:len(parts)-1], " ") if len(trimmed) >= 5 { @@ -403,9 +436,14 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V }) unique := make([]models.VectorRow, 0) seen := make(map[string]bool) + fileCounts := make(map[string]int) for i := range scored { if !seen[scored[i].row.Slug] { + if fileCounts[scored[i].row.FileName] >= 2 { + continue + } seen[scored[i].row.Slug] = true + fileCounts[scored[i].row.FileName]++ unique = append(unique, scored[i].row) } } diff --git a/rag/storage.go b/rag/storage.go index 08e9d2a..110cea2 100644 --- a/rag/storage.go +++ b/rag/storage.go @@ -1,6 +1,7 @@ package rag import ( + "database/sql" "encoding/binary" "fmt" "gf-lt/models" @@ 
-221,11 +222,41 @@ func (vs *VectorStorage) SearchKeyword(query string, limit int) ([]models.Vector WHERE fts_embeddings MATCH ? ORDER BY score LIMIT ?` + + // Try original query first rows, err := vs.sqlxDB.Query(ftsQuery, query, limit) if err != nil { return nil, fmt.Errorf("FTS search failed: %w", err) } - defer rows.Close() + results, err := vs.scanRows(rows) + rows.Close() + if err != nil { + return nil, err + } + + // If no results and query contains multiple terms, try OR fallback + if len(results) == 0 && strings.Contains(query, " ") && !strings.Contains(strings.ToUpper(query), " OR ") { + // Build OR query: term1 OR term2 OR term3 + terms := strings.Fields(query) + if len(terms) > 1 { + orQuery := strings.Join(terms, " OR ") + rows, err := vs.sqlxDB.Query(ftsQuery, orQuery, limit) + if err != nil { + // Return original empty results rather than error + return results, nil + } + orResults, err := vs.scanRows(rows) + rows.Close() + if err == nil { + results = orResults + } + } + } + return results, nil +} + +// scanRows converts SQL rows to VectorRow slice +func (vs *VectorStorage) scanRows(rows *sql.Rows) ([]models.VectorRow, error) { var results []models.VectorRow for rows.Next() { var slug, rawText, fileName string -- cgit v1.2.3 From edfd43c52ae3f2fa16f6ab5d64cb48218a2c0a64 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 13:45:12 +0300 Subject: Doc: update --- rag_issues.md | 105 +++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 30 deletions(-) diff --git a/rag_issues.md b/rag_issues.md index d9578e4..e74417b 100644 --- a/rag_issues.md +++ b/rag_issues.md @@ -1,52 +1,82 @@ # RAG Implementation Issues and Proposed Solutions ## Overview -The current RAG system fails to retrieve relevant information for specific queries, as demonstrated by the inability to find the "two she bears" reference in the KJV Bible (2 Kings 2:23-24). While the system retrieves documents containing the word "bear", it misses the actual verse, indicating fundamental flaws in chunking, query processing, retrieval, and answer synthesis. Below we dissect each problem and propose concrete solutions. +The current RAG system had several limitations preventing reliable retrieval across multiple documents. Initial tests showed failures for queries like "two she bears" (KJV Bible 2 Kings 2:23-24), where the system would retrieve documents containing "bear" but miss the specific verse due to issues with chunking, query processing, and retrieval. + +**Recent improvements** have addressed several key issues through targeted enhancements: + +1. **Chunk overlap**: Added configurable overlap (`RAGOverlapWords`) to preserve context across chunk boundaries +2. **Hybrid retrieval**: Implemented FTS5 BM25 keyword search combined with embedding similarity via Reciprocal Rank Fusion (RRF) +3. **Query refinement**: Enhanced stopword preservation for short queries and filename contamination filtering +4. **Cross-document diversity**: Added per-file result caps to ensure multiple documents are represented +5. **Robust FTS5 queries**: Added OR fallback for multi-term queries when AND logic fails + +**Result**: The system now successfully performs cross-document search, as demonstrated in `chat_exports/54_assistant.json` where queries like "Krahi Andrihee level" retrieve relevant information from both `ghost_7.txt` and `Overlord Volume 01 - The Undead King.epub`, handling LLM-injected terms like "Overlord" gracefully. 
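+
+To make the fusion step concrete, here is a minimal sketch of the Reciprocal Rank Fusion used by the hybrid search (k=60, matching the `rrfK` constant in `rag/rag.go`). `models.VectorRow` and its `Slug` field come from this codebase; the standalone helper name `rrfFuse` is illustrative only, since in the actual patch this logic lives inline in `RAG.Search`.
+
+```go
+package rag
+
+import (
+	"sort"
+
+	"gf-lt/models"
+)
+
+// rrfFuse merges two ranked candidate lists into one list ordered by
+// RRF score, where score(doc) = sum of 1/(rank+k) over every list the
+// doc appears in. Better (lower) ranks contribute larger scores.
+func rrfFuse(embResults, kwResults []models.VectorRow, limit int) []models.VectorRow {
+	const rrfK = 60.0
+	scores := make(map[string]float64)
+	bySlug := make(map[string]models.VectorRow)
+	for rank, row := range embResults {
+		scores[row.Slug] += 1.0 / (float64(rank) + rrfK)
+		bySlug[row.Slug] = row
+	}
+	for rank, row := range kwResults {
+		scores[row.Slug] += 1.0 / (float64(rank) + rrfK)
+		if _, ok := bySlug[row.Slug]; !ok {
+			bySlug[row.Slug] = row // keyword-only hit
+		}
+	}
+	merged := make([]models.VectorRow, 0, len(bySlug))
+	for slug := range bySlug {
+		merged = append(merged, bySlug[slug])
+	}
+	sort.Slice(merged, func(i, j int) bool {
+		return scores[merged[i].Slug] > scores[merged[j].Slug]
+	})
+	if len(merged) > limit {
+		merged = merged[:limit]
+	}
+	return merged
+}
+```
+
+A chunk ranked near the top of both lists (rank 0 in each) scores 2/60 ≈ 0.033, while a chunk found deep in only one list scores far less; because only ranks enter the formula, RRF needs no normalization between BM25 scores and cosine distances.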
+ +Below we dissect each original problem, note implementation status, and describe the actual solutions deployed. --- -### Problem 1: Chunking Destroys Semantic Coherence +### Problem 1: Chunking Destroys Semantic Coherence [PARTIALLY ADDRESSED] - **Problem description** The current chunking splits text into sentences and groups them by a simple word count threshold (`RAGWordLimit`). This ignores document structure (chapters, headings) and can cut through narrative units, scattering related content across multiple chunks. For the Bible query, the story of Elisha and the bears likely spans multiple verses; splitting it prevents any single chunk from containing the full context, diluting the embedding signal and making retrieval difficult. -- **Proposed solution** +- **Implemented solution** + - **Overlap between chunks**: Added `RAGOverlapWords` configuration (default 16 words) to `createChunks()` function, ensuring continuity across chunk boundaries. This preserves context for phrases that might be split, though full structure-aware chunking remains future work. + - The overlap mechanism calculates word-level overlap, skipping sentences as needed to achieve the configured overlap size while maintaining chunk size limits. + +- **Proposed solution (remaining)** - **Structure-aware chunking**: Use the EPUB’s internal structure (chapters, sections) to create chunks that align with logical content units (e.g., by chapter or story). - - **Overlap between chunks**: Add a configurable overlap (e.g., 10–20% of chunk size) to preserve continuity, ensuring key phrases like "two she bears" are not split across boundaries. - **Rich metadata**: Store book name, chapter, and verse numbers with each chunk to enable filtering and source attribution. - **Fallback to recursive splitting**: For documents without clear structure, use a recursive character text splitter with overlap (similar to LangChain’s `RecursiveCharacterTextSplitter`) to maintain semantic boundaries (paragraphs, sentences). --- -### Problem 2: Query Refinement Strips Critical Context +### Problem 2: Query Refinement Strips Critical Context [PARTIALLY ADDRESSED] - **Problem description** `RefineQuery` removes stop words and applies keyword-based filtering that discards semantically important modifiers. For "two she bears", the word "she" (a gender modifier) may be treated as a stop word, leaving "two bears". This loses the specificity of the query and causes the embedding to drift toward generic "bear" contexts. The rule-based approach cannot understand that "she bears" is a key phrase in the biblical story. -- **Proposed solution** +- **Implemented solution** + - **Stopword preservation for short queries**: Modified `RefineQuery()` to skip stopword removal entirely for queries with fewer than 3 words. This keeps terse queries intact (a two-word query like "she bears" passes through untouched), while longer, noisier queries still get cleaned. + - **Filename contamination filtering**: Extended `GenerateQueryVariations()` to filter out query terms that match loaded filenames (case-insensitive). This prevents LLM-injected terms like "Overlord" from contaminating queries when searching across documents. + - **Query variation generation**: Maintained existing variation generation (prefix/suffix trimming, adding "explanation", "details", etc.) to improve embedding alignment; see the sketch after this list.
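+
+As a concrete reference for the short-query guard above, here is a self-contained sketch mirroring the logic of `RefineQuery` in `rag/rag.go`. The helper name `refineQuerySketch` and its explicit `stopWords` parameter are illustrative; the real function uses a package-level stopword list.
+
+```go
+package rag
+
+import (
+	"regexp"
+	"strings"
+)
+
+// refineQuerySketch strips stopwords only when the query has at least
+// three words, and falls back to the original query whenever the
+// refined form becomes too short to be useful.
+func refineQuerySketch(query string, stopWords []string) string {
+	original := query
+	query = strings.ToLower(query)
+	if len(strings.Fields(query)) >= 3 {
+		for _, sw := range stopWords {
+			re := regexp.MustCompile(`\b` + sw + `\b`)
+			query = re.ReplaceAllString(query, "")
+		}
+	}
+	query = strings.TrimSpace(query)
+	if len(query) < 5 {
+		return original // refinement stripped too much; keep the user's words
+	}
+	return query
+}
+```
+
+The final length guard matters in practice: if stripping stopwords leaves fewer than five characters, the original query is returned unchanged, so refinement can never degrade a query into something unsearchable.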
+ +- **Proposed solution (remaining)** - **Entity-aware query preservation**: Use a lightweight NLP model (e.g., spaCy or a BERT-based NER tagger) to identify and retain key entities (quantities, animals, names) while only removing truly irrelevant stop words. - **Intelligent query rewriting**: Employ a small LLM (or a set of transformation rules) to generate query variations that reflect likely biblical phrasing, e.g., "two bears came out of the wood" or "Elisha and the bears". - - **Contextual stop word removal**: Instead of a static list, use a POS tagger to keep adjectives, nouns, and verbs while removing only function words that don't carry meaning. - - **Disable refinement for short queries**: If the query is already concise (like "two she bears"), skip aggressive filtering. + - **Contextual stop word removal**: Instead of a static list, use a POS tagger to keep adjectives, nouns, and verbs while removing only function words that don't carry meaning. --- -### Problem 3: Embedding Similarity Fails for Rare or Specific Phrases +### Problem 3: Embedding Similarity Fails for Rare or Specific Phrases [ADDRESSED] - **Problem description** Dense embeddings excel at capturing semantic similarity but can fail when the query contains rare phrases or when the relevant passage is embedded in a noisy chunk. The verse "there came forth two she bears out of the wood" shares only the word "bears" with the query, and its embedding may be pulled toward the average of surrounding verses. Consequently, the similarity score may be lower than that of other chunks containing the word "bear" in generic contexts. -- **Proposed solution** - - **Hybrid retrieval**: Combine dense embeddings with BM25 (keyword) search. BM25 excels at exact phrase matching and would likely retrieve the verse based on "two bears" even if the embedding is weak. - - Use a library like [blevesearch](https://github.com/blevesearch/bleve) to index text alongside vectors. - - Fuse results using Reciprocal Rank Fusion (RRF) or a weighted combination. - - **Query expansion**: Add relevant terms to the query (e.g., "Elisha", "2 Kings") to improve embedding alignment. +- **Implemented solution** + - **Hybrid retrieval (FTS5 + embeddings)**: + - Created FTS5 virtual table (`fts_embeddings`) with porter stemmer tokenizer + - Implemented `SearchKeyword()` using BM25 ranking with proper score-to-distance conversion + - Combined embedding and keyword results using **Reciprocal Rank Fusion (RRF)** with k=60 + - Results from both methods are deduplicated and scored jointly + - **Query variation expansion**: Enhanced `GenerateQueryVariations()` to produce multiple query forms (trimmed prefixes/suffixes, added "explanation"/"details"/"summary") + - **FTS5 OR fallback**: Modified `SearchKeyword()` to automatically retry with OR operator when AND query returns zero results, handling LLM-injected terms gracefully + - **Increased retrieval breadth**: Modified `SearchClosest()` to retrieve more candidates (limit × 2) before RRF fusion + +- **Proposed solution (remaining)** - **Fine-tuned embeddings**: Consider using an embedding model fine-tuned on domain-specific data (e.g., biblical texts) if this is a recurring use case. --- -### Problem 4: Reranking Heuristics Are Insufficient +### Problem 4: Reranking Heuristics Are Insufficient [PARTIALLY ADDRESSED] - **Problem description** `RerankResults` boosts results based on simple keyword matching and file name heuristics. This coarse approach cannot reliably promote the correct verse over false positives. 
The adjustment `distance - score/100` is arbitrary and may not reflect true relevance. -- **Proposed solution** +- **Implemented solution** + - **Document diversity cap**: Modified `RerankResults()` to limit results to **maximum 2 per file**. This ensures cross-document representation and prevents single-document dominance in results. + - **Enhanced scoring**: Maintained existing keyword match scoring (exact query match +10, partial word matches proportional) but added per-file tracking to enforce diversity. + - **Result limiting**: Final results capped at 10 unique chunks after deduplication and file diversity enforcement. + +- **Proposed solution (remaining)** - **Cross-encoder reranking**: After retrieving top candidates (e.g., top 20) with hybrid search, rerank them using a cross-encoder model that directly computes the relevance score between the query and each chunk. - Models like `cross-encoder/ms-marco-MiniLM-L-6-v2` are lightweight and can be run locally or via a microservice. - **Score normalization**: Use the cross-encoder scores to reorder results, discarding low-scoring ones. @@ -54,7 +84,7 @@ The current RAG system fails to retrieve relevant information for specific queri --- -### Problem 5: Answer Synthesis Is Not Generative +### Problem 5: Answer Synthesis Is Not Generative [NOT ADDRESSED] - **Problem description** `SynthesizeAnswer` embeds a prompt and attempts to retrieve a pre-stored answer, falling back to concatenating truncated chunks. This is fundamentally flawed: RAG requires an LLM to generate a coherent answer from retrieved context. In the Bible example, even if the correct verse were retrieved, the system would only output a snippet, not an answer explaining the reference. @@ -67,7 +97,7 @@ The current RAG system fails to retrieve relevant information for specific queri --- -### Problem 6: Concurrency and Error Handling +### Problem 6: Concurrency and Error Handling [NOT ADDRESSED] - **Problem description** The code uses a mutex only in `LoadRAG`, leaving other methods vulnerable to race conditions. The global status channel `LongJobStatusCh` may drop messages due to `select/default`, and errors are sometimes logged but not propagated. Ingestion is synchronous and slow. @@ -80,7 +110,7 @@ The current RAG system fails to retrieve relevant information for specific queri --- -### Problem 7: Lack of Monitoring and Evaluation +### Problem 7: Lack of Monitoring and Evaluation [NOT ADDRESSED] - **Problem description** There are no metrics to track retrieval quality, latency, or user satisfaction. The failure case was discovered manually; without systematic evaluation, regressions will go unnoticed. @@ -91,14 +121,29 @@ The current RAG system fails to retrieve relevant information for specific queri --- -## Summary -Fixing the RAG pipeline requires a multi-pronged approach: -1. **Structure-aware chunking** with metadata. -2. **Hybrid retrieval** (dense + sparse). -3. **Query understanding** via entity preservation and intelligent rewriting. -4. **Cross-encoder reranking** for precision. -5. **LLM-based answer generation**. -6. **Robust concurrency and error handling**. -7. **Monitoring and evaluation** to track improvements. - -Implementing these changes will transform the system from a brittle keyword matcher into a reliable knowledge assistant capable of handling nuanced queries like the "two she bears" reference. +## Summary & Current Status +The original RAG improvement plan outlined 7 key areas. 
Recent implementation has partially or fully addressed several critical issues, particularly enabling successful cross-document search: + +### Implemented / Partially Addressed +1. **✅ Hybrid retrieval (dense + sparse)** - FTS5 BM25 with RRF fusion, OR fallback for multi-term queries +2. **🔄 Structure-aware chunking** - Overlap between chunks (`RAGOverlapWords`), though full structure-awareness pending +3. **🔄 Query understanding** - Stopword preservation for short queries, filename contamination filtering +4. **🔄 Cross-document diversity** - Per-file result caps (max 2 per document) in reranking + +### Key Achievements +- **Cross-document search**: Queries like "Krahi Andrihee level" now retrieve relevant information from multiple loaded documents +- **Robust query handling**: OR fallback handles LLM-injected terms (e.g., "Overlord" added to queries) +- **Document diversity**: Prevents single-document dominance in results +- **Context preservation**: Chunk overlap maintains semantic continuity + +### Remaining Work +5. **LLM-based answer generation** - Still uses concatenation rather than generative synthesis +6. **Robust concurrency and error handling** - Race conditions, synchronous ingestion, status channel issues +7. **Monitoring and evaluation** - No metrics, logging, or systematic testing + +### Next Steps +- Evaluate the improved system with the original "two she bears" test case +- Consider adding cross-encoder reranking for higher precision +- Address synthesis and concurrency issues as needed + +The system has evolved from a brittle keyword matcher toward a more reliable knowledge assistant, though significant improvements remain for production-grade robustness. -- cgit v1.2.3 From 17b68bc21fae99c17ec48e046e67a643b9d159bb Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 18:58:23 +0300 Subject: Enha (rag): async writes --- rag/embedder.go | 7 +- rag/rag.go | 440 +++++++++++++++++++++++++++++++++++++++++++++-------- rag/storage.go | 86 +++++++++++ storage/storage.go | 16 ++ 4 files changed, 480 insertions(+), 69 deletions(-) diff --git a/rag/embedder.go b/rag/embedder.go index 39f4b5c..fd4cfa7 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -11,6 +11,7 @@ import ( "net/http" "os" "sync" + "time" "github.com/sugarme/tokenizer" "github.com/sugarme/tokenizer/pretrained" @@ -33,8 +34,10 @@ type APIEmbedder struct { func NewAPIEmbedder(l *slog.Logger, cfg *config.Config) *APIEmbedder { return &APIEmbedder{ logger: l, - client: &http.Client{}, - cfg: cfg, + client: &http.Client{ + Timeout: 30 * time.Second, + }, + cfg: cfg, } } diff --git a/rag/rag.go b/rag/rag.go index 9271b60..180ad50 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -1,6 +1,7 @@ package rag import ( + "context" "errors" "fmt" "gf-lt/config" @@ -9,6 +10,7 @@ import ( "log/slog" "path" "regexp" + "runtime" "sort" "strings" "sync" @@ -17,9 +19,14 @@ import ( "github.com/neurosnap/sentences/english" ) +const ( + // batchTimeout is the maximum time allowed for embedding a single batch + batchTimeout = 2 * time.Minute +) + var ( // Status messages for TUI integration - LongJobStatusCh = make(chan string, 10) // Increased buffer size to prevent blocking + LongJobStatusCh = make(chan string, 100) // Increased buffer size for parallel batch updates FinishedRAGStatus = "finished loading RAG file; press Enter" LoadedFileRAGStatus = "loaded file" ErrRAGStatus = "some error occurred; failed to transfer data to vector db" @@ -31,12 +38,38 @@ type RAG struct { cfg *config.Config embedder Embedder storage 
*VectorStorage - mu sync.Mutex + mu sync.RWMutex + idleMu sync.Mutex fallbackMsg string idleTimer *time.Timer idleTimeout time.Duration } +// batchTask represents a single batch to be embedded +type batchTask struct { + batchIndex int + paragraphs []string + filename string + totalBatches int +} + +// batchResult represents the result of embedding a batch +type batchResult struct { + batchIndex int + embeddings [][]float32 + paragraphs []string + filename string +} + +// sendStatusNonBlocking sends a status message without blocking +func (r *RAG) sendStatusNonBlocking(status string) { + select { + case LongJobStatusCh <- status: + default: + r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", status) + } +} + func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) (*RAG, error) { var embedder Embedder var fallbackMsg string @@ -142,18 +175,22 @@ func sanitizeFTSQuery(query string) string { } func (r *RAG) LoadRAG(fpath string) error { + return r.LoadRAGWithContext(context.Background(), fpath) +} + +func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error { r.mu.Lock() defer r.mu.Unlock() + fileText, err := ExtractText(fpath) if err != nil { return err } r.logger.Debug("rag: loaded file", "fp", fpath) - select { - case LongJobStatusCh <- LoadedFileRAGStatus: - default: - r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", LoadedFileRAGStatus) - } + + // Send initial status (non-blocking with retry) + r.sendStatusNonBlocking(LoadedFileRAGStatus) + tokenizer, err := english.NewSentenceTokenizer(nil) if err != nil { return err @@ -163,6 +200,7 @@ func (r *RAG) LoadRAG(fpath string) error { for i, s := range sentences { sents[i] = s.Text } + // Create chunks with overlap paragraphs := createChunks(sents, r.cfg.RAGWordLimit, r.cfg.RAGOverlapWords) // Adjust batch size if needed @@ -172,76 +210,332 @@ func (r *RAG) LoadRAG(fpath string) error { if len(paragraphs) == 0 { return errors.New("no valid paragraphs found in file") } - // Process paragraphs in batches synchronously - batchCount := 0 - for i := 0; i < len(paragraphs); i += r.cfg.RAGBatchSize { - end := i + r.cfg.RAGBatchSize - if end > len(paragraphs) { - end = len(paragraphs) - } - batch := paragraphs[i:end] - batchCount++ - // Filter empty paragraphs - nonEmptyBatch := make([]string, 0, len(batch)) - for _, p := range batch { - if strings.TrimSpace(p) != "" { - nonEmptyBatch = append(nonEmptyBatch, strings.TrimSpace(p)) + + totalBatches := (len(paragraphs) + r.cfg.RAGBatchSize - 1) / r.cfg.RAGBatchSize + r.logger.Debug("starting parallel embedding", "total_batches", totalBatches, "batch_size", r.cfg.RAGBatchSize) + + // Determine concurrency level + concurrency := runtime.NumCPU() + if concurrency > totalBatches { + concurrency = totalBatches + } + if concurrency < 1 { + concurrency = 1 + } + // If using ONNX embedder, limit concurrency to 1 due to mutex serialization + isONNX := false + if _, isONNX = r.embedder.(*ONNXEmbedder); isONNX { + concurrency = 1 + } + embedderType := "API" + if isONNX { + embedderType = "ONNX" + } + r.logger.Debug("parallel embedding setup", + "total_batches", totalBatches, + "concurrency", concurrency, + "embedder", embedderType, + "batch_size", r.cfg.RAGBatchSize) + + // Create context with timeout (30 minutes) and cancellation for error handling + ctx, cancel := context.WithTimeout(ctx, 30*time.Minute) + defer cancel() + + // Channels for task distribution and results + taskCh := make(chan 
batchTask, totalBatches) + resultCh := make(chan batchResult, totalBatches) + errorCh := make(chan error, totalBatches) + + // Start worker goroutines + var wg sync.WaitGroup + for w := 0; w < concurrency; w++ { + wg.Add(1) + go r.embeddingWorker(ctx, w, taskCh, resultCh, errorCh, &wg) + } + + // Close task channel after all tasks are sent (by separate goroutine) + go func() { + // Ensure task channel is closed when this goroutine exits + defer close(taskCh) + r.logger.Debug("task distributor started", "total_batches", totalBatches) + + for i := 0; i < totalBatches; i++ { + start := i * r.cfg.RAGBatchSize + end := start + r.cfg.RAGBatchSize + if end > len(paragraphs) { + end = len(paragraphs) } - } - if len(nonEmptyBatch) == 0 { - continue - } - // Embed the batch - embeddings, err := r.embedder.EmbedSlice(nonEmptyBatch) - if err != nil { - r.logger.Error("failed to embed batch", "error", err, "batch", batchCount) + batch := paragraphs[start:end] + + // Filter empty paragraphs + nonEmptyBatch := make([]string, 0, len(batch)) + for _, p := range batch { + if strings.TrimSpace(p) != "" { + nonEmptyBatch = append(nonEmptyBatch, strings.TrimSpace(p)) + } + } + + task := batchTask{ + batchIndex: i, + paragraphs: nonEmptyBatch, + filename: path.Base(fpath), + totalBatches: totalBatches, + } + select { - case LongJobStatusCh <- ErrRAGStatus: - default: - r.logger.Warn("LongJobStatusCh channel full, dropping message") + case taskCh <- task: + r.logger.Debug("task distributor sent batch", "batch", i, "paragraphs", len(nonEmptyBatch)) + case <-ctx.Done(): + r.logger.Debug("task distributor cancelled", "batches_sent", i+1, "total_batches", totalBatches) + return } - return fmt.Errorf("failed to embed batch %d: %w", batchCount, err) - } - if len(embeddings) != len(nonEmptyBatch) { - err := errors.New("embedding count mismatch") - r.logger.Error("embedding mismatch", "expected", len(nonEmptyBatch), "got", len(embeddings)) - return err - } - // Write vectors to storage - filename := path.Base(fpath) - for j, text := range nonEmptyBatch { - vector := models.VectorRow{ - Embeddings: embeddings[j], - RawText: text, - Slug: fmt.Sprintf("%s_%d_%d", filename, batchCount, j), - FileName: filename, + } + r.logger.Debug("task distributor finished", "batches_sent", totalBatches) + }() + + // Wait for workers to finish and close result channel + go func() { + wg.Wait() + close(resultCh) + }() + + // Process results in order and write to database + nextExpectedBatch := 0 + resultsBuffer := make(map[int]batchResult) + filename := path.Base(fpath) + batchesProcessed := 0 + + for { + select { + case <-ctx.Done(): + return ctx.Err() + + case err := <-errorCh: + // First error from any worker, cancel everything + cancel() + r.logger.Error("embedding worker failed", "error", err) + r.sendStatusNonBlocking(ErrRAGStatus) + return fmt.Errorf("embedding failed: %w", err) + + case result, ok := <-resultCh: + if !ok { + // All results processed + resultCh = nil + r.logger.Debug("result channel closed", "batches_processed", batchesProcessed, "total_batches", totalBatches) + continue + } + + // Store result in buffer + resultsBuffer[result.batchIndex] = result + + // Process buffered results in order + for { + if res, exists := resultsBuffer[nextExpectedBatch]; exists { + // Write this batch to database + if err := r.writeBatchToStorage(ctx, res, filename); err != nil { + cancel() + return err + } + + batchesProcessed++ + // Send progress update + statusMsg := fmt.Sprintf("processed batch %d/%d", batchesProcessed, totalBatches) 
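+				// Progress counts batches written to storage, so the TUI
+				// sees a monotonically increasing counter even when workers
+				// finish embedding out of order.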
+ r.sendStatusNonBlocking(statusMsg) + + delete(resultsBuffer, nextExpectedBatch) + nextExpectedBatch++ + } else { + break + } } - if err := r.storage.WriteVector(&vector); err != nil { - r.logger.Error("failed to write vector to DB", "error", err, "slug", vector.Slug) + + default: + // No channels ready, check for deadlock conditions + if resultCh == nil && nextExpectedBatch < totalBatches { + // Missing batch results after result channel closed + r.logger.Error("missing batch results", + "expected", totalBatches, + "received", nextExpectedBatch, + "missing", totalBatches-nextExpectedBatch) + + // Wait a short time for any delayed errors, then cancel select { - case LongJobStatusCh <- ErrRAGStatus: - default: - r.logger.Warn("LongJobStatusCh channel full, dropping message") + case <-time.After(5 * time.Second): + cancel() + return fmt.Errorf("missing batch results: expected %d, got %d", totalBatches, nextExpectedBatch) + case <-ctx.Done(): + return ctx.Err() + case err := <-errorCh: + cancel() + r.logger.Error("embedding worker failed after result channel closed", "error", err) + r.sendStatusNonBlocking(ErrRAGStatus) + return fmt.Errorf("embedding failed: %w", err) } - return fmt.Errorf("failed to write vector: %w", err) + } + // If we reach here, no deadlock yet, just busy loop prevention + time.Sleep(100 * time.Millisecond) + } + + // Check if we're done + if resultCh == nil && nextExpectedBatch >= totalBatches { + r.logger.Debug("all batches processed successfully", "total", totalBatches) + break + } + } + + r.logger.Debug("finished writing vectors", "batches", batchesProcessed) + r.resetIdleTimer() + r.sendStatusNonBlocking(FinishedRAGStatus) + return nil +} + +// embeddingWorker processes batch embedding tasks +func (r *RAG) embeddingWorker(ctx context.Context, workerID int, taskCh <-chan batchTask, resultCh chan<- batchResult, errorCh chan<- error, wg *sync.WaitGroup) { + defer wg.Done() + r.logger.Debug("embedding worker started", "worker", workerID) + + // Panic recovery to ensure worker doesn't crash silently + defer func() { + if rec := recover(); rec != nil { + r.logger.Error("embedding worker panicked", "worker", workerID, "panic", rec) + // Try to send error, but don't block if channel is full + select { + case errorCh <- fmt.Errorf("worker %d panicked: %v", workerID, rec): + default: + r.logger.Warn("error channel full, dropping panic error", "worker", workerID) } } - r.logger.Debug("wrote batch to db", "batch", batchCount, "size", len(nonEmptyBatch)) - // Send progress status - statusMsg := fmt.Sprintf("processed batch %d/%d", batchCount, (len(paragraphs)+r.cfg.RAGBatchSize-1)/r.cfg.RAGBatchSize) + }() + + for task := range taskCh { select { - case LongJobStatusCh <- statusMsg: + case <-ctx.Done(): + r.logger.Debug("embedding worker cancelled", "worker", workerID) + return default: - r.logger.Warn("LongJobStatusCh channel full, dropping message") } + r.logger.Debug("worker processing batch", "worker", workerID, "batch", task.batchIndex, "paragraphs", len(task.paragraphs), "total_batches", task.totalBatches) + + // Skip empty batches + if len(task.paragraphs) == 0 { + select { + case resultCh <- batchResult{ + batchIndex: task.batchIndex, + embeddings: nil, + paragraphs: nil, + filename: task.filename, + }: + case <-ctx.Done(): + r.logger.Debug("embedding worker cancelled while sending empty batch", "worker", workerID) + return + } + r.logger.Debug("worker sent empty batch", "worker", workerID, "batch", task.batchIndex) + continue + } + + // Embed with retry for API embedder 
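+		// (up to 3 attempts with quadratic backoff capped at 10s;
+		// non-API embedders fail fast instead of retrying, see
+		// embedWithRetry below)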
+ embeddings, err := r.embedWithRetry(ctx, task.paragraphs, 3) + if err != nil { + // Try to send error, but don't block indefinitely + select { + case errorCh <- fmt.Errorf("worker %d batch %d: %w", workerID, task.batchIndex, err): + case <-ctx.Done(): + r.logger.Debug("embedding worker cancelled while sending error", "worker", workerID) + } + return + } + + // Send result with context awareness + select { + case resultCh <- batchResult{ + batchIndex: task.batchIndex, + embeddings: embeddings, + paragraphs: task.paragraphs, + filename: task.filename, + }: + case <-ctx.Done(): + r.logger.Debug("embedding worker cancelled while sending result", "worker", workerID) + return + } + r.logger.Debug("worker completed batch", "worker", workerID, "batch", task.batchIndex, "embeddings", len(embeddings)) } - r.logger.Debug("finished writing vectors", "batches", batchCount) - r.resetIdleTimer() + r.logger.Debug("embedding worker finished", "worker", workerID) +} + +// embedWithRetry attempts embedding with exponential backoff for API embedder +func (r *RAG) embedWithRetry(ctx context.Context, paragraphs []string, maxRetries int) ([][]float32, error) { + var lastErr error + + for attempt := 0; attempt < maxRetries; attempt++ { + if attempt > 0 { + // Exponential backoff + backoff := time.Duration(attempt*attempt) * time.Second + if backoff > 10*time.Second { + backoff = 10 * time.Second + } + + select { + case <-time.After(backoff): + case <-ctx.Done(): + return nil, ctx.Err() + } + + r.logger.Debug("retrying embedding", "attempt", attempt, "max_retries", maxRetries) + } + + embeddings, err := r.embedder.EmbedSlice(paragraphs) + if err == nil { + // Validate embedding count + if len(embeddings) != len(paragraphs) { + return nil, fmt.Errorf("embedding count mismatch: expected %d, got %d", len(paragraphs), len(embeddings)) + } + return embeddings, nil + } + + lastErr = err + // Only retry for API embedder errors (network/timeout) + // For ONNX embedder, fail fast + if _, isAPI := r.embedder.(*APIEmbedder); !isAPI { + break + } + } + + return nil, fmt.Errorf("embedding failed after %d attempts: %w", maxRetries, lastErr) +} + +// writeBatchToStorage writes a single batch of vectors to the database +func (r *RAG) writeBatchToStorage(ctx context.Context, result batchResult, filename string) error { + if len(result.embeddings) == 0 { + // Empty batch, skip + return nil + } + + // Check context before starting select { - case LongJobStatusCh <- FinishedRAGStatus: + case <-ctx.Done(): + return ctx.Err() default: - r.logger.Warn("LongJobStatusCh channel is full or closed, dropping status message", "message", FinishedRAGStatus) } + + // Build all vectors for batch write + vectors := make([]*models.VectorRow, 0, len(result.paragraphs)) + for j, text := range result.paragraphs { + vectors = append(vectors, &models.VectorRow{ + Embeddings: result.embeddings[j], + RawText: text, + Slug: fmt.Sprintf("%s_%d_%d", filename, result.batchIndex+1, j), + FileName: filename, + }) + } + + // Write all vectors in a single transaction + if err := r.storage.WriteVectors(vectors); err != nil { + r.logger.Error("failed to write vectors batch to DB", "error", err, "batch", result.batchIndex+1, "size", len(vectors)) + r.sendStatusNonBlocking(ErrRAGStatus) + return fmt.Errorf("failed to write vectors batch: %w", err) + } + + r.logger.Debug("wrote batch to db", "batch", result.batchIndex+1, "size", len(result.paragraphs)) return nil } @@ -250,22 +544,26 @@ func (r *RAG) LineToVector(line string) ([]float32, error) { return 
r.embedder.Embed(line) } -func (r *RAG) SearchEmb(emb *models.EmbeddingResp, limit int) ([]models.VectorRow, error) { +func (r *RAG) searchEmb(emb *models.EmbeddingResp, limit int) ([]models.VectorRow, error) { r.resetIdleTimer() return r.storage.SearchClosest(emb.Embedding, limit) } -func (r *RAG) SearchKeyword(query string, limit int) ([]models.VectorRow, error) { +func (r *RAG) searchKeyword(query string, limit int) ([]models.VectorRow, error) { r.resetIdleTimer() sanitized := sanitizeFTSQuery(query) return r.storage.SearchKeyword(sanitized, limit) } func (r *RAG) ListLoaded() ([]string, error) { + r.mu.RLock() + defer r.mu.RUnlock() return r.storage.ListFiles() } func (r *RAG) RemoveFile(filename string) error { + r.mu.Lock() + defer r.mu.Unlock() r.resetIdleTimer() return r.storage.RemoveEmbByFileName(filename) } @@ -454,6 +752,9 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V } func (r *RAG) SynthesizeAnswer(results []models.VectorRow, query string) (string, error) { + r.mu.RLock() + defer r.mu.RUnlock() + r.resetIdleTimer() if len(results) == 0 { return "No relevant information found in the vector database.", nil } @@ -482,7 +783,7 @@ func (r *RAG) SynthesizeAnswer(results []models.VectorRow, query string) (string Embedding: emb, Index: 0, } - topResults, err := r.SearchEmb(embResp, 1) + topResults, err := r.searchEmb(embResp, 1) if err != nil { r.logger.Error("failed to search for synthesis context", "error", err) return "", err @@ -509,6 +810,9 @@ func truncateString(s string, maxLen int) string { } func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) { + r.mu.RLock() + defer r.mu.RUnlock() + r.resetIdleTimer() refined := r.RefineQuery(query) variations := r.GenerateQueryVariations(refined) @@ -525,7 +829,7 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) { Embedding: emb, Index: 0, } - results, err := r.SearchEmb(embResp, limit*2) // Get more candidates + results, err := r.searchEmb(embResp, limit*2) // Get more candidates if err != nil { r.logger.Error("failed to search embeddings", "error", err, "query", q) continue @@ -543,7 +847,7 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) { }) // Perform keyword search - kwResults, err := r.SearchKeyword(refined, limit*2) + kwResults, err := r.searchKeyword(refined, limit*2) if err != nil { r.logger.Warn("keyword search failed, using only embeddings", "error", err) kwResults = nil @@ -621,6 +925,8 @@ func GetInstance() *RAG { } func (r *RAG) resetIdleTimer() { + r.idleMu.Lock() + defer r.idleMu.Unlock() if r.idleTimer != nil { r.idleTimer.Stop() } diff --git a/rag/storage.go b/rag/storage.go index 110cea2..1e6b013 100644 --- a/rag/storage.go +++ b/rag/storage.go @@ -102,6 +102,92 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error { return nil } +// WriteVectors stores multiple embedding vectors in a single transaction +func (vs *VectorStorage) WriteVectors(rows []*models.VectorRow) error { + if len(rows) == 0 { + return nil + } + // SQLite has limit of 999 parameters per statement, each row uses 4 parameters + const maxBatchSize = 200 // 200 * 4 = 800 < 999 + if len(rows) > maxBatchSize { + // Process in chunks + for i := 0; i < len(rows); i += maxBatchSize { + end := i + maxBatchSize + if end > len(rows) { + end = len(rows) + } + if err := vs.WriteVectors(rows[i:end]); err != nil { + return err + } + } + return nil + } + // All rows should have same embedding size (same model) + firstSize := 
len(rows[0].Embeddings) + for i, row := range rows { + if len(row.Embeddings) != firstSize { + return fmt.Errorf("embedding size mismatch: row %d has size %d, expected %d", i, len(row.Embeddings), firstSize) + } + } + tableName, err := vs.getTableName(rows[0].Embeddings) + if err != nil { + return err + } + + // Start transaction + tx, err := vs.sqlxDB.Beginx() + if err != nil { + return err + } + defer func() { + if err != nil { + tx.Rollback() + } + }() + + // Build batch insert for embeddings table + embeddingPlaceholders := make([]string, 0, len(rows)) + embeddingArgs := make([]any, 0, len(rows)*4) + for _, row := range rows { + embeddingPlaceholders = append(embeddingPlaceholders, "(?, ?, ?, ?)") + embeddingArgs = append(embeddingArgs, SerializeVector(row.Embeddings), row.Slug, row.RawText, row.FileName) + } + embeddingQuery := fmt.Sprintf( + "INSERT INTO %s (embeddings, slug, raw_text, filename) VALUES %s", + tableName, + strings.Join(embeddingPlaceholders, ", "), + ) + if _, err := tx.Exec(embeddingQuery, embeddingArgs...); err != nil { + vs.logger.Error("failed to write vectors batch", "error", err, "batch_size", len(rows)) + return err + } + + // Build batch insert for FTS table + ftsPlaceholders := make([]string, 0, len(rows)) + ftsArgs := make([]any, 0, len(rows)*4) + embeddingSize := len(rows[0].Embeddings) + for _, row := range rows { + ftsPlaceholders = append(ftsPlaceholders, "(?, ?, ?, ?)") + ftsArgs = append(ftsArgs, row.Slug, row.RawText, row.FileName, embeddingSize) + } + ftsQuery := fmt.Sprintf( + "INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES %s", + strings.Join(ftsPlaceholders, ", "), + ) + if _, err := tx.Exec(ftsQuery, ftsArgs...); err != nil { + vs.logger.Error("failed to write FTS batch", "error", err, "batch_size", len(rows)) + return err + } + + err = tx.Commit() + if err != nil { + vs.logger.Error("failed to commit transaction", "error", err) + return err + } + vs.logger.Debug("wrote vectors batch", "batch_size", len(rows)) + return nil +} + // getTableName determines which table to use based on embedding size func (vs *VectorStorage) getTableName(emb []float32) (string, error) { size := len(emb) diff --git a/storage/storage.go b/storage/storage.go index 9ad9745..57631da 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -102,6 +102,22 @@ func NewProviderSQL(dbPath string, logger *slog.Logger) FullRepo { logger.Error("failed to open db connection", "error", err) return nil } + // Enable WAL mode for better concurrency and performance + if _, err := db.Exec("PRAGMA journal_mode = WAL;"); err != nil { + logger.Warn("failed to enable WAL mode", "error", err) + } + if _, err := db.Exec("PRAGMA synchronous = NORMAL;"); err != nil { + logger.Warn("failed to set synchronous mode", "error", err) + } + // Increase cache size for better performance + if _, err := db.Exec("PRAGMA cache_size = -2000;"); err != nil { + logger.Warn("failed to set cache size", "error", err) + } + // Log actual journal mode for debugging + var journalMode string + if err := db.QueryRow("PRAGMA journal_mode;").Scan(&journalMode); err == nil { + logger.Debug("SQLite journal mode", "mode", journalMode) + } p := ProviderSQL{db: db, logger: logger} if err := p.Migrate(); err != nil { logger.Error("migration failed, app cannot start", "error", err) -- cgit v1.2.3 From 5f273681df9ab45fc62c863ef3259cc9ac2375a1 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 19:03:26 +0300 Subject: Chore: remove plan doc --- rag_issues.md | 149 
---------------------------------------------------------- 1 file changed, 149 deletions(-) delete mode 100644 rag_issues.md diff --git a/rag_issues.md b/rag_issues.md deleted file mode 100644 index e74417b..0000000 --- a/rag_issues.md +++ /dev/null @@ -1,149 +0,0 @@ -# RAG Implementation Issues and Proposed Solutions - -## Overview -The current RAG system had several limitations preventing reliable retrieval across multiple documents. Initial tests showed failures for queries like "two she bears" (KJV Bible 2 Kings 2:23-24), where the system would retrieve documents containing "bear" but miss the specific verse due to issues with chunking, query processing, and retrieval. - -**Recent improvements** have addressed several key issues through targeted enhancements: - -1. **Chunk overlap**: Added configurable overlap (`RAGOverlapWords`) to preserve context across chunk boundaries -2. **Hybrid retrieval**: Implemented FTS5 BM25 keyword search combined with embedding similarity via Reciprocal Rank Fusion (RRF) -3. **Query refinement**: Enhanced stopword preservation for short queries and filename contamination filtering -4. **Cross-document diversity**: Added per-file result caps to ensure multiple documents are represented -5. **Robust FTS5 queries**: Added OR fallback for multi-term queries when AND logic fails - -**Result**: The system now successfully performs cross-document search, as demonstrated in `chat_exports/54_assistant.json` where queries like "Krahi Andrihee level" retrieve relevant information from both `ghost_7.txt` and `Overlord Volume 01 - The Undead King.epub`, handling LLM-injected terms like "Overlord" gracefully. - -Below we dissect each original problem, note implementation status, and describe the actual solutions deployed. - ---- - -### Problem 1: Chunking Destroys Semantic Coherence [PARTIALLY ADDRESSED] -- **Problem description** - The current chunking splits text into sentences and groups them by a simple word count threshold (`RAGWordLimit`). This ignores document structure (chapters, headings) and can cut through narrative units, scattering related content across multiple chunks. For the Bible query, the story of Elisha and the bears likely spans multiple verses; splitting it prevents any single chunk from containing the full context, diluting the embedding signal and making retrieval difficult. - -- **Implemented solution** - - **Overlap between chunks**: Added `RAGOverlapWords` configuration (default 16 words) to `createChunks()` function, ensuring continuity across chunk boundaries. This preserves context for phrases that might be split, though full structure-aware chunking remains future work. - - The overlap mechanism calculates word-level overlap, skipping sentences as needed to achieve the configured overlap size while maintaining chunk size limits. - -- **Proposed solution (remaining)** - - **Structure-aware chunking**: Use the EPUB’s internal structure (chapters, sections) to create chunks that align with logical content units (e.g., by chapter or story). - - **Rich metadata**: Store book name, chapter, and verse numbers with each chunk to enable filtering and source attribution. - - **Fallback to recursive splitting**: For documents without clear structure, use a recursive character text splitter with overlap (similar to LangChain’s `RecursiveCharacterTextSplitter`) to maintain semantic boundaries (paragraphs, sentences). 
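To make the overlap mechanism above concrete, here is a minimal, hypothetical sketch of word-level overlap between sentence-based chunks. `chunkWithOverlap` is an invented helper for illustration (the actual implementation is `createChunks` in `rag/rag.go`), and it assumes `overlapWords` is smaller than `wordLimit`.

```go
package rag // hypothetical placement; illustrative sketch only

import "strings"

// chunkWithOverlap groups whole sentences into chunks of roughly
// wordLimit words, carrying the trailing ~overlapWords words of each
// chunk into the start of the next one.
func chunkWithOverlap(sentences []string, wordLimit, overlapWords int) []string {
	var chunks, cur []string
	words := 0
	for _, s := range sentences {
		w := len(strings.Fields(s))
		if words+w > wordLimit && len(cur) > 0 {
			chunks = append(chunks, strings.Join(cur, " "))
			// Walk back whole sentences until about overlapWords
			// words are kept for the next chunk.
			kept, back := 0, 0
			for j := len(cur) - 1; j >= 0 && kept < overlapWords; j-- {
				kept += len(strings.Fields(cur[j]))
				back++
			}
			cur = append([]string(nil), cur[len(cur)-back:]...)
			words = kept
		}
		cur = append(cur, s)
		words += w
	}
	if len(cur) > 0 {
		chunks = append(chunks, strings.Join(cur, " "))
	}
	return chunks
}
```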
-
----
-
-### Problem 2: Query Refinement Strips Critical Context [PARTIALLY ADDRESSED]
-- **Problem description**
-  `RefineQuery` removes stop words and applies keyword-based filtering that discards semantically important modifiers. For "two she bears", the word "she" (a gender modifier) may be treated as a stop word, leaving "two bears". This loses the specificity of the query and causes the embedding to drift toward generic "bear" contexts. The rule-based approach cannot understand that "she bears" is a key phrase in the biblical story.
-
-- **Implemented solution**
-  - **Stopword preservation for short queries**: Modified `RefineQuery()` to skip stopword removal entirely for queries of 3 words or fewer. This preserves critical modifiers like "she" in "two she bears" while still cleaning longer, noisier queries.
-  - **Filename contamination filtering**: Extended `GenerateQueryVariations()` to filter out query terms that match loaded filenames (case-insensitive). This prevents LLM-injected terms like "Overlord" from contaminating queries when searching across documents.
-  - **Query variation generation**: Maintained existing variation generation (prefix/suffix trimming, adding "explanation", "details", etc.) to improve embedding alignment.
-
-- **Proposed solution (remaining)**
-  - **Entity-aware query preservation**: Use a lightweight NLP model (e.g., spaCy or a BERT-based NER tagger) to identify and retain key entities (quantities, animals, names) while only removing truly irrelevant stop words.
-  - **Intelligent query rewriting**: Employ a small LLM (or a set of transformation rules) to generate query variations that reflect likely biblical phrasing, e.g., "two bears came out of the wood" or "Elisha and the bears".
-  - **Contextual stop word removal**: Instead of a static list, use a POS tagger to keep adjectives, nouns, and verbs while removing only function words that don't carry meaning.
-
----
-
-### Problem 3: Embedding Similarity Fails for Rare or Specific Phrases [ADDRESSED]
-- **Problem description**
-  Dense embeddings excel at capturing semantic similarity but can fail when the query contains rare phrases or when the relevant passage is embedded in a noisy chunk. The verse "there came forth two she bears out of the wood" shares only the word "bears" with the query, and its embedding may be pulled toward the average of surrounding verses. Consequently, the similarity score may be lower than that of other chunks containing the word "bear" in generic contexts.
- -- **Implemented solution** - - **Hybrid retrieval (FTS5 + embeddings)**: - - Created FTS5 virtual table (`fts_embeddings`) with porter stemmer tokenizer - - Implemented `SearchKeyword()` using BM25 ranking with proper score-to-distance conversion - - Combined embedding and keyword results using **Reciprocal Rank Fusion (RRF)** with k=60 - - Results from both methods are deduplicated and scored jointly - - **Query variation expansion**: Enhanced `GenerateQueryVariations()` to produce multiple query forms (trimmed prefixes/suffixes, added "explanation"/"details"/"summary") - - **FTS5 OR fallback**: Modified `SearchKeyword()` to automatically retry with OR operator when AND query returns zero results, handling LLM-injected terms gracefully - - **Increased retrieval breadth**: Modified `SearchClosest()` to retrieve more candidates (limit × 2) before RRF fusion - -- **Proposed solution (remaining)** - - **Fine-tuned embeddings**: Consider using an embedding model fine-tuned on domain-specific data (e.g., biblical texts) if this is a recurring use case. - ---- - -### Problem 4: Reranking Heuristics Are Insufficient [PARTIALLY ADDRESSED] -- **Problem description** - `RerankResults` boosts results based on simple keyword matching and file name heuristics. This coarse approach cannot reliably promote the correct verse over false positives. The adjustment `distance - score/100` is arbitrary and may not reflect true relevance. - -- **Implemented solution** - - **Document diversity cap**: Modified `RerankResults()` to limit results to **maximum 2 per file**. This ensures cross-document representation and prevents single-document dominance in results. - - **Enhanced scoring**: Maintained existing keyword match scoring (exact query match +10, partial word matches proportional) but added per-file tracking to enforce diversity. - - **Result limiting**: Final results capped at 10 unique chunks after deduplication and file diversity enforcement. - -- **Proposed solution (remaining)** - - **Cross-encoder reranking**: After retrieving top candidates (e.g., top 20) with hybrid search, rerank them using a cross-encoder model that directly computes the relevance score between the query and each chunk. - - Models like `cross-encoder/ms-marco-MiniLM-L-6-v2` are lightweight and can be run locally or via a microservice. - - **Score normalization**: Use the cross-encoder scores to reorder results, discarding low-scoring ones. - - **Contextual boosting**: If metadata (e.g., chapter/verse) is available, boost results that match the query’s expected location (if inferable). - ---- - -### Problem 5: Answer Synthesis Is Not Generative [NOT ADDRESSED] -- **Problem description** - `SynthesizeAnswer` embeds a prompt and attempts to retrieve a pre-stored answer, falling back to concatenating truncated chunks. This is fundamentally flawed: RAG requires an LLM to generate a coherent answer from retrieved context. In the Bible example, even if the correct verse were retrieved, the system would only output a snippet, not an answer explaining the reference. - -- **Proposed solution** - - **Integrate an LLM for generation**: Use a local model (via Ollama, Llama.cpp) or a cloud API (OpenAI, etc.) to synthesize answers. - - Construct a prompt that includes the retrieved chunks (with metadata) and the user query. - - Instruct the model to answer based solely on the provided context and cite sources (e.g., "According to 2 Kings 2:24..."). 
- - **Implement a fallback**: If no relevant chunks are retrieved, return a message like "I couldn't find that information in your documents." - - **Streaming support**: For better UX, stream the answer token-by-token. - ---- - -### Problem 6: Concurrency and Error Handling [NOT ADDRESSED] -- **Problem description** - The code uses a mutex only in `LoadRAG`, leaving other methods vulnerable to race conditions. The global status channel `LongJobStatusCh` may drop messages due to `select/default`, and errors are sometimes logged but not propagated. Ingestion is synchronous and slow. - -- **Proposed solution** - - **Add context support**: Pass `context.Context` to all methods to allow cancellation and timeouts. - - **Worker pools for embedding**: Parallelize batch embedding with a controlled number of workers to respect API rate limits and speed up ingestion. - - **Retry logic**: Implement exponential backoff for transient API errors. - - **Replace global channel**: Use a callback or an injectable status reporter to avoid dropping messages. - - **Fine-grained locking**: Protect shared state (e.g., `storage`) with appropriate synchronization. - ---- - -### Problem 7: Lack of Monitoring and Evaluation [NOT ADDRESSED] -- **Problem description** - There are no metrics to track retrieval quality, latency, or user satisfaction. The failure case was discovered manually; without systematic evaluation, regressions will go unnoticed. - -- **Proposed solution** - - **Log key metrics**: Record query, retrieved chunk IDs, scores, and latency for each search. - - **User feedback**: Add a mechanism for users to rate answers (thumbs up/down) and use this data to improve retrieval. - - **Offline evaluation**: Create a test set of queries and expected relevant chunks (e.g., the Bible example) to measure recall@k, MRR, etc., and run it after each change. - ---- - -## Summary & Current Status -The original RAG improvement plan outlined 7 key areas. Recent implementation has partially or fully addressed several critical issues, particularly enabling successful cross-document search: - -### Implemented / Partially Addressed -1. **✅ Hybrid retrieval (dense + sparse)** - FTS5 BM25 with RRF fusion, OR fallback for multi-term queries -2. **🔄 Structure-aware chunking** - Overlap between chunks (`RAGOverlapWords`), though full structure-awareness pending -3. **🔄 Query understanding** - Stopword preservation for short queries, filename contamination filtering -4. **🔄 Cross-document diversity** - Per-file result caps (max 2 per document) in reranking - -### Key Achievements -- **Cross-document search**: Queries like "Krahi Andrihee level" now retrieve relevant information from multiple loaded documents -- **Robust query handling**: OR fallback handles LLM-injected terms (e.g., "Overlord" added to queries) -- **Document diversity**: Prevents single-document dominance in results -- **Context preservation**: Chunk overlap maintains semantic continuity - -### Remaining Work -5. **LLM-based answer generation** - Still uses concatenation rather than generative synthesis -6. **Robust concurrency and error handling** - Race conditions, synchronous ingestion, status channel issues -7. 
**Monitoring and evaluation** - No metrics, logging, or systematic testing - -### Next Steps -- Evaluate the improved system with the original "two she bears" test case -- Consider adding cross-encoder reranking for higher precision -- Address synthesis and concurrency issues as needed - -The system has evolved from a brittle keyword matcher toward a more reliable knowledge assistant, though significant improvements remain for production-grade robustness. -- cgit v1.2.3 From 014e297ae3497d07b5c46c234a9157db8dfce198 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 19:57:44 +0300 Subject: Chore: linter complaints --- .gitignore | 2 ++ rag/embedder.go | 13 ++++++------- rag/rag.go | 26 ++------------------------ rag/storage.go | 20 ++++---------------- 4 files changed, 14 insertions(+), 47 deletions(-) diff --git a/.gitignore b/.gitignore index 15b83b4..b3baaec 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ testlog history/ *.db +*.db-shm +*.db-wal config.toml sysprompts/* !sysprompts/alice_bob_carl.json diff --git a/rag/embedder.go b/rag/embedder.go index fd4cfa7..5a4aae0 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -213,7 +213,6 @@ func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Log if cudaLibPath == "" { fmt.Println("WARNING: CUDA provider library not found, will use CPU") } - emb := &ONNXEmbedder{ tokenizerPath: tokenizerPath, dims: dims, @@ -232,7 +231,6 @@ func (e *ONNXEmbedder) ensureInitialized() error { if e.session != nil { return nil } - // Load tokenizer lazily if e.tokenizer == nil { tok, err := pretrained.FromFile(e.tokenizerPath) @@ -241,7 +239,6 @@ func (e *ONNXEmbedder) ensureInitialized() error { } e.tokenizer = tok } - onnxInitOnce.Do(func() { onnxruntime_go.SetSharedLibraryPath(onnxLibPath) if err := onnxruntime_go.InitializeEnvironment(); err != nil { @@ -260,13 +257,14 @@ func (e *ONNXEmbedder) ensureInitialized() error { if !onnxReady { return errors.New("ONNX runtime not ready") } - // Create session options opts, err := onnxruntime_go.NewSessionOptions() if err != nil { return fmt.Errorf("failed to create session options: %w", err) } - defer opts.Destroy() + defer func() { + _ = opts.Destroy() + }() // Try to add CUDA provider useCUDA := cudaLibPath != "" @@ -276,7 +274,9 @@ func (e *ONNXEmbedder) ensureInitialized() error { e.logger.Warn("failed to create CUDA provider options, falling back to CPU", "error", err) useCUDA = false } else { - defer cudaOpts.Destroy() + defer func() { + _ = cudaOpts.Destroy() + }() if err := cudaOpts.Update(map[string]string{"device_id": "0"}); err != nil { e.logger.Warn("failed to update CUDA options, falling back to CPU", "error", err) useCUDA = false @@ -286,7 +286,6 @@ func (e *ONNXEmbedder) ensureInitialized() error { } } } - if useCUDA { e.logger.Info("Using CUDA for ONNX inference") } else { diff --git a/rag/rag.go b/rag/rag.go index 180ad50..3db4303 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -19,10 +19,7 @@ import ( "github.com/neurosnap/sentences/english" ) -const ( - // batchTimeout is the maximum time allowed for embedding a single batch - batchTimeout = 2 * time.Minute -) +const () var ( // Status messages for TUI integration @@ -102,10 +99,6 @@ func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) (*RAG, error) { return rag, nil } -func wordCounter(sentence string) int { - return len(strings.Split(strings.TrimSpace(sentence), " ")) -} - func createChunks(sentences []string, wordLimit, overlapWords uint32) []string { if len(sentences) == 0 { return 
nil @@ -181,7 +174,6 @@ func (r *RAG) LoadRAG(fpath string) error { func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error { r.mu.Lock() defer r.mu.Unlock() - fileText, err := ExtractText(fpath) if err != nil { return err @@ -190,7 +182,6 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error { // Send initial status (non-blocking with retry) r.sendStatusNonBlocking(LoadedFileRAGStatus) - tokenizer, err := english.NewSentenceTokenizer(nil) if err != nil { return err @@ -210,7 +201,6 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error { if len(paragraphs) == 0 { return errors.New("no valid paragraphs found in file") } - totalBatches := (len(paragraphs) + r.cfg.RAGBatchSize - 1) / r.cfg.RAGBatchSize r.logger.Debug("starting parallel embedding", "total_batches", totalBatches, "batch_size", r.cfg.RAGBatchSize) @@ -223,7 +213,7 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error { concurrency = 1 } // If using ONNX embedder, limit concurrency to 1 due to mutex serialization - isONNX := false + var isONNX bool if _, isONNX = r.embedder.(*ONNXEmbedder); isONNX { concurrency = 1 } @@ -258,7 +248,6 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error { // Ensure task channel is closed when this goroutine exits defer close(taskCh) r.logger.Debug("task distributor started", "total_batches", totalBatches) - for i := 0; i < totalBatches; i++ { start := i * r.cfg.RAGBatchSize end := start + r.cfg.RAGBatchSize @@ -304,7 +293,6 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error { resultsBuffer := make(map[int]batchResult) filename := path.Base(fpath) batchesProcessed := 0 - for { select { case <-ctx.Done(): @@ -382,7 +370,6 @@ func (r *RAG) LoadRAGWithContext(ctx context.Context, fpath string) error { break } } - r.logger.Debug("finished writing vectors", "batches", batchesProcessed) r.resetIdleTimer() r.sendStatusNonBlocking(FinishedRAGStatus) @@ -406,7 +393,6 @@ func (r *RAG) embeddingWorker(ctx context.Context, workerID int, taskCh <-chan b } } }() - for task := range taskCh { select { case <-ctx.Done(): @@ -432,7 +418,6 @@ func (r *RAG) embeddingWorker(ctx context.Context, workerID int, taskCh <-chan b r.logger.Debug("worker sent empty batch", "worker", workerID, "batch", task.batchIndex) continue } - // Embed with retry for API embedder embeddings, err := r.embedWithRetry(ctx, task.paragraphs, 3) if err != nil { @@ -444,7 +429,6 @@ func (r *RAG) embeddingWorker(ctx context.Context, workerID int, taskCh <-chan b } return } - // Send result with context awareness select { case resultCh <- batchResult{ @@ -465,7 +449,6 @@ func (r *RAG) embeddingWorker(ctx context.Context, workerID int, taskCh <-chan b // embedWithRetry attempts embedding with exponential backoff for API embedder func (r *RAG) embedWithRetry(ctx context.Context, paragraphs []string, maxRetries int) ([][]float32, error) { var lastErr error - for attempt := 0; attempt < maxRetries; attempt++ { if attempt > 0 { // Exponential backoff @@ -473,13 +456,11 @@ func (r *RAG) embedWithRetry(ctx context.Context, paragraphs []string, maxRetrie if backoff > 10*time.Second { backoff = 10 * time.Second } - select { case <-time.After(backoff): case <-ctx.Done(): return nil, ctx.Err() } - r.logger.Debug("retrying embedding", "attempt", attempt, "max_retries", maxRetries) } @@ -499,7 +480,6 @@ func (r *RAG) embedWithRetry(ctx context.Context, paragraphs []string, maxRetrie break } } - return nil, 
fmt.Errorf("embedding failed after %d attempts: %w", maxRetries, lastErr) } @@ -509,7 +489,6 @@ func (r *RAG) writeBatchToStorage(ctx context.Context, result batchResult, filen // Empty batch, skip return nil } - // Check context before starting select { case <-ctx.Done(): @@ -534,7 +513,6 @@ func (r *RAG) writeBatchToStorage(ctx context.Context, result batchResult, filen r.sendStatusNonBlocking(ErrRAGStatus) return fmt.Errorf("failed to write vectors batch: %w", err) } - r.logger.Debug("wrote batch to db", "batch", result.batchIndex+1, "size", len(result.paragraphs)) return nil } diff --git a/rag/storage.go b/rag/storage.go index 1e6b013..62477b6 100644 --- a/rag/storage.go +++ b/rag/storage.go @@ -64,7 +64,6 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error { return err } embeddingSize := len(row.Embeddings) - // Start transaction tx, err := vs.sqlxDB.Beginx() if err != nil { @@ -72,7 +71,7 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error { } defer func() { if err != nil { - tx.Rollback() + _ = tx.Rollback() } }() @@ -86,14 +85,12 @@ func (vs *VectorStorage) WriteVector(row *models.VectorRow) error { vs.logger.Error("failed to write vector", "error", err, "slug", row.Slug) return err } - // Insert into FTS table ftsQuery := `INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES (?, ?, ?, ?)` if _, err := tx.Exec(ftsQuery, row.Slug, row.RawText, row.FileName, embeddingSize); err != nil { vs.logger.Error("failed to write to FTS table", "error", err, "slug", row.Slug) return err } - err = tx.Commit() if err != nil { vs.logger.Error("failed to commit transaction", "error", err) @@ -133,7 +130,6 @@ func (vs *VectorStorage) WriteVectors(rows []*models.VectorRow) error { if err != nil { return err } - // Start transaction tx, err := vs.sqlxDB.Beginx() if err != nil { @@ -141,7 +137,7 @@ func (vs *VectorStorage) WriteVectors(rows []*models.VectorRow) error { } defer func() { if err != nil { - tx.Rollback() + _ = tx.Rollback() } }() @@ -161,7 +157,6 @@ func (vs *VectorStorage) WriteVectors(rows []*models.VectorRow) error { vs.logger.Error("failed to write vectors batch", "error", err, "batch_size", len(rows)) return err } - // Build batch insert for FTS table ftsPlaceholders := make([]string, 0, len(rows)) ftsArgs := make([]any, 0, len(rows)*4) @@ -170,15 +165,12 @@ func (vs *VectorStorage) WriteVectors(rows []*models.VectorRow) error { ftsPlaceholders = append(ftsPlaceholders, "(?, ?, ?, ?)") ftsArgs = append(ftsArgs, row.Slug, row.RawText, row.FileName, embeddingSize) } - ftsQuery := fmt.Sprintf( - "INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES %s", - strings.Join(ftsPlaceholders, ", "), - ) + ftsQuery := "INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size) VALUES " + + strings.Join(ftsPlaceholders, ", ") if _, err := tx.Exec(ftsQuery, ftsArgs...); err != nil { vs.logger.Error("failed to write FTS batch", "error", err, "batch_size", len(rows)) return err } - err = tx.Commit() if err != nil { vs.logger.Error("failed to commit transaction", "error", err) @@ -218,14 +210,12 @@ func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.Vec if err != nil { return nil, err } - querySQL := "SELECT embeddings, slug, raw_text, filename FROM " + tableName rows, err := vs.sqlxDB.Query(querySQL) if err != nil { return nil, err } defer rows.Close() - type SearchResult struct { vector models.VectorRow distance float32 @@ -241,7 +231,6 @@ func (vs *VectorStorage) SearchClosest(query 
[]float32, limit int) ([]models.Vec vs.logger.Error("failed to scan row", "error", err) continue } - storedEmbeddings := DeserializeVector(embeddingsBlob) similarity := cosineSimilarity(query, storedEmbeddings) distance := 1 - similarity @@ -264,7 +253,6 @@ func (vs *VectorStorage) SearchClosest(query []float32, limit int) ([]models.Vec topResults = topResults[:limit] } } - results := make([]models.VectorRow, 0, len(topResults)) for _, result := range topResults { result.vector.Distance = result.distance -- cgit v1.2.3 From 0e55e44f624d2839dc51fa293a18b323c497a6b1 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sat, 7 Mar 2026 15:41:39 +0300 Subject: Enha (kokoro): use ffplay instead of beep (portaudio) --- extra/kokoro.go | 277 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ extra/tts.go | 215 ------------------------------------------- 2 files changed, 277 insertions(+), 215 deletions(-) create mode 100644 extra/kokoro.go diff --git a/extra/kokoro.go b/extra/kokoro.go new file mode 100644 index 0000000..15b173b --- /dev/null +++ b/extra/kokoro.go @@ -0,0 +1,277 @@ +//go:build extra +// +build extra + +package extra + +import ( + "bytes" + "encoding/json" + "fmt" + "gf-lt/models" + "io" + "log/slog" + "net/http" + "os/exec" + "strings" + "sync" + + "github.com/neurosnap/sentences/english" +) + +type KokoroOrator struct { + logger *slog.Logger + mu sync.Mutex + URL string + Format models.AudioFormat + Stream bool + Speed float32 + Language string + Voice string + // fields for playback control + cmd *exec.Cmd + cmdMu sync.Mutex + stopCh chan struct{} + // textBuffer, interrupt etc. remain the same + textBuffer strings.Builder + interrupt bool +} + +func (o *KokoroOrator) GetLogger() *slog.Logger { + return o.logger +} + +// Speak streams audio directly to an external player +func (o *KokoroOrator) Speak(text string) error { + o.logger.Debug("fn: Speak is called", "text-len", len(text)) + // 1. Get the audio stream (still an io.ReadCloser) + body, err := o.requestSound(text) + if err != nil { + return fmt.Errorf("request failed: %w", err) + } + defer body.Close() + // 2. Prepare external player (ffplay as example) + // -i pipe:0 tells ffplay to read from stdin + cmd := exec.Command("ffplay", "-nodisp", "-autoexit", "-i", "pipe:0") + stdin, err := cmd.StdinPipe() + if err != nil { + return fmt.Errorf("failed to get stdin pipe: %w", err) + } + o.cmdMu.Lock() + o.cmd = cmd + o.stopCh = make(chan struct{}) + o.cmdMu.Unlock() + // 3. Start the player + if err := cmd.Start(); err != nil { + return fmt.Errorf("failed to start ffplay: %w", err) + } + // 4. Copy audio data to stdin in a goroutine + copyErr := make(chan error, 1) + go func() { + _, err := io.Copy(stdin, body) + stdin.Close() // signal EOF to player + copyErr <- err + }() + // 5. 
Wait for player to finish or stop signal + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + select { + case <-o.stopCh: + // Stop requested: kill the player + if o.cmd != nil && o.cmd.Process != nil { + o.cmd.Process.Kill() + } + <-done // wait for process to exit + return nil + case err := <-done: + // Playback finished normally + return err + case copyErrVal := <-copyErr: + if copyErrVal != nil { + // Copy failed – kill the player + if o.cmd != nil && o.cmd.Process != nil { + o.cmd.Process.Kill() + } + <-done + return copyErrVal + } + return nil + } +} + +// // Stop interrupts ongoing playback +// func (o *KokoroOrator) Stop() { +// o.cmdMu.Lock() +// defer o.cmdMu.Unlock() +// if o.stopCh != nil { +// close(o.stopCh) +// } +// // Also clear the buffer and set interrupt flag as before +// o.mu.Lock() +// o.textBuffer.Reset() +// o.interrupt = true +// o.mu.Unlock() +// } + +func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) { + if o.URL == "" { + return nil, fmt.Errorf("TTS URL is empty") + } + payload := map[string]interface{}{ + "input": text, + "voice": o.Voice, + "response_format": o.Format, + "download_format": o.Format, + "stream": o.Stream, + "speed": o.Speed, + // "return_download_link": true, + "lang_code": o.Language, + } + payloadBytes, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("failed to marshal payload: %w", err) + } + req, err := http.NewRequest("POST", o.URL, bytes.NewBuffer(payloadBytes)) //nolint:noctx + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("accept", "application/json") + req.Header.Set("Content-Type", "application/json") + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + if resp.StatusCode != http.StatusOK { + defer resp.Body.Close() + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + return resp.Body, nil +} + +func (o *KokoroOrator) stoproutine() { + for { + <-TTSDoneChan + o.logger.Debug("orator got done signal") + // 1. Stop any ongoing playback (kills external player, closes stopCh) + o.Stop() + // 2. Drain any pending text chunks + for len(TTSTextChan) > 0 { + <-TTSTextChan + } + // 3. 
Reset internal state + o.mu.Lock() + o.textBuffer.Reset() + o.interrupt = true + o.mu.Unlock() + } +} + +func (o *KokoroOrator) Stop() { + o.cmdMu.Lock() + defer o.cmdMu.Unlock() + // Signal any running Speak to stop + if o.stopCh != nil { + select { + case <-o.stopCh: // already closed + default: + close(o.stopCh) + } + o.stopCh = nil + } + // Kill the external player process if it's still running + if o.cmd != nil && o.cmd.Process != nil { + o.cmd.Process.Kill() + o.cmd.Wait() // clean up zombie process + o.cmd = nil + } + // Also reset text buffer and interrupt flag (with o.mu) + o.mu.Lock() + o.textBuffer.Reset() + o.interrupt = true + o.mu.Unlock() +} + +func (o *KokoroOrator) readroutine() { + tokenizer, _ := english.NewSentenceTokenizer(nil) + for { + select { + case chunk := <-TTSTextChan: + o.mu.Lock() + o.interrupt = false + _, err := o.textBuffer.WriteString(chunk) + if err != nil { + o.logger.Warn("failed to write to stringbuilder", "error", err) + o.mu.Unlock() + continue + } + text := o.textBuffer.String() + sentences := tokenizer.Tokenize(text) + o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences)) + if len(sentences) <= 1 { + o.mu.Unlock() + continue + } + completeSentences := sentences[:len(sentences)-1] + remaining := sentences[len(sentences)-1].Text + o.textBuffer.Reset() + o.textBuffer.WriteString(remaining) + o.mu.Unlock() + for _, sentence := range completeSentences { + o.mu.Lock() + interrupted := o.interrupt + o.mu.Unlock() + if interrupted { + return + } + cleanedText := models.CleanText(sentence.Text) + if cleanedText == "" { + continue + } + o.logger.Debug("calling Speak with sentence", "sent", cleanedText) + if err := o.Speak(cleanedText); err != nil { + o.logger.Error("tts failed", "sentence", cleanedText, "error", err) + } + } + case <-TTSFlushChan: + o.logger.Debug("got flushchan signal start") + // lln is done get the whole message out + if len(TTSTextChan) > 0 { // otherwise might get stuck + for chunk := range TTSTextChan { + o.mu.Lock() + _, err := o.textBuffer.WriteString(chunk) + o.mu.Unlock() + if err != nil { + o.logger.Warn("failed to write to stringbuilder", "error", err) + continue + } + if len(TTSTextChan) == 0 { + break + } + } + } + // flush remaining text + o.mu.Lock() + remaining := o.textBuffer.String() + remaining = models.CleanText(remaining) + o.textBuffer.Reset() + o.mu.Unlock() + if remaining == "" { + continue + } + o.logger.Debug("calling Speak with remainder", "rem", remaining) + sentencesRem := tokenizer.Tokenize(remaining) + for _, rs := range sentencesRem { // to avoid dumping large volume of text + o.mu.Lock() + interrupt := o.interrupt + o.mu.Unlock() + if interrupt { + break + } + if err := o.Speak(rs.Text); err != nil { + o.logger.Error("tts failed", "sentence", rs, "error", err) + } + } + } + } +} diff --git a/extra/tts.go b/extra/tts.go index 1960aa7..a75678b 100644 --- a/extra/tts.go +++ b/extra/tts.go @@ -4,14 +4,11 @@ package extra import ( - "bytes" - "encoding/json" "fmt" "gf-lt/config" "gf-lt/models" "io" "log/slog" - "net/http" "os" "strings" "sync" @@ -39,23 +36,6 @@ type Orator interface { GetLogger() *slog.Logger } -// impl https://github.com/remsky/Kokoro-FastAPI -type KokoroOrator struct { - logger *slog.Logger - mu sync.Mutex - URL string - Format models.AudioFormat - Stream bool - Speed float32 - Language string - Voice string - currentStream *beep.Ctrl // Added for playback control - currentDone chan bool - textBuffer strings.Builder - interrupt bool - // textBuffer 
bytes.Buffer -} - // Google Translate TTS implementation type GoogleTranslateOrator struct { logger *slog.Logger @@ -67,114 +47,6 @@ type GoogleTranslateOrator struct { interrupt bool } -func (o *KokoroOrator) stoproutine() { - for { - <-TTSDoneChan - o.logger.Debug("orator got done signal") - o.Stop() - // drain the channel - for len(TTSTextChan) > 0 { - <-TTSTextChan - } - o.mu.Lock() - o.textBuffer.Reset() - if o.currentDone != nil { - select { - case o.currentDone <- true: - default: - // Channel might be closed, ignore - } - } - o.interrupt = true - o.mu.Unlock() - } -} - -func (o *KokoroOrator) readroutine() { - tokenizer, _ := english.NewSentenceTokenizer(nil) - for { - select { - case chunk := <-TTSTextChan: - o.mu.Lock() - o.interrupt = false - _, err := o.textBuffer.WriteString(chunk) - if err != nil { - o.logger.Warn("failed to write to stringbuilder", "error", err) - o.mu.Unlock() - continue - } - text := o.textBuffer.String() - sentences := tokenizer.Tokenize(text) - o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences)) - if len(sentences) <= 1 { - o.mu.Unlock() - continue - } - completeSentences := sentences[:len(sentences)-1] - remaining := sentences[len(sentences)-1].Text - o.textBuffer.Reset() - o.textBuffer.WriteString(remaining) - o.mu.Unlock() - - for _, sentence := range completeSentences { - o.mu.Lock() - interrupted := o.interrupt - o.mu.Unlock() - if interrupted { - return - } - cleanedText := models.CleanText(sentence.Text) - if cleanedText == "" { - continue - } - o.logger.Debug("calling Speak with sentence", "sent", cleanedText) - if err := o.Speak(cleanedText); err != nil { - o.logger.Error("tts failed", "sentence", cleanedText, "error", err) - } - } - case <-TTSFlushChan: - o.logger.Debug("got flushchan signal start") - // lln is done get the whole message out - if len(TTSTextChan) > 0 { // otherwise might get stuck - for chunk := range TTSTextChan { - o.mu.Lock() - _, err := o.textBuffer.WriteString(chunk) - o.mu.Unlock() - if err != nil { - o.logger.Warn("failed to write to stringbuilder", "error", err) - continue - } - if len(TTSTextChan) == 0 { - break - } - } - } - // flush remaining text - o.mu.Lock() - remaining := o.textBuffer.String() - remaining = models.CleanText(remaining) - o.textBuffer.Reset() - o.mu.Unlock() - if remaining == "" { - continue - } - o.logger.Debug("calling Speak with remainder", "rem", remaining) - sentencesRem := tokenizer.Tokenize(remaining) - for _, rs := range sentencesRem { // to avoid dumping large volume of text - o.mu.Lock() - interrupt := o.interrupt - o.mu.Unlock() - if interrupt { - break - } - if err := o.Speak(rs.Text); err != nil { - o.logger.Error("tts failed", "sentence", rs, "error", err) - } - } - } - } -} - func NewOrator(log *slog.Logger, cfg *config.Config) Orator { provider := cfg.TTS_PROVIDER if provider == "" { @@ -216,93 +88,6 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator { } } -func (o *KokoroOrator) GetLogger() *slog.Logger { - return o.logger -} - -func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) { - if o.URL == "" { - return nil, fmt.Errorf("TTS URL is empty") - } - payload := map[string]interface{}{ - "input": text, - "voice": o.Voice, - "response_format": o.Format, - "download_format": o.Format, - "stream": o.Stream, - "speed": o.Speed, - // "return_download_link": true, - "lang_code": o.Language, - } - payloadBytes, err := json.Marshal(payload) - if err != nil { - return nil, fmt.Errorf("failed to marshal payload: %w", err) - } 
- req, err := http.NewRequest("POST", o.URL, bytes.NewBuffer(payloadBytes)) //nolint:noctx - if err != nil { - return nil, fmt.Errorf("failed to create request: %w", err) - } - req.Header.Set("accept", "application/json") - req.Header.Set("Content-Type", "application/json") - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, fmt.Errorf("request failed: %w", err) - } - if resp.StatusCode != http.StatusOK { - defer resp.Body.Close() - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) - } - return resp.Body, nil -} - -func (o *KokoroOrator) Speak(text string) error { - o.logger.Debug("fn: Speak is called", "text-len", len(text)) - body, err := o.requestSound(text) - if err != nil { - o.logger.Error("request failed", "error", err) - return fmt.Errorf("request failed: %w", err) - } - defer body.Close() - // Decode the mp3 audio from response body - streamer, format, err := mp3.Decode(body) - if err != nil { - o.logger.Error("mp3 decode failed", "error", err) - return fmt.Errorf("mp3 decode failed: %w", err) - } - defer streamer.Close() - // here it spams with errors that speaker cannot be initialized more than once, but how would we deal with many audio records then? - if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil { - o.logger.Debug("failed to init speaker", "error", err) - } - done := make(chan bool) - o.mu.Lock() - o.currentDone = done - o.currentStream = &beep.Ctrl{Streamer: beep.Seq(streamer, beep.Callback(func() { - o.mu.Lock() - close(done) - o.currentStream = nil - o.currentDone = nil - o.mu.Unlock() - })), Paused: false} - o.mu.Unlock() - speaker.Play(o.currentStream) - <-done - return nil -} - -func (o *KokoroOrator) Stop() { - // speaker.Clear() - o.logger.Debug("attempted to stop orator", "orator", o) - speaker.Lock() - defer speaker.Unlock() - o.mu.Lock() - defer o.mu.Unlock() - if o.currentStream != nil { - // o.currentStream.Paused = true - o.currentStream.Streamer = nil - } -} - func (o *GoogleTranslateOrator) stoproutine() { for { <-TTSDoneChan -- cgit v1.2.3 From 0f0c43f32701c314e2472ef1f9a1ec8a68ab0d1a Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sat, 7 Mar 2026 16:24:39 +0300 Subject: Dep: remove beep/portaudio dependancy --- extra/google_tts.go | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++++ extra/kokoro.go | 42 +++-------- extra/tts.go | 190 ---------------------------------------------- go.mod | 7 +- go.sum | 13 +--- 5 files changed, 226 insertions(+), 237 deletions(-) create mode 100644 extra/google_tts.go diff --git a/extra/google_tts.go b/extra/google_tts.go new file mode 100644 index 0000000..5b46f34 --- /dev/null +++ b/extra/google_tts.go @@ -0,0 +1,211 @@ +//go:build extra +// +build extra + +package extra + +import ( + "fmt" + "gf-lt/models" + "io" + "log/slog" + "os/exec" + "strings" + "sync" + + google_translate_tts "github.com/GrailFinder/google-translate-tts" + "github.com/neurosnap/sentences/english" +) + +type GoogleTranslateOrator struct { + logger *slog.Logger + mu sync.Mutex + speech *google_translate_tts.Speech + // fields for playback control + cmd *exec.Cmd + cmdMu sync.Mutex + stopCh chan struct{} + // text buffer and interrupt flag + textBuffer strings.Builder + interrupt bool +} + +func (o *GoogleTranslateOrator) stoproutine() { + for { + <-TTSDoneChan + o.logger.Debug("orator got done signal") + o.Stop() + for len(TTSTextChan) > 0 { + <-TTSTextChan + } + o.mu.Lock() + o.textBuffer.Reset() + o.interrupt = true + o.mu.Unlock() + } +} + +func (o 
*GoogleTranslateOrator) readroutine() {
+	tokenizer, _ := english.NewSentenceTokenizer(nil)
+	for {
+		select {
+		case chunk := <-TTSTextChan:
+			o.mu.Lock()
+			o.interrupt = false
+			_, err := o.textBuffer.WriteString(chunk)
+			if err != nil {
+				o.logger.Warn("failed to write to stringbuilder", "error", err)
+				o.mu.Unlock()
+				continue
+			}
+			text := o.textBuffer.String()
+			sentences := tokenizer.Tokenize(text)
+			o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences))
+			if len(sentences) <= 1 {
+				o.mu.Unlock()
+				continue
+			}
+			completeSentences := sentences[:len(sentences)-1]
+			remaining := sentences[len(sentences)-1].Text
+			o.textBuffer.Reset()
+			o.textBuffer.WriteString(remaining)
+			o.mu.Unlock()
+			for _, sentence := range completeSentences {
+				o.mu.Lock()
+				interrupted := o.interrupt
+				o.mu.Unlock()
+				if interrupted {
+					return
+				}
+				cleanedText := models.CleanText(sentence.Text)
+				if cleanedText == "" {
+					continue
+				}
+				o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
+				if err := o.Speak(cleanedText); err != nil {
+					o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
+				}
+			}
+		case <-TTSFlushChan:
+			o.logger.Debug("got flushchan signal start")
+			// llm is done; get the whole message out
+			if len(TTSTextChan) > 0 { // otherwise might get stuck
+				for chunk := range TTSTextChan {
+					o.mu.Lock()
+					_, err := o.textBuffer.WriteString(chunk)
+					o.mu.Unlock()
+					if err != nil {
+						o.logger.Warn("failed to write to stringbuilder", "error", err)
+						continue
+					}
+					if len(TTSTextChan) == 0 {
+						break
+					}
+				}
+			}
+			o.mu.Lock()
+			remaining := o.textBuffer.String()
+			remaining = models.CleanText(remaining)
+			o.textBuffer.Reset()
+			o.mu.Unlock()
+			if remaining == "" {
+				continue
+			}
+			o.logger.Debug("calling Speak with remainder", "rem", remaining)
+			sentencesRem := tokenizer.Tokenize(remaining)
+			for _, rs := range sentencesRem { // to avoid dumping large volume of text
+				o.mu.Lock()
+				interrupt := o.interrupt
+				o.mu.Unlock()
+				if interrupt {
+					break
+				}
+				if err := o.Speak(rs.Text); err != nil {
+					o.logger.Error("tts failed", "sentence", rs.Text, "error", err)
+				}
+			}
+		}
+	}
+}
+
+func (o *GoogleTranslateOrator) GetLogger() *slog.Logger {
+	return o.logger
+}
+
+func (o *GoogleTranslateOrator) Speak(text string) error {
+	o.logger.Debug("fn: Speak is called", "text-len", len(text))
+	// Generate MP3 data directly as an io.Reader
+	reader, err := o.speech.GenerateSpeech(text)
+	if err != nil {
+		return fmt.Errorf("generate speech failed: %w", err)
+	}
+	// Wrap in io.NopCloser since GenerateSpeech returns io.Reader (no close needed)
+	body := io.NopCloser(reader)
+	defer body.Close()
+	// Exactly the same ffplay piping as KokoroOrator
+	cmd := exec.Command("ffplay", "-nodisp", "-autoexit", "-i", "pipe:0")
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		return fmt.Errorf("failed to get stdin pipe: %w", err)
+	}
+	o.cmdMu.Lock()
+	o.cmd = cmd
+	o.stopCh = make(chan struct{})
+	o.cmdMu.Unlock()
+	if err := cmd.Start(); err != nil {
+		return fmt.Errorf("failed to start ffplay: %w", err)
+	}
+	copyErr := make(chan error, 1)
+	go func() {
+		_, err := io.Copy(stdin, body)
+		stdin.Close()
+		copyErr <- err
+	}()
+	done := make(chan error, 1)
+	go func() {
+		done <- cmd.Wait()
+	}()
+	select {
+	case <-o.stopCh:
+		if o.cmd != nil && o.cmd.Process != nil {
+			o.cmd.Process.Kill()
+		}
+		<-done
+		return nil
+	case copyErrVal := <-copyErr:
+		if copyErrVal != nil {
+			if o.cmd != nil && o.cmd.Process != nil {
+				o.cmd.Process.Kill()
+			}
+			<-done
+			return 
copyErrVal + } + return <-done + case err := <-done: + return err + } +} + +func (o *GoogleTranslateOrator) Stop() { + o.cmdMu.Lock() + defer o.cmdMu.Unlock() + // Signal any running Speak to stop + if o.stopCh != nil { + select { + case <-o.stopCh: // already closed + default: + close(o.stopCh) + } + o.stopCh = nil + } + // Kill the external player process if it's still running + if o.cmd != nil && o.cmd.Process != nil { + o.cmd.Process.Kill() + o.cmd.Wait() // clean up zombie process + o.cmd = nil + } + // Also reset text buffer and interrupt flag (with o.mu) + o.mu.Lock() + o.textBuffer.Reset() + o.interrupt = true + o.mu.Unlock() +} diff --git a/extra/kokoro.go b/extra/kokoro.go index 15b173b..e3ca047 100644 --- a/extra/kokoro.go +++ b/extra/kokoro.go @@ -40,17 +40,13 @@ func (o *KokoroOrator) GetLogger() *slog.Logger { return o.logger } -// Speak streams audio directly to an external player func (o *KokoroOrator) Speak(text string) error { o.logger.Debug("fn: Speak is called", "text-len", len(text)) - // 1. Get the audio stream (still an io.ReadCloser) body, err := o.requestSound(text) if err != nil { return fmt.Errorf("request failed: %w", err) } defer body.Close() - // 2. Prepare external player (ffplay as example) - // -i pipe:0 tells ffplay to read from stdin cmd := exec.Command("ffplay", "-nodisp", "-autoexit", "-i", "pipe:0") stdin, err := cmd.StdinPipe() if err != nil { @@ -60,60 +56,46 @@ func (o *KokoroOrator) Speak(text string) error { o.cmd = cmd o.stopCh = make(chan struct{}) o.cmdMu.Unlock() - // 3. Start the player if err := cmd.Start(); err != nil { return fmt.Errorf("failed to start ffplay: %w", err) } - // 4. Copy audio data to stdin in a goroutine + // Copy audio in background copyErr := make(chan error, 1) go func() { _, err := io.Copy(stdin, body) - stdin.Close() // signal EOF to player + stdin.Close() copyErr <- err }() - // 5. 
Wait for player to finish or stop signal + // Wait for player in background done := make(chan error, 1) go func() { done <- cmd.Wait() }() + // Wait for BOTH copy and player, but ensure we block until done select { case <-o.stopCh: - // Stop requested: kill the player + // Stop requested: kill player and wait for it to exit if o.cmd != nil && o.cmd.Process != nil { o.cmd.Process.Kill() } - <-done // wait for process to exit + <-done // Wait for process to actually exit return nil - case err := <-done: - // Playback finished normally - return err case copyErrVal := <-copyErr: if copyErrVal != nil { - // Copy failed – kill the player + // Copy failed: kill player and wait if o.cmd != nil && o.cmd.Process != nil { o.cmd.Process.Kill() } <-done return copyErrVal } - return nil + // Copy succeeded, now wait for playback to complete + return <-done + case err := <-done: + // Playback finished normally (copy must have succeeded or player would have exited early) + return err } } - -// // Stop interrupts ongoing playback -// func (o *KokoroOrator) Stop() { -// o.cmdMu.Lock() -// defer o.cmdMu.Unlock() -// if o.stopCh != nil { -// close(o.stopCh) -// } -// // Also clear the buffer and set interrupt flag as before -// o.mu.Lock() -// o.textBuffer.Reset() -// o.interrupt = true -// o.mu.Unlock() -// } - func (o *KokoroOrator) requestSound(text string) (io.ReadCloser, error) { if o.URL == "" { return nil, fmt.Errorf("TTS URL is empty") diff --git a/extra/tts.go b/extra/tts.go index a75678b..80085ab 100644 --- a/extra/tts.go +++ b/extra/tts.go @@ -4,22 +4,13 @@ package extra import ( - "fmt" "gf-lt/config" "gf-lt/models" - "io" "log/slog" "os" "strings" - "sync" - "time" google_translate_tts "github.com/GrailFinder/google-translate-tts" - "github.com/GrailFinder/google-translate-tts/handlers" - "github.com/gopxl/beep/v2" - "github.com/gopxl/beep/v2/mp3" - "github.com/gopxl/beep/v2/speaker" - "github.com/neurosnap/sentences/english" ) var ( @@ -36,17 +27,6 @@ type Orator interface { GetLogger() *slog.Logger } -// Google Translate TTS implementation -type GoogleTranslateOrator struct { - logger *slog.Logger - mu sync.Mutex - speech *google_translate_tts.Speech - currentStream *beep.Ctrl - currentDone chan bool - textBuffer strings.Builder - interrupt bool -} - func NewOrator(log *slog.Logger, cfg *config.Config) Orator { provider := cfg.TTS_PROVIDER if provider == "" { @@ -76,7 +56,6 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator { Language: language, Proxy: "", // Proxy not supported Speed: cfg.TTS_SPEED, - Handler: &handlers.Beep{}, } orator := &GoogleTranslateOrator{ logger: log, @@ -87,172 +66,3 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator { return orator } } - -func (o *GoogleTranslateOrator) stoproutine() { - for { - <-TTSDoneChan - o.logger.Debug("orator got done signal") - o.Stop() - // drain the channel - for len(TTSTextChan) > 0 { - <-TTSTextChan - } - o.mu.Lock() - o.textBuffer.Reset() - if o.currentDone != nil { - select { - case o.currentDone <- true: - default: - // Channel might be closed, ignore - } - } - o.interrupt = true - o.mu.Unlock() - } -} - -func (o *GoogleTranslateOrator) readroutine() { - tokenizer, _ := english.NewSentenceTokenizer(nil) - for { - select { - case chunk := <-TTSTextChan: - o.mu.Lock() - o.interrupt = false - _, err := o.textBuffer.WriteString(chunk) - if err != nil { - o.logger.Warn("failed to write to stringbuilder", "error", err) - o.mu.Unlock() - continue - } - text := o.textBuffer.String() - sentences := 
tokenizer.Tokenize(text) - o.logger.Debug("adding chunk", "chunk", chunk, "text", text, "sen-len", len(sentences)) - if len(sentences) <= 1 { - o.mu.Unlock() - continue - } - completeSentences := sentences[:len(sentences)-1] - remaining := sentences[len(sentences)-1].Text - o.textBuffer.Reset() - o.textBuffer.WriteString(remaining) - o.mu.Unlock() - - for _, sentence := range completeSentences { - o.mu.Lock() - interrupted := o.interrupt - o.mu.Unlock() - if interrupted { - return - } - cleanedText := models.CleanText(sentence.Text) - if cleanedText == "" { - continue - } - o.logger.Debug("calling Speak with sentence", "sent", cleanedText) - if err := o.Speak(cleanedText); err != nil { - o.logger.Error("tts failed", "sentence", cleanedText, "error", err) - } - } - case <-TTSFlushChan: - o.logger.Debug("got flushchan signal start") - // lln is done get the whole message out - if len(TTSTextChan) > 0 { // otherwise might get stuck - for chunk := range TTSTextChan { - o.mu.Lock() - _, err := o.textBuffer.WriteString(chunk) - o.mu.Unlock() - if err != nil { - o.logger.Warn("failed to write to stringbuilder", "error", err) - continue - } - if len(TTSTextChan) == 0 { - break - } - } - } - o.mu.Lock() - remaining := o.textBuffer.String() - remaining = models.CleanText(remaining) - o.textBuffer.Reset() - o.mu.Unlock() - if remaining == "" { - continue - } - o.logger.Debug("calling Speak with remainder", "rem", remaining) - sentencesRem := tokenizer.Tokenize(remaining) - for _, rs := range sentencesRem { // to avoid dumping large volume of text - o.mu.Lock() - interrupt := o.interrupt - o.mu.Unlock() - if interrupt { - break - } - if err := o.Speak(rs.Text); err != nil { - o.logger.Error("tts failed", "sentence", rs.Text, "error", err) - } - } - } - } -} - -func (o *GoogleTranslateOrator) GetLogger() *slog.Logger { - return o.logger -} - -func (o *GoogleTranslateOrator) Speak(text string) error { - o.logger.Debug("fn: Speak is called", "text-len", len(text)) - // Generate MP3 data using google-translate-tts - reader, err := o.speech.GenerateSpeech(text) - if err != nil { - o.logger.Error("generate speech failed", "error", err) - return fmt.Errorf("generate speech failed: %w", err) - } - // Decode the mp3 audio from reader (wrap with NopCloser for io.ReadCloser) - streamer, format, err := mp3.Decode(io.NopCloser(reader)) - if err != nil { - o.logger.Error("mp3 decode failed", "error", err) - return fmt.Errorf("mp3 decode failed: %w", err) - } - defer streamer.Close() - playbackStreamer := beep.Streamer(streamer) - speed := o.speech.Speed - if speed <= 0 { - speed = 1.0 - } - if speed != 1.0 { - playbackStreamer = beep.ResampleRatio(3, float64(speed), streamer) - } - // Initialize speaker with the format's sample rate - if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil { - o.logger.Debug("failed to init speaker", "error", err) - } - done := make(chan bool) - o.mu.Lock() - o.currentDone = done - o.currentStream = &beep.Ctrl{Streamer: beep.Seq(playbackStreamer, beep.Callback(func() { - o.mu.Lock() - close(done) - o.currentStream = nil - o.currentDone = nil - o.mu.Unlock() - })), Paused: false} - o.mu.Unlock() - speaker.Play(o.currentStream) - <-done // wait for playback to complete - return nil -} - -func (o *GoogleTranslateOrator) Stop() { - o.logger.Debug("attempted to stop google translate orator") - speaker.Lock() - defer speaker.Unlock() - o.mu.Lock() - defer o.mu.Unlock() - if o.currentStream != nil { - o.currentStream.Streamer = nil - } - // Also stop the 
speech handler if possible - if o.speech != nil { - _ = o.speech.Stop() - } -} diff --git a/go.mod b/go.mod index 531609a..17609a4 100644 --- a/go.mod +++ b/go.mod @@ -4,12 +4,11 @@ go 1.25.1 require ( github.com/BurntSushi/toml v1.5.0 - github.com/GrailFinder/google-translate-tts v0.1.3 + github.com/GrailFinder/google-translate-tts v0.1.4 github.com/GrailFinder/searchagent v0.2.0 github.com/PuerkitoBio/goquery v1.11.0 github.com/gdamore/tcell/v2 v2.13.2 github.com/glebarez/go-sqlite v1.22.0 - github.com/gopxl/beep/v2 v2.1.1 github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b github.com/jmoiron/sqlx v1.4.0 github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 @@ -25,21 +24,17 @@ require ( github.com/andybalholm/cascadia v1.3.3 // indirect github.com/deckarep/golang-set/v2 v2.8.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect - github.com/ebitengine/oto/v3 v3.4.0 // indirect - github.com/ebitengine/purego v0.9.1 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/gdamore/encoding v1.0.1 // indirect github.com/go-jose/go-jose/v3 v3.0.4 // indirect github.com/go-stack/stack v1.8.1 // indirect github.com/google/uuid v1.6.0 // indirect github.com/hajimehoshi/go-mp3 v0.3.4 // indirect - github.com/hajimehoshi/oto/v2 v2.3.1 // indirect github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/ncruces/go-strftime v1.0.0 // indirect github.com/patrickmn/go-cache v2.1.0+incompatible // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/schollz/progressbar/v2 v2.15.0 // indirect diff --git a/go.sum b/go.sum index 73d273b..565947e 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,8 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg= github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= -github.com/GrailFinder/google-translate-tts v0.1.3 h1:Mww9tNzTWjjSh+OCbTPl/+21oMPKcUecXZfU7nTB/lA= -github.com/GrailFinder/google-translate-tts v0.1.3/go.mod h1:YIOLKR7sObazdUCrSex3u9OVBovU55eYgWa25vsQJ18= +github.com/GrailFinder/google-translate-tts v0.1.4 h1:NJoPZUGfBrmouQMN19MUcNPNUx4tmf4a8OZRME4E4Mg= +github.com/GrailFinder/google-translate-tts v0.1.4/go.mod h1:YIOLKR7sObazdUCrSex3u9OVBovU55eYgWa25vsQJ18= github.com/GrailFinder/searchagent v0.2.0 h1:U2GVjLh/9xZt0xX9OcYk9Q2fMkyzyTiADPUmUisRdtQ= github.com/GrailFinder/searchagent v0.2.0/go.mod h1:d66tn5+22LI8IGJREUsRBT60P0sFdgQgvQRqyvgItrs= github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw= @@ -17,10 +17,6 @@ github.com/deckarep/golang-set/v2 v2.8.0 h1:swm0rlPCmdWn9mESxKOjWk8hXSqoxOp+Zlfu github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/ebitengine/oto/v3 v3.4.0 h1:br0PgASsEWaoWn38b2Goe7m1GKFYfNgnsjSd5Gg+/bQ= -github.com/ebitengine/oto/v3 v3.4.0/go.mod h1:IOleLVD0m+CMak3mRVwsYY8vTctQgOM0iiL6S7Ar7eI= -github.com/ebitengine/purego v0.9.1 
h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A= -github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/gdamore/encoding v1.0.1 h1:YzKZckdBL6jVt2Gc+5p82qhrGiqMdG/eNs6Wy0u3Uhw= @@ -41,13 +37,10 @@ github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17k github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gopxl/beep/v2 v2.1.1 h1:6FYIYMm2qPAdWkjX+7xwKrViS1x0Po5kDMdRkq8NVbU= -github.com/gopxl/beep/v2 v2.1.1/go.mod h1:ZAm9TGQ9lvpoiFLd4zf5B1IuyxZhgRACMId1XJbaW0E= github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b h1:WEuQWBxelOGHA6z9lABqaMLMrfwVyMdN3UgRLT+YUPo= github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b/go.mod h1:esZFQEUwqC+l76f2R8bIWSwXMaPbp79PppwZ1eJhFco= github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68= github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo= -github.com/hajimehoshi/oto/v2 v2.3.1 h1:qrLKpNus2UfD674oxckKjNJmesp9hMh7u7QCrStB3Rc= github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= @@ -71,8 +64,6 @@ github.com/neurosnap/sentences v1.1.2 h1:iphYOzx/XckXeBiLIUBkPu2EKMJ+6jDbz/sLJZ7 github.com/neurosnap/sentences v1.1.2/go.mod h1:/pwU4E9XNL21ygMIkOIllv/SMy2ujHwpf8GQPu1YPbQ= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/playwright-community/playwright-go v0.5700.1 h1:PNFb1byWqrTT720rEO0JL88C6Ju0EmUnR5deFLvtP/U= github.com/playwright-community/playwright-go v0.5700.1/go.mod h1:MlSn1dZrx8rszbCxY6x3qK89ZesJUYVx21B2JnkoNF0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -- cgit v1.2.3 From c5a24b2a3f30fe60888702b09e409647616c18d0 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sat, 7 Mar 2026 16:37:09 +0300 Subject: Enha: google-tts replay speed --- extra/google_tts.go | 11 +++++++++-- extra/tts.go | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/extra/google_tts.go b/extra/google_tts.go index 5b46f34..782075d 100644 --- a/extra/google_tts.go +++ b/extra/google_tts.go @@ -27,6 +27,7 @@ type GoogleTranslateOrator struct { // text buffer and interrupt flag textBuffer strings.Builder interrupt bool + Speed float32 } func (o *GoogleTranslateOrator) stoproutine() { @@ -141,8 +142,14 @@ func (o *GoogleTranslateOrator) Speak(text string) error { // Wrap in io.NopCloser since GenerateSpeech returns io.Reader (no close needed) body := io.NopCloser(reader) defer body.Close() - // Exactly the same ffplay piping as KokoroOrator - cmd := exec.Command("ffplay", "-nodisp", "-autoexit", "-i", "pipe:0") + // Build ffplay 
command with optional speed filter
+	args := []string{"-nodisp", "-autoexit"}
+	if o.Speed > 0.1 && o.Speed != 1.0 {
+		// ffplay's atempo filter only accepts 0.5 to 2.0; speeds outside that range should be clamped here
+		args = append(args, "-af", fmt.Sprintf("atempo=%.2f", o.Speed))
+	}
+	args = append(args, "-i", "pipe:0")
+	cmd := exec.Command("ffplay", args...)
 	stdin, err := cmd.StdinPipe()
 	if err != nil {
 		return fmt.Errorf("failed to get stdin pipe: %w", err)
 	}
diff --git a/extra/tts.go b/extra/tts.go
index 80085ab..2ddb0ae 100644
--- a/extra/tts.go
+++ b/extra/tts.go
@@ -60,6 +60,7 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
 		orator := &GoogleTranslateOrator{
 			logger: log,
 			speech: speech,
+			Speed:  cfg.TTS_SPEED,
 		}
 		go orator.readroutine()
 		go orator.stoproutine()
-- 
cgit v1.2.3


From c8f00198d6f0ad66269753252f56485ee346d413 Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Sat, 7 Mar 2026 18:13:11 +0300
Subject: Dep (stt): use ffmpeg instead of portaudio

---
 Makefile                     |   7 +-
 batteries/docker-compose.yml |  20 ++-
 extra/stt.go                 | 132 ---------------
 extra/whisper_binary.go      | 382 +++++++++++++------------------------------
 extra/whisper_server.go      | 156 ++++++++++++++++++
 go.mod                       |   1 -
 go.sum                       |   2 -
 7 files changed, 288 insertions(+), 412 deletions(-)
 create mode 100644 extra/whisper_server.go

diff --git a/Makefile b/Makefile
index 78db940..1490074 100644
--- a/Makefile
+++ b/Makefile
@@ -143,11 +143,10 @@ build-whisper: ## Build whisper.cpp from source in batteries directory

 download-whisper-model: ## Download Whisper model for STT in batteries directory
 	@echo "Downloading Whisper model for STT..."
-	@if [ ! -d "batteries/whisper.cpp" ]; then \
-		echo "Please run 'make setup-whisper' first to clone the repository."; \
-		exit 1; \
+	@if [ ! -d "batteries/whisper.cpp/models" ]; then \
+		mkdir -p "batteries/whisper.cpp/models"; \
 	fi
-	@cd batteries/whisper.cpp && bash ./models/download-ggml-model.sh large-v3-turbo-q5_0
+	curl -o batteries/whisper.cpp/models/ggml-large-v3-turbo-q5_0.bin -L "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin?download=true"
 	@echo "Whisper model downloaded successfully!"

 # Docker targets for STT/TTS services (in batteries directory)
diff --git a/batteries/docker-compose.yml b/batteries/docker-compose.yml
index 7cf401b..84b2262 100644
--- a/batteries/docker-compose.yml
+++ b/batteries/docker-compose.yml
@@ -6,19 +6,27 @@ services:
     ports:
       - "8081:8081"
     volumes:
-      - whisper_models:/app/models
+      - ./whisper.cpp/models/ggml-large-v3-turbo-q5_0.bin:/app/models/ggml-large-v3-turbo-q5_0.bin
     working_dir: /app
     entrypoint: ""
    command: >
      sh -c "
-      if [ ! -f /app/models/ggml-large-v3-turbo.bin ]; then
-        echo 'Downloading ggml-large-v3-turbo model...'
-        ./download-ggml-model.sh large-v3-turbo /app/models
+      if [ ! -f /app/models/ggml-large-v3-turbo-q5_0.bin ]; then
+        echo 'Downloading ggml-large-v3-turbo-q5_0 model...'
+        curl -o /app/models/ggml-large-v3-turbo-q5_0.bin -L "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin?download=true"
       fi &&
-      ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
+      ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo-q5_0.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
       "
     environment:
       - WHISPER_LOG_LEVEL=3
+    # GPU support; comment out or remove this block to run on CPU only:
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
     # Restart policy in case the service fails
     restart: unless-stopped
@@ -45,7 +53,5 @@ services:
 volumes:
   models:
     driver: local
-  audio:
-    driver: local
   whisper_models:
     driver: local
diff --git a/extra/stt.go b/extra/stt.go
index 86fcf9c..7bbf2fd 100644
--- a/extra/stt.go
+++ b/extra/stt.go
@@ -6,18 +6,10 @@ package extra
 import (
 	"bytes"
 	"encoding/binary"
-	"errors"
-	"fmt"
 	"gf-lt/config"
 	"io"
 	"log/slog"
-	"mime/multipart"
-	"net/http"
 	"regexp"
-	"strings"
-	"syscall"
-
-	"github.com/gordonklaus/portaudio"
 )

 var specialRE = regexp.MustCompile(`\[.*?\]`)
@@ -44,14 +36,6 @@ func NewSTT(logger *slog.Logger, cfg *config.Config) STT {
 	return NewWhisperServer(logger, cfg)
 }

-type WhisperServer struct {
-	logger      *slog.Logger
-	ServerURL   string
-	SampleRate  int
-	AudioBuffer *bytes.Buffer
-	recording   bool
-}
-
 func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
 	return &WhisperServer{
 		logger: logger,
@@ -61,69 +45,6 @@ func NewWhisperServer(logger *slog.Logger, cfg *config.Config) *WhisperServer {
 	}
 }

-func (stt *WhisperServer) StartRecording() error {
-	if err := stt.microphoneStream(stt.SampleRate); err != nil {
-		return fmt.Errorf("failed to init microphone: %w", err)
-	}
-	stt.recording = true
-	return nil
-}
-
-func (stt *WhisperServer) StopRecording() (string, error) {
-	stt.recording = false
-	// wait loop to finish? 
- if stt.AudioBuffer == nil { - err := errors.New("unexpected nil AudioBuffer") - stt.logger.Error(err.Error()) - return "", err - } - // Create WAV header first - body := &bytes.Buffer{} - writer := multipart.NewWriter(body) - // Add audio file part - part, err := writer.CreateFormFile("file", "recording.wav") - if err != nil { - stt.logger.Error("fn: StopRecording", "error", err) - return "", err - } - // Stream directly to multipart writer: header + raw data - dataSize := stt.AudioBuffer.Len() - stt.writeWavHeader(part, dataSize) - if _, err := io.Copy(part, stt.AudioBuffer); err != nil { - stt.logger.Error("fn: StopRecording", "error", err) - return "", err - } - // Reset buffer for next recording - stt.AudioBuffer.Reset() - // Add response format field - err = writer.WriteField("response_format", "text") - if err != nil { - stt.logger.Error("fn: StopRecording", "error", err) - return "", err - } - if writer.Close() != nil { - stt.logger.Error("fn: StopRecording", "error", err) - return "", err - } - // Send request - resp, err := http.Post(stt.ServerURL, writer.FormDataContentType(), body) //nolint:noctx - if err != nil { - stt.logger.Error("fn: StopRecording", "error", err) - return "", err - } - defer resp.Body.Close() - // Read and print response - responseTextBytes, err := io.ReadAll(resp.Body) - if err != nil { - stt.logger.Error("fn: StopRecording", "error", err) - return "", err - } - resptext := strings.TrimRight(string(responseTextBytes), "\n") - // in case there are special tokens like [_BEG_] - resptext = specialRE.ReplaceAllString(resptext, "") - return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil -} - func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) { header := make([]byte, 44) copy(header[0:4], "RIFF") @@ -147,56 +68,3 @@ func (stt *WhisperServer) writeWavHeader(w io.Writer, dataSize int) { func (stt *WhisperServer) IsRecording() bool { return stt.recording } - -func (stt *WhisperServer) microphoneStream(sampleRate int) error { - // Temporarily redirect stderr to suppress ALSA warnings during PortAudio init - origStderr, errDup := syscall.Dup(syscall.Stderr) - if errDup != nil { - return fmt.Errorf("failed to dup stderr: %w", errDup) - } - nullFD, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0) - if err != nil { - _ = syscall.Close(origStderr) // Close the dup'd fd if open fails - return fmt.Errorf("failed to open /dev/null: %w", err) - } - // redirect stderr - _ = syscall.Dup2(nullFD, syscall.Stderr) - // Initialize PortAudio (this is where ALSA warnings occur) - defer func() { - // Restore stderr - _ = syscall.Dup2(origStderr, syscall.Stderr) - _ = syscall.Close(origStderr) - _ = syscall.Close(nullFD) - }() - if err := portaudio.Initialize(); err != nil { - return fmt.Errorf("portaudio init failed: %w", err) - } - in := make([]int16, 64) - stream, err := portaudio.OpenDefaultStream(1, 0, float64(sampleRate), len(in), in) - if err != nil { - if paErr := portaudio.Terminate(); paErr != nil { - return fmt.Errorf("failed to open microphone: %w; terminate error: %w", err, paErr) - } - return fmt.Errorf("failed to open microphone: %w", err) - } - go func(stream *portaudio.Stream) { - if err := stream.Start(); err != nil { - stt.logger.Error("microphoneStream", "error", err) - return - } - for { - if !stt.IsRecording() { - return - } - if err := stream.Read(); err != nil { - stt.logger.Error("reading stream", "error", err) - return - } - if err := binary.Write(stt.AudioBuffer, binary.LittleEndian, in); err != nil { - 
stt.logger.Error("writing to buffer", "error", err) - return - } - } - }(stream) - return nil -} diff --git a/extra/whisper_binary.go b/extra/whisper_binary.go index 6b7ddc8..1c35952 100644 --- a/extra/whisper_binary.go +++ b/extra/whisper_binary.go @@ -9,15 +9,13 @@ import ( "errors" "fmt" "gf-lt/config" - "io" "log/slog" "os" "os/exec" "strings" "sync" "syscall" - - "github.com/gordonklaus/portaudio" + "time" ) type WhisperBinary struct { @@ -25,24 +23,14 @@ type WhisperBinary struct { whisperPath string modelPath string lang string - ctx context.Context - cancel context.CancelFunc - mu sync.Mutex - recording bool - audioBuffer []int16 -} - -func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary { - ctx, cancel := context.WithCancel(context.Background()) - // Set ALSA error handler first - return &WhisperBinary{ - logger: logger, - whisperPath: cfg.WhisperBinaryPath, - modelPath: cfg.WhisperModelPath, - lang: cfg.STT_LANG, - ctx: ctx, - cancel: cancel, - } + // Per-recording fields (protected by mu) + mu sync.Mutex + recording bool + tempFile string + ctx context.Context + cancel context.CancelFunc + cmd *exec.Cmd + cmdMu sync.Mutex } func (w *WhisperBinary) StartRecording() error { @@ -51,276 +39,138 @@ func (w *WhisperBinary) StartRecording() error { if w.recording { return errors.New("recording is already in progress") } - // If context is cancelled, create a new one for the next recording session - if w.ctx.Err() != nil { - w.logger.Debug("Context cancelled, creating new context") - w.ctx, w.cancel = context.WithCancel(context.Background()) - } - // Temporarily redirect stderr to suppress ALSA warnings during PortAudio init - origStderr, errDup := syscall.Dup(syscall.Stderr) - if errDup != nil { - return fmt.Errorf("failed to dup stderr: %w", errDup) - } - nullFD, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0) + // Fresh context for this recording + ctx, cancel := context.WithCancel(context.Background()) + w.ctx = ctx + w.cancel = cancel + // Create temporary file + tempFile, err := os.CreateTemp("", "recording_*.wav") if err != nil { - _ = syscall.Close(origStderr) // Close the dup'd fd if open fails - return fmt.Errorf("failed to open /dev/null: %w", err) - } - // redirect stderr - _ = syscall.Dup2(nullFD, syscall.Stderr) - // Initialize PortAudio (this is where ALSA warnings occur) - portaudioErr := portaudio.Initialize() - defer func() { - // Restore stderr - _ = syscall.Dup2(origStderr, syscall.Stderr) - _ = syscall.Close(origStderr) - _ = syscall.Close(nullFD) - }() - if portaudioErr != nil { - return fmt.Errorf("portaudio init failed: %w", portaudioErr) - } - // Initialize audio buffer - w.audioBuffer = make([]int16, 0) - in := make([]int16, 1024) // buffer size - stream, err := portaudio.OpenDefaultStream(1, 0, 16000.0, len(in), in) + cancel() + return fmt.Errorf("failed to create temp file: %w", err) + } + tempFile.Close() + w.tempFile = tempFile.Name() + // ffmpeg command: capture from default microphone, write WAV + args := []string{ + "-f", "alsa", // or "pulse" if preferred + "-i", "default", + "-acodec", "pcm_s16le", + "-ar", "16000", + "-ac", "1", + "-y", // overwrite output file + w.tempFile, + } + cmd := exec.CommandContext(w.ctx, "ffmpeg", args...) 
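+	// NOTE: exec.CommandContext ties the ffmpeg process to this recording's
+	// context, so cancelling the context kills the capture process.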
+ // Capture stderr for debugging (optional, but useful for diagnosing) + stderr, err := cmd.StderrPipe() if err != nil { - if paErr := portaudio.Terminate(); paErr != nil { - return fmt.Errorf("failed to open microphone: %w; terminate error: %w", err, paErr) - } - return fmt.Errorf("failed to open microphone: %w", err) - } - go w.recordAudio(stream, in) - w.recording = true - w.logger.Debug("Recording started") - return nil -} - -func (w *WhisperBinary) recordAudio(stream *portaudio.Stream, in []int16) { - defer func() { - w.logger.Debug("recordAudio defer function called") - _ = stream.Stop() // Stop the stream - _ = portaudio.Terminate() // ignoring error as we're shutting down - w.logger.Debug("recordAudio terminated") - }() - w.logger.Debug("Starting audio stream") - if err := stream.Start(); err != nil { - w.logger.Error("Failed to start audio stream", "error", err) - return - } - w.logger.Debug("Audio stream started, entering recording loop") - for { - select { - case <-w.ctx.Done(): - w.logger.Debug("Context done, exiting recording loop") - return - default: - // Check recording status with minimal lock time - w.mu.Lock() - recording := w.recording - w.mu.Unlock() - - if !recording { - w.logger.Debug("Recording flag is false, exiting recording loop") - return + cancel() + os.Remove(w.tempFile) + return fmt.Errorf("failed to create stderr pipe: %w", err) + } + go func() { + buf := make([]byte, 1024) + for { + n, err := stderr.Read(buf) + if n > 0 { + w.logger.Debug("ffmpeg stderr", "output", string(buf[:n])) } - if err := stream.Read(); err != nil { - w.logger.Error("Error reading from stream", "error", err) - return + if err != nil { + break } - // Append samples to buffer - only acquire lock when necessary - w.mu.Lock() - if w.audioBuffer == nil { - w.audioBuffer = make([]int16, 0) - } - // Make a copy of the input buffer to avoid overwriting - tempBuffer := make([]int16, len(in)) - copy(tempBuffer, in) - w.audioBuffer = append(w.audioBuffer, tempBuffer...) 
- w.mu.Unlock() } + }() + w.cmdMu.Lock() + w.cmd = cmd + w.cmdMu.Unlock() + if err := cmd.Start(); err != nil { + cancel() + os.Remove(w.tempFile) + return fmt.Errorf("failed to start ffmpeg: %w", err) } + w.recording = true + w.logger.Debug("Recording started", "file", w.tempFile) + return nil } func (w *WhisperBinary) StopRecording() (string, error) { - w.logger.Debug("StopRecording called") w.mu.Lock() + defer w.mu.Unlock() if !w.recording { - w.mu.Unlock() return "", errors.New("not currently recording") } - w.logger.Debug("Setting recording to false and cancelling context") w.recording = false - w.cancel() // This will stop the recording goroutine - w.mu.Unlock() - // // Small delay to allow the recording goroutine to react to context cancellation - // time.Sleep(20 * time.Millisecond) - // Save the recorded audio to a temporary file - tempFile, err := w.saveAudioToTempFile() - if err != nil { - w.logger.Error("Error saving audio to temp file", "error", err) - return "", fmt.Errorf("failed to save audio to temp file: %w", err) - } - w.logger.Debug("Saved audio to temp file", "file", tempFile) - // Run the whisper binary with a separate context to avoid cancellation during transcription - cmd := exec.Command(w.whisperPath, "-m", w.modelPath, "-l", w.lang, tempFile, "2>/dev/null") - var outBuf bytes.Buffer - cmd.Stdout = &outBuf - // Redirect stderr to suppress ALSA warnings and other stderr output - cmd.Stderr = io.Discard // Suppress stderr output from whisper binary - w.logger.Debug("Running whisper binary command") - if err := cmd.Run(); err != nil { - // Clean up audio buffer - w.mu.Lock() - w.audioBuffer = nil - w.mu.Unlock() - // Since we're suppressing stderr, we'll just log that the command failed - w.logger.Error("Error running whisper binary", "error", err) - return "", fmt.Errorf("whisper binary failed: %w", err) + // Gracefully stop ffmpeg + w.cmdMu.Lock() + if w.cmd != nil && w.cmd.Process != nil { + w.logger.Debug("Sending SIGTERM to ffmpeg") + w.cmd.Process.Signal(syscall.SIGTERM) + // Wait for process to exit (up to 2 seconds) + done := make(chan error, 1) + go func() { + done <- w.cmd.Wait() + }() + select { + case <-done: + w.logger.Debug("ffmpeg exited after SIGTERM") + case <-time.After(2 * time.Second): + w.logger.Warn("ffmpeg did not exit, sending SIGKILL") + w.cmd.Process.Kill() + <-done + } } - result := outBuf.String() - w.logger.Debug("Whisper binary completed", "result", result) - // Clean up audio buffer - w.mu.Lock() - w.audioBuffer = nil - w.mu.Unlock() - // Clean up the temporary file after transcription - w.logger.Debug("StopRecording completed") - os.Remove(tempFile) - result = strings.TrimRight(result, "\n") - // in case there are special tokens like [_BEG_] - result = specialRE.ReplaceAllString(result, "") - return strings.TrimSpace(strings.ReplaceAll(result, "\n ", "\n")), nil -} - -// saveAudioToTempFile saves the recorded audio data to a temporary WAV file -func (w *WhisperBinary) saveAudioToTempFile() (string, error) { - w.logger.Debug("saveAudioToTempFile called") - // Create temporary WAV file - tempFile, err := os.CreateTemp("", "recording_*.wav") - if err != nil { - w.logger.Error("Failed to create temp file", "error", err) - return "", fmt.Errorf("failed to create temp file: %w", err) + w.cmdMu.Unlock() + // Cancel context (already done, but for cleanliness) + if w.cancel != nil { + w.cancel() } - w.logger.Debug("Created temp file", "file", tempFile.Name()) - defer tempFile.Close() - - // Write WAV header and data - w.logger.Debug("About to 
write WAV file", "file", tempFile.Name()) - err = w.writeWAVFile(tempFile.Name()) - if err != nil { - w.logger.Error("Error writing WAV file", "error", err) - return "", fmt.Errorf("failed to write WAV file: %w", err) + // Validate temp file + if w.tempFile == "" { + return "", errors.New("no recording file") } - w.logger.Debug("WAV file written successfully", "file", tempFile.Name()) - - return tempFile.Name(), nil -} - -// writeWAVFile creates a WAV file from the recorded audio data -func (w *WhisperBinary) writeWAVFile(filename string) error { - w.logger.Debug("writeWAVFile called", "filename", filename) - // Open file for writing - file, err := os.Create(filename) + defer os.Remove(w.tempFile) + info, err := os.Stat(w.tempFile) if err != nil { - w.logger.Error("Error creating file", "error", err) - return err + return "", fmt.Errorf("failed to stat temp file: %w", err) } - defer file.Close() - - w.logger.Debug("About to acquire mutex in writeWAVFile") - w.mu.Lock() - w.logger.Debug("Locked mutex, copying audio buffer") - audioData := make([]int16, len(w.audioBuffer)) - copy(audioData, w.audioBuffer) - w.mu.Unlock() - w.logger.Debug("Unlocked mutex", "audio_data_length", len(audioData)) - - if len(audioData) == 0 { - w.logger.Warn("No audio data to write") - return errors.New("no audio data to write") + if info.Size() < 44 { // WAV header is 44 bytes + // Log ffmpeg stderr? Already captured in debug logs. + return "", fmt.Errorf("recording file too small (%d bytes), possibly no audio captured", info.Size()) } - - // Calculate data size (number of samples * size of int16) - dataSize := len(audioData) * 2 // 2 bytes per int16 sample - w.logger.Debug("Calculated data size", "size", dataSize) - - // Write WAV header with the correct data size - header := w.createWAVHeader(16000, 1, 16, dataSize) - _, err = file.Write(header) - if err != nil { - w.logger.Error("Error writing WAV header", "error", err) - return err - } - w.logger.Debug("WAV header written successfully") - - // Write audio data - w.logger.Debug("About to write audio data samples") - for i, sample := range audioData { - // Write little-endian 16-bit sample - _, err := file.Write([]byte{byte(sample), byte(sample >> 8)}) - if err != nil { - w.logger.Error("Error writing sample", "index", i, "error", err) - return err - } - // Log progress every 10000 samples to avoid too much output - if i%10000 == 0 { - w.logger.Debug("Written samples", "count", i) - } + // Run whisper.cpp binary + cmd := exec.Command(w.whisperPath, "-m", w.modelPath, "-l", w.lang, w.tempFile) + var outBuf, errBuf bytes.Buffer + cmd.Stdout = &outBuf + cmd.Stderr = &errBuf + if err := cmd.Run(); err != nil { + w.logger.Error("whisper binary failed", + "error", err, + "stderr", errBuf.String(), + "file_size", info.Size()) + return "", fmt.Errorf("whisper binary failed: %w (stderr: %s)", err, errBuf.String()) } - w.logger.Debug("All audio data written successfully") - - return nil -} - -// createWAVHeader creates a WAV file header -func (w *WhisperBinary) createWAVHeader(sampleRate, channels, bitsPerSample int, dataSize int) []byte { - header := make([]byte, 44) - copy(header[0:4], "RIFF") - // Total file size will be updated later - copy(header[8:12], "WAVE") - copy(header[12:16], "fmt ") - // fmt chunk size (16 for PCM) - header[16] = 16 - header[17] = 0 - header[18] = 0 - header[19] = 0 - // Audio format (1 = PCM) - header[20] = 1 - header[21] = 0 - // Number of channels - header[22] = byte(channels) - header[23] = 0 - // Sample rate - header[24] = 
byte(sampleRate)
-	header[25] = byte(sampleRate >> 8)
-	header[26] = byte(sampleRate >> 16)
-	header[27] = byte(sampleRate >> 24)
-	// Byte rate
-	byteRate := sampleRate * channels * bitsPerSample / 8
-	header[28] = byte(byteRate)
-	header[29] = byte(byteRate >> 8)
-	header[30] = byte(byteRate >> 16)
-	header[31] = byte(byteRate >> 24)
-	// Block align
-	blockAlign := channels * bitsPerSample / 8
-	header[32] = byte(blockAlign)
-	header[33] = 0
-	// Bits per sample
-	header[34] = byte(bitsPerSample)
-	header[35] = 0
-	// "data" subchunk
-	copy(header[36:40], "data")
-	// Data size
-	header[40] = byte(dataSize)
-	header[41] = byte(dataSize >> 8)
-	header[42] = byte(dataSize >> 16)
-	header[43] = byte(dataSize >> 24)
-
-	return header
+	result := strings.TrimRight(outBuf.String(), "\n")
+	result = specialRE.ReplaceAllString(result, "")
+	return strings.TrimSpace(strings.ReplaceAll(result, "\n ", "\n")), nil
 }

+// IsRecording returns true if a recording is in progress.
 func (w *WhisperBinary) IsRecording() bool {
 	w.mu.Lock()
 	defer w.mu.Unlock()
 	return w.recording
 }
+
+func NewWhisperBinary(logger *slog.Logger, cfg *config.Config) *WhisperBinary {
+	ctx, cancel := context.WithCancel(context.Background())
+	return &WhisperBinary{
+		logger:      logger,
+		whisperPath: cfg.WhisperBinaryPath,
+		modelPath:   cfg.WhisperModelPath,
+		lang:        cfg.STT_LANG,
+		ctx:         ctx,
+		cancel:      cancel,
+	}
+}
diff --git a/extra/whisper_server.go b/extra/whisper_server.go
new file mode 100644
index 0000000..7532f4a
--- /dev/null
+++ b/extra/whisper_server.go
@@ -0,0 +1,156 @@
+//go:build extra
+// +build extra
+
+package extra
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"log/slog"
+	"mime/multipart"
+	"net/http"
+	"os/exec"
+	"strings"
+	"sync"
+)
+
+type WhisperServer struct {
+	logger      *slog.Logger
+	ServerURL   string
+	SampleRate  int
+	AudioBuffer *bytes.Buffer
+	recording   bool          // protected by mu
+	mu          sync.Mutex    // protects recording & AudioBuffer
+	cmd         *exec.Cmd     // protected by cmdMu
+	stopCh      chan struct{} // protected by cmdMu
+	cmdMu       sync.Mutex    // protects cmd and stopCh
+}
+
+func (stt *WhisperServer) StartRecording() error {
+	stt.mu.Lock()
+	defer stt.mu.Unlock()
+	if stt.recording {
+		return nil
+	}
+	// Build ffmpeg command for microphone capture
+	args := []string{
+		"-f", "alsa",
+		"-i", "default",
+		"-acodec", "pcm_s16le",
+		"-ar", fmt.Sprint(stt.SampleRate),
+		"-ac", "1",
+		"-f", "s16le",
+		"-",
+	}
+	cmd := exec.Command("ffmpeg", args...)
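+	// ffmpeg emits raw s16le PCM on stdout ("-f s16le" ... "-"); the WAV header
+	// is prepended later by writeWavHeader when the buffer is uploaded.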
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return fmt.Errorf("failed to get stdout pipe: %w", err)
+	}
+	stt.cmdMu.Lock()
+	stt.cmd = cmd
+	stt.stopCh = make(chan struct{})
+	stt.cmdMu.Unlock()
+	if err := cmd.Start(); err != nil {
+		return fmt.Errorf("failed to start ffmpeg: %w", err)
+	}
+	stt.recording = true
+	stt.AudioBuffer.Reset()
+	// Read PCM data in goroutine
+	go func() {
+		buf := make([]byte, 4096)
+		for {
+			select {
+			case <-stt.stopCh:
+				return
+			default:
+				n, err := stdout.Read(buf)
+				if n > 0 {
+					stt.mu.Lock()
+					stt.AudioBuffer.Write(buf[:n])
+					stt.mu.Unlock()
+				}
+				if err != nil {
+					if err != io.EOF {
+						stt.logger.Error("recording read error", "error", err)
+					}
+					return
+				}
+			}
+		}
+	}()
+	return nil
+}
+
+func (stt *WhisperServer) StopRecording() (string, error) {
+	stt.mu.Lock()
+	defer stt.mu.Unlock()
+	if !stt.recording {
+		return "", errors.New("not recording")
+	}
+	stt.recording = false
+	// Stop ffmpeg
+	stt.cmdMu.Lock()
+	if stt.cmd != nil && stt.cmd.Process != nil {
+		stt.cmd.Process.Kill()
+		stt.cmd.Wait()
+	}
+	close(stt.stopCh)
+	stt.cmdMu.Unlock()
+	// Wrap the buffered PCM in a WAV container and upload it for transcription
+	if stt.AudioBuffer == nil {
+		err := errors.New("unexpected nil AudioBuffer")
+		stt.logger.Error(err.Error())
+		return "", err
+	}
+	// Create WAV header first
+	body := &bytes.Buffer{}
+	writer := multipart.NewWriter(body)
+	// Add audio file part
+	part, err := writer.CreateFormFile("file", "recording.wav")
+	if err != nil {
+		stt.logger.Error("fn: StopRecording", "error", err)
+		return "", err
+	}
+	// Stream directly to multipart writer: header + raw data
+	dataSize := stt.AudioBuffer.Len()
+	stt.writeWavHeader(part, dataSize)
+	if _, err := io.Copy(part, stt.AudioBuffer); err != nil {
+		stt.logger.Error("fn: StopRecording", "error", err)
+		return "", err
+	}
+	// Reset buffer for next recording
+	stt.AudioBuffer.Reset()
+	// Add response format field
+	err = writer.WriteField("response_format", "text")
+	if err != nil {
+		stt.logger.Error("fn: StopRecording", "error", err)
+		return "", err
+	}
+	if err := writer.Close(); err != nil {
+		stt.logger.Error("fn: StopRecording", "error", err)
+		return "", err
+	}
+	// Send request
+	resp, err := http.Post(stt.ServerURL, writer.FormDataContentType(), body) //nolint:noctx
+	if err != nil {
+		stt.logger.Error("fn: StopRecording", "error", err)
+		return "", err
+	}
+	defer resp.Body.Close()
+	// Read the response body
+	responseTextBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		stt.logger.Error("fn: StopRecording", "error", err)
+		return "", err
+	}
+	resptext := strings.TrimRight(string(responseTextBytes), "\n")
+	// in case there are special tokens like [_BEG_]
+	resptext = specialRE.ReplaceAllString(resptext, "")
+	return strings.TrimSpace(strings.ReplaceAll(resptext, "\n ", "\n")), nil
+}
diff --git a/go.mod b/go.mod
index 17609a4..615390f 100644
--- a/go.mod
+++ b/go.mod
@@ -9,7 +9,6 @@ require (
 	github.com/PuerkitoBio/goquery v1.11.0
 	github.com/gdamore/tcell/v2 v2.13.2
 	github.com/glebarez/go-sqlite v1.22.0
-	github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b
 	github.com/jmoiron/sqlx v1.4.0
 	github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728
 	github.com/neurosnap/sentences v1.1.2
diff --git a/go.sum b/go.sum
index 565947e..6c36a06 100644
--- a/go.sum
+++ b/go.sum
@@ -37,8 +37,6 @@ github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17k
 github.com/google/pprof 
v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b h1:WEuQWBxelOGHA6z9lABqaMLMrfwVyMdN3UgRLT+YUPo= -github.com/gordonklaus/portaudio v0.0.0-20250206071425-98a94950218b/go.mod h1:esZFQEUwqC+l76f2R8bIWSwXMaPbp79PppwZ1eJhFco= github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68= github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo= github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo= -- cgit v1.2.3 From bf655a10875630a6fe5f283340b6d390a1920b58 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sat, 7 Mar 2026 18:42:12 +0300 Subject: Enha: llama.cpp on non localhost --- bot.go | 37 ++++++++++++++++--------------------- helpfuncs.go | 7 ++----- llm.go | 9 +++++++-- 3 files changed, 25 insertions(+), 28 deletions(-) diff --git a/bot.go b/bot.go index ad52059..663dd0b 100644 --- a/bot.go +++ b/bot.go @@ -16,7 +16,6 @@ import ( "log/slog" "net" "net/http" - "net/url" "os" "regexp" "slices" @@ -253,12 +252,7 @@ func createClient(connectTimeout time.Duration) *http.Client { } func warmUpModel() { - u, err := url.Parse(cfg.CurrentAPI) - if err != nil { - return - } - host := u.Hostname() - if host != "localhost" && host != "127.0.0.1" && host != "::1" { + if !isLocalLlamacpp() { return } // Check if model is already loaded @@ -1404,20 +1398,21 @@ func updateModelLists() { time.Sleep(time.Millisecond * 100) } // set already loaded model in llama.cpp - if strings.Contains(cfg.CurrentAPI, "localhost") || strings.Contains(cfg.CurrentAPI, "127.0.0.1") { - localModelsMu.Lock() - defer localModelsMu.Unlock() - for i := range LocalModels { - if strings.Contains(LocalModels[i], models.LoadedMark) { - m := strings.TrimPrefix(LocalModels[i], models.LoadedMark) - cfg.CurrentModel = m - chatBody.Model = m - cachedModelColor = "green" - updateStatusLine() - updateToolCapabilities() - app.Draw() - return - } + if !isLocalLlamacpp() { + return + } + localModelsMu.Lock() + defer localModelsMu.Unlock() + for i := range LocalModels { + if strings.Contains(LocalModels[i], models.LoadedMark) { + m := strings.TrimPrefix(LocalModels[i], models.LoadedMark) + cfg.CurrentModel = m + chatBody.Model = m + cachedModelColor = "green" + updateStatusLine() + updateToolCapabilities() + app.Draw() + return } } } diff --git a/helpfuncs.go b/helpfuncs.go index b94e672..370f4de 100644 --- a/helpfuncs.go +++ b/helpfuncs.go @@ -5,7 +5,6 @@ import ( "gf-lt/models" "gf-lt/pngmeta" "image" - "net/url" "os" "os/exec" "path" @@ -323,12 +322,10 @@ func strInSlice(s string, sl []string) bool { // isLocalLlamacpp checks if the current API is a local llama.cpp instance. func isLocalLlamacpp() bool { - u, err := url.Parse(cfg.CurrentAPI) - if err != nil { + if strings.Contains(cfg.CurrentAPI, "openrouter") || strings.Contains(cfg.CurrentAPI, "deepseek") { return false } - host := u.Hostname() - return host == "localhost" || host == "127.0.0.1" || host == "::1" + return true } // getModelColor returns the cached color tag for the model name. 
diff --git a/llm.go b/llm.go index eaa0df8..0e77bc9 100644 --- a/llm.go +++ b/llm.go @@ -62,11 +62,11 @@ type ChunkParser interface { func choseChunkParser() { chunkParser = LCPCompletion{} switch cfg.CurrentAPI { - case "http://localhost:8080/completion": + case "http://localhost:8080/completion", "http://127.0.0.1:8080/completion": chunkParser = LCPCompletion{} logger.Debug("chosen lcpcompletion", "link", cfg.CurrentAPI) return - case "http://localhost:8080/v1/chat/completions": + case "http://localhost:8080/v1/chat/completions", "http://127.0.0.1:8080/v1/chat/completions": chunkParser = LCPChat{} logger.Debug("chosen lcpchat", "link", cfg.CurrentAPI) return @@ -87,6 +87,11 @@ func choseChunkParser() { logger.Debug("chosen openrouterchat", "link", cfg.CurrentAPI) return default: + logger.Warn("unexpected case, assuming llama.cpp on non default address", "link", cfg.CurrentAPI) + if strings.Contains(cfg.CurrentAPI, "chat") { + chunkParser = LCPChat{} + return + } chunkParser = LCPCompletion{} } } -- cgit v1.2.3 From 4f0bce50c53267a9f53938ad1b264d5094a08ce4 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sat, 7 Mar 2026 19:11:13 +0300 Subject: Chore: one init for clear call order --- bot.go | 52 ++-------------------------------------------------- helpfuncs.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ tools.go | 55 +------------------------------------------------------ tui.go | 3 ++- 4 files changed, 54 insertions(+), 105 deletions(-) diff --git a/bot.go b/bot.go index 663dd0b..0b4328f 100644 --- a/bot.go +++ b/bot.go @@ -1548,55 +1548,7 @@ func init() { } // Initialize scrollToEndEnabled based on config scrollToEndEnabled = cfg.AutoScrollEnabled - go updateModelLists() go chatWatcher(ctx) -} - -func getValidKnowToRecipient(msg *models.RoleMsg) (string, bool) { - if cfg == nil || !cfg.CharSpecificContextEnabled { - return "", false - } - // case where all roles are in the tag => public message - cr := listChatRoles() - slices.Sort(cr) - slices.Sort(msg.KnownTo) - if slices.Equal(cr, msg.KnownTo) { - logger.Info("got msg with tag mentioning every role") - return "", false - } - // Check each character in the KnownTo list - for _, recipient := range msg.KnownTo { - if recipient == msg.Role || recipient == cfg.ToolRole { - // weird cases, skip - continue - } - // Skip if this is the user character (user handles their own turn) - // If user is in KnownTo, stop processing - it's the user's turn - if recipient == cfg.UserRole || recipient == cfg.WriteNextMsgAs { - return "", false - } - return recipient, true - } - return "", false -} - -// triggerPrivateMessageResponses checks if a message was sent privately to specific characters -// and triggers those non-user characters to respond -func triggerPrivateMessageResponses(msg *models.RoleMsg) { - recipient, ok := getValidKnowToRecipient(msg) - if !ok || recipient == "" { - return - } - // Trigger the recipient character to respond - triggerMsg := recipient + ":\n" - // Send empty message so LLM continues naturally from the conversation - crr := &models.ChatRoundReq{ - UserMsg: triggerMsg, - Role: recipient, - Resume: true, - } - fmt.Fprintf(textView, "\n[-:-:b](%d) ", len(chatBody.Messages)) - fmt.Fprint(textView, roleToIcon(recipient)) - fmt.Fprint(textView, "[-:-:-]\n") - chatRoundChan <- crr + initTUI() + initTools() } diff --git a/helpfuncs.go b/helpfuncs.go index 370f4de..178406d 100644 --- a/helpfuncs.go +++ b/helpfuncs.go @@ -964,3 +964,52 @@ func extractDisplayPath(p, bp string) string { } return p } + +func 
getValidKnowToRecipient(msg *models.RoleMsg) (string, bool) { + if cfg == nil || !cfg.CharSpecificContextEnabled { + return "", false + } + // case where all roles are in the tag => public message + cr := listChatRoles() + slices.Sort(cr) + slices.Sort(msg.KnownTo) + if slices.Equal(cr, msg.KnownTo) { + logger.Info("got msg with tag mentioning every role") + return "", false + } + // Check each character in the KnownTo list + for _, recipient := range msg.KnownTo { + if recipient == msg.Role || recipient == cfg.ToolRole { + // weird cases, skip + continue + } + // Skip if this is the user character (user handles their own turn) + // If user is in KnownTo, stop processing - it's the user's turn + if recipient == cfg.UserRole || recipient == cfg.WriteNextMsgAs { + return "", false + } + return recipient, true + } + return "", false +} + +// triggerPrivateMessageResponses checks if a message was sent privately to specific characters +// and triggers those non-user characters to respond +func triggerPrivateMessageResponses(msg *models.RoleMsg) { + recipient, ok := getValidKnowToRecipient(msg) + if !ok || recipient == "" { + return + } + // Trigger the recipient character to respond + triggerMsg := recipient + ":\n" + // Send empty message so LLM continues naturally from the conversation + crr := &models.ChatRoundReq{ + UserMsg: triggerMsg, + Role: recipient, + Resume: true, + } + fmt.Fprintf(textView, "\n[-:-:b](%d) ", len(chatBody.Messages)) + fmt.Fprint(textView, roleToIcon(recipient)) + fmt.Fprint(textView, "[-:-:-]\n") + chatRoundChan <- crr +} diff --git a/tools.go b/tools.go index 3e5d402..e66533a 100644 --- a/tools.go +++ b/tools.go @@ -207,7 +207,7 @@ var ( modelHasVision bool ) -func init() { +func initTools() { sysMap[basicCard.ID] = basicCard roleToID["assistant"] = basicCard.ID sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "") @@ -2273,56 +2273,3 @@ var baseTools = []models.Tool{ }, }, } - -func init() { - if windowToolsAvailable { - baseTools = append(baseTools, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "list_windows", - Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "capture_window", - Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"window"}, - Properties: map[string]models.ToolArgProps{ - "window": models.ToolArgProps{ - Type: "string", - Description: "window ID or window name (partial match)", - }, - }, - }, - }, - }, - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "capture_window_and_view", - Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. 
Requires window parameter (window ID or name substring).", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"window"}, - Properties: map[string]models.ToolArgProps{ - "window": models.ToolArgProps{ - Type: "string", - Description: "window ID or window name (partial match)", - }, - }, - }, - }, - }, - ) - } -} diff --git a/tui.go b/tui.go index c6ab392..9c81f7d 100644 --- a/tui.go +++ b/tui.go @@ -224,7 +224,7 @@ func showToast(title, message string) { }) } -func init() { +func initTUI() { // Start background goroutine to update model color cache startModelColorUpdater() tview.Styles = colorschemes["default"] @@ -1173,4 +1173,5 @@ func init() { } return event }) + go updateModelLists() } -- cgit v1.2.3 From 23cb8f2578540e698f590bed35f973a22a8c2f90 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sun, 8 Mar 2026 06:45:51 +0300 Subject: Chore: remove AutoCleanToolCallsFromCtx, atomic model color --- bot.go | 19 ++++++++----------- config/config.go | 1 - docs/config.md | 3 --- helpfuncs.go | 24 +++++++++++++----------- props_table.go | 3 --- tui.go | 9 ++++----- 6 files changed, 25 insertions(+), 34 deletions(-) diff --git a/bot.go b/bot.go index 0b4328f..315491a 100644 --- a/bot.go +++ b/bot.go @@ -851,7 +851,7 @@ out: if thinkingCollapsed { // Show placeholder immediately when thinking starts in collapsed mode fmt.Fprint(textView, "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]") - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } respText.WriteString(chunk) @@ -866,7 +866,7 @@ out: // Thinking already displayed as placeholder, just update respText respText.WriteString(chunk) justExitedThinkingCollapsed = true - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } continue @@ -888,7 +888,7 @@ out: respText.WriteString(chunk) // Update the message in chatBody.Messages so it persists during Alt+T chatBody.Messages[msgIdx].Content = respText.String() - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } // Send chunk to audio stream handler @@ -898,7 +898,7 @@ out: case toolChunk := <-openAIToolChan: fmt.Fprint(textView, toolChunk) toolResp.WriteString(toolChunk) - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } case <-streamDone: @@ -906,7 +906,7 @@ out: chunk := <-chunkChan fmt.Fprint(textView, chunk) respText.WriteString(chunk) - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } if cfg.TTS_ENABLED { @@ -1394,9 +1394,6 @@ func updateModelLists() { localModelsMu.Lock() LocalModels = ml localModelsMu.Unlock() - for statusLineWidget == nil { - time.Sleep(time.Millisecond * 100) - } // set already loaded model in llama.cpp if !isLocalLlamacpp() { return @@ -1408,7 +1405,7 @@ func updateModelLists() { m := strings.TrimPrefix(LocalModels[i], models.LoadedMark) cfg.CurrentModel = m chatBody.Model = m - cachedModelColor = "green" + cachedModelColor.Store("green") updateStatusLine() updateToolCapabilities() app.Draw() @@ -1546,8 +1543,8 @@ func init() { } } } - // Initialize scrollToEndEnabled based on config - scrollToEndEnabled = cfg.AutoScrollEnabled + // atomic default values + cachedModelColor.Store("orange") go chatWatcher(ctx) initTUI() initTools() diff --git a/config/config.go b/config/config.go index fab3237..e8c2687 100644 --- a/config/config.go +++ b/config/config.go @@ -27,7 +27,6 @@ type Config struct { WriteNextMsgAs string WriteNextMsgAsCompletionAgent string SkipLLMResp bool - AutoCleanToolCallsFromCtx bool 
`toml:"AutoCleanToolCallsFromCtx"`
 	DBPATH         string `toml:"DBPATH"`
 	FilePickerDir  string `toml:"FilePickerDir"`
 	FilePickerExts string `toml:"FilePickerExts"`
diff --git a/docs/config.md b/docs/config.md
index 6f11d73..fab8261 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -63,9 +63,6 @@ This document explains how to set up and configure the application using the `co
 #### AutoScrollEnabled (`true`)
 - Whether to automatically scroll chat window while llm streams its response.
 
-#### AutoCleanToolCallsFromCtx (`false`)
-- Whether to automatically clean tool calls from the conversation context to manage token usage.
-
 ### RAG (Retrieval Augmented Generation) Settings
 
 #### EmbedURL (`"http://localhost:8082/v1/embeddings"`)
diff --git a/helpfuncs.go b/helpfuncs.go
index 178406d..e28beda 100644
--- a/helpfuncs.go
+++ b/helpfuncs.go
@@ -12,6 +12,7 @@ import (
 	"slices"
 	"strconv"
 	"strings"
+	"sync/atomic"
 	"time"
 	"unicode"
@@ -19,7 +20,8 @@ import (
 )
 
 // Cached model color - updated by background goroutine
-var cachedModelColor string = "orange"
+// var cachedModelColor string = "orange"
+var cachedModelColor atomic.Value
 
 // startModelColorUpdater starts a background goroutine that periodically updates
 // the cached model color. Only runs HTTP requests for local llama.cpp APIs.
@@ -38,20 +40,20 @@ func startModelColorUpdater() {
 // updateCachedModelColor updates the global cachedModelColor variable
 func updateCachedModelColor() {
 	if !isLocalLlamacpp() {
-		cachedModelColor = "orange"
+		cachedModelColor.Store("orange")
 		return
 	}
 	// Check if model is loaded
 	loaded, err := isModelLoaded(chatBody.Model)
 	if err != nil {
 		// On error, assume not loaded (red)
-		cachedModelColor = "red"
+		cachedModelColor.Store("red")
 		return
 	}
 	if loaded {
-		cachedModelColor = "green"
+		cachedModelColor.Store("green")
 	} else {
-		cachedModelColor = "red"
+		cachedModelColor.Store("red")
 	}
 }
@@ -107,7 +109,7 @@ func refreshChatDisplay() {
 	textView.SetText(displayText)
 	colorText()
 	updateStatusLine()
-	if scrollToEndEnabled {
+	if cfg.AutoScrollEnabled {
 		textView.ScrollToEnd()
 	}
 }
@@ -332,7 +334,7 @@ func isLocalLlamacpp() bool {
 // The cached value is updated by a background goroutine every 5 seconds.
 // For non-local models, returns orange. For local llama.cpp models, returns green if loaded, red if not.
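The atomic.Value switch above is the heart of this commit: the background poller Stores the color while the TUI goroutine Loads it, replacing an unsynchronized write/read pair on a plain string. Note that the reader just below type-asserts Load().(string), which panics if nothing was ever stored; that is why this series also seeds cachedModelColor.Store("orange") in init. A minimal, self-contained sketch of the pattern (illustrative names, not from this repo):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

var color atomic.Value // written by a poller goroutine, read by the UI

func main() {
	color.Store("orange") // seed before any Load, or Load().(string) panics
	go func() {
		for { // background updater, like updateCachedModelColor
			color.Store("green")
			time.Sleep(100 * time.Millisecond)
		}
	}()
	time.Sleep(150 * time.Millisecond)
	fmt.Println(color.Load().(string)) // race-free read
}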
func getModelColor() string { - return cachedModelColor + return cachedModelColor.Load().(string) } func makeStatusLine() string { @@ -539,7 +541,7 @@ func executeCommandAndDisplay(cmdText string) { cmdText = strings.TrimSpace(cmdText) if cmdText == "" { fmt.Fprintf(textView, "\n[red]Error: No command provided[-:-:-]\n") - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } colorText() @@ -571,7 +573,7 @@ func executeCommandAndDisplay(cmdText string) { Content: "$ " + cmdText + "\n\n" + outputContent, } chatBody.Messages = append(chatBody.Messages, combinedMsg) - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } colorText() @@ -586,7 +588,7 @@ func executeCommandAndDisplay(cmdText string) { Content: "$ " + cmdText + "\n\n" + outputContent, } chatBody.Messages = append(chatBody.Messages, combinedMsg) - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } colorText() @@ -634,7 +636,7 @@ func executeCommandAndDisplay(cmdText string) { } chatBody.Messages = append(chatBody.Messages, combinedMsg) // Scroll to end and update colors - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } colorText() diff --git a/props_table.go b/props_table.go index 5c3d8d7..d1d3680 100644 --- a/props_table.go +++ b/props_table.go @@ -121,9 +121,6 @@ func makePropsTable(props map[string]float32) *tview.Table { addCheckboxRow("TTS Enabled", cfg.TTS_ENABLED, func(checked bool) { cfg.TTS_ENABLED = checked }) - addCheckboxRow("Auto clean tool calls from context", cfg.AutoCleanToolCallsFromCtx, func(checked bool) { - cfg.AutoCleanToolCallsFromCtx = checked - }) addCheckboxRow("Enable Mouse", cfg.EnableMouse, func(checked bool) { cfg.EnableMouse = checked // Reconfigure the app's mouse setting diff --git a/tui.go b/tui.go index 9c81f7d..d7ea57f 100644 --- a/tui.go +++ b/tui.go @@ -42,7 +42,6 @@ var ( confirmPageName = "confirm" fullscreenMode bool positionVisible bool = true - scrollToEndEnabled bool = true // pages historyPage = "historyPage" agentPage = "agentPage" @@ -634,7 +633,7 @@ func initTUI() { updateStatusLine() textView.SetText(chatToText(chatBody.Messages, cfg.ShowSys)) colorText() - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } // init sysmap @@ -663,9 +662,9 @@ func initTUI() { } if event.Key() == tcell.KeyRune && event.Rune() == '2' && event.Modifiers()&tcell.ModAlt != 0 { // toggle auto-scrolling - scrollToEndEnabled = !scrollToEndEnabled + cfg.AutoScrollEnabled = !cfg.AutoScrollEnabled status := "disabled" - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { status = "enabled" } showToast("autoscroll", "Auto-scrolling "+status) @@ -1139,7 +1138,7 @@ func initTUI() { fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n", nl, len(chatBody.Messages), persona, msgText) textArea.SetText("", true) - if scrollToEndEnabled { + if cfg.AutoScrollEnabled { textView.ScrollToEnd() } colorText() -- cgit v1.2.3 From c200c9328c4aa7654dc41c0eac02fe1cc267d666 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sun, 8 Mar 2026 07:13:27 +0300 Subject: Enha: botresp, toolresp to atomic --- bot.go | 28 ++++++++++++++-------------- main.go | 6 ++++-- tui.go | 14 +++++++------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/bot.go b/bot.go index 315491a..d01ebb9 100644 --- a/bot.go +++ b/bot.go @@ -22,6 +22,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" ) @@ -40,7 +41,7 @@ var ( store storage.FullRepo defaultFirstMsg = "Hello! What can I do for you?" 
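The same kind of race exists on the control flags, and this commit converts them too: interruptResp in the hunk just below, plus botRespMode and toolRunningMode in main.go, go from plain bools to sync/atomic.Bool, so the F6 handler Stores while the streaming loop and spinner goroutines Load. A hedged sketch of that flag pattern, with illustrative names:

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

var interrupt atomic.Bool // the key handler Stores; the stream loop Loads

func stream(done chan<- int) {
	for i := 0; ; i++ {
		if interrupt.Load() { // was `if interrupt {` on a plain bool: a data race
			done <- i
			return
		}
		time.Sleep(10 * time.Millisecond)
	}
}

func main() {
	done := make(chan int)
	go stream(done)
	interrupt.Store(true) // what the F6 branch now does
	fmt.Println("interrupted at chunk", <-done)
}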
defaultStarter = []models.RoleMsg{} - interruptResp = false + interruptResp atomic.Bool ragger *rag.RAG chunkParser ChunkParser lastToolCall *models.FuncCall @@ -643,7 +644,7 @@ func sendMsgToLLM(body io.Reader) { // continue } if len(line) <= 1 { - if interruptResp { + if interruptResp.Load() { goto interrupt // get unstuck from bad connection } continue // skip \n @@ -736,8 +737,7 @@ func sendMsgToLLM(body io.Reader) { lastToolCall.ID = chunk.ToolID } interrupt: - if interruptResp { // read bytes, so it would not get into beginning of the next req - // interruptResp = false + if interruptResp.Load() { // read bytes, so it would not get into beginning of the next req logger.Info("interrupted bot response", "chunk_counter", counter) streamDone <- true break @@ -770,14 +770,14 @@ func showSpinner() { if cfg.WriteNextMsgAsCompletionAgent != "" { botPersona = cfg.WriteNextMsgAsCompletionAgent } - for botRespMode || toolRunningMode { + for botRespMode.Load() || toolRunningMode.Load() { time.Sleep(400 * time.Millisecond) spin := i % len(spinners) app.QueueUpdateDraw(func() { switch { - case toolRunningMode: + case toolRunningMode.Load(): textArea.SetTitle(spinners[spin] + " tool") - case botRespMode: + case botRespMode.Load(): textArea.SetTitle(spinners[spin] + " " + botPersona + " (F6 to interrupt)") default: textArea.SetTitle(spinners[spin] + " input") @@ -791,8 +791,8 @@ func showSpinner() { } func chatRound(r *models.ChatRoundReq) error { - interruptResp = false - botRespMode = true + interruptResp.Store(false) + botRespMode.Store(true) go showSpinner() updateStatusLine() botPersona := cfg.AssistantRole @@ -800,7 +800,7 @@ func chatRound(r *models.ChatRoundReq) error { botPersona = cfg.WriteNextMsgAsCompletionAgent } defer func() { - botRespMode = false + botRespMode.Store(false) ClearImageAttachment() }() // check that there is a model set to use if is not local @@ -928,7 +928,7 @@ out: } lastRespStats = nil } - botRespMode = false + botRespMode.Store(false) if r.Resume { chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String() updatedMsg := chatBody.Messages[len(chatBody.Messages)-1] @@ -957,7 +957,7 @@ out: } // Strip think blocks before parsing for tool calls respTextNoThink := thinkBlockRE.ReplaceAllString(respText.String(), "") - if interruptResp { + if interruptResp.Load() { return nil } if findCall(respTextNoThink, toolResp.String()) { @@ -1192,9 +1192,9 @@ func findCall(msg, toolCall string) bool { } // Show tool call progress indicator before execution fmt.Fprintf(textView, "\n[yellow::i][tool: %s...][-:-:-]", fc.Name) - toolRunningMode = true + toolRunningMode.Store(true) resp := callToolWithAgent(fc.Name, fc.Args) - toolRunningMode = false + toolRunningMode.Store(false) toolMsg := string(resp) logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg) // Create tool response message with the proper tool_call_id diff --git a/main.go b/main.go index fe92327..ddabff8 100644 --- a/main.go +++ b/main.go @@ -1,13 +1,15 @@ package main import ( + "sync/atomic" + "github.com/rivo/tview" ) var ( boolColors = map[bool]string{true: "green", false: "red"} - botRespMode = false - toolRunningMode = false + botRespMode atomic.Bool + toolRunningMode atomic.Bool editMode = false roleEditMode = false injectRole = true diff --git a/tui.go b/tui.go index d7ea57f..482050a 100644 --- a/tui.go +++ b/tui.go @@ -731,7 +731,7 @@ func initTUI() { updateStatusLine() return nil } - if event.Key() == tcell.KeyF2 && !botRespMode { + if 
event.Key() == tcell.KeyF2 && !botRespMode.Load() { // regen last msg if len(chatBody.Messages) == 0 { showToast("info", "no messages to regenerate") @@ -748,7 +748,7 @@ func initTUI() { chatRoundChan <- &models.ChatRoundReq{Role: cfg.UserRole, Regen: true} return nil } - if event.Key() == tcell.KeyF3 && !botRespMode { + if event.Key() == tcell.KeyF3 && !botRespMode.Load() { // delete last msg // check textarea text; if it ends with bot icon delete only icon: text := textView.GetText(true) @@ -804,9 +804,9 @@ func initTUI() { return nil } if event.Key() == tcell.KeyF6 { - interruptResp = true - botRespMode = false - toolRunningMode = false + interruptResp.Store(true) + botRespMode.Store(false) + toolRunningMode.Store(false) return nil } if event.Key() == tcell.KeyF7 { @@ -1101,7 +1101,7 @@ func initTUI() { return nil } // cannot send msg in editMode or botRespMode - if event.Key() == tcell.KeyEscape && !editMode && !botRespMode { + if event.Key() == tcell.KeyEscape && !editMode && !botRespMode.Load() { if shellMode { cmdText := shellInput.GetText() if cmdText != "" { @@ -1167,7 +1167,7 @@ func initTUI() { app.SetFocus(focusSwitcher[currentF]) return nil } - if isASCII(string(event.Rune())) && !botRespMode { + if isASCII(string(event.Rune())) && !botRespMode.Load() { return event } return event -- cgit v1.2.3 From 5b175c12a63099525444ab455b333c8a6579bd78 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sun, 8 Mar 2026 07:29:04 +0300 Subject: Chore: update readme --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index e5faa9c..aa78b4f 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,12 @@ made with use of [tview](https://github.com/rivo/tview) #### how it looks ![how it looks](assets/ex01.png) + +#### dependencies +- make +- go +- ffmpeg (extra) + #### how to install (requires golang) clone the project -- cgit v1.2.3 From e0201886f80528790c3a05864da66bafdf07f9d8 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sun, 8 Mar 2026 08:50:50 +0300 Subject: Enha (rag): keep page open until user closes it --- rag/rag.go | 2 +- tables.go | 31 +++++++++++++++++++------------ tui.go | 1 - 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/rag/rag.go b/rag/rag.go index 3db4303..e47e3d6 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -24,7 +24,7 @@ const () var ( // Status messages for TUI integration LongJobStatusCh = make(chan string, 100) // Increased buffer size for parallel batch updates - FinishedRAGStatus = "finished loading RAG file; press Enter" + FinishedRAGStatus = "finished loading RAG file; press x to exit" LoadedFileRAGStatus = "loaded file" ErrRAGStatus = "some error occurred; failed to transfer data to vector db" ) diff --git a/tables.go b/tables.go index baa1c36..5042c7c 100644 --- a/tables.go +++ b/tables.go @@ -243,11 +243,9 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { for _, f := range loadedFiles { loadedSet[f] = true } - // Build merged list: files from ragdir + orphaned files from DB ragFiles := make([]ragFileInfo, 0, len(fileList)+len(loadedFiles)) seen := make(map[string]bool) - // Add files from ragdir for _, f := range fileList { ragFiles = append(ragFiles, ragFileInfo{ @@ -258,7 +256,6 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { }) seen[f] = true } - // Add orphaned files (in DB but not in ragdir) for _, f := range loadedFiles { if !seen[f] { @@ -376,7 +373,6 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { } errCh := make(chan error, 
1) // why? go func() { - defer pages.RemovePage(RAGPage) for { select { case err := <-errCh: @@ -417,7 +413,6 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { } return } - // defer pages.RemovePage(RAGPage) tc := fileTable.GetCell(row, column) tc.SetTextColor(tcell.ColorRed) fileTable.SetSelectable(false, false) @@ -430,7 +425,6 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { f := ragFiles[row-1] // Handle "-" case (orphaned file with no delete option) if tc.Text == "-" { - pages.RemovePage(RAGPage) return } switch tc.Text { @@ -441,14 +435,14 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { if err := ragger.LoadRAG(fpath); err != nil { logger.Error("failed to embed file", "chat", fpath, "error", err) showToast("RAG", "failed to embed file; error: "+err.Error()) - app.QueueUpdate(func() { - pages.RemovePage(RAGPage) - }) return } showToast("RAG", "file loaded successfully") app.QueueUpdate(func() { pages.RemovePage(RAGPage) + loadedFiles, _ := ragger.ListLoaded() + chatRAGTable := makeRAGTable(fileList, loadedFiles) + pages.AddPage(RAGPage, chatRAGTable, true, true) }) }() return @@ -458,14 +452,14 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { if err := ragger.RemoveFile(f.name); err != nil { logger.Error("failed to unload file from RAG", "filename", f.name, "error", err) showToast("RAG", "failed to unload file; error: "+err.Error()) - app.QueueUpdate(func() { - pages.RemovePage(RAGPage) - }) return } showToast("RAG", "file unloaded successfully") app.QueueUpdate(func() { pages.RemovePage(RAGPage) + loadedFiles, _ := ragger.ListLoaded() + chatRAGTable := makeRAGTable(fileList, loadedFiles) + pages.AddPage(RAGPage, chatRAGTable, true, true) }) }() return @@ -476,6 +470,19 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { return } showToast("chat deleted", fpath+" was deleted") + app.QueueUpdate(func() { + pages.RemovePage(RAGPage) + newFileList, _ := os.ReadDir(cfg.RAGDir) + loadedFiles, _ := ragger.ListLoaded() + var newFiles []string + for _, f := range newFileList { + if !f.IsDir() { + newFiles = append(newFiles, f.Name()) + } + } + chatRAGTable := makeRAGTable(newFiles, loadedFiles) + pages.AddPage(RAGPage, chatRAGTable, true, true) + }) return default: pages.RemovePage(RAGPage) diff --git a/tui.go b/tui.go index 482050a..04ce38e 100644 --- a/tui.go +++ b/tui.go @@ -50,7 +50,6 @@ var ( helpPage = "helpPage" renamePage = "renamePage" RAGPage = "RAGPage" - RAGLoadedPage = "RAGLoadedPage" propsPage = "propsPage" codeBlockPage = "codeBlockPage" imgPage = "imgPage" -- cgit v1.2.3 From b5f0eabeea01e4df7df8ee92473e56e10fedba9a Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sun, 8 Mar 2026 09:00:24 +0300 Subject: Fix (rag): do not hang on delete --- tables.go | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tables.go b/tables.go index 5042c7c..e9d4eb6 100644 --- a/tables.go +++ b/tables.go @@ -470,19 +470,21 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex { return } showToast("chat deleted", fpath+" was deleted") - app.QueueUpdate(func() { - pages.RemovePage(RAGPage) - newFileList, _ := os.ReadDir(cfg.RAGDir) - loadedFiles, _ := ragger.ListLoaded() - var newFiles []string - for _, f := range newFileList { - if !f.IsDir() { - newFiles = append(newFiles, f.Name()) + go func() { + app.QueueUpdate(func() { + pages.RemovePage(RAGPage) + newFileList, _ := os.ReadDir(cfg.RAGDir) + 
loadedFiles, _ := ragger.ListLoaded()
+					var newFiles []string
+					for _, f := range newFileList {
+						if !f.IsDir() {
+							newFiles = append(newFiles, f.Name())
+						}
 					}
-				chatRAGTable := makeRAGTable(newFiles, loadedFiles)
-				pages.AddPage(RAGPage, chatRAGTable, true, true)
-			})
+					chatRAGTable := makeRAGTable(newFiles, loadedFiles)
+					pages.AddPage(RAGPage, chatRAGTable, true, true)
+				})
+			}()
 			return
 		default:
 			pages.RemovePage(RAGPage)
--
cgit v1.2.3


From 6ed96c9bd3cb2cd7afb980cf023a0f969651acbe Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Sun, 8 Mar 2026 09:42:07 +0300
Subject: Fix (ctrl+w): avoid msg duplication

---
 bot.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bot.go b/bot.go
index d01ebb9..cb75a7b 100644
--- a/bot.go
+++ b/bot.go
@@ -887,7 +887,9 @@ out:
 			fmt.Fprint(textView, chunk)
 			respText.WriteString(chunk)
 			// Update the message in chatBody.Messages so it persists during Alt+T
-			chatBody.Messages[msgIdx].Content = respText.String()
+			if !r.Resume {
+				chatBody.Messages[msgIdx].Content = respText.String()
+			}
 			if cfg.AutoScrollEnabled {
 				textView.ScrollToEnd()
 			}
--
cgit v1.2.3


From c0d5db29a581b6a21f2d009189649d4e98ab55dc Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Sun, 8 Mar 2026 10:28:30 +0300
Subject: Chore (rag): x to exit label

---
 tables.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tables.go b/tables.go
index e9d4eb6..e47a1ce 100644
--- a/tables.go
+++ b/tables.go
@@ -272,7 +272,7 @@ func makeRAGTable(fileList []string, loadedFiles []string) *tview.Flex {
 	fileTable := tview.NewTable().
 		SetBorders(true)
 	longStatusView := tview.NewTextView()
-	longStatusView.SetText("status text")
+	longStatusView.SetText("press x to exit")
 	longStatusView.SetBorder(true).SetTitle("status")
 	longStatusView.SetChangedFunc(func() {
 		app.Draw()
--
cgit v1.2.3


From b6e802c12e37aeaf19bd449cf2877df5ae04d389 Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Sun, 8 Mar 2026 11:38:56 +0300
Subject: Enha (rag): bigger default batch

---
 config.example.toml | 4 ++--
 rag/rag.go          | 2 +-
 tools.go            | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/config.example.toml b/config.example.toml
index 1698189..f74d986 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -28,8 +28,8 @@ AutoScrollEnabled = true
 AutoCleanToolCallsFromCtx = false
 # rag settings
 RAGBatchSize = 1
-RAGWordLimit = 80
-RAGOverlapWords = 16
+RAGWordLimit = 250
+RAGOverlapWords = 25
 RAGDir = "ragimport"
 # extra tts
 TTS_ENABLED = false
diff --git a/rag/rag.go b/rag/rag.go
index e47e3d6..ef85e7f 100644
--- a/rag/rag.go
+++ b/rag/rag.go
@@ -156,7 +156,7 @@ func createChunks(sentences []string, wordLimit, overlapWords uint32) []string {
 func sanitizeFTSQuery(query string) string {
 	// Remove double quotes and other problematic characters for FTS5
-	query = strings.ReplaceAll(query, "\"", " ")
+	// query = strings.ReplaceAll(query, "\"", " ")
 	query = strings.ReplaceAll(query, "'", " ")
 	query = strings.ReplaceAll(query, ";", " ")
 	query = strings.ReplaceAll(query, "\\", " ")
diff --git a/tools.go b/tools.go
index e66533a..41b0b9b 100644
--- a/tools.go
+++ b/tools.go
@@ -360,13 +360,13 @@ func ragsearch(args map[string]string) []byte {
 	}
 	limitS, ok := args["limit"]
 	if !ok || limitS == "" {
-		limitS = "3"
+		limitS = "10"
 	}
 	limit, err := strconv.Atoi(limitS)
 	if err != nil || limit == 0 {
 		logger.Warn("ragsearch limit; passed bad value; setting to default (10)", "limit_arg", limitS, "error", err)
-		limit = 3
+		limit = 10
 	}
 	ragInstance := rag.GetInstance()
 	if ragInstance == nil {
--
cgit v1.2.3 From e74ff8c03faaf156ad684eda48c8cfa35082ce1a Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sun, 8 Mar 2026 13:27:09 +0300 Subject: Enha (rag): semantic hybrid search --- rag/rag.go | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 164 insertions(+), 3 deletions(-) diff --git a/rag/rag.go b/rag/rag.go index ef85e7f..6f12dd9 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -12,6 +12,7 @@ import ( "regexp" "runtime" "sort" + "strconv" "strings" "sync" "time" @@ -27,8 +28,101 @@ var ( FinishedRAGStatus = "finished loading RAG file; press x to exit" LoadedFileRAGStatus = "loaded file" ErrRAGStatus = "some error occurred; failed to transfer data to vector db" + + // stopWords are common words that can be removed from queries when not part of phrases + stopWords = []string{"the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by", "from", "up", "down", "left", "right", "about", "like", "such", "than", "then", "also", "too"} ) +// isStopWord checks if a word is in the stop words list +func isStopWord(word string) bool { + for _, stop := range stopWords { + if strings.EqualFold(word, stop) { + return true + } + } + return false +} + +// detectPhrases returns multi-word phrases from a query that should be treated as units +func detectPhrases(query string) []string { + words := strings.Fields(strings.ToLower(query)) + var phrases []string + + for i := 0; i < len(words)-1; i++ { + word1 := strings.Trim(words[i], ".,!?;:'\"()[]{}") + word2 := strings.Trim(words[i+1], ".,!?;:'\"()[]{}") + + // Skip if either word is a stop word or too short + if isStopWord(word1) || isStopWord(word2) || len(word1) < 2 || len(word2) < 2 { + continue + } + + // Check if this pair appears to be a meaningful phrase + // Simple heuristic: consecutive non-stop words of reasonable length + phrase := word1 + " " + word2 + phrases = append(phrases, phrase) + + // Optionally check for 3-word phrases + if i < len(words)-2 { + word3 := strings.Trim(words[i+2], ".,!?;:'\"()[]{}") + if !isStopWord(word3) && len(word3) >= 2 { + phrases = append(phrases, word1+" "+word2+" "+word3) + } + } + } + + return phrases +} + +// parseSlugIndices extracts batch and chunk indices from a slug +// slug format: filename_batch_chunk (e.g., "kjv_bible.epub_1786_0") +func parseSlugIndices(slug string) (batch, chunk int, ok bool) { + // Find the last two numbers separated by underscores + re := regexp.MustCompile(`_(\d+)_(\d+)$`) + matches := re.FindStringSubmatch(slug) + if matches == nil || len(matches) != 3 { + return 0, 0, false + } + batch, err1 := strconv.Atoi(matches[1]) + chunk, err2 := strconv.Atoi(matches[2]) + if err1 != nil || err2 != nil { + return 0, 0, false + } + return batch, chunk, true +} + +// areSlugsAdjacent returns true if two slugs are from the same file and have sequential indices +func areSlugsAdjacent(slug1, slug2 string) bool { + // Extract filename prefix (everything before the last underscore sequence) + parts1 := strings.Split(slug1, "_") + parts2 := strings.Split(slug2, "_") + if len(parts1) < 3 || len(parts2) < 3 { + return false + } + + // Compare filename prefixes (all parts except last two) + prefix1 := strings.Join(parts1[:len(parts1)-2], "_") + prefix2 := strings.Join(parts2[:len(parts2)-2], "_") + if prefix1 != prefix2 { + return false + } + + batch1, chunk1, ok1 := parseSlugIndices(slug1) + batch2, chunk2, ok2 := parseSlugIndices(slug2) + if !ok1 || !ok2 { + return false + } + + // Check if they're in same batch and chunks are sequential + 
if batch1 == batch2 && (chunk1 == chunk2+1 || chunk2 == chunk1+1) { + return true + } + + // Check if they're in sequential batches and chunk indices suggest continuity + // This is heuristic but useful for cross-batch adjacency + return false +} + type RAG struct { logger *slog.Logger store storage.FullRepo @@ -155,8 +249,8 @@ func createChunks(sentences []string, wordLimit, overlapWords uint32) []string { } func sanitizeFTSQuery(query string) string { - // Remove double quotes and other problematic characters for FTS5 - // query = strings.ReplaceAll(query, "\"", " ") + // Keep double quotes for FTS5 phrase matching + // Remove other problematic characters query = strings.ReplaceAll(query, "'", " ") query = strings.ReplaceAll(query, ";", " ") query = strings.ReplaceAll(query, "\\", " ") @@ -549,7 +643,6 @@ func (r *RAG) RemoveFile(filename string) error { var ( queryRefinementPattern = regexp.MustCompile(`(?i)(based on my (vector db|vector db|vector database|rags?|past (conversations?|chat|messages?))|from my (files?|documents?|data|information|memory)|search (in|my) (vector db|database|rags?)|rag search for)`) importantKeywords = []string{"project", "architecture", "code", "file", "chat", "conversation", "topic", "summary", "details", "history", "previous", "my", "user", "me"} - stopWords = []string{"the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by", "from", "up", "down", "left", "right"} ) func (r *RAG) RefineQuery(query string) string { @@ -564,7 +657,20 @@ func (r *RAG) RefineQuery(query string) string { query = strings.ToLower(query) words := strings.Fields(query) if len(words) >= 3 { + // Detect phrases and protect words that are part of phrases + phrases := detectPhrases(query) + protectedWords := make(map[string]bool) + for _, phrase := range phrases { + for _, word := range strings.Fields(phrase) { + protectedWords[word] = true + } + } + + // Remove stop words that are not protected for _, stopWord := range stopWords { + if protectedWords[stopWord] { + continue + } wordPattern := `\b` + stopWord + `\b` re := regexp.MustCompile(wordPattern) query = re.ReplaceAllString(query, "") @@ -673,6 +779,45 @@ func (r *RAG) GenerateQueryVariations(query string) []string { if !strings.HasSuffix(query, " summary") { variations = append(variations, query+" summary") } + + // Add phrase-quoted variations for better FTS5 matching + phrases := detectPhrases(query) + if len(phrases) > 0 { + // Sort phrases by length descending to prioritize longer phrases + sort.Slice(phrases, func(i, j int) bool { + return len(phrases[i]) > len(phrases[j]) + }) + + // Create a version with all phrases quoted + quotedQuery := query + for _, phrase := range phrases { + // Only quote if not already quoted + quotedPhrase := "\"" + phrase + "\"" + if !strings.Contains(strings.ToLower(quotedQuery), strings.ToLower(quotedPhrase)) { + // Case-insensitive replacement of phrase with quoted version + re := regexp.MustCompile(`(?i)\b` + regexp.QuoteMeta(phrase) + `\b`) + quotedQuery = re.ReplaceAllString(quotedQuery, quotedPhrase) + } + } + if quotedQuery != query { + variations = append(variations, quotedQuery) + } + + // Also add individual phrase variations for short queries + if len(phrases) <= 3 { + for _, phrase := range phrases { + // Create a focused query with just this phrase quoted + // Keep original context but emphasize this phrase + quotedPhrase := "\"" + phrase + "\"" + re := regexp.MustCompile(`(?i)\b` + regexp.QuoteMeta(phrase) + `\b`) + focusedQuery := 
re.ReplaceAllString(query, quotedPhrase) + if focusedQuery != query && focusedQuery != quotedQuery { + variations = append(variations, focusedQuery) + } + } + } + } + return variations } @@ -704,6 +849,22 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V if row.FileName == "chat" || strings.Contains(strings.ToLower(row.FileName), "conversation") { score += 3 } + + // Cross-chunk adjacency bonus: if this chunk has adjacent siblings in results, + // boost score to promote narrative continuity + adjacentCount := 0 + for _, other := range results { + if other.Slug == row.Slug { + continue + } + if areSlugsAdjacent(row.Slug, other.Slug) { + adjacentCount++ + } + } + if adjacentCount > 0 { + // Bonus per adjacent chunk, but diminishing returns + score += float32(adjacentCount) * 4 + } distance := row.Distance - score/100 scored = append(scored, scoredResult{row: row, distance: distance}) } -- cgit v1.2.3 From a1b5f9cdc59938901123650fc0900067ac3447ca Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sun, 8 Mar 2026 16:12:32 +0300 Subject: Enha: rag tuning and tests --- rag/rag.go | 136 ++++++++++++--- rag/rag_integration_test.go | 409 ++++++++++++++++++++++++++++++++++++++++++++ rag/rag_real_test.go | 131 ++++++++++++++ rag/rag_test.go | 155 +++++++++++++++++ rag/storage.go | 8 +- 5 files changed, 814 insertions(+), 25 deletions(-) create mode 100644 rag/rag_integration_test.go create mode 100644 rag/rag_real_test.go create mode 100644 rag/rag_test.go diff --git a/rag/rag.go b/rag/rag.go index 6f12dd9..3a771d4 100644 --- a/rag/rag.go +++ b/rag/rag.go @@ -74,6 +74,22 @@ func detectPhrases(query string) []string { return phrases } +// countPhraseMatches returns the number of query phrases found in text +func countPhraseMatches(text, query string) int { + phrases := detectPhrases(query) + if len(phrases) == 0 { + return 0 + } + textLower := strings.ToLower(text) + count := 0 + for _, phrase := range phrases { + if strings.Contains(textLower, phrase) { + count++ + } + } + return count +} + // parseSlugIndices extracts batch and chunk indices from a slug // slug format: filename_batch_chunk (e.g., "kjv_bible.epub_1786_0") func parseSlugIndices(slug string) (batch, chunk int, ok bool) { @@ -120,6 +136,9 @@ func areSlugsAdjacent(slug1, slug2 string) bool { // Check if they're in sequential batches and chunk indices suggest continuity // This is heuristic but useful for cross-batch adjacency + if (batch1 == batch2+1 && chunk1 == 0) || (batch2 == batch1+1 && chunk2 == 0) { + return true + } return false } @@ -654,6 +673,10 @@ func (r *RAG) RefineQuery(query string) string { if len(query) <= 3 { return original } + // If query already contains double quotes, assume it's a phrase query and skip refinement + if strings.Contains(query, "\"") { + return original + } query = strings.ToLower(query) words := strings.Fields(query) if len(words) >= 3 { @@ -799,12 +822,13 @@ func (r *RAG) GenerateQueryVariations(query string) []string { quotedQuery = re.ReplaceAllString(quotedQuery, quotedPhrase) } } - if quotedQuery != query { - variations = append(variations, quotedQuery) - } + // Disabled malformed quoted query for now + // if quotedQuery != query { + // variations = append(variations, quotedQuery) + // } // Also add individual phrase variations for short queries - if len(phrases) <= 3 { + if len(phrases) <= 5 { for _, phrase := range phrases { // Create a focused query with just this phrase quoted // Keep original context but emphasize this phrase @@ -814,6 +838,8 @@ func (r 
*RAG) GenerateQueryVariations(query string) []string { if focusedQuery != query && focusedQuery != quotedQuery { variations = append(variations, focusedQuery) } + // Add the phrase alone (quoted) as a separate variation + variations = append(variations, quotedPhrase) } } } @@ -822,9 +848,11 @@ func (r *RAG) GenerateQueryVariations(query string) []string { } func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.VectorRow { + phraseCount := len(detectPhrases(query)) type scoredResult struct { - row models.VectorRow - distance float32 + row models.VectorRow + distance float32 + phraseMatches int } scored := make([]scoredResult, 0, len(results)) for i := range results { @@ -850,6 +878,14 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V score += 3 } + // Phrase match bonus: extra points for containing detected phrases + phraseMatches := countPhraseMatches(row.RawText, query) + if phraseMatches > 0 { + // Significant bonus per phrase to prioritize exact phrase matches + r.logger.Debug("phrase match bonus", "slug", row.Slug, "phraseMatches", phraseMatches, "score", score) + score += float32(phraseMatches) * 100 + } + // Cross-chunk adjacency bonus: if this chunk has adjacent siblings in results, // boost score to promote narrative continuity adjacentCount := 0 @@ -866,17 +902,27 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V score += float32(adjacentCount) * 4 } distance := row.Distance - score/100 - scored = append(scored, scoredResult{row: row, distance: distance}) + scored = append(scored, scoredResult{row: row, distance: distance, phraseMatches: phraseMatches}) } sort.Slice(scored, func(i, j int) bool { return scored[i].distance < scored[j].distance }) unique := make([]models.VectorRow, 0) seen := make(map[string]bool) + maxPerFile := 2 + if phraseCount > 0 { + maxPerFile = 10 + } fileCounts := make(map[string]int) for i := range scored { if !seen[scored[i].row.Slug] { - if fileCounts[scored[i].row.FileName] >= 2 { + // Allow phrase-matching chunks to bypass per-file limit (up to +5 extra) + allowed := fileCounts[scored[i].row.FileName] < maxPerFile + if !allowed && scored[i].phraseMatches > 0 { + // If chunk has phrase matches, allow extra slots (up to maxPerFile + 5) + allowed = fileCounts[scored[i].row.FileName] < maxPerFile+5 + } + if !allowed { continue } seen[scored[i].row.Slug] = true @@ -884,8 +930,8 @@ func (r *RAG) RerankResults(results []models.VectorRow, query string) []models.V unique = append(unique, scored[i].row) } } - if len(unique) > 10 { - unique = unique[:10] + if len(unique) > 30 { + unique = unique[:30] } return unique } @@ -954,6 +1000,7 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) { r.resetIdleTimer() refined := r.RefineQuery(query) variations := r.GenerateQueryVariations(refined) + r.logger.Debug("query variations", "original", query, "refined", refined, "variations", variations) // Collect embedding search results from all variations var embResults []models.VectorRow @@ -985,17 +1032,35 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) { return embResults[i].Distance < embResults[j].Distance }) - // Perform keyword search - kwResults, err := r.searchKeyword(refined, limit*2) - if err != nil { - r.logger.Warn("keyword search failed, using only embeddings", "error", err) - kwResults = nil + // Perform keyword search on all variations + var kwResults []models.VectorRow + seenKw := make(map[string]bool) + for _, q := 
range variations {
+		results, err := r.searchKeyword(q, limit)
+		if err != nil {
+			r.logger.Debug("keyword search failed for variation", "error", err, "query", q)
+			continue
+		}
+		for _, row := range results {
+			if !seenKw[row.Slug] {
+				seenKw[row.Slug] = true
+				kwResults = append(kwResults, row)
+			}
+		}
 	}
-	// Sort keyword results by distance (already sorted by BM25 score)
-	// kwResults already sorted by distance (lower is better)
+	// Sort keyword results by distance (lower is better)
+	sort.Slice(kwResults, func(i, j int) bool {
+		return kwResults[i].Distance < kwResults[j].Distance
+	})
 	// Combine using Reciprocal Rank Fusion (RRF)
-	const rrfK = 60
+	// Use smaller K for phrase-heavy queries to give more weight to top ranks
+	phraseCount := len(detectPhrases(query))
+	rrfK := 60.0
+	if phraseCount > 0 {
+		rrfK = 30.0
+	}
+	r.logger.Debug("RRF parameters", "phraseCount", phraseCount, "rrfK", rrfK, "query", query)
 	type scoredRow struct {
 		row   models.VectorRow
 		score float64
 	}
@@ -1005,11 +1070,22 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) {
 	for rank, row := range embResults {
 		score := 1.0 / (float64(rank) + rrfK)
 		scoreMap[row.Slug] += score
+		if row.Slug == "kjv_bible.epub_1786_0" {
+			r.logger.Debug("target chunk embedding rank", "rank", rank, "score", score)
+		}
 	}
-	// Add keyword results
+	// Add keyword results with weight boost when phrases are present
+	kwWeight := 1.0
+	if phraseCount > 0 {
+		kwWeight = 100.0
+	}
+	r.logger.Debug("keyword weight", "kwWeight", kwWeight, "phraseCount", phraseCount)
 	for rank, row := range kwResults {
-		score := 1.0 / (float64(rank) + rrfK)
+		score := kwWeight * (1.0 / (float64(rank) + rrfK))
 		scoreMap[row.Slug] += score
+		if row.Slug == "kjv_bible.epub_1786_0" {
+			r.logger.Debug("target chunk keyword rank", "rank", rank, "score", score, "kwWeight", kwWeight, "rrfK", rrfK)
+		}
 		// Ensure row exists in combined results
 		if _, exists := seen[row.Slug]; !exists {
 			embResults = append(embResults, row)
@@ -1021,6 +1097,18 @@ func (r *RAG) Search(query string, limit int) ([]models.VectorRow, error) {
 		score := scoreMap[row.Slug]
 		scoredRows = append(scoredRows, scoredRow{row: row, score: score})
 	}
+	// Debug: log scores for target chunk and top chunks
+	if strings.Contains(strings.ToLower(query), "bald") || strings.Contains(strings.ToLower(query), "she bears") {
+		for _, sr := range scoredRows {
+			if sr.row.Slug == "kjv_bible.epub_1786_0" {
+				r.logger.Debug("target chunk score", "slug", sr.row.Slug, "score", sr.score, "distance", sr.row.Distance)
+			}
+		}
+		// Log top 5 scores
+		for i := 0; i < len(scoredRows) && i < 5; i++ {
+			r.logger.Debug("top scored row", "rank", i+1, "slug", scoredRows[i].row.Slug, "score", scoredRows[i].score, "distance", scoredRows[i].row.Distance)
+		}
+	}
 	// Sort by descending RRF score
 	sort.Slice(scoredRows, func(i, j int) bool {
 		return scoredRows[i].score > scoredRows[j].score
@@ -1099,3 +1187,11 @@ func (r *RAG) Destroy() {
 		}
 	}
 }
+
+// SetEmbedderForTesting replaces the internal embedder with a mock.
+// It is intended for use by tests only.
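Before the testing hook below, the fusion step above deserves a worked example. Reciprocal Rank Fusion merges the two rankings by position rather than raw score, so cosine distances and BM25 values never have to share a scale. A self-contained sketch of the exact formula used in Search (k and the keyword weight as in this patch; document IDs are made up):

package main

import "fmt"

// rrf mirrors Search: an embedding hit at rank r contributes 1/(r+k),
// a keyword hit contributes w/(r+k); k=30 and w=100 when phrases are detected.
func rrf(emb, kw []string, k, w float64) map[string]float64 {
	s := make(map[string]float64)
	for rank, id := range emb {
		s[id] += 1.0 / (float64(rank) + k)
	}
	for rank, id := range kw {
		s[id] += w / (float64(rank) + k)
	}
	return s
}

func main() {
	emb := []string{"a", "b", "c"} // best-first embedding ranking
	kw := []string{"c", "a"}       // best-first keyword ranking
	fmt.Println(rrf(emb, kw, 60, 1))   // balanced fusion: "a" edges out "c"
	fmt.Println(rrf(emb, kw, 30, 100)) // phrase query: "c", the keyword #1, wins
}

The second call makes the design choice visible: when detectPhrases fires, an exact FTS5 phrase hit is allowed to override the embedding order outright.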
+func (r *RAG) SetEmbedderForTesting(e Embedder) { + r.mu.Lock() + defer r.mu.Unlock() + r.embedder = e +} diff --git a/rag/rag_integration_test.go b/rag/rag_integration_test.go new file mode 100644 index 0000000..f3405eb --- /dev/null +++ b/rag/rag_integration_test.go @@ -0,0 +1,409 @@ +package rag + +import ( + "fmt" + "gf-lt/config" + "gf-lt/models" + "gf-lt/storage" + "log/slog" + "testing" + + _ "github.com/glebarez/go-sqlite" + "github.com/jmoiron/sqlx" +) + +// mockEmbedder returns zero vectors of a fixed dimension. +type mockEmbedder struct { + dim int +} + +func (m *mockEmbedder) Embed(text string) ([]float32, error) { + vec := make([]float32, m.dim) + return vec, nil +} + +func (m *mockEmbedder) EmbedSlice(texts []string) ([][]float32, error) { + vecs := make([][]float32, len(texts)) + for i := range vecs { + vecs[i] = make([]float32, m.dim) + } + return vecs, nil +} + +// dummyStore implements storage.FullRepo with a minimal set of methods. +// Only DB() is used by VectorStorage; other methods return empty values. +type dummyStore struct { + db *sqlx.DB +} + +func (d dummyStore) DB() *sqlx.DB { return d.db } + +// ChatHistory methods +func (d dummyStore) ListChats() ([]models.Chat, error) { return nil, nil } +func (d dummyStore) GetChatByID(id uint32) (*models.Chat, error) { return nil, nil } +func (d dummyStore) GetChatByChar(char string) ([]models.Chat, error) { return nil, nil } +func (d dummyStore) GetLastChat() (*models.Chat, error) { return nil, nil } +func (d dummyStore) GetLastChatByAgent(agent string) (*models.Chat, error) { return nil, nil } +func (d dummyStore) UpsertChat(chat *models.Chat) (*models.Chat, error) { return chat, nil } +func (d dummyStore) RemoveChat(id uint32) error { return nil } +func (d dummyStore) ChatGetMaxID() (uint32, error) { return 0, nil } + +// Memories methods +func (d dummyStore) Memorise(m *models.Memory) (*models.Memory, error) { return m, nil } +func (d dummyStore) Recall(agent, topic string) (string, error) { return "", nil } +func (d dummyStore) RecallTopics(agent string) ([]string, error) { return nil, nil } + +// VectorRepo methods (not used but required by interface) +func (d dummyStore) WriteVector(row *models.VectorRow) error { return nil } +func (d dummyStore) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) { + return nil, nil +} +func (d dummyStore) ListFiles() ([]string, error) { return nil, nil } +func (d dummyStore) RemoveEmbByFileName(filename string) error { return nil } + +var _ storage.FullRepo = dummyStore{} + +// setupTestRAG creates an in‑memory SQLite database, creates the necessary tables, +// inserts the provided chunks, and returns a RAG instance with a mock embedder. +func setupTestRAG(t *testing.T, chunks []*models.VectorRow) (*RAG, error) { + t.Helper() + db, err := sqlx.Open("sqlite", ":memory:") + if err != nil { + return nil, fmt.Errorf("open in‑memory db: %w", err) + } + // Create the required tables (embeddings_768 and fts_embeddings). + // Use the same schema as production. 
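One piece of this harness worth spelling out before the schema DDL below: dummyStore stubs the wide storage.FullRepo interface with no-ops because setupTestRAG only ever exercises DB(), and the `var _ storage.FullRepo = dummyStore{}` line is the usual compile-time conformance check. A minimal sketch of the idiom with illustrative types:

package main

import "fmt"

// Repo stands in for a wide interface like storage.FullRepo.
type Repo interface {
	DB() string
	RemoveChat(id uint32) error
}

// stub satisfies Repo; only DB matters to the code under test.
type stub struct{}

func (stub) DB() string                 { return "in-memory handle" }
func (stub) RemoveChat(id uint32) error { return nil } // deliberate no-op

var _ Repo = stub{} // build breaks here if stub ever drifts from Repo

func main() {
	var r Repo = stub{}
	fmt.Println(r.DB())
}

The zero-vector mockEmbedder is similarly deliberate: with identical embeddings every vector distance ties, so these tests isolate the FTS5 keyword path, the RRF weighting, and the reranker rather than semantic recall.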
+ _, err = db.Exec(` + CREATE TABLE embeddings_768 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + embeddings BLOB NOT NULL, + slug TEXT NOT NULL, + raw_text TEXT NOT NULL, + filename TEXT NOT NULL DEFAULT '' + ); + `) + if err != nil { + return nil, fmt.Errorf("create embeddings table: %w", err) + } + _, err = db.Exec(` + CREATE VIRTUAL TABLE fts_embeddings USING fts5( + slug UNINDEXED, + raw_text, + filename UNINDEXED, + embedding_size UNINDEXED, + tokenize='porter unicode61' + ); + `) + if err != nil { + return nil, fmt.Errorf("create FTS table: %w", err) + } + // Create a logger that discards output. + logger := slog.New(slog.NewTextHandler(nil, &slog.HandlerOptions{Level: slog.LevelError})) + store := dummyStore{db: db} + // Create config with embedding dimension 768. + cfg := &config.Config{ + EmbedDims: 768, + RAGWordLimit: 250, + RAGOverlapWords: 25, + RAGBatchSize: 1, + } + // Create a RAG instance using New, which will create an embedder based on config. + // We'll override the embedder afterwards via reflection. + rag, err := New(logger, store, cfg) + if err != nil { + return nil, fmt.Errorf("create RAG: %w", err) + } + // Replace the embedder with our mock. + rag.SetEmbedderForTesting(&mockEmbedder{dim: cfg.EmbedDims}) + // Insert the provided chunks using the storage directly. + if len(chunks) > 0 { + // Ensure each chunk has embeddings of correct dimension (zero vector). + for _, chunk := range chunks { + if len(chunk.Embeddings) != cfg.EmbedDims { + chunk.Embeddings = make([]float32, cfg.EmbedDims) + } + } + err = rag.storage.WriteVectors(chunks) + if err != nil { + return nil, fmt.Errorf("write test chunks: %w", err) + } + } + return rag, nil +} + +// createTestChunks returns a slice of VectorRow representing the target chunk +// (kjv_bible.epub_1786_0), several bald‑related noise chunks, and unrelated chunks. +func createTestChunks() []*models.VectorRow { + // Target chunk: 2 Kings 2:23‑24 containing "bald head" and "two she bears". + targetRaw := `And he said, Ye shall not send. + + +2:17 And when they urged him till he was ashamed, he said, Send. They sent +therefore fifty men; and they sought three days, but found him not. + + +2:18 And when they came again to him, (for he tarried at Jericho,) he said unto +them, Did I not say unto you, Go not? 2:19 And the men of the city said unto +Elisha, Behold, I pray thee, the situation of this city is pleasant, as my lord +seeth: but the water is naught, and the ground barren. + + +2:20 And he said, Bring me a new cruse, and put salt therein. And they brought +it to him. + + +2:21 And he went forth unto the spring of the waters, and cast the salt in +there, and said, Thus saith the LORD, I have healed these waters; there shall +not be from thence any more death or barren land. + + +2:22 So the waters were healed unto this day, according to the saying of Elisha +which he spake. + + +2:23 And he went up from thence unto Bethel: and as he was going up by the way, +there came forth little children out of the city, and mocked him, and said unto +him, Go up, thou bald head; go up, thou bald head. + + +2:24 And he turned back, and looked on them, and cursed them in the name of the +LORD. And there came forth two she bears out of the wood, and tare forty and +two children of them.` + // Noise chunk 1: Leviticus containing "bald locust" + noise1Raw := `11:12 Whatsoever hath no fins nor scales in the waters, that shall be an +abomination unto you. 
+ + +11:13 And these are they which ye shall have in abomination among the fowls; +they shall not be eaten, they are an abomination: the eagle, and the ossifrage, +and the ospray, 11:14 And the vulture, and the kite after his kind; 11:15 Every +raven after his kind; 11:16 And the owl, and the night hawk, and the cuckow, +and the hawk after his kind, 11:17 And the little owl, and the cormorant, and +the great owl, 11:18 And the swan, and the pelican, and the gier eagle, 11:19 +And the stork, the heron after her kind, and the lapwing, and the bat. + + +11:20 All fowls that creep, going upon all four, shall be an abomination unto +you. + + +11:21 Yet these may ye eat of every flying creeping thing that goeth upon all +four, which have legs above their feet, to leap withal upon the earth; 11:22 +Even these of them ye may eat; the locust after his kind, and the bald locust +after his kind, and the beetle after his kind, and the grasshopper after his +kind. + + +11:23 But all other flying creeping things, which have four feet, shall be an +abomination unto you. + + +11:24 And for these ye shall be unclean: whosoever toucheth the carcase of them +shall be unclean until the even.` + // Noise chunk 2: Leviticus containing "bald" + noise2Raw := `11:13 And these are they which ye shall have in abomination among the fowls; +they shall not be eaten, they are an abomination: the eagle, and the ossifrage, +and the ospray, 11:14 And the vulture, and the kite after his kind; 11:15 Every +raven after his kind; 11:16 And the owl, and the night hawk, and the cuckow, +and the hawk after his kind, 11:17 And the little owl, and the cormorant, and +the great owl, 11:18 And the swan, and the pelican, and the gier eagle, 11:19 +And the stork, the heron after her kind, and the lapwing, and the bat. + + +11:20 All fowls that creep, going upon all four, shall be an abomination unto +you. + + +11:21 Yet these may ye eat of every flying creeping thing that goeth upon all +four, which have legs above their feet, to leap withal upon the earth; 11:22 +Even these of them ye may eat; the locust after his kind, and the bald locust +after his kind, and the beetle after his kind, and the grasshopper after his +kind. + + +11:23 But all other flying creeping things, which have four feet, shall be an +abomination unto you. + + +11:24 And for these ye shall be unclean: whosoever toucheth the carcase of them +shall be unclean until the even.` + // Additional Leviticus noise chunks (simulating 28 bald-related chunks) + // Using variations of the same text with different slugs + leviticusSlugs := []string{ + "kjv_bible.epub_564_0", + "kjv_bible.epub_565_0", + "kjv_bible.epub_579_0", + "kjv_bible.epub_580_0", + "kjv_bible.epub_581_0", + "kjv_bible.epub_582_0", + "kjv_bible.epub_583_0", + "kjv_bible.epub_584_0", + "kjv_bible.epub_585_0", + "kjv_bible.epub_586_0", + "kjv_bible.epub_587_0", + "kjv_bible.epub_588_0", + "kjv_bible.epub_589_0", + "kjv_bible.epub_590_0", + } + leviticusTexts := []string{ + noise1Raw, + noise2Raw, + `13:40 And the man whose hair is fallen off his head, he is bald; yet is he +clean. 
+ + +13:41 And he that hath his hair fallen off from the part of his head toward his +face, he is forehead bald; yet is he clean.`, + `13:42 And if there be in the bald head, or bald forehead, a white reddish sore; +it is a leprosy sprung up in his bald head, or his bald forehead.`, + `13:43 Then the priest shall look upon it: and, behold, if the rising of the +sore be white reddish in his bald head, or in his bald forehead, as the leprosy +appearedh in the skin of the flesh;`, + `13:44 He is a leprous man, he is unclean: the priest shall pronounce him utterly +unclean; his plague is in his head.`, + `13:45 And the leper in whom the plague is, his clothes shall be rent, and his +head bare, and he shall put a covering upon his upper lip, and shall cry, +Unclean, unclean.`, + `13:46 All the days wherein the plague shall be in him he shall be defiled; he +is unclean: he shall dwell alone; without the camp shall his habitation be.`, + `13:47 The garment also that the plague of leprosy is in, whether it be a woollen +garment, or a linen garment;`, + `13:48 Whether it be in the warp, or woof; of linen, or of woollen; whether in a +skin, or in any thing made of skin;`, + `13:49 And if the plague be greenish or reddish in the garment, or in the skin, +either in the warp, or in the woof, or in any thing of skin; it is a plague of +leprosy, and shall be shewed unto the priest:`, + `13:50 And the priest shall look upon the plague, and shut up it that hath the +plague seven days:`, + `13:51 And he shall look on the plague on the seventh day: if the plague be spread +in the garment, either in the warp, or in the woof, or in a skin, or in any work +that is made of skin; the plague is a fretting leprosy; it is unclean.`, + `13:52 He shall therefore burn that garment, whether warp or woof, in woollen or +in linen, or any thing of skin, wherein the plague is: for it is a fretting +leprosy; it shall be burnt in the fire.`, + } + // Unrelated chunk 1: ghost_7.txt_777_0 + unrelated1Raw := `Doesn’t he have any pride as a hunter?! + +I didn’t see what other choice I had. I would just have to grovel and be ready to flee at any given moment. +The Hidden Curse clan house was in the central region of the imperial capital. It was a high-class area with extraordinary property values that hosted the residences of people like Lord Gladis. This district was near the Imperial Castle, though “near” was a +relative term as it was still a few kilometers away. + +The clan house was made of brick and conformed to an older style of architecture.` + // Unrelated chunk 2: ghost_7.txt_778_0 + unrelated2Raw := `I would just have to grovel and be ready to flee at any given moment. +The Hidden Curse clan house was in the central region of the imperial capital. It was a high-class area with extraordinary property values that hosted the residences of people like Lord Gladis. This district was near the Imperial Castle, though “near” was a +relative term as it was still a few kilometers away. + +The clan house was made of brick and conformed to an older style of architecture. Nearly everyone knew about this mansion and its clock tower. It stood tall over the neighboring mansions and rumor had it that you could see the whole capital from the top. 
It +spoke to this clan’s renown and history that they were able to get away with building something that dwarfed the mansions of the nobility.` + + chunks := []*models.VectorRow{ + { + Slug: "kjv_bible.epub_1786_0", + RawText: targetRaw, + FileName: "kjv_bible.epub", + Embeddings: nil, // will be filled with zero vector later + }, + } + // Add Leviticus noise chunks + for i, slug := range leviticusSlugs { + text := leviticusTexts[i%len(leviticusTexts)] + chunks = append(chunks, &models.VectorRow{ + Slug: slug, + RawText: text, + FileName: "kjv_bible.epub", + Embeddings: nil, + }) + } + // Add unrelated chunks + chunks = append(chunks, + &models.VectorRow{ + Slug: "ghost_7.txt_777_0", + RawText: unrelated1Raw, + FileName: "ghost_7.txt", + Embeddings: nil, + }, + &models.VectorRow{ + Slug: "ghost_7.txt_778_0", + RawText: unrelated2Raw, + FileName: "ghost_7.txt", + Embeddings: nil, + }, + ) + return chunks +} +func assertTargetInTopN(t *testing.T, results []models.VectorRow, topN int) bool { + t.Helper() + for i, row := range results { + if i >= topN { + break + } + if row.Slug == "kjv_bible.epub_1786_0" { + return true + } + } + return false +} + +func TestBiblicalQuery(t *testing.T) { + chunks := createTestChunks() + rag, err := setupTestRAG(t, chunks) + if err != nil { + t.Fatalf("setup failed: %v", err) + } + query := "bald prophet and two she bears" + results, err := rag.Search(query, 10) + if err != nil { + t.Fatalf("search failed: %v", err) + } + // The target chunk should be in the top results. + if !assertTargetInTopN(t, results, 5) { + t.Errorf("target chunk not found in top 5 results for query %q", query) + t.Logf("results slugs: %v", func() []string { + slugs := make([]string, len(results)) + for i, r := range results { + slugs[i] = r.Slug + } + return slugs + }()) + } +} + +func TestQueryVariations(t *testing.T) { + chunks := createTestChunks() + rag, err := setupTestRAG(t, chunks) + if err != nil { + t.Fatalf("setup failed: %v", err) + } + tests := []struct { + name string + query string + topN int + }{ + {"she bears", "she bears", 5}, + {"bald head", "bald head", 5}, + {"two she bears out of the wood", "two she bears out of the wood", 5}, + {"bald prophet", "bald prophet", 10}, + {"go up thou bald head", "\"go up thou bald head\"", 5}, + {"two she bears", "\"two she bears\"", 5}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + results, err := rag.Search(tt.query, 10) + if err != nil { + t.Fatalf("search failed: %v", err) + } + if !assertTargetInTopN(t, results, tt.topN) { + t.Errorf("target chunk not found in top %d results for query %q", tt.topN, tt.query) + t.Logf("results slugs: %v", func() []string { + slugs := make([]string, len(results)) + for i, r := range results { + slugs[i] = r.Slug + } + return slugs + }()) + } + }) + } +} diff --git a/rag/rag_real_test.go b/rag/rag_real_test.go new file mode 100644 index 0000000..87f6906 --- /dev/null +++ b/rag/rag_real_test.go @@ -0,0 +1,131 @@ +package rag + +import ( + "gf-lt/config" + "gf-lt/storage" + "log/slog" + "os" + "path/filepath" + "testing" +) + +func TestRealBiblicalQuery(t *testing.T) { + if testing.Short() { + t.Skip("skipping real embedder test in short mode") + } + // Check if the embedder model exists + modelPath := filepath.Join("..", "onnx", "embedgemma", "model_q4.onnx") + if _, err := os.Stat(modelPath); os.IsNotExist(err) { + t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath) + } + tokenizerPath := filepath.Join("..", "onnx", "embedgemma", 
"tokenizer.json") + dbPath := filepath.Join("..", "gflt.db") + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + t.Skipf("database not found at %s; skipping real embedder test", dbPath) + } + cfg := &config.Config{ + EmbedModelPath: modelPath, + EmbedTokenizerPath: tokenizerPath, + EmbedDims: 768, + RAGWordLimit: 250, + RAGOverlapWords: 25, + RAGBatchSize: 1, + } + logger := slog.New(slog.NewTextHandler(nil, &slog.HandlerOptions{Level: slog.LevelError})) + store := storage.NewProviderSQL(dbPath, logger) + if store == nil { + t.Fatal("failed to create storage provider") + } + rag, err := New(logger, store, cfg) + if err != nil { + t.Fatalf("failed to create RAG instance: %v", err) + } + t.Cleanup(func() { rag.Destroy() }) + + query := "bald prophet and two she bears" + results, err := rag.Search(query, 30) + if err != nil { + t.Fatalf("search failed: %v", err) + } + found := false + for i, row := range results { + if row.Slug == "kjv_bible.epub_1786_0" { + found = true + t.Logf("target chunk found at rank %d", i+1) + break + } + } + if !found { + t.Errorf("target chunk not found in search results for query %q", query) + t.Logf("results slugs:") + for i, r := range results { + t.Logf("%d: %s", i+1, r.Slug) + } + } +} + +func TestRealQueryVariations(t *testing.T) { + if testing.Short() { + t.Skip("skipping real embedder test in short mode") + } + modelPath := filepath.Join("..", "onnx", "embedgemma", "model_q4.onnx") + if _, err := os.Stat(modelPath); os.IsNotExist(err) { + t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath) + } + tokenizerPath := filepath.Join("..", "onnx", "embedgemma", "tokenizer.json") + dbPath := filepath.Join("..", "gflt.db") + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + t.Skipf("database not found at %s; skipping real embedder test", dbPath) + } + cfg := &config.Config{ + EmbedModelPath: modelPath, + EmbedTokenizerPath: tokenizerPath, + EmbedDims: 768, + RAGWordLimit: 250, + RAGOverlapWords: 25, + RAGBatchSize: 1, + } + logger := slog.New(slog.NewTextHandler(nil, &slog.HandlerOptions{Level: slog.LevelError})) + store := storage.NewProviderSQL(dbPath, logger) + if store == nil { + t.Fatal("failed to create storage provider") + } + rag, err := New(logger, store, cfg) + if err != nil { + t.Fatalf("failed to create RAG instance: %v", err) + } + t.Cleanup(func() { rag.Destroy() }) + + tests := []struct { + name string + query string + }{ + {"she bears", "she bears"}, + {"bald head", "bald head"}, + {"two she bears out of the wood", "two she bears out of the wood"}, + {"bald prophet", "bald prophet"}, + {"go up thou bald head", "\"go up thou bald head\""}, + {"two she bears", "\"two she bears\""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + results, err := rag.Search(tt.query, 10) + if err != nil { + t.Fatalf("search failed: %v", err) + } + found := false + for _, row := range results { + if row.Slug == "kjv_bible.epub_1786_0" { + found = true + break + } + } + if !found { + t.Errorf("target chunk not found for query %q", tt.query) + for i, r := range results { + t.Logf("%d: %s", i+1, r.Slug) + } + } + }) + } +} diff --git a/rag/rag_test.go b/rag/rag_test.go new file mode 100644 index 0000000..4944007 --- /dev/null +++ b/rag/rag_test.go @@ -0,0 +1,155 @@ +package rag + +import ( + "testing" +) + +func TestDetectPhrases(t *testing.T) { + tests := []struct { + query string + expect []string + }{ + { + query: "bald prophet and two she bears", + expect: []string{"bald prophet", "two she", "two she bears", 
"she bears"}, + }, + { + query: "she bears", + expect: []string{"she bears"}, + }, + { + query: "the quick brown fox", + expect: []string{"quick brown", "quick brown fox", "brown fox"}, + }, + { + query: "in the house", // stop words + expect: []string{}, // "in" and "the" are stop words + }, + { + query: "a", // short + expect: []string{}, + }, + } + + for _, tt := range tests { + got := detectPhrases(tt.query) + if len(got) != len(tt.expect) { + t.Errorf("detectPhrases(%q) = %v, want %v", tt.query, got, tt.expect) + continue + } + for i := range got { + if got[i] != tt.expect[i] { + t.Errorf("detectPhrases(%q) = %v, want %v", tt.query, got, tt.expect) + break + } + } + } +} + +func TestCountPhraseMatches(t *testing.T) { + tests := []struct { + text string + query string + expect int + }{ + { + text: "two she bears came out of the wood", + query: "she bears", + expect: 1, + }, + { + text: "bald head and she bears", + query: "bald prophet and two she bears", + expect: 1, // only "she bears" matches + }, + { + text: "no match here", + query: "she bears", + expect: 0, + }, + { + text: "she bears and bald prophet", + query: "bald prophet she bears", + expect: 2, // "she bears" and "bald prophet" + }, + } + + for _, tt := range tests { + got := countPhraseMatches(tt.text, tt.query) + if got != tt.expect { + t.Errorf("countPhraseMatches(%q, %q) = %d, want %d", tt.text, tt.query, got, tt.expect) + } + } +} + +func TestAreSlugsAdjacent(t *testing.T) { + tests := []struct { + slug1 string + slug2 string + expect bool + }{ + { + slug1: "kjv_bible.epub_1786_0", + slug2: "kjv_bible.epub_1787_0", + expect: true, + }, + { + slug1: "kjv_bible.epub_1787_0", + slug2: "kjv_bible.epub_1786_0", + expect: true, + }, + { + slug1: "kjv_bible.epub_1786_0", + slug2: "kjv_bible.epub_1788_0", + expect: false, + }, + { + slug1: "otherfile.txt_1_0", + slug2: "kjv_bible.epub_1786_0", + expect: false, + }, + { + slug1: "file_1_0", + slug2: "file_1_1", + expect: true, + }, + { + slug1: "file_1_0", + slug2: "file_2_0", // different batch + expect: true, // sequential batches with same chunk index are adjacent + }, + } + + for _, tt := range tests { + got := areSlugsAdjacent(tt.slug1, tt.slug2) + if got != tt.expect { + t.Errorf("areSlugsAdjacent(%q, %q) = %v, want %v", tt.slug1, tt.slug2, got, tt.expect) + } + } +} + +func TestParseSlugIndices(t *testing.T) { + tests := []struct { + slug string + wantBatch int + wantChunk int + wantOk bool + }{ + {"kjv_bible.epub_1786_0", 1786, 0, true}, + {"file_1_5", 1, 5, true}, + {"no_underscore", 0, 0, false}, + {"file_abc_def", 0, 0, false}, + {"file_123_456_extra", 456, 0, false}, // regex matches last two numbers + } + + for _, tt := range tests { + batch, chunk, ok := parseSlugIndices(tt.slug) + if ok != tt.wantOk { + t.Errorf("parseSlugIndices(%q) ok = %v, want %v", tt.slug, ok, tt.wantOk) + continue + } + if ok && (batch != tt.wantBatch || chunk != tt.wantChunk) { + t.Errorf("parseSlugIndices(%q) = (%d, %d), want (%d, %d)", tt.slug, batch, chunk, tt.wantBatch, tt.wantChunk) + } + } +} diff --git a/rag/storage.go b/rag/storage.go index 62477b6..a53f767 100644 --- a/rag/storage.go +++ b/rag/storage.go @@ -340,11 +340,9 @@ func (vs *VectorStorage) scanRows(rows *sql.Rows) ([]models.VectorRow, error) { continue } // Convert BM25 score to distance-like metric (lower is better) - // BM25 is negative, more negative is better. We'll normalize to positive distance. 
diff --git a/rag/storage.go b/rag/storage.go
index 62477b6..a53f767 100644
--- a/rag/storage.go
+++ b/rag/storage.go
@@ -340,11 +340,9 @@ func (vs *VectorStorage) scanRows(rows *sql.Rows) ([]models.VectorRow, error) {
 			continue
 		}
 		// Convert BM25 score to distance-like metric (lower is better)
-		// BM25 is negative, more negative is better. We'll normalize to positive distance.
-		distance := float32(-score) // Make positive (since score is negative)
-		if distance < 0 {
-			distance = 0
-		}
+		// BM25 is negative, more negative is better. Keep as negative.
+		distance := float32(score) // Keep negative, more negative is better
+		// No clamping needed; negative distances are fine
 		results = append(results, models.VectorRow{
 			Slug:      slug,
 			RawText:   rawText,
-- cgit v1.2.3
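The storage.go hunk above drops the sign flip and the clamp: BM25 scores stay negative, so any consumer that sorts by ascending distance still ranks stronger matches first, and weak matches no longer collapse to a clamped 0. A self-contained illustration of that ordering convention (the sort is an assumption about how callers consume the distance field, not code from this patch):

package main

import (
	"fmt"
	"sort"
)

type row struct {
	slug     string
	distance float32 // BM25 kept negative: more negative = stronger match
}

func main() {
	rows := []row{
		{"kjv_bible.epub_1786_0", -7.9}, // strong match
		{"kjv_bible.epub_1200_0", -0.4}, // weak match
		{"ghost_7.txt_777_0", -2.1},
	}
	// Ascending sort: the most negative (best) BM25 score comes first,
	// so "lower is better" holds without flipping the sign or clamping.
	sort.Slice(rows, func(i, j int) bool { return rows[i].distance < rows[j].distance })
	for i, r := range rows {
		fmt.Printf("%d: %s (%.1f)\n", i+1, r.slug, r.distance)
	}
}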