summaryrefslogtreecommitdiff
path: root/rag/rag_integration_test.go
blob: f3405eba9521e8206aa43618fae471c72e7d9d46 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
package rag

import (
	"fmt"
	"io"
	"log/slog"
	"testing"

	"gf-lt/config"
	"gf-lt/models"
	"gf-lt/storage"

	_ "github.com/glebarez/go-sqlite"
	"github.com/jmoiron/sqlx"
)

// mockEmbedder is a test double for the embedder: its methods return
// all-zero vectors and never fail.
type mockEmbedder struct {
	// dim is the length of every vector the mock produces.
	dim int
}

// Embed ignores text and returns a zero-valued vector of length m.dim.
// The returned error is always nil.
func (m *mockEmbedder) Embed(text string) ([]float32, error) {
	return make([]float32, m.dim), nil
}

// EmbedSlice returns one zero-valued vector of length m.dim per entry in
// texts. The contents of texts are ignored and the error is always nil.
func (m *mockEmbedder) EmbedSlice(texts []string) ([][]float32, error) {
	out := make([][]float32, 0, len(texts))
	for range texts {
		out = append(out, make([]float32, m.dim))
	}
	return out, nil
}

// dummyStore implements storage.FullRepo with a minimal set of methods.
// Only DB() is used by VectorStorage; every other method is a stub that
// returns zero values (or echoes its argument) with a nil error.
type dummyStore struct {
	// db is the handle returned by DB().
	db *sqlx.DB
}

// DB returns the sqlx handle the store was constructed with.
func (d dummyStore) DB() *sqlx.DB {
	return d.db
}

// ChatHistory methods: stubs that touch no state and never fail.

// ListChats always returns (nil, nil).
func (d dummyStore) ListChats() ([]models.Chat, error) {
	return nil, nil
}

// GetChatByID always returns (nil, nil), whatever the id.
func (d dummyStore) GetChatByID(id uint32) (*models.Chat, error) {
	return nil, nil
}

// GetChatByChar always returns (nil, nil), whatever the char.
func (d dummyStore) GetChatByChar(char string) ([]models.Chat, error) {
	return nil, nil
}

// GetLastChat always returns (nil, nil).
func (d dummyStore) GetLastChat() (*models.Chat, error) {
	return nil, nil
}

// GetLastChatByAgent always returns (nil, nil), whatever the agent.
func (d dummyStore) GetLastChatByAgent(agent string) (*models.Chat, error) {
	return nil, nil
}

// UpsertChat stores nothing and hands the given chat straight back.
func (d dummyStore) UpsertChat(chat *models.Chat) (*models.Chat, error) {
	return chat, nil
}

// RemoveChat does nothing and returns nil.
func (d dummyStore) RemoveChat(id uint32) error {
	return nil
}

// ChatGetMaxID always returns (0, nil).
func (d dummyStore) ChatGetMaxID() (uint32, error) {
	return 0, nil
}

// Memories methods: stubs that touch no state and never fail.

// Memorise stores nothing and hands the given memory straight back.
func (d dummyStore) Memorise(m *models.Memory) (*models.Memory, error) {
	return m, nil
}

// Recall always returns ("", nil), whatever the agent and topic.
func (d dummyStore) Recall(agent, topic string) (string, error) {
	return "", nil
}

// RecallTopics always returns (nil, nil), whatever the agent.
func (d dummyStore) RecallTopics(agent string) ([]string, error) {
	return nil, nil
}

// VectorRepo methods (not used but required by interface): stubs that touch
// no state and never fail.

// WriteVector discards the row and returns nil.
func (d dummyStore) WriteVector(row *models.VectorRow) error {
	return nil
}

// SearchClosest always returns (nil, nil), whatever the query and limit.
func (d dummyStore) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) {
	return nil, nil
}

// ListFiles always returns (nil, nil).
func (d dummyStore) ListFiles() ([]string, error) {
	return nil, nil
}

// RemoveEmbByFileName does nothing and returns nil.
func (d dummyStore) RemoveEmbByFileName(filename string) error {
	return nil
}

// Compile-time check that dummyStore satisfies storage.FullRepo.
var _ storage.FullRepo = dummyStore{}

// setupTestRAG builds a RAG instance on top of a fresh in-memory SQLite
// database for use in tests.
//
// It creates the embeddings_768 and fts_embeddings tables, constructs the RAG
// through New with a 768-dimension config, swaps in a mockEmbedder via
// SetEmbedderForTesting, and writes chunks through rag.storage.WriteVectors.
// Any chunk whose Embeddings length differs from the configured dimension is
// given a zero vector of that size; the chunk is modified in place.
//
// The database handle is closed automatically when the test finishes.
// On failure the returned error is wrapped with the step that failed.
func setupTestRAG(t *testing.T, chunks []*models.VectorRow) (*RAG, error) {
	t.Helper()
	db, err := sqlx.Open("sqlite", ":memory:")
	if err != nil {
		return nil, fmt.Errorf("open in‑memory db: %w", err)
	}
	// Release the handle on every exit path, including the error returns below.
	t.Cleanup(func() {
		if cerr := db.Close(); cerr != nil {
			t.Logf("close in-memory db: %v", cerr)
		}
	})
	// NOTE(review): database/sql pools connections, and with SQLite each
	// ":memory:" connection may see its own private database. If tables appear
	// to be missing, confirm how this driver handles pooled connections.

	// Create the required tables (embeddings_768 and fts_embeddings).
	// The schema is intended to mirror production; that cannot be verified
	// from this file.
	_, err = db.Exec(`
		CREATE TABLE embeddings_768 (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			embeddings BLOB NOT NULL,
			slug TEXT NOT NULL,
			raw_text TEXT NOT NULL,
			filename TEXT NOT NULL DEFAULT ''
		);
	`)
	if err != nil {
		return nil, fmt.Errorf("create embeddings table: %w", err)
	}
	_, err = db.Exec(`
		CREATE VIRTUAL TABLE fts_embeddings USING fts5(
			slug UNINDEXED,
			raw_text,
			filename UNINDEXED,
			embedding_size UNINDEXED,
			tokenize='porter unicode61'
		);
	`)
	if err != nil {
		return nil, fmt.Errorf("create FTS table: %w", err)
	}
	// Logger that discards output. The writer must be non-nil: a nil writer
	// would panic as soon as an Error-level record is emitted.
	logger := slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{Level: slog.LevelError}))
	store := dummyStore{db: db}
	// Config with embedding dimension 768.
	cfg := &config.Config{
		EmbedDims:       768,
		RAGWordLimit:    250,
		RAGOverlapWords: 25,
		RAGBatchSize:    1,
	}
	// New builds the RAG from the config; its embedder is replaced with the
	// mock right afterwards through the test-only setter.
	rag, err := New(logger, store, cfg)
	if err != nil {
		return nil, fmt.Errorf("create RAG: %w", err)
	}
	rag.SetEmbedderForTesting(&mockEmbedder{dim: cfg.EmbedDims})
	// Insert the provided chunks using the storage directly.
	if len(chunks) > 0 {
		// Ensure each chunk has embeddings of the configured dimension
		// (zero vector) before it is written.
		for _, chunk := range chunks {
			if len(chunk.Embeddings) != cfg.EmbedDims {
				chunk.Embeddings = make([]float32, cfg.EmbedDims)
			}
		}
		if err = rag.storage.WriteVectors(chunks); err != nil {
			return nil, fmt.Errorf("write test chunks: %w", err)
		}
	}
	return rag, nil
}

// createTestChunks assembles the fixture corpus for the search tests, in this
// order: the target chunk (slug kjv_bible.epub_1786_0), fourteen Leviticus
// noise chunks, and two unrelated chunks from ghost_7.txt.
// Every row is returned with nil Embeddings.
func createTestChunks() []*models.VectorRow {
	const (
		bibleFile = "kjv_bible.epub"
		ghostFile = "ghost_7.txt"
	)
	// newRow builds one fixture row; Embeddings is deliberately left unset.
	newRow := func(slug, text, file string) *models.VectorRow {
		return &models.VectorRow{Slug: slug, RawText: text, FileName: file}
	}

	// Target: 2 Kings 2:16-24, which contains "bald head" and "two she bears".
	targetText := `And he said, Ye shall not send. 


2:17 And when they urged him till he was ashamed, he said, Send.  They sent
therefore fifty men; and they sought three days, but found him not. 


2:18 And when they came again to him, (for he tarried at Jericho,) he said unto
them, Did I not say unto you, Go not?  2:19 And the men of the city said unto
Elisha, Behold, I pray thee, the situation of this city is pleasant, as my lord
seeth: but the water is naught, and the ground barren. 


2:20 And he said, Bring me a new cruse, and put salt therein.  And they brought
it to him. 


2:21 And he went forth unto the spring of the waters, and cast the salt in
there, and said, Thus saith the LORD, I have healed these waters; there shall
not be from thence any more death or barren land. 


2:22 So the waters were healed unto this day, according to the saying of Elisha
which he spake. 


2:23 And he went up from thence unto Bethel: and as he was going up by the way,
there came forth little children out of the city, and mocked him, and said unto
him, Go up, thou bald head; go up, thou bald head. 


2:24 And he turned back, and looked on them, and cursed them in the name of the
LORD.  And there came forth two she bears out of the wood, and tare forty and
two children of them.`

	// Leviticus 11 passage mentioning the "bald locust".
	levLocustA := `11:12 Whatsoever hath no fins nor scales in the waters, that shall be an
abomination unto you. 


11:13 And these are they which ye shall have in abomination among the fowls;
they shall not be eaten, they are an abomination: the eagle, and the ossifrage,
and the ospray, 11:14 And the vulture, and the kite after his kind; 11:15 Every
raven after his kind; 11:16 And the owl, and the night hawk, and the cuckow,
and the hawk after his kind, 11:17 And the little owl, and the cormorant, and
the great owl, 11:18 And the swan, and the pelican, and the gier eagle, 11:19
And the stork, the heron after her kind, and the lapwing, and the bat. 


11:20 All fowls that creep, going upon all four, shall be an abomination unto
you. 


11:21 Yet these may ye eat of every flying creeping thing that goeth upon all
four, which have legs above their feet, to leap withal upon the earth; 11:22
Even these of them ye may eat; the locust after his kind, and the bald locust
after his kind, and the beetle after his kind, and the grasshopper after his
kind. 


11:23 But all other flying creeping things, which have four feet, shall be an
abomination unto you. 


11:24 And for these ye shall be unclean: whosoever toucheth the carcase of them
shall be unclean until the even.`

	// The same passage starting one verse later (overlapping chunk).
	levLocustB := `11:13 And these are they which ye shall have in abomination among the fowls;
they shall not be eaten, they are an abomination: the eagle, and the ossifrage,
and the ospray, 11:14 And the vulture, and the kite after his kind; 11:15 Every
raven after his kind; 11:16 And the owl, and the night hawk, and the cuckow,
and the hawk after his kind, 11:17 And the little owl, and the cormorant, and
the great owl, 11:18 And the swan, and the pelican, and the gier eagle, 11:19
And the stork, the heron after her kind, and the lapwing, and the bat. 


11:20 All fowls that creep, going upon all four, shall be an abomination unto
you. 


11:21 Yet these may ye eat of every flying creeping thing that goeth upon all
four, which have legs above their feet, to leap withal upon the earth; 11:22
Even these of them ye may eat; the locust after his kind, and the bald locust
after his kind, and the beetle after his kind, and the grasshopper after his
kind. 


11:23 But all other flying creeping things, which have four feet, shall be an
abomination unto you. 


11:24 And for these ye shall be unclean: whosoever toucheth the carcase of them
shall be unclean until the even.`

	// Slugs for the Leviticus noise rows; each is paired by index with a text
	// from levTexts below.
	levSlugs := []string{
		"kjv_bible.epub_564_0",
		"kjv_bible.epub_565_0",
		"kjv_bible.epub_579_0",
		"kjv_bible.epub_580_0",
		"kjv_bible.epub_581_0",
		"kjv_bible.epub_582_0",
		"kjv_bible.epub_583_0",
		"kjv_bible.epub_584_0",
		"kjv_bible.epub_585_0",
		"kjv_bible.epub_586_0",
		"kjv_bible.epub_587_0",
		"kjv_bible.epub_588_0",
		"kjv_bible.epub_589_0",
		"kjv_bible.epub_590_0",
	}
	levTexts := []string{
		levLocustA,
		levLocustB,
		`13:40 And the man whose hair is fallen off his head, he is bald; yet is he
clean. 


13:41 And he that hath his hair fallen off from the part of his head toward his
face, he is forehead bald; yet is he clean.`,
		`13:42 And if there be in the bald head, or bald forehead, a white reddish sore;
it is a leprosy sprung up in his bald head, or his bald forehead.`,
		`13:43 Then the priest shall look upon it: and, behold, if the rising of the
sore be white reddish in his bald head, or in his bald forehead, as the leprosy
appearedh in the skin of the flesh;`,
		`13:44 He is a leprous man, he is unclean: the priest shall pronounce him utterly
unclean; his plague is in his head.`,
		`13:45 And the leper in whom the plague is, his clothes shall be rent, and his
head bare, and he shall put a covering upon his upper lip, and shall cry,
Unclean, unclean.`,
		`13:46 All the days wherein the plague shall be in him he shall be defiled; he
is unclean: he shall dwell alone; without the camp shall his habitation be.`,
		`13:47 The garment also that the plague of leprosy is in, whether it be a woollen
garment, or a linen garment;`,
		`13:48 Whether it be in the warp, or woof; of linen, or of woollen; whether in a
skin, or in any thing made of skin;`,
		`13:49 And if the plague be greenish or reddish in the garment, or in the skin,
either in the warp, or in the woof, or in any thing of skin; it is a plague of
leprosy, and shall be shewed unto the priest:`,
		`13:50 And the priest shall look upon the plague, and shut up it that hath the
plague seven days:`,
		`13:51 And he shall look on the plague on the seventh day: if the plague be spread
in the garment, either in the warp, or in the woof, or in a skin, or in any work
that is made of skin; the plague is a fretting leprosy; it is unclean.`,
		`13:52 He shall therefore burn that garment, whether warp or woof, in woollen or
in linen, or any thing of skin, wherein the plague is: for it is a fretting
leprosy; it shall be burnt in the fire.`,
	}

	// Unrelated prose for slug ghost_7.txt_777_0.
	ghostTextA := `Doesn’t he have any pride as a hunter?!  

I didn’t see what other choice I had.  I would just have to grovel and be ready to flee at any given moment.  
The Hidden Curse clan house was in the central region of the imperial capital.  It was a high-class area with extraordinary property values that hosted the residences of people like Lord Gladis.  This district was near the Imperial Castle, though “near” was a 
relative term as it was still a few kilometers away.  

The clan house was made of brick and conformed to an older style of architecture.`

	// Unrelated prose for slug ghost_7.txt_778_0 (overlaps the previous chunk).
	ghostTextB := `I would just have to grovel and be ready to flee at any given moment.  
The Hidden Curse clan house was in the central region of the imperial capital.  It was a high-class area with extraordinary property values that hosted the residences of people like Lord Gladis.  This district was near the Imperial Castle, though “near” was a 
relative term as it was still a few kilometers away.  

The clan house was made of brick and conformed to an older style of architecture.  Nearly everyone knew about this mansion and its clock tower.  It stood tall over the neighboring mansions and rumor had it that you could see the whole capital from the top.  It 
spoke to this clan’s renown and history that they were able to get away with building something that dwarfed the mansions of the nobility.`

	// One target row, one row per Leviticus slug, two unrelated rows.
	rows := make([]*models.VectorRow, 0, 1+len(levSlugs)+2)
	rows = append(rows, newRow("kjv_bible.epub_1786_0", targetText, bibleFile))
	for idx := range levSlugs {
		// The modulo wraps the text index should slugs ever outnumber texts.
		rows = append(rows, newRow(levSlugs[idx], levTexts[idx%len(levTexts)], bibleFile))
	}
	rows = append(rows,
		newRow("ghost_7.txt_777_0", ghostTextA, ghostFile),
		newRow("ghost_7.txt_778_0", ghostTextB, ghostFile),
	)
	return rows
}
// assertTargetInTopN reports whether the target chunk (slug
// kjv_bible.epub_1786_0) appears among the first topN entries of results.
// It does not fail the test itself; the caller decides what to do with the
// returned bool. A non-positive topN or an empty results slice yields false.
func assertTargetInTopN(t *testing.T, results []models.VectorRow, topN int) bool {
	t.Helper()
	const targetSlug = "kjv_bible.epub_1786_0"
	for i := 0; i < topN && i < len(results); i++ {
		if results[i].Slug == targetSlug {
			return true
		}
	}
	return false
}

func TestBiblicalQuery(t *testing.T) {
	chunks := createTestChunks()
	rag, err := setupTestRAG(t, chunks)
	if err != nil {
		t.Fatalf("setup failed: %v", err)
	}
	query := "bald prophet and two she bears"
	results, err := rag.Search(query, 10)
	if err != nil {
		t.Fatalf("search failed: %v", err)
	}
	// The target chunk should be in the top results.
	if !assertTargetInTopN(t, results, 5) {
		t.Errorf("target chunk not found in top 5 results for query %q", query)
		t.Logf("results slugs: %v", func() []string {
			slugs := make([]string, len(results))
			for i, r := range results {
				slugs[i] = r.Slug
			}
			return slugs
		}())
	}
}

func TestQueryVariations(t *testing.T) {
	chunks := createTestChunks()
	rag, err := setupTestRAG(t, chunks)
	if err != nil {
		t.Fatalf("setup failed: %v", err)
	}
	tests := []struct {
		name  string
		query string
		topN  int
	}{
		{"she bears", "she bears", 5},
		{"bald head", "bald head", 5},
		{"two she bears out of the wood", "two she bears out of the wood", 5},
		{"bald prophet", "bald prophet", 10},
		{"go up thou bald head", "\"go up thou bald head\"", 5},
		{"two she bears", "\"two she bears\"", 5},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			results, err := rag.Search(tt.query, 10)
			if err != nil {
				t.Fatalf("search failed: %v", err)
			}
			if !assertTargetInTopN(t, results, tt.topN) {
				t.Errorf("target chunk not found in top %d results for query %q", tt.topN, tt.query)
				t.Logf("results slugs: %v", func() []string {
					slugs := make([]string, len(results))
					for i, r := range results {
						slugs[i] = r.Slug
					}
					return slugs
				}())
			}
		})
	}
}