summaryrefslogtreecommitdiff
path: root/rag/rag_real_test.go
blob: 87f69061253ae9d51f5dade1b12ca0b60a6978a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
package rag

import (
	"gf-lt/config"
	"gf-lt/storage"
	"log/slog"
	"os"
	"path/filepath"
	"testing"
)

// TestRealBiblicalQuery runs a single search against the real embedder model
// and the real database, and checks that the expected chunk slug appears in
// the results returned by Search(query, 30).
//
// The test is skipped in -short mode, and when either the ONNX model file or
// the database file does not exist at its expected relative path.
func TestRealBiblicalQuery(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping real embedder test in short mode")
	}

	// Slug of the chunk the query is expected to retrieve.
	const targetSlug = "kjv_bible.epub_1786_0"

	// Check if the embedder model exists
	embedDir := filepath.Join("..", "onnx", "embedgemma")
	modelPath := filepath.Join(embedDir, "model_q4.onnx")
	if _, err := os.Stat(modelPath); os.IsNotExist(err) {
		t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath)
	}
	tokenizerPath := filepath.Join(embedDir, "tokenizer.json")

	dbPath := filepath.Join("..", "gflt.db")
	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
		t.Skipf("database not found at %s; skipping real embedder test", dbPath)
	}

	cfg := &config.Config{
		EmbedModelPath:     modelPath,
		EmbedTokenizerPath: tokenizerPath,
		EmbedDims:          768,
		RAGWordLimit:       250,
		RAGOverlapWords:    25,
		RAGBatchSize:       1,
	}

	// The handler must be given a real writer: a slog handler built around a
	// nil io.Writer panics as soon as an error-level record is emitted, which
	// would hide the very error being logged. Errors go to stderr instead.
	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))

	store := storage.NewProviderSQL(dbPath, logger)
	if store == nil {
		t.Fatal("failed to create storage provider")
	}
	rag, err := New(logger, store, cfg)
	if err != nil {
		t.Fatalf("failed to create RAG instance: %v", err)
	}
	t.Cleanup(func() { rag.Destroy() })

	query := "bald prophet and two she bears"
	results, err := rag.Search(query, 30)
	if err != nil {
		t.Fatalf("search failed: %v", err)
	}

	for i, row := range results {
		if row.Slug == targetSlug {
			t.Logf("target chunk found at rank %d", i+1)
			return
		}
	}

	// Not found: report the failure and dump what was returned to aid debugging.
	t.Errorf("target chunk not found in search results for query %q", query)
	t.Logf("results slugs:")
	for i, r := range results {
		t.Logf("%d: %s", i+1, r.Slug)
	}
}

// TestRealQueryVariations runs several phrasings of the same query against the
// real embedder model and the real database. Each phrasing is a subtest that
// checks the expected chunk slug appears in the results of Search(query, 10).
//
// The test is skipped in -short mode, and when either the ONNX model file or
// the database file does not exist at its expected relative path.
func TestRealQueryVariations(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping real embedder test in short mode")
	}

	// Slug of the chunk every query variation is expected to retrieve.
	const targetSlug = "kjv_bible.epub_1786_0"

	embedDir := filepath.Join("..", "onnx", "embedgemma")
	modelPath := filepath.Join(embedDir, "model_q4.onnx")
	if _, err := os.Stat(modelPath); os.IsNotExist(err) {
		t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath)
	}
	tokenizerPath := filepath.Join(embedDir, "tokenizer.json")

	dbPath := filepath.Join("..", "gflt.db")
	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
		t.Skipf("database not found at %s; skipping real embedder test", dbPath)
	}

	cfg := &config.Config{
		EmbedModelPath:     modelPath,
		EmbedTokenizerPath: tokenizerPath,
		EmbedDims:          768,
		RAGWordLimit:       250,
		RAGOverlapWords:    25,
		RAGBatchSize:       1,
	}

	// The handler must be given a real writer: a slog handler built around a
	// nil io.Writer panics as soon as an error-level record is emitted, which
	// would hide the very error being logged. Errors go to stderr instead.
	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))

	store := storage.NewProviderSQL(dbPath, logger)
	if store == nil {
		t.Fatal("failed to create storage provider")
	}
	rag, err := New(logger, store, cfg)
	if err != nil {
		t.Fatalf("failed to create RAG instance: %v", err)
	}
	t.Cleanup(func() { rag.Destroy() })

	// The last two cases wrap the query text in literal double quotes.
	tests := []struct {
		name  string
		query string
	}{
		{"she bears", "she bears"},
		{"bald head", "bald head"},
		{"two she bears out of the wood", "two she bears out of the wood"},
		{"bald prophet", "bald prophet"},
		{"go up thou bald head", "\"go up thou bald head\""},
		{"two she bears", "\"two she bears\""},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			results, err := rag.Search(tt.query, 10)
			if err != nil {
				t.Fatalf("search failed: %v", err)
			}
			for _, row := range results {
				if row.Slug == targetSlug {
					return
				}
			}
			// Not found: report the failure and dump the returned slugs.
			t.Errorf("target chunk not found for query %q", tt.query)
			for i, r := range results {
				t.Logf("%d: %s", i+1, r.Slug)
			}
		})
	}
}