diff options
Diffstat (limited to 'rag/rag_real_test.go')
| -rw-r--r-- | rag/rag_real_test.go | 131 |
1 files changed, 131 insertions, 0 deletions
diff --git a/rag/rag_real_test.go b/rag/rag_real_test.go new file mode 100644 index 0000000..87f6906 --- /dev/null +++ b/rag/rag_real_test.go @@ -0,0 +1,131 @@ +package rag + +import ( + "gf-lt/config" + "gf-lt/storage" + "log/slog" + "os" + "path/filepath" + "testing" +) + +func TestRealBiblicalQuery(t *testing.T) { + if testing.Short() { + t.Skip("skipping real embedder test in short mode") + } + // Check if the embedder model exists + modelPath := filepath.Join("..", "onnx", "embedgemma", "model_q4.onnx") + if _, err := os.Stat(modelPath); os.IsNotExist(err) { + t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath) + } + tokenizerPath := filepath.Join("..", "onnx", "embedgemma", "tokenizer.json") + dbPath := filepath.Join("..", "gflt.db") + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + t.Skipf("database not found at %s; skipping real embedder test", dbPath) + } + cfg := &config.Config{ + EmbedModelPath: modelPath, + EmbedTokenizerPath: tokenizerPath, + EmbedDims: 768, + RAGWordLimit: 250, + RAGOverlapWords: 25, + RAGBatchSize: 1, + } + logger := slog.New(slog.NewTextHandler(nil, &slog.HandlerOptions{Level: slog.LevelError})) + store := storage.NewProviderSQL(dbPath, logger) + if store == nil { + t.Fatal("failed to create storage provider") + } + rag, err := New(logger, store, cfg) + if err != nil { + t.Fatalf("failed to create RAG instance: %v", err) + } + t.Cleanup(func() { rag.Destroy() }) + + query := "bald prophet and two she bears" + results, err := rag.Search(query, 30) + if err != nil { + t.Fatalf("search failed: %v", err) + } + found := false + for i, row := range results { + if row.Slug == "kjv_bible.epub_1786_0" { + found = true + t.Logf("target chunk found at rank %d", i+1) + break + } + } + if !found { + t.Errorf("target chunk not found in search results for query %q", query) + t.Logf("results slugs:") + for i, r := range results { + t.Logf("%d: %s", i+1, r.Slug) + } + } +} + +func TestRealQueryVariations(t *testing.T) { + if testing.Short() { + t.Skip("skipping real embedder test in short mode") + } + modelPath := filepath.Join("..", "onnx", "embedgemma", "model_q4.onnx") + if _, err := os.Stat(modelPath); os.IsNotExist(err) { + t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath) + } + tokenizerPath := filepath.Join("..", "onnx", "embedgemma", "tokenizer.json") + dbPath := filepath.Join("..", "gflt.db") + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + t.Skipf("database not found at %s; skipping real embedder test", dbPath) + } + cfg := &config.Config{ + EmbedModelPath: modelPath, + EmbedTokenizerPath: tokenizerPath, + EmbedDims: 768, + RAGWordLimit: 250, + RAGOverlapWords: 25, + RAGBatchSize: 1, + } + logger := slog.New(slog.NewTextHandler(nil, &slog.HandlerOptions{Level: slog.LevelError})) + store := storage.NewProviderSQL(dbPath, logger) + if store == nil { + t.Fatal("failed to create storage provider") + } + rag, err := New(logger, store, cfg) + if err != nil { + t.Fatalf("failed to create RAG instance: %v", err) + } + t.Cleanup(func() { rag.Destroy() }) + + tests := []struct { + name string + query string + }{ + {"she bears", "she bears"}, + {"bald head", "bald head"}, + {"two she bears out of the wood", "two she bears out of the wood"}, + {"bald prophet", "bald prophet"}, + {"go up thou bald head", "\"go up thou bald head\""}, + {"two she bears", "\"two she bears\""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + results, err := rag.Search(tt.query, 10) + if err != nil { + t.Fatalf("search failed: %v", err) + } + found := false + for _, row := range results { + if row.Slug == "kjv_bible.epub_1786_0" { + found = true + break + } + } + if !found { + t.Errorf("target chunk not found for query %q", tt.query) + for i, r := range results { + t.Logf("%d: %s", i+1, r.Slug) + } + } + }) + } +} |
