summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
author Grail Finder <wohilas@gmail.com> 2026-03-06 11:20:50 +0300
committer Grail Finder <wohilas@gmail.com> 2026-03-06 11:20:50 +0300
commit f9866bcf5a7369e28246d51b951e81b5b2a8489f (patch)
tree c09c3f4b0588a39735f19c61cf386195a1797604 /storage
parent 822cc48834f5f1908f619b5441ae40946aceb86d (diff)
Feat (rag): hybrid search attempt
Diffstat (limited to 'storage')
-rw-r--r-- storage/migrations/003_add_fts.down.sql 2
-rw-r--r-- storage/migrations/003_add_fts.up.sql 15
-rw-r--r-- storage/migrations/004_populate_fts.down.sql 2
-rw-r--r-- storage/migrations/004_populate_fts.up.sql 26
-rw-r--r-- storage/vector.go 38
5 files changed, 60 insertions, 23 deletions
diff --git a/storage/migrations/003_add_fts.down.sql b/storage/migrations/003_add_fts.down.sql
new file mode 100644
index 0000000..e565fd5
--- /dev/null
+++ b/storage/migrations/003_add_fts.down.sql
@@ -0,0 +1,2 @@
+-- Drop FTS5 virtual table
+DROP TABLE IF EXISTS fts_embeddings; \ No newline at end of file
diff --git a/storage/migrations/003_add_fts.up.sql b/storage/migrations/003_add_fts.up.sql
new file mode 100644
index 0000000..114586a
--- /dev/null
+++ b/storage/migrations/003_add_fts.up.sql
@@ -0,0 +1,15 @@
+-- Create FTS5 virtual table for full-text search
+CREATE VIRTUAL TABLE IF NOT EXISTS fts_embeddings USING fts5(
+ slug UNINDEXED,
+ raw_text,
+ filename UNINDEXED,
+ embedding_size UNINDEXED,
+ tokenize='porter unicode61' -- Use porter stemmer and unicode61 tokenizer
+);
+
+-- No triggers are defined here to keep the FTS table in sync with the embeddings tables.
+-- Note: inserts and deletes are handled programmatically for simplicity,
+-- but triggers could be added here if needed.
+
+-- Indexes for performance (FTS5 manages its own indexes)
+-- No additional indexes needed for FTS5 virtual table. \ No newline at end of file
diff --git a/storage/migrations/004_populate_fts.down.sql b/storage/migrations/004_populate_fts.down.sql
new file mode 100644
index 0000000..2b5c756
--- /dev/null
+++ b/storage/migrations/004_populate_fts.down.sql
@@ -0,0 +1,2 @@
+-- Remove all rows from the FTS table, reversing the populate step (the table itself is dropped by the 003 down migration)
+DELETE FROM fts_embeddings; \ No newline at end of file
diff --git a/storage/migrations/004_populate_fts.up.sql b/storage/migrations/004_populate_fts.up.sql
new file mode 100644
index 0000000..1d1b16a
--- /dev/null
+++ b/storage/migrations/004_populate_fts.up.sql
@@ -0,0 +1,26 @@
+-- Rebuild the FTS table from existing embeddings: clear it first, then copy rows from each embeddings_<size> table
+DELETE FROM fts_embeddings;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 384 FROM embeddings_384;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 768 FROM embeddings_768;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 1024 FROM embeddings_1024;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 1536 FROM embeddings_1536;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 2048 FROM embeddings_2048;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 3072 FROM embeddings_3072;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 4096 FROM embeddings_4096;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 5120 FROM embeddings_5120; \ No newline at end of file
diff --git a/storage/vector.go b/storage/vector.go
index 75f5c9a..e3bbb89 100644
--- a/storage/vector.go
+++ b/storage/vector.go
@@ -4,6 +4,7 @@ import (
"encoding/binary"
"fmt"
"gf-lt/models"
+ "sort"
"unsafe"
"github.com/jmoiron/sqlx"
@@ -11,7 +12,7 @@ import (
type VectorRepo interface {
WriteVector(*models.VectorRow) error
- SearchClosest(q []float32) ([]models.VectorRow, error)
+ SearchClosest(q []float32, limit int) ([]models.VectorRow, error)
ListFiles() ([]string, error)
RemoveEmbByFileName(filename string) error
DB() *sqlx.DB
@@ -79,7 +80,7 @@ func (p ProviderSQL) WriteVector(row *models.VectorRow) error {
return err
}
-func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
+func (p ProviderSQL) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) {
tableName, err := fetchTableName(q)
if err != nil {
return nil, err
@@ -94,7 +95,7 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
vector models.VectorRow
distance float32
}
- var topResults []SearchResult
+ var allResults []SearchResult
for rows.Next() {
var (
embeddingsBlob []byte
@@ -119,28 +120,19 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
},
distance: distance,
}
-
- // Add to top results and maintain only top results
- topResults = append(topResults, result)
-
- // Sort and keep only top results
- // We'll keep the top 3 closest vectors
- if len(topResults) > 3 {
- // Simple sort and truncate to maintain only 3 best matches
- for i := 0; i < len(topResults); i++ {
- for j := i + 1; j < len(topResults); j++ {
- if topResults[i].distance > topResults[j].distance {
- topResults[i], topResults[j] = topResults[j], topResults[i]
- }
- }
- }
- topResults = topResults[:3]
- }
+ allResults = append(allResults, result)
+ }
+ // Sort by distance
+ sort.Slice(allResults, func(i, j int) bool {
+ return allResults[i].distance < allResults[j].distance
+ })
+ // Truncate to limit
+ if len(allResults) > limit {
+ allResults = allResults[:limit]
}
-
// Convert back to VectorRow slice
- results := make([]models.VectorRow, len(topResults))
- for i, result := range topResults {
+ results := make([]models.VectorRow, len(allResults))
+ for i, result := range allResults {
result.vector.Distance = result.distance
results[i] = result.vector
}