summary refs log tree commit diff
path: root/storage
diff options
context:
space:
mode:
author Grail Finder <wohilas@gmail.com> 2026-03-09 07:07:36 +0300
committer Grail Finder <wohilas@gmail.com> 2026-03-09 07:07:36 +0300
commit 0e42a6f069ceea40485162c014c04cf718568cfe (patch)
tree 583a6a6cb91b315e506990a03fdda1b32d0fe985 /storage
parent 2687f38d00ceaa4f61034e3e02b9b59d08efc017 (diff)
parent a1b5f9cdc59938901123650fc0900067ac3447ca (diff)
Merge branch 'master' into feat/agent-flow
Diffstat (limited to 'storage')
-rw-r--r-- storage/migrations/003_add_fts.down.sql 2
-rw-r--r-- storage/migrations/003_add_fts.up.sql 15
-rw-r--r-- storage/migrations/004_populate_fts.down.sql 2
-rw-r--r-- storage/migrations/004_populate_fts.up.sql 26
-rw-r--r-- storage/storage.go 16
-rw-r--r-- storage/vector.go 38
6 files changed, 76 insertions, 23 deletions
diff --git a/storage/migrations/003_add_fts.down.sql b/storage/migrations/003_add_fts.down.sql
new file mode 100644
index 0000000..e565fd5
--- /dev/null
+++ b/storage/migrations/003_add_fts.down.sql
@@ -0,0 +1,2 @@
+-- Drop FTS5 virtual table
+DROP TABLE IF EXISTS fts_embeddings;
\ No newline at end of file
diff --git a/storage/migrations/003_add_fts.up.sql b/storage/migrations/003_add_fts.up.sql
new file mode 100644
index 0000000..114586a
--- /dev/null
+++ b/storage/migrations/003_add_fts.up.sql
@@ -0,0 +1,15 @@
+-- Create FTS5 virtual table for full-text search
+CREATE VIRTUAL TABLE IF NOT EXISTS fts_embeddings USING fts5(
+ slug UNINDEXED,
+ raw_text,
+ filename UNINDEXED,
+ embedding_size UNINDEXED,
+ tokenize='porter unicode61' -- Use porter stemmer and unicode61 tokenizer
+);
+
+-- No triggers are defined in this migration: for simplicity, the FTS table
+-- is kept in sync with embedding inserts/deletes programmatically.
+-- Triggers could be added here if needed.
+
+-- Indexes for performance (FTS5 manages its own indexes)
+-- No additional indexes needed for FTS5 virtual table.
\ No newline at end of file
diff --git a/storage/migrations/004_populate_fts.down.sql b/storage/migrations/004_populate_fts.down.sql
new file mode 100644
index 0000000..2b5c756
--- /dev/null
+++ b/storage/migrations/004_populate_fts.down.sql
@@ -0,0 +1,2 @@
+-- Clear FTS table (optional)
+DELETE FROM fts_embeddings;
\ No newline at end of file
diff --git a/storage/migrations/004_populate_fts.up.sql b/storage/migrations/004_populate_fts.up.sql
new file mode 100644
index 0000000..1d1b16a
--- /dev/null
+++ b/storage/migrations/004_populate_fts.up.sql
@@ -0,0 +1,26 @@
+-- Populate FTS table with existing embeddings
+DELETE FROM fts_embeddings;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 384 FROM embeddings_384;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 768 FROM embeddings_768;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 1024 FROM embeddings_1024;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 1536 FROM embeddings_1536;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 2048 FROM embeddings_2048;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 3072 FROM embeddings_3072;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 4096 FROM embeddings_4096;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 5120 FROM embeddings_5120;
\ No newline at end of file
diff --git a/storage/storage.go b/storage/storage.go
index 9ad9745..57631da 100644
--- a/storage/storage.go
+++ b/storage/storage.go
@@ -102,6 +102,22 @@ func NewProviderSQL(dbPath string, logger *slog.Logger) FullRepo {
logger.Error("failed to open db connection", "error", err)
return nil
}
+ // Enable WAL mode for better concurrency and performance
+ if _, err := db.Exec("PRAGMA journal_mode = WAL;"); err != nil {
+ logger.Warn("failed to enable WAL mode", "error", err)
+ }
+ if _, err := db.Exec("PRAGMA synchronous = NORMAL;"); err != nil {
+ logger.Warn("failed to set synchronous mode", "error", err)
+ }
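+ // Set page cache to ~2 MiB (negative cache_size is in KiB). NOTE(review): -2000 is SQLite's documented default, so this may be a no-op — confirm against the driver build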
+ if _, err := db.Exec("PRAGMA cache_size = -2000;"); err != nil {
+ logger.Warn("failed to set cache size", "error", err)
+ }
+ // Log actual journal mode for debugging
+ var journalMode string
+ if err := db.QueryRow("PRAGMA journal_mode;").Scan(&journalMode); err == nil {
+ logger.Debug("SQLite journal mode", "mode", journalMode)
+ }
p := ProviderSQL{db: db, logger: logger}
if err := p.Migrate(); err != nil {
logger.Error("migration failed, app cannot start", "error", err)
diff --git a/storage/vector.go b/storage/vector.go
index 75f5c9a..e3bbb89 100644
--- a/storage/vector.go
+++ b/storage/vector.go
@@ -4,6 +4,7 @@ import (
"encoding/binary"
"fmt"
"gf-lt/models"
+ "sort"
"unsafe"
"github.com/jmoiron/sqlx"
@@ -11,7 +12,7 @@ import (
type VectorRepo interface {
WriteVector(*models.VectorRow) error
- SearchClosest(q []float32) ([]models.VectorRow, error)
+ SearchClosest(q []float32, limit int) ([]models.VectorRow, error)
ListFiles() ([]string, error)
RemoveEmbByFileName(filename string) error
DB() *sqlx.DB
@@ -79,7 +80,7 @@ func (p ProviderSQL) WriteVector(row *models.VectorRow) error {
return err
}
-func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
+func (p ProviderSQL) SearchClosest(q []float32, limit int) ([]models.VectorRow, error) {
tableName, err := fetchTableName(q)
if err != nil {
return nil, err
@@ -94,7 +95,7 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
vector models.VectorRow
distance float32
}
- var topResults []SearchResult
+ var allResults []SearchResult
for rows.Next() {
var (
embeddingsBlob []byte
@@ -119,28 +120,19 @@ func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
},
distance: distance,
}
-
- // Add to top results and maintain only top results
- topResults = append(topResults, result)
-
- // Sort and keep only top results
- // We'll keep the top 3 closest vectors
- if len(topResults) > 3 {
- // Simple sort and truncate to maintain only 3 best matches
- for i := 0; i < len(topResults); i++ {
- for j := i + 1; j < len(topResults); j++ {
- if topResults[i].distance > topResults[j].distance {
- topResults[i], topResults[j] = topResults[j], topResults[i]
- }
- }
- }
- topResults = topResults[:3]
- }
+ allResults = append(allResults, result)
+ }
+ // Sort by distance
+ sort.Slice(allResults, func(i, j int) bool {
+ return allResults[i].distance < allResults[j].distance
+ })
+ // Truncate to limit
+ if len(allResults) > limit {
+ allResults = allResults[:limit]
}
-
// Convert back to VectorRow slice
- results := make([]models.VectorRow, len(topResults))
- for i, result := range topResults {
+ results := make([]models.VectorRow, len(allResults))
+ for i, result := range allResults {
result.vector.Distance = result.distance
results[i] = result.vector
}