From f9866bcf5a7369e28246d51b951e81b5b2a8489f Mon Sep 17 00:00:00 2001
From: Grail Finder
Date: Fri, 6 Mar 2026 11:20:50 +0300
Subject: Feat (rag): hybrid search attempt

---
Review notes (free-form commentary; not part of the commit or of the files
this patch creates):

  * 003_add_fts.up.sql creates the FTS5 virtual table `fts_embeddings`.
    `raw_text` is the only column not marked UNINDEXED; slug, filename and
    embedding_size are carried along as UNINDEXED columns. The tokenizer
    option is 'porter unicode61'.
  * 003 creates no triggers and no extra indexes. Its own comments say that
    inserts/deletes are handled programmatically; that code is not shown in
    this path-limited patch.
  * 003_add_fts.down.sql drops `fts_embeddings` (IF EXISTS).
  * 004_populate_fts.up.sql first deletes every row of `fts_embeddings`, then
    copies slug, raw_text and filename from each of embeddings_384, _768,
    _1024, _1536, _2048, _3072, _4096 and _5120, storing the matching literal
    as embedding_size.
    NOTE(review): presumably all eight embeddings_* tables are created by
    earlier migrations -- confirm, since a missing table makes its INSERT fail.
  * 004_populate_fts.down.sql only empties `fts_embeddings`; the table itself
    is removed by 003's down migration.
  * All four new files end without a trailing newline (see the
    "\ No newline at end of file" markers).
  * NOTE(review): line breaks in this copy were restored from a
    whitespace-collapsed rendering; the indentation of the FTS5 column list is
    assumed -- verify against the blob ids on the `index` lines.

 storage/migrations/003_add_fts.down.sql      |  2 ++
 storage/migrations/003_add_fts.up.sql        | 15 +++++++++++++++
 storage/migrations/004_populate_fts.down.sql |  2 ++
 storage/migrations/004_populate_fts.up.sql   | 26 ++++++++++++++++++++++++++
 4 files changed, 45 insertions(+)
 create mode 100644 storage/migrations/003_add_fts.down.sql
 create mode 100644 storage/migrations/003_add_fts.up.sql
 create mode 100644 storage/migrations/004_populate_fts.down.sql
 create mode 100644 storage/migrations/004_populate_fts.up.sql

(limited to 'storage/migrations')

diff --git a/storage/migrations/003_add_fts.down.sql b/storage/migrations/003_add_fts.down.sql
new file mode 100644
index 0000000..e565fd5
--- /dev/null
+++ b/storage/migrations/003_add_fts.down.sql
@@ -0,0 +1,2 @@
+-- Drop FTS5 virtual table
+DROP TABLE IF EXISTS fts_embeddings;
\ No newline at end of file
diff --git a/storage/migrations/003_add_fts.up.sql b/storage/migrations/003_add_fts.up.sql
new file mode 100644
index 0000000..114586a
--- /dev/null
+++ b/storage/migrations/003_add_fts.up.sql
@@ -0,0 +1,15 @@
+-- Create FTS5 virtual table for full-text search
+CREATE VIRTUAL TABLE IF NOT EXISTS fts_embeddings USING fts5(
+    slug UNINDEXED,
+    raw_text,
+    filename UNINDEXED,
+    embedding_size UNINDEXED,
+    tokenize='porter unicode61' -- Use porter stemmer and unicode61 tokenizer
+);
+
+-- Create triggers to maintain FTS table when embeddings are inserted/deleted
+-- Note: We'll handle inserts/deletes programmatically for simplicity
+-- but triggers could be added here if needed.
+
+-- Indexes for performance (FTS5 manages its own indexes)
+-- No additional indexes needed for FTS5 virtual table.
\ No newline at end of file
diff --git a/storage/migrations/004_populate_fts.down.sql b/storage/migrations/004_populate_fts.down.sql
new file mode 100644
index 0000000..2b5c756
--- /dev/null
+++ b/storage/migrations/004_populate_fts.down.sql
@@ -0,0 +1,2 @@
+-- Clear FTS table (optional)
+DELETE FROM fts_embeddings;
\ No newline at end of file
diff --git a/storage/migrations/004_populate_fts.up.sql b/storage/migrations/004_populate_fts.up.sql
new file mode 100644
index 0000000..1d1b16a
--- /dev/null
+++ b/storage/migrations/004_populate_fts.up.sql
@@ -0,0 +1,26 @@
+-- Populate FTS table with existing embeddings
+DELETE FROM fts_embeddings;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 384 FROM embeddings_384;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 768 FROM embeddings_768;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 1024 FROM embeddings_1024;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 1536 FROM embeddings_1536;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 2048 FROM embeddings_2048;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 3072 FROM embeddings_3072;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 4096 FROM embeddings_4096;
+
+INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
+SELECT slug, raw_text, filename, 5120 FROM embeddings_5120;
\ No newline at end of file
-- 
cgit v1.2.3