diff options
| -rw-r--r-- | rag/rag.go | 5 | ||||
| -rw-r--r-- | rag/storage.go | 39 | ||||
| -rw-r--r-- | storage/migrations/002_add_vector.down.sql | 24 | ||||
| -rw-r--r-- | storage/migrations/002_add_vector.up.sql | 72 | ||||
| -rw-r--r-- | storage/vector.go | 37 |
5 files changed, 123 insertions, 54 deletions
@@ -43,10 +43,7 @@ func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) *RAG { storage: NewVectorStorage(l, s), } - // Create the necessary tables - if err := rag.storage.CreateTables(); err != nil { - l.Error("failed to create vector tables", "error", err) - } + // Note: Vector tables are created via database migrations, not at runtime return rag } diff --git a/rag/storage.go b/rag/storage.go index bb3a24a..782c504 100644 --- a/rag/storage.go +++ b/rag/storage.go @@ -28,45 +28,6 @@ func NewVectorStorage(logger *slog.Logger, store storage.FullRepo) *VectorStorag } } -// CreateTables creates the necessary tables for vector storage -func (vs *VectorStorage) CreateTables() error { - // Create tables for common embedding dimensions - embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120} - // Pre-allocate queries slice: each embedding size needs 1 table + 3 indexes = 4 queries per size - queries := make([]string, 0, len(embeddingSizes)*4) - - // Generate table creation queries for each embedding size - for _, size := range embeddingSizes { - tableName := fmt.Sprintf("embeddings_%d", size) - queries = append(queries, - fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - embeddings BLOB NOT NULL, - slug TEXT NOT NULL, - raw_text TEXT NOT NULL, - filename TEXT NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - )`, tableName), - ) - } - - // Add indexes for all supported sizes - for _, size := range embeddingSizes { - tableName := fmt.Sprintf("embeddings_%d", size) - queries = append(queries, - fmt.Sprintf(`CREATE INDEX IF NOT EXISTS idx_%s_filename ON %s(filename)`, tableName, tableName), - fmt.Sprintf(`CREATE INDEX IF NOT EXISTS idx_%s_slug ON %s(slug)`, tableName, tableName), - fmt.Sprintf(`CREATE INDEX IF NOT EXISTS idx_%s_created_at ON %s(created_at)`, tableName, tableName), - ) - } - - for _, query := range queries { - if _, err := vs.sqlxDB.Exec(query); err != nil { - return fmt.Errorf("failed to create table: %w", err) - } - } - return nil -} // SerializeVector converts []float32 to binary blob func SerializeVector(vec []float32) []byte { diff --git a/storage/migrations/002_add_vector.down.sql b/storage/migrations/002_add_vector.down.sql index 71c1f51..a257b11 100644 --- a/storage/migrations/002_add_vector.down.sql +++ b/storage/migrations/002_add_vector.down.sql @@ -1,10 +1,34 @@ -- Drop vector storage tables DROP INDEX IF EXISTS idx_embeddings_384_filename; +DROP INDEX IF EXISTS idx_embeddings_768_filename; +DROP INDEX IF EXISTS idx_embeddings_1024_filename; +DROP INDEX IF EXISTS idx_embeddings_1536_filename; +DROP INDEX IF EXISTS idx_embeddings_2048_filename; +DROP INDEX IF EXISTS idx_embeddings_3072_filename; +DROP INDEX IF EXISTS idx_embeddings_4096_filename; DROP INDEX IF EXISTS idx_embeddings_5120_filename; DROP INDEX IF EXISTS idx_embeddings_384_slug; +DROP INDEX IF EXISTS idx_embeddings_768_slug; +DROP INDEX IF EXISTS idx_embeddings_1024_slug; +DROP INDEX IF EXISTS idx_embeddings_1536_slug; +DROP INDEX IF EXISTS idx_embeddings_2048_slug; +DROP INDEX IF EXISTS idx_embeddings_3072_slug; +DROP INDEX IF EXISTS idx_embeddings_4096_slug; DROP INDEX IF EXISTS idx_embeddings_5120_slug; DROP INDEX IF EXISTS idx_embeddings_384_created_at; +DROP INDEX IF EXISTS idx_embeddings_768_created_at; +DROP INDEX IF EXISTS idx_embeddings_1024_created_at; +DROP INDEX IF EXISTS idx_embeddings_1536_created_at; +DROP INDEX IF EXISTS idx_embeddings_2048_created_at; +DROP INDEX IF EXISTS idx_embeddings_3072_created_at; +DROP INDEX IF EXISTS idx_embeddings_4096_created_at; DROP INDEX IF EXISTS idx_embeddings_5120_created_at; DROP TABLE IF EXISTS embeddings_384; +DROP TABLE IF EXISTS embeddings_768; +DROP TABLE IF EXISTS embeddings_1024; +DROP TABLE IF EXISTS embeddings_1536; +DROP TABLE IF EXISTS embeddings_2048; +DROP TABLE IF EXISTS embeddings_3072; +DROP TABLE IF EXISTS embeddings_4096; DROP TABLE IF EXISTS embeddings_5120;
\ No newline at end of file diff --git a/storage/migrations/002_add_vector.up.sql b/storage/migrations/002_add_vector.up.sql index 6e164ce..baf703d 100644 --- a/storage/migrations/002_add_vector.up.sql +++ b/storage/migrations/002_add_vector.up.sql @@ -8,6 +8,60 @@ CREATE TABLE IF NOT EXISTS embeddings_384 ( created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); +CREATE TABLE IF NOT EXISTS embeddings_768 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + embeddings BLOB NOT NULL, + slug TEXT NOT NULL, + raw_text TEXT NOT NULL, + filename TEXT NOT NULL DEFAULT '', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS embeddings_1024 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + embeddings BLOB NOT NULL, + slug TEXT NOT NULL, + raw_text TEXT NOT NULL, + filename TEXT NOT NULL DEFAULT '', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS embeddings_1536 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + embeddings BLOB NOT NULL, + slug TEXT NOT NULL, + raw_text TEXT NOT NULL, + filename TEXT NOT NULL DEFAULT '', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS embeddings_2048 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + embeddings BLOB NOT NULL, + slug TEXT NOT NULL, + raw_text TEXT NOT NULL, + filename TEXT NOT NULL DEFAULT '', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS embeddings_3072 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + embeddings BLOB NOT NULL, + slug TEXT NOT NULL, + raw_text TEXT NOT NULL, + filename TEXT NOT NULL DEFAULT '', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS embeddings_4096 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + embeddings BLOB NOT NULL, + slug TEXT NOT NULL, + raw_text TEXT NOT NULL, + filename TEXT NOT NULL DEFAULT '', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + CREATE TABLE IF NOT EXISTS embeddings_5120 ( id INTEGER PRIMARY KEY AUTOINCREMENT, embeddings BLOB NOT NULL, @@ -19,8 +73,26 @@ CREATE TABLE IF NOT EXISTS embeddings_5120 ( -- Indexes for better performance CREATE INDEX IF NOT EXISTS idx_embeddings_384_filename ON embeddings_384(filename); +CREATE INDEX IF NOT EXISTS idx_embeddings_768_filename ON embeddings_768(filename); +CREATE INDEX IF NOT EXISTS idx_embeddings_1024_filename ON embeddings_1024(filename); +CREATE INDEX IF NOT EXISTS idx_embeddings_1536_filename ON embeddings_1536(filename); +CREATE INDEX IF NOT EXISTS idx_embeddings_2048_filename ON embeddings_2048(filename); +CREATE INDEX IF NOT EXISTS idx_embeddings_3072_filename ON embeddings_3072(filename); +CREATE INDEX IF NOT EXISTS idx_embeddings_4096_filename ON embeddings_4096(filename); CREATE INDEX IF NOT EXISTS idx_embeddings_5120_filename ON embeddings_5120(filename); CREATE INDEX IF NOT EXISTS idx_embeddings_384_slug ON embeddings_384(slug); +CREATE INDEX IF NOT EXISTS idx_embeddings_768_slug ON embeddings_768(slug); +CREATE INDEX IF NOT EXISTS idx_embeddings_1024_slug ON embeddings_1024(slug); +CREATE INDEX IF NOT EXISTS idx_embeddings_1536_slug ON embeddings_1536(slug); +CREATE INDEX IF NOT EXISTS idx_embeddings_2048_slug ON embeddings_2048(slug); +CREATE INDEX IF NOT EXISTS idx_embeddings_3072_slug ON embeddings_3072(slug); +CREATE INDEX IF NOT EXISTS idx_embeddings_4096_slug ON embeddings_4096(slug); CREATE INDEX IF NOT EXISTS idx_embeddings_5120_slug ON embeddings_5120(slug); CREATE INDEX IF NOT EXISTS idx_embeddings_384_created_at ON embeddings_384(created_at); +CREATE INDEX IF NOT EXISTS idx_embeddings_768_created_at ON embeddings_768(created_at); +CREATE INDEX IF NOT EXISTS idx_embeddings_1024_created_at ON embeddings_1024(created_at); +CREATE INDEX IF NOT EXISTS idx_embeddings_1536_created_at ON embeddings_1536(created_at); +CREATE INDEX IF NOT EXISTS idx_embeddings_2048_created_at ON embeddings_2048(created_at); +CREATE INDEX IF NOT EXISTS idx_embeddings_3072_created_at ON embeddings_3072(created_at); +CREATE INDEX IF NOT EXISTS idx_embeddings_4096_created_at ON embeddings_4096(created_at); CREATE INDEX IF NOT EXISTS idx_embeddings_5120_created_at ON embeddings_5120(created_at); diff --git a/storage/vector.go b/storage/vector.go index 2793022..32b4731 100644 --- a/storage/vector.go +++ b/storage/vector.go @@ -45,17 +45,24 @@ func mathBitsToFloat32(b uint32) float32 { return *(*float32)(unsafe.Pointer(&b)) } -var ( - vecTableName5120 = "embeddings_5120" - vecTableName384 = "embeddings_384" -) - func fetchTableName(emb []float32) (string, error) { switch len(emb) { - case 5120: - return vecTableName5120, nil case 384: - return vecTableName384, nil + return "embeddings_384", nil + case 768: + return "embeddings_768", nil + case 1024: + return "embeddings_1024", nil + case 1536: + return "embeddings_1536", nil + case 2048: + return "embeddings_2048", nil + case 3072: + return "embeddings_3072", nil + case 4096: + return "embeddings_4096", nil + case 5120: + return "embeddings_5120", nil default: return "", fmt.Errorf("no table for the size of %d", len(emb)) } @@ -185,8 +192,12 @@ func sqrt(f float32) float32 { func (p ProviderSQL) ListFiles() ([]string, error) { fileLists := make([][]string, 0) - // Query both tables and combine results - for _, table := range []string{vecTableName384, vecTableName5120} { + // Query all supported tables and combine results + tableNames := []string{ + "embeddings_384", "embeddings_768", "embeddings_1024", "embeddings_1536", + "embeddings_2048", "embeddings_3072", "embeddings_4096", "embeddings_5120", + } + for _, table := range tableNames { query := "SELECT DISTINCT filename FROM " + table rows, err := p.db.Query(query) if err != nil { @@ -225,7 +236,11 @@ func (p ProviderSQL) ListFiles() ([]string, error) { func (p ProviderSQL) RemoveEmbByFileName(filename string) error { var errors []string - for _, table := range []string{vecTableName384, vecTableName5120} { + tableNames := []string{ + "embeddings_384", "embeddings_768", "embeddings_1024", "embeddings_1536", + "embeddings_2048", "embeddings_3072", "embeddings_4096", "embeddings_5120", + } + for _, table := range tableNames { query := fmt.Sprintf("DELETE FROM %s WHERE filename = ?", table) if _, err := p.db.Exec(query, filename); err != nil { errors = append(errors, err.Error()) |
