summary | refs | log | tree | commit | diff
path: root/storage/vector.go.bak
diff options
context:
space:
mode:
author: Grail Finder <wohilas@gmail.com> 2025-11-19 12:32:46 +0300
committer: Grail Finder <wohilas@gmail.com> 2025-11-19 12:32:46 +0300
commit: 25b2e2f592bd8df9a5cbd3c77322b572eb8f829c (patch)
tree: a49630b91762e19a28dd500941e0b3f31cc9747c /storage/vector.go.bak
parent: 88b45f04b73fa408a9d7565c604a59c307bf9652 (diff)
Fix: migration use of vec0; rag cleanup
Diffstat (limited to 'storage/vector.go.bak')
-rw-r--r-- storage/vector.go.bak 179
1 files changed, 0 insertions, 179 deletions
diff --git a/storage/vector.go.bak b/storage/vector.go.bak
deleted file mode 100644
index f663beb..0000000
--- a/storage/vector.go.bak
+++ /dev/null
@@ -1,179 +0,0 @@
-package storage
-
import (
	"encoding/binary"
	"fmt"
	"math"
	"sort"
	"unsafe"

	"gf-lt/models"
)
-
-type VectorRepo interface {
- WriteVector(*models.VectorRow) error
- SearchClosest(q []float32) ([]models.VectorRow, error)
- ListFiles() ([]string, error)
- RemoveEmbByFileName(filename string) error
-}
-
// SerializeVector encodes vec as a binary blob. Each element is written, in
// order, as its IEEE-754 bit pattern in 4 little-endian bytes, so the result
// is len(vec)*4 bytes long. A nil or empty vec yields an empty, non-nil slice.
//
// The bit pattern is taken with math.Float32bits instead of reading the
// float's memory, so the blob layout does not depend on the host byte order.
func SerializeVector(vec []float32) []byte {
	const float32Size = 4 // bytes per encoded float32
	buf := make([]byte, len(vec)*float32Size)
	for i, v := range vec {
		binary.LittleEndian.PutUint32(buf[i*float32Size:], math.Float32bits(v))
	}
	return buf
}
-
// DeserializeVector decodes a binary blob back into a []float32. The blob is
// read as consecutive 4-byte little-endian IEEE-754 values, the layout that
// SerializeVector writes. The result is a freshly allocated slice that does
// not share memory with data.
//
// If len(data) is not a multiple of 4, the trailing 1-3 bytes are ignored.
// A nil or empty blob yields an empty, non-nil slice.
func DeserializeVector(data []byte) []float32 {
	const float32Size = 4 // bytes per encoded float32
	vec := make([]float32, len(data)/float32Size)
	for i := range vec {
		bits := binary.LittleEndian.Uint32(data[i*float32Size:])
		vec[i] = math.Float32frombits(bits)
	}
	return vec
}
-
// mathFloat32bits and mathBitsToFloat32 are helpers to convert between float32 and uint32.
//
// mathFloat32bits returns the IEEE-754 bit pattern of f. It delegates to
// math.Float32bits, so the value is the same on every architecture; decoding
// the float's in-memory bytes as little-endian would yield byte-swapped bits
// on a big-endian host.
func mathFloat32bits(f float32) uint32 {
	return math.Float32bits(f)
}
-
// mathBitsToFloat32 reinterprets the 32-bit pattern b as a float32 without
// any numeric conversion.
func mathBitsToFloat32(b uint32) float32 {
	bits := b
	asFloat := (*float32)(unsafe.Pointer(&bits))
	return *asFloat
}
-
// Names of the embedding tables; the numeric suffix is the vector length each
// table is used for.
var (
	vecTableName384  = "embeddings_384"
	vecTableName5120 = "embeddings_5120"
)
-
-func fetchTableName(emb []float32) (string, error) {
- switch len(emb) {
- case 5120:
- return vecTableName5120, nil
- case 384:
- return vecTableName384, nil
- default:
- return "", fmt.Errorf("no table for the size of %d", len(emb))
- }
-}
-
-func (p ProviderSQL) WriteVector(row *models.VectorRow) error {
- tableName, err := fetchTableName(row.Embeddings)
- if err != nil {
- return err
- }
- stmt, _, err := p.s3Conn.Prepare(
- fmt.Sprintf("INSERT INTO %s(embedding, slug, raw_text, filename) VALUES (?, ?, ?, ?)", tableName))
- if err != nil {
- p.logger.Error("failed to prep a stmt", "error", err)
- return err
- }
- defer stmt.Close()
- serializedEmbeddings := SerializeVector(row.Embeddings)
- if err := stmt.BindBlob(1, serializedEmbeddings); err != nil {
- p.logger.Error("failed to bind", "error", err)
- return err
- }
- if err := stmt.BindText(2, row.Slug); err != nil {
- p.logger.Error("failed to bind", "error", err)
- return err
- }
- if err := stmt.BindText(3, row.RawText); err != nil {
- p.logger.Error("failed to bind", "error", err)
- return err
- }
- if err := stmt.BindText(4, row.FileName); err != nil {
- p.logger.Error("failed to bind", "error", err)
- return err
- }
- err = stmt.Exec()
- if err != nil {
- return err
- }
- return nil
-}
-
// decodeUnsafe decodes bs as consecutive 4-byte little-endian IEEE-754 values,
// the layout written by SerializeVector, and returns them as a []float32.
//
// The name is kept for existing callers, but the function no longer uses
// unsafe: it copies into a new slice instead of reinterpreting bs in place.
// The result therefore stays valid if the caller's buffer is later reused or
// modified, and decoding does not depend on the alignment of bs or on the
// host byte order. Empty or nil input returns an empty slice instead of
// panicking; trailing bytes beyond a multiple of 4 are ignored.
func decodeUnsafe(bs []byte) []float32 {
	const float32Size = 4 // bytes per encoded float32
	out := make([]float32, len(bs)/float32Size)
	for i := range out {
		out[i] = math.Float32frombits(binary.LittleEndian.Uint32(bs[i*float32Size:]))
	}
	return out
}
-
-func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
- tableName, err := fetchTableName(q)
- if err != nil {
- return nil, err
- }
- stmt, _, err := p.s3Conn.Prepare(
- fmt.Sprintf(`SELECT
- distance,
- embedding,
- slug,
- raw_text,
- filename
- FROM %s
- WHERE embedding MATCH ?
- ORDER BY distance
- LIMIT 3
- `, tableName))
- if err != nil {
- return nil, err
- }
- // This function needs to be completely rewritten to use the new binary storage approach
- if err != nil {
- return nil, err
- }
- if err := stmt.BindBlob(1, query); err != nil {
- p.logger.Error("failed to bind", "error", err)
- return nil, err
- }
- resp := []models.VectorRow{}
- for stmt.Step() {
- res := models.VectorRow{}
- res.Distance = float32(stmt.ColumnFloat(0))
- emb := stmt.ColumnRawText(1)
- res.Embeddings = decodeUnsafe(emb)
- res.Slug = stmt.ColumnText(2)
- res.RawText = stmt.ColumnText(3)
- res.FileName = stmt.ColumnText(4)
- resp = append(resp, res)
- }
- if err := stmt.Err(); err != nil {
- return nil, err
- }
- err = stmt.Close()
- if err != nil {
- return nil, err
- }
- return resp, nil
-}
-
-func (p ProviderSQL) ListFiles() ([]string, error) {
- q := fmt.Sprintf("SELECT filename FROM %s GROUP BY filename", vecTableName384)
- stmt, _, err := p.s3Conn.Prepare(q)
- if err != nil {
- return nil, err
- }
- defer stmt.Close()
- resp := []string{}
- for stmt.Step() {
- resp = append(resp, stmt.ColumnText(0))
- }
- if err := stmt.Err(); err != nil {
- return nil, err
- }
- return resp, nil
-}
-
-func (p ProviderSQL) RemoveEmbByFileName(filename string) error {
- q := fmt.Sprintf("DELETE FROM %s WHERE filename = ?", vecTableName384)
- stmt, _, err := p.s3Conn.Prepare(q)
- if err != nil {
- return err
- }
- defer stmt.Close()
- if err := stmt.BindText(1, filename); err != nil {
- return err
- }
- return stmt.Exec()
-}