package storage

import (
	"encoding/binary"
	"fmt"
	"math"
	"unsafe"

	"gf-lt/models"
	"github.com/jmoiron/sqlx"
)

// VectorRepo is the storage contract for embedding vectors.
type VectorRepo interface {
	// WriteVector persists a single vector row.
	WriteVector(*models.VectorRow) error
	// SearchClosest looks up rows for the query embedding q.
	SearchClosest(q []float32) ([]models.VectorRow, error)
	// ListFiles returns the filenames known to the store.
	ListFiles() ([]string, error)
	// RemoveEmbByFileName deletes the embeddings stored for filename.
	RemoveEmbByFileName(filename string) error
	// DB returns a *sqlx.DB handle.
	// NOTE(review): presumably the implementation's underlying connection;
	// confirm against the implementer.
	DB() *sqlx.DB
}

// SerializeVector converts vec into a binary blob. Every element is stored as
// its IEEE 754 bit pattern in little-endian byte order, 4 bytes per float32,
// so the result is len(vec)*4 bytes long regardless of the host architecture.
func SerializeVector(vec []float32) []byte {
	buf := make([]byte, len(vec)*4)
	for i, v := range vec {
		binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(v))
	}
	return buf
}

// DeserializeVector converts a blob produced by SerializeVector back into a
// []float32. Trailing bytes that do not form a complete 4-byte element are
// ignored.
func DeserializeVector(data []byte) []float32 {
	count := len(data) / 4
	vec := make([]float32, count)
	for i := range vec {
		vec[i] = math.Float32frombits(binary.LittleEndian.Uint32(data[i*4:]))
	}
	return vec
}

// mathFloat32bits returns the IEEE 754 bit pattern of f.
//
// It delegates to math.Float32bits: reading the bytes of f through an unsafe
// pointer and decoding them as little-endian yields byte-swapped bits on
// big-endian hosts.
func mathFloat32bits(f float32) uint32 {
	return math.Float32bits(f)
}

// mathBitsToFloat32 returns the float32 whose IEEE 754 bit pattern is b.
func mathBitsToFloat32(b uint32) float32 {
	return math.Float32frombits(b)
}

// Names of the tables that hold embeddings, one per supported vector length.
var (
	vecTableName5120 = "embeddings_5120"
	vecTableName384  = "embeddings_384"
)

// fetchTableName picks the table that stores embeddings of len(emb) elements.
// It returns an error when no table exists for that length.
func fetchTableName(emb []float32) (string, error) {
	switch len(emb) {
	case 5120:
		return vecTableName5120, nil
	case 384:
		return vecTableName384, nil
	default:
		return "", fmt.Errorf("no table for the size of %d", len(emb))
	}
}

// WriteVector inserts row into the table matching the length of
// row.Embeddings. The embedding is stored as a blob produced by
// SerializeVector; slug, raw text and filename are stored as text.
//
// It returns an error when row is nil, when no table exists for the embedding
// length, or when preparing, binding or executing the statement fails.
func (p ProviderSQL) WriteVector(row *models.VectorRow) error {
	if row == nil {
		// Dereferencing a nil row below would panic; report it instead.
		return fmt.Errorf("nil vector row")
	}
	tableName, err := fetchTableName(row.Embeddings)
	if err != nil {
		return err
	}
	stmt, _, err := p.s3Conn.Prepare(
		fmt.Sprintf("INSERT INTO %s(embedding, slug, raw_text, filename) VALUES (?, ?, ?, ?)", tableName))
	if err != nil {
		p.logger.Error("failed to prep a stmt", "error", err)
		return err
	}
	defer stmt.Close()

	if err := stmt.BindBlob(1, SerializeVector(row.Embeddings)); err != nil {
		p.logger.Error("failed to bind", "error", err)
		return err
	}
	// Text parameters occupy positions 2..4, in the column order of the INSERT.
	textParams := []string{row.Slug, row.RawText, row.FileName}
	for i, text := range textParams {
		if err := stmt.BindText(i+2, text); err != nil {
			p.logger.Error("failed to bind", "error", err)
			return err
		}
	}
	return stmt.Exec()
}

// decodeUnsafe reinterprets bs as a []float32 without copying. The returned
// slice aliases the memory of bs, holds len(bs)/4 elements, and is decoded in
// host byte order (unlike DeserializeVector, which is always little-endian).
// An empty or nil bs yields nil.
//
// NOTE(review): the cast assumes the backing array of bs is suitably aligned
// for float32; confirm at call sites.
func decodeUnsafe(bs []byte) []float32 {
	if len(bs) == 0 {
		// &bs[0] would panic with an index-out-of-range error.
		return nil
	}
	return unsafe.Slice((*float32)(unsafe.Pointer(&bs[0])), len(bs)/4)
}

// SearchClosest always returns an empty result and a nil error.
func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
	// TODO: This function has been temporarily disabled to avoid deprecated library usage.
	// In the new RAG implementation, this functionality is now in rag_new package.
	// For compatibility, return empty result instead of using deprecated vector extension.
	return []models.VectorRow{}, nil
}

// ListFiles returns the distinct filenames stored in the 384-dimension
// embeddings table. The result is an empty, non-nil slice when the table has
// no rows.
//
// NOTE(review): rows written to the 5120-dimension table are not listed here;
// confirm that this is intended.
func (p ProviderSQL) ListFiles() ([]string, error) {
	q := fmt.Sprintf("SELECT filename FROM %s GROUP BY filename", vecTableName384)
	stmt, _, err := p.s3Conn.Prepare(q)
	if err != nil {
		return nil, err
	}
	defer stmt.Close()

	resp := []string{}
	for stmt.Step() {
		resp = append(resp, stmt.ColumnText(0))
	}
	// Step reports failure only by returning false; the cause is read from Err.
	if err := stmt.Err(); err != nil {
		return nil, err
	}
	return resp, nil
}

// RemoveEmbByFileName deletes every row of the 384-dimension embeddings table
// whose filename column equals filename.
//
// NOTE(review): the 5120-dimension table is left untouched; confirm that this
// is intended.
func (p ProviderSQL) RemoveEmbByFileName(filename string) error {
	q := fmt.Sprintf("DELETE FROM %s WHERE filename = ?", vecTableName384)
	stmt, _, err := p.s3Conn.Prepare(q)
	if err != nil {
		return err
	}
	defer stmt.Close()

	if err := stmt.BindText(1, filename); err != nil {
		return err
	}
	return stmt.Exec()
}