summaryrefslogtreecommitdiff
path: root/rag/document.go
diff options
context:
space:
mode:
author Grail Finder <wohilas@gmail.com> 2025-09-05 20:09:35 +0300
committer Grail Finder <wohilas@gmail.com> 2025-09-05 20:09:35 +0300
commit e0bd66e9ad7176ce85937c72f6e312b06fe259ab (patch)
tree 712308cc87bdd8c2b9b990a98fe97ee5585ac5cd /rag/document.go
parent 0068cd17ff1985102dd5ab22df899ea6b1065ff0 (diff)
Feat: rag chromem (feat/rag)
Diffstat (limited to 'rag/document.go')
-rw-r--r-- rag/document.go 56
1 files changed, 56 insertions, 0 deletions
diff --git a/rag/document.go b/rag/document.go
new file mode 100644
index 0000000..48c907e
--- /dev/null
+++ b/rag/document.go
@@ -0,0 +1,56 @@
+package rag
+
+import (
+ "context"
+ "errors"
+)
+
+// EmbeddingFunc converts a piece of text into its embedding vector.
+// It receives the caller's context and the text, and returns the vector
+// as a []float32, or an error if the embedding could not be produced.
+type EmbeddingFunc func(ctx context.Context, text string) ([]float32, error)
+
+// NewEmbeddingFuncDefault returns the default EmbeddingFunc.
+//
+// NOTE(review): this is currently a stub that returns nil, so there is no
+// usable default yet. Callers must nil-check the result before invoking it;
+// calling a nil function value panics.
+func NewEmbeddingFuncDefault() EmbeddingFunc {
+ return nil
+}
+
+// Document represents a single document: an identifier, optional metadata,
+// the text content and the embedding vector associated with that content.
+type Document struct {
+ // ID identifies the document. NewDocument rejects an empty ID.
+ ID string
+ // Metadata holds optional string key/value pairs attached to the document.
+ Metadata map[string]string
+ // Embedding is the document's embedding vector. NewDocument fills it from
+ // the embedding function when the caller does not supply one.
+ Embedding []float32
+ // Content is the document text. NewDocument allows it to be empty when an
+ // embedding is supplied directly.
+ Content string
+}
+
+// NewDocument creates a new document, including its embedding.
+//
+// Parameters:
+//   - ctx is passed through to the embedding function.
+//   - id must be non-empty.
+//   - metadata is optional and stored as given (it is not copied).
+//   - embedding may be supplied directly; if it is empty, it is computed
+//     from content using embeddingFunc.
+//   - content may be empty if you only want to store a supplied embedding,
+//     but embedding and content must not both be empty.
+//   - embeddingFunc may be nil, in which case NewEmbeddingFuncDefault is
+//     consulted for a default.
+//
+// An error is returned if id is empty, if both embedding and content are
+// empty, if an embedding has to be computed but neither embeddingFunc nor a
+// default embedding function is available, or if the embedding function
+// itself fails (its error is returned unchanged).
+//
+// If you want to create a document without embeddings, for example to let
+// [Collection.AddDocuments] create them concurrently, you can create a
+// document with `Document{...}` instead of using this constructor.
+func NewDocument(ctx context.Context, id string, metadata map[string]string, embedding []float32, content string, embeddingFunc EmbeddingFunc) (Document, error) {
+	if id == "" {
+		return Document{}, errors.New("id is empty")
+	}
+	if len(embedding) == 0 && content == "" {
+		return Document{}, errors.New("either embedding or content must be filled")
+	}
+	if embeddingFunc == nil {
+		embeddingFunc = NewEmbeddingFuncDefault()
+	}
+
+	if len(embedding) == 0 {
+		// The default may itself be nil; calling a nil function value would
+		// panic, so report the missing embedding function as an error.
+		if embeddingFunc == nil {
+			return Document{}, errors.New("embedding function is nil and no default is available")
+		}
+		var err error
+		embedding, err = embeddingFunc(ctx, content)
+		if err != nil {
+			return Document{}, err
+		}
+	}
+
+	return Document{
+		ID:        id,
+		Metadata:  metadata,
+		Embedding: embedding,
+		Content:   content,
+	}, nil
+}