summaryrefslogtreecommitdiff
path: root/rag/document.go
blob: 48c907e743ad239947a7676a16aaaba2003e87ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package rag

import (
	"context"
	"errors"
)

// EmbeddingFunc is a function that creates the embedding for a piece of text.
// It receives a context and the text, and returns the embedding as a
// []float32 or an error.
type EmbeddingFunc func(ctx context.Context, text string) ([]float32, error)

// NewEmbeddingFuncDefault returns the default EmbeddingFunc.
//
// No default implementation is provided here: the returned value is always
// nil, so callers must check the result before invoking it.
func NewEmbeddingFuncDefault() EmbeddingFunc {
	var none EmbeddingFunc // zero value of a func type is nil
	return none
}

// Document represents a single document.
//
// A Document can be built with NewDocument, which validates the ID and
// fills in the embedding when it is missing, or directly as a struct literal.
type Document struct {
	// ID identifies the document. NewDocument rejects an empty ID.
	ID string
	// Metadata holds optional string key/value pairs attached to the document.
	Metadata map[string]string
	// Embedding is the embedding vector of the document.
	Embedding []float32
	// Content is the text of the document. It may be empty when only an
	// embedding is stored.
	Content string
}

// NewDocument creates a new document, including its embeddings.
//
// Parameters:
//   - ctx is passed through to the embedding function.
//   - id must not be empty.
//   - metadata is optional and may be nil.
//   - embedding is used as-is when non-empty; otherwise it is created from
//     content using the embedding function.
//   - content may be empty if you only want to store embeddings, but then
//     embedding must be provided.
//   - embeddingFunc may be nil, in which case NewEmbeddingFuncDefault is used.
//
// An error is returned if id is empty, if both embedding and content are
// empty, if an embedding has to be created but no embedding function is
// available (embeddingFunc is nil and the default is nil as well), or if the
// embedding function itself fails (its error is returned unchanged).
//
// If you want to create a document without embeddings, for example to let
// [Collection.AddDocuments] create them concurrently, you can use a
// `Document{...}` literal instead of this constructor.
func NewDocument(ctx context.Context, id string, metadata map[string]string, embedding []float32, content string, embeddingFunc EmbeddingFunc) (Document, error) {
	if id == "" {
		return Document{}, errors.New("id is empty")
	}
	if len(embedding) == 0 && content == "" {
		return Document{}, errors.New("either embedding or content must be filled")
	}
	if embeddingFunc == nil {
		embeddingFunc = NewEmbeddingFuncDefault()
	}

	if len(embedding) == 0 {
		// The default embedding function may itself be nil. Calling a nil
		// function value panics, so report the problem as an error instead.
		if embeddingFunc == nil {
			return Document{}, errors.New("no embedding function available to create the embedding")
		}
		var err error
		embedding, err = embeddingFunc(ctx, content)
		if err != nil {
			return Document{}, err
		}
	}

	return Document{
		ID:        id,
		Metadata:  metadata,
		Embedding: embedding,
		Content:   content,
	}, nil
}