Files
kg-scr/kg_ocr/embeddings/indexer.py

19 lines
393 B
Python

from txtai.embeddings import Embeddings
def create_and_index(
    data: list[str],
    model: str = "sentence-transformers/all-MiniLM-L6-v2",
) -> Embeddings:
    """Build a txtai ``Embeddings`` instance and index ``data`` into it.

    Args:
        data: Texts to index; passed unchanged to ``Embeddings.index``.
        model: Value used for the ``"path"`` entry of the embeddings
            configuration.

    Returns:
        The ``Embeddings`` object after ``index(data)`` has been called on it.
    """
    settings = {
        "path": model,
        "content": True,
        "hybrid": True,
        # NOTE(review): txtai may treat a plain-string "scoring" value as
        # term weighting and skip the "hybrid" sparse-index expansion when
        # "scoring" is already set -- confirm against the txtai
        # configuration docs before relying on hybrid search here.
        "scoring": "bm25",
    }
    index = Embeddings(settings)
    index.index(data)
    return index