24 lines
627 B
Python
24 lines
627 B
Python
from txtai.embeddings import Embeddings
|
|
|
|
|
|
def create_and_index(
    data: list[str], model: str = "sentence-transformers/all-MiniLM-L6-v2"
) -> Embeddings:
    """Build an ``Embeddings`` instance for ``model`` and index ``data`` with it.

    Args:
        data: Texts handed to ``Embeddings.index``.
        model: Value stored under the ``"path"`` configuration key.

    Returns:
        The ``Embeddings`` instance after ``index(data)`` has been called on it.
    """
    # Configuration passed verbatim to the Embeddings constructor.
    settings = {
        "path": model,
        "content": True,
        "hybrid": True,
        "scoring": "bm25",
    }
    index = Embeddings(settings)
    index.index(data)
    return index
|
|
|
|
|
|
def query_embedding(
    embeddings: Embeddings, query: str, limit: int = 100
) -> list[str]:
    """Run ``query`` against ``embeddings`` and collect each hit's text.

    Args:
        embeddings: Object whose ``search(query, limit)`` method is called.
        query: Search string forwarded to ``search``.
        limit: Second positional argument forwarded to ``search``.

    Returns:
        The ``"text"`` entry of every result, in the order ``search``
        returned them.
    """
    texts = []
    # Each hit is indexed by the "text" key, so results must be mappings
    # (presumably dicts produced when content storage is enabled - confirm).
    for hit in embeddings.search(query, limit):
        texts.append(hit["text"])
    return texts
|