"""RAG utilities: retrieve context from a txtai embeddings index and answer with an LLM."""
import os

import litellm
from dotenv import load_dotenv
from txtai import LLM
from txtai.embeddings import Embeddings

# Load environment variables (e.g. the OpenRouter API key litellm reads) from a
# local .env file before any completion call is made.
load_dotenv()
def retrieve(embeddings: Embeddings, query: str, limit: int = 3) -> list[dict]:
    """Run a semantic search against the index.

    Args:
        embeddings: txtai ``Embeddings`` instance to query.
        query: natural-language search string.
        limit: maximum number of hits to return (default 3).

    Returns:
        Whatever ``embeddings.search`` yields — result dicts with scores.
    """
    hits = embeddings.search(query, limit)
    return hits
def ask_wllm(embeddings: Embeddings, question: str, model: str = "openrouter/minimax/minimax-m2.5:free", limit: int = 3) -> str:
    """Answer *question* via RAG: fetch context from the index, then ask an LLM.

    Args:
        embeddings: txtai ``Embeddings`` index to search for context.
        question: the user's question.
        model: litellm model identifier routed through OpenRouter.
        limit: how many search hits to stuff into the prompt context.

    Returns:
        The LLM's answer text, grounded in (and restricted to) the
        retrieved documents.
    """
    hits = retrieve(embeddings, question, limit)
    # Each hit carries a "text" field; stitch them into one context blob.
    context = "\n\n".join(hit["text"] for hit in hits)

    system_msg = {
        "role": "system",
        "content": "Answer ONLY using the provided context. Cite which parts you're drawing from. If the context doesn't cover something, say 'not in my documents'.",
    }
    user_msg = {
        "role": "user",
        "content": f"Context from my documents:\n{context}\n\nQuestion: {question}",
    }

    response = litellm.completion(model=model, messages=[system_msg, user_msg])
    return response.choices[0].message.content