from txtai.embeddings import Embeddings
from txtai import LLM
import litellm
from dotenv import load_dotenv
import os

# Load API keys (e.g. OPENROUTER_API_KEY) from a local .env file into the
# environment before litellm is used.
load_dotenv()


def retrieve(embeddings: Embeddings, query: str, limit: int = 3) -> list[dict]:
    """Search the embeddings index and return the top results with scores.

    Args:
        embeddings: A txtai Embeddings index to query.
        query: Natural-language search string.
        limit: Maximum number of results to return.

    Returns:
        A list of result dicts. NOTE(review): results carry a "text" key
        only when the index was built with content storage enabled
        (``content=True``) -- confirm against the indexing code.
    """
    return embeddings.search(query, limit)


def ask_wllm(
    embeddings: Embeddings,
    question: str,
    model: str = "openrouter/minimax/minimax-m2.5:free",
    limit: int = 3,
) -> str:
    """RAG: retrieve context from embeddings, then answer with an LLM.

    Args:
        embeddings: A txtai Embeddings index to retrieve context from.
        question: The user's question.
        model: LiteLLM model identifier routed through OpenRouter.
        limit: Number of retrieved passages to include as context.

    Returns:
        The LLM's answer as a string, or the canned "not in my documents"
        reply when retrieval yields nothing.
    """
    results = retrieve(embeddings, question, limit)

    # Robustness fix: with zero retrieved passages the system prompt below
    # forces this exact answer anyway -- skip the network round-trip.
    if not results:
        return "not in my documents"

    context = "\n\n".join(r["text"] for r in results)

    response = litellm.completion(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "Answer ONLY using the provided context. Cite which parts you're drawing from. If the context doesn't cover something, say 'not in my documents'."
            },
            {
                "role": "user",
                "content": f"Context from my documents:\n{context}\n\nQuestion: {question}"
            }
        ]
    )
    return response.choices[0].message.content