Added LLM and RAG queries

2026-03-24 20:15:22 +01:00
parent cbfcf1e315
commit 012549b4bc
12 changed files with 278 additions and 2981 deletions
--- a/kg_ocr/init.py
+++ b/kg_ocr/init.py
@@ -1,4 +1,5 @@
 from .ocr import get_screenshots, extract_text
-from .embeddings import create_and_index, query_embedding
+from .embeddings import create_and_index
+from .rag import retrieve, ask_wllm

-__all__ = ["get_screenshots", "extract_text", "create_and_index", "query_embedding"]
+__all__ = ["get_screenshots", "extract_text", "create_and_index", "retrieve", "ask_wllm"]
--- a/kg_ocr/embeddings/init.py
+++ b/kg_ocr/embeddings/init.py
@@ -1,3 +1,3 @@
-from .indexer import create_and_index, query_embedding
+from .indexer import create_and_index

-__all__ = ["create_and_index", "query_embedding"]
+__all__ = ["create_and_index"]
--- a/kg_ocr/embeddings/indexer.py
+++ b/kg_ocr/embeddings/indexer.py
@@ -15,9 +15,4 @@ def create_and_index(
    return embeddings


-def query_embedding(
-    embeddings: Embeddings, query: str, limit: int = 100
-) -> list[str]:
-    """Search embeddings and return matching texts."""
-    results = embeddings.search(query, limit)
-    return [r["text"] for r in results]
+
--- a/kg_ocr/rag/init.py
+++ b/kg_ocr/rag/init.py
@@ -0,0 +1,3 @@
+from .query import retrieve, ask_wllm
+
+__all__ = ["retrieve", "ask_wllm"]
--- a/kg_ocr/rag/query.py
+++ b/kg_ocr/rag/query.py
@@ -0,0 +1,32 @@
+from txtai.embeddings import Embeddings
+from txtai import LLM
+import litellm
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+def retrieve(embeddings: Embeddings, query: str, limit: int = 3) -> list[dict]:
+    """Pass `query` and `limit` to `embeddings.search` and return its result unmodified."""
+    return embeddings.search(query, limit)
+
+def ask_wllm(embeddings: Embeddings, question: str, model: str = "openrouter/minimax/minimax-m2.5:free", limit: int = 3) -> str:
+    """RAG: join the "text" of the hits from `retrieve` into a context, send it with `question` to `model` via `litellm.completion`, and return the first choice's message content."""
+    results = retrieve(embeddings, question, limit)
+    context = "\n\n".join([r["text"] for r in results])  # every hit must support r["text"]; hit texts are separated by a blank line
+
+    response = litellm.completion(  # one request per call; exceptions from litellm are not caught here
+        model=model,
+        messages=[
+            {
+                "role": "system",
+                "content": "Answer ONLY using the provided context. Cite which parts you're drawing from. If the context doesn't cover something, say 'not in my documents'."
+            },
+            {
+                "role": "user",
+                "content": f"Context from my documents:\n{context}\n\nQuestion: {question}"
+            }
+        ]
+    )
+
+    return response.choices[0].message.content  # only the first choice is used