Add scaffold structure for the project

This commit is contained in:
2026-03-24 14:58:58 +01:00
parent 95655686f9
commit cbfcf1e315
26 changed files with 6655 additions and 312 deletions

4
kg_ocr/ocr/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
from .extractor import get_screenshots
from .batch_processor import extract_text
# Names exported by this package when a caller does `from ... import *`.
__all__ = ["get_screenshots", "extract_text"]

View File

@@ -0,0 +1,7 @@
from PIL import Image
import pytesseract
def extract_text(images: list[str]) -> list[str]:
    """OCR a list of image paths into text.

    Args:
        images: Paths of the image files to read.

    Returns:
        One string per input path, in the same order, as returned by
        ``pytesseract.image_to_string``. An empty input gives an empty list.
    """
    texts = []
    for img in images:
        # Open inside a context manager so the underlying file handle is
        # released as soon as this image has been processed, instead of
        # staying open until garbage collection.
        with Image.open(img) as image:
            texts.append(pytesseract.image_to_string(image))
    return texts

15
kg_ocr/ocr/constants.py Normal file
View File

@@ -0,0 +1,15 @@
from pathlib import Path
import platform
# Directory searched for screenshots when the caller supplies no path,
# keyed by operating-system name ("Darwin" is macOS).
def_paths = {
    "Darwin": Path.home().joinpath("Desktop"),
    "Windows": Path.home().joinpath("Pictures", "Screenshots"),
    "Linux": Path.home().joinpath("Pictures"),
}
# Glob patterns for screenshot file names, keyed by operating-system name.
# Each value is a list because one OS may have several naming schemes.
sc_pathpatterns = {
    "Darwin": [
        "SCR*.png",
        "Screenshot*.png",
    ],
    "Windows": [
        "Screenshot*.png",
    ],
    "Linux": [
        "Screenshot*.png",
        "scrot*.png",
        "screenshot*.png",
    ],
}

17
kg_ocr/ocr/extractor.py Normal file
View File

@@ -0,0 +1,17 @@
import platform
from pathlib import Path
from typing import Optional
from .constants import def_paths, sc_pathpatterns
def get_screenshots(path: Optional[str | Path] = None) -> list[str]:
    """Find screenshot files for the current OS.

    Args:
        path: Directory to search. When ``None``, the entry for the current
            OS in ``def_paths`` is used, falling back to the user's home
            directory if the OS has no entry.

    Returns:
        Sorted, de-duplicated absolute paths (as strings) of the regular
        files directly inside ``path`` whose names match one of the current
        OS's patterns in ``sc_pathpatterns`` (``"SCR*.png"`` if the OS has
        no entry).
    """
    system = platform.system()
    if path is None:
        path = def_paths.get(system, Path.home())
    directory = Path(path)
    patterns = sc_pathpatterns.get(system, ["SCR*.png"])

    # A set is used because one file can match more than one pattern.
    matches = set()
    for pattern in patterns:
        for entry in directory.glob(pattern):
            # glob() also yields directories whose names match; those are
            # not screenshot files, so keep regular files only.
            if entry.is_file():
                matches.add(str(entry.absolute()))
    return sorted(matches)