More datatypes
This commit is contained in:
153
app/utils/image_processor.py
Normal file
153
app/utils/image_processor.py
Normal file
@@ -0,0 +1,153 @@
|
||||
import base64
|
||||
import httpx
|
||||
import os
|
||||
import logging
|
||||
from fastapi import HTTPException
|
||||
|
||||
logger = logging.getLogger(__name__)

# Base URL of the LiteLLM proxy and the default vision-capable model;
# both overridable via environment for deployment flexibility.
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
VISION_MODEL = os.getenv("VISION_MODEL", "openai/gpt-4o-mini")

# File extensions accepted as images. ".tif" is included alongside
# ".tiff" — both spellings are common for the same format.
SUPPORTED_IMAGE_FORMATS = [
    ".jpg", ".jpeg",
    ".png",
    ".gif",
    ".webp",
    ".tif", ".tiff",
]

# Extension (without the leading dot) -> MIME type used in the
# data: URL embedded in the vision request.
MIME_TYPES = {
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
    "gif": "image/gif",
    "webp": "image/webp",
    "tif": "image/tiff",
    "tiff": "image/tiff",
}

# Default instruction sent with the image: describe content, transcribe
# any text, explain charts/screenshots, answer in German.
DEFAULT_PROMPT = (
    "Beschreibe den Inhalt dieses Bildes detailliert. "
    "Falls Text vorhanden ist, gib ihn vollstaendig wieder. "
    "Falls es ein Diagramm oder Chart ist, erklaere die Daten. "
    "Falls es ein Screenshot ist, beschreibe was zu sehen ist. "
    "Antworte auf Deutsch."
)


def is_image(filename: str) -> bool:
    """Return True if *filename* has a supported image extension (case-insensitive)."""
    # str.endswith accepts a tuple of suffixes -> one call instead of
    # a Python-level any() loop over the list.
    return filename.lower().endswith(tuple(SUPPORTED_IMAGE_FORMATS))
|
||||
|
||||
|
||||
async def image_to_text(
    content: bytes,
    filename: str,
    token: str,
    model: Optional[str] = None,
    prompt: str = DEFAULT_PROMPT,
) -> str:
    """Convert an image to a text description via a vision LLM.

    Args:
        content: Raw image bytes.
        filename: Original file name; its extension selects the MIME type.
        token: Bearer token forwarded to the LiteLLM proxy.
        model: Optional override for the vision model; defaults to
            ``VISION_MODEL`` when falsy.
        prompt: Instruction sent alongside the image (German by default).

    Returns:
        The model's textual description of the image.

    Raises:
        HTTPException: 503 if the proxy is unreachable, 502 if it
            returns a non-200 response.
    """
    use_model = model or VISION_MODEL
    ext = filename.lower().rsplit(".", 1)[-1]
    # Unknown extensions fall back to JPEG rather than failing outright.
    mime_type = MIME_TYPES.get(ext, "image/jpeg")
    image_b64 = base64.b64encode(content).decode("utf-8")

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(
                f"{LITELLM_URL}/chat/completions",
                headers={
                    "Authorization": f"Bearer {token}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": use_model,
                    "messages": [
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": prompt},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{mime_type};base64,{image_b64}"
                                    },
                                },
                            ],
                        }
                    ],
                    "max_tokens": 2048,
                },
                timeout=60.0,
            )
        except httpx.RequestError as e:
            # Consistent with validate_vision_model: surface connectivity
            # failures as 503 instead of an unhandled exception.
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}")

    if resp.status_code != 200:
        # Lazy %-args: no string formatting unless the record is emitted.
        logger.error("Vision Fehler: %s - %s", resp.status_code, resp.text)
        raise HTTPException(
            502,
            f"Bild konnte nicht verarbeitet werden: {resp.text}"
        )

    return resp.json()["choices"][0]["message"]["content"]
|
||||
|
||||
async def validate_vision_model(
    model: str,
    token: str
) -> str:
    """Validate that *model* exists on the LiteLLM proxy and supports vision.

    Args:
        model: Name of the model group requested by the caller.
        token: Caller's bearer token. NOTE(review): currently unused —
            the lookup is authorized with the master key; confirm whether
            per-user authorization was intended here.

    Returns:
        The validated model name, unchanged.

    Raises:
        HTTPException: 503 if the proxy is unreachable, 502 if the model
            list cannot be fetched, 404 if the model is unknown, 400 if
            the model does not support vision.
    """
    # Local name in snake_case (UPPER_SNAKE is reserved for constants).
    master_key = os.getenv("LITELLM_MASTER_KEY")

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(
                f"{LITELLM_URL}/model_group/info",
                headers={"Authorization": f"Bearer {master_key}"},
                timeout=10.0,
            )
        except httpx.RequestError as e:
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}")

    if resp.status_code != 200:
        raise HTTPException(502, "Modelle konnten nicht abgerufen werden")

    # Parse the response body ONCE and index model groups by name
    # (the original re-parsed resp.json() in the vision-error path).
    data = resp.json().get("data", [])
    models = {m.get("model_group"): m for m in data}

    if model not in models:
        raise HTTPException(404, {
            "error": {
                "message": f"Modell '{model}' nicht gefunden",
                "type": "invalid_request_error",
                "code": "model_not_found"
            }
        })

    if not models[model].get("supports_vision"):
        # Build the suggestion list from the already-parsed payload.
        vision_models = [
            m.get("model_group")
            for m in data
            if m.get("supports_vision")
        ]
        raise HTTPException(400, {
            "error": {
                "message": (
                    f"Modell '{model}' unterstützt kein Vision. "
                    f"Verfügbare Vision Modelle: "
                    f"{', '.join(vision_models)}"
                ),
                "type": "invalid_request_error",
                "code": "model_not_supported"
            }
        })

    return model
|
||||
Reference in New Issue
Block a user