More datatypes
This commit is contained in:
153
app/utils/image_processor.py
Normal file
153
app/utils/image_processor.py
Normal file
@@ -0,0 +1,153 @@
|
||||
import base64
|
||||
import httpx
|
||||
import os
|
||||
import logging
|
||||
from fastapi import HTTPException
|
||||
|
||||
logger = logging.getLogger(__name__)

# Base URL of the LiteLLM proxy and the default vision-capable model;
# both overridable via environment for deployment flexibility.
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
VISION_MODEL = os.getenv("VISION_MODEL", "openai/gpt-4o-mini")

# File extensions accepted as images. ".tif" is included alongside
# ".tiff" — both spellings are common for the same format.
SUPPORTED_IMAGE_FORMATS = [
    ".jpg", ".jpeg",
    ".png",
    ".gif",
    ".webp",
    ".tif", ".tiff",
]

# Extension (without the leading dot) -> MIME type used in the
# data: URL embedded in the vision request.
MIME_TYPES = {
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
    "gif": "image/gif",
    "webp": "image/webp",
    "tif": "image/tiff",
    "tiff": "image/tiff",
}

# Default instruction sent with the image: describe content, transcribe
# any text, explain charts/screenshots, answer in German.
DEFAULT_PROMPT = (
    "Beschreibe den Inhalt dieses Bildes detailliert. "
    "Falls Text vorhanden ist, gib ihn vollstaendig wieder. "
    "Falls es ein Diagramm oder Chart ist, erklaere die Daten. "
    "Falls es ein Screenshot ist, beschreibe was zu sehen ist. "
    "Antworte auf Deutsch."
)


def is_image(filename: str) -> bool:
    """Return True if *filename* has a supported image extension (case-insensitive)."""
    # str.endswith accepts a tuple of suffixes -> one call instead of
    # a Python-level any() loop over the list.
    return filename.lower().endswith(tuple(SUPPORTED_IMAGE_FORMATS))
|
||||
|
||||
|
||||
async def image_to_text(
    content: bytes,
    filename: str,
    token: str,
    model: Optional[str] = None,
    prompt: str = DEFAULT_PROMPT,
) -> str:
    """Convert an image to a text description via a vision LLM.

    Args:
        content: Raw image bytes.
        filename: Original file name; its extension selects the MIME type.
        token: Bearer token forwarded to the LiteLLM proxy.
        model: Optional override for the vision model; defaults to
            ``VISION_MODEL`` when falsy.
        prompt: Instruction sent alongside the image (German by default).

    Returns:
        The model's textual description of the image.

    Raises:
        HTTPException: 503 if the proxy is unreachable, 502 if it
            returns a non-200 response.
    """
    use_model = model or VISION_MODEL
    ext = filename.lower().rsplit(".", 1)[-1]
    # Unknown extensions fall back to JPEG rather than failing outright.
    mime_type = MIME_TYPES.get(ext, "image/jpeg")
    image_b64 = base64.b64encode(content).decode("utf-8")

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(
                f"{LITELLM_URL}/chat/completions",
                headers={
                    "Authorization": f"Bearer {token}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": use_model,
                    "messages": [
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": prompt},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{mime_type};base64,{image_b64}"
                                    },
                                },
                            ],
                        }
                    ],
                    "max_tokens": 2048,
                },
                timeout=60.0,
            )
        except httpx.RequestError as e:
            # Consistent with validate_vision_model: surface connectivity
            # failures as 503 instead of an unhandled exception.
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}")

    if resp.status_code != 200:
        # Lazy %-args: no string formatting unless the record is emitted.
        logger.error("Vision Fehler: %s - %s", resp.status_code, resp.text)
        raise HTTPException(
            502,
            f"Bild konnte nicht verarbeitet werden: {resp.text}"
        )

    return resp.json()["choices"][0]["message"]["content"]
|
||||
|
||||
async def validate_vision_model(
    model: str,
    token: str
) -> str:
    """Validate that *model* exists on the LiteLLM proxy and supports vision.

    Args:
        model: Name of the model group requested by the caller.
        token: Caller's bearer token. NOTE(review): currently unused —
            the lookup is authorized with the master key; confirm whether
            per-user authorization was intended here.

    Returns:
        The validated model name, unchanged.

    Raises:
        HTTPException: 503 if the proxy is unreachable, 502 if the model
            list cannot be fetched, 404 if the model is unknown, 400 if
            the model does not support vision.
    """
    # Local name in snake_case (UPPER_SNAKE is reserved for constants).
    master_key = os.getenv("LITELLM_MASTER_KEY")

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(
                f"{LITELLM_URL}/model_group/info",
                headers={"Authorization": f"Bearer {master_key}"},
                timeout=10.0,
            )
        except httpx.RequestError as e:
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}")

    if resp.status_code != 200:
        raise HTTPException(502, "Modelle konnten nicht abgerufen werden")

    # Parse the response body ONCE and index model groups by name
    # (the original re-parsed resp.json() in the vision-error path).
    data = resp.json().get("data", [])
    models = {m.get("model_group"): m for m in data}

    if model not in models:
        raise HTTPException(404, {
            "error": {
                "message": f"Modell '{model}' nicht gefunden",
                "type": "invalid_request_error",
                "code": "model_not_found"
            }
        })

    if not models[model].get("supports_vision"):
        # Build the suggestion list from the already-parsed payload.
        vision_models = [
            m.get("model_group")
            for m in data
            if m.get("supports_vision")
        ]
        raise HTTPException(400, {
            "error": {
                "message": (
                    f"Modell '{model}' unterstützt kein Vision. "
                    f"Verfügbare Vision Modelle: "
                    f"{', '.join(vision_models)}"
                ),
                "type": "invalid_request_error",
                "code": "model_not_supported"
            }
        })

    return model
|
||||
Reference in New Issue
Block a user