import base64
import logging
import os

import httpx

from fastapi import HTTPException
logger = logging.getLogger(__name__)

# Base URL of the LiteLLM proxy; overridable for non-docker deployments.
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")

# Default model used for image-to-text when the caller does not pick one.
VISION_MODEL = os.getenv("VISION_MODEL", "openai/gpt-4o-mini")

# Extension (without the dot) -> MIME type used in the data: URL sent upstream.
MIME_TYPES = {
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
    "gif": "image/gif",
    "webp": "image/webp",
    "tiff": "image/tiff",
}

# Derived from MIME_TYPES so the two lists can never drift apart.
# Insertion order of the dict matches the original hand-written list.
SUPPORTED_IMAGE_FORMATS = [f".{ext}" for ext in MIME_TYPES]

# Default instruction sent alongside the image.
# NOTE: runtime string kept verbatim — the service deliberately answers in German.
DEFAULT_PROMPT = (
    "Beschreibe den Inhalt dieses Bildes detailliert. "
    "Falls Text vorhanden ist, gib ihn vollstaendig wieder. "
    "Falls es ein Diagramm oder Chart ist, erklaere die Daten. "
    "Falls es ein Screenshot ist, beschreibe was zu sehen ist. "
    "Antworte auf Deutsch."
)
|
def is_image(filename: str) -> bool:
    """Return True if *filename* ends in a supported image extension (case-insensitive)."""
    # str.endswith accepts a tuple of suffixes — one C-level check instead of a loop.
    return filename.lower().endswith(tuple(SUPPORTED_IMAGE_FORMATS))
|
async def image_to_text(
    content: bytes,
    filename: str,
    token: str,
    model: "str | None" = None,
    prompt: str = DEFAULT_PROMPT,
) -> str:
    """Convert an image into a textual description via a vision LLM behind LiteLLM.

    Args:
        content: Raw image bytes.
        filename: Original file name; only its extension is used to choose
            the MIME type embedded in the ``data:`` URL.
        token: Bearer token forwarded to the LiteLLM proxy.
        model: Vision model to use; falls back to ``VISION_MODEL`` when ``None``.
        prompt: Instruction sent alongside the image (German default).

    Returns:
        The model's textual description of the image.

    Raises:
        HTTPException: 502 when the upstream vision call does not return 200.
    """
    use_model = model or VISION_MODEL
    # Extension without the dot; a missing/unknown extension falls back to
    # image/jpeg, which vision backends generally accept for content sniffing.
    ext = filename.lower().rsplit(".", 1)[-1]
    mime_type = MIME_TYPES.get(ext, "image/jpeg")
    image_b64 = base64.b64encode(content).decode("utf-8")

    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/chat/completions",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
            json={
                "model": use_model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{image_b64}"
                                },
                            },
                        ],
                    }
                ],
                "max_tokens": 2048,
            },
            timeout=60.0,
        )

    if resp.status_code != 200:
        # Lazy %-args: formatting happens only if the record is actually emitted.
        logger.error("Vision Fehler: %s - %s", resp.status_code, resp.text)
        raise HTTPException(
            502,
            f"Bild konnte nicht verarbeitet werden: {resp.text}",
        )

    return resp.json()["choices"][0]["message"]["content"]
|
async def validate_vision_model(
    model: str,
    token: str,
) -> str:
    """Validate that *model* exists on the LiteLLM proxy and supports vision.

    Args:
        model: Model-group name requested by the caller.
        token: Caller's bearer token. NOTE(review): currently unused — the
            lookup is performed with the master key so every model group is
            visible; confirm this is intentional.

    Returns:
        The validated model name, unchanged.

    Raises:
        HTTPException: 503 when LiteLLM is unreachable, 502 when the model
            listing fails, 404 for an unknown model, 400 when the model does
            not support vision.
    """
    LITELLM_MASTER = os.getenv("LITELLM_MASTER_KEY")

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(
                f"{LITELLM_URL}/model_group/info",
                headers={"Authorization": f"Bearer {LITELLM_MASTER}"},
                timeout=10.0,
            )
        except httpx.RequestError as e:
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}") from e

    if resp.status_code != 200:
        raise HTTPException(502, "Modelle konnten nicht abgerufen werden")

    # Parse the payload once and reuse it; the original called resp.json()
    # three separate times.
    data = resp.json().get("data", [])
    models = {m.get("model_group"): m for m in data}

    if model not in models:
        raise HTTPException(404, {
            "error": {
                "message": f"Modell '{model}' nicht gefunden",
                "type": "invalid_request_error",
                "code": "model_not_found",
            }
        })

    if not models[model].get("supports_vision"):
        vision_models = [
            m.get("model_group") for m in data if m.get("supports_vision")
        ]
        raise HTTPException(400, {
            "error": {
                "message": (
                    f"Modell '{model}' unterstützt kein Vision. "
                    f"Verfügbare Vision Modelle: "
                    f"{', '.join(vision_models)}"
                ),
                "type": "invalid_request_error",
                "code": "model_not_supported",
            }
        })

    return model