From 965e9007430410105692d961cfb038cd65cdacad Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Apr 2026 09:11:46 +0000 Subject: [PATCH] More datatypes --- Dockerfile | 1 + README.md | 200 ++++++++++++++++++++++++++++---- app/routers/openai_compat.py | 205 ++++++++++++++++++++++++++++----- app/utils/image_processor.py | 153 ++++++++++++++++++++++++ k8s/configmap.yaml | 1 + k8s/vector-api/deployment.yaml | 6 + requirements.txt | 4 + 7 files changed, 519 insertions(+), 51 deletions(-) create mode 100644 app/utils/image_processor.py diff --git a/Dockerfile b/Dockerfile index d67b8c5..e240f23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,6 +39,7 @@ COPY app/routers/openai_compat.py ./app/routers/openai_compat.py COPY app/utils/__init__.py ./app/utils/__init__.py COPY app/utils/stats.py ./app/utils/stats.py COPY app/utils/chunking.py ./app/utils/chunking.py +COPY app/utils/image_processor.py ./app/utils/image_processor.py RUN find /app -type f | sort diff --git a/README.md b/README.md index 04a1ecd..0d89886 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ A vector store service built on top of [LiteLLM](https://github.com/BerriAI/lite - πŸ—„οΈ **Vector Store** powered by PostgreSQL + pgvector - πŸ” **Semantic Search** with optional Reranking - πŸ€– **RAG Endpoint** - Search + LLM in one request -- πŸ“„ **File Upload** - PDF, DOCX, TXT, Markdown +- πŸ“„ **File Upload** - PDF, DOCX, TXT, Markdown, Excel, CSV, PowerPoint, HTML, E-Mail, JSON +- πŸ–ΌοΈ **Image Support** - Upload images via Vision LLM (JPG, PNG, GIF, WebP, TIFF) - 🧩 **OpenAI-compatible API** - works with existing OpenAI SDKs - πŸ‘₯ **Multi-User** - Store permissions per user - πŸ–₯️ **Admin UI** - Manage users, stores and permissions @@ -20,20 +21,22 @@ A vector store service built on top of [LiteLLM](https://github.com/BerriAI/lite Client (API Key) β”‚ β–Ό -LiteLLM Proxy ──────────────────────┐ - β”‚ β”‚ - β–Ό β–Ό -Vector Store API Embedding Models - β”‚ (via LiteLLM) - β–Ό -PostgreSQL + pgvector +LiteLLM Proxy ──────────────────────────────┐ + β”‚ β”‚ + β–Ό β–Ό +Vector Store API LiteLLM Models + β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β–Ό β”‚ Embedding Models β”‚ +PostgreSQL + pgvector β”‚ Vision Models β”‚ + β”‚ LLM Models β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` ## Requirements - Kubernetes Cluster -- PostgreSQL with pgvector extension -- LiteLLM Proxy (deployed) +- PostgreSQL with pgvector extension (already deployed) +- LiteLLM Proxy (already deployed) - Container Registry ## Quick Start @@ -106,6 +109,7 @@ EOF ### 3. Configure ```bash +# Create secrets kubectl create secret generic vector-api-secrets \ --namespace vector-store \ --from-literal=DATABASE_URL="postgresql://vecuser:pass@postgres:5432/vectordb" \ @@ -124,16 +128,17 @@ data: ADMIN_USER_IDS: "your-admin-user-id" API_URL: "https://api.your-domain.com" EMBEDDING_MODEL: "your-embedding-model" + VISION_MODEL: "openai/gpt-4o-mini" ``` ### 4. Build & Deploy ```bash -# API +# Build & push API docker build -t your-registry/vector-store-api:1.0.0 . 
docker push your-registry/vector-store-api:1.0.0

-# Admin UI
+# Build & push Admin UI
docker build \
  -t your-registry/vector-store-admin:1.0.0 \
  ./ui
@@ -165,6 +170,7 @@ litellm-vector-store/
β”‚   β”‚   └── openai_compat.py   # OpenAI-compatible API
β”‚   └── utils/
β”‚       β”œβ”€β”€ chunking.py        # Text chunking
+β”‚       β”œβ”€β”€ image_processor.py # Vision LLM integration
β”‚       └── stats.py           # Usage tracking
β”œβ”€β”€ ui/                        # React Admin UI
β”‚   β”œβ”€β”€ src/
@@ -216,6 +222,10 @@ Authorization: Bearer sk-your-api-key

| Method | Endpoint | Description |
|--------|----------|-------------|
+| `GET` | `/v1/models` | List all models |
+| `GET` | `/v1/embeddings/models` | List embedding models |
+| `GET` | `/v1/vision/models` | List vision models |
+| `POST` | `/v1/embeddings` | Create embeddings |
| `POST` | `/v1/vector_stores` | Create store |
| `GET` | `/v1/vector_stores` | List stores |
| `GET` | `/v1/vector_stores/{id}` | Get store |
@@ -223,21 +233,21 @@ Authorization: Bearer sk-your-api-key
| `POST` | `/v1/vector_stores/{id}/files` | Add texts |
| `GET` | `/v1/vector_stores/{id}/files` | List files |
| `DELETE` | `/v1/vector_stores/{id}/files/{file_id}` | Delete file |
-| `POST` | `/v1/vector_stores/{id}/upload` | Upload file |
-| `POST` | `/v1/vector_stores/{id}/search` | Search |
+| `POST` | `/v1/vector_stores/{id}/upload` | Upload file or image |
+| `POST` | `/v1/vector_stores/{id}/search` | Semantic search |
| `POST` | `/v1/vector_stores/{id}/rag` | RAG query |
-| `POST` | `/v1/embeddings` | Create embeddings |
-| `GET` | `/v1/embeddings/models` | List embedding models |
-| `GET` | `/v1/models` | List all models |

-### Example
+### Examples
+
+#### Create a store & upload files

```python
import httpx

client = httpx.Client(
    base_url="https://api.your-domain.com/v1",
-    headers={"Authorization": "Bearer sk-your-key"}
+    headers={"Authorization": "Bearer sk-your-key"},
+    timeout=120.0  # uploads can wait on the vision model
)

# Create store
store = client.post(
    "/vector_stores",
    json={"name": "My Knowledge Base"}
).json()

@@ -246,27 +256,129 @@
-# Upload file
+# Upload document
with open("document.pdf", "rb") as f:
    client.post(
        f"/vector_stores/{store['id']}/upload",
        files={"file": f}
    )

+# Upload image (with default vision model)
+with open("screenshot.png", "rb") as f:
+    client.post(
+        f"/vector_stores/{store['id']}/upload",
+        files={"file": f}
+    )
+
+# Upload image (with custom vision model)
+with open("diagram.png", "rb") as f:
+    client.post(
+        f"/vector_stores/{store['id']}/upload",
+        files={"file": f},
+        data={
+            "vision_model": "openai/gpt-4o",
+            "vision_prompt": "Explain this diagram in detail."
+        }
+    )
+
# Search
results = client.post(
    f"/vector_stores/{store['id']}/search",
-    json={"query": "What is FastAPI?", "top_k": 3}
+    json={
+        "query": "What is FastAPI?",
+        "top_k": 3,
+        "rerank": True
+    }
).json()

# RAG
answer = client.post(
    f"/vector_stores/{store['id']}/rag",
-    json={"query": "What is FastAPI?"}
+    json={
+        "query": "What is FastAPI?",
+        "model": "openai/gpt-4o-mini",
+        "rerank": True
+    }
).json()

print(answer["answer"])
```
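+
+Because the API is OpenAI-compatible, the official `openai` SDK can
+usually be pointed at it directly. A minimal, untested sketch: it
+assumes a recent `openai` package whose `vector_stores` resource maps
+onto these endpoints, and passes this service's custom `top_k`/`rerank`
+fields through `extra_body`:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="https://api.your-domain.com/v1",
+    api_key="sk-your-key",
+)
+
+# Create a store through the SDK's vector_stores resource
+store = client.vector_stores.create(name="My Knowledge Base")
+
+# Search; extra_body carries parameters the SDK does not model itself
+results = client.vector_stores.search(
+    vector_store_id=store.id,
+    query="What is FastAPI?",
+    extra_body={"top_k": 3, "rerank": True},
+)
+```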
+#### JavaScript / TypeScript
+
+```javascript
+const API_KEY = "sk-your-api-key";
+const BASE_URL = "https://api.your-domain.com/v1";
+const HEADERS = {
+  "Authorization": `Bearer ${API_KEY}`,
+  "Content-Type": "application/json"
+};
+
+// Create store
+const store = await fetch(`${BASE_URL}/vector_stores`, {
+  method: "POST",
+  headers: HEADERS,
+  body: JSON.stringify({ name: "My Store" })
+}).then(r => r.json());
+
+// Search
+const results = await fetch(
+  `${BASE_URL}/vector_stores/${store.id}/search`, {
+  method: "POST",
+  headers: HEADERS,
+  body: JSON.stringify({
+    query: "What is FastAPI?",
+    top_k: 3,
+    rerank: true
+  })
+}).then(r => r.json());
+
+// RAG
+const answer = await fetch(
+  `${BASE_URL}/vector_stores/${store.id}/rag`, {
+  method: "POST",
+  headers: HEADERS,
+  body: JSON.stringify({
+    query: "What is FastAPI?"
+  })
+}).then(r => r.json());
+
+console.log(answer.answer);
+```
+
+#### curl
+
+```bash
+# Create store
+curl -X POST https://api.your-domain.com/v1/vector_stores \
+  -H "Authorization: Bearer sk-your-key" \
+  -H "Content-Type: application/json" \
+  -d '{"name": "My Store"}'
+
+# Upload document
+curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/upload \
+  -H "Authorization: Bearer sk-your-key" \
+  -F "file=@document.pdf"
+
+# Upload image with custom vision model
+curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/upload \
+  -H "Authorization: Bearer sk-your-key" \
+  -F "file=@diagram.png" \
+  -F "vision_model=openai/gpt-4o" \
+  -F "vision_prompt=Explain this diagram in detail."
+
+# Search
+curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/search \
+  -H "Authorization: Bearer sk-your-key" \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is FastAPI?", "top_k": 3, "rerank": true}'
+
+# RAG
+curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/rag \
+  -H "Authorization: Bearer sk-your-key" \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is FastAPI?", "model": "openai/gpt-4o-mini"}'
+```
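+
+The new `GET /v1/vision/models` endpoint lists every model that LiteLLM
+reports as vision-capable. The response shape below is inferred from the
+handler added later in this patch; the model names are placeholders:
+
+```bash
+curl https://api.your-domain.com/v1/vision/models \
+  -H "Authorization: Bearer sk-your-key"
+# {"object": "list", "default": "openai/gpt-4o-mini",
+#  "data": [{"id": "openai/gpt-4o", "object": "model",
+#            "owned_by": "system", "default": false}]}
+```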

## Configuration Reference

### Environment Variables

@@ -278,15 +390,54 @@ print(answer["answer"])
| `LITELLM_MASTER_KEY` | βœ… | β€” | LiteLLM master key |
| `ADMIN_USER_IDS` | βœ… | β€” | Comma-separated admin user IDs |
| `EMBEDDING_MODEL` | ❌ | `text-embedding-ada-002` | Default embedding model |
+| `VISION_MODEL` | ❌ | `openai/gpt-4o-mini` | Default vision model |
+
+### Upload Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `file` | file | β€” | File to upload |
+| `chunk_size` | int | 512 | Characters per chunk |
+| `chunk_overlap` | int | 50 | Overlap between chunks |
+| `vision_model` | string | Config default | Vision model for images |
+| `vision_prompt` | string | Auto | Custom prompt for vision model |
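+
+`chunk_size` and `chunk_overlap` are measured in characters. For
+illustration only (the real implementation lives in
+`app/utils/chunking.py` and is not part of this patch), the windowing
+behaves roughly like this:
+
+```python
+def chunk_text(text: str, chunk_size: int = 512, overlap: int = 50):
+    """Sliding character window; consecutive chunks share `overlap` chars."""
+    chunks = []
+    step = max(chunk_size - overlap, 1)  # how far each window advances
+    for index, start in enumerate(range(0, len(text), step)):
+        piece = text[start:start + chunk_size]
+        if piece.strip():
+            chunks.append({"text": piece, "index": index, "start": start})
+    return chunks
+```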
+
+### Search Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `query` | string | β€” | Search query |
+| `top_k` | int | 5 | Number of results (max. 50) |
+| `rerank` | bool | false | Enable reranking |
+| `rerank_model` | string | Auto | Custom rerank model |
+
+### RAG Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `query` | string | β€” | Question |
+| `model` | string | cosair/gemma4:31b | LLM model |
+| `top_k` | int | 5 | Context documents |
+| `rerank` | bool | false | Enable reranking |
+| `system_prompt` | string | Auto | Custom system prompt |
+| `messages` | array | [] | Chat history |

### Supported File Formats

| Format | Extension | Notes |
|--------|-----------|-------|
| Text | `.txt` | UTF-8 encoded |
+| Markdown | `.md` | Standard Markdown |
| PDF | `.pdf` | Text PDFs only, no scans |
| Word | `.docx` | Microsoft Word 2007+ |
-| Markdown | `.md` | Standard Markdown |
+| Excel | `.xlsx` | All sheets extracted |
+| CSV | `.csv` | All columns extracted |
+| PowerPoint | `.pptx` | All slides extracted |
+| HTML | `.html` `.htm` | Scripts/styles removed |
+| Outlook Mail | `.msg` | Including headers |
+| E-Mail | `.eml` | Including headers |
+| JSON | `.json` | Pretty printed |
+| Image | `.jpg` `.jpeg` `.png` `.gif` `.webp` `.tiff` | Via Vision LLM |

### Limits

@@ -320,6 +471,8 @@ DATABASE_URL="postgresql://..." \
LITELLM_PROXY_URL="http://..." \
LITELLM_MASTER_KEY="sk-..." \
ADMIN_USER_IDS="your-id" \
+EMBEDDING_MODEL="your-model" \
+VISION_MODEL="openai/gpt-4o-mini" \
uvicorn app.main:app --reload

# Run UI locally
@@ -336,6 +489,7 @@ VITE_API_URL=http://localhost:8000 npm run dev
| **Database** | PostgreSQL 16 + pgvector |
| **Auth** | LiteLLM Key Management |
| **Embeddings** | Via LiteLLM Proxy |
+| **Vision** | Via LiteLLM Vision Models |
| **Admin UI** | React + TypeScript + Tailwind CSS |
| **Container** | Docker + Kubernetes |
| **Ingress** | NGINX Ingress Controller |
diff --git a/app/routers/openai_compat.py b/app/routers/openai_compat.py
index e06dec3..91dcaed 100644
--- a/app/routers/openai_compat.py
+++ b/app/routers/openai_compat.py
@@ -7,6 +7,12 @@ import pypdf
import docx
import io
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
+from app.utils.image_processor import (
+    image_to_text,
+    is_image,
+    validate_vision_model,
+    SUPPORTED_IMAGE_FORMATS,
+    DEFAULT_PROMPT,
+    VISION_MODEL
+)
from pydantic import BaseModel, Field
from typing import Optional
from app.auth import verify_api_key
@@ -14,6 +20,12 @@ from app.database import get_db
from app.utils.stats import track_usage
from app.utils.chunking import chunk_text

+SUPPORTED_FORMATS = (
+    ".txt .md .pdf .docx .xlsx .csv "
+    ".pptx .html .htm .msg .eml .json "
+    + " ".join(SUPPORTED_IMAGE_FORMATS)
+)
+
router = APIRouter()
logger = logging.getLogger(__name__)

@@ -691,59 +703,169 @@ async def rag(
async def upload_file(
    store_id: str,
    file: UploadFile = File(...),
    chunk_size: int = Form(default=512),
    chunk_overlap: int = Form(default=50),
+    vision_prompt: str = Form(default=DEFAULT_PROMPT),
+    vision_model: Optional[str] = Form(default=None),
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
-    """Datei hochladen, chunken und in Vector Store speichern"""
+    """Upload a file, chunk it, and store it in the vector store"""
    start = time.time()
    await _check_access(db, store_id, user["user_id"])

    content = await file.read()
    filename = file.filename.lower()

    try:
-        if filename.endswith(".pdf"):
-            pdf = pypdf.PdfReader(io.BytesIO(content))
-            text = "\n".join(
+        if is_image(filename):
+            # Validate the vision model if one was supplied
+            if vision_model:
+                await validate_vision_model(vision_model, user["token"])
+                use_model = vision_model
+            else:
+                use_model = None
+
+            text = await image_to_text(
+                content=content,
+                filename=filename,
+                token=user["token"],
+                model=use_model,
+                prompt=vision_prompt
+            )
+            # The vision description is stored as a single chunk
+            chunks = [{"text": text, "index": 0, "start": 0}]
+
+        elif filename.endswith((".txt", ".md")):
+            text = content.decode("utf-8")
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith(".pdf"):
+            pdf = pypdf.PdfReader(io.BytesIO(content))
+            text = "\n".join(
                page.extract_text()
                for page in pdf.pages
                if page.extract_text()
            )
+            chunks = chunk_text(text, chunk_size, chunk_overlap)

        elif filename.endswith(".docx"):
-            doc = docx.Document(io.BytesIO(content))
-            text = "\n".join(
-                p.text for p in doc.paragraphs if p.text.strip()
+            doc = docx.Document(io.BytesIO(content))
+            text = "\n".join(
+                p.text for p in doc.paragraphs
+                if p.text.strip()
            )
+            chunks = chunk_text(text, chunk_size, chunk_overlap)

-        elif filename.endswith(".txt"):
-            text = content.decode("utf-8")
+        elif filename.endswith(".xlsx"):
+            import openpyxl
+            wb = openpyxl.load_workbook(io.BytesIO(content))
+            lines = []
+            for sheet in wb.worksheets:
+                lines.append(f"=== Sheet: {sheet.title} ===")
+                for row in sheet.iter_rows(values_only=True):
+                    if any(cell is not None for cell in row):
+                        lines.append(
+                            " | ".join(
+                                str(c) for c in row if c is not None
+                            )
+                        )
+            text = "\n".join(lines)
+            chunks = chunk_text(text, chunk_size, chunk_overlap)

-        elif filename.endswith(".md"):
-            text = content.decode("utf-8")
+        elif filename.endswith(".csv"):
+            import csv
+            reader = csv.reader(
+                io.StringIO(content.decode("utf-8"))
+            )
+            text = "\n".join(
+                " | ".join(row)
+                for row in reader
+                if any(cell.strip() for cell in row)
+            )
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith(".pptx"):
+            from pptx import Presentation
+            prs = Presentation(io.BytesIO(content))
+            lines = []
+            for i, slide in enumerate(prs.slides):
+                lines.append(f"=== Slide {i+1} ===")
+                for shape in slide.shapes:
+                    if hasattr(shape, "text") and shape.text.strip():
+                        lines.append(shape.text)
+            text = "\n".join(lines)
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith((".html", ".htm")):
+            from bs4 import BeautifulSoup
+            soup = BeautifulSoup(content, "html.parser")
+            for tag in soup(["script", "style", "nav", "footer"]):
+                tag.decompose()
+            text = soup.get_text(separator="\n", strip=True)
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith(".msg"):
+            import extract_msg
+            msg = extract_msg.Message(io.BytesIO(content))
+            text = "\n".join(filter(None, [
+                f"From: {msg.sender}",
+                f"To: {msg.to}",
+                f"Subject: {msg.subject}",
+                f"Date: {msg.date}",
+                "─" * 40,
+                msg.body
+            ]))
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith(".eml"):
+            import email
+            msg = email.message_from_bytes(content)
+            body = ""
+            if msg.is_multipart():
+                for part in msg.walk():
+                    if part.get_content_type() == "text/plain":
+                        body = part.get_payload(
+                            decode=True
+                        ).decode("utf-8", errors="ignore")
+                        break
+            else:
+                body = msg.get_payload(
+                    decode=True
+                ).decode("utf-8", errors="ignore")
+            text = "\n".join(filter(None, [
+                f"From: {msg.get('From')}",
+                f"To: {msg.get('To')}",
+                f"Subject: {msg.get('Subject')}",
+                f"Date: {msg.get('Date')}",
+                "─" * 40,
+                body
+            ]))
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
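+            # Illustrative flattened output (values hypothetical):
+            #   From: alice@example.com
+            #   To: team@example.com
+            #   Subject: Weekly report
+            #   Date: Mon, 02 Mar 2026 09:00:00 +0000
+            #   ────────────────────────────────────────
+            #   <plain-text body>
+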
+        elif filename.endswith(".json"):
+            import json as jsonlib
+            data = jsonlib.loads(content.decode("utf-8"))
+            text = jsonlib.dumps(data, indent=2, ensure_ascii=False)
+            chunks = chunk_text(text, chunk_size, chunk_overlap)

        else:
            raise HTTPException(
                400,
-                f"Nicht unterstΓΌtztes Format: {file.filename}. "
-                f"UnterstΓΌtzt: .pdf, .docx, .txt, .md"
+                f"Unsupported format: {file.filename}. "
+                f"Supported: {SUPPORTED_FORMATS}"
            )

    except HTTPException:
        raise
    except Exception as e:
-        raise HTTPException(422, f"Datei konnte nicht gelesen werden: {e}")
+        raise HTTPException(
+            422,
+            f"File could not be read: {e}"
+        )

-    if not text.strip():
-        raise HTTPException(422, "Datei enthaelt keinen Text")
-
-    chunks = chunk_text(
-        text=text,
-        chunk_size=chunk_size,
-        overlap=chunk_overlap
-    )
+    if not any(c["text"].strip() for c in chunks):
+        raise HTTPException(422, "File contains no text")

    ids = []
    failed = 0
@@ -752,14 +874,16 @@ async def upload_file(
        try:
            embedding = await _embed(chunk["text"], user["token"])
            doc_id = await db.fetchval(
-                """INSERT INTO documents (store_id, content, metadata, embedding)
+                """INSERT INTO documents
+                   (store_id, content, metadata, embedding)
                   VALUES ($1, $2, $3, $4::vector)
                   RETURNING id""",
                store_id, chunk["text"],
                json.dumps({
                    "source": file.filename,
+                    "type": "image" if is_image(filename) else "document",
                    "chunk": chunk["index"],
                    "start": chunk.get("start", 0),
                }),
                str(embedding)
            )
@@ -778,6 +902,7 @@ async def upload_file(
    return {
        "object": "vector_store.file_batch",
        "filename": file.filename,
+        "type": "image" if is_image(filename) else "document",
        "counts": {
            "completed": len(ids),
            "failed": failed,
@@ -785,3 +910,27 @@ async def upload_file(
        },
        "ids": ids
    }
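+
+# Illustrative response from the upload endpoint (values hypothetical):
+#   {"object": "vector_store.file_batch", "filename": "diagram.png",
+#    "type": "image", "counts": {"completed": 1, "failed": 0, ...},
+#    "ids": [101, 102]}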
+
+@router.get("/vision/models")
+async def list_vision_models(
+    user: dict = Depends(verify_api_key),
+):
+    """List all available vision models"""
+    all_models = await _get_all_models()
+
+    vision_models = [
+        {
+            "id": m["id"],
+            "object": "model",
+            "owned_by": "system",
+            "default": m["id"] == VISION_MODEL,
+        }
+        for m in all_models
+        if m.get("supports_vision") is True
+    ]
+
+    return {
+        "object": "list",
+        "default": VISION_MODEL,
+        "data": vision_models
+    }
diff --git a/app/utils/image_processor.py b/app/utils/image_processor.py
new file mode 100644
index 0000000..1ad02b1
--- /dev/null
+++ b/app/utils/image_processor.py
@@ -0,0 +1,153 @@
+import base64
+import httpx
+import os
+import logging
+from typing import Optional
+from fastapi import HTTPException
+
+logger = logging.getLogger(__name__)
+LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
+VISION_MODEL = os.getenv("VISION_MODEL", "openai/gpt-4o-mini")
+
+SUPPORTED_IMAGE_FORMATS = [
+    ".jpg", ".jpeg",
+    ".png",
+    ".gif",
+    ".webp",
+    ".tiff"
+]
+
+MIME_TYPES = {
+    "jpg": "image/jpeg",
+    "jpeg": "image/jpeg",
+    "png": "image/png",
+    "gif": "image/gif",
+    "webp": "image/webp",
+    "tiff": "image/tiff"
+}
+
+DEFAULT_PROMPT = (
+    "Describe the content of this image in detail. "
+    "If it contains text, reproduce it in full. "
+    "If it is a diagram or chart, explain the data. "
+    "If it is a screenshot, describe what is shown. "
+    "Answer in German."
+)
+
+
+def is_image(filename: str) -> bool:
+    """Check whether a file is an image"""
+    return any(
+        filename.lower().endswith(ext)
+        for ext in SUPPORTED_IMAGE_FORMATS
+    )
+
+
+async def image_to_text(
+    content: bytes,
+    filename: str,
+    token: str,
+    model: Optional[str] = None,
+    prompt: str = DEFAULT_PROMPT
+) -> str:
+    """Convert an image to text via a vision LLM"""
+    use_model = model or VISION_MODEL
+    ext = filename.lower().split(".")[-1]
+    mime_type = MIME_TYPES.get(ext, "image/jpeg")
+    image_b64 = base64.b64encode(content).decode("utf-8")
+
+    async with httpx.AsyncClient() as client:
+        resp = await client.post(
+            f"{LITELLM_URL}/chat/completions",
+            headers={
+                "Authorization": f"Bearer {token}",
+                "Content-Type": "application/json"
+            },
+            json={
+                "model": use_model,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": prompt
+                            },
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{mime_type};base64,{image_b64}"
+                                }
+                            }
+                        ]
+                    }
+                ],
+                "max_tokens": 2048
+            },
+            timeout=60.0
+        )
+
+    if resp.status_code != 200:
+        logger.error(f"Vision error: {resp.status_code} - {resp.text}")
+        raise HTTPException(
+            502,
+            f"Image could not be processed: {resp.text}"
+        )
+
+    return resp.json()["choices"][0]["message"]["content"]
+
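+
+# validate_vision_model (below) relies on LiteLLM's /model_group/info
+# endpoint. Illustrative response shape only; values are hypothetical,
+# see the LiteLLM docs for the full schema:
+#   {"data": [{"model_group": "openai/gpt-4o-mini",
+#              "supports_vision": true}]}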
" + f"VerfΓΌgbare Vision Modelle: " + f"{', '.join(vision_models)}" + ), + "type": "invalid_request_error", + "code": "model_not_supported" + } + }) + + return model diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml index 496cd46..18cd5d8 100644 --- a/k8s/configmap.yaml +++ b/k8s/configmap.yaml @@ -8,3 +8,4 @@ data: ADMIN_USER_IDS: "default_user_id" API_URL: "https://api.vector.cosair.de" EMBEDDING_MODEL: "cosair/multilingual-e5-large-instruct" + VISION_MODEL: "cosair/gemma4:31b" diff --git a/k8s/vector-api/deployment.yaml b/k8s/vector-api/deployment.yaml index ab09235..6bd99ab 100644 --- a/k8s/vector-api/deployment.yaml +++ b/k8s/vector-api/deployment.yaml @@ -43,6 +43,12 @@ spec: name: vector-store-config key: ADMIN_USER_IDS + - name: VISION_MODEL + valueFrom: + configMapKeyRef: + name: vector-store-config + key: VISION_MODEL + readinessProbe: httpGet: path: /health diff --git a/requirements.txt b/requirements.txt index d754908..4a199fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,7 @@ pgvector==0.3.0 tenacity==8.3.0 pypdf==4.2.0 python-docx==1.1.0 +openpyxl==3.1.2 +python-pptx==0.6.23 +beautifulsoup4==4.12.3 +extract-msg==0.48.0