From 965e9007430410105692d961cfb038cd65cdacad Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Apr 2026 09:11:46 +0000 Subject: [PATCH] More datatypes --- Dockerfile | 1 + README.md | 200 ++++++++++++++++++++++++++++---- app/routers/openai_compat.py | 205 ++++++++++++++++++++++++++++----- app/utils/image_processor.py | 153 ++++++++++++++++++++++++ k8s/configmap.yaml | 1 + k8s/vector-api/deployment.yaml | 6 + requirements.txt | 4 + 7 files changed, 519 insertions(+), 51 deletions(-) create mode 100644 app/utils/image_processor.py diff --git a/Dockerfile b/Dockerfile index d67b8c5..e240f23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,6 +39,7 @@ COPY app/routers/openai_compat.py ./app/routers/openai_compat.py COPY app/utils/__init__.py ./app/utils/__init__.py COPY app/utils/stats.py ./app/utils/stats.py COPY app/utils/chunking.py ./app/utils/chunking.py +COPY app/utils/image_processor.py ./app/utils/image_processor.py RUN find /app -type f | sort diff --git a/README.md b/README.md index 04a1ecd..0d89886 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ A vector store service built on top of [LiteLLM](https://github.com/BerriAI/lite - πŸ—„οΈ **Vector Store** powered by PostgreSQL + pgvector - πŸ” **Semantic Search** with optional Reranking - πŸ€– **RAG Endpoint** - Search + LLM in one request -- πŸ“„ **File Upload** - PDF, DOCX, TXT, Markdown +- πŸ“„ **File Upload** - PDF, DOCX, TXT, Markdown, Excel, CSV, PowerPoint, HTML, E-Mail, JSON +- πŸ–ΌοΈ **Image Support** - Upload images via Vision LLM (JPG, PNG, GIF, WebP, TIFF) - 🧩 **OpenAI-compatible API** - works with existing OpenAI SDKs - πŸ‘₯ **Multi-User** - Store permissions per user - πŸ–₯️ **Admin UI** - Manage users, stores and permissions @@ -20,20 +21,22 @@ A vector store service built on top of [LiteLLM](https://github.com/BerriAI/lite Client (API Key) β”‚ β–Ό -LiteLLM Proxy ──────────────────────┐ - β”‚ β”‚ - β–Ό β–Ό -Vector Store API Embedding Models - β”‚ (via LiteLLM) - β–Ό -PostgreSQL + pgvector +LiteLLM Proxy ──────────────────────────────┐ + β”‚ β”‚ + β–Ό β–Ό +Vector Store API LiteLLM Models + β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β–Ό β”‚ Embedding Models β”‚ +PostgreSQL + pgvector β”‚ Vision Models β”‚ + β”‚ LLM Models β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` ## Requirements - Kubernetes Cluster -- PostgreSQL with pgvector extension -- LiteLLM Proxy (deployed) +- PostgreSQL with pgvector extension (already deployed) +- LiteLLM Proxy (already deployed) - Container Registry ## Quick Start @@ -106,6 +109,7 @@ EOF ### 3. Configure ```bash +# Create secrets kubectl create secret generic vector-api-secrets \ --namespace vector-store \ --from-literal=DATABASE_URL="postgresql://vecuser:pass@postgres:5432/vectordb" \ @@ -124,16 +128,17 @@ data: ADMIN_USER_IDS: "your-admin-user-id" API_URL: "https://api.your-domain.com" EMBEDDING_MODEL: "your-embedding-model" + VISION_MODEL: "openai/gpt-4o-mini" ``` ### 4. Build & Deploy ```bash -# API +# Build & push API docker build -t your-registry/vector-store-api:1.0.0 . 
docker push your-registry/vector-store-api:1.0.0

-# Admin UI
+# Build & push Admin UI
docker build \
  -t your-registry/vector-store-admin:1.0.0 \
  ./ui
@@ -165,6 +170,7 @@ litellm-vector-store/
β”‚   β”‚   └── openai_compat.py   # OpenAI-compatible API
β”‚   └── utils/
β”‚       β”œβ”€β”€ chunking.py        # Text chunking
+β”‚       β”œβ”€β”€ image_processor.py # Vision LLM integration
β”‚       └── stats.py           # Usage tracking
β”œβ”€β”€ ui/                        # React Admin UI
β”‚   β”œβ”€β”€ src/
@@ -216,6 +222,10 @@ Authorization: Bearer sk-your-api-key

| Method | Endpoint | Description |
|--------|----------|-------------|
+| `GET` | `/v1/models` | List all models |
+| `GET` | `/v1/embeddings/models` | List embedding models |
+| `GET` | `/v1/vision/models` | List vision models |
+| `POST` | `/v1/embeddings` | Create embeddings |
| `POST` | `/v1/vector_stores` | Create store |
| `GET` | `/v1/vector_stores` | List stores |
| `GET` | `/v1/vector_stores/{id}` | Get store |
@@ -223,21 +233,21 @@ Authorization: Bearer sk-your-api-key
| `POST` | `/v1/vector_stores/{id}/files` | Add texts |
| `GET` | `/v1/vector_stores/{id}/files` | List files |
| `DELETE` | `/v1/vector_stores/{id}/files/{file_id}` | Delete file |
-| `POST` | `/v1/vector_stores/{id}/upload` | Upload file |
-| `POST` | `/v1/vector_stores/{id}/search` | Search |
+| `POST` | `/v1/vector_stores/{id}/upload` | Upload file or image |
+| `POST` | `/v1/vector_stores/{id}/search` | Semantic search |
| `POST` | `/v1/vector_stores/{id}/rag` | RAG query |
-| `POST` | `/v1/embeddings` | Create embeddings |
-| `GET` | `/v1/embeddings/models` | List embedding models |
-| `GET` | `/v1/models` | List all models |

-### Example
+### Examples
+
+#### Create a store & upload files

```python
import httpx

client = httpx.Client(
    base_url="https://api.your-domain.com/v1",
-    headers={"Authorization": "Bearer sk-your-key"}
+    headers={"Authorization": "Bearer sk-your-key"},
+    timeout=120.0  # uploads can wait on the vision model
)

# Create store
store = client.post(
    "/vector_stores",
    json={"name": "My Knowledge Base"}
).json()

@@ -246,27 +256,129 @@
-# Upload file
+# Upload document
with open("document.pdf", "rb") as f:
    client.post(
        f"/vector_stores/{store['id']}/upload",
        files={"file": f}
    )

+# Upload image (with default vision model)
+with open("screenshot.png", "rb") as f:
+    client.post(
+        f"/vector_stores/{store['id']}/upload",
+        files={"file": f}
+    )
+
+# Upload image (with custom vision model)
+with open("diagram.png", "rb") as f:
+    client.post(
+        f"/vector_stores/{store['id']}/upload",
+        files={"file": f},
+        data={
+            "vision_model": "openai/gpt-4o",
+            "vision_prompt": "Explain this diagram in detail."
+        }
+    )
+
# Search
results = client.post(
    f"/vector_stores/{store['id']}/search",
-    json={"query": "What is FastAPI?", "top_k": 3}
+    json={
+        "query": "What is FastAPI?",
+        "top_k": 3,
+        "rerank": True
+    }
).json()

# RAG
answer = client.post(
    f"/vector_stores/{store['id']}/rag",
-    json={"query": "What is FastAPI?"}
+    json={
+        "query": "What is FastAPI?",
+        "model": "openai/gpt-4o-mini",
+        "rerank": True
+    }
).json()

print(answer["answer"])
```
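+
+Because the API is OpenAI-compatible, the official `openai` SDK can
+usually be pointed at it directly. A minimal, untested sketch: it
+assumes a recent `openai` package whose `vector_stores` resource maps
+onto these endpoints, and passes this service's custom `top_k`/`rerank`
+fields through `extra_body`:
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="https://api.your-domain.com/v1",
+    api_key="sk-your-key",
+)
+
+# Create a store through the SDK's vector_stores resource
+store = client.vector_stores.create(name="My Knowledge Base")
+
+# Search; extra_body carries parameters the SDK does not model itself
+results = client.vector_stores.search(
+    vector_store_id=store.id,
+    query="What is FastAPI?",
+    extra_body={"top_k": 3, "rerank": True},
+)
+```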
+#### JavaScript / TypeScript
+
+```javascript
+const API_KEY = "sk-your-api-key";
+const BASE_URL = "https://api.your-domain.com/v1";
+const HEADERS = {
+  "Authorization": `Bearer ${API_KEY}`,
+  "Content-Type": "application/json"
+};
+
+// Create store
+const store = await fetch(`${BASE_URL}/vector_stores`, {
+  method: "POST",
+  headers: HEADERS,
+  body: JSON.stringify({ name: "My Store" })
+}).then(r => r.json());
+
+// Search
+const results = await fetch(
+  `${BASE_URL}/vector_stores/${store.id}/search`, {
+  method: "POST",
+  headers: HEADERS,
+  body: JSON.stringify({
+    query: "What is FastAPI?",
+    top_k: 3,
+    rerank: true
+  })
+}).then(r => r.json());
+
+// RAG
+const answer = await fetch(
+  `${BASE_URL}/vector_stores/${store.id}/rag`, {
+  method: "POST",
+  headers: HEADERS,
+  body: JSON.stringify({
+    query: "What is FastAPI?"
+  })
+}).then(r => r.json());
+
+console.log(answer.answer);
+```
+
+#### curl
+
+```bash
+# Create store
+curl -X POST https://api.your-domain.com/v1/vector_stores \
+  -H "Authorization: Bearer sk-your-key" \
+  -H "Content-Type: application/json" \
+  -d '{"name": "My Store"}'
+
+# Upload document
+curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/upload \
+  -H "Authorization: Bearer sk-your-key" \
+  -F "file=@document.pdf"
+
+# Upload image with custom vision model
+curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/upload \
+  -H "Authorization: Bearer sk-your-key" \
+  -F "file=@diagram.png" \
+  -F "vision_model=openai/gpt-4o" \
+  -F "vision_prompt=Explain this diagram in detail."
+
+# Search
+curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/search \
+  -H "Authorization: Bearer sk-your-key" \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is FastAPI?", "top_k": 3, "rerank": true}'
+
+# RAG
+curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/rag \
+  -H "Authorization: Bearer sk-your-key" \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is FastAPI?", "model": "openai/gpt-4o-mini"}'
+```
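+
+The new `GET /v1/vision/models` endpoint lists every model that LiteLLM
+reports as vision-capable. The response shape below is inferred from the
+handler added later in this patch; the model names are placeholders:
+
+```bash
+curl https://api.your-domain.com/v1/vision/models \
+  -H "Authorization: Bearer sk-your-key"
+# {"object": "list", "default": "openai/gpt-4o-mini",
+#  "data": [{"id": "openai/gpt-4o", "object": "model",
+#            "owned_by": "system", "default": false}]}
+```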

## Configuration Reference

### Environment Variables

@@ -278,15 +390,54 @@ print(answer["answer"])
| `LITELLM_MASTER_KEY` | βœ… | β€” | LiteLLM master key |
| `ADMIN_USER_IDS` | βœ… | β€” | Comma-separated admin user IDs |
| `EMBEDDING_MODEL` | ❌ | `text-embedding-ada-002` | Default embedding model |
+| `VISION_MODEL` | ❌ | `openai/gpt-4o-mini` | Default vision model |
+
+### Upload Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `file` | file | β€” | File to upload |
+| `chunk_size` | int | 512 | Characters per chunk |
+| `chunk_overlap` | int | 50 | Overlap between chunks |
+| `vision_model` | string | Config default | Vision model for images |
+| `vision_prompt` | string | Auto | Custom prompt for vision model |
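+
+`chunk_size` and `chunk_overlap` are measured in characters. For
+illustration only (the real implementation lives in
+`app/utils/chunking.py` and is not part of this patch), the windowing
+behaves roughly like this:
+
+```python
+def chunk_text(text: str, chunk_size: int = 512, overlap: int = 50):
+    """Sliding character window; consecutive chunks share `overlap` chars."""
+    chunks = []
+    step = max(chunk_size - overlap, 1)  # how far each window advances
+    for index, start in enumerate(range(0, len(text), step)):
+        piece = text[start:start + chunk_size]
+        if piece.strip():
+            chunks.append({"text": piece, "index": index, "start": start})
+    return chunks
+```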
+
+### Search Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `query` | string | β€” | Search query |
+| `top_k` | int | 5 | Number of results (max. 50) |
+| `rerank` | bool | false | Enable reranking |
+| `rerank_model` | string | Auto | Custom rerank model |
+
+### RAG Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `query` | string | β€” | Question |
+| `model` | string | cosair/gemma4:31b | LLM model |
+| `top_k` | int | 5 | Context documents |
+| `rerank` | bool | false | Enable reranking |
+| `system_prompt` | string | Auto | Custom system prompt |
+| `messages` | array | [] | Chat history |

### Supported File Formats

| Format | Extension | Notes |
|--------|-----------|-------|
| Text | `.txt` | UTF-8 encoded |
+| Markdown | `.md` | Standard Markdown |
| PDF | `.pdf` | Text PDFs only, no scans |
| Word | `.docx` | Microsoft Word 2007+ |
-| Markdown | `.md` | Standard Markdown |
+| Excel | `.xlsx` | All sheets extracted |
+| CSV | `.csv` | All columns extracted |
+| PowerPoint | `.pptx` | All slides extracted |
+| HTML | `.html` `.htm` | Scripts/styles removed |
+| Outlook Mail | `.msg` | Including headers |
+| E-Mail | `.eml` | Including headers |
+| JSON | `.json` | Pretty printed |
+| Image | `.jpg` `.jpeg` `.png` `.gif` `.webp` `.tiff` | Via Vision LLM |

### Limits

@@ -320,6 +471,8 @@ DATABASE_URL="postgresql://..." \
LITELLM_PROXY_URL="http://..." \
LITELLM_MASTER_KEY="sk-..." \
ADMIN_USER_IDS="your-id" \
+EMBEDDING_MODEL="your-model" \
+VISION_MODEL="openai/gpt-4o-mini" \
uvicorn app.main:app --reload

# Run UI locally
@@ -336,6 +489,7 @@ VITE_API_URL=http://localhost:8000 npm run dev
| **Database** | PostgreSQL 16 + pgvector |
| **Auth** | LiteLLM Key Management |
| **Embeddings** | Via LiteLLM Proxy |
+| **Vision** | Via LiteLLM Vision Models |
| **Admin UI** | React + TypeScript + Tailwind CSS |
| **Container** | Docker + Kubernetes |
| **Ingress** | NGINX Ingress Controller |
diff --git a/app/routers/openai_compat.py b/app/routers/openai_compat.py
index e06dec3..91dcaed 100644
--- a/app/routers/openai_compat.py
+++ b/app/routers/openai_compat.py
@@ -7,6 +7,12 @@ import pypdf
import docx
import io
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
+from app.utils.image_processor import (
+    image_to_text,
+    is_image,
+    validate_vision_model,
+    SUPPORTED_IMAGE_FORMATS,
+    DEFAULT_PROMPT,
+    VISION_MODEL
+)
from pydantic import BaseModel, Field
from typing import Optional
from app.auth import verify_api_key
@@ -14,6 +20,12 @@ from app.database import get_db
from app.utils.stats import track_usage
from app.utils.chunking import chunk_text

+SUPPORTED_FORMATS = (
+    ".txt .md .pdf .docx .xlsx .csv "
+    ".pptx .html .htm .msg .eml .json "
+    + " ".join(SUPPORTED_IMAGE_FORMATS)
+)
+
router = APIRouter()
logger = logging.getLogger(__name__)

@@ -691,59 +703,169 @@ async def rag(
async def upload_file(
    store_id: str,
    file: UploadFile = File(...),
    chunk_size: int = Form(default=512),
    chunk_overlap: int = Form(default=50),
+    vision_prompt: str = Form(default=DEFAULT_PROMPT),
+    vision_model: Optional[str] = Form(default=None),
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
-    """Datei hochladen, chunken und in Vector Store speichern"""
+    """Upload a file, chunk it, and store it in the vector store"""
    start = time.time()
    await _check_access(db, store_id, user["user_id"])

    content = await file.read()
    filename = file.filename.lower()

    try:
-        if filename.endswith(".pdf"):
-            pdf = pypdf.PdfReader(io.BytesIO(content))
-            text = "\n".join(
+        if is_image(filename):
+            # Validate the vision model if one was supplied
+            if vision_model:
+                await validate_vision_model(vision_model, user["token"])
+                use_model = vision_model
+            else:
+                use_model = None
+
+            text = await image_to_text(
+                content=content,
+                filename=filename,
+                token=user["token"],
+                model=use_model,
+                prompt=vision_prompt
+            )
+            # The vision description is stored as a single chunk
+            chunks = [{"text": text, "index": 0, "start": 0}]
+
+        elif filename.endswith((".txt", ".md")):
+            text = content.decode("utf-8")
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith(".pdf"):
+            pdf = pypdf.PdfReader(io.BytesIO(content))
+            text = "\n".join(
                page.extract_text()
                for page in pdf.pages
                if page.extract_text()
            )
+            chunks = chunk_text(text, chunk_size, chunk_overlap)

        elif filename.endswith(".docx"):
-            doc = docx.Document(io.BytesIO(content))
-            text = "\n".join(
-                p.text for p in doc.paragraphs if p.text.strip()
+            doc = docx.Document(io.BytesIO(content))
+            text = "\n".join(
+                p.text for p in doc.paragraphs
+                if p.text.strip()
            )
+            chunks = chunk_text(text, chunk_size, chunk_overlap)

-        elif filename.endswith(".txt"):
-            text = content.decode("utf-8")
+        elif filename.endswith(".xlsx"):
+            import openpyxl
+            wb = openpyxl.load_workbook(io.BytesIO(content))
+            lines = []
+            for sheet in wb.worksheets:
+                lines.append(f"=== Sheet: {sheet.title} ===")
+                for row in sheet.iter_rows(values_only=True):
+                    if any(cell is not None for cell in row):
+                        lines.append(
+                            " | ".join(
+                                str(c) for c in row if c is not None
+                            )
+                        )
+            text = "\n".join(lines)
+            chunks = chunk_text(text, chunk_size, chunk_overlap)

-        elif filename.endswith(".md"):
-            text = content.decode("utf-8")
+        elif filename.endswith(".csv"):
+            import csv
+            reader = csv.reader(
+                io.StringIO(content.decode("utf-8"))
+            )
+            text = "\n".join(
+                " | ".join(row)
+                for row in reader
+                if any(cell.strip() for cell in row)
+            )
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith(".pptx"):
+            from pptx import Presentation
+            prs = Presentation(io.BytesIO(content))
+            lines = []
+            for i, slide in enumerate(prs.slides):
+                lines.append(f"=== Slide {i+1} ===")
+                for shape in slide.shapes:
+                    if hasattr(shape, "text") and shape.text.strip():
+                        lines.append(shape.text)
+            text = "\n".join(lines)
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith((".html", ".htm")):
+            from bs4 import BeautifulSoup
+            soup = BeautifulSoup(content, "html.parser")
+            for tag in soup(["script", "style", "nav", "footer"]):
+                tag.decompose()
+            text = soup.get_text(separator="\n", strip=True)
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith(".msg"):
+            import extract_msg
+            msg = extract_msg.Message(io.BytesIO(content))
+            text = "\n".join(filter(None, [
+                f"From: {msg.sender}",
+                f"To: {msg.to}",
+                f"Subject: {msg.subject}",
+                f"Date: {msg.date}",
+                "─" * 40,
+                msg.body
+            ]))
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
+
+        elif filename.endswith(".eml"):
+            import email
+            msg = email.message_from_bytes(content)
+            body = ""
+            if msg.is_multipart():
+                for part in msg.walk():
+                    if part.get_content_type() == "text/plain":
+                        body = part.get_payload(
+                            decode=True
+                        ).decode("utf-8", errors="ignore")
+                        break
+            else:
+                body = msg.get_payload(
+                    decode=True
+                ).decode("utf-8", errors="ignore")
+            text = "\n".join(filter(None, [
+                f"From: {msg.get('From')}",
+                f"To: {msg.get('To')}",
+                f"Subject: {msg.get('Subject')}",
+                f"Date: {msg.get('Date')}",
+                "─" * 40,
+                body
+            ]))
+            chunks = chunk_text(text, chunk_size, chunk_overlap)
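+            # Illustrative flattened output (values hypothetical):
+            #   From: alice@example.com
+            #   To: team@example.com
+            #   Subject: Weekly report
+            #   Date: Mon, 02 Mar 2026 09:00:00 +0000
+            #   ────────────────────────────────────────
+            #   <plain-text body>
+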
+        elif filename.endswith(".json"):
+            import json as jsonlib
+            data = jsonlib.loads(content.decode("utf-8"))
+            text = jsonlib.dumps(data, indent=2, ensure_ascii=False)
+            chunks = chunk_text(text, chunk_size, chunk_overlap)

        else:
            raise HTTPException(
                400,
-                f"Nicht unterstΓΌtztes Format: {file.filename}. "
-                f"UnterstΓΌtzt: .pdf, .docx, .txt, .md"
+                f"Unsupported format: {file.filename}. "
+                f"Supported: {SUPPORTED_FORMATS}"
            )

    except HTTPException:
        raise
    except Exception as e:
-        raise HTTPException(422, f"Datei konnte nicht gelesen werden: {e}")
+        raise HTTPException(
+            422,
+            f"File could not be read: {e}"
+        )

-    if not text.strip():
-        raise HTTPException(422, "Datei enthaelt keinen Text")
-
-    chunks = chunk_text(
-        text=text,
-        chunk_size=chunk_size,
-        overlap=chunk_overlap
-    )
+    if not any(c["text"].strip() for c in chunks):
+        raise HTTPException(422, "File contains no text")

    ids = []
    failed = 0
@@ -752,14 +874,16 @@ async def upload_file(
        try:
            embedding = await _embed(chunk["text"], user["token"])
            doc_id = await db.fetchval(
-                """INSERT INTO documents (store_id, content, metadata, embedding)
+                """INSERT INTO documents
+                   (store_id, content, metadata, embedding)
                   VALUES ($1, $2, $3, $4::vector)
                   RETURNING id""",
                store_id, chunk["text"],
                json.dumps({
                    "source": file.filename,
+                    "type": "image" if is_image(filename) else "document",
                    "chunk": chunk["index"],
                    "start": chunk.get("start", 0),
                }),
                str(embedding)
            )
@@ -778,6 +902,7 @@ async def upload_file(
    return {
        "object": "vector_store.file_batch",
        "filename": file.filename,
+        "type": "image" if is_image(filename) else "document",
        "counts": {
            "completed": len(ids),
            "failed": failed,
@@ -785,3 +910,27 @@ async def upload_file(
        },
        "ids": ids
    }
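+
+# Illustrative response from the upload endpoint (values hypothetical):
+#   {"object": "vector_store.file_batch", "filename": "diagram.png",
+#    "type": "image", "counts": {"completed": 1, "failed": 0, ...},
+#    "ids": [101, 102]}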
+
+@router.get("/vision/models")
+async def list_vision_models(
+    user: dict = Depends(verify_api_key),
+):
+    """List all available vision models"""
+    all_models = await _get_all_models()
+
+    vision_models = [
+        {
+            "id": m["id"],
+            "object": "model",
+            "owned_by": "system",
+            "default": m["id"] == VISION_MODEL,
+        }
+        for m in all_models
+        if m.get("supports_vision") is True
+    ]
+
+    return {
+        "object": "list",
+        "default": VISION_MODEL,
+        "data": vision_models
+    }
diff --git a/app/utils/image_processor.py b/app/utils/image_processor.py
new file mode 100644
index 0000000..1ad02b1
--- /dev/null
+++ b/app/utils/image_processor.py
@@ -0,0 +1,153 @@
+import base64
+import httpx
+import os
+import logging
+from typing import Optional
+from fastapi import HTTPException
+
+logger = logging.getLogger(__name__)
+LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
+VISION_MODEL = os.getenv("VISION_MODEL", "openai/gpt-4o-mini")
+
+SUPPORTED_IMAGE_FORMATS = [
+    ".jpg", ".jpeg",
+    ".png",
+    ".gif",
+    ".webp",
+    ".tiff"
+]
+
+MIME_TYPES = {
+    "jpg": "image/jpeg",
+    "jpeg": "image/jpeg",
+    "png": "image/png",
+    "gif": "image/gif",
+    "webp": "image/webp",
+    "tiff": "image/tiff"
+}
+
+DEFAULT_PROMPT = (
+    "Describe the content of this image in detail. "
+    "If it contains text, reproduce it in full. "
+    "If it is a diagram or chart, explain the data. "
+    "If it is a screenshot, describe what is shown. "
+    "Answer in German."
+)
+
+
+def is_image(filename: str) -> bool:
+    """Check whether a file is an image"""
+    return any(
+        filename.lower().endswith(ext)
+        for ext in SUPPORTED_IMAGE_FORMATS
+    )
+
+
+async def image_to_text(
+    content: bytes,
+    filename: str,
+    token: str,
+    model: Optional[str] = None,
+    prompt: str = DEFAULT_PROMPT
+) -> str:
+    """Convert an image to text via a vision LLM"""
+    use_model = model or VISION_MODEL
+    ext = filename.lower().split(".")[-1]
+    mime_type = MIME_TYPES.get(ext, "image/jpeg")
+    image_b64 = base64.b64encode(content).decode("utf-8")
+
+    async with httpx.AsyncClient() as client:
+        resp = await client.post(
+            f"{LITELLM_URL}/chat/completions",
+            headers={
+                "Authorization": f"Bearer {token}",
+                "Content-Type": "application/json"
+            },
+            json={
+                "model": use_model,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": prompt
+                            },
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{mime_type};base64,{image_b64}"
+                                }
+                            }
+                        ]
+                    }
+                ],
+                "max_tokens": 2048
+            },
+            timeout=60.0
+        )
+
+    if resp.status_code != 200:
+        logger.error(f"Vision error: {resp.status_code} - {resp.text}")
+        raise HTTPException(
+            502,
+            f"Image could not be processed: {resp.text}"
+        )
+
+    return resp.json()["choices"][0]["message"]["content"]
+
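+
+# validate_vision_model (below) relies on LiteLLM's /model_group/info
+# endpoint. Illustrative response shape only; values are hypothetical,
+# see the LiteLLM docs for the full schema:
+#   {"data": [{"model_group": "openai/gpt-4o-mini",
+#              "supports_vision": true}]}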
" + f"VerfΓΌgbare Vision Modelle: " + f"{', '.join(vision_models)}" + ), + "type": "invalid_request_error", + "code": "model_not_supported" + } + }) + + return model diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml index 496cd46..18cd5d8 100644 --- a/k8s/configmap.yaml +++ b/k8s/configmap.yaml @@ -8,3 +8,4 @@ data: ADMIN_USER_IDS: "default_user_id" API_URL: "https://api.vector.cosair.de" EMBEDDING_MODEL: "cosair/multilingual-e5-large-instruct" + VISION_MODEL: "cosair/gemma4:31b" diff --git a/k8s/vector-api/deployment.yaml b/k8s/vector-api/deployment.yaml index ab09235..6bd99ab 100644 --- a/k8s/vector-api/deployment.yaml +++ b/k8s/vector-api/deployment.yaml @@ -43,6 +43,12 @@ spec: name: vector-store-config key: ADMIN_USER_IDS + - name: VISION_MODEL + valueFrom: + configMapKeyRef: + name: vector-store-config + key: VISION_MODEL + readinessProbe: httpGet: path: /health diff --git a/requirements.txt b/requirements.txt index d754908..4a199fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,7 @@ pgvector==0.3.0 tenacity==8.3.0 pypdf==4.2.0 python-docx==1.1.0 +openpyxl==3.1.2 +python-pptx==0.6.23 +beautifulsoup4==4.12.3 +extract-msg==0.48.0