More datatypes

This commit is contained in:
root
2026-04-29 09:11:46 +00:00
parent ef55253cbd
commit 965e900743
7 changed files with 519 additions and 51 deletions

View File

@@ -39,6 +39,7 @@ COPY app/routers/openai_compat.py ./app/routers/openai_compat.py
COPY app/utils/__init__.py ./app/utils/__init__.py
COPY app/utils/stats.py ./app/utils/stats.py
COPY app/utils/chunking.py ./app/utils/chunking.py
COPY app/utils/image_processor.py ./app/utils/image_processor.py
RUN find /app -type f | sort

200
README.md
View File

@@ -8,7 +8,8 @@ A vector store service built on top of [LiteLLM](https://github.com/BerriAI/lite
- 🗄️ **Vector Store** powered by PostgreSQL + pgvector
- 🔍 **Semantic Search** with optional Reranking
- 🤖 **RAG Endpoint** - Search + LLM in one request
- 📄 **File Upload** - PDF, DOCX, TXT, Markdown
- 📄 **File Upload** - PDF, DOCX, TXT, Markdown, Excel, CSV, PowerPoint, HTML, E-Mail, JSON
- 🖼️ **Image Support** - Upload images via Vision LLM (JPG, PNG, GIF, WebP, TIFF)
- 🧩 **OpenAI-compatible API** - works with existing OpenAI SDKs
- 👥 **Multi-User** - Store permissions per user
- 🖥️ **Admin UI** - Manage users, stores and permissions
@@ -20,20 +21,22 @@ A vector store service built on top of [LiteLLM](https://github.com/BerriAI/lite
Client (API Key)
LiteLLM Proxy ──────────────────────┐
│ │
▼ ▼
Vector Store API Embedding Models
(via LiteLLM)
PostgreSQL + pgvector
LiteLLM Proxy ──────────────────────────────
Vector Store API LiteLLM Models
┌──────────────────┐
│ Embedding Models │
PostgreSQL + pgvector │ Vision Models │
│ LLM Models │
└──────────────────┘
```
## Requirements
- Kubernetes Cluster
- PostgreSQL with pgvector extension
- LiteLLM Proxy (deployed)
- PostgreSQL with pgvector extension (already deployed)
- LiteLLM Proxy (already deployed)
- Container Registry
## Quick Start
@@ -106,6 +109,7 @@ EOF
### 3. Configure
```bash
# Create secrets
kubectl create secret generic vector-api-secrets \
--namespace vector-store \
--from-literal=DATABASE_URL="postgresql://vecuser:pass@postgres:5432/vectordb" \
@@ -124,16 +128,17 @@ data:
ADMIN_USER_IDS: "your-admin-user-id"
API_URL: "https://api.your-domain.com"
EMBEDDING_MODEL: "your-embedding-model"
VISION_MODEL: "openai/gpt-4o-mini"
```
### 4. Build & Deploy
```bash
# API
# Build & push API
docker build -t your-registry/vector-store-api:1.0.0 .
docker push your-registry/vector-store-api:1.0.0
# Admin UI
# Build & push Admin UI
docker build \
-t your-registry/vector-store-admin:1.0.0 \
./ui
@@ -165,6 +170,7 @@ litellm-vector-store/
│ │ └── openai_compat.py # OpenAI-compatible API
│ └── utils/
│ ├── chunking.py # Text chunking
│ ├── image_processor.py # Vision LLM integration
│ └── stats.py # Usage tracking
├── ui/ # React Admin UI
│ ├── src/
@@ -216,6 +222,10 @@ Authorization: Bearer sk-your-api-key
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/v1/models` | List all models |
| `GET` | `/v1/embeddings/models` | List embedding models |
| `GET` | `/v1/vision/models` | List vision models |
| `POST` | `/v1/embeddings` | Create embeddings |
| `POST` | `/v1/vector_stores` | Create store |
| `GET` | `/v1/vector_stores` | List stores |
| `GET` | `/v1/vector_stores/{id}` | Get store |
@@ -223,21 +233,21 @@ Authorization: Bearer sk-your-api-key
| `POST` | `/v1/vector_stores/{id}/files` | Add texts |
| `GET` | `/v1/vector_stores/{id}/files` | List files |
| `DELETE` | `/v1/vector_stores/{id}/files/{file_id}` | Delete file |
| `POST` | `/v1/vector_stores/{id}/upload` | Upload file |
| `POST` | `/v1/vector_stores/{id}/search` | Search |
| `POST` | `/v1/vector_stores/{id}/upload` | Upload file or image |
| `POST` | `/v1/vector_stores/{id}/search` | Semantic search |
| `POST` | `/v1/vector_stores/{id}/rag` | RAG query |
| `POST` | `/v1/embeddings` | Create embeddings |
| `GET` | `/v1/embeddings/models` | List embedding models |
| `GET` | `/v1/models` | List all models |
### Example
### Examples
#### Python
```python
import httpx
client = httpx.Client(
base_url="https://api.your-domain.com/v1",
headers={"Authorization": "Bearer sk-your-key"}
headers={"Authorization": "Bearer sk-your-key"},
timeout=120.0
)
# Create store
@@ -246,27 +256,129 @@ store = client.post(
json={"name": "My Knowledge Base"}
).json()
# Upload file
# Upload document
with open("document.pdf", "rb") as f:
client.post(
f"/vector_stores/{store['id']}/upload",
files={"file": f}
)
# Upload image (with default vision model)
with open("screenshot.png", "rb") as f:
client.post(
f"/vector_stores/{store['id']}/upload",
files={"file": f}
)
# Upload image (with custom vision model)
with open("diagram.png", "rb") as f:
client.post(
f"/vector_stores/{store['id']}/upload",
files={"file": f},
data={
"vision_model": "openai/gpt-4o",
"vision_prompt": "Explain this diagram in detail."
}
)
# Search
results = client.post(
f"/vector_stores/{store['id']}/search",
json={"query": "What is FastAPI?", "top_k": 3}
json={
"query": "What is FastAPI?",
"top_k": 3,
"rerank": True
}
).json()
# RAG
answer = client.post(
f"/vector_stores/{store['id']}/rag",
json={"query": "What is FastAPI?"}
json={
"query": "What is FastAPI?",
"model": "openai/gpt-4o-mini",
"rerank": True
}
).json()
print(answer["answer"])
```
#### JavaScript / TypeScript
```javascript
const API_KEY = "sk-your-api-key";
const BASE_URL = "https://api.your-domain.com/v1";
const HEADERS = {
"Authorization": `Bearer ${API_KEY}`,
"Content-Type": "application/json"
};
// Create store
const store = await fetch(`${BASE_URL}/vector_stores`, {
method: "POST",
headers: HEADERS,
body: JSON.stringify({ name: "My Store" })
}).then(r => r.json());
// Search
const results = await fetch(
`${BASE_URL}/vector_stores/${store.id}/search`, {
method: "POST",
headers: HEADERS,
body: JSON.stringify({
query: "What is FastAPI?",
top_k: 3,
rerank: true
})
}).then(r => r.json());
// RAG
const answer = await fetch(
`${BASE_URL}/vector_stores/${store.id}/rag`, {
method: "POST",
headers: HEADERS,
body: JSON.stringify({
query: "What is FastAPI?"
})
}).then(r => r.json());
console.log(answer.answer);
```
#### curl
```bash
# Create store
curl -X POST https://api.your-domain.com/v1/vector_stores \
-H "Authorization: Bearer sk-your-key" \
-H "Content-Type: application/json" \
-d '{"name": "My Store"}'
# Upload document
curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/upload \
-H "Authorization: Bearer sk-your-key" \
-F "file=@document.pdf"
# Upload image with custom vision model
curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/upload \
-H "Authorization: Bearer sk-your-key" \
-F "file=@diagram.png" \
-F "vision_model=openai/gpt-4o" \
-F "vision_prompt=Explain this diagram in detail."
# Search
curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/search \
-H "Authorization: Bearer sk-your-key" \
-H "Content-Type: application/json" \
-d '{"query": "What is FastAPI?", "top_k": 3, "rerank": true}'
# RAG
curl -X POST https://api.your-domain.com/v1/vector_stores/{store_id}/rag \
-H "Authorization: Bearer sk-your-key" \
-H "Content-Type: application/json" \
-d '{"query": "What is FastAPI?", "model": "openai/gpt-4o-mini"}'
```
## Configuration Reference
### Environment Variables
@@ -278,15 +390,54 @@ print(answer["answer"])
| `LITELLM_MASTER_KEY` | ✅ | — | LiteLLM master key |
| `ADMIN_USER_IDS` | ✅ | — | Comma-separated admin user IDs |
| `EMBEDDING_MODEL` | ❌ | `text-embedding-ada-002` | Default embedding model |
| `VISION_MODEL` | ❌ | `openai/gpt-4o-mini` | Default vision model |
### Upload Parameters
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `file` | file | — | File to upload |
| `chunk_size` | int | 512 | Characters per chunk |
| `chunk_overlap` | int | 50 | Overlap between chunks |
| `vision_model` | string | Config default | Vision model for images |
| `vision_prompt` | string | Auto | Custom prompt for vision model |
### Search Parameters
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `query` | string | — | Search query |
| `top_k` | int | 5 | Number of results (max. 50) |
| `rerank` | bool | false | Enable reranking |
| `rerank_model` | string | Auto | Custom rerank model |
### RAG Parameters
| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `query` | string | — | Question |
| `model` | string | cosair/gemma4:31b | LLM model |
| `top_k` | int | 5 | Context documents |
| `rerank` | bool | false | Enable reranking |
| `system_prompt` | string | Auto | Custom system prompt |
| `messages` | array | [] | Chat history |
### Supported File Formats
| Format | Extension | Notes |
|--------|-----------|-------|
| Text | `.txt` | UTF-8 encoded |
| Markdown | `.md` | Standard Markdown |
| PDF | `.pdf` | Text PDFs only, no scans |
| Word | `.docx` | Microsoft Word 2007+ |
| Excel | `.xlsx` | All sheets extracted |
| CSV | `.csv` | All columns extracted |
| PowerPoint | `.pptx` | All slides extracted |
| HTML | `.html` `.htm` | Scripts/styles removed |
| Outlook Mail | `.msg` | Including headers |
| E-Mail | `.eml` | Including headers |
| JSON | `.json` | Pretty printed |
| Image | `.jpg` `.jpeg` `.png` `.gif` `.webp` `.tiff` | Via Vision LLM |
### Limits
@@ -320,6 +471,8 @@ DATABASE_URL="postgresql://..." \
LITELLM_PROXY_URL="http://..." \
LITELLM_MASTER_KEY="sk-..." \
ADMIN_USER_IDS="your-id" \
EMBEDDING_MODEL="your-model" \
VISION_MODEL="openai/gpt-4o-mini" \
uvicorn app.main:app --reload
# Run UI locally
@@ -336,6 +489,7 @@ VITE_API_URL=http://localhost:8000 npm run dev
| **Database** | PostgreSQL 16 + pgvector |
| **Auth** | LiteLLM Key Management |
| **Embeddings** | Via LiteLLM Proxy |
| **Vision** | Via LiteLLM Vision Models |
| **Admin UI** | React + TypeScript + Tailwind CSS |
| **Container** | Docker + Kubernetes |
| **Ingress** | NGINX Ingress Controller |

View File

@@ -7,6 +7,12 @@ import pypdf
import docx
import io
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from app.utils.image_processor import (
image_to_text,
is_image,
SUPPORTED_IMAGE_FORMATS,
DEFAULT_PROMPT
)
from pydantic import BaseModel, Field
from typing import Optional
from app.auth import verify_api_key
@@ -14,6 +20,12 @@ from app.database import get_db
from app.utils.stats import track_usage
from app.utils.chunking import chunk_text
# Human-readable list of every extension the upload endpoint accepts;
# shown to the client in the "unsupported format" error message.
_DOCUMENT_EXTENSIONS = [
    ".txt", ".md", ".pdf", ".docx", ".xlsx", ".csv",
    ".pptx", ".html", ".htm", ".msg", ".eml", ".json",
]
SUPPORTED_FORMATS = " ".join(_DOCUMENT_EXTENSIONS + SUPPORTED_IMAGE_FORMATS)

router = APIRouter()
logger = logging.getLogger(__name__)
@@ -691,59 +703,169 @@ async def rag(
async def upload_file(
store_id: str,
file: UploadFile = File(...),
chunk_size: int = Form(default=512),
chunk_overlap: int = Form(default=50),
chunk_size: int = Form(default=512),
chunk_overlap: int = Form(default=50),
vision_prompt: str = Form(default=DEFAULT_PROMPT),
vision_model: str = Form(default=None),
user: dict = Depends(verify_api_key),
db=Depends(get_db)
):
"""Datei hochladen, chunken und in Vector Store speichern"""
start = time.time()
start = time.time()
await _check_access(db, store_id, user["user_id"])
content = await file.read()
filename = file.filename.lower()
try:
if filename.endswith(".pdf"):
pdf = pypdf.PdfReader(io.BytesIO(content))
text = "\n".join(
if is_image(filename):
# Modell validieren falls angegeben
if vision_model:
await validate_vision_model(vision_model, user["token"])
use_model = vision_model
else:
use_model = None
text = await image_to_text(
content=content,
filename=filename,
token=user["token"],
model=use_model,
prompt=vision_prompt
)
chunks = [{"text": text, "index": 0, "start": 0}]
elif filename.endswith((".txt", ".md")):
text = content.decode("utf-8")
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith(".pdf"):
import pypdf, io
pdf = pypdf.PdfReader(io.BytesIO(content))
text = "\n".join(
page.extract_text()
for page in pdf.pages
if page.extract_text()
)
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith(".docx"):
doc = docx.Document(io.BytesIO(content))
text = "\n".join(
p.text for p in doc.paragraphs if p.text.strip()
import docx, io
doc = docx.Document(io.BytesIO(content))
text = "\n".join(
p.text for p in doc.paragraphs
if p.text.strip()
)
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith(".txt"):
text = content.decode("utf-8")
elif filename.endswith(".xlsx"):
import openpyxl, io
wb = openpyxl.load_workbook(io.BytesIO(content))
lines = []
for sheet in wb.worksheets:
lines.append(f"=== Tabelle: {sheet.title} ===")
for row in sheet.iter_rows(values_only=True):
if any(cell is not None for cell in row):
lines.append(
" | ".join(
str(c) for c in row if c is not None
)
)
text = "\n".join(lines)
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith(".md"):
text = content.decode("utf-8")
elif filename.endswith(".csv"):
import csv, io
reader = csv.reader(
io.StringIO(content.decode("utf-8"))
)
text = "\n".join(
" | ".join(row)
for row in reader
if any(cell.strip() for cell in row)
)
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith(".pptx"):
from pptx import Presentation
import io
prs = Presentation(io.BytesIO(content))
lines = []
for i, slide in enumerate(prs.slides):
lines.append(f"=== Folie {i+1} ===")
for shape in slide.shapes:
if hasattr(shape, "text") and shape.text.strip():
lines.append(shape.text)
text = "\n".join(lines)
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith((".html", ".htm")):
from bs4 import BeautifulSoup
soup = BeautifulSoup(content, "html.parser")
for tag in soup(["script", "style", "nav", "footer"]):
tag.decompose()
text = soup.get_text(separator="\n", strip=True)
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith(".msg"):
import extract_msg, io
msg = extract_msg.Message(io.BytesIO(content))
text = "\n".join(filter(None, [
f"Von: {msg.sender}",
f"An: {msg.to}",
f"Betreff: {msg.subject}",
f"Datum: {msg.date}",
"" * 40,
msg.body
]))
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith(".eml"):
import email
msg = email.message_from_bytes(content)
body = ""
if msg.is_multipart():
for part in msg.walk():
if part.get_content_type() == "text/plain":
body = part.get_payload(
decode=True
).decode("utf-8", errors="ignore")
break
else:
body = msg.get_payload(
decode=True
).decode("utf-8", errors="ignore")
text = "\n".join(filter(None, [
f"Von: {msg.get('From')}",
f"An: {msg.get('To')}",
f"Betreff: {msg.get('Subject')}",
f"Datum: {msg.get('Date')}",
"" * 40,
body
]))
chunks = chunk_text(text, chunk_size, chunk_overlap)
elif filename.endswith(".json"):
import json as jsonlib
data = jsonlib.loads(content.decode("utf-8"))
text = jsonlib.dumps(data, indent=2, ensure_ascii=False)
chunks = chunk_text(text, chunk_size, chunk_overlap)
else:
raise HTTPException(
400,
f"Nicht unterstütztes Format: {file.filename}. "
f"Unterstützt: .pdf, .docx, .txt, .md"
f"Unterstützt: {SUPPORTED_FORMATS}"
)
except HTTPException:
raise
except Exception as e:
raise HTTPException(422, f"Datei konnte nicht gelesen werden: {e}")
raise HTTPException(
422,
f"Datei konnte nicht gelesen werden: {e}"
)
if not text.strip():
raise HTTPException(422, "Datei enthaelt keinen Text")
chunks = chunk_text(
text=text,
chunk_size=chunk_size,
overlap=chunk_overlap
)
if not any(c["text"].strip() for c in chunks):
raise HTTPException(422, "Datei enthält keinen Text")
ids = []
failed = 0
@@ -752,14 +874,16 @@ async def upload_file(
try:
embedding = await _embed(chunk["text"], user["token"])
doc_id = await db.fetchval(
"""INSERT INTO documents (store_id, content, metadata, embedding)
"""INSERT INTO documents
(store_id, content, metadata, embedding)
VALUES ($1, $2, $3, $4::vector) RETURNING id""",
store_id,
chunk["text"],
json.dumps({
"source": file.filename,
"chunk": chunk["index"],
"start": chunk.get("start", 0),
"source": file.filename,
"type": "image" if is_image(filename) else "document",
"chunk": chunk["index"],
"start": chunk.get("start", 0),
}),
str(embedding)
)
@@ -778,6 +902,7 @@ async def upload_file(
return {
"object": "vector_store.file_batch",
"filename": file.filename,
"type": "image" if is_image(filename) else "document",
"counts": {
"completed": len(ids),
"failed": failed,
@@ -785,3 +910,27 @@ async def upload_file(
},
"ids": ids
}
@router.get("/vision/models")
async def list_vision_models(
    user: dict = Depends(verify_api_key),
):
    """List every available model that supports vision input.

    The entry matching the configured default (``VISION_MODEL``) is
    flagged through its ``default`` field.
    """
    available = await _get_all_models()
    entries = []
    for model in available:
        if model.get("supports_vision") is not True:
            continue
        entries.append({
            "id": model["id"],
            "object": "model",
            "owned_by": "system",
            "default": model["id"] == VISION_MODEL,
        })
    return {
        "object": "list",
        "default": VISION_MODEL,
        "data": entries,
    }

View File

@@ -0,0 +1,153 @@
import base64
import httpx
import os
import logging
from fastapi import HTTPException
logger = logging.getLogger(__name__)

LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
VISION_MODEL = os.getenv("VISION_MODEL", "openai/gpt-4o-mini")

# Image extensions the upload endpoint routes through the vision LLM.
SUPPORTED_IMAGE_FORMATS = [".jpg", ".jpeg", ".png", ".gif", ".webp", ".tiff"]

# Extension (without the dot) -> MIME type used in the data URL sent to the model.
MIME_TYPES = {
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
    "gif": "image/gif",
    "webp": "image/webp",
    "tiff": "image/tiff",
}

# Default instruction for the vision model (German output is intentional).
DEFAULT_PROMPT = (
    "Beschreibe den Inhalt dieses Bildes detailliert. "
    "Falls Text vorhanden ist, gib ihn vollstaendig wieder. "
    "Falls es ein Diagramm oder Chart ist, erklaere die Daten. "
    "Falls es ein Screenshot ist, beschreibe was zu sehen ist. "
    "Antworte auf Deutsch."
)


def is_image(filename: str) -> bool:
    """Return True when *filename* carries a supported image extension (case-insensitive)."""
    return filename.lower().endswith(tuple(SUPPORTED_IMAGE_FORMATS))
async def image_to_text(
    content: bytes,
    filename: str,
    token: str,
    model: str = None,
    prompt: str = DEFAULT_PROMPT
) -> str:
    """Convert an image into text via a vision LLM behind the LiteLLM proxy.

    Args:
        content: Raw image bytes.
        filename: Original filename; only its extension is used to pick the MIME type.
        token: Per-user LiteLLM API key, forwarded as a Bearer token.
        model: Vision model to use; ``None`` falls back to the configured VISION_MODEL.
        prompt: Instruction sent alongside the image.

    Returns:
        The model's textual description of the image.

    Raises:
        HTTPException: 503 when the proxy is unreachable, 502 when the proxy
            answers with a non-200 status.
    """
    use_model = model or VISION_MODEL
    ext = filename.lower().split(".")[-1]
    # Unknown extensions fall back to JPEG rather than failing outright.
    mime_type = MIME_TYPES.get(ext, "image/jpeg")
    image_b64 = base64.b64encode(content).decode("utf-8")

    payload = {
        "model": use_model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_b64}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 2048
    }

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(
                f"{LITELLM_URL}/chat/completions",
                headers={
                    "Authorization": f"Bearer {token}",
                    "Content-Type": "application/json"
                },
                json=payload,
                timeout=60.0
            )
        except httpx.RequestError as e:
            # Consistent with validate_vision_model: map proxy outages to 503
            # instead of letting the connection error surface as a bare 500.
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}")

    if resp.status_code != 200:
        logger.error(f"Vision Fehler: {resp.status_code} - {resp.text}")
        raise HTTPException(
            502,
            f"Bild konnte nicht verarbeitet werden: {resp.text}"
        )
    return resp.json()["choices"][0]["message"]["content"]
async def validate_vision_model(
    model: str,
    token: str
) -> str:
    """Check that *model* exists on the LiteLLM proxy and supports vision.

    Args:
        model: Model-group name as configured in LiteLLM.
        token: The caller's API key (the catalogue itself is fetched with
            the master key).

    Returns:
        The validated model name, unchanged.

    Raises:
        HTTPException: 503 when the proxy is unreachable, 502 on a bad proxy
            response, 404 for unknown models, 400 for models without
            vision support.
    """
    master_key = os.getenv("LITELLM_MASTER_KEY")
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(
                f"{LITELLM_URL}/model_group/info",
                headers={"Authorization": f"Bearer {master_key}"},
                timeout=10.0
            )
        except httpx.RequestError as e:
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}")

    if resp.status_code != 200:
        raise HTTPException(502, "Modelle konnten nicht abgerufen werden")

    # Parse the response body once and reuse it for both lookups below
    # (the original parsed resp.json() on every access).
    data = resp.json().get("data", [])
    models = {m.get("model_group"): m for m in data}

    if model not in models:
        raise HTTPException(404, {
            "error": {
                "message": f"Modell '{model}' nicht gefunden",
                "type": "invalid_request_error",
                "code": "model_not_found"
            }
        })

    if not models[model].get("supports_vision"):
        vision_models = [
            m.get("model_group")
            for m in data
            if m.get("supports_vision")
        ]
        raise HTTPException(400, {
            "error": {
                "message": (
                    f"Modell '{model}' unterstützt kein Vision. "
                    f"Verfügbare Vision Modelle: "
                    f"{', '.join(vision_models)}"
                ),
                "type": "invalid_request_error",
                "code": "model_not_supported"
            }
        })
    return model

View File

@@ -8,3 +8,4 @@ data:
ADMIN_USER_IDS: "default_user_id"
API_URL: "https://api.vector.cosair.de"
EMBEDDING_MODEL: "cosair/multilingual-e5-large-instruct"
VISION_MODEL: "cosair/gemma4:31b"

View File

@@ -43,6 +43,12 @@ spec:
name: vector-store-config
key: ADMIN_USER_IDS
- name: VISION_MODEL
valueFrom:
configMapKeyRef:
name: vector-store-config
key: VISION_MODEL
readinessProbe:
httpGet:
path: /health

View File

@@ -9,3 +9,7 @@ pgvector==0.3.0
tenacity==8.3.0
pypdf==4.2.0
python-docx==1.1.0
openpyxl==3.1.2
python-pptx==0.6.23
beautifulsoup4==4.12.3
extract-msg==0.48.0