Initial commit
This commit is contained in:
0
app/__init__.py
Normal file
0
app/__init__.py
Normal file
53
app/auth.py
Normal file
53
app/auth.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import httpx
|
||||
import os
|
||||
import logging
|
||||
from fastapi import HTTPException, Header
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
|
||||
MASTER_KEY = os.getenv("LITELLM_MASTER_KEY")
|
||||
|
||||
async def verify_api_key(authorization: str = Header(...)) -> dict:
    """Resolve the caller's LiteLLM API key into a user identity.

    The bearer token from the Authorization header is looked up via
    LiteLLM's /key/info endpoint, authenticated with the master key.

    Returns a dict with user_id, the raw token, and the key alias.
    Raises 401 for unknown keys, 400 for keys without a user_id,
    503 when LiteLLM is unreachable, 502 on unexpected upstream status.
    """
    token = authorization.removeprefix("Bearer ")

    async with httpx.AsyncClient() as client:
        try:
            # Use the master key to query info about the caller's key.
            resp = await client.get(
                f"{LITELLM_URL}/key/info",
                headers={
                    "Authorization": f"Bearer {MASTER_KEY}"
                },
                params={"key": token},
                timeout=5.0
            )
        except httpx.RequestError as e:
            logger.error(f"LiteLLM nicht erreichbar: {e}")
            raise HTTPException(503, f"Auth service unavailable: {e}")

    # NOTE(review): resp.text may contain key metadata - debug level only.
    logger.debug(f"LiteLLM Status: {resp.status_code}")
    logger.debug(f"LiteLLM Response: {resp.text}")

    # LiteLLM signals an unknown key with 404 or 401 depending on version.
    if resp.status_code == 404:
        raise HTTPException(401, "Invalid API Key")
    if resp.status_code == 401:
        raise HTTPException(401, "Invalid API Key")
    if resp.status_code != 200:
        raise HTTPException(502, f"Auth service error: {resp.status_code}")

    data = resp.json()

    # user_id may live under "info" or at the top level depending on version.
    user_id = (
        data.get("info", {}).get("user_id") or
        data.get("user_id")
    )

    if not user_id:
        raise HTTPException(400, "API Key hat keine user_id")

    return {
        "user_id": user_id,
        "token": token,
        "key_alias": data.get("info", {}).get("key_alias"),
    }
|
||||
32
app/database.py
Normal file
32
app/database.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import asyncpg
|
||||
import os
|
||||
from tenacity import retry, stop_after_attempt, wait_fixed
|
||||
|
||||
pool: asyncpg.Pool = None
|
||||
|
||||
@retry(stop=stop_after_attempt(5), wait=wait_fixed(3))
async def init_db():
    """Create the global asyncpg pool and ensure pgvector is installed.

    Retried up to 5 times (3s apart) so the app survives the database
    container coming up after the API container.

    Raises ValueError when DATABASE_URL is not set.
    """
    global pool

    url = os.getenv("DATABASE_URL")
    if not url:
        raise ValueError("DATABASE_URL nicht gesetzt!")

    pool = await asyncpg.create_pool(
        dsn=url,
        min_size=2,
        max_size=10
    )
    # pgvector must exist before any embedding column can be used.
    async with pool.acquire() as conn:
        await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")

    # Fix: was an f-string with no placeholders (ruff F541).
    print("✅ Datenbank verbunden")
|
||||
|
||||
async def close_db():
    """Close the global connection pool on shutdown (no-op if never opened)."""
    global pool
    if pool:
        await pool.close()
|
||||
|
||||
async def get_db():
    """FastAPI dependency: lease one pooled connection for the request."""
    async with pool.acquire() as conn:
        yield conn
|
||||
35
app/main.py
Normal file
35
app/main.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
from app.database import init_db, close_db
|
||||
from app.routers import stores, documents, admin, openai_compat
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """App lifespan: open the DB pool before serving, close it after."""
    await init_db()
    yield
    await close_db()
|
||||
|
||||
app = FastAPI(
    title="Vector Store API",
    version="1.0.0",
    lifespan=lifespan
)

# Only the admin UI origin may make cross-origin (credentialed) calls.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://admin.vector.cosair.de"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(stores.router, prefix="/stores", tags=["Stores"])
app.include_router(documents.router, prefix="/documents", tags=["Documents"])
app.include_router(admin.router, prefix="/admin", tags=["Admin"])

# The OpenAI-compatible surface is mounted under /v1.
app.include_router(openai_compat.router, prefix="/v1", tags=["OpenAI Compatible"])
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness endpoint used by container health checks."""
    payload = {"status": "ok"}
    return payload
|
||||
35
app/middleware/rate_limit.py
Normal file
35
app/middleware/rate_limit.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from fastapi import Request, HTTPException
|
||||
from collections import defaultdict
|
||||
import time
|
||||
|
||||
# Simple in-memory rate limiting.
# NOTE(review): per-process only and never pruned for idle keys - state is
# lost on restart and grows with the number of distinct user/action pairs.
request_counts: dict = defaultdict(list)

# action -> (max requests, window in seconds)
RATE_LIMITS = {
    "search": (100, 60),  # 100 requests per 60 seconds
    "upsert": (50, 60),
    "embed": (200, 60),
    "rag": (20, 60),
}
|
||||
|
||||
def check_rate_limit(user_id: str, action: str):
    """Sliding-window limiter: raise 429 when the user exceeded the
    per-action quota, otherwise record this request's timestamp.

    Unknown actions fall back to 100 requests / 60 seconds.
    """
    limit, window = RATE_LIMITS.get(action, (100, 60))
    now = time.time()
    key = f"{user_id}:{action}"

    # Drop timestamps that fell out of the window.
    request_counts[key] = [
        t for t in request_counts[key]
        if now - t < window
    ]

    if len(request_counts[key]) >= limit:
        raise HTTPException(429, {
            "error": {
                "message": f"Rate limit erreicht: {limit} Requests pro {window}s",
                "type": "rate_limit_error",
                "code": "rate_limit_exceeded"
            }
        })

    request_counts[key].append(now)
|
||||
27
app/models.py
Normal file
27
app/models.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
|
||||
class StoreCreate(BaseModel):
|
||||
name: str = Field(..., min_length=1, max_length=255)
|
||||
|
||||
class StoreResponse(BaseModel):
|
||||
store_id: UUID
|
||||
name: str
|
||||
|
||||
class UpsertRequest(BaseModel):
    """Payload for inserting texts (with optional positional metadata) into a store."""
    store_id: UUID
    texts: list[str] = Field(..., min_length=1)
    # default_factory instead of a shared literal default - consistent with
    # the Field(...) usage above and robust if the default is ever mutated.
    metadata: list[dict] = Field(default_factory=list)
|
||||
|
||||
class QueryRequest(BaseModel):
|
||||
store_id: UUID
|
||||
query: str = Field(..., min_length=1)
|
||||
top_k: int = Field(default=5, ge=1, le=50)
|
||||
filter: Optional[dict] = None
|
||||
|
||||
class QueryResult(BaseModel):
|
||||
id: UUID
|
||||
content: str
|
||||
metadata: dict
|
||||
similarity: float
|
||||
0
app/routers/__init__.py
Normal file
0
app/routers/__init__.py
Normal file
186
app/routers/admin.py
Normal file
186
app/routers/admin.py
Normal file
@@ -0,0 +1,186 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from app.auth import verify_api_key
|
||||
from app.database import get_db
|
||||
import httpx
|
||||
import os
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
|
||||
MASTER_KEY = os.getenv("LITELLM_MASTER_KEY")
|
||||
# Comma-separated admin allowlist. Strip blanks so an unset/empty env var
# yields no admins instead of [""] (which would otherwise match any account
# whose user_id was the empty string).
ADMIN_IDS = [u.strip() for u in os.getenv("ADMIN_USER_IDS", "").split(",") if u.strip()]
|
||||
|
||||
|
||||
# --- Admin check ---
async def require_admin(user: dict = Depends(verify_api_key)):
    """Dependency: only user_ids listed in ADMIN_USER_IDS pass; 403 otherwise."""
    if user["user_id"] not in ADMIN_IDS:
        raise HTTPException(403, "Admin-Zugriff erforderlich")
    return user
|
||||
|
||||
|
||||
# --- Stats ---
@router.get("/stats")
async def get_stats(
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Global counters: stores, documents, distinct owners, permission grants."""
    stats = await db.fetchrow(
        """SELECT
            (SELECT COUNT(*) FROM vector_stores) AS total_stores,
            (SELECT COUNT(*) FROM documents) AS total_documents,
            (SELECT COUNT(DISTINCT owner_user_id)
             FROM vector_stores) AS total_users,
            (SELECT COUNT(*) FROM store_permissions) AS total_permissions"""
    )
    return dict(stats)
|
||||
|
||||
|
||||
# --- User endpoints ---
@router.get("/users")
async def list_users(
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Store owners with store count and latest store creation time.

    NOTE(review): users who own no store do not appear here - the list is
    derived from vector_stores, not from a users table.
    """
    rows = await db.fetch(
        """SELECT
            owner_user_id AS user_id,
            COUNT(id) AS store_count,
            MAX(created_at) AS last_activity
        FROM vector_stores
        GROUP BY owner_user_id
        ORDER BY last_activity DESC"""
    )
    return [dict(r) for r in rows]
|
||||
|
||||
|
||||
@router.get("/users/{user_id}/stores")
async def get_user_stores(
    user_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Stores owned by one user, each with its document count."""
    rows = await db.fetch(
        """SELECT
            vs.id,
            vs.name,
            vs.created_at,
            COUNT(d.id) AS document_count
        FROM vector_stores vs
        LEFT JOIN documents d ON d.store_id = vs.id
        WHERE vs.owner_user_id = $1
        GROUP BY vs.id, vs.name, vs.created_at""",
        user_id
    )
    return [dict(r) for r in rows]
|
||||
|
||||
|
||||
@router.delete("/users/{user_id}/stores/{store_id}")
async def admin_delete_store(
    user_id: str,
    store_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Delete a store on behalf of its owner; 404 if id/owner do not match."""
    deleted = await db.fetchval(
        """DELETE FROM vector_stores
        WHERE id = $1 AND owner_user_id = $2
        RETURNING id""",
        store_id, user_id
    )
    if not deleted:
        raise HTTPException(404, "Store nicht gefunden")
    return {"deleted": str(deleted)}
|
||||
|
||||
|
||||
# --- Permission endpoints ---
@router.get("/stores/{store_id}/permissions")
async def get_permissions(
    store_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """List the users granted access to a store."""
    rows = await db.fetch(
        """SELECT user_id, permission, created_at
        FROM store_permissions
        WHERE store_id = $1""",
        store_id
    )
    return [dict(r) for r in rows]
|
||||
|
||||
|
||||
@router.post("/stores/{store_id}/permissions")
async def grant_permission(
    store_id: str,
    user_id: str,
    permission: str = "read",
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Grant a user's permission on a store; upsert changes an existing grant."""
    if permission not in ("read", "write", "admin"):
        raise HTTPException(400, "Ungültige Permission: read, write oder admin")

    await db.execute(
        """INSERT INTO store_permissions (store_id, user_id, permission)
        VALUES ($1, $2, $3)
        ON CONFLICT (store_id, user_id)
        DO UPDATE SET permission = $3""",
        store_id, user_id, permission
    )
    return {"granted": permission, "user_id": user_id}
|
||||
|
||||
|
||||
@router.delete("/stores/{store_id}/permissions/{user_id}")
async def revoke_permission(
    store_id: str,
    user_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Remove a user's permission on a store (idempotent - no 404 on missing)."""
    await db.execute(
        "DELETE FROM store_permissions WHERE store_id=$1 AND user_id=$2",
        store_id, user_id
    )
    return {"revoked": user_id}
|
||||
|
||||
|
||||
# --- Key management ---
@router.post("/users/{user_id}/rotate-key")
async def rotate_key(
    user_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Issue a fresh LiteLLM key for a user; stores stay bound via user_id.

    NOTE(review): the previous key is not revoked here, and
    resp.json()["key"] raises KeyError if LiteLLM's response shape
    differs - confirm both against the LiteLLM version in use.
    """
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/key/generate",
            headers={"Authorization": f"Bearer {MASTER_KEY}"},
            json={
                "user_id": user_id,
                "key_alias": f"{user_id}-rotated"
            }
        )
    if resp.status_code != 200:
        raise HTTPException(500, "Key-Rotation fehlgeschlagen")

    store_count = await db.fetchval(
        "SELECT COUNT(*) FROM vector_stores WHERE owner_user_id=$1",
        user_id
    )
    return {
        "new_key": resp.json()["key"],
        "user_id": user_id,
        "stores_preserved": store_count
    }
|
||||
|
||||
@router.get("/verify")
async def verify_admin(
    admin=Depends(require_admin),
):
    """
    Checks whether the API key has admin rights.
    Returns 200 when admin, 403 otherwise (raised by require_admin).
    """
    return {
        "admin": True,
        "user_id": admin["user_id"],
    }
|
||||
41
app/routers/chunking.py
Normal file
41
app/routers/chunking.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from typing import Optional
|
||||
|
||||
def chunk_text(
    text: str,
    chunk_size: int = 512,
    overlap: int = 50,
    separator: str = "\n"
) -> list[dict]:
    """Split *text* into overlapping chunks.

    text: input to split
    chunk_size: maximum characters per chunk (a single paragraph longer
        than this is NOT split further - it becomes one oversized chunk)
    overlap: trailing characters of a chunk carried into the next one
    separator: boundary on which the text is pre-split

    Returns dicts with "text", a running "index", and "start" - the offset
    of the chunk's first occurrence in *text* (may point at an earlier
    duplicate when the same snippet occurs more than once).
    """
    paragraphs = text.split(separator)
    chunks = []
    current = ""
    index = 0

    for para in paragraphs:
        if len(current) + len(para) > chunk_size and current:
            snippet = current.strip()
            chunks.append({
                "text": snippet,
                "index": index,
                "start": text.find(snippet),
            })
            # Carry the tail of the finished chunk into the next one.
            # Bug fix: re-insert the separator between the overlap and the
            # next paragraph - previously they were glued together, which
            # also made text.find() return -1 for the stitched chunks.
            current = current[-overlap:] + separator + para
            index += 1
        else:
            current += separator + para

    if current.strip():
        snippet = current.strip()
        chunks.append({
            "text": snippet,
            "index": index,
            "start": text.find(snippet),
        })

    return chunks
|
||||
113
app/routers/documents.py
Normal file
113
app/routers/documents.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import json
|
||||
import httpx
|
||||
import os
|
||||
import logging
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from app.auth import verify_api_key
|
||||
from app.database import get_db
|
||||
from app.models import UpsertRequest, QueryRequest
|
||||
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
|
||||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
|
||||
|
||||
|
||||
async def _embed(text: str, token: str) -> list[float]:
    """Embed *text* via LiteLLM using the caller's own token (their quota).

    Raises 502 when the upstream call does not return 200.
    """
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/embeddings",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            json={
                "model": EMBEDDING_MODEL,
                "input": text
            },
            timeout=30.0
        )

    if resp.status_code != 200:
        logger.error(f"Embedding Fehler: {resp.status_code} - {resp.text}")
        raise HTTPException(
            502,
            f"Embedding fehlgeschlagen: {resp.status_code} - {resp.text}"
        )

    return resp.json()["data"][0]["embedding"]
|
||||
|
||||
|
||||
async def _check_access(db, store_id: str, user_id: str):
    """Raise 404 if the store does not exist, 403 if the user neither owns
    it nor has a row in store_permissions."""
    row = await db.fetchrow(
        "SELECT owner_user_id FROM vector_stores WHERE id=$1", store_id
    )
    if not row:
        raise HTTPException(404, "Store nicht gefunden")
    if row["owner_user_id"] != user_id:
        # Not the owner - fall back to explicit sharing.
        shared = await db.fetchval(
            "SELECT 1 FROM store_permissions WHERE store_id=$1 AND user_id=$2",
            store_id, user_id
        )
        if not shared:
            raise HTTPException(403, "Kein Zugriff")
|
||||
|
||||
|
||||
@router.post("/upsert")
async def upsert(
    body: UpsertRequest,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Embed each text and insert it as a document row in the store.

    metadata is matched to texts by position; missing entries become {}.
    NOTE(review): embeddings are requested one-by-one - batching would cut
    latency for large uploads; a mid-loop failure leaves earlier rows inserted.
    """
    await _check_access(db, str(body.store_id), user["user_id"])

    ids = []
    for i, text in enumerate(body.texts):
        embedding = await _embed(text, user["token"])
        meta = body.metadata[i] if i < len(body.metadata) else {}

        doc_id = await db.fetchval(
            """INSERT INTO documents (store_id, content, metadata, embedding)
            VALUES ($1, $2, $3, $4::vector) RETURNING id""",
            str(body.store_id),
            text,
            json.dumps(meta),
            str(embedding)
        )
        ids.append(str(doc_id))

    return {"inserted": len(ids), "ids": ids}
|
||||
|
||||
|
||||
@router.post("/query")
async def query(
    body: QueryRequest,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Cosine-similarity search over a store's documents.

    NOTE(review): QueryRequest.filter is accepted by the model but is not
    applied here - confirm whether metadata filtering is still planned.
    """
    await _check_access(db, str(body.store_id), user["user_id"])

    q_emb = await _embed(body.query, user["token"])

    # <=> is pgvector's cosine-distance operator; similarity = 1 - distance.
    rows = await db.fetch(
        """SELECT id, content, metadata,
            1 - (embedding <=> $1::vector) AS similarity
        FROM documents
        WHERE store_id = $2
        ORDER BY embedding <=> $1::vector
        LIMIT $3""",
        str(q_emb),
        str(body.store_id),
        body.top_k
    )

    return {"results": [
        {
            "id": str(r["id"]),
            "content": r["content"],
            "metadata": r["metadata"],
            "similarity": float(r["similarity"])
        }
        for r in rows
    ]}
|
||||
35
app/routers/files.py
Normal file
35
app/routers/files.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
|
||||
import pypdf
|
||||
import docx
|
||||
import io
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
async def extract_text(file: UploadFile) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    Raises 400 for any other extension.
    """
    content = await file.read()
    # Fix: compare case-insensitively so "REPORT.PDF" is accepted too,
    # and tolerate a missing filename (UploadFile.filename can be None).
    name = (file.filename or "").lower()

    if name.endswith(".pdf"):
        pdf = pypdf.PdfReader(io.BytesIO(content))
        return "\n".join(page.extract_text() for page in pdf.pages)

    elif name.endswith(".docx"):
        doc = docx.Document(io.BytesIO(content))
        return "\n".join(p.text for p in doc.paragraphs)

    elif name.endswith(".txt"):
        return content.decode("utf-8")

    else:
        raise HTTPException(400, f"Nicht unterstütztes Format: {file.filename}")
|
||||
|
||||
|
||||
@router.post("/v1/vector_stores/{store_id}/upload")
async def upload_file(
    store_id: str,
    file: UploadFile = File(...),
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Upload a file, extract its text, and chunk it.

    NOTE(review): this module never imports verify_api_key, get_db, or
    chunk_text - importing this router fails with NameError as written.
    The function also stops after chunking: the chunks are never embedded,
    persisted, or returned. Looks unfinished - confirm before wiring in.
    """
    text = await extract_text(file)
    chunks = chunk_text(text)
|
||||
787
app/routers/openai_compat.py
Normal file
787
app/routers/openai_compat.py
Normal file
@@ -0,0 +1,787 @@
|
||||
import json
|
||||
import httpx
|
||||
import os
|
||||
import logging
|
||||
import time
|
||||
import pypdf
|
||||
import docx
|
||||
import io
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
from app.auth import verify_api_key
|
||||
from app.database import get_db
|
||||
from app.utils.stats import track_usage
|
||||
from app.utils.chunking import chunk_text
|
||||
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
|
||||
LITELLM_MASTER = os.getenv("LITELLM_MASTER_KEY")
|
||||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "cosair/multilingual-e5-large-instruct")
|
||||
|
||||
|
||||
class VectorStoreCreate(BaseModel):
|
||||
name: str
|
||||
metadata: dict = {}
|
||||
|
||||
class VectorStoreResponse(BaseModel):
|
||||
id: str
|
||||
object: str = "vector_store"
|
||||
name: str
|
||||
metadata: dict = {}
|
||||
created_at: int
|
||||
|
||||
class FileUploadRequest(BaseModel):
|
||||
texts: list[str]
|
||||
metadata: list[dict] = []
|
||||
|
||||
class SearchRequest(BaseModel):
|
||||
query: str
|
||||
top_k: int = Field(default=5, ge=1, le=50)
|
||||
rerank: bool = False
|
||||
rerank_model: Optional[str] = None
|
||||
filters: Optional[dict] = None
|
||||
|
||||
class EmbeddingRequest(BaseModel):
|
||||
input: str | list[str]
|
||||
model: Optional[str] = None
|
||||
encoding_format: Optional[str] = "float"
|
||||
|
||||
class RAGRequest(BaseModel):
|
||||
query: str
|
||||
model: str = "cosair/gemma4:31b"
|
||||
top_k: int = 5
|
||||
rerank: bool = False
|
||||
system_prompt: Optional[str] = None
|
||||
messages: list[dict] = []
|
||||
|
||||
|
||||
# Hilfsfunktionen
|
||||
|
||||
def is_embedding_model(model: dict) -> bool:
    """Return True when the model entry is flagged as an embedding model.

    The mode is taken from the top-level "mode" key, falling back to
    model_info.mode when the former is missing or falsy.
    """
    effective_mode = model.get("mode")
    if not effective_mode:
        effective_mode = model.get("model_info", {}).get("mode")
    return effective_mode == "embedding"
|
||||
|
||||
|
||||
async def _get_all_models() -> list[dict]:
    """
    Fetch all models using the master key.

    The master key returns correct mode info for every model.
    Raises 503 when LiteLLM is unreachable, 502 on a non-200 response.
    """
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(
                f"{LITELLM_URL}/model_group/info",
                headers={"Authorization": f"Bearer {LITELLM_MASTER}"},
                timeout=10.0
            )
        except httpx.RequestError as e:
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}")

    if resp.status_code != 200:
        raise HTTPException(502, f"Modelle konnten nicht abgerufen werden: {resp.text}")

    # Normalise: expose the group name as "id", like the OpenAI API does.
    models = []
    for m in resp.json().get("data", []):
        models.append({
            **m,
            "id": m.get("model_group", m.get("id", "")),
        })

    return models
|
||||
|
||||
|
||||
async def _embed(
    text: str,
    token: str,
    model: Optional[str] = None
) -> list[float]:
    """Generate an embedding via LiteLLM (defaults to EMBEDDING_MODEL).

    Uses the caller's token so usage is billed to them.
    Raises 502 when the upstream call fails.
    """
    use_model = model or EMBEDDING_MODEL

    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/embeddings",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            json={
                "model": use_model,
                "input": text
            },
            timeout=30.0
        )

    if resp.status_code != 200:
        logger.error(f"Embedding Fehler: {resp.status_code} - {resp.text}")
        raise HTTPException(502, f"Embedding fehlgeschlagen: {resp.text}")

    return resp.json()["data"][0]["embedding"]
|
||||
|
||||
|
||||
async def _check_access(db, store_id: str, user_id: str):
    """Check store access; raises OpenAI-style error payloads (404/403)."""
    row = await db.fetchrow(
        "SELECT owner_user_id FROM vector_stores WHERE id=$1", store_id
    )
    if not row:
        raise HTTPException(404, detail={
            "error": {
                "message": f"No vector store found with id '{store_id}'",
                "type": "invalid_request_error",
                "code": "not_found"
            }
        })
    if row["owner_user_id"] != user_id:
        # Not the owner - fall back to explicit sharing.
        shared = await db.fetchval(
            "SELECT 1 FROM store_permissions WHERE store_id=$1 AND user_id=$2",
            store_id, user_id
        )
        if not shared:
            raise HTTPException(403, detail={
                "error": {
                    "message": "You don't have access to this vector store",
                    "type": "invalid_request_error",
                    "code": "permission_denied"
                }
            })
|
||||
|
||||
|
||||
async def _rerank(
    query: str,
    results: list[dict],
    model: str,
    token: str
) -> list[dict]:
    """Re-order search results via LiteLLM's /rerank endpoint.

    Expects each result to carry content[0]["text"] (the shape built by
    the search endpoint). On upstream failure the original ordering is
    returned unchanged (best effort, logged).
    """
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/rerank",
            headers={"Authorization": f"Bearer {token}"},
            json={
                "model": model,
                "query": query,
                "documents": [r["content"][0]["text"] for r in results]
            },
            timeout=30.0
        )

    if resp.status_code != 200:
        logger.error(f"Rerank Fehler: {resp.text}")
        return results

    reranked = resp.json()["results"]

    # Map reranker indices back onto the original results, best score first.
    return [
        {**results[r["index"]], "score": r["relevance_score"]}
        for r in sorted(reranked, key=lambda x: x["relevance_score"], reverse=True)
    ]
|
||||
|
||||
|
||||
# Models Endpoints
|
||||
|
||||
@router.get("/models")
async def list_models(
    user: dict = Depends(verify_api_key),
):
    """All available LiteLLM models, in OpenAI list format."""
    models = await _get_all_models()
    return {
        "object": "list",
        "data": [
            {
                "id": m["id"],
                "object": "model",
                "mode": m.get("mode"),
                "owned_by": "system",
            }
            for m in models
        ]
    }
|
||||
|
||||
|
||||
@router.get("/models/{model_id:path}")
async def get_model(
    model_id: str,
    user: dict = Depends(verify_api_key),
):
    """Look up a single model by id; 404 with OpenAI-style error if unknown.

    :path converter allows model ids containing slashes (e.g. "cosair/...").
    """
    all_models = await _get_all_models()
    model_lookup = {m["id"]: m for m in all_models}

    if model_id not in model_lookup:
        raise HTTPException(404, {
            "error": {
                "message": f"Modell '{model_id}' nicht gefunden",
                "type": "invalid_request_error",
                "code": "not_found"
            }
        })

    m = model_lookup[model_id]
    return {
        "id": m["id"],
        "object": "model",
        "mode": m.get("mode"),
        "owned_by": "system",
    }
|
||||
|
||||
|
||||
# Embedding Endpoints
|
||||
|
||||
@router.get("/embeddings/models")
async def list_embedding_models(
    user: dict = Depends(verify_api_key),
):
    """Only embedding models - filtered by mode (resolved via master key)."""
    all_models = await _get_all_models()

    embedding_models = [
        {
            "id": m["id"],
            "object": "model",
            "owned_by": "system",
            "default": m["id"] == EMBEDDING_MODEL,
        }
        for m in all_models
        if is_embedding_model(m)
    ]

    return {
        "object": "list",
        "default": EMBEDDING_MODEL,
        "data": embedding_models
    }
|
||||
|
||||
|
||||
@router.post("/embeddings")
async def create_embeddings(
    body: EmbeddingRequest,
    user: dict = Depends(verify_api_key),
):
    """Create embeddings - single input or list (OpenAI-compatible shape).

    Models known not to be embedding models are rejected with 400; ids not
    present in the lookup are passed through for LiteLLM to validate.
    NOTE(review): inputs are embedded one request at a time - batching the
    list into a single upstream call would reduce round trips.
    """
    start = time.time()
    model = body.model or EMBEDDING_MODEL
    inputs = body.input if isinstance(body.input, list) else [body.input]

    all_models = await _get_all_models()
    model_lookup = {m["id"]: m for m in all_models}

    if model in model_lookup and not is_embedding_model(model_lookup[model]):
        raise HTTPException(400, {
            "error": {
                "message": f"'{model}' ist kein Embedding Modell",
                "type": "invalid_request_error",
                "code": "invalid_model"
            }
        })

    embeddings = []
    total_tokens = 0

    async with httpx.AsyncClient() as client:
        for i, text in enumerate(inputs):
            resp = await client.post(
                f"{LITELLM_URL}/embeddings",
                headers={
                    "Authorization": f"Bearer {user['token']}",
                    "Content-Type": "application/json"
                },
                json={"model": model, "input": text},
                timeout=30.0
            )

            if resp.status_code != 200:
                logger.error(f"Embedding Fehler: {resp.status_code} - {resp.text}")
                raise HTTPException(502, f"Embedding fehlgeschlagen: {resp.text}")

            data = resp.json()
            total_tokens += data.get("usage", {}).get("total_tokens", 0)
            embeddings.append({
                "object": "embedding",
                "index": i,
                "embedding": data["data"][0]["embedding"]
            })

    await track_usage(
        user_id=user["user_id"],
        action="embed",
        tokens=total_tokens,
        duration=time.time() - start
    )

    return {
        "object": "list",
        "model": model,
        "data": embeddings,
        "usage": {
            "prompt_tokens": total_tokens,
            "total_tokens": total_tokens
        }
    }
|
||||
|
||||
|
||||
# Vector Store Endpoints
|
||||
|
||||
@router.post("/vector_stores", response_model=VectorStoreResponse)
async def create_vector_store(
    body: VectorStoreCreate,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Create a new vector store owned by the caller.

    NOTE(review): body.metadata is echoed back but not persisted - the
    INSERT stores only name and owner. Confirm whether that is intended.
    """
    row = await db.fetchrow(
        """INSERT INTO vector_stores (name, owner_user_id)
        VALUES ($1, $2)
        RETURNING id, name, created_at""",
        body.name, user["user_id"]
    )
    return VectorStoreResponse(
        id=str(row["id"]),
        name=row["name"],
        metadata=body.metadata,
        created_at=int(row["created_at"].timestamp())
    )
|
||||
|
||||
|
||||
@router.get("/vector_stores")
async def list_vector_stores(
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """List the caller's own stores (stores shared with them are not included)."""
    rows = await db.fetch(
        """SELECT vs.id, vs.name, vs.created_at,
            COUNT(d.id) AS file_counts
        FROM vector_stores vs
        LEFT JOIN documents d ON d.store_id = vs.id
        WHERE vs.owner_user_id = $1
        GROUP BY vs.id, vs.name, vs.created_at
        ORDER BY vs.created_at DESC""",
        user["user_id"]
    )
    return {
        "object": "list",
        "data": [
            {
                "id": str(r["id"]),
                "object": "vector_store",
                "name": r["name"],
                "created_at": int(r["created_at"].timestamp()),
                "file_counts": {"total": r["file_counts"]}
            }
            for r in rows
        ]
    }
|
||||
|
||||
|
||||
@router.get("/vector_stores/{store_id}")
async def get_vector_store(
    store_id: str,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Fetch one store (owner or shared user) with its document count."""
    await _check_access(db, store_id, user["user_id"])

    # _check_access already guaranteed the store exists, so row is not None.
    row = await db.fetchrow(
        """SELECT vs.id, vs.name, vs.created_at,
            COUNT(d.id) AS file_counts
        FROM vector_stores vs
        LEFT JOIN documents d ON d.store_id = vs.id
        WHERE vs.id = $1
        GROUP BY vs.id, vs.name, vs.created_at""",
        store_id
    )
    return {
        "id": str(row["id"]),
        "object": "vector_store",
        "name": row["name"],
        "created_at": int(row["created_at"].timestamp()),
        "file_counts": {"total": row["file_counts"]}
    }
|
||||
|
||||
|
||||
@router.delete("/vector_stores/{store_id}")
async def delete_vector_store(
    store_id: str,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Delete a store - owner only (non-owners, even shared users, get 404)."""
    deleted = await db.fetchval(
        """DELETE FROM vector_stores
        WHERE id=$1 AND owner_user_id=$2
        RETURNING id""",
        store_id, user["user_id"]
    )
    if not deleted:
        raise HTTPException(404, "Vector store nicht gefunden")
    return {
        "id": store_id,
        "object": "vector_store.deleted",
        "deleted": True
    }
|
||||
|
||||
|
||||
# Files Endpoints
|
||||
|
||||
@router.post("/vector_stores/{store_id}/files")
async def add_files(
    store_id: str,
    body: FileUploadRequest,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Embed and insert documents into a store; metadata matched by position.

    NOTE(review): texts are embedded sequentially; a failure mid-loop leaves
    earlier rows inserted even though "failed" is always reported as 0.
    """
    start = time.time()
    await _check_access(db, store_id, user["user_id"])

    ids = []
    for i, text in enumerate(body.texts):
        embedding = await _embed(text, user["token"])
        meta = body.metadata[i] if i < len(body.metadata) else {}

        doc_id = await db.fetchval(
            """INSERT INTO documents (store_id, content, metadata, embedding)
            VALUES ($1, $2, $3, $4::vector) RETURNING id""",
            store_id, text, json.dumps(meta), str(embedding)
        )
        ids.append(str(doc_id))

    await track_usage(
        user_id=user["user_id"],
        action="upsert",
        store_id=store_id,
        duration=time.time() - start
    )

    return {
        "object": "vector_store.file_batch",
        "counts": {
            "completed": len(ids),
            "failed": 0,
            "total": len(body.texts)
        },
        "ids": ids
    }
|
||||
|
||||
|
||||
@router.get("/vector_stores/{store_id}/files")
async def list_files(
    store_id: str,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """List a store's documents with a content preview, newest first.

    Fix: the "..." truncation marker is only appended when the content was
    actually truncated - previously every preview ended in "...", even for
    documents shorter than 100 characters.
    """
    await _check_access(db, store_id, user["user_id"])

    rows = await db.fetch(
        """SELECT id, content, metadata, created_at
        FROM documents
        WHERE store_id=$1
        ORDER BY created_at DESC""",
        store_id
    )
    return {
        "object": "list",
        "data": [
            {
                "id": str(r["id"]),
                "object": "vector_store.file",
                "content": (
                    r["content"]
                    if len(r["content"]) <= 100
                    else r["content"][:100] + "..."
                ),
                "metadata": r["metadata"],
                "created_at": int(r["created_at"].timestamp())
            }
            for r in rows
        ]
    }
|
||||
|
||||
|
||||
@router.delete("/vector_stores/{store_id}/files/{file_id}")
|
||||
async def delete_file(
|
||||
store_id: str,
|
||||
file_id: str,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
"""Einzelnes Dokument loeschen"""
|
||||
await _check_access(db, store_id, user["user_id"])
|
||||
|
||||
deleted = await db.fetchval(
|
||||
"DELETE FROM documents WHERE id=$1 AND store_id=$2 RETURNING id",
|
||||
file_id, store_id
|
||||
)
|
||||
if not deleted:
|
||||
raise HTTPException(404, "File nicht gefunden")
|
||||
return {
|
||||
"id": file_id,
|
||||
"object": "vector_store.file.deleted",
|
||||
"deleted": True
|
||||
}
|
||||
|
||||
|
||||
# Search Endpoint
|
||||
|
||||
@router.post("/vector_stores/{store_id}/search")
|
||||
async def search(
|
||||
store_id: str,
|
||||
body: SearchRequest,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
"""Aehnliche Dokumente im Vector Store suchen"""
|
||||
start = time.time()
|
||||
await _check_access(db, store_id, user["user_id"])
|
||||
|
||||
q_emb = await _embed(body.query, user["token"])
|
||||
fetch_k = body.top_k * 3 if body.rerank else body.top_k
|
||||
|
||||
rows = await db.fetch(
|
||||
"""SELECT id, content, metadata,
|
||||
1 - (embedding <=> $1::vector) AS score
|
||||
FROM documents
|
||||
WHERE store_id = $2
|
||||
ORDER BY embedding <=> $1::vector
|
||||
LIMIT $3""",
|
||||
str(q_emb), store_id, fetch_k
|
||||
)
|
||||
|
||||
results = []
|
||||
for r in rows:
|
||||
metadata = r["metadata"]
|
||||
if isinstance(metadata, str):
|
||||
try:
|
||||
metadata = json.loads(metadata)
|
||||
except Exception:
|
||||
metadata = {}
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
|
||||
results.append({
|
||||
"id": str(r["id"]),
|
||||
"object": "vector_store.search_result",
|
||||
"score": float(r["score"]),
|
||||
"content": [{"type": "text", "text": r["content"]}],
|
||||
"metadata": metadata
|
||||
})
|
||||
|
||||
if body.rerank:
|
||||
rerank_model = body.rerank_model or "cosair/bge-reranker-v2-m3"
|
||||
results = await _rerank(body.query, results, rerank_model, user["token"])
|
||||
results = results[:body.top_k]
|
||||
|
||||
await track_usage(
|
||||
user_id=user["user_id"],
|
||||
action="search",
|
||||
store_id=store_id,
|
||||
duration=time.time() - start
|
||||
)
|
||||
|
||||
return {"object": "list", "data": results}
|
||||
|
||||
|
||||
# RAG Endpoint
|
||||
|
||||
@router.post("/vector_stores/{store_id}/rag")
|
||||
async def rag(
|
||||
store_id: str,
|
||||
body: RAGRequest,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
"""Retrieval Augmented Generation"""
|
||||
start = time.time()
|
||||
await _check_access(db, store_id, user["user_id"])
|
||||
|
||||
q_emb = await _embed(body.query, user["token"])
|
||||
fetch_k = body.top_k * 3 if body.rerank else body.top_k
|
||||
|
||||
rows = await db.fetch(
|
||||
"""SELECT id, content, metadata,
|
||||
1 - (embedding <=> $1::vector) AS score
|
||||
FROM documents
|
||||
WHERE store_id = $2
|
||||
ORDER BY embedding <=> $1::vector
|
||||
LIMIT $3""",
|
||||
str(q_emb), store_id, fetch_k
|
||||
)
|
||||
|
||||
results = [
|
||||
{
|
||||
"id": str(r["id"]),
|
||||
"content": r["content"],
|
||||
"score": float(r["score"]),
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
|
||||
if body.rerank:
|
||||
results = await _rerank(
|
||||
body.query, results,
|
||||
"cosair/bge-reranker-v2-m3",
|
||||
user["token"]
|
||||
)
|
||||
results = results[:body.top_k]
|
||||
|
||||
context = "\n\n".join([
|
||||
f"[{i+1}] {r['content']}"
|
||||
for i, r in enumerate(results)
|
||||
])
|
||||
|
||||
system_prompt = body.system_prompt or (
|
||||
"Du bist ein hilfreicher Assistent. "
|
||||
"Beantworte Fragen ausschließlich basierend auf dem gegebenen Kontext. "
|
||||
"Wenn die Antwort nicht im Kontext zu finden ist, sage das ehrlich.\n\n"
|
||||
f"Kontext:\n{context}"
|
||||
)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*body.messages,
|
||||
{"role": "user", "content": body.query}
|
||||
]
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(
|
||||
f"{LITELLM_URL}/chat/completions",
|
||||
headers={"Authorization": f"Bearer {user['token']}"},
|
||||
json={"model": body.model, "messages": messages},
|
||||
timeout=60.0
|
||||
)
|
||||
|
||||
if resp.status_code != 200:
|
||||
raise HTTPException(502, f"LLM Fehler: {resp.text}")
|
||||
|
||||
llm_data = resp.json()
|
||||
answer = llm_data["choices"][0]["message"]["content"]
|
||||
total_tokens = llm_data.get("usage", {}).get("total_tokens", 0)
|
||||
|
||||
await track_usage(
|
||||
user_id=user["user_id"],
|
||||
action="rag",
|
||||
store_id=store_id,
|
||||
tokens=total_tokens,
|
||||
duration=time.time() - start
|
||||
)
|
||||
|
||||
return {
|
||||
"object": "rag.response",
|
||||
"answer": answer,
|
||||
"sources": [
|
||||
{
|
||||
"id": r["id"],
|
||||
"content": r["content"][:200] + "...",
|
||||
"score": r["score"]
|
||||
}
|
||||
for r in results
|
||||
],
|
||||
"model": body.model,
|
||||
"usage": llm_data.get("usage", {})
|
||||
}
|
||||
|
||||
@router.post("/vector_stores/{store_id}/upload")
|
||||
async def upload_file(
|
||||
store_id: str,
|
||||
file: UploadFile = File(...),
|
||||
chunk_size: int = Form(default=512),
|
||||
chunk_overlap: int = Form(default=50),
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
"""Datei hochladen, chunken und in Vector Store speichern"""
|
||||
start = time.time()
|
||||
await _check_access(db, store_id, user["user_id"])
|
||||
|
||||
content = await file.read()
|
||||
filename = file.filename.lower()
|
||||
|
||||
try:
|
||||
if filename.endswith(".pdf"):
|
||||
pdf = pypdf.PdfReader(io.BytesIO(content))
|
||||
text = "\n".join(
|
||||
page.extract_text()
|
||||
for page in pdf.pages
|
||||
if page.extract_text()
|
||||
)
|
||||
|
||||
elif filename.endswith(".docx"):
|
||||
doc = docx.Document(io.BytesIO(content))
|
||||
text = "\n".join(
|
||||
p.text for p in doc.paragraphs if p.text.strip()
|
||||
)
|
||||
|
||||
elif filename.endswith(".txt"):
|
||||
text = content.decode("utf-8")
|
||||
|
||||
elif filename.endswith(".md"):
|
||||
text = content.decode("utf-8")
|
||||
|
||||
else:
|
||||
raise HTTPException(
|
||||
400,
|
||||
f"Nicht unterstütztes Format: {file.filename}. "
|
||||
f"Unterstützt: .pdf, .docx, .txt, .md"
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(422, f"Datei konnte nicht gelesen werden: {e}")
|
||||
|
||||
if not text.strip():
|
||||
raise HTTPException(422, "Datei enthaelt keinen Text")
|
||||
|
||||
chunks = chunk_text(
|
||||
text=text,
|
||||
chunk_size=chunk_size,
|
||||
overlap=chunk_overlap
|
||||
)
|
||||
|
||||
ids = []
|
||||
failed = 0
|
||||
|
||||
for chunk in chunks:
|
||||
try:
|
||||
embedding = await _embed(chunk["text"], user["token"])
|
||||
doc_id = await db.fetchval(
|
||||
"""INSERT INTO documents (store_id, content, metadata, embedding)
|
||||
VALUES ($1, $2, $3, $4::vector) RETURNING id""",
|
||||
store_id,
|
||||
chunk["text"],
|
||||
json.dumps({
|
||||
"source": file.filename,
|
||||
"chunk": chunk["index"],
|
||||
"start": chunk.get("start", 0),
|
||||
}),
|
||||
str(embedding)
|
||||
)
|
||||
ids.append(str(doc_id))
|
||||
except Exception as e:
|
||||
logger.error(f"Chunk {chunk['index']} fehlgeschlagen: {e}")
|
||||
failed += 1
|
||||
|
||||
await track_usage(
|
||||
user_id=user["user_id"],
|
||||
action="upload",
|
||||
store_id=store_id,
|
||||
duration=time.time() - start
|
||||
)
|
||||
|
||||
return {
|
||||
"object": "vector_store.file_batch",
|
||||
"filename": file.filename,
|
||||
"counts": {
|
||||
"completed": len(ids),
|
||||
"failed": failed,
|
||||
"total": len(chunks)
|
||||
},
|
||||
"ids": ids
|
||||
}
|
||||
# ===== file: app/routers/stores.py (new file, 45 lines) =====
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from app.auth import verify_api_key
|
||||
from app.database import get_db
|
||||
from app.models import StoreCreate, StoreResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@router.post("", response_model=StoreResponse)
|
||||
async def create_store(
|
||||
body: StoreCreate,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
store_id = await db.fetchval(
|
||||
"INSERT INTO vector_stores (name, owner_user_id) VALUES ($1,$2) RETURNING id",
|
||||
body.name, user["user_id"]
|
||||
)
|
||||
return StoreResponse(store_id=store_id, name=body.name)
|
||||
|
||||
@router.get("")
|
||||
async def list_stores(
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
rows = await db.fetch(
|
||||
"SELECT id, name, created_at FROM vector_stores WHERE owner_user_id=$1",
|
||||
user["user_id"]
|
||||
)
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
@router.delete("/{store_id}")
|
||||
async def delete_store(
|
||||
store_id: str,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
deleted = await db.fetchval(
|
||||
"""DELETE FROM vector_stores
|
||||
WHERE id=$1 AND owner_user_id=$2
|
||||
RETURNING id""",
|
||||
store_id, user["user_id"]
|
||||
)
|
||||
if not deleted:
|
||||
raise HTTPException(404, "Store not found or access denied")
|
||||
return {"deleted": str(deleted)}
|
||||
# ===== file: app/utils/__init__.py (new file, empty) =====
# ===== file: app/utils/chunking.py (new file, 36 lines) =====
|
||||
def chunk_text(
|
||||
text: str,
|
||||
chunk_size: int = 512,
|
||||
overlap: int = 50,
|
||||
) -> list[dict]:
|
||||
"""Text in ueberlappende Chunks aufteilen"""
|
||||
chunks = []
|
||||
start = 0
|
||||
index = 0
|
||||
|
||||
while start < len(text):
|
||||
end = start + chunk_size
|
||||
chunk = text[start:end]
|
||||
|
||||
if end < len(text):
|
||||
last_period = max(
|
||||
chunk.rfind(". "),
|
||||
chunk.rfind(".\n"),
|
||||
chunk.rfind("! "),
|
||||
chunk.rfind("? "),
|
||||
)
|
||||
if last_period > chunk_size // 2:
|
||||
end = start + last_period + 1
|
||||
chunk = text[start:end]
|
||||
|
||||
if chunk.strip():
|
||||
chunks.append({
|
||||
"text": chunk.strip(),
|
||||
"index": index,
|
||||
"start": start,
|
||||
})
|
||||
|
||||
start = end - overlap
|
||||
index += 1
|
||||
|
||||
return chunks
|
||||
# ===== file: app/utils/stats.py (new file, 24 lines) =====
|
||||
import time
|
||||
import logging
|
||||
from typing import Optional
|
||||
from app.database import pool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def track_usage(
    user_id: str,
    action: str,
    store_id: Optional[str] = None,
    tokens: int = 0,
    duration: float = 0
):
    """Write one usage-statistics row.

    Best effort: any database error is logged and swallowed so that usage
    tracking can never break the request that triggered it.
    """
    row = (user_id, store_id, action, tokens, round(duration, 3))
    try:
        async with pool.acquire() as connection:
            await connection.execute(
                """INSERT INTO usage_stats
                (user_id, store_id, action, tokens, duration)
                VALUES ($1, $2, $3, $4, $5)""",
                *row,
            )
    except Exception as e:
        logger.error(f"Tracking Fehler: {e}")
|
||||
# ===== end of diff =====