Initial commit
This commit is contained in:
0
app/__init__.py
Normal file
0
app/__init__.py
Normal file
53
app/auth.py
Normal file
53
app/auth.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import httpx
|
||||
import os
|
||||
import logging
|
||||
from fastapi import HTTPException, Header
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
|
||||
MASTER_KEY = os.getenv("LITELLM_MASTER_KEY")
|
||||
|
||||
async def verify_api_key(authorization: str = Header(...)) -> dict:
    """Resolve the caller's LiteLLM API key into a user identity.

    The bearer token from the Authorization header is looked up via
    LiteLLM's /key/info endpoint, authenticated with the master key.

    Returns a dict with user_id, the raw token, and the key alias.
    Raises 401 for unknown keys, 400 for keys without a user_id,
    503 when LiteLLM is unreachable, 502 on unexpected upstream status.
    """
    token = authorization.removeprefix("Bearer ")

    async with httpx.AsyncClient() as client:
        try:
            # Use the master key to query info about the caller's key.
            resp = await client.get(
                f"{LITELLM_URL}/key/info",
                headers={
                    "Authorization": f"Bearer {MASTER_KEY}"
                },
                params={"key": token},
                timeout=5.0
            )
        except httpx.RequestError as e:
            logger.error(f"LiteLLM nicht erreichbar: {e}")
            raise HTTPException(503, f"Auth service unavailable: {e}")

    # NOTE(review): resp.text may contain key metadata - debug level only.
    logger.debug(f"LiteLLM Status: {resp.status_code}")
    logger.debug(f"LiteLLM Response: {resp.text}")

    # LiteLLM signals an unknown key with 404 or 401 depending on version.
    if resp.status_code == 404:
        raise HTTPException(401, "Invalid API Key")
    if resp.status_code == 401:
        raise HTTPException(401, "Invalid API Key")
    if resp.status_code != 200:
        raise HTTPException(502, f"Auth service error: {resp.status_code}")

    data = resp.json()

    # user_id may live under "info" or at the top level depending on version.
    user_id = (
        data.get("info", {}).get("user_id") or
        data.get("user_id")
    )

    if not user_id:
        raise HTTPException(400, "API Key hat keine user_id")

    return {
        "user_id": user_id,
        "token": token,
        "key_alias": data.get("info", {}).get("key_alias"),
    }
|
||||
32
app/database.py
Normal file
32
app/database.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import asyncpg
|
||||
import os
|
||||
from tenacity import retry, stop_after_attempt, wait_fixed
|
||||
|
||||
pool: asyncpg.Pool = None
|
||||
|
||||
@retry(stop=stop_after_attempt(5), wait=wait_fixed(3))
async def init_db():
    """Create the global asyncpg pool and ensure pgvector is installed.

    Retried up to 5 times (3s apart) so the app survives the database
    container coming up after the API container.

    Raises ValueError when DATABASE_URL is not set.
    """
    global pool

    url = os.getenv("DATABASE_URL")
    if not url:
        raise ValueError("DATABASE_URL nicht gesetzt!")

    pool = await asyncpg.create_pool(
        dsn=url,
        min_size=2,
        max_size=10
    )
    # pgvector must exist before any embedding column can be used.
    async with pool.acquire() as conn:
        await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")

    # Fix: was an f-string with no placeholders (ruff F541).
    print("✅ Datenbank verbunden")
|
||||
|
||||
async def close_db():
    """Close the global connection pool on shutdown (no-op if never opened)."""
    global pool
    if pool:
        await pool.close()
|
||||
|
||||
async def get_db():
    """FastAPI dependency: lease one pooled connection for the request."""
    async with pool.acquire() as conn:
        yield conn
|
||||
35
app/main.py
Normal file
35
app/main.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
from app.database import init_db, close_db
|
||||
from app.routers import stores, documents, admin, openai_compat
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """App lifespan: open the DB pool before serving, close it after."""
    await init_db()
    yield
    await close_db()
|
||||
|
||||
app = FastAPI(
    title="Vector Store API",
    version="1.0.0",
    lifespan=lifespan
)

# Only the admin UI origin may make cross-origin (credentialed) calls.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://admin.vector.cosair.de"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(stores.router, prefix="/stores", tags=["Stores"])
app.include_router(documents.router, prefix="/documents", tags=["Documents"])
app.include_router(admin.router, prefix="/admin", tags=["Admin"])

# The OpenAI-compatible surface is mounted under /v1.
app.include_router(openai_compat.router, prefix="/v1", tags=["OpenAI Compatible"])
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness endpoint used by container health checks."""
    payload = {"status": "ok"}
    return payload
|
||||
35
app/middleware/rate_limit.py
Normal file
35
app/middleware/rate_limit.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from fastapi import Request, HTTPException
|
||||
from collections import defaultdict
|
||||
import time
|
||||
|
||||
# Simple in-memory rate limiting.
# NOTE(review): per-process only and never pruned for idle keys - state is
# lost on restart and grows with the number of distinct user/action pairs.
request_counts: dict = defaultdict(list)

# action -> (max requests, window in seconds)
RATE_LIMITS = {
    "search": (100, 60),  # 100 requests per 60 seconds
    "upsert": (50, 60),
    "embed": (200, 60),
    "rag": (20, 60),
}
|
||||
|
||||
def check_rate_limit(user_id: str, action: str):
    """Sliding-window limiter: raise 429 when the user exceeded the
    per-action quota, otherwise record this request's timestamp.

    Unknown actions fall back to 100 requests / 60 seconds.
    """
    limit, window = RATE_LIMITS.get(action, (100, 60))
    now = time.time()
    key = f"{user_id}:{action}"

    # Drop timestamps that fell out of the window.
    request_counts[key] = [
        t for t in request_counts[key]
        if now - t < window
    ]

    if len(request_counts[key]) >= limit:
        raise HTTPException(429, {
            "error": {
                "message": f"Rate limit erreicht: {limit} Requests pro {window}s",
                "type": "rate_limit_error",
                "code": "rate_limit_exceeded"
            }
        })

    request_counts[key].append(now)
|
||||
27
app/models.py
Normal file
27
app/models.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
|
||||
class StoreCreate(BaseModel):
|
||||
name: str = Field(..., min_length=1, max_length=255)
|
||||
|
||||
class StoreResponse(BaseModel):
|
||||
store_id: UUID
|
||||
name: str
|
||||
|
||||
class UpsertRequest(BaseModel):
    """Payload for inserting texts (with optional positional metadata) into a store."""
    store_id: UUID
    texts: list[str] = Field(..., min_length=1)
    # default_factory instead of a shared literal default - consistent with
    # the Field(...) usage above and robust if the default is ever mutated.
    metadata: list[dict] = Field(default_factory=list)
|
||||
|
||||
class QueryRequest(BaseModel):
|
||||
store_id: UUID
|
||||
query: str = Field(..., min_length=1)
|
||||
top_k: int = Field(default=5, ge=1, le=50)
|
||||
filter: Optional[dict] = None
|
||||
|
||||
class QueryResult(BaseModel):
|
||||
id: UUID
|
||||
content: str
|
||||
metadata: dict
|
||||
similarity: float
|
||||
0
app/routers/__init__.py
Normal file
0
app/routers/__init__.py
Normal file
186
app/routers/admin.py
Normal file
186
app/routers/admin.py
Normal file
@@ -0,0 +1,186 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from app.auth import verify_api_key
|
||||
from app.database import get_db
|
||||
import httpx
|
||||
import os
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
|
||||
MASTER_KEY = os.getenv("LITELLM_MASTER_KEY")
|
||||
# Comma-separated admin allowlist. Strip blanks so an unset/empty env var
# yields no admins instead of [""] (which would otherwise match any account
# whose user_id was the empty string).
ADMIN_IDS = [u.strip() for u in os.getenv("ADMIN_USER_IDS", "").split(",") if u.strip()]
|
||||
|
||||
|
||||
# --- Admin check ---
async def require_admin(user: dict = Depends(verify_api_key)):
    """Dependency: only user_ids listed in ADMIN_USER_IDS pass; 403 otherwise."""
    if user["user_id"] not in ADMIN_IDS:
        raise HTTPException(403, "Admin-Zugriff erforderlich")
    return user
|
||||
|
||||
|
||||
# --- Stats ---
@router.get("/stats")
async def get_stats(
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Global counters: stores, documents, distinct owners, permission grants."""
    stats = await db.fetchrow(
        """SELECT
            (SELECT COUNT(*) FROM vector_stores) AS total_stores,
            (SELECT COUNT(*) FROM documents) AS total_documents,
            (SELECT COUNT(DISTINCT owner_user_id)
             FROM vector_stores) AS total_users,
            (SELECT COUNT(*) FROM store_permissions) AS total_permissions"""
    )
    return dict(stats)
|
||||
|
||||
|
||||
# --- User endpoints ---
@router.get("/users")
async def list_users(
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Store owners with store count and latest store creation time.

    NOTE(review): users who own no store do not appear here - the list is
    derived from vector_stores, not from a users table.
    """
    rows = await db.fetch(
        """SELECT
            owner_user_id AS user_id,
            COUNT(id) AS store_count,
            MAX(created_at) AS last_activity
        FROM vector_stores
        GROUP BY owner_user_id
        ORDER BY last_activity DESC"""
    )
    return [dict(r) for r in rows]
|
||||
|
||||
|
||||
@router.get("/users/{user_id}/stores")
async def get_user_stores(
    user_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Stores owned by one user, each with its document count."""
    rows = await db.fetch(
        """SELECT
            vs.id,
            vs.name,
            vs.created_at,
            COUNT(d.id) AS document_count
        FROM vector_stores vs
        LEFT JOIN documents d ON d.store_id = vs.id
        WHERE vs.owner_user_id = $1
        GROUP BY vs.id, vs.name, vs.created_at""",
        user_id
    )
    return [dict(r) for r in rows]
|
||||
|
||||
|
||||
@router.delete("/users/{user_id}/stores/{store_id}")
async def admin_delete_store(
    user_id: str,
    store_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Delete a store on behalf of its owner; 404 if id/owner do not match."""
    deleted = await db.fetchval(
        """DELETE FROM vector_stores
        WHERE id = $1 AND owner_user_id = $2
        RETURNING id""",
        store_id, user_id
    )
    if not deleted:
        raise HTTPException(404, "Store nicht gefunden")
    return {"deleted": str(deleted)}
|
||||
|
||||
|
||||
# --- Permission endpoints ---
@router.get("/stores/{store_id}/permissions")
async def get_permissions(
    store_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """List the users granted access to a store."""
    rows = await db.fetch(
        """SELECT user_id, permission, created_at
        FROM store_permissions
        WHERE store_id = $1""",
        store_id
    )
    return [dict(r) for r in rows]
|
||||
|
||||
|
||||
@router.post("/stores/{store_id}/permissions")
async def grant_permission(
    store_id: str,
    user_id: str,
    permission: str = "read",
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Grant a user's permission on a store; upsert changes an existing grant."""
    if permission not in ("read", "write", "admin"):
        raise HTTPException(400, "Ungültige Permission: read, write oder admin")

    await db.execute(
        """INSERT INTO store_permissions (store_id, user_id, permission)
        VALUES ($1, $2, $3)
        ON CONFLICT (store_id, user_id)
        DO UPDATE SET permission = $3""",
        store_id, user_id, permission
    )
    return {"granted": permission, "user_id": user_id}
|
||||
|
||||
|
||||
@router.delete("/stores/{store_id}/permissions/{user_id}")
async def revoke_permission(
    store_id: str,
    user_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Remove a user's permission on a store (idempotent - no 404 on missing)."""
    await db.execute(
        "DELETE FROM store_permissions WHERE store_id=$1 AND user_id=$2",
        store_id, user_id
    )
    return {"revoked": user_id}
|
||||
|
||||
|
||||
# --- Key management ---
@router.post("/users/{user_id}/rotate-key")
async def rotate_key(
    user_id: str,
    admin=Depends(require_admin),
    db=Depends(get_db)
):
    """Issue a fresh LiteLLM key for a user; stores stay bound via user_id.

    NOTE(review): the previous key is not revoked here, and
    resp.json()["key"] raises KeyError if LiteLLM's response shape
    differs - confirm both against the LiteLLM version in use.
    """
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/key/generate",
            headers={"Authorization": f"Bearer {MASTER_KEY}"},
            json={
                "user_id": user_id,
                "key_alias": f"{user_id}-rotated"
            }
        )
    if resp.status_code != 200:
        raise HTTPException(500, "Key-Rotation fehlgeschlagen")

    store_count = await db.fetchval(
        "SELECT COUNT(*) FROM vector_stores WHERE owner_user_id=$1",
        user_id
    )
    return {
        "new_key": resp.json()["key"],
        "user_id": user_id,
        "stores_preserved": store_count
    }
|
||||
|
||||
@router.get("/verify")
async def verify_admin(
    admin=Depends(require_admin),
):
    """
    Checks whether the API key has admin rights.
    Returns 200 when admin, 403 otherwise (raised by require_admin).
    """
    return {
        "admin": True,
        "user_id": admin["user_id"],
    }
|
||||
41
app/routers/chunking.py
Normal file
41
app/routers/chunking.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from typing import Optional
|
||||
|
||||
def chunk_text(
    text: str,
    chunk_size: int = 512,
    overlap: int = 50,
    separator: str = "\n"
) -> list[dict]:
    """Split *text* into overlapping chunks.

    text: input to split
    chunk_size: maximum characters per chunk (a single paragraph longer
        than this is NOT split further - it becomes one oversized chunk)
    overlap: trailing characters of a chunk carried into the next one
    separator: boundary on which the text is pre-split

    Returns dicts with "text", a running "index", and "start" - the offset
    of the chunk's first occurrence in *text* (may point at an earlier
    duplicate when the same snippet occurs more than once).
    """
    paragraphs = text.split(separator)
    chunks = []
    current = ""
    index = 0

    for para in paragraphs:
        if len(current) + len(para) > chunk_size and current:
            snippet = current.strip()
            chunks.append({
                "text": snippet,
                "index": index,
                "start": text.find(snippet),
            })
            # Carry the tail of the finished chunk into the next one.
            # Bug fix: re-insert the separator between the overlap and the
            # next paragraph - previously they were glued together, which
            # also made text.find() return -1 for the stitched chunks.
            current = current[-overlap:] + separator + para
            index += 1
        else:
            current += separator + para

    if current.strip():
        snippet = current.strip()
        chunks.append({
            "text": snippet,
            "index": index,
            "start": text.find(snippet),
        })

    return chunks
|
||||
113
app/routers/documents.py
Normal file
113
app/routers/documents.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import json
|
||||
import httpx
|
||||
import os
|
||||
import logging
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from app.auth import verify_api_key
|
||||
from app.database import get_db
|
||||
from app.models import UpsertRequest, QueryRequest
|
||||
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
|
||||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
|
||||
|
||||
|
||||
async def _embed(text: str, token: str) -> list[float]:
    """Embed *text* via LiteLLM using the caller's own token (their quota).

    Raises 502 when the upstream call does not return 200.
    """
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/embeddings",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            json={
                "model": EMBEDDING_MODEL,
                "input": text
            },
            timeout=30.0
        )

    if resp.status_code != 200:
        logger.error(f"Embedding Fehler: {resp.status_code} - {resp.text}")
        raise HTTPException(
            502,
            f"Embedding fehlgeschlagen: {resp.status_code} - {resp.text}"
        )

    return resp.json()["data"][0]["embedding"]
|
||||
|
||||
|
||||
async def _check_access(db, store_id: str, user_id: str):
    """Raise 404 if the store does not exist, 403 if the user neither owns
    it nor has a row in store_permissions."""
    row = await db.fetchrow(
        "SELECT owner_user_id FROM vector_stores WHERE id=$1", store_id
    )
    if not row:
        raise HTTPException(404, "Store nicht gefunden")
    if row["owner_user_id"] != user_id:
        # Not the owner - fall back to explicit sharing.
        shared = await db.fetchval(
            "SELECT 1 FROM store_permissions WHERE store_id=$1 AND user_id=$2",
            store_id, user_id
        )
        if not shared:
            raise HTTPException(403, "Kein Zugriff")
|
||||
|
||||
|
||||
@router.post("/upsert")
async def upsert(
    body: UpsertRequest,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Embed each text and insert it as a document row in the store.

    metadata is matched to texts by position; missing entries become {}.
    NOTE(review): embeddings are requested one-by-one - batching would cut
    latency for large uploads; a mid-loop failure leaves earlier rows inserted.
    """
    await _check_access(db, str(body.store_id), user["user_id"])

    ids = []
    for i, text in enumerate(body.texts):
        embedding = await _embed(text, user["token"])
        meta = body.metadata[i] if i < len(body.metadata) else {}

        doc_id = await db.fetchval(
            """INSERT INTO documents (store_id, content, metadata, embedding)
            VALUES ($1, $2, $3, $4::vector) RETURNING id""",
            str(body.store_id),
            text,
            json.dumps(meta),
            str(embedding)
        )
        ids.append(str(doc_id))

    return {"inserted": len(ids), "ids": ids}
|
||||
|
||||
|
||||
@router.post("/query")
async def query(
    body: QueryRequest,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Cosine-similarity search over a store's documents.

    NOTE(review): QueryRequest.filter is accepted by the model but is not
    applied here - confirm whether metadata filtering is still planned.
    """
    await _check_access(db, str(body.store_id), user["user_id"])

    q_emb = await _embed(body.query, user["token"])

    # <=> is pgvector's cosine-distance operator; similarity = 1 - distance.
    rows = await db.fetch(
        """SELECT id, content, metadata,
            1 - (embedding <=> $1::vector) AS similarity
        FROM documents
        WHERE store_id = $2
        ORDER BY embedding <=> $1::vector
        LIMIT $3""",
        str(q_emb),
        str(body.store_id),
        body.top_k
    )

    return {"results": [
        {
            "id": str(r["id"]),
            "content": r["content"],
            "metadata": r["metadata"],
            "similarity": float(r["similarity"])
        }
        for r in rows
    ]}
|
||||
35
app/routers/files.py
Normal file
35
app/routers/files.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
|
||||
import pypdf
|
||||
import docx
|
||||
import io
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
async def extract_text(file: UploadFile) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    Raises 400 for any other extension.
    """
    content = await file.read()
    # Fix: compare case-insensitively so "REPORT.PDF" is accepted too,
    # and tolerate a missing filename (UploadFile.filename can be None).
    name = (file.filename or "").lower()

    if name.endswith(".pdf"):
        pdf = pypdf.PdfReader(io.BytesIO(content))
        return "\n".join(page.extract_text() for page in pdf.pages)

    elif name.endswith(".docx"):
        doc = docx.Document(io.BytesIO(content))
        return "\n".join(p.text for p in doc.paragraphs)

    elif name.endswith(".txt"):
        return content.decode("utf-8")

    else:
        raise HTTPException(400, f"Nicht unterstütztes Format: {file.filename}")
|
||||
|
||||
|
||||
@router.post("/v1/vector_stores/{store_id}/upload")
async def upload_file(
    store_id: str,
    file: UploadFile = File(...),
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Upload a file, extract its text, and chunk it.

    NOTE(review): this module never imports verify_api_key, get_db, or
    chunk_text - importing this router fails with NameError as written.
    The function also stops after chunking: the chunks are never embedded,
    persisted, or returned. Looks unfinished - confirm before wiring in.
    """
    text = await extract_text(file)
    chunks = chunk_text(text)
|
||||
787
app/routers/openai_compat.py
Normal file
787
app/routers/openai_compat.py
Normal file
@@ -0,0 +1,787 @@
|
||||
import json
|
||||
import httpx
|
||||
import os
|
||||
import logging
|
||||
import time
|
||||
import pypdf
|
||||
import docx
|
||||
import io
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
from app.auth import verify_api_key
|
||||
from app.database import get_db
|
||||
from app.utils.stats import track_usage
|
||||
from app.utils.chunking import chunk_text
|
||||
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LITELLM_URL = os.getenv("LITELLM_PROXY_URL", "http://litellm:4000")
|
||||
LITELLM_MASTER = os.getenv("LITELLM_MASTER_KEY")
|
||||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "cosair/multilingual-e5-large-instruct")
|
||||
|
||||
|
||||
class VectorStoreCreate(BaseModel):
|
||||
name: str
|
||||
metadata: dict = {}
|
||||
|
||||
class VectorStoreResponse(BaseModel):
|
||||
id: str
|
||||
object: str = "vector_store"
|
||||
name: str
|
||||
metadata: dict = {}
|
||||
created_at: int
|
||||
|
||||
class FileUploadRequest(BaseModel):
|
||||
texts: list[str]
|
||||
metadata: list[dict] = []
|
||||
|
||||
class SearchRequest(BaseModel):
|
||||
query: str
|
||||
top_k: int = Field(default=5, ge=1, le=50)
|
||||
rerank: bool = False
|
||||
rerank_model: Optional[str] = None
|
||||
filters: Optional[dict] = None
|
||||
|
||||
class EmbeddingRequest(BaseModel):
|
||||
input: str | list[str]
|
||||
model: Optional[str] = None
|
||||
encoding_format: Optional[str] = "float"
|
||||
|
||||
class RAGRequest(BaseModel):
|
||||
query: str
|
||||
model: str = "cosair/gemma4:31b"
|
||||
top_k: int = 5
|
||||
rerank: bool = False
|
||||
system_prompt: Optional[str] = None
|
||||
messages: list[dict] = []
|
||||
|
||||
|
||||
# Hilfsfunktionen
|
||||
|
||||
def is_embedding_model(model: dict) -> bool:
    """Return True when the model entry is flagged as an embedding model.

    The mode is taken from the top-level "mode" key, falling back to
    model_info.mode when the former is missing or falsy.
    """
    effective_mode = model.get("mode")
    if not effective_mode:
        effective_mode = model.get("model_info", {}).get("mode")
    return effective_mode == "embedding"
|
||||
|
||||
|
||||
async def _get_all_models() -> list[dict]:
    """
    Fetch all models using the master key.

    The master key returns correct mode info for every model.
    Raises 503 when LiteLLM is unreachable, 502 on a non-200 response.
    """
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(
                f"{LITELLM_URL}/model_group/info",
                headers={"Authorization": f"Bearer {LITELLM_MASTER}"},
                timeout=10.0
            )
        except httpx.RequestError as e:
            raise HTTPException(503, f"LiteLLM nicht erreichbar: {e}")

    if resp.status_code != 200:
        raise HTTPException(502, f"Modelle konnten nicht abgerufen werden: {resp.text}")

    # Normalise: expose the group name as "id", like the OpenAI API does.
    models = []
    for m in resp.json().get("data", []):
        models.append({
            **m,
            "id": m.get("model_group", m.get("id", "")),
        })

    return models
|
||||
|
||||
|
||||
async def _embed(
    text: str,
    token: str,
    model: Optional[str] = None
) -> list[float]:
    """Generate an embedding via LiteLLM (defaults to EMBEDDING_MODEL).

    Uses the caller's token so usage is billed to them.
    Raises 502 when the upstream call fails.
    """
    use_model = model or EMBEDDING_MODEL

    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/embeddings",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            json={
                "model": use_model,
                "input": text
            },
            timeout=30.0
        )

    if resp.status_code != 200:
        logger.error(f"Embedding Fehler: {resp.status_code} - {resp.text}")
        raise HTTPException(502, f"Embedding fehlgeschlagen: {resp.text}")

    return resp.json()["data"][0]["embedding"]
|
||||
|
||||
|
||||
async def _check_access(db, store_id: str, user_id: str):
    """Check store access; raises OpenAI-style error payloads (404/403)."""
    row = await db.fetchrow(
        "SELECT owner_user_id FROM vector_stores WHERE id=$1", store_id
    )
    if not row:
        raise HTTPException(404, detail={
            "error": {
                "message": f"No vector store found with id '{store_id}'",
                "type": "invalid_request_error",
                "code": "not_found"
            }
        })
    if row["owner_user_id"] != user_id:
        # Not the owner - fall back to explicit sharing.
        shared = await db.fetchval(
            "SELECT 1 FROM store_permissions WHERE store_id=$1 AND user_id=$2",
            store_id, user_id
        )
        if not shared:
            raise HTTPException(403, detail={
                "error": {
                    "message": "You don't have access to this vector store",
                    "type": "invalid_request_error",
                    "code": "permission_denied"
                }
            })
|
||||
|
||||
|
||||
async def _rerank(
    query: str,
    results: list[dict],
    model: str,
    token: str
) -> list[dict]:
    """Re-order search results via LiteLLM's /rerank endpoint.

    Expects each result to carry content[0]["text"] (the shape built by
    the search endpoint). On upstream failure the original ordering is
    returned unchanged (best effort, logged).
    """
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{LITELLM_URL}/rerank",
            headers={"Authorization": f"Bearer {token}"},
            json={
                "model": model,
                "query": query,
                "documents": [r["content"][0]["text"] for r in results]
            },
            timeout=30.0
        )

    if resp.status_code != 200:
        logger.error(f"Rerank Fehler: {resp.text}")
        return results

    reranked = resp.json()["results"]

    # Map reranker indices back onto the original results, best score first.
    return [
        {**results[r["index"]], "score": r["relevance_score"]}
        for r in sorted(reranked, key=lambda x: x["relevance_score"], reverse=True)
    ]
|
||||
|
||||
|
||||
# Models Endpoints
|
||||
|
||||
@router.get("/models")
async def list_models(
    user: dict = Depends(verify_api_key),
):
    """All available LiteLLM models, in OpenAI list format."""
    models = await _get_all_models()
    return {
        "object": "list",
        "data": [
            {
                "id": m["id"],
                "object": "model",
                "mode": m.get("mode"),
                "owned_by": "system",
            }
            for m in models
        ]
    }
|
||||
|
||||
|
||||
@router.get("/models/{model_id:path}")
async def get_model(
    model_id: str,
    user: dict = Depends(verify_api_key),
):
    """Look up a single model by id; 404 with OpenAI-style error if unknown.

    :path converter allows model ids containing slashes (e.g. "cosair/...").
    """
    all_models = await _get_all_models()
    model_lookup = {m["id"]: m for m in all_models}

    if model_id not in model_lookup:
        raise HTTPException(404, {
            "error": {
                "message": f"Modell '{model_id}' nicht gefunden",
                "type": "invalid_request_error",
                "code": "not_found"
            }
        })

    m = model_lookup[model_id]
    return {
        "id": m["id"],
        "object": "model",
        "mode": m.get("mode"),
        "owned_by": "system",
    }
|
||||
|
||||
|
||||
# Embedding Endpoints
|
||||
|
||||
@router.get("/embeddings/models")
async def list_embedding_models(
    user: dict = Depends(verify_api_key),
):
    """Only embedding models - filtered by mode (resolved via master key)."""
    all_models = await _get_all_models()

    embedding_models = [
        {
            "id": m["id"],
            "object": "model",
            "owned_by": "system",
            "default": m["id"] == EMBEDDING_MODEL,
        }
        for m in all_models
        if is_embedding_model(m)
    ]

    return {
        "object": "list",
        "default": EMBEDDING_MODEL,
        "data": embedding_models
    }
|
||||
|
||||
|
||||
@router.post("/embeddings")
async def create_embeddings(
    body: EmbeddingRequest,
    user: dict = Depends(verify_api_key),
):
    """Create embeddings - single input or list (OpenAI-compatible shape).

    Models known not to be embedding models are rejected with 400; ids not
    present in the lookup are passed through for LiteLLM to validate.
    NOTE(review): inputs are embedded one request at a time - batching the
    list into a single upstream call would reduce round trips.
    """
    start = time.time()
    model = body.model or EMBEDDING_MODEL
    inputs = body.input if isinstance(body.input, list) else [body.input]

    all_models = await _get_all_models()
    model_lookup = {m["id"]: m for m in all_models}

    if model in model_lookup and not is_embedding_model(model_lookup[model]):
        raise HTTPException(400, {
            "error": {
                "message": f"'{model}' ist kein Embedding Modell",
                "type": "invalid_request_error",
                "code": "invalid_model"
            }
        })

    embeddings = []
    total_tokens = 0

    async with httpx.AsyncClient() as client:
        for i, text in enumerate(inputs):
            resp = await client.post(
                f"{LITELLM_URL}/embeddings",
                headers={
                    "Authorization": f"Bearer {user['token']}",
                    "Content-Type": "application/json"
                },
                json={"model": model, "input": text},
                timeout=30.0
            )

            if resp.status_code != 200:
                logger.error(f"Embedding Fehler: {resp.status_code} - {resp.text}")
                raise HTTPException(502, f"Embedding fehlgeschlagen: {resp.text}")

            data = resp.json()
            total_tokens += data.get("usage", {}).get("total_tokens", 0)
            embeddings.append({
                "object": "embedding",
                "index": i,
                "embedding": data["data"][0]["embedding"]
            })

    await track_usage(
        user_id=user["user_id"],
        action="embed",
        tokens=total_tokens,
        duration=time.time() - start
    )

    return {
        "object": "list",
        "model": model,
        "data": embeddings,
        "usage": {
            "prompt_tokens": total_tokens,
            "total_tokens": total_tokens
        }
    }
|
||||
|
||||
|
||||
# Vector Store Endpoints
|
||||
|
||||
@router.post("/vector_stores", response_model=VectorStoreResponse)
async def create_vector_store(
    body: VectorStoreCreate,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Create a new vector store owned by the caller.

    NOTE(review): body.metadata is echoed back but not persisted - the
    INSERT stores only name and owner. Confirm whether that is intended.
    """
    row = await db.fetchrow(
        """INSERT INTO vector_stores (name, owner_user_id)
        VALUES ($1, $2)
        RETURNING id, name, created_at""",
        body.name, user["user_id"]
    )
    return VectorStoreResponse(
        id=str(row["id"]),
        name=row["name"],
        metadata=body.metadata,
        created_at=int(row["created_at"].timestamp())
    )
|
||||
|
||||
|
||||
@router.get("/vector_stores")
async def list_vector_stores(
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """List the caller's own stores (stores shared with them are not included)."""
    rows = await db.fetch(
        """SELECT vs.id, vs.name, vs.created_at,
            COUNT(d.id) AS file_counts
        FROM vector_stores vs
        LEFT JOIN documents d ON d.store_id = vs.id
        WHERE vs.owner_user_id = $1
        GROUP BY vs.id, vs.name, vs.created_at
        ORDER BY vs.created_at DESC""",
        user["user_id"]
    )
    return {
        "object": "list",
        "data": [
            {
                "id": str(r["id"]),
                "object": "vector_store",
                "name": r["name"],
                "created_at": int(r["created_at"].timestamp()),
                "file_counts": {"total": r["file_counts"]}
            }
            for r in rows
        ]
    }
|
||||
|
||||
|
||||
@router.get("/vector_stores/{store_id}")
async def get_vector_store(
    store_id: str,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Fetch one store (owner or shared user) with its document count."""
    await _check_access(db, store_id, user["user_id"])

    # _check_access already guaranteed the store exists, so row is not None.
    row = await db.fetchrow(
        """SELECT vs.id, vs.name, vs.created_at,
            COUNT(d.id) AS file_counts
        FROM vector_stores vs
        LEFT JOIN documents d ON d.store_id = vs.id
        WHERE vs.id = $1
        GROUP BY vs.id, vs.name, vs.created_at""",
        store_id
    )
    return {
        "id": str(row["id"]),
        "object": "vector_store",
        "name": row["name"],
        "created_at": int(row["created_at"].timestamp()),
        "file_counts": {"total": row["file_counts"]}
    }
|
||||
|
||||
|
||||
@router.delete("/vector_stores/{store_id}")
async def delete_vector_store(
    store_id: str,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Delete a store - owner only (non-owners, even shared users, get 404)."""
    deleted = await db.fetchval(
        """DELETE FROM vector_stores
        WHERE id=$1 AND owner_user_id=$2
        RETURNING id""",
        store_id, user["user_id"]
    )
    if not deleted:
        raise HTTPException(404, "Vector store nicht gefunden")
    return {
        "id": store_id,
        "object": "vector_store.deleted",
        "deleted": True
    }
|
||||
|
||||
|
||||
# Files Endpoints
|
||||
|
||||
@router.post("/vector_stores/{store_id}/files")
async def add_files(
    store_id: str,
    body: FileUploadRequest,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Embed and insert documents into a store; metadata matched by position.

    NOTE(review): texts are embedded sequentially; a failure mid-loop leaves
    earlier rows inserted even though "failed" is always reported as 0.
    """
    start = time.time()
    await _check_access(db, store_id, user["user_id"])

    ids = []
    for i, text in enumerate(body.texts):
        embedding = await _embed(text, user["token"])
        meta = body.metadata[i] if i < len(body.metadata) else {}

        doc_id = await db.fetchval(
            """INSERT INTO documents (store_id, content, metadata, embedding)
            VALUES ($1, $2, $3, $4::vector) RETURNING id""",
            store_id, text, json.dumps(meta), str(embedding)
        )
        ids.append(str(doc_id))

    await track_usage(
        user_id=user["user_id"],
        action="upsert",
        store_id=store_id,
        duration=time.time() - start
    )

    return {
        "object": "vector_store.file_batch",
        "counts": {
            "completed": len(ids),
            "failed": 0,
            "total": len(body.texts)
        },
        "ids": ids
    }
|
||||
|
||||
|
||||
@router.get("/vector_stores/{store_id}/files")
async def list_files(
    store_id: str,
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """List a store's documents with a content preview, newest first.

    Fix: the "..." truncation marker is only appended when the content was
    actually truncated - previously every preview ended in "...", even for
    documents shorter than 100 characters.
    """
    await _check_access(db, store_id, user["user_id"])

    rows = await db.fetch(
        """SELECT id, content, metadata, created_at
        FROM documents
        WHERE store_id=$1
        ORDER BY created_at DESC""",
        store_id
    )
    return {
        "object": "list",
        "data": [
            {
                "id": str(r["id"]),
                "object": "vector_store.file",
                "content": (
                    r["content"]
                    if len(r["content"]) <= 100
                    else r["content"][:100] + "..."
                ),
                "metadata": r["metadata"],
                "created_at": int(r["created_at"].timestamp())
            }
            for r in rows
        ]
    }
|
||||
|
||||
|
||||
@router.delete("/vector_stores/{store_id}/files/{file_id}")
|
||||
async def delete_file(
|
||||
store_id: str,
|
||||
file_id: str,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
"""Einzelnes Dokument loeschen"""
|
||||
await _check_access(db, store_id, user["user_id"])
|
||||
|
||||
deleted = await db.fetchval(
|
||||
"DELETE FROM documents WHERE id=$1 AND store_id=$2 RETURNING id",
|
||||
file_id, store_id
|
||||
)
|
||||
if not deleted:
|
||||
raise HTTPException(404, "File nicht gefunden")
|
||||
return {
|
||||
"id": file_id,
|
||||
"object": "vector_store.file.deleted",
|
||||
"deleted": True
|
||||
}
|
||||
|
||||
|
||||
# Search Endpoint
|
||||
|
||||
@router.post("/vector_stores/{store_id}/search")
|
||||
async def search(
|
||||
store_id: str,
|
||||
body: SearchRequest,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
"""Aehnliche Dokumente im Vector Store suchen"""
|
||||
start = time.time()
|
||||
await _check_access(db, store_id, user["user_id"])
|
||||
|
||||
q_emb = await _embed(body.query, user["token"])
|
||||
fetch_k = body.top_k * 3 if body.rerank else body.top_k
|
||||
|
||||
rows = await db.fetch(
|
||||
"""SELECT id, content, metadata,
|
||||
1 - (embedding <=> $1::vector) AS score
|
||||
FROM documents
|
||||
WHERE store_id = $2
|
||||
ORDER BY embedding <=> $1::vector
|
||||
LIMIT $3""",
|
||||
str(q_emb), store_id, fetch_k
|
||||
)
|
||||
|
||||
results = []
|
||||
for r in rows:
|
||||
metadata = r["metadata"]
|
||||
if isinstance(metadata, str):
|
||||
try:
|
||||
metadata = json.loads(metadata)
|
||||
except Exception:
|
||||
metadata = {}
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
|
||||
results.append({
|
||||
"id": str(r["id"]),
|
||||
"object": "vector_store.search_result",
|
||||
"score": float(r["score"]),
|
||||
"content": [{"type": "text", "text": r["content"]}],
|
||||
"metadata": metadata
|
||||
})
|
||||
|
||||
if body.rerank:
|
||||
rerank_model = body.rerank_model or "cosair/bge-reranker-v2-m3"
|
||||
results = await _rerank(body.query, results, rerank_model, user["token"])
|
||||
results = results[:body.top_k]
|
||||
|
||||
await track_usage(
|
||||
user_id=user["user_id"],
|
||||
action="search",
|
||||
store_id=store_id,
|
||||
duration=time.time() - start
|
||||
)
|
||||
|
||||
return {"object": "list", "data": results}
|
||||
|
||||
|
||||
# RAG Endpoint
|
||||
|
||||
@router.post("/vector_stores/{store_id}/rag")
|
||||
async def rag(
|
||||
store_id: str,
|
||||
body: RAGRequest,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
"""Retrieval Augmented Generation"""
|
||||
start = time.time()
|
||||
await _check_access(db, store_id, user["user_id"])
|
||||
|
||||
q_emb = await _embed(body.query, user["token"])
|
||||
fetch_k = body.top_k * 3 if body.rerank else body.top_k
|
||||
|
||||
rows = await db.fetch(
|
||||
"""SELECT id, content, metadata,
|
||||
1 - (embedding <=> $1::vector) AS score
|
||||
FROM documents
|
||||
WHERE store_id = $2
|
||||
ORDER BY embedding <=> $1::vector
|
||||
LIMIT $3""",
|
||||
str(q_emb), store_id, fetch_k
|
||||
)
|
||||
|
||||
results = [
|
||||
{
|
||||
"id": str(r["id"]),
|
||||
"content": r["content"],
|
||||
"score": float(r["score"]),
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
|
||||
if body.rerank:
|
||||
results = await _rerank(
|
||||
body.query, results,
|
||||
"cosair/bge-reranker-v2-m3",
|
||||
user["token"]
|
||||
)
|
||||
results = results[:body.top_k]
|
||||
|
||||
context = "\n\n".join([
|
||||
f"[{i+1}] {r['content']}"
|
||||
for i, r in enumerate(results)
|
||||
])
|
||||
|
||||
system_prompt = body.system_prompt or (
|
||||
"Du bist ein hilfreicher Assistent. "
|
||||
"Beantworte Fragen ausschließlich basierend auf dem gegebenen Kontext. "
|
||||
"Wenn die Antwort nicht im Kontext zu finden ist, sage das ehrlich.\n\n"
|
||||
f"Kontext:\n{context}"
|
||||
)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*body.messages,
|
||||
{"role": "user", "content": body.query}
|
||||
]
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(
|
||||
f"{LITELLM_URL}/chat/completions",
|
||||
headers={"Authorization": f"Bearer {user['token']}"},
|
||||
json={"model": body.model, "messages": messages},
|
||||
timeout=60.0
|
||||
)
|
||||
|
||||
if resp.status_code != 200:
|
||||
raise HTTPException(502, f"LLM Fehler: {resp.text}")
|
||||
|
||||
llm_data = resp.json()
|
||||
answer = llm_data["choices"][0]["message"]["content"]
|
||||
total_tokens = llm_data.get("usage", {}).get("total_tokens", 0)
|
||||
|
||||
await track_usage(
|
||||
user_id=user["user_id"],
|
||||
action="rag",
|
||||
store_id=store_id,
|
||||
tokens=total_tokens,
|
||||
duration=time.time() - start
|
||||
)
|
||||
|
||||
return {
|
||||
"object": "rag.response",
|
||||
"answer": answer,
|
||||
"sources": [
|
||||
{
|
||||
"id": r["id"],
|
||||
"content": r["content"][:200] + "...",
|
||||
"score": r["score"]
|
||||
}
|
||||
for r in results
|
||||
],
|
||||
"model": body.model,
|
||||
"usage": llm_data.get("usage", {})
|
||||
}
|
||||
|
||||
@router.post("/vector_stores/{store_id}/upload")
|
||||
async def upload_file(
|
||||
store_id: str,
|
||||
file: UploadFile = File(...),
|
||||
chunk_size: int = Form(default=512),
|
||||
chunk_overlap: int = Form(default=50),
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
"""Datei hochladen, chunken und in Vector Store speichern"""
|
||||
start = time.time()
|
||||
await _check_access(db, store_id, user["user_id"])
|
||||
|
||||
content = await file.read()
|
||||
filename = file.filename.lower()
|
||||
|
||||
try:
|
||||
if filename.endswith(".pdf"):
|
||||
pdf = pypdf.PdfReader(io.BytesIO(content))
|
||||
text = "\n".join(
|
||||
page.extract_text()
|
||||
for page in pdf.pages
|
||||
if page.extract_text()
|
||||
)
|
||||
|
||||
elif filename.endswith(".docx"):
|
||||
doc = docx.Document(io.BytesIO(content))
|
||||
text = "\n".join(
|
||||
p.text for p in doc.paragraphs if p.text.strip()
|
||||
)
|
||||
|
||||
elif filename.endswith(".txt"):
|
||||
text = content.decode("utf-8")
|
||||
|
||||
elif filename.endswith(".md"):
|
||||
text = content.decode("utf-8")
|
||||
|
||||
else:
|
||||
raise HTTPException(
|
||||
400,
|
||||
f"Nicht unterstütztes Format: {file.filename}. "
|
||||
f"Unterstützt: .pdf, .docx, .txt, .md"
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(422, f"Datei konnte nicht gelesen werden: {e}")
|
||||
|
||||
if not text.strip():
|
||||
raise HTTPException(422, "Datei enthaelt keinen Text")
|
||||
|
||||
chunks = chunk_text(
|
||||
text=text,
|
||||
chunk_size=chunk_size,
|
||||
overlap=chunk_overlap
|
||||
)
|
||||
|
||||
ids = []
|
||||
failed = 0
|
||||
|
||||
for chunk in chunks:
|
||||
try:
|
||||
embedding = await _embed(chunk["text"], user["token"])
|
||||
doc_id = await db.fetchval(
|
||||
"""INSERT INTO documents (store_id, content, metadata, embedding)
|
||||
VALUES ($1, $2, $3, $4::vector) RETURNING id""",
|
||||
store_id,
|
||||
chunk["text"],
|
||||
json.dumps({
|
||||
"source": file.filename,
|
||||
"chunk": chunk["index"],
|
||||
"start": chunk.get("start", 0),
|
||||
}),
|
||||
str(embedding)
|
||||
)
|
||||
ids.append(str(doc_id))
|
||||
except Exception as e:
|
||||
logger.error(f"Chunk {chunk['index']} fehlgeschlagen: {e}")
|
||||
failed += 1
|
||||
|
||||
await track_usage(
|
||||
user_id=user["user_id"],
|
||||
action="upload",
|
||||
store_id=store_id,
|
||||
duration=time.time() - start
|
||||
)
|
||||
|
||||
return {
|
||||
"object": "vector_store.file_batch",
|
||||
"filename": file.filename,
|
||||
"counts": {
|
||||
"completed": len(ids),
|
||||
"failed": failed,
|
||||
"total": len(chunks)
|
||||
},
|
||||
"ids": ids
|
||||
}
|
||||
# ===== file: app/routers/stores.py (new file, 45 lines) =====
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from app.auth import verify_api_key
|
||||
from app.database import get_db
|
||||
from app.models import StoreCreate, StoreResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@router.post("", response_model=StoreResponse)
|
||||
async def create_store(
|
||||
body: StoreCreate,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
store_id = await db.fetchval(
|
||||
"INSERT INTO vector_stores (name, owner_user_id) VALUES ($1,$2) RETURNING id",
|
||||
body.name, user["user_id"]
|
||||
)
|
||||
return StoreResponse(store_id=store_id, name=body.name)
|
||||
|
||||
@router.get("")
|
||||
async def list_stores(
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
rows = await db.fetch(
|
||||
"SELECT id, name, created_at FROM vector_stores WHERE owner_user_id=$1",
|
||||
user["user_id"]
|
||||
)
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
@router.delete("/{store_id}")
|
||||
async def delete_store(
|
||||
store_id: str,
|
||||
user: dict = Depends(verify_api_key),
|
||||
db=Depends(get_db)
|
||||
):
|
||||
deleted = await db.fetchval(
|
||||
"""DELETE FROM vector_stores
|
||||
WHERE id=$1 AND owner_user_id=$2
|
||||
RETURNING id""",
|
||||
store_id, user["user_id"]
|
||||
)
|
||||
if not deleted:
|
||||
raise HTTPException(404, "Store not found or access denied")
|
||||
return {"deleted": str(deleted)}
|
||||
# ===== file: app/utils/__init__.py (new file, empty) =====
# ===== file: app/utils/chunking.py (new file, 36 lines) =====
|
||||
def chunk_text(
|
||||
text: str,
|
||||
chunk_size: int = 512,
|
||||
overlap: int = 50,
|
||||
) -> list[dict]:
|
||||
"""Text in ueberlappende Chunks aufteilen"""
|
||||
chunks = []
|
||||
start = 0
|
||||
index = 0
|
||||
|
||||
while start < len(text):
|
||||
end = start + chunk_size
|
||||
chunk = text[start:end]
|
||||
|
||||
if end < len(text):
|
||||
last_period = max(
|
||||
chunk.rfind(". "),
|
||||
chunk.rfind(".\n"),
|
||||
chunk.rfind("! "),
|
||||
chunk.rfind("? "),
|
||||
)
|
||||
if last_period > chunk_size // 2:
|
||||
end = start + last_period + 1
|
||||
chunk = text[start:end]
|
||||
|
||||
if chunk.strip():
|
||||
chunks.append({
|
||||
"text": chunk.strip(),
|
||||
"index": index,
|
||||
"start": start,
|
||||
})
|
||||
|
||||
start = end - overlap
|
||||
index += 1
|
||||
|
||||
return chunks
|
||||
# ===== file: app/utils/stats.py (new file, 24 lines) =====
|
||||
import time
|
||||
import logging
|
||||
from typing import Optional
|
||||
from app.database import pool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def track_usage(
    user_id: str,
    action: str,
    store_id: Optional[str] = None,
    tokens: int = 0,
    duration: float = 0
):
    """Write one usage-statistics row.

    Best effort: any database error is logged and swallowed so that usage
    tracking can never break the request that triggered it.
    """
    row = (user_id, store_id, action, tokens, round(duration, 3))
    try:
        async with pool.acquire() as connection:
            await connection.execute(
                """INSERT INTO usage_stats
                (user_id, store_id, action, tokens, duration)
                VALUES ($1, $2, $3, $4, $5)""",
                *row,
            )
    except Exception as e:
        logger.error(f"Tracking Fehler: {e}")
|
||||
# ===== end of diff =====