from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
import pypdf
import docx
import io

router = APIRouter()


async def extract_text(file: UploadFile) -> str:
    """Extract plain text from an uploaded PDF, DOCX or TXT file.

    The format is chosen from the file name's extension, compared
    case-insensitively.

    Args:
        file: The uploaded file; its whole content is read into memory.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; TXT content is decoded as UTF-8.

    Raises:
        HTTPException: 400 if the extension is not supported (including a
            missing file name) or a ``.txt`` file is not valid UTF-8.
    """
    content = await file.read()
    # The file name may be absent; treat that as "no extension" so the
    # request falls through to the 400 below instead of crashing.
    name = (file.filename or "").lower()

    if name.endswith(".pdf"):
        pdf = pypdf.PdfReader(io.BytesIO(content))
        # Defensive: tolerate a page that yields no text.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)

    if name.endswith(".docx"):
        doc = docx.Document(io.BytesIO(content))
        return "\n".join(p.text for p in doc.paragraphs)

    if name.endswith(".txt"):
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError as exc:
            # A badly encoded upload is a client error, not a server fault.
            raise HTTPException(
                400, f"Datei ist kein gültiges UTF-8: {file.filename}"
            ) from exc

    raise HTTPException(400, f"Nicht unterstütztes Format: {file.filename}")


@router.post("/v1/vector_stores/{store_id}/upload")
async def upload_file(
    store_id: str,
    file: UploadFile = File(...),
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    # Turn the upload into plain text, then split it into chunks.
    text = await extract_text(file)
    chunks = chunk_text(text)