Files
litellm-vector-store/app/routers/files.py
2026-04-29 08:17:35 +00:00

36 lines
1.0 KiB
Python

from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
import pypdf
import docx
import io
# Router instance on which this module's endpoints are registered.
router = APIRouter()
async def extract_text(file: UploadFile) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    The format is selected from the file name's extension, compared
    case-insensitively.

    Args:
        file: The uploaded file. Its entire content is read into memory.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newline characters; TXT content is decoded as UTF-8.

    Raises:
        HTTPException: Status 400 if the file has no name, the extension is
            not one of ".pdf", ".docx", ".txt", or a ".txt" file is not
            valid UTF-8.
    """
    content = await file.read()
    # The upload may arrive without a file name; treat that like an
    # unsupported format instead of failing on None.endswith().
    name = (file.filename or "").lower()

    if name.endswith(".pdf"):
        pdf = pypdf.PdfReader(io.BytesIO(content))
        # Fall back to "" so a page yielding no text cannot break join().
        return "\n".join(page.extract_text() or "" for page in pdf.pages)

    if name.endswith(".docx"):
        doc = docx.Document(io.BytesIO(content))
        return "\n".join(p.text for p in doc.paragraphs)

    if name.endswith(".txt"):
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError as err:
            # Bad encoding is a client-side input problem, not a server error.
            raise HTTPException(
                400, f"Datei ist nicht UTF-8-kodiert: {file.filename}"
            ) from err

    raise HTTPException(400, f"Nicht unterstütztes Format: {file.filename}")
@router.post("/v1/vector_stores/{store_id}/upload")
async def upload_file(
store_id: str,
file: UploadFile = File(...),
user: dict = Depends(verify_api_key),
db=Depends(get_db)
):
text = await extract_text(file)
chunks = chunk_text(text)