Initial commit
This commit is contained in:
35
app/routers/files.py
Normal file
35
app/routers/files.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
|
||||
import pypdf
|
||||
import docx
|
||||
import io
|
||||
|
||||
# Router instance on which this module registers its upload endpoint.
router = APIRouter()
|
||||
|
||||
async def extract_text(file: UploadFile) -> str:
    """Extract plain text from an uploaded PDF, DOCX or TXT file.

    The format is chosen from the file name's extension, compared
    case-insensitively. The whole upload is read into memory first.

    Args:
        file: The uploaded file to read.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; TXT content is decoded as UTF-8.

    Raises:
        HTTPException: Status 400 if the extension is missing or not one of
            ``.pdf``, ``.docx``, ``.txt``, or if a ``.txt`` upload is not
            valid UTF-8.
    """
    content = await file.read()

    # The file name may be absent on an upload; treat that as "no extension"
    # so the request is rejected with a 400 below instead of crashing.
    name = (file.filename or "").lower()

    if name.endswith(".pdf"):
        pdf = pypdf.PdfReader(io.BytesIO(content))
        return "\n".join(page.extract_text() for page in pdf.pages)

    if name.endswith(".docx"):
        doc = docx.Document(io.BytesIO(content))
        return "\n".join(p.text for p in doc.paragraphs)

    if name.endswith(".txt"):
        try:
            return content.decode("utf-8")
        except UnicodeDecodeError as exc:
            # A wrongly encoded upload is a client error, not a server fault.
            raise HTTPException(
                400, f"Datei ist nicht UTF-8-kodiert: {file.filename}"
            ) from exc

    raise HTTPException(400, f"Nicht unterstütztes Format: {file.filename}")
|
||||
|
||||
|
||||
@router.post("/v1/vector_stores/{store_id}/upload")
async def upload_file(
    store_id: str,
    file: UploadFile = File(...),
    user: dict = Depends(verify_api_key),
    db=Depends(get_db)
):
    """Handle a file upload posted to ``/v1/vector_stores/{store_id}/upload``.

    Extracts the text of the uploaded file via ``extract_text`` and passes
    it to ``chunk_text``.

    NOTE(review): as written, the resulting chunks are not stored or
    returned, and ``store_id``, ``user`` and ``db`` are not used in the
    body; the handler returns ``None``. This looks unfinished - confirm
    the intended persistence and response.
    """
    extracted = await extract_text(file)
    chunks = chunk_text(extracted)
|
||||
Reference in New Issue
Block a user