Initial commit

This commit is contained in:
root
2026-04-29 08:17:35 +00:00
commit ef55253cbd
49 changed files with 3073 additions and 0 deletions

36
app/utils/chunking.py Normal file
View File

@@ -0,0 +1,36 @@
def chunk_text(
    text: str,
    chunk_size: int = 512,
    overlap: int = 50,
) -> list[dict]:
    """Split text into overlapping chunks, preferring sentence boundaries.

    The text is scanned with a window of ``chunk_size`` characters. When the
    window does not reach the end of the text, it is shortened to end right
    after the last sentence terminator (". ", ".\\n", "! ", "? ") found in
    the second half of the window. The next window starts ``overlap``
    characters before the end of the current one.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per window; must be > 0.
        overlap: Number of characters shared between consecutive windows;
            must be smaller than ``chunk_size``.

    Returns:
        A list of dicts with the keys ``"text"`` (the whitespace-stripped
        chunk), ``"index"`` (the running window number; windows containing
        only whitespace are counted but not emitted, so gaps are possible)
        and ``"start"`` (the offset of the unstripped window in ``text``).

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``; either would keep the window from
            ever advancing.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    text_len = len(text)
    chunks: list[dict] = []
    start = 0
    index = 0
    while start < text_len:
        end = start + chunk_size
        if end < text_len:
            window = text[start:end]
            boundary = max(
                window.rfind(sep) for sep in (". ", ".\n", "! ", "? ")
            )
            # Cut at the sentence boundary only if it lies in the second half
            # of the window AND the next window would still move forward
            # (end - overlap > start); otherwise keep the full-size window.
            if boundary > chunk_size // 2 and boundary + 1 > overlap:
                end = start + boundary + 1
        chunk = text[start:end].strip()
        if chunk:
            chunks.append({
                "text": chunk,
                "index": index,
                "start": start,
            })
        index += 1
        # This window already covers the rest of the text; another pass would
        # only emit a suffix of the chunk just produced.
        if end >= text_len:
            break
        start = end - overlap
    return chunks