Files
litellm-vector-store/app/utils/chunking.py
2026-04-29 08:17:35 +00:00

37 lines
895 B
Python

def chunk_text(
    text: str,
    chunk_size: int = 512,
    overlap: int = 50,
) -> list[dict]:
    """Split text into overlapping chunks, preferring sentence boundaries.

    The text is scanned with a window of ``chunk_size`` characters. When a
    window does not reach the end of the text and contains a sentence end
    (``". "``, ``".\\n"``, ``"! "`` or ``"? "``) in its second half, the
    chunk is cut right after that punctuation mark instead of mid-sentence.
    Each following window starts ``overlap`` characters before the end of
    the previous chunk.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per window; must be > 0.
        overlap: Number of characters shared between consecutive windows;
            must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of dicts with the keys ``"text"`` (the chunk with surrounding
        whitespace stripped), ``"index"`` (running window number) and
        ``"start"`` (offset of the window start in ``text``). Windows that
        contain only whitespace are skipped but still consume an index.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    # Out-of-range values would make the loop below stall or skip text.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    sentence_ends = (". ", ".\n", "! ", "? ")
    text_len = len(text)
    chunks = []
    start = 0
    index = 0
    while start < text_len:
        end = start + chunk_size
        chunk = text[start:end]
        if end < text_len:
            last_period = max(chunk.rfind(mark) for mark in sentence_ends)
            # Cut at the sentence end only if it lies in the second half of
            # the window AND the shortened chunk is longer than the overlap;
            # otherwise the next start would not move forward.
            if last_period > chunk_size // 2 and last_period + 1 > overlap:
                end = start + last_period + 1
                chunk = text[start:end]
        stripped = chunk.strip()
        if stripped:
            chunks.append({
                "text": stripped,
                "index": index,
                "start": start,
            })
        index += 1
        # Once a window reaches the end of the text, stop: stepping back by
        # the overlap would only re-emit a suffix of the chunk just added.
        if end >= text_len:
            break
        start = end - overlap
    return chunks