def chunk_text(
    text: str,
    chunk_size: int = 512,
    overlap: int = 50,
) -> list[dict]:
    """Split text into overlapping chunks, preferring sentence boundaries.

    The text is scanned with a window of ``chunk_size`` characters. When a
    window does not reach the end of the text, it is shortened to end just
    after the last sentence terminator (``". "``, ``".\\n"``, ``"! "`` or
    ``"? "``) found in it, provided that terminator lies past both the
    window midpoint and ``overlap``. The next window starts ``overlap``
    characters before the end of the current one.

    Args:
        text: The text to split.
        chunk_size: Maximum window length in characters. Must be positive.
        overlap: Number of characters shared between consecutive windows.
            Must be smaller than ``chunk_size``.

    Returns:
        A list of dicts, one per non-blank window, with the keys:
        ``"text"`` (the window content with surrounding whitespace
        stripped), ``"index"`` (the window counter, which also advances for
        blank windows that are skipped) and ``"start"`` (the offset of the
        unstripped window in ``text``).

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``; either would prevent the window
            from ever moving forward.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    text_len = len(text)
    separators = (". ", ".\n", "! ", "? ")
    # A sentence cut is only accepted past the window midpoint (to avoid
    # tiny chunks) and past `overlap` (so the next window's start is
    # strictly greater than the current one and the loop terminates).
    min_cut = max(chunk_size // 2, overlap)

    chunks: list[dict] = []
    start = 0
    index = 0
    while start < text_len:
        end = start + chunk_size
        chunk = text[start:end]

        if end < text_len:
            # rfind returns -1 when a separator is absent, so `cut` is -1
            # if the window contains no sentence terminator at all.
            cut = max(chunk.rfind(sep) for sep in separators)
            if cut > min_cut:
                # Keep the punctuation mark, drop the whitespace after it.
                end = start + cut + 1
                chunk = text[start:end]

        stripped = chunk.strip()
        if stripped:
            chunks.append({"text": stripped, "index": index, "start": start})

        # Once a window has reached the end of the text, stepping back by
        # `overlap` would only re-emit a tail already contained in it.
        if end >= text_len:
            break

        start = end - overlap
        index += 1

    return chunks