from typing import Optional


def _build_chunk(text: str, begin: int, end: int, index: int) -> Optional[dict]:
    """Return the chunk record for ``text[begin:end]``, or None if it is blank."""
    raw = text[begin:end]
    stripped = raw.strip()
    if not stripped:
        return None
    # Shift the offset past the leading whitespace that strip() removed so
    # that ``start`` points at the first character of ``stripped``.
    leading = len(raw) - len(raw.lstrip())
    return {"text": stripped, "index": index, "start": begin + leading}


def chunk_text(
    text: str,
    chunk_size: int = 512,
    overlap: int = 50,
    separator: str = "\n",
) -> list[dict]:
    """Split *text* into overlapping chunks along *separator* boundaries.

    Paragraphs (the pieces between separators) are packed greedily into a
    chunk until adding the next one would push the chunk past *chunk_size*
    characters. Every chunk is a contiguous slice of *text* with surrounding
    whitespace stripped, so ``text[c["start"]:c["start"] + len(c["text"])]``
    always equals ``c["text"]``.

    Args:
        text: The text to split.
        chunk_size: Target maximum number of characters per chunk. This is a
            soft limit: paragraphs are never split, so a single paragraph
            longer than *chunk_size* (or an overlap tail plus one paragraph)
            yields an oversized chunk.
        overlap: Number of trailing characters of the previous chunk that are
            repeated at the beginning of the next chunk. ``0`` disables
            overlapping.
        separator: Delimiter at which the text may be split.

    Returns:
        A list of dicts with the keys ``"text"`` (the stripped chunk),
        ``"index"`` (0-based position in the list) and ``"start"`` (offset of
        the chunk within *text*). Blank chunks are omitted.

    Raises:
        ValueError: If *chunk_size* is smaller than 1, *overlap* is negative,
            or *separator* is empty.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    if overlap < 0:
        raise ValueError("overlap must not be negative")

    chunks: list[dict] = []
    sep_len = len(separator)
    # The chunk under construction is the window text[window_start:window_end].
    window_start = 0
    window_end = 0
    cursor = 0  # offset of the next paragraph within text

    for para in text.split(separator):
        para_start = cursor
        para_end = para_start + len(para)
        cursor = para_end + sep_len

        window_filled = window_end > window_start
        if window_filled and para_end - window_start > chunk_size:
            chunk = _build_chunk(text, window_start, window_end, len(chunks))
            if chunk is not None:
                chunks.append(chunk)
            if overlap:
                # Re-use the last `overlap` characters of the flushed chunk,
                # but never reach back before that chunk's own beginning.
                window_start = max(window_start, window_end - overlap)
            else:
                # No overlap requested: begin cleanly at the new paragraph
                # (a plain `s[-0:]` slice would keep the whole old chunk).
                window_start = para_start
        window_end = para_end

    chunk = _build_chunk(text, window_start, window_end, len(chunks))
    if chunk is not None:
        chunks.append(chunk)
    return chunks