Skip to content

Commit

Permalink
fix split file
Browse files Browse the repository at this point in the history
  • Loading branch information
rocks6 authored and p-i- committed Apr 17, 2023
1 parent 6b64158 commit def96ff
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions autogpt/commands/file_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,12 @@ def log_operation(operation: str, filename: str) -> None:

append_to_file(LOG_FILE, log_entry, shouldLog = False)


def split_file(
content: str, max_length: int = 4000, overlap: int = 0
) -> Generator[str, None, None]:
"""
Split text into chunks of a specified maximum length with a specified overlap
between chunks.
:param content: The input text to be split into chunks
:param max_length: The maximum length of each chunk,
default is 4000 (about 1k tokens)
Expand All @@ -70,9 +68,14 @@ def split_file(
while start < content_length:
end = start + max_length
if end + overlap < content_length:
chunk = content[start : end + overlap]
chunk = content[start : end + overlap - 1]
else:
chunk = content[start:content_length]

# Account for the case where the last chunk is no longer than the overlap, so it has already been consumed
if len(chunk) <= overlap:
break

yield chunk
start += max_length - overlap

Expand Down

0 comments on commit def96ff

Please sign in to comment.