Skip to content

Commit

Permalink
Create data_ingestion.py
Browse files Browse the repository at this point in the history
This script is used when we want to seed Auto-GPT memory with one or more documents.

The documents are read, split into chunks, and stored in memory.
  • Loading branch information
maiko committed Apr 12, 2023
1 parent c911176 commit 8faa6ef
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions scripts/data_ingestion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import argparse
import logging
from config import Config
from memory import get_memory
from file_operations import ingest_file, ingest_directory

# Module-level configuration instance; main() passes it to get_memory().
cfg = Config()

def configure_logging():
    """Set up file logging for the ingestion script and return its logger.

    Calls ``logging.basicConfig`` so that records are appended to
    ``log-ingestion.txt`` (resolved against the current working directory)
    at ``DEBUG`` level, then returns the logger named ``AutoGPT-Ingestion``.

    Returns:
        The ``AutoGPT-Ingestion`` logger.
    """
    logging.basicConfig(
        filename='log-ingestion.txt',
        filemode='a',
        # Zero-pad the milliseconds: with a plain %d, 5 ms is written as
        # ",5", which is indistinguishable at a glance from 500 ms.
        format='%(asctime)s,%(msecs)03d %(name)s %(levelname)s %(message)s',
        datefmt='%H:%M:%S',
        level=logging.DEBUG,
    )
    return logging.getLogger('AutoGPT-Ingestion')


def main(argv=None):
    """Parse the command line and ingest a file or a directory into memory.

    Exactly one of ``--file`` or ``--dir`` must be supplied; ``--init`` is
    forwarded to ``get_memory`` as its ``init`` argument. Ingestion errors
    are logged (with traceback) and printed rather than propagated.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, so existing callers
            that invoke ``main()`` are unaffected.
    """
    logger = configure_logging()

    parser = argparse.ArgumentParser(description="Ingest a file or a directory with multiple files into memory. Make sure to set your .env before running this script.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--file", type=str, help="The file to ingest.")
    group.add_argument("--dir", type=str, help="The directory containing the files to ingest.")
    parser.add_argument("--init", action='store_true', help="Init the memory and wipe its content", default=False)
    args = parser.parse_args(argv)

    # Resolve what to ingest BEFORE touching memory. An empty value such as
    # `--file ""` satisfies argparse but is not a usable target, and we must
    # not initialise (or, with --init, wipe) the memory in that case.
    if args.file:
        label, target, ingest = "File", args.file, ingest_file
    elif args.dir:
        label, target, ingest = "Directory", args.dir, ingest_directory
    else:
        print("Please provide either a file path (--file) or a directory name (--dir) inside the auto_gpt_workspace directory as input.")
        return

    # Initialize memory
    memory = get_memory(cfg, init=args.init)
    print('Using memory of type: ' + memory.__class__.__name__)

    try:
        ingest(target, memory)
    except Exception as e:
        # Top-level boundary of a CLI script: report and keep the traceback
        # in the log file instead of crashing with a raw stack trace.
        logger.exception("Error while ingesting %s '%s': %s", label.lower(), target, e)
        print(f"Error while ingesting {label.lower()} '{target}': {e}")
    else:
        print(f"{label} '{target}' ingested successfully.")


# Run the ingestion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 comments on commit 8faa6ef

Please sign in to comment.