Skip to content

Commit

Permalink
fix: clean, print num rows
Browse files Browse the repository at this point in the history
  • Loading branch information
zanussbaum committed Mar 27, 2023
1 parent dfee696 commit 10db136
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions clean.py
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@

prompt_generation_dir = "raw_data_sanity_cleaned_delobotomized"
for file in glob.glob(os.path.join(prompt_generation_dir, "*.jsonl")):
-if "clean" in file:
+if "clean.jsonl" in file:
continue
data = []
print(file)
@@ -69,5 +69,5 @@
print(f"Removed {prev_len - curr_len} rows")

clean_name = file.split(".jsonl")[0] + "_clean.jsonl"
-print(f"writing to {clean_name}")
+print(f"writing to {curr_len} rows to {clean_name}")
df.to_json(clean_name, orient="records", lines=True)

0 comments on commit 10db136

Please sign in to comment.