Commit

update xgrammar testing scripts

mmoskal committed Dec 20, 2024
1 parent 69263ec commit 3f60753
Showing 4 changed files with 195 additions and 72 deletions.
5 changes: 5 additions & 0 deletions json_stats/scripts/kill_xgr.sh
@@ -0,0 +1,5 @@
#!/bin/sh

kill `ps fax|grep xgr_test | awk '{print $1}'`
kill -9 `ps fax|grep xgr_test | awk '{print $1}'`
ps fax|grep xgr_test
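
A caveat with this pattern: ps | grep can match the grep process itself, so one of the kill invocations may target an already-gone PID (harmless, but noisy). For illustration only, not part of the commit, a stdlib Python equivalent that skips its own process:

#!/usr/bin/env python3
# Hypothetical Python rendering of kill_xgr.sh: SIGTERM, then SIGKILL,
# for every process whose command line mentions "xgr_test".
import os
import signal
import subprocess

def pids_matching(pattern: str) -> list[int]:
    # "pid=,args=" suppresses the header row (POSIX ps).
    out = subprocess.check_output(["ps", "ax", "-o", "pid=,args="], text=True)
    pids = []
    for line in out.splitlines():
        pid, _, cmd = line.strip().partition(" ")
        if pattern in cmd and int(pid) != os.getpid():
            pids.append(int(pid))
    return pids

for sig in (signal.SIGTERM, signal.SIGKILL):
    for pid in pids_matching("xgr_test"):
        try:
            os.kill(pid, sig)
        except ProcessLookupError:
            pass  # exited between listing and kill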
75 changes: 75 additions & 0 deletions json_stats/scripts/xgr/xgr_combine.py
@@ -0,0 +1,75 @@
#!/usr/bin/env python3

import json
import glob

output_path = "tmp/xgr/"


class Stats:
def __init__(self) -> None:
self.ttfm_us = 0
self.max_ttfm_us = 0
self.masks_us = 0
self.max_mask_us = 0
self.num_tokens = 0
self.num_schemas = 0
self.num_schemas_ok = 0
self.num_compilation_errors = 0
self.num_validation_errors = 0
self.num_tests = 0
self.num_valid_tests = 0
self.num_invalid_tests = 0

def log_fraction_plot(times: list[int]):
    # Survival curve with geometrically spaced cutoffs: for each sample above
    # the current cutoff, emit "cutoff (ms), fraction of samples still above",
    # then grow the cutoff by ~1.3x.
    times.sort()
cutoff = 1
mult = 1.3
count = 0
csv = "cutoff time,count left\n"
total = len(times)
for t in times:
if t > cutoff:
csv += f"{cutoff/1000.0},{(total - count)/total}\n"
cutoff = int(cutoff * mult) + 1
count += 1
return csv

def main():
files = glob.glob(output_path + "*.json")
files = sorted(files)
stats = Stats()
ttfm_us = []
all_masks_us = []
    for fn in files:
        with open(fn) as f:
            data = json.load(f)
if "num_tests" not in data:
continue
stats.num_schemas += 1
stats.num_tests += data["num_tests"]
if "compile_error" in data:
stats.num_compilation_errors += 1
else:
stats.ttfm_us += data["ttfm_us"]
ttfm_us.append(data["ttfm_us"])
stats.max_ttfm_us = max(data["max_ttfm_us"], stats.max_ttfm_us)
stats.masks_us += data["masks_us"]
stats.max_mask_us = max(data["max_mask_us"], stats.max_mask_us)
stats.num_tokens += data["num_tokens"]
if "validation_error" in data:
stats.num_validation_errors += 1
else:
stats.num_schemas_ok += 1
stats.num_valid_tests += data["num_valid_tests"]
stats.num_invalid_tests += data["num_invalid_tests"]
all_masks_us.extend(data["all_mask_us"])
print(json.dumps(stats.__dict__, indent=2))
with open("tmp/xgr_ttfm_us.csv", "w") as f:
f.write(log_fraction_plot(ttfm_us))
with open("tmp/xgr_masks_us.csv", "w") as f:
f.write(log_fraction_plot(all_masks_us))



main()
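
To make the CSV semantics concrete: log_fraction_plot emits a survival curve, one row per sample that exceeds the current cutoff, with the cutoff (microseconds internally, milliseconds in the first column) growing by roughly 1.3x per row and the second column giving the fraction of samples still above it. A quick sanity check, assuming the function has been pasted into a REPL (importing the module directly would run main() and expect tmp/xgr/ to exist):

samples = [50, 100, 200, 400, 800]  # synthetic latencies in microseconds
print(log_fraction_plot(samples))
# cutoff time,count left
# 0.001,1.0      <- all 5 samples exceed a 1 us cutoff
# 0.002,0.8      <- 4 of 5 samples exceed 2 us
# ... and so on until the cutoff overtakes the largest sample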
22 changes: 8 additions & 14 deletions json_stats/scripts/xgr/xgr_multi.py
@@ -51,15 +51,15 @@ def process_file(files: List[str]):
             "message": str(e)
         }

-files = []
+file_name = []
 for arg in sys.argv[1:]:
     if arg.endswith(".json"):
-        files.append(arg)
+        file_name.append(arg)
     else:
-        files.extend(glob.glob(arg + "/*.json"))
-print(len(files), file=sys.stderr)
+        file_name.extend(glob.glob(arg + "/*.json"))
+print(len(file_name), file=sys.stderr)
 missing_files = []
-for f in files:
+for f in file_name:
     file_base = f.split("/")[-1]
     output_name = f"{output_base}/{file_base}"
     if not os.path.exists(output_name):
@@ -82,16 +82,10 @@ def process_file(files: List[str]):
 with concurrent.futures.ThreadPoolExecutor(max_workers=40) as executor:
     futures = {executor.submit(process_file, f): f for f in chunks}
     for future in concurrent.futures.as_completed(futures):
-        files = futures[future]
+        file_name = futures[future]
         try:
             r = future.result()
-            cnt += len(files)
-            print(cnt)
-            rs = json.dumps(r)
-            with open(log_file, "a") as f:
-                f.write(f"FILES: {files}\n{rs}\n")
-            # print(f"OK: {files}")
+            print(file_name)
         except Exception as e:
-            with open(log_file, "a") as f:
-                f.write(f"ERROR {files}: {repr(e)}")
-            print(f"ERROR: {files}", repr(e))
+            print(f"ERROR: {file_name}", repr(e))
165 changes: 107 additions & 58 deletions json_stats/scripts/xgr/xgr_test.py
@@ -6,87 +6,136 @@
 import os
 import random
 import time
+import resource

 import xgrammar as xgr
 import torch
 import numpy as np
 from transformers import AutoTokenizer, AutoConfig

-positive_base = os.environ.get("HOME") + "/src/json-data/positive"
-output_base = os.environ.get("HOME") + "/src/json-data/xgr_output"
+output_path = "tmp/xgr/"


-def do_process(file: str):
+def time_us(prev: float) -> int:
+    return int((time.monotonic() - prev) * 1000000)
+
+
+def process_file(file: str):
+    id = os.path.basename(file)
+    output_name = output_path + id
+    if os.path.exists(output_name):
+        return None
+
+    with open(output_name, "w") as f:
+        f.write(json.dumps({ "pending_file": 1 }, indent=2))
+
     with open(file) as f:
         pos_data = json.loads(f.read())

     schema = json.dumps(pos_data["schema"])
-    instance = json.dumps(pos_data["tests"][0]["data"], indent=4)

     token_bitmask = xgr.allocate_token_bitmask(1, tokenizer_info.vocab_size)
-    tokens = tokenizer.encode(instance, add_special_tokens=False)

+    all_mask_us = []
     status = {
         "file": file,
-        "ok": False,
-        "num_tokens": len(tokens),
-        "accepted_tokens": 0,
+        "id": id,
+        "ttfm_us": 0,
+        "max_ttfm_us": 0,
+        "masks_us": 0,
+        "max_mask_us": 0,
+        "num_tokens": 0,
+        "num_tests": len(pos_data["tests"]),
+        "all_mask_us": all_mask_us,
+        "num_valid_tests": 0,
+        "num_invalid_tests": 0,
     }

     try:
         t0 = time.monotonic()
-        compiled_grammar = compiler.compile_json_schema(schema, indent=4)
+        compiled_grammar = compiler.compile_json_schema(
+            schema, any_whitespace=True, strict_mode=False
+        )
         matcher = xgr.GrammarMatcher(compiled_grammar)
     except Exception as e:
         status["compile_error"] = repr(e)
+        with open(output_name, "w") as f:
+            f.write(json.dumps(status, indent=2))
         return status

-    status["compile_time"] = int((time.monotonic() - t0) * 1000)
+    status["ttfm_us"] = time_us(t0)
+    status["max_ttfm_us"] = status["ttfm_us"]
+
+    masks_us = 0
+    max_mask_us = 0
+    num_tokens = 0
+
+    for i, test in enumerate(pos_data["tests"]):
+        instance = json.dumps(test["data"], indent=None)
+        tokens = tokenizer.encode(instance, add_special_tokens=False)
+
+        t1 = time.monotonic()
+        accepted = True
+        for tidx, t in enumerate(tokens):
+            t2 = time.monotonic()
+            matcher.fill_next_token_bitmask(token_bitmask)
+            ok = matcher.accept_token(t)
+            mask_time = time_us(t2)
+            num_tokens += 1
+            masks_us += mask_time
+            all_mask_us.append(mask_time)
+            if mask_time > max_mask_us:
+                max_mask_us = mask_time
+            if not ok:
+                accepted = False
+                break
+
+        if accepted and not test["valid"]:
+            status["validation_error"] = f"test #{i}: should reject but didn't"
+        elif not accepted and test["valid"]:
+            status["validation_error"] = f"test #{i}: should accept but didn't"
+        else:
+            if test["valid"]:
+                status["num_valid_tests"] += 1
+            else:
+                status["num_invalid_tests"] += 1
+
+    status["masks_us"] = masks_us
+    status["max_mask_us"] = max_mask_us
+    status["num_tokens"] = num_tokens

-    t1 = time.monotonic()
-    for i, t in enumerate(tokens):
-        matcher.fill_next_token_bitmask(token_bitmask)
-        ok = matcher.accept_token(t)
-        if not ok:
-            break
-        status["accepted_tokens"] = i + 1
-
-    status["ok"] = status["accepted_tokens"] == len(tokens)
     with open(output_name, "w") as f:
         f.write(json.dumps(status, indent=2))
     return status


-def process_file(file: str):
-    file_base = file.split("/")[-1]
-    output_name = f"{output_base}/{file_base}"
-    if os.path.exists(output_name):
-        return
-
-    print("PROCESSING: " + file, file=sys.stderr)
-    status = do_process(file)
-    print("RESULT: " + json.dumps(status), file=sys.stderr)
-    with open(output_name, "w") as f:
-        f.write(json.dumps(status, indent=4))
-
-
-# Get tokenizer info
-model_id = "meta-llama/Llama-3.2-1B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-config = AutoConfig.from_pretrained(model_id)
-# This can be larger than tokenizer.vocab_size due to paddings
-full_vocab_size = config.vocab_size
-tokenizer_info = xgr.TokenizerInfo.from_huggingface(
-    tokenizer, vocab_size=full_vocab_size
-)
-compiler = xgr.GrammarCompiler(tokenizer_info, max_threads=1)
-
-files = []
-for arg in sys.argv[1:]:
-    if arg.endswith(".json"):
-        files.append(arg)
-    else:
-        files.extend(glob.glob(arg + "/*.json"))
-print(len(files), file=sys.stderr)
-random.shuffle(files)
-
-for f in files:
-    process_file(f)
+def main():
+    global tokenizer_info, compiler, tokenizer
+
+    limit_gb = 32
+    limit_bytes = limit_gb * 1024 * 1024 * 1024
+    resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
+
+    # Get tokenizer info
+    model_id = "meta-llama/Llama-3.1-8B-Instruct"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    config = AutoConfig.from_pretrained(model_id)
+    # This can be larger than tokenizer.vocab_size due to paddings
+    full_vocab_size = config.vocab_size
+    tokenizer_info = xgr.TokenizerInfo.from_huggingface(
+        tokenizer, vocab_size=full_vocab_size
+    )
+    compiler = xgr.GrammarCompiler(tokenizer_info, max_threads=1)
+
+    files = []
+    for arg in sys.argv[1:]:
+        if arg.endswith(".json"):
+            files.append(arg)
+        else:
+            files.extend(glob.glob(arg + "/*.json"))
+    print(len(files), file=sys.stderr)
+    random.shuffle(files)
+
+    os.makedirs(output_path, exist_ok=True)
+
+    for f in files:
+        print(f, file=sys.stderr)
+        process_file(f)
+
+main()
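
The other notable addition is the resource.setrlimit call: capping RLIMIT_AS means a schema whose grammar compilation exhausts memory raises MemoryError inside the worker, where it is caught and recorded as a compile_error, rather than having the kernel OOM killer take out the whole run. A standalone POSIX-only sketch of the pattern (the 32 GB figure mirrors the script; tune it per machine):

import resource

def cap_address_space(limit_gb: int) -> None:
    # RLIMIT_AS bounds total virtual memory for this process; allocations
    # beyond the cap raise MemoryError instead of triggering the OOM killer.
    limit_bytes = limit_gb * 1024 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))

cap_address_space(32)
try:
    buf = bytearray(64 * 1024 * 1024 * 1024)  # deliberately above the cap
except MemoryError as e:
    print("caught:", repr(e))  # the process survives to report the failure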
