Add chunk iterator for chrome_snapshot_parser
Summary:
Most of the parsing of a Chrome heap snapshot involves iterating over
fixed-size groups of numbers and converting each group into a dict.

Simplify some of this by adding a generator that yields an array in
fixed-size chunks, then drive the edge parsing with a single iterator
over the edge chunks. This works because the snapshot stores edges
grouped by their owning node, in node order, so each node can simply
consume its next edge_count chunks (see the sketch below).
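
As an illustration, the chunk generator below is the one added in this
diff; the edge array and its values are made-up sample data:

    def chunk(arr, chunk_size):
        # Yield consecutive, non-overlapping slices of length chunk_size.
        assert len(arr) % chunk_size == 0, "arr must be evenly divisible by the chunk size"
        assert chunk_size >= 1, "chunk_size must be at least 1"
        for i in range(0, len(arr), chunk_size):
            yield arr[i : i + chunk_size]

    # Made-up flat edge array: two edges of three fields each
    # (type, name_or_index, to_node).
    edges = [0, 7, 6, 1, 12, 0]
    edge_iter = iter(chunk(edges, 3))
    # A node with edge_count == 2 consumes exactly two chunks from the
    # shared iterator; no index arithmetic is needed.
    assert next(edge_iter) == [0, 7, 6]
    assert next(edge_iter) == [1, 12, 0]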

Also, modify the root object in place, so that any top-level fields
the script does not rewrite are still present in the final snapshot
(see the second sketch below).
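
A minimal sketch of the in-place rewrite, with a made-up root dict (the
"snapshot" key stands in for whatever other top-level fields the file
carries):

    import json

    root = {
        "snapshot": {"meta": "..."},  # not touched by the script
        "nodes": [],
        "edges": [],
        "locations": [],
        "strings": [],
    }
    nodes, samples = [], []   # stand-ins for the parsed results
    root["nodes"] = nodes     # parsed data replaces the raw arrays
    root["samples"] = samples
    del root["edges"]         # raw arrays that were fully consumed
    del root["locations"]
    del root["strings"]
    # Unvisited fields such as "snapshot" still appear in the output.
    print(json.dumps(root, indent=2))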

Reviewed By: neildhar

Differential Revision: D23788741

fbshipit-source-id: 08edec05c126db5f3e228cdf9ade307d0b596d5f
Riley Dulin authored and facebook-github-bot committed Sep 23, 2020
1 parent a691fbe commit bbc0abb
Showing 1 changed file with 26 additions and 30 deletions.
56 changes: 26 additions & 30 deletions utils/chrome_snapshot_parser.py
@@ -45,26 +45,28 @@
 SAMPLE_FIELDS = ["timestamp_us", "last_assigned_id"]


+def chunk(arr, chunk_size):
+    assert len(arr) % chunk_size == 0, "arr must be evenly divisible by the chunk size"
+    assert chunk_size >= 1, "chunk_size must be at least 1"
+    for i in range(0, len(arr), chunk_size):
+        yield arr[i : i + chunk_size]
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("heapsnapshot")
     parser.add_argument("out")
     args = parser.parse_args()
     with open(args.heapsnapshot, "r") as f:
         root = json.load(f)
-    curr_node = 0
-    curr_edge = 0
     nodes = []
-    while curr_node < len(root["nodes"]):
-        raw_type, name, id, self_size, edge_count, trace_node_id = root["nodes"][
-            curr_node : curr_node + len(NODE_FIELDS)
-        ]
+    curr_edge = iter(chunk(root["edges"], len(EDGE_FIELDS)))
+    for raw_type, name, id, self_size, edge_count, trace_node_id in chunk(
+        root["nodes"], len(NODE_FIELDS)
+    ):
         edges = []
-        end_edge = curr_edge + edge_count * len(EDGE_FIELDS)
-        while curr_edge < end_edge:
-            raw_edge_type, name_or_index, to_node = root["edges"][
-                curr_edge : curr_edge + len(EDGE_FIELDS)
-            ]
+        for _ in range(edge_count):
+            raw_edge_type, name_or_index, to_node = next(curr_edge)
             real_type = EDGE_TYPES[raw_edge_type]
             edges.append(
                 {
@@ -76,7 +78,6 @@ def main():
                     "to_node": to_node // len(NODE_FIELDS),
                 }
             )
-            curr_edge += len(EDGE_FIELDS)
         nodes.append(
             {
                 "type": NODE_TYPES[raw_type],
@@ -87,37 +88,32 @@ def main():
                 "trace_node_id": trace_node_id,
             }
         )
-        curr_node += len(NODE_FIELDS)
+    del root["edges"]

     # Iterate through locations and add the location resolution to nodes
-    curr_loc = 0
-    while curr_loc < len(root["locations"]):
-        object_index, script_id, line, column = root["locations"][
-            curr_loc : curr_loc + len(LOCATION_FIELDS)
-        ]
+    for object_index, script_id, line, column in chunk(
+        root.get("locations", []), len(LOCATION_FIELDS)
+    ):
         nodes[object_index // len(NODE_FIELDS)]["location"] = {
             "script_id": script_id,
             # Line numbers and column numbers are 0-based internally,
             # but 1-based when viewed.
             "line": line + 1,
             "column": column + 1,
         }
-        curr_loc += len(LOCATION_FIELDS)
+    del root["locations"]

-    curr_loc = 0
-    samples = []
-    source_samples = root.get("samples", [])
-    while curr_loc < len(source_samples):
-        timestamp_us, last_assigned_id = source_samples[
-            curr_loc : curr_loc + len(SAMPLE_FIELDS)
-        ]
-        samples.append(
-            {"timestamp": timestamp_us, "last_assigned_id": last_assigned_id}
+    root["nodes"] = nodes
+    root["samples"] = [
+        {"timestamp": timestamp_us, "last_assigned_id": last_assigned_id}
+        for timestamp_us, last_assigned_id in chunk(
+            root.get("samples", []), len(SAMPLE_FIELDS)
        )
-        curr_loc += len(SAMPLE_FIELDS)
+    ]

+    del root["strings"]
     with open(args.out, "w") as f:
-        json.dump({"nodes": nodes, "samples": samples}, f, indent=2)
+        json.dump(root, f, indent=2)


 if __name__ == "__main__":
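For reference, the script's command-line interface is unchanged by this
diff; the file names below are examples:

    python utils/chrome_snapshot_parser.py input.heapsnapshot output.json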
