Bug 1619554 Improve performance of |mach try fuzzy| preview r=ahal
Remove use of the requests module in the preview pane.

Reformat task duration data to avoid reprocessing in the preview pane.

Avoid loading the task durations JSON more than once.

Increase the required fzf version; pass selections through a temporary file instead of the argument list.

Differential Revision: https://phabricator.services.mozilla.com/D65094

--HG--
extra : moz-landing-system : lando
Simon Fraser committed Mar 5, 2020
1 parent 01d1683 commit 54f21ea
Showing 5 changed files with 141 additions and 119 deletions.
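
Of these changes, the task-duration reformat is the one aimed squarely at the preview pane's hot path: the history file switches from a list of records to a flat name-to-seconds mapping, so each preview render can look a duration up directly instead of rebuilding a dict first. A minimal sketch of the two shapes (the task names are illustrative, not from the commit):

    import json

    # Old shape: a list of dicts, reprocessed on every preview render.
    old = [
        {"name": "build-linux64/opt", "mean_duration_seconds": 1520.0},
        {"name": "test-linux64/opt-mochitest-1", "mean_duration_seconds": 870.0},
    ]

    # New shape: a single mapping, written once by download_task_history_data().
    new = {d["name"]: d["mean_duration_seconds"] for d in old}
    assert new["build-linux64/opt"] == 1520.0
    print(json.dumps(new, indent=4))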
tools/tryselect/push.py (5 changes: 2 additions & 3 deletions)
@@ -11,12 +11,11 @@
 from mozboot.util import get_state_dir
 from mozbuild.base import MozbuildObject
 from mozversioncontrol import get_repository_object, MissingVCSExtension
-from .util.estimates import (
-    duration_summary,
+from .util.manage_estimates import (
     download_task_history_data,
     make_trimmed_taskgraph_cache
 )

+from .util.estimates import duration_summary

 GIT_CINNABAR_NOT_FOUND = """
 Could not detect `git-cinnabar`.
tools/tryselect/selectors/fuzzy.py (14 changes: 7 additions & 7 deletions)
@@ -20,7 +20,7 @@
 from ..cli import BaseTryParser
 from ..tasks import generate_tasks, filter_tasks_by_paths
 from ..push import check_working_directory, push_to_try, generate_try_task_config
-from ..util.estimates import download_task_history_data, make_trimmed_taskgraph_cache
+from ..util.manage_estimates import download_task_history_data, make_trimmed_taskgraph_cache

 terminal = Terminal()

@@ -61,7 +61,7 @@
 FZF_VERSION_FAILED = """
 Could not obtain the 'fzf' version.
-The 'mach try fuzzy' command depends on fzf, and requires version > 0.18.0
+The 'mach try fuzzy' command depends on fzf, and requires version > 0.20.0
 for some of the features. Please install it following the appropriate
 instructions for your platform:
@@ -202,9 +202,9 @@ def should_force_fzf_update(fzf_bin):
     # Some fzf versions have extra, e.g 0.18.0 (ff95134)
     fzf_version = fzf_version.split()[0]

-    # 0.18.0 introduced FZF_PREVIEW_COLUMNS as an env variable
-    # in preview subprocesses, which is a feature we use.
-    if StrictVersion(fzf_version) < StrictVersion('0.18.0'):
+    # 0.20.0 introduced passing selections through a temporary file,
+    # which is good for large ctrl-a actions.
+    if StrictVersion(fzf_version) < StrictVersion('0.20.0'):
         print("fzf version is old, forcing update.")
         return True
     return False
@@ -341,12 +341,12 @@ def run(update=False, query=None, intersect_query=None, try_config=None, full=Fa

     if show_estimates:
         base_cmd.extend([
-            '--preview', 'python {} -g {} -s -c {} "{{+}}"'.format(
+            '--preview', 'python {} -g {} -s -c {} -t "{{+f}}"'.format(
                 PREVIEW_SCRIPT, dep_cache, cache_dir),
         ])
     else:
         base_cmd.extend([
-            '--preview', 'python {} "{{+}}"'.format(PREVIEW_SCRIPT),
+            '--preview', 'python {} -t "{{+f}}"'.format(PREVIEW_SCRIPT),
         ])

     if exact:
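
With fzf 0.20.0, the "{+f}" placeholder makes fzf write the current selection to a temporary file and substitute that file's path into the preview command, so a large ctrl-a selection no longer has to fit through the argument list. A rough sketch of the mechanism (the selection and script path are hypothetical):

    import subprocess
    import tempfile

    # Stand-in for what fzf does when it expands "{+f}":
    selected = ["test/mochitest-1", "test/mochitest-2"]  # hypothetical selection
    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as tmp:
        tmp.write("\n".join(selected))  # one selected entry per line

    # fzf then runs the preview command with the file's path substituted in:
    subprocess.call(["python", "preview.py", "-t", tmp.name])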
tools/tryselect/selectors/preview.py (22 changes: 13 additions & 9 deletions)
@@ -12,16 +12,20 @@

 here = os.path.abspath(os.path.dirname(__file__))
 sys.path.insert(0, os.path.join(os.path.dirname(here), 'util'))
-from estimates import duration_summary, task_duration_data
+from estimates import duration_summary


 def process_args():
     """Process preview arguments."""
     argparser = argparse.ArgumentParser()
-    argparser.add_argument('-s', '--show-estimates', action="store_true")
-    argparser.add_argument('-g', '--graph-cache', type=str, default=None)
-    argparser.add_argument('-c', '--cache_dir', type=str, default=None)
-    argparser.add_argument('tasklist', type=str)
+    argparser.add_argument('-s', '--show-estimates', action="store_true",
+                           help="Show task duration estimates (default: False)")
+    argparser.add_argument('-g', '--graph-cache', type=str, default=None,
+                           help="Filename of task graph dependencies")
+    argparser.add_argument('-c', '--cache_dir', type=str, default=None,
+                           help="Path to cache directory containing task durations")
+    argparser.add_argument('-t', '--tasklist', type=str, default=None,
+                           help="Path to temporary file containing the selected tasks")
     return argparser.parse_args()


@@ -30,9 +34,10 @@ def plain_display(tasklist):
     print("\n".join(sorted(s.strip("'") for s in tasklist.split())))


-def duration_display(graph_cache_file, tasklist, cache_dir):
+def duration_display(graph_cache_file, taskfile, cache_dir):
     """Preview window display with task durations + metadata."""
-    tasklist = [t.strip("'") for t in tasklist.split()]
+    with open(taskfile, "r") as f:
+        tasklist = [line.strip() for line in f]

     durations = duration_summary(graph_cache_file, tasklist, cache_dir)
     output = ""
@@ -51,10 +56,9 @@ def duration_display(graph_cache_file, tasklist, cache_dir):
                           durations["eta_datetime"].strftime("%H:%M"))

     duration_width = 5  # show five numbers at most.
-    task_durations = task_duration_data(cache_dir)
     output += "{:>{width}}\n".format("Duration", width=max_columns)
     for task in tasklist:
-        duration = int(task_durations.get(task, 0.0))
+        duration = durations["task_durations"].get(task, 0.0)
         output += "{:{align}{width}} {:{nalign}{nwidth}}s\n".format(
             task,
             duration,
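
Because the preview script is a plain argparse program, the new interface can also be exercised by hand; a hedged example invocation (the file paths are placeholders):

    python tools/tryselect/selectors/preview.py -s -g dep_cache.json \
        -c ~/.mozbuild/cache -t selected_tasks.txt

Here selected_tasks.txt holds one task label per line, matching what fzf writes for "{+f}".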
tools/tryselect/util/estimates.py (103 changes: 3 additions & 100 deletions)
@@ -5,111 +5,14 @@
 from __future__ import absolute_import, print_function

 import os
-import requests
 import json
 from datetime import datetime, timedelta


-TASK_DURATION_URL = 'https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json'
-GRAPH_QUANTILES_URL = 'https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv'
 TASK_DURATION_CACHE = 'task_duration_history.json'
 GRAPH_QUANTILE_CACHE = 'graph_quantile_cache.csv'
 TASK_DURATION_TAG_FILE = 'task_duration_tag.json'


-def check_downloaded_history(tag_file, duration_cache, quantile_cache):
-    if not os.path.isfile(tag_file):
-        return False
-
-    try:
-        with open(tag_file) as f:
-            duration_tags = json.load(f)
-        download_date = datetime.strptime(duration_tags.get('download_date'), '%Y-%M-%d')
-        if download_date < datetime.now() - timedelta(days=30):
-            return False
-    except (IOError, ValueError):
-        return False
-
-    if not os.path.isfile(duration_cache):
-        return False
-    if not os.path.isfile(quantile_cache):
-        return False
-
-    return True
-
-
-def download_task_history_data(cache_dir):
-    """Fetch task duration data exported from BigQuery."""
-    task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE)
-    task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE)
-    graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)
-
-    if check_downloaded_history(task_duration_tag_file, task_duration_cache, graph_quantile_cache):
-        return
-
-    try:
-        os.unlink(task_duration_tag_file)
-        os.unlink(task_duration_cache)
-        os.unlink(graph_quantile_cache)
-    except OSError:
-        print("No existing task history to clean up.")
-
-    try:
-        r = requests.get(TASK_DURATION_URL, stream=True)
-    except requests.exceptions.RequestException as exc:
-        # This is fine, the durations just won't be in the preview window.
-        print("Error fetching task duration cache from {}: {}".format(TASK_DURATION_URL, exc))
-        return
-
-    # The data retrieved from google storage is a newline-separated
-    # list of json entries, which Python's json module can't parse.
-    duration_data = list()
-    for line in r.content.splitlines():
-        duration_data.append(json.loads(line))
-
-    with open(task_duration_cache, 'w') as f:
-        json.dump(duration_data, f, indent=4)
-
-    try:
-        r = requests.get(GRAPH_QUANTILES_URL, stream=True)
-    except requests.exceptions.RequestException as exc:
-        # This is fine, the percentile just won't be in the preview window.
-        print("Error fetching task group percentiles from {}: {}".format(GRAPH_QUANTILES_URL, exc))
-        return
-
-    with open(graph_quantile_cache, 'w') as f:
-        f.write(r.content)
-
-    with open(task_duration_tag_file, 'w') as f:
-        json.dump({
-            'download_date': datetime.now().strftime('%Y-%m-%d')
-        }, f, indent=4)
-
-
-def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None):
-    """Trim the taskgraph cache used for dependencies.
-    Speeds up the fzf preview window to less human-perceptible
-    ranges."""
-    if not os.path.isfile(graph_cache):
-        return
-
-    target_task_set = set()
-    if target_file:
-        with open(target_file) as f:
-            target_task_set = set(json.load(f).keys())
-
-    with open(graph_cache) as f:
-        graph = json.load(f)
-    graph = {
-        name: list(defn['dependencies'].values())
-        for name, defn in graph.items()
-        if name in target_task_set
-    }
-    with open(dep_cache, 'w') as f:
-        json.dump(graph, f, indent=4)
-
-
 def find_all_dependencies(graph, tasklist):
     all_dependencies = dict()

@@ -176,8 +79,7 @@ def determine_quantile(quantiles_file, duration):

 def task_duration_data(cache_dir):
     with open(os.path.join(cache_dir, TASK_DURATION_CACHE)) as f:
-        durations = json.load(f)
-    return {d['name']: d['mean_duration_seconds'] for d in durations}
+        return json.load(f)


 def duration_summary(graph_cache_file, tasklist, cache_dir):
@@ -217,6 +119,7 @@ def duration_summary(graph_cache_file, tasklist, cache_dir):

     output["wall_duration_seconds"] = timedelta(seconds=int(longest_path))
     output["eta_datetime"] = datetime.now()+timedelta(seconds=longest_path)
-    # (datetime.now()+timedelta(seconds=longest_path)).strftime("%H:%M")
+
+    output["task_durations"] = {task: int(durations.get(task, 0.0)) for task in tasklist}

     return output
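
With this hunk, duration_summary() becomes the preview pane's single data source: the per-task durations ride along with the aggregate figures, which is what lets duration_display() above drop its separate task_duration_data() call. A sketch of the relevant output keys (values are illustrative, and other keys are omitted):

    from datetime import datetime, timedelta

    summary = {
        "wall_duration_seconds": timedelta(seconds=5400),  # longest dependency path
        "eta_datetime": datetime(2020, 3, 5, 14, 30),      # now + wall duration
        "task_durations": {"build-linux64/opt": 1520},     # per-task integer seconds
    }
    # The lookup duration_display() performs for each selected task:
    print(summary["task_durations"].get("build-linux64/opt", 0.0))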
tools/tryselect/util/manage_estimates.py (116 changes: 116 additions & 0 deletions)
@@ -0,0 +1,116 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function
+
+import os
+import requests
+import json
+from datetime import datetime, timedelta
+
+
+TASK_DURATION_URL = 'https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json'
+GRAPH_QUANTILES_URL = 'https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv'
+from .estimates import TASK_DURATION_CACHE, GRAPH_QUANTILE_CACHE, TASK_DURATION_TAG_FILE
+
+
+def check_downloaded_history(tag_file, duration_cache, quantile_cache):
+    if not os.path.isfile(tag_file):
+        return False
+
+    try:
+        with open(tag_file) as f:
+            duration_tags = json.load(f)
+        download_date = datetime.strptime(duration_tags.get('download_date'), '%Y-%M-%d')
+        if download_date < datetime.now() - timedelta(days=7):
+            return False
+    except (IOError, ValueError):
+        return False
+
+    if not os.path.isfile(duration_cache):
+        return False
+    # Check for old format version of file.
+    with open(duration_cache) as f:
+        data = json.load(f)
+        if isinstance(data, list):
+            return False
+    if not os.path.isfile(quantile_cache):
+        return False
+
+    return True
+
+
+def download_task_history_data(cache_dir):
+    """Fetch task duration data exported from BigQuery."""
+    task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE)
+    task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE)
+    graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)
+
+    if check_downloaded_history(task_duration_tag_file, task_duration_cache, graph_quantile_cache):
+        return
+
+    try:
+        os.unlink(task_duration_tag_file)
+        os.unlink(task_duration_cache)
+        os.unlink(graph_quantile_cache)
+    except OSError:
+        print("No existing task history to clean up.")
+
+    try:
+        r = requests.get(TASK_DURATION_URL, stream=True)
+    except requests.exceptions.RequestException as exc:
+        # This is fine, the durations just won't be in the preview window.
+        print("Error fetching task duration cache from {}: {}".format(TASK_DURATION_URL, exc))
+        return
+
+    # The data retrieved from google storage is a newline-separated
+    # list of json entries, which Python's json module can't parse.
+    duration_data = list()
+    for line in r.content.splitlines():
+        duration_data.append(json.loads(line))
+
+    # Reformat duration data to avoid list of dicts, as this is slow in the preview window
+    duration_data = {d['name']: d['mean_duration_seconds'] for d in duration_data}
+
+    with open(task_duration_cache, 'w') as f:
+        json.dump(duration_data, f, indent=4)
+
+    try:
+        r = requests.get(GRAPH_QUANTILES_URL, stream=True)
+    except requests.exceptions.RequestException as exc:
+        # This is fine, the percentile just won't be in the preview window.
+        print("Error fetching task group percentiles from {}: {}".format(GRAPH_QUANTILES_URL, exc))
+        return
+
+    with open(graph_quantile_cache, 'w') as f:
+        f.write(r.content)
+
+    with open(task_duration_tag_file, 'w') as f:
+        json.dump({
+            'download_date': datetime.now().strftime('%Y-%m-%d')
+        }, f, indent=4)
+
+
+def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None):
+    """Trim the taskgraph cache used for dependencies.
+    Speeds up the fzf preview window to less human-perceptible
+    ranges."""
+    if not os.path.isfile(graph_cache):
+        return
+
+    target_task_set = set()
+    if target_file:
+        with open(target_file) as f:
+            target_task_set = set(json.load(f).keys())
+
+    with open(graph_cache) as f:
+        graph = json.load(f)
+    graph = {
+        name: list(defn['dependencies'].values())
+        for name, defn in graph.items()
+        if name in target_task_set
+    }
+    with open(dep_cache, 'w') as f:
+        json.dump(graph, f, indent=4)
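
A sketch of how callers such as push.py and fuzzy.py are expected to drive these helpers, based on the imports this commit rewires; the flat import path, cache location, and file names are assumptions for illustration:

    import os
    from manage_estimates import (  # tools/tryselect/util assumed on sys.path
        download_task_history_data,
        make_trimmed_taskgraph_cache,
    )

    cache_dir = os.path.expanduser("~/.mozbuild/cache")  # assumed location
    download_task_history_data(cache_dir)  # no-op while caches are < 7 days old

    # Trim the full task graph down to the targeted tasks' dependency lists:
    make_trimmed_taskgraph_cache(
        "target_task_graph.json",                   # full graph cache, if present
        os.path.join(cache_dir, "dep_cache.json"),  # trimmed output
        target_file="target_tasks.json",            # optional target task set
    )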
