From 76ab5844dadb6c6dd15ebf854f7dd68fb6e5cbc7 Mon Sep 17 00:00:00 2001 From: Maksym Onufriienko Date: Wed, 13 Mar 2019 23:45:06 +0000 Subject: [PATCH] Possible timeout fix that happens because of hanging readline method. Wait for 3 mins for a new line in output and kill proc(xcodebuild) if no new line appears. We've already tested threading.Timer in fyi/ios12-sdk-simulator and builds did not time out for last 10 days. Bug: 898549 Change-Id: If9789282bdf554aef12180c516ff278fa43ab751 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1475156 Reviewed-by: John Budorick Reviewed-by: Justin Cohen Commit-Queue: Maksym Onufriienko Cr-Commit-Position: refs/heads/master@{#640541} --- ios/build/bots/scripts/run.py | 2 +- ios/build/bots/scripts/test_runner.py | 114 ++++++++++++++------ ios/build/bots/scripts/xcodebuild_runner.py | 68 ++++++------ 3 files changed, 120 insertions(+), 64 deletions(-) diff --git a/ios/build/bots/scripts/run.py b/ios/build/bots/scripts/run.py index 142b994699be4b..33450d211c9f24 100755 --- a/ios/build/bots/scripts/run.py +++ b/ios/build/bots/scripts/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env vpython # Copyright 2016 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. diff --git a/ios/build/bots/scripts/test_runner.py b/ios/build/bots/scripts/test_runner.py index b34c5fb016b959..0e832685e3466f 100644 --- a/ios/build/bots/scripts/test_runner.py +++ b/ios/build/bots/scripts/test_runner.py @@ -4,29 +4,32 @@ """Test runners for iOS.""" -import collections import errno +import signal +import sys + +import collections import glob import json import logging +from multiprocessing import pool import os import plistlib +import psutil import re import shutil -import signal import subprocess -import sys import tempfile +import threading import time -from multiprocessing import pool - import gtest_utils import xctest_utils - LOGGER = logging.getLogger(__name__) DERIVED_DATA = os.path.expanduser('~/Library/Developer/Xcode/DerivedData') +READLINE_TIMEOUT = 180 + class Error(Exception): """Base class for errors.""" @@ -144,6 +147,76 @@ def __init__(self): 'Sharding has not been implemented!') +def terminate_process(proc): + """Terminates the process. + + If an error occurs ignore it, just print out a message. + + Args: + proc: A subprocess to terminate. + """ + try: + LOGGER.info('Killing hung process %s' % proc.pid) + proc.terminate() + attempts_to_kill = 3 + ps = psutil.Process(proc.pid) + for _ in range(attempts_to_kill): + # Check whether proc.pid process is still alive. + if ps.is_running(): + LOGGER.info( + 'Process iossim is still alive! Xcodebuild process might block it.') + xcodebuild_processes = [ + p for p in psutil.process_iter() if 'xcodebuild' == p.info['name']] + if not xcodebuild_processes: + LOGGER.debug('There are no running xcodebuild processes.') + break + LOGGER.debug('List of running xcodebuild processes:', + xcodebuild_processes) + # Killing xcodebuild processes + for p in xcodebuild_processes: + p.send_signal(signal.SIGKILL) + psutil.wait_procs(xcodebuild_processes) + else: + LOGGER.info('Process was killed!') + break + except OSError as ex: + LOGGER.info('Error while killing a process: %s' % ex) + + +def print_process_output(proc, parser, timeout=READLINE_TIMEOUT): + """Logs process messages in console and waits until process is done. + + Method waits until no output message and if no message for timeout seconds, + process will be terminated. + + Args: + proc: A running process. + Parser: A parser. + timeout: Timeout(in seconds) to subprocess.stdout.readline method. + """ + out = [] + while True: + # subprocess.stdout.readline() might be stuck from time to time + # and tests fail because of TIMEOUT. + # Try to fix the issue by adding timer-thread for `timeout` seconds + # that will kill `frozen` running process if no new line is read + # and will finish test attempt. + # If new line appears in timeout, just cancel timer. + timer = threading.Timer(timeout, terminate_process, [proc]) + timer.start() + line = proc.stdout.readline() + timer.cancel() + if not line: + break + line = line.rstrip() + out.append(line) + parser.ProcessLine(line) + LOGGER.info(line) + sys.stdout.flush() + LOGGER.debug('Finished print_process_output.') + return out + + def get_kif_test_filter(tests, invert=False): """Returns the KIF test filter to filter the given test cases. @@ -528,16 +601,8 @@ def _run(self, cmd, shards=1): stderr=subprocess.STDOUT, ) old_handler = self.set_sigterm_handler( - lambda _signum, _frame: self.handle_sigterm(proc)) - - while True: - line = proc.stdout.readline() - if not line: - break - line = line.rstrip() - parser.ProcessLine(line) - LOGGER.info(line) - sys.stdout.flush() + lambda _signum, _frame: self.handle_sigterm(proc)) + print_process_output(proc, parser) LOGGER.info('Waiting for test process to terminate.') proc.wait() @@ -883,13 +948,7 @@ def run_tests(self, test_shard=None): stderr=subprocess.STDOUT, ) - out = [] - while True: - line = proc.stdout.readline() - if not line: - break - out.append(line.rstrip()) - + out = print_process_output(proc, xctest_utils.XCTestLogParser()) self.deleteSimulator(udid) return (out, udid, proc.returncode) @@ -1167,14 +1226,7 @@ def run_wpr_test(self, udid, recipe_path, replay_path): else: parser = gtest_utils.GTestLogParser() - while True: - line = proc.stdout.readline() - if not line: - break - line = line.rstrip() - parser.ProcessLine(line) - LOGGER.info(line) - sys.stdout.flush() + print_process_output(proc, parser) proc.wait() self.set_sigterm_handler(old_handler) diff --git a/ios/build/bots/scripts/xcodebuild_runner.py b/ios/build/bots/scripts/xcodebuild_runner.py index 6e4760be334abb..251fa418b4f0df 100644 --- a/ios/build/bots/scripts/xcodebuild_runner.py +++ b/ios/build/bots/scripts/xcodebuild_runner.py @@ -21,7 +21,6 @@ import test_runner LOGGER = logging.getLogger(__name__) -READLINE_TIMEOUT = 300 class LaunchCommandCreationError(test_runner.TestRunnerError): @@ -378,23 +377,24 @@ def launch_attempt(self, cmd, out_dir): while True: # It seems that subprocess.stdout.readline() is stuck from time to time # and tests fail because of TIMEOUT. - # Try to fix the issue by adding timer-thread for 5 mins + # Try to fix the issue by adding timer-thread for 3 mins # that will kill `frozen` running process if no new line is read # and will finish test attempt. - # If new line appears in 5 mins, just cancel timer. - timer = threading.Timer(READLINE_TIMEOUT, terminate_process, [proc]) + # If new line appears in 3 mins, just cancel timer. + timer = threading.Timer(test_runner.READLINE_TIMEOUT, + terminate_process, [proc]) timer.start() line = proc.stdout.readline() timer.cancel() if not line: break line = line.rstrip() - print line + LOGGER.info(line) output.append(line) sys.stdout.flush() proc.wait() - print 'Command %s finished with %d' % (cmd, proc.returncode) + LOGGER.info('Command %s finished with %d' % (cmd, proc.returncode)) return proc.returncode, output def launch(self): @@ -426,8 +426,8 @@ def launch(self): env_vars=self.egtests_app.env_vars) # TODO(crbug.com/914878): add heartbeat logging to xcodebuild_runner. - print 'Start test attempt #%d for command [%s]' % ( - attempt, ' '.join(cmd_list)) + LOGGER.info('Start test attempt #%d for command [%s]' % ( + attempt, ' '.join(cmd_list))) _, output = self.launch_attempt(cmd_list, outdir_attempt) self.test_results['attempts'].append( collect_test_results(os.path.join(outdir_attempt, 'Info.plist'), @@ -607,30 +607,34 @@ def get_launch_env(self): def launch(self): """Launches tests using xcodebuild.""" - destinaion_folder = lambda dest: dest.replace( - 'platform=iOS Simulator,', '').replace(',name=', ' ').replace('OS=', '') - launch_commands = [] - for params in self.sharding_data: - launch_commands.append(LaunchCommand( - EgtestsApp(params['app'], filtered_tests=params['test_cases'], - env_vars=self.env_vars, test_args=self.test_args), - params['destination'], - shards=params['shards'], - retries=self.retries, - out_dir=os.path.join(self.out_dir, - destinaion_folder(params['destination'])), - env=self.get_launch_env())) - - pool = multiprocessing.pool.ThreadPool(len(launch_commands)) - self.test_results['commands'] = [] - for result in pool.imap_unordered(LaunchCommand.launch, launch_commands): - self.logs[' '.join(result['cmd'])] = result['test_results'] - self.test_results['commands'].append( - {'cmd': ' '.join(result['cmd']), 'logs': result['logs']}) - self.test_results['end_run'] = int(time.time()) - LOGGER.debug('Test ended.') - # Test is failed if there are failures for the last run. - return not self.test_results['commands'][-1]['logs']['failed'] + try: + destinaion_folder = lambda dest: dest.replace( + 'platform=iOS Simulator,', '').replace( + ',name=', ' ').replace('OS=', '') + launch_commands = [] + for params in self.sharding_data: + launch_commands.append(LaunchCommand( + EgtestsApp(params['app'], filtered_tests=params['test_cases'], + env_vars=self.env_vars, test_args=self.test_args), + params['destination'], + shards=params['shards'], + retries=self.retries, + out_dir=os.path.join(self.out_dir, + destinaion_folder(params['destination'])), + env=self.get_launch_env())) + + pool = multiprocessing.pool.ThreadPool(len(launch_commands)) + self.test_results['commands'] = [] + for result in pool.imap_unordered(LaunchCommand.launch, launch_commands): + self.logs[' '.join(result['cmd'])] = result['test_results'] + self.test_results['commands'].append( + {'cmd': ' '.join(result['cmd']), 'logs': result['logs']}) + self.test_results['end_run'] = int(time.time()) + LOGGER.debug('Test ended.') + # Test is failed if there are failures for the last run. + return not self.test_results['commands'][-1]['logs']['failed'] + finally: + self.tear_down() def erase_all_simulators(self): """Erases all simulator devices.