Skip to content

Commit

Permalink
add more log for dist test for ci test=develop (PaddlePaddle#14813)
Browse files Browse the repository at this point in the history
* add more log for dist test for ci test=develop

* increase deadline test=develop
  • Loading branch information
typhoonzero authored Dec 10, 2018
1 parent 9623b45 commit 554bcdb
Showing 1 changed file with 27 additions and 4 deletions.
31 changes: 27 additions & 4 deletions python/paddle/fluid/tests/unittests/test_dist_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,18 @@ def _run_cluster(self, model, envs, check_error_log):
stderr=tr1_pipe,
env=env1)

# Wait until both trainer processes have terminated
while True:
stat0 = tr0_proc.poll()
time.sleep(0.1)
if stat0 is not None:
break
while True:
stat1 = tr1_proc.poll()
time.sleep(0.1)
if stat1 is not None:
break

tr0_out, tr0_err = tr0_proc.communicate()
tr1_out, tr1_err = tr1_proc.communicate()

Expand All @@ -390,11 +402,21 @@ def _run_cluster(self, model, envs, check_error_log):
ps0.terminate()
ps1.terminate()

# print server log
with open("/tmp/ps0_err.log", "r") as fn:
sys.stderr.write("ps0 stderr: %s\n" % fn.read())
with open("/tmp/ps1_err.log", "r") as fn:
sys.stderr.write("ps1 stderr: %s\n" % fn.read())

# print log
sys.stderr.write('trainer 0 stdout: %s\n' % pickle.loads(tr0_out))
sys.stderr.write('trainer 0 stderr: %s\n' % tr0_err)
sys.stderr.write('trainer 1 stdout: %s\n' % pickle.loads(tr1_out))
sys.stderr.write('trainer 1 stderr: %s\n' % tr1_err)
if stat0 == 0:
sys.stderr.write('trainer 0 stdout: %s\n' % pickle.loads(tr0_out))
with open("/tmp/tr0_err.log", "r") as fn:
sys.stderr.write('trainer 0 stderr: %s\n' % fn.read())
if stat1 == 0:
sys.stderr.write('trainer 1 stdout: %s\n' % pickle.loads(tr1_out))
with open("/tmp/tr1_err.log", "r") as fn:
sys.stderr.write('trainer 1 stderr: %s\n' % fn.read())

return pickle.loads(tr0_out), pickle.loads(tr1_out)

Expand Down Expand Up @@ -474,6 +496,7 @@ def check_with_place(self,
"PYTHONPATH": os.getenv("PYTHONPATH", ""),
"LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
"FLAGS_fraction_of_gpu_memory_to_use": "0.15",
"FLAGS_rpc_deadline": "5000", # 5sec to fail fast
"FLAGS_cudnn_deterministic": "1",
"http_proxy": "",
"NCCL_P2P_DISABLE": "1"
Expand Down

0 comments on commit 554bcdb

Please sign in to comment.