Merge pull request pytorch#72 from jma127/master
Cleanup and bugfixing for scripts
jma127 authored Jul 26, 2018
2 parents da2f20c + f6f4225 commit beaaf37
Showing 3 changed files with 81 additions and 65 deletions.
41 changes: 23 additions & 18 deletions scripts/elfgames/go/df_console.py
@@ -6,14 +6,21 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Console for DarkForest

import os
from rlpytorch import Evaluator, load_env
import sys

import torch

from console_lib import GoConsoleGTP
from rlpytorch import Evaluator, load_env


if __name__ == '__main__':
def main():
print('Python version:', sys.version)
print('PyTorch version:', torch.__version__)
print('CUDA version', torch.version.cuda)
print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

additional_to_load = {
'evaluator': (
Evaluator.get_option_spec(),
@@ -23,13 +30,13 @@
# Set game to online model.
env = load_env(
os.environ,
overrides=dict(
num_games=1,
greedy=True,
T=1,
model="online",
additional_labels=['aug_code', 'move_idx'],
),
overrides={
'num_games': 1,
'greedy': True,
'T': 1,
'model': 'online',
'additional_labels': ['aug_code', 'move_idx'],
},
additional_to_load=additional_to_load)

evaluator = env['evaluator']
@@ -39,14 +46,8 @@
model_loader = env["model_loaders"][0]
model = model_loader.load_model(GC.params)

gpu = model_loader.options.gpu
use_gpu = gpu is not None and gpu >= 0

mi = env['mi']
mi.add_model("model", model)
# mi.add_model(
# "actor", model,
# copy=True, cuda=use_gpu, gpu_id=gpu)
mi.add_model("actor", model)
mi["model"].eval()
mi["actor"].eval()
@@ -69,7 +70,7 @@ def train(batch):
GC.reg_callback_if_exists("train", train)

GC.start()
GC.GC.setRequest(
GC.GC.getClient().setRequest(
mi["actor"].step, -1, env['game'].options.resign_thres, -1)

evaluator.episode_start(0)
@@ -79,3 +80,7 @@ def train(batch):
if console.exit:
break
GC.stop()


if __name__ == '__main__':
main()
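
The diff above moves df_console.py's top-level code into a main() function that first prints environment diagnostics; the same pattern is applied to selfplay.py and train.py below. A minimal, self-contained sketch of that entry-point pattern (assuming PyTorch is installed; the ELF-specific setup that follows the diagnostics is omitted):

import os
import sys

import torch


def main():
    # Record the exact environment a run used, so logs are easier to debug later.
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # ... load the game environment, register callbacks, and run here ...


if __name__ == '__main__':
    main()
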
68 changes: 33 additions & 35 deletions scripts/elfgames/go/selfplay.py
@@ -8,10 +8,13 @@
# LICENSE file in the root directory of this source tree.

import os
import sys
import time
import re
from datetime import datetime

import torch

from rlpytorch import \
Evaluator, load_env, ModelInterface

@@ -23,17 +26,19 @@ def __init__(self):
self.actor_count = 0

def feed(self, batch):
# print("batchsize: %d" % batch.batchsize)
self.total_sel_batchsize += batch.batchsize
self.total_batchsize += batch.max_batchsize
self.actor_count += 1

if self.total_sel_batchsize >= 500000:
print(datetime.now())
print("Batch usage: %d/%d (%.2f%%)" %
(self.total_sel_batchsize, self.total_batchsize,
100.0 * self.total_sel_batchsize / self.total_batchsize))
wr = batch.GC.getGameStats().getWinRateStats()

batch_usage = self.total_sel_batchsize / self.total_batchsize
print(f'Batch usage: '
f'{self.total_sel_batchsize}/{self.total_batchsize} '
f'({100.0 * batch_usage:.2f}%)')

wr = batch.GC.getClient().getGameStats().getWinRateStats()
win_rate = (100.0 * wr.black_wins / wr.total_games
if wr.total_games > 0
else 0.0)
@@ -42,7 +47,7 @@ def feed(self, batch):

self.total_sel_batchsize = 0
self.total_batchsize = 0
print("Actor Count: %d" % self.actor_count)
print('Actor count:', self.actor_count)


name_matcher = re.compile(r"save-(\d+)")
@@ -60,15 +65,13 @@ def reload_model(model_loader, params, mi, actor_name, args):
if actor_name not in mi:
mi.add_model(actor_name, model, cuda=(args.gpu >= 0), gpu_id=args.gpu)
else:
# mi["actor"].load(
# real_path, replace_prefix = [("resnet.module", "resnet")])
mi.update_model(actor_name, model)
mi[actor_name].eval()


def reload(mi, model_loader, params, args, root, ver, actor_name):
if model_loader.options.load is None or model_loader.options.load == "":
print("No previous model loaded, loading form " + root)
print('No previous model loaded, loading from', root)
real_path = os.path.join(root, "save-" + str(ver) + ".bin")
else:
this_root = os.path.dirname(model_loader.options.load)
@@ -78,10 +81,15 @@ def reload(mi, model_loader, params, args, root, ver, actor_name):
model_loader.options.load = real_path
reload_model(model_loader, params, mi, actor_name, args)
else:
print("Warning! Same model, skip loading " + real_path)
print('Warning! Same model, skip loading', real_path)


def main():
print('Python version:', sys.version)
print('PyTorch version:', torch.__version__)
print('CUDA version', torch.version.cuda)
print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

# Set game to online model.
actors = ["actor_black", "actor_white"]
additional_to_load = {
@@ -99,36 +107,32 @@
})

env = load_env(
os.environ, num_models=2, overrides=dict(actor_only=True),
os.environ, num_models=2, overrides={'actor_only': True},
additional_to_load=additional_to_load)

GC = env["game"].initialize()

stats = [Stats(), Stats()]

# for actor_name, stat, model_loader, e in \
# zip(actors, stats, env["model_loaders"], evaluators):
for i in range(len(actors)):
actor_name = actors[i]
stat = stats[i]
e = env["eval_" + actor_name]

print("register " + actor_name + " for e = " + str(e))
print(f'register {actor_name} for e = {e!s}')
e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

def actor(batch, e, stat):
reply = e.actor(batch)
stat.feed(batch)
# eval_iters.stats.feed_batch(batch)
return reply

GC.reg_callback(actor_name,
lambda batch, e=e, stat=stat: actor(batch, e, stat))

root = os.environ.get("root", "./")
print("Root: \"%s\"" % root)
print(f'Root: "{root}"')
args = env["game"].options
global loop_end
loop_end = False

def game_start(batch):
@@ -152,28 +156,22 @@ def game_start(batch):
time.sleep(10)

def game_end(batch):
global loop_end
# print("In game end")
wr = batch.GC.getGameStats().getWinRateStats()
win_rate = 100.0 * wr.black_wins / wr.total_games \
if wr.total_games > 0 else 0.0
print("%s B/W: %d/%d. Black winrate: %.2f (%d)" %
(str(datetime.now()), wr.black_wins, wr.white_wins,
win_rate, wr.total_games))
nonlocal loop_end
wr = batch.GC.getClient().getGameStats().getWinRateStats()
win_rate = (100.0 * wr.black_wins / wr.total_games
if wr.total_games > 0 else 0.0)
print(f'{datetime.now()!s} B/W: {wr.black_wins}/{wr.white_wins}.'
f'Black winrate: {win_rate:.2f} ({wr.total_games})')

if args.suicide_after_n_games > 0 and \
wr.total_games >= args.suicide_after_n_games:
print("#suicide_after_n_games: %d, total_games: %d" %
(args.suicide_after_n_games, wr.total_games))
wr.total_games >= args.suicide_after_n_games:
print(f'#suicide_after_n_games: {args.suicide_after_n_games}, '
f'total_games: {wr.total_games}')
loop_end = True

GC.reg_callback_if_exists("game_start", game_start)
GC.reg_callback_if_exists("game_end", game_end)

# def episode_start(i):
# global GC
# GC.GC.setSelfplayCount(10000)
# evaluator.episode_start(i)

GC.start()
if args.eval_model_pair:
if args.eval_model_pair.find(",") >= 0:
@@ -188,8 +186,8 @@ def game_end(batch):
env["mi_" + actor_name], actor_name, args)

# We just use one thread to do selfplay.
GC.GC.setRequest(int(black), int(white),
env['game'].options.resign_thres, 1)
GC.GC.getClient().setRequest(
int(black), int(white), env['game'].options.resign_thres, 1)

for actor_name in actors:
env["eval_" + actor_name].episode_start(0)
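
A note on one of the less obvious fixes in selfplay.py (train.py below has the same change): callbacks such as game_end are nested inside main(), so variables like loop_end and selfplay_ver live in main()'s scope. Declaring them `global` inside a callback rebinds a module-level name instead, which is why the cleanup switches to `nonlocal`. A small, self-contained illustration of the difference, using hypothetical names rather than code from the repository:

counter = 0  # module-level name, distinct from the one defined inside main()


def main():
    counter = 0  # local to main(), like loop_end / selfplay_ver in the scripts

    def bump_with_global():
        global counter       # rebinds the module-level counter above
        counter += 1

    def bump_with_nonlocal():
        nonlocal counter     # rebinds main()'s counter, which is what the callbacks need
        counter += 1

    bump_with_global()
    bump_with_nonlocal()
    print('counter seen by main():', counter)              # 1: only the nonlocal bump
    print('module-level counter:', globals()['counter'])   # 1: the global bump went here


if __name__ == '__main__':
    main()
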
37 changes: 25 additions & 12 deletions scripts/elfgames/go/train.py
@@ -8,14 +8,24 @@
# LICENSE file in the root directory of this source tree.

import os
import sys
import re
import time

import torch

from rlpytorch import load_env, SingleProcessRun, Trainer


matcher = re.compile(r"save-(\d+).bin")

if __name__ == '__main__':

def main():
print('Python version:', sys.version)
print('PyTorch version:', torch.__version__)
print('CUDA version', torch.version.cuda)
print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

additional_to_load = {
'trainer': (
Trainer.get_option_spec(),
@@ -48,17 +58,16 @@
eval_old_model = env["game"].options.eval_old_model

if eval_old_model >= 0:
GC.GC.setEvalMode(model_ver, eval_old_model)
GC.GC.getServer().setEvalMode(model_ver, eval_old_model)
else:
GC.GC.setInitialVersion(model_ver)
GC.GC.getServer().setInitialVersion(model_ver)

selfplay_ver = model_ver
root = os.environ["save"]
print("Root: " + root)
print("Keep prev_selfplay: " + str(keep_prev_selfplay))
print(f'Root: "{root}"')
print(f'Keep prev_selfplay: {keep_prev_selfplay!s}')

def train(batch, *args, **kwargs):
global trainer, selfplay_ver, keep_prev_selfplay, runner
# Check whether the version match.
if keep_prev_selfplay or \
(batch["selfplay_ver"] != selfplay_ver).sum() == 0:
@@ -69,12 +78,12 @@ def train(batch, *args, **kwargs):
runner.inc_episode_counter(-1)

def train_ctrl(batch, *args, **kwargs):
global selfplay_ver, env, model_loader, GC, root, trainer
nonlocal selfplay_ver
old_selfplay_ver = selfplay_ver
selfplay_ver = int(batch["selfplay_ver"][0])
print(
f'Train ctrl: selfplay_ver: {old_selfplay_ver} -> {selfplay_ver}')
GC.GC.waitForSufficientSelfplay(selfplay_ver)
GC.GC.getServer().waitForSufficientSelfplay(selfplay_ver)

# Reload old models.
real_path = os.path.join(root, "save-" + str(selfplay_ver) + ".bin")
@@ -111,21 +120,25 @@ def train_ctrl(batch, *args, **kwargs):
rl_method=env["method"])

def episode_summary(i):
global GC, selfplay_ver
nonlocal selfplay_ver
ver = trainer.episode_summary(i)
# This might block (when evaluation does not catch up with training).
GC.GC.notifyNewVersion(selfplay_ver, ver)
GC.GC.getServer().notifyNewVersion(selfplay_ver, ver)

offline_training = (env["game"].options.mode == "offline_train")

def after_start():
global selfplay_ver, offline_training
nonlocal selfplay_ver
if not offline_training:
print("About to wait for sufficient selfplay")
GC.GC.waitForSufficientSelfplay(selfplay_ver)
GC.GC.getServer().waitForSufficientSelfplay(selfplay_ver)

runner.setup(GC, after_start=after_start,
episode_summary=episode_summary,
episode_start=trainer.episode_start)

runner.run()


if __name__ == '__main__':
main()
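
Both selfplay.py and train.py above identify checkpoints by file names of the form save-<version>.bin, extracting the version with a regular expression and rebuilding the path with os.path.join. A short sketch of that round trip; the helper functions are illustrative, not part of the repository:

import os
import re

# Same naming scheme as the scripts: save-<version>.bin under a root directory.
matcher = re.compile(r"save-(\d+)\.bin")


def checkpoint_path(root, ver):
    # Mirrors os.path.join(root, "save-" + str(ver) + ".bin") in train.py.
    return os.path.join(root, f"save-{ver}.bin")


def checkpoint_version(path):
    m = matcher.search(os.path.basename(path))
    return int(m.group(1)) if m is not None else None


if __name__ == '__main__':
    p = checkpoint_path("./checkpoints", 1347)
    print(p)                       # e.g. ./checkpoints/save-1347.bin
    print(checkpoint_version(p))   # 1347
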
