Skip to content

Commit

Permalink
Search alternative move on root
Browse files Browse the repository at this point in the history
  • Loading branch information
k-matsuzaki authored and zakki committed Apr 28, 2018
1 parent b71f749 commit 56e673b
Showing 1 changed file with 65 additions and 0 deletions.
65 changes: 65 additions & 0 deletions src/UctSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1850,6 +1850,8 @@ SelectMaxUcbChild( const game_info_t *game, int current, int color )
double dynamic_parameter;
rate_order_t order[PURE_BOARD_MAX + 1];
int width;
double child_ucb[UCT_CHILD_MAX];
double child_lcb[UCT_CHILD_MAX];
double ucb_bonus_weight = bonus_weight * sqrt(bonus_equivalence / (sum + bonus_equivalence));
const bool debug = current == current_root && sum % 10000 == 0 && GetDebugMessageMode();

Expand Down Expand Up @@ -1921,6 +1923,10 @@ SelectMaxUcbChild( const game_info_t *game, int current, int color )
start_child = 1;
}
}

int max_move_count = 0;
int max_move_child = 0;

// Find the move with the largest UCB value
for (int i = start_child; i < child_num; i++) {
if (uct_child[i].flag || uct_child[i].open) {
Expand All @@ -1946,6 +1952,7 @@ SelectMaxUcbChild( const game_info_t *game, int current, int color )
#endif
double win = uct_child[i].win;
double move_count = uct_child[i].move_count;
double ucb_value, lcb_value;

if (evaled) {
if (debug && move_count > 0) {
Expand Down Expand Up @@ -1977,13 +1984,15 @@ SelectMaxUcbChild( const game_info_t *game, int current, int color )
double u = sqrt(sum) / (1 + uct_child[i].move_count);
double rate = uct_child[i].nnrate;
ucb_value = p + c_puct * u * rate;
lcb_value = p - c_puct * u * rate;

if (debug && move_count > 0) {
cerr << " P:" << p << " UCB:" << ucb_value << endl;
}
} else {
if (uct_child[i].move_count == 0) {
ucb_value = FPU;
lcb_value = FPU;
} else {
double div, v;
// UCB1-TUNED value
Expand All @@ -1992,17 +2001,73 @@ SelectMaxUcbChild( const game_info_t *game, int current, int color )
div = log(sum) / uct_child[i].move_count;
v = p - p * p + sqrt(2.0 * div);
ucb_value = p + sqrt(div * ((0.25 < v) ? 0.25 : v));
lcb_value = p - sqrt(div * ((0.25 < v) ? 0.25 : v));

// UCB Bonus
ucb_value += ucb_bonus_weight * uct_child[i].rate;
lcb_value += ucb_bonus_weight * uct_child[i].rate;
}
}

child_ucb[i] = ucb_value;
child_lcb[i] = lcb_value;

if (ucb_value > max_value) {
max_value = ucb_value;
max_child = i;
}
if (uct_child[i].move_count > max_move_count) {
max_move_count = uct_child[i].move_count;
max_move_child = i;
}
}
}

if (current == current_root && max_child == max_move_child) {
double next_ucb = child_lcb[max_child];
int next_child = max_child;
for (int i = 0; i < child_num; i++) {
if (max_child == i)
continue;
if (uct_child[i].flag || uct_child[i].open) {
if (child_ucb[i] > next_ucb) {
next_ucb = child_ucb[i];
next_child = i;
}
}
}
if (max_child != next_child
&& uct_child[max_child].move_count > uct_child[next_child].move_count * 1.2) {
//cerr << "Replace " << FormatMove(uct_child[max_child].pos) << " -> " << FormatMove(uct_child[next_child].pos) << endl;
max_child = next_child;
}
}

static ray_clock::time_point previous_time = ray_clock::now();
static mutex mutex_log;
if (current == current_root && sum > 0) {
mutex_log.lock();
if (GetSpendTime(previous_time) > 1.0) {
for (int i = 0; i < child_num; i++) {
if (i > 0 && !uct_child[i].flag && !uct_child[i].open)
continue;
double win = uct_child[i].win;
double move_count = uct_child[i].move_count;
double p0 = win / move_count;

cerr << "|" << setw(4) << FormatMove(uct_child[i].pos);
cerr << "|" << setw(5) << (int) move_count;
auto precision = cerr.precision();
cerr.precision(4);
cerr << "|" << setw(10) << fixed << (p0 * 100);
cerr << "|" << setw(10) << fixed << (child_ucb[i] * 100);
cerr << "|" << setw(10) << fixed << (child_lcb[i] * 100);
cerr << endl;
cerr.precision(precision);
}
previous_time = ray_clock::now();
}
mutex_log.unlock();
}

return max_child;
Expand Down

0 comments on commit 56e673b

Please sign in to comment.