Skip to content

Commit

Permalink
adding baselines
Browse files Browse the repository at this point in the history
  • Loading branch information
qcappart committed May 29, 2020
1 parent 15055e1 commit 929a0af
Show file tree
Hide file tree
Showing 11 changed files with 590 additions and 131 deletions.
97 changes: 97 additions & 0 deletions benchmarking/portfolio.bmk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/bin/bash

# timeout SECONDS COMMAND [ARG...]
# Run COMMAND under a wall-clock limit. perl arms an alarm for SECONDS and then
# exec's COMMAND in its own process, so the pending SIGALRM hits COMMAND itself.
# Being a shell function, this takes precedence over any `timeout` binary on PATH.
timeout() {
    perl -e 'alarm shift; exec @ARGV' "$@"
}

# Positional arguments:
#   $1  seed - forwarded as --seed to every solver below
#   $2  size - forwarded as --n_item (Python baselines) / --size (CP solver)
#   $3  time - time budget in milliseconds
if [ "$#" -lt 3 ]; then
    echo "usage: $0 <seed> <size> <time_ms>" >&2
    exit 1
fi

seed=$1
size=$2
time=$3

# The timeout helper counts whole seconds. A budget below 1000 ms would truncate
# to 0, and `alarm 0` cancels the alarm instead of firing, i.e. no limit at all.
time_sec=$((time/1000))
if [ "$time_sec" -lt 1 ]; then
    time_sec=1
fi

# Instance parameters shared by all solvers in this benchmark.
capacity_ratio=0.5
lambda_1=1
lambda_2=5
lambda_3=5
lambda_4=5
discrete_coeff=0
# Beam width handed to the PPO baseline.
beam_size=16

# Stand-alone RL baselines, each limited to $time_sec seconds by the timeout helper.
# --capacity_ratio is passed explicitly so the baselines follow the value set at
# the top of this script instead of silently relying on the Python default.
echo "[DQN]"
timeout "$time_sec" python src/problem/portfolio/baseline/dqn_solving.py --n_item="$size" \
    --capacity_ratio="$capacity_ratio" \
    --lambda_1="$lambda_1" \
    --lambda_2="$lambda_2" \
    --lambda_3="$lambda_3" \
    --lambda_4="$lambda_4" \
    --discrete_coeff="$discrete_coeff" \
    --seed="$seed"

echo "------------------------------------------------------------------------"

echo "[PPO]"
timeout "$time_sec" python src/problem/portfolio/baseline/ppo_solving.py --n_item="$size" \
    --capacity_ratio="$capacity_ratio" \
    --lambda_1="$lambda_1" \
    --lambda_2="$lambda_2" \
    --lambda_3="$lambda_3" \
    --lambda_4="$lambda_4" \
    --discrete_coeff="$discrete_coeff" \
    --seed="$seed" \
    --beam_size="$beam_size"


echo "------------------------------------------------------------------------"

echo "[BaB-DQN]"

# Instance parameters are taken from the variables defined at the top of the
# script, so this run always uses the same settings as the Python baselines.
# Note the solver spells the flag --discrete_coeffs (plural).
./solver_portfolio --model=rl-bab-dqn \
    --time="$time" \
    --size="$size" \
    --capacity_ratio="$capacity_ratio" \
    --lambda_1="$lambda_1" \
    --lambda_2="$lambda_2" \
    --lambda_3="$lambda_3" \
    --lambda_4="$lambda_4" \
    --discrete_coeffs="$discrete_coeff" \
    --cache=1 \
    --seed="$seed"



echo "------------------------------------------------------------------------"

echo "[ILDS-DQN]"

# Instance parameters are taken from the variables defined at the top of the
# script, so this run always uses the same settings as the Python baselines.
# Note the solver spells the flag --discrete_coeffs (plural).
./solver_portfolio --model=rl-ilds-dqn \
    --time="$time" \
    --size="$size" \
    --capacity_ratio="$capacity_ratio" \
    --lambda_1="$lambda_1" \
    --lambda_2="$lambda_2" \
    --lambda_3="$lambda_3" \
    --lambda_4="$lambda_4" \
    --discrete_coeffs="$discrete_coeff" \
    --cache=1 \
    --d_l=5000 \
    --seed="$seed"


echo "------------------------------------------------------------------------"

echo "[RBS-PPO]"

# Instance parameters are taken from the variables defined at the top of the
# script, so this run always uses the same settings as the Python baselines.
# Note the solver spells the flag --discrete_coeffs (plural).
./solver_portfolio --model=rl-rbs-ppo \
    --time="$time" \
    --size="$size" \
    --capacity_ratio="$capacity_ratio" \
    --lambda_1="$lambda_1" \
    --lambda_2="$lambda_2" \
    --lambda_3="$lambda_3" \
    --lambda_4="$lambda_4" \
    --discrete_coeffs="$discrete_coeff" \
    --cache=1 \
    --luby=1 \
    --temperature=1 \
    --seed="$seed"




75 changes: 75 additions & 0 deletions benchmarking/tsptw_bmk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash

# timeout SECONDS COMMAND [ARG...]
# Run COMMAND under a wall-clock limit. perl arms an alarm for SECONDS and then
# exec's COMMAND in its own process, so the pending SIGALRM hits COMMAND itself.
# Being a shell function, this takes precedence over any `timeout` binary on PATH.
timeout() {
    perl -e 'alarm shift; exec @ARGV' "$@"
}

# Positional arguments:
#   $1  seed - forwarded as --seed to every solver below
#   $2  size - forwarded as --n_city (Python baselines) / --size (CP solver)
#   $3  time - time budget in milliseconds
if [ "$#" -lt 3 ]; then
    echo "usage: $0 <seed> <size> <time_ms>" >&2
    exit 1
fi

seed=$1
size=$2
time=$3

# The timeout helper counts whole seconds. A budget below 1000 ms would truncate
# to 0, and `alarm 0` cancels the alarm instead of firing, i.e. no limit at all.
time_sec=$((time/1000))
if [ "$time_sec" -lt 1 ]; then
    time_sec=1
fi

# Instance parameters shared by all solvers in this benchmark.
grid_size=100
max_tw_size=100
max_tw_gap=10
# Beam width handed to the PPO baseline.
beam_size=16

# Stand-alone RL baselines, each limited to $time_sec seconds by the timeout helper.
# Expansions are quoted so a value coming from the command line can never be
# word-split or glob-expanded into extra arguments.
echo "[DQN]"
timeout "$time_sec" python src/problem/tsptw/baseline/dqn_solving.py --n_city="$size" \
    --grid_size="$grid_size" \
    --max_tw_size="$max_tw_size" \
    --max_tw_gap="$max_tw_gap" \
    --seed="$seed"
echo "------------------------------------------------------------------------"

echo "[PPO]"
timeout "$time_sec" python src/problem/tsptw/baseline/ppo_solving.py --n_city="$size" \
    --grid_size="$grid_size" \
    --max_tw_size="$max_tw_size" \
    --max_tw_gap="$max_tw_gap" \
    --seed="$seed" \
    --beam_size="$beam_size"
echo "------------------------------------------------------------------------"

# CP-based runs. The solver receives the budget in milliseconds via --time, so
# no external timeout wrapper is applied here. Expansions are quoted so a value
# coming from the command line can never be word-split into extra arguments.
echo "[CP-nearest]"
./solver_tsptw --model=nearest \
    --time="$time" \
    --size="$size" \
    --grid_size="$grid_size" \
    --max_tw_size="$max_tw_size" \
    --max_tw_gap="$max_tw_gap" \
    --d_l=5000 \
    --cache=1 \
    --seed="$seed"
echo "------------------------------------------------------------------------"
echo "[BaB-DQN]"
./solver_tsptw --model=rl-bab-dqn \
    --time="$time" \
    --size="$size" \
    --grid_size="$grid_size" \
    --max_tw_size="$max_tw_size" \
    --max_tw_gap="$max_tw_gap" \
    --cache=1 \
    --seed="$seed"
echo "------------------------------------------------------------------------"
echo "[ILDS-DQN]"
./solver_tsptw --model=rl-ilds-dqn \
    --time="$time" \
    --size="$size" \
    --grid_size="$grid_size" \
    --max_tw_size="$max_tw_size" \
    --max_tw_gap="$max_tw_gap" \
    --d_l=5000 \
    --cache=1 \
    --seed="$seed"
echo "------------------------------------------------------------------------"
echo "[RBS-PPO]"
./solver_tsptw --model=rl-rbs-ppo \
    --time="$time" \
    --size="$size" \
    --grid_size="$grid_size" \
    --max_tw_size="$max_tw_size" \
    --max_tw_gap="$max_tw_gap" \
    --luby=128 \
    --temperature=20 \
    --cache=1 \
    --seed="$seed"


Binary file not shown.
Empty file.
74 changes: 74 additions & 0 deletions src/problem/portfolio/baseline/dqn_solving.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import sys
import os
import numpy as np
import argparse
import torch

sys.path.append(os.path.join(sys.path[0],'..','..','..','..'))

from src.problem.portfolio.solving.solver_binding import SolverBinding
from src.problem.portfolio.environment.environment import Environment

def parse_arguments(argv=None):
    """Parse the command-line options that describe the portfolio instance.

    :param argv: optional list of argument strings. When None (the default),
        argparse falls back to sys.argv[1:], which is the previous behaviour.
    :return: argparse.Namespace with n_item, capacity_ratio, lambda_1 to
        lambda_4, discrete_coeff and seed.
    """
    parser = argparse.ArgumentParser()

    # Instances parameters
    parser.add_argument('--n_item', type=int, default=10)
    parser.add_argument('--capacity_ratio', type=float, default=0.5)
    parser.add_argument('--lambda_1', type=int, default=1)
    parser.add_argument('--lambda_2', type=int, default=5)
    parser.add_argument('--lambda_3', type=int, default=5)
    parser.add_argument('--lambda_4', type=int, default=5)
    parser.add_argument('--discrete_coeff', type=int, default=0)
    parser.add_argument('--seed', type=int, default=1)

    return parser.parse_args(argv)

if __name__ == '__main__':
    # Greedy roll-out of the loaded DQN model: at every step the valid action
    # with the highest network output is applied, until the state reports done.
    args = parse_arguments()

    sys.stdout.flush()
    algo_name = "dqn"

    # Folder holding the trained model that matches this instance configuration.
    model_dir = "./selected-models/dqn/portfolio/n-item-%d/capacity-ratio-%.1f/moment-factors-%d-%d-%d-%d" % (
        args.n_item, args.capacity_ratio,
        args.lambda_1, args.lambda_2, args.lambda_3, args.lambda_4)

    binding = SolverBinding(model_dir, args.n_item, args.capacity_ratio,
                            args.lambda_1, args.lambda_2, args.lambda_3, args.lambda_4,
                            args.discrete_coeff, args.seed, algo_name)

    env = Environment(binding.instance, binding.n_feat, 1)
    state = env.get_initial_environment()

    chosen_actions = []
    accumulated_reward = 0

    finished = False
    while not finished:
        features = env.make_nn_input(state, 'cpu')
        valid_mask = env.get_valid_actions(state).astype(bool)

        # Inference only: add a batch dimension, no gradients needed.
        with torch.no_grad():
            scores = binding.model(features.unsqueeze(0))
        scores = scores.cpu().numpy().squeeze(0)

        # argmax runs over the valid subset only, so map its position back
        # to an index in the full action range.
        valid_ids = np.arange(len(scores))[valid_mask]
        action = valid_ids[np.argmax(scores[valid_mask])]

        state, reward = env.get_next_state_with_reward(state, action)
        chosen_actions.append(action)
        accumulated_reward += reward
        finished = state.is_done()

    print("ITEMS INSERTED:", chosen_actions)
    print("BEST SOLUTION:", accumulated_reward)
114 changes: 114 additions & 0 deletions src/problem/portfolio/baseline/ppo_solving.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import sys
import os
import numpy as np
import argparse
import torch

sys.path.append(os.path.join(sys.path[0],'..','..','..','..'))

from src.problem.portfolio.solving.solver_binding import SolverBinding
from src.problem.portfolio.environment.environment import Environment

def parse_arguments(argv=None):
    """Parse the command-line options for the PPO beam-search baseline.

    :param argv: optional list of argument strings. When None (the default),
        argparse falls back to sys.argv[1:], which is the previous behaviour.
    :return: argparse.Namespace with n_item, capacity_ratio, lambda_1 to
        lambda_4, discrete_coeff, seed and beam_size.
    """
    parser = argparse.ArgumentParser()

    # Instances parameters
    parser.add_argument('--n_item', type=int, default=10)
    parser.add_argument('--capacity_ratio', type=float, default=0.5)
    parser.add_argument('--lambda_1', type=int, default=1)
    parser.add_argument('--lambda_2', type=int, default=5)
    parser.add_argument('--lambda_3', type=int, default=5)
    parser.add_argument('--lambda_4', type=int, default=5)
    parser.add_argument('--discrete_coeff', type=int, default=0)
    parser.add_argument('--seed', type=int, default=1)
    # Number of partial solutions kept after each beam-search step
    parser.add_argument('--beam_size', type=int, default=4)
    return parser.parse_args(argv)

if __name__ == '__main__':
    # Beam-search roll-out of the loaded PPO model: every kept partial solution
    # is extended with decision 0 and decision 1, candidates are ranked by the
    # product of the policy probabilities, and the best beam_size are kept.
    args = parse_arguments()

    sys.stdout.flush()
    algo_name = "ppo"

    # Folder holding the trained model that matches this instance configuration.
    model_dir = "./selected-models/ppo/portfolio/n-item-%d/capacity-ratio-%.1f/moment-factors-%d-%d-%d-%d" % (
        args.n_item, args.capacity_ratio,
        args.lambda_1, args.lambda_2, args.lambda_3, args.lambda_4)

    binding = SolverBinding(model_dir, args.n_item, args.capacity_ratio,
                            args.lambda_1, args.lambda_2, args.lambda_3, args.lambda_4,
                            args.discrete_coeff, args.seed, algo_name)

    env = Environment(binding.instance, binding.n_feat, 1)

    # Beam entries are [decisions so far, state (None once the branch is dead), score].
    beam = [[list(), env.get_initial_environment(), 1.0]]

    for _ in range(args.n_item):

        expansions = list()
        for decisions, state, score in beam:

            if state is not None:
                features = env.make_nn_input(state, "cpu")
                mask = torch.FloatTensor(env.get_valid_actions(state))
                with torch.no_grad():
                    probs = binding.model(features.unsqueeze(0)).squeeze(0)

                # Shift the raw outputs before the masked softmax turns them
                # into probabilities over the valid actions.
                probs = probs + torch.abs(torch.min(probs))
                probs = probs - torch.max(probs * mask)
                probs = binding.actor_critic_network.masked_softmax(probs, mask, dim=0,
                                                                    temperature=1)
                probs = probs.detach()

            for decision in range(2):
                if state is not None and probs[decision] > 10e-30:
                    next_state, _reward = env.get_next_state_with_reward(state, decision)
                    expansions.append([decisions + [decision], next_state,
                                       score * probs[decision].detach()])
                else:
                    # Dead branch: tagged with -1 and score 0 so it is filtered out at the end.
                    expansions.append([decisions + [-1], None, 0])

        # Highest score first, then keep the beam_size best candidates.
        beam = sorted(expansions, key=lambda entry: -entry[2])[:args.beam_size]

    # Only sequences that never hit a dead branch are evaluated.
    complete = [decisions for decisions, _state, _score in beam if -1 not in decisions]

    best_profit = -1000000
    best_sol = []
    for decisions in complete:

        mean_sum = sum([a * b for a, b in zip(decisions, env.instance.means)])
        deviation_sum = sum([a * b for a, b in zip(decisions, env.instance.deviations)])
        skewness_sum = sum([a * b for a, b in zip(decisions, env.instance.skewnesses)])
        kurtosis_sum = sum([a * b for a, b in zip(decisions, env.instance.kurtosis)])

        moments = (mean_sum,
                   deviation_sum ** (1. / 2),
                   skewness_sum ** (1. / 3),
                   kurtosis_sum ** (1. / 4))
        if args.discrete_coeff:
            # Discrete variant: every term is truncated to an integer.
            moments = tuple(int(value) for value in moments)
        tot_mean, tot_deviation, tot_skewness, tot_kurtosis = moments

        tot_profit = args.lambda_1 * tot_mean - \
                     args.lambda_2 * tot_deviation + \
                     args.lambda_3 * tot_skewness - \
                     args.lambda_4 * tot_kurtosis

        if tot_profit > best_profit:
            best_sol = decisions
            best_profit = tot_profit

    print("ITEMS INSERTED:", best_sol)
    print("BEST SOLUTION:", best_profit)
Loading

0 comments on commit 929a0af

Please sign in to comment.