diff --git a/Makefile b/Makefile index b11f44595ae..a0fda84893d 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,7 @@ FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_ #CXX = g++ # for valgrind / gdb debugging -#FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -g -O0 -fPIC +FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -g -O0 -fPIC # for valgrind profiling: run 'valgrind --tool=callgrind PROGRAM' then 'callgrind_annotate --tree=both --inclusive=yes' #FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -g -fomit-frame-pointer -ffast-math -fno-strict-aliasing -fPIC diff --git a/Makefile.am b/Makefile.am index a299a4b64be..74d7557b063 100644 --- a/Makefile.am +++ b/Makefile.am @@ -26,7 +26,7 @@ nobase_include_HEADERS = vowpalwabbit/allreduce.h \ vowpalwabbit/vw_validate.h \ vowpalwabbit/multilabel.h \ vowpalwabbit/constant.h \ - vowpalwabbit/ezexample.h + vowpalwabbit/ezexample.h \ noinst_HEADERS = vowpalwabbit/accumulate.h \ @@ -37,6 +37,7 @@ noinst_HEADERS = vowpalwabbit/accumulate.h \ vowpalwabbit/cache.h \ vowpalwabbit/cb.h \ vowpalwabbit/cb_algs.h \ + vowpalwabbit/cb_explore.h \ vowpalwabbit/cbify.h \ vowpalwabbit/comp_io.h \ vowpalwabbit/constant.h \ @@ -44,6 +45,7 @@ noinst_HEADERS = vowpalwabbit/accumulate.h \ vowpalwabbit/csoaa.h \ vowpalwabbit/ect.h \ vowpalwabbit/interactions.h \ + vowpalwabbit/gen_cs_example.h \ vowpalwabbit/gd.h \ vowpalwabbit/gd_mf.h \ vowpalwabbit/interact.h \ diff --git a/vowpalwabbit/Makefile.am b/vowpalwabbit/Makefile.am index ad64765298b..f8d5ce6d76e 100644 --- a/vowpalwabbit/Makefile.am +++ b/vowpalwabbit/Makefile.am @@ -4,7 +4,7 @@ liballreduce_la_SOURCES = allreduce_sockets.cc allreduce_threads.cc vw_exception bin_PROGRAMS = vw active_interactor -libvw_la_SOURCES = hash.cc global_data.cc io_buf.cc parse_regressor.cc parse_primitives.cc unique_sort.cc cache.cc rand48.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc mwt.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc topk.cc stagewise_poly.cc log_multi.cc active.cc active_cover.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc +libvw_la_SOURCES = hash.cc global_data.cc io_buf.cc parse_regressor.cc parse_primitives.cc unique_sort.cc cache.cc rand48.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc mwt.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc topk.cc stagewise_poly.cc log_multi.cc active.cc active_cover.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc libvw_c_wrapper_la_SOURCES = vwdll.cpp diff --git a/vowpalwabbit/cb_algs.cc b/vowpalwabbit/cb_algs.cc index d8f97086018..e1e0afb7104 100644 --- a/vowpalwabbit/cb_algs.cc +++ b/vowpalwabbit/cb_algs.cc @@ -17,7 +17,7 @@ using namespace CB; struct cb { - cb_to_cs& cbcs; + cb_to_cs* cbcs; COST_SENSITIVE::label cb_cs_ld; }; @@ -48,7 +48,7 @@ template void predict_or_learn(cb& data, base_learner& base, example& ec) { CB::label ld = ec.l.cb; - cb_to_cs& c = data.cbcs; + cb_to_cs& c = *data.cbcs; c.known_cost = get_observed_cost(ld); if (c.known_cost != nullptr && (c.known_cost->action < 1 || c.known_cost->action > c.num_actions)) cerr << "invalid action: " << c.known_cost->action << endl; @@ -76,7 +76,7 @@ void predict_eval(cb&, base_learner&, example&) void learn_eval(cb& data, base_learner&, example& ec) { CB_EVAL::label ld = ec.l.cb_eval; - cb_to_cs& c = data.cbcs; + cb_to_cs& c = *data.cbcs; c.known_cost = get_observed_cost(ld.event); gen_cs_example(c, ec, ld.event, data.cb_cs_ld); @@ -97,7 +97,7 @@ float get_unbiased_cost(CB::cb_class* observation, COST_SENSITIVE::label& scores void output_example(vw& all, cb& data, example& ec, CB::label& ld) { float loss = 0.; - cb_to_cs& c = data.cbcs; + cb_to_cs& c = *data.cbcs; if(!is_test_label(ld)) loss = get_unbiased_cost(c.known_cost, c.pred_scores, ec.pred.multiclass); @@ -121,7 +121,7 @@ void output_example(vw& all, cb& data, example& ec, CB::label& ld) void finish(cb& data) { - cb_to_cs& c = data.cbcs; + cb_to_cs& c = *data.cbcs; data.cb_cs_ld.costs.delete_v(); COST_SENSITIVE::cs_label.delete_label(&c.pred_scores); free(&c); @@ -146,8 +146,8 @@ base_learner* cb_algs_setup(vw& all) add_options(all); cb& data = calloc_or_throw(); - data.cbcs = calloc_or_throw(); - cb_to_cs& c = data.cbcs; + data.cbcs = &calloc_or_throw(); + cb_to_cs& c = *data.cbcs; c.num_actions = (uint32_t)all.vm["cb"].as(); bool eval = false; diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index c03d362f9b1..59dcdea895e 100755 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -17,10 +17,10 @@ struct cb_explore; struct vw_context { - cb_explore& data; - base_learner& l; - example& e; - bool recorded; + cb_explore& data; + base_learner& l; + example& e; + bool recorded; }; void safety(v_array& distribution, float min_prob); @@ -60,12 +60,12 @@ class vw_cover : public IScorer virtual ~vw_cover() { } - v_array& Get_Probabilities() - { - probabilities.erase(); - for (size_t i = 0; i < num_actions; i++) - probabilities.push_back(0); - return probabilities; + v_array& Get_Probabilities() + { + probabilities.erase(); + for (size_t i = 0; i < num_actions; i++) + probabilities.push_back(0); + return probabilities; }; vector Score_Actions(vw_context& ctx); @@ -93,40 +93,41 @@ struct vw_recorder : public IRecorder struct cb_explore { - cb_to_cs& cbcs; + cb_to_cs* cbcs; + v_array preds; - CB::label cb_label; - COST_SENSITIVE::label cs_label; - COST_SENSITIVE::label second_cs_label; + CB::label cb_label; + COST_SENSITIVE::label cs_label; + COST_SENSITIVE::label second_cs_label; - base_learner* cs; + base_learner* cs; - vw_policy* policy; - TauFirstExplorer* tau_explorer; - vw_recorder* recorder; - MwtExplorer* mwt_explorer; - EpsilonGreedyExplorer* greedy_explorer; + vw_policy* policy; + TauFirstExplorer* tau_explorer; + vw_recorder* recorder; + MwtExplorer* mwt_explorer; + EpsilonGreedyExplorer* greedy_explorer; - BootstrapExplorer* bootstrap_explorer; - vector>> policies; + BootstrapExplorer* bootstrap_explorer; + vector>> policies; - vw_cover* cover; - GenericExplorer* generic_explorer; + vw_cover* cover; + GenericExplorer* generic_explorer; }; vector vw_cover::Score_Actions(vw_context& ctx) { float additive_probability = 1.f / (float)size; for (size_t i = 0; i < size; i++) - { //get predicted cost-sensitive predictions - if (i == 0) - ctx.data.cs->predict(ctx.e, i); - else - ctx.data.cs->predict(ctx.e, i + 1); - uint32_t pred = ctx.e.pred.multiclass; - probabilities[pred - 1] += additive_probability; - predictions[i] = (uint32_t)pred; - } - uint32_t num_actions = ctx.data.cbcs.num_actions; + { //get predicted cost-sensitive predictions + if (i == 0) + ctx.data.cs->predict(ctx.e, i); + else + ctx.data.cs->predict(ctx.e, i + 1); + uint32_t pred = ctx.e.pred.multiclass; + probabilities[pred - 1] += additive_probability; + predictions[i] = (uint32_t)pred; + } + uint32_t num_actions = ctx.data.cbcs->num_actions; float min_prob = epsilon * min(1.f / num_actions, 1.f / (float)sqrt(counter * num_actions)); safety(probabilities, min_prob); @@ -140,20 +141,31 @@ vector vw_cover::Score_Actions(vw_context& ctx) return probs_vec; } +void v_array_set(v_array& vec, uint32_t len, uint32_t idx, float val) +{ + vec.erase(); + // vec.resize(len); + for(uint32_t i = 0; i < len;i++) + vec.push_back(0.); + vec[idx] = val; +} + template void predict_or_learn_first(cb_explore& data, base_learner& base, example& ec) { //Explore tau times, then act according to optimal. vw_context vwc = {data, base, ec}; uint32_t action = data.mwt_explorer->Choose_Action(*data.tau_explorer, StringUtils::to_string(ec.example_counter), vwc); - - if (vwc.recorded && is_learn) + v_array_set(data.preds, data.cbcs->num_actions, action-1, data.recorder->probability); + ec.pred.scalars = data.preds; + + cout<<"Probability = "<probability<<" recorded = "<probability; } template @@ -167,9 +179,10 @@ void predict_or_learn_greedy(cb_explore& data, base_learner& base, example& ec) base.learn(ec); //ec.pred.multiclass = action; - ec.pred.scalars.erase(); - ec.pred.scalars.resize(data.cbcs.num_actions); - ec.pred.scalars[action] = data.recorder->probability; + //ec.pred.scalars.erase(); + v_array_set(data.preds, data.cbcs->num_actions, action-1, data.recorder->probability); + ec.pred.scalars = data.preds; + // cout<<"Size = "<num_actions<<" "<probability<<" "< @@ -181,32 +194,31 @@ void predict_or_learn_bag(cb_explore& data, base_learner& base, example& ec) if (is_learn) for (size_t i = 0; i < data.policies.size(); i++) - { uint32_t count = BS::weight_gen(); - for (uint32_t j = 0; j < count; j++) - base.learn(ec,i); - } + { uint32_t count = BS::weight_gen(); + for (uint32_t j = 0; j < count; j++) + base.learn(ec,i); + } //ec.pred.multiclass = action; - ec.pred.scalars.erase(); - ec.pred.scalars.resize(data.cbcs.num_actions); - ec.pred.scalars[action] = data.recorder->probability; + v_array_set(data.preds, data.cbcs->num_actions, action-1, data.recorder->probability); + ec.pred.scalars = data.preds; } void safety(v_array& distribution, float min_prob) { float added_mass = 0.; for (uint32_t i = 0; i < distribution.size(); i++) if (distribution[i] > 0 && distribution[i] <= min_prob) - { added_mass += min_prob - distribution[i]; - distribution[i] = min_prob; - } + { added_mass += min_prob - distribution[i]; + distribution[i] = min_prob; + } float ratio = 1.f / (1.f + added_mass); if (ratio < 0.999) - { for (uint32_t i = 0; i < distribution.size(); i++) - if (distribution[i] > min_prob) - distribution[i] = distribution[i] * ratio; - safety(distribution, min_prob); - } + { for (uint32_t i = 0; i < distribution.size(); i++) + if (distribution[i] > min_prob) + distribution[i] = distribution[i] * ratio; + safety(distribution, min_prob); + } } @@ -215,20 +227,19 @@ void predict_or_learn_cover(cb_explore& data, base_learner& base, example& ec) { //Randomize over predictions from a base set of predictors //Use cost sensitive oracle to cover actions to form distribution. - uint32_t num_actions = data.cbcs.num_actions; - cb_to_cs&c = data.cbcs; + uint32_t num_actions = data.cbcs->num_actions; data.cs_label.costs.erase(); for (uint32_t j = 0; j < num_actions; j++) - { COST_SENSITIVE::wclass wc; - - //get cost prediction for this label - wc.x = FLT_MAX; - wc.class_index = j+1; - wc.partial_prediction = 0.; - wc.wap_value = 0.; - data.cs_label.costs.push_back(wc); - } + { COST_SENSITIVE::wclass wc; + + //get cost prediction for this label + wc.x = FLT_MAX; + wc.class_index = j+1; + wc.partial_prediction = 0.; + wc.wap_value = 0.; + data.cs_label.costs.push_back(wc); + } float epsilon = data.cover->epsilon; size_t cover_size = data.cover->size; @@ -253,41 +264,38 @@ void predict_or_learn_cover(cb_explore& data, base_learner& base, example& ec) //1. Compute loss vector data.cs_label.costs.erase(); float norm = min_prob * num_actions; - gen_cs_example(data.cbcs, ec, ld, data.cs_label); + gen_cs_example(*data.cbcs, ec, ld, data.cs_label); ec.l.cs = data.second_cs_label; //2. Update functions for (size_t i = 0; i < cover_size; i++) - { //Create costs of each action based on online cover - for (uint32_t j = 0; j < num_actions; j++) - { float pseudo_cost = data.cs_label.costs[j].x - epsilon * min_prob / (max(probabilities[j], min_prob) / norm) + 1; - data.second_cs_label.costs[j].class_index = j+1; - data.second_cs_label.costs[j].x = pseudo_cost; + { //Create costs of each action based on online cover + for (uint32_t j = 0; j < num_actions; j++) + { float pseudo_cost = data.cs_label.costs[j].x - epsilon * min_prob / (max(probabilities[j], min_prob) / norm) + 1; + data.second_cs_label.costs[j].class_index = j+1; + data.second_cs_label.costs[j].x = pseudo_cost; + } + if (i != 0) + data.cs->learn(ec,i+1); + if (probabilities[predictions[i] - 1] < min_prob) + norm += max(0, additive_probability - (min_prob - probabilities[predictions[i] - 1])); + else + norm += additive_probability; + probabilities[predictions[i] - 1] += additive_probability; } - if (i != 0) - data.cs->learn(ec,i+1); - if (probabilities[predictions[i] - 1] < min_prob) - norm += max(0, additive_probability - (min_prob - probabilities[predictions[i] - 1])); - else - norm += additive_probability; - probabilities[predictions[i] - 1] += additive_probability; - } } ec.l.cb = ld; - ec.pred.scalars.erase(); - ec.pred.scalars.resize(data.cbcs.num_actions); - ec.pred.scalars[action] = data.recorder->probability; - //ALEKH: FIX ME!! - for (uint32_t i = 0; i < num_actions; i++) - ec.pred.scalars.push_back(probabilities[i]); + v_array_set(data.preds, data.cbcs->num_actions, action-1, data.recorder->probability); + ec.pred.scalars = data.preds; } template inline void delete_it(T* p) { if (p != nullptr) delete p; } void finish(cb_explore& data) -{ - cb_to_cs& c = data.cbcs; +{ + data.preds.delete_v(); + cb_to_cs& c = *data.cbcs; COST_SENSITIVE::cs_label.delete_label(&c.pred_scores); free(&c); CB::cb_label.delete_label(&data.cb_label); @@ -301,9 +309,9 @@ void finish(cb_explore& data) delete_it(data.mwt_explorer); delete_it(data.recorder); if (data.cover != nullptr) - { data.cover->predictions.delete_v(); - data.cover->probabilities.delete_v(); - } + { data.cover->predictions.delete_v(); + data.cover->probabilities.delete_v(); + } delete_it(data.cover); if (data.policies.size() > 0) data.policies.~vector(); @@ -311,34 +319,38 @@ void finish(cb_explore& data) base_learner* cb_explore_setup(vw& all) { //parse and set arguments + cout<<"Setting up cb_explore\n"; if (missing_option(all, "cb_explore", "Online explore-exploit for a action contextual bandit problem")) return nullptr; + cout<<"Still Setting up cb_explore\n"; new_options(all, "CB_EXPLORE options") - ("first", po::value(), "tau-first exploration") - ("epsilon",po::value() ,"epsilon-greedy exploration") - ("bag",po::value() ,"bagging-based exploration") - ("cover",po::value() ,"bagging-based exploration"); + ("first", po::value(), "tau-first exploration") + ("epsilon",po::value() ,"epsilon-greedy exploration") + ("bag",po::value() ,"bagging-based exploration") + ("cover",po::value() ,"bagging-based exploration"); add_options(all); po::variables_map& vm = all.vm; cb_explore& data = calloc_or_throw(); - data.cbcs = calloc_or_throw(); - data.cbcs.num_actions = (uint32_t)vm["cb_explore"].as(); - uint32_t num_actions = data.cbcs.num_actions; + data.cbcs = &(calloc_or_throw()); + data.cbcs->num_actions = (uint32_t)vm["cb_explore"].as(); + uint32_t num_actions = data.cbcs->num_actions; - //ALEKH: All of the allocations need fixing + data.preds = v_init(); if (count(all.args.begin(), all.args.end(),"--cb") == 0) - { all.args.push_back("--cb"); - stringstream ss; - ss << vm["cb_explore"].as(); - all.args.push_back(ss.str()); - } + { all.args.push_back("--cb"); + stringstream ss; + ss << vm["cb_explore"].as(); + all.args.push_back(ss.str()); + } - if (count(all.args.begin(), all.args.end(), "--cb_type") == 0) - data.cbcs.cb_type = CB_TYPE_DR; - else - data.cbcs.cb_type = (size_t)vm["cb_type"].as(); + data.cbcs->cb_type = CB_TYPE_DR; + //ALEKH: Others TBD later + // if (count(all.args.begin(), all.args.end(), "--cb_type") == 0) + // data.cbcs->cb_type = CB_TYPE_DR; + // else + // data.cbcs->cb_type = (size_t)vm["cb_type"].as(); base_learner* base = setup_base(all); @@ -346,43 +358,43 @@ base_learner* cb_explore_setup(vw& all) data.recorder = new vw_recorder(); data.mwt_explorer = new MwtExplorer("vw", *data.recorder); if (vm.count("cover")) - { size_t cover = (uint32_t)vm["cover"].as(); - data.cs = all.cost_sensitive; - data.second_cs_label.costs.resize(num_actions); - data.second_cs_label.costs.end() = data.second_cs_label.costs.begin()+num_actions; - float epsilon = 0.05f; - if (vm.count("epsilon")) - epsilon = vm["epsilon"].as(); - data.cover = new vw_cover(epsilon, cover, (u32)num_actions); - data.generic_explorer = new GenericExplorer(*data.cover, (u32)num_actions); - l = &init_multiclass_learner(&data, base, predict_or_learn_cover, - predict_or_learn_cover, all.p, cover + 1); - } + { size_t cover = (uint32_t)vm["cover"].as(); + data.cs = all.cost_sensitive; + data.second_cs_label.costs.resize(num_actions); + data.second_cs_label.costs.end() = data.second_cs_label.costs.begin()+num_actions; + float epsilon = 0.05f; + if (vm.count("epsilon")) + epsilon = vm["epsilon"].as(); + data.cover = new vw_cover(epsilon, cover, (u32)num_actions); + data.generic_explorer = new GenericExplorer(*data.cover, (u32)num_actions); + l = &init_multiclass_learner(&data, base, predict_or_learn_cover, + predict_or_learn_cover, all.p, cover + 1); + } else if (vm.count("bag")) - { size_t bags = (uint32_t)vm["bag"].as(); - for (size_t i = 0; i < bags; i++) - data.policies.push_back(unique_ptr>(new vw_policy(i))); - data.bootstrap_explorer = new BootstrapExplorer(data.policies, (u32)num_actions); - l = &init_multiclass_learner(&data, base, predict_or_learn_bag, - predict_or_learn_bag, all.p, bags); - } + { size_t bags = (uint32_t)vm["bag"].as(); + for (size_t i = 0; i < bags; i++) + data.policies.push_back(unique_ptr>(new vw_policy(i))); + data.bootstrap_explorer = new BootstrapExplorer(data.policies, (u32)num_actions); + l = &init_multiclass_learner(&data, base, predict_or_learn_bag, + predict_or_learn_bag, all.p, bags); + } else if (vm.count("first") ) - { uint32_t tau = (uint32_t)vm["first"].as(); - data.policy = new vw_policy(0); - data.tau_explorer = new TauFirstExplorer(*data.policy, (u32)tau, (u32)num_actions); - l = &init_multiclass_learner(&data, base, predict_or_learn_first, - predict_or_learn_first, all.p, 1); - } + { uint32_t tau = (uint32_t)vm["first"].as(); + data.policy = new vw_policy(0); + data.tau_explorer = new TauFirstExplorer(*data.policy, (u32)tau, (u32)num_actions); + l = &init_multiclass_learner(&data, base, predict_or_learn_first, + predict_or_learn_first, all.p, 1); + } else - { float epsilon = 0.05f; - if (vm.count("epsilon")) - epsilon = vm["epsilon"].as(); - data.policy = new vw_policy(0); - data.greedy_explorer = new EpsilonGreedyExplorer(*data.policy, epsilon, (u32)num_actions); - l = &init_multiclass_learner(&data, base, predict_or_learn_greedy, - predict_or_learn_greedy, all.p, 1); - } - data.cbcs.scorer = all.scorer; + { float epsilon = 0.05f; + if (vm.count("epsilon")) + epsilon = vm["epsilon"].as(); + data.policy = new vw_policy(0); + data.greedy_explorer = new EpsilonGreedyExplorer(*data.policy, epsilon, (u32)num_actions); + l = &init_multiclass_learner(&data, base, predict_or_learn_greedy, + predict_or_learn_greedy, all.p, 1); + } + data.cbcs->scorer = all.scorer; l->set_finish(finish); return make_base(*l); diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 93b59b40c11..576220871be 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -21,15 +21,15 @@ struct vw_context struct cbify { - CB::label cb_label; + CB::label cb_label; }; float loss(uint32_t label, uint32_t final_prediction) { - if (label != final_prediction) - return 1.; - else - return 0.; + if (label != final_prediction) + return 1.; + else + return 0.; } @@ -40,33 +40,38 @@ void finish(cbify& data) } template -void predict_or_learn(cbify& c, base_learner& base, example& ec) +void predict_or_learn(cbify& data, base_learner& base, example& ec) { - //ALEKH: Ideally, we will be able to return the probability from base.predict, perhaps using the probs field in ec.pred. - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; - - //Call the cb_explore algorithm. It returns a vector with one non-zero entry denoting the probability of the chosen action - v_array pred = base.predict(ec); - - //Create a new cb label - data.cb_label.costs.erase(); - ec.l.cb = data.cb_label; - - CB::cb_class cl; - uin32_t action = 0; - for (uint32_t i = 0; i < pred.size;i++) - if (pred[i] > 0.) - { - cl.action = i; - cl.probability = pred[i]; - } - cl.cost = loss(ld.label, cl.action); - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; - base.learn(ec); - ec.l.multi = ld; + //ALEKH: Ideally, we will be able to return the probability from base.predict, perhaps using the probs field in ec.pred. + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; + //Create a new cb label + data.cb_label.costs.erase(); + ec.l.cb = data.cb_label; + + //Call the cb_explore algorithm. It returns a vector with one non-zero entry denoting the probability of the chosen action + base.predict(ec); + v_array pred = ec.pred.scalars; + + CB::cb_class cl; + for (uint32_t i = 0; i < pred.size();i++) { + if (pred[i] > 0.) + { + cl.action = i+1; + cl.probability = pred[i]; + } + } + + if(!cl.action) + THROW("No action with non-zero probability found!"); + uint32_t action = cl.action; + cl.cost = loss(ld.label, cl.action); + data.cb_label.costs.push_back(cl); + ec.l.cb = data.cb_label; + base.learn(ec); + ec.l.multi = ld; + ec.pred.multiclass = action; } base_learner* cbify_setup(vw& all) @@ -86,6 +91,7 @@ base_learner* cbify_setup(vw& all) base_learner* base = setup_base(all); learner* l; + l = &init_learner(&data, base, predict_or_learn, predict_or_learn); l->set_finish(finish); return make_base(*l); diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index acd4ab10caf..848dfaa0cc0 100755 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -1,7 +1,7 @@ /* -Copyright (c) by respective owners including Yahoo!, Microsoft, and -individual contributors. All rights reserved. Released under a BSD (revised) -license as described in the file LICENSE. + Copyright (c) by respective owners including Yahoo!, Microsoft, and + individual contributors. All rights reserved. Released under a BSD (revised) + license as described in the file LICENSE. */ #include @@ -17,70 +17,70 @@ using namespace CB; inline bool observed_cost(cb_class* cl) { //cost observed for this action if it has non zero probability and cost != FLT_MAX - return (cl != nullptr && cl->cost != FLT_MAX && cl->probability > .0); + return (cl != nullptr && cl->cost != FLT_MAX && cl->probability > .0); } cb_class* get_observed_cost(CB::label& ld) { - for (auto& cl : ld.costs) - if (observed_cost(&cl)) - return &cl; - return nullptr; + for (auto& cl : ld.costs) + if (observed_cost(&cl)) + return &cl; + return nullptr; } void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld) { //this implements the inverse propensity score method, where cost are importance weighted by the probability of the chosen action - //generate cost-sensitive example - cs_ld.costs.erase(); - if (ld.costs.size() == 1) //this is a typical example where we can perform all actions - { //in this case generate cost-sensitive example with all actions - for (uint32_t i = 1; i <= c.num_actions; i++) - { - COST_SENSITIVE::wclass wc; - wc.wap_value = 0.; - wc.x = 0.; - wc.class_index = i; - wc.partial_prediction = 0.; - wc.wap_value = 0.; - if (c.known_cost != nullptr && i == c.known_cost->action) - { - wc.x = c.known_cost->cost / c.known_cost->probability; //use importance weighted cost for observed action, 0 otherwise - //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything - //update the loss of this regressor - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); - c.last_pred_reg = 0; - c.last_correct_cost = c.known_cost->cost; - } + //generate cost-sensitive example + cs_ld.costs.erase(); + if (ld.costs.size() == 1) //this is a typical example where we can perform all actions + { //in this case generate cost-sensitive example with all actions + for (uint32_t i = 1; i <= c.num_actions; i++) + { + COST_SENSITIVE::wclass wc; + wc.wap_value = 0.; + wc.x = 0.; + wc.class_index = i; + wc.partial_prediction = 0.; + wc.wap_value = 0.; + if (c.known_cost != nullptr && i == c.known_cost->action) + { + wc.x = c.known_cost->cost / c.known_cost->probability; //use importance weighted cost for observed action, 0 otherwise + //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything + //update the loss of this regressor + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); + c.last_pred_reg = 0; + c.last_correct_cost = c.known_cost->cost; + } - cs_ld.costs.push_back(wc); - } + cs_ld.costs.push_back(wc); } - else //this is an example where we can only perform a subset of the actions - { //in this case generate cost-sensitive example with only allowed actions - for (auto& cl : ld.costs) - { - COST_SENSITIVE::wclass wc; - wc.wap_value = 0.; - wc.x = 0.; - wc.class_index = cl.action; - wc.partial_prediction = 0.; - wc.wap_value = 0.; - if (c.known_cost != nullptr && cl.action == c.known_cost->action) - { - wc.x = c.known_cost->cost / c.known_cost->probability; //use importance weighted cost for observed action, 0 otherwise + } + else //this is an example where we can only perform a subset of the actions + { //in this case generate cost-sensitive example with only allowed actions + for (auto& cl : ld.costs) + { + COST_SENSITIVE::wclass wc; + wc.wap_value = 0.; + wc.x = 0.; + wc.class_index = cl.action; + wc.partial_prediction = 0.; + wc.wap_value = 0.; + if (c.known_cost != nullptr && cl.action == c.known_cost->action) + { + wc.x = c.known_cost->cost / c.known_cost->probability; //use importance weighted cost for observed action, 0 otherwise - //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything - //update the loss of this regressor - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); - c.last_pred_reg = 0; - c.last_correct_cost = c.known_cost->cost; - } + //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything + //update the loss of this regressor + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); + c.last_pred_reg = 0; + c.last_correct_cost = c.known_cost->cost; + } - cs_ld.costs.push_back(wc); - } + cs_ld.costs.push_back(wc); } + } } diff --git a/vowpalwabbit/gen_cs_example.h b/vowpalwabbit/gen_cs_example.h index b9a60638c09..875491b2ad6 100755 --- a/vowpalwabbit/gen_cs_example.h +++ b/vowpalwabbit/gen_cs_example.h @@ -1,7 +1,7 @@ /* -Copyright (c) by respective owners including Yahoo!, Microsoft, and -individual contributors. All rights reserved. Released under a BSD (revised) -license as described in the file LICENSE. + Copyright (c) by respective owners including Yahoo!, Microsoft, and + individual contributors. All rights reserved. Released under a BSD (revised) + license as described in the file LICENSE. */ #include @@ -14,16 +14,16 @@ using namespace CB; struct cb_to_cs { - size_t cb_type; - uint32_t num_actions; - COST_SENSITIVE::label pred_scores; - LEARNER::base_learner* scorer; - float avg_loss_regressors; - size_t nb_ex_regressors; - float last_pred_reg; - float last_correct_cost; - - cb_class* known_cost; + size_t cb_type; + uint32_t num_actions; + COST_SENSITIVE::label pred_scores; + LEARNER::base_learner* scorer; + float avg_loss_regressors; + size_t nb_ex_regressors; + float last_pred_reg; + float last_correct_cost; + + cb_class* known_cost; }; cb_class* get_observed_cost(CB::label& ld); @@ -33,147 +33,152 @@ void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld template void gen_cs_example_dm(cb_to_cs& c, example& ec, COST_SENSITIVE::label& cs_ld) { //this implements the direct estimation method, where costs are directly specified by the learned regressor. - CB::label ld = ec.l.cb; - - float min = FLT_MAX; - uint32_t argmin = 1; - //generate cost sensitive example - cs_ld.costs.erase(); - c.pred_scores.costs.erase(); - - if (ld.costs.size() == 1) //this is a typical example where we can perform all actions - { //in this case generate cost-sensitive example with all actions - for (uint32_t i = 1; i <= c.num_actions; i++) - { - COST_SENSITIVE::wclass wc; - wc.wap_value = 0.; - - //get cost prediction for this action - wc.x = CB_ALGS::get_cost_pred(c.scorer, c.known_cost, ec, i, 0); - if (wc.x < min) - { - min = wc.x; - argmin = i; - } - - wc.class_index = i; - wc.partial_prediction = 0.; - wc.wap_value = 0.; - - c.pred_scores.costs.push_back(wc); - - if (c.known_cost != nullptr && c.known_cost->action == i) - { - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); - c.last_pred_reg = wc.x; - c.last_correct_cost = c.known_cost->cost; - } - - cs_ld.costs.push_back(wc); - } + CB::label ld = ec.l.cb; + + float min = FLT_MAX; + uint32_t argmin = 1; + //generate cost sensitive example + cs_ld.costs.erase(); + c.pred_scores.costs.erase(); + + if (ld.costs.size() == 1) //this is a typical example where we can perform all actions + { //in this case generate cost-sensitive example with all actions + for (uint32_t i = 1; i <= c.num_actions; i++) + { + COST_SENSITIVE::wclass wc; + wc.wap_value = 0.; + + //get cost prediction for this action + wc.x = CB_ALGS::get_cost_pred(c.scorer, c.known_cost, ec, i, 0); + if (wc.x < min) + { + min = wc.x; + argmin = i; + } + + wc.class_index = i; + wc.partial_prediction = 0.; + wc.wap_value = 0.; + + c.pred_scores.costs.push_back(wc); + + if (c.known_cost != nullptr && c.known_cost->action == i) + { + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); + c.last_pred_reg = wc.x; + c.last_correct_cost = c.known_cost->cost; + } + + cs_ld.costs.push_back(wc); } - else //this is an example where we can only perform a subset of the actions - { //in this case generate cost-sensitive example with only allowed actions - for (auto& cl : ld.costs) - { - COST_SENSITIVE::wclass wc; - wc.wap_value = 0.; - - //get cost prediction for this action - wc.x = CB_ALGS::get_cost_pred(c.scorer, c.known_cost, ec, cl.action, 0); - if (wc.x < min || (wc.x == min && cl.action < argmin)) - { - min = wc.x; - argmin = cl.action; - } - - wc.class_index = cl.action; - wc.partial_prediction = 0.; - wc.wap_value = 0.; - - c.pred_scores.costs.push_back(wc); - - if (c.known_cost != nullptr && c.known_cost->action == cl.action) - { - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); - c.last_pred_reg = wc.x; - c.last_correct_cost = c.known_cost->cost; - } - - cs_ld.costs.push_back(wc); - } + } + else //this is an example where we can only perform a subset of the actions + { //in this case generate cost-sensitive example with only allowed actions + for (auto& cl : ld.costs) + { + COST_SENSITIVE::wclass wc; + wc.wap_value = 0.; + + //get cost prediction for this action + wc.x = CB_ALGS::get_cost_pred(c.scorer, c.known_cost, ec, cl.action, 0); + if (wc.x < min || (wc.x == min && cl.action < argmin)) + { + min = wc.x; + argmin = cl.action; + } + + wc.class_index = cl.action; + wc.partial_prediction = 0.; + wc.wap_value = 0.; + + c.pred_scores.costs.push_back(wc); + + if (c.known_cost != nullptr && c.known_cost->action == cl.action) + { + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); + c.last_pred_reg = wc.x; + c.last_correct_cost = c.known_cost->cost; + } + + cs_ld.costs.push_back(wc); } + } - ec.pred.multiclass = argmin; + ec.pred.multiclass = argmin; } template void gen_cs_label(cb_to_cs& c, example& ec, COST_SENSITIVE::label& cs_ld, uint32_t label) { - COST_SENSITIVE::wclass wc; - wc.wap_value = 0.; - - //get cost prediction for this label - wc.x = CB_ALGS::get_cost_pred(c.scorer, c.known_cost, ec, label, c.num_actions); - wc.class_index = label; - wc.partial_prediction = 0.; - wc.wap_value = 0.; - - c.pred_scores.costs.push_back(wc); - - //add correction if we observed cost for this action and regressor is wrong - if (c.known_cost != nullptr && c.known_cost->action == label) - { - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); - c.last_pred_reg = wc.x; - c.last_correct_cost = c.known_cost->cost; - wc.x += (c.known_cost->cost - wc.x) / c.known_cost->probability; - } - //cout<<"Prediction = "<(c.scorer, c.known_cost, ec, label, c.num_actions); + wc.class_index = label; + wc.partial_prediction = 0.; + wc.wap_value = 0.; + + c.pred_scores.costs.push_back(wc); + cout<<"Prediction = "<action == label) + { + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); + c.last_pred_reg = wc.x; + c.last_correct_cost = c.known_cost->cost; + wc.x += (c.known_cost->cost - wc.x) / c.known_cost->probability; + } + + cs_ld.costs.push_back(wc); } template void gen_cs_example_dr(cb_to_cs& c, example& ec, CB::label& ld, COST_SENSITIVE::label& cs_ld) { //this implements the doubly robust method - cs_ld.costs.erase(); - c.pred_scores.costs.erase(); - if (ld.costs.size() == 0)//a test example - for (uint32_t i = 1; i <= c.num_actions; i++) - { //Explicit declaration for a weak compiler. - COST_SENSITIVE::wclass c = { FLT_MAX, i, 0., 0. }; - cs_ld.costs.push_back(c); - } - else if (ld.costs.size() == 1) //this is a typical example where we can perform all actions - //in this case generate cost-sensitive example with all actions - for (uint32_t i = 1; i <= c.num_actions; i++) - gen_cs_label(c, ec, cs_ld, i); - else //this is an example where we can only perform a subset of the actions - //in this case generate cost-sensitive example with only allowed actions - for (auto& cl : ld.costs) - gen_cs_label(c, ec, cs_ld, cl.action); - //cout<action<<" "<probability<(c, ec, cs_ld, i); + cout<(c, ec, cs_ld, cl.action); + cout< void gen_cs_example(cb_to_cs& c, example& ec, CB::label& ld, COST_SENSITIVE::label& cs_ld) { - switch (c.cb_type) - { - case CB_TYPE_IPS: - gen_cs_example_ips(c, ld, cs_ld); - break; - case CB_TYPE_DM: - gen_cs_example_dm(c, ec, cs_ld); - break; - case CB_TYPE_DR: - gen_cs_example_dr(c, ec, ld, cs_ld); - default: - THROW("Unknown cb_type specified for contextual bandit learning: " << c.cb_type); - } + switch (c.cb_type) + { + case CB_TYPE_IPS: + gen_cs_example_ips(c, ld, cs_ld); + break; + case CB_TYPE_DM: + gen_cs_example_dm(c, ec, cs_ld); + break; + case CB_TYPE_DR: + gen_cs_example_dr(c, ec, ld, cs_ld); + break; + default: + THROW("Unknown cb_type specified for contextual bandit learning: " << c.cb_type); + } } diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc index 87f596ba956..36439ce2e0a 100644 --- a/vowpalwabbit/parse_args.cc +++ b/vowpalwabbit/parse_args.cc @@ -32,6 +32,7 @@ license as described in the file LICENSE. #include "csoaa.h" #include "cb_algs.h" #include "cb_adf.h" +#include "cb_explore.h" #include "mwt.h" #include "confidence.h" #include "scorer.h" @@ -1080,6 +1081,7 @@ void parse_reductions(vw& all) all.reduction_stack.push_back(cb_algs_setup); all.reduction_stack.push_back(cb_adf_setup); all.reduction_stack.push_back(mwt_setup); + all.reduction_stack.push_back(cb_explore_setup); all.reduction_stack.push_back(cbify_setup); all.reduction_stack.push_back(ExpReplay::expreplay_setup<'c', COST_SENSITIVE::cs_label>);