diff --git a/Makefile b/Makefile
index b11f44595ae..a0fda84893d 100644
--- a/Makefile
+++ b/Makefile
@@ -69,7 +69,7 @@ FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_
 #CXX = g++
 
 # for valgrind / gdb debugging
-#FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -g -O0  -fPIC
+#FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -g -O0  -fPIC
 
 # for valgrind profiling: run 'valgrind --tool=callgrind PROGRAM' then 'callgrind_annotate --tree=both --inclusive=yes'
 #FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) -g -fomit-frame-pointer -ffast-math -fno-strict-aliasing  -fPIC
diff --git a/Makefile.am b/Makefile.am
index a299a4b64be..74d7557b063 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -26,7 +26,7 @@ nobase_include_HEADERS = vowpalwabbit/allreduce.h \
 	vowpalwabbit/vw_validate.h \
 	vowpalwabbit/multilabel.h \
 	vowpalwabbit/constant.h \
-	vowpalwabbit/ezexample.h
+	vowpalwabbit/ezexample.h
 
 
 noinst_HEADERS = vowpalwabbit/accumulate.h \
@@ -37,6 +37,7 @@ noinst_HEADERS = vowpalwabbit/accumulate.h \
 	vowpalwabbit/cache.h \
 	vowpalwabbit/cb.h \
 	vowpalwabbit/cb_algs.h \
+	vowpalwabbit/cb_explore.h \
 	vowpalwabbit/cbify.h \
 	vowpalwabbit/comp_io.h \
 	vowpalwabbit/constant.h \
@@ -44,6 +45,7 @@ noinst_HEADERS = vowpalwabbit/accumulate.h \
 	vowpalwabbit/csoaa.h \
 	vowpalwabbit/ect.h \
 	vowpalwabbit/interactions.h \
+	vowpalwabbit/gen_cs_example.h \
 	vowpalwabbit/gd.h \
 	vowpalwabbit/gd_mf.h \
 	vowpalwabbit/interact.h \
diff --git a/vowpalwabbit/Makefile.am b/vowpalwabbit/Makefile.am
index ad64765298b..f8d5ce6d76e 100644
--- a/vowpalwabbit/Makefile.am
+++ b/vowpalwabbit/Makefile.am
@@ -4,7 +4,7 @@ liballreduce_la_SOURCES = allreduce_sockets.cc allreduce_threads.cc vw_exception
 
 bin_PROGRAMS = vw active_interactor
 
-libvw_la_SOURCES = hash.cc global_data.cc io_buf.cc parse_regressor.cc parse_primitives.cc unique_sort.cc cache.cc rand48.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc mwt.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc topk.cc stagewise_poly.cc log_multi.cc active.cc active_cover.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc
+libvw_la_SOURCES = hash.cc global_data.cc io_buf.cc parse_regressor.cc parse_primitives.cc unique_sort.cc cache.cc rand48.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc mwt.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc topk.cc stagewise_poly.cc log_multi.cc active.cc active_cover.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc
 
 libvw_c_wrapper_la_SOURCES = vwdll.cpp
 
diff --git a/vowpalwabbit/cb_algs.cc b/vowpalwabbit/cb_algs.cc
index d8f97086018..e1e0afb7104 100644
--- a/vowpalwabbit/cb_algs.cc
+++ b/vowpalwabbit/cb_algs.cc
@@ -17,7 +17,7 @@ using namespace CB;
 
 struct cb 
 { 
-  cb_to_cs& cbcs;
+  cb_to_cs* cbcs;
   COST_SENSITIVE::label cb_cs_ld;
 };
 
@@ -48,7 +48,7 @@ template <bool is_learn>
 void predict_or_learn(cb& data, base_learner& base, example& ec)
 { CB::label ld = ec.l.cb;
 
-  cb_to_cs& c = data.cbcs;
+  cb_to_cs& c = *data.cbcs;
   c.known_cost = get_observed_cost(ld);
   if (c.known_cost != nullptr && (c.known_cost->action < 1 || c.known_cost->action > c.num_actions))
     cerr << "invalid action: " << c.known_cost->action << endl;
@@ -76,7 +76,7 @@ void predict_eval(cb&, base_learner&, example&)
 void learn_eval(cb& data, base_learner&, example& ec)
 { CB_EVAL::label ld = ec.l.cb_eval;
 
-  cb_to_cs& c = data.cbcs;
+  cb_to_cs& c = *data.cbcs;
   c.known_cost = get_observed_cost(ld.event);
   gen_cs_example<true>(c, ec, ld.event, data.cb_cs_ld);
 
@@ -97,7 +97,7 @@ float get_unbiased_cost(CB::cb_class* observation, COST_SENSITIVE::label& scores
 void output_example(vw& all, cb& data, example& ec, CB::label& ld)
 { float loss = 0.;
 
-  cb_to_cs& c = data.cbcs;
+  cb_to_cs& c = *data.cbcs;
   if(!is_test_label(ld))
     loss = get_unbiased_cost(c.known_cost, c.pred_scores, ec.pred.multiclass);
 
@@ -121,7 +121,7 @@ void output_example(vw& all, cb& data, example& ec, CB::label& ld)
 
 void finish(cb& data)
 { 
-  cb_to_cs& c = data.cbcs;
+  cb_to_cs& c = *data.cbcs;
   data.cb_cs_ld.costs.delete_v(); 
   COST_SENSITIVE::cs_label.delete_label(&c.pred_scores); 
   free(&c);
@@ -146,8 +146,8 @@ base_learner* cb_algs_setup(vw& all)
   add_options(all);
 
   cb& data = calloc_or_throw<cb>();
-  data.cbcs = calloc_or_throw<cb_to_cs>();
-  cb_to_cs& c = data.cbcs;
+  data.cbcs = &calloc_or_throw<cb_to_cs>();
+  cb_to_cs& c = *data.cbcs;
   c.num_actions = (uint32_t)all.vm["cb"].as<size_t>();
 
   bool eval = false;
diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc
index c03d362f9b1..59dcdea895e 100755
--- a/vowpalwabbit/cb_explore.cc
+++ b/vowpalwabbit/cb_explore.cc
@@ -17,10 +17,10 @@ struct cb_explore;
 
 struct vw_context
 {
-	cb_explore& data;
-	base_learner& l;
-	example& e;
-	bool recorded;
+  cb_explore& data;
+  base_learner& l;
+  example& e;
+  bool recorded;
 };
 
 void safety(v_array<float>& distribution, float min_prob);
@@ -60,12 +60,12 @@ class vw_cover : public IScorer<vw_context>
 
   virtual ~vw_cover() { }
 
-  v_array<float>& Get_Probabilities()
-  {
-	  probabilities.erase();
-	  for (size_t i = 0; i < num_actions; i++)
-		  probabilities.push_back(0);
-	  return probabilities;
+  v_array<float>& Get_Probabilities()
+  {
+    probabilities.erase();
+    for (size_t i = 0; i < num_actions; i++)
+      probabilities.push_back(0);
+    return probabilities;
   };
 
   vector<float> Score_Actions(vw_context& ctx);
@@ -93,40 +93,41 @@ struct vw_recorder : public IRecorder<vw_context>
 
 struct cb_explore
 {
-	cb_to_cs& cbcs;
+  cb_to_cs* cbcs;
+  v_array<float> preds;
 
-	CB::label cb_label;
-	COST_SENSITIVE::label cs_label;
-	COST_SENSITIVE::label second_cs_label;
+  CB::label cb_label;
+  COST_SENSITIVE::label cs_label;
+  COST_SENSITIVE::label second_cs_label;
 
-	base_learner* cs;
+  base_learner* cs;
 
-	vw_policy* policy;
-	TauFirstExplorer<vw_context>* tau_explorer;
-	vw_recorder* recorder;
-	MwtExplorer<vw_context>* mwt_explorer;
-	EpsilonGreedyExplorer<vw_context>* greedy_explorer;
+  vw_policy* policy;
+  TauFirstExplorer<vw_context>* tau_explorer;
+  vw_recorder* recorder;
+  MwtExplorer<vw_context>* mwt_explorer;
+  EpsilonGreedyExplorer<vw_context>* greedy_explorer;
 
-	BootstrapExplorer<vw_context>* bootstrap_explorer;
-	vector<unique_ptr<IPolicy<vw_context>>> policies;
+  BootstrapExplorer<vw_context>* bootstrap_explorer;
+  vector<unique_ptr<IPolicy<vw_context>>> policies;
 
-	vw_cover* cover;
-	GenericExplorer<vw_context>* generic_explorer;
+  vw_cover* cover;
+  GenericExplorer<vw_context>* generic_explorer;
 };
 
 vector<float> vw_cover::Score_Actions(vw_context& ctx)
 { float additive_probability = 1.f / (float)size;
   for (size_t i = 0; i < size; i++)
-  { //get predicted cost-sensitive predictions
-    if (i == 0)
-      ctx.data.cs->predict(ctx.e, i);
-    else
-      ctx.data.cs->predict(ctx.e, i + 1);
-    uint32_t pred = ctx.e.pred.multiclass;
-    probabilities[pred - 1] += additive_probability;
-    predictions[i] = (uint32_t)pred;
-  }
-  uint32_t num_actions = ctx.data.cbcs.num_actions;
+    { //get predicted cost-sensitive predictions
+      if (i == 0)
+	ctx.data.cs->predict(ctx.e, i);
+      else
+	ctx.data.cs->predict(ctx.e, i + 1);
+      uint32_t pred = ctx.e.pred.multiclass;
+      probabilities[pred - 1] += additive_probability;
+      predictions[i] = (uint32_t)pred;
+    }
+  uint32_t num_actions = ctx.data.cbcs->num_actions;
   float min_prob = epsilon * min(1.f / num_actions, 1.f / (float)sqrt(counter * num_actions));
 
   safety(probabilities, min_prob);
@@ -140,20 +141,31 @@ vector<float> vw_cover::Score_Actions(vw_context& ctx)
   return probs_vec;
 }
 
+void v_array_set(v_array<float>& vec, uint32_t len, uint32_t idx, float val)
+{ //Reset vec to len zeros, then store val at idx; an out-of-range idx (e.g. a wrapped action-1) writes nothing.
+  vec.erase();
+  for (uint32_t i = 0; i < len; i++)
+    vec.push_back(0.f);
+  if (idx < len)
+    vec[idx] = val;
+}
+
 template <bool is_learn>
 void predict_or_learn_first(cb_explore& data, base_learner& base, example& ec)
 { //Explore tau times, then act according to optimal.
 
   vw_context vwc = {data, base, ec};
   uint32_t action = data.mwt_explorer->Choose_Action(*data.tau_explorer, StringUtils::to_string(ec.example_counter), vwc);
-
-  if (vwc.recorded && is_learn)
+  //Report the draw as a vector over actions: the chosen (1-based) action gets its
+  //recorded probability, every other slot is 0.
+  v_array_set(data.preds, data.cbcs->num_actions, action-1, data.recorder->probability);
+  ec.pred.scalars = data.preds;
+
+  if (is_learn)
     base.learn(ec);
 
+
   //ec.pred.multiclass = action;
-  ec.pred.scalars.erase();
-  ec.pred.scalars.resize(data.cbcs.num_actions);
-  ec.pred.scalars[action] = data.recorder->probability;
 }
 
 template <bool is_learn>
@@ -167,9 +179,10 @@ void predict_or_learn_greedy(cb_explore& data, base_learner& base, example& ec)
     base.learn(ec);
 
   //ec.pred.multiclass = action;
-  ec.pred.scalars.erase();
-  ec.pred.scalars.resize(data.cbcs.num_actions);
-  ec.pred.scalars[action] = data.recorder->probability;
+  //ec.pred.scalars.erase();
+  v_array_set(data.preds, data.cbcs->num_actions, action-1, data.recorder->probability);
+  ec.pred.scalars = data.preds;
+  //  cout<<"Size = "<<ec.pred.scalars.size()<<" "<<preds.size()<<" "<<data.cbcs->num_actions<<" "<<data.recorder->probability<<" "<<ec.pred.scalars[action]<<endl;
 }
 
 template <bool is_learn>
@@ -181,32 +194,31 @@ void predict_or_learn_bag(cb_explore& data, base_learner& base, example& ec)
 
   if (is_learn)  
     for (size_t i = 0; i < data.policies.size(); i++)
-    { uint32_t count = BS::weight_gen();
-      for (uint32_t j = 0; j < count; j++)
-        base.learn(ec,i);
-    }
+      { uint32_t count = BS::weight_gen();
+	for (uint32_t j = 0; j < count; j++)
+	  base.learn(ec,i);
+      }
 
   //ec.pred.multiclass = action;
-  ec.pred.scalars.erase();
-  ec.pred.scalars.resize(data.cbcs.num_actions);
-  ec.pred.scalars[action] = data.recorder->probability;
+  v_array_set(data.preds, data.cbcs->num_actions, action-1, data.recorder->probability);
+  ec.pred.scalars = data.preds;
 }
 
 void safety(v_array<float>& distribution, float min_prob)
 { float added_mass = 0.;
   for (uint32_t i = 0; i < distribution.size(); i++)
     if (distribution[i] > 0 && distribution[i] <= min_prob)
-    { added_mass += min_prob - distribution[i];
-      distribution[i] = min_prob;
-    }
+      { added_mass += min_prob - distribution[i];
+	distribution[i] = min_prob;
+      }
 
   float ratio = 1.f / (1.f + added_mass);
   if (ratio < 0.999)
-  { for (uint32_t i = 0; i < distribution.size(); i++)
-      if (distribution[i] > min_prob)
-        distribution[i] = distribution[i] * ratio;
-    safety(distribution, min_prob);
-  }
+    { for (uint32_t i = 0; i < distribution.size(); i++)
+	if (distribution[i] > min_prob)
+	  distribution[i] = distribution[i] * ratio;
+      safety(distribution, min_prob);
+    }
 }
 
 
@@ -215,20 +227,19 @@ void predict_or_learn_cover(cb_explore& data, base_learner& base, example& ec)
 { //Randomize over predictions from a base set of predictors
   //Use cost sensitive oracle to cover actions to form distribution.
 
-  uint32_t num_actions = data.cbcs.num_actions;
-  cb_to_cs&c = data.cbcs;
+  uint32_t num_actions = data.cbcs->num_actions;
 
   data.cs_label.costs.erase();
   for (uint32_t j = 0; j < num_actions; j++)
-  { COST_SENSITIVE::wclass wc;
-
-    //get cost prediction for this label
-    wc.x = FLT_MAX;
-    wc.class_index = j+1;
-    wc.partial_prediction = 0.;
-    wc.wap_value = 0.;
-    data.cs_label.costs.push_back(wc);
-  }
+    { COST_SENSITIVE::wclass wc;
+
+      //get cost prediction for this label
+      wc.x = FLT_MAX;
+      wc.class_index = j+1;
+      wc.partial_prediction = 0.;
+      wc.wap_value = 0.;
+      data.cs_label.costs.push_back(wc);
+    }
 
   float epsilon = data.cover->epsilon;
   size_t cover_size = data.cover->size;
@@ -253,41 +264,38 @@ void predict_or_learn_cover(cb_explore& data, base_learner& base, example& ec)
     //1. Compute loss vector
     data.cs_label.costs.erase();
     float norm = min_prob * num_actions;
-    gen_cs_example<false>(data.cbcs, ec, ld, data.cs_label);
+    gen_cs_example<false>(*data.cbcs, ec, ld, data.cs_label);
 
     ec.l.cs = data.second_cs_label;
     //2. Update functions
     for (size_t i = 0; i < cover_size; i++)
-    { //Create costs of each action based on online cover
-      for (uint32_t j = 0; j < num_actions; j++)
-      { float pseudo_cost = data.cs_label.costs[j].x - epsilon * min_prob / (max(probabilities[j], min_prob) / norm) + 1;
-        data.second_cs_label.costs[j].class_index = j+1;
-        data.second_cs_label.costs[j].x = pseudo_cost;
+      { //Create costs of each action based on online cover
+	for (uint32_t j = 0; j < num_actions; j++)
+	  { float pseudo_cost = data.cs_label.costs[j].x - epsilon * min_prob / (max(probabilities[j], min_prob) / norm) + 1;
+	    data.second_cs_label.costs[j].class_index = j+1;
+	    data.second_cs_label.costs[j].x = pseudo_cost;
+	  }
+	if (i != 0)
+	  data.cs->learn(ec,i+1);
+	if (probabilities[predictions[i] - 1] < min_prob)
+	  norm += max(0, additive_probability - (min_prob - probabilities[predictions[i] - 1]));
+	else
+	  norm += additive_probability;
+	probabilities[predictions[i] - 1] += additive_probability;
       }
-      if (i != 0)
-        data.cs->learn(ec,i+1);
-      if (probabilities[predictions[i] - 1] < min_prob)
-        norm += max(0, additive_probability - (min_prob - probabilities[predictions[i] - 1]));
-      else
-        norm += additive_probability;
-      probabilities[predictions[i] - 1] += additive_probability;
-    }
   }
 
   ec.l.cb = ld;
-  ec.pred.scalars.erase();
-  ec.pred.scalars.resize(data.cbcs.num_actions);
-  ec.pred.scalars[action] = data.recorder->probability;
-  //ALEKH: FIX ME!!
-  for (uint32_t i = 0; i < num_actions; i++)
-	  ec.pred.scalars.push_back(probabilities[i]);
+  v_array_set(data.preds, data.cbcs->num_actions, action-1, data.recorder->probability);
+  ec.pred.scalars = data.preds;
 }
 
 template<class T> inline void delete_it(T* p) { if (p != nullptr) delete p; }
 
 void finish(cb_explore& data)
-{ 
-  cb_to_cs& c = data.cbcs;
+{
+  data.preds.delete_v();
+  cb_to_cs& c = *data.cbcs;
   COST_SENSITIVE::cs_label.delete_label(&c.pred_scores);
   free(&c);
   CB::cb_label.delete_label(&data.cb_label);
@@ -301,9 +309,9 @@ void finish(cb_explore& data)
   delete_it(data.mwt_explorer);
   delete_it(data.recorder);
   if (data.cover != nullptr)
-  { data.cover->predictions.delete_v();
-    data.cover->probabilities.delete_v();
-  }
+    { data.cover->predictions.delete_v();
+      data.cover->probabilities.delete_v();
+    }
   delete_it(data.cover);
   if (data.policies.size() > 0)
     data.policies.~vector();
@@ -311,34 +319,38 @@ void finish(cb_explore& data)
 
 base_learner* cb_explore_setup(vw& all)
 { //parse and set arguments
+  //No --cb_explore <k> on the command line: this reduction is not wanted, so return nullptr without printing.
   if (missing_option<size_t, true>(all, "cb_explore", "Online explore-exploit for a <k> action contextual bandit problem"))
     return nullptr;
+  //Exploration strategy switches; they are read back from vm further down.
   new_options(all, "CB_EXPLORE options")
-  ("first", po::value<size_t>(), "tau-first exploration")
-  ("epsilon",po::value<float>() ,"epsilon-greedy exploration")
-  ("bag",po::value<size_t>() ,"bagging-based exploration")
-  ("cover",po::value<size_t>() ,"bagging-based exploration");
+    ("first", po::value<size_t>(), "tau-first exploration")
+    ("epsilon",po::value<float>() ,"epsilon-greedy exploration")
+    ("bag",po::value<size_t>() ,"bagging-based exploration")
+    ("cover",po::value<size_t>() ,"cover-based exploration");
   add_options(all);
 
   po::variables_map& vm = all.vm;
   cb_explore& data = calloc_or_throw<cb_explore>();
-  data.cbcs = calloc_or_throw<cb_to_cs>();
-  data.cbcs.num_actions = (uint32_t)vm["cb_explore"].as<size_t>();
-  uint32_t num_actions = data.cbcs.num_actions;
+  data.cbcs = &(calloc_or_throw<cb_to_cs>());
+  data.cbcs->num_actions = (uint32_t)vm["cb_explore"].as<size_t>();
+  uint32_t num_actions = data.cbcs->num_actions;
 
-  //ALEKH: All of the allocations need fixing
+  data.preds = v_init<float>();
 
   if (count(all.args.begin(), all.args.end(),"--cb") == 0)
-  { all.args.push_back("--cb");
-    stringstream ss;
-    ss << vm["cb_explore"].as<size_t>();
-    all.args.push_back(ss.str());
-  }
+    { all.args.push_back("--cb");
+      stringstream ss;
+      ss << vm["cb_explore"].as<size_t>();
+      all.args.push_back(ss.str());
+    }
 
-  if (count(all.args.begin(), all.args.end(), "--cb_type") == 0)
-	data.cbcs.cb_type = CB_TYPE_DR;
-  else
-	data.cbcs.cb_type = (size_t)vm["cb_type"].as<size_t>();
+  data.cbcs->cb_type = CB_TYPE_DR;
+  //ALEKH: Others TBD later
+  // if (count(all.args.begin(), all.args.end(), "--cb_type") == 0)
+  //   data.cbcs->cb_type = CB_TYPE_DR;
+  // else
+  //   data.cbcs->cb_type = (size_t)vm["cb_type"].as<size_t>();
 
   base_learner* base = setup_base(all);
 
@@ -346,43 +358,43 @@ base_learner* cb_explore_setup(vw& all)
   data.recorder = new vw_recorder();
   data.mwt_explorer = new MwtExplorer<vw_context>("vw", *data.recorder);
   if (vm.count("cover"))
-  { size_t cover = (uint32_t)vm["cover"].as<size_t>();
-    data.cs = all.cost_sensitive;
-    data.second_cs_label.costs.resize(num_actions);
-    data.second_cs_label.costs.end() = data.second_cs_label.costs.begin()+num_actions;
-    float epsilon = 0.05f;
-    if (vm.count("epsilon"))
-      epsilon = vm["epsilon"].as<float>();
-    data.cover = new vw_cover(epsilon, cover, (u32)num_actions);
-    data.generic_explorer = new GenericExplorer<vw_context>(*data.cover, (u32)num_actions);
-    l = &init_multiclass_learner(&data, base, predict_or_learn_cover<true>,
-                                 predict_or_learn_cover<false>, all.p, cover + 1);
-  }
+    { size_t cover = (uint32_t)vm["cover"].as<size_t>();
+      data.cs = all.cost_sensitive;
+      data.second_cs_label.costs.resize(num_actions);
+      data.second_cs_label.costs.end() = data.second_cs_label.costs.begin()+num_actions;
+      float epsilon = 0.05f;
+      if (vm.count("epsilon"))
+	epsilon = vm["epsilon"].as<float>();
+      data.cover = new vw_cover(epsilon, cover, (u32)num_actions);
+      data.generic_explorer = new GenericExplorer<vw_context>(*data.cover, (u32)num_actions);
+      l = &init_multiclass_learner(&data, base, predict_or_learn_cover<true>,
+				   predict_or_learn_cover<false>, all.p, cover + 1);
+    }
   else if (vm.count("bag"))
-  { size_t bags = (uint32_t)vm["bag"].as<size_t>();
-    for (size_t i = 0; i < bags; i++)
-      data.policies.push_back(unique_ptr<IPolicy<vw_context>>(new vw_policy(i)));
-    data.bootstrap_explorer = new BootstrapExplorer<vw_context>(data.policies, (u32)num_actions);
-    l = &init_multiclass_learner(&data, base, predict_or_learn_bag<true>,
-                                 predict_or_learn_bag<false>, all.p, bags);
-  }
+    { size_t bags = (uint32_t)vm["bag"].as<size_t>();
+      for (size_t i = 0; i < bags; i++)
+	data.policies.push_back(unique_ptr<IPolicy<vw_context>>(new vw_policy(i)));
+      data.bootstrap_explorer = new BootstrapExplorer<vw_context>(data.policies, (u32)num_actions);
+      l = &init_multiclass_learner(&data, base, predict_or_learn_bag<true>,
+				   predict_or_learn_bag<false>, all.p, bags);
+    }
   else if (vm.count("first") )
-  { uint32_t tau = (uint32_t)vm["first"].as<size_t>();
-    data.policy = new vw_policy(0);
-    data.tau_explorer = new TauFirstExplorer<vw_context>(*data.policy, (u32)tau, (u32)num_actions);
-    l = &init_multiclass_learner(&data, base, predict_or_learn_first<true>,
-                                 predict_or_learn_first<false>, all.p, 1);
-  }
+    { uint32_t tau = (uint32_t)vm["first"].as<size_t>();
+      data.policy = new vw_policy(0);
+      data.tau_explorer = new TauFirstExplorer<vw_context>(*data.policy, (u32)tau, (u32)num_actions);
+      l = &init_multiclass_learner(&data, base, predict_or_learn_first<true>,
+				   predict_or_learn_first<false>, all.p, 1);
+    }
   else
-  { float epsilon = 0.05f;
-    if (vm.count("epsilon"))
-      epsilon = vm["epsilon"].as<float>();
-    data.policy = new vw_policy(0);
-    data.greedy_explorer = new EpsilonGreedyExplorer<vw_context>(*data.policy, epsilon, (u32)num_actions);
-    l = &init_multiclass_learner(&data, base, predict_or_learn_greedy<true>,
-                                 predict_or_learn_greedy<false>, all.p, 1);
-  }
-  data.cbcs.scorer = all.scorer;
+    { float epsilon = 0.05f;
+      if (vm.count("epsilon"))
+	epsilon = vm["epsilon"].as<float>();
+      data.policy = new vw_policy(0);
+      data.greedy_explorer = new EpsilonGreedyExplorer<vw_context>(*data.policy, epsilon, (u32)num_actions);
+      l = &init_multiclass_learner(&data, base, predict_or_learn_greedy<true>,
+				   predict_or_learn_greedy<false>, all.p, 1);
+    }
+  data.cbcs->scorer = all.scorer;
   l->set_finish(finish);
 
   return make_base(*l);
diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc
index 93b59b40c11..576220871be 100644
--- a/vowpalwabbit/cbify.cc
+++ b/vowpalwabbit/cbify.cc
@@ -21,15 +21,15 @@ struct vw_context
 
 struct cbify
 {
-	CB::label cb_label;
+  CB::label cb_label;
 };
 
 float loss(uint32_t label, uint32_t final_prediction)
 {
-	if (label != final_prediction)
-		return 1.;
-	else
-		return 0.;
+  if (label != final_prediction)
+    return 1.;
+  else
+    return 0.;
 }
 
 
@@ -40,33 +40,38 @@ void finish(cbify& data)
 }
 
 template <bool is_learn>
-void predict_or_learn(cbify& c, base_learner& base, example& ec) 
+void predict_or_learn(cbify& data, base_learner& base, example& ec) 
 {
 
-	//ALEKH: Ideally, we will be able to return the probability from base.predict, perhaps using the probs field in ec.pred.
-	//Store the multiclass input label
-	MULTICLASS::label_t ld = ec.l.multi;
-
-	//Call the cb_explore algorithm. It returns a vector with one non-zero entry denoting the probability of the chosen action
-	v_array<float> pred = base.predict(ec);
-
-	//Create a new cb label
-	data.cb_label.costs.erase();
-	ec.l.cb = data.cb_label;
-
-	CB::cb_class cl;
-	uin32_t action = 0;
-	for (uint32_t i = 0; i < pred.size;i++) 
-		if (pred[i] > 0.)
-		{
-			cl.action = i;
-			cl.probability = pred[i];
-		}
-	cl.cost = loss(ld.label, cl.action);
-	data.cb_label.costs.push_back(cl);
-	ec.l.cb = data.cb_label;
-	base.learn(ec);
-	ec.l.multi = ld;
+  //ALEKH: Ideally, we will be able to return the probability from base.predict, perhaps using the probs field in ec.pred.
+  //Store the multiclass input label
+  MULTICLASS::label_t ld = ec.l.multi;
+  //Create a new cb label
+  data.cb_label.costs.erase();
+  ec.l.cb = data.cb_label;
+
+  //Call the cb_explore algorithm. It returns a vector with one non-zero entry denoting the probability of the chosen action
+  base.predict(ec);
+  v_array<float> pred = ec.pred.scalars;
+
+  CB::cb_class cl;
+  //pred slots are 0-based, cb actions 1-based; 0 therefore marks "nothing chosen" for the check below
+  cl.action = 0;
+  for (uint32_t i = 0; i < pred.size(); i++)
+    if (pred[i] > 0.)
+    { cl.action = i+1;
+      cl.probability = pred[i];
+    }
+
+  if (!cl.action)
+    THROW("No action with non-zero probability found!");
+  uint32_t action = cl.action;
+  cl.cost = loss(ld.label, cl.action);
+  data.cb_label.costs.push_back(cl);
+  ec.l.cb = data.cb_label;
+  base.learn(ec);
+  ec.l.multi = ld;
+  ec.pred.multiclass = action;
 }
 
 base_learner* cbify_setup(vw& all)
@@ -86,6 +91,7 @@ base_learner* cbify_setup(vw& all)
   base_learner* base = setup_base(all);
 
   learner<cbify>* l;
+  l = &init_learner(&data, base, predict_or_learn<true>, predict_or_learn<false>);
   l->set_finish(finish);
 
   return make_base(*l);
diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc
index acd4ab10caf..848dfaa0cc0 100755
--- a/vowpalwabbit/gen_cs_example.cc
+++ b/vowpalwabbit/gen_cs_example.cc
@@ -1,7 +1,7 @@
 /*
-Copyright (c) by respective owners including Yahoo!, Microsoft, and
-individual contributors. All rights reserved.  Released under a BSD (revised)
-license as described in the file LICENSE.
+  Copyright (c) by respective owners including Yahoo!, Microsoft, and
+  individual contributors. All rights reserved.  Released under a BSD (revised)
+  license as described in the file LICENSE.
 */
 #include <float.h>
 
@@ -17,70 +17,70 @@ using namespace CB;
 
 inline bool observed_cost(cb_class* cl)
 { //cost observed for this action if it has non zero probability and cost != FLT_MAX
-	return (cl != nullptr && cl->cost != FLT_MAX && cl->probability > .0);
+  return (cl != nullptr && cl->cost != FLT_MAX && cl->probability > .0);
 }
 
 cb_class* get_observed_cost(CB::label& ld)
 {
-	for (auto& cl : ld.costs)
-		if (observed_cost(&cl))
-			return &cl;
-	return nullptr;
+  for (auto& cl : ld.costs)
+    if (observed_cost(&cl))
+      return &cl;
+  return nullptr;
 }
 
 void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld)
 { //this implements the inverse propensity score method, where cost are importance weighted by the probability of the chosen action
-	//generate cost-sensitive example
-	cs_ld.costs.erase();
-	if (ld.costs.size() == 1)   //this is a typical example where we can perform all actions
-	{ //in this case generate cost-sensitive example with all actions
-		for (uint32_t i = 1; i <= c.num_actions; i++)
-		{
-			COST_SENSITIVE::wclass wc;
-			wc.wap_value = 0.;
-			wc.x = 0.;
-			wc.class_index = i;
-			wc.partial_prediction = 0.;
-			wc.wap_value = 0.;
-			if (c.known_cost != nullptr && i == c.known_cost->action)
-			{
-				wc.x = c.known_cost->cost / c.known_cost->probability; //use importance weighted cost for observed action, 0 otherwise
-				//ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything
-				//update the loss of this regressor
-				c.nb_ex_regressors++;
-				c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors);
-				c.last_pred_reg = 0;
-				c.last_correct_cost = c.known_cost->cost;
-			}
+  //generate cost-sensitive example
+  cs_ld.costs.erase();
+  if (ld.costs.size() == 1)   //this is a typical example where we can perform all actions
+    { //in this case generate cost-sensitive example with all actions
+      for (uint32_t i = 1; i <= c.num_actions; i++)
+	{
+	  COST_SENSITIVE::wclass wc;
+	  wc.wap_value = 0.;
+	  wc.x = 0.;
+	  wc.class_index = i;
+	  wc.partial_prediction = 0.;
+	  wc.wap_value = 0.;
+	  if (c.known_cost != nullptr && i == c.known_cost->action)
+	    {
+	      wc.x = c.known_cost->cost / c.known_cost->probability; //use importance weighted cost for observed action, 0 otherwise
+	      //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything
+	      //update the loss of this regressor
+	      c.nb_ex_regressors++;
+	      c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors);
+	      c.last_pred_reg = 0;
+	      c.last_correct_cost = c.known_cost->cost;
+	    }
 
-			cs_ld.costs.push_back(wc);
-		}
+	  cs_ld.costs.push_back(wc);
 	}
-	else   //this is an example where we can only perform a subset of the actions
-	{ //in this case generate cost-sensitive example with only allowed actions
-		for (auto& cl : ld.costs)
-		{
-			COST_SENSITIVE::wclass wc;
-			wc.wap_value = 0.;
-			wc.x = 0.;
-			wc.class_index = cl.action;
-			wc.partial_prediction = 0.;
-			wc.wap_value = 0.;
-			if (c.known_cost != nullptr && cl.action == c.known_cost->action)
-			{
-				wc.x = c.known_cost->cost / c.known_cost->probability; //use importance weighted cost for observed action, 0 otherwise
+    }
+  else   //this is an example where we can only perform a subset of the actions
+    { //in this case generate cost-sensitive example with only allowed actions
+      for (auto& cl : ld.costs)
+	{
+	  COST_SENSITIVE::wclass wc;
+	  wc.wap_value = 0.;
+	  wc.x = 0.;
+	  wc.class_index = cl.action;
+	  wc.partial_prediction = 0.;
+	  wc.wap_value = 0.;
+	  if (c.known_cost != nullptr && cl.action == c.known_cost->action)
+	    {
+	      wc.x = c.known_cost->cost / c.known_cost->probability; //use importance weighted cost for observed action, 0 otherwise
 
-				//ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything
-				//update the loss of this regressor
-				c.nb_ex_regressors++;
-				c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors);
-				c.last_pred_reg = 0;
-				c.last_correct_cost = c.known_cost->cost;
-			}
+	      //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything
+	      //update the loss of this regressor
+	      c.nb_ex_regressors++;
+	      c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors);
+	      c.last_pred_reg = 0;
+	      c.last_correct_cost = c.known_cost->cost;
+	    }
 
-			cs_ld.costs.push_back(wc);
-		}
+	  cs_ld.costs.push_back(wc);
 	}
+    }
 
 }
 
diff --git a/vowpalwabbit/gen_cs_example.h b/vowpalwabbit/gen_cs_example.h
index b9a60638c09..875491b2ad6 100755
--- a/vowpalwabbit/gen_cs_example.h
+++ b/vowpalwabbit/gen_cs_example.h
@@ -1,7 +1,7 @@
 /*
-Copyright (c) by respective owners including Yahoo!, Microsoft, and
-individual contributors. All rights reserved.  Released under a BSD (revised)
-license as described in the file LICENSE.
+  Copyright (c) by respective owners including Yahoo!, Microsoft, and
+  individual contributors. All rights reserved.  Released under a BSD (revised)
+  license as described in the file LICENSE.
 */
 #include <float.h>
 
@@ -14,16 +14,16 @@ using namespace CB;
 
 struct cb_to_cs
 {
-	size_t cb_type;
-	uint32_t num_actions;	
-	COST_SENSITIVE::label pred_scores;
-	LEARNER::base_learner* scorer;
-	float avg_loss_regressors;
-	size_t nb_ex_regressors;
-	float last_pred_reg;
-	float last_correct_cost;
-
-	cb_class* known_cost;
+  size_t cb_type;
+  uint32_t num_actions;	
+  COST_SENSITIVE::label pred_scores;
+  LEARNER::base_learner* scorer;
+  float avg_loss_regressors;
+  size_t nb_ex_regressors;
+  float last_pred_reg;
+  float last_correct_cost;
+
+  cb_class* known_cost;
 };
 
 cb_class* get_observed_cost(CB::label& ld);
@@ -33,147 +33,152 @@ void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld
 template <bool is_learn> 
 void gen_cs_example_dm(cb_to_cs& c, example& ec, COST_SENSITIVE::label& cs_ld)
 { //this implements the direct estimation method, where costs are directly specified by the learned regressor.
-	CB::label ld = ec.l.cb;
-
-	float min = FLT_MAX;
-	uint32_t argmin = 1;
-	//generate cost sensitive example
-	cs_ld.costs.erase();
-	c.pred_scores.costs.erase();
-
-	if (ld.costs.size() == 1)   //this is a typical example where we can perform all actions
-	{ //in this case generate cost-sensitive example with all actions
-		for (uint32_t i = 1; i <= c.num_actions; i++)
-		{
-			COST_SENSITIVE::wclass wc;
-			wc.wap_value = 0.;
-
-			//get cost prediction for this action
-			wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, i, 0);
-			if (wc.x < min)
-			{
-				min = wc.x;
-				argmin = i;
-			}
-
-			wc.class_index = i;
-			wc.partial_prediction = 0.;
-			wc.wap_value = 0.;
-
-			c.pred_scores.costs.push_back(wc);
-
-			if (c.known_cost != nullptr && c.known_cost->action == i)
-			{
-				c.nb_ex_regressors++;
-				c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors);
-				c.last_pred_reg = wc.x;
-				c.last_correct_cost = c.known_cost->cost;
-			}
-
-			cs_ld.costs.push_back(wc);
-		}
+  CB::label ld = ec.l.cb; //the label is taken from the example itself; ld is only read below
+
+  float min = FLT_MAX; //lowest predicted cost seen so far
+  uint32_t argmin = 1; //action holding that lowest cost; stays 1 if no action gets scored
+  //generate cost sensitive example
+  cs_ld.costs.erase();
+  c.pred_scores.costs.erase();
+
+  if (ld.costs.size() == 1)   //this is a typical example where we can perform all actions
+    { //in this case generate cost-sensitive example with all actions
+      for (uint32_t i = 1; i <= c.num_actions; i++)
+	{
+	  COST_SENSITIVE::wclass wc;
+	  wc.wap_value = 0.;
+
+	  //get cost prediction for this action
+	  wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, i, 0);
+	  if (wc.x < min) //strict comparison: on a tie the earlier (smaller) action is kept
+	    {
+	      min = wc.x;
+	      argmin = i;
+	    }
+
+	  wc.class_index = i;
+	  wc.partial_prediction = 0.;
+	  wc.wap_value = 0.;
+
+	  c.pred_scores.costs.push_back(wc);
+
+	  if (c.known_cost != nullptr && c.known_cost->action == i) //regressor loss is tracked only on the observed action
+	    {
+	      c.nb_ex_regressors++;
+	      c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); //incremental mean of squared error
+	      c.last_pred_reg = wc.x;
+	      c.last_correct_cost = c.known_cost->cost;
+	    }
+
+	  cs_ld.costs.push_back(wc); //the predicted cost is used as the cost-sensitive label unchanged
 	}
-	else   //this is an example where we can only perform a subset of the actions
-	{ //in this case generate cost-sensitive example with only allowed actions
-		for (auto& cl : ld.costs)
-		{
-			COST_SENSITIVE::wclass wc;
-			wc.wap_value = 0.;
-
-			//get cost prediction for this action
-			wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, cl.action, 0);
-			if (wc.x < min || (wc.x == min && cl.action < argmin))
-			{
-				min = wc.x;
-				argmin = cl.action;
-			}
-
-			wc.class_index = cl.action;
-			wc.partial_prediction = 0.;
-			wc.wap_value = 0.;
-
-			c.pred_scores.costs.push_back(wc);
-
-			if (c.known_cost != nullptr && c.known_cost->action == cl.action)
-			{
-				c.nb_ex_regressors++;
-				c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors);
-				c.last_pred_reg = wc.x;
-				c.last_correct_cost = c.known_cost->cost;
-			}
-
-			cs_ld.costs.push_back(wc);
-		}
+    }
+  else   //this is an example where we can only perform a subset of the actions
+    { //in this case generate cost-sensitive example with only allowed actions
+      for (auto& cl : ld.costs)
+	{
+	  COST_SENSITIVE::wclass wc;
+	  wc.wap_value = 0.;
+
+	  //get cost prediction for this action
+	  wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, cl.action, 0);
+	  if (wc.x < min || (wc.x == min && cl.action < argmin)) //explicit tie-break towards the smaller action id
+	    {
+	      min = wc.x;
+	      argmin = cl.action;
+	    }
+
+	  wc.class_index = cl.action;
+	  wc.partial_prediction = 0.;
+	  wc.wap_value = 0.;
+
+	  c.pred_scores.costs.push_back(wc);
+
+	  if (c.known_cost != nullptr && c.known_cost->action == cl.action) //regressor loss is tracked only on the observed action
+	    {
+	      c.nb_ex_regressors++;
+	      c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); //incremental mean of squared error
+	      c.last_pred_reg = wc.x;
+	      c.last_correct_cost = c.known_cost->cost;
+	    }
+
+	  cs_ld.costs.push_back(wc); //the predicted cost is used as the cost-sensitive label unchanged
 	}
+    }
 
-	ec.pred.multiclass = argmin;
+  ec.pred.multiclass = argmin; //predict the action with the lowest estimated cost
 }
 
 template <bool is_learn> 
 void gen_cs_label(cb_to_cs& c, example& ec, COST_SENSITIVE::label& cs_ld, uint32_t label)
 {
-	COST_SENSITIVE::wclass wc;
-	wc.wap_value = 0.;
-
-	//get cost prediction for this label
-	wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, label, c.num_actions);
-	wc.class_index = label;
-	wc.partial_prediction = 0.;
-	wc.wap_value = 0.;
-
-	c.pred_scores.costs.push_back(wc);
-
-	//add correction if we observed cost for this action and regressor is wrong
-	if (c.known_cost != nullptr && c.known_cost->action == label)
-	{
-		c.nb_ex_regressors++;
-		c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors);
-		c.last_pred_reg = wc.x;
-		c.last_correct_cost = c.known_cost->cost;
-		wc.x += (c.known_cost->cost - wc.x) / c.known_cost->probability;
-	}
-	//cout<<"Prediction = "<<wc.x<<" ";
-	cs_ld.costs.push_back(wc);
+  COST_SENSITIVE::wclass wc; //entry for this label, appended to both c.pred_scores.costs and cs_ld.costs
+  wc.wap_value = 0.;
+
+  //get cost prediction for this label
+  wc.x = CB_ALGS::get_cost_pred<is_learn>(c.scorer, c.known_cost, ec, label, c.num_actions);
+  wc.class_index = label;
+  wc.partial_prediction = 0.;
+  wc.wap_value = 0.;
+
+  c.pred_scores.costs.push_back(wc); //stored before the correction below, so this keeps the raw prediction
+  //no debug printing of wc.x here: this runs once per action for every example
+  //add correction if we observed cost for this action and regressor is wrong
+  if (c.known_cost != nullptr && c.known_cost->action == label)
+    {
+      c.nb_ex_regressors++;
+      c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost - wc.x)*(c.known_cost->cost - wc.x) - c.avg_loss_regressors); //incremental mean of squared error
+      c.last_pred_reg = wc.x;
+      c.last_correct_cost = c.known_cost->cost;
+      wc.x += (c.known_cost->cost - wc.x) / c.known_cost->probability; //add the residual scaled by 1/probability
+    }
+
+  cs_ld.costs.push_back(wc);
 
 }
 
 template <bool is_learn> 
 void gen_cs_example_dr(cb_to_cs& c, example& ec, CB::label& ld, COST_SENSITIVE::label& cs_ld)
 { //this implements the doubly robust method
-	cs_ld.costs.erase();
-	c.pred_scores.costs.erase();
-	if (ld.costs.size() == 0)//a test example
-		for (uint32_t i = 1; i <= c.num_actions; i++)
-		{ //Explicit declaration for a weak compiler.
-			COST_SENSITIVE::wclass c = { FLT_MAX, i, 0., 0. };
-			cs_ld.costs.push_back(c);
-		}
-	else if (ld.costs.size() == 1) //this is a typical example where we can perform all actions
-		//in this case generate cost-sensitive example with all actions
-		for (uint32_t i = 1; i <= c.num_actions; i++)
-			gen_cs_label<is_learn>(c, ec, cs_ld, i);
-	else  //this is an example where we can only perform a subset of the actions
-		//in this case generate cost-sensitive example with only allowed actions
-		for (auto& cl : ld.costs)
-			gen_cs_label<is_learn>(c, ec, cs_ld, cl.action);
-	//cout<<endl;
+  cs_ld.costs.erase();
+  c.pred_scores.costs.erase();
+  //no tracing of c.known_cost here: debug output to cout on every example does not belong in this header
+  //three cases follow, selected by how many costs the CB label carries
+  if (ld.costs.size() == 0)//a test example
+    for (uint32_t i = 1; i <= c.num_actions; i++)
+      { //Explicit declaration for a weak compiler.
+	COST_SENSITIVE::wclass wc = { FLT_MAX, i, 0., 0. }; //named wc so it does not shadow the parameter c
+	cs_ld.costs.push_back(wc);
+      }
+  else if (ld.costs.size() == 1) //this is a typical example where we can perform all actions
+    //in this case generate cost-sensitive example with all actions
+    for (uint32_t i = 1; i <= c.num_actions; i++) {
+      //appends the entry for action i to both cs_ld.costs and c.pred_scores.costs
+      gen_cs_label<is_learn>(c, ec, cs_ld, i);
+    }
+  else  //this is an example where we can only perform a subset of the actions
+    //in this case generate cost-sensitive example with only allowed actions
+    for (auto& cl : ld.costs)
+      gen_cs_label<is_learn>(c, ec, cs_ld, cl.action);
+  //unlike gen_cs_example_dm, ec.pred is not set here; only the cost vectors are produced
 }
 
 template <bool is_learn>
 void gen_cs_example(cb_to_cs& c, example& ec, CB::label& ld, COST_SENSITIVE::label& cs_ld)
 {
-	switch (c.cb_type)
-	{
-	case CB_TYPE_IPS:
-		gen_cs_example_ips(c, ld, cs_ld);
-		break;
-	case CB_TYPE_DM:
-		gen_cs_example_dm<is_learn>(c, ec, cs_ld);
-		break;
-	case CB_TYPE_DR:
-		gen_cs_example_dr<is_learn>(c, ec, ld, cs_ld);
-	default:
-		THROW("Unknown cb_type specified for contextual bandit learning: " << c.cb_type);
-	}
+  switch (c.cb_type) //dispatch to the generator matching the configured estimator
+    {
+    case CB_TYPE_IPS:
+      gen_cs_example_ips(c, ld, cs_ld); //not templated on is_learn and does not take ec
+      break;
+    case CB_TYPE_DM:
+      gen_cs_example_dm<is_learn>(c, ec, cs_ld); //reads the label from ec.l.cb rather than from ld
+      break;
+    case CB_TYPE_DR:
+      gen_cs_example_dr<is_learn>(c, ec, ld, cs_ld);
+      break; //without this break the DR case fell through into the THROW below
+    default:
+      THROW("Unknown cb_type specified for contextual bandit learning: " << c.cb_type);
+    }
 }
 
diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index 87f596ba956..36439ce2e0a 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -32,6 +32,7 @@ license as described in the file LICENSE.
 #include "csoaa.h"
 #include "cb_algs.h"
 #include "cb_adf.h"
+#include "cb_explore.h"
 #include "mwt.h"
 #include "confidence.h"
 #include "scorer.h"
@@ -1080,6 +1081,7 @@ void parse_reductions(vw& all)
   all.reduction_stack.push_back(cb_algs_setup);
   all.reduction_stack.push_back(cb_adf_setup);
   all.reduction_stack.push_back(mwt_setup);
+  all.reduction_stack.push_back(cb_explore_setup);
   all.reduction_stack.push_back(cbify_setup);
 
   all.reduction_stack.push_back(ExpReplay::expreplay_setup<'c', COST_SENSITIVE::cs_label>);