Skip to content

Commit

Permalink
FIX boost implementation + memory allocation problems
Browse files Browse the repository at this point in the history
  • Loading branch information
jihelhere committed Dec 20, 2011
1 parent f041281 commit aed2214
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 81 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ AC_ARG_ENABLE(debug, [--enable-debug=[no/yes] turn on debugging
[default=$debug_default]],, enable_debug=$debug_default)

if test "x$enable_debug" = "xyes"; then
CXXFLAGS="$CXXFLAGS -g -pg -DDEBUG -Wall -Wextra -pedantic -std=c++0x"
CXXFLAGS="$CXXFLAGS -g -DDEBUG -Wall -Wextra -pedantic -std=c++0x"
AC_MSG_RESULT(yes)
else
CXXFLAGS="$CXXFLAGS -O6 -DNDEBUG -Wall -Wextra -pedantic -std=c++0x -ffast-math"
Expand Down
47 changes: 20 additions & 27 deletions src/Example.hh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ struct Feature {
unsigned id;
double value;
Feature(int _id, double _value) : id(_id), value(_value) {};
bool operator<(const Feature &peer) const {
inline bool operator<(const Feature &peer) const {
return value < peer.value;
}
};
Expand All @@ -22,45 +22,38 @@ struct Example {
double loss;
double score;
std::vector<Feature> features;

Example() : loss(0.0), score(0.0), features() {
features.reserve(5000000);
// features.reserve(5000000);
}


Example(char* input) : loss(0.0), score(0.0), features() {
features.reserve(5000000);
// features.reserve(5000000);
load(input);
}

// load an example from a line 'loss feature_id:value .... feature_id:value' # comment
// no error checking
void load(char* line) {
assert(line);
//fprintf(stderr, "%s\n", line);

// features.clear();
// char *input = strdup(line);
// char * save = input;
char * input = line;
char *token = NULL;
// char *comment = strchr(input, '#');
// if(comment != NULL) *comment = '\0'; // skip comments
char * input = line;
char *token = NULL;

//TODO: uncomment this
// char *comment = strchr(input, '#');
// if(comment != NULL) *comment = '\0'; // skip comments

token = strsep(&input, " \t"); // read loss
loss = strtod(token, NULL);
for(;(token = strsep(&input, " \t\n")) && *token != '\0' ;) {
// if(!strcmp(token,"")) continue;
char* value = strrchr(token, ':');
// if(value != NULL) { // read feature_id:value
*value = '\0';
double value_as_double = strtod(value + 1, NULL);
// int feature_id = strtol(token, NULL, 10);
int feature_id = atoi(token);
features.emplace_back(ranker::Feature(feature_id, value_as_double));
// features.emplace_back(feature_id, value_as_double);
// }
char* value = strrchr(token, ':');

*value = '\0';
double value_as_double = strtod(value + 1, NULL);
int feature_id = strtol(token, NULL, 10);
features.emplace_back(feature_id, value_as_double);

}
// free(save);
}

double compute_score(const std::vector<double>& weights) {
Expand All @@ -75,9 +68,9 @@ struct Example {
}

// for sorting examples
struct example_ptr_desc_score_order
struct example_ptr_desc_score_order
{
inline bool operator()(const Example* __restrict__ i, const Example* __restrict__ j) const
inline bool operator()(const Example* __restrict__ i, const Example* __restrict__ j) const
{return (i->score > j->score);}
};

Expand Down
51 changes: 29 additions & 22 deletions src/ExampleMaker.hh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ namespace threadns = std;
#include "Example.hh"

namespace ranker {

typedef threadns::unique_lock<threadns::mutex> lock_type;

struct ExampleMaker
{

Expand All @@ -24,7 +27,7 @@ namespace ranker {
std::vector<double>& weights;
std::vector<Example*>& examples;

ExampleMaker(std::vector<char*> &l, std::vector<double>& w, std::vector<Example*>& e)
ExampleMaker(std::vector<char*> &l, std::vector<double>& w, std::vector<Example*>& e)
: my_thread(), lines(l), weights(w), examples(e) {};

~ExampleMaker() {}
Expand All @@ -33,41 +36,45 @@ namespace ranker {
my_thread.join();
}

void create_examples(std::mutex* mlines, int* processed_lines, int* finished, std::condition_variable* cond_process)
void create_examples(threadns::mutex* mlines, int* processed_lines, int* finished, threadns::condition_variable* cond_process, threadns::mutex* mutex_examples)
{
while(1) {
int index;
char * string = NULL;
char * string;

lock_type lock(*mlines);

std::unique_lock<std::mutex> lock(*mlines);

while(((unsigned) *processed_lines >= lines.size()) && !*finished) {
cond_process->wait(lock);
}

if(*finished && ((unsigned) *processed_lines == lines.size())) break;
while(((unsigned) *processed_lines == lines.size()) && !*finished) {
cond_process->wait(lock);
}

if(*finished && ((unsigned) *processed_lines == lines.size())) { lock.unlock(); break;}

index = (*processed_lines)++;
examples.resize(lines.size(),NULL);
string = lines[index];
lock.unlock();

lock.unlock();

Example * e = new Example(string);
e-> compute_score(weights);

lock.lock();
e->compute_score(weights);


lock_type lock2(*mutex_examples);
if (examples.size() < lines.size())
examples.resize(lines.size(),NULL);
examples[index] = e;
delete lines[index];
lock.unlock();

lock2.unlock();

delete lines[index];

}
}

void start(std::mutex* mlines, int* processed_lines, int* finished, std::condition_variable* cond_process)
void start(threadns::mutex* mlines, int* processed_lines, int* finished, threadns::condition_variable* cond_process, threadns::mutex* mutex_examples)
{
my_thread = threadns::thread(&ExampleMaker::create_examples, this,
mlines, processed_lines, finished, cond_process);
mlines, processed_lines, finished, cond_process,
mutex_examples);
}


Expand Down
63 changes: 32 additions & 31 deletions src/ranker-learn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#include <getopt.h>
#include <fcntl.h>

//#include <thread>

#include "utils.h"
#include "Example.hh"
#include "ExampleMaker.hh"
Expand All @@ -30,8 +28,9 @@ static int verbose_flag = 0;

int processed_lines = 0;
int finished = 0;
std::mutex mutex_processed_lines;
std::condition_variable cond_process;
threadns::mutex mutex_processed_lines;
threadns::mutex mutex_examples;
threadns::condition_variable cond_process;


void print_help_message(char *program_name)
Expand All @@ -46,7 +45,7 @@ void print_help_message(char *program_name)
fprintf(stderr, " --threads,-j <int> : nb of threads (default is %d)\n", NUM_THREADS);
fprintf(stderr, " --filter,-f <command> : filter input through command (\"%%s\" is replaced by the filename)\n");
fprintf(stderr, " --examples,-e <int> : set number of examples (if known in advance)\n");

fprintf(stderr, " -help,-h : print this message\n");
}

Expand Down Expand Up @@ -131,21 +130,21 @@ struct file_reader
file_reader() : buffer(NULL), buffer_size(0) {}

~file_reader() {free(buffer);}

void
process_file(FILE** fp, std::vector<char*>* lines)
{
{
while(0 < read_line(&buffer, &buffer_size, *fp)) {
if(buffer[0] != '\n') {
mutex_processed_lines.lock();

mutex_processed_lines.lock();
lines->push_back(strdup(buffer));
cond_process.notify_all();
// fprintf(stderr, "after notify\n");
mutex_processed_lines.unlock();
cond_process.notify_all();
// fprintf(stderr, "after notify, size lines: %ld\n", lines->size());
mutex_processed_lines.unlock();
}
else break;

}
finished = 1;
cond_process.notify_all();
Expand All @@ -166,7 +165,7 @@ double process(const char* filename, const char* filter, std::vector<double> &we
}

std::vector<char*> lines;
std::vector<ranker::Example*> examples;
std::vector<ranker::Example*> examples;
std::vector<ranker::ExampleMaker*> exampleMakers(num_threads, NULL);

for(int i = 0; i < num_threads; ++i) {
Expand All @@ -180,17 +179,19 @@ double process(const char* filename, const char* filter, std::vector<double> &we
processed_lines = 0;
finished = 0;

threadns::thread thread_read(&file_reader::process_file, &fr, &fp, &lines);

for(int i = 0; i < num_threads; ++i) {
exampleMakers[i]->start(&mutex_processed_lines, &processed_lines, &finished, &cond_process);
// fprintf(stderr, "starting examplemaker %d\n", i);
exampleMakers[i]->start(&mutex_processed_lines, &processed_lines, &finished, &cond_process, &mutex_examples);
}

thread_read.join();
threadns::thread thread_read(&file_reader::process_file, &fr, &fp, &lines);

for(auto i = exampleMakers.begin(); i != exampleMakers.end(); ++i)
for(auto i = exampleMakers.begin(); i != exampleMakers.end(); ++i)
(*i)->join();


thread_read.join();


if(examples.empty())
continue;

Expand All @@ -205,7 +206,7 @@ double process(const char* filename, const char* filter, std::vector<double> &we
// sort the examples by score
sort(examples.begin(), examples.end(), ranker::Example::example_ptr_desc_score_order());
avg_loss += examples[0]->loss;

for(unsigned int i = 0; i < examples.size(); ++i) {
if(examples[i]->score > oracle->score || (examples[i]->score == oracle->score && examples[i]->loss > oracle->loss)) {
++errors;
Expand All @@ -215,33 +216,33 @@ double process(const char* filename, const char* filter, std::vector<double> &we
}

++num;
if(num % 10 == 0)
if(num % 10 == 0)
fprintf(stderr, "\r%d %d %f %f/%f", num, errors, (double)errors/num, avg_loss / num, one_best_loss / num);

// training -> update
if(alter_model) {
mira.update(oracle, num);

// std::for_each(examples.begin(),examples.end(), mira);
// std::for_each(examples.begin(),examples.begin()+1, mira);
mira(examples[0]);

//fprintf(stderr, "after mira\n");
}

// reset data structures for next sentence


// reset data structures for next sentence


for(unsigned i = 0; i < examples.size(); ++i) {
delete examples[i];
}

lines.clear();
examples.clear();
}

fprintf(stderr, "\r%d %d %f %f/%f\n", num, errors, (double)errors/num, avg_loss / num, one_best_loss / num);

fclose(fp);
if(filter != NULL) {
int status;
Expand Down Expand Up @@ -371,7 +372,7 @@ int main(int argc, char** argv) {
}


if(num_examples <= 0)
if(num_examples <= 0)
num_examples = compute_num_examples(trainset, filter);

std::vector<double> weights;
Expand Down

0 comments on commit aed2214

Please sign in to comment.