Skip to content

Commit

Permalink
started api; implemented parallel predictor
Browse files Browse the repository at this point in the history
  • Loading branch information
Benoit Favre committed Jul 25, 2011
1 parent 8bd5146 commit 2169916
Show file tree
Hide file tree
Showing 15 changed files with 393 additions and 8 deletions.
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@

ACLOCAL_AMFLAGS=-I m4
SUBDIRS= src
4 changes: 4 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ AC_CONFIG_SRCDIR([src/ranker-learn-zcat.cc])
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
AC_CONFIG_HEADERS([config.h])

# libtool
LT_INIT
AC_CONFIG_MACRO_DIR([m4])

# Checks for programs.
CXXFLAGS=""
AC_PROG_CXX
Expand Down
6 changes: 6 additions & 0 deletions src/Example.hh
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
#pragma once

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <vector>
#include <string>

namespace ranker {

struct feature {
unsigned id;
double value;
Expand Down Expand Up @@ -73,3 +78,4 @@ struct example {

};

}
2 changes: 2 additions & 0 deletions src/ExampleMaker.hh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "Example.hh"

namespace ranker {
struct example_maker
{
std::thread my_thread;
Expand Down Expand Up @@ -47,6 +48,7 @@ struct example_maker
}

};
};



Expand Down
15 changes: 13 additions & 2 deletions src/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
AM_CXXFLAGS=
CXXFLAGS=-O2 -g -Wall -Wextra -std=c++0x -ffast-math -pedantic

bin_PROGRAMS = ranker-learn-zcat ranker-predict count count_by_sentence drop_common_features transform_logprob filter filter_and_map test_utils split merge-models ranker-learn-iteration
bin_PROGRAMS = ranker-learn-zcat ranker-predict count count_by_sentence drop_common_features transform_logprob filter filter_and_map test_utils split merge-models ranker-learn-iteration ranker_main ranker_parallel
lib_LTLIBRARIES = libranker-0.1.la
libranker_0_1_la_SOURCES = ranker.cc
libranker_0_1_la_CXXFLAGS = $(CXXFLAGS)

ranker_parallel_SOURCES = ranker_parallel.cc ranker.cc
ranker_parallel_CXXFLAGS = $(CXXFLAGS)
ranker_parallel_LDFLAGS = -lpthread

ranker_main_SOURCES = ranker_main.cc ranker.cc
ranker_main_CXXFLAGS = $(CXXFLAGS)

ranker_learn_iteration_SOURCES = ranker-learn-iteration.cc utils.c
ranker_learn_iteration_CXXFLAGS = -O2 -g -Wall -Wextra -std=c++0x -ffast-math -pedantic
Expand All @@ -11,7 +22,7 @@ ranker_learn_zcat_CPPFLAGS= -DNDEBUG
ranker_learn_zcat_CXXFLAGS= -O6 -Wall -Wextra -std=c++0x -ffast-math -pedantic -pthread
ranker_learn_zcat_LDFLAGS= -lm -lpthread

ranker_predict_SOURCES = ranker-predict.cc
ranker_predict_SOURCES = ranker-predict.cc ranker.cc
ranker_predict_CXXFLAGS = -O2 -g -Wall -Wextra -std=c++0x -ffast-math -pedantic
ranker_predict_LDFLAGS = -lm -lz

Expand Down
3 changes: 3 additions & 0 deletions src/MiraOperator.hh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "Example.hh"

namespace ranker {

struct mira_operator
{
Expand Down Expand Up @@ -117,4 +118,6 @@ struct mira_operator
}
};

};

#endif
6 changes: 6 additions & 0 deletions src/libranker.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#include <stdio.h>
#include <stdlib.h>

// Placeholder library entry point: returns twice the given value.
int ranker(int x) {
    const int doubled = 2 * x;
    return doubled;
}
2 changes: 1 addition & 1 deletion src/ranker-learn-iteration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ int process(char* filename, int num_iterations, vector<double> &weights, vector<
}

double value_as_double = strtod(value + 1, NULL);
if(!isinf(value_as_double) && !isnan(value_as_double)) {
if(!std::isinf(value_as_double) && !std::isnan(value_as_double)) {
example->features[location] = value_as_double;
//fprintf(stdout, "%s %d %g\n", token, location, value_as_double);
//if(iteration == 1) fprintf(stdout, "%s %g %g %g\n", token, vector_last(values), weights[location], weights[location + 1]);
Expand Down
1 change: 1 addition & 0 deletions src/ranker-learn-zcat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#define LOOP 10
#define NUM_THREADS 1

using namespace ranker;

static int verbose_flag = 0;

Expand Down
9 changes: 4 additions & 5 deletions src/ranker-predict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
#include <vector>
#include <string>

#include "ranker.hh"

using namespace std;
using namespace ranker;

int main(int argc, char** argv) {
if(argc != 3) {
Expand Down Expand Up @@ -84,14 +87,10 @@ int main(int argc, char** argv) {

char* token;
int first = 1;
int label = 0;
double score = 0.0;
double loss = 0.0;
for(token = strtok(buffer, " \t\n\r"); token != NULL; token = strtok(NULL, " \t\n\r")) {
if(first == 1) {
if(!strcmp(token, "1")) {
label = 1;
}
first = 0;
} else {
char* value_start = strrchr(token, ':');
Expand All @@ -104,7 +103,7 @@ int main(int argc, char** argv) {
unordered_map<string, int>::iterator found = features.find(token_as_string);
if(found != features.end()) {
double value = strtod(value_start + 1, NULL);
if(!isinf(value) && !isnan(value)) {
if(!std::isinf(value) && !std::isnan(value)) {
score += value * weights[(*found).second];
}
}
Expand Down
95 changes: 95 additions & 0 deletions src/ranker.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ranker.hh"

using namespace ranker;

// Builds a predictor that can translate feature names: records the requested
// thread count, then loads the weight file followed by the name mapping file.
// The mapping must be loaded second because load_mapping() consults the
// already-loaded model.
predictor::predictor(int num_threads, std::string modelname, std::string mappingname)
    : num_threads(num_threads) {
    load_model(modelname);
    load_mapping(mappingname);
}

// Builds a predictor from a weight file only: records the requested thread
// count and loads the model. No feature-name mapping is loaded, so map() will
// return 0 for every name.
predictor::predictor(int num_threads, std::string modelname)
    : num_threads(num_threads) {
    load_model(modelname);
}

// Reads feature weights from the text file `filename` into `model`.
//
// Each line is "<id> <weight>" or "<id> <weight1> <weight2>", fields separated
// by single spaces. When a third field is present, it is the one stored; the
// second field is ignored. Lines that contain no space are skipped.
//
// If the file cannot be opened, an error is printed to stderr and `model` is
// left untouched.
void predictor::load_model(std::string filename) {
    FILE* fp = fopen(filename.c_str(), "r");
    if(!fp) {
        fprintf(stderr, "ERROR: cannot load model from \"%s\"\n", filename.c_str());
        return;
    }

    size_t buffer_size = 0;
    char* buffer = NULL;
    ssize_t length = 0; // getline() returns ssize_t (-1 on EOF/error)

    while(0 < (length = getline(&buffer, &buffer_size, fp))) {
        // Strip the newline only if there is one: the last line of a file may
        // lack it, and chopping blindly would eat the final digit of the weight.
        if(buffer[length - 1] == '\n') buffer[length - 1] = '\0';

        char* weight1 = strchr(buffer, ' ');
        if(weight1 == NULL) continue; // blank or malformed line: nothing to store
        *weight1 = '\0'; // terminate the id field

        char* weight2 = strchr(weight1 + 1, ' ');
        const char* weight = (weight2 != NULL) ? weight2 + 1 : weight1 + 1;
        model[strtol(buffer, NULL, 10)] = strtod(weight, NULL);
    }

    free(buffer); // getline() allocated it; releasing it is the caller's job
    fclose(fp);
}

// Reads the feature-name file `filename` and fills `mapping` (name -> id).
//
// Ids are positional: the first line gets id 1, the second id 2, and so on.
// The feature name is the text before the first space on the line, or the
// whole line when it contains no space. A name is recorded only if its id
// already has a weight in `model`, so load_model() must have run first.
//
// If the file cannot be opened, an error is printed to stderr and `mapping`
// is left untouched.
void predictor::load_mapping(std::string filename) {
    FILE* fp = fopen(filename.c_str(), "r");
    if(!fp) {
        fprintf(stderr, "ERROR: cannot load mapping from \"%s\"\n", filename.c_str());
        return;
    }

    size_t buffer_size = 0;
    char* buffer = NULL;
    ssize_t length = 0; // getline() returns ssize_t (-1 on EOF/error)

    int next_id = 1;
    while(0 < (length = getline(&buffer, &buffer_size, fp))) {
        // Strip the newline only if present (the last line may lack it).
        if(buffer[length - 1] == '\n') buffer[length - 1] = '\0';

        // Cut the line at the first space; a line without one is all name.
        char* end = strchr(buffer, ' ');
        if(end != NULL) *end = '\0';

        if(model.find(next_id) != model.end()) {
            mapping[std::string(buffer)] = next_id;
        }
        // Every line consumes an id, whether or not it was recorded.
        next_id++;
    }

    free(buffer); // getline() allocated it; releasing it is the caller's job
    fclose(fp);
}

// Translates a feature name into its numeric id using `mapping`.
// Returns 0 when the name is not known.
int predictor::map(std::string feature) {
    const auto entry = mapping.find(feature);
    return entry == mapping.end() ? 0 : entry->second;
}

int predictor::predict(std::vector<example> &nbest) {
int argmax = -1;
double max = 0;
for(size_t i = 0; i < nbest.size(); i++) {
compute_score(nbest[i]);
if(argmax == -1 || nbest[i].score > max) {
max = nbest[i].score;
argmax = i;
}
}
return argmax;
}

// Computes the linear score of `x`: the sum over its features of
// weight(feature id) * feature value, where a feature id with no entry in
// `model` has weight 0. The result is stored in `x.score` and also returned.
//
// The lookup uses find() rather than operator[] so that scoring never inserts
// new (zero) entries into `model` for unknown feature ids.
double predictor::compute_score(example& x) {
    double score = 0;
    for(auto i = x.features.begin(); i != x.features.end(); ++i) {
        const auto found = model.find(i->id);
        const double weight = (found != model.end()) ? found->second : 0.0;
        score += weight * i->value;
    }
    x.score = score;
    return score;
}
21 changes: 21 additions & 0 deletions src/ranker.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include <string>
#include <vector>
#include <unordered_map>
#include "Example.hh"

namespace ranker {
// Linear n-best reranker: holds a sparse weight vector and an optional
// feature-name table, and picks the best-scoring example from a list.
struct predictor {
// Thread count given to the constructor; it is stored but not read by any of
// the member functions declared here.
int num_threads;
// Feature name -> feature id. Filled by load_mapping(), which only keeps names
// whose id has a weight in `model`.
std::unordered_map<std::string, int> mapping;
// Feature id -> weight. Filled by load_model().
std::unordered_map<int, double> model;
// Loads the model file only; no name mapping is available afterwards.
predictor(int num_threads, std::string modelname);
// Loads the model file, then the mapping file (in that order).
predictor(int num_threads, std::string modelname, std::string mappingname);
// Reads "<id> <weight>[ <weight2>]" lines from `filename` into `model`;
// prints an error to stderr if the file cannot be opened.
void load_model(std::string filename);
// Reads one feature name per line from `filename`; ids are the 1-based line
// numbers. Prints an error to stderr if the file cannot be opened.
void load_mapping(std::string filename);
// Returns the id mapped to `feature`, or 0 when the name is unknown.
int map(std::string feature);
// Scores every example in `nbest` and returns the index of the one with the
// highest score (first one on ties), or -1 when `nbest` is empty.
int predict(std::vector<example>& nbest);
// Returns the sum of weight * value over the example's features and stores it
// in the example's `score` field. Features without a model entry count as
// weight 0.
double compute_score(example& i);
};
}
46 changes: 46 additions & 0 deletions src/ranker_main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ranker.hh"

int main(int argc, char** argv) {
if(argc < 2) {
fprintf(stderr, "usage: %s <model>\n", argv[0]);
return 1;
}
ranker::predictor model(1, std::string(argv[1]));

char* buffer = NULL;
size_t buffer_length = 0;
ssize_t length = 0;

std::vector<ranker::example> examples;
while(0 <= (length = getline(&buffer, &buffer_length, stdin))) {
if(length == 1) {
fprintf(stdout, "%d\n", model.predict(examples));
examples.clear();
} else {
ranker::example x;
char *inputstring = buffer;
char *token = NULL;
token = strsep(&inputstring, " \t"); // skip label
for(;(token = strsep(&inputstring, " \t\n"));) {
if(!strcmp(token,"")) continue;
char* value = strrchr(token, ':');
if(value != NULL) {
*value = '\0';
double value_as_double = strtod(value + 1, NULL);
//nbe is the loss, not a feature
if(!strcmp(token, "nbe")) {
x.loss = value_as_double;
} else {
int location = strtol(token, NULL, 10);
x.features.push_back(ranker::feature(location, value_as_double));
}
}
}
examples.push_back(x);
}
}
return 0;
}
Loading

0 comments on commit 2169916

Please sign in to comment.