Skip to content

Commit

Permalink
add pos cmdline test, and multithreaded test
Browse files Browse the repository at this point in the history
  • Loading branch information
Oneplus committed Sep 24, 2013
1 parent ad278d2 commit 484855d
Show file tree
Hide file tree
Showing 12 changed files with 347 additions and 32 deletions.
38 changes: 28 additions & 10 deletions examples/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
all: cws cws_cmdline pos par ner multi_cws_cmdline
all: cws cws_cmdline multi_cws_cmdline \
pos pos_cmdline multi_pos_cmdline \
par \
ner

cws: cws.cpp
g++ -o cws cws.cpp -I./ \
Expand All @@ -12,11 +15,33 @@ cws_cmdline: cws_cmdline.cpp
-I../thirdparty/boost/include \
-Wl,-dn -L../lib/ -lsegmentor -lboost_regex -Wl,-dy

multi_cws_cmdline: multi_cws_cmdline.cpp
g++ -o multi_cws_cmdline multi_cws_cmdline.cpp \
thirdparty/tinythreadpp/tinythread.cpp \
-I./ \
-I../include/ \
-I../thirdparty/boost/include/ \
-I./thirdparty/tinythreadpp/ \
-Wl,-dn -L../lib/ -lsegmentor -lboost_regex -Wl,-dy -lpthread

pos: pos.cpp
g++ -o pos pos.cpp -I./ \
-I../include/ \
-L../lib/ -lpostagger

pos_cmdline: pos_cmdline.cpp
g++ -o pos_cmdline pos_cmdline.cpp -I./ \
-I../include/ \
-Wl,-dn -L../lib/ -lpostagger -Wl,-dy

multi_pos_cmdline: multi_pos_cmdline.cpp
g++ -o multi_pos_cmdline multi_pos_cmdline.cpp \
thirdparty/tinythreadpp/tinythread.cpp \
-I./ \
-I../include/ \
-I./thirdparty/tinythreadpp/ \
-Wl,-dn -L../lib/ -lpostagger -Wl,-dy -lpthread

ner: ner.cpp
g++ -o ner ner.cpp -I./ \
-I../src/ner/ \
Expand All @@ -27,21 +52,14 @@ par: par.cpp
-I../src/parser/ \
-L../lib -lparser

multi_cws_cmdline: multi_cws_cmdline.cpp
g++ -o multi_cws_cmdline multi_cws_cmdline.cpp \
thirdparty/tinythreadpp/tinythread.cpp \
-I./ \
-I../include/ \
-I../thirdparty/boost/include/ \
-I./thirdparty/tinythreadpp/ \
-Wl,-dn -L../lib/ -lsegmentor -lboost_regex -Wl,-dy -lpthread

.PHONY: clean

clean:
rm cws
rm cws_cmdline
rm pos
rm pos_cmdline
rm ner
rm par
rm multi_cws_cmdline
rm multi_pos_cmdline
14 changes: 7 additions & 7 deletions examples/multi_cws_cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* is not compilable under MSVC
*/
#include <iostream>
#include <cstring>
#include <ctime>
#include <vector>
#include <list>
Expand Down Expand Up @@ -89,13 +90,14 @@ void multithreaded_segment( void * args) {
}

int main(int argc, char ** argv) {
if (argc < 2) {
std::cerr << "multi-cws [model path] [lexicon file]" << std::endl;
if (argc < 1 || (0 == strcmp(argv[1], "-h"))) {
std::cerr << "Example: ./multi_cws_cmdline [model path] [lexicon file]" << std::endl;
std::cerr << std::endl;
std::cerr << "This program recieve input word sequence from stdin." << std::endl;
std::cerr << "One sentence per line." << std::endl;
return -1;
}

string sentence;
vector<string> result;

void * engine = 0;
if (argc == 2) {
engine = segmentor_create_segmentor(argv[1]);
Expand All @@ -107,8 +109,6 @@ int main(int argc, char ** argv) {
return -1;
}

std::vector<std::string> words;

int num_threads = thread::hardware_concurrency();
std::cerr << "TRACE: Model is loaded" << std::endl;
std::cerr << "TRACE: Running " << num_threads << " thread(s)" << std::endl;
Expand Down
146 changes: 146 additions & 0 deletions examples/multi_pos_cmdline.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Multi-threaded postagger test program. The user inputs one
* sentence per line, as words separated by spaces, and the
* program outputs each word together with its part-of-speech tag.
*
* @dependency package: tinythread - a portable c++ wrapper for
* multi-thread library.
* @author: LIU, Yijia
* @date: 2013-09-24
*
* This program is specially designed for UNIX users, because
* get_time is not compilable under MSVC.
*/
#include <iostream>
#include <sstream>
#include <cstring>
#include <ctime>
#include <vector>
#include <list>
#include <sys/time.h>
#include <sys/types.h>

#include "postag_dll.h"
#include "tinythread.h"
#include "fast_mutex.h"

using namespace std;
using namespace tthread;

const int MAX_LEN = 1024;

/*
 * Read the current wall-clock time through gettimeofday().
 *
 * @return the time as seconds, with the microsecond part of the
 *         reading folded in as the fractional part.
 */
double get_time(void) {
  struct timeval now;
  gettimeofday(&now, NULL);

  // Convert the microsecond field into a fraction of a second.
  const double fraction = now.tv_usec / 1000000.0;
  return now.tv_sec + fraction;
}

/*
 * Shared work source and output sink for the worker threads.
 *
 * Every worker pulls its next sentence from stdin through next() and
 * prints its result through output(). Both methods take the same
 * mutex, so reading a line and printing a result never interleave.
 * The model handle is only stored and handed back; this class never
 * uses or releases it.
 */
class Dispatcher {
public:
  /*
   * @param model  opaque model handle returned later by model()
   */
  explicit Dispatcher( void * model ) : _model(model) {}

  /*
   * Read the next line from stdin and split it on whitespace.
   *
   * @param words  replaced by the words of the line on success; left
   *               untouched when no line could be read
   * @return 0 when a line was read, -1 when stdin is exhausted
   */
  int next(std::vector<std::string> &words) {
    std::string line;
    std::string word;
    lock_guard<fast_mutex> guard(_mutex);
    if (!getline(std::cin, line, '\n')) {
      return -1;
    }

    std::stringstream S(line);
    words.clear();
    while (S >> word) { words.push_back(word); }
    return 0;
  }

  /*
   * Print one tagged sentence to stdout as "word_tag|word_tag|...",
   * terminated by a newline.
   *
   * @param words    the words of the sentence
   * @param postags  one tag per word; when the sizes differ nothing is
   *                 printed to stdout and a warning goes to stderr
   */
  void output(const std::vector<std::string> & words,
      const std::vector<std::string> &postags) {
    lock_guard<fast_mutex> guard(_mutex);
    const std::size_t count = words.size();
    if (count != postags.size()) {
      // Report the dropped sentence instead of discarding it silently.
      std::cerr << "WARNING: Number of postags is different from number of words"
                << std::endl;
      return;
    }

    for (std::size_t i = 0; i < count; ++ i) {
      std::cout << words[i] << "_" << postags[i];
      std::cout << (i + 1 == count ? '\n' : '|');
    }
  }

  /*
   * @return the model handle passed to the constructor
   */
  void * model() {
    return _model;
  }

private:
  fast_mutex _mutex;   // guards both stdin reads and stdout writes
  void * _model;       // opaque handle, not owned by this class
};

void multithreaded_postag( void * args) {
std::vector<std::string> words;
std::vector<std::string> postags;

Dispatcher * dispatcher = (Dispatcher *)args;
void * model = dispatcher->model();

while (true) {
int ret = dispatcher->next(words);

if (ret < 0)
break;

postags.clear();
postagger_postag(model, words, postags);
dispatcher->output(words, postags);
}

return;
}

int main(int argc, char ** argv) {
if (argc < 1 || (0 == strcmp(argv[1], "-h"))) {
std::cerr << "Usage: ./multi_pos_cmdline [model path]" << std::endl;
std::cerr << std::endl;
std::cerr << "This program recieve input word sequence from stdin." << std::endl;
std::cerr << "One sentence per line. Words are separated by space." << std::endl;
return -1;
}

void * engine = postagger_create_postagger(argv[1]);

if (!engine) {
return -1;
}

int num_threads = thread::hardware_concurrency();
std::cerr << "TRACE: Model is loaded" << std::endl;
std::cerr << "TRACE: Running " << num_threads << " thread(s)" << std::endl;

Dispatcher * dispatcher = new Dispatcher( engine );

double tm = get_time();
list<thread *> thread_list;
for (int i = 0; i < num_threads; ++ i) {
thread * t = new thread( multithreaded_postag, (void *)dispatcher );
thread_list.push_back( t );
}

for (list<thread *>::iterator i = thread_list.begin();
i != thread_list.end(); ++ i) {
thread * t = *i;
t->join();
delete t;
}

tm = get_time() - tm;
std::cerr << "TRACE: consume "
<< tm
<< " seconds."
<< std::endl;

return 0;
}

1 change: 1 addition & 0 deletions examples/pos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

int main(int argc, char * argv[]) {
if (argc < 1) {
std::cerr << "pos [model path]" << std::endl;
return -1;
}

Expand Down
82 changes: 82 additions & 0 deletions examples/pos_cmdline.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Single-threaded postagger test program. The user inputs one
* sentence per line, as words separated by spaces, and the
* program outputs each word together with its part-of-speech tag.
*
* @dependency package: none beyond the postagger library
* (tinythread is not used by this single-threaded program).
* @author: LIU, Yijia
* @date: 2013-09-24
*
* This program is specially designed for UNIX users, because
* get_time is not compilable under MSVC.
*/
#include <iostream>
#include <sstream>
#include <ctime>
#include <cstring>
#include <string>
#include <sys/time.h>
#include <sys/types.h>
#include "postag_dll.h"

/*
 * Read the current wall-clock time through gettimeofday().
 *
 * @return the time as seconds, with the microsecond part of the
 *         reading folded in as the fractional part.
 */
double get_time(void) {
  struct timeval now;
  gettimeofday(&now, NULL);

  // Convert the microsecond field into a fraction of a second.
  const double fraction = now.tv_usec / 1000000.0;
  return now.tv_sec + fraction;
}

/*
 * Entry point of the single-threaded postagger example.
 *
 * Loads the model named by the first command line argument, then
 * reads whitespace-separated words from stdin one sentence per line
 * and prints each sentence as "word_tag|word_tag|..." on stdout.
 * The elapsed wall-clock time is reported on stderr at the end.
 *
 * @param argc  number of command line arguments
 * @param argv  argv[1] is the model path, or "-h" to print the usage
 * @return 0 on success, 1 on a usage error, -1 when the model cannot
 *         be loaded
 */
int main(int argc, char * argv[]) {
  // argv[1] only points at a string when argc >= 2. Testing argc
  // first keeps a NULL pointer away from strcmp when the program is
  // started without arguments.
  if (argc < 2 || (0 == strcmp(argv[1], "-h"))) {
    std::cerr << "Example: ./pos_cmdline [model path]" << std::endl;
    std::cerr << std::endl;
    std::cerr << "This program recieve input word sequence from stdin." << std::endl;
    std::cerr << "One sentence per line. Words are separated by space." << std::endl;
    std::cerr << std::endl;
    return 1;
  }

  void * engine = postagger_create_postagger(argv[1]);
  if (!engine) {
    std::cerr << "WARNINIG : Failed to load model." << std::endl;
    return -1;
  }

  std::string line;
  std::string word;
  std::vector<std::string> words;
  std::vector<std::string> postags;

  std::cerr << "TRACE: Model is loaded" << std::endl;
  double tm = get_time();

  while (std::getline(std::cin, line, '\n')) {
    std::stringstream S(line);
    words.clear();
    while (S >> word) { words.push_back(word); }

    if (words.empty()) { continue; }

    // Start every sentence with an empty tag list so that tags from
    // the previous line can never be mixed into this one.
    postags.clear();
    int len = postagger_postag(engine, words, postags);
    if (postags.size() != words.size()) {
      std::cerr << "WARNINIG: Number of postags is different from number of words"
        << std::endl;
      // The two vectors cannot be paired up safely, so skip printing.
      continue;
    }

    // Never index past the end of the vectors, whatever length the
    // tagger reported.
    if (len > static_cast<int>(words.size())) {
      len = static_cast<int>(words.size());
    }

    for (int i = 0; i < len; ++ i) {
      std::cout << words[i] << "_" << postags[i];
      if (i+1 == len) std::cout <<std::endl;
      else std::cout<< "|";
    }
  }

  postagger_release_postagger(engine);

  tm = get_time() - tm;
  std::cerr << "TRACE: consume "
    << tm
    << " seconds."
    << std::endl;

  return 0;
}

8 changes: 4 additions & 4 deletions src/ner/ner_dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@ class NERWrapper : public ltp::ner::NER {
return false;
}

ltp::ner::rulebase::RuleBase base(model->labels);
decoder = new ltp::ner::Decoder(model->num_labels(), base);

// beg_tag0 = model->labels.index( );
// beg_tag1 = model->labels.index( );

Expand All @@ -42,6 +39,9 @@ class NERWrapper : public ltp::ner::NER {
int recognize(const std::vector<std::string> & words,
const std::vector<std::string> & postags,
std::vector<std::string> & tags) {
ltp::ner::rulebase::RuleBase base(model->labels);
ltp::ner::Decoder deco(model->num_labels(), base);

ltp::ner::Instance * inst = new ltp::ner::Instance;
if (words.size() != postags.size()) {
return 0;
Expand All @@ -54,7 +54,7 @@ class NERWrapper : public ltp::ner::NER {

ltp::ner::NER::extract_features(inst);
ltp::ner::NER::calculate_scores(inst, true);
decoder->decode(inst);
deco.decode(inst);

for (int i = 0; i < words.size(); ++ i) {
tags.push_back(model->labels.at(inst->predicted_tagsidx[i]));
Expand Down
6 changes: 3 additions & 3 deletions src/postagger/postag_dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,21 @@ class PostaggerWrapper : public ltp::postagger::Postagger {
return false;
}

decoder = new ltp::postagger::Decoder(model->num_labels());

return true;
}

int postag(const std::vector<std::string> & words,
std::vector<std::string> & tags) {
ltp::postagger::Instance * inst = new ltp::postagger::Instance;
ltp::postagger::Decoder deco(model->num_labels());

for (int i = 0; i < words.size(); ++ i) {
inst->forms.push_back(ltp::strutils::chartypes::sbc2dbc_x(words[i]));
}

ltp::postagger::Postagger::extract_features(inst);
ltp::postagger::Postagger::calculate_scores(inst, true);
decoder->decode(inst);
deco.decode(inst);

ltp::postagger::Postagger::build_labels(inst, tags);

Expand Down
Loading

0 comments on commit 484855d

Please sign in to comment.