Skip to content

Commit

Permalink
[revise] doxygen file, add multithread test in autotest scripts. add …
Browse files Browse the repository at this point in the history
…time flag in multi_*_cmdline
  • Loading branch information
Oneplus committed Nov 14, 2014
1 parent dc2a408 commit d045dd6
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 95 deletions.
4 changes: 2 additions & 2 deletions Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ PROJECT_NAME = "LTP - Language Technology Platform"
# This could be handy for archiving the generated documentation or
# if some version control system is used.

PROJECT_NUMBER = 3.1.1
PROJECT_NUMBER = 3.2.0

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer
Expand Down Expand Up @@ -365,7 +365,7 @@ LOOKUP_CACHE_SIZE = 0
# Private class members and static file members will be hidden unless
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES

EXTRACT_ALL = NO
EXTRACT_ALL = YES

# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
# will be included in the documentation.
Expand Down
4 changes: 2 additions & 2 deletions test/multi_cws_cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ int main(int argc, char ** argv) {
}

tm = ltp::utility::get_time() - tm;
std::cerr << "TRACE: consume "
<< tm
std::cerr << "TRACE: multi-cws-tm-consume "
<< tm
<< " seconds."
<< std::endl;

Expand Down
177 changes: 91 additions & 86 deletions test/multi_pos_cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,110 +28,115 @@ const int MAX_LEN = 1024;

class Dispatcher {
public:
Dispatcher( void * model ) {
_model = model;
Dispatcher( void * model ) {
_model = model;
}

int next(std::vector<std::string> &words) {
std::string line;
std::string word;
lock_guard<mutex> guard(_mutex);
if (getline(std::cin, line, '\n')) {
std::stringstream S(line);
words.clear();
while (S >> word) { words.push_back(word); }
} else {
return -1;
}
return 0;
}

int next(std::vector<std::string> &words) {
std::string line;
std::string word;
lock_guard<mutex> guard(_mutex);
if (getline(std::cin, line, '\n')) {
std::stringstream S(line);
words.clear();
while (S >> word) { words.push_back(word); }
} else {
return -1;
}
return 0;
void output(const std::vector<std::string> & words,
const std::vector<std::string> &postags) {
lock_guard<mutex> guard(_mutex);
if (words.size() != postags.size()) {
return;
}

void output(const std::vector<std::string> & words,
const std::vector<std::string> &postags) {
lock_guard<mutex> guard(_mutex);
if (words.size() != postags.size()) {
return;
}

for (int i = 0; i < words.size(); ++ i) {
std::cout << words[i] << "_" << postags[i];
std::cout << (i == words.size() - 1 ? '\n' : '|');
}
return;
for (int i = 0; i < words.size(); ++ i) {
std::cout << words[i] << "_" << postags[i];
std::cout << (i == words.size() - 1 ? '\n' : '|');
}
return;
}

void * model() {
return _model;
}
void * model() {
return _model;
}

private:
mutex _mutex;
void * _model;
string _sentence;
mutex _mutex;
void * _model;
string _sentence;
};

void multithreaded_postag( void * args) {
std::vector<std::string> words;
std::vector<std::string> postags;
std::vector<std::string> words;
std::vector<std::string> postags;

Dispatcher * dispatcher = (Dispatcher *)args;
void * model = dispatcher->model();
Dispatcher * dispatcher = (Dispatcher *)args;
void * model = dispatcher->model();

while (true) {
int ret = dispatcher->next(words);
while (true) {
int ret = dispatcher->next(words);

if (ret < 0)
break;
if (ret < 0)
break;

postags.clear();
postagger_postag(model, words, postags);
dispatcher->output(words, postags);
}
postags.clear();
postagger_postag(model, words, postags);
dispatcher->output(words, postags);
}

return;
return;
}

/**
 * Command line driver: load a postagger model, tag stdin with several worker
 * threads and report the elapsed time on stderr.
 *
 * argv[1] is the model path. argv[2] is the requested number of threads and
 * may be omitted; a missing, non-numeric or non-positive value, or one above
 * the detected hardware concurrency, falls back to the detected concurrency.
 * At least one worker is always started.
 *
 * @return 0 on success, -1 on bad usage or when the model cannot be loaded.
 */
int main(int argc, char ** argv) {
  if (argc < 2 || (0 == strcmp(argv[1], "-h"))) {
    std::cerr << "Usage: ./multi_pos_cmdline [model path] threadnum" << std::endl;
    std::cerr << std::endl;
    std::cerr << "This program recieve input word sequence from stdin." << std::endl;
    std::cerr << "One sentence per line. Words are separated by space." << std::endl;
    return -1;
  }

  // NOTE(review): the engine is never released in this program -- confirm
  // whether the postagger API offers a matching release call.
  void * engine = postagger_create_postagger(argv[1]);

  if (!engine) {
    return -1;
  }

  // NOTE(review): hardware_concurrency() presumably returns 0 when the core
  // count cannot be detected -- in that case the requested value is trusted.
  const int max_threads = static_cast<int>(thread::hardware_concurrency());

  // argv[2] only exists when argc > 2; atoi() yields 0 for non-numeric text,
  // which is treated like any other invalid request below.
  int num_threads = (argc > 2 ? atoi(argv[2]) : max_threads);

  if (num_threads <= 0 || (max_threads > 0 && num_threads > max_threads)) {
    num_threads = max_threads;
  }
  if (num_threads <= 0) {
    // Nothing usable was requested or detected: still run one worker so that
    // the input gets processed.
    num_threads = 1;
  }

  std::cerr << "TRACE: Model is loaded" << std::endl;
  std::cerr << "TRACE: Running " << num_threads << " thread(s)" << std::endl;

  // Lives on the stack: every worker is joined before main() returns, so the
  // pointer handed to the threads stays valid for their whole lifetime.
  Dispatcher dispatcher( engine );

  double tm = ltp::utility::get_time();
  list<thread *> thread_list;
  for (int i = 0; i < num_threads; ++ i) {
    thread * t = new thread( multithreaded_postag, static_cast<void *>(&dispatcher) );
    thread_list.push_back( t );
  }

  for (list<thread *>::iterator i = thread_list.begin();
      i != thread_list.end(); ++ i) {
    thread * t = *i;
    t->join();
    delete t;
  }

  tm = ltp::utility::get_time() - tm;
  // The autotest script greps stderr for this exact tag.
  std::cerr << "TRACE: multi-pos-tm-consume "
    << tm
    << " seconds."
    << std::endl;

  return 0;
}

103 changes: 98 additions & 5 deletions tools/autotest/autotest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
TMPDIR= tempfile.gettempdir()
SRC_EXTENSIONS = (".h", ".hpp", ".c", ".cpp")
SRC_EXLUDES = ("mongoose.h", "mongoose.c")
FINISHED_JOBS= set([])

def which(program):
# From http://stackoverflow.com/questions/377017
Expand Down Expand Up @@ -88,6 +89,8 @@ def static_code_check(rootdir, outputdir, verbose=False):
logging.info("cppcheck: found %d performance comments." % nr_performance)
logging.info("cppcheck: found %d warning comments." % nr_warning)
ifs.close()
global FINISHED_JOBS
FINISHED_JOBS.add("cppcheck")


def executable_check(rootdir, outputdir, input_path, verbose=False):
Expand Down Expand Up @@ -127,6 +130,8 @@ def concatenate(name):
subprocess.call(command, stdout=ofs, stderr=DUMMY)
ofs.close()
logging.info("ltp_test: dynamically executable check is done.")
global FINISHED_JOBS
FINISHED_JOBS.add("ltp_test")
return True


Expand Down Expand Up @@ -165,6 +170,8 @@ def memory_leak_check(rootdir, outputdir, input_path, verbose=False):
line = line.split("==")[-1].strip()
logging.info("memcheck: %s" % line)
ifs.close()
global FINISHED_JOBS
FINISHED_JOBS.add("memcheck")


def callgrind_check(rootdir, outputdir, input_path, verbose=False):
Expand Down Expand Up @@ -217,6 +224,9 @@ def callgrind_check(rootdir, outputdir, input_path, verbose=False):
subprocess.call(command, stdout=DUMMY, stderr=DUMMY)
logging.info("callgrind: dot converting dot output to PNG is done.")

global FINISHED_JOBS
FINISHED_JOBS.add("callgrind")


def speed_check(rootdir, outputdir, input_path, verbose=False):
if os.name == 'nt':
Expand Down Expand Up @@ -288,6 +298,78 @@ def run(exe, model, ifs, ofs):
logging.info("speed: postagger speed %f sent/s" % (float(nr_lines) / postag_tm))
logging.info("speed: parser speed %f M/s" % (float(nr_sz) / 1024/ 1024/ parser_tm))
logging.info("speed: parser speed %f sent/s" % (float(nr_lines) / parser_tm))
global FINISHED_JOBS
FINISHED_JOBS.add("speed")


def multithread_check(rootdir, outputdir, input_path, verbose=False, nr_threads=2):
    """Measure the throughput of the multi-threaded wordseg and postag examples.

    Runs `multi_cws_cmdline` on the input, pipes its output through
    `multi_pos_cmdline`, then reads the elapsed times both programs print on
    stderr and logs the speed in M/s and sent/s.

    rootdir     -- project root; executables are taken from bin/examples and
                   models from ltp_data beneath it.
    outputdir   -- directory receiving `multi_speed.log` (the captured stderr).
    input_path  -- path of the raw input text, one sentence per line.
    verbose     -- only forwarded to speed_check().
    nr_threads  -- thread count passed to both executables (default 2).

    Returns None. "multithread" is added to FINISHED_JOBS only when both
    timings were found and reported.
    """
    global FINISHED_JOBS
    if "speed" not in FINISHED_JOBS:
        speed_check(rootdir, outputdir, input_path, verbose)

    if os.name == 'nt':
        logging.info("multithread: windows speed check is not supported.")
        return

    def build(exe_prefix, model_prefix):
        exe = os.path.join(rootdir, "bin", "examples", ("multi_%s_cmdline" % exe_prefix))
        model = os.path.join(rootdir, "ltp_data", ("%s.model" % model_prefix))
        out = os.path.join(TMPDIR, "ltp.autotest.multi.%s.out" % exe_prefix)
        return (exe, model, out)

    cws_cmdline, cws_model, cws_out = build("cws", "cws")
    pos_cmdline, pos_model, pos_out = build("pos", "pos")

    # Validate the input before anything touches the file system: os.stat()
    # raises on a missing path.
    if not input_path:
        logging.error("multithread: input not specified.")
        logging.info("multithread: speed check is canceled.")
        return
    if not os.path.isfile(input_path):
        logging.error("multithread: input is not specified.")
        logging.info("multithread: speed check is canceled.")
        return

    for exe in (cws_cmdline, pos_cmdline):
        if not which(exe):
            logging.error("multithread: %s is not found." % exe)
            logging.info("multithread: speed check is canceled.")
            return

    nr_sz = os.stat(input_path).st_size
    # Binary mode keeps the lines as byte strings, so .decode() below is
    # valid; Windows has already returned above.
    with open(input_path, "rb") as ifs:
        dataset = ifs.readlines()
    nr_lines = len(dataset)
    if nr_lines == 0:
        logging.error("multithread: input is empty.")
        logging.info("multithread: speed check is canceled.")
        return
    avg_sent_len = float(sum(len(data.decode("utf-8")) for data in dataset)) / nr_lines
    logging.info("multithread: average sentence length %f" % avg_sent_len)

    speed_log = os.path.join(outputdir, "multi_speed.log")

    def run(exe, model, in_path, out_path, lfs):
        # stderr of both runs is collected in the same log file.
        with open(in_path, "r") as ifs:
            with open(out_path, "w") as ofs:
                subprocess.call([exe, model, str(nr_threads)],
                                stdin=ifs, stdout=ofs, stderr=lfs)

    with open(speed_log, "w") as lfs:
        run(cws_cmdline, cws_model, input_path, cws_out, lfs)
        run(pos_cmdline, pos_model, cws_out, pos_out, lfs)

    # The executables print e.g. "TRACE: multi-cws-tm-consume 1.23 seconds.";
    # the number is the second token after the last colon.
    timings = {}
    with open(speed_log, "r") as lfs:
        for line in lfs:
            for tag in ("multi-cws-tm-consume", "multi-pos-tm-consume"):
                if tag in line:
                    timings[tag] = float(line.strip().split(":")[-1].strip().split()[1])

    def report(name, tag):
        tm = timings.get(tag)
        if tm is None or tm <= 0:
            # Missing line (the executable failed) or a zero duration: there
            # is no meaningful speed to compute.
            logging.error("multithread: %s timing is not available in %s." % (name, speed_log))
            return False
        logging.info("multithread: %s speed %f M/s" % (name, float(nr_sz) / 1024/ 1024/ tm))
        logging.info("multithread: %s speed %f sent/s" % (name, float(nr_lines) / tm))
        return True

    wordseg_ok = report("wordseg", "multi-cws-tm-consume")
    postag_ok = report("postagger", "multi-pos-tm-consume")
    if wordseg_ok and postag_ok:
        FINISHED_JOBS.add("multithread")


def server_check(rootdir, outputdir, input_path, verbose=False):
Expand All @@ -303,6 +385,7 @@ def server_check(rootdir, outputdir, input_path, verbose=False):
subprocess.call(command2, stdout=ofs, stderr=DUMMY)
p.kill()


if __name__=="__main__":
usage = "automatically test script for LTP project.\n"
usage += "author: Yijia Liu <[email protected]>, 2014"
Expand All @@ -324,15 +407,25 @@ def server_check(rootdir, outputdir, input_path, verbose=False):
help="specify the details output dir [default=%s]" % default_outputdir)
optparser.add_option("-i", "--input", dest="inputpath", default=default_inputpath,
help="the input path [default=%s]" % default_inputpath)
optparser.add_option("-t", "--tasks", dest="tasks", default="all",
help="the test tasks, tasks are separated by |.")
opts, args = optparser.parse_args()

if not os.path.isdir(opts.outputdir):
os.mkdir(opts.outputdir)

static_code_check(opts.rootdir, opts.outputdir)
tasks = opts.tasks.split("|")
if "all" in tasks or "cppcheck" in tasks:
static_code_check(opts.rootdir, opts.outputdir)
if not executable_check(opts.rootdir, opts.outputdir, opts.inputpath):
sys.exit(1)
memory_leak_check(opts.rootdir, opts.outputdir, opts.inputpath)
callgrind_check(opts.rootdir, opts.outputdir, opts.inputpath)
speed_check(opts.rootdir, opts.outputdir, opts.inputpath)
server_check(opts.rootdir, opts.outputdir, opts.inputpath)
if "all" in tasks or "memcheck" in tasks:
memory_leak_check(opts.rootdir, opts.outputdir, opts.inputpath)
if "all" in tasks or "callgrind" in tasks:
callgrind_check(opts.rootdir, opts.outputdir, opts.inputpath)
if "all" in tasks or "speed" in tasks:
speed_check(opts.rootdir, opts.outputdir, opts.inputpath)
if "all" in tasks or "server" in tasks:
server_check(opts.rootdir, opts.outputdir, opts.inputpath)
if "all" in tasks or "multithread" in tasks:
multithread_check(opts.rootdir, opts.outputdir, opts.inputpath)

0 comments on commit d045dd6

Please sign in to comment.