[revise] doxygen file, add multithread test in autotest scripts. add time flag in multi_*_cmdline
Kitter · Nov 14, 2014 · d045dd6 · d045dd6
1 parent dc2a408
commit d045dd6
Show file tree

Hide file tree

Showing 4 changed files with 193 additions and 95 deletions.
diff --git a/Doxyfile b/Doxyfile
@@ -32,7 +32,7 @@ PROJECT_NAME           = "LTP - Language Technology Platform"
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = 3.1.1
+PROJECT_NUMBER         = 3.2.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer
@@ -365,7 +365,7 @@ LOOKUP_CACHE_SIZE      = 0
 # Private class members and static file members will be hidden unless
 # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
 
-EXTRACT_ALL            = NO
+EXTRACT_ALL            = YES
 
 # If the EXTRACT_PRIVATE tag is set to YES all private members of a class
 # will be included in the documentation.

diff --git a/test/multi_cws_cmdline.cpp b/test/multi_cws_cmdline.cpp
@@ -132,8 +132,8 @@ int main(int argc, char ** argv) {
   }
 
   tm = ltp::utility::get_time() - tm;
-  std::cerr << "TRACE: consume "
-            << tm 
+  std::cerr << "TRACE: multi-cws-tm-consume "
+            << tm
             << " seconds."
             << std::endl;
 

diff --git a/test/multi_pos_cmdline.cpp b/test/multi_pos_cmdline.cpp
@@ -28,110 +28,115 @@ const int MAX_LEN = 1024;
 
 class Dispatcher {
 public:
-    Dispatcher( void * model ) {
-        _model = model;
+  Dispatcher( void * model ) {
+    _model = model;
+  }
+
+  int next(std::vector<std::string> &words) {
+    std::string line;
+    std::string word;
+    lock_guard<mutex> guard(_mutex);
+    if (getline(std::cin, line, '\n')) {
+      std::stringstream S(line);
+      words.clear();
+      while (S >> word) { words.push_back(word); }
+    } else {
+      return -1;
     }
+    return 0;
+  }
 
-    int next(std::vector<std::string> &words) {
-        std::string line;
-        std::string word;
-        lock_guard<mutex> guard(_mutex);
-        if (getline(std::cin, line, '\n')) {
-            std::stringstream S(line);
-            words.clear();
-            while (S >> word) { words.push_back(word); }
-        } else {
-            return -1;
-        }
-        return 0;
+  void output(const std::vector<std::string> & words,
+      const std::vector<std::string> &postags) {
+    lock_guard<mutex> guard(_mutex);
+    if (words.size() != postags.size()) {
+      return;
     }
 
-    void output(const std::vector<std::string> & words,
-            const std::vector<std::string> &postags) {
-        lock_guard<mutex> guard(_mutex);
-        if (words.size() != postags.size()) {
-            return;
-        }
-
-        for (int i = 0; i < words.size(); ++ i) {
-            std::cout << words[i] << "_" << postags[i];
-            std::cout << (i == words.size() - 1 ? '\n' : '|');
-        }
-        return;
+    for (int i = 0; i < words.size(); ++ i) {
+      std::cout << words[i] << "_" << postags[i];
+      std::cout << (i == words.size() - 1 ? '\n' : '|');
     }
+    return;
+  }
 
-    void * model() {
-        return _model;
-    }
+  void * model() {
+    return _model;
+  }
 
 private:
-    mutex  _mutex;
-    void * _model;
-    string _sentence;
+  mutex  _mutex;
+  void * _model;
+  string _sentence;
 };
 
 void multithreaded_postag( void * args) {
-    std::vector<std::string> words;
-    std::vector<std::string> postags;
+  std::vector<std::string> words;
+  std::vector<std::string> postags;
 
-    Dispatcher * dispatcher = (Dispatcher *)args;
-    void * model = dispatcher->model();
+  Dispatcher * dispatcher = (Dispatcher *)args;
+  void * model = dispatcher->model();
 
-    while (true) {
-        int ret = dispatcher->next(words);
+  while (true) {
+    int ret = dispatcher->next(words);
 
-        if (ret < 0)
-            break;
+    if (ret < 0)
+      break;
 
-        postags.clear();
-        postagger_postag(model, words, postags);
-        dispatcher->output(words, postags);
-    }
+    postags.clear();
+    postagger_postag(model, words, postags);
+    dispatcher->output(words, postags);
+  }
 
-    return;
+  return;
 }
 
 int main(int argc, char ** argv) {
-    if (argc < 2 || (0 == strcmp(argv[1], "-h"))) {
-        std::cerr << "Usage: ./multi_pos_cmdline [model path]" << std::endl;
-        std::cerr << std::endl;
-        std::cerr << "This program recieve input word sequence from stdin." << std::endl;
-        std::cerr << "One sentence per line. Words are separated by space." << std::endl;
-        return -1;
-    }
-
-    void * engine = postagger_create_postagger(argv[1]);
-
-    if (!engine) {
-        return -1;
-    }
-
-    int num_threads = thread::hardware_concurrency();
-    std::cerr << "TRACE: Model is loaded" << std::endl;
-    std::cerr << "TRACE: Running " << num_threads << " thread(s)" << std::endl;
-
-    Dispatcher * dispatcher = new Dispatcher( engine );
-
-    double tm = ltp::utility::get_time();
-    list<thread *> thread_list;
-    for (int i = 0; i < num_threads; ++ i) {
-        thread * t = new thread( multithreaded_postag, (void *)dispatcher );
-        thread_list.push_back( t );
-    }
-
-    for (list<thread *>::iterator i = thread_list.begin();
-            i != thread_list.end(); ++ i) {
-        thread * t = *i;
-        t->join();
-        delete t;
-    }
-
-    tm = ltp::utility::get_time() - tm;
-    std::cerr << "TRACE: consume "
-        << tm 
-        << " seconds."
-        << std::endl;
-
-    return 0;
+  if (argc < 2 || (0 == strcmp(argv[1], "-h"))) {
+    std::cerr << "Usage: ./multi_pos_cmdline [model path] threadnum" << std::endl;
+    std::cerr << std::endl;
+    std::cerr << "This program recieve input word sequence from stdin." << std::endl;
+    std::cerr << "One sentence per line. Words are separated by space." << std::endl;
+    return -1;
+  }
+
+  void * engine = postagger_create_postagger(argv[1]);
+
+  if (!engine) {
+    return -1;
+  }
+
+  int num_threads = atoi(argv[2]);
+
+  if(num_threads < 0 || num_threads > thread::hardware_concurrency()) {
+    num_threads = thread::hardware_concurrency();
+  }
+
+  std::cerr << "TRACE: Model is loaded" << std::endl;
+  std::cerr << "TRACE: Running " << num_threads << " thread(s)" << std::endl;
+
+  Dispatcher * dispatcher = new Dispatcher( engine );
+
+  double tm = ltp::utility::get_time();
+  list<thread *> thread_list;
+  for (int i = 0; i < num_threads; ++ i) {
+    thread * t = new thread( multithreaded_postag, (void *)dispatcher );
+    thread_list.push_back( t );
+  }
+
+  for (list<thread *>::iterator i = thread_list.begin();
+      i != thread_list.end(); ++ i) {
+    thread * t = *i;
+    t->join();
+    delete t;
+  }
+
+  tm = ltp::utility::get_time() - tm;
+  std::cerr << "TRACE: multi-pos-tm-consume "
+    << tm
+    << " seconds."
+    << std::endl;
+
+  return 0;
 }
 
diff --git a/tools/autotest/autotest.py b/tools/autotest/autotest.py
@@ -16,6 +16,7 @@
 TMPDIR= tempfile.gettempdir()
 SRC_EXTENSIONS = (".h", ".hpp", ".c", ".cpp")
 SRC_EXLUDES = ("mongoose.h", "mongoose.c")
+FINISHED_JOBS= set([])
 
 def which(program):
     # From http://stackoverflow.com/questions/377017
@@ -88,6 +89,8 @@ def static_code_check(rootdir, outputdir, verbose=False):
     logging.info("cppcheck: found %d performance comments." % nr_performance)
     logging.info("cppcheck: found %d warning comments." % nr_warning)
     ifs.close()
+    global FINISHED_JOBS
+    FINISHED_JOBS.add("cppcheck")
 
 
 def executable_check(rootdir, outputdir, input_path, verbose=False):
@@ -127,6 +130,8 @@ def concatenate(name):
     subprocess.call(command, stdout=ofs, stderr=DUMMY)
     ofs.close()
     logging.info("ltp_test: dynamically executable check is done.")
+    global FINISHED_JOBS
+    FINISHED_JOBS.add("ltp_test")
     return True
 
 
@@ -165,6 +170,8 @@ def memory_leak_check(rootdir, outputdir, input_path, verbose=False):
             line = line.split("==")[-1].strip()
             logging.info("memcheck: %s" % line)
     ifs.close()
+    global FINISHED_JOBS
+    FINISHED_JOBS.add("memcheck")
 
 
 def callgrind_check(rootdir, outputdir, input_path, verbose=False):
@@ -217,6 +224,9 @@ def callgrind_check(rootdir, outputdir, input_path, verbose=False):
     subprocess.call(command, stdout=DUMMY, stderr=DUMMY)
     logging.info("callgrind: dot converting dot output to PNG is done.")
 
+    global FINISHED_JOBS
+    FINISHED_JOBS.add("callgrind")
+
 
 def speed_check(rootdir, outputdir, input_path, verbose=False):
     if os.name == 'nt':
@@ -288,6 +298,78 @@ def run(exe, model, ifs, ofs):
     logging.info("speed: postagger speed %f sent/s" % (float(nr_lines) / postag_tm))
     logging.info("speed: parser speed %f M/s" % (float(nr_sz) / 1024/ 1024/ parser_tm))
     logging.info("speed: parser speed %f sent/s" % (float(nr_lines) / parser_tm))
+    global FINISHED_JOBS
+    FINISHED_JOBS.add("speed")
+
+
+def multithread_check(rootdir, outputdir, input_path, verbose=False):
+    global FINISHED_JOBS
+    if "speed" not in FINISHED_JOBS:
+        speed_check(rootdir, outputdir, input_path, verbose)
+
+    if os.name == 'nt':
+        logging.info("multithread: windows speed check is not supported.")
+        return
+
+    def build(exe_prefix, model_prefix):
+        exe = os.path.join(rootdir, "bin", "examples", ("multi_%s_cmdline" % exe_prefix))
+        model = os.path.join(rootdir, "ltp_data", ("%s.model" % model_prefix))
+        out = os.path.join(TMPDIR, "ltp.autotest.multi.%s.out" % exe_prefix)
+        return (exe, model, out)
+    cws_cmdline, cws_model, cws_out = build("cws", "cws")
+    pos_cmdline, pos_model, pos_out = build("pos", "pos")
+
+    if not input_path:
+        logging.error("multithread: input not specified.")
+        logging.info("multithread: speed check is canceled.")
+        return
+
+    nr_sz = os.stat(input_path).st_size
+    dataset = open(input_path,"r").readlines()
+    nr_lines = len(dataset)
+    avg_sent_len = float(sum([len(data.decode("utf-8")) for data in dataset]))/nr_lines
+    logging.info("multithread: average sentence length %f" % avg_sent_len)
+
+    def check(exe):
+        if not which(exe):
+            logging.error("multithread: %s is not found." % exe)
+            logging.info("multithread: speed check is canceled.")
+            return False
+        return True
+
+    if not check(cws_cmdline):
+        return
+    if not check(pos_cmdline):
+        return
+
+    if not os.path.isfile(input_path):
+        logging.error("multithread: input is not specified.")
+        logging.info("multithread: speed check is canceled.")
+        return
+
+    speed_log = os.path.join(outputdir, "multi_speed.log")
+    lfs = open(speed_log, "w")
+    def run(exe, model, ifs, ofs):
+        subprocess.call([exe, model, "2"], stdin=ifs, stdout=ofs, stderr=lfs)
+        ifs.close()
+        ofs.close()
+
+    run(cws_cmdline, cws_model, open(input_path, "r"), open(cws_out, "w"))
+    run(pos_cmdline, pos_model, open(cws_out, "r"), open(pos_out, "w"))
+    lfs.close()
+    lfs = open(speed_log, "r")
+
+    for line in lfs:
+        if "multi-cws-tm-consume" in line:
+            multi_wordseg_tm = float(line.strip().split(":")[-1].strip().split()[1])
+        if "multi-pos-tm-consume" in line:
+            multi_postag_tm = float(line.strip().split(":")[-1].strip().split()[1])
+
+    logging.info("multithread: wordseg speed %f M/s" % (float(nr_sz) / 1024/ 1024/multi_wordseg_tm))
+    logging.info("multithread: wordseg speed %f sent/s" % (float(nr_lines) / multi_wordseg_tm))
+    logging.info("multithread: postagger speed %f M/s" % (float(nr_sz) / 1024/ 1024/multi_postag_tm))
+    logging.info("multithread: postagger speed %f sent/s" % (float(nr_lines) / multi_postag_tm))
+    FINISHED_JOBS.add("multithread")
 
 
 def server_check(rootdir, outputdir, input_path, verbose=False):
@@ -303,6 +385,7 @@ def server_check(rootdir, outputdir, input_path, verbose=False):
     subprocess.call(command2, stdout=ofs, stderr=DUMMY)
     p.kill()
 
+
 if __name__=="__main__":
     usage = "automatically test script for LTP project.\n"
     usage += "author: Yijia Liu <[email protected]>, 2014"
@@ -324,15 +407,25 @@ def server_check(rootdir, outputdir, input_path, verbose=False):
             help="specify the details output dir [default=%s]" % default_outputdir)
     optparser.add_option("-i", "--input", dest="inputpath", default=default_inputpath,
             help="the input path [default=%s]" % default_inputpath)
+    optparser.add_option("-t", "--tasks", dest="tasks", default="all",
+            help="the test tasks, tasks are separated by |.")
     opts, args = optparser.parse_args()
 
     if not os.path.isdir(opts.outputdir):
         os.mkdir(opts.outputdir)
 
-    static_code_check(opts.rootdir, opts.outputdir)
+    tasks = opts.tasks.split("|")
+    if "all" in tasks or "cppcheck" in tasks:
+        static_code_check(opts.rootdir, opts.outputdir)
     if not executable_check(opts.rootdir, opts.outputdir, opts.inputpath):
         sys.exit(1)
-    memory_leak_check(opts.rootdir, opts.outputdir, opts.inputpath)
-    callgrind_check(opts.rootdir, opts.outputdir, opts.inputpath)
-    speed_check(opts.rootdir, opts.outputdir, opts.inputpath)
-    server_check(opts.rootdir, opts.outputdir, opts.inputpath)
+    if "all" in tasks or "memcheck" in tasks:
+        memory_leak_check(opts.rootdir, opts.outputdir, opts.inputpath)
+    if "all" in tasks or "callgrind" in tasks:
+        callgrind_check(opts.rootdir, opts.outputdir, opts.inputpath)
+    if "all" in tasks or "speed" in tasks:
+        speed_check(opts.rootdir, opts.outputdir, opts.inputpath)
+    if "all" in tasks or "server" in tasks:
+        server_check(opts.rootdir, opts.outputdir, opts.inputpath)
+    if "all" in tasks or "multithread" in tasks:
+        multithread_check(opts.rootdir, opts.outputdir, opts.inputpath)