Skip to content

Commit

Permalink
Merge pull request HIT-SCIR#220 from HIT-SCIR/develop
Browse files Browse the repository at this point in the history
Merge branch `develop` into `master`: add the SRL LSTM model and fix a memory leak.
  • Loading branch information
liu946 authored Jun 15, 2017
2 parents 7020891 + c284926 commit 62b1208
Show file tree
Hide file tree
Showing 3,566 changed files with 376,766 additions and 252,349 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
7 changes: 4 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# build #
###############
build
config.h
#config.h

###############
# config #
Expand All @@ -21,11 +21,11 @@ config.h
###############
# output #
###############
#include/
include/
lib/
bin/
tools/train/lgdpj
tools/train/lgsrl
tools/train/srl*
tools/train/otcws
tools/train/otpos
tools/train/otner
Expand All @@ -39,6 +39,7 @@ tools/train/Debug/
###############
new_ltp_data/
ltp_data/
ltp_data

##################
# running folder #
Expand Down
62 changes: 55 additions & 7 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,61 @@
language:
- cpp
sudo: required
dist: precise
language: cpp

compiler:
- g++
matrix:
include:
- compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- george-edison55-precise-backports
packages:
- g++-4.9
- cmake
- cmake-data
env: COMPILER=g++-4.9
- compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- george-edison55-precise-backports
packages:
- g++-5
- cmake
- cmake-data
env: COMPILER=g++-5
- compiler: clang
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-precise-3.6
- george-edison55-precise-backports
packages:
- clang-3.6
- cmake
- cmake-data
env: COMPILER=clang++-3.6
- compiler: clang
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-precise-3.7
- george-edison55-precise-backports
packages:
- clang-3.7
- cmake
- cmake-data
env: COMPILER=clang++-3.7

before_install:
- sudo apt-get install cmake
- sudo apt-get update -qq

script:
- ./configure
- mkdir -p build
- cd build
- cmake -DCMAKE_CXX_COMPILER=$COMPILER ..
- make

6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required (VERSION 2.8.0)
cmake_minimum_required (VERSION 2.8.8)
project ("LTP - Language Technology Platform")

# project attributes section
Expand All @@ -9,10 +9,12 @@ if (APPLE)
add_definitions(-DGTEST_HAS_TR1_TUPLE=0)
set(CMAKE_CXX_FLAGS "-std=c++0x -Wno-c++11-narrowing")
elseif(UNIX)
set(CMAKE_CXX_FLAGS "-std=c++0x")
set(CMAKE_CXX_FLAGS "-std=c++0x -fPIC")
elseif(MINGW)
set(CMAKE_CXX_FLAGS "-std=c++0x")
elseif(MSVC)
add_definitions(-D_WINDOWS) # make dynet happy at `dynet/mem.cc(7)`
add_definitions(-DBOOST_ALL_NO_LIB) # disable boost auto-linking on windows
set(CMAKE_CXX_FLAGS "/EHsc")
endif(APPLE)

Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
2017-06-15
----------
语言技术平台3.4.0版 发布
* [增加] 新的基于Bi-LSTM的SRL模型
* [增加] 增加了SRL的多线程命令行程序`srl_cmdline`
* [修改] SRL相关的编程接口已经改变,修复了之前内存泄露的相关问题。

2016-03-26
----------
语言技术平台3.3.1版 发布
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
新闻
----

语言技术平台3.4.0版 发布
* [增加] 新的基于Bi-LSTM的SRL模型
* [增加] 增加了SRL的多线程命令行程序`srl_cmdline`
* [修改] SRL相关的编程接口已经改变,修复了之前内存泄露的相关问题。

语言技术平台 3.3.2 版发布
* [修复] 修复了 3.3.1 版本的一些 bug

Expand Down Expand Up @@ -48,7 +53,7 @@
---

* [百度云](http://pan.baidu.com/share/link?shareid=1988562907&uk=2738088569)
* 当前模型版本 3.3.1
* 当前模型版本 3.4.0

其它语言接口
------------
Expand Down
2 changes: 1 addition & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@

假定您下载并将LTP放置于 :file:`/path/to/your/ltp-project` 目录下,那么编译命令例如下::

g++ -o cws cws.cpp -I /path/to/your/ltp-project/include/ -I /path/to/your/ltp-project/thirdparty/boost/include -WL,-dn -L /path/to/your/ltp-project/lib/ -lsegmentor -lboost_regex -WL,-dy
g++ -o cws cws.cpp -I /path/to/your/ltp-project/include/ -I /path/to/your/ltp-project/thirdparty/boost/include -Wl,-dn -L /path/to/your/ltp-project/lib/ -lsegmentor -lboost_regex -Wl,-dy

分词接口
--------
Expand Down
2 changes: 2 additions & 0 deletions doc/appendix.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ LTP 使用的是863词性标注集,其各个词性含义如下表。
+-----+---------------------+------------+-----+-------------------+------------+
| nh | person name | 杜甫, 汤姆 | x | non-lexeme | 萄, 翱 |
+-----+---------------------+------------+-----+-------------------+------------+
| | | | z | descriptive words | 瑟瑟,匆匆 |
+-----+---------------------+------------+-----+-------------------+------------+

命名实体识别标注集
-------------------
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set (srl_DIR ${SOURCE_DIR}/srl/)
set (ltp_DIR ${SOURCE_DIR}/ltp/)
set (server_DIR ${SOURCE_DIR}/server/)


add_subdirectory ("xml4nlp")
add_subdirectory ("splitsnt")
add_subdirectory ("segmentor")
Expand Down
5 changes: 2 additions & 3 deletions src/config.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#ifndef __LTP_CONFIG_H__
#define __LTP_CONFIG_H__

#define LTP_VERSION "3.3.2"
#define LTP_COPYRIGHT "(C) 2012-2016 HIT-SCIR"
#define LTP_VERSION "3.4.0"
#define LTP_COPYRIGHT "(C) 2012-2017 HIT-SCIR"

#define BOOST_ALL_NO_LIB

namespace ltp {

Expand Down
23 changes: 21 additions & 2 deletions src/console/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,20 @@ include_directories (./
${THIRDPARTY_DIR}/boost/include/
${THIRDPARTY_DIR}/tinythreadpp
${THIRDPARTY_DIR}/maxent
${THIRDPARTY_DIR}/tinyxml)
${THIRDPARTY_DIR}/tinyxml
${THIRDPARTY_DIR}/jsoncpp/include)

set (ltp_test_SRC ltp_test.cpp ${THIRDPARTY_DIR}/tinythreadpp/tinythread.cpp)

# look for Boost
#if(DEFINED ENV{BOOST_ROOT})
# set(Boost_NO_SYSTEM_PATHS ON)
#endif()
#set(Boost_REALPATH ON)
#find_package(Boost COMPONENTS program_options serialization REQUIRED)
#include_directories(${Boost_INCLUDE_DIR})
#set(LIBS ${LIBS} ${Boost_LIBRARIES})

link_directories ( ${LIBRARY_OUTPUT_PATH} )
add_executable (ltp_test ${ltp_test_SRC})
target_link_libraries (ltp_test
Expand All @@ -20,7 +30,10 @@ target_link_libraries (ltp_test
srl_static_lib
xml4nlp
boost_regex_static_lib
boost_program_options_static_lib)
boost_program_options_static_lib
boost_serialization_static_lib
dynet
jsoncpp_lib_static)

add_executable (cws_cmdline cws_cmdline.cpp
${THIRDPARTY_DIR}/tinythreadpp/tinythread.cpp)
Expand Down Expand Up @@ -51,6 +64,12 @@ target_link_libraries (ner_cmdline ner_static_lib
set_target_properties (ner_cmdline PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}/examples/)

add_executable (srl_cmdline srl_cmdline.cpp
${THIRDPARTY_DIR}/tinythreadpp/tinythread.cpp)
target_link_libraries (srl_cmdline srl_static_lib
boost_program_options_static_lib)
set_target_properties (srl_cmdline PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}/examples/)

if (NOT MSVC AND NOT MINGW)
target_link_libraries (ltp_test pthread)
Expand Down
18 changes: 18 additions & 0 deletions src/console/dispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@

#include <iostream>
#include <string>
#include <vector>
#include <map>
#include "tinythread.h"

using namespace std;

class Dispatcher {
public:
Dispatcher(void* engine, std::istream& is, std::ostream& os):
Expand All @@ -24,6 +27,21 @@ class Dispatcher {
return _max_idx ++;
}

/**
 * Read the next block of consecutive non-empty lines from the input stream.
 *
 * `block` is cleared first, then a tthread::lock_guard on `_mutex` is taken
 * and lines are read from `_is` until an empty line or end of input is hit.
 * The empty separator line is consumed but not stored.
 *
 * @param block  Output: the non-empty lines read for this block. It may be
 *               left empty when the very first line read is itself empty.
 * @return The value of `_max_idx` before it is post-incremented when a
 *         separator line was seen or at least one line was collected;
 *         -1 when the input ended without yielding any line.
 */
int next_block(vector<std::string>& block) {
  block.clear();
  tthread::lock_guard<tthread::mutex> guard(_mutex);

  // Tracks whether the loop stopped on an empty line rather than on EOF.
  bool hit_separator = false;
  for (std::string current; std::getline(_is, current, '\n'); ) {
    if (current.empty()) {
      hit_separator = true;
      break;
    }
    block.push_back(current);
  }

  // Only an exhausted stream with nothing collected counts as "no more blocks".
  if (!hit_separator && block.empty()) {
    return -1;
  }
  return _max_idx++;
}

void output(const size_t& idx, const std::string& result) {
tthread::lock_guard<tthread::mutex> guard(_mutex);
if (idx > _idx) {
Expand Down
37 changes: 28 additions & 9 deletions src/console/ltp_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "ltp/Ltp.h"
#include "utils/strutils.hpp"
#include "utils/time.hpp"
#include "utils/xml4nlp_helper.h"
#include "console/dispatcher.h"
#include "boost/program_options.hpp"

Expand All @@ -21,6 +22,7 @@ using boost::program_options::parse_command_line;
using ltp::strutils::trim;

std::string type;
std::string format;

void multithreaded_ltp( void * args) {
std::string sentence;
Expand All @@ -35,9 +37,9 @@ void multithreaded_ltp( void * args) {
XML4NLP xml4nlp;
xml4nlp.CreateDOMFromString(sentence);

if (type == "sp") {
/*if (type == LTP_SERVICE_NAME_SPLITSENT) {
engine->splitSentence_dummy(xml4nlp);
} else if(type == LTP_SERVICE_NAME_SEGMENT) {
} else*/ if(type == LTP_SERVICE_NAME_SEGMENT) {
engine->wordseg(xml4nlp);
} else if(type == LTP_SERVICE_NAME_POSTAG) {
engine->postag(xml4nlp);
Expand All @@ -52,9 +54,13 @@ void multithreaded_ltp( void * args) {
}

std::string result;
xml4nlp.SaveDOM(result);
xml4nlp.ClearDOM();
if (format == LTP_SERVICE_OUTPUT_FORMAT_JSON) {
result = ltp::utility::xml2jsonstr(xml4nlp, type);
} else { //xml
xml4nlp.SaveDOM(result);
}
dispatcher->output(ret, result);
xml4nlp.ClearDOM();
}
return;
}
Expand All @@ -77,7 +83,10 @@ int main(int argc, char *argv[]) {
"- " LTP_SERVICE_NAME_NER ": Named entity recognization\n"
"- " LTP_SERVICE_NAME_DEPPARSE ": Dependency parsing\n"
"- " LTP_SERVICE_NAME_SRL ": Semantic role labeling (equals to all)\n"
"- all: The whole pipeline [default]")
"- " LTP_SERVICE_NAME_ALL ": The whole pipeline [default]")
("format", value<std::string>(), "Ouput format\n"
"- " LTP_SERVICE_OUTPUT_FORMAT_XML " [default]\n"
"- " LTP_SERVICE_OUTPUT_FORMAT_JSON)
("input", value<std::string>(), "The path to the input file.")
("segmentor-model", value<std::string>(),
"The path to the segment model [default=ltp_data/cws.model].")
Expand Down Expand Up @@ -118,17 +127,27 @@ int main(int argc, char *argv[]) {
}
}

std::string last_stage = "all";
std::string last_stage = LTP_SERVICE_NAME_DEFAULT;
if (vm.count("last-stage")) {
last_stage = vm["last-stage"].as<std::string>();
if (last_stage != LTP_SERVICE_NAME_SEGMENT
&& last_stage != LTP_SERVICE_NAME_POSTAG
&& last_stage != LTP_SERVICE_NAME_NER
&& last_stage != LTP_SERVICE_NAME_DEPPARSE
&& last_stage != LTP_SERVICE_NAME_SRL
&& last_stage != "all") {
std::cerr << "Unknown stage name:" << last_stage << ", reset to 'all'" << std::endl;
last_stage = "all";
&& last_stage != LTP_SERVICE_NAME_ALL) {
std::cerr << "Unknown stage name:" << last_stage << ", reset to '" LTP_SERVICE_NAME_DEFAULT "'" << std::endl;
last_stage = LTP_SERVICE_NAME_DEFAULT;
}
}

// Resolve the output format: start from the default and accept the
// command-line value only if it is one of the known formats (xml / json).
format = LTP_SERVICE_OUTPUT_FORMAT_DEFAULT;
if (vm.count("format")) {
  format = vm["format"].as<std::string>();
  if (format != LTP_SERVICE_OUTPUT_FORMAT_XML
      && format != LTP_SERVICE_OUTPUT_FORMAT_JSON) {
    // Report the rejected *format* value (previously this printed
    // `last_stage`, which is unrelated to the format option).
    std::cerr << "Unknown format:" << format << ", reset to '" LTP_SERVICE_OUTPUT_FORMAT_DEFAULT "'" << std::endl;
    format = LTP_SERVICE_OUTPUT_FORMAT_DEFAULT;
  }
}

Expand Down
Loading

0 comments on commit 62b1208

Please sign in to comment.