Skip to content

Commit

Permalink
Merge branch 'ltp-trainer'
Browse files Browse the repository at this point in the history
  • Loading branch information
Oneplus committed Apr 5, 2013
2 parents 7e9bbe2 + 7038cd4 commit 806f736
Show file tree
Hide file tree
Showing 76 changed files with 86,565 additions and 65 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "tools/train/corpusproc"]
path = tools/train/corpusproc
url = git://github.com/Oneplus/py-corpusproc.git
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ compiler:

before_install:
- sudo apt-get install cmake
- git submodule update --init --recursive

script:
- ./configure
Expand Down
11 changes: 8 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ set (TOOLS_DIR ${PROJECT_SOURCE_DIR}/tools)

# -- config resource directories
set (CONFIGURE_DIR ${PROJECT_SOURCE_DIR}/conf)
set (DATA_DIR ${PROJECT_SOURCE_DIR}/ltp_data)
set (MODEL_DIR ${PROJECT_SOURCE_DIR}/ltp_data)
set (DATA_DIR ${PROJECT_SOURCE_DIR}/test_data)

# compiling section
# -- compile shipped libraries
Expand All @@ -33,8 +34,12 @@ configure_file (
${EXECUTABLE_OUTPUT_PATH}/ltp_all_modules.conf)

configure_file (
${TOOLS_DIR}/auto-test.sh.in
${EXECUTABLE_OUTPUT_PATH}/auto-test.sh)
${TOOLS_DIR}/testing/auto-test.sh.in
${TOOLS_DIR}/testing/auto-test.sh)

configure_file (
${TOOLS_DIR}/train/ltp-model.in
${TOOLS_DIR}/train/ltp-model)

# enable test
enable_testing()
Expand Down
107 changes: 107 additions & 0 deletions cmake/ConfigureChecks.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#-----------------------------------------------------------------------------
# Include all the necessary files for macros
#-----------------------------------------------------------------------------
INCLUDE (${CMAKE_ROOT}/Modules/CheckFunctionExists.cmake)
INCLUDE (${CMAKE_ROOT}/Modules/CheckIncludeFile.cmake)
INCLUDE (${CMAKE_ROOT}/Modules/CheckIncludeFileCXX.cmake)
INCLUDE (${CMAKE_ROOT}/Modules/CheckIncludeFiles.cmake)
INCLUDE (${CMAKE_ROOT}/Modules/CheckLibraryExists.cmake)
INCLUDE (${CMAKE_ROOT}/Modules/CheckSymbolExists.cmake)
INCLUDE (${CMAKE_ROOT}/Modules/CheckTypeSize.cmake)

#-----------------------------------------------------------------------------
# OS X: refuse to configure a universal (multi-architecture) build.
#
# If CMAKE_OSX_ARCHITECTURES lists more than one architecture, the cache entry
# is reset to an empty value and configuration is aborted with an explanatory
# error. HDF_AC_APPLE_UNIVERSAL_BUILD is always set to 0 on Apple hosts.
#-----------------------------------------------------------------------------
if (APPLE)
  list (LENGTH CMAKE_OSX_ARCHITECTURES ARCH_LENGTH)
  if (ARCH_LENGTH GREATER 1)
    # Reset the cached value before aborting; the error text below tells the
    # user that the variable has been blanked.
    set (CMAKE_OSX_ARCHITECTURES "" CACHE STRING "" FORCE)
    # message() joins its arguments without any separator, so every fragment
    # except the last must end with an explicit space.
    message (FATAL_ERROR
      "Building Universal Binaries on OS X is NOT supported by this project. This is "
      "due to technical reasons. The best approach would be to build each architecture in separate directories "
      "and use the 'lipo' tool to combine them into a single executable or library. The 'CMAKE_OSX_ARCHITECTURES' "
      "variable has been set to a blank value which will build the default architecture for this system.")
  endif ()
  set (HDF_AC_APPLE_UNIVERSAL_BUILD 0)
endif ()

#-----------------------------------------------------------------------------
# CHECK_LIBRARY_EXISTS_CONCAT (<library> <symbol> <result-variable>)
#
# Runs CHECK_LIBRARY_EXISTS for <symbol> against <library> together with every
# library already collected in LINK_LIBS, storing the outcome in
# <result-variable>. When the check succeeds, <library> is appended to
# LINK_LIBS so that later probes link against it as well.
#
# This is a macro (not a function) so that LINK_LIBS is updated in the
# caller's scope.
#-----------------------------------------------------------------------------
set (LINK_LIBS "")
macro (CHECK_LIBRARY_EXISTS_CONCAT LIBRARY SYMBOL VARIABLE)
  check_library_exists ("${LIBRARY};${LINK_LIBS}" ${SYMBOL} "" ${VARIABLE})
  if (${VARIABLE})
    list (APPEND LINK_LIBS ${LIBRARY})
  endif ()
endmacro ()

# ----------------------------------------------------------------------
# WINDOWS Hard code Values
# ----------------------------------------------------------------------

# WINDOWS starts out unset and becomes 1 only when WIN32 is true while none of
# UNIX, CYGWIN or MINGW is.
set (WINDOWS)
if (WIN32 AND NOT UNIX AND NOT CYGWIN AND NOT MINGW)
  set (WINDOWS 1)
endif ()

# When WINDOWS is set, the feature-check results below are hard-coded to 1.
if (WINDOWS)
  foreach (windows_feature
      HAVE_LIBM
      HAVE_IO_H
      HAVE_SETJMP_H
      HAVE_STDDEF_H
      HAVE_SYS_STAT_H
      HAVE_SYS_TIMEB_H
      HAVE_SYS_TYPES_H
      HAVE_STRDUP
      HAVE_SYSTEM
      HAVE_DIFFTIME
      HAVE_LONGJMP
      STDC_HEADERS
      HAVE_GETHOSTNAME
      HAVE_TIMEZONE
      HAVE_FUNCTION)
    set (${windows_feature} 1)
  endforeach ()
endif ()

# ----------------------------------------------------------------------
# END of WINDOWS Hard code Values
# ----------------------------------------------------------------------

# Under Cygwin, HAVE_LSEEK64 is forced to 0.
if (CYGWIN)
  set (HAVE_LSEEK64 0)
endif ()

#-----------------------------------------------------------------------------
# Probe for system libraries. Every library that is found is accumulated in
# LINK_LIBS by CHECK_LIBRARY_EXISTS_CONCAT.
#-----------------------------------------------------------------------------

# The math library "m" is only probed when WINDOWS is not set.
if (NOT WINDOWS)
  check_library_exists_concat ("m" random HAVE_LIBM)
endif ()

# Networking-related libraries.
check_library_exists_concat ("ws2_32" WSAStartup HAVE_LIBWS2_32)
check_library_exists_concat ("wsock32" gethostbyname HAVE_LIBWSOCK32)
# The "dl" probe is currently commented out:
#check_library_exists_concat ("dl" dlopen HAVE_LIBDL)
check_library_exists_concat ("ucb" gethostname HAVE_LIBUCB)
check_library_exists_concat ("socket" connect HAVE_LIBSOCKET)

# "nsl" is probed only when gethostbyname is not already found in "c".
check_library_exists ("c" gethostbyname "" NOT_NEED_LIBNSL)
if (NOT NOT_NEED_LIBNSL)
  check_library_exists_concat ("nsl" gethostbyname HAVE_LIBNSL)
endif ()


# USE_INCLUDES is the running list of headers passed to each header check.
# It starts empty, except that "windows.h" is seeded when WINDOWS is set.
set (USE_INCLUDES "")
if (WINDOWS)
  list (APPEND USE_INCLUDES "windows.h")
endif ()
#-----------------------------------------------------------------------------
# CHECK_INCLUDE_FILE_CONCAT (<header> <result-variable>)
#
# Runs CHECK_INCLUDE_FILES for <header> preceded by every header already
# collected in USE_INCLUDES, storing the outcome in <result-variable>. When
# the check succeeds, <header> is appended to USE_INCLUDES.
#
# This is a macro (not a function) so that USE_INCLUDES is updated in the
# caller's scope.
#-----------------------------------------------------------------------------
macro (CHECK_INCLUDE_FILE_CONCAT FILE VARIABLE)
  check_include_files ("${USE_INCLUDES};${FILE}" ${VARIABLE})
  if (${VARIABLE})
    list (APPEND USE_INCLUDES ${FILE})
  endif ()
endmacro ()

2 changes: 1 addition & 1 deletion conf/ltp_all_modules.conf.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#LTP data file path

LTP_dataFolder=${DATA_DIR}/
LTP_dataFolder=${MODEL_DIR}/

#--------------------------------#
17 changes: 14 additions & 3 deletions src/_gparser/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
link_directories (${LIBRARY_OUTPUT_PATH})
include_directories (./
${util_DIR})

Expand Down Expand Up @@ -33,7 +34,6 @@ set (gparser_SRC
KBestParseForest2O.h
KBestParseForest.cpp
KBestParseForest.h
main.cpp
MultiArray.h
Parameter.cpp
Parameter.h
Expand All @@ -42,10 +42,21 @@ set (gparser_SRC
ParserOptions.cpp
ParserOptions.h)

add_library (gparser ${gparser_SRC})
add_library (gparser_lib ${gparser_SRC})

configure_file (
gparser_dll.h
${INCLUDE_OUTPUT_PATH}/ltp/gparser_dll.h)

TARGET_LINK_LIBRARIES(gparser util)
target_link_libraries (gparser_lib util)
set_target_properties (gparser_lib PROPERTIES OUTPUT_NAME gparser)

set (gparser_exe_SRC main.cpp)

add_executable (gparser_exe ${gparser_exe_SRC})
target_link_libraries (gparser_exe gparser_lib)
set_target_properties (gparser_exe
PROPERTIES
OUTPUT_NAME gparser
RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/)

17 changes: 17 additions & 0 deletions src/_srl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,20 @@ target_link_libraries (srl util maxent)
configure_file (
SRL_DLL.h
${INCLUDE_OUTPUT_PATH}/ltp/SRL_DLL.h)

link_directories (${LIBRARY_OUTPUT_PATH})

# Source lists for the two SRL command-line tools.
set (SRLExtract_SRC SRLExtract.cpp Corpus.cpp)
set (SRLGetInstance_SRC SRLGetInstance.cpp GetInstance.cpp)

# Both tools are built the same way: an executable made from its own source
# list, linked against the srl library, and emitted into ${TOOLS_DIR}/train/.
foreach (srl_tool SRLExtract SRLGetInstance)
  add_executable (${srl_tool} ${${srl_tool}_SRC})
  target_link_libraries (${srl_tool} srl)
  set_target_properties (${srl_tool}
    PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/)
endforeach ()

53 changes: 53 additions & 0 deletions src/_srl/Corpus.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include "Corpus.h"
#include <stdexcept>

using namespace std;

// Attach this reader to the file `filename`, replacing any file that was
// attached before.
//
// Throws std::runtime_error when the file cannot be opened.
void Corpus::open_corpus(const std::string &filename)
{
    // Detach from whatever was open previously and reset the stream's error
    // flags so that the new open() starts from a clean state.
    if (m_corpus.is_open())
    {
        m_corpus.close();
    }
    m_corpus.clear();

    m_corpus.open(filename.c_str());
    if (m_corpus.fail())
    {
        throw std::runtime_error("Can't open corpus file");
    }
}

bool Corpus::get_next_block(vector<string> &lines)
{
lines.clear();

// if the file has already been read through, return false
if (m_corpus.eof())
return false;

string line;
while (getline(m_corpus, line))
{
if (string::npos == line.find_first_not_of("\t \n"))
{
if (lines.size() > 0)
{
return true;
}
}
else
{
lines.push_back(line);
}
}

// if the last line of file is not blank, read EOF
if (lines.size() > 0)
{
return true;
}
else // only blank line
{
return false;
}
}
29 changes: 29 additions & 0 deletions src/_srl/Corpus.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef _CORPUS_H_
#define _CORPUS_H_

#include <vector>
#include <string>
#include <fstream>

// Reader for a text corpus that is consumed one block at a time, where a
// block is a run of lines delimited by blank lines.
class Corpus
{
public:
    // Create a reader that is not attached to any file yet.
    Corpus() {}

    // Create a reader and immediately attach it to the file `filename`
    // (see open_corpus()).
    explicit Corpus(const std::string &filename)
    {
        open_corpus(filename);
    }

    ~Corpus() {}

    // Attach the reader to the file `filename` for input.
    void open_corpus(const std::string &filename);

    // Fetch the next blank-line-delimited block into `lines`.
    bool get_next_block(std::vector<std::string> &lines);

private:
    // Input stream over the currently attached corpus file.
    std::ifstream m_corpus;
};

#endif

Loading

0 comments on commit 806f736

Please sign in to comment.