forked from HIT-SCIR/ltp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
76 changed files
with
86,565 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Fetched over HTTPS: GitHub no longer serves the unauthenticated git:// protocol.
[submodule "tools/train/corpusproc"]
	path = tools/train/corpusproc
	url = https://github.com/Oneplus/py-corpusproc.git
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#----------------------------------------------------------------------------- | ||
# Include all the necessary files for macros | ||
#----------------------------------------------------------------------------- | ||
INCLUDE (${CMAKE_ROOT}/Modules/CheckFunctionExists.cmake) | ||
INCLUDE (${CMAKE_ROOT}/Modules/CheckIncludeFile.cmake) | ||
INCLUDE (${CMAKE_ROOT}/Modules/CheckIncludeFileCXX.cmake) | ||
INCLUDE (${CMAKE_ROOT}/Modules/CheckIncludeFiles.cmake) | ||
INCLUDE (${CMAKE_ROOT}/Modules/CheckLibraryExists.cmake) | ||
INCLUDE (${CMAKE_ROOT}/Modules/CheckSymbolExists.cmake) | ||
INCLUDE (${CMAKE_ROOT}/Modules/CheckTypeSize.cmake) | ||
|
||
#----------------------------------------------------------------------------- | ||
# Always SET this for now IF we are on an OS X box | ||
#----------------------------------------------------------------------------- | ||
# On OS X, abort the configure step when more than one architecture is
# requested, and record that this is not a universal build.
IF (APPLE)
  LIST (LENGTH CMAKE_OSX_ARCHITECTURES ARCH_LENGTH)
  IF (ARCH_LENGTH GREATER 1)
    # Blank the cached value before failing so that the next configure run
    # starts from the default architecture.
    SET (CMAKE_OSX_ARCHITECTURES "" CACHE STRING "" FORCE)
    # message() joins its arguments without any separator, so every fragment
    # except the last ends with an explicit space.
    MESSAGE (FATAL_ERROR "Building Universal Binaries on OS X is NOT supported by the HDF5 project. This is "
      "due to technical reasons. The best approach would be build each architecture in separate directories "
      "and use the 'lipo' tool to combine them into a single executable or library. The 'CMAKE_OSX_ARCHITECTURES' "
      "variable has been set to a blank value which will build the default architecture for this system.")
  ENDIF ()
  SET (HDF_AC_APPLE_UNIVERSAL_BUILD 0)
ENDIF (APPLE)
|
||
#----------------------------------------------------------------------------- | ||
# This MACRO checks IF the symbol exists in the library and IF it | ||
# does, it appends library to the list. | ||
#----------------------------------------------------------------------------- | ||
# Libraries accepted so far by CHECK_LIBRARY_EXISTS_CONCAT.
set (LINK_LIBS "")

# CHECK_LIBRARY_EXISTS_CONCAT (<library> <symbol> <result-variable>)
#   Runs CHECK_LIBRARY_EXISTS for <symbol>, passing <library> followed by the
#   current contents of LINK_LIBS as the library list. When the result
#   variable comes back true, <library> is appended to LINK_LIBS.
macro (CHECK_LIBRARY_EXISTS_CONCAT LIBRARY SYMBOL VARIABLE)
  check_library_exists ("${LIBRARY};${LINK_LIBS}" ${SYMBOL} "" ${VARIABLE})
  if (${VARIABLE})
    list (APPEND LINK_LIBS ${LIBRARY})
  endif ()
endmacro ()
|
||
# ---------------------------------------------------------------------- | ||
# WINDOWS Hard code Values | ||
# ---------------------------------------------------------------------- | ||
|
||
# WINDOWS starts out unset and becomes 1 only when WIN32 is true while
# UNIX, CYGWIN and MINGW are all false.
set (WINDOWS)
if (WIN32 AND NOT UNIX AND NOT CYGWIN AND NOT MINGW)
  set (WINDOWS 1)
endif ()
|
||
# When WINDOWS is set, these results are fixed to 1 here instead of being
# left to a configure-time probe.
if (WINDOWS)
  # libraries and headers
  set (HAVE_LIBM 1)
  set (HAVE_IO_H 1)
  set (HAVE_SETJMP_H 1)
  set (HAVE_STDDEF_H 1)
  set (HAVE_SYS_STAT_H 1)
  set (HAVE_SYS_TIMEB_H 1)
  set (HAVE_SYS_TYPES_H 1)
  set (STDC_HEADERS 1)

  # functions and other features
  set (HAVE_STRDUP 1)
  set (HAVE_SYSTEM 1)
  set (HAVE_DIFFTIME 1)
  set (HAVE_LONGJMP 1)
  set (HAVE_GETHOSTNAME 1)
  set (HAVE_TIMEZONE 1)
  set (HAVE_FUNCTION 1)
endif ()
|
||
# ---------------------------------------------------------------------- | ||
# END of WINDOWS Hard code Values | ||
# ---------------------------------------------------------------------- | ||
|
||
# Under Cygwin, HAVE_LSEEK64 is forced to 0.
if (CYGWIN)
  set (HAVE_LSEEK64 0)
endif ()
|
||
#----------------------------------------------------------------------------- | ||
# Check for the math library "m" | ||
#----------------------------------------------------------------------------- | ||
# libm is only probed when WINDOWS is not set (HAVE_LIBM is hard-coded there).
if (NOT WINDOWS)
  CHECK_LIBRARY_EXISTS_CONCAT ("m" random HAVE_LIBM)
endif ()

# Socket / networking libraries; each one that is found is added to LINK_LIBS.
CHECK_LIBRARY_EXISTS_CONCAT ("ws2_32" WSAStartup HAVE_LIBWS2_32)
CHECK_LIBRARY_EXISTS_CONCAT ("wsock32" gethostbyname HAVE_LIBWSOCK32)
#CHECK_LIBRARY_EXISTS_CONCAT ("dl" dlopen HAVE_LIBDL)
CHECK_LIBRARY_EXISTS_CONCAT ("ucb" gethostname HAVE_LIBUCB)
CHECK_LIBRARY_EXISTS_CONCAT ("socket" connect HAVE_LIBSOCKET)

# libnsl is probed only when gethostbyname is not already found in libc.
check_library_exists ("c" gethostbyname "" NOT_NEED_LIBNSL)
if (NOT NOT_NEED_LIBNSL)
  CHECK_LIBRARY_EXISTS_CONCAT ("nsl" gethostbyname HAVE_LIBNSL)
endif ()
|
||
|
||
# Headers accepted so far by CHECK_INCLUDE_FILE_CONCAT; seeded with
# windows.h when WINDOWS is set.
set (USE_INCLUDES "")
if (WINDOWS)
  list (APPEND USE_INCLUDES "windows.h")
endif ()
#----------------------------------------------------------------------------- | ||
# Check IF header file exists and add it to the list. | ||
#----------------------------------------------------------------------------- | ||
# CHECK_INCLUDE_FILE_CONCAT (<header> <result-variable>)
#   Runs CHECK_INCLUDE_FILES with the headers already in USE_INCLUDES followed
#   by <header>. When the result variable comes back true, <header> is
#   appended to USE_INCLUDES.
macro (CHECK_INCLUDE_FILE_CONCAT FILE VARIABLE)
  check_include_files ("${USE_INCLUDES};${FILE}" ${VARIABLE})
  if (${VARIABLE})
    list (APPEND USE_INCLUDES ${FILE})
  endif ()
endmacro ()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
#LTP data file path | ||
|
||
LTP_dataFolder=${DATA_DIR}/ | ||
LTP_dataFolder=${MODEL_DIR}/ | ||
|
||
#--------------------------------# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#include "Corpus.h" | ||
#include <stdexcept> | ||
|
||
using namespace std; | ||
|
||
void Corpus::open_corpus(const string &filename) | ||
{ | ||
//close | ||
m_corpus.close(); | ||
m_corpus.clear(); | ||
|
||
// open the corpus file | ||
m_corpus.open(filename.c_str()); | ||
if (!m_corpus) | ||
{ | ||
throw runtime_error("Can't open corpus file"); | ||
} | ||
} | ||
|
||
bool Corpus::get_next_block(vector<string> &lines) | ||
{ | ||
lines.clear(); | ||
|
||
// if the file has already been read through, return false | ||
if (m_corpus.eof()) | ||
return false; | ||
|
||
string line; | ||
while (getline(m_corpus, line)) | ||
{ | ||
if (string::npos == line.find_first_not_of("\t \n")) | ||
{ | ||
if (lines.size() > 0) | ||
{ | ||
return true; | ||
} | ||
} | ||
else | ||
{ | ||
lines.push_back(line); | ||
} | ||
} | ||
|
||
// if the last line of file is not blank, read EOF | ||
if (lines.size() > 0) | ||
{ | ||
return true; | ||
} | ||
else // only blank line | ||
{ | ||
return false; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#ifndef _CORPUS_H_ | ||
#define _CORPUS_H_ | ||
|
||
#include <vector> | ||
#include <string> | ||
#include <fstream> | ||
|
||
/**
 * Line-oriented reader over a corpus file that hands the content back one
 * block at a time, where blocks are separated by a blank line.
 */
class Corpus
{
public:
    /// Builds a reader with no file attached yet.
    Corpus() {}

    /// Builds a reader and opens "filename" right away through open_corpus().
    explicit Corpus(const std::string &filename)
    {
        open_corpus(filename);
    }

    ~Corpus() {}

    /// Attaches the corpus file "filename" as the input.
    void open_corpus(const std::string &filename);

    /// Stores the next blank-line-separated block in "lines".
    bool get_next_block(std::vector<std::string> &lines);

private:
    /// Input stream the blocks are read from.
    std::ifstream m_corpus;
};
|
||
#endif | ||
|
Oops, something went wrong.