Skip to content

Commit

Permalink
add basic source for new framework
Browse files Browse the repository at this point in the history
  • Loading branch information
Oneplus committed Jul 17, 2013
1 parent b7e069a commit 6d9f09d
Show file tree
Hide file tree
Showing 84 changed files with 13,017 additions and 0 deletions.
21 changes: 21 additions & 0 deletions src/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Build rules for the lgdpj executable.

# Header search paths: this directory plus the parser, utils and math
# directories located under SOURCE_DIR.
include_directories(
  ./
  ${SOURCE_DIR}/parser
  ${SOURCE_DIR}/utils
  ${SOURCE_DIR}/math
)

# Print the value of SOURCE_DIR while configuring.
message(STATUS ${SOURCE_DIR})

# Source files compiled into the lgdpj executable.
set(lgdpj_SRC
  collections.cpp
  model.cpp
  featurespace.cpp
  extractor.cpp
  options.cpp
  decoder1o.cpp
  decoder2o.cpp
  parser.cpp
  lgdpj.cpp
)

add_executable(lgdpj ${lgdpj_SRC})

70 changes: 70 additions & 0 deletions src/parser/collections.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include "collections.h"

#include <cstring>

namespace ltp {
namespace parser {

/*
 * Build a collection of num_dicts dictionaries. The shared index counter
 * starts from zero and every dictionary receives a pointer back to this
 * collection.
 *
 *  @param[in]  num_dicts   the number of dictionaries to allocate
 */
DictionaryCollections::DictionaryCollections(int num_dicts)
  : idx(0) {
  dicts.resize(num_dicts);

  for (size_t k = 0; k < dicts.size(); ++k) {
    dicts[k] = new Dictionary(this);
  }
}

/*
 * Look the key up in the tid-th dictionary, optionally creating it.
 *
 *  @param[in]  tid     the index of the dictionary
 *  @param[in]  key     the key
 *  @param[in]  create  insert the key when it is not in the dictionary
 *  @return     int     the index of the key; -1 when the key is absent and
 *                      create is false, or when tid is not a valid
 *                      dictionary index
 */
int DictionaryCollections::retrieve(int tid, const char * key, bool create) {
  // an out-of-range tid would index past the end of dicts
  if (tid < 0 || static_cast<size_t>(tid) >= dicts.size()) {
    return -1;
  }

  return dicts[tid]->retrieve(key, create);
}

/*
 * Report the current value of the shared index counter.
 *
 *  @return     size_t  the value of idx converted to size_t
 */
size_t DictionaryCollections::dim() const {
  const size_t total = static_cast<size_t>(idx);
  return total;
}

/*
 * Serialize the collection. The layout is: a 16-byte zero-padded
 * "collections" tag, the shared index counter (int), the number of
 * dictionaries (unsigned int), then each dictionary's database in order.
 *
 *  @param[out] out     the output stream
 */
void DictionaryCollections::dump(ostream & out) {
  // strncpy pads the remainder of the 16-byte tag with zeros
  char tag[16];
  strncpy(tag, "collections", sizeof(tag));
  out.write(tag, sizeof(tag));

  out.write(reinterpret_cast<const char *>(&idx), sizeof(int));

  const unsigned int num_dicts = dicts.size();
  out.write(reinterpret_cast<const char *>(&num_dicts), sizeof(unsigned int));

  for (size_t k = 0; k < dicts.size(); ++k) {
    dicts[k]->database.dump(out);
  }
}

/*
 * Load the collection from a stream written by dump(). The stream must
 * start with the 16-byte "collections" tag and must describe exactly as
 * many dictionaries as this collection already holds.
 *
 * The shared index counter is only overwritten once the header has been
 * read and validated; a failure while loading an individual dictionary
 * can still leave the collection partially loaded.
 *
 *  @param[in]  in      the input stream
 *  @return     bool    true on success, false on a truncated stream, a
 *                      wrong tag, a dictionary-count mismatch or a
 *                      dictionary that fails to load.
 */
bool DictionaryCollections::load(istream & in) {
  char chunk[16];
  int loaded_idx = 0;
  unsigned int sz = 0;

  // a short read leaves chunk unspecified, so test the stream first and
  // bound the comparison to the bytes that were actually requested
  if (!in.read(chunk, sizeof(chunk))
      || strncmp(chunk, "collections", sizeof(chunk)) != 0) {
    return false;
  }

  if (!in.read(reinterpret_cast<char *>(&loaded_idx), sizeof(int))
      || !in.read(reinterpret_cast<char *>(&sz), sizeof(unsigned int))) {
    return false;
  }

  if (sz != dicts.size()) {
    return false;
  }

  idx = loaded_idx;

  for (unsigned i = 0; i < sz; ++ i) {
    if (!dicts[i]->database.load(in)) {
      return false;
    }
  }

  return true;
}

} // end for namespace parser
} // end for namespace ltp
116 changes: 116 additions & 0 deletions src/parser/collections.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#ifndef __DICT_COLLECTIONS_H__
#define __DICT_COLLECTIONS_H__

#include <iostream>
#include <vector>
#include "stringmap.hpp"
#include "smartmap.hpp"

#include "instance.h"

namespace ltp {
namespace parser {

using namespace std;
using namespace ltp::utility;


// forward declaration of Dictionary, which is specially needed
// by the observer design pattern
class Dictionary;

// class of a collection of dictionary
// a index counter is shared within several dictionary.
class DictionaryCollections {
public:
DictionaryCollections(int num_dicts);
~DictionaryCollections() {}

/*
* Dump the dictionary collections into output stream
*
* @param[out] out the output stream
*/
void dump(ostream & out);

/*
* Load the dictionary collections from input stream,
* return true if dictionary successfully loaded, otherwise
* false.
*
* @param[in] in the input stream
* @return bool true on success, otherwise false.
*/
bool load(istream & in);

/*
* Get the size of dictionary collections
*
* @return size_t the size of the dictionary
*/
size_t dim() const;

/*
* Retrieve the certain key in one of the dictionaries in this
* collection. If create is specified, this key is created on
* the condition that it is not in the dictionary. Return the
* index of the key, -1 on failure
*
* @param[in] tid the index of the dictionary
* @param[in] key the key
* @param[in] create insert the key to dictionary if create
* if true.
* @return int the index of the key, -1 on failure.
*/
int retrieve(int tid, const char * key, bool create);

public:
int idx; /*< the shared index among dictionaries */

private:
vector<Dictionary *> dicts;
};

// the dictionary class
// it's wrapper of class SmartMap<int>
class Dictionary {
public:
Dictionary(DictionaryCollections * coll):
collections(coll) {}

//StringMap<int> database;
SmartMap<int> database;
DictionaryCollections * collections;

inline int retrieve(const char * key, bool create) {
int val;

if (database.get(key, val)) {
return val;
} else {
if (create) {
val = collections->idx;
database.set(key, val);
// database.unsafe_set(key, val);
++ collections->idx;
return val;
}
}

return -1;
}

inline int size() {
return database.size();
}
};

// LabelCollections is a bidirectional map.
// It supports two ways of retrieval:
//
//  * string key -> int index
//  * int index -> string key
//
} // end for namespace parser
} // end for namespace ltp
#endif  // end for __DICT_COLLECTIONS_H__
98 changes: 98 additions & 0 deletions src/parser/conllreader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#ifndef __CONLL_READER_H__
#define __CONLL_READER_H__

#include <iostream>
#include <fstream>

#include "codecs.hpp"
#include "strutils.hpp"
#include "logging.hpp"

#include "settings.h"
#include "instance.h"
#include "options.h"

namespace ltp {
namespace parser {

using namespace std;
using namespace ltp::strutils;

// Reads instances, one sentence at a time, from a CoNLL-style file in
// which every token occupies one line and a blank line ends a sentence.
class CoNLLReader {
public:
  /*
   * Constructor for CoNLLReader
   * Register a ifstream to the CoNLLReader. Only a reference is kept, so
   * the stream must outlive the reader.
   *
   *  @param  f   the reference to the ifstream
   */
  CoNLLReader(ifstream& _f): f(_f) {}
  ~CoNLLReader() {}

  /*
   * Get next instance from ifstream buffer
   *
   * Position 0 of the instance is filled with the ROOT_* placeholders and
   * the tokens read from the stream follow. Lines with too few columns to
   * supply the fields read below are skipped with a warning.
   *
   *  @return   Instance *  a newly allocated instance that the caller is
   *                        responsible for deleting, or NULL when the
   *                        stream is at end of file or no token line was
   *                        read before a blank line / the end of input.
   */
  Instance * next() {
    if (f.eof()) {
      return NULL;
    }

    Instance * inst = new Instance;
    string line;

    inst->forms.push_back( ROOT_FORM );
    inst->lemmas.push_back( ROOT_LEMMA );
    inst->postags.push_back( ROOT_POSTAG );
    inst->heads.push_back( -1 );
    if (model_opt.labeled) {
      inst->deprels.push_back( ROOT_DEPREL );
    }
    inst->chars.push_back( vector<string>() );

    // the head is read from items[6]; labeled models also read items[7]
    const size_t min_columns = (model_opt.labeled ? 8 : 7);

    while (!f.eof()) {
      getline(f, line);
      chomp(line);

      if (line.size() == 0) {
        break;
      }

      vector<string> items = split(line);
      if (items.size() != 10) {
        WARNING_LOG("Unknown conll format file");
      }

      // indexing a short line would read past the end of items
      if (items.size() < min_columns) {
        WARNING_LOG("Skip conll line with too few columns");
        continue;
      }

      inst->forms.push_back( items[1] );    // items[1]: form
      inst->lemmas.push_back( items[2] );   // items[2]: lemma
      inst->postags.push_back( items[3] );  // items[3]: postag
      inst->heads.push_back( to_int(items[6]) );  // items[6]: head

      if (model_opt.labeled) {
        inst->deprels.push_back( items[7] );  // items[7]: deprel
      }

      vector<string> chars;
      codecs::decode(items[1], chars);
      inst->chars.push_back( chars );
    }

    // only the root placeholder is present: nothing was read
    if (inst->forms.size() == 1) {
      delete inst;
      inst = NULL;
    }
    return inst;
  }

  /*
   * Reader reach the end of the file
   */
  bool eof() {
    return f.eof();
  }
private:
  ifstream& f;
};  // end for CoNLLReader
} // end for parser
} // end for namespace ltp

#endif // end for __CONLL_READER_H__
59 changes: 59 additions & 0 deletions src/parser/conllwriter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#ifndef __CONLL_WRITER_H__
#define __CONLL_WRITER_H__

#include <iostream>

#include "strutils.hpp"
#include "instance.h"

namespace ltp {
namespace parser {

using namespace ltp::strutils;

// Writes instances to an output stream, one tab-separated line per token
// followed by a blank line after each instance.
class CoNLLWriter {
public:
  /*
   * Register an output stream to the writer. Only a reference is kept, so
   * the stream must outlive the writer.
   *
   *  @param  f   the reference to the output stream
   */
  CoNLLWriter(std::ostream& _f): f(_f) {}
  ~CoNLLWriter() {}

  /*
   * Write one instance. Position 0 is not written (presumably the
   * artificial root), so the token stored at position i is written with
   * ID i. Columns whose data is not available for every position are
   * written as "_".
   *
   *  @param[in]  inst  the instance to write
   */
  void write(const Instance * inst) {
    const int len = inst->size();

    // a column is only emitted when it has one entry per position
    const bool has_deprels =
      (static_cast<int>(inst->deprels.size()) == len);
    const bool predicted = (len > 0 &&
        static_cast<int>(inst->predicted_heads.size()) == len);
    const bool predicted_label = (len > 0 &&
        static_cast<int>(inst->predicted_deprels.size()) == len);

    for (int i = 1; i < len; ++ i) {
      f << i                    << "\t"   // 0 - index
        << inst->forms[i]       << "\t"   // 1 - form
        << inst->lemmas[i]      << "\t"   // 2 - lemma
        << inst->postags[i]     << "\t"   // 3 - postag
        << "_"                  << "\t"   // 4 - unknown
        << "_"                  << "\t"   // 5 - unknown
        << inst->heads[i]       << "\t";  // 6 - heads

      // 7 - deprels
      if (has_deprels) {
        f << inst->deprels[i];
      } else {
        f << "_";
      }
      f << "\t";

      // 8 - predicted heads
      if (predicted) {
        f << to_str(inst->predicted_heads[i]);
      } else {
        f << "_";
      }
      f << "\t";

      // 9 - predicted deprels
      if (predicted_label) {
        f << inst->predicted_deprels[i];
      } else {
        f << "_";
      }
      f << "\n";
    }

    // blank line closes the instance; flush once per instance
    f << std::endl;
  }
private:
  std::ostream& f;
};  // end for CoNLLWriter

} // end for parser
} // end for namespace ltp


#endif // end for __CONLL_WRITER_H__
Loading

0 comments on commit 6d9f09d

Please sign in to comment.