Skip to content

Commit

Permalink
Merge ../mosesdecoder into perf_moses2
Browse files Browse the repository at this point in the history
  • Loading branch information
hieuhoang committed Apr 12, 2016
2 parents b48d329 + 7b205b0 commit 1ff1d04
Show file tree
Hide file tree
Showing 9 changed files with 619 additions and 590 deletions.
119 changes: 60 additions & 59 deletions moses/FF/Dsg-Feature/Desegmenter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,73 +11,74 @@ using namespace std;

namespace Moses
{
void Desegmenter::Load(const string filename){
void Desegmenter::Load(const string filename)
{

std::ifstream myFile(filename.c_str() );
if (myFile.is_open()){
cerr << "Desegmentation File open successful." << endl;
string line;
while (getline(myFile, line)){
stringstream ss(line);
string token;
vector<string> myline;
while (getline(ss, token, '\t')){
myline.push_back(token);
}
mmDesegTable.insert(pair<string, string>(myline[2], myline[1] ));
}
myFile.close();
}
else
cerr << "open() failed: check if Desegmentation file is in right folder" << endl;
// Read the tab-separated desegmentation table. For every usable line the
// third field (myline[2]) becomes the lookup key and the second field
// (myline[1]) the stored value in mmDesegTable.
std::ifstream myFile(filename.c_str() );
if (myFile.is_open()) {
  cerr << "Desegmentation File open successful." << endl;
  string line;
  size_t skippedLines = 0;
  while (getline(myFile, line)) {
    stringstream ss(line);
    string token;
    vector<string> myline;
    while (getline(ss, token, '\t')) {
      myline.push_back(token);
    }
    // A blank or truncated line has no myline[2]; indexing it anyway would
    // be an out-of-bounds read (undefined behaviour), so skip such lines.
    if (myline.size() < 3) {
      ++skippedLines;
      continue;
    }
    mmDesegTable.insert(pair<string, string>(myline[2], myline[1] ));
  }
  myFile.close();
  if (skippedLines > 0) {
    cerr << "Desegmentation File: skipped " << skippedLines
         << " line(s) with fewer than 3 tab-separated fields." << endl;
  }
} else
  cerr << "open() failed: check if Desegmentation file is in right folder" << endl;
}


vector<string> Desegmenter::Search(string myKey){
multimap<string, string>::const_iterator mmiPairFound = mmDesegTable.find(myKey);
vector<string> result;
if (mmiPairFound != mmDesegTable.end()){
size_t nNumPairsInMap = mmDesegTable.count(myKey);
for (size_t nValuesCounter = 0; nValuesCounter < nNumPairsInMap; ++nValuesCounter){
if (mmiPairFound != mmDesegTable.end()) {
result.push_back(mmiPairFound->second);
}
++mmiPairFound;
}
return result;
}
else{
string rule_deseg ;
rule_deseg = ApplyRules(myKey);
result.push_back(rule_deseg);
return result;
}
/**
 * Returns every value stored in mmDesegTable under myKey.
 *
 * If the key is not present in the table, the result instead holds the
 * single string produced by ApplyRules(myKey).
 *
 * @param myKey  key to look up in the desegmentation table.
 * @return the table values for myKey, or a one-element vector with the
 *         rule-based result when the table has no entry for it.
 */
vector<string> Desegmenter::Search(string myKey)
{
  typedef multimap<string, string>::const_iterator TableIter;

  // equal_range delimits exactly the entries whose key equals myKey.
  // multimap::find may return any one of several equal-keyed entries, so
  // walking count() steps forward from it is not guaranteed to cover them.
  const pair<TableIter, TableIter> matches = mmDesegTable.equal_range(myKey);

  vector<string> result;
  for (TableIter entry = matches.first; entry != matches.second; ++entry) {
    result.push_back(entry->second);
  }

  if (result.empty()) {
    // No table entry for this key: fall back to the rewrite rules.
    result.push_back(ApplyRules(myKey));
  }
  return result;
}


string Desegmenter::ApplyRules(string & segToken){
string Desegmenter::ApplyRules(string & segToken)
{

// Work on a copy; the rewrites below are applied to desegToken, not to
// the segToken reference that was passed in.
string desegToken=segToken;
// Extra rewrites applied only when `simple` is false. They run in the
// listed order, each over the output of the previous one.
// NOTE(review): the patterns look like transliterated affix-joining rules
// for a specific language/segmentation scheme - confirm with the feature's
// author before changing or reordering any of them.
if (!simple) {
boost::replace_all(desegToken, "l+ All", "ll");
boost::replace_all(desegToken, "l+ Al", "ll");
boost::replace_all(desegToken, "y+ y ", "y");
boost::replace_all(desegToken, "p+ ", "t");
boost::replace_all(desegToken, "' +", "}");
boost::replace_all(desegToken, "y +", "A");
boost::replace_all(desegToken, "n +n", "n");
boost::replace_all(desegToken, "mn +m", "mm");
boost::replace_all(desegToken, "En +m", "Em");
// NOTE(review): this replacement text is identical to the "En +m" rule
// above although the pattern differs - confirm "Em" is really intended.
boost::replace_all(desegToken, "An +lA", "Em");
// Turn the -LRB- / -RRB- placeholders into literal parentheses.
boost::replace_all(desegToken, "-LRB-", "(");
boost::replace_all(desegToken, "-RRB-", ")");
}

// Always applied: delete the remaining "+" markers together with the
// adjacent space. The two-sided "+ +" pattern is removed first, then the
// one-sided "+ " and " +" patterns.
boost::replace_all(desegToken, "+ +", "");
boost::replace_all(desegToken, "+ ", "");
boost::replace_all(desegToken, " +", "");

string desegToken=segToken;
if (!simple){
boost::replace_all(desegToken, "l+ All", "ll");
boost::replace_all(desegToken, "l+ Al", "ll");
boost::replace_all(desegToken, "y+ y ", "y");
boost::replace_all(desegToken, "p+ ", "t");
boost::replace_all(desegToken, "' +", "}");
boost::replace_all(desegToken, "y +", "A");
boost::replace_all(desegToken, "n +n", "n");
boost::replace_all(desegToken, "mn +m", "mm");
boost::replace_all(desegToken, "En +m", "Em");
boost::replace_all(desegToken, "An +lA", "Em");
boost::replace_all(desegToken, "-LRB-", "(");
boost::replace_all(desegToken, "-RRB-", ")");
}

boost::replace_all(desegToken, "+ +", "");
boost::replace_all(desegToken, "+ ", "");
boost::replace_all(desegToken, " +", "");

return desegToken;
return desegToken;
}

Desegmenter::~Desegmenter()
Expand Down
30 changes: 16 additions & 14 deletions moses/FF/Dsg-Feature/Desegmenter.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,23 @@ namespace Moses
class Desegmenter
{
private:
std::multimap<string, string> mmDesegTable;
std::string filename;
bool simple;
void Load(const string filename);
std::multimap<string, string> mmDesegTable;
std::string filename;
bool simple;
void Load(const string filename);

public:
Desegmenter(const std::string& file, const bool scheme){
filename = file;
simple=scheme;
Load(filename);
}
string getFileName(){ return filename; }

vector<string> Search(string myKey);
string ApplyRules(string &);
~Desegmenter();
Desegmenter(const std::string& file, const bool scheme) {
filename = file;
simple=scheme;
Load(filename);
}
/// Returns a copy of the stored `filename` member.
/// Const-qualified so it can also be called on a const Desegmenter.
string getFileName() const {
  return filename;
}

vector<string> Search(string myKey);
string ApplyRules(string &);
~Desegmenter();
};
}
Loading

0 comments on commit 1ff1d04

Please sign in to comment.