Skip to content

Commit

Permalink
Initial working commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
pauldb89 committed Jan 28, 2013
1 parent ae1bd32 commit 4ab84a0
Show file tree
Hide file tree
Showing 66 changed files with 3,878 additions and 3 deletions.
87 changes: 85 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ BOOST_PROGRAM_OPTIONS
BOOST_SYSTEM
BOOST_SERIALIZATION
BOOST_TEST
BOOST_FILESYSTEM
AM_PATH_PYTHON
AC_CHECK_HEADER(dlfcn.h,AC_DEFINE(HAVE_DLFCN_H))
AC_CHECK_LIB(dl, dlopen)
Expand Down Expand Up @@ -85,11 +86,92 @@ then
AM_CONDITIONAL([HAVE_CMPH], true)
fi

# Optional Google Test / Google Mock support.
#
# --with-gtest=DIR enables the checks below; --with-gmock=DIR is only looked
# at when Google Test was requested as well. While the checks run, the
# global CPPFLAGS, LDFLAGS and LIBS are extended temporarily so that the
# header and library probes can see the requested directories. They are
# restored afterwards, and the per-library flags are exported through the
# GTEST_* and GMOCK_* substitutions instead.
AM_CONDITIONAL([HAVE_GTEST], false)
AC_ARG_WITH(gtest,
  [AC_HELP_STRING([--with-gtest=DIR], [(optional) path to Google Test library])],
  [with_gtest=$withval],
  [with_gtest=no]
)

if test "x$with_gtest" != 'xno'
then
  gtest_CPPFLAGS="-I${with_gtest}/include"
  gtest_LDFLAGS="-L${with_gtest} -L${with_gtest}/lib"
  gtest_LIBS="-lgtest_main -lgtest -lpthread"

  # Save the global preprocessor flags. The variable name must match the one
  # used for the restore at the end of this block, otherwise CPPFLAGS would
  # be reset to an empty string for the remainder of configure.
  SAVE_CPPFLAGS="$CPPFLAGS"
  CPPFLAGS="$CPPFLAGS $gtest_CPPFLAGS"
  AC_CHECK_HEADER(${with_gtest}/include/gtest/gtest.h,
    [AC_DEFINE([HAVE_GTEST], [1], [flag for Google Test header])],
    [AC_MSG_ERROR([Cannot find Google Test headers!])]
  )

  SAVE_LDFLAGS="$LDFLAGS"
  LDFLAGS="$LDFLAGS $gtest_LDFLAGS"
  SAVE_LIBS="$LIBS"
  # Google Test needs pthreads.
  AC_CHECK_LIB([pthread],
    [pthread_mutex_init],
    [],
    [AC_MSG_ERROR([Cannot find pthread library])]
  )
  AX_CXX_CHECK_LIB([gtest],
    [testing::TestInfo::name() const],
    [],
    [AC_MSG_ERROR([Cannot find Google Test library libgtest])]
  )
  AC_CHECK_LIB([gtest_main],
    [main],
    [],
    [AC_MSG_ERROR([Cannot find Google Test library libgtest_main])]
  )

  AC_SUBST(AS_TR_CPP([GTEST_CPPFLAGS]), ["$gtest_CPPFLAGS"])
  AC_SUBST(AS_TR_CPP([GTEST_LDFLAGS]), ["$gtest_LDFLAGS"])
  AC_SUBST(AS_TR_CPP([GTEST_LIBS]), ["$gtest_LIBS"])

  # NOTE(review): HAVE_GMOCK is only ever set to false in this block, and only
  # when Google Test was requested. Confirm that no Makefile.am depends on it.
  AM_CONDITIONAL([HAVE_GMOCK], false)
  AC_ARG_WITH(gmock,
    [AC_HELP_STRING([--with-gmock=DIR], [(optional) path to Google Mock library])],
    [with_gmock=$withval],
    [with_gmock=no]
  )

  if test "x$with_gmock" != 'xno'
  then
    gmock_CPPFLAGS="-I${with_gmock}/include"
    gmock_LDFLAGS="-L${with_gmock} -L${with_gmock}/lib"
    gmock_LIBS="-lgmock"

    # The Google Mock probes run on top of the Google Test flags set above.
    CPPFLAGS="$CPPFLAGS $gmock_CPPFLAGS"
    AC_CHECK_HEADER(${with_gmock}/include/gmock/gmock.h,
      [AC_DEFINE([HAVE_GMOCK], [1], [flag for Google Mock header])],
      [AC_MSG_ERROR([Cannot find Google Mock headers!])]
    )

    LDFLAGS="$LDFLAGS $gmock_LDFLAGS"
    AX_CXX_CHECK_LIB([gmock],
      [testing::Expectation],
      [],
      [AC_MSG_ERROR([Cannot find Google Mock library libgmock])]
    )

    AC_SUBST(AS_TR_CPP([GMOCK_CPPFLAGS]), ["$gmock_CPPFLAGS"])
    AC_SUBST(AS_TR_CPP([GMOCK_LDFLAGS]), ["$gmock_LDFLAGS"])
    AC_SUBST(AS_TR_CPP([GMOCK_LIBS]), ["$gmock_LIBS"])
  fi

  # Restore the global flags saved before the probes.
  CPPFLAGS="$SAVE_CPPFLAGS"
  LDFLAGS="$SAVE_LDFLAGS"
  LIBS="$SAVE_LIBS"
  AM_CONDITIONAL([HAVE_GTEST], true)
fi

#BOOST_THREADS
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS $BOOST_SERIALIZATION_LDFLAGS $BOOST_SYSTEM_LDFLAGS"
LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS $BOOST_SERIALIZATION_LDFLAGS $BOOST_SYSTEM_LDFLAGS $BOOST_FILESYSTEM_LDFLAGS"
# $BOOST_THREAD_LDFLAGS"
LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS $ZLIBS"
LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS $BOOST_FILESYSTEM_LIBS $ZLIBS"
# $BOOST_THREAD_LIBS"

AC_CHECK_HEADER(google/dense_hash_map,
Expand All @@ -106,6 +188,7 @@ AC_CONFIG_FILES([mteval/Makefile])
AC_CONFIG_FILES([mteval/meteor_jar.cc])
AC_CONFIG_FILES([decoder/Makefile])
AC_CONFIG_FILES([python/setup.py])
AC_CONFIG_FILES([extractor/Makefile])
AC_CONFIG_FILES([word-aligner/Makefile])

# KenLM stuff
Expand Down
85 changes: 85 additions & 0 deletions extractor/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Command-line tools built from this directory and installed by `make install`.
bin_PROGRAMS = compile run_extractor

# Unit-test binaries. They are built but not installed.
noinst_PROGRAMS = \
binary_search_merger_test \
data_array_test \
linear_merger_test \
matching_comparator_test \
matching_test \
matchings_finder_test \
phrase_test \
precomputation_test \
suffix_array_test \
veb_test

# Tests executed by `make check`. Only precomputation_test is currently
# enabled; the longer list is kept below, commented out.
TESTS = precomputation_test
#TESTS = binary_search_merger_test \
# data_array_test \
# linear_merger_test \
# matching_comparator_test \
# matching_test \
# phrase_test \
# suffix_array_test \
# veb_test

# Sources and link lines for each unit test. Every test is built from a single
# source file and links the Google Test flags/libraries plus libextractor.a;
# the tests that also list GMOCK_LDFLAGS/GMOCK_LIBS link Google Mock as well.
binary_search_merger_test_SOURCES = binary_search_merger_test.cc
binary_search_merger_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
data_array_test_SOURCES = data_array_test.cc
data_array_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
linear_merger_test_SOURCES = linear_merger_test.cc
linear_merger_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
matching_comparator_test_SOURCES = matching_comparator_test.cc
matching_comparator_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
matching_test_SOURCES = matching_test.cc
matching_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
matchings_finder_test_SOURCES = matchings_finder_test.cc
matchings_finder_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
phrase_test_SOURCES = phrase_test.cc
phrase_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
precomputation_test_SOURCES = precomputation_test.cc
precomputation_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
suffix_array_test_SOURCES = suffix_array_test.cc
suffix_array_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
veb_test_SOURCES = veb_test.cc
veb_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a

# Static convenience libraries. They are built but not installed.
noinst_LIBRARIES = libextractor.a libcompile.a

# Each installed tool is a single source file linked against one of the
# libraries above.
compile_SOURCES = compile.cc
compile_LDADD = libcompile.a
run_extractor_SOURCES = run_extractor.cc
run_extractor_LDADD = libextractor.a

# Sources linked into the `compile` tool. Every file listed here is also part
# of libextractor.a below.
libcompile_a_SOURCES = \
alignment.cc \
data_array.cc \
phrase_location.cc \
precomputation.cc \
suffix_array.cc \
translation_table.cc

# Sources linked into run_extractor and into all unit tests.
libextractor_a_SOURCES = \
alignment.cc \
binary_search_merger.cc \
data_array.cc \
grammar_extractor.cc \
matching.cc \
matching_comparator.cc \
matchings_finder.cc \
intersector.cc \
linear_merger.cc \
matchings_trie.cc \
phrase.cc \
phrase_builder.cc \
phrase_location.cc \
precomputation.cc \
rule_extractor.cc \
rule_factory.cc \
suffix_array.cc \
translation_table.cc \
veb.cc \
veb_bitset.cc \
veb_tree.cc \
vocabulary.cc

# Flags applied to every compilation in this directory: warnings, the C++0x
# language mode, and the Google Test / Google Mock include paths.
AM_CPPFLAGS = -W -Wall -Wno-sign-compare -std=c++0x $(GTEST_CPPFLAGS) $(GMOCK_CPPFLAGS)
47 changes: 47 additions & 0 deletions extractor/alignment.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include "alignment.h"

#include <fcntl.h>
#include <unistd.h>

#include <cstdio>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>

namespace fs = boost::filesystem;
using namespace std;

// Loads word alignments from a text file with one sentence pair per line.
//
// Both ' ' and '-' are treated as delimiters, so a line such as "0-0 1-2" is
// read as a flat list of integers that are then grouped into consecutive
// pairs. Empty tokens (produced by a blank line or by leading, trailing or
// repeated delimiters) are skipped, so a blank line yields a sentence with no
// links.
//
// Throws std::invalid_argument when a line holds an odd number of indexes or
// a token that is not a number (std::stoi may also throw std::out_of_range).
// If the file cannot be opened, no alignments are loaded.
Alignment::Alignment(const string& filename) {
  ifstream infile(filename.c_str());
  string line;
  while (getline(infile, line)) {
    vector<string> items;
    boost::split(items, line, boost::is_any_of(" -"));

    // boost::split emits an empty token for every delimiter that has nothing
    // on one side of it; passing those to stoi would throw.
    vector<int> indexes;
    indexes.reserve(items.size());
    for (const string& item : items) {
      if (!item.empty()) {
        indexes.push_back(stoi(item));
      }
    }

    // Links come in pairs; a dangling index means the line is malformed.
    if (indexes.size() % 2 != 0) {
      throw invalid_argument("Malformed alignment line: " + line);
    }

    vector<pair<int, int> > alignment;
    alignment.reserve(indexes.size() / 2);
    for (size_t i = 0; i < indexes.size(); i += 2) {
      alignment.push_back(make_pair(indexes[i], indexes[i + 1]));
    }
    alignments.push_back(alignment);
  }
  // Note: shrink_to_fit does nothing for vector<vector<pair<int, int> > > on
  // g++ 4.6.3, but let's hope that the bug will be fixed in a newer version.
  alignments.shrink_to_fit();
}

// Returns a copy of the links stored for the sentence at sentence_index.
// The index is not range-checked.
vector<pair<int, int> > Alignment::GetLinks(int sentence_index) const {
  const vector<pair<int, int> >& links = alignments[sentence_index];
  return links;
}

// Writes all alignments to filepath as raw binary data.
//
// Layout: the number of sentences as an int, then for every sentence the
// number of links as an int followed by the links themselves, dumped as an
// array of pair<int, int> objects. The values are written in their in-memory
// representation.
//
// Throws std::runtime_error if the file cannot be opened or if writing to it
// fails.
void Alignment::WriteBinary(const fs::path& filepath) {
  // "wb" rather than "w": the payload is binary, and text mode may translate
  // bytes on platforms that distinguish the two modes.
  FILE* file = fopen(filepath.string().c_str(), "wb");
  if (file == nullptr) {
    throw runtime_error("Cannot open " + filepath.string() + " for writing");
  }

  int size = alignments.size();
  fwrite(&size, sizeof(int), 1, file);
  // Iterate by const reference to avoid copying every sentence's links.
  for (const vector<pair<int, int> >& alignment : alignments) {
    size = alignment.size();
    fwrite(&size, sizeof(int), 1, file);
    fwrite(alignment.data(), sizeof(pair<int, int>), size, file);
  }

  // Always close the stream, and report a failure of either the buffered
  // writes or the final flush performed by fclose.
  const bool write_failed = ferror(file) != 0;
  const bool close_failed = fclose(file) != 0;
  if (write_failed || close_failed) {
    throw runtime_error("Error while writing " + filepath.string());
  }
}
24 changes: 24 additions & 0 deletions extractor/alignment.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef _ALIGNMENT_H_
#define _ALIGNMENT_H_

#include <string>
#include <vector>

#include <boost/filesystem.hpp>

namespace fs = boost::filesystem;
using namespace std;

// Holds the word alignment links of a parallel corpus, one list of
// (int, int) links per sentence.
class Alignment {
public:
// Builds the alignment from the file named by filename.
Alignment(const string& filename);

// Returns, by value, the links stored for the sentence at sentence_index.
vector<pair<int, int> > GetLinks(int sentence_index) const;

// Writes the alignment in binary form to filepath.
void WriteBinary(const fs::path& filepath);

private:
// alignments[i] holds the links of the i-th sentence.
vector<vector<pair<int, int> > > alignments;
};

#endif
Loading

0 comments on commit 4ab84a0

Please sign in to comment.