Skip to content

Commit

Permalink
gcc 4.8 compile fix
Browse files Browse the repository at this point in the history
make ProxyIterator::reference a reference (not a value) in order to truly
satisfy random access iterator requirements, because std algs were trying to
std::swap rvalues

added swap(ProxyIterator &,ProxyIterator &) and (shallow) swap of
SizedProxy.Inner() to go with existing deep swap for SizedProxy; I know there
are other ProxyIterators but the code I'm building doesn't need swap for
those. added a //TODO: comment with a concern I have about SizedProxy's deep
swap (which was only recently added); I (graehl) don't understand KenLM well
enough to judge.

avoid some new gcc 4.8 warnings

made compile_query_only.sh respect CXX env var
  • Loading branch information
graehl committed Jun 4, 2013
1 parent acbf805 commit b4759be
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 44 deletions.
2 changes: 1 addition & 1 deletion clean_query_only.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/bash
rm -rf {lm,util,util/double-conversion}/*.o bin/{query,kenlm_max_order,build_binary}
rm -rf {lm,util,util/double-conversion}/*.o bin/{query,build_binary}
12 changes: 8 additions & 4 deletions compile_query_only.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,21 @@ echo Note: You must use ./bjam if you want language model estimation or filterin
rm {lm,util}/*.o 2>/dev/null
set -e

CXXFLAGS="-I. -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 $CXXFLAGS"
CXX=${CXX:-g++}
CXXFLAGS+=" -I. -O3 -DNDEBUG -DKENLM_MAX_ORDER=6"
echo '$CXX $CXXFLAGS'
echo $CXX $CXXFLAGS

#Grab all cc files in these directories except those ending in test.cc or main.cc
objects=""
for i in util/double-conversion/*.cc util/*.cc lm/*.cc; do
if [ "${i%test.cc}" == "$i" ] && [ "${i%main.cc}" == "$i" ]; then
g++ $CXXFLAGS -c $i -o ${i%.cc}.o
$CXX $CXXFLAGS -c $i -o ${i%.cc}.o
objects="$objects ${i%.cc}.o"
fi
done

mkdir -p bin
g++ $CXXFLAGS lm/build_binary_main.cc $objects -lrt -o bin/build_binary
g++ $CXXFLAGS lm/query_main.cc $objects -lrt -o bin/query
[[ `uname` = Darwin ]] || CXXFLAGS+=" -lrt"
$CXX $CXXFLAGS lm/build_binary_main.cc $objects $lrt -o bin/build_binary
$CXX $CXXFLAGS lm/query_main.cc $objects $lrt -o bin/query
23 changes: 11 additions & 12 deletions lm/search_hashed.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ template <class Weights> class ActivateUnigram {
Weights *modify_;
};

// Find the lower order entry, inserting blanks along the way as necessary.
// Find the lower order entry, inserting blanks along the way as necessary.
template <class Value> void FindLower(
const std::vector<uint64_t> &keys,
typename Value::Weights &unigram,
Expand All @@ -64,7 +64,7 @@ template <class Value> void FindLower(
typename Value::ProbingEntry entry;
// Backoff will always be 0.0. We'll get the probability and rest in another pass.
entry.value.backoff = kNoExtensionBackoff;
// Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
// Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
for (int lower = keys.size() - 2; ; --lower) {
if (lower == -1) {
between.push_back(&unigram);
Expand All @@ -77,11 +77,11 @@ template <class Value> void FindLower(
}
}

// Between usually has single entry, the value to adjust. But sometimes SRI stupidly pruned entries so it has unitialized blank values to be set here.
// Between usually has single entry, the value to adjust. But sometimes SRI stupidly pruned entries so it has unitialized blank values to be set here.
template <class Added, class Build> void AdjustLower(
const Added &added,
const Build &build,
std::vector<typename Build::Value::Weights *> &between,
std::vector<typename Build::Value::Weights *> &between,
const unsigned int n,
const std::vector<WordIndex> &vocab_ids,
typename Build::Value::Weights *unigrams,
Expand All @@ -93,14 +93,14 @@ template <class Added, class Build> void AdjustLower(
}
typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
float prob = -fabs(between.back()->prob);
// Order of the n-gram on which probabilities are based.
// Order of the n-gram on which probabilities are based.
unsigned char basis = n - between.size();
assert(basis != 0);
typename Build::Value::Weights **change = &between.back();
// Skip the basis.
--change;
if (basis == 1) {
// Hallucinate a bigram based on a unigram's backoff and a unigram probability.
// Hallucinate a bigram based on a unigram's backoff and a unigram probability.
float &backoff = unigrams[vocab_ids[1]].backoff;
SetExtension(backoff);
prob += backoff;
Expand Down Expand Up @@ -128,14 +128,14 @@ template <class Added, class Build> void AdjustLower(
typename std::vector<typename Value::Weights *>::const_iterator i(between.begin());
build.MarkExtends(**i, added);
const typename Value::Weights *longer = *i;
// Everything has probability but is not marked as extending.
// Everything has probability but is not marked as extending.
for (++i; i != between.end(); ++i) {
build.MarkExtends(**i, *longer);
longer = *i;
}
}

// Continue marking lower entries even they know that they extend left. This is used for upper/lower bounds.
// Continue marking lower entries even they know that they extend left. This is used for upper/lower bounds.
template <class Build> void MarkLower(
const std::vector<uint64_t> &keys,
const Build &build,
Expand All @@ -145,7 +145,7 @@ template <class Build> void MarkLower(
const typename Build::Value::Weights &longer) {
if (start_order == 0) return;
typename util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash>::MutableIterator iter;
// Hopefully the compiler will realize that if MarkExtends always returns false, it can simplify this code.
// Hopefully the compiler will realize that if MarkExtends always returns false, it can simplify this code.
for (int even_lower = start_order - 2 /* index in middle */; ; --even_lower) {
if (even_lower == -1) {
build.MarkExtends(unigram, longer);
Expand All @@ -168,7 +168,6 @@ template <class Build, class Activate, class Store> void ReadNGrams(
Store &store,
PositiveProbWarn &warn) {
typedef typename Build::Value Value;
typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
assert(n >= 2);
ReadNGramHeader(f, n);

Expand All @@ -186,7 +185,7 @@ template <class Build, class Activate, class Store> void ReadNGrams(
for (unsigned int h = 1; h < n - 1; ++h) {
keys[h] = detail::CombineWordHash(keys[h-1], vocab_ids[h+1]);
}
// Initially the sign bit is on, indicating it does not extend left. Most already have this but there might +0.0.
// Initially the sign bit is on, indicating it does not extend left. Most already have this but there might +0.0.
util::SetSign(entry.value.prob);
entry.key = keys[n-2];

Expand All @@ -203,7 +202,7 @@ template <class Build, class Activate, class Store> void ReadNGrams(

} // namespace
namespace detail {

template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
std::size_t allocated = Unigram::Size(counts[0]);
unigram_ = Unigram(start, counts[0], allocated);
Expand Down
13 changes: 7 additions & 6 deletions lm/search_hashed.hh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ template <class Value> class HashedSearch {
static const bool kDifferentRest = Value::kDifferentRest;
static const unsigned int kVersion = 0;

// TODO: move probing_multiplier here with next binary file format update.
// TODO: move probing_multiplier here with next binary file format update.
static void UpdateConfigFromBinary(int, const std::vector<uint64_t> &, Config &) {}

static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
Expand Down Expand Up @@ -103,6 +103,7 @@ template <class Value> class HashedSearch {
}

#pragma GCC diagnostic ignored "-Wuninitialized"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const {
node = extend_pointer;
typename Middle::ConstIterator found;
Expand All @@ -126,14 +127,14 @@ template <class Value> class HashedSearch {
}

LongestPointer LookupLongest(WordIndex word, const Node &node) const {
// Sign bit is always on because longest n-grams do not extend left.
// Sign bit is always on because longest n-grams do not extend left.
typename Longest::ConstIterator found;
if (!longest_.Find(CombineWordHash(node, word), found)) return LongestPointer();
return LongestPointer(found->value.prob);
}

// Generate a node without necessarily checking that it actually exists.
// Optionally return false if it's know to not exist.
// Generate a node without necessarily checking that it actually exists.
// Optionally return false if it's know to not exist.
bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const {
assert(begin != end);
node = static_cast<Node>(*begin);
Expand All @@ -144,7 +145,7 @@ template <class Value> class HashedSearch {
}

private:
// Interpret config's rest cost build policy and pass the right template argument to ApplyBuild.
// Interpret config's rest cost build policy and pass the right template argument to ApplyBuild.
void DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn);

template <class Build> void ApplyBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build);
Expand All @@ -153,7 +154,7 @@ template <class Value> class HashedSearch {
public:
Unigram() {}

Unigram(void *start, uint64_t count, std::size_t /*allocated*/) :
Unigram(void *start, uint64_t count, std::size_t /*allocated*/) :
unigram_(static_cast<typename Value::Weights*>(start))
#ifdef DEBUG
, count_(count)
Expand Down
6 changes: 5 additions & 1 deletion util/double-conversion/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,11 @@ template <class Dest, class Source>
inline Dest BitCast(const Source& source) {
// Compile time assertion: sizeof(Dest) == sizeof(Source)
// A compile error here means your Dest and Source have different sizes.
typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]
#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8
__attribute__((unused))
#endif
;

Dest dest;
memmove(&dest, &source, sizeof(dest));
Expand Down
14 changes: 7 additions & 7 deletions util/file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ std::size_t GuardLarge(std::size_t size) {
// The following operating systems have broken read/write/pread/pwrite that
// only supports up to 2^31.
#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID)
return std::min(static_cast<std::size_t>(INT_MAX), size);
return std::min(static_cast<std::size_t>(static_cast<unsigned>(-1)), size);
#else
return size;
#endif
Expand Down Expand Up @@ -209,7 +209,7 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
#endif
errno = 0;
do {
ret =
ret =
#if defined(_WIN32) || defined(_WIN64)
_write
#else
Expand All @@ -229,7 +229,7 @@ void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
}

void FSyncOrThrow(int fd) {
// Apparently windows doesn't have fsync?
// Apparently windows doesn't have fsync?
#if !defined(_WIN32) && !defined(_WIN64)
UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
#endif
Expand All @@ -248,7 +248,7 @@ template <> struct CheckOffT<8> {
typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
#endif

// Can't we all just get along?
// Can't we all just get along?
void InternalSeek(int fd, int64_t off, int whence) {
if (
#if defined(_WIN32) || defined(_WIN64)
Expand Down Expand Up @@ -457,9 +457,9 @@ bool TryName(int fd, std::string &out) {
std::ostringstream convert;
convert << fd;
name += convert.str();

struct stat sb;
if (-1 == lstat(name.c_str(), &sb))
if (-1 == lstat(name.c_str(), &sb))
return false;
out.resize(sb.st_size + 1);
ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1);
Expand All @@ -471,7 +471,7 @@ bool TryName(int fd, std::string &out) {
}
out.resize(ret);
// Don't use the non-file names.
if (!out.empty() && out[0] != '/')
if (!out.empty() && out[0] != '/')
return false;
return true;
#endif
Expand Down
25 changes: 15 additions & 10 deletions util/proxy_iterator.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

/* This is a RandomAccessIterator that uses a proxy to access the underlying
* data. Useful for packing data at bit offsets but still using STL
* algorithms.
* algorithms.
*
* Normally I would use boost::iterator_facade but some people are too lazy to
* install boost and still want to use my language model. It's amazing how
* many operators an iterator has.
* many operators an iterator has.
*
* The Proxy needs to provide:
* class InnerIterator;
Expand All @@ -22,32 +22,37 @@
* operator<(InnerIterator)
* operator+=(std::ptrdiff_t)
* operator-(InnerIterator)
* and of course whatever Proxy needs to dereference it.
* and of course whatever Proxy needs to dereference it.
*
* It's also a good idea to specialize std::swap for Proxy.
* It's also a good idea to specialize std::swap for Proxy.
*/

namespace util {
template <class Proxy> class ProxyIterator {
private:
// Self.
// Self.
typedef ProxyIterator<Proxy> S;
typedef typename Proxy::InnerIterator InnerIterator;

public:
typedef std::random_access_iterator_tag iterator_category;
typedef typename Proxy::value_type value_type;
typedef std::ptrdiff_t difference_type;
typedef Proxy reference;
typedef Proxy & reference;
typedef Proxy * pointer;

ProxyIterator() {}

// For cast from non const to const.
// For cast from non const to const.
template <class AlternateProxy> ProxyIterator(const ProxyIterator<AlternateProxy> &in) : p_(*in) {}
explicit ProxyIterator(const Proxy &p) : p_(p) {}

// p_'s operator= does value copying, but here we want iterator copying.
// p_'s swap does value swapping, but here we want iterator swapping
friend inline void swap(ProxyIterator<Proxy> &first, ProxyIterator<Proxy> &second) {
swap(first.I(), second.I());
}

// p_'s operator= does value copying, but here we want iterator copying.
S &operator=(const S &other) {
I() = other.I();
return *this;
Expand All @@ -72,8 +77,8 @@ template <class Proxy> class ProxyIterator {

std::ptrdiff_t operator-(const S &other) const { return I() - other.I(); }

Proxy operator*() { return p_; }
const Proxy operator*() const { return p_; }
Proxy &operator*() { return p_; }
const Proxy &operator*() const { return p_; }
Proxy *operator->() { return &p_; }
const Proxy *operator->() const { return &p_; }
Proxy operator[](std::ptrdiff_t amount) const { return *(*this + amount); }
Expand Down
21 changes: 18 additions & 3 deletions util/sized_iterator.hh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ class SizedInnerIterator {
void *Data() { return ptr_; }
std::size_t EntrySize() const { return size_; }

friend inline void swap(SizedInnerIterator &first, SizedInnerIterator &second) {
std::swap(first.ptr_, second.ptr_);
std::swap(first.size_, second.size_);
}

private:
uint8_t *ptr_;
std::size_t size_;
Expand Down Expand Up @@ -64,9 +69,19 @@ class SizedProxy {
const void *Data() const { return inner_.Data(); }
void *Data() { return inner_.Data(); }

/**
// TODO: this (deep) swap was recently added. why? if any std heap sort etc
// algs are using swap, that's going to be worse performance than using
// =. i'm not sure why we *want* a deep swap. if C++11 compilers are
// choosing between move constructor and swap, then we'd better implement a
// (deep) move constructor. it may also be that this is moot since i made
// ProxyIterator a reference and added a shallow ProxyIterator swap? (I
// need Ken or someone competent to judge whether that's correct also. -
// let me know at [email protected]
*/
friend void swap(SizedProxy &first, SizedProxy &second) {
std::swap_ranges(
static_cast<char*>(first.inner_.Data()),
static_cast<char*>(first.inner_.Data()),
static_cast<char*>(first.inner_.Data()) + first.inner_.EntrySize(),
static_cast<char*>(second.inner_.Data()));
}
Expand All @@ -87,7 +102,7 @@ typedef ProxyIterator<SizedProxy> SizedIterator;

inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); }

// Useful wrapper for a comparison function i.e. sort.
// Useful wrapper for a comparison function i.e. sort.
template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public std::binary_function<const Proxy &, const Proxy &, bool> {
public:
explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {}
Expand All @@ -106,7 +121,7 @@ template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public
}

const Delegate &GetDelegate() const { return delegate_; }

private:
const Delegate delegate_;
};
Expand Down

0 comments on commit b4759be

Please sign in to comment.