Skip to content

Commit

Permalink
Remove trailing spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
kpu committed May 19, 2015
1 parent 022739d commit 8e659b2
Show file tree
Hide file tree
Showing 112 changed files with 597 additions and 597 deletions.
6 changes: 3 additions & 3 deletions lm/bhiksha.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ namespace lm {
namespace ngram {
namespace trie {

// Bhiksha variant that applies no offset compression: it only records the
// bit mask used to read "next" pointers.  The base pointer, maximum offset,
// and config are unused here (their parameter names are commented out).
DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/)
  : next_(util::BitsMask::ByMax(max_next)) {}

const uint8_t kArrayBhikshaVersion = 0;

// TODO: put this in binary file header instead when I change the binary file format again.
// TODO: put this in binary file header instead when I change the binary file format again.
void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
uint8_t buffer[2];
file.ReadForConfig(buffer, 2, offset);
Expand All @@ -33,7 +33,7 @@ uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
uint8_t required = util::RequiredBits(max_next);
uint8_t best_chop = 0;
int64_t lowest_change = std::numeric_limits<int64_t>::max();
// There are probably faster ways but I don't care because this is only done once per order at construction time.
// There are probably faster ways but I don't care because this is only done once per order at construction time.
for (uint8_t chop = 0; chop <= std::min(required, config.pointer_bhiksha_bits); ++chop) {
int64_t change = (max_next >> (required - chop)) * 64 /* table cost in bits */
- max_offset * static_cast<int64_t>(chop); /* savings in bits*/
Expand Down
6 changes: 3 additions & 3 deletions lm/bhiksha.hh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* pages={388--391},
* }
*
* Currently only used for next pointers.
* Currently only used for next pointers.
*/

#ifndef LM_BHIKSHA_H
Expand Down Expand Up @@ -86,9 +86,9 @@ class ArrayBhiksha {
// assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1));
--end_it;
// assert(end_it >= begin_it);
out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask);
out.end = ((end_it - offset_begin_) << next_inline_.bits) |
out.end = ((end_it - offset_begin_) << next_inline_.bits) |
util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask);
// If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052
assert(out.end >= out.begin);
Expand Down
2 changes: 1 addition & 1 deletion lm/binary_format.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ void MatchCheck(ModelType model_type, unsigned int search_version, const Paramet

const std::size_t kInvalidSize = static_cast<std::size_t>(-1);

// Capture the write/load settings from the user's Config.  Sizes and the
// vocab string offset start out as "invalid" sentinel values (see
// kInvalidSize above) until a header is actually read or written.
BinaryFormat::BinaryFormat(const Config &config)
  : write_method_(config.write_method),
    write_mmap_(config.write_mmap),
    load_method_(config.load_method),
    header_size_(kInvalidSize),
    vocab_size_(kInvalidSize),
    vocab_string_offset_(kInvalidOffset) {}

Expand Down
12 changes: 6 additions & 6 deletions lm/binary_format.hh
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,26 @@ namespace ngram {

extern const char *kModelNames[6];

/*Inspect a file to determine if it is a binary lm. If not, return false.
/*Inspect a file to determine if it is a binary lm. If not, return false.
* If so, return true and set recognized to the type. This is the only API in
* this header designed for use by decoder authors.
* this header designed for use by decoder authors.
*/
bool RecognizeBinary(const char *file, ModelType &recognized);

// Fixed-width (plain-old-data) portion of a binary model file's parameters.
struct FixedWidthParameters {
// Model order (the n in n-gram).
unsigned char order;
// NOTE(review): presumably sizes the probing hash table — confirm against the probing search code.
float probing_multiplier;
// What type of model is this?
ModelType model_type;
// Does the end of the file have the actual strings in the vocabulary?
bool has_vocabulary;
// Version number of the search data structure — assumed to gate binary compatibility; verify in MatchCheck.
unsigned int search_version;
};

// This is a macro instead of an inline function so constants can be assigned using it.
#define ALIGN8(a) ((std::ptrdiff_t(((a)-1)/8)+1)*8)

// Parameters stored in the header of a binary file.
// Parameters stored in the header of a binary file.
struct Parameters {
FixedWidthParameters fixed;
std::vector<uint64_t> counts;
Expand Down Expand Up @@ -79,7 +79,7 @@ class BinaryFormat {
const char *write_mmap_;
util::LoadMethod load_method_;

// File behind memory, if any.
// File behind memory, if any.
util::scoped_fd file_;

// If there is a file involved, a single mapping.
Expand Down
6 changes: 3 additions & 3 deletions lm/blank.hh
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ namespace ngram {
* kNoExtensionBackoff. If the n-gram might be extended, then out_state must
* contain the full n-gram, in which case kExtensionBackoff is set. In any
* case, if an n-gram has non-zero backoff, the full state is returned so
* backoff can be properly charged.
* backoff can be properly charged.
* These differ only in sign bit because the backoff is in fact zero in either
* case.
* case.
*/
const float kNoExtensionBackoff = -0.0;
const float kExtensionBackoff = 0.0;
Expand All @@ -28,7 +28,7 @@ inline void SetExtension(float &backoff) {
if (backoff == kNoExtensionBackoff) backoff = kExtensionBackoff;
}

// This compiles down nicely.
// This compiles down nicely.
inline bool HasExtension(const float &backoff) {
typedef union { float f; uint32_t i; } UnionValue;
UnionValue compare, interpret;
Expand Down
2 changes: 1 addition & 1 deletion lm/build_binary_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void Usage(const char *name, const char *default_mem) {
exit(1);
}

// I could really use boost::lexical_cast right about now.
// I could really use boost::lexical_cast right about now.
float ParseFloat(const char *from) {
char *end;
float ret = strtod(from, &end);
Expand Down
8 changes: 4 additions & 4 deletions lm/builder/adjust_counts.hh
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ struct DiscountConfig {
WarningAction bad_action;
};

/* Compute adjusted counts.
/* Compute adjusted counts.
* Input: unique suffix sorted N-grams (and just the N-grams) with raw counts.
* Output: [1,N]-grams with adjusted counts.
* Output: [1,N]-grams with adjusted counts.
* [1,N)-grams are in suffix order
* N-grams are in undefined order (they're going to be sorted anyway).
*/
Expand All @@ -50,13 +50,13 @@ class AdjustCounts {
const DiscountConfig &discount_config,
std::vector<Discount> &discounts)
: prune_thresholds_(prune_thresholds), counts_(counts), counts_pruned_(counts_pruned),
prune_words_(prune_words), discount_config_(discount_config), discounts_(discounts)
prune_words_(prune_words), discount_config_(discount_config), discounts_(discounts)
{}

void Run(const util::stream::ChainPositions &positions);

private:
const std::vector<uint64_t> &prune_thresholds_;
const std::vector<uint64_t> &prune_thresholds_;
std::vector<uint64_t> &counts_;
std::vector<uint64_t> &counts_pruned_;
const std::vector<bool> &prune_words_;
Expand Down
2 changes: 1 addition & 1 deletion lm/builder/adjust_counts_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ BOOST_AUTO_TEST_CASE(Simple) {
}
BOOST_REQUIRE_EQUAL(4UL, counts.size());
BOOST_CHECK_EQUAL(4UL, counts[0]);
// These are no longer set because the discounts are bad.
// These are no longer set because the discounts are bad.
/* BOOST_CHECK_EQUAL(4UL, counts[1]);
BOOST_CHECK_EQUAL(3UL, counts[2]);
BOOST_CHECK_EQUAL(3UL, counts[3]);*/
Expand Down
2 changes: 1 addition & 1 deletion lm/builder/combine_counts.hh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace builder {
struct CombineCounts {
bool operator()(void *first_void, const void *second_void, const SuffixOrder &compare) const {
NGram<BuildingPayload> first(first_void, compare.Order());
// There isn't a const version of NGram.
// There isn't a const version of NGram.
NGram<BuildingPayload> second(const_cast<void*>(second_void), compare.Order());
if (memcmp(first.begin(), second.begin(), sizeof(WordIndex) * compare.Order())) return false;
first.Value().count += second.Value().count;
Expand Down
28 changes: 14 additions & 14 deletions lm/builder/corpus_count.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,19 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
std::size_t operator()(const WordIndex *start) const {
return util::MurmurHashNative(start, size_);
}

private:
const std::size_t size_;
};

// Equality functor for the dedupe hash table: two n-grams are considered
// duplicates when their word-index arrays are byte-for-byte identical.
class DedupeEquals : public std::binary_function<const WordIndex *, const WordIndex *, bool> {
  public:
    explicit DedupeEquals(std::size_t order) : size_(order * sizeof(WordIndex)) {}

    bool operator()(const WordIndex *first, const WordIndex *second) const {
      return memcmp(first, second, size_) == 0;
    }

  private:
    // Byte length of one n-gram's word array (order * sizeof(WordIndex)).
    const std::size_t size_;
};
Expand All @@ -70,7 +70,7 @@ typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;

class Writer {
public:
Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
: block_(position), gram_(block_->Get(), order),
dedupe_invalid_(order, std::numeric_limits<WordIndex>::max()),
dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
Expand All @@ -79,7 +79,7 @@ class Writer {
dedupe_.Clear();
assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
if (order == 1) {
// Add special words. AdjustCounts is responsible if order != 1.
// Add special words. AdjustCounts is responsible if order != 1.
AddUnigramWord(kUNK);
AddUnigramWord(kBOS);
}
Expand Down Expand Up @@ -109,16 +109,16 @@ class Writer {
memmove(gram_.begin(), gram_.begin() + 1, sizeof(WordIndex) * (gram_.Order() - 1));
return;
}
// Complete the write.
// Complete the write.
gram_.Value().count = 1;
// Prepare the next n-gram.
// Prepare the next n-gram.
if (reinterpret_cast<uint8_t*>(gram_.begin()) + gram_.TotalSize() != static_cast<uint8_t*>(block_->Get()) + block_size_) {
NGram<BuildingPayload> last(gram_);
gram_.NextInMemory();
std::copy(last.begin() + 1, last.end(), gram_.begin());
return;
}
// Block end. Need to store the context in a temporary buffer.
// Block end. Need to store the context in a temporary buffer.
std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
dedupe_.Clear();
block_->SetValidSize(block_size_);
Expand Down Expand Up @@ -146,7 +146,7 @@ class Writer {
// Hash table combiner implementation.
Dedupe dedupe_;

// Small buffer to hold existing ngrams when shifting across a block boundary.
// Small buffer to hold existing ngrams when shifting across a block boundary.
boost::scoped_array<WordIndex> buffer_;

const std::size_t block_size_;
Expand Down Expand Up @@ -212,25 +212,25 @@ void CorpusCount::Run(const util::stream::ChainPosition &position) {
} catch (const util::EndOfFileException &e) {}
token_count_ = count;
type_count_ = vocab.Size();

// Create list of unigrams that are supposed to be pruned
if (!prune_vocab_filename_.empty()) {
try {
util::FilePiece prune_vocab_file(prune_vocab_filename_.c_str());

prune_words_.resize(vocab.Size(), true);
try {
while (true) {
StringPiece word(prune_vocab_file.ReadDelimited(delimiters));
prune_words_[vocab.Index(word)] = false;
}
} catch (const util::EndOfFileException &e) {}

// Never prune <unk>, <s>, </s>
prune_words_[kUNK] = false;
prune_words_[kBOS] = false;
prune_words_[kEOS] = false;

} catch (const util::Exception &e) {
std::cerr << e.what() << std::endl;
abort();
Expand Down
2 changes: 1 addition & 1 deletion lm/builder/corpus_count.hh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class CorpusCount {
uint64_t &token_count_;
WordIndex &type_count_;
std::vector<bool>& prune_words_;
const std::string& prune_vocab_filename_;
const std::string& prune_vocab_filename_;

std::size_t dedupe_mem_size_;
util::scoped_malloc dedupe_mem_;
Expand Down
Loading

0 comments on commit 8e659b2

Please sign in to comment.