Remove trailing spaces

kpu committed Feb 23, 2016
1 parent 58c829e commit 0ae2150

Showing 18 changed files with 46 additions and 46 deletions.
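
As an aside, a whitespace-only commit like this is usually produced mechanically rather than by hand. The sketch below is hypothetical — it is not part of this commit or the KenLM tree, and the file name strip_trailing.cc is invented — but it shows one way to do the same cleanup in C++: rewrite a file in place with trailing spaces and tabs dropped from every line.

// strip_trailing.cc -- hypothetical helper, not from this repository.
// Usage: strip_trailing <file>
// Rewrites <file> in place, dropping trailing spaces and tabs from each line.
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

int main(int argc, char *argv[]) {
  if (argc != 2) {
    std::cerr << "Usage: " << argv[0] << " <file>" << std::endl;
    return 1;
  }
  std::ostringstream cleaned;
  {
    std::ifstream in(argv[1]);
    if (!in) {
      std::cerr << "Could not open " << argv[1] << std::endl;
      return 1;
    }
    std::string line;
    while (std::getline(in, line)) {
      // Keep the line up to its last non-whitespace character; a line that is
      // all spaces/tabs becomes empty, matching the changes in this diff.
      std::string::size_type end = line.find_last_not_of(" \t");
      cleaned << (end == std::string::npos ? "" : line.substr(0, end + 1)) << '\n';
    }
  }
  // Truncate and rewrite the original file with the cleaned contents.
  std::ofstream out(argv[1], std::ios::trunc);
  out << cleaned.str();
  return 0;
}

Run over the 18 files touched here, a tool like this would produce the same kind of line-for-line replacements shown below.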
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -25,7 +25,7 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)
 
 # We need boost
 find_package(Boost 1.36.0 REQUIRED COMPONENTS
-program_options 
+program_options
 system
 thread
 unit_test_framework
@@ -36,7 +36,7 @@ find_package(Boost 1.36.0 REQUIRED COMPONENTS
 
 # Define where include files live
 include_directories(
-${PROJECT_SOURCE_DIR} 
+${PROJECT_SOURCE_DIR}
 ${Boost_INCLUDE_DIRS}
 )

4 changes: 2 additions & 2 deletions lm/CMakeLists.txt
@@ -12,7 +12,7 @@ add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
 # that should be included in the kenlm library,
 # (this excludes any unit test files)
 # you should add them to the following list:
-set(KENLM_LM_SOURCE 
+set(KENLM_LM_SOURCE
 bhiksha.cc
 binary_format.cc
 config.cc
@@ -31,7 +31,7 @@ set(KENLM_LM_SOURCE
 )
 
 
-# Group these objects together for later use. 
+# Group these objects together for later use.
 #
 # Given add_library(foo OBJECT ${my_foo_sources}),
 # refer to these objects as $<TARGET_OBJECTS:foo>

4 changes: 2 additions & 2 deletions lm/builder/CMakeLists.txt
@@ -11,7 +11,7 @@
 # in case this variable is referenced by CMake files in the parent directory,
 # we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
 #
-set(KENLM_BUILDER_SOURCE 
+set(KENLM_BUILDER_SOURCE
 ${CMAKE_CURRENT_SOURCE_DIR}/adjust_counts.cc
 ${CMAKE_CURRENT_SOURCE_DIR}/corpus_count.cc
 ${CMAKE_CURRENT_SOURCE_DIR}/initial_probabilities.cc
@@ -21,7 +21,7 @@ set(KENLM_BUILDER_SOURCE
 )
 
 
-# Group these objects together for later use. 
+# Group these objects together for later use.
 #
 # Given add_library(foo OBJECT ${my_foo_sources}),
 # refer to these objects as $<TARGET_OBJECTS:foo>

2 changes: 1 addition & 1 deletion lm/common/model_buffer.cc
@@ -17,7 +17,7 @@ const char kMetadataHeader[] = "KenLM intermediate binary file";
 ModelBuffer::ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q)
 : file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer), output_q_(output_q),
 vocab_file_(keep_buffer ? util::CreateOrThrow((file_base_ + ".vocab").c_str()) : util::MakeTemp(file_base_)) {}
- 
+
 ModelBuffer::ModelBuffer(StringPiece file_base)
 : file_base_(file_base.data(), file_base.size()), keep_buffer_(false) {
 const std::string full_name = file_base_ + ".kenlm_intermediate";

4 changes: 2 additions & 2 deletions lm/filter/CMakeLists.txt
@@ -11,14 +11,14 @@
 # in case this variable is referenced by CMake files in the parent directory,
 # we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
 #
-set(KENLM_FILTER_SOURCE 
+set(KENLM_FILTER_SOURCE
 ${CMAKE_CURRENT_SOURCE_DIR}/arpa_io.cc
 ${CMAKE_CURRENT_SOURCE_DIR}/phrase.cc
 ${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc
 )
 
 
-# Group these objects together for later use. 
+# Group these objects together for later use.
 #
 # Given add_library(foo OBJECT ${my_foo_sources}),
 # refer to these objects as $<TARGET_OBJECTS:foo>

2 changes: 1 addition & 1 deletion lm/filter/arpa_io.cc
@@ -45,7 +45,7 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) {
 return true;
 }
 
-ARPAOutput::ARPAOutput(const char *name, size_t buffer_size) 
+ARPAOutput::ARPAOutput(const char *name, size_t buffer_size)
 : file_backing_(util::CreateOrThrow(name)), file_(file_backing_.get(), buffer_size) {}
 
 void ARPAOutput::ReserveForCounts(std::streampos reserve) {

2 changes: 1 addition & 1 deletion lm/interpolate/tune_derivatives.cc
@@ -88,7 +88,7 @@ Accum Derivatives(Instances &in, const Vector &weights, Vector &gradient, Matrix
 - interp_uni(word) * Z_epsilon * weighted_backoffs * ln_p_i_backed * ln_p_i_backed.transpose();
 }
 
-Accum Z_context = 
+Accum Z_context =
 weighted_backoffs * Z_epsilon * (1.0 - sum_x_p_I) // Back off and unnormalize the unigrams for which there is no extension.
 + unnormalized_sum_x_p_I_full; // Add the extensions.
 sum_ln_Z_context += log(Z_context);

2 changes: 1 addition & 1 deletion lm/interpolate/tune_derivatives_test.cc
@@ -47,7 +47,7 @@ namespace {
 
 BOOST_AUTO_TEST_CASE(Small) {
 MockInstances mock;
- 
+
 {
 // Three vocabulary words plus <s>, two models.
 Matrix unigrams(4, 2);

16 changes: 8 additions & 8 deletions lm/interpolate/tune_instances.cc
@@ -2,7 +2,7 @@
 * instance is an n-gram in the tuning file. To tune towards these, we want
 * the correct probability p_i(w_n | w_1^{n-1}) from each model as well as
 * all the denominators p_i(v | w_1^{n-1}) that appear in normalization.
-* 
+*
 * In other words, we filter the models to only those n-grams whose context
 * appears in the tuning data. This can be divided into two categories:
 * - All unigrams. This goes into Instances::ln_unigrams_
@@ -11,7 +11,7 @@
 * w_1^{n-1}v since that is what will be used for the probability.
 * Because there is a large number of extensions (we tried keeping them in RAM
 * and ran out), the streaming framework is used to keep track of extensions
-* and sort them so they can be streamed in. Downstream code 
+* and sort them so they can be streamed in. Downstream code
 * (tune_derivatives.hh) takes a stream of extensions ordered by tuning
 * instance, the word v, and the model the extension came from.
 */
@@ -186,7 +186,7 @@ class JointOrderCallback {
 std::size_t full_order_minus_1,
 ContextMap &contexts,
 util::stream::Stream &out,
-const InstanceMatch *base_instance) 
+const InstanceMatch *base_instance)
 : full_order_minus_1_(full_order_minus_1),
 contexts_(contexts),
 out_(out),
@@ -231,13 +231,13 @@ class JointOrderCallback {
 // Mapping is constant but values are being manipulated to tell them about
 // n-grams.
 ContextMap &contexts_;
- 
+
 // Reused variable. model is set correctly.
 InitialExtension ext_;
- 
+
 util::stream::Stream &out_;
-const InstanceMatch *const base_instance_; 
- 
+const InstanceMatch *const base_instance_;
+
 };
 
 // This populates the ln_unigrams_ matrix. It can (and should for efficiency)
@@ -247,7 +247,7 @@ class ReadUnigrams {
 explicit ReadUnigrams(Matrix::ColXpr out) : out_(out) {}
 
 // Read renumbered unigrams, fill with <unk> otherwise.
-void Run(const util::stream::ChainPosition &position) { 
+void Run(const util::stream::ChainPosition &position) {
 NGramStream<ProbBackoff> stream(position);
 assert(stream);
 Accum unk = stream->Value().prob * M_LN10;
@@ -315,7 +315,7 @@ class IdentifyTuning : public EnumerateVocab {
 // Store information about the first iteration.
 class ExtensionsFirstIteration {
 public:
-explicit ExtensionsFirstIteration(std::size_t instances, std::size_t models, std::size_t max_order, util::stream::Chain &extension_input, const util::stream::SortConfig &config) 
+explicit ExtensionsFirstIteration(std::size_t instances, std::size_t models, std::size_t max_order, util::stream::Chain &extension_input, const util::stream::SortConfig &config)
 : backoffs_by_instance_(new std::vector<Matrix>(instances)), sort_(extension_input, config) {
 // Initialize all the backoff matrices to zeros.
 for (std::vector<Matrix>::iterator i = backoffs_by_instance_->begin(); i != backoffs_by_instance_->end(); ++i) {

2 changes: 1 addition & 1 deletion lm/interpolate/tune_instances.hh
@@ -79,7 +79,7 @@ class Instances {
 
 // backoffs_(instance, model) is the backoff all the way to unigrams.
 BackoffMatrix ln_backoffs_;
- 
+
 // neg_correct_sum_(model) = -\sum_{instances} ln p_{model}(correct(instance) | context(instance)).
 // This appears as a term in the gradient.
 Vector neg_ln_correct_sum_;

8 changes: 4 additions & 4 deletions lm/interpolate/tune_instances_test.cc
@@ -47,7 +47,7 @@ BOOST_AUTO_TEST_CASE(Toy) {
 
 BOOST_CHECK_EQUAL(1, inst.BOS());
 const Matrix &ln_unigrams = inst.LNUnigrams();
- 
+
 // <unk>=0
 BOOST_CHECK_CLOSE(-0.90309 * M_LN10, ln_unigrams(0, 0), 0.001);
 BOOST_CHECK_CLOSE(-1 * M_LN10, ln_unigrams(0, 1), 0.001);
@@ -64,15 +64,15 @@ BOOST_AUTO_TEST_CASE(Toy) {
 BOOST_CHECK_CLOSE(-0.90309 * M_LN10, ln_unigrams(4, 0), 0.001); // <unk>
 BOOST_CHECK_CLOSE(-0.7659168 * M_LN10, ln_unigrams(4, 1), 0.001);
 // too lazy to do b = 5.
- 
+
 // Two instances:
 // <s> predicts c
 // <s> c predicts </s>
 BOOST_REQUIRE_EQUAL(2, inst.NumInstances());
 BOOST_CHECK_CLOSE(-0.30103 * M_LN10, inst.LNBackoffs(0)(0), 0.001);
 BOOST_CHECK_CLOSE(-0.30103 * M_LN10, inst.LNBackoffs(0)(1), 0.001);
- 
+
 // Backoffs of <s> c
 BOOST_CHECK_CLOSE(0.0, inst.LNBackoffs(1)(0), 0.001);
 BOOST_CHECK_CLOSE((-0.30103 - 0.30103) * M_LN10, inst.LNBackoffs(1)(1), 0.001);
@@ -88,7 +88,7 @@ BOOST_AUTO_TEST_CASE(Toy) {
 // <s> b from model 0
 // c </s> from model 1
 // Magic probabilities come from querying the models directly.
- 
+
 // <s> a from model 0
 BOOST_REQUIRE(stream);
 BOOST_CHECK_EQUAL(0, stream->instance);

2 changes: 1 addition & 1 deletion lm/interpolate/tune_matrix.hh
@@ -8,7 +8,7 @@
 #pragma GCC diagnostic pop
 
 namespace lm { namespace interpolate {
- 
+
 typedef Eigen::MatrixXf Matrix;
 typedef Eigen::VectorXf Vector;
 

26 changes: 13 additions & 13 deletions util/CMakeLists.txt
@@ -11,30 +11,30 @@
 # Because we do not set PARENT_SCOPE in the following definition,
 # CMake files in the parent directory won't be able to access this variable.
 #
-set(KENLM_UTIL_SOURCE 
-bit_packing.cc 
-ersatz_progress.cc 
-exception.cc 
-file.cc 
-file_piece.cc 
+set(KENLM_UTIL_SOURCE
+bit_packing.cc
+ersatz_progress.cc
+exception.cc
+file.cc
+file_piece.cc
 float_to_string.cc
 integer_to_string.cc
-mmap.cc 
-murmur_hash.cc 
+mmap.cc
+murmur_hash.cc
 parallel_read.cc
-pool.cc 
-read_compressed.cc 
-scoped.cc 
+pool.cc
+read_compressed.cc
+scoped.cc
 spaces.cc
-string_piece.cc 
+string_piece.cc
 usage.cc
 )
 
 # This directory has children that need to be processed
 add_subdirectory(double-conversion)
 add_subdirectory(stream)
 
-# Group these objects together for later use. 
+# Group these objects together for later use.
 add_library(kenlm_util ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL_STREAM_SOURCE} ${KENLM_UTIL_SOURCE})
 
 AddExes(EXES probing_hash_table_benchmark

2 changes: 1 addition & 1 deletion util/double-conversion/CMakeLists.txt
@@ -14,7 +14,7 @@
 # when this variable is referenced by CMake files in the parent directory,
 # we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
 #
-set(KENLM_UTIL_DOUBLECONVERSION_SOURCE 
+set(KENLM_UTIL_DOUBLECONVERSION_SOURCE
 ${CMAKE_CURRENT_SOURCE_DIR}/bignum-dtoa.cc
 ${CMAKE_CURRENT_SOURCE_DIR}/bignum.cc
 ${CMAKE_CURRENT_SOURCE_DIR}/cached-powers.cc

6 changes: 3 additions & 3 deletions util/fake_ostream.hh
@@ -14,13 +14,13 @@ namespace util {
 
 /* Like std::ostream but without being incredibly slow.
 * Supports most of the built-in types except for long double.
-* 
+*
 * The FakeOStream class is intended to be inherited from. The inherting class
 * should provide:
 * public:
 * Derived &flush();
 * Derived &write(const void *data, std::size_t length);
-* 
+*
 * private: or protected:
 * friend class FakeOStream;
 * char *Ensure(std::size_t amount);
@@ -29,7 +29,7 @@ namespace util {
 * The Ensure function makes enough space for an in-place write and returns
 * where to write. The AdvanceTo function happens after the write, saying how
 * much was actually written.
-* 
+*
 * Precondition:
 * amount <= kToStringMaxBytes for in-place writes.
 */

2 changes: 1 addition & 1 deletion util/file.cc
@@ -158,7 +158,7 @@ std::size_t PartialRead(int fd, void *to, std::size_t amount) {
 DWORD ret;
 HANDLE file_handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
 DWORD larger_size = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, amount));
-DWORD smaller_size = 28672; // Received reports that 31346 worked but higher values did not. This rounds down to the nearest multiple of 4096, the page size. 
+DWORD smaller_size = 28672; // Received reports that 31346 worked but higher values did not. This rounds down to the nearest multiple of 4096, the page size.
 if (!ReadFile(file_handle, to, larger_size, &ret, NULL))
 {
 DWORD last_error = GetLastError();

2 changes: 1 addition & 1 deletion util/pool.hh
@@ -36,7 +36,7 @@ class Pool {
 void *new_base = More(new_total);
 std::memcpy(new_base, base, new_total - additional);
 base = new_base;
-} 
+}
 }
 
 void FreeAll();

2 changes: 1 addition & 1 deletion util/stream/CMakeLists.txt
@@ -14,7 +14,7 @@
 # when this variable is referenced by CMake files in the parent directory,
 # we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
 #
-set(KENLM_UTIL_STREAM_SOURCE 
+set(KENLM_UTIL_STREAM_SOURCE
 ${CMAKE_CURRENT_SOURCE_DIR}/chain.cc
 ${CMAKE_CURRENT_SOURCE_DIR}/count_records.cc
 ${CMAKE_CURRENT_SOURCE_DIR}/io.cc
