Skip to content

Commit

Permalink
Change ARPA output to use FakeOFStream
Browse files Browse the repository at this point in the history
  • Loading branch information
kpu committed Sep 27, 2015
1 parent 3ea15ca commit 235f607
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 66 deletions.
62 changes: 15 additions & 47 deletions lm/filter/arpa_io.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,8 @@ ARPAInputException::ARPAInputException(const StringPiece &message, const StringP

// Out-of-line destructor with an empty body; declared non-throwing via throw().
ARPAInputException::~ARPAInputException() throw() {}

// Builds the exception text "<message> in file <file_name>" and records the
// file name in file_name_ so that File() reports which output file failed.
// (Previously file_name_ was left default-constructed, so File() was always "".)
//   message:   short description of the failing operation (e.g. "Opening").
//   file_name: path of the ARPA output file being written.
ARPAOutputException::ARPAOutputException(const char *message, const std::string &file_name) throw()
  : file_name_(file_name) {
  *this << message << " in file " << file_name;
}

// Out-of-line destructor with an empty body; declared non-throwing via throw().
ARPAOutputException::~ARPAOutputException() throw() {}

// Seeking is the responsibility of the caller.
void WriteCounts(std::ostream &out, const std::vector<uint64_t> &number) {
template <class Stream> void WriteCounts(Stream &out, const std::vector<uint64_t> &number) {
out << "\n\\data\\\n";
for (unsigned int i = 0; i < number.size(); ++i) {
out << "ngram " << i+1 << "=" << number[i] << '\n';
Expand All @@ -38,9 +32,10 @@ void WriteCounts(std::ostream &out, const std::vector<uint64_t> &number) {
}

// Returns the number of bytes WriteCounts produces for `number`, measured by
// writing the counts header into an in-memory buffer rather than a file.
size_t SizeNeededForCounts(const std::vector<uint64_t> &number) {
std::ostringstream buf;
WriteCounts(buf, number);
return buf.tellp();
std::string buf;
util::FakeSStream stream(buf);
WriteCounts(stream, number);
return buf.size();
}

bool IsEntirelyWhiteSpace(const StringPiece &line) {
Expand All @@ -50,59 +45,32 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) {
return true;
}

// Opens `name` for binary output through file_, offering it a caller-sized buffer.
//   name:        path of the output file; also stored in file_name_.
//   buffer_size: number of bytes allocated for buffer_ and passed to pubsetbuf.
// Throws ARPAOutputException("Opening", file_name_) when the stream raises
// std::ios_base::failure.
ARPAOutput::ARPAOutput(const char *name, size_t buffer_size) : file_name_(name), buffer_(new char[buffer_size]) {
try {
// Turn eof/fail/bad states into std::ios_base::failure exceptions from here on.
file_.exceptions(std::ostream::eofbit | std::ostream::failbit | std::ostream::badbit);
// The buffer is installed before open(); if the streambuf refuses it, warn
// and release the unused allocation.
if (!file_.rdbuf()->pubsetbuf(buffer_.get(), buffer_size)) {
std::cerr << "Warning: could not enlarge buffer for " << name << std::endl;
buffer_.reset();
}
file_.open(name, std::ios::out | std::ios::binary);
} catch (const std::ios_base::failure &f) {
// Re-throw as the ARPA-specific exception carrying the file name.
throw ARPAOutputException("Opening", file_name_);
}
}
// Creates the output file via util::CreateOrThrow and hands the resulting
// descriptor, together with buffer_size, to file_.
// NOTE(review): file_ is constructed from file_backing_.get(), so file_backing_
// must be declared before file_ in the class (members initialize in declaration
// order) — confirm this ordering is kept if the members are ever rearranged.
ARPAOutput::ARPAOutput(const char *name, size_t buffer_size)
: file_backing_(util::CreateOrThrow(name)), file_(file_backing_.get(), buffer_size) {}

// Writes `reserve` newline characters to file_, one per loop iteration, as
// blank placeholder space for the counts header.
void ARPAOutput::ReserveForCounts(std::streampos reserve) {
try {
for (std::streampos i = 0; i < reserve; i += std::streampos(1)) {
file_ << '\n';
}
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Writing blanks to reserve space for counts to ", file_name_);
for (std::streampos i = 0; i < reserve; i += std::streampos(1)) {
file_ << '\n';
}
}

// Starts the section for n-grams of order `length`: resets fast_counter_ to
// zero and writes the "\<length>-grams:" header line to file_.
void ARPAOutput::BeginLength(unsigned int length) {
fast_counter_ = 0;
try {
file_ << '\\' << length << "-grams:" << '\n';
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Writing n-gram header to ", file_name_);
}
file_ << '\\' << length << "-grams:" << '\n';
}

// Ends the section for order `length`: writes a blank line, grows counts_ to at
// least `length` entries, and stores fast_counter_ as the count for this order.
// NOTE(review): length is unsigned, so length == 0 would make `length - 1` wrap
// around and index far past counts_; presumably callers always pass
// length >= 1 — confirm.
void ARPAOutput::EndLength(unsigned int length) {
try {
file_ << '\n';
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Writing blank at end of count list to ", file_name_);
}
file_ << '\n';
if (length > counts_.size()) {
counts_.resize(length);
}
counts_[length - 1] = fast_counter_;
}

// Writes the "\end\" trailer, seeks back to offset 0, writes the counts header
// there via WriteCounts(file_, counts_), and flushes the stream.
// NOTE(review): presumably enough space at the start of the file was reserved
// earlier with ReserveForCounts, otherwise the header would overwrite n-gram
// data — confirm against callers.
void ARPAOutput::Finish() {
try {
file_ << "\\end\\\n";
file_.seekp(0);
WriteCounts(file_, counts_);
file_ << std::flush;
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Finishing including writing counts at beginning to ", file_name_);
}
file_ << "\\end\\\n";
file_.seekp(0);
WriteCounts(file_, counts_);
file_.flush();
}

} // namespace lm
23 changes: 4 additions & 19 deletions lm/filter/arpa_io.hh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/
#include "lm/read_arpa.hh"
#include "util/exception.hh"
#include "util/fake_ofstream.hh"
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"

Expand All @@ -28,17 +29,6 @@ class ARPAInputException : public util::Exception {
virtual ~ARPAInputException() throw();
};

// Exception type for failures while writing an ARPA file. Derives from
// util::ErrnoException and carries a file name retrievable through File().
class ARPAOutputException : public util::ErrnoException {
public:
// prefix: text placed at the start of the message; file_name: output file path.
ARPAOutputException(const char *prefix, const std::string &file_name) throw();
virtual ~ARPAOutputException() throw();

// Returns the file_name_ member.
const std::string &File() const throw() { return file_name_; }

private:
const std::string file_name_;
};

// Handling for the counts of n-grams at the beginning of ARPA files.
size_t SizeNeededForCounts(const std::vector<uint64_t> &number);

Expand All @@ -55,11 +45,7 @@ class ARPAOutput : boost::noncopyable {
void BeginLength(unsigned int length);

// Writes `line` followed by a newline to file_ and increments fast_counter_,
// the running count that EndLength later records for the current order.
void AddNGram(const StringPiece &line) {
try {
file_ << line << '\n';
} catch (const std::ios_base::failure &f) {
throw ARPAOutputException("Writing an n-gram", file_name_);
}
file_ << line << '\n';
++fast_counter_;
}

Expand All @@ -76,9 +62,8 @@ class ARPAOutput : boost::noncopyable {
void Finish();

private:
const std::string file_name_;
boost::scoped_array<char> buffer_;
std::fstream file_;
util::scoped_fd file_backing_;
util::FakeOFStream file_;
size_t fast_counter_;
std::vector<uint64_t> counts_;
};
Expand Down

0 comments on commit 235f607

Please sign in to comment.