Skip to content

Commit

Permalink
Add error message for gzipped files.
Browse files Browse the repository at this point in the history
git-svn-id: file:///dev/shm/somefilter.svn@417 e102df66-1e2e-11dd-9b44-c24451a4db5e
  • Loading branch information
kpu committed Oct 22, 2010
1 parent 6c1f4a3 commit 321ed13
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
12 changes: 11 additions & 1 deletion lm/read_arpa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) {
template <class F> void GenericReadARPACounts(F &in, std::vector<uint64_t> &number) {
number.clear();
StringPiece line;
if (!IsEntirelyWhiteSpace(line = in.ReadLine())) UTIL_THROW(FormatLoadException, "First line was \"" << line << "\" not blank");
// The first line must be blank.  Anything else is a format error, but input
// that starts with the gzip magic number gets a more actionable message.
if (!IsEntirelyWhiteSpace(line = in.ReadLine())) {
  // gzip streams start with bytes 0x1f 0x8b.  Compare as unsigned char so
  // 0x8b matches even where plain char is signed.
  if ((line.size() >= 2) && (static_cast<unsigned char>(line.data()[0]) == 0x1f) && (static_cast<unsigned char>(line.data()[1]) == 0x8b)) {
    UTIL_THROW(FormatLoadException, "Looks like a gzip file. If this is an ARPA file, run\nzcat " << in.FileName() << " |kenlm/build_binary /dev/stdin " << in.FileName() << ".binary\nIf this already in binary format, you need to decompress it because mmap doesn't work on top of gzip.");
  }
  // Report the offending line itself.  The previous code streamed the integer
  // value of line.data()[1], which hid the real content and indexed past the
  // end of a one-character line.
  UTIL_THROW(FormatLoadException, "First line was \"" << line << "\" not blank");
}
if ((line = in.ReadLine()) != "\\data\\") UTIL_THROW(FormatLoadException, "second line was \"" << line << "\" not \\data\\.");
while (!IsEntirelyWhiteSpace(line = in.ReadLine())) {
if (line.size() < 6 || strncmp(line.data(), "ngram ", 6)) UTIL_THROW(FormatLoadException, "count line \"" << line << "\"doesn't begin with \"ngram \"");
Expand Down Expand Up @@ -72,6 +77,11 @@ class FakeFilePiece {
return ret;
}

// Name used when building error messages.  The real file name is not known
// here, so a shell-variable style placeholder is returned instead.
const char *FileName() const {
  static const char kUnknownFileName[] = "$file";
  return kUnknownFileName;
}

private:
std::istream &in_;
std::string buffer_;
Expand Down
1 change: 1 addition & 0 deletions util/file_piece.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ FilePiece::FilePiece(const char *name, int fd, std::ostream *show_progress, off_
}

void FilePiece::Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) {
file_name_ = name;
if (total_size_ == kBadSize) {
fallback_to_read_ = true;
if (show_progress)
Expand Down
4 changes: 4 additions & 0 deletions util/file_piece.hh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ class FilePiece {
// Sets fallback_to_read_ to true unconditionally.
// NOTE(review): presumably this switches the reader from mmap to plain
// read() calls -- confirm against the code that consumes the flag.
void ForceFallbackToRead() {
fallback_to_read_ = true;
}

// Returns a reference to the stored file_name_ member; no copy is made.
// NOTE(review): presumably the name the FilePiece was opened with -- confirm
// where file_name_ is assigned.
const std::string &FileName() const { return file_name_; }

private:
void Initialize(const char *name, std::ostream *show_progress, off_t min_buffer);
Expand Down Expand Up @@ -98,6 +100,8 @@ class FilePiece {
bool fallback_to_read_;

ErsatzProgress progress_;

std::string file_name_;
};

} // namespace util
Expand Down

0 comments on commit 321ed13

Please sign in to comment.