Skip to content

Commit

Permalink
Make use of portable uint64_t type to make possible file access
Browse files Browse the repository at this point in the history
  in 64-bit.

  Currently, the signed `off_t` type is used for both the offset and the
  length (in bytes) in the following interfaces:
  * `Allocate`
  * `RangeSync`

  On Linux, `off_t` is automatically either 32-bit or 64-bit depending on
  the platform. On Windows, it is always a 32-bit signed long, which
  limits file access, and in particular space pre-allocation,
  to effectively 2 GB.

  The proposal is to replace `off_t` with `uint64_t` as a portable type,
  so that files are always accessed through 64-bit interfaces.

  The POSIX code may also need to be modified, but I lack the resources to test it.
  • Loading branch information
yuslepukhin committed Nov 11, 2015
1 parent 505accd commit 5270b33
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 29 deletions.
12 changes: 6 additions & 6 deletions include/rocksdb/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ class WritableFile {
// This asks the OS to initiate flushing the cached data to disk,
// without waiting for completion.
// Default implementation does nothing.
virtual Status RangeSync(off_t offset, off_t nbytes) { return Status::OK(); }
virtual Status RangeSync(uint64_t offset, uint64_t nbytes) { return Status::OK(); }

// PrepareWrite performs any necessary preparation for a write
// before the write actually occurs. This allows for pre-allocation
Expand All @@ -590,8 +590,8 @@ class WritableFile {
if (new_last_preallocated_block > last_preallocated_block_) {
size_t num_spanned_blocks =
new_last_preallocated_block - last_preallocated_block_;
Allocate(static_cast<off_t>(block_size * last_preallocated_block_),
static_cast<off_t>(block_size * num_spanned_blocks));
Allocate(block_size * last_preallocated_block_,
block_size * num_spanned_blocks);
last_preallocated_block_ = new_last_preallocated_block;
}
}
Expand All @@ -600,7 +600,7 @@ class WritableFile {
/*
* Pre-allocate space for a file.
*/
virtual Status Allocate(off_t offset, off_t len) {
virtual Status Allocate(uint64_t offset, uint64_t len) {
return Status::OK();
}

Expand Down Expand Up @@ -920,10 +920,10 @@ class WritableFileWrapper : public WritableFile {
}

protected:
Status Allocate(off_t offset, off_t len) override {
Status Allocate(uint64_t offset, uint64_t len) override {
return target_->Allocate(offset, len);
}
Status RangeSync(off_t offset, off_t nbytes) override {
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
return target_->RangeSync(offset, nbytes);
}

Expand Down
10 changes: 2 additions & 8 deletions port/win/env_win.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,6 @@ ThreadStatusUpdater* CreateThreadStatusUpdater() {
return new ThreadStatusUpdater();
}

// Stand-in for fadvise in this Windows port: the arguments are ignored and
// no advice is issued.
// NOTE(review): an earlier comment said this returns Status::NotSupport, but
// the function returns a plain int and always yields 0.
int Fadvise(int fd, off_t offset, size_t len, int advice) {
return 0; // simply do nothing.
}

// Builds an IOError Status for a failed Windows call: `context` is passed
// through as the first argument, and `err` is converted with GetWindowsErrSz
// to form the second.
// NOTE(review): presumably `err` is a GetLastError() code — confirm at the
// call sites.
inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
return Status::IOError(context, GetWindowsErrSz(err));
}
Expand Down Expand Up @@ -605,7 +599,7 @@ class WinMmapFile : public WritableFile {
return Status::OK();
}

virtual Status Allocate(off_t offset, off_t len) override {
virtual Status Allocate(uint64_t offset, uint64_t len) override {
return Status::OK();
}
};
Expand Down Expand Up @@ -1053,7 +1047,7 @@ class WinWritableFile : public WritableFile {
return filesize_;
}

virtual Status Allocate(off_t offset, off_t len) override {
virtual Status Allocate(uint64_t offset, uint64_t len) override {
Status status;
TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds);

Expand Down
4 changes: 2 additions & 2 deletions util/env_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -971,11 +971,11 @@ TEST_F(EnvPosixTest, WritableFileWrapper) {
}

protected:
Status Allocate(off_t offset, off_t len) override {
Status Allocate(uint64_t offset, uint64_t len) override {
inc(11);
return Status::OK();
}
Status RangeSync(off_t offset, off_t nbytes) override {
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
inc(12);
return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion util/file_reader_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ Status WritableFileWriter::SyncInternal(bool use_fsync) {
return s;
}

Status WritableFileWriter::RangeSync(off_t offset, off_t nbytes) {
Status WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
IOSTATS_TIMER_GUARD(range_sync_nanos);
TEST_SYNC_POINT("WritableFileWriter::RangeSync:0");
return writable_file_->RangeSync(offset, nbytes);
Expand Down
2 changes: 1 addition & 1 deletion util/file_reader_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class WritableFileWriter {
Status WriteUnbuffered();
// Normal write
Status WriteBuffered(const char* data, size_t size);
Status RangeSync(off_t offset, off_t nbytes);
Status RangeSync(uint64_t offset, uint64_t nbytes);
size_t RequestToken(size_t bytes, bool align);
Status SyncInternal(bool use_fsync);
};
Expand Down
4 changes: 2 additions & 2 deletions util/file_reader_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ TEST_F(WritableFileWriterTest, RangeSync) {
}

protected:
Status Allocate(off_t offset, off_t len) override { return Status::OK(); }
Status RangeSync(off_t offset, off_t nbytes) override {
Status Allocate(uint64_t offset, uint64_t len) override { return Status::OK(); }
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
EXPECT_EQ(offset % 4096, 0u);
EXPECT_EQ(nbytes % 4096, 0u);

Expand Down
21 changes: 15 additions & 6 deletions util/io_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -478,12 +478,15 @@ Status PosixMmapFile::InvalidateCache(size_t offset, size_t length) {
}

#ifdef ROCKSDB_FALLOCATE_PRESENT
Status PosixMmapFile::Allocate(off_t offset, off_t len) {
Status PosixMmapFile::Allocate(uint64_t offset, uint64_t len) {
assert(offset <= std::numeric_limits<off_t>::max());
assert(len <= std::numeric_limits<off_t>::max());
TEST_KILL_RANDOM("PosixMmapFile::Allocate:0", rocksdb_kill_odds);
int alloc_status = 0;
if (allow_fallocate_) {
alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
static_cast<off_t>(offset), static_cast<off_t>(len));
}
if (alloc_status == 0) {
return Status::OK();
Expand Down Expand Up @@ -606,13 +609,16 @@ Status PosixWritableFile::InvalidateCache(size_t offset, size_t length) {
}

#ifdef ROCKSDB_FALLOCATE_PRESENT
Status PosixWritableFile::Allocate(off_t offset, off_t len) {
Status PosixWritableFile::Allocate(uint64_t offset, uint64_t len) {
assert(offset <= std::numeric_limits<off_t>::max());
assert(len <= std::numeric_limits<off_t>::max());
TEST_KILL_RANDOM("PosixWritableFile::Allocate:0", rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = 0;
if (allow_fallocate_) {
alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
static_cast<off_t>(offset), static_cast<off_t>(len));
}
if (alloc_status == 0) {
return Status::OK();
Expand All @@ -621,8 +627,11 @@ Status PosixWritableFile::Allocate(off_t offset, off_t len) {
}
}

Status PosixWritableFile::RangeSync(off_t offset, off_t nbytes) {
if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) {
Status PosixWritableFile::RangeSync(uint64_t offset, uint64_t nbytes) {
assert(offset <= std::numeric_limits<off_t>::max());
assert(nbytes <= std::numeric_limits<off_t>::max());
if (sync_file_range(fd_, static_cast<off_t>(offset),
static_cast<off_t>(nbytes), SYNC_FILE_RANGE_WRITE) == 0) {
return Status::OK();
} else {
return IOError(filename_, errno);
Expand Down
6 changes: 3 additions & 3 deletions util/io_posix.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ class PosixWritableFile : public WritableFile {
virtual uint64_t GetFileSize() override;
virtual Status InvalidateCache(size_t offset, size_t length) override;
#ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override;
virtual Status RangeSync(off_t offset, off_t nbytes) override;
virtual Status Allocate(uint64_t offset, uint64_t len) override;
virtual Status RangeSync(uint64_t offset, uint64_t nbytes) override;
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
#endif
};
Expand Down Expand Up @@ -157,7 +157,7 @@ class PosixMmapFile : public WritableFile {
virtual uint64_t GetFileSize() override;
virtual Status InvalidateCache(size_t offset, size_t length) override;
#ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override;
virtual Status Allocate(uint64_t offset, uint64_t len) override;
#endif
};

Expand Down

0 comments on commit 5270b33

Please sign in to comment.