Skip to content

Commit 5dd1bb2

Browse files
committed
improvements for getFileSize
1 parent 7717791 commit 5dd1bb2

35 files changed

+110
-50
lines changed

src/Client/ClientBase.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1125,7 +1125,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
11251125
if (need_render_progress && have_data_in_stdin)
11261126
{
11271127
/// Set total_bytes_to_read for current fd.
1128-
FileProgress file_progress(0, std_in.size());
1128+
FileProgress file_progress(0, std_in.getFileSize());
11291129
progress_indication.updateProgress(Progress(file_progress));
11301130

11311131
/// Set callback to be called on file progress.

src/Common/filesystemHelpers.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <IO/ReadBufferFromFile.h>
1818
#include <IO/Operators.h>
1919
#include <IO/WriteBufferFromString.h>
20+
#include <Common/Exception.h>
2021

2122
namespace fs = std::filesystem;
2223

@@ -29,6 +30,7 @@ namespace ErrorCodes
2930
extern const int SYSTEM_ERROR;
3031
extern const int NOT_IMPLEMENTED;
3132
extern const int CANNOT_STAT;
33+
extern const int CANNOT_FSTAT;
3234
extern const int CANNOT_STATVFS;
3335
extern const int PATH_ACCESS_DENIED;
3436
extern const int CANNOT_CREATE_FILE;
@@ -215,6 +217,20 @@ bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path
215217
return fileOrSymlinkPathStartsWith(filesystem_path, filesystem_prefix_path);
216218
}
217219

220+
/// Returns the size of the file behind the open descriptor `fd`, as reported by fstat() in `st_size`.
/// `file_name` is not used to locate the file; it is only appended to the error message
/// (when non-empty) and passed as the path argument to throwFromErrnoWithPath.
/// If fstat() returns -1, the failure is reported through throwFromErrnoWithPath with ErrorCodes::CANNOT_FSTAT.
size_t getSizeFromFileDescriptor(int fd, const String & file_name)
221+
{
222+
struct stat buf;
223+
int res = fstat(fd, &buf);
224+
if (-1 == res)
225+
{
226+
throwFromErrnoWithPath(
227+
/// The " file: <name>" suffix is added only when a name was supplied.
"Cannot execute fstat" + (file_name.empty() ? "" : " file: " + file_name),
228+
file_name,
229+
ErrorCodes::CANNOT_FSTAT);
230+
}
231+
/// NOTE(review): st_size is implicitly converted to size_t here; presumably it is never negative - confirm.
return buf.st_size;
232+
}
233+
218234
}
219235

220236

src/Common/filesystemHelpers.h

+2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ bool pathStartsWith(const String & path, const String & prefix_path);
6464
/// (Path is made absolute and normalized.)
6565
bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path);
6666

67+
/// Returns the size (st_size from fstat) of the file behind the open descriptor `fd`.
/// `file_name` is optional and is used only when reporting an fstat failure (ErrorCodes::CANNOT_FSTAT).
size_t getSizeFromFileDescriptor(int fd, const String & file_name = "");
68+
6769
}
6870

6971
namespace FS

src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ String AsynchronousReadIndirectBufferFromRemoteFS::getInfoForLog()
6767
return impl->getInfoForLog();
6868
}
6969

70-
std::optional<size_t> AsynchronousReadIndirectBufferFromRemoteFS::getFileSize()
70+
size_t AsynchronousReadIndirectBufferFromRemoteFS::getFileSize()
7171
{
7272
return impl->getFileSize();
7373
}

src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ struct ReadSettings;
2727
*
2828
* We pass either `memory` or `prefetch_buffer` through all this chain and return it back.
2929
*/
30-
class AsynchronousReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase, public WithFileSize
30+
class AsynchronousReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase
3131
{
3232
public:
3333
explicit AsynchronousReadIndirectBufferFromRemoteFS(
@@ -51,7 +51,7 @@ class AsynchronousReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase
5151

5252
String getInfoForLog() override;
5353

54-
std::optional<size_t> getFileSize() override;
54+
size_t getFileSize() override;
5555

5656
private:
5757
bool nextImpl() override;

src/Disks/IO/CachedReadBufferFromRemoteFS.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getCacheReadBuffer(size_t of
129129

130130
auto buf = createReadBufferFromFileBase(path, local_read_settings);
131131
auto * from_fd = dynamic_cast<ReadBufferFromFileDescriptor*>(buf.get());
132-
if (from_fd && from_fd->size() == 0)
132+
if (from_fd && from_fd->getFileSize() == 0)
133133
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path);
134134

135135
return buf;
@@ -371,7 +371,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
371371
{
372372
#ifndef NDEBUG
373373
auto * file_reader = dynamic_cast<ReadBufferFromFileDescriptor *>(read_buffer_for_file_segment.get());
374-
size_t file_size = file_reader->size();
374+
size_t file_size = file_reader->getFileSize();
375375

376376
if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end)
377377
throw Exception(
@@ -803,7 +803,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
803803
#ifndef NDEBUG
804804
if (auto * cache_file_reader = dynamic_cast<ReadBufferFromFileDescriptor *>(implementation_buffer.get()))
805805
{
806-
auto cache_file_size = cache_file_reader->size();
806+
auto cache_file_size = cache_file_reader->getFileSize();
807807
if (cache_file_size == 0)
808808
throw Exception(
809809
ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {} (just before actual read)", cache_file_size);
@@ -917,7 +917,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
917917
{
918918
std::optional<size_t> cache_file_size;
919919
if (auto * cache_file_reader = dynamic_cast<ReadBufferFromFileDescriptor *>(implementation_buffer.get()))
920-
cache_file_size = cache_file_reader->size();
920+
cache_file_size = cache_file_reader->getFileSize();
921921

922922
throw Exception(
923923
ErrorCodes::LOGICAL_ERROR,

src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS(
1919
{
2020
}
2121

22+
/// Reports the file size by forwarding the call to the wrapped `impl` buffer.
size_t ReadIndirectBufferFromRemoteFS::getFileSize()
23+
{
24+
return impl->getFileSize();
25+
}
2226

2327
off_t ReadIndirectBufferFromRemoteFS::getPosition()
2428
{

src/Disks/IO/ReadIndirectBufferFromRemoteFS.h

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class ReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase
3030

3131
void setReadUntilEnd() override;
3232

33+
size_t getFileSize() override;
34+
3335
private:
3436
bool nextImpl() override;
3537

src/IO/AsynchronousReadBufferFromFileDescriptor.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <Common/CurrentMetrics.h>
88
#include <IO/AsynchronousReadBufferFromFileDescriptor.h>
99
#include <IO/WriteHelpers.h>
10+
#include <Common/filesystemHelpers.h>
1011

1112

1213
namespace ProfileEvents
@@ -243,4 +244,9 @@ void AsynchronousReadBufferFromFileDescriptor::rewind()
243244
file_offset_of_buffer_end = 0;
244245
}
245246

247+
/// Returns the size of the file behind this buffer's descriptor `fd`, obtained via
/// getSizeFromFileDescriptor (Common/filesystemHelpers.h). The file name is passed along
/// so that the helper can include it when reporting an fstat failure.
size_t AsynchronousReadBufferFromFileDescriptor::getFileSize()
248+
{
249+
return getSizeFromFileDescriptor(fd, getFileName());
250+
}
251+
246252
}

src/IO/AsynchronousReadBufferFromFileDescriptor.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@ class AsynchronousReadBufferFromFileDescriptor : public ReadBufferFromFileBase
6464
/// Seek to the beginning, discarding already read data if any. Useful to reread file that changes on every read.
6565
void rewind();
6666

67+
size_t getFileSize() override;
68+
6769
private:
6870
std::future<IAsynchronousReader::Result> asyncReadInto(char * data, size_t size);
6971
};
7072

7173
}
72-

src/IO/ConcatSeekableReadBuffer.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class ConcatSeekableReadBuffer : public SeekableReadBuffer, public WithFileSize
2121
off_t seek(off_t off, int whence) override;
2222
off_t getPosition() override;
2323

24-
std::optional<size_t> getFileSize() override { return total_size; }
24+
size_t getFileSize() override { return total_size; }
2525

2626
private:
2727
bool nextImpl() override;

src/IO/MMapReadBufferFromFileDescriptor.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <Common/ProfileEvents.h>
77
#include <Common/formatReadable.h>
88
#include <Common/Exception.h>
9+
#include <Common/filesystemHelpers.h>
910
#include <base/getPageSize.h>
1011
#include <IO/WriteHelpers.h>
1112
#include <IO/MMapReadBufferFromFileDescriptor.h>
@@ -86,4 +87,8 @@ off_t MMapReadBufferFromFileDescriptor::seek(off_t offset, int whence)
8687
return new_pos;
8788
}
8889

90+
/// Returns the size of the file behind the descriptor returned by getFD(), obtained via
/// getSizeFromFileDescriptor (Common/filesystemHelpers.h). The file name is passed along
/// so that the helper can include it when reporting an fstat failure.
size_t MMapReadBufferFromFileDescriptor::getFileSize()
91+
{
92+
return getSizeFromFileDescriptor(getFD(), getFileName());
93+
}
8994
}

src/IO/MMapReadBufferFromFileDescriptor.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,12 @@ class MMapReadBufferFromFileDescriptor : public ReadBufferFromFileBase
3333
void finish();
3434

3535
off_t getPosition() override;
36+
3637
std::string getFileName() const override;
38+
3739
int getFD() const;
40+
41+
size_t getFileSize() override;
3842
};
3943

4044
}
41-

src/IO/ParallelReadBuffer.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ off_t ParallelReadBuffer::seek(off_t offset, int whence)
150150
return offset;
151151
}
152152

153-
std::optional<size_t> ParallelReadBuffer::getFileSize()
153+
size_t ParallelReadBuffer::getFileSize()
154154
{
155155
return reader_factory->getFileSize();
156156
}

src/IO/ParallelReadBuffer.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class ParallelReadBuffer : public SeekableReadBuffer
4343
~ParallelReadBuffer() override { finishAndWait(); }
4444

4545
off_t seek(off_t off, int whence) override;
46-
std::optional<size_t> getFileSize();
46+
size_t getFileSize();
4747
off_t getPosition() override;
4848

4949
const ReadBufferFactory & getReadBufferFactory() const { return *reader_factory; }

src/IO/ReadBufferFromEmptyFile.h

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class ReadBufferFromEmptyFile : public ReadBufferFromFileBase
1919
std::string getFileName() const override { return "<empty>"; }
2020
off_t seek(off_t /*off*/, int /*whence*/) override { return 0; }
2121
off_t getPosition() override { return 0; }
22+
size_t getFileSize() override { return 0; }
2223
};
2324

2425
}

src/IO/ReadBufferFromEncryptedFile.h

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class ReadBufferFromEncryptedFile : public ReadBufferFromFileBase
3030

3131
void setReadUntilEnd() override { in->setReadUntilEnd(); }
3232

33+
size_t getFileSize() override { return in->getFileSize(); }
34+
3335
private:
3436
bool nextImpl() override;
3537

src/IO/ReadBufferFromFileBase.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
namespace DB
44
{
55

6+
namespace ErrorCodes
7+
{
8+
extern const int NOT_IMPLEMENTED;
9+
}
10+
611
ReadBufferFromFileBase::ReadBufferFromFileBase() : BufferWithOwnMemory<SeekableReadBuffer>(0)
712
{
813
}
@@ -19,4 +24,11 @@ ReadBufferFromFileBase::ReadBufferFromFileBase(
1924

2025
ReadBufferFromFileBase::~ReadBufferFromFileBase() = default;
2126

27+
/// Base implementation of getFileSize(): returns the value stored in the optional `file_size` member
/// when it is set, and otherwise throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
size_t ReadBufferFromFileBase::getFileSize()
28+
{
29+
if (file_size)
30+
return *file_size;
31+
/// No size has been stored in `file_size`, so this base implementation cannot answer.
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot find out file size for read buffer");
32+
}
33+
2234
}

src/IO/ReadBufferFromFileBase.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020

2121
namespace DB
2222
{
23-
class ReadBufferFromFileBase : public BufferWithOwnMemory<SeekableReadBuffer>, public WithFileName
23+
24+
class ReadBufferFromFileBase : public BufferWithOwnMemory<SeekableReadBuffer>, public WithFileName, public WithFileSize
2425
{
2526
public:
2627
ReadBufferFromFileBase();
@@ -48,6 +49,8 @@ class ReadBufferFromFileBase : public BufferWithOwnMemory<SeekableReadBuffer>, p
4849
clock_type = clock_type_;
4950
}
5051

52+
size_t getFileSize() override;
53+
5154
protected:
5255
std::optional<size_t> file_size;
5356
ProfileCallback profile_callback;

src/IO/ReadBufferFromFileDecorator.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,9 @@ bool ReadBufferFromFileDecorator::nextImpl()
5353
return result;
5454
}
5555

56+
/// Reports the file size of the wrapped buffer `impl` by passing it to getFileSizeFromReadBuffer.
/// NOTE(review): how getFileSizeFromReadBuffer behaves for buffers without a known size is not visible here - confirm.
size_t ReadBufferFromFileDecorator::getFileSize()
57+
{
58+
return getFileSizeFromReadBuffer(*impl);
59+
}
60+
5661
}

src/IO/ReadBufferFromFileDecorator.h

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ class ReadBufferFromFileDecorator : public ReadBufferFromFileBase
2727

2828
ReadBuffer & getWrappedReadBuffer() { return *impl; }
2929

30+
size_t getFileSize() override;
31+
3032
protected:
3133
std::unique_ptr<SeekableReadBuffer> impl;
3234
String file_name;

src/IO/ReadBufferFromFileDescriptor.cpp

+3-6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <IO/ReadBufferFromFileDescriptor.h>
99
#include <IO/WriteHelpers.h>
1010
#include <IO/Progress.h>
11+
#include <Common/filesystemHelpers.h>
1112
#include <sys/stat.h>
1213

1314

@@ -249,13 +250,9 @@ bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds)
249250
}
250251

251252

252-
off_t ReadBufferFromFileDescriptor::size()
253+
size_t ReadBufferFromFileDescriptor::getFileSize()
253254
{
254-
struct stat buf;
255-
int res = fstat(fd, &buf);
256-
if (-1 == res)
257-
throwFromErrnoWithPath("Cannot execute fstat " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSTAT);
258-
return buf.st_size;
255+
return getSizeFromFileDescriptor(fd, getFileName());
259256
}
260257

261258

src/IO/ReadBufferFromFileDescriptor.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class ReadBufferFromFileDescriptor : public ReadBufferFromFileBase
5757
/// Seek to the beginning, discarding already read data if any. Useful to reread file that changes on every read.
5858
void rewind();
5959

60-
off_t size();
60+
size_t getFileSize() override;
6161

6262
void setProgressCallback(ContextPtr context);
6363

src/IO/ReadBufferFromS3.cpp

+4-9
Original file line numberDiff line numberDiff line change
@@ -222,20 +222,15 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
222222
return offset;
223223
}
224224

225-
std::optional<size_t> ReadBufferFromS3::getFileSize()
225+
size_t ReadBufferFromS3::getFileSize()
226226
{
227227
if (file_size)
228-
return file_size;
228+
return *file_size;
229229

230230
auto object_size = S3::getObjectSize(client_ptr, bucket, key, version_id, false);
231231

232-
if (!object_size)
233-
{
234-
return std::nullopt;
235-
}
236-
237232
file_size = object_size;
238-
return file_size;
233+
return *file_size;
239234
}
240235

241236
off_t ReadBufferFromS3::getPosition()
@@ -339,7 +334,7 @@ off_t ReadBufferS3Factory::seek(off_t off, [[maybe_unused]] int whence)
339334
return off;
340335
}
341336

342-
std::optional<size_t> ReadBufferS3Factory::getFileSize()
337+
size_t ReadBufferS3Factory::getFileSize()
343338
{
344339
return object_size;
345340
}

src/IO/ReadBufferFromS3.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class ReadBufferFromS3 : public SeekableReadBuffer, public WithFileName, public
6565

6666
off_t getPosition() override;
6767

68-
std::optional<size_t> getFileSize() override;
68+
size_t getFileSize() override;
6969

7070
void setReadUntilPosition(size_t position) override;
7171

@@ -120,7 +120,7 @@ class ReadBufferS3Factory : public ParallelReadBuffer::ReadBufferFactory, public
120120

121121
off_t seek(off_t off, [[maybe_unused]] int whence) override;
122122

123-
std::optional<size_t> getFileSize() override;
123+
size_t getFileSize() override;
124124

125125
String getFileName() const override { return bucket + "/" + key; }
126126

0 commit comments

Comments
 (0)