Skip to content

Commit

Permalink
Merge pull request ClickHouse#74338 from azat/s3-rewrite-detect-fix
Browse files Browse the repository at this point in the history
Fix detection of "use the Rewrite method in the JSON API" for native copy on GCS
  • Loading branch information
alexkats authored Jan 16, 2025
2 parents 87aa79d + 063a654 commit 5efa76f
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 4 deletions.
14 changes: 13 additions & 1 deletion src/IO/S3/Client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,23 @@ bool Client::RetryStrategy::ShouldRetry(const Aws::Client::AWSError<Aws::Client:
return false;

if (CurrentThread::isInitialized() && CurrentThread::get().isQueryCanceled())
return false;
return false;

/// It does not make sense to retry when GCS suggest to use Rewrite
if (useGCSRewrite(error))
return false;

return error.ShouldRetry();
}

bool Client::RetryStrategy::useGCSRewrite(const Aws::Client::AWSError<Aws::Client::CoreErrors>& error)
{
return error.GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT
&& error.GetExceptionName() == "InternalError"
&& error.GetMessage().contains("use the Rewrite method in the JSON API");
}


/// NOLINTNEXTLINE(google-runtime-int)
long Client::RetryStrategy::CalculateDelayBeforeNextRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors>&, long attemptedRetries) const
{
Expand Down
10 changes: 10 additions & 0 deletions src/IO/S3/Client.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,16 @@ class Client : private Aws::S3::S3Client
/// NOLINTNEXTLINE(google-runtime-int)
long GetMaxAttempts() const override;

/// Sometimes [1] GCS may suggest to use Rewrite over CopyObject, i.e.:
///
/// AWSError 'InternalError': Copy spanning locations and/or storage classes could not complete within 30 seconds. Please use the Rewrite method in the JSON API (https://cloud.google.com/storage/docs/json_api/v1/objects/rewrite) instead.
///
/// Note, that GCS may return other errors (like EntityTooLarge), but
/// those are not retriable by default S3 RetryStrategy.
///
/// [1]: https://github.com/ClickHouse/ClickHouse/issues/59660
static bool useGCSRewrite(const Aws::Client::AWSError<Aws::Client::CoreErrors>& error);

private:
uint32_t maxRetries;
uint32_t scaleFactor;
Expand Down
4 changes: 1 addition & 3 deletions src/IO/S3/copyS3File.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -759,9 +759,7 @@ namespace
outcome.GetError().GetExceptionName() == "InvalidRequest" ||
outcome.GetError().GetExceptionName() == "InvalidArgument" ||
outcome.GetError().GetExceptionName() == "AccessDenied" ||
(outcome.GetError().GetExceptionName() == "InternalError" &&
outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT &&
outcome.GetError().GetMessage().contains("use the Rewrite method in the JSON API")))
S3::Client::RetryStrategy::useGCSRewrite(outcome.GetError()))
{
if (!supports_multipart_copy || outcome.GetError().GetExceptionName() == "AccessDenied")
{
Expand Down

0 comments on commit 5efa76f

Please sign in to comment.