From 082259493ec6d9892eac7bf7b07f579ca9f7dedb Mon Sep 17 00:00:00 2001 From: konsti Date: Thu, 31 Oct 2024 16:23:12 +0100 Subject: [PATCH] Skip existing, second iteration: Check the index before uploading (#8531) Co-authored-by: Zanie Blue --- Cargo.lock | 4 +- crates/uv-cli/src/lib.rs | 16 ++ crates/uv-client/src/registry_client.rs | 23 +- crates/uv-extract/src/hash.rs | 9 - crates/uv-publish/Cargo.toml | 4 +- crates/uv-publish/src/lib.rs | 218 +++++++++++---- crates/uv-static/src/env_vars.rs | 3 + crates/uv/src/commands/publish.rs | 47 +++- crates/uv/src/lib.rs | 3 + crates/uv/src/settings.rs | 4 +- crates/uv/tests/it/publish.rs | 4 +- docs/guides/publish.md | 18 +- docs/reference/cli.md | 11 + scripts/publish/test_publish.py | 338 ++++++++++++++---------- 14 files changed, 483 insertions(+), 219 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 81507fd74255..5fac0433329d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4983,15 +4983,17 @@ dependencies = [ "rustc-hash", "serde", "serde_json", - "sha2", "thiserror", "tokio", "tokio-util", "tracing", "url", + "uv-cache", "uv-client", "uv-configuration", "uv-distribution-filename", + "uv-distribution-types", + "uv-extract", "uv-fs", "uv-metadata", "uv-pypi-types", diff --git a/crates/uv-cli/src/lib.rs b/crates/uv-cli/src/lib.rs index fd05d1ee6181..1d5d57c46a9d 100644 --- a/crates/uv-cli/src/lib.rs +++ b/crates/uv-cli/src/lib.rs @@ -4819,6 +4819,22 @@ pub struct PublishArgs { value_parser = parse_insecure_host, )] pub allow_insecure_host: Option>>, + + /// Check an index URL for existing files to skip duplicate uploads. + /// + /// This option allows retrying publishing that failed after only some, but not all files have + /// been uploaded, and handles error due to parallel uploads of the same file. + /// + /// Before uploading, the index is checked. If the exact same file already exists in the index, + /// the file will not be uploaded. If an error occurred during the upload, the index is checked + /// again, to handle cases where the identical file was uploaded twice in parallel. + /// + /// The exact behavior will vary based on the index. When uploading to PyPI, uploading the same + /// file succeeds even without `--check-url`, while most other indexes error. + /// + /// The index must provide one of the supported hashes (SHA-256, SHA-384, or SHA-512). + #[arg(long,env = EnvVars::UV_PUBLISH_CHECK_URL)] + pub check_url: Option, } /// See [PEP 517](https://peps.python.org/pep-0517/) and diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs index 91bc991c0156..0052b92d0515 100644 --- a/crates/uv-client/src/registry_client.rs +++ b/crates/uv-client/src/registry_client.rs @@ -31,7 +31,7 @@ use crate::cached_client::CacheControl; use crate::html::SimpleHtml; use crate::remote_metadata::wheel_metadata_from_remote_zip; use crate::rkyvutil::OwnedArchive; -use crate::{CachedClient, CachedClientError, Error, ErrorKind}; +use crate::{BaseClient, CachedClient, CachedClientError, Error, ErrorKind}; /// A builder for an [`RegistryClient`]. #[derive(Debug, Clone)] @@ -143,6 +143,27 @@ impl<'a> RegistryClientBuilder<'a> { timeout, } } + + /// Share the underlying client between two different middleware configurations. + pub fn wrap_existing(self, existing: &BaseClient) -> RegistryClient { + // Wrap in any relevant middleware and handle connectivity. + let client = self.base_client_builder.wrap_existing(existing); + + let timeout = client.timeout(); + let connectivity = client.connectivity(); + + // Wrap in the cache middleware. + let client = CachedClient::new(client); + + RegistryClient { + index_urls: self.index_urls, + index_strategy: self.index_strategy, + cache: self.cache, + connectivity, + client, + timeout, + } + } } impl<'a> TryFrom> for RegistryClientBuilder<'a> { diff --git a/crates/uv-extract/src/hash.rs b/crates/uv-extract/src/hash.rs index ac1efb397d74..e7abdc668924 100644 --- a/crates/uv-extract/src/hash.rs +++ b/crates/uv-extract/src/hash.rs @@ -23,15 +23,6 @@ impl Hasher { Hasher::Sha512(hasher) => hasher.update(data), } } - - pub fn finalize(self) -> Vec { - match self { - Hasher::Md5(hasher) => hasher.finalize().to_vec(), - Hasher::Sha256(hasher) => hasher.finalize().to_vec(), - Hasher::Sha384(hasher) => hasher.finalize().to_vec(), - Hasher::Sha512(hasher) => hasher.finalize().to_vec(), - } - } } impl From for Hasher { diff --git a/crates/uv-publish/Cargo.toml b/crates/uv-publish/Cargo.toml index 6c640f70a876..06f4fb9401eb 100644 --- a/crates/uv-publish/Cargo.toml +++ b/crates/uv-publish/Cargo.toml @@ -13,9 +13,12 @@ license.workspace = true doctest = false [dependencies] +uv-cache = { workspace = true } uv-client = { workspace = true } uv-configuration = { workspace = true } uv-distribution-filename = { workspace = true } +uv-distribution-types = { workspace = true } +uv-extract = { workspace = true } uv-fs = { workspace = true } uv-metadata = { workspace = true } uv-pypi-types = { workspace = true } @@ -35,7 +38,6 @@ reqwest-retry = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } -sha2 = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true , features = ["io"] } diff --git a/crates/uv-publish/src/lib.rs b/crates/uv-publish/src/lib.rs index 4820974a5176..2928963f25fe 100644 --- a/crates/uv-publish/src/lib.rs +++ b/crates/uv-publish/src/lib.rs @@ -3,7 +3,7 @@ mod trusted_publishing; use crate::trusted_publishing::TrustedPublishingError; use base64::prelude::BASE64_STANDARD; use base64::Engine; -use fs_err::File; +use fs_err::tokio::File; use futures::TryStreamExt; use glob::{glob, GlobError, PatternError}; use itertools::Itertools; @@ -14,26 +14,27 @@ use reqwest_middleware::RequestBuilder; use reqwest_retry::{Retryable, RetryableStrategy}; use rustc_hash::FxHashSet; use serde::Deserialize; -use sha2::{Digest, Sha256}; -use std::io::BufReader; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::{env, fmt, io}; use thiserror::Error; -use tokio::io::AsyncReadExt; +use tokio::io::{AsyncReadExt, BufReader}; use tokio_util::io::ReaderStream; use tracing::{debug, enabled, trace, Level}; use url::Url; -use uv_client::{BaseClient, UvRetryableStrategy}; +use uv_client::{BaseClient, OwnedArchive, RegistryClientBuilder, UvRetryableStrategy}; use uv_configuration::{KeyringProviderType, TrustedPublishing}; use uv_distribution_filename::{DistFilename, SourceDistExtension, SourceDistFilename}; use uv_fs::{ProgressReader, Simplified}; use uv_metadata::read_metadata_async_seek; -use uv_pypi_types::{Metadata23, MetadataError}; +use uv_pypi_types::{HashAlgorithm, HashDigest, Metadata23, MetadataError}; use uv_static::EnvVars; use uv_warnings::{warn_user, warn_user_once}; pub use trusted_publishing::TrustedPublishingToken; +use uv_cache::{Cache, Refresh}; +use uv_distribution_types::{IndexCapabilities, IndexUrl}; +use uv_extract::hash::{HashReader, Hasher}; #[derive(Error, Debug)] pub enum PublishError { @@ -56,6 +57,17 @@ pub enum PublishError { TrustedPublishing(#[from] TrustedPublishingError), #[error("{0} are not allowed when using trusted publishing")] MixedCredentials(String), + #[error("Failed to query check URL")] + CheckUrlIndex(#[source] uv_client::Error), + #[error("Local file and index file for {filename} do not match. Local: {hash_algorithm}={local}, Remote: {hash_algorithm}={remote}")] + HashMismatch { + filename: Box, + hash_algorithm: HashAlgorithm, + local: Box, + remote: Box, + }, + #[error("Hash is missing in index for {0}")] + MissingHash(Box), } /// Failure to get the metadata for a specific file. @@ -105,6 +117,15 @@ pub trait Reporter: Send + Sync + 'static { fn on_download_complete(&self, id: usize); } +/// Context for using a fresh registry client for check URL requests. +pub struct CheckUrlClient<'a> { + pub index_url: IndexUrl, + pub registry_client_builder: RegistryClientBuilder<'a>, + pub client: &'a BaseClient, + pub index_capabilities: IndexCapabilities, + pub cache: &'a Cache, +} + impl PublishSendError { /// Extract `code` from the PyPI json error response, if any. /// @@ -330,6 +351,7 @@ pub async fn upload( retries: u32, username: Option<&str>, password: Option<&str>, + check_url_client: Option<&CheckUrlClient<'_>>, reporter: Arc, ) -> Result { let form_metadata = form_metadata(file, filename) @@ -369,26 +391,135 @@ pub async fn upload( ) })?; - return handle_response(registry, response) + return match handle_response(registry, response).await { + Ok(()) => { + // Upload successful; for PyPI this can also mean a hash match in a raced upload + // (but it doesn't tell us), for other registries it should mean a fresh upload. + Ok(true) + } + Err(err) => { + if matches!( + err, + PublishSendError::Status(..) | PublishSendError::StatusNoBody(..) + ) { + if let Some(check_url_client) = &check_url_client { + if check_url(check_url_client, file, filename).await? { + // There was a raced upload of the same file, so even though our upload failed, + // the right file now exists in the registry. + return Ok(false); + } + } + } + Err(PublishError::PublishSend( + file.to_path_buf(), + registry.clone(), + err, + )) + } + }; + } +} + +/// Check whether we should skip the upload of a file because it already exists on the index. +pub async fn check_url( + check_url_client: &CheckUrlClient<'_>, + file: &Path, + filename: &DistFilename, +) -> Result { + let CheckUrlClient { + index_url, + registry_client_builder, + client, + index_capabilities, + cache, + } = check_url_client; + + // Avoid using the PyPI 10min default cache. + let cache_refresh = (*cache) + .clone() + .with_refresh(Refresh::from_args(None, vec![filename.name().clone()])); + let registry_client = registry_client_builder + .clone() + .cache(cache_refresh) + .wrap_existing(client); + + debug!("Checking for {filename} in the registry"); + let response = registry_client + .simple(filename.name(), Some(index_url), index_capabilities) + .await + .map_err(PublishError::CheckUrlIndex)?; + let [(_, simple_metadata)] = response.as_slice() else { + unreachable!("We queried a single index, we must get a single response"); + }; + let simple_metadata = OwnedArchive::deserialize(simple_metadata); + let Some(metadatum) = simple_metadata + .iter() + .find(|metadatum| &metadatum.version == filename.version()) + else { + return Ok(false); + }; + + let archived_file = match filename { + DistFilename::SourceDistFilename(source_dist) => metadatum + .files + .source_dists + .iter() + .find(|entry| &entry.name == source_dist) + .map(|entry| &entry.file), + DistFilename::WheelFilename(wheel) => metadatum + .files + .wheels + .iter() + .find(|entry| &entry.name == wheel) + .map(|entry| &entry.file), + }; + let Some(archived_file) = archived_file else { + return Ok(false); + }; + + // TODO(konsti): Do we have a preference for a hash here? + if let Some(remote_hash) = archived_file.hashes.first() { + // We accept the risk for TOCTOU errors here, since we already read the file once before the + // streaming upload to compute the hash for the form metadata. + let local_hash = hash_file(file, Hasher::from(remote_hash.algorithm)) .await - .map_err(|err| PublishError::PublishSend(file.to_path_buf(), registry.clone(), err)); + .map_err(|err| { + PublishError::PublishPrepare( + file.to_path_buf(), + Box::new(PublishPrepareError::Io(err)), + ) + })?; + if local_hash.digest == remote_hash.digest { + debug!( + "Found {filename} in the registry with matching hash {}", + remote_hash.digest + ); + Ok(true) + } else { + Err(PublishError::HashMismatch { + filename: Box::new(filename.clone()), + hash_algorithm: remote_hash.algorithm, + local: local_hash.digest, + remote: remote_hash.digest.clone(), + }) + } + } else { + Err(PublishError::MissingHash(Box::new(filename.clone()))) } } /// Calculate the SHA256 of a file. -fn hash_file(path: impl AsRef) -> Result { - // Ideally, this would be async, but in case we actually want to make parallel uploads we should - // use `spawn_blocking` since sha256 is cpu intensive. - let mut file = BufReader::new(File::open(path.as_ref())?); - let mut hasher = Sha256::new(); - io::copy(&mut file, &mut hasher)?; - Ok(format!("{:x}", hasher.finalize())) +async fn hash_file(path: impl AsRef, hasher: Hasher) -> Result { + debug!("Hashing {}", path.as_ref().display()); + let file = BufReader::new(File::open(path.as_ref()).await?); + let mut hashers = vec![hasher]; + HashReader::new(file, &mut hashers).finish().await?; + Ok(HashDigest::from(hashers.remove(0))) } // Not in `uv-metadata` because we only support tar files here. async fn source_dist_pkg_info(file: &Path) -> Result, PublishPrepareError> { - let file = fs_err::tokio::File::open(&file).await?; - let reader = tokio::io::BufReader::new(file); + let reader = BufReader::new(File::open(&file).await?); let decoded = async_compression::tokio::bufread::GzipDecoder::new(reader); let mut archive = tokio_tar::Archive::new(decoded); let mut pkg_infos: Vec<(PathBuf, Vec)> = archive @@ -441,8 +572,7 @@ async fn metadata(file: &Path, filename: &DistFilename) -> Result { - let file = fs_err::tokio::File::open(&file).await?; - let reader = tokio::io::BufReader::new(file); + let reader = BufReader::new(File::open(&file).await?); read_metadata_async_seek(wheel, reader).await? } }; @@ -456,13 +586,13 @@ async fn form_metadata( file: &Path, filename: &DistFilename, ) -> Result, PublishPrepareError> { - let hash_hex = hash_file(file)?; + let hash_hex = hash_file(file, Hasher::from(HashAlgorithm::Sha256)).await?; let metadata = metadata(file, filename).await?; let mut form_metadata = vec![ (":action", "file_upload".to_string()), - ("sha256_digest", hash_hex), + ("sha256_digest", hash_hex.digest.to_string()), ("protocol_version", "1".to_string()), ("metadata_version", metadata.metadata_version.clone()), // Twine transforms the name with `re.sub("[^A-Za-z0-9.]+", "-", name)` @@ -545,7 +675,7 @@ async fn build_request( form = form.text(*key, value.clone()); } - let file = fs_err::tokio::File::open(file).await?; + let file = File::open(file).await?; let idx = reporter.on_download_start(&filename.to_string(), Some(file.metadata().await?.len())); let reader = ProgressReader::new(file, move |read| { reporter.on_download_progress(idx, read as u64); @@ -591,8 +721,8 @@ async fn build_request( Ok((request, idx)) } -/// Returns `true` if the file was newly uploaded and `false` if it already existed. -async fn handle_response(registry: &Url, response: Response) -> Result { +/// Log response information and map response to an error variant if not successful. +async fn handle_response(registry: &Url, response: Response) -> Result<(), PublishSendError> { let status_code = response.status(); debug!("Response code for {registry}: {status_code}"); trace!("Response headers for {registry}: {response:?}"); @@ -619,7 +749,7 @@ async fn handle_response(registry: &Url, response: Response) -> Result Result, @@ -22,6 +28,8 @@ pub(crate) async fn publish( allow_insecure_host: Vec, username: Option, password: Option, + check_url: Option, + cache: &Cache, connectivity: Connectivity, native_tls: bool, printer: Printer, @@ -50,7 +58,7 @@ pub(crate) async fn publish( .retries(0) .keyring(keyring_provider) .native_tls(native_tls) - .allow_insecure_host(allow_insecure_host) + .allow_insecure_host(allow_insecure_host.clone()) // Don't try cloning the request to make an unauthenticated request first. .auth_integration(AuthIntegration::OnlyAuthenticated) // Set a very high timeout for uploads, connections are often 10x slower on upload than @@ -61,6 +69,31 @@ pub(crate) async fn publish( .auth_integration(AuthIntegration::NoAuthMiddleware) .wrap_existing(&upload_client); + // Initialize the registry client. + let check_url_client = if let Some(index_url) = check_url { + let index_urls = IndexLocations::new( + vec![Index::from_index_url(index_url.clone())], + Vec::new(), + false, + ) + .index_urls(); + let registry_client_builder = RegistryClientBuilder::new(cache.clone()) + .native_tls(native_tls) + .connectivity(connectivity) + .index_urls(index_urls) + .keyring(keyring_provider) + .allow_insecure_host(allow_insecure_host.clone()); + Some(CheckUrlClient { + index_url, + registry_client_builder, + client: &upload_client, + index_capabilities: IndexCapabilities::default(), + cache, + }) + } else { + None + }; + // If applicable, attempt obtaining a token for trusted publishing. let trusted_publishing_token = check_trusted_publishing( username.as_deref(), @@ -121,6 +154,13 @@ pub(crate) async fn publish( } for (file, raw_filename, filename) in files { + if let Some(check_url_client) = &check_url_client { + if uv_publish::check_url(check_url_client, &file, &filename).await? { + writeln!(printer.stderr(), "File {filename} already exists, skipping")?; + continue; + } + } + let size = fs_err::metadata(&file)?.len(); let (bytes, unit) = human_readable_bytes(size); writeln!( @@ -139,6 +179,7 @@ pub(crate) async fn publish( DEFAULT_RETRIES, username.as_deref(), password.as_deref(), + check_url_client.as_ref(), // Needs to be an `Arc` because the reqwest `Body` static lifetime requirement Arc::new(reporter), ) diff --git a/crates/uv/src/lib.rs b/crates/uv/src/lib.rs index 7055a9bfb8d8..addb2e679255 100644 --- a/crates/uv/src/lib.rs +++ b/crates/uv/src/lib.rs @@ -1137,6 +1137,7 @@ async fn run(mut cli: Cli) -> Result { trusted_publishing, keyring_provider, allow_insecure_host, + check_url, } = PublishSettings::resolve(args, filesystem); commands::publish( @@ -1147,6 +1148,8 @@ async fn run(mut cli: Cli) -> Result { allow_insecure_host, username, password, + check_url, + &cache, globals.connectivity, globals.native_tls, printer, diff --git a/crates/uv/src/settings.rs b/crates/uv/src/settings.rs index b4259fe6e7d2..d1a4fbc68fc0 100644 --- a/crates/uv/src/settings.rs +++ b/crates/uv/src/settings.rs @@ -24,7 +24,7 @@ use uv_configuration::{ NoBinary, NoBuild, PreviewMode, ProjectBuildBackend, Reinstall, SourceStrategy, TargetTriple, TrustedHost, TrustedPublishing, Upgrade, VersionControlSystem, }; -use uv_distribution_types::{DependencyMetadata, Index, IndexLocations}; +use uv_distribution_types::{DependencyMetadata, Index, IndexLocations, IndexUrl}; use uv_install_wheel::linker::LinkMode; use uv_normalize::PackageName; use uv_pep508::{ExtraName, RequirementOrigin}; @@ -2614,6 +2614,7 @@ pub(crate) struct PublishSettings { pub(crate) trusted_publishing: TrustedPublishing, pub(crate) keyring_provider: KeyringProviderType, pub(crate) allow_insecure_host: Vec, + pub(crate) check_url: Option, } impl PublishSettings { @@ -2667,6 +2668,7 @@ impl PublishSettings { }) .combine(allow_insecure_host) .unwrap_or_default(), + check_url: args.check_url, } } } diff --git a/crates/uv/tests/it/publish.rs b/crates/uv/tests/it/publish.rs index 2d62f6a336b9..752d7c427a84 100644 --- a/crates/uv/tests/it/publish.rs +++ b/crates/uv/tests/it/publish.rs @@ -22,7 +22,7 @@ fn username_password_no_longer_supported() { Publishing 1 file to https://test.pypi.org/legacy/ Uploading ok-1.0.0-py3-none-any.whl ([SIZE]) error: Failed to publish `../../scripts/links/ok-1.0.0-py3-none-any.whl` to https://test.pypi.org/legacy/ - Caused by: Permission denied (status code 403 Forbidden): 403 Username/Password authentication is no longer supported. Migrate to API Tokens or Trusted Publishers instead. See https://test.pypi.org/help/#apitoken and https://test.pypi.org/help/#trusted-publishers + Caused by: Upload failed with status code 403 Forbidden. Server says: 403 Username/Password authentication is no longer supported. Migrate to API Tokens or Trusted Publishers instead. See https://test.pypi.org/help/#apitoken and https://test.pypi.org/help/#trusted-publishers "### ); } @@ -48,7 +48,7 @@ fn invalid_token() { Publishing 1 file to https://test.pypi.org/legacy/ Uploading ok-1.0.0-py3-none-any.whl ([SIZE]) error: Failed to publish `../../scripts/links/ok-1.0.0-py3-none-any.whl` to https://test.pypi.org/legacy/ - Caused by: Permission denied (status code 403 Forbidden): 403 Invalid or non-existent authentication information. See https://test.pypi.org/help/#invalid-auth for more information. + Caused by: Upload failed with status code 403 Forbidden. Server says: 403 Invalid or non-existent authentication information. See https://test.pypi.org/help/#invalid-auth for more information. "### ); } diff --git a/docs/guides/publish.md b/docs/guides/publish.md index 6c44e3fa96d3..fb99f498cf5d 100644 --- a/docs/guides/publish.md +++ b/docs/guides/publish.md @@ -43,12 +43,9 @@ $ uv publish ``` Set a PyPI token with `--token` or `UV_PUBLISH_TOKEN`, or set a username with `--username` or -`UV_PUBLISH_USERNAME` and password with `--password` or `UV_PUBLISH_PASSWORD`. - -!!! info - - For publishing to PyPI from GitHub Actions, you don't need to set any credentials. Instead, - [add a trusted publisher to the PyPI project](https://docs.pypi.org/trusted-publishers/adding-a-publisher/). +`UV_PUBLISH_USERNAME` and password with `--password` or `UV_PUBLISH_PASSWORD`. For publishing to +PyPI from GitHub Actions, you don't need to set any credentials. Instead, +[add a trusted publisher to the PyPI project](https://docs.pypi.org/trusted-publishers/adding-a-publisher/). !!! note @@ -56,6 +53,15 @@ Set a PyPI token with `--token` or `UV_PUBLISH_TOKEN`, or set a username with `- generate a token. Using a token is equivalent to setting `--username __token__` and using the token as password. +Even though `uv publish` retries failed uploads, it can happen that publishing fails in the middle, +with some files uploaded and some files still missing. With PyPI, you can retry the exact same +command, existing identical files will be ignored. With other registries, use +`--check-url ` with the index URL (not the publish URL) the packages belong to. uv will +skip uploading files that are identical to files in the registry, and it will also handle raced +parallel uploads. Note that existing files need to match exactly with those previously uploaded to +the registry, this avoids accidentally publishing source distribution and wheels with different +contents for the same version. + ## Installing your package Test that the package can be installed and imported with `uv run`: diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 55e5f2646652..057d220d74df 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -7600,6 +7600,17 @@ uv publish [OPTIONS] [FILES]...

To view the location of the cache directory, run uv cache dir.

May also be set with the UV_CACHE_DIR environment variable.

+
--check-url check-url

Check an index URL for existing files to skip duplicate uploads.

+ +

This option allows retrying publishing that failed after only some, but not all files have been uploaded, and handles error due to parallel uploads of the same file.

+ +

Before uploading, the index is checked. If the exact same file already exists in the index, the file will not be uploaded. If an error occurred during the upload, the index is checked again, to handle cases where the identical file was uploaded twice in parallel.

+ +

The exact behavior will vary based on the index. When uploading to PyPI, uploading the same file succeeds even without --check-url, while most other indexes error.

+ +

The index must provide one of the supported hashes (SHA-256, SHA-384, or SHA-512).

+ +

May also be set with the UV_PUBLISH_CHECK_URL environment variable.

--color color-choice

Control colors in output

[default: auto]

diff --git a/scripts/publish/test_publish.py b/scripts/publish/test_publish.py index c285886eeb6c..a509bfb24cdb 100644 --- a/scripts/publish/test_publish.py +++ b/scripts/publish/test_publish.py @@ -6,9 +6,10 @@ # ] # /// -""" -Test `uv publish` by uploading a new version of astral-test- to one of -multiple indexes, exercising different options of passing credentials. +"""Test `uv publish`. + +Upload a new version of astral-test- to one of multiple indexes, exercising +different options of passing credentials. Locally, execute the credentials setting script, then run: ```shell @@ -43,66 +44,66 @@ Docs: https://docs.gitlab.com/ee/user/packages/pypi_repository/ **codeberg** -The username is astral-test-user, the password is a token (the actual account password would also -work). +The username is astral-test-user, the password is a token (the actual account password +would also work). Web: https://codeberg.org/astral-test-user/-/packages/pypi/astral-test-token/0.1.0 Docs: https://forgejo.org/docs/latest/user/packages/pypi/ """ import os import re +import time from argparse import ArgumentParser from pathlib import Path from shutil import rmtree -from subprocess import check_call +from subprocess import PIPE, check_call, check_output, run +from time import sleep import httpx from packaging.utils import parse_sdist_filename, parse_wheel_filename +from packaging.version import Version + +TEST_PYPI_PUBLISH_URL = "https://test.pypi.org/legacy/" cwd = Path(__file__).parent -# Map CLI target name to package name. +# Map CLI target name to package name and index url. # Trusted publishing can only be tested on GitHub Actions, so we have separate local # and all targets. -local_targets = { - "pypi-token": "astral-test-token", - "pypi-password-env": "astral-test-password", - "pypi-keyring": "astral-test-keyring", - "gitlab": "astral-test-token", - "codeberg": "astral-test-token", - "cloudsmith": "astral-test-token", -} -all_targets = local_targets | { - "pypi-trusted-publishing": "astral-test-trusted-publishing" +local_targets: dict[str, tuple[str, str]] = { + "pypi-token": ("astral-test-token", "https://test.pypi.org/simple/"), + "pypi-password-env": ("astral-test-password", "https://test.pypi.org/simple/"), + "pypi-keyring": ("astral-test-keyring", "https://test.pypi.org/simple/"), + "gitlab": ( + "astral-test-token", + "https://gitlab.com/api/v4/projects/61853105/packages/pypi/simple/", + ), + "codeberg": ( + "astral-test-token", + "https://codeberg.org/api/packages/astral-test-user/pypi/simple/", + ), + "cloudsmith": ( + "astral-test-token", + "https://dl.cloudsmith.io/public/astral-test/astral-test-1/python/simple/", + ), } - -project_urls = { - "astral-test-password": ["https://test.pypi.org/simple/astral-test-password/"], - "astral-test-keyring": ["https://test.pypi.org/simple/astral-test-keyring/"], - "astral-test-trusted-publishing": [ - "https://test.pypi.org/simple/astral-test-trusted-publishing/" - ], - "astral-test-token": [ - "https://test.pypi.org/simple/astral-test-token/", - "https://gitlab.com/api/v4/projects/61853105/packages/pypi/simple/astral-test-token", - "https://codeberg.org/api/packages/astral-test-user/pypi/simple/astral-test-token", - "https://dl.cloudsmith.io/public/astral-test/astral-test-1/python/simple/astral-test-token/", - ], +all_targets: dict[str, tuple[str, str]] = local_targets | { + "pypi-trusted-publishing": ( + "astral-test-trusted-publishing", + "https://test.pypi.org/simple/", + ) } -def get_new_version(project_name: str, client: httpx.Client) -> str: - """Return the next free path version on pypi""" +def get_new_version(project_name: str, client: httpx.Client) -> Version: + """Return the next free patch version on all indexes of the package.""" # To keep the number of packages small we reuse them across targets, so we have to # pick a version that doesn't exist on any target yet versions = set() - for url in project_urls[project_name]: - try: - data = client.get(url).text - except httpx.HTTPError as err: - raise RuntimeError(f"Failed to fetch {url}") from err - href_text = "]+>([^<>]+)" - for filename in list(m.group(1) for m in re.finditer(href_text, data)): + for project_name_, index_url in all_targets.values(): + if project_name_ != project_name: + continue + for filename in get_filenames((index_url + project_name + "/"), client): if filename.endswith(".whl"): [_name, version, _build, _tags] = parse_wheel_filename(filename) else: @@ -113,10 +114,32 @@ def get_new_version(project_name: str, client: httpx.Client) -> str: # Bump the path version to obtain an empty version release = list(max_version.release) release[-1] += 1 - return ".".join(str(i) for i in release) + return Version(".".join(str(i) for i in release)) -def create_project(project_name: str, uv: Path, client: httpx.Client): +def get_filenames(url: str, client: httpx.Client) -> list[str]: + """Get the filenames (source dists and wheels) from an index URL.""" + # Get with retries + error = None + for _ in range(5): + try: + response = client.get(url) + data = response.text + break + except httpx.HTTPError as err: + error = err + print(f"Error getting version, sleeping for 1s: {err}") + time.sleep(1) + else: + raise RuntimeError(f"Failed to fetch {url}") from error + # Works for the indexes in the list + href_text = r"([^<>]+)" + return [m.group(1) for m in re.finditer(href_text, data)] + + +def build_new_version(project_name: str, uv: Path, client: httpx.Client) -> Version: + """Build a source dist and a wheel with the project name and an unclaimed + version.""" if cwd.joinpath(project_name).exists(): rmtree(cwd.joinpath(project_name)) check_call([uv, "init", "--lib", project_name], cwd=cwd) @@ -128,122 +151,163 @@ def create_project(project_name: str, uv: Path, client: httpx.Client): toml = re.sub('version = ".*"', f'version = "{new_version}"', toml) pyproject_toml.write_text(toml) + # Build the project + check_call([uv, "build"], cwd=cwd.joinpath(project_name)) -def publish_project(target: str, uv: Path, client: httpx.Client): - project_name = all_targets[target] - - print(f"\nPublish {project_name} for {target}") + return new_version - # Create the project - create_project(project_name, uv, client) - # Build the project - check_call([uv, "build"], cwd=cwd.joinpath(project_name)) +def wait_for_index(index_url: str, project_name: str, version: Version, uv: Path): + """Check that the index URL was updated, wait up to 10s if necessary. - # Upload the project - if target == "pypi-token": - env = os.environ.copy() - env["UV_PUBLISH_TOKEN"] = os.environ["UV_TEST_PUBLISH_TOKEN"] - check_call( + Often enough the index takes a few seconds until the index is updated after an + upload. We need to specifically run this through uv since to query the same cache + (invalidation) as the registry client in skip existing in uv publish will later, + just `get_filenames` fails non-deterministically. + """ + for _ in range(10): + output = check_output( [ uv, - "publish", - "--publish-url", - "https://test.pypi.org/legacy/", + "pip", + "compile", + "--index", + index_url, + "--quiet", + "--generate-hashes", + "--no-header", + "--refresh-package", + project_name, + "-", ], - cwd=cwd.joinpath(project_name), - env=env, + text=True, + input=project_name, ) - elif target == "pypi-password-env": - env = os.environ.copy() - env["UV_PUBLISH_PASSWORD"] = os.environ["UV_TEST_PUBLISH_PASSWORD"] - check_call( - [ - uv, - "publish", - "--publish-url", - "https://test.pypi.org/legacy/", - "--username", - "__token__", - ], - cwd=cwd.joinpath(project_name), - env=env, + if f"{project_name}=={version}" in output and output.count("--hash") == 2: + break + + print( + f"uv pip compile not updated, missing 2 files for {version}: `{output.replace("\\\n ", "")}`, " + f"sleeping for 1s: `{index_url}`" ) - elif target == "pypi-keyring": - check_call( - [ - uv, - "publish", - "--publish-url", - "https://test.pypi.org/legacy/?astral-test-keyring", - "--username", - "__token__", - "--keyring-provider", - "subprocess", - ], - cwd=cwd.joinpath(project_name), + sleep(1) + + +def publish_project(target: str, uv: Path, client: httpx.Client): + """Test that: + + 1. An upload with a fresh version succeeds. + 2. If we're using PyPI, uploading the same files again succeeds. + 3. Check URL works and reports the files as skipped. + """ + project_name = all_targets[target][0] + + print(f"\nPublish {project_name} for {target}") + + # The distributions are build to the dist directory of the project. + version = build_new_version(project_name, uv, client) + + # Upload configuration + env, extra_args, publish_url = target_configuration(target, client) + index_url = all_targets[target][1] + env = {**os.environ, **env} + uv_cwd = cwd.joinpath(project_name) + expected_filenames = [path.name for path in uv_cwd.joinpath("dist").iterdir()] + # Ignore the gitignore file in dist + expected_filenames.remove(".gitignore") + + print( + f"\n=== 1. Publishing a new version: {project_name} {version} {publish_url} ===" + ) + args = [uv, "publish", "--publish-url", publish_url, *extra_args] + check_call(args, cwd=uv_cwd, env=env) + + if publish_url == TEST_PYPI_PUBLISH_URL: + # Confirm pypi behaviour: Uploading the same file again is fine. + print(f"\n=== 2. Publishing {project_name} {version} again (PyPI) ===") + wait_for_index(index_url, project_name, version, uv) + args = [uv, "publish", "-v", "--publish-url", publish_url, *extra_args] + output = run( + args, cwd=uv_cwd, env=env, text=True, check=True, stderr=PIPE + ).stderr + if ( + output.count("Uploading") != len(expected_filenames) + or output.count("already exists") != 0 + ): + raise RuntimeError( + f"PyPI re-upload of the same files failed: " + f"{output.count("Uploading")}, {output.count("already exists")}\n" + f"---\n{output}\n---" + ) + + print(f"\n=== 3. Publishing {project_name} {version} again with check URL ===") + wait_for_index(index_url, project_name, version, uv) + args = [ + uv, + "publish", + "-v", + "--publish-url", + publish_url, + "--check-url", + index_url, + *extra_args, + ] + output = run(args, cwd=uv_cwd, env=env, text=True, check=True, stderr=PIPE).stderr + + if output.count("Uploading") != 0 or output.count("already exists") != len( + expected_filenames + ): + raise RuntimeError( + f"Re-upload with check URL failed: " + f"{output.count("Uploading")}, {output.count("already exists")}\n" + f"---\n{output}\n---" ) + + +def target_configuration( + target: str, client: httpx.Client +) -> tuple[dict[str, str], list[str], str]: + if target == "pypi-token": + publish_url = TEST_PYPI_PUBLISH_URL + extra_args = [] + env = {"UV_PUBLISH_TOKEN": os.environ["UV_TEST_PUBLISH_TOKEN"]} + elif target == "pypi-password-env": + publish_url = TEST_PYPI_PUBLISH_URL + extra_args = ["--username", "__token__"] + env = {"UV_PUBLISH_PASSWORD": os.environ["UV_TEST_PUBLISH_PASSWORD"]} + elif target == "pypi-keyring": + publish_url = "https://test.pypi.org/legacy/?astral-test-keyring" + extra_args = ["--username", "__token__", "--keyring-provider", "subprocess"] + env = {} elif target == "pypi-trusted-publishing": - check_call( - [ - uv, - "publish", - "--publish-url", - "https://test.pypi.org/legacy/", - "--trusted-publishing", - "always", - ], - cwd=cwd.joinpath(project_name), - ) + publish_url = TEST_PYPI_PUBLISH_URL + extra_args = ["--trusted-publishing", "always"] + env = {} elif target == "gitlab": - env = os.environ.copy() - env["UV_PUBLISH_PASSWORD"] = os.environ["UV_TEST_PUBLISH_GITLAB_PAT"] - check_call( - [ - uv, - "publish", - "--publish-url", - "https://gitlab.com/api/v4/projects/61853105/packages/pypi", - "--username", - "astral-test-gitlab-pat", - ], - cwd=cwd.joinpath(project_name), - env=env, - ) + env = {"UV_PUBLISH_PASSWORD": os.environ["UV_TEST_PUBLISH_GITLAB_PAT"]} + publish_url = "https://gitlab.com/api/v4/projects/61853105/packages/pypi" + extra_args = ["--username", "astral-test-gitlab-pat"] elif target == "codeberg": - env = os.environ.copy() - env["UV_PUBLISH_USERNAME"] = "astral-test-user" - env["UV_PUBLISH_PASSWORD"] = os.environ["UV_TEST_PUBLISH_CODEBERG_TOKEN"] - check_call( - [ - uv, - "publish", - "--publish-url", - "https://codeberg.org/api/packages/astral-test-user/pypi", - ], - cwd=cwd.joinpath(project_name), - env=env, - ) + publish_url = "https://codeberg.org/api/packages/astral-test-user/pypi" + extra_args = [] + env = { + "UV_PUBLISH_USERNAME": "astral-test-user", + "UV_PUBLISH_PASSWORD": os.environ["UV_TEST_PUBLISH_CODEBERG_TOKEN"], + } elif target == "cloudsmith": - env = os.environ.copy() - env["UV_PUBLISH_TOKEN"] = os.environ["UV_TEST_PUBLISH_CLOUDSMITH_TOKEN"] - check_call( - [ - uv, - "publish", - "--publish-url", - "https://python.cloudsmith.io/astral-test/astral-test-1/", - ], - cwd=cwd.joinpath(project_name), - env=env, - ) + publish_url = "https://python.cloudsmith.io/astral-test/astral-test-1/" + extra_args = [] + env = { + "UV_PUBLISH_TOKEN": os.environ["UV_TEST_PUBLISH_CLOUDSMITH_TOKEN"], + } else: raise ValueError(f"Unknown target: {target}") + return env, extra_args, publish_url def main(): parser = ArgumentParser() - target_choices = list(all_targets) + ["local", "all"] + target_choices = [*all_targets, "local", "all"] parser.add_argument("targets", choices=target_choices, nargs="+") parser.add_argument("--uv") args = parser.parse_args()