Skip to content

Commit

Permalink
Fix ADLSLocation file parsing (apache#11395)
Browse files Browse the repository at this point in the history
* Azure: Fix ADLSLocation file parsing

* Azure: Remove invalid test cases from ADLSLocationTest

* Update Javadocs with reference to ADLS URI
  • Loading branch information
mrcnc authored Nov 5, 2024
1 parent ad24d4b commit 9be7f00
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;

/**
* This class represents a fully qualified location in Azure expressed as a URI.
* This class represents a fully qualified location to a file or directory in Azure Data Lake
* Storage Gen2 storage.
*
* <p>Locations follow the conventions used by Hadoop's Azure support, i.e.
* <p>Locations follow a URI like structure to identify resources
*
* <pre>{@code abfs[s]://[<container>@]<storage account host>/<file path>}</pre>
*
* <p>See <a href="https://hadoop.apache.org/docs/stable/hadoop-azure/abfs.html">Hadoop Azure
* Support</a>
* <p>See <a
* href="https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri#uri-syntax">Azure
* Data Lake Storage URI</a>
*/
class ADLSLocation {
private static final Pattern URI_PATTERN = Pattern.compile("^abfss?://([^/?#]+)(.*)?$");
Expand Down Expand Up @@ -64,8 +66,7 @@ class ADLSLocation {
}

String uriPath = matcher.group(2);
uriPath = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath;
this.path = uriPath.split("\\?", -1)[0].split("#", -1)[0];
this.path = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath;
}

/** Returns Azure storage account. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,23 +82,11 @@ public void testNoPath() {
assertThat(location.path()).isEqualTo("");
}

@Test
public void testQueryAndFragment() {
String p1 = "abfs://container@account.dfs.core.windows.net/path/to/file?query=foo#123";
ADLSLocation location = new ADLSLocation(p1);

assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
assertThat(location.container().get()).isEqualTo("container");
assertThat(location.path()).isEqualTo("path/to/file");
}

@Test
public void testQueryAndFragmentNoPath() {
String p1 = "abfs://container@account.dfs.core.windows.net?query=foo#123";
ADLSLocation location = new ADLSLocation(p1);

assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
assertThat(location.container().get()).isEqualTo("container");
assertThat(location.path()).isEqualTo("");
@ParameterizedTest
@ValueSource(strings = {"file?.txt", "file%3F.txt"})
public void testQuestionMarkInFileName(String path) {
String fullPath = String.format("abfs://container@account.dfs.core.windows.net/%s", path);
ADLSLocation location = new ADLSLocation(fullPath);
assertThat(location.path()).contains(path);
}
}

0 comments on commit 9be7f00

Please sign in to comment.