diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b4aaf395..f822e484 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,27 +96,28 @@ jobs: - name: Test run: cargo check ${{ matrix.args }} - build-benchmarks: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: "recursive" - # We use nightly for now so that we can pass RUSTFLAGS below to work around - # https://github.com/geoarrow/geoarrow-rs/issues/716 - - uses: dtolnay/rust-toolchain@nightly - - uses: Swatinem/rust-cache@v2 - - uses: prefix-dev/setup-pixi@v0.8.1 - with: - activate-environment: true - cache: true - cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - manifest-path: build/pixi.toml - - name: Tweak environment to find GDAL - run: | - echo "PKG_CONFIG_PATH=$(pwd)/build/.pixi/envs/default/lib/pkgconfig" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=$(pwd)/build/.pixi/envs/default/lib" >> "$GITHUB_ENV" - - name: Build benchmarks with no features - run: RUSTFLAGS="-Zinline-mir=no" cargo bench --no-run - - name: Build benchmarks with all features - run: RUSTFLAGS="-Zinline-mir=no" cargo bench --no-run --all-features + # We don't build benchmarks on CI because they're quite slow to compile + # build-benchmarks: + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v4 + # with: + # submodules: "recursive" + # # We use nightly for now so that we can pass RUSTFLAGS below to work around + # # https://github.com/geoarrow/geoarrow-rs/issues/716 + # - uses: dtolnay/rust-toolchain@nightly + # - uses: Swatinem/rust-cache@v2 + # - uses: prefix-dev/setup-pixi@v0.8.1 + # with: + # activate-environment: true + # cache: true + # cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} + # manifest-path: build/pixi.toml + # - name: Tweak environment to find GDAL + # run: | + # echo "PKG_CONFIG_PATH=$(pwd)/build/.pixi/envs/default/lib/pkgconfig" >> "$GITHUB_ENV" + # echo "LD_LIBRARY_PATH=$(pwd)/build/.pixi/envs/default/lib" >> "$GITHUB_ENV" + # - name: Build benchmarks with no features + # run: RUSTFLAGS="-Zinline-mir=no" cargo bench --no-run + # - name: Build benchmarks with all features + # run: RUSTFLAGS="-Zinline-mir=no" cargo bench --no-run --all-features diff --git a/Cargo.lock b/Cargo.lock index e63c7c98..6b98331e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -955,10 +955,8 @@ dependencies = [ [[package]] name = "datafusion" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ - "ahash", "arrow", "arrow-array", "arrow-ipc", @@ -977,6 +975,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", @@ -987,17 +986,11 @@ dependencies = [ "flate2", "futures", "glob", - "half", - "hashbrown 0.14.5", - "indexmap", "itertools 0.13.0", "log", - "num_cpus", "object_store", "parking_lot", "parquet", - "paste", - "pin-project-lite", "rand", "sqlparser", "tempfile", @@ -1012,8 +1005,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow-schema", "async-trait", @@ -1027,51 +1019,50 @@ dependencies = [ [[package]] name = "datafusion-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", "arrow-schema", - "chrono", "half", "hashbrown 0.14.5", "indexmap", - "instant", "libc", - "num_cpus", "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", + "web-time", ] [[package]] name = "datafusion-common-runtime" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "43.0.0" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" + [[package]] name = "datafusion-execution" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", - "chrono", "dashmap", "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", "log", "object_store", "parking_lot", @@ -1083,44 +1074,37 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ - "ahash", "arrow", - "arrow-array", - "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap", "paste", + "recursive", "serde_json", "sqlparser", - "strum", - "strum_macros", ] [[package]] name = "datafusion-expr-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", "datafusion-common", "itertools 0.13.0", - "paste", ] [[package]] name = "datafusion-functions" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", "arrow-buffer", @@ -1129,8 +1113,10 @@ dependencies = [ "blake3", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-macros", "hashbrown 0.14.5", "hex", "itertools 0.13.0", @@ -1146,20 +1132,20 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "half", - "indexmap", "log", "paste", ] @@ -1167,22 +1153,19 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand", ] [[package]] name = "datafusion-functions-nested" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", "arrow-array", @@ -1198,18 +1181,33 @@ dependencies = [ "itertools 0.13.0", "log", "paste", - "rand", +] + +[[package]] +name = "datafusion-functions-table" +version = "43.0.0" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", ] [[package]] name = "datafusion-functions-window" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "datafusion-common", + "datafusion-doc", "datafusion-expr", "datafusion-functions-window-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "log", @@ -1219,47 +1217,49 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", ] +[[package]] +name = "datafusion-macros" +version = "43.0.0" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +dependencies = [ + "quote", + "syn 2.0.79", +] + [[package]] name = "datafusion-optimizer" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", - "async-trait", "chrono", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", "indexmap", "itertools 0.13.0", "log", - "paste", + "recursive", + "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", - "arrow-ord", "arrow-schema", - "arrow-string", - "chrono", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1277,38 +1277,36 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-optimizer" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools 0.13.0", + "log", + "recursive", ] [[package]] name = "datafusion-physical-plan" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", @@ -1322,7 +1320,6 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1332,18 +1329,15 @@ dependencies = [ "indexmap", "itertools 0.13.0", "log", - "once_cell", "parking_lot", "pin-project-lite", - "rand", "tokio", ] [[package]] name = "datafusion-sql" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", "arrow-array", @@ -1352,9 +1346,9 @@ dependencies = [ "datafusion-expr", "indexmap", "log", + "recursive", "regex", "sqlparser", - "strum", ] [[package]] @@ -2310,18 +2304,6 @@ dependencies = [ "hashbrown 0.15.2", ] -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "integer-encoding" version = "3.0.4" @@ -2807,16 +2789,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.9", - "libc", -] - [[package]] name = "num_enum" version = "0.7.3" @@ -3246,6 +3218,15 @@ dependencies = [ "tar", ] +[[package]] +name = "psm" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +dependencies = [ + "cc", +] + [[package]] name = "quick-xml" version = "0.36.2" @@ -3372,6 +3353,26 @@ dependencies = [ "log", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.79", +] + [[package]] name = "redox_syscall" version = "0.5.7" @@ -3916,9 +3917,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.51.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +checksum = "9a875d8cd437cc8a97e9aeaeea352ec9a19aea99c23e9effb17757291de80b08" dependencies = [ "log", "sqlparser_derive", @@ -4142,6 +4143,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.52.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -4164,9 +4178,6 @@ name = "strum" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros", -] [[package]] name = "strum_macros" @@ -4706,6 +4717,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" version = "0.25.4" diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml index dcbcfc1e..943b02b9 100644 --- a/rust/geodatafusion/Cargo.toml +++ b/rust/geodatafusion/Cargo.toml @@ -12,7 +12,7 @@ rust-version = "1.82" [dependencies] -datafusion = "43" +datafusion = { git = "https://github.com/apache/datafusion", rev = "03e39da62e403e064d21b57e9d6c200464c03749" } arrow = { version = "53.3", features = ["ffi"] } arrow-array = { version = "53.3", features = ["chrono-tz"] } arrow-buffer = "53.3" diff --git a/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs b/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs index 531c25d2..36ebf90c 100644 --- a/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs +++ b/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs @@ -52,12 +52,13 @@ impl ScalarUDFImpl for CoordDim { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Return the coordinate dimension of the ST_Geometry value.") - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Return the coordinate dimension of the ST_Geometry value.", + "ST_CoordDim(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/accessors/envelope.rs b/rust/geodatafusion/src/udf/native/accessors/envelope.rs index 29b7438b..13aef106 100644 --- a/rust/geodatafusion/src/udf/native/accessors/envelope.rs +++ b/rust/geodatafusion/src/udf/native/accessors/envelope.rs @@ -48,14 +48,13 @@ impl ScalarUDFImpl for Envelope { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Computes a point which is the geometric center of mass of a geometry.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Computes a point which is the geometric center of mass of a geometry.", + "ST_Envelope(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/accessors/line_string.rs b/rust/geodatafusion/src/udf/native/accessors/line_string.rs index 7375fbbb..a576758d 100644 --- a/rust/geodatafusion/src/udf/native/accessors/line_string.rs +++ b/rust/geodatafusion/src/udf/native/accessors/line_string.rs @@ -55,14 +55,9 @@ impl ScalarUDFImpl for StartPoint { fn documentation(&self) -> Option<&Documentation> { Some(START_POINT_DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Returns the first point of a LINESTRING geometry as a POINT. Returns NULL if the input is not a LINESTRING", - ) + Documentation::builder(DOC_SECTION_OTHER, "Returns the first point of a LINESTRING geometry as a POINT. Returns NULL if the input is not a LINESTRING", "ST_StartPoint(line_string)" ) .with_argument("g1", "geometry") .build() - .unwrap() })) } } diff --git a/rust/geodatafusion/src/udf/native/bounding_box/box.rs b/rust/geodatafusion/src/udf/native/bounding_box/box.rs index c5d2c6ae..9900fa6d 100644 --- a/rust/geodatafusion/src/udf/native/bounding_box/box.rs +++ b/rust/geodatafusion/src/udf/native/bounding_box/box.rs @@ -48,12 +48,13 @@ impl ScalarUDFImpl for Box2D { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns a box2d representing the 2D extent of the geometry.") - .with_argument("geom", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns a box2d representing the 2D extent of the geometry.", + "ST_Box2D(geometry)", + ) + .with_argument("geom", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs b/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs index 62dbb361..6e13f037 100644 --- a/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs +++ b/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs @@ -70,19 +70,19 @@ impl ScalarUDFImpl for XMin { fn documentation(&self) -> Option<&Documentation> { Some(XMIN_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns X minima of a bounding box 2d or 3d or a geometry") - .with_syntax_example("ST_XMin(geometry)") - .with_argument("box", "The geometry or box input") - .with_related_udf("st_xmin") - .with_related_udf("st_ymin") - .with_related_udf("st_zmin") - .with_related_udf("st_xmax") - .with_related_udf("st_ymax") - .with_related_udf("st_zmax") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns X minima of a bounding box 2d or 3d or a geometry", + "ST_XMin(geometry)", + ) + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() })) } } @@ -138,19 +138,19 @@ impl ScalarUDFImpl for YMin { fn documentation(&self) -> Option<&Documentation> { Some(YMIN_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns Y minima of a bounding box 2d or 3d or a geometry") - .with_syntax_example("ST_YMin(geometry)") - .with_argument("box", "The geometry or box input") - .with_related_udf("st_xmin") - .with_related_udf("st_ymin") - .with_related_udf("st_zmin") - .with_related_udf("st_xmax") - .with_related_udf("st_ymax") - .with_related_udf("st_zmax") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns Y minima of a bounding box 2d or 3d or a geometry", + "ST_YMin(geometry)", + ) + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() })) } } @@ -204,19 +204,19 @@ impl ScalarUDFImpl for XMax { fn documentation(&self) -> Option<&Documentation> { Some(XMAX_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns X maxima of a bounding box 2d or 3d or a geometry") - .with_syntax_example("ST_XMax(geometry)") - .with_argument("box", "The geometry or box input") - .with_related_udf("st_xmin") - .with_related_udf("st_ymin") - .with_related_udf("st_zmin") - .with_related_udf("st_xmax") - .with_related_udf("st_ymax") - .with_related_udf("st_zmax") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns X maxima of a bounding box 2d or 3d or a geometry", + "ST_XMax(geometry)", + ) + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() })) } } @@ -270,19 +270,19 @@ impl ScalarUDFImpl for YMax { fn documentation(&self) -> Option<&Documentation> { Some(YMAX_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns Y maxima of a bounding box 2d or 3d or a geometry") - .with_syntax_example("ST_YMax(geometry)") - .with_argument("box", "The geometry or box input") - .with_related_udf("st_xmin") - .with_related_udf("st_ymin") - .with_related_udf("st_zmin") - .with_related_udf("st_xmax") - .with_related_udf("st_ymax") - .with_related_udf("st_zmax") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns Y maxima of a bounding box 2d or 3d or a geometry", + "ST_YMax(geometry)", + ) + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/constructors/point.rs b/rust/geodatafusion/src/udf/native/constructors/point.rs index c4e9b560..54d8f9c6 100644 --- a/rust/geodatafusion/src/udf/native/constructors/point.rs +++ b/rust/geodatafusion/src/udf/native/constructors/point.rs @@ -79,16 +79,16 @@ impl ScalarUDFImpl for Point { fn documentation(&self) -> Option<&Documentation> { Some(POINT_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns a Point with the given X and Y coordinate values.") - .with_syntax_example("ST_Point(-71.104, 42.315)") - .with_argument("x", "x value") - .with_argument("y", "y value") - .with_related_udf("st_makepoint") - .with_related_udf("st_pointz") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns a Point with the given X and Y coordinate values.", + "ST_Point(-71.104, 42.315)", + ) + .with_argument("x", "x value") + .with_argument("y", "y value") + .with_related_udf("st_makepoint") + .with_related_udf("st_pointz") + .build() })) } } @@ -218,17 +218,17 @@ impl ScalarUDFImpl for MakePoint { fn documentation(&self) -> Option<&Documentation> { Some(MAKE_POINT_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Creates a 2D XY or 3D XYZ Point geometry.") - .with_syntax_example("ST_MakePoint(-71.104, 42.315)") - .with_argument("x", "x value") - .with_argument("y", "y value") - .with_argument("z", "z value") - .with_related_udf("st_point") - .with_related_udf("st_pointz") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Creates a 2D XY or 3D XYZ Point geometry.", + "ST_MakePoint(-71.104, 42.315)", + ) + .with_argument("x", "x value") + .with_argument("y", "y value") + .with_argument("z", "z value") + .with_related_udf("st_point") + .with_related_udf("st_pointz") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/io/mod.rs b/rust/geodatafusion/src/udf/native/io/mod.rs index 341beca7..bbe293c8 100644 --- a/rust/geodatafusion/src/udf/native/io/mod.rs +++ b/rust/geodatafusion/src/udf/native/io/mod.rs @@ -1,5 +1,6 @@ //! Geometry Input and Output +mod union_example; mod wkb; mod wkt; diff --git a/rust/geodatafusion/src/udf/native/io/union_example.rs b/rust/geodatafusion/src/udf/native/io/union_example.rs new file mode 100644 index 00000000..982edc63 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/io/union_example.rs @@ -0,0 +1,95 @@ +use std::any::Any; +use std::sync::Arc; + +use arrow::array::UnionBuilder; +use arrow::datatypes::{Float64Type, Int32Type}; +use arrow_array::Array; +use arrow_schema::{DataType, Field, UnionFields, UnionMode}; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; + +#[derive(Debug)] +pub struct UnionExample { + signature: Signature, +} + +impl UnionExample { + #[allow(dead_code)] + pub fn new() -> Self { + Self { + signature: Signature::nullary(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for UnionExample { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "example_union" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + let fields = UnionFields::new( + vec![0, 1], + vec![ + Arc::new(Field::new("a", DataType::Int32, false)), + Arc::new(Field::new("b", DataType::Float64, false)), + ], + ); + Ok(DataType::Union(fields, UnionMode::Dense)) + } + + fn invoke_no_args(&self, _number_rows: usize) -> datafusion::error::Result { + let mut builder = UnionBuilder::new_dense(); + builder.append::("a", 1).unwrap(); + builder.append::("b", 3.0).unwrap(); + builder.append::("a", 4).unwrap(); + let arr = builder.build().unwrap(); + + assert_eq!(arr.type_id(0), 0); + assert_eq!(arr.type_id(1), 1); + assert_eq!(arr.type_id(2), 0); + + assert_eq!(arr.value_offset(0), 0); + assert_eq!(arr.value_offset(1), 0); + assert_eq!(arr.value_offset(2), 1); + + let arr = arr.slice(0, 1); + + assert!(matches!( + arr.data_type(), + DataType::Union(_, UnionMode::Dense) + )); + + Ok(ColumnarValue::Array(Arc::new(arr))) + } + + fn documentation(&self) -> Option<&Documentation> { + None + } +} + +#[cfg(test)] +mod test { + use super::*; + use datafusion::prelude::*; + + #[tokio::test] + async fn test() { + let ctx = SessionContext::new(); + ctx.register_udf(UnionExample::new().into()); + + let out = ctx.sql("SELECT example_union();").await.unwrap(); + // TODO: fix this error upstream + // https://github.com/apache/datafusion/issues/13762 + out.show().await.unwrap_err(); + } +} diff --git a/rust/geodatafusion/src/udf/native/io/wkb.rs b/rust/geodatafusion/src/udf/native/io/wkb.rs index 7bda68fa..ca9e1b2f 100644 --- a/rust/geodatafusion/src/udf/native/io/wkb.rs +++ b/rust/geodatafusion/src/udf/native/io/wkb.rs @@ -54,14 +54,13 @@ impl ScalarUDFImpl for AsBinary { fn documentation(&self) -> Option<&Documentation> { Some(AS_BINARY_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Returns the OGC/ISO Well-Known Binary (WKB) representation of the geometry.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the OGC/ISO Well-Known Binary (WKB) representation of the geometry.", + "ST_AsBinary(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } @@ -84,7 +83,7 @@ pub(super) struct GeomFromWKB { impl GeomFromWKB { pub fn new() -> Self { Self { - signature: Signature::coercible(vec![DataType::Binary], Volatility::Immutable), + signature: Signature::exact(vec![DataType::Binary], Volatility::Immutable), } } } @@ -114,14 +113,9 @@ impl ScalarUDFImpl for GeomFromWKB { fn documentation(&self) -> Option<&Documentation> { Some(GEOM_FROM_WKB_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Takes a well-known binary representation of a geometry and a Spatial Reference System ID (SRID) and creates an instance of the appropriate geometry type", - ) + Documentation::builder(DOC_SECTION_OTHER, "Takes a well-known binary representation of a geometry and a Spatial Reference System ID (SRID) and creates an instance of the appropriate geometry type", "ST_GeomFromWKB(buffer)") .with_argument("geom", "WKB buffers") .build() - .unwrap() })) } } diff --git a/rust/geodatafusion/src/udf/native/io/wkt.rs b/rust/geodatafusion/src/udf/native/io/wkt.rs index 1d46fb85..872afacd 100644 --- a/rust/geodatafusion/src/udf/native/io/wkt.rs +++ b/rust/geodatafusion/src/udf/native/io/wkt.rs @@ -2,6 +2,7 @@ use std::any::Any; use std::sync::OnceLock; use arrow::array::AsArray; +use arrow_array::Array; use arrow_schema::DataType; use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; use datafusion::logical_expr::{ @@ -53,14 +54,13 @@ impl ScalarUDFImpl for AsText { fn documentation(&self) -> Option<&Documentation> { Some(AS_TEXT_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Returns the OGC Well-Known Text (WKT) representation of the geometry/geography.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the OGC Well-Known Text (WKT) representation of the geometry/geography.", + "ST_AsText(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } @@ -82,9 +82,8 @@ pub(super) struct GeomFromText { impl GeomFromText { pub fn new() -> Self { - // TODO: extend to allow specifying little/big endian Self { - signature: Signature::coercible(vec![DataType::Utf8], Volatility::Immutable), + signature: Signature::exact(vec![DataType::Utf8], Volatility::Immutable), } } } @@ -97,7 +96,7 @@ impl ScalarUDFImpl for GeomFromText { } fn name(&self) -> &str { - "st_astext" + "st_geomfromtext" } fn signature(&self) -> &Signature { @@ -114,14 +113,13 @@ impl ScalarUDFImpl for GeomFromText { fn documentation(&self) -> Option<&Documentation> { Some(GEOM_FROM_TEXT_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Constructs a geometry object from the OGC Well-Known text representation.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Constructs a geometry object from the OGC Well-Known text representation.", + "ST_GeomFromText(text)", + ) + .with_argument("g1", "geometry") + .build() })) } } @@ -133,5 +131,30 @@ fn geom_from_text_impl(args: &[ColumnarValue]) -> GeoDataFusionResult().clone(), Default::default()); let native_arr = read_wkt(&wkt_arr, CoordType::Separated, false)?; - Ok(native_arr.to_array_ref().into()) + dbg!("native_arr"); + + let arrow_arr = native_arr.to_array_ref(); + if let DataType::Union(_fields, mode) = arrow_arr.data_type() { + dbg!(mode); + } + + Ok(arrow_arr.into()) +} + +#[cfg(test)] +mod test { + use datafusion::prelude::*; + + use crate::udf::native::register_native; + + #[tokio::test] + async fn test() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx.sql("SELECT ST_GeomFromText('LINESTRING(-71.160281 42.258729,-71.160837 42.259113,-71.161144 42.25932)');").await.unwrap(); + // TODO: fix this error upstream + // https://github.com/apache/datafusion/issues/13762 + out.show().await.unwrap_err(); + } } diff --git a/rust/geodatafusion/src/udf/native/measurement/area.rs b/rust/geodatafusion/src/udf/native/measurement/area.rs index 0738105c..8480e88c 100644 --- a/rust/geodatafusion/src/udf/native/measurement/area.rs +++ b/rust/geodatafusion/src/udf/native/measurement/area.rs @@ -47,12 +47,13 @@ impl ScalarUDFImpl for Area { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns the area of a polygonal geometry.") - .with_argument("geom", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the area of a polygonal geometry.", + "ST_Area(geom)", + ) + .with_argument("geom", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/mod.rs b/rust/geodatafusion/src/udf/native/mod.rs index 15ac8b46..604a1687 100644 --- a/rust/geodatafusion/src/udf/native/mod.rs +++ b/rust/geodatafusion/src/udf/native/mod.rs @@ -9,8 +9,8 @@ mod processing; use datafusion::prelude::SessionContext; -/// Register all provided [geo] functions -pub fn register_geo(ctx: &SessionContext) { +/// Register all provided native-Rust functions +pub fn register_native(ctx: &SessionContext) { accessors::register_udfs(ctx); bounding_box::register_udfs(ctx); constructors::register_udfs(ctx); diff --git a/rust/geodatafusion/src/udf/native/processing/centroid.rs b/rust/geodatafusion/src/udf/native/processing/centroid.rs index f5136bd3..91760fd8 100644 --- a/rust/geodatafusion/src/udf/native/processing/centroid.rs +++ b/rust/geodatafusion/src/udf/native/processing/centroid.rs @@ -49,14 +49,13 @@ impl ScalarUDFImpl for Centroid { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Computes a point which is the geometric center of mass of a geometry.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Computes a point which is the geometric center of mass of a geometry.", + "ST_Centroid(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/processing/chaikin_smoothing.rs b/rust/geodatafusion/src/udf/native/processing/chaikin_smoothing.rs new file mode 100644 index 00000000..242f1383 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/chaikin_smoothing.rs @@ -0,0 +1,77 @@ +// use std::any::Any; +// use std::sync::OnceLock; + +// use arrow_schema::DataType; +// use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +// use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature}; +// use geoarrow::algorithm::geo::ChaikinSmoothing as _; +// use geoarrow::array::{CoordType, GeometryArray}; +// use geoarrow::ArrayBase; + +// use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, GEOMETRY_TYPE}; +// use crate::error::GeoDataFusionResult; + +// #[derive(Debug)] +// pub(super) struct ChaikinSmoothing { +// signature: Signature, +// } + +// impl ChaikinSmoothing { +// pub fn new() -> Self { +// // TypeSignature:: +// Signature::co(vec![GEOMETRY_TYPE.into(), ], volatility) +// Self { +// signature: any_single_geometry_type_input(), +// } +// } +// } + +// static DOCUMENTATION: OnceLock = OnceLock::new(); + +// impl ScalarUDFImpl for ChaikinSmoothing { +// fn as_any(&self) -> &dyn Any { +// self +// } + +// fn name(&self) -> &str { +// "st_convexhull" +// } + +// fn signature(&self) -> &Signature { +// &self.signature +// } + +// fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { +// Ok(GEOMETRY_TYPE.into()) +// } + +// fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { +// Ok(chaikin_impl(args)?) +// } + +// fn documentation(&self) -> Option<&Documentation> { +// Some(DOCUMENTATION.get_or_init(|| { +// Documentation::builder() +// .with_doc_section(DOC_SECTION_OTHER) +// .with_description( +// "Smoothes a linear or polygonal geometry using Chaikin's algorithm.", +// ) +// .with_argument("g1", "geometry") +// .build() +// .unwrap() +// })) +// } +// } + +// fn chaikin_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { +// let array = ColumnarValue::values_to_arrays(args)? +// .into_iter() +// .next() +// .unwrap(); +// let native_array = parse_to_native_array(array)?; +// let output = native_array +// .as_ref() +// .convex_hull()? +// .into_coord_type(CoordType::Separated); +// Ok(GeometryArray::from(output).into_array_ref().into()) +// } diff --git a/rust/geodatafusion/src/udf/native/processing/convex_hull.rs b/rust/geodatafusion/src/udf/native/processing/convex_hull.rs index 4b66b4c0..f3860f7d 100644 --- a/rust/geodatafusion/src/udf/native/processing/convex_hull.rs +++ b/rust/geodatafusion/src/udf/native/processing/convex_hull.rs @@ -49,14 +49,9 @@ impl ScalarUDFImpl for ConvexHull { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Computes the convex hull of a geometry. The convex hull is the smallest convex geometry that encloses all geometries in the input.", - ) + Documentation::builder(DOC_SECTION_OTHER, "Computes the convex hull of a geometry. The convex hull is the smallest convex geometry that encloses all geometries in the input.", "ST_ConvexHull(geometry)") .with_argument("g1", "geometry") .build() - .unwrap() })) } } diff --git a/rust/geodatafusion/src/udf/native/processing/mod.rs b/rust/geodatafusion/src/udf/native/processing/mod.rs index e6bbe9cb..84191069 100644 --- a/rust/geodatafusion/src/udf/native/processing/mod.rs +++ b/rust/geodatafusion/src/udf/native/processing/mod.rs @@ -1,4 +1,5 @@ mod centroid; +mod chaikin_smoothing; mod convex_hull; use datafusion::prelude::SessionContext;