diff --git a/python/Makefile b/python/Makefile index 444a3785bcc5..932c90dd775b 100644 --- a/python/Makefile +++ b/python/Makefile @@ -17,7 +17,7 @@ install: pip install poetry - poetry install -E pyarrow -E hive -E s3fs -E glue -E adlfs -E duckdb -E ray + poetry install -E pyarrow -E hive -E s3fs -E glue -E adlfs -E duckdb -E ray -E sql-postgres check-license: ./dev/check-license diff --git a/python/mkdocs/docs/configuration.md b/python/mkdocs/docs/configuration.md index f461f249c1b8..f6b24745c1f6 100644 --- a/python/mkdocs/docs/configuration.md +++ b/python/mkdocs/docs/configuration.md @@ -24,7 +24,7 @@ hide: # Catalogs -PyIceberg currently has native support for REST, Hive and Glue. +PyIceberg currently has native support for REST, SQL, Hive, Glue and DynamoDB. There are three ways to pass in configuration: @@ -107,6 +107,18 @@ catalog: | rest.signing-region | us-east-1 | The region to use when SigV4 signing a request | | rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request | +## SQL Catalog + +The SQL catalog requires a database for its backend. As of now, PyIceberg only supports PostgreSQL through psycopg2. +The database connection has to be configured using the `uri` property (see SQLAlchemy's [documentation for URL format](https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls)): + +```yaml +catalog: + default: + type: sql + uri: postgresql+psycopg2://username:password@localhost/mydatabase +``` + ## Hive Catalog ```yaml diff --git a/python/poetry.lock b/python/poetry.lock index f8fab0c39053..cd7a34d9485d 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. 
[[package]] name = "adlfs" version = "2023.4.0" description = "Access Azure Datalake Gen1 with fsspec and dask" +category = "main" optional = true python-versions = ">=3.8" files = [ @@ -26,6 +27,7 @@ docs = ["furo", "myst-parser", "numpydoc", "sphinx"] name = "aiobotocore" version = "2.5.1" description = "Async client for aws services using botocore and aiohttp" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -47,6 +49,7 @@ boto3 = ["boto3 (>=1.26.161,<1.26.162)"] name = "aiohttp" version = "3.8.4" description = "Async http client/server framework (asyncio)" +category = "main" optional = true python-versions = ">=3.6" files = [ @@ -155,6 +158,7 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aioitertools" version = "0.11.0" description = "itertools and builtins for AsyncIO and mixed iterables" +category = "main" optional = true python-versions = ">=3.6" files = [ @@ -169,6 +173,7 @@ typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""} name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -183,6 +188,7 @@ frozenlist = ">=1.1.0" name = "async-timeout" version = "4.0.2" description = "Timeout context manager for asyncio programs" +category = "main" optional = true python-versions = ">=3.6" files = [ @@ -194,6 +200,7 @@ files = [ name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -212,6 +219,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "azure-core" version = "1.27.1" description = "Microsoft Azure Core Library for Python" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -231,6 +239,7 @@ aio = ["aiohttp (>=3.0)"] name = "azure-datalake-store" version = "0.0.53" description = "Azure Data Lake Store Filesystem Client 
Library for Python" +category = "main" optional = true python-versions = "*" files = [ @@ -247,6 +256,7 @@ requests = ">=2.20.0" name = "azure-identity" version = "1.13.0" description = "Microsoft Azure Identity Library for Python" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -265,6 +275,7 @@ six = ">=1.12.0" name = "azure-storage-blob" version = "12.16.0" description = "Microsoft Azure Blob Storage Client Library for Python" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -285,6 +296,7 @@ aio = ["azure-core[aio] (>=1.26.0,<2.0.0)"] name = "boto3" version = "1.26.161" description = "The AWS SDK for Python" +category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -304,6 +316,7 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "botocore" version = "1.29.161" description = "Low-level, data-driven core of boto 3." +category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -323,6 +336,7 @@ crt = ["awscrt (==0.16.9)"] name = "build" version = "0.10.0" description = "A simple, correct Python build frontend" +category = "dev" optional = false python-versions = ">= 3.7" files = [ @@ -346,6 +360,7 @@ virtualenv = ["virtualenv (>=20.0.35)"] name = "certifi" version = "2023.5.7" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -357,6 +372,7 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = "*" files = [ @@ -433,6 +449,7 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -444,6 +461,7 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. 
Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -528,6 +546,7 @@ files = [ name = "click" version = "8.1.3" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -542,6 +561,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -553,6 +573,7 @@ files = [ name = "coverage" version = "7.2.7" description = "Code coverage measurement for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -628,6 +649,7 @@ toml = ["tomli"] name = "cryptography" version = "41.0.1" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -669,6 +691,7 @@ test-randomorder = ["pytest-randomly"] name = "distlib" version = "0.3.6" description = "Distribution utilities" +category = "dev" optional = false python-versions = "*" files = [ @@ -680,6 +703,7 @@ files = [ name = "docutils" version = "0.20.1" description = "Docutils -- Python Documentation Utilities" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -691,6 +715,7 @@ files = [ name = "duckdb" version = "0.8.1" description = "DuckDB embedded database" +category = "main" optional = true python-versions = "*" files = [ @@ -752,6 +777,7 @@ files = [ name = "exceptiongroup" version = "1.1.2" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -766,6 +792,7 @@ test = ["pytest (>=6)"] name = "fastavro" version = "1.7.4" description = "Fast read/write of AVRO files" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -812,6 +839,7 @@ zstandard = ["zstandard"] name = "filelock" version = "3.12.2" description = "A platform independent file lock." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -827,6 +855,7 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "p name = "frozenlist" version = "1.3.3" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -910,6 +939,7 @@ files = [ name = "fsspec" version = "2023.6.0" description = "File-system specification" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -941,10 +971,85 @@ smb = ["smbprotocol"] ssh = ["paramiko"] tqdm = ["tqdm"] +[[package]] +name = "greenlet" +version = "2.0.2" +description = "Lightweight in-process concurrent programming" +category = "main" +optional = true +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" +files = [ + {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, + {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, + {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, + {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, + {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, + {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d75209eed723105f9596807495d58d10b3470fa6732dd6756595e89925ce2470"}, + {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a51c9751078733d88e013587b108f1b7a1fb106d402fb390740f002b6f6551a"}, + {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, + {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, + {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, + {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:eff4eb9b7eb3e4d0cae3d28c283dc16d9bed6b193c2e1ace3ed86ce48ea8df19"}, + {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5454276c07d27a740c5892f4907c86327b632127dd9abec42ee62e12427ff7e3"}, + {file = "greenlet-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:7cafd1208fdbe93b67c7086876f061f660cfddc44f404279c1585bbf3cdc64c5"}, + {file = "greenlet-2.0.2-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:910841381caba4f744a44bf81bfd573c94e10b3045ee00de0cbf436fe50673a6"}, + {file = "greenlet-2.0.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = 
"sha256:18a7f18b82b52ee85322d7a7874e676f34ab319b9f8cce5de06067384aa8ff43"}, + {file = "greenlet-2.0.2-cp35-cp35m-win32.whl", hash = "sha256:03a8f4f3430c3b3ff8d10a2a86028c660355ab637cee9333d63d66b56f09d52a"}, + {file = "greenlet-2.0.2-cp35-cp35m-win_amd64.whl", hash = "sha256:4b58adb399c4d61d912c4c331984d60eb66565175cdf4a34792cd9600f21b394"}, + {file = "greenlet-2.0.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:703f18f3fda276b9a916f0934d2fb6d989bf0b4fb5a64825260eb9bfd52d78f0"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:32e5b64b148966d9cccc2c8d35a671409e45f195864560829f395a54226408d3"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dd11f291565a81d71dab10b7033395b7a3a5456e637cf997a6f33ebdf06f8db"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0f72c9ddb8cd28532185f54cc1453f2c16fb417a08b53a855c4e6a418edd099"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd021c754b162c0fb55ad5d6b9d960db667faad0fa2ff25bb6e1301b0b6e6a75"}, + {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:3c9b12575734155d0c09d6c3e10dbd81665d5c18e1a7c6597df72fd05990c8cf"}, + {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b9ec052b06a0524f0e35bd8790686a1da006bd911dd1ef7d50b77bfbad74e292"}, + {file = "greenlet-2.0.2-cp36-cp36m-win32.whl", hash = "sha256:dbfcfc0218093a19c252ca8eb9aee3d29cfdcb586df21049b9d777fd32c14fd9"}, + {file = "greenlet-2.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:9f35ec95538f50292f6d8f2c9c9f8a3c6540bbfec21c9e5b4b751e0a7c20864f"}, + {file = "greenlet-2.0.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:d5508f0b173e6aa47273bdc0a0b5ba055b59662ba7c7ee5119528f466585526b"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = 
"sha256:f82d4d717d8ef19188687aa32b8363e96062911e63ba22a0cff7802a8e58e5f1"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9c59a2120b55788e800d82dfa99b9e156ff8f2227f07c5e3012a45a399620b7"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2780572ec463d44c1d3ae850239508dbeb9fed38e294c68d19a24d925d9223ca"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:937e9020b514ceedb9c830c55d5c9872abc90f4b5862f89c0887033ae33c6f73"}, + {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:36abbf031e1c0f79dd5d596bfaf8e921c41df2bdf54ee1eed921ce1f52999a86"}, + {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:18e98fb3de7dba1c0a852731c3070cf022d14f0d68b4c87a19cc1016f3bb8b33"}, + {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, + {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, + {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acd2162a36d3de67ee896c43effcd5ee3de247eb00354db411feb025aa319857"}, + {file = 
"greenlet-2.0.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0bf60faf0bc2468089bdc5edd10555bab6e85152191df713e2ab1fcc86382b5a"}, + {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, + {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, + {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, + {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be4ed120b52ae4d974aa40215fcdfde9194d63541c7ded40ee12eb4dda57b76b"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94c817e84245513926588caf1152e3b559ff794d505555211ca041f032abbb6b"}, + {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1a819eef4b0e0b96bb0d98d797bef17dc1b4a10e8d7446be32d1da33e095dbb8"}, + {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7efde645ca1cc441d6dc4b48c0f7101e8d86b54c8530141b09fd31cef5149ec9"}, + {file = "greenlet-2.0.2-cp39-cp39-win32.whl", hash = "sha256:ea9872c80c132f4663822dd2a08d404073a5a9b5ba6155bea72fb2a79d1093b5"}, + {file = "greenlet-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:db1a39669102a1d8d12b57de2bb7e2ec9066a6f2b3da35ae511ff93b01b5d564"}, + {file = "greenlet-2.0.2.tar.gz", hash = 
"sha256:e7c8dc13af7db097bed64a051d2dd49e9f0af495c26995c00a9ee842690d34c0"}, +] + +[package.extras] +docs = ["Sphinx", "docutils (<0.18)"] +test = ["objgraph", "psutil"] + [[package]] name = "grpcio" version = "1.49.1" description = "HTTP/2-based RPC framework" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1005,6 +1110,7 @@ protobuf = ["grpcio-tools (>=1.49.1)"] name = "grpcio" version = "1.51.3" description = "HTTP/2-based RPC framework" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1062,6 +1168,7 @@ protobuf = ["grpcio-tools (>=1.51.3)"] name = "identify" version = "2.5.24" description = "File identification library for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1076,6 +1183,7 @@ license = ["ukkonen"] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1087,6 +1195,7 @@ files = [ name = "importlib-metadata" version = "6.7.0" description = "Read metadata from Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1106,6 +1215,7 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs name = "importlib-resources" version = "5.12.0" description = "Read resources from Python packages" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1124,6 +1234,7 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1135,6 +1246,7 @@ files = [ name = "isodate" version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" +category = "main" optional = true python-versions = "*" files = [ @@ -1149,6 +1261,7 @@ six = "*" name = "jinja2" version = "3.1.2" description 
= "A very fast and expressive template engine." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1166,6 +1279,7 @@ i18n = ["Babel (>=2.7)"] name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1177,6 +1291,7 @@ files = [ name = "jsonschema" version = "4.17.3" description = "An implementation of JSON Schema validation for Python" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1198,6 +1313,7 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1222,6 +1338,7 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1281,6 +1398,7 @@ files = [ name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1292,6 +1410,7 @@ files = [ name = "mmhash3" version = "3.0.1" description = "Python wrapper for MurmurHash (MurmurHash3), a set of fast and robust hash functions." 
+category = "main" optional = false python-versions = "*" files = [ @@ -1335,6 +1454,7 @@ files = [ name = "moto" version = "4.1.13" description = "" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1381,6 +1501,7 @@ xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] name = "msal" version = "1.22.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." +category = "main" optional = true python-versions = "*" files = [ @@ -1400,6 +1521,7 @@ broker = ["pymsalruntime (>=0.13.2,<0.14)"] name = "msal-extensions" version = "1.0.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." 
+category = "main" optional = true python-versions = "*" files = [ @@ -1418,6 +1540,7 @@ portalocker = [ name = "msgpack" version = "1.0.5" description = "MessagePack serializer" +category = "main" optional = true python-versions = "*" files = [ @@ -1490,6 +1613,7 @@ files = [ name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1573,6 +1697,7 @@ files = [ name = "nodeenv" version = "1.8.0" description = "Node.js virtual environment builder" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -1587,6 +1712,7 @@ setuptools = "*" name = "numpy" version = "1.24.4" description = "Fundamental package for array computing in Python" +category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1624,6 +1750,7 @@ files = [ name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1635,6 +1762,7 @@ files = [ name = "pandas" version = "2.0.3" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1702,6 +1830,7 @@ xml = ["lxml (>=4.6.3)"] name = "pkgutil-resolve-name" version = "1.3.10" description = "Resolve a name to an object." +category = "main" optional = true python-versions = ">=3.6" files = [ @@ -1713,6 +1842,7 @@ files = [ name = "platformdirs" version = "3.8.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1728,6 +1858,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- name = "pluggy" version = "1.2.0" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1743,6 +1874,7 @@ testing = ["pytest", "pytest-benchmark"] name = "portalocker" version = "2.7.0" description = "Wraps the portalocker recipe for easy usage" +category = "main" optional = true python-versions = ">=3.5" files = [ @@ -1762,6 +1894,7 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p name = "pre-commit" version = "3.3.3" description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1780,6 +1913,7 @@ virtualenv = ">=20.10.0" name = "protobuf" version = "4.23.3" description = "" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1798,10 +1932,83 @@ files = [ {file = "protobuf-4.23.3.tar.gz", hash = "sha256:7a92beb30600332a52cdadbedb40d33fd7c8a0d7f549c440347bc606fb3fe34b"}, ] +[[package]] +name = "psycopg2-binary" +version = "2.9.6" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "psycopg2-binary-2.9.6.tar.gz", hash = "sha256:1f64dcfb8f6e0c014c7f55e51c9759f024f70ea572fbdef123f85318c297947c"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d26e0342183c762de3276cca7a530d574d4e25121ca7d6e4a98e4f05cb8e4df7"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c48d8f2db17f27d41fb0e2ecd703ea41984ee19362cbce52c097963b3a1b4365"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ffe9dc0a884a8848075e576c1de0290d85a533a9f6e9c4e564f19adf8f6e54a7"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a76e027f87753f9bd1ab5f7c9cb8c7628d1077ef927f5e2446477153a602f2c"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6460c7a99fc939b849431f1e73e013d54aa54293f30f1109019c56a0b2b2ec2f"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae102a98c547ee2288637af07393dd33f440c25e5cd79556b04e3fca13325e5f"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9972aad21f965599ed0106f65334230ce826e5ae69fda7cbd688d24fa922415e"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7a40c00dbe17c0af5bdd55aafd6ff6679f94a9be9513a4c7e071baf3d7d22a70"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:cacbdc5839bdff804dfebc058fe25684cae322987f7a38b0168bc1b2df703fb1"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7f0438fa20fb6c7e202863e0d5ab02c246d35efb1d164e052f2f3bfe2b152bd0"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-win32.whl", hash = "sha256:b6c8288bb8a84b47e07013bb4850f50538aa913d487579e1921724631d02ea1b"}, + {file = "psycopg2_binary-2.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:61b047a0537bbc3afae10f134dc6393823882eb263088c271331602b672e52e9"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:964b4dfb7c1c1965ac4c1978b0f755cc4bd698e8aa2b7667c575fb5f04ebe06b"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afe64e9b8ea66866a771996f6ff14447e8082ea26e675a295ad3bdbffdd72afb"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15e2ee79e7cf29582ef770de7dab3d286431b01c3bb598f8e05e09601b890081"}, + 
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfa74c903a3c1f0d9b1c7e7b53ed2d929a4910e272add6700c38f365a6002820"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b83456c2d4979e08ff56180a76429263ea254c3f6552cd14ada95cff1dec9bb8"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0645376d399bfd64da57148694d78e1f431b1e1ee1054872a5713125681cf1be"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e99e34c82309dd78959ba3c1590975b5d3c862d6f279f843d47d26ff89d7d7e1"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4ea29fc3ad9d91162c52b578f211ff1c931d8a38e1f58e684c45aa470adf19e2"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:4ac30da8b4f57187dbf449294d23b808f8f53cad6b1fc3623fa8a6c11d176dd0"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e78e6e2a00c223e164c417628572a90093c031ed724492c763721c2e0bc2a8df"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-win32.whl", hash = "sha256:1876843d8e31c89c399e31b97d4b9725a3575bb9c2af92038464231ec40f9edb"}, + {file = "psycopg2_binary-2.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:b4b24f75d16a89cc6b4cdff0eb6a910a966ecd476d1e73f7ce5985ff1328e9a6"}, + {file = "psycopg2_binary-2.9.6-cp36-cp36m-win32.whl", hash = "sha256:498807b927ca2510baea1b05cc91d7da4718a0f53cb766c154c417a39f1820a0"}, + {file = "psycopg2_binary-2.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0d236c2825fa656a2d98bbb0e52370a2e852e5a0ec45fc4f402977313329174d"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:34b9ccdf210cbbb1303c7c4db2905fa0319391bd5904d32689e6dd5c963d2ea8"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:84d2222e61f313c4848ff05353653bf5f5cf6ce34df540e4274516880d9c3763"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30637a20623e2a2eacc420059be11527f4458ef54352d870b8181a4c3020ae6b"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8122cfc7cae0da9a3077216528b8bb3629c43b25053284cc868744bfe71eb141"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38601cbbfe600362c43714482f43b7c110b20cb0f8172422c616b09b85a750c5"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c7e62ab8b332147a7593a385d4f368874d5fe4ad4e341770d4983442d89603e3"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2ab652e729ff4ad76d400df2624d223d6e265ef81bb8aa17fbd63607878ecbee"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c83a74b68270028dc8ee74d38ecfaf9c90eed23c8959fca95bd703d25b82c88e"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d4e6036decf4b72d6425d5b29bbd3e8f0ff1059cda7ac7b96d6ac5ed34ffbacd"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-win32.whl", hash = "sha256:a8c28fd40a4226b4a84bdf2d2b5b37d2c7bd49486b5adcc200e8c7ec991dfa7e"}, + {file = "psycopg2_binary-2.9.6-cp37-cp37m-win_amd64.whl", hash = "sha256:51537e3d299be0db9137b321dfb6a5022caaab275775680e0c3d281feefaca6b"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cf4499e0a83b7b7edcb8dabecbd8501d0d3a5ef66457200f77bde3d210d5debb"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7e13a5a2c01151f1208d5207e42f33ba86d561b7a89fca67c700b9486a06d0e2"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e0f754d27fddcfd74006455b6e04e6705d6c31a612ec69ddc040a5468e44b4e"}, + {file = 
"psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d57c3fd55d9058645d26ae37d76e61156a27722097229d32a9e73ed54819982a"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71f14375d6f73b62800530b581aed3ada394039877818b2d5f7fc77e3bb6894d"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:441cc2f8869a4f0f4bb408475e5ae0ee1f3b55b33f350406150277f7f35384fc"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:65bee1e49fa6f9cf327ce0e01c4c10f39165ee76d35c846ade7cb0ec6683e303"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:af335bac6b666cc6aea16f11d486c3b794029d9df029967f9938a4bed59b6a19"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:cfec476887aa231b8548ece2e06d28edc87c1397ebd83922299af2e051cf2827"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:65c07febd1936d63bfde78948b76cd4c2a411572a44ac50719ead41947d0f26b"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-win32.whl", hash = "sha256:4dfb4be774c4436a4526d0c554af0cc2e02082c38303852a36f6456ece7b3503"}, + {file = "psycopg2_binary-2.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:02c6e3cf3439e213e4ee930308dc122d6fb4d4bea9aef4a12535fbd605d1a2fe"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e9182eb20f41417ea1dd8e8f7888c4d7c6e805f8a7c98c1081778a3da2bee3e4"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8a6979cf527e2603d349a91060f428bcb135aea2be3201dff794813256c274f1"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8338a271cb71d8da40b023a35d9c1e919eba6cbd8fa20a54b748a332c355d896"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:e3ed340d2b858d6e6fb5083f87c09996506af483227735de6964a6100b4e6a54"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f81e65376e52f03422e1fb475c9514185669943798ed019ac50410fb4c4df232"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfb13af3c5dd3a9588000910178de17010ebcccd37b4f9794b00595e3a8ddad3"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4c727b597c6444a16e9119386b59388f8a424223302d0c06c676ec8b4bc1f963"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:4d67fbdaf177da06374473ef6f7ed8cc0a9dc640b01abfe9e8a2ccb1b1402c1f"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0892ef645c2fabb0c75ec32d79f4252542d0caec1d5d949630e7d242ca4681a3"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:02c0f3757a4300cf379eb49f543fb7ac527fb00144d39246ee40e1df684ab514"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-win32.whl", hash = "sha256:c3dba7dab16709a33a847e5cd756767271697041fbe3fe97c215b1fc1f5c9848"}, + {file = "psycopg2_binary-2.9.6-cp39-cp39-win_amd64.whl", hash = "sha256:f6a88f384335bb27812293fdb11ac6aee2ca3f51d3c7820fe03de0a304ab6249"}, +] + [[package]] name = "pyarrow" version = "12.0.1" description = "Python library for Apache Arrow" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1839,6 +2046,7 @@ numpy = ">=1.16.6" name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1850,6 +2058,7 @@ files = [ name = "pydantic" version = "1.10.11" description = "Data validation and settings management using python type hints" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1902,6 +2111,7 @@ email = ["email-validator (>=1.0.3)"] name 
= "pygments" version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1916,6 +2126,7 @@ plugins = ["importlib-metadata"] name = "pyjwt" version = "2.7.0" description = "JSON Web Token implementation in Python" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1936,6 +2147,7 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] name = "pyparsing" version = "3.0.9" description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "main" optional = false python-versions = ">=3.6.8" files = [ @@ -1950,6 +2162,7 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "pyproject-hooks" version = "1.0.0" description = "Wrappers to call pyproject.toml-based build backend hooks." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1964,6 +2177,7 @@ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} name = "pyrsistent" version = "0.19.3" description = "Persistent/Functional/Immutable data structures" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2000,6 +2214,7 @@ files = [ name = "pytest" version = "7.4.0" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2022,6 +2237,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-checkdocs" version = "2.9.0" description = "check the README when running tests" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2042,6 +2258,7 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec name = "pytest-mock" version = "3.11.1" description = "Thin-wrapper around the mock package for easier use with pytest" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2059,6 +2276,7 @@ dev = 
["pre-commit", "pytest-asyncio", "tox"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -2073,6 +2291,7 @@ six = ">=1.5" name = "python-snappy" version = "0.6.1" description = "Python library for the snappy compression library from Google" +category = "main" optional = true python-versions = "*" files = [ @@ -2130,6 +2349,7 @@ files = [ name = "pytz" version = "2023.3" description = "World timezone definitions, modern and historical" +category = "main" optional = true python-versions = "*" files = [ @@ -2141,6 +2361,7 @@ files = [ name = "pywin32" version = "306" description = "Python for Window Extensions" +category = "main" optional = true python-versions = "*" files = [ @@ -2164,6 +2385,7 @@ files = [ name = "pyyaml" version = "6.0" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2213,6 +2435,7 @@ files = [ name = "ray" version = "2.5.1" description = "Ray provides a simple, universal API for building distributed applications." +category = "main" optional = true python-versions = "*" files = [ @@ -2276,6 +2499,7 @@ tune = ["pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] name = "requests" version = "2.31.0" description = "Python HTTP for Humans." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2297,6 +2521,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-mock" version = "1.11.0" description = "Mock out responses from the requests package" +category = "dev" optional = false python-versions = "*" files = [ @@ -2316,6 +2541,7 @@ test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "tes name = "responses" version = "0.23.1" description = "A utility library for mocking out the `requests` Python library." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2336,6 +2562,7 @@ tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asy name = "rich" version = "13.4.2" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -2355,6 +2582,7 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "s3fs" version = "2023.6.0" description = "Convenient Filesystem interface over S3" +category = "main" optional = true python-versions = ">= 3.8" files = [ @@ -2375,6 +2603,7 @@ boto3 = ["aiobotocore[boto3] (>=2.5.0,<2.6.0)"] name = "s3transfer" version = "0.6.1" description = "An Amazon S3 Transfer Manager" +category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -2392,6 +2621,7 @@ crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] name = "setuptools" version = "68.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2408,6 +2638,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2419,6 +2650,7 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = false python-versions = "*" files = [ @@ -2426,10 +2658,90 @@ files = [ {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, ] +[[package]] +name = "sqlalchemy" +version = "2.0.18" +description = "Database Abstraction Library" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = 
"SQLAlchemy-2.0.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7ddd6d35c598af872f9a0a5bce7f7c4a1841684a72dab3302e3df7f17d1b5249"}, + {file = "SQLAlchemy-2.0.18-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:00aa050faf24ce5f2af643e2b86822fa1d7149649995f11bc1e769bbfbf9010b"}, + {file = "SQLAlchemy-2.0.18-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b52c6741073de5a744d27329f9803938dcad5c9fee7e61690c705f72973f4175"}, + {file = "SQLAlchemy-2.0.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7db97eabd440327c35b751d5ebf78a107f505586485159bcc87660da8bb1fdca"}, + {file = "SQLAlchemy-2.0.18-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:589aba9a35869695b319ed76c6f673d896cd01a7ff78054be1596df7ad9b096f"}, + {file = "SQLAlchemy-2.0.18-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9da4ee8f711e077633730955c8f3cd2485c9abf5ea0f80aac23221a3224b9a8c"}, + {file = "SQLAlchemy-2.0.18-cp310-cp310-win32.whl", hash = "sha256:5dd574a37be388512c72fe0d7318cb8e31743a9b2699847a025e0c08c5bf579d"}, + {file = "SQLAlchemy-2.0.18-cp310-cp310-win_amd64.whl", hash = "sha256:6852cd34d96835e4c9091c1e6087325efb5b607b75fd9f7075616197d1c4688a"}, + {file = "SQLAlchemy-2.0.18-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:10e001a84f820fea2640e4500e12322b03afc31d8f4f6b813b44813b2a7c7e0d"}, + {file = "SQLAlchemy-2.0.18-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bffd6cd47c2e68970039c0d3e355c9ed761d3ca727b204e63cd294cad0e3df90"}, + {file = "SQLAlchemy-2.0.18-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b7b3ebfa9416c8eafaffa65216e229480c495e305a06ba176dcac32710744e6"}, + {file = "SQLAlchemy-2.0.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79228a7b90d95957354f37b9d46f2cc8926262ae17b0d3ed8f36c892f2a37e06"}, + {file = "SQLAlchemy-2.0.18-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:ba633b51835036ff0f402c21f3ff567c565a22ff0a5732b060a68f4660e2a38f"}, + {file = "SQLAlchemy-2.0.18-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8da677135eff43502b7afab5a1e641edfb2dc734ba7fc146e9b1b86817a728e2"}, + {file = "SQLAlchemy-2.0.18-cp311-cp311-win32.whl", hash = "sha256:82edf3a6090554a83942cec79151d6b5eb96e63d143e80e4cf6671e5d772f6be"}, + {file = "SQLAlchemy-2.0.18-cp311-cp311-win_amd64.whl", hash = "sha256:69ae0e9509c43474e33152abe1385b8954922544616426bf793481e1a37e094f"}, + {file = "SQLAlchemy-2.0.18-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:09397a18733fa2a4c7680b746094f980060666ee549deafdb5e102a99ce4619b"}, + {file = "SQLAlchemy-2.0.18-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45b07470571bda5ee7f5ec471271bbde97267cc8403fce05e280c36ea73f4754"}, + {file = "SQLAlchemy-2.0.18-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1aac42a21a7fa6c9665392c840b295962992ddf40aecf0a88073bc5c76728117"}, + {file = "SQLAlchemy-2.0.18-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:da46beef0ce882546d92b7b2e8deb9e04dbb8fec72945a8eb28b347ca46bc15a"}, + {file = "SQLAlchemy-2.0.18-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a6f1d8256d06f58e6ece150fbe05c63c7f9510df99ee8ac37423f5476a2cebb4"}, + {file = "SQLAlchemy-2.0.18-cp37-cp37m-win32.whl", hash = "sha256:67fbb40db3985c0cfb942fe8853ad94a5e9702d2987dec03abadc2f3b6a24afb"}, + {file = "SQLAlchemy-2.0.18-cp37-cp37m-win_amd64.whl", hash = "sha256:afb322ca05e2603deedbcd2e9910f11a3fd2f42bdeafe63018e5641945c7491c"}, + {file = "SQLAlchemy-2.0.18-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:908c850b98cac1e203ababd4ba76868d19ae0d7172cdc75d3f1b7829b16837d2"}, + {file = "SQLAlchemy-2.0.18-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:10514adc41fc8f5922728fbac13d401a1aefcf037f009e64ca3b92464e33bf0e"}, + {file = "SQLAlchemy-2.0.18-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2b791577c546b6bbd7b43953565fcb0a2fec63643ad605353dd48afbc3c48317"}, + {file = "SQLAlchemy-2.0.18-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:420bc6d06d4ae7fb6921524334689eebcbea7bf2005efef070a8562cc9527a37"}, + {file = "SQLAlchemy-2.0.18-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ebdd2418ab4e2e26d572d9a1c03877f8514a9b7436729525aa571862507b3fea"}, + {file = "SQLAlchemy-2.0.18-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:556dc18e39b6edb76239acfd1c010e37395a54c7fde8c57481c15819a3ffb13e"}, + {file = "SQLAlchemy-2.0.18-cp38-cp38-win32.whl", hash = "sha256:7b8cba5a25e95041e3413d91f9e50616bcfaec95afa038ce7dc02efefe576745"}, + {file = "SQLAlchemy-2.0.18-cp38-cp38-win_amd64.whl", hash = "sha256:0f7fdcce52cd882b559a57b484efc92e108efeeee89fab6b623aba1ac68aad2e"}, + {file = "SQLAlchemy-2.0.18-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d7a2c1e711ce59ac9d0bba780318bcd102d2958bb423209f24c6354d8c4da930"}, + {file = "SQLAlchemy-2.0.18-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5c95e3e7cc6285bf7ff263eabb0d3bfe3def9a1ff98124083d45e5ece72f4579"}, + {file = "SQLAlchemy-2.0.18-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc44e50f9d5e96af1a561faa36863f9191f27364a4df3eb70bca66e9370480b6"}, + {file = "SQLAlchemy-2.0.18-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfa1a0f83bdf8061db8d17c2029454722043f1e4dd1b3d3d3120d1b54e75825a"}, + {file = "SQLAlchemy-2.0.18-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:194f2d5a7cb3739875c4d25b3fe288ab0b3dc33f7c857ba2845830c8c51170a0"}, + {file = "SQLAlchemy-2.0.18-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4ebc542d2289c0b016d6945fd07a7e2e23f4abc41e731ac8ad18a9e0c2fd0ec2"}, + {file = "SQLAlchemy-2.0.18-cp39-cp39-win32.whl", hash = "sha256:774bd401e7993452ba0596e741c0c4d6d22f882dd2a798993859181dbffadc62"}, + {file = "SQLAlchemy-2.0.18-cp39-cp39-win_amd64.whl", hash = 
"sha256:2756485f49e7df5c2208bdc64263d19d23eba70666f14ad12d6d8278a2fff65f"}, + {file = "SQLAlchemy-2.0.18-py3-none-any.whl", hash = "sha256:6c5bae4c288bda92a7550fe8de9e068c0a7cd56b1c5d888aae5b40f0e13b40bd"}, + {file = "SQLAlchemy-2.0.18.tar.gz", hash = "sha256:1fb792051db66e09c200e7bc3bda3b1eb18a5b8eb153d2cedb2b14b56a68b8cb"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +typing-extensions = ">=4.2.0" + +[package.extras] +aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx-oracle (>=7)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3-binary"] + [[package]] name = "strictyaml" version = "1.7.3" description = "Strict, typed YAML parser" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -2444,6 +2756,7 @@ python-dateutil = ">=2.6.0" name = "thrift" version = "0.16.0" description = "Python bindings for the Apache Thrift RPC system" +category = "main" optional = true python-versions 
= "*" files = [ @@ -2462,6 +2775,7 @@ twisted = ["twisted"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2473,6 +2787,7 @@ files = [ name = "types-pyyaml" version = "6.0.12.10" description = "Typing stubs for PyYAML" +category = "dev" optional = false python-versions = "*" files = [ @@ -2484,6 +2799,7 @@ files = [ name = "typing-extensions" version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2495,6 +2811,7 @@ files = [ name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" +category = "main" optional = true python-versions = ">=2" files = [ @@ -2506,6 +2823,7 @@ files = [ name = "urllib3" version = "1.26.16" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2522,6 +2840,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "virtualenv" version = "20.23.1" description = "Virtual Python Environment builder" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2542,6 +2861,7 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess name = "werkzeug" version = "2.3.6" description = "The comprehensive WSGI web application library." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2559,6 +2879,7 @@ watchdog = ["watchdog (>=2.3)"] name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." 
+category = "main" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2643,6 +2964,7 @@ files = [ name = "xmltodict" version = "0.13.0" description = "Makes working with XML feel like you are working with JSON" +category = "dev" optional = false python-versions = ">=3.4" files = [ @@ -2654,6 +2976,7 @@ files = [ name = "yarl" version = "1.9.2" description = "Yet another URL library" +category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2741,6 +3064,7 @@ multidict = ">=4.0" name = "zipp" version = "3.15.0" description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2756,6 +3080,7 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more name = "zstandard" version = "0.21.0" description = "Zstandard bindings for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2818,12 +3143,13 @@ glue = ["boto3"] hive = ["thrift"] pandas = ["pandas", "pyarrow"] pyarrow = ["pyarrow"] -ray = ["pandas", "pyarrow", "ray"] +ray = ["ray", "pyarrow", "pandas"] s3fs = ["s3fs"] snappy = ["python-snappy"] +sql-postgres = ["sqlalchemy", "psycopg2-binary"] zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "07c9f390baa9f3c0faf74a7148fea4d369cb17b1850907375fa72140cdeeb75b" +content-hash = "b385b4c5ea9f841eb4bf535301914fef34b3e3c8245d3badba79ade885a1b4be" diff --git a/python/pyiceberg/catalog/__init__.py b/python/pyiceberg/catalog/__init__.py index f1be22b99e82..bc42e5ab0987 100644 --- a/python/pyiceberg/catalog/__init__.py +++ b/python/pyiceberg/catalog/__init__.py @@ -80,6 +80,7 @@ class CatalogType(Enum): HIVE = "hive" GLUE = "glue" DYNAMODB = "dynamodb" + SQL = "sql" def load_rest(name: str, conf: Properties) -> Catalog: @@ -115,11 +116,21 @@ def load_dynamodb(name: str, conf: Properties) -> Catalog: raise NotInstalledError("AWS 
DynamoDB support not installed: pip install 'pyiceberg[dynamodb]'") from exc +def load_sql(name: str, conf: Properties) -> Catalog: + try: + from pyiceberg.catalog.sql import SqlCatalog + + return SqlCatalog(name, **conf) + except ImportError as exc: + raise NotInstalledError("SQLAlchemy support not installed: pip install 'pyiceberg[sql-postgres]'") from exc + + AVAILABLE_CATALOGS: dict[CatalogType, Callable[[str, Properties], Catalog]] = { CatalogType.REST: load_rest, CatalogType.HIVE: load_hive, CatalogType.GLUE: load_glue, CatalogType.DYNAMODB: load_dynamodb, + CatalogType.SQL: load_sql, } @@ -142,6 +153,8 @@ def infer_catalog_type(name: str, catalog_properties: RecursiveDict) -> Optional return CatalogType.REST elif uri.startswith("thrift"): return CatalogType.HIVE + elif uri.startswith("postgresql"): + return CatalogType.SQL else: raise ValueError(f"Could not infer the catalog type from the uri: {uri}") else: diff --git a/python/pyiceberg/catalog/sql.py b/python/pyiceberg/catalog/sql.py new file mode 100644 index 000000000000..4a02deb8f5c0 --- /dev/null +++ b/python/pyiceberg/catalog/sql.py @@ -0,0 +1,480 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import ( + List, + Optional, + Set, + Union, +) + +from sqlalchemy import ( + String, + create_engine, + delete, + insert, + select, + union, + update, +) +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import ( + DeclarativeBase, + Mapped, + MappedAsDataclass, + Session, + mapped_column, +) + +from pyiceberg.catalog import ( + METADATA_LOCATION, + Catalog, + Identifier, + Properties, + PropertiesUpdateSummary, +) +from pyiceberg.exceptions import ( + NamespaceAlreadyExistsError, + NamespaceNotEmptyError, + NoSuchNamespaceError, + NoSuchPropertyException, + NoSuchTableError, + TableAlreadyExistsError, +) +from pyiceberg.io import load_file_io +from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec +from pyiceberg.schema import Schema +from pyiceberg.serializers import FromInputFile +from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table +from pyiceberg.table.metadata import new_table_metadata +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder +from pyiceberg.typedef import EMPTY_DICT + + +class SqlCatalogBaseTable(MappedAsDataclass, DeclarativeBase): + pass + + +class IcebergTables(SqlCatalogBaseTable): + __tablename__ = "iceberg_tables" + + catalog_name: Mapped[str] = mapped_column(String(255), nullable=False, primary_key=True) + table_namespace: Mapped[str] = mapped_column(String(255), nullable=False, primary_key=True) + table_name: Mapped[str] = mapped_column(String(255), nullable=False, primary_key=True) + metadata_location: Mapped[Optional[str]] = mapped_column(String(1000), nullable=True) + previous_metadata_location: Mapped[Optional[str]] = mapped_column(String(1000), nullable=True) + + +class IcebergNamespaceProperties(SqlCatalogBaseTable): + __tablename__ = "iceberg_namespace_properties" + # Catalog minimum Namespace Properties + NAMESPACE_MINIMAL_PROPERTIES = {"exists": "true"} + + catalog_name: Mapped[str] = mapped_column(String(255), nullable=False, 
primary_key=True) + namespace: Mapped[str] = mapped_column(String(255), nullable=False, primary_key=True) + property_key: Mapped[str] = mapped_column(String(255), nullable=False, primary_key=True) + property_value: Mapped[str] = mapped_column(String(1000), nullable=False) + + +class SqlCatalog(Catalog): + def __init__(self, name: str, **properties: str): + super().__init__(name, **properties) + + if not (uri_prop := self.properties.get("uri")): + raise NoSuchPropertyException("SQL connection URI is required") + self.engine = create_engine(uri_prop, echo=True) + + def create_tables(self) -> None: + SqlCatalogBaseTable.metadata.create_all(self.engine) + + def destroy_tables(self) -> None: + SqlCatalogBaseTable.metadata.drop_all(self.engine) + + def _convert_orm_to_iceberg(self, orm_table: IcebergTables) -> Table: + # Check for expected properties. + if not (metadata_location := orm_table.metadata_location): + raise NoSuchTableError(f"Table property {METADATA_LOCATION} is missing") + if not (table_namespace := orm_table.table_namespace): + raise NoSuchTableError(f"Table property {IcebergTables.table_namespace} is missing") + if not (table_name := orm_table.table_name): + raise NoSuchTableError(f"Table property {IcebergTables.table_name} is missing") + + io = load_file_io(properties=self.properties, location=metadata_location) + file = io.new_input(metadata_location) + metadata = FromInputFile.table_metadata(file) + return Table( + identifier=(self.name, table_namespace, table_name), + metadata=metadata, + metadata_location=metadata_location, + io=self._load_file_io(metadata.properties, metadata_location), + catalog=self, + ) + + def create_table( + self, + identifier: Union[str, Identifier], + schema: Schema, + location: Optional[str] = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, + properties: Properties = EMPTY_DICT, + ) -> Table: + """ + Create an Iceberg table. 
+ + Args: + identifier: Table identifier. + schema: Table's schema. + location: Location for the table. Optional Argument. + partition_spec: PartitionSpec for the table. + sort_order: SortOrder for the table. + properties: Table properties that can be a string based dictionary. + + Returns: + Table: the created table instance. + + Raises: + AlreadyExistsError: If a table with the name already exists. + ValueError: If the identifier is invalid, or no path is given to store metadata. + + """ + database_name, table_name = self.identifier_to_database_and_table(identifier) + if not self._namespace_exists(database_name): + raise NoSuchNamespaceError(f"Namespace does not exist: {database_name}") + + location = self._resolve_table_location(location, database_name, table_name) + metadata_location = self._get_metadata_location(location=location) + metadata = new_table_metadata( + location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties + ) + io = load_file_io(properties=self.properties, location=metadata_location) + self._write_metadata(metadata, io, metadata_location) + + with Session(self.engine) as session: + try: + session.add( + IcebergTables( + catalog_name=self.name, + table_namespace=database_name, + table_name=table_name, + metadata_location=metadata_location, + previous_metadata_location=None, + ) + ) + session.commit() + except IntegrityError as e: + raise TableAlreadyExistsError(f"Table {database_name}.{table_name} already exists") from e + + return self.load_table(identifier=identifier) + + def load_table(self, identifier: Union[str, Identifier]) -> Table: + """Loads the table's metadata and returns the table instance. + + You can also use this method to check for table existence using 'try catalog.table() except NoSuchTableError'. + Note: This method doesn't scan data stored in the table. + + Args: + identifier (str | Identifier): Table identifier. + + Returns: + Table: the table instance with its metadata. 
+ + Raises: + NoSuchTableError: If a table with the name does not exist. + """ + database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError) + with Session(self.engine) as session: + stmt = select(IcebergTables).where( + IcebergTables.catalog_name == self.name, + IcebergTables.table_namespace == database_name, + IcebergTables.table_name == table_name, + ) + result = session.scalar(stmt) + if result: + return self._convert_orm_to_iceberg(result) + raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") + + def drop_table(self, identifier: Union[str, Identifier]) -> None: + """Drop a table. + + Args: + identifier (str | Identifier): Table identifier. + + Raises: + NoSuchTableError: If a table with the name does not exist. + """ + database_name, table_name = self.identifier_to_database_and_table(identifier, NoSuchTableError) + with Session(self.engine) as session: + res = session.execute( + delete(IcebergTables).where( + IcebergTables.catalog_name == self.name, + IcebergTables.table_namespace == database_name, + IcebergTables.table_name == table_name, + ) + ) + session.commit() + if res.rowcount < 1: + raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") + + def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: + """Rename a fully classified table name. + + Args: + from_identifier (str | Identifier): Existing table identifier. + to_identifier (str | Identifier): New table identifier. + + Returns: + Table: the updated table instance with its metadata. + + Raises: + NoSuchTableError: If a table with the name does not exist. + TableAlreadyExistsError: If a table with the new name already exist. + NoSuchNamespaceError: If the target namespace does not exist. 
+ """ + from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier, NoSuchTableError) + to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier) + if not self._namespace_exists(to_database_name): + raise NoSuchNamespaceError(f"Namespace does not exist: {to_database_name}") + with Session(self.engine) as session: + try: + stmt = ( + update(IcebergTables) + .where( + IcebergTables.catalog_name == self.name, + IcebergTables.table_namespace == from_database_name, + IcebergTables.table_name == from_table_name, + ) + .values(table_namespace=to_database_name, table_name=to_table_name) + ) + result = session.execute(stmt) + if result.rowcount < 1: + raise NoSuchTableError(f"Table does not exist: {from_table_name}") + session.commit() + except IntegrityError as e: + raise TableAlreadyExistsError(f"Table {to_database_name}.{to_table_name} already exists") from e + return self.load_table(to_identifier) + + def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: + """Updates one or more tables. + + Args: + table_request (CommitTableRequest): The table requests to be carried out. + + Returns: + CommitTableResponse: The updated metadata. + + Raises: + NoSuchTableError: If a table with the given identifier does not exist. 
+ """ + raise NotImplementedError + + def _namespace_exists(self, identifier: Union[str, Identifier]) -> bool: + namespace = self.identifier_to_database(identifier) + with Session(self.engine) as session: + stmt = ( + select(IcebergTables) + .where(IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == namespace) + .limit(1) + ) + result = session.execute(stmt).all() + if result: + return True + stmt = ( + select(IcebergNamespaceProperties) + .where( + IcebergNamespaceProperties.catalog_name == self.name, + IcebergNamespaceProperties.namespace == namespace, + ) + .limit(1) + ) + result = session.execute(stmt).all() + if result: + return True + return False + + def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None: + """Create a namespace in the catalog. + + Args: + namespace (str | Identifier): Namespace identifier. + properties (Properties): A string dictionary of properties for the given namespace. + + Raises: + NamespaceAlreadyExistsError: If a namespace with the given name already exists. + """ + if not properties: + properties = IcebergNamespaceProperties.NAMESPACE_MINIMAL_PROPERTIES + database_name = self.identifier_to_database(namespace) + if self._namespace_exists(database_name): + raise NamespaceAlreadyExistsError(f"Database {database_name} already exists") + + create_properties = properties if properties else IcebergNamespaceProperties.NAMESPACE_MINIMAL_PROPERTIES + with Session(self.engine) as session: + for key, value in create_properties.items(): + session.add( + IcebergNamespaceProperties( + catalog_name=self.name, namespace=database_name, property_key=key, property_value=value + ) + ) + session.commit() + + def drop_namespace(self, namespace: Union[str, Identifier]) -> None: + """Drop a namespace. + + Args: + namespace (str | Identifier): Namespace identifier. + + Raises: + NoSuchNamespaceError: If a namespace with the given name does not exist. 
+ NamespaceNotEmptyError: If the namespace is not empty. + """ + database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) + # Honour the documented contract: dropping a namespace that does not exist is an error, not a silent no-op. + if not self._namespace_exists(database_name): + raise NoSuchNamespaceError(f"Namespace does not exist: {database_name}") + # Refuse to drop while tables are still registered under the namespace. + if tables := self.list_tables(database_name): + raise NamespaceNotEmptyError(f"Database {database_name} is not empty. {len(tables)} tables exist.") + + with Session(self.engine) as session: + session.execute( + delete(IcebergNamespaceProperties).where( + IcebergNamespaceProperties.catalog_name == self.name, + IcebergNamespaceProperties.namespace == database_name, + ) + ) + session.commit() + + def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: + """List tables under the given namespace in the catalog. + + If namespace not provided, will list all tables in the catalog. + + Args: + namespace (str | Identifier): Namespace identifier to search. + + Returns: + List[Identifier]: list of table identifiers. + + Raises: + NoSuchNamespaceError: If a namespace with the given name does not exist. + """ + database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) + + stmt = select(IcebergTables).where( + IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == database_name + ) + with Session(self.engine) as session: + result = session.scalars(stmt) + return [(table.table_namespace, table.table_name) for table in result] + + def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: + """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog. + + Args: + namespace (str | Identifier): Namespace identifier to search. + + Returns: + List[Identifier]: a List of namespace identifiers. + + Raises: + NoSuchNamespaceError: If a namespace with the given name does not exist.
+ """ + if namespace and not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") + + table_stmt = select(IcebergTables.table_namespace).where(IcebergTables.catalog_name == self.name) + namespace_stmt = select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name == self.name) + if namespace: + database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) + # Compare with equality, as _namespace_exists does: LIKE would treat "_" and "%" in the name as wildcards + # and return unrelated namespaces. + table_stmt = table_stmt.where(IcebergTables.table_namespace == database_name) + namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace == database_name) + # UNION de-duplicates namespaces that appear both as table owners and as property rows. + stmt = union( + table_stmt, + namespace_stmt, + ) + with Session(self.engine) as session: + return [self.identifier_to_tuple(namespace_col) for namespace_col in session.execute(stmt).scalars()] + + def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: + """Get properties for a namespace. + + Args: + namespace (str | Identifier): Namespace identifier. + + Returns: + Properties: Properties for the given namespace. + + Raises: + NoSuchNamespaceError: If a namespace with the given name does not exist. + """ + database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) + + stmt = select(IcebergNamespaceProperties).where( + IcebergNamespaceProperties.catalog_name == self.name, IcebergNamespaceProperties.namespace == database_name + ) + with Session(self.engine) as session: + result = session.scalars(stmt) + return {props.property_key: props.property_value for props in result} + + def update_namespace_properties( + self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = EMPTY_DICT + ) -> PropertiesUpdateSummary: + """Removes provided property keys and updates properties for a namespace. + + Args: + namespace (str | Identifier): Namespace identifier. + removals (Set[str]): Set of property keys that need to be removed. Optional Argument.
+ updates (Properties): Properties to be updated for the given namespace. + + Raises: + NoSuchNamespaceError: If a namespace with the given name does not exist. + ValueError: If removals and updates have overlapping keys. + """ + database_name = self.identifier_to_database(namespace) + if not self._namespace_exists(database_name): + raise NoSuchNamespaceError(f"Database {database_name} does not exists") + + current_properties = self.load_namespace_properties(namespace=namespace) + properties_update_summary = self._get_updated_props_and_update_summary( + current_properties=current_properties, removals=removals, updates=updates + )[0] + + with Session(self.engine) as session: + if removals: + delete_stmt = delete(IcebergNamespaceProperties).where( + IcebergNamespaceProperties.catalog_name == self.name, + IcebergNamespaceProperties.namespace == database_name, + IcebergNamespaceProperties.property_key.in_(removals), + ) + session.execute(delete_stmt) + + if updates: + # SQLAlchemy does not (yet) support engine agnostic UPSERT + # https://docs.sqlalchemy.org/en/20/orm/queryguide/dml.html#orm-upsert-statements + # This is not a problem since it runs in a single transaction + delete_stmt = delete(IcebergNamespaceProperties).where( + IcebergNamespaceProperties.catalog_name == self.name, + IcebergNamespaceProperties.namespace == database_name, + IcebergNamespaceProperties.property_key.in_(set(updates.keys())), + ) + session.execute(delete_stmt) + # Insert one row per updated key. Chaining Insert.values(**kwargs) merges the keyword sets into a + # single parameter set, so only the last property in `updates` would have been written. + session.execute( + insert(IcebergNamespaceProperties), + [ + { + "catalog_name": self.name, + "namespace": database_name, + "property_key": property_key, + "property_value": property_value, + } + for property_key, property_value in updates.items() + ], + ) + session.commit() + return properties_update_summary diff --git a/python/pyproject.toml b/python/pyproject.toml index 14bed5e4672e..c8175fa28c3f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -62,6 +62,8 @@ thrift = { version = 
">=0.13.0,<1.0.0", optional = true } boto3 = { version = ">=1.17.106", optional = true } s3fs = { version = ">=2021.08.0,<2024.1.0", optional = true } # Upper bound set arbitrarily, to be reassessed in early 2024. adlfs = { version = ">=2021.07.0,<2024.1.0", optional = true } # Upper bound set arbitrarily, to be reassessed in early 2024. +psycopg2-binary = { version = ">=2.9.6", optional = true } +sqlalchemy = { version = "^2.0.18", optional = true } [tool.poetry.dev-dependencies] pytest = "7.4.0" @@ -97,6 +99,7 @@ glue = ["boto3"] adlfs = ["adlfs"] dynamodb = ["boto3"] zstandard = ["zstandard"] +sql-postgres = ["sqlalchemy", "psycopg2-binary"] [tool.pytest.ini_options] markers = [ @@ -252,5 +255,9 @@ ignore_missing_imports = true module = "numpy.*" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "sqlalchemy.*" +ignore_missing_imports = true + [tool.coverage.run] source = ['pyiceberg/'] diff --git a/python/tests/catalog/test_sql.py b/python/tests/catalog/test_sql.py new file mode 100644 index 000000000000..2b903093a491 --- /dev/null +++ b/python/tests/catalog/test_sql.py @@ -0,0 +1,355 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import os +from pathlib import Path +from typing import Generator, List + +import pytest +from pytest import TempPathFactory +from sqlalchemy.exc import ArgumentError, IntegrityError + +from pyiceberg.catalog import Identifier +from pyiceberg.catalog.sql import SqlCatalog +from pyiceberg.exceptions import ( + NamespaceAlreadyExistsError, + NamespaceNotEmptyError, + NoSuchNamespaceError, + NoSuchPropertyException, + NoSuchTableError, + TableAlreadyExistsError, +) +from pyiceberg.schema import Schema +from pyiceberg.table.sorting import ( + NullOrder, + SortDirection, + SortField, + SortOrder, +) +from pyiceberg.transforms import IdentityTransform + + +@pytest.fixture(name="warehouse", scope="session") +def fixture_warehouse(tmp_path_factory: TempPathFactory) -> Path: + return tmp_path_factory.mktemp("test_sql") + + +@pytest.fixture(name="random_identifier") +def fixture_random_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: + os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) + return database_name, table_name + + +@pytest.fixture(name="another_random_identifier") +def fixture_another_random_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: + database_name = database_name + "_new" + table_name = table_name + "_new" + os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) + return database_name, table_name + + +@pytest.fixture(name="test_catalog", scope="module") +def fixture_test_catalog(warehouse: Path) -> Generator[SqlCatalog, None, None]: + props = { + "uri": "sqlite+pysqlite:///:memory:", + "warehouse": f"file://{warehouse}", + } + test_catalog = SqlCatalog("test_sql_catalog", **props) + test_catalog.create_tables() + yield test_catalog + test_catalog.destroy_tables() + + +def test_creation_with_no_uri() -> None: + with pytest.raises(NoSuchPropertyException): + SqlCatalog("test_ddb_catalog", not_uri="unused") + + +def 
test_creation_with_unsupported_uri() -> None: + with pytest.raises(ArgumentError): + SqlCatalog("test_ddb_catalog", uri="unsupported:xxx") + + +def test_create_tables_idempotency(test_catalog: SqlCatalog) -> None: + # Second initialization should not fail even if tables are already created + test_catalog.create_tables() + test_catalog.create_tables() + + +def test_create_table_default_sort_order( + test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier +) -> None: + database_name, _table_name = random_identifier + test_catalog.create_namespace(database_name) + table = test_catalog.create_table(random_identifier, table_schema_nested) + assert table.sort_order().order_id == 0, "Order ID must match" + assert table.sort_order().is_unsorted is True, "Order must be unsorted" + test_catalog.drop_table(random_identifier) + + +def test_create_table_custom_sort_order( + test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier +) -> None: + database_name, _table_name = random_identifier + test_catalog.create_namespace(database_name) + order = SortOrder(SortField(source_id=2, transform=IdentityTransform(), null_order=NullOrder.NULLS_FIRST)) + table = test_catalog.create_table(random_identifier, table_schema_nested, sort_order=order) + given_sort_order = table.sort_order() + assert given_sort_order.order_id == 1, "Order ID must match" + assert len(given_sort_order.fields) == 1, "Order must have 1 field" + assert given_sort_order.fields[0].direction == SortDirection.ASC, "Direction must match" + assert given_sort_order.fields[0].null_order == NullOrder.NULLS_FIRST, "Null order must match" + assert isinstance(given_sort_order.fields[0].transform, IdentityTransform), "Transform must match" + test_catalog.drop_table(random_identifier) + + +def test_create_table_with_default_warehouse_location( + warehouse: Path, test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier +) -> None: + database_name, 
_table_name = random_identifier + test_catalog.create_namespace(database_name) + test_catalog.create_table(random_identifier, table_schema_nested) + table = test_catalog.load_table(random_identifier) + assert table.identifier == (test_catalog.name,) + random_identifier + assert table.metadata_location.startswith(f"file://{warehouse}") + assert os.path.exists(table.metadata_location[len("file://") :]) + test_catalog.drop_table(random_identifier) + + +def test_create_duplicated_table(test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: + database_name, _table_name = random_identifier + test_catalog.create_namespace(database_name) + test_catalog.create_table(random_identifier, table_schema_nested) + with pytest.raises(TableAlreadyExistsError): + test_catalog.create_table(random_identifier, table_schema_nested) + + +def test_create_table_with_non_existing_namespace(test_catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: + identifier = ("invalid", table_name) + with pytest.raises(NoSuchNamespaceError): + test_catalog.create_table(identifier, table_schema_nested) + + +def test_create_table_without_namespace(test_catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: + with pytest.raises(ValueError): + test_catalog.create_table(table_name, table_schema_nested) + + +def test_load_table(test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: + database_name, _table_name = random_identifier + test_catalog.create_namespace(database_name) + table = test_catalog.create_table(random_identifier, table_schema_nested) + loaded_table = test_catalog.load_table(random_identifier) + assert table.identifier == loaded_table.identifier + assert table.metadata_location == loaded_table.metadata_location + assert table.metadata == loaded_table.metadata + + +def test_drop_table(test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> 
None: + database_name, _table_name = random_identifier + test_catalog.create_namespace(database_name) + table = test_catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (test_catalog.name,) + random_identifier + test_catalog.drop_table(random_identifier) + with pytest.raises(NoSuchTableError): + test_catalog.load_table(random_identifier) + + +def test_drop_table_that_does_not_exist(test_catalog: SqlCatalog, random_identifier: Identifier) -> None: + with pytest.raises(NoSuchTableError): + test_catalog.drop_table(random_identifier) + + +def test_rename_table( + test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier +) -> None: + from_database_name, _from_table_name = random_identifier + to_database_name, _to_table_name = another_random_identifier + test_catalog.create_namespace(from_database_name) + test_catalog.create_namespace(to_database_name) + table = test_catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (test_catalog.name,) + random_identifier + test_catalog.rename_table(random_identifier, another_random_identifier) + new_table = test_catalog.load_table(another_random_identifier) + assert new_table.identifier == (test_catalog.name,) + another_random_identifier + assert new_table.metadata_location == table.metadata_location + with pytest.raises(NoSuchTableError): + test_catalog.load_table(random_identifier) + + +def test_rename_table_to_existing_one( + test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier +) -> None: + from_database_name, _from_table_name = random_identifier + to_database_name, _to_table_name = another_random_identifier + test_catalog.create_namespace(from_database_name) + test_catalog.create_namespace(to_database_name) + table = test_catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == 
(test_catalog.name,) + random_identifier + new_table = test_catalog.create_table(another_random_identifier, table_schema_nested) + assert new_table.identifier == (test_catalog.name,) + another_random_identifier + with pytest.raises(TableAlreadyExistsError): + test_catalog.rename_table(random_identifier, another_random_identifier) + + +def test_rename_missing_table( + test_catalog: SqlCatalog, random_identifier: Identifier, another_random_identifier: Identifier +) -> None: + to_database_name, _to_table_name = another_random_identifier + test_catalog.create_namespace(to_database_name) + with pytest.raises(NoSuchTableError): + test_catalog.rename_table(random_identifier, another_random_identifier) + + +def test_rename_table_to_missing_namespace( + test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier +) -> None: + from_database_name, _from_table_name = random_identifier + test_catalog.create_namespace(from_database_name) + table = test_catalog.create_table(random_identifier, table_schema_nested) + assert table.identifier == (test_catalog.name,) + random_identifier + with pytest.raises(NoSuchNamespaceError): + test_catalog.rename_table(random_identifier, another_random_identifier) + + +def test_list_tables( + test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier +) -> None: + database_name_1, _table_name_1 = random_identifier + database_name_2, _table_name_2 = another_random_identifier + test_catalog.create_namespace(database_name_1) + test_catalog.create_namespace(database_name_2) + test_catalog.create_table(random_identifier, table_schema_nested) + test_catalog.create_table(another_random_identifier, table_schema_nested) + identifier_list = test_catalog.list_tables(database_name_1) + assert len(identifier_list) == 1 + assert random_identifier in identifier_list + + identifier_list = test_catalog.list_tables(database_name_2) + 
assert len(identifier_list) == 1 + assert another_random_identifier in identifier_list + + +def test_create_namespace(test_catalog: SqlCatalog, database_name: str) -> None: + test_catalog.create_namespace(database_name) + assert (database_name,) in test_catalog.list_namespaces() + + +def test_create_duplicate_namespace(test_catalog: SqlCatalog, database_name: str) -> None: + test_catalog.create_namespace(database_name) + with pytest.raises(NamespaceAlreadyExistsError): + test_catalog.create_namespace(database_name) + + +def test_create_namespaces_sharing_same_prefix(test_catalog: SqlCatalog, database_name: str) -> None: + test_catalog.create_namespace(database_name + "_1") + # Second namespace is a prefix of the first one, make sure it can be added. + test_catalog.create_namespace(database_name) + + +def test_create_namespace_with_comment_and_location(test_catalog: SqlCatalog, database_name: str) -> None: + test_location = "/test/location" + test_properties = { + "comment": "this is a test description", + "location": test_location, + } + test_catalog.create_namespace(namespace=database_name, properties=test_properties) + loaded_database_list = test_catalog.list_namespaces() + assert (database_name,) in loaded_database_list + properties = test_catalog.load_namespace_properties(database_name) + assert properties["comment"] == "this is a test description" + assert properties["location"] == test_location + + +def test_create_namespace_with_null_properties(test_catalog: SqlCatalog, database_name: str) -> None: + with pytest.raises(IntegrityError): + test_catalog.create_namespace(namespace=database_name, properties={None: "value"}) # type: ignore + + with pytest.raises(IntegrityError): + test_catalog.create_namespace(namespace=database_name, properties={"key": None}) # type: ignore + + +def test_list_namespaces(test_catalog: SqlCatalog, database_list: List[str]) -> None: + for database_name in database_list: + test_catalog.create_namespace(database_name) + db_list = 
test_catalog.list_namespaces() + for database_name in database_list: + assert (database_name,) in db_list + assert len(test_catalog.list_namespaces(database_name)) == 1 + + +def test_list_non_existing_namespaces(test_catalog: SqlCatalog) -> None: + with pytest.raises(NoSuchNamespaceError): + test_catalog.list_namespaces("does_not_exist") + + +def test_drop_namespace(test_catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: + database_name, table_name = random_identifier + test_catalog.create_namespace(database_name) + assert (database_name,) in test_catalog.list_namespaces() + test_catalog.create_table((database_name, table_name), table_schema_nested) + with pytest.raises(NamespaceNotEmptyError): + test_catalog.drop_namespace(database_name) + test_catalog.drop_table((database_name, table_name)) + test_catalog.drop_namespace(database_name) + assert (database_name,) not in test_catalog.list_namespaces() + + +def test_load_namespace_properties(test_catalog: SqlCatalog, database_name: str) -> None: + warehouse_location = "/test/location" + test_properties = { + "comment": "this is a test description", + "location": f"{warehouse_location}/{database_name}.db", + "test_property1": "1", + "test_property2": "2", + "test_property3": "3", + } + + test_catalog.create_namespace(database_name, test_properties) + listed_properties = test_catalog.load_namespace_properties(database_name) + for k, v in listed_properties.items(): + assert k in test_properties + assert v == test_properties[k] + + +def test_load_empty_namespace_properties(test_catalog: SqlCatalog, database_name: str) -> None: + test_catalog.create_namespace(database_name) + listed_properties = test_catalog.load_namespace_properties(database_name) + assert listed_properties == {"exists": "true"} + + +def test_update_namespace_properties(test_catalog: SqlCatalog, database_name: str) -> None: + warehouse_location = "/test/location" + test_properties = { + "comment": "this is a test 
description", + "location": f"{warehouse_location}/{database_name}.db", + "test_property1": "1", + "test_property2": "2", + "test_property3": "3", + } + removals = {"test_property1", "test_property2", "test_property3", "should_not_removed"} + updates = {"test_property4": "4", "test_property5": "5", "comment": "updated test description"} + test_catalog.create_namespace(database_name, test_properties) + update_report = test_catalog.update_namespace_properties(database_name, removals, updates) + for k in updates.keys(): + assert k in update_report.updated + for k in removals: + if k == "should_not_removed": + assert k in update_report.missing + else: + assert k in update_report.removed + assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"]